From 3e2d5b2b9a45fd185c847f432950f2530171d216 Mon Sep 17 00:00:00 2001 From: Kent Overstreet <kent.overstreet@linux.dev> Date: Sat, 29 Mar 2025 21:46:58 -0400 Subject: [PATCH] Update bcachefs sources to 0a2abe7ce837 bcachefs: Don't use designated initializers for disk_accounting_pos Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> --- .bcachefs_revision | 2 +- include/linux/sched/sysctl.h | 6 + libbcachefs/alloc_background.c | 22 +-- libbcachefs/backpointers.c | 43 +++--- libbcachefs/bkey.h | 1 - libbcachefs/btree_cache.c | 2 +- libbcachefs/btree_gc.c | 23 ++- libbcachefs/btree_io.c | 65 ++++---- libbcachefs/btree_iter.c | 14 +- libbcachefs/btree_iter.h | 1 - libbcachefs/btree_journal_iter.c | 2 + libbcachefs/btree_node_scan.c | 14 +- libbcachefs/btree_update_interior.c | 89 ++++++----- libbcachefs/buckets.c | 161 +++++++++++--------- libbcachefs/chardev.c | 6 +- libbcachefs/disk_accounting.c | 29 ++-- libbcachefs/ec.c | 22 +-- libbcachefs/errcode.h | 3 + libbcachefs/error.c | 226 ++++++++++++++++++++-------- libbcachefs/error.h | 48 +++--- libbcachefs/extents.c | 13 +- libbcachefs/fs-io.c | 29 ++-- libbcachefs/fs-ioctl.c | 1 + libbcachefs/fs.c | 5 +- libbcachefs/fsck.c | 18 ++- libbcachefs/io_read.c | 124 +-------------- libbcachefs/io_read.h | 5 +- libbcachefs/journal_io.c | 17 ++- libbcachefs/lru.c | 7 +- libbcachefs/move.c | 26 +--- libbcachefs/namei.c | 4 +- libbcachefs/opts.c | 4 +- libbcachefs/printbuf.c | 19 +++ libbcachefs/printbuf.h | 1 + libbcachefs/progress.c | 6 +- libbcachefs/rebalance.c | 11 +- libbcachefs/recovery_passes.c | 12 +- libbcachefs/reflink.c | 12 +- libbcachefs/sb-counters_format.h | 1 - libbcachefs/sb-errors_format.h | 3 +- libbcachefs/snapshot.c | 16 +- libbcachefs/str_hash.c | 2 +- libbcachefs/subvolume.c | 1 + libbcachefs/super.c | 38 ++--- libbcachefs/sysfs.c | 125 +-------------- libbcachefs/time_stats.c | 14 +- libbcachefs/trace.h | 5 - libbcachefs/util.c | 2 +- libbcachefs/util.h | 1 + 49 files changed, 608 insertions(+), 693 deletions(-) create mode 100644 include/linux/sched/sysctl.h diff --git a/.bcachefs_revision b/.bcachefs_revision index b81d691d..4ddc31c3 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -e2e7dcddb3660e90a972473bb10de570964754d7 +0a2abe7ce8373ede16f2666b5a789f389ac292ef diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h new file mode 100644 index 00000000..1bda4020 --- /dev/null +++ b/include/linux/sched/sysctl.h @@ -0,0 +1,6 @@ +#ifndef __TOOLS_LINUX_SCHED_SYSCTL_H +#define __TOOLS_LINUX_SCHED_SYSCTL_H + +#define sysctl_hung_task_timeout_secs (HZ * 10) + +#endif /* __TOOLS_LINUX_SCHED_SYSCTL_H */ diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 5fb396be..c12ca753 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -589,6 +589,8 @@ iter_err: int bch2_alloc_read(struct bch_fs *c) { + down_read(&c->state_lock); + struct btree_trans *trans = bch2_trans_get(c); struct bch_dev *ca = NULL; int ret; @@ -652,6 +654,7 @@ int bch2_alloc_read(struct bch_fs *c) bch2_dev_put(ca); bch2_trans_put(trans); + up_read(&c->state_lock); bch_err_fn(c, ret); return ret; } @@ -673,8 +676,7 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans, bch2_bkey_val_to_text(&buf, c, alloc_k); int ret = __bch2_fsck_err(NULL, trans, flags, err_id, - "bucket incorrectly %sset in %s btree\n" - " %s", + "bucket incorrectly %sset in %s btree\n%s", set ? "" : "un", bch2_btree_id_str(btree), buf.buf); @@ -1027,7 +1029,7 @@ fsck_err: bch2_dev_put(ca); return ret; invalid_bucket: - bch2_fs_inconsistent(c, "reference to invalid bucket\n %s", + bch2_fs_inconsistent(c, "reference to invalid bucket\n%s", (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf)); ret = -BCH_ERR_trigger_alloc; goto err; @@ -1201,8 +1203,7 @@ int bch2_check_alloc_key(struct btree_trans *trans, if (fsck_err_on(a->gen != alloc_gen(k, gens_offset), trans, bucket_gens_key_wrong, - "incorrect gen in bucket_gens btree (got %u should be %u)\n" - " %s", + "incorrect gen in bucket_gens btree (got %u should be %u)\n%s", alloc_gen(k, gens_offset), a->gen, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { @@ -1260,7 +1261,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, if (fsck_err_on(k.k->type != KEY_TYPE_set, trans, freespace_hole_missing, "hole in alloc btree missing in freespace btree\n" - " device %llu buckets %llu-%llu", + "device %llu buckets %llu-%llu", freespace_iter->pos.inode, freespace_iter->pos.offset, end->offset)) { @@ -1419,7 +1420,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite (state == BCH_DATA_free && genbits != alloc_freespace_genbits(*a))) { if (fsck_err(trans, need_discard_freespace_key_bad, - "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", + "%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), bch2_btree_id_str(iter->btree_id), iter->pos.inode, @@ -1500,7 +1501,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode); if (!ca) { if (fsck_err(trans, bucket_gens_to_invalid_dev, - "bucket_gens key for invalid device:\n %s", + "bucket_gens key for invalid device:\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ret = bch2_btree_delete_at(trans, iter, 0); goto out; @@ -1509,7 +1510,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, if (fsck_err_on(end <= ca->mi.first_bucket || start >= ca->mi.nbuckets, trans, bucket_gens_to_invalid_buckets, - "bucket_gens key for invalid buckets:\n %s", + "bucket_gens key for invalid buckets:\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, iter, 0); goto out; @@ -1712,8 +1713,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, if (fsck_err_on(!a->io_time[READ], trans, alloc_key_cached_but_read_time_zero, - "cached bucket with read_time 0\n" - " %s", + "cached bucket with read_time 0\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { struct bkey_i_alloc_v4 *a_mut = diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 20c497f0..21d1d86d 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -96,6 +96,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; + int ret = 0; if (insert) { prt_printf(&buf, "existing backpointer found when inserting "); @@ -125,17 +126,15 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, prt_printf(&buf, "for "); bch2_bkey_val_to_text(&buf, c, orig_k); - - bch_err(c, "%s", buf.buf); } + if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers && + __bch2_inconsistent_error(c, &buf)) + ret = -BCH_ERR_erofs_unfixed_errors; + + bch_err(c, "%s", buf.buf); printbuf_exit(&buf); - - if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { - return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0; - } else { - return 0; - } + return ret; } int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, @@ -210,11 +209,11 @@ static int backpointer_target_not_found(struct btree_trans *trans, if (ret) return ret; - prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", + prt_printf(&buf, "backpointer doesn't match %s it points to:\n", bp.v->level ? "btree node" : "extent"); bch2_bkey_val_to_text(&buf, c, bp.s_c); - prt_printf(&buf, "\n "); + prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, target_k); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k); @@ -222,7 +221,7 @@ static int backpointer_target_not_found(struct btree_trans *trans, struct extent_ptr_decoded p; bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry) if (p.ptr.dev == bp.k->p.inode) { - prt_printf(&buf, "\n "); + prt_newline(&buf); struct bkey_i_backpointer bp2; bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i)); @@ -443,12 +442,11 @@ found: if (ret) goto err; - prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); - prt_printf(&buf, "\n "); + prt_printf(&buf, "extents pointing to same space, but first extent checksum bad:\n"); bch2_btree_id_to_text(&buf, btree); prt_str(&buf, " "); bch2_bkey_val_to_text(&buf, c, extent); - prt_printf(&buf, "\n "); + prt_newline(&buf); bch2_btree_id_to_text(&buf, o_btree); prt_str(&buf, " "); bch2_bkey_val_to_text(&buf, c, extent2); @@ -539,9 +537,9 @@ check_existing_bp: if (bch2_extents_match(orig_k, other_extent)) { printbuf_reset(&buf); - prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n "); + prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n"); bch2_bkey_val_to_text(&buf, c, orig_k); - prt_str(&buf, "\n "); + prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, other_extent); bch_err(c, "%s", buf.buf); @@ -580,20 +578,20 @@ check_existing_bp: } printbuf_reset(&buf); - prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bp->k.p.inode); + prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n", bp->k.p.inode); bch2_bkey_val_to_text(&buf, c, orig_k); - prt_str(&buf, "\n "); + prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, other_extent); bch_err(c, "%s", buf.buf); ret = -BCH_ERR_fsck_repair_unimplemented; goto err; missing: printbuf_reset(&buf); - prt_str(&buf, "missing backpointer\n for: "); + prt_str(&buf, "missing backpointer\nfor: "); bch2_bkey_val_to_text(&buf, c, orig_k); - prt_printf(&buf, "\n want: "); + prt_printf(&buf, "\nwant: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i)); - prt_printf(&buf, "\n got: "); + prt_printf(&buf, "\ngot: "); bch2_bkey_val_to_text(&buf, c, bp_k); if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) @@ -1023,7 +1021,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) * Can't allow devices to come/go/resize while we have bucket bitmaps * allocated */ - lockdep_assert_held(&c->state_lock); + down_read(&c->state_lock); for_each_member_device(c, ca) { BUG_ON(ca->bucket_backpointer_mismatches); @@ -1108,6 +1106,7 @@ err_free_bitmaps: ca->bucket_backpointer_mismatches = NULL; } + up_read(&c->state_lock); bch_err_fn(c, ret); return ret; } diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h index 08263290..054e2d5e 100644 --- a/libbcachefs/bkey.h +++ b/libbcachefs/bkey.h @@ -191,7 +191,6 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r) static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) { return bpos_eq(l.k->p, r.k->p) && - l.k->size == r.k->size && bkey_bytes(l.k) == bkey_bytes(r.k) && !memcmp(l.v, r.v, bkey_val_bytes(l.k)); } diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 54666027..9b80201c 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -1417,7 +1417,7 @@ void __bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, "%u", r->level); else prt_printf(out, "(unknown)"); - prt_printf(out, "\n "); + prt_newline(out); bch2_bkey_val_to_text(out, c, k); } diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index ff681e73..2025d408 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -213,15 +213,15 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * prt_printf(&buf, " at "); bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); - prt_printf(&buf, ":\n parent: "); + prt_printf(&buf, ":\nparent: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); if (prev) { - prt_printf(&buf, "\n prev: "); + prt_printf(&buf, "\nprev: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key)); } - prt_str(&buf, "\n next: "); + prt_str(&buf, "\nnext: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key)); if (bpos_lt(expected_start, cur->data->min_key)) { /* gap */ @@ -280,12 +280,12 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, if (bpos_eq(child->key.k.p, b->key.k.p)) return 0; - prt_printf(&buf, " at "); + prt_printf(&buf, "\nat: "); bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); - prt_printf(&buf, ":\n parent: "); + prt_printf(&buf, "\nparent: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_str(&buf, "\n child: "); + prt_str(&buf, "\nchild: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key)); if (mustfix_fsck_err(trans, btree_node_topology_bad_max_key, @@ -351,8 +351,7 @@ again: if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), trans, btree_node_read_error, - "Topology repair: unreadable btree node at\n" - " %s", + "Topology repair: unreadable btree node at\n%s", buf.buf)) { bch2_btree_node_evict(trans, cur_k.k); cur = NULL; @@ -612,7 +611,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, if (fsck_err_on(btree_id != BTREE_ID_accounting && k.k->bversion.lo > atomic64_read(&c->key_version), trans, bkey_version_in_future, - "key version number higher than recorded %llu\n %s", + "key version number higher than recorded %llu\n%s", atomic64_read(&c->key_version), (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) atomic64_set(&c->key_version, k.k->bversion.lo); @@ -620,7 +619,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k), trans, btree_bitmap_not_marked, - "btree ptr not marked in member info btree allocated bitmap\n %s", + "btree ptr not marked in member info btree allocated bitmap\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -1021,8 +1020,7 @@ int bch2_check_allocations(struct bch_fs *c) { int ret; - lockdep_assert_held(&c->state_lock); - + down_read(&c->state_lock); down_write(&c->gc_lock); bch2_btree_interior_updates_flush(c); @@ -1060,6 +1058,7 @@ out: percpu_up_write(&c->mark_lock); up_write(&c->gc_lock); + up_read(&c->state_lock); /* * At startup, allocations can happen directly instead of via the diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 2ba33ffc..1d94a2bf 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -525,8 +525,6 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_printf(out, "at btree "); bch2_btree_pos_to_text(out, c, b); - printbuf_indent_add(out, 2); - prt_printf(out, "\nnode offset %u/%u", b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key))); if (i) @@ -550,23 +548,7 @@ static int __btree_err(int ret, enum bch_sb_error_id err_type, const char *fmt, ...) { - struct printbuf out = PRINTBUF; bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes; - va_list args; - - btree_err_msg(&out, c, ca, b, i, k, b->written, write); - - va_start(args, fmt); - prt_vprintf(&out, fmt, args); - va_end(args); - - if (write == WRITE) { - bch2_print_string_as_lines(KERN_ERR, out.buf); - ret = c->opts.errors == BCH_ON_ERROR_continue - ? 0 - : -BCH_ERR_fsck_errors_not_fixed; - goto out; - } if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) ret = -BCH_ERR_btree_node_read_err_fixable; @@ -576,6 +558,29 @@ static int __btree_err(int ret, if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable) bch2_sb_error_count(c, err_type); + struct printbuf out = PRINTBUF; + if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) { + printbuf_indent_add_nextline(&out, 2); +#ifdef BCACHEFS_LOG_PREFIX + prt_printf(&out, bch2_log_msg(c, "")); +#endif + } + + btree_err_msg(&out, c, ca, b, i, k, b->written, write); + + va_list args; + va_start(args, fmt); + prt_vprintf(&out, fmt, args); + va_end(args); + + if (write == WRITE) { + prt_str(&out, ", "); + ret = __bch2_inconsistent_error(c, &out) + ? -BCH_ERR_fsck_errors_not_fixed + : 0; + silent = false; + } + switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: ret = !silent @@ -585,25 +590,21 @@ static int __btree_err(int ret, ret != -BCH_ERR_fsck_ignore) goto fsck_err; ret = -BCH_ERR_fsck_fix; - break; - case -BCH_ERR_btree_node_read_err_want_retry: - case -BCH_ERR_btree_node_read_err_must_retry: - if (!silent) - bch2_print_string_as_lines(KERN_ERR, out.buf); - break; + goto out; case -BCH_ERR_btree_node_read_err_bad_node: - if (!silent) - bch2_print_string_as_lines(KERN_ERR, out.buf); - ret = bch2_topology_error(c); + prt_str(&out, ", "); + ret = __bch2_topology_error(c, &out); + if (ret) + silent = false; break; case -BCH_ERR_btree_node_read_err_incompatible: - if (!silent) - bch2_print_string_as_lines(KERN_ERR, out.buf); ret = -BCH_ERR_fsck_errors_not_fixed; + silent = false; break; - default: - BUG(); } + + if (!silent) + bch2_print_string_as_lines(KERN_ERR, out.buf); out: fsck_err: printbuf_exit(&out); @@ -817,7 +818,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, -BCH_ERR_btree_node_read_err_bad_node, c, ca, b, i, NULL, btree_node_bad_format, - "invalid bkey format: %s\n %s", buf1.buf, + "invalid bkey format: %s\n%s", buf1.buf, (printbuf_reset(&buf2), bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf)); printbuf_reset(&buf1); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 7542c6f9..a9c110b8 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1487,22 +1487,14 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) for (struct jset_entry *e = trans->journal_entries; e != btree_trans_journal_entries_top(trans); - e = vstruct_next(e)) + e = vstruct_next(e)) { bch2_journal_entry_to_text(buf, trans->c, e); + prt_newline(buf); + } printbuf_indent_sub(buf, 2); } -noinline __cold -void bch2_dump_trans_updates(struct btree_trans *trans) -{ - struct printbuf buf = PRINTBUF; - - bch2_trans_updates_to_text(&buf, trans); - bch2_print_str(trans->c, buf.buf); - printbuf_exit(&buf); -} - static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx) { struct btree_path *path = trans->paths + path_idx; diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 8823eec6..e6f51a3b 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -9,7 +9,6 @@ void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t); void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); -void bch2_dump_trans_updates(struct btree_trans *); void bch2_dump_trans_paths_updates(struct btree_trans *); static inline int __bkey_err(const struct bkey *k) diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree_journal_iter.c index 6d25e3f8..d1ad1a76 100644 --- a/libbcachefs/btree_journal_iter.c +++ b/libbcachefs/btree_journal_iter.c @@ -644,6 +644,8 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, */ static int journal_sort_key_cmp(const void *_l, const void *_r) { + cond_resched(); + const struct journal_key *l = _l; const struct journal_key *r = _r; diff --git a/libbcachefs/btree_node_scan.c b/libbcachefs/btree_node_scan.c index 67816132..25d54b77 100644 --- a/libbcachefs/btree_node_scan.c +++ b/libbcachefs/btree_node_scan.c @@ -13,6 +13,7 @@ #include <linux/kthread.h> #include <linux/min_heap.h> +#include <linux/sched/sysctl.h> #include <linux/sort.h> struct find_btree_nodes_worker { @@ -313,7 +314,8 @@ static int read_btree_nodes(struct find_btree_nodes *f) wake_up_process(t); } err: - closure_sync(&cl); + while (closure_sync_timeout(&cl, sysctl_hung_task_timeout_secs * HZ / 2)) + ; return f->ret ?: ret; } @@ -577,10 +579,12 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, found_btree_node_to_key(&tmp.k, &n); - struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k)); - bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); - printbuf_exit(&buf); + if (c->opts.verbose) { + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k)); + bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); + printbuf_exit(&buf); + } BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), (struct bkey_validate_context) { diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 67f1e320..bf7e1dac 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -35,6 +35,8 @@ static const char * const bch2_btree_update_modes[] = { NULL }; +static void bch2_btree_update_to_text(struct printbuf *, struct btree_update *); + static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *, btree_path_idx_t, struct btree *, struct keylist *); static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); @@ -54,6 +56,8 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) struct bkey_buf prev; int ret = 0; + printbuf_indent_add_nextline(&buf, 2); + BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, b->data->min_key)); @@ -64,19 +68,20 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) if (b == btree_node_root(c, b)) { if (!bpos_eq(b->data->min_key, POS_MIN)) { - printbuf_reset(&buf); + ret = __bch2_topology_error(c, &buf); + bch2_bpos_to_text(&buf, b->data->min_key); log_fsck_err(trans, btree_root_bad_min_key, "btree root with incorrect min_key: %s", buf.buf); - goto topology_repair; + goto out; } if (!bpos_eq(b->data->max_key, SPOS_MAX)) { - printbuf_reset(&buf); + ret = __bch2_topology_error(c, &buf); bch2_bpos_to_text(&buf, b->data->max_key); log_fsck_err(trans, btree_root_bad_max_key, "btree root with incorrect max_key: %s", buf.buf); - goto topology_repair; + goto out; } } @@ -94,20 +99,19 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) : bpos_successor(prev.k->k.p); if (!bpos_eq(expected_min, bp.v->min_key)) { - bch2_topology_error(c); + ret = __bch2_topology_error(c, &buf); - printbuf_reset(&buf); - prt_str(&buf, "end of prev node doesn't match start of next node\n in "); + prt_str(&buf, "end of prev node doesn't match start of next node\nin "); bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); prt_str(&buf, " node "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_str(&buf, "\n prev "); + prt_str(&buf, "\nprev "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); - prt_str(&buf, "\n next "); + prt_str(&buf, "\nnext "); bch2_bkey_val_to_text(&buf, c, k); log_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf); - goto topology_repair; + goto out; } bch2_bkey_buf_reassemble(&prev, c, k); @@ -115,29 +119,25 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) } if (bkey_deleted(&prev.k->k)) { - bch2_topology_error(c); + ret = __bch2_topology_error(c, &buf); - printbuf_reset(&buf); - prt_str(&buf, "empty interior node\n in "); + prt_str(&buf, "empty interior node\nin "); bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); prt_str(&buf, " node "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); log_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf); - goto topology_repair; } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) { - bch2_topology_error(c); + ret = __bch2_topology_error(c, &buf); - printbuf_reset(&buf); - prt_str(&buf, "last child node doesn't end at end of parent node\n in "); + prt_str(&buf, "last child node doesn't end at end of parent node\nin "); bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); prt_str(&buf, " node "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_str(&buf, "\n last key "); + prt_str(&buf, "\nlast key "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); log_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf); - goto topology_repair; } out: fsck_err: @@ -145,9 +145,6 @@ fsck_err: bch2_bkey_buf_exit(&prev, c); printbuf_exit(&buf); return ret; -topology_repair: - ret = bch2_topology_error(c); - goto out; } /* Calculate ideal packed bkey format for new btree nodes: */ @@ -1271,7 +1268,8 @@ err: bch2_btree_update_free(as, trans); if (!bch2_err_matches(ret, ENOSPC) && !bch2_err_matches(ret, EROFS) && - ret != -BCH_ERR_journal_reclaim_would_deadlock) + ret != -BCH_ERR_journal_reclaim_would_deadlock && + ret != -BCH_ERR_journal_shutdown) bch_err_fn_ratelimited(c, ret); return ERR_PTR(ret); } @@ -1782,11 +1780,24 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t int ret; lockdep_assert_held(&c->gc_lock); - BUG_ON(!btree_node_intent_locked(path, b->c.level)); BUG_ON(!b->c.level); BUG_ON(!as || as->b); bch2_verify_keylist_sorted(keys); + if (!btree_node_intent_locked(path, b->c.level)) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "%s(): node not locked at level %u\n", + __func__, b->c.level); + bch2_btree_update_to_text(&buf, as); + bch2_btree_path_to_text(&buf, trans, path_idx); + + bch2_print_string_as_lines(KERN_ERR, buf.buf); + printbuf_exit(&buf); + bch2_fs_emergency_read_only(c); + return -EIO; + } + ret = bch2_btree_node_lock_write(trans, path, &b->c); if (ret) return ret; @@ -2007,18 +2018,22 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, } if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) { - struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; + struct printbuf buf = PRINTBUF; - bch2_bpos_to_text(&buf1, prev->data->max_key); - bch2_bpos_to_text(&buf2, next->data->min_key); - bch_err(c, - "%s(): btree topology error:\n" - " prev ends at %s\n" - " next starts at %s", - __func__, buf1.buf, buf2.buf); - printbuf_exit(&buf1); - printbuf_exit(&buf2); - ret = bch2_topology_error(c); + printbuf_indent_add_nextline(&buf, 2); + prt_printf(&buf, "%s(): ", __func__); + ret = __bch2_topology_error(c, &buf); + prt_newline(&buf); + + prt_printf(&buf, "prev ends at "); + bch2_bpos_to_text(&buf, prev->data->max_key); + prt_newline(&buf); + + prt_printf(&buf, "next starts at "); + bch2_bpos_to_text(&buf, next->data->min_key); + + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); goto err; } @@ -2288,7 +2303,9 @@ static void async_btree_node_rewrite_work(struct work_struct *work) int ret = bch2_trans_do(c, bch2_btree_node_rewrite_key(trans, a->btree_id, a->level, a->key.k, 0)); - if (ret != -ENOENT) + if (ret != -ENOENT && + !bch2_err_matches(ret, EROFS) && + ret != -BCH_ERR_journal_shutdown) bch_err_fn_ratelimited(c, ret); spin_lock(&c->btree_node_rewrites_lock); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index e56ef623..0903311c 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -381,6 +381,36 @@ err: return ret; } +static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf, + struct bkey_s_c k, bool insert, enum bch_sb_error_id id) +{ + struct bch_fs *c = trans->c; + bool repeat = false, print = true, suppress = false; + + prt_printf(buf, "\nwhile marking "); + bch2_bkey_val_to_text(buf, c, k); + prt_newline(buf); + + __bch2_count_fsck_err(c, id, buf->buf, &repeat, &print, &suppress); + + int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); + + if (insert) { + print = true; + suppress = false; + + bch2_trans_updates_to_text(buf, trans); + __bch2_inconsistent_error(c, buf); + ret = -BCH_ERR_bucket_ref_update; + } + + if (suppress) + prt_printf(buf, "Ratelimiting new instances of previous error\n"); + if (print) + bch2_print_string_as_lines(KERN_ERR, buf->buf); + return ret; +} + int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, struct bkey_s_c k, const struct bch_extent_ptr *ptr, @@ -396,32 +426,29 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, BUG_ON(!sectors); - if (gen_after(ptr->gen, b_gen)) { - bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); - log_fsck_err(trans, ptr_gen_newer_than_bucket_gen, - "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" - "while marking %s", + if (unlikely(gen_after(ptr->gen, b_gen))) { + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen", ptr->dev, bucket_nr, b_gen, bch2_data_type_str(bucket_data_type ?: ptr_data_type), - ptr->gen, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (inserting) - goto err; + ptr->gen); + + ret = bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen); goto out; } - if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { - bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); - log_fsck_err(trans, ptr_too_stale, - "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" - "while marking %s", + if (unlikely(gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX)) { + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "bucket %u:%zu gen %u data type %s: ptr gen %u too stale", ptr->dev, bucket_nr, b_gen, bch2_data_type_str(bucket_data_type ?: ptr_data_type), - ptr->gen, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (inserting) - goto err; + ptr->gen); + + ret = bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_ptr_too_stale); goto out; } @@ -430,62 +457,50 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, goto out; } - if (b_gen != ptr->gen) { - bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); - log_fsck_err(trans, stale_dirty_ptr, - "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" - "while marking %s", + if (unlikely(b_gen != ptr->gen)) { + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)", ptr->dev, bucket_nr, b_gen, bucket_gen_get(ca, bucket_nr), bch2_data_type_str(bucket_data_type ?: ptr_data_type), - ptr->gen, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (inserting) - goto err; + ptr->gen); + + ret = bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_stale_dirty_ptr); goto out; } - if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) { - bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); - log_fsck_err(trans, ptr_bucket_data_type_mismatch, - "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, - bch2_data_type_str(bucket_data_type), - bch2_data_type_str(ptr_data_type), - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (inserting) - goto err; + if (unlikely(bucket_data_type_mismatch(bucket_data_type, ptr_data_type))) { + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s", + ptr->dev, bucket_nr, b_gen, + bch2_data_type_str(bucket_data_type), + bch2_data_type_str(ptr_data_type)); + + ret = bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_ptr_bucket_data_type_mismatch); goto out; } - if ((u64) *bucket_sectors + sectors > U32_MAX) { - bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); - log_fsck_err(trans, bucket_sector_count_overflow, - "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" - "while marking %s", + if (unlikely((u64) *bucket_sectors + sectors > U32_MAX)) { + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX", ptr->dev, bucket_nr, b_gen, bch2_data_type_str(bucket_data_type ?: ptr_data_type), - *bucket_sectors, sectors, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (inserting) - goto err; + *bucket_sectors, sectors); + + ret = bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_bucket_sector_count_overflow); sectors = -*bucket_sectors; + goto out; } *bucket_sectors += sectors; out: printbuf_exit(&buf); return ret; -err: -fsck_err: - bch2_dump_trans_updates(trans); - bch2_inconsistent_error(c); - ret = -BCH_ERR_bucket_ref_update; - goto out; } void bch2_trans_account_disk_usage_change(struct btree_trans *trans) @@ -651,9 +666,9 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, stripe_blockcount_get(&s->v, p.ec.block) + sectors); - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + struct disk_accounting_pos acc; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); acc.replicas.data_type = data_type; ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); @@ -677,19 +692,21 @@ err: if (!m || !m->alive) { gc_stripe_unlock(m); struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ", + (u64) p.ec.idx); bch2_bkey_val_to_text(&buf, c, k); - bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s", - (u64) p.ec.idx, buf.buf); + __bch2_inconsistent_error(c, &buf); + bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_exit(&buf); - bch2_inconsistent_error(c); return -BCH_ERR_trigger_stripe_pointer; } m->block_sectors[p.ec.block] += sectors; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + struct disk_accounting_pos acc; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e)); gc_stripe_unlock(m); @@ -717,12 +734,12 @@ static int __trigger_extent(struct btree_trans *trans, : BCH_DATA_user; int ret = 0; - struct disk_accounting_pos acc_replicas_key = { - .type = BCH_DISK_ACCOUNTING_replicas, - .replicas.data_type = data_type, - .replicas.nr_devs = 0, - .replicas.nr_required = 1, - }; + struct disk_accounting_pos acc_replicas_key; + memset(&acc_replicas_key, 0, sizeof(acc_replicas_key)); + acc_replicas_key.type = BCH_DISK_ACCOUNTING_replicas; + acc_replicas_key.replicas.data_type = data_type; + acc_replicas_key.replicas.nr_devs = 0; + acc_replicas_key.replicas.nr_required = 1; unsigned cur_compression_type = 0; u64 compression_acct[3] = { 1, 0, 0 }; diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c index 57d55b3d..584f4a3e 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/chardev.c @@ -426,10 +426,8 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c, arg.replica_entries_bytes = replicas.nr; for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) { - struct disk_accounting_pos k = { - .type = BCH_DISK_ACCOUNTING_persistent_reserved, - .persistent_reserved.nr_replicas = i, - }; + struct disk_accounting_pos k; + disk_accounting_key_init(k, persistent_reserved, .nr_replicas = i); bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&k), diff --git a/libbcachefs/disk_accounting.c b/libbcachefs/disk_accounting.c index 8a8de614..a59f6c12 100644 --- a/libbcachefs/disk_accounting.c +++ b/libbcachefs/disk_accounting.c @@ -114,10 +114,9 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans, unsigned dev, s64 sectors, bool gc) { - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; - + struct disk_accounting_pos acc; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_replicas_entry_cached(&acc.replicas, dev); return bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); @@ -646,7 +645,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e), trans, accounting_replicas_not_marked, - "accounting not marked in superblock replicas\n %s", + "accounting not marked in superblock replicas\n%s", (printbuf_reset(&buf), bch2_accounting_key_to_text(&buf, &acc), buf.buf))) { @@ -676,7 +675,7 @@ fsck_err: return ret; invalid_device: if (fsck_err(trans, accounting_to_invalid_device, - "accounting entry points to invalid device %i\n %s", + "accounting entry points to invalid device %i\n%s", invalid_dev, (printbuf_reset(&buf), bch2_accounting_key_to_text(&buf, &acc), @@ -737,7 +736,9 @@ int bch2_accounting_read(struct bch_fs *c) break; if (!bch2_accounting_is_mem(acc_k)) { - struct disk_accounting_pos next = { .type = acc_k.type + 1 }; + struct disk_accounting_pos next; + memset(&next, 0, sizeof(next)); + next.type = acc_k.type + 1; bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); continue; } @@ -893,15 +894,13 @@ int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev) int bch2_dev_usage_init(struct bch_dev *ca, bool gc) { struct bch_fs *c = ca->fs; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_dev_data_type, - .dev_data_type.dev = ca->dev_idx, - .dev_data_type.data_type = BCH_DATA_free, - }; u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 }; int ret = bch2_trans_do(c, ({ - bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc) ?: + bch2_disk_accounting_mod2(trans, gc, + v, dev_data_type, + .dev = ca->dev_idx, + .data_type = BCH_DATA_free) ?: (!gc ? bch2_trans_commit(trans, NULL, NULL, 0) : 0); })); bch_err_fn(c, ret); @@ -928,7 +927,9 @@ void bch2_verify_accounting_clean(struct bch_fs *c) break; if (!bch2_accounting_is_mem(acc_k)) { - struct disk_accounting_pos next = { .type = acc_k.type + 1 }; + struct disk_accounting_pos next; + memset(&next, 0, sizeof(next)); + next.type = acc_k.type + 1; bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); continue; } diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index f2b9225f..6faeda7a 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -320,7 +320,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, if (flags & BTREE_TRIGGER_gc) { struct bucket *g = gc_bucket(ca, bucket.offset); - if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", + if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n%s", ptr->dev, (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { ret = -BCH_ERR_mark_stripe; @@ -453,9 +453,9 @@ int bch2_trigger_stripe(struct btree_trans *trans, if (new_s) { s64 sectors = (u64) le16_to_cpu(new_s->sectors) * new_s->nr_redundant; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + struct disk_accounting_pos acc; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, new); int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); if (ret) @@ -468,9 +468,9 @@ int bch2_trigger_stripe(struct btree_trans *trans, if (old_s) { s64 sectors = -((s64) le16_to_cpu(old_s->sectors)) * old_s->nr_redundant; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + struct disk_accounting_pos acc; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, old); int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); if (ret) @@ -2110,14 +2110,14 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ if (ret) return ret; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + struct disk_accounting_pos acc; s64 sectors = 0; for (unsigned i = 0; i < s->v.nr_blocks; i++) sectors -= stripe_blockcount_get(&s->v, i); + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); acc.replicas.data_type = BCH_DATA_user; ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); @@ -2131,6 +2131,8 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ sectors = -sectors; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); acc.replicas.data_type = BCH_DATA_user; ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 101806d7..c8696f01 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -5,6 +5,8 @@ #define BCH_ERRCODES() \ x(ERANGE, ERANGE_option_too_small) \ x(ERANGE, ERANGE_option_too_big) \ + x(EINVAL, injected) \ + x(BCH_ERR_injected, injected_fs_start) \ x(EINVAL, mount_option) \ x(BCH_ERR_mount_option, option_name) \ x(BCH_ERR_mount_option, option_value) \ @@ -308,6 +310,7 @@ x(BCH_ERR_data_write, data_write_misaligned) \ x(BCH_ERR_decompress, data_read) \ x(BCH_ERR_data_read, no_device_to_read_from) \ + x(BCH_ERR_data_read, no_devices_valid) \ x(BCH_ERR_data_read, data_read_io_err) \ x(BCH_ERR_data_read, data_read_csum_err) \ x(BCH_ERR_data_read, data_read_retry) \ diff --git a/libbcachefs/error.c b/libbcachefs/error.c index 207f35d3..d4dfd13a 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -11,7 +11,16 @@ #define FSCK_ERR_RATELIMIT_NR 10 -bool bch2_inconsistent_error(struct bch_fs *c) +void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out) +{ + printbuf_indent_add_nextline(out, 2); + +#ifdef BCACHEFS_LOG_PREFIX + prt_printf(out, bch2_log_msg(c, "")); +#endif +} + +bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) { set_bit(BCH_FS_error, &c->flags); @@ -21,10 +30,11 @@ bool bch2_inconsistent_error(struct bch_fs *c) case BCH_ON_ERROR_fix_safe: case BCH_ON_ERROR_ro: if (bch2_fs_emergency_read_only(c)) - bch_err(c, "inconsistency detected - emergency read only at journal seq %llu", - journal_cur_seq(&c->journal)); + prt_printf(out, "inconsistency detected - emergency read only at journal seq %llu\n", + journal_cur_seq(&c->journal)); return true; case BCH_ON_ERROR_panic: + bch2_print_string_as_lines(KERN_ERR, out->buf); panic(bch2_fmt(c, "panic after error")); return true; default: @@ -32,11 +42,63 @@ bool bch2_inconsistent_error(struct bch_fs *c) } } -int bch2_topology_error(struct bch_fs *c) +bool bch2_inconsistent_error(struct bch_fs *c) { + struct printbuf buf = PRINTBUF; + printbuf_indent_add_nextline(&buf, 2); + + bool ret = __bch2_inconsistent_error(c, &buf); + if (ret) + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); + return ret; +} + +__printf(3, 0) +static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *trans, + const char *fmt, va_list args) +{ + struct printbuf buf = PRINTBUF; + + bch2_log_msg_start(c, &buf); + + prt_vprintf(&buf, fmt, args); + prt_newline(&buf); + + if (trans) + bch2_trans_updates_to_text(&buf, trans); + bool ret = __bch2_inconsistent_error(c, &buf); + bch2_print_string_as_lines(KERN_ERR, buf.buf); + + printbuf_exit(&buf); + return ret; +} + +bool bch2_fs_inconsistent(struct bch_fs *c, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + bool ret = bch2_fs_trans_inconsistent(c, NULL, fmt, args); + va_end(args); + return ret; +} + +bool bch2_trans_inconsistent(struct btree_trans *trans, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + bool ret = bch2_fs_trans_inconsistent(trans->c, trans, fmt, args); + va_end(args); + return ret; +} + +int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) +{ + prt_printf(out, "btree topology error: "); + set_bit(BCH_FS_topology_error, &c->flags); if (!test_bit(BCH_FS_recovery_running, &c->flags)) { - bch2_inconsistent_error(c); + __bch2_inconsistent_error(c, out); return -BCH_ERR_btree_need_topology_repair; } else { return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: @@ -44,6 +106,24 @@ int bch2_topology_error(struct bch_fs *c) } } +int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) +{ + struct printbuf buf = PRINTBUF; + + bch2_log_msg_start(c, &buf); + + va_list args; + va_start(args, fmt); + prt_vprintf(&buf, fmt, args); + va_end(args); + + int ret = __bch2_topology_error(c, &buf); + bch2_print_string_as_lines(KERN_ERR, buf.buf); + + printbuf_exit(&buf); + return ret; +} + void bch2_fatal_error(struct bch_fs *c) { if (bch2_fs_emergency_read_only(c)) @@ -184,7 +264,8 @@ static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans) #endif -static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) +static struct fsck_err_state *fsck_err_get(struct bch_fs *c, + enum bch_sb_error_id id) { struct fsck_err_state *s; @@ -192,7 +273,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) return NULL; list_for_each_entry(s, &c->fsck_error_msgs, list) - if (s->fmt == fmt) { + if (s->id == id) { /* * move it to the head of the list: repeated fsck errors * are common @@ -210,7 +291,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) } INIT_LIST_HEAD(&s->list); - s->fmt = fmt; + s->id = id; list_add(&s->list, &c->fsck_error_msgs); return s; } @@ -260,15 +341,59 @@ static int do_fsck_ask_yn(struct bch_fs *c, return ask; } +static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c, + enum bch_sb_error_id id, const char *msg, + bool *repeat, bool *print, bool *suppress) +{ + bch2_sb_error_count(c, id); + + struct fsck_err_state *s = fsck_err_get(c, id); + if (s) { + /* + * We may be called multiple times for the same error on + * transaction restart - this memoizes instead of asking the user + * multiple times for the same error: + */ + if (s->last_msg && !strcmp(msg, s->last_msg)) { + *repeat = true; + *print = false; + return s; + } + + kfree(s->last_msg); + s->last_msg = kstrdup(msg, GFP_KERNEL); + + if (c->opts.ratelimit_errors && + s->nr >= FSCK_ERR_RATELIMIT_NR) { + if (s->nr == FSCK_ERR_RATELIMIT_NR) + *suppress = true; + else + *print = false; + } + + s->nr++; + } + return s; +} + +void __bch2_count_fsck_err(struct bch_fs *c, + enum bch_sb_error_id id, const char *msg, + bool *repeat, bool *print, bool *suppress) +{ + bch2_sb_error_count(c, id); + + mutex_lock(&c->fsck_error_msgs_lock); + count_fsck_err_locked(c, id, msg, repeat, print, suppress); + mutex_unlock(&c->fsck_error_msgs_lock); +} + int __bch2_fsck_err(struct bch_fs *c, struct btree_trans *trans, enum bch_fsck_flags flags, enum bch_sb_error_id err, const char *fmt, ...) { - struct fsck_err_state *s = NULL; va_list args; - bool print = true, suppressing = false, inconsistent = false, exiting = false; struct printbuf buf = PRINTBUF, *out = &buf; int ret = -BCH_ERR_fsck_ignore; const char *action_orig = "fix?", *action = action_orig; @@ -303,7 +428,12 @@ int __bch2_fsck_err(struct bch_fs *c, ? -BCH_ERR_fsck_fix : -BCH_ERR_fsck_ignore; - bch2_sb_error_count(c, err); + printbuf_indent_add_nextline(out, 2); + +#ifdef BCACHEFS_LOG_PREFIX + if (strncmp(fmt, "bcachefs", 8)) + prt_printf(out, bch2_log_msg(c, "")); +#endif va_start(args, fmt); prt_vprintf(out, fmt, args); @@ -323,42 +453,15 @@ int __bch2_fsck_err(struct bch_fs *c, } mutex_lock(&c->fsck_error_msgs_lock); - s = fsck_err_get(c, fmt); - if (s) { - /* - * We may be called multiple times for the same error on - * transaction restart - this memoizes instead of asking the user - * multiple times for the same error: - */ - if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { - ret = s->ret; - goto err_unlock; - } - - kfree(s->last_msg); - s->last_msg = kstrdup(buf.buf, GFP_KERNEL); - if (!s->last_msg) { - ret = -ENOMEM; - goto err_unlock; - } - - if (c->opts.ratelimit_errors && - !(flags & FSCK_NO_RATELIMIT) && - s->nr >= FSCK_ERR_RATELIMIT_NR) { - if (s->nr == FSCK_ERR_RATELIMIT_NR) - suppressing = true; - else - print = false; - } - - s->nr++; + bool repeat = false, print = true, suppress = false; + bool inconsistent = false, exiting = false; + struct fsck_err_state *s = + count_fsck_err_locked(c, err, buf.buf, &repeat, &print, &suppress); + if (repeat) { + ret = s->ret; + goto err_unlock; } -#ifdef BCACHEFS_LOG_PREFIX - if (!strncmp(fmt, "bcachefs:", 9)) - prt_printf(out, bch2_log_msg(c, "")); -#endif - if ((flags & FSCK_AUTOFIX) && (c->opts.errors == BCH_ON_ERROR_continue || c->opts.errors == BCH_ON_ERROR_fix_safe)) { @@ -377,6 +480,7 @@ int __bch2_fsck_err(struct bch_fs *c, !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { prt_str(out, ", shutting down"); inconsistent = true; + print = true; ret = -BCH_ERR_fsck_errors_not_fixed; } else if (flags & FSCK_CAN_FIX) { prt_str(out, ", "); @@ -435,24 +539,30 @@ int __bch2_fsck_err(struct bch_fs *c, print = true; } print: + prt_newline(out); + + if (inconsistent) + __bch2_inconsistent_error(c, out); + else if (exiting) + prt_printf(out, "Unable to continue, halting\n"); + else if (suppress) + prt_printf(out, "Ratelimiting new instances of previous error\n"); + if (print) { + /* possibly strip an empty line, from printbuf_indent_add */ + while (out->pos && out->buf[out->pos - 1] == ' ') + --out->pos; + printbuf_nul_terminate(out); + if (bch2_fs_stdio_redirect(c)) - bch2_print(c, "%s\n", out->buf); + bch2_print(c, "%s", out->buf); else bch2_print_string_as_lines(KERN_ERR, out->buf); } - if (exiting) - bch_err(c, "Unable to continue, halting"); - else if (suppressing) - bch_err(c, "Ratelimiting new instances of previous error"); - if (s) s->ret = ret; - if (inconsistent) - bch2_inconsistent_error(c); - /* * We don't yet track whether the filesystem currently has errors, for * log_fsck_err()s: that would require us to track for every error type @@ -514,16 +624,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, prt_printf(&buf, " level=%u: ", from.level); bch2_bkey_val_to_text(&buf, c, k); - prt_str(&buf, "\n "); + prt_newline(&buf); va_list args; va_start(args, fmt); prt_vprintf(&buf, fmt, args); va_end(args); - prt_str(&buf, ": delete?"); - - int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf); + int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s, delete?", buf.buf); printbuf_exit(&buf); return ret; } @@ -536,7 +644,7 @@ void bch2_flush_fsck_errs(struct bch_fs *c) list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { if (s->ratelimited && s->last_msg) - bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); + bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); list_del(&s->list); kfree(s->last_msg); diff --git a/libbcachefs/error.h b/libbcachefs/error.h index 7d3f0e2a..d0d024dc 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/error.h @@ -18,6 +18,8 @@ struct work_struct; /* Error messages: */ +void bch2_log_msg_start(struct bch_fs *, struct printbuf *); + /* * Inconsistency errors: The on disk data is inconsistent. If these occur during * initial recovery, they don't indicate a bug in the running code - we walk all @@ -29,21 +31,10 @@ struct work_struct; * BCH_ON_ERROR_CONTINUE mode */ +bool __bch2_inconsistent_error(struct bch_fs *, struct printbuf *); bool bch2_inconsistent_error(struct bch_fs *); - -int bch2_topology_error(struct bch_fs *); - -#define bch2_fs_topology_error(c, ...) \ -({ \ - bch_err(c, "btree topology error: " __VA_ARGS__); \ - bch2_topology_error(c); \ -}) - -#define bch2_fs_inconsistent(c, ...) \ -({ \ - bch_err(c, __VA_ARGS__); \ - bch2_inconsistent_error(c); \ -}) +__printf(2, 3) +bool bch2_fs_inconsistent(struct bch_fs *, const char *, ...); #define bch2_fs_inconsistent_on(cond, ...) \ ({ \ @@ -53,26 +44,21 @@ int bch2_topology_error(struct bch_fs *); _ret; \ }) -/* - * When a transaction update discovers or is causing a fs inconsistency, it's - * helpful to also dump the pending updates: - */ -#define bch2_trans_inconsistent(trans, ...) \ -({ \ - bch_err(trans->c, __VA_ARGS__); \ - bch2_dump_trans_updates(trans); \ - bch2_inconsistent_error(trans->c); \ -}) +__printf(2, 3) +bool bch2_trans_inconsistent(struct btree_trans *, const char *, ...); -#define bch2_trans_inconsistent_on(cond, trans, ...) \ +#define bch2_trans_inconsistent_on(cond, ...) \ ({ \ bool _ret = unlikely(!!(cond)); \ - \ if (_ret) \ - bch2_trans_inconsistent(trans, __VA_ARGS__); \ + bch2_trans_inconsistent(__VA_ARGS__); \ _ret; \ }) +int __bch2_topology_error(struct bch_fs *, struct printbuf *); +__printf(2, 3) +int bch2_fs_topology_error(struct bch_fs *, const char *, ...); + /* * Fsck errors: inconsistency errors we detect at mount time, and should ideally * be able to repair: @@ -80,7 +66,7 @@ int bch2_topology_error(struct bch_fs *); struct fsck_err_state { struct list_head list; - const char *fmt; + enum bch_sb_error_id id; u64 nr; bool ratelimited; int ret; @@ -90,6 +76,12 @@ struct fsck_err_state { #define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err) +void __bch2_count_fsck_err(struct bch_fs *, + enum bch_sb_error_id, const char *, + bool *, bool *, bool *); +#define bch2_count_fsck_err(_c, _err, ...) \ + __bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__) + __printf(5, 6) __cold int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, enum bch_fsck_flags, diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index ca2073db..ae7c7a17 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -136,8 +136,12 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (k.k->type == KEY_TYPE_error) return -BCH_ERR_key_type_error; - rcu_read_lock(); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) + return -BCH_ERR_extent_poisened; + + rcu_read_lock(); const union bch_extent_entry *entry; struct extent_ptr_decoded p; u64 pick_latency; @@ -223,8 +227,11 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (have_io_errors) return -BCH_ERR_data_read_io_err; - WARN_ONCE(1, "unhandled error case in %s\n", __func__); - return -EINVAL; + /* + * If we get here, we have pointers (bkey_ptrs_validate() ensures that), + * but they don't point to valid devices: + */ + return -BCH_ERR_no_devices_valid; } /* KEY_TYPE_btree_ptr: */ diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 717e7b94..c80ed3a5 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -999,17 +999,28 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) POS(inode->v.i_ino, offset >> 9), POS(inode->v.i_ino, U64_MAX), inum.subvol, BTREE_ITER_slots, k, ({ - if (k.k->p.inode != inode->v.i_ino) { - next_hole = bch2_seek_pagecache_hole(&inode->v, - offset, MAX_LFS_FILESIZE, 0, false); - break; - } else if (!bkey_extent_is_data(k.k)) { - next_hole = bch2_seek_pagecache_hole(&inode->v, - max(offset, bkey_start_offset(k.k) << 9), - k.k->p.offset << 9, 0, false); + if (k.k->p.inode != inode->v.i_ino || + !bkey_extent_is_data(k.k)) { + loff_t start_offset = k.k->p.inode == inode->v.i_ino + ? max(offset, bkey_start_offset(k.k) << 9) + : offset; + loff_t end_offset = k.k->p.inode == inode->v.i_ino + ? MAX_LFS_FILESIZE + : k.k->p.offset << 9; - if (next_hole < k.k->p.offset << 9) + /* + * Found a hole in the btree, now make sure it's + * a hole in the pagecache. We might have to + * keep searching if this hole is entirely dirty + * in the page cache: + */ + bch2_trans_unlock(trans); + loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v, + start_offset, end_offset, 0, false); + if (pagecache_hole < end_offset) { + next_hole = pagecache_hole; break; + } } else { offset = max(offset, bkey_start_offset(k.k) << 9); } diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/fs-ioctl.c index e3a3230f..c9b5de56 100644 --- a/libbcachefs/fs-ioctl.c +++ b/libbcachefs/fs-ioctl.c @@ -543,6 +543,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, } ret = __bch2_unlink(dir, victim, true); if (!ret) { + shrink_dcache_parent(victim); fsnotify_rmdir(dir, victim); d_delete(victim); } diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 94e97e28..bb303791 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -673,7 +673,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, * back to this dirent */ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), - c, "dirent to missing inode:\n %s", + c, "dirent to missing inode:\n%s", (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)); if (ret) goto err; @@ -2290,7 +2290,8 @@ err_stop_fs: goto err; err_put_super: - __bch2_fs_stop(c); + if (!sb->s_root) + __bch2_fs_stop(c); deactivate_locked_super(sb); goto err; } diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index f955b8f9..52320295 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -1421,14 +1421,14 @@ static int check_key_has_inode(struct btree_trans *trans, if (fsck_err_on(!i, trans, key_in_missing_inode, - "key in missing inode:\n %s", + "key in missing inode:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) goto delete; if (fsck_err_on(i && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode), trans, key_in_wrong_inode_type, - "key for wrong inode mode %o:\n %s", + "key for wrong inode mode %o:\n%s", i->inode.bi_mode, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) @@ -1571,13 +1571,13 @@ static int overlapping_extents_found(struct btree_trans *trans, if (ret) goto err; - prt_str(&buf, "\n "); + prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, k1); if (!bpos_eq(pos1, k1.k->p)) { - prt_str(&buf, "\n wanted\n "); + prt_str(&buf, "\nwanted\n "); bch2_bpos_to_text(&buf, pos1); - prt_str(&buf, "\n "); + prt_str(&buf, "\n"); bch2_bkey_to_text(&buf, &pos2); bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", @@ -1600,7 +1600,7 @@ static int overlapping_extents_found(struct btree_trans *trans, break; } - prt_str(&buf, "\n "); + prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, k2); if (bpos_gt(k2.k->p, pos2.p) || @@ -1611,7 +1611,7 @@ static int overlapping_extents_found(struct btree_trans *trans, goto err; } - prt_printf(&buf, "\n overwriting %s extent", + prt_printf(&buf, "\noverwriting %s extent", pos1.snapshot >= pos2.p.snapshot ? "first" : "second"); if (fsck_err(trans, extent_overlapping, @@ -1632,6 +1632,8 @@ static int overlapping_extents_found(struct btree_trans *trans, bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); bch2_disk_reservation_put(c, &res); + bch_info(c, "repair ret %s", bch2_err_str(ret)); + if (ret) goto err; @@ -1784,7 +1786,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && !bkey_extent_is_reservation(k), trans, extent_past_end_of_inode, - "extent type past end of inode %llu:%u, i_size %llu\n %s", + "extent type past end of inode %llu:%u, i_size %llu\n%s", i->inode.bi_inum, i->snapshot, i->inode.bi_size, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { struct btree_iter iter2; diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c index a04dffa4..fafd00a3 100644 --- a/libbcachefs/io_read.c +++ b/libbcachefs/io_read.c @@ -295,13 +295,6 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans, bool *read_full, struct bch_io_failures *failed) { - /* - * We're in the retry path, but we don't know what to repair yet, and we - * don't want to do a promote here: - */ - if (failed && !failed->nr) - return NULL; - struct bch_fs *c = trans->c; /* * if failed != NULL we're not actually doing a promote, we're @@ -436,71 +429,6 @@ static void bch2_rbio_done(struct bch_read_bio *rbio) bio_endio(&rbio->bio); } -static void get_rbio_extent(struct btree_trans *trans, - struct bch_read_bio *rbio, - struct bkey_buf *sk) -{ - struct btree_iter iter; - struct bkey_s_c k; - int ret = lockrestart_do(trans, - bkey_err(k = bch2_bkey_get_iter(trans, &iter, - rbio->data_btree, rbio->data_pos, 0))); - if (ret) - return; - - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr(ptrs, ptr) - if (bch2_extent_ptr_eq(*ptr, rbio->pick.ptr)) { - bch2_bkey_buf_reassemble(sk, trans->c, k); - break; - } - - bch2_trans_iter_exit(trans, &iter); -} - -static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_read_bio *rbio, - enum btree_id btree, struct bkey_s_c read_k) -{ - struct bch_fs *c = trans->c; - - struct data_update *u = rbio_data_update(rbio); - if (u) - read_k = bkey_i_to_s_c(u->k.k); - - u64 flags = bch2_bkey_extent_flags(read_k); - if (flags & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) - return 0; - - struct btree_iter iter; - struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, btree, bkey_start_pos(read_k.k), - BTREE_ITER_intent); - int ret = bkey_err(k); - if (ret) - return ret; - - if (!bkey_and_val_eq(k, read_k)) - goto out; - - struct bkey_i *new = bch2_trans_kmalloc(trans, - bkey_bytes(k.k) + sizeof(struct bch_extent_flags)); - ret = PTR_ERR_OR_ZERO(new) ?: - (bkey_reassemble(new, k), 0) ?: - bch2_bkey_extent_flags_set(c, new, flags|BIT_ULL(BCH_EXTENT_FLAG_poisoned)) ?: - bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node) ?: - bch2_trans_commit(trans, NULL, NULL, 0); - - /* - * Propagate key change back to data update path, in particular so it - * knows the extent has been poisoned and it's safe to change the - * checksum - */ - if (u && !ret) - bch2_bkey_buf_copy(&u->k, c, new); -out: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - static noinline int bch2_read_retry_nodecode(struct btree_trans *trans, struct bch_read_bio *rbio, struct bvec_iter bvec_iter, @@ -534,8 +462,7 @@ retry: err: bch2_trans_iter_exit(trans, &iter); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || - bch2_err_matches(ret, BCH_ERR_data_read_retry)) + if (bch2_err_matches(ret, BCH_ERR_data_read_retry)) goto retry; if (ret) { @@ -559,21 +486,13 @@ static void bch2_rbio_retry(struct work_struct *work) .inum = rbio->read_pos.inode, }; struct bch_io_failures failed = { .nr = 0 }; - struct btree_trans *trans = bch2_trans_get(c); - struct bkey_buf sk; - bch2_bkey_buf_init(&sk); - bkey_init(&sk.k->k); - trace_io_read_retry(&rbio->bio); this_cpu_add(c->counters[BCH_COUNTER_io_read_retry], bvec_iter_sectors(rbio->bvec_iter)); - get_rbio_extent(trans, rbio, &sk); - - if (!bkey_deleted(&sk.k->k) && - bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid)) + if (bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid)) bch2_mark_io_failure(&failed, &rbio->pick, rbio->ret == -BCH_ERR_data_read_retry_csum_err); @@ -594,7 +513,7 @@ static void bch2_rbio_retry(struct work_struct *work) int ret = rbio->data_update ? bch2_read_retry_nodecode(trans, rbio, iter, &failed, flags) - : __bch2_read(trans, rbio, iter, inum, &failed, &sk, flags); + : __bch2_read(trans, rbio, iter, inum, &failed, flags); if (ret) { rbio->ret = ret; @@ -615,7 +534,6 @@ static void bch2_rbio_retry(struct work_struct *work) } bch2_rbio_done(rbio); - bch2_bkey_buf_exit(&sk, c); bch2_trans_put(trans); } @@ -1040,10 +958,6 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, bvec_iter_sectors(iter)); goto out_read_done; } - - if ((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) && - !orig->data_update) - return -BCH_ERR_extent_poisened; retry_pick: ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev); @@ -1052,16 +966,6 @@ retry_pick: goto hole; if (unlikely(ret < 0)) { - if (ret == -BCH_ERR_data_read_csum_err) { - int ret2 = maybe_poison_extent(trans, orig, data_btree, k); - if (ret2) { - ret = ret2; - goto err; - } - - trace_and_count(c, io_read_fail_and_poison, &orig->bio); - } - struct printbuf buf = PRINTBUF; bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "%s\n ", bch2_err_str(ret)); @@ -1228,10 +1132,6 @@ retry_pick: rbio->bio.bi_iter.bi_sector = pick.ptr.offset; rbio->bio.bi_end_io = bch2_read_endio; - /* XXX: also nvme read recovery level */ - if (unlikely(failed && bch2_dev_io_failures(failed, pick.ptr.dev))) - rbio->bio.bi_opf |= REQ_FUA; - if (rbio->bounce) trace_and_count(c, io_read_bounce, &rbio->bio); @@ -1359,15 +1259,12 @@ out_read_done: int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, struct bvec_iter bvec_iter, subvol_inum inum, - struct bch_io_failures *failed, - struct bkey_buf *prev_read, - unsigned flags) + struct bch_io_failures *failed, unsigned flags) { struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_buf sk; struct bkey_s_c k; - enum btree_id data_btree; int ret; EBUG_ON(rbio->data_update); @@ -1378,7 +1275,7 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, BTREE_ITER_slots); while (1) { - data_btree = BTREE_ID_extents; + enum btree_id data_btree = BTREE_ID_extents; bch2_trans_begin(trans); @@ -1410,12 +1307,6 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, k = bkey_i_to_s_c(sk.k); - if (unlikely(flags & BCH_READ_in_retry)) { - if (!bkey_and_val_eq(k, bkey_i_to_s_c(prev_read->k))) - failed->nr = 0; - bch2_bkey_buf_copy(prev_read, c, sk.k); - } - /* * With indirect extents, the amount of data to read is the min * of the original extent and the indirect extent: @@ -1450,7 +1341,9 @@ err: break; } - if (unlikely(ret)) { + bch2_trans_iter_exit(trans, &iter); + + if (ret) { struct printbuf buf = PRINTBUF; lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, inum, @@ -1466,7 +1359,6 @@ err: bch2_rbio_done(rbio); } - bch2_trans_iter_exit(trans, &iter); bch2_bkey_buf_exit(&sk, c); return ret; } diff --git a/libbcachefs/io_read.h b/libbcachefs/io_read.h index 1a85b092..c78025d8 100644 --- a/libbcachefs/io_read.h +++ b/libbcachefs/io_read.h @@ -144,8 +144,7 @@ static inline void bch2_read_extent(struct btree_trans *trans, } int __bch2_read(struct btree_trans *, struct bch_read_bio *, struct bvec_iter, - subvol_inum, - struct bch_io_failures *, struct bkey_buf *, unsigned flags); + subvol_inum, struct bch_io_failures *, unsigned flags); static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, subvol_inum inum) @@ -155,7 +154,7 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, rbio->subvol = inum.subvol; bch2_trans_run(c, - __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL, + __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, BCH_READ_retry_if_stale| BCH_READ_may_promote| BCH_READ_user_mapped)); diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 4ed6137f..f461cb06 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -214,12 +214,12 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, fsck_err_on(same_device, c, journal_entry_dup_same_device, - "duplicate journal entry on same device\n %s", + "duplicate journal entry on same device\n%s", buf.buf); fsck_err_on(not_identical, c, journal_entry_replicas_data_mismatch, - "found duplicate but non identical journal entries\n %s", + "found duplicate but non identical journal entries\n%s", buf.buf); if (entry_ptr.csum_good && !identical) @@ -308,8 +308,8 @@ static void journal_entry_err_msg(struct printbuf *out, break; \ case WRITE: \ bch2_sb_error_count(c, BCH_FSCK_ERR_##_err); \ - bch_err(c, "corrupt metadata before write: %s\n", _buf.buf);\ - if (bch2_fs_inconsistent(c)) { \ + if (bch2_fs_inconsistent(c, \ + "corrupt metadata before write: %s\n", _buf.buf)) {\ ret = -BCH_ERR_fsck_errors_not_fixed; \ goto fsck_err; \ } \ @@ -1371,8 +1371,8 @@ int bch2_journal_read(struct bch_fs *c, missing_end = seq - 1; fsck_err(c, journal_entries_missing, "journal entries %llu-%llu missing! (replaying %llu-%llu)\n" - " prev at %s\n" - " next at %s, continue?", + "prev at %s\n" + "next at %s, continue?", missing_start, missing_end, *last_seq, *blacklist_seq - 1, buf1.buf, buf2.buf); @@ -1426,7 +1426,7 @@ int bch2_journal_read(struct bch_fs *c, !bch2_replicas_marked(c, &replicas.e) && (le64_to_cpu(i->j.seq) == *last_seq || fsck_err(c, journal_entry_replicas_not_marked, - "superblock not marked as containing replicas for journal entry %llu\n %s", + "superblock not marked as containing replicas for journal entry %llu\n%s", le64_to_cpu(i->j.seq), buf.buf))) { ret = bch2_mark_replicas(c, &replicas.e); if (ret) @@ -1623,7 +1623,8 @@ static CLOSURE_CALLBACK(journal_write_done) : j->noflush_write_time, j->write_start_time); if (!w->devs_written.nr) { - bch_err(c, "unable to write journal to sufficient devices"); + if (!bch2_journal_error(j)) + bch_err(c, "unable to write journal to sufficient devices"); err = -BCH_ERR_journal_write_err; } else { bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c index a299d9ec..2f63fc6d 100644 --- a/libbcachefs/lru.c +++ b/libbcachefs/lru.c @@ -101,8 +101,7 @@ int bch2_lru_check_set(struct btree_trans *trans, goto err; if (fsck_err(trans, alloc_key_to_missing_lru_entry, - "missing %s lru entry\n" - " %s", + "missing %s lru entry\n%s", bch2_lru_types[lru_type(lru_k)], (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) { ret = bch2_lru_set(trans, lru_id, dev_bucket, time); @@ -190,8 +189,8 @@ static int bch2_check_lru_key(struct btree_trans *trans, if (fsck_err(trans, lru_entry_bad, "incorrect lru entry: lru %s time %llu\n" - " %s\n" - " for %s", + "%s\n" + "for %s", bch2_lru_types[type], lru_pos_time(lru_k.k->p), (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 66d1c055..5d41260e 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -126,40 +126,26 @@ static void move_write_done(struct bch_write_op *op) static void move_write(struct moving_io *io) { - struct bch_fs *c = io->write.op.c; struct moving_context *ctxt = io->write.ctxt; - struct bch_read_bio *rbio = &io->write.rbio; if (ctxt->stats) { - if (rbio->bio.bi_status) + if (io->write.rbio.bio.bi_status) atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9, &ctxt->stats->sectors_error_uncorrected); - else if (rbio->saw_error) + else if (io->write.rbio.saw_error) atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9, &ctxt->stats->sectors_error_corrected); } - /* - * If the extent has been bitrotted, we're going to have to give it a - * new checksum in order to move it - but the poison bit will ensure - * that userspace still gets the appropriate error. - */ - if (unlikely(rbio->ret == -BCH_ERR_data_read_csum_err && - (bch2_bkey_extent_flags(bkey_i_to_s_c(io->write.k.k)) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)))) { - struct bch_extent_crc_unpacked crc = rbio->pick.crc; - struct nonce nonce = extent_nonce(rbio->version, crc); - - rbio->pick.crc.csum = bch2_checksum_bio(c, rbio->pick.crc.csum_type, - nonce, &rbio->bio); - rbio->ret = 0; - } - - if (unlikely(rbio->ret || io->write.data_opts.scrub)) { + if (unlikely(io->write.rbio.ret || + io->write.rbio.bio.bi_status || + io->write.data_opts.scrub)) { move_free(io); return; } if (trace_io_move_write_enabled()) { + struct bch_fs *c = io->write.op.c; struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k)); diff --git a/libbcachefs/namei.c b/libbcachefs/namei.c index 93246ad3..ee725170 100644 --- a/libbcachefs/namei.c +++ b/libbcachefs/namei.c @@ -700,9 +700,9 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, if (bch2_inode_should_have_single_bp(target) && !fsck_err(trans, inode_wrong_backpointer, - "dirent points to inode that does not point back:\n %s", + "dirent points to inode that does not point back:\n%s", (bch2_bkey_val_to_text(&buf, c, d.s_c), - prt_printf(&buf, "\n "), + prt_newline(&buf), bch2_inode_unpacked_to_text(&buf, target), buf.buf))) goto err; diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index e5c42e20..af325881 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -482,14 +482,12 @@ void bch2_opts_to_text(struct printbuf *out, int bch2_opt_check_may_set(struct bch_fs *c, struct bch_dev *ca, int id, u64 v) { - lockdep_assert_held(&c->state_lock); - int ret = 0; switch (id) { case Opt_state: if (ca) - return __bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED); + return bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED); break; case Opt_compression: diff --git a/libbcachefs/printbuf.c b/libbcachefs/printbuf.c index 4cf5a2af..3302bbc7 100644 --- a/libbcachefs/printbuf.c +++ b/libbcachefs/printbuf.c @@ -276,6 +276,25 @@ void bch2_printbuf_indent_add(struct printbuf *buf, unsigned spaces) buf->has_indent_or_tabstops = true; } +/** + * bch2_printbuf_indent_add_nextline() - add to the current indent level for + * subsequent lines + * + * @buf: printbuf to control + * @spaces: number of spaces to add to the current indent level + * + * Subsequent lines - not the current line - will be indented by @spaces more + * spaces. + */ +void bch2_printbuf_indent_add_nextline(struct printbuf *buf, unsigned spaces) +{ + if (WARN_ON_ONCE(buf->indent + spaces < buf->indent)) + spaces = 0; + + buf->indent += spaces; + buf->has_indent_or_tabstops = true; +} + /** * bch2_printbuf_indent_sub() - subtract from the current indent level * diff --git a/libbcachefs/printbuf.h b/libbcachefs/printbuf.h index d0dd398b..1ca476ad 100644 --- a/libbcachefs/printbuf.h +++ b/libbcachefs/printbuf.h @@ -112,6 +112,7 @@ void bch2_printbuf_tabstop_pop(struct printbuf *); int bch2_printbuf_tabstop_push(struct printbuf *, unsigned); void bch2_printbuf_indent_add(struct printbuf *, unsigned); +void bch2_printbuf_indent_add_nextline(struct printbuf *, unsigned); void bch2_printbuf_indent_sub(struct printbuf *, unsigned); void bch2_prt_newline(struct printbuf *); diff --git a/libbcachefs/progress.c b/libbcachefs/progress.c index bafd1c91..d0989856 100644 --- a/libbcachefs/progress.c +++ b/libbcachefs/progress.c @@ -16,10 +16,8 @@ void bch2_progress_init(struct progress_indicator_state *s, if (!(btree_id_mask & BIT_ULL(i))) continue; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_btree, - .btree.id = i, - }; + struct disk_accounting_pos acc; + disk_accounting_key_init(acc, btree, .id = i); u64 v; bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1); diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index 10c6a7fd..b9bde04b 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -95,9 +95,6 @@ static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) - return 0; - return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) | bch2_bkey_ptrs_need_move(c, opts, ptrs); } @@ -110,9 +107,6 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) if (!opts) return 0; - if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) - return 0; - const union bch_extent_entry *entry; struct extent_ptr_decoded p; u64 sectors = 0; @@ -606,12 +600,13 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) struct bch_fs_rebalance *r = &c->rebalance; /* print pending work */ - struct disk_accounting_pos acc = { .type = BCH_DISK_ACCOUNTING_rebalance_work, }; + struct disk_accounting_pos acc; + disk_accounting_key_init(acc, rebalance_work); u64 v; bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1); prt_printf(out, "pending work:\t"); - prt_human_readable_u64(out, v); + prt_human_readable_u64(out, v << 9); prt_printf(out, "\n\n"); prt_str(out, bch2_rebalance_state_strs[r->state]); diff --git a/libbcachefs/recovery_passes.c b/libbcachefs/recovery_passes.c index 0b3c951c..593ff142 100644 --- a/libbcachefs/recovery_passes.c +++ b/libbcachefs/recovery_passes.c @@ -234,28 +234,22 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) int bch2_run_online_recovery_passes(struct bch_fs *c) { - int ret = 0; - - down_read(&c->state_lock); - for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) { struct recovery_pass_fn *p = recovery_pass_fns + i; if (!(p->when & PASS_ONLINE)) continue; - ret = bch2_run_recovery_pass(c, i); + int ret = bch2_run_recovery_pass(c, i); if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) { i = c->curr_recovery_pass; continue; } if (ret) - break; + return ret; } - up_read(&c->state_lock); - - return ret; + return 0; } int bch2_run_recovery_passes(struct bch_fs *c) diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 68172c6e..ee23f1f9 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -193,10 +193,10 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, if (ret) goto err; - prt_printf(&buf, "-%llu\n ", (missing_pos.offset + (missing_end - missing_start)) << 9); + prt_printf(&buf, "-%llu\n", (missing_pos.offset + (missing_end - missing_start)) << 9); bch2_bkey_val_to_text(&buf, c, p.s_c); - prt_printf(&buf, "\n missing reflink btree range %llu-%llu", + prt_printf(&buf, "\nmissing reflink btree range %llu-%llu", missing_start, missing_end); if (fsck_err(trans, reflink_p_to_missing_reflink_v, "%s", buf.buf)) { @@ -323,10 +323,10 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, __le64 *refcount = bkey_refcount(bkey_i_to_s(new)); if (!*refcount && (flags & BTREE_TRIGGER_overwrite)) { bch2_bkey_val_to_text(&buf, c, p.s_c); - prt_printf(&buf, "\n "); + prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, k); log_fsck_err(trans, reflink_refcount_underflow, - "indirect extent refcount underflow while marking\n %s", + "indirect extent refcount underflow while marking\n%s", buf.buf); goto next; } @@ -795,8 +795,8 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), trans, reflink_v_refcount_wrong, "reflink key has wrong refcount:\n" - " %s\n" - " should be %u", + "%s\n" + "should be %u", (bch2_bkey_val_to_text(&buf, c, k), buf.buf), r->refcount)) { struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); diff --git a/libbcachefs/sb-counters_format.h b/libbcachefs/sb-counters_format.h index 5c4e5de7..fa27ec59 100644 --- a/libbcachefs/sb-counters_format.h +++ b/libbcachefs/sb-counters_format.h @@ -16,7 +16,6 @@ enum counters_flags { x(io_read_split, 33, TYPE_COUNTER) \ x(io_read_reuse_race, 34, TYPE_COUNTER) \ x(io_read_retry, 32, TYPE_COUNTER) \ - x(io_read_fail_and_poison, 82, TYPE_COUNTER) \ x(io_write, 1, TYPE_SECTORS) \ x(io_move, 2, TYPE_SECTORS) \ x(io_move_read, 35, TYPE_SECTORS) \ diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h index 1736abea..5d43e350 100644 --- a/libbcachefs/sb-errors_format.h +++ b/libbcachefs/sb-errors_format.h @@ -5,8 +5,7 @@ enum bch_fsck_flags { FSCK_CAN_FIX = 1 << 0, FSCK_CAN_IGNORE = 1 << 1, - FSCK_NO_RATELIMIT = 1 << 2, - FSCK_AUTOFIX = 1 << 3, + FSCK_AUTOFIX = 1 << 2, }; #define BCH_SB_ERRS() \ diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c index e7f19789..0c65065b 100644 --- a/libbcachefs/snapshot.c +++ b/libbcachefs/snapshot.c @@ -485,7 +485,7 @@ static int check_snapshot_tree(struct btree_trans *trans, root_id != bch2_snapshot_root(c, root_id) || st.k->p.offset != le32_to_cpu(s.tree), trans, snapshot_tree_to_missing_snapshot, - "snapshot tree points to missing/incorrect snapshot:\n %s", + "snapshot tree points to missing/incorrect snapshot:\n%s", (bch2_bkey_val_to_text(&buf, c, st.s_c), prt_newline(&buf), ret @@ -505,19 +505,19 @@ static int check_snapshot_tree(struct btree_trans *trans, if (fsck_err_on(ret, trans, snapshot_tree_to_missing_subvol, - "snapshot tree points to missing subvolume:\n %s", + "snapshot tree points to missing subvolume:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || fsck_err_on(!bch2_snapshot_is_ancestor(c, le32_to_cpu(subvol.snapshot), root_id), trans, snapshot_tree_to_wrong_subvol, - "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s", + "snapshot tree points to subvolume that does not point to snapshot in this tree:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), trans, snapshot_tree_to_snapshot_subvol, - "snapshot tree points to snapshot subvolume:\n %s", + "snapshot tree points to snapshot subvolume:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { struct bkey_i_snapshot_tree *u; @@ -756,7 +756,7 @@ static int check_snapshot(struct btree_trans *trans, } else { if (fsck_err_on(s.subvol, trans, snapshot_should_not_have_subvol, - "snapshot should not point to subvol:\n %s", + "snapshot should not point to subvol:\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); @@ -774,7 +774,7 @@ static int check_snapshot(struct btree_trans *trans, if (fsck_err_on(!ret, trans, snapshot_to_bad_snapshot_tree, - "snapshot points to missing/incorrect tree:\n %s", + "snapshot points to missing/incorrect tree:\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = snapshot_tree_ptr_repair(trans, iter, k, &s); if (ret) @@ -786,7 +786,7 @@ static int check_snapshot(struct btree_trans *trans, if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, trans, snapshot_bad_depth, - "snapshot with incorrect depth field, should be %u:\n %s", + "snapshot with incorrect depth field, should be %u:\n%s", real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); @@ -803,7 +803,7 @@ static int check_snapshot(struct btree_trans *trans, if (fsck_err_on(!ret, trans, snapshot_bad_skiplist, - "snapshot with bad skiplist field:\n %s", + "snapshot with bad skiplist field:\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); diff --git a/libbcachefs/str_hash.c b/libbcachefs/str_hash.c index 93e71119..602afca2 100644 --- a/libbcachefs/str_hash.c +++ b/libbcachefs/str_hash.c @@ -232,7 +232,7 @@ bad_hash: goto out; if (fsck_err(trans, hash_table_key_wrong_offset, - "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s", + "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", bch2_btree_id_str(desc->btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index b7b96283..cd0d8e5e 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -561,6 +561,7 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) } SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); + n->v.fs_path_parent = 0; bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 99f9a0aa..20208f3c 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -533,9 +533,11 @@ int bch2_fs_read_write(struct bch_fs *c) int bch2_fs_read_write_early(struct bch_fs *c) { - lockdep_assert_held(&c->state_lock); + down_write(&c->state_lock); + int ret = __bch2_fs_read_write(c, true); + up_write(&c->state_lock); - return __bch2_fs_read_write(c, true); + return ret; } /* Filesystem startup/shutdown: */ @@ -1019,38 +1021,39 @@ static void print_mount_opts(struct bch_fs *c) int bch2_fs_start(struct bch_fs *c) { time64_t now = ktime_get_real_seconds(); - int ret; + int ret = 0; print_mount_opts(c); down_write(&c->state_lock); + mutex_lock(&c->sb_lock); BUG_ON(test_bit(BCH_FS_started, &c->flags)); - mutex_lock(&c->sb_lock); + if (!bch2_sb_field_get_minsize(&c->disk_sb, ext, + sizeof(struct bch_sb_field_ext) / sizeof(u64))) { + mutex_unlock(&c->sb_lock); + up_write(&c->state_lock); + ret = -BCH_ERR_ENOSPC_sb; + goto err; + } ret = bch2_sb_members_v2_init(c); if (ret) { mutex_unlock(&c->sb_lock); + up_write(&c->state_lock); goto err; } for_each_online_member(c, ca) bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now); - struct bch_sb_field_ext *ext = - bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64)); mutex_unlock(&c->sb_lock); - if (!ext) { - bch_err(c, "insufficient space in superblock for sb_field_ext"); - ret = -BCH_ERR_ENOSPC_sb; - goto err; - } - for_each_rw_member(c, ca) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); + up_write(&c->state_lock); c->recovery_task = current; ret = BCH_SB_INITIALIZED(c->disk_sb.sb) @@ -1066,31 +1069,28 @@ int bch2_fs_start(struct bch_fs *c) goto err; if (bch2_fs_init_fault("fs_start")) { - bch_err(c, "fs_start fault injected"); - ret = -EINVAL; + ret = -BCH_ERR_injected_fs_start; goto err; } set_bit(BCH_FS_started, &c->flags); wake_up(&c->ro_ref_wait); + down_write(&c->state_lock); if (c->opts.read_only) { bch2_fs_read_only(c); } else { ret = !test_bit(BCH_FS_rw, &c->flags) ? bch2_fs_read_write(c) : bch2_fs_read_write_late(c); - if (ret) - goto err; } + up_write(&c->state_lock); - ret = 0; err: if (ret) bch_err_msg(c, ret, "starting filesystem"); else bch_verbose(c, "done starting filesystem"); - up_write(&c->state_lock); return ret; } @@ -2259,7 +2259,7 @@ BCH_DEBUG_PARAMS() __maybe_unused static unsigned bch2_metadata_version = bcachefs_metadata_version_current; -module_param_named(version, bch2_metadata_version, uint, 0400); +module_param_named(version, bch2_metadata_version, uint, 0444); module_exit(bcachefs_exit); module_init(bcachefs_init); diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 8c200b55..e5f003c2 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -148,7 +148,6 @@ write_attribute(trigger_btree_key_cache_shrink); write_attribute(trigger_freelist_wakeup); write_attribute(trigger_btree_updates); read_attribute(gc_gens_pos); -__sysfs_attribute(read_fua_test, 0400); read_attribute(uuid); read_attribute(minor); @@ -258,10 +257,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c prt_printf(out, "type\tcompressed\runcompressed\raverage extent size\r\n"); for (unsigned i = 1; i < BCH_COMPRESSION_TYPE_NR; i++) { - struct disk_accounting_pos a = { - .type = BCH_DISK_ACCOUNTING_compression, - .compression.type = i, - }; + struct disk_accounting_pos a; + disk_accounting_key_init(a, compression, .type = i); struct bpos p = disk_accounting_pos_to_bpos(&a); u64 v[3]; bch2_accounting_mem_read(c, p, v, ARRAY_SIZE(v)); @@ -311,116 +308,6 @@ static void bch2_fs_usage_base_to_text(struct printbuf *out, struct bch_fs *c) prt_printf(out, "nr_inodes:\t%llu\n", b.nr_inodes); } -static int bch2_read_fua_test(struct printbuf *out, struct bch_dev *ca) -{ - struct bch_fs *c = ca->fs; - struct bio *bio = NULL; - void *buf = NULL; - unsigned bs = c->opts.block_size, iters; - u64 end, test_duration = NSEC_PER_SEC * 2; - struct bch2_time_stats stats_nofua, stats_fua, stats_random; - int ret = 0; - - bch2_time_stats_init_no_pcpu(&stats_nofua); - bch2_time_stats_init_no_pcpu(&stats_fua); - bch2_time_stats_init_no_pcpu(&stats_random); - - if (!bch2_dev_get_ioref(c, ca->dev_idx, READ)) { - prt_str(out, "offline\n"); - return 0; - } - - struct block_device *bdev = ca->disk_sb.bdev; - - bio = bio_kmalloc(1, GFP_KERNEL); - if (!bio) { - ret = -ENOMEM; - goto err; - } - - buf = kmalloc(bs, GFP_KERNEL); - if (!buf) - goto err; - - end = ktime_get_ns() + test_duration; - for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) { - bio_init(bio, bdev, bio->bi_inline_vecs, 1, READ); - bch2_bio_map(bio, buf, bs); - - u64 submit_time = ktime_get_ns(); - ret = submit_bio_wait(bio); - bch2_time_stats_update(&stats_nofua, submit_time); - - if (ret) - goto err; - } - - end = ktime_get_ns() + test_duration; - for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) { - bio_init(bio, bdev, bio->bi_inline_vecs, 1, REQ_FUA|READ); - bch2_bio_map(bio, buf, bs); - - u64 submit_time = ktime_get_ns(); - ret = submit_bio_wait(bio); - bch2_time_stats_update(&stats_fua, submit_time); - - if (ret) - goto err; - } - - u64 dev_size = ca->mi.nbuckets * bucket_bytes(ca); - - end = ktime_get_ns() + test_duration; - for (iters = 0; iters < 1000 && time_before64(ktime_get_ns(), end); iters++) { - bio_init(bio, bdev, bio->bi_inline_vecs, 1, READ); - bio->bi_iter.bi_sector = (bch2_get_random_u64_below(dev_size) & ~((u64) bs - 1)) >> 9; - bch2_bio_map(bio, buf, bs); - - u64 submit_time = ktime_get_ns(); - ret = submit_bio_wait(bio); - bch2_time_stats_update(&stats_random, submit_time); - - if (ret) - goto err; - } - - u64 ns_nofua = mean_and_variance_get_mean(stats_nofua.duration_stats); - u64 ns_fua = mean_and_variance_get_mean(stats_fua.duration_stats); - u64 ns_rand = mean_and_variance_get_mean(stats_random.duration_stats); - - u64 stddev_nofua = mean_and_variance_get_stddev(stats_nofua.duration_stats); - u64 stddev_fua = mean_and_variance_get_stddev(stats_fua.duration_stats); - u64 stddev_rand = mean_and_variance_get_stddev(stats_random.duration_stats); - - printbuf_tabstop_push(out, 8); - printbuf_tabstop_push(out, 12); - printbuf_tabstop_push(out, 12); - prt_printf(out, "This test must be run on an idle drive for accurate results\n"); - prt_printf(out, "%s\n", dev_name(&ca->disk_sb.bdev->bd_device)); - prt_printf(out, "fua support advertized: %s\n", bdev_fua(bdev) ? "yes" : "no"); - prt_newline(out); - prt_printf(out, "ns:\tlatency\rstddev\r\n"); - prt_printf(out, "nofua\t%llu\r%llu\r\n", ns_nofua, stddev_nofua); - prt_printf(out, "fua\t%llu\r%llu\r\n", ns_fua, stddev_fua); - prt_printf(out, "random\t%llu\r%llu\r\n", ns_rand, stddev_rand); - - bool read_cache = ns_nofua * 2 < ns_rand; - bool fua_cached = read_cache && ns_fua < (ns_nofua + ns_rand) / 2; - - if (!read_cache) - prt_str(out, "reads don't appear to be cached - safe\n"); - else if (!fua_cached) - prt_str(out, "fua reads don't appear to be cached - safe\n"); - else - prt_str(out, "fua reads appear to be cached - unsafe\n"); -err: - kfree(buf); - kfree(bio); - percpu_ref_put(&ca->io_ref); - bch_err_fn(c, ret); - return ret; -} - SHOW(bch2_fs) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); @@ -742,8 +629,6 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))) return -EROFS; - down_write(&c->state_lock); - char *tmp = kstrdup(buf, GFP_KERNEL); if (!tmp) { ret = -ENOMEM; @@ -786,7 +671,6 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, ret = size; err: - up_write(&c->state_lock); bch2_write_ref_put(c, BCH_WRITE_REF_sysfs); return ret; } @@ -934,9 +818,6 @@ SHOW(bch2_dev) if (attr == &sysfs_open_buckets) bch2_open_buckets_to_text(out, c, ca); - if (attr == &sysfs_read_fua_test) - return bch2_read_fua_test(out, ca); - int opt_id = bch2_opt_lookup(attr->name); if (opt_id >= 0) return sysfs_opt_show(c, ca, opt_id, out); @@ -993,8 +874,6 @@ struct attribute *bch2_dev_files[] = { &sysfs_io_latency_stats_write, &sysfs_congested, - &sysfs_read_fua_test, - /* debug: */ &sysfs_alloc_debug, &sysfs_open_buckets, diff --git a/libbcachefs/time_stats.c b/libbcachefs/time_stats.c index a8382d87..2c34fe4b 100644 --- a/libbcachefs/time_stats.c +++ b/libbcachefs/time_stats.c @@ -11,7 +11,7 @@ #include "time_stats.h" /* disable automatic switching to percpu mode */ -#define TIME_STATS_NONPCPU ((struct time_stat_buffer *) 1) +#define TIME_STATS_NONPCPU ((unsigned long) 1) static const struct time_unit time_units[] = { { "ns", 1 }, @@ -126,7 +126,7 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) { unsigned long flags; - if ((unsigned long) stats->buffer <= 1) { + if ((unsigned long) stats->buffer <= TIME_STATS_NONPCPU) { spin_lock_irqsave(&stats->lock, flags); time_stats_update_one(stats, start, end); @@ -161,8 +161,7 @@ void bch2_time_stats_reset(struct bch2_time_stats *stats) unsigned offset = offsetof(struct bch2_time_stats, min_duration); memset((void *) stats + offset, 0, sizeof(*stats) - offset); - if (stats->buffer && - stats->buffer != TIME_STATS_NONPCPU) { + if ((unsigned long) stats->buffer > TIME_STATS_NONPCPU) { int cpu; for_each_possible_cpu(cpu) per_cpu_ptr(stats->buffer, cpu)->nr = 0; @@ -172,10 +171,9 @@ void bch2_time_stats_reset(struct bch2_time_stats *stats) void bch2_time_stats_exit(struct bch2_time_stats *stats) { - if (stats->buffer != TIME_STATS_NONPCPU) { + if ((unsigned long) stats->buffer > TIME_STATS_NONPCPU) free_percpu(stats->buffer); - stats->buffer = NULL; - } + stats->buffer = NULL; } void bch2_time_stats_init(struct bch2_time_stats *stats) @@ -189,5 +187,5 @@ void bch2_time_stats_init(struct bch2_time_stats *stats) void bch2_time_stats_init_no_pcpu(struct bch2_time_stats *stats) { bch2_time_stats_init(stats); - stats->buffer = TIME_STATS_NONPCPU; + stats->buffer = (struct time_stat_buffer __percpu *) TIME_STATS_NONPCPU; } diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h index 8c07189a..519d00d6 100644 --- a/libbcachefs/trace.h +++ b/libbcachefs/trace.h @@ -339,11 +339,6 @@ DEFINE_EVENT(bio, io_read_reuse_race, TP_ARGS(bio) ); -DEFINE_EVENT(bio, io_read_fail_and_poison, - TP_PROTO(struct bio *bio), - TP_ARGS(bio) -); - /* ec.c */ TRACE_EVENT(stripe_create, diff --git a/libbcachefs/util.c b/libbcachefs/util.c index 553de8d8..87af5516 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -270,7 +270,7 @@ static void __bch2_print_string_as_lines(const char *prefix, const char *lines, locked = console_trylock(); } - while (1) { + while (*lines) { p = strchrnul(lines, '\n'); printk("%s%.*s\n", prefix, (int) (p - lines), lines); if (!*p) diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 7d921fc9..1e94f89a 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -94,6 +94,7 @@ do { \ #define printbuf_tabstop_push(_buf, _n) bch2_printbuf_tabstop_push(_buf, _n) #define printbuf_indent_add(_out, _n) bch2_printbuf_indent_add(_out, _n) +#define printbuf_indent_add_nextline(_out, _n) bch2_printbuf_indent_add_nextline(_out, _n) #define printbuf_indent_sub(_out, _n) bch2_printbuf_indent_sub(_out, _n) #define prt_newline(_out) bch2_prt_newline(_out)