From 35a13784a2a5fe4646a51b284a599f664eaf7d63 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 15 Jan 2025 12:19:55 -0500 Subject: [PATCH] Update bcachefs sources to 78c6c8127e21 bcachefs: Fix check_inode_hash_info_matches_root() --- .bcachefs_revision | 2 +- libbcachefs/alloc_background.c | 3 +- libbcachefs/backpointers.c | 36 ++-- libbcachefs/bcachefs.h | 3 +- libbcachefs/bcachefs_format.h | 3 +- libbcachefs/bcachefs_ioctl.h | 14 +- libbcachefs/btree_io.c | 189 +---------------- libbcachefs/btree_io.h | 3 - libbcachefs/btree_key_cache.c | 4 +- libbcachefs/btree_locking.c | 62 +++--- libbcachefs/btree_update_interior.c | 20 -- libbcachefs/btree_update_interior.h | 4 - libbcachefs/chardev.c | 33 +-- libbcachefs/data_update.c | 21 +- libbcachefs/data_update.h | 3 - libbcachefs/debug.c | 2 +- libbcachefs/dirent.h | 5 + libbcachefs/ec_format.h | 17 ++ libbcachefs/errcode.h | 1 - libbcachefs/extents.c | 9 +- libbcachefs/extents.h | 2 +- libbcachefs/fs-common.c | 11 + libbcachefs/fs.c | 13 +- libbcachefs/fsck.c | 79 ++++++- libbcachefs/io_read.c | 79 +++---- libbcachefs/io_read.h | 5 +- libbcachefs/journal_io.c | 4 +- libbcachefs/move.c | 314 ++++++++++------------------ libbcachefs/move_types.h | 18 +- libbcachefs/recovery_passes_types.h | 2 +- libbcachefs/sb-downgrade.c | 5 +- libbcachefs/sb-errors_format.h | 4 +- libbcachefs/sb-members.h | 12 -- libbcachefs/six.c | 10 +- libbcachefs/snapshot.c | 15 +- libbcachefs/str_hash.c | 13 +- libbcachefs/trace.h | 52 +++++ 37 files changed, 434 insertions(+), 638 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index ae0826de..29c360d6 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -f4b9a91fce7373d163343e5ca3a9b483d113706d +78c6c8127e21fe2c8bf5c1d6a5e6832e28136f8f diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 94e7bc88..fc2ef33b 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -1402,7 +1402,8 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite struct btree_iter alloc_iter; struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, - BTREE_ID_alloc, bucket, BTREE_ITER_cached); + BTREE_ID_alloc, bucket, + async_repair ? BTREE_ITER_cached : 0); int ret = bkey_err(alloc_k); if (ret) return ret; diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 655be233..ebeb6a5f 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -244,31 +244,27 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c))) return bkey_s_c_null; - bch2_trans_node_iter_init(trans, iter, - bp.v->btree_id, - bp.v->pos, - 0, - bp.v->level, - iter_flags); - struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - if (bkey_err(k)) { + if (likely(!bp.v->level)) { + bch2_trans_node_iter_init(trans, iter, + bp.v->btree_id, + bp.v->pos, + 0, 0, + iter_flags); + struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); + if (bkey_err(k)) { + bch2_trans_iter_exit(trans, iter); + return k; + } + + if (k.k && + extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp)) + return k; + bch2_trans_iter_exit(trans, iter); - return k; - } - - if (k.k && - extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp)) - return k; - - bch2_trans_iter_exit(trans, iter); - - if (!bp.v->level) { int ret = backpointer_target_not_found(trans, bp, k, last_flushed); return ret ? bkey_s_c_err(ret) : bkey_s_c_null; } else { struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed); - if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node)) - return bkey_s_c_null; if (IS_ERR_OR_NULL(b)) return ((struct bkey_s_c) { .k = ERR_CAST(b) }); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 13acfbf3..161cf2f0 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -687,8 +687,7 @@ struct btree_trans_buf { x(gc_gens) \ x(snapshot_delete_pagecache) \ x(sysfs) \ - x(btree_write_buffer) \ - x(btree_node_scrub) + x(btree_write_buffer) enum bch_write_ref { #define x(n) BCH_WRITE_REF_##n, diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 06809305..f70f0108 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -685,7 +685,8 @@ struct bch_sb_field_ext { x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \ x(inode_depth, BCH_VERSION(1, 17)) \ x(persistent_inode_cursors, BCH_VERSION(1, 18)) \ - x(autofix_errors, BCH_VERSION(1, 19)) + x(autofix_errors, BCH_VERSION(1, 19)) \ + x(directory_size, BCH_VERSION(1, 20)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h index f176f192..3c23bdf7 100644 --- a/libbcachefs/bcachefs_ioctl.h +++ b/libbcachefs/bcachefs_ioctl.h @@ -213,10 +213,6 @@ struct bch_ioctl_data { struct bpos end_pos; union { - struct { - __u32 dev; - __u32 data_types; - } scrub; struct { __u32 dev; __u32 pad; @@ -241,19 +237,11 @@ struct bch_ioctl_data_progress { __u64 sectors_done; __u64 sectors_total; - __u64 sectors_error_corrected; - __u64 sectors_error_uncorrected; } __packed __aligned(8); -enum bch_ioctl_data_event_ret { - BCH_IOCTL_DATA_EVENT_RET_done = 1, - BCH_IOCTL_DATA_EVENT_RET_device_offline = 2, -}; - struct bch_ioctl_data_event { __u8 type; - __u8 ret; - __u8 pad[6]; + __u8 pad[7]; union { struct bch_ioctl_data_progress p; __u64 pad2[15]; diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 13ab827d..e371e60e 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_buf.h" #include "bkey_methods.h" #include "bkey_sort.h" #include "btree_cache.h" @@ -1353,7 +1352,7 @@ start: can_retry = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), - &failed, &rb->pick, -1) > 0; + &failed, &rb->pick) > 0; if (!bio->bi_status && !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) { @@ -1698,7 +1697,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, return; ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), - NULL, &pick, -1); + NULL, &pick); if (ret <= 0) { struct printbuf buf = PRINTBUF; @@ -1812,190 +1811,6 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level)); } -struct btree_node_scrub { - struct bch_fs *c; - struct bch_dev *ca; - void *buf; - bool used_mempool; - unsigned written; - - enum btree_id btree; - unsigned level; - struct bkey_buf key; - __le64 seq; - - struct work_struct work; - struct bio bio; -}; - -static bool btree_node_scrub_check(struct bch_fs *c, struct btree_node *data, unsigned ptr_written, - struct printbuf *err) -{ - unsigned written = 0; - - if (le64_to_cpu(data->magic) != bset_magic(c)) { - prt_printf(err, "bad magic: want %llx, got %llx", - bset_magic(c), le64_to_cpu(data->magic)); - return false; - } - - while (written < (ptr_written ?: btree_sectors(c))) { - struct btree_node_entry *bne; - struct bset *i; - bool first = !written; - - if (first) { - bne = NULL; - i = &data->keys; - } else { - bne = (void *) data + (written << 9); - i = &bne->keys; - - if (!ptr_written && i->seq != data->keys.seq) - break; - } - - struct nonce nonce = btree_nonce(i, written << 9); - bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)); - - if (first) { - if (good_csum_type) { - struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, data); - if (bch2_crc_cmp(data->csum, csum)) { - bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), data->csum, csum); - return false; - } - } - - written += vstruct_sectors(data, c->block_bits); - } else { - if (good_csum_type) { - struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); - if (bch2_crc_cmp(bne->csum, csum)) { - bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), bne->csum, csum); - return false; - } - } - - written += vstruct_sectors(bne, c->block_bits); - } - } - - return true; -} - -static void btree_node_scrub_work(struct work_struct *work) -{ - struct btree_node_scrub *scrub = container_of(work, struct btree_node_scrub, work); - struct bch_fs *c = scrub->c; - struct printbuf err = PRINTBUF; - - __bch2_btree_pos_to_text(&err, c, scrub->btree, scrub->level, - bkey_i_to_s_c(scrub->key.k)); - prt_newline(&err); - - if (!btree_node_scrub_check(c, scrub->buf, scrub->written, &err)) { - struct btree_trans *trans = bch2_trans_get(c); - - struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, scrub->btree, - scrub->key.k->k.p, 0, scrub->level - 1, 0); - - struct btree *b; - int ret = lockrestart_do(trans, PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(&iter))); - if (ret) - goto err; - - if (bkey_i_to_btree_ptr_v2(&b->key)->v.seq == scrub->seq) { - bch_err(c, "error validating btree node during scrub on %s at btree %s", - scrub->ca->name, err.buf); - - ret = bch2_btree_node_rewrite(trans, &iter, b, 0); - } -err: - bch2_trans_iter_exit(trans, &iter); - bch2_trans_begin(trans); - bch2_trans_put(trans); - } - - printbuf_exit(&err); - bch2_bkey_buf_exit(&scrub->key, c);; - btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf); - percpu_ref_put(&scrub->ca->io_ref); - kfree(scrub); - bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub); -} - -static void btree_node_scrub_endio(struct bio *bio) -{ - struct btree_node_scrub *scrub = container_of(bio, struct btree_node_scrub, bio); - - queue_work(scrub->c->btree_read_complete_wq, &scrub->work); -} - -int bch2_btree_node_scrub(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bkey_s_c k, unsigned dev) -{ - if (k.k->type != KEY_TYPE_btree_ptr_v2) - return 0; - - struct bch_fs *c = trans->c; - - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_node_scrub)) - return -BCH_ERR_erofs_no_writes; - - struct extent_ptr_decoded pick; - int ret = bch2_bkey_pick_read_device(c, k, NULL, &pick, dev); - if (ret <= 0) - goto err; - - struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ); - if (!ca) { - ret = -BCH_ERR_device_offline; - goto err; - } - - bool used_mempool = false; - void *buf = btree_bounce_alloc(c, c->opts.btree_node_size, &used_mempool); - - unsigned vecs = buf_pages(buf, c->opts.btree_node_size); - - struct btree_node_scrub *scrub = - kzalloc(sizeof(*scrub) + sizeof(struct bio_vec) * vecs, GFP_KERNEL); - if (!scrub) { - ret = -ENOMEM; - goto err_free; - } - - scrub->c = c; - scrub->ca = ca; - scrub->buf = buf; - scrub->used_mempool = used_mempool; - scrub->written = btree_ptr_sectors_written(k); - - scrub->btree = btree; - scrub->level = level; - bch2_bkey_buf_init(&scrub->key); - bch2_bkey_buf_reassemble(&scrub->key, c, k); - scrub->seq = bkey_s_c_to_btree_ptr_v2(k).v->seq; - - INIT_WORK(&scrub->work, btree_node_scrub_work); - - bio_init(&scrub->bio, ca->disk_sb.bdev, scrub->bio.bi_inline_vecs, vecs, REQ_OP_READ); - bch2_bio_map(&scrub->bio, scrub->buf, c->opts.btree_node_size); - scrub->bio.bi_iter.bi_sector = pick.ptr.offset; - scrub->bio.bi_end_io = btree_node_scrub_endio; - submit_bio(&scrub->bio); - return 0; -err_free: - btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf); - percpu_ref_put(&ca->io_ref); -err: - bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub); - return ret; -} - static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, struct btree_write *w) { diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index 75ead381..6f9e4a6d 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -132,9 +132,6 @@ void bch2_btree_node_read(struct btree_trans *, struct btree *, bool); int bch2_btree_root_read(struct bch_fs *, enum btree_id, const struct bkey_i *, unsigned); -int bch2_btree_node_scrub(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, unsigned); - bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *); enum btree_write_flags { diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index 7636a5e9..3b62296c 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -291,10 +291,8 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, struct btree_path *ck_path, unsigned flags) { - if (flags & BTREE_ITER_cached_nofill) { - ck_path->uptodate = BTREE_ITER_UPTODATE; + if (flags & BTREE_ITER_cached_nofill) return 0; - } struct bch_fs *c = trans->c; struct btree_iter iter; diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c index 85039314..10b805a6 100644 --- a/libbcachefs/btree_locking.c +++ b/libbcachefs/btree_locking.c @@ -109,6 +109,12 @@ static noinline void lock_graph_pop_all(struct lock_graph *g) lock_graph_up(g); } +static noinline void lock_graph_pop_from(struct lock_graph *g, struct trans_waiting_for_lock *i) +{ + while (g->g + g->nr > i) + lock_graph_up(g); +} + static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans) { g->g[g->nr++] = (struct trans_waiting_for_lock) { @@ -124,15 +130,20 @@ static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans) __lock_graph_down(g, trans); } -static bool lock_graph_remove_non_waiters(struct lock_graph *g) +static bool lock_graph_remove_non_waiters(struct lock_graph *g, + struct trans_waiting_for_lock *from) { struct trans_waiting_for_lock *i; - for (i = g->g + 1; i < g->g + g->nr; i++) + if (from->trans->locking != from->node_want) { + lock_graph_pop_from(g, from); + return true; + } + + for (i = from + 1; i < g->g + g->nr; i++) if (i->trans->locking != i->node_want || i->trans->locking_wait.start_time != i[-1].lock_start_time) { - while (g->g + g->nr > i) - lock_graph_up(g); + lock_graph_pop_from(g, i); return true; } @@ -179,13 +190,14 @@ static int btree_trans_abort_preference(struct btree_trans *trans) return 3; } -static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) +static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle, + struct trans_waiting_for_lock *from) { struct trans_waiting_for_lock *i, *abort = NULL; unsigned best = 0, pref; int ret; - if (lock_graph_remove_non_waiters(g)) + if (lock_graph_remove_non_waiters(g, from)) return 0; /* Only checking, for debugfs: */ @@ -195,7 +207,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) goto out; } - for (i = g->g; i < g->g + g->nr; i++) { + for (i = from; i < g->g + g->nr; i++) { pref = btree_trans_abort_preference(i->trans); if (pref > best) { abort = i; @@ -229,8 +241,9 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) ret = abort_lock(g, abort); out: if (ret) - while (g->nr) - lock_graph_up(g); + lock_graph_pop_all(g); + else + lock_graph_pop_from(g, abort); return ret; } @@ -243,7 +256,7 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, for (i = g->g; i < g->g + g->nr; i++) if (i->trans == trans) { closure_put(&trans->ref); - return break_cycle(g, cycle); + return break_cycle(g, cycle, i); } if (g->nr == ARRAY_SIZE(g->g)) { @@ -252,8 +265,7 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, if (orig_trans->lock_may_not_fail) return 0; - while (g->nr) - lock_graph_up(g); + lock_graph_pop_all(g); if (cycle) return 0; @@ -281,7 +293,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) g.nr = 0; - if (trans->lock_must_abort) { + if (trans->lock_must_abort && !trans->lock_may_not_fail) { if (cycle) return -1; @@ -336,7 +348,7 @@ next: * structures - which means it can't be blocked * waiting on a lock: */ - if (!lock_graph_remove_non_waiters(&g)) { + if (!lock_graph_remove_non_waiters(&g, g.g)) { /* * If lock_graph_remove_non_waiters() * didn't do anything, it must be @@ -512,7 +524,6 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans, struct btree_path *path, unsigned level) { struct btree *b = path->l[level].b; - struct six_lock_count count = bch2_btree_node_lock_counts(trans, path, &b->c, level); if (!is_btree_node(path, level)) return false; @@ -536,24 +547,11 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans, if (race_fault()) return false; - if (btree_node_locked(path, level)) { - bool ret; + if (btree_node_locked(path, level) + ? six_lock_tryupgrade(&b->c.lock) + : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq)) + goto success; - six_lock_readers_add(&b->c.lock, -count.n[SIX_LOCK_read]); - ret = six_lock_tryupgrade(&b->c.lock); - six_lock_readers_add(&b->c.lock, count.n[SIX_LOCK_read]); - - if (ret) - goto success; - } else { - if (six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq)) - goto success; - } - - /* - * Do we already have an intent lock via another path? If so, just bump - * lock count: - */ if (btree_node_lock_seq_matches(path, b, level) && btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) { btree_node_unlock(trans, path, level); diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index ab111fec..f4aeadbe 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -2189,26 +2189,6 @@ err: goto out; } -int bch2_btree_node_rewrite_key(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bpos pos, unsigned flags) -{ - BUG_ON(!level); - - /* Traverse one depth lower to get a pointer to the node itself: */ - struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, btree, pos, 0, level - 1, 0); - struct btree *b = bch2_btree_iter_peek_node(&iter); - int ret = PTR_ERR_OR_ZERO(b); - if (ret) - goto err; - - ret = bch2_btree_node_rewrite(trans, &iter, b, flags); -err: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - struct async_btree_rewrite { struct bch_fs *c; struct work_struct work; diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h index fa5a88f9..7930ffea 100644 --- a/libbcachefs/btree_update_interior.h +++ b/libbcachefs/btree_update_interior.h @@ -169,11 +169,7 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *, struct btree *, unsigned); -int bch2_btree_node_rewrite_key(struct btree_trans *, - enum btree_id, unsigned, - struct bpos, unsigned); void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *); - int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *, struct btree *, struct bkey_i *, unsigned, bool); diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c index 0eb32074..46e9e321 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/chardev.c @@ -312,10 +312,7 @@ static int bch2_data_thread(void *arg) struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr); ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg); - if (ctx->thr.ret == -BCH_ERR_device_offline) - ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline; - else - ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done; + ctx->stats.data_type = U8_MAX; return 0; } @@ -334,30 +331,14 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr); struct bch_fs *c = ctx->c; struct bch_ioctl_data_event e = { - .type = BCH_DATA_EVENT_PROGRESS, - .ret = ctx->stats.ret, - .p.data_type = ctx->stats.data_type, - .p.btree_id = ctx->stats.pos.btree, - .p.pos = ctx->stats.pos.pos, - .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), - .p.sectors_error_corrected = atomic64_read(&ctx->stats.sectors_error_corrected), - .p.sectors_error_uncorrected = atomic64_read(&ctx->stats.sectors_error_uncorrected), + .type = BCH_DATA_EVENT_PROGRESS, + .p.data_type = ctx->stats.data_type, + .p.btree_id = ctx->stats.pos.btree, + .p.pos = ctx->stats.pos.pos, + .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), + .p.sectors_total = bch2_fs_usage_read_short(c).used, }; - if (ctx->arg.op == BCH_DATA_OP_scrub) { - struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev); - if (ca) { - struct bch_dev_usage u; - bch2_dev_usage_read_fast(ca, &u); - for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++) - if (ctx->arg.scrub.data_types & BIT(i)) - e.p.sectors_total += u.d[i].sectors; - bch2_dev_put(ca); - } - } else { - e.p.sectors_total = bch2_fs_usage_read_short(c).used; - } - if (len < sizeof(e)) return -EINVAL; diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index 58521493..19ee424c 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -524,35 +524,42 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { - printbuf_tabstop_push(out, 20); - prt_str(out, "rewrite ptrs:\t"); + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 20); + + prt_printf(out, "rewrite ptrs:\t"); bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); prt_newline(out); - prt_str(out, "kill ptrs:\t"); + prt_printf(out, "kill ptrs:\t"); bch2_prt_u64_base2(out, data_opts->kill_ptrs); prt_newline(out); - prt_str(out, "target:\t"); + prt_printf(out, "target:\t"); bch2_target_to_text(out, c, data_opts->target); prt_newline(out); - prt_str(out, "compression:\t"); + prt_printf(out, "compression:\t"); bch2_compression_opt_to_text(out, io_opts->background_compression); prt_newline(out); - prt_str(out, "opts.replicas:\t"); + prt_printf(out, "opts.replicas:\t"); prt_u64(out, io_opts->data_replicas); + prt_newline(out); - prt_str(out, "extra replicas:\t"); + prt_printf(out, "extra replicas:\t"); prt_u64(out, data_opts->extra_replicas); + prt_newline(out); } void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) { bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); prt_newline(out); + printbuf_indent_add(out, 2); bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); + bch2_write_op_to_text(out, &m->op); + printbuf_indent_sub(out, 2); } int bch2_extent_drop_ptrs(struct btree_trans *trans, diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h index 144b935c..e4b50723 100644 --- a/libbcachefs/data_update.h +++ b/libbcachefs/data_update.h @@ -15,9 +15,6 @@ struct data_update_opts { u8 extra_replicas; unsigned btree_insert_flags; unsigned write_flags; - - int read_dev; - bool scrub; }; void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *, diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 3722c2ac..b5de52a5 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -189,7 +189,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, unsigned offset = 0; int ret; - if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick, -1) <= 0) { + if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) { prt_printf(out, "error getting device to read from: invalid device\n"); return; } diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h index 362b3b2f..a633f83c 100644 --- a/libbcachefs/dirent.h +++ b/libbcachefs/dirent.h @@ -31,6 +31,11 @@ static inline unsigned dirent_val_u64s(unsigned len) sizeof(u64)); } +static inline unsigned int dirent_occupied_size(const struct qstr *name) +{ + return (BKEY_U64s + dirent_val_u64s(name->len)) * sizeof(u64); +} + int bch2_dirent_read_target(struct btree_trans *, subvol_inum, struct bkey_s_c_dirent, subvol_inum *); diff --git a/libbcachefs/ec_format.h b/libbcachefs/ec_format.h index 64ef52e0..b9770f24 100644 --- a/libbcachefs/ec_format.h +++ b/libbcachefs/ec_format.h @@ -20,6 +20,23 @@ struct bch_stripe { */ __u8 disk_label; + /* + * Variable length sections: + * - Pointers + * - Checksums + * 2D array of [stripe block/device][csum block], with checksum block + * size given by csum_granularity_bits + * - Block sector counts: per-block array of u16s + * + * XXX: + * Either checksums should have come last, or we should have included a + * checksum_size field (the size in bytes of the checksum itself, not + * the blocksize the checksum covers). + * + * Currently we aren't able to access the block sector counts if the + * checksum type is unknown. + */ + struct bch_extent_ptr ptrs[]; } __packed __aligned(8); diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index cae4fc3e..4590cd0c 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -269,7 +269,6 @@ x(EIO, invalidate_stripe_to_dev) \ x(EIO, no_encryption_key) \ x(EIO, insufficient_journal_devices) \ - x(EIO, device_offline) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 78a51d96..05d5f71a 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -114,9 +114,8 @@ static inline bool ptr_better(struct bch_fs *c, * other devices, it will still pick a pointer from avoid. */ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, - struct bch_io_failures *failed, - struct extent_ptr_decoded *pick, - int dev) + struct bch_io_failures *failed, + struct extent_ptr_decoded *pick) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -138,10 +137,6 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, break; } - /* Are we being asked to read from a specific device? */ - if (dev >= 0 && p.ptr.dev != dev) - continue; - /* * If there are any dirty pointers it's an error if we can't * read: diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index 8fae6b23..620b284a 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -404,7 +404,7 @@ void bch2_mark_io_failure(struct bch_io_failures *, struct extent_ptr_decoded *); int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, struct bch_io_failures *, - struct extent_ptr_decoded *, int); + struct extent_ptr_decoded *); /* KEY_TYPE_btree_ptr: */ diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c index 2c3d46ac..d70d9f63 100644 --- a/libbcachefs/fs-common.c +++ b/libbcachefs/fs-common.c @@ -152,6 +152,7 @@ int bch2_create_trans(struct btree_trans *trans, if (is_subdir_for_nlink(new_inode)) dir_u->bi_nlink++; dir_u->bi_mtime = dir_u->bi_ctime = now; + dir_u->bi_size += dirent_occupied_size(name); ret = bch2_inode_write(trans, &dir_iter, dir_u); if (ret) @@ -220,6 +221,7 @@ int bch2_link_trans(struct btree_trans *trans, } dir_u->bi_mtime = dir_u->bi_ctime = now; + dir_u->bi_size += dirent_occupied_size(name); dir_hash = bch2_hash_info_init(c, dir_u); @@ -322,6 +324,7 @@ int bch2_unlink_trans(struct btree_trans *trans, dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now; dir_u->bi_nlink -= is_subdir_for_nlink(inode_u); + dir_u->bi_size -= dirent_occupied_size(name); ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, &dir_hash, &dirent_iter, @@ -460,6 +463,14 @@ int bch2_rename_trans(struct btree_trans *trans, goto err; } + if (mode == BCH_RENAME) { + src_dir_u->bi_size -= dirent_occupied_size(src_name); + dst_dir_u->bi_size += dirent_occupied_size(dst_name); + } + + if (mode == BCH_RENAME_OVERWRITE) + src_dir_u->bi_size -= dirent_occupied_size(src_name); + if (src_inode_u->bi_parent_subvol) src_inode_u->bi_parent_subvol = dst_dir.subvol; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 3f83f131..90ade8f6 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -67,6 +67,9 @@ void bch2_inode_update_after_write(struct btree_trans *trans, i_gid_write(&inode->v, bi->bi_gid); inode->v.i_mode = bi->bi_mode; + if (fields & ATTR_SIZE) + i_size_write(&inode->v, bi->bi_size); + if (fields & ATTR_ATIME) inode_set_atime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_atime)); if (fields & ATTR_MTIME) @@ -582,7 +585,7 @@ err_before_quota: if (!(flags & BCH_CREATE_TMPFILE)) { bch2_inode_update_after_write(trans, dir, &dir_u, - ATTR_MTIME|ATTR_CTIME); + ATTR_MTIME|ATTR_CTIME|ATTR_SIZE); mutex_unlock(&dir->ei_update_lock); } @@ -739,7 +742,7 @@ static int __bch2_link(struct bch_fs *c, if (likely(!ret)) { bch2_inode_update_after_write(trans, dir, &dir_u, - ATTR_MTIME|ATTR_CTIME); + ATTR_MTIME|ATTR_CTIME|ATTR_SIZE); bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME); } @@ -792,7 +795,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, goto err; bch2_inode_update_after_write(trans, dir, &dir_u, - ATTR_MTIME|ATTR_CTIME); + ATTR_MTIME|ATTR_CTIME|ATTR_SIZE); bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_MTIME); @@ -970,11 +973,11 @@ err_tx_restart: dst_inode->v.i_ino != dst_inode_u.bi_inum); bch2_inode_update_after_write(trans, src_dir, &src_dir_u, - ATTR_MTIME|ATTR_CTIME); + ATTR_MTIME|ATTR_CTIME|ATTR_SIZE); if (src_dir != dst_dir) bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u, - ATTR_MTIME|ATTR_CTIME); + ATTR_MTIME|ATTR_CTIME|ATTR_SIZE); bch2_inode_update_after_write(trans, src_inode, &src_inode_u, ATTR_CTIME); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 206fc046..8fcf7c8e 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -832,11 +832,13 @@ struct inode_walker { struct bpos last_pos; DARRAY(struct inode_walker_entry) inodes; + snapshot_id_list deletes; }; static void inode_walker_exit(struct inode_walker *w) { darray_exit(&w->inodes); + darray_exit(&w->deletes); } static struct inode_walker inode_walker_init(void) @@ -960,8 +962,9 @@ static int get_visible_inodes(struct btree_trans *trans, int ret; w->inodes.nr = 0; + w->deletes.nr = 0; - for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), + for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, s->pos.snapshot), BTREE_ITER_all_snapshots, k, ret) { if (k.k->p.offset != inum) break; @@ -969,10 +972,13 @@ static int get_visible_inodes(struct btree_trans *trans, if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) continue; - if (bkey_is_inode(k.k)) - add_inode(c, w, k); + if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot)) + continue; - if (k.k->p.snapshot >= s->pos.snapshot) + ret = bkey_is_inode(k.k) + ? add_inode(c, w, k) + : snapshot_list_add(c, &w->deletes, k.k->p.snapshot); + if (ret) break; } bch2_trans_iter_exit(trans, &iter); @@ -1110,6 +1116,37 @@ err: return ret; } +static int check_directory_size(struct btree_trans *trans, + struct bch_inode_unpacked *inode_u, + struct bkey_s_c inode_k, bool *write_inode) +{ + struct btree_iter iter; + struct bkey_s_c k; + u64 new_size = 0; + int ret; + + for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents, + SPOS(inode_k.k->p.offset, 0, inode_k.k->p.snapshot), + POS(inode_k.k->p.offset, U64_MAX), + 0, k, ret) { + if (k.k->type != KEY_TYPE_dirent) + continue; + + struct bkey_s_c_dirent dirent = bkey_s_c_to_dirent(k); + struct qstr name = bch2_dirent_get_name(dirent); + + new_size += dirent_occupied_size(&name); + } + bch2_trans_iter_exit(trans, &iter); + + if (!ret && inode_u->bi_size != new_size) { + inode_u->bi_size = new_size; + *write_inode = true; + } + + return ret; +} + static int check_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, @@ -1298,6 +1335,16 @@ static int check_inode(struct btree_trans *trans, u.bi_journal_seq = journal_cur_seq(&c->journal); do_update = true; } + + if (S_ISDIR(u.bi_mode)) { + ret = check_directory_size(trans, &u, k, &do_update); + + fsck_err_on(ret, + trans, directory_size_mismatch, + "directory inode %llu:%u with the mismatch directory size", + u.bi_inum, k.k->p.snapshot); + ret = 0; + } do_update: if (do_update) { ret = __bch2_fsck_write_inode(trans, &u); @@ -2380,6 +2427,30 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret) goto err; } + + darray_for_each(target->deletes, i) + if (fsck_err_on(!snapshot_list_has_id(&s->ids, *i), + trans, dirent_to_overwritten_inode, + "dirent points to inode overwritten in snapshot %u:\n%s", + *i, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), + buf.buf))) { + struct btree_iter delete_iter; + bch2_trans_iter_init(trans, &delete_iter, + BTREE_ID_dirents, + SPOS(k.k->p.inode, k.k->p.offset, *i), + BTREE_ITER_intent); + ret = bch2_btree_iter_traverse(&delete_iter) ?: + bch2_hash_delete_at(trans, bch2_dirent_hash_desc, + hash_info, + &delete_iter, + BTREE_UPDATE_internal_snapshot_node); + bch2_trans_iter_exit(trans, &delete_iter); + if (ret) + goto err; + + } } ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c index 87fe5997..8c7b2d3d 100644 --- a/libbcachefs/io_read.c +++ b/libbcachefs/io_read.c @@ -91,13 +91,18 @@ static const struct rhashtable_params bch_promote_params = { .automatic_shrinking = true, }; +static inline bool have_io_error(struct bch_io_failures *failed) +{ + return failed && failed->nr; +} + static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, struct bpos pos, struct bch_io_opts opts, unsigned flags, struct bch_io_failures *failed) { - if (!failed) { + if (!have_io_error(failed)) { BUG_ON(!opts.promote_target); if (!(flags & BCH_READ_MAY_PROMOTE)) @@ -224,7 +229,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, struct data_update_opts update_opts = {}; - if (!failed) { + if (!have_io_error(failed)) { update_opts.target = opts.promote_target; update_opts.extra_replicas = 1; update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED; @@ -286,7 +291,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans, * if failed != NULL we're not actually doing a promote, we're * recovering from an io/checksum error */ - bool promote_full = (failed || + bool promote_full = (have_io_error(failed) || *read_full || READ_ONCE(c->opts.promote_whole_extents)); /* data might have to be decompressed in the write path: */ @@ -444,7 +449,7 @@ retry: ret = __bch2_read_extent(trans, rbio, bvec_iter, rbio->read_pos, rbio->data_btree, - k, 0, failed, flags, -1); + k, 0, failed, flags); if (ret == READ_RETRY) goto retry; if (ret) @@ -499,7 +504,6 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, blk_status_t error) { rbio->retry = retry; - rbio->saw_error = true; if (rbio->flags & BCH_READ_IN_RETRY) return; @@ -741,7 +745,7 @@ static void __bch2_read_endio(struct work_struct *work) bio_copy_data_iter(dst, &dst_iter, src, &src_iter); } } -nodecode: + if (rbio->promote) { /* * Re encrypt data we decrypted, so it's consistent with @@ -754,7 +758,7 @@ nodecode: promote_start(rbio->promote, rbio); rbio->promote = NULL; } - +nodecode: if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) { rbio = bch2_rbio_free(rbio); bch2_rbio_done(rbio); @@ -879,7 +883,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, struct bvec_iter iter, struct bpos read_pos, enum btree_id data_btree, struct bkey_s_c k, unsigned offset_into_extent, - struct bch_io_failures *failed, unsigned flags, int dev) + struct bch_io_failures *failed, unsigned flags) { struct bch_fs *c = trans->c; struct extent_ptr_decoded pick; @@ -889,8 +893,6 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, struct bpos data_pos = bkey_start_pos(k.k); int pick_ret; - //BUG_ON(failed && failed->nr); - if (bkey_extent_is_inline_data(k.k)) { unsigned bytes = min_t(unsigned, iter.bi_size, bkey_inline_data_bytes(k.k)); @@ -903,7 +905,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, goto out_read_done; } retry_pick: - pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev); + pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); /* hole or reservation - just zero fill: */ if (!pick_ret) @@ -955,30 +957,7 @@ retry_pick: */ bch2_trans_unlock(trans); - if (!(flags & BCH_READ_NODECODE)) { - if (!(flags & BCH_READ_LAST_FRAGMENT) || - bio_flagged(&orig->bio, BIO_CHAIN)) - flags |= BCH_READ_MUST_CLONE; - - narrow_crcs = !(flags & BCH_READ_IN_RETRY) && - bch2_can_narrow_extent_crcs(k, pick.crc); - - if (narrow_crcs && (flags & BCH_READ_USER_MAPPED)) - flags |= BCH_READ_MUST_BOUNCE; - - EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); - - if (crc_is_compressed(pick.crc) || - (pick.crc.csum_type != BCH_CSUM_none && - (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || - (bch2_csum_type_is_encryption(pick.crc.csum_type) && - (flags & BCH_READ_USER_MAPPED)) || - (flags & BCH_READ_MUST_BOUNCE)))) { - read_full = true; - bounce = true; - } - } else { - read_full = true; + if (flags & BCH_READ_NODECODE) { /* * can happen if we retry, and the extent we were going to read * has been merged in the meantime: @@ -990,10 +969,32 @@ retry_pick: } iter.bi_size = pick.crc.compressed_size << 9; + goto get_bio; } - if ((orig->opts.promote_target && !(flags & BCH_READ_NODECODE)) || - (failed && failed->nr)) + if (!(flags & BCH_READ_LAST_FRAGMENT) || + bio_flagged(&orig->bio, BIO_CHAIN)) + flags |= BCH_READ_MUST_CLONE; + + narrow_crcs = !(flags & BCH_READ_IN_RETRY) && + bch2_can_narrow_extent_crcs(k, pick.crc); + + if (narrow_crcs && (flags & BCH_READ_USER_MAPPED)) + flags |= BCH_READ_MUST_BOUNCE; + + EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); + + if (crc_is_compressed(pick.crc) || + (pick.crc.csum_type != BCH_CSUM_none && + (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || + (bch2_csum_type_is_encryption(pick.crc.csum_type) && + (flags & BCH_READ_USER_MAPPED)) || + (flags & BCH_READ_MUST_BOUNCE)))) { + read_full = true; + bounce = true; + } + + if (orig->opts.promote_target || have_io_error(failed)) promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags, &rbio, &bounce, &read_full, failed); @@ -1014,7 +1015,7 @@ retry_pick: pick.crc.offset = 0; pick.crc.live_size = bvec_iter_sectors(iter); } - +get_bio: if (rbio) { /* * promote already allocated bounce rbio: @@ -1265,7 +1266,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos, data_btree, k, - offset_into_extent, failed, flags, -1); + offset_into_extent, failed, flags); if (ret) goto err; diff --git a/libbcachefs/io_read.h b/libbcachefs/io_read.h index b62fcee7..a82e8a94 100644 --- a/libbcachefs/io_read.h +++ b/libbcachefs/io_read.h @@ -41,7 +41,6 @@ struct bch_read_bio { have_ioref:1, narrow_crcs:1, hole:1, - saw_error:1, retry:2, context:2; }; @@ -125,7 +124,7 @@ enum bch_read_flags { int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *, struct bvec_iter, struct bpos, enum btree_id, struct bkey_s_c, unsigned, - struct bch_io_failures *, unsigned, int); + struct bch_io_failures *, unsigned); static inline void bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, struct bpos read_pos, @@ -133,7 +132,7 @@ static inline void bch2_read_extent(struct btree_trans *trans, unsigned offset_into_extent, unsigned flags) { __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos, - data_btree, k, offset_into_extent, NULL, flags, -1); + data_btree, k, offset_into_extent, NULL, flags); } void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter, diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index e1773ac2..7f2efe85 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -1114,10 +1114,8 @@ reread: (printbuf_reset(&err), prt_str(&err, "journal "), bch2_csum_err_msg(&err, csum_type, j->csum, csum), - err.buf))) { + err.buf))) saw_bad = true; - bch2_fatal_error(c); - } ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), j->encrypted_start, diff --git a/libbcachefs/move.c b/libbcachefs/move.c index cc852492..6c6ece52 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -88,13 +88,12 @@ static void move_free(struct moving_io *io) if (io->b) atomic_dec(&io->b->count); - bch2_data_update_exit(&io->write); - mutex_lock(&ctxt->lock); list_del(&io->io_list); wake_up(&ctxt->wait); mutex_unlock(&ctxt->lock); + bch2_data_update_exit(&io->write); kfree(io); } @@ -114,20 +113,7 @@ static void move_write_done(struct bch_write_op *op) static void move_write(struct moving_io *io) { - struct moving_context *ctxt = io->write.ctxt; - - if (ctxt->stats) { - if (io->rbio.bio.bi_status) - atomic64_add(io->rbio.bvec_iter.bi_size >> 9, - &ctxt->stats->sectors_error_uncorrected); - else if (io->rbio.saw_error) - atomic64_add(io->rbio.bvec_iter.bi_size >> 9, - &ctxt->stats->sectors_error_corrected); - } - - if (unlikely(io->rbio.bio.bi_status || - io->rbio.hole || - io->write.data_opts.scrub)) { + if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) { move_free(io); return; } @@ -286,8 +272,7 @@ int bch2_move_extent(struct moving_context *ctxt, bch2_data_update_opts_normalize(k, &data_opts); if (!data_opts.rewrite_ptrs && - !data_opts.extra_replicas && - !data_opts.scrub) { + !data_opts.extra_replicas) { if (data_opts.kill_ptrs) return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts); return 0; @@ -333,18 +318,12 @@ int bch2_move_extent(struct moving_context *ctxt, io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); io->rbio.bio.bi_end_io = move_read_endio; - if (!data_opts.scrub) { - ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, - io_opts, data_opts, iter->btree_id, k); - if (ret) - goto err_free_pages; + ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, + io_opts, data_opts, iter->btree_id, k); + if (ret) + goto err_free_pages; - io->write.op.end_io = move_write_done; - } else { - bch2_bkey_buf_init(&io->write.k); - io->write.op.c = c; - io->write.data_opts = data_opts; - } + io->write.op.end_io = move_write_done; if (ctxt->rate) bch2_ratelimit_increment(ctxt->rate, k.k->size); @@ -376,14 +355,11 @@ int bch2_move_extent(struct moving_context *ctxt, * ctxt when doing wakeup */ closure_get(&ctxt->cl); - __bch2_read_extent(trans, &io->rbio, - io->rbio.bio.bi_iter, - bkey_start_pos(k.k), - iter->btree_id, k, 0, - NULL, - BCH_READ_NODECODE| - BCH_READ_LAST_FRAGMENT, - data_opts.scrub ? data_opts.read_dev : -1); + bch2_read_extent(trans, &io->rbio, + bkey_start_pos(k.k), + iter->btree_id, k, 0, + BCH_READ_NODECODE| + BCH_READ_LAST_FRAGMENT); return 0; err_free_pages: bio_free_pages(&io->write.op.wbio.bio); @@ -712,22 +688,21 @@ int bch2_move_data(struct bch_fs *c, bool wait_on_copygc, move_pred_fn pred, void *arg) { + struct moving_context ctxt; + int ret; bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - int ret = __bch2_move_data(&ctxt, start, end, pred, arg); + ret = __bch2_move_data(&ctxt, start, end, pred, arg); bch2_moving_ctxt_exit(&ctxt); return ret; } -static int __bch2_move_data_phys(struct moving_context *ctxt, - struct move_bucket_in_flight *bucket_in_flight, - unsigned dev, - u64 bucket_start, - u64 bucket_end, - unsigned data_types, - move_pred_fn pred, void *arg) +int bch2_evacuate_bucket(struct moving_context *ctxt, + struct move_bucket_in_flight *bucket_in_flight, + struct bpos bucket, int gen, + struct data_update_opts _data_opts) { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; @@ -736,22 +711,16 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, struct btree_iter iter = {}, bp_iter = {}; struct bkey_buf sk; struct bkey_s_c k; + struct data_update_opts data_opts; unsigned sectors_moved = 0; struct bkey_buf last_flushed; int ret = 0; - struct bch_dev *ca = bch2_dev_tryget(c, dev); + struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); if (!ca) return 0; - bucket_end = min(bucket_end, ca->mi.nbuckets); - - struct bpos bp_start = bucket_pos_to_bp_start(ca, POS(dev, bucket_start)); - struct bpos bp_end = bucket_pos_to_bp_end(ca, POS(dev, bucket_end)); - bch2_dev_put(ca); - ca = NULL; - - //trace_bucket_evacuate(c, &bucket); + trace_bucket_evacuate(c, &bucket); bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); @@ -762,7 +731,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, */ bch2_trans_begin(trans); - bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_start, 0); + bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, + bucket_pos_to_bp_start(ca, bucket), 0); bch_err_msg(c, ret, "looking up alloc key"); if (ret) @@ -786,7 +756,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, if (ret) goto err; - if (!k.k || bkey_gt(k.k->p, bp_end)) + if (!k.k || bkey_gt(k.k->p, bucket_pos_to_bp_end(ca, bucket))) break; if (k.k->type != KEY_TYPE_backpointer) @@ -794,145 +764,107 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); - if (ctxt->stats) - ctxt->stats->offset = bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT; - - if (!(data_types & BIT(bp.v->data_type))) - goto next; - - k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed); - ret = bkey_err(k); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - goto err; - if (!k.k) - goto next; - if (!bp.v->level) { + k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed); + ret = bkey_err(k); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + goto err; + if (!k.k) + goto next; + + bch2_bkey_buf_reassemble(&sk, c, k); + k = bkey_i_to_s_c(sk.k); + ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k); if (ret) { bch2_trans_iter_exit(trans, &iter); continue; } - } - struct data_update_opts data_opts = {}; - if (!pred(c, arg, k, &io_opts, &data_opts)) { + data_opts = _data_opts; + data_opts.target = io_opts.background_target; + data_opts.rewrite_ptrs = 0; + + unsigned sectors = bp.v->bucket_len; /* move_extent will drop locks */ + unsigned i = 0; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { + if (p.ptr.dev == bucket.inode) { + if (p.ptr.cached) { + bch2_trans_iter_exit(trans, &iter); + goto next; + } + data_opts.rewrite_ptrs |= 1U << i; + break; + } + i++; + } + + ret = bch2_move_extent(ctxt, bucket_in_flight, + &iter, k, io_opts, data_opts); bch2_trans_iter_exit(trans, &iter); - goto next; - } - if (data_opts.scrub && - !bch2_dev_idx_is_online(c, data_opts.read_dev)) { + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret == -ENOMEM) { + /* memory allocation failure, wait for some IO to finish */ + bch2_move_ctxt_wait_for_io(ctxt); + continue; + } + if (ret) + goto err; + + if (ctxt->stats) + atomic64_add(sectors, &ctxt->stats->sectors_seen); + sectors_moved += sectors; + } else { + struct btree *b; + + b = bch2_backpointer_get_node(trans, bp, &iter, &last_flushed); + ret = PTR_ERR_OR_ZERO(b); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) + goto next; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + goto err; + if (!b) + goto next; + + unsigned sectors = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); + + ret = bch2_btree_node_rewrite(trans, &iter, b, 0); bch2_trans_iter_exit(trans, &iter); - ret = -BCH_ERR_device_offline; - break; + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + goto err; + + if (ctxt->rate) + bch2_ratelimit_increment(ctxt->rate, sectors); + if (ctxt->stats) { + atomic64_add(sectors, &ctxt->stats->sectors_seen); + atomic64_add(sectors, &ctxt->stats->sectors_moved); + } + sectors_moved += btree_sectors(c); } - - bch2_bkey_buf_reassemble(&sk, c, k); - k = bkey_i_to_s_c(sk.k); - - /* move_extent will drop locks */ - unsigned sectors = bp.v->bucket_len; - - if (!bp.v->level) - ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts); - else if (!data_opts.scrub) - ret = bch2_btree_node_rewrite_key(trans, bp.v->btree_id, bp.v->level, k.k->p, 0); - else - ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev); - - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret == -ENOMEM) { - /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt); - continue; - } - if (ret) - goto err; - - if (ctxt->stats) - atomic64_add(sectors, &ctxt->stats->sectors_seen); - sectors_moved += sectors; next: bch2_btree_iter_advance(&bp_iter); } - //trace_evacuate_bucket(c, &bucket, sectors_moved, ca->mi.bucket_size, ret); + trace_evacuate_bucket(c, &bucket, sectors_moved, ca->mi.bucket_size, ret); err: bch2_trans_iter_exit(trans, &bp_iter); + bch2_dev_put(ca); bch2_bkey_buf_exit(&sk, c); bch2_bkey_buf_exit(&last_flushed, c); return ret; } -static int bch2_move_data_phys(struct bch_fs *c, - unsigned dev, - u64 start, - u64 end, - unsigned data_types, - struct bch_ratelimit *rate, - struct bch_move_stats *stats, - struct write_point_specifier wp, - bool wait_on_copygc, - move_pred_fn pred, void *arg) -{ - struct moving_context ctxt; - - bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans)); - - bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ctxt.stats->phys = true; - - int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg); - bch2_moving_ctxt_exit(&ctxt); - - return ret; -} - -struct evacuate_bucket_arg { - struct bpos bucket; - int gen; - struct data_update_opts data_opts; -}; - -static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) -{ - struct evacuate_bucket_arg *arg = _arg; - - *data_opts = arg->data_opts; - - unsigned i = 0; - bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { - if (ptr->dev == arg->bucket.inode && !ptr->cached) - data_opts->rewrite_ptrs |= BIT(i); - i++; - } - - return data_opts->rewrite_ptrs != 0; -} - -int bch2_evacuate_bucket(struct moving_context *ctxt, - struct move_bucket_in_flight *bucket_in_flight, - struct bpos bucket, int gen, - struct data_update_opts data_opts) -{ - struct evacuate_bucket_arg arg = { bucket, gen, data_opts, }; - - return __bch2_move_data_phys(ctxt, bucket_in_flight, - bucket.inode, - bucket.offset, - bucket.offset + 1, - ~0, - evacuate_bucket_pred, &arg); -} - typedef bool (*move_btree_pred)(struct bch_fs *, void *, struct btree *, struct bch_io_opts *, struct data_update_opts *); @@ -1171,30 +1103,6 @@ static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg, return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); } -static bool scrub_pred(struct bch_fs *c, void *_arg, - struct bkey_s_c k, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) -{ - struct bch_ioctl_data *arg = _arg; - - if (k.k->type != KEY_TYPE_btree_ptr_v2) { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) - if (p.ptr.dev == arg->migrate.dev) { - if (!p.crc.csum_type) - return false; - break; - } - } - - data_opts->scrub = true; - data_opts->read_dev = arg->migrate.dev; - return true; -} - int bch2_data_job(struct bch_fs *c, struct bch_move_stats *stats, struct bch_ioctl_data op) @@ -1209,16 +1117,6 @@ int bch2_data_job(struct bch_fs *c, bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]); switch (op.op) { - case BCH_DATA_OP_scrub: - ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX, - op.scrub.data_types, - NULL, - stats, - writepoint_hashed((unsigned long) current), - false, - scrub_pred, &op) ?: ret; - break; - case BCH_DATA_OP_rereplicate: stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, -1); @@ -1317,7 +1215,7 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str mutex_lock(&ctxt->lock); list_for_each_entry(io, &ctxt->ios, io_list) - bch2_write_op_to_text(out, &io->write.op); + bch2_data_update_to_text(out, &io->write); mutex_unlock(&ctxt->lock); printbuf_indent_sub(out, 4); diff --git a/libbcachefs/move_types.h b/libbcachefs/move_types.h index 82e473ed..e22841ef 100644 --- a/libbcachefs/move_types.h +++ b/libbcachefs/move_types.h @@ -3,31 +3,17 @@ #define _BCACHEFS_MOVE_TYPES_H #include "bbpos_types.h" -#include "bcachefs_ioctl.h" struct bch_move_stats { + enum bch_data_type data_type; + struct bbpos pos; char name[32]; - bool phys; - enum bch_ioctl_data_event_ret ret; - - union { - struct { - enum bch_data_type data_type; - struct bbpos pos; - }; - struct { - unsigned dev; - u64 offset; - }; - }; atomic64_t keys_moved; atomic64_t keys_raced; atomic64_t sectors_seen; atomic64_t sectors_moved; atomic64_t sectors_raced; - atomic64_t sectors_error_corrected; - atomic64_t sectors_error_uncorrected; }; struct move_bucket_key { diff --git a/libbcachefs/recovery_passes_types.h b/libbcachefs/recovery_passes_types.h index 71baad41..41855796 100644 --- a/libbcachefs/recovery_passes_types.h +++ b/libbcachefs/recovery_passes_types.h @@ -53,7 +53,7 @@ x(check_dirents, 27, PASS_FSCK) \ x(check_xattrs, 28, PASS_FSCK) \ x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ - x(check_unreachable_inodes, 40, PASS_ONLINE|PASS_FSCK) \ + x(check_unreachable_inodes, 40, PASS_FSCK) \ x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ x(check_nlinks, 31, PASS_FSCK) \ diff --git a/libbcachefs/sb-downgrade.c b/libbcachefs/sb-downgrade.c index 051214fd..14f6b6a5 100644 --- a/libbcachefs/sb-downgrade.c +++ b/libbcachefs/sb-downgrade.c @@ -90,7 +90,10 @@ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_accounting_mismatch, \ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ - BCH_FSCK_ERR_accounting_key_junk_at_end) + BCH_FSCK_ERR_accounting_key_junk_at_end) \ + x(directory_size, \ + BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ + BCH_FSCK_ERR_directory_size_mismatch) \ #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h index e26317c3..0b4fe899 100644 --- a/libbcachefs/sb-errors_format.h +++ b/libbcachefs/sb-errors_format.h @@ -256,6 +256,7 @@ enum bch_fsck_flags { x(dirent_in_missing_dir_inode, 227, 0) \ x(dirent_in_non_dir_inode, 228, 0) \ x(dirent_to_missing_inode, 229, 0) \ + x(dirent_to_overwritten_inode, 302, 0) \ x(dirent_to_missing_subvol, 230, 0) \ x(dirent_to_itself, 231, 0) \ x(quota_type_invalid, 232, 0) \ @@ -312,7 +313,8 @@ enum bch_fsck_flags { x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \ x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \ - x(MAX, 302, 0) + x(directory_size_mismatch, 303, FSCK_AUTOFIX) \ + x(MAX, 304, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb-members.h index b29b6c6c..762083b5 100644 --- a/libbcachefs/sb-members.h +++ b/libbcachefs/sb-members.h @@ -23,18 +23,6 @@ static inline bool bch2_dev_is_online(struct bch_dev *ca) return !percpu_ref_is_zero(&ca->io_ref); } -static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned); - -static inline bool bch2_dev_idx_is_online(struct bch_fs *c, unsigned dev) -{ - rcu_read_lock(); - struct bch_dev *ca = bch2_dev_rcu(c, dev); - bool ret = ca && bch2_dev_is_online(ca); - rcu_read_unlock(); - - return ret; -} - static inline bool bch2_dev_is_readable(struct bch_dev *ca) { return bch2_dev_is_online(ca) && diff --git a/libbcachefs/six.c b/libbcachefs/six.c index 537bf049..7e7c66a1 100644 --- a/libbcachefs/six.c +++ b/libbcachefs/six.c @@ -491,8 +491,12 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, list_del(&wait->list); raw_spin_unlock(&lock->wait_lock); - if (unlikely(acquired)) + if (unlikely(acquired)) { do_six_unlock_type(lock, type); + } else if (type == SIX_LOCK_write) { + six_clear_bitmask(lock, SIX_LOCK_HELD_write); + six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read); + } break; } @@ -501,10 +505,6 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, __set_current_state(TASK_RUNNING); out: - if (ret && type == SIX_LOCK_write) { - six_clear_bitmask(lock, SIX_LOCK_HELD_write); - six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read); - } trace_contention_end(lock, 0); return ret; diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c index cf6b3256..c54091a2 100644 --- a/libbcachefs/snapshot.c +++ b/libbcachefs/snapshot.c @@ -1563,7 +1563,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) */ ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, check_should_delete_snapshot(trans, k, &delete_leaves, &delete_interior)); - bch_err_msg(c, ret, "walking snapshots"); + if (!bch2_err_matches(ret, EROFS)) + bch_err_msg(c, ret, "walking snapshots"); if (ret) goto err; @@ -1602,7 +1603,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) bch2_disk_reservation_put(c, &res); - bch_err_msg(c, ret, "deleting keys from dying snapshots"); + if (!bch2_err_matches(ret, EROFS)) + bch_err_msg(c, ret, "deleting keys from dying snapshots"); if (ret) goto err; } @@ -1610,7 +1612,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) darray_for_each(delete_leaves, i) { ret = commit_do(trans, NULL, NULL, 0, bch2_snapshot_node_delete(trans, *i)); - bch_err_msg(c, ret, "deleting snapshot %u", *i); + if (!bch2_err_matches(ret, EROFS)) + bch_err_msg(c, ret, "deleting snapshot %u", *i); if (ret) goto err; } @@ -1630,7 +1633,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) darray_for_each(delete_interior, i) { ret = commit_do(trans, NULL, NULL, 0, bch2_snapshot_node_delete(trans, i->id)); - bch_err_msg(c, ret, "deleting snapshot %u", i->id); + if (!bch2_err_matches(ret, EROFS)) + bch_err_msg(c, ret, "deleting snapshot %u", i->id); if (ret) goto err; } @@ -1638,7 +1642,8 @@ err: darray_exit(&delete_interior); darray_exit(&delete_leaves); bch2_trans_put(trans); - bch_err_fn(c, ret); + if (!bch2_err_matches(ret, EROFS)) + bch_err_fn(c, ret); return ret; } diff --git a/libbcachefs/str_hash.c b/libbcachefs/str_hash.c index f5977c5c..8c2c5539 100644 --- a/libbcachefs/str_hash.c +++ b/libbcachefs/str_hash.c @@ -167,10 +167,19 @@ found:; goto err; struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode); - if (memcmp(hash_info, &hash2, sizeof(hash2))) { + if (hash_info->type != hash2.type || + memcmp(&hash_info->siphash_key, &hash2.siphash_key, sizeof(hash2.siphash_key))) { ret = repair_inode_hash_info(trans, &inode); if (!ret) { - bch_err(c, "inode hash info mismatch with root, but mismatch not found"); + bch_err(c, "inode hash info mismatch with root, but mismatch not found\n" + "%u %llx %llx\n" + "%u %llx %llx", + hash_info->type, + hash_info->siphash_key.k0, + hash_info->siphash_key.k1, + hash2.type, + hash2.siphash_key.k0, + hash2.siphash_key.k1); ret = -BCH_ERR_fsck_repair_unimplemented; } } diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h index e4c7dc0e..9d40b7d4 100644 --- a/libbcachefs/trace.h +++ b/libbcachefs/trace.h @@ -785,6 +785,27 @@ TRACE_EVENT(bucket_invalidate, /* Moving IO */ +TRACE_EVENT(bucket_evacuate, + TP_PROTO(struct bch_fs *c, struct bpos *bucket), + TP_ARGS(c, bucket), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(u32, dev_idx ) + __field(u64, bucket ) + ), + + TP_fast_assign( + __entry->dev = c->dev; + __entry->dev_idx = bucket->inode; + __entry->bucket = bucket->offset; + ), + + TP_printk("%d:%d %u:%llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dev_idx, __entry->bucket) +); + DEFINE_EVENT(fs_str, move_extent, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) @@ -848,6 +869,37 @@ TRACE_EVENT(move_data, __entry->sectors_raced) ); +TRACE_EVENT(evacuate_bucket, + TP_PROTO(struct bch_fs *c, struct bpos *bucket, + unsigned sectors, unsigned bucket_size, + int ret), + TP_ARGS(c, bucket, sectors, bucket_size, ret), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(u64, member ) + __field(u64, bucket ) + __field(u32, sectors ) + __field(u32, bucket_size ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->dev = c->dev; + __entry->member = bucket->inode; + __entry->bucket = bucket->offset; + __entry->sectors = sectors; + __entry->bucket_size = bucket_size; + __entry->ret = ret; + ), + + TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->member, __entry->bucket, + __entry->sectors, __entry->bucket_size, + __entry->ret) +); + TRACE_EVENT(copygc, TP_PROTO(struct bch_fs *c, u64 sectors_moved, u64 sectors_not_moved,