// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "alloc/buckets.h"
#include "data/io_misc.h"
#include "fs/check.h"
#include "fs/namei.h"
#include "init/progress.h"
static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id)
{
u32 *i;
__darray_for_each(s->ids, i) {
if (*i == id)
return 0;
if (*i > id)
break;
}
int ret = darray_insert_item(&s->ids, i - s->ids.data, id);
if (ret)
bch_err(c, "error reallocating snapshots_seen table (size %zu)",
s->ids.size);
return ret;
}
/*
* XXX: this handles transaction restarts without returning
* -BCH_ERR_transaction_restart_nested; this is not how we do things anymore:
*/
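/*
* Count the sectors of all allocated extents for @inum in @snapshot:
*/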
static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
u32 snapshot)
{
u64 sectors = 0;
int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents,
SPOS(inum, 0, snapshot),
POS(inum, U64_MAX),
0, k, ({
if (bkey_extent_is_allocation(k.k))
sectors += k.k->size;
0;
}));
return ret ?: sectors;
}
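/*
* Recount i_sectors for each inode we walked, and repair bi_sectors when it
* disagrees with what the extents btree says (unless the inode is flagged
* i_sectors_dirty):
*/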
static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w)
{
struct bch_fs *c = trans->c;
int ret = 0;
s64 count2;
darray_for_each(w->inodes, i) {
if (i->inode.bi_sectors == i->count)
continue;
CLASS(printbuf, buf)();
lockrestart_do(trans,
bch2_inum_snapshot_to_path(trans,
i->inode.bi_inum,
i->inode.bi_snapshot, NULL, &buf));
count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->inode.bi_snapshot);
if (w->recalculate_sums)
i->count = count2;
if (i->count != count2) {
bch_err_ratelimited(c, "fsck counted i_sectors wrong: got %llu should be %llu\n%s",
i->count, count2, buf.buf);
i->count = count2;
}
if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty) &&
i->inode.bi_sectors != i->count,
trans, inode_i_sectors_wrong,
"incorrect i_sectors: got %llu, should be %llu\n%s",
i->inode.bi_sectors, i->count, buf.buf)) {
i->inode.bi_sectors = i->count;
ret = bch2_fsck_write_inode(trans, &i->inode);
if (ret)
break;
}
}
fsck_err:
bch_err_fn(c, ret);
return ret;
}
static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
{
u32 restart_count = trans->restart_count;
return check_i_sectors_notnested(trans, w) ?:
trans_was_restarted(trans, restart_count);
}
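/*
* To detect overlapping extents we track, per snapshot, where the most
* recently seen extent in the current inode ended, along with the
* snapshots_seen state at that key for visibility checks:
*/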
struct extent_end {
u32 snapshot;
u64 offset;
struct snapshots_seen seen;
};
struct extent_ends {
struct bpos last_pos;
DARRAY(struct extent_end) e;
};
static void extent_ends_reset(struct extent_ends *extent_ends)
{
darray_for_each(extent_ends->e, i)
snapshots_seen_exit(&i->seen);
extent_ends->e.nr = 0;
}
static void extent_ends_exit(struct extent_ends *extent_ends)
{
extent_ends_reset(extent_ends);
darray_exit(&extent_ends->e);
}
static struct extent_ends extent_ends_init(void)
{
return (struct extent_ends) {};
}
DEFINE_CLASS(extent_ends, struct extent_ends,
extent_ends_exit(&_T),
extent_ends_init(), void)
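/*
* Record the end of extent @k: replace the existing entry for its snapshot if
* there is one, otherwise insert a new entry in snapshot order. The caller's
* snapshots_seen list is duplicated so the entry owns its own copy.
*/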
static int extent_ends_at(struct bch_fs *c,
struct extent_ends *extent_ends,
struct snapshots_seen *seen,
struct bkey_s_c k)
{
struct extent_end *i, n = (struct extent_end) {
.offset = k.k->p.offset,
.snapshot = k.k->p.snapshot,
.seen = *seen,
};
n.seen.ids.data = kmemdup(seen->ids.data,
sizeof(seen->ids.data[0]) * seen->ids.size,
GFP_KERNEL);
if (!n.seen.ids.data)
return bch_err_throw(c, ENOMEM_fsck_extent_ends_at);
__darray_for_each(extent_ends->e, i) {
if (i->snapshot == k.k->p.snapshot) {
snapshots_seen_exit(&i->seen);
*i = n;
return 0;
}
if (i->snapshot >= k.k->p.snapshot)
break;
}
return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n);
}
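/*
* Two extents were found to overlap: re-look up both keys, report them, and if
* the repair is accepted overwrite one with the other via
* bch2_trans_update_extent_overwrite(); which extent loses depends on their
* snapshot IDs. If the extent currently being processed was the one modified,
* a nested transaction restart is returned so check_extent() reruns from the
* top.
*/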
static int overlapping_extents_found(struct btree_trans *trans,
struct disk_reservation *res,
enum btree_id btree,
struct bpos pos1, struct snapshots_seen *pos1_seen,
struct bkey pos2,
bool *fixed,
struct extent_end *extent_end)
{
struct bch_fs *c = trans->c;
CLASS(printbuf, buf)();
int ret = 0;
BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2)));
CLASS(btree_iter, iter1)(trans, btree, pos1,
BTREE_ITER_all_snapshots|
BTREE_ITER_not_extents);
struct bkey_s_c k1 = bkey_try(bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX)));
prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, k1);
if (!bpos_eq(pos1, k1.k->p)) {
prt_str(&buf, "\nwanted\n ");
bch2_bpos_to_text(&buf, pos1);
prt_str(&buf, "\n");
bch2_bkey_to_text(&buf, &pos2);
bch_err(c, "%s: error finding first overlapping extent when repairing, got%s",
__func__, buf.buf);
return bch_err_throw(c, internal_fsck_err);
}
CLASS(btree_iter_copy, iter2)(&iter1);
struct bkey_s_c k2;
do {
bch2_btree_iter_advance(&iter2);
k2 = bkey_try(bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX)));
} while (bpos_lt(k2.k->p, pos2.p));
prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, k2);
if (bpos_gt(k2.k->p, pos2.p) ||
pos2.size != k2.k->size) {
bch_err(c, "%s: error finding seconding overlapping extent when repairing%s",
__func__, buf.buf);
return bch_err_throw(c, internal_fsck_err);
}
prt_printf(&buf, "\noverwriting %s extent",
pos1.snapshot >= pos2.p.snapshot ? "first" : "second");
if (fsck_err(trans, extent_overlapping,
"overlapping extents%s", buf.buf)) {
struct btree_iter *old_iter = &iter1;
if (pos1.snapshot < pos2.p.snapshot) {
old_iter = &iter2;
swap(k1, k2);
}
trans->extra_disk_res += bch2_bkey_sectors_compressed(k2);
try(bch2_trans_update_extent_overwrite(trans, old_iter,
BTREE_UPDATE_internal_snapshot_node,
k1, k2));
try(bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc));
*fixed = true;
if (pos1.snapshot == pos2.p.snapshot) {
/*
* We overwrote the first extent, and did the overwrite
* in the same snapshot:
*/
extent_end->offset = bkey_start_offset(&pos2);
} else if (pos1.snapshot > pos2.p.snapshot) {
/*
* We overwrote the first extent in pos2's snapshot:
*/
ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot);
} else {
/*
* We overwrote the second extent - restart
* check_extent() from the top:
*/
ret = bch_err_throw(c, transaction_restart_nested);
}
}
fsck_err:
return ret;
}
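/*
* Check @k against the recorded extent ends: any entry that ends past the
* start of @k, in a snapshot where both keys are visible, is an overlap that
* needs repair:
*/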
static int check_overlapping_extents(struct btree_trans *trans,
struct disk_reservation *res,
struct snapshots_seen *seen,
struct extent_ends *extent_ends,
struct bkey_s_c k,
struct btree_iter *iter,
bool *fixed)
{
struct bch_fs *c = trans->c;
/* transaction restart, running again */
if (bpos_eq(extent_ends->last_pos, k.k->p))
return 0;
if (extent_ends->last_pos.inode != k.k->p.inode)
extent_ends_reset(extent_ends);
darray_for_each(extent_ends->e, i) {
if (i->offset <= bkey_start_offset(k.k))
continue;
if (!bch2_ref_visible2(c,
k.k->p.snapshot, seen,
i->snapshot, &i->seen))
continue;
try(overlapping_extents_found(trans, res, iter->btree_id,
SPOS(iter->pos.inode,
i->offset,
i->snapshot),
&i->seen,
*k.k, fixed, i));
}
extent_ends->last_pos = k.k->p;
return 0;
}
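/*
* Warn (but don't repair) when an encoded extent is bigger than the
* encoded_extent_max option allows:
*/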
static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *i;
unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9;
bkey_for_each_crc(k.k, ptrs, crc, i)
if (crc_is_encoded(crc) &&
crc.uncompressed_size > encoded_extent_max_sectors) {
CLASS(printbuf, buf)();
bch2_bkey_val_to_text(&buf, c, k);
bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf);
}
return 0;
}
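/*
* Main per-extent check: verify the key's snapshot and owning inode, detect
* overlapping extents, delete extents past the end of the inode, drop stale
* pointers, and accumulate per-inode sector counts for the i_sectors check:
*/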
noinline_for_stack
static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c k,
struct inode_walker *inode,
struct snapshots_seen *s,
struct extent_ends *extent_ends,
struct disk_reservation *res)
{
struct bch_fs *c = trans->c;
CLASS(printbuf, buf)();
int ret = 0;
ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret)
return ret < 0 ? ret : 0;
if (inode->last_pos.inode != k.k->p.inode && inode->have_inodes)
try(check_i_sectors(trans, inode));
try(bch2_snapshots_seen_update(c, s, iter->btree_id, k.k->p));
struct inode_walker_entry *extent_i = errptr_try(bch2_walk_inode(trans, inode, k));
try(bch2_check_key_has_inode(trans, iter, inode, extent_i, k));
if (k.k->type != KEY_TYPE_whiteout)
try(check_overlapping_extents(trans, res, s, extent_ends, k, iter,
&inode->recalculate_sums));
if (!bkey_extent_whiteout(k.k)) {
/*
* Check inodes in reverse order, from oldest snapshots to
* newest, starting from the inode that matches this extent's
* snapshot. If we didn't have one, iterate over all inodes:
*/
for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
inode->inodes.data && i >= inode->inodes.data;
--i) {
if (i->inode.bi_snapshot > k.k->p.snapshot ||
!bch2_key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot))
continue;
u64 last_block = round_up(i->inode.bi_size, block_bytes(c)) >> 9;
if (fsck_err_on(k.k->p.offset > last_block &&
!bkey_extent_is_reservation(k),
trans, extent_past_end_of_inode,
"extent type past end of inode %llu:%u, i_size %llu\n%s",
i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
try(snapshots_seen_add_inorder(c, s, i->inode.bi_snapshot));
try(bch2_fpunch_snapshot(trans,
SPOS(i->inode.bi_inum,
last_block,
i->inode.bi_snapshot),
POS(i->inode.bi_inum, U64_MAX)));
iter->k.type = KEY_TYPE_whiteout;
break;
}
}
}
try(check_extent_overbig(trans, iter, k));
try(bch2_bkey_drop_stale_ptrs(trans, iter, k));
try(bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc));
if (bkey_extent_is_allocation(k.k)) {
for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
inode->inodes.data && i >= inode->inodes.data;
--i) {
if (i->whiteout ||
i->inode.bi_snapshot > k.k->p.snapshot ||
!bch2_key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot))
continue;
i->count += k.k->size;
}
}
if (k.k->type != KEY_TYPE_whiteout)
try(extent_ends_at(c, extent_ends, s, k));
fsck_err:
return ret;
}
/*
* Walk extents: verify that extents have a corresponding S_ISREG inode, and
* that i_size and i_sectors are consistent
*/
int bch2_check_extents(struct bch_fs *c)
{
CLASS(disk_reservation, res)(c);
CLASS(btree_trans, trans)(c);
CLASS(snapshots_seen, s)();
CLASS(inode_walker, w)();
CLASS(extent_ends, extent_ends)();
struct progress_indicator_state progress;
bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_extents));
return for_each_btree_key(trans, iter, BTREE_ID_extents,
POS(BCACHEFS_ROOT_INO, 0),
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
bch2_disk_reservation_put(c, &res.r);
progress_update_iter(trans, &progress, &iter) ?:
check_extent(trans, &iter, k, &w, &s, &extent_ends, &res.r);
})) ?:
check_i_sectors_notnested(trans, &w);
}
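/*
* Indirect extents in the reflink btree aren't owned by a single inode, so
* here we only check for overbig encoded extents and drop stale pointers:
*/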
int bch2_check_indirect_extents(struct bch_fs *c)
{
CLASS(disk_reservation, res)(c);
CLASS(btree_trans, trans)(c);
struct progress_indicator_state progress;
bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_reflink));
return for_each_btree_key_commit(trans, iter, BTREE_ID_reflink,
POS_MIN,
BTREE_ITER_prefetch, k,
&res.r, NULL,
BCH_TRANS_COMMIT_no_enospc, ({
bch2_disk_reservation_put(c, &res.r);
progress_update_iter(trans, &progress, &iter) ?:
check_extent_overbig(trans, &iter, k) ?:
bch2_bkey_drop_stale_ptrs(trans, &iter, k);
}));
}