diff --git a/.bcachefs_revision b/.bcachefs_revision index 7fb9f9a3..b2d87414 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -33a60d9b05f523be93973b25e0df1ab2d65fa4fc +5b8c4a1366df20bc043404cb882230ce86296590 diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 82b0706a..e1f1e8e8 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -208,6 +208,7 @@ #include "fifo.h" #include "nocow_locking_types.h" #include "opts.h" +#include "recovery_types.h" #include "seqmutex.h" #include "util.h" @@ -452,6 +453,7 @@ enum gc_phase { GC_PHASE_BTREE_backpointers, GC_PHASE_BTREE_bucket_gens, GC_PHASE_BTREE_snapshot_trees, + GC_PHASE_BTREE_deleted_inodes, GC_PHASE_PENDING_DELETE, }; @@ -655,48 +657,6 @@ enum bch_write_ref { BCH_WRITE_REF_NR, }; -#define PASS_SILENT BIT(0) -#define PASS_FSCK BIT(1) -#define PASS_UNCLEAN BIT(2) -#define PASS_ALWAYS BIT(3) - -#define BCH_RECOVERY_PASSES() \ - x(alloc_read, PASS_ALWAYS) \ - x(stripes_read, PASS_ALWAYS) \ - x(initialize_subvolumes, 0) \ - x(snapshots_read, PASS_ALWAYS) \ - x(check_topology, 0) \ - x(check_allocations, PASS_FSCK) \ - x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ - x(journal_replay, PASS_ALWAYS) \ - x(check_alloc_info, PASS_FSCK) \ - x(check_lrus, PASS_FSCK) \ - x(check_btree_backpointers, PASS_FSCK) \ - x(check_backpointers_to_extents,PASS_FSCK) \ - x(check_extents_to_backpointers,PASS_FSCK) \ - x(check_alloc_to_lru_refs, PASS_FSCK) \ - x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \ - x(bucket_gens_init, 0) \ - x(check_snapshot_trees, PASS_FSCK) \ - x(check_snapshots, PASS_FSCK) \ - x(check_subvols, PASS_FSCK) \ - x(delete_dead_snapshots, PASS_FSCK|PASS_UNCLEAN) \ - x(fs_upgrade_for_subvolumes, 0) \ - x(check_inodes, PASS_FSCK|PASS_UNCLEAN) \ - x(check_extents, PASS_FSCK) \ - x(check_dirents, PASS_FSCK) \ - x(check_xattrs, PASS_FSCK) \ - x(check_root, PASS_FSCK) \ - x(check_directory_structure, PASS_FSCK) \ - x(check_nlinks, PASS_FSCK) \ - x(fix_reflink_p, 0) \ - -enum bch_recovery_pass { -#define x(n, when) BCH_RECOVERY_PASS_##n, - BCH_RECOVERY_PASSES() -#undef x -}; - struct bch_fs { struct closure cl; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index b771d80f..5ec218ee 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1629,7 +1629,9 @@ struct bch_sb_field_journal_seq_blacklist { x(major_minor, BCH_VERSION(1, 0), \ 0) \ x(snapshot_skiplists, BCH_VERSION(1, 1), \ - BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) + BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \ + x(deleted_inodes, BCH_VERSION(1, 2), \ + BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -2251,7 +2253,9 @@ enum btree_id_flags { x(bucket_gens, 14, 0, \ BIT_ULL(KEY_TYPE_bucket_gens)) \ x(snapshot_trees, 15, 0, \ - BIT_ULL(KEY_TYPE_snapshot_tree)) + BIT_ULL(KEY_TYPE_snapshot_tree)) \ + x(deleted_inodes, 16, BTREE_ID_SNAPSHOTS, \ + BIT_ULL(KEY_TYPE_set)) enum btree_id { #define x(name, nr, ...) BTREE_ID_##name = nr, diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 6b691b2b..d433f4d5 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -35,7 +35,7 @@ #include -static void bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned); +static int bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned, bool); struct folio_vec { struct folio *fv_folio; @@ -3410,11 +3410,15 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, } if (!(mode & FALLOC_FL_ZERO_RANGE)) { - ret = drop_locks_do(&trans, - (bch2_clamp_data_hole(&inode->v, - &hole_start, - &hole_end, - opts.data_replicas), 0)); + if (bch2_clamp_data_hole(&inode->v, + &hole_start, + &hole_end, + opts.data_replicas, true)) + ret = drop_locks_do(&trans, + (bch2_clamp_data_hole(&inode->v, + &hole_start, + &hole_end, + opts.data_replicas, false), 0)); bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start)); if (ret) @@ -3714,7 +3718,8 @@ static int folio_data_offset(struct folio *folio, loff_t pos, static loff_t bch2_seek_pagecache_data(struct inode *vinode, loff_t start_offset, loff_t end_offset, - unsigned min_replicas) + unsigned min_replicas, + bool nonblock) { struct folio_batch fbatch; pgoff_t start_index = start_offset >> PAGE_SHIFT; @@ -3731,7 +3736,13 @@ static loff_t bch2_seek_pagecache_data(struct inode *vinode, for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; - folio_lock(folio); + if (!nonblock) { + folio_lock(folio); + } else if (!folio_trylock(folio)) { + folio_batch_release(&fbatch); + return -EAGAIN; + } + offset = folio_data_offset(folio, max(folio_pos(folio), start_offset), min_replicas); @@ -3796,7 +3807,7 @@ err: if (next_data > offset) next_data = bch2_seek_pagecache_data(&inode->v, - offset, next_data, 0); + offset, next_data, 0, false); if (next_data >= isize) return -ENXIO; @@ -3804,18 +3815,24 @@ err: return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); } -static bool folio_hole_offset(struct address_space *mapping, loff_t *offset, - unsigned min_replicas) +static int folio_hole_offset(struct address_space *mapping, loff_t *offset, + unsigned min_replicas, bool nonblock) { struct folio *folio; struct bch_folio *s; unsigned i, sectors; bool ret = true; - folio = filemap_lock_folio(mapping, *offset >> PAGE_SHIFT); + folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT, + !nonblock ? FGP_LOCK : 0, 0); if (IS_ERR_OR_NULL(folio)) return true; + if (nonblock && !folio_trylock(folio)) { + folio_put(folio); + return -EAGAIN; + } + s = bch2_folio(folio); if (!s) goto unlock; @@ -3833,37 +3850,51 @@ static bool folio_hole_offset(struct address_space *mapping, loff_t *offset, ret = false; unlock: folio_unlock(folio); + folio_put(folio); return ret; } static loff_t bch2_seek_pagecache_hole(struct inode *vinode, loff_t start_offset, loff_t end_offset, - unsigned min_replicas) + unsigned min_replicas, + bool nonblock) { struct address_space *mapping = vinode->i_mapping; loff_t offset = start_offset; while (offset < end_offset && - !folio_hole_offset(mapping, &offset, min_replicas)) + !folio_hole_offset(mapping, &offset, min_replicas, nonblock)) ; return min(offset, end_offset); } -static void bch2_clamp_data_hole(struct inode *inode, - u64 *hole_start, - u64 *hole_end, - unsigned min_replicas) +static int bch2_clamp_data_hole(struct inode *inode, + u64 *hole_start, + u64 *hole_end, + unsigned min_replicas, + bool nonblock) { - *hole_start = bch2_seek_pagecache_hole(inode, - *hole_start << 9, *hole_end << 9, min_replicas) >> 9; + loff_t ret; + + ret = bch2_seek_pagecache_hole(inode, + *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9; + if (ret < 0) + return ret; + + *hole_start = ret; if (*hole_start == *hole_end) - return; + return 0; - *hole_end = bch2_seek_pagecache_data(inode, - *hole_start << 9, *hole_end << 9, min_replicas) >> 9; + ret = bch2_seek_pagecache_data(inode, + *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9; + if (ret < 0) + return ret; + + *hole_end = ret; + return 0; } static loff_t bch2_seek_hole(struct file *file, u64 offset) @@ -3895,12 +3926,12 @@ retry: BTREE_ITER_SLOTS, k, ret) { if (k.k->p.inode != inode->v.i_ino) { next_hole = bch2_seek_pagecache_hole(&inode->v, - offset, MAX_LFS_FILESIZE, 0); + offset, MAX_LFS_FILESIZE, 0, false); break; } else if (!bkey_extent_is_data(k.k)) { next_hole = bch2_seek_pagecache_hole(&inode->v, max(offset, bkey_start_offset(k.k) << 9), - k.k->p.offset << 9, 0); + k.k->p.offset << 9, 0, false); if (next_hole < k.k->p.offset << 9) break; diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 8c186acc..0852dbe9 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -1123,73 +1123,100 @@ static int extent_ends_at(struct bch_fs *c, static int overlapping_extents_found(struct btree_trans *trans, enum btree_id btree, struct bpos pos1, struct bkey pos2, - bool *fixed) + bool *fixed, + struct extent_end *extent_end) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; - struct btree_iter iter; - struct bkey_s_c k; - u32 snapshot = min(pos1.snapshot, pos2.p.snapshot); + struct btree_iter iter1, iter2 = { NULL }; + struct bkey_s_c k1, k2; int ret; BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); - bch2_trans_iter_init(trans, &iter, btree, SPOS(pos1.inode, pos1.offset - 1, snapshot), 0); - k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX)); - ret = bkey_err(k); + bch2_trans_iter_init(trans, &iter1, btree, pos1, + BTREE_ITER_ALL_SNAPSHOTS| + BTREE_ITER_NOT_EXTENTS); + k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX)); + ret = bkey_err(k1); if (ret) goto err; prt_str(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, k); + bch2_bkey_val_to_text(&buf, c, k1); - if (!bpos_eq(pos1, k.k->p)) { - bch_err(c, "%s: error finding first overlapping extent when repairing%s", + if (!bpos_eq(pos1, k1.k->p)) { + prt_str(&buf, "\n wanted\n "); + bch2_bpos_to_text(&buf, pos1); + prt_str(&buf, "\n "); + bch2_bkey_to_text(&buf, &pos2); + + bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", __func__, buf.buf); ret = -BCH_ERR_internal_fsck_err; goto err; } - while (1) { - bch2_btree_iter_advance(&iter); + bch2_trans_copy_iter(&iter2, &iter1); - k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX)); - ret = bkey_err(k); + while (1) { + bch2_btree_iter_advance(&iter2); + + k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); + ret = bkey_err(k2); if (ret) goto err; - if (bkey_ge(k.k->p, pos2.p)) + if (bpos_ge(k2.k->p, pos2.p)) break; - } prt_str(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, k); + bch2_bkey_val_to_text(&buf, c, k2); - if (bkey_gt(k.k->p, pos2.p) || - pos2.size != k.k->size) { + if (bpos_gt(k2.k->p, pos2.p) || + pos2.size != k2.k->size) { bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", __func__, buf.buf); ret = -BCH_ERR_internal_fsck_err; goto err; } - if (fsck_err(c, "overlapping extents%s", buf.buf)) { - struct bpos update_pos = pos1.snapshot < pos2.p.snapshot ? pos1 : pos2.p; - struct btree_iter update_iter; + prt_printf(&buf, "\n overwriting %s extent", + pos1.snapshot >= pos2.p.snapshot ? "first" : "second"); - struct bkey_i *update = bch2_bkey_get_mut(trans, &update_iter, - btree, update_pos, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); - bch2_trans_iter_exit(trans, &update_iter); - if ((ret = PTR_ERR_OR_ZERO(update))) + if (fsck_err(c, "overlapping extents%s", buf.buf)) { + struct btree_iter *old_iter = &iter1; + struct disk_reservation res = { 0 }; + + if (pos1.snapshot < pos2.p.snapshot) { + old_iter = &iter2; + swap(k1, k2); + } + + trans->extra_journal_res += bch2_bkey_sectors_compressed(k2); + + ret = bch2_trans_update_extent_overwrite(trans, old_iter, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE, + k1, k2) ?: + bch2_trans_commit(trans, &res, NULL, + BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL); + bch2_disk_reservation_put(c, &res); + + if (ret) goto err; *fixed = true; + + if (pos1.snapshot == pos2.p.snapshot) + extent_end->offset = bkey_start_offset(&pos2); + else + ret = -BCH_ERR_transaction_restart_nested; } fsck_err: err: - bch2_trans_iter_exit(trans, &iter); + bch2_trans_iter_exit(trans, &iter2); + bch2_trans_iter_exit(trans, &iter1); printbuf_exit(&buf); return ret; } @@ -1199,11 +1226,11 @@ static int check_overlapping_extents(struct btree_trans *trans, struct extent_ends *extent_ends, struct bkey_s_c k, u32 equiv, - struct btree_iter *iter) + struct btree_iter *iter, + bool *fixed) { struct bch_fs *c = trans->c; struct extent_end *i; - bool fixed = false; int ret = 0; /* transaction restart, running again */ @@ -1226,7 +1253,7 @@ static int check_overlapping_extents(struct btree_trans *trans, SPOS(iter->pos.inode, i->offset, i->snapshot), - *k.k, &fixed); + *k.k, fixed, i); if (ret) goto err; } @@ -1237,7 +1264,7 @@ static int check_overlapping_extents(struct btree_trans *trans, extent_ends->last_pos = k.k->p; err: - return ret ?: fixed; + return ret; } static int check_extent(struct btree_trans *trans, struct btree_iter *iter, @@ -1292,13 +1319,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, goto delete; ret = check_overlapping_extents(trans, s, extent_ends, k, - equiv.snapshot, iter); - if (ret < 0) - goto err; - + equiv.snapshot, iter, + &inode->recalculate_sums); if (ret) - inode->recalculate_sums = true; - ret = 0; + goto err; } /* @@ -1373,7 +1397,7 @@ int bch2_check_extents(struct bch_fs *c) snapshots_seen_init(&s); extent_ends_init(&extent_ends); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents, POS(BCACHEFS_ROOT_INO, 0), diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 755cf7d1..fea21e1e 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "btree_key_cache.h" +#include "btree_write_buffer.h" #include "bkey_methods.h" #include "btree_update.h" #include "buckets.h" @@ -519,6 +520,23 @@ void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c __bch2_inode_unpacked_to_text(out, &inode); } +static inline bool bkey_is_deleted_inode(struct bkey_s_c k) +{ + switch (k.k->type) { + case KEY_TYPE_inode: + return bkey_s_c_to_inode(k).v->bi_flags & + cpu_to_le32(BCH_INODE_UNLINKED); + case KEY_TYPE_inode_v2: + return bkey_s_c_to_inode_v2(k).v->bi_flags & + cpu_to_le32(BCH_INODE_UNLINKED); + case KEY_TYPE_inode_v3: + return bkey_s_c_to_inode_v3(k).v->bi_flags & + cpu_to_le64(BCH_INODE_UNLINKED); + default: + return false; + } +} + int bch2_trans_mark_inode(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, @@ -526,6 +544,8 @@ int bch2_trans_mark_inode(struct btree_trans *trans, unsigned flags) { int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k); + bool old_deleted = bkey_is_deleted_inode(old); + bool new_deleted = bkey_is_deleted_inode(bkey_i_to_s_c(new)); if (nr) { int ret = bch2_replicas_deltas_realloc(trans, 0); @@ -537,6 +557,12 @@ int bch2_trans_mark_inode(struct btree_trans *trans, d->nr_inodes += nr; } + if (old_deleted != new_deleted) { + int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new->k.p, new_deleted); + if (ret) + return ret; + } + return 0; } @@ -986,3 +1012,90 @@ err: return ret ?: -BCH_ERR_transaction_restart_nested; } + +static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c k; + struct bch_inode_unpacked inode; + int ret; + + if (bch2_snapshot_is_internal_node(c, pos.snapshot)) + return 0; + + if (!fsck_err_on(c->sb.clean, c, + "filesystem marked as clean but have deleted inode %llu:%u", + pos.offset, pos.snapshot)) + return 0; + + k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED); + ret = bkey_err(k); + if (ret) + return ret; + + ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode; + if (fsck_err_on(!bkey_is_inode(k.k), c, + "nonexistent inode %llu:%u in deleted_inodes btree", + pos.offset, pos.snapshot)) + goto delete; + + ret = bch2_inode_unpack(k, &inode); + if (ret) + goto err; + + if (fsck_err_on(!(inode.bi_flags & BCH_INODE_UNLINKED), c, + "non-deleted inode %llu:%u in deleted_inodes btree", + pos.offset, pos.snapshot)) + goto delete; + + return 1; +err: +fsck_err: + return ret; +delete: + return bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false); +} + +int bch2_delete_dead_inodes(struct bch_fs *c) +{ + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + int ret; + + bch2_trans_init(&trans, c, 0, 0); + + ret = bch2_btree_write_buffer_flush_sync(&trans); + if (ret) + goto err; + + /* + * Weird transaction restart handling here because on successful delete, + * bch2_inode_rm_snapshot() will return a nested transaction restart, + * but we can't retry because the btree write buffer won't have been + * flushed and we'd spin: + */ + for_each_btree_key(&trans, iter, BTREE_ID_deleted_inodes, POS_MIN, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + ret = lockrestart_do(&trans, may_delete_deleted_inode(&trans, k.k->p)); + if (ret < 0) + break; + + if (ret) { + if (!test_bit(BCH_FS_RW, &c->flags)) { + bch2_trans_unlock(&trans); + bch2_fs_lazy_rw(c); + } + + ret = bch2_inode_rm_snapshot(&trans, k.k->p.offset, k.k->p.snapshot); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + break; + } + } + bch2_trans_iter_exit(&trans, &iter); +err: + bch2_trans_exit(&trans); + + return ret; +} diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index 1b9dc27e..22b24405 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -199,5 +199,6 @@ void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, struct bch_inode_unpacked *); int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32); +int bch2_delete_dead_inodes(struct bch_fs *); #endif /* _BCACHEFS_INODE_H */ diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index 4d0daeba..960bb247 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -445,6 +445,13 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, if (!options) return 0; + /* + * sys_fsconfig() is now occasionally providing us with option lists + * starting with a comma - weird. + */ + if (*options == ',') + options++; + copied_opts = kstrdup(options, GFP_KERNEL); if (!copied_opts) return -1; diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index dcd4f9f4..55a233c2 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -1458,6 +1458,29 @@ use_clean: if (ret) goto err; + /* If we fixed errors, verify that fs is actually clean now: */ + if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && + test_bit(BCH_FS_ERRORS_FIXED, &c->flags) && + !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags) && + !test_bit(BCH_FS_ERROR, &c->flags)) { + bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean"); + clear_bit(BCH_FS_ERRORS_FIXED, &c->flags); + + c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; + + ret = bch2_run_recovery_passes(c); + if (ret) + goto err; + + if (test_bit(BCH_FS_ERRORS_FIXED, &c->flags) || + test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) { + bch_err(c, "Second fsck run was not clean"); + set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags); + } + + set_bit(BCH_FS_ERRORS_FIXED, &c->flags); + } + if (enabled_qtypes(c)) { bch_verbose(c, "reading quotas"); ret = bch2_fs_quota_read(c); diff --git a/libbcachefs/recovery_types.h b/libbcachefs/recovery_types.h new file mode 100644 index 00000000..abf1f834 --- /dev/null +++ b/libbcachefs/recovery_types.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_RECOVERY_TYPES_H +#define _BCACHEFS_RECOVERY_TYPES_H + +#define PASS_SILENT BIT(0) +#define PASS_FSCK BIT(1) +#define PASS_UNCLEAN BIT(2) +#define PASS_ALWAYS BIT(3) + +#define BCH_RECOVERY_PASSES() \ + x(alloc_read, PASS_ALWAYS) \ + x(stripes_read, PASS_ALWAYS) \ + x(initialize_subvolumes, 0) \ + x(snapshots_read, PASS_ALWAYS) \ + x(check_topology, 0) \ + x(check_allocations, PASS_FSCK) \ + x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ + x(journal_replay, PASS_ALWAYS) \ + x(check_alloc_info, PASS_FSCK) \ + x(check_lrus, PASS_FSCK) \ + x(check_btree_backpointers, PASS_FSCK) \ + x(check_backpointers_to_extents,PASS_FSCK) \ + x(check_extents_to_backpointers,PASS_FSCK) \ + x(check_alloc_to_lru_refs, PASS_FSCK) \ + x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \ + x(bucket_gens_init, 0) \ + x(check_snapshot_trees, PASS_FSCK) \ + x(check_snapshots, PASS_FSCK) \ + x(check_subvols, PASS_FSCK) \ + x(delete_dead_snapshots, PASS_FSCK|PASS_UNCLEAN) \ + x(fs_upgrade_for_subvolumes, 0) \ + x(check_inodes, PASS_FSCK) \ + x(check_extents, PASS_FSCK) \ + x(check_dirents, PASS_FSCK) \ + x(check_xattrs, PASS_FSCK) \ + x(check_root, PASS_FSCK) \ + x(check_directory_structure, PASS_FSCK) \ + x(check_nlinks, PASS_FSCK) \ + x(delete_dead_inodes, PASS_FSCK|PASS_UNCLEAN) \ + x(fix_reflink_p, 0) \ + +enum bch_recovery_pass { +#define x(n, when) BCH_RECOVERY_PASS_##n, + BCH_RECOVERY_PASSES() +#undef x +}; + +#endif /* _BCACHEFS_RECOVERY_TYPES_H */ diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c index cef23d2c..1d4b0a58 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/tests.c @@ -503,6 +503,36 @@ static int test_extent_overwrite_all(struct bch_fs *c, u64 nr) __test_extent_overwrite(c, 32, 64, 32, 128); } +static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, u32 len, u32 snapid) +{ + struct bkey_i_cookie k; + int ret; + + bkey_cookie_init(&k.k_i); + k.k_i.k.p.inode = inum; + k.k_i.k.p.offset = start + len; + k.k_i.k.p.snapshot = snapid; + k.k_i.k.size = len; + + ret = bch2_trans_do(c, NULL, NULL, 0, + bch2_btree_insert_nonextent(&trans, BTREE_ID_extents, &k.k_i, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)); + if (ret) + bch_err_fn(c, ret); + return ret; +} + +static int test_extent_create_overlapping(struct bch_fs *c, u64 inum) +{ + return insert_test_overlapping_extent(c, inum, 0, 16, U32_MAX - 2) ?: /* overwrite entire */ + insert_test_overlapping_extent(c, inum, 2, 8, U32_MAX - 2) ?: + insert_test_overlapping_extent(c, inum, 4, 4, U32_MAX) ?: + insert_test_overlapping_extent(c, inum, 32, 8, U32_MAX - 2) ?: /* overwrite front/back */ + insert_test_overlapping_extent(c, inum, 36, 8, U32_MAX) ?: + insert_test_overlapping_extent(c, inum, 60, 8, U32_MAX - 2) ?: + insert_test_overlapping_extent(c, inum, 64, 8, U32_MAX); +} + /* snapshot unit tests */ /* Test skipping over keys in unrelated snapshots: */ @@ -901,6 +931,7 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, perf_test(test_extent_overwrite_back); perf_test(test_extent_overwrite_middle); perf_test(test_extent_overwrite_all); + perf_test(test_extent_create_overlapping); perf_test(test_snapshots);