#include "bcache.h" #include "btree_update.h" #include "dirent.h" #include "error.h" #include "fs.h" #include "fs-gc.h" #include "inode.h" #include "keylist.h" #include "super.h" #include struct nlink { u32 count; u32 dir_count; }; DECLARE_GENRADIX_TYPE(nlinks, struct nlink); static void inc_link(struct cache_set *c, struct nlinks *links, u64 range_start, u64 *range_end, u64 inum, bool dir) { struct nlink *link; if (inum < range_start || inum >= *range_end) return; link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL); if (!link) { bch_verbose(c, "allocation failed during fs gc - will need another pass"); *range_end = inum; return; } if (dir) link->dir_count++; else link->count++; } /* * XXX: should do a DFS (via filesystem heirarchy), and make sure all dirents * are reachable */ noinline_for_stack static int bch_gc_walk_dirents(struct cache_set *c, struct nlinks *links, u64 range_start, u64 *range_end) { struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_dirent d; u64 d_inum; int ret; inc_link(c, links, range_start, range_end, BCACHE_ROOT_INO, false); for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, k) { switch (k.k->type) { case BCH_DIRENT: d = bkey_s_c_to_dirent(k); d_inum = le64_to_cpu(d.v->d_inum); if (d.v->d_type == DT_DIR) inc_link(c, links, range_start, range_end, d.k->p.inode, true); inc_link(c, links, range_start, range_end, d_inum, false); break; } bch_btree_iter_cond_resched(&iter); } ret = bch_btree_iter_unlock(&iter); if (ret) bch_err(c, "error in fs gc: btree error %i while walking dirents", ret); return ret; } s64 bch_count_inode_sectors(struct cache_set *c, u64 inum) { struct btree_iter iter; struct bkey_s_c k; u64 sectors = 0; for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), k) { if (k.k->p.inode != inum) break; if (bkey_extent_is_allocation(k.k)) sectors += k.k->size; } return bch_btree_iter_unlock(&iter) ?: sectors; } static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter, struct bkey_s_c_inode inode, struct nlink link) { u16 i_mode = le16_to_cpu(inode.v->i_mode); u32 i_flags = le32_to_cpu(inode.v->i_flags); u32 i_nlink = le32_to_cpu(inode.v->i_nlink); u64 i_size = le64_to_cpu(inode.v->i_size); s64 i_sectors = 0; int ret = 0; u32 real_i_nlink; fsck_err_on(i_nlink < link.count, c, "inode %llu i_link too small (%u < %u, type %i)", inode.k->p.inode, i_nlink, link.count, mode_to_type(i_mode)); if (S_ISDIR(i_mode)) { unfixable_fsck_err_on(link.count > 1, c, "directory %llu with multiple hardlinks: %u", inode.k->p.inode, link.count); real_i_nlink = link.count * 2 + link.dir_count; } else { unfixable_fsck_err_on(link.dir_count, c, "found dirents for non directory %llu", inode.k->p.inode); real_i_nlink = link.count + link.dir_count; } if (!link.count) { fsck_err_on(c->sb.clean, c, "filesystem marked clean, " "but found orphaned inode %llu", inode.k->p.inode); unfixable_fsck_err_on(S_ISDIR(i_mode) && bch_empty_dir(c, inode.k->p.inode), c, "non empty directory with link count 0, " "inode nlink %u, dir links found %u", i_nlink, link.dir_count); bch_verbose(c, "deleting inode %llu", inode.k->p.inode); ret = bch_inode_rm(c, inode.k->p.inode); if (ret) bch_err(c, "error in fs gc: error %i " "while deleting inode", ret); return ret; } if (i_flags & BCH_INODE_I_SIZE_DIRTY) { fsck_err_on(c->sb.clean, c, "filesystem marked clean, " "but inode %llu has i_size dirty", inode.k->p.inode); bch_verbose(c, "truncating inode %llu", inode.k->p.inode); /* * XXX: need to truncate partial blocks too here - or ideally * just switch units to bytes and that issue goes away */ ret = bch_inode_truncate(c, inode.k->p.inode, round_up(i_size, PAGE_SIZE) >> 9, NULL, NULL); if (ret) { bch_err(c, "error in fs gc: error %i " "truncating inode", ret); return ret; } /* * We truncated without our normal sector accounting hook, just * make sure we recalculate it: */ i_flags |= BCH_INODE_I_SECTORS_DIRTY; } if (i_flags & BCH_INODE_I_SECTORS_DIRTY) { fsck_err_on(c->sb.clean, c, "filesystem marked clean, " "but inode %llu has i_sectors dirty", inode.k->p.inode); bch_verbose(c, "recounting sectors for inode %llu", inode.k->p.inode); i_sectors = bch_count_inode_sectors(c, inode.k->p.inode); if (i_sectors < 0) { bch_err(c, "error in fs gc: error %i " "recounting inode sectors", (int) i_sectors); return i_sectors; } } if (i_nlink != real_i_nlink) { fsck_err_on(c->sb.clean, c, "filesystem marked clean, " "but inode %llu has wrong i_nlink " "(type %u i_nlink %u, should be %u)", inode.k->p.inode, mode_to_type(i_mode), i_nlink, real_i_nlink); bch_verbose(c, "setting inode %llu nlinks from %u to %u", inode.k->p.inode, i_nlink, real_i_nlink); } if (i_nlink != real_i_nlink|| i_flags & BCH_INODE_I_SECTORS_DIRTY || i_flags & BCH_INODE_I_SIZE_DIRTY) { struct bkey_i_inode update; bkey_reassemble(&update.k_i, inode.s_c); update.v.i_nlink = cpu_to_le32(real_i_nlink); update.v.i_flags = cpu_to_le32(i_flags & ~(BCH_INODE_I_SIZE_DIRTY| BCH_INODE_I_SECTORS_DIRTY)); if (i_flags & BCH_INODE_I_SECTORS_DIRTY) update.v.i_sectors = cpu_to_le64(i_sectors); ret = bch_btree_insert_at(c, NULL, NULL, NULL, BTREE_INSERT_NOFAIL, BTREE_INSERT_ENTRY(iter, &update.k_i)); if (ret && ret != -EINTR) bch_err(c, "error in fs gc: error %i " "updating inode", ret); } fsck_err: return ret; } noinline_for_stack static int bch_gc_walk_inodes(struct cache_set *c, struct nlinks *links, u64 range_start, u64 range_end) { struct btree_iter iter; struct bkey_s_c k; struct nlink *link, zero_links = { 0, 0 }; struct genradix_iter nlinks_iter; int ret = 0, ret2 = 0; u64 nlinks_pos; bch_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0)); genradix_iter_init(&nlinks_iter); while ((k = bch_btree_iter_peek(&iter)).k && !btree_iter_err(k)) { peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); if (!link && (!k.k || iter.pos.inode >= range_end)) break; nlinks_pos = range_start + nlinks_iter.pos; if (iter.pos.inode > nlinks_pos) { unfixable_fsck_err_on(link && link->count, c, "missing inode %llu (nlink %u)", nlinks_pos, link->count); genradix_iter_advance(&nlinks_iter, links); goto peek_nlinks; } if (iter.pos.inode < nlinks_pos || !link) link = &zero_links; if (k.k && k.k->type == BCH_INODE_FS) { /* * Avoid potential deadlocks with iter for * truncate/rm/etc.: */ bch_btree_iter_unlock(&iter); ret = bch_gc_do_inode(c, &iter, bkey_s_c_to_inode(k), *link); if (ret == -EINTR) continue; if (ret) break; if (link->count) atomic_long_inc(&c->nr_inodes); } else { unfixable_fsck_err_on(link->count, c, "missing inode %llu (nlink %u)", nlinks_pos, link->count); } if (nlinks_pos == iter.pos.inode) genradix_iter_advance(&nlinks_iter, links); bch_btree_iter_advance_pos(&iter); bch_btree_iter_cond_resched(&iter); } fsck_err: ret2 = bch_btree_iter_unlock(&iter); if (ret2) bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2); return ret ?: ret2; } int bch_gc_inode_nlinks(struct cache_set *c) { struct nlinks links; u64 this_iter_range_start, next_iter_range_start = 0; int ret = 0; genradix_init(&links); do { this_iter_range_start = next_iter_range_start; next_iter_range_start = U64_MAX; ret = bch_gc_walk_dirents(c, &links, this_iter_range_start, &next_iter_range_start); if (ret) break; ret = bch_gc_walk_inodes(c, &links, this_iter_range_start, next_iter_range_start); if (ret) break; genradix_free(&links); } while (next_iter_range_start != U64_MAX); genradix_free(&links); return ret; } static void next_inode(struct cache_set *c, u64 inum, u64 *cur_inum, struct bkey_i_inode *inode, bool *first_this_inode, bool *have_inode, u64 *i_size, u16 *i_mode) { *first_this_inode = inum != *cur_inum; *cur_inum = inum; if (*first_this_inode) { *have_inode = !bch_inode_find_by_inum(c, inum, inode); if (*have_inode) { *i_mode = le16_to_cpu(inode->v.i_mode); *i_size = le64_to_cpu(inode->v.i_size); } } } /* * Checks for inconsistencies that shouldn't happen, unless we have a bug. * Doesn't fix them yet, mainly because they haven't yet been observed: */ int bch_fsck(struct cache_set *c) { struct btree_iter iter; struct bkey_s_c k; struct bkey_i_inode inode; bool first_this_inode, have_inode; u64 cur_inum, i_sectors; u64 i_size = 0; u16 i_mode = 0; int ret = 0; cur_inum = -1; have_inode = false; for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(BCACHE_ROOT_INO, 0), k) { if (k.k->type == KEY_TYPE_DISCARD) continue; next_inode(c, k.k->p.inode, &cur_inum, &inode, &first_this_inode, &have_inode, &i_size, &i_mode); unfixable_fsck_err_on(!have_inode, c, "extent type %u for missing inode %llu", k.k->type, k.k->p.inode); unfixable_fsck_err_on(first_this_inode && have_inode && le64_to_cpu(inode.v.i_sectors) != (i_sectors = bch_count_inode_sectors(c, cur_inum)), c, "i_sectors wrong: got %llu, should be %llu", le64_to_cpu(inode.v.i_sectors), i_sectors); unfixable_fsck_err_on(have_inode && !S_ISREG(i_mode) && !S_ISLNK(i_mode), c, "extent type %u for non regular file, inode %llu mode %o", k.k->type, k.k->p.inode, i_mode); unfixable_fsck_err_on(k.k->type != BCH_RESERVATION && k.k->p.offset > round_up(i_size, PAGE_SIZE) >> 9, c, "extent type %u offset %llu past end of inode %llu, i_size %llu", k.k->type, k.k->p.offset, k.k->p.inode, i_size); } ret = bch_btree_iter_unlock(&iter); if (ret) return ret; cur_inum = -1; have_inode = false; for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(BCACHE_ROOT_INO, 0), k) { struct bkey_s_c_dirent d; struct bkey_i_inode target; bool have_target; u64 d_inum; next_inode(c, k.k->p.inode, &cur_inum, &inode, &first_this_inode, &have_inode, &i_size, &i_mode); unfixable_fsck_err_on(!have_inode, c, "dirent in nonexisting directory %llu", k.k->p.inode); unfixable_fsck_err_on(!S_ISDIR(i_mode), c, "dirent in non directory inode %llu, type %u", k.k->p.inode, mode_to_type(i_mode)); if (k.k->type != BCH_DIRENT) continue; d = bkey_s_c_to_dirent(k); d_inum = le64_to_cpu(d.v->d_inum); unfixable_fsck_err_on(d_inum == d.k->p.inode, c, "dirent points to own directory"); have_target = !bch_inode_find_by_inum(c, d_inum, &target); unfixable_fsck_err_on(!have_target, c, "dirent points to missing inode %llu, type %u filename %s", d_inum, d.v->d_type, d.v->d_name); unfixable_fsck_err_on(have_target && d.v->d_type != mode_to_type(le16_to_cpu(target.v.i_mode)), c, "incorrect d_type: got %u should be %u, filename %s", d.v->d_type, mode_to_type(le16_to_cpu(target.v.i_mode)), d.v->d_name); } ret = bch_btree_iter_unlock(&iter); if (ret) return ret; cur_inum = -1; have_inode = false; for_each_btree_key(&iter, c, BTREE_ID_XATTRS, POS(BCACHE_ROOT_INO, 0), k) { next_inode(c, k.k->p.inode, &cur_inum, &inode, &first_this_inode, &have_inode, &i_size, &i_mode); unfixable_fsck_err_on(!have_inode, c, "xattr for missing inode %llu", k.k->p.inode); } ret = bch_btree_iter_unlock(&iter); if (ret) return ret; return 0; fsck_err: bch_btree_iter_unlock(&iter); return ret; }