Patch summary (free-form text ahead of the first "diff --git" header).

  * .bcachefs_revision: d9d1235f3c56 becomes 4dd9a5a48885.
  * bcachefs_format.h: struct bch_reflink_p drops reservation_generation,
    nr_replicas and pad[3] in favour of __le32 front_pad / back_pad;
    bch_reflink_p and bch_reflink_v gain packed, aligned(8); new metadata
    version reflink_p_fix = 16, version_max becomes 17.
  * buckets.c: bch2_mark_reflink_p() and bch2_trans_mark_reflink_p() start
    at idx - front_pad and cover front_pad + back_pad + size sectors.
    bch2_trans_mark_reflink_p() zeroes both pads on BTREE_TRIGGER_INSERT;
    __bch2_trans_mark_reflink_p() then raises them with max_t() on insert,
    and reports a zero refcount on overwrite through bch2_fs_inconsistent()
    and -EIO where it used to BUG_ON().
  * fsck.c: new fix_reflink_p() step at the end of bch2_fsck_full(). It
    returns 0 when sb.version >= reflink_p_fix; otherwise it walks
    BTREE_ID_extents and rewrites reflink_p keys that have a nonzero pad
    with both pads set to 0.
  * recovery.c: a single "sb.version < reflink_p_fix" branch replaces the
    btree_ptr_sectors_written and snapshot_2 branches and also sets
    opts.fsck.
  * reflink.c: bch2_reflink_p_invalid() returns "idx < front_pad" when
    sb.version >= reflink_p_fix and idx is smaller than front_pad.

NOTE(review): line breaks were restored from the hunk headers, which all
re-count correctly. Tab indentation and column alignment are reconstructed,
not recovered; if context does not match the target tree, apply with
"git apply --ignore-whitespace" or "patch -l".

diff --git a/.bcachefs_revision b/.bcachefs_revision
index cd1097b5..43da7f3f 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-d9d1235f3c568a47b3547c0b0adad0d7948f18aa
+4dd9a5a488857137ce6eecadddd9304440fb03e9
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 0b8eabe5..e268125b 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -913,18 +913,24 @@ struct bch_stripe {
 struct bch_reflink_p {
 	struct bch_val		v;
 	__le64			idx;
-
-	__le32			reservation_generation;
-	__u8			nr_replicas;
-	__u8			pad[3];
-};
+	/*
+	 * A reflink pointer might point to an indirect extent which is then
+	 * later split (by copygc or rebalance). If we only pointed to part of
+	 * the original indirect extent, and then one of the fragments is
+	 * outside the range we point to, we'd leak a refcount: so when creating
+	 * reflink pointers, we need to store pad values to remember the full
+	 * range we were taking a reference on.
+	 */
+	__le32			front_pad;
+	__le32			back_pad;
+} __attribute__((packed, aligned(8)));
 
 struct bch_reflink_v {
 	struct bch_val		v;
 	__le64			refcount;
 	union bch_extent_entry	start[0];
 	__u64			_data[0];
-};
+} __attribute__((packed, aligned(8)));
 
 struct bch_indirect_inline_data {
 	struct bch_val		v;
@@ -1259,7 +1265,8 @@ enum bcachefs_metadata_version {
 	bcachefs_metadata_version_inode_backpointers	= 13,
 	bcachefs_metadata_version_btree_ptr_sectors_written = 14,
 	bcachefs_metadata_version_snapshot_2		= 15,
-	bcachefs_metadata_version_max			= 16,
+	bcachefs_metadata_version_reflink_p_fix		= 16,
+	bcachefs_metadata_version_max			= 17,
 };
 
 #define bcachefs_metadata_version_current	(bcachefs_metadata_version_max - 1)
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index d5ec4d72..97151ec8 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -1176,8 +1176,10 @@ static int bch2_mark_reflink_p(struct bch_fs *c,
 	struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
 	struct reflink_gc *ref;
 	size_t l, r, m;
-	u64 idx = le64_to_cpu(p.v->idx);
-	unsigned sectors = p.k->size;
+	u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+	u64 sectors = (u64) le32_to_cpu(p.v->front_pad) +
+		le32_to_cpu(p.v->back_pad) +
+		p.k->size;
 	s64 ret = 0;
 
 	BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) ==
@@ -1753,12 +1755,33 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
 		bch2_fs_inconsistent(c,
 			"%llu:%llu len %u points to nonexistent indirect extent %llu",
 			p.k->p.inode, p.k->p.offset, p.k->size, idx);
-		bch2_inconsistent_error(c);
 		ret = -EIO;
 		goto err;
 	}
 
-	BUG_ON(!*refcount && (flags & BTREE_TRIGGER_OVERWRITE));
+	if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
+		bch2_fs_inconsistent(c,
+			"%llu:%llu len %u idx %llu indirect extent refcount underflow",
+			p.k->p.inode, p.k->p.offset, p.k->size, idx);
+		ret = -EIO;
+		goto err;
+	}
+
+	if (flags & BTREE_TRIGGER_INSERT) {
+		struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+		u64 pad;
+
+		pad = max_t(s64, le32_to_cpu(v->front_pad),
+			    le64_to_cpu(v->idx) - bkey_start_offset(k.k));
+		BUG_ON(pad > U32_MAX);
+		v->front_pad = cpu_to_le32(pad);
+
+		pad = max_t(s64, le32_to_cpu(v->back_pad),
+			    k.k->p.offset - p.k->size - le64_to_cpu(v->idx));
+		BUG_ON(pad > U32_MAX);
+		v->back_pad = cpu_to_le32(pad);
+	}
+
 	le64_add_cpu(refcount, add);
 
 	if (!*refcount) {
@@ -1781,10 +1804,20 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
 				     struct bkey_s_c k, unsigned flags)
 {
 	struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-	u64 idx = le64_to_cpu(p.v->idx);
-	unsigned sectors = p.k->size;
+	u64 idx, sectors;
 	s64 ret = 0;
 
+	if (flags & BTREE_TRIGGER_INSERT) {
+		struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+
+		v->front_pad = v->back_pad = 0;
+	}
+
+	idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+	sectors = (u64) le32_to_cpu(p.v->front_pad) +
+		le32_to_cpu(p.v->back_pad) +
+		p.k->size;
+
 	while (sectors) {
 		ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags);
 		if (ret < 0)
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index a36bc840..c99e1514 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -2154,6 +2154,72 @@ static int check_nlinks(struct bch_fs *c)
 	return ret;
 }
 
+static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter)
+{
+	struct bkey_s_c k;
+	struct bkey_s_c_reflink_p p;
+	struct bkey_i_reflink_p *u;
+	int ret;
+
+	k = bch2_btree_iter_peek(iter);
+	if (!k.k)
+		return 0;
+
+	ret = bkey_err(k);
+	if (ret)
+		return ret;
+
+	if (k.k->type != KEY_TYPE_reflink_p)
+		return 0;
+
+	p = bkey_s_c_to_reflink_p(k);
+
+	if (!p.v->front_pad && !p.v->back_pad)
+		return 0;
+
+	u = bch2_trans_kmalloc(trans, sizeof(*u));
+	ret = PTR_ERR_OR_ZERO(u);
+	if (ret)
+		return ret;
+
+	bkey_reassemble(&u->k_i, k);
+	u->v.front_pad	= 0;
+	u->v.back_pad	= 0;
+
+	return bch2_trans_update(trans, iter, &u->k_i, 0);
+}
+
+static int fix_reflink_p(struct bch_fs *c)
+{
+	struct btree_trans trans;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	int ret;
+
+	if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
+		return 0;
+
+	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+	for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN,
+			   BTREE_ITER_INTENT|
+			   BTREE_ITER_PREFETCH|
+			   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+		if (k.k->type == KEY_TYPE_reflink_p) {
+			ret = __bch2_trans_do(&trans, NULL, NULL,
+					      BTREE_INSERT_NOFAIL|
+					      BTREE_INSERT_LAZY_RW,
+					fix_reflink_p_key(&trans, &iter));
+			if (ret)
+				break;
+		}
+	}
+	bch2_trans_iter_exit(&trans, &iter);
+
+	bch2_trans_exit(&trans);
+	return ret;
+}
+
 /*
  * Checks for inconsistencies that shouldn't happen, unless we have a bug.
  * Doesn't fix them yet, mainly because they haven't yet been observed:
@@ -2168,7 +2234,8 @@ int bch2_fsck_full(struct bch_fs *c)
 		check_xattrs(c) ?:
 		check_root(c) ?:
 		check_directory_structure(c) ?:
-		check_nlinks(c);
+		check_nlinks(c) ?:
+		fix_reflink_p(c);
 }
 
 int bch2_fsck_walk_inodes_only(struct bch_fs *c)
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 6afb37a2..8c53b1e9 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -1086,12 +1086,10 @@ int bch2_fs_recovery(struct bch_fs *c)
 		c->opts.version_upgrade	= true;
 		c->opts.fsck		= true;
 		c->opts.fix_errors	= FSCK_OPT_YES;
-	} else if (c->sb.version < bcachefs_metadata_version_btree_ptr_sectors_written) {
-		bch_info(c, "version prior to btree_ptr_sectors_written, upgrade required");
-		c->opts.version_upgrade	= true;
-	} else if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
-		bch_info(c, "filesystem version is prior to snapshots - upgrading");
+	} else if (c->sb.version < bcachefs_metadata_version_reflink_p_fix) {
+		bch_info(c, "filesystem version is prior to reflink_p fix - upgrading");
 		c->opts.version_upgrade	= true;
+		c->opts.fsck		= true;
 	}
 
 	ret = bch2_blacklist_table_initialize(c);
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index 9bcf4216..2827d0ef 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -32,6 +32,10 @@ const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k)
 	if (bkey_val_bytes(p.k) != sizeof(*p.v))
 		return "incorrect value size";
 
+	if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix &&
+	    le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad))
+		return "idx < front_pad";
+
 	return NULL;
 }
 