diff --git a/.bcachefs_revision b/.bcachefs_revision index 978f8ce2..7ae58f9f 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -710cd382bf5f50ab8114a7cc22d78b5b2f574529 +720f644e63e0f5b24bb69f2ffb70cdc2dd162810 diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 1c4b76ed..0b8eabe5 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -913,24 +913,18 @@ struct bch_stripe { struct bch_reflink_p { struct bch_val v; __le64 idx; - /* - * A reflink pointer might point to an indirect extent which is then - * later split (by copygc or rebalance). If we only pointed to part of - * the original indirect extent, and then one of the fragments is - * outside the range we point to, we'd leak a refcount: so when creating - * reflink pointers, we need to store pad values to remember the full - * range we were taking a reference on. - */ - __le32 front_pad; - __le32 back_pad; -} __attribute__((packed, aligned(8))); + + __le32 reservation_generation; + __u8 nr_replicas; + __u8 pad[3]; +}; struct bch_reflink_v { struct bch_val v; __le64 refcount; union bch_extent_entry start[0]; __u64 _data[0]; -} __attribute__((packed, aligned(8))); +}; struct bch_indirect_inline_data { struct bch_val v; diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 236ecbd8..8f6e73b1 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -738,7 +738,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id, *max_stale = max(*max_stale, ptr_stale(ca, ptr)); } - bch2_mark_key(c, *k, flags); + ret = bch2_mark_key(c, *k, flags); fsck_err: err: if (ret) diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 18d2733e..14fb1ad8 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1688,7 +1688,7 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans) struct btree_path *path; struct btree_insert_entry *i; unsigned idx; - char buf[300]; + char buf1[300], 
buf2[300]; btree_trans_verify_sorted(trans); @@ -1697,7 +1697,7 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans) path->idx, path->ref, path->intent_ref, path->preserve ? " preserve" : "", bch2_btree_ids[path->btree_id], - (bch2_bpos_to_text(&PBUF(buf), path->pos), buf), + (bch2_bpos_to_text(&PBUF(buf1), path->pos), buf1), #ifdef CONFIG_BCACHEFS_DEBUG (void *) path->ip_allocated #else @@ -1705,11 +1705,16 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans) #endif ); - trans_for_each_update(trans, i) - printk(KERN_ERR "update: btree %s %s %pS\n", + trans_for_each_update(trans, i) { + struct bkey u; + struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u); + + printk(KERN_ERR "update: btree %s %pS\n old %s\n new %s\n", bch2_btree_ids[i->btree_id], - (bch2_bkey_val_to_text(&PBUF(buf), trans->c, bkey_i_to_s_c(i->k)), buf), - (void *) i->ip_allocated); + (void *) i->ip_allocated, + (bch2_bkey_val_to_text(&PBUF(buf1), trans->c, old), buf1), + (bch2_bkey_val_to_text(&PBUF(buf2), trans->c, bkey_i_to_s_c(i->k)), buf2)); + } } static struct btree_path *btree_path_alloc(struct btree_trans *trans, diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index c45bcd0b..d5ec4d72 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -14,6 +14,7 @@ #include "ec.h" #include "error.h" #include "movinggc.h" +#include "recovery.h" #include "reflink.h" #include "replicas.h" #include "subvolume.h" @@ -1111,10 +1112,9 @@ static s64 __bch2_mark_reflink_p(struct bch_fs *c, struct bkey_s_c_reflink_p p, { struct reflink_gc *r; int add = !(flags & BTREE_TRIGGER_OVERWRITE) ?
1 : -1; + s64 ret = 0; - while (1) { - if (*r_idx >= c->reflink_gc_nr) - goto not_found; + while (*r_idx < c->reflink_gc_nr) { r = genradix_ptr(&c->reflink_gc_table, *r_idx); BUG_ON(!r); @@ -1123,16 +1123,49 @@ static s64 __bch2_mark_reflink_p(struct bch_fs *c, struct bkey_s_c_reflink_p p, (*r_idx)++; } + if (*r_idx >= c->reflink_gc_nr || + idx < r->offset - r->size) { + ret = p.k->size; + goto not_found; + } + BUG_ON((s64) r->refcount + add < 0); r->refcount += add; return r->offset - idx; not_found: - bch2_fs_inconsistent(c, - "%llu:%llu len %u points to nonexistent indirect extent %llu", - p.k->p.inode, p.k->p.offset, p.k->size, idx); - bch2_inconsistent_error(c); - return -EIO; + if ((flags & BTREE_TRIGGER_GC) && + (flags & BTREE_TRIGGER_NOATOMIC)) { + /* + * XXX: we're replacing the entire reflink pointer with an error + * key, we should just be replacing the part that was missing: + */ + if (fsck_err(c, "%llu:%llu len %u points to nonexistent indirect extent %llu", + p.k->p.inode, p.k->p.offset, p.k->size, idx)) { + struct bkey_i_error *new; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) { + bch_err(c, "%s: error allocating new key", __func__); + return -ENOMEM; + } + + bkey_init(&new->k); + new->k.type = KEY_TYPE_error; + new->k.p = p.k->p; + new->k.size = p.k->size; + ret = bch2_journal_key_insert(c, BTREE_ID_extents, 0, &new->k_i); + + } + } else { + bch2_fs_inconsistent(c, + "%llu:%llu len %u points to nonexistent indirect extent %llu", + p.k->p.inode, p.k->p.offset, p.k->size, idx); + bch2_inconsistent_error(c); + ret = -EIO; + } +fsck_err: + return ret; } static int bch2_mark_reflink_p(struct bch_fs *c, @@ -1143,10 +1176,8 @@ static int bch2_mark_reflink_p(struct bch_fs *c, struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct reflink_gc *ref; size_t l, r, m; - u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); - u64 sectors = (u64) le32_to_cpu(p.v->front_pad) + - le32_to_cpu(p.v->back_pad) + - p.k->size; + u64 idx = 
le64_to_cpu(p.v->idx); + unsigned sectors = p.k->size; s64 ret = 0; BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) == @@ -1166,7 +1197,7 @@ static int bch2_mark_reflink_p(struct bch_fs *c, while (sectors) { ret = __bch2_mark_reflink_p(c, p, idx, flags, &l); - if (ret < 0) + if (ret <= 0) return ret; ret = min_t(s64, ret, sectors); @@ -1722,33 +1753,12 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, bch2_fs_inconsistent(c, "%llu:%llu len %u points to nonexistent indirect extent %llu", p.k->p.inode, p.k->p.offset, p.k->size, idx); + bch2_inconsistent_error(c); ret = -EIO; goto err; } - if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) { - bch2_fs_inconsistent(c, - "%llu:%llu len %u idx %llu indirect extent refcount underflow", - p.k->p.inode, p.k->p.offset, p.k->size, idx); - ret = -EIO; - goto err; - } - - if (flags & BTREE_TRIGGER_INSERT) { - struct bch_reflink_p *v = (struct bch_reflink_p *) p.v; - u64 pad; - - pad = max_t(s64, le32_to_cpu(v->front_pad), - le64_to_cpu(v->idx) - bkey_start_offset(k.k)); - BUG_ON(pad > U32_MAX); - v->front_pad = cpu_to_le32(pad); - - pad = max_t(s64, le32_to_cpu(v->back_pad), - k.k->p.offset - p.k->size - le64_to_cpu(v->idx)); - BUG_ON(pad > U32_MAX); - v->back_pad = cpu_to_le32(pad); - } - + BUG_ON(!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)); le64_add_cpu(refcount, add); if (!*refcount) { @@ -1771,20 +1781,10 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c k, unsigned flags) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - u64 idx, sectors; + u64 idx = le64_to_cpu(p.v->idx); + unsigned sectors = p.k->size; s64 ret = 0; - if (flags & BTREE_TRIGGER_INSERT) { - struct bch_reflink_p *v = (struct bch_reflink_p *) p.v; - - v->front_pad = v->back_pad = 0; - } - - idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); - sectors = (u64) le32_to_cpu(p.v->front_pad) + - le32_to_cpu(p.v->back_pad) + - p.k->size; - while (sectors) { ret = 
__bch2_trans_mark_reflink_p(trans, p, idx, flags); if (ret < 0) diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index c7344ac8..cd5468b1 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -183,6 +183,7 @@ int __bch2_dirent_read_target(struct btree_trans *trans, u32 *subvol, u32 *snapshot, u64 *inum, bool is_fsck) { + struct bch_subvolume s; int ret = 0; *subvol = 0; @@ -191,9 +192,6 @@ int __bch2_dirent_read_target(struct btree_trans *trans, if (likely(d.v->d_type != DT_SUBVOL)) { *inum = le64_to_cpu(d.v->d_inum); } else { - struct bch_subvolume s; - int ret; - *subvol = le64_to_cpu(d.v->d_inum); ret = bch2_subvolume_get(trans, *subvol, !is_fsck, BTREE_ITER_CACHED, &s); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 208bf6df..826a3577 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -1050,6 +1050,8 @@ static int inode_backpointer_exists(struct btree_trans *trans, { struct btree_iter iter; struct bkey_s_c k; + u32 target_subvol, target_snapshot; + u64 target_inum; int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, @@ -1061,7 +1063,15 @@ static int inode_backpointer_exists(struct btree_trans *trans, if (k.k->type != KEY_TYPE_dirent) goto out; - ret = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum) == inode->bi_inum; + ret = __bch2_dirent_read_target(trans, bkey_s_c_to_dirent(k), + &target_subvol, + &target_snapshot, + &target_inum, + true); + if (ret) + goto out; + + ret = target_inum == inode->bi_inum; out: bch2_trans_iter_exit(trans, &iter); return ret; @@ -1754,7 +1764,17 @@ static int check_path(struct btree_trans *trans, snapshot = snapshot_t(c, snapshot)->equiv; p->nr = 0; - while (inode->bi_inum != BCACHEFS_ROOT_INO) { + while (!(inode->bi_inum == BCACHEFS_ROOT_INO && + inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) { + if (inode->bi_parent_subvol) { + u64 inum; + + ret = subvol_lookup(trans, inode->bi_parent_subvol, + &snapshot, &inum); + if (ret) + break; + } + ret = lockrestart_do(trans, 
inode_backpointer_exists(trans, inode, snapshot)); if (ret < 0) diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index 7a0ae5d3..c468d597 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -653,7 +653,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct) atomic_long_read(&c->btree_key_cache.nr_dirty), atomic_long_read(&c->btree_key_cache.nr_keys)); - min_key_cache = min(bch2_nr_btree_keys_need_flush(c), 128UL); + min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128); nr_flushed = journal_flush_pins(j, seq_to_flush, min_nr, min_key_cache); diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 8726943a..c63c95fc 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -32,9 +32,6 @@ const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k) if (bkey_val_bytes(p.k) != sizeof(*p.v)) return "incorrect value size"; - if (le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad)) - return "idx < front_pad"; - return NULL; } @@ -169,9 +166,15 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, if (ret) goto err; + /* + * orig is in a bkey_buf which statically allocates 5 64s for the val, + * so we know it will be big enough: + */ orig->k.type = KEY_TYPE_reflink_p; r_p = bkey_i_to_reflink_p(orig); set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); + memset(&r_p->v, 0, sizeof(r_p->v)); + r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, 0); diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index d1c11105..9bd8d61c 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -886,6 +886,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, u32 *new_snapshotid, bool ro) { + struct bch_fs *c = trans->c; struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL }; struct bkey_i_subvolume *new_subvol = NULL; struct bkey_i_subvolume *src_subvol = 
NULL; @@ -897,7 +898,13 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { if (bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0) break; - if (bkey_deleted(k.k)) + + /* + * bch2_subvolume_delete() doesn't flush the btree key cache - + * ideally it would but that's tricky + */ + if (bkey_deleted(k.k) && + !bch2_btree_key_cache_find(c, BTREE_ID_subvolumes, dst_iter.pos)) goto found_slot; } @@ -925,7 +932,7 @@ found_slot: goto err; if (k.k->type != KEY_TYPE_subvolume) { - bch_err(trans->c, "subvolume %u not found", src_subvolid); + bch_err(c, "subvolume %u not found", src_subvolid); ret = -ENOENT; goto err; } diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h index ed02b982..f98c8c0d 100644 --- a/libbcachefs/subvolume.h +++ b/libbcachefs/subvolume.h @@ -75,7 +75,7 @@ static inline void snapshots_seen_init(struct snapshots_seen *s) static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id) { if (s->nr == s->size) { - size_t new_size = max(s->size, 128UL) * 2; + size_t new_size = max(s->size, (size_t) 128) * 2; u32 *d = krealloc(s->d, new_size * sizeof(s->d[0]), GFP_KERNEL); if (!d) {