diff --git a/.bcachefs_revision b/.bcachefs_revision index d1622d3d..978f8ce2 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -07c2895cb3372c0e7b406ab13264de80c7ff19eb +710cd382bf5f50ab8114a7cc22d78b5b2f574529 diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 0b8eabe5..1c4b76ed 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -913,18 +913,24 @@ struct bch_stripe { struct bch_reflink_p { struct bch_val v; __le64 idx; - - __le32 reservation_generation; - __u8 nr_replicas; - __u8 pad[3]; -}; + /* + * A reflink pointer might point to an indirect extent which is then + * later split (by copygc or rebalance). If we only pointed to part of + * the original indirect extent, and then one of the fragments is + * outside the range we point to, we'd leak a refcount: so when creating + * reflink pointers, we need to store pad values to remember the full + * range we were taking a reference on. + */ + __le32 front_pad; + __le32 back_pad; +} __attribute__((packed, aligned(8))); struct bch_reflink_v { struct bch_val v; __le64 refcount; union bch_extent_entry start[0]; __u64 _data[0]; -}; +} __attribute__((packed, aligned(8))); struct bch_indirect_inline_data { struct bch_val v; diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 07942422..236ecbd8 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1767,7 +1767,6 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) bch2_bkey_buf_reassemble(&sk, c, k); bch2_extent_normalize(c, bkey_i_to_s(sk.k)); - commit_err = bch2_trans_update(&trans, &iter, sk.k, 0) ?: bch2_trans_commit(&trans, NULL, NULL, diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 5fd3aabb..c45bcd0b 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -1143,8 +1143,10 @@ static int bch2_mark_reflink_p(struct bch_fs *c, struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct reflink_gc *ref; size_t l, r, m; - u64 idx = le64_to_cpu(p.v->idx); - unsigned sectors = p.k->size; + u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); + u64 sectors = (u64) le32_to_cpu(p.v->front_pad) + + le32_to_cpu(p.v->back_pad) + + p.k->size; s64 ret = 0; BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) == @@ -1720,12 +1722,33 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, bch2_fs_inconsistent(c, "%llu:%llu len %u points to nonexistent indirect extent %llu", p.k->p.inode, p.k->p.offset, p.k->size, idx); - bch2_inconsistent_error(c); ret = -EIO; goto err; } - BUG_ON(!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)); + if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) { + bch2_fs_inconsistent(c, + "%llu:%llu len %u idx %llu indirect extent refcount underflow", + p.k->p.inode, p.k->p.offset, p.k->size, idx); + ret = -EIO; + goto err; + } + + if (flags & BTREE_TRIGGER_INSERT) { + struct bch_reflink_p *v = (struct bch_reflink_p *) p.v; + u64 pad; + + pad = max_t(s64, le32_to_cpu(v->front_pad), + le64_to_cpu(v->idx) - bkey_start_offset(k.k)); + BUG_ON(pad > U32_MAX); + v->front_pad = cpu_to_le32(pad); + + pad = max_t(s64, le32_to_cpu(v->back_pad), + k.k->p.offset - p.k->size - le64_to_cpu(v->idx)); + BUG_ON(pad > U32_MAX); + v->back_pad = cpu_to_le32(pad); + } + le64_add_cpu(refcount, add); if (!*refcount) { @@ -1748,10 +1771,20 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c k, unsigned flags) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - u64 idx = le64_to_cpu(p.v->idx); - unsigned sectors = p.k->size; + u64 idx, sectors; s64 ret = 0; + if (flags & BTREE_TRIGGER_INSERT) { + struct bch_reflink_p *v = (struct bch_reflink_p *) p.v; + + v->front_pad = v->back_pad = 0; + } + + idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); + sectors = (u64) le32_to_cpu(p.v->front_pad) + + le32_to_cpu(p.v->back_pad) + + p.k->size; + while (sectors) { ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags); if (ret < 0) diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 6c2eed77..194fbe21 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -480,7 +480,7 @@ restart_narrow_pointers: bkey_for_each_ptr_decode(&k->k, ptrs, p, i) if (can_narrow_crc(p.crc, n)) { - bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr); + __bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr); p.ptr.offset += p.crc.offset; p.crc = n; bch2_extent_ptr_decoded_append(k, &p); @@ -785,41 +785,85 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, return i; } -union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k, - struct bch_extent_ptr *ptr) +static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry) +{ + union bch_extent_entry *next = extent_entry_next(entry); + + /* stripes have ptrs, but their layout doesn't work with this code */ + BUG_ON(k.k->type == KEY_TYPE_stripe); + + memmove_u64s_down(entry, next, + (u64 *) bkey_val_end(k) - (u64 *) next); + k.k->u64s -= (u64 *) next - (u64 *) entry; +} + +/* + * Returns pointer to the next entry after the one being dropped: + */ +union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k, + struct bch_extent_ptr *ptr) { struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); - union bch_extent_entry *dst, *src, *prev; + union bch_extent_entry *entry = to_entry(ptr), *next; + union bch_extent_entry *ret = entry; bool drop_crc = true; EBUG_ON(ptr < &ptrs.start->ptr || ptr >= &ptrs.end->ptr); EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr); - src = extent_entry_next(to_entry(ptr)); - if (src != ptrs.end && - !extent_entry_is_crc(src)) - drop_crc = false; - - dst = to_entry(ptr); - while ((prev = extent_entry_prev(ptrs, dst))) { - if (extent_entry_is_ptr(prev)) + for (next = extent_entry_next(entry); + next != ptrs.end; + next = extent_entry_next(next)) { + if (extent_entry_is_crc(next)) { break; - - if (extent_entry_is_crc(prev)) { - if (drop_crc) - dst = prev; + } else if (extent_entry_is_ptr(next)) { + drop_crc = false; break; } - - dst = prev; } - memmove_u64s_down(dst, src, - (u64 *) ptrs.end - (u64 *) src); - k.k->u64s -= (u64 *) src - (u64 *) dst; + extent_entry_drop(k, entry); - return dst; + while ((entry = extent_entry_prev(ptrs, entry))) { + if (extent_entry_is_ptr(entry)) + break; + + if ((extent_entry_is_crc(entry) && drop_crc) || + extent_entry_is_stripe_ptr(entry)) { + ret = (void *) ret - extent_entry_bytes(entry); + extent_entry_drop(k, entry); + } + } + + return ret; +} + +union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k, + struct bch_extent_ptr *ptr) +{ + bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr; + union bch_extent_entry *ret = + __bch2_bkey_drop_ptr(k, ptr); + + /* + * If we deleted all the dirty pointers and there's still cached + * pointers, we could set the cached pointers to dirty if they're not + * stale - but to do that correctly we'd need to grab an open_bucket + * reference so that we don't race with bucket reuse: + */ + if (have_dirty && + !bch2_bkey_dirty_devs(k.s_c).nr) { + k.k->type = KEY_TYPE_error; + set_bkey_val_u64s(k.k, 0); + ret = NULL; + } else if (!bch2_bkey_nr_ptrs(k.s_c)) { + k.k->type = KEY_TYPE_deleted; + set_bkey_val_u64s(k.k, 0); + ret = NULL; + } + + return ret; } void bch2_bkey_drop_device(struct bkey_s k, unsigned dev) @@ -889,10 +933,6 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) ptr->cached && ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)); - /* will only happen if all pointers were cached: */ - if (!bch2_bkey_nr_ptrs(k.s_c)) - k.k->type = KEY_TYPE_deleted; - return bkey_deleted(k.k); } diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index afd3067b..9c256727 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -78,12 +78,12 @@ static inline size_t extent_entry_u64s(const union bch_extent_entry *entry) static inline bool extent_entry_is_ptr(const union bch_extent_entry *e) { - switch (extent_entry_type(e)) { - case BCH_EXTENT_ENTRY_ptr: - return true; - default: - return false; - } + return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr; +} + +static inline bool extent_entry_is_stripe_ptr(const union bch_extent_entry *e) +{ + return extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr; } static inline bool extent_entry_is_crc(const union bch_extent_entry *e) @@ -578,6 +578,8 @@ void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *); void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr); void bch2_extent_ptr_decoded_append(struct bkey_i *, struct extent_ptr_decoded *); +union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s, + struct bch_extent_ptr *); union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *); diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 0bc72d2a..65f8645c 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -782,6 +782,8 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c, ? ((unsigned long) buf & (PAGE_SIZE - 1)) : 0), PAGE_SIZE); + pages = min(pages, BIO_MAX_VECS); + bio = bio_alloc_bioset(GFP_NOIO, pages, &c->bio_write); wbio = wbio_init(bio); wbio->put_bio = true; diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index 9f9eb799..94d5d99f 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -73,6 +73,15 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags */ bch2_extent_normalize(c, bkey_i_to_s(sk.k)); + /* + * Since we're not inserting through an extent iterator + * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators), + * we aren't using the extent overwrite path to delete, we're + * just using the normal key deletion path: + */ + if (bkey_deleted(&sk.k->k)) + sk.k->k.size = 0; + ret = bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, sk.k, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 0a8fe708..790389d4 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -196,7 +196,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) extent_for_each_ptr(extent_i_to_s(new), new_ptr) new_ptr->cached = true; - bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr); + __bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr); } extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 92ff6094..8726943a 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -32,6 +32,9 @@ const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k) if (bkey_val_bytes(p.k) != sizeof(*p.v)) return "incorrect value size"; + if (le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad)) + return "idx < front_pad"; + return NULL; } diff --git a/libbcachefs/util.c b/libbcachefs/util.c index 9f21f68e..52de7c49 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -525,7 +525,11 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) if (!page) return -ENOMEM; - BUG_ON(!bio_add_page(bio, page, len, 0)); + if (unlikely(!bio_add_page(bio, page, len, 0))) { + __free_page(page); + break; + } + size -= len; }