From f39f0bde7875aceb3e82a330f3a86223c6dd8af4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 7 Jan 2021 19:49:15 -0500 Subject: [PATCH] Update bcachefs sources to fcf8a0889c bcachefs: bch2_alloc_write() should be writing for all devices --- .bcachefs_revision | 2 +- cmd_migrate.c | 2 +- libbcachefs/alloc_background.c | 12 +- libbcachefs/alloc_foreground.c | 60 +++---- libbcachefs/alloc_types.h | 13 +- libbcachefs/bcachefs.h | 2 +- libbcachefs/bcachefs_format.h | 2 - libbcachefs/bkey_buf.h | 60 +++++++ libbcachefs/bkey_on_stack.h | 43 ----- libbcachefs/bkey_sort.c | 18 +-- libbcachefs/btree_cache.c | 13 +- libbcachefs/btree_gc.c | 31 ++-- libbcachefs/btree_io.c | 23 +-- libbcachefs/btree_iter.c | 44 +++-- libbcachefs/btree_key_cache.c | 2 - libbcachefs/btree_types.h | 2 +- libbcachefs/btree_update.h | 2 - libbcachefs/btree_update_interior.c | 30 +--- libbcachefs/btree_update_leaf.c | 11 +- libbcachefs/buckets.c | 3 +- libbcachefs/compress.c | 13 +- libbcachefs/ec.c | 242 +++++++++++++++++----------- libbcachefs/ec.h | 12 +- libbcachefs/extent_update.c | 1 - libbcachefs/extents.c | 32 +++- libbcachefs/extents.h | 4 +- libbcachefs/fs-io.c | 22 +-- libbcachefs/fs.c | 19 +-- libbcachefs/fsck.c | 10 +- libbcachefs/io.c | 98 ++++++----- libbcachefs/io.h | 8 +- libbcachefs/journal.c | 4 +- libbcachefs/journal_io.c | 23 ++- libbcachefs/journal_types.h | 2 +- libbcachefs/migrate.c | 20 +-- libbcachefs/move.c | 68 ++++---- libbcachefs/movinggc.c | 5 + libbcachefs/recovery.c | 49 +++--- libbcachefs/reflink.c | 21 +-- libbcachefs/sysfs.c | 2 - 40 files changed, 589 insertions(+), 441 deletions(-) create mode 100644 libbcachefs/bkey_buf.h delete mode 100644 libbcachefs/bkey_on_stack.h diff --git a/.bcachefs_revision b/.bcachefs_revision index 6bdc42aa..14540446 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -5241335413ef160e309fd41ab909532fec656a3a +fcf8a0889c125511ae841960c73df62237ab05a7 diff --git a/cmd_migrate.c b/cmd_migrate.c index 
42fbc2bc..40d72671 100644 --- a/cmd_migrate.c +++ b/cmd_migrate.c @@ -301,7 +301,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst, while (length) { struct bkey_i_extent *e; - BKEY_PADDED(k) k; + __BKEY_PADDED(k, BKEY_EXTENT_VAL_U64s_MAX) k; u64 b = sector_to_bucket(ca, physical); struct disk_reservation res; unsigned sectors; diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 62ca9b7a..60c2c38b 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -319,9 +319,7 @@ retry: bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_NORUN); ret = bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE| - flags); + BTREE_INSERT_NOFAIL|flags); err: if (ret == -EINTR) goto retry; @@ -368,7 +366,7 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags) unsigned i; int ret = 0; - for_each_rw_member(ca, c, i) { + for_each_member_device(ca, c, i) { bch2_dev_alloc_write(c, ca, flags); if (ret) { percpu_ref_put(&ca->io_ref); @@ -575,8 +573,7 @@ static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca) if (available > fifo_free(&ca->free_inc) || (available && - (!fifo_full(&ca->free[RESERVE_BTREE]) || - !fifo_full(&ca->free[RESERVE_MOVINGGC])))) + !fifo_full(&ca->free[RESERVE_MOVINGGC]))) break; up_read(&c->gc_lock); @@ -977,8 +974,7 @@ retry: BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_USE_ALLOC_RESERVE| + BTREE_INSERT_JOURNAL_RESERVED| flags); if (ret == -EINTR) goto retry; diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 7a92e3d5..dcbe0404 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -204,9 +204,10 @@ success: static inline unsigned open_buckets_reserved(enum alloc_reserve reserve) { switch (reserve) { - case RESERVE_ALLOC: - return 0; case RESERVE_BTREE: + case RESERVE_BTREE_MOVINGGC: + return 0; + case 
RESERVE_MOVINGGC: return OPEN_BUCKETS_COUNT / 4; default: return OPEN_BUCKETS_COUNT / 2; @@ -263,16 +264,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, goto out; switch (reserve) { - case RESERVE_ALLOC: - if (fifo_pop(&ca->free[RESERVE_BTREE], bucket)) - goto out; - break; - case RESERVE_BTREE: - if (fifo_used(&ca->free[RESERVE_BTREE]) * 2 >= - ca->free[RESERVE_BTREE].size && - fifo_pop(&ca->free[RESERVE_BTREE], bucket)) - goto out; - break; + case RESERVE_BTREE_MOVINGGC: case RESERVE_MOVINGGC: if (fifo_pop(&ca->free[RESERVE_MOVINGGC], bucket)) goto out; @@ -458,16 +450,18 @@ bch2_bucket_alloc_set(struct bch_fs *c, * it's to a device we don't want: */ -static void bucket_alloc_from_stripe(struct bch_fs *c, - struct open_buckets *ptrs, - struct write_point *wp, - struct bch_devs_mask *devs_may_alloc, - u16 target, - unsigned erasure_code, - unsigned nr_replicas, - unsigned *nr_effective, - bool *have_cache, - unsigned flags) +static enum bucket_alloc_ret +bucket_alloc_from_stripe(struct bch_fs *c, + struct open_buckets *ptrs, + struct write_point *wp, + struct bch_devs_mask *devs_may_alloc, + u16 target, + unsigned erasure_code, + unsigned nr_replicas, + unsigned *nr_effective, + bool *have_cache, + unsigned flags, + struct closure *cl) { struct dev_alloc_list devs_sorted; struct ec_stripe_head *h; @@ -476,17 +470,21 @@ static void bucket_alloc_from_stripe(struct bch_fs *c, unsigned i, ec_idx; if (!erasure_code) - return; + return 0; if (nr_replicas < 2) - return; + return 0; if (ec_open_bucket(c, ptrs)) - return; + return 0; - h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1); + h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1, + wp == &c->copygc_write_point, + cl); + if (IS_ERR(h)) + return -PTR_ERR(h); if (!h) - return; + return 0; devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); @@ -508,6 +506,7 @@ got_bucket: atomic_inc(&h->s->pin); out_put_head: bch2_ec_stripe_head_put(c, h); + return 0; } 
/* Sector allocator */ @@ -585,10 +584,13 @@ open_bucket_add_buckets(struct bch_fs *c, } if (!ec_open_bucket(c, ptrs)) { - bucket_alloc_from_stripe(c, ptrs, wp, &devs, + ret = bucket_alloc_from_stripe(c, ptrs, wp, &devs, target, erasure_code, nr_replicas, nr_effective, - have_cache, flags); + have_cache, flags, _cl); + if (ret == FREELIST_EMPTY || + ret == OPEN_BUCKETS_EMPTY) + return ret; if (*nr_effective >= nr_replicas) return 0; } diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h index 20705460..1abfff52 100644 --- a/libbcachefs/alloc_types.h +++ b/libbcachefs/alloc_types.h @@ -34,14 +34,12 @@ struct bucket_clock { struct mutex lock; }; -/* There is one reserve for each type of btree, one for prios and gens - * and one for moving GC */ enum alloc_reserve { - RESERVE_ALLOC = -1, - RESERVE_BTREE = 0, - RESERVE_MOVINGGC = 1, - RESERVE_NONE = 2, - RESERVE_NR = 3, + RESERVE_BTREE_MOVINGGC = -2, + RESERVE_BTREE = -1, + RESERVE_MOVINGGC = 0, + RESERVE_NONE = 1, + RESERVE_NR = 2, }; typedef FIFO(long) alloc_fifo; @@ -89,7 +87,6 @@ struct write_point { u64 last_used; unsigned long write_point; enum bch_data_type type; - bool is_ec; /* calculated based on how many pointers we're actually going to use: */ unsigned sectors_free; diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index eb5b4080..505777ba 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -510,7 +510,7 @@ enum { /* misc: */ BCH_FS_FIXED_GENS, - BCH_FS_ALLOC_WRITTEN, + BCH_FS_NEED_ALLOC_WRITE, BCH_FS_REBUILD_REPLICAS, BCH_FS_HOLD_BTREE_WRITES, }; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 9f59c6b3..307d5523 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -634,8 +634,6 @@ struct bch_reservation { #define BKEY_EXTENT_VAL_U64s_MAX \ (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) -#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX) - /* * Maximum possible size of an entire 
extent, key + value: */ #define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) diff --git a/libbcachefs/bkey_buf.h b/libbcachefs/bkey_buf.h new file mode 100644 index 00000000..0d7c67a9 --- /dev/null +++ b/libbcachefs/bkey_buf.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BKEY_BUF_H +#define _BCACHEFS_BKEY_BUF_H + +#include "bcachefs.h" + +struct bkey_buf { + struct bkey_i *k; + u64 onstack[12]; +}; + +static inline void bch2_bkey_buf_realloc(struct bkey_buf *s, + struct bch_fs *c, unsigned u64s) +{ + if (s->k == (void *) s->onstack && + u64s > ARRAY_SIZE(s->onstack)) { + s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); + memcpy(s->k, s->onstack, sizeof(s->onstack)); + } +} + +static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s, + struct bch_fs *c, + struct bkey_s_c k) +{ + bch2_bkey_buf_realloc(s, c, k.k->u64s); + bkey_reassemble(s->k, k); +} + +static inline void bch2_bkey_buf_copy(struct bkey_buf *s, + struct bch_fs *c, + struct bkey_i *src) +{ + bch2_bkey_buf_realloc(s, c, src->k.u64s); + bkey_copy(s->k, src); +} + +static inline void bch2_bkey_buf_unpack(struct bkey_buf *s, + struct bch_fs *c, + struct btree *b, + struct bkey_packed *src) +{ + bch2_bkey_buf_realloc(s, c, BKEY_U64s + + bkeyp_val_u64s(&b->format, src)); + bch2_bkey_unpack(b, s->k, src); +} + +static inline void bch2_bkey_buf_init(struct bkey_buf *s) +{ + s->k = (void *) s->onstack; +} + +static inline void bch2_bkey_buf_exit(struct bkey_buf *s, struct bch_fs *c) +{ + if (s->k != (void *) s->onstack) + mempool_free(s->k, &c->large_bkey_pool); + s->k = NULL; +} + +#endif /* _BCACHEFS_BKEY_BUF_H */ diff --git a/libbcachefs/bkey_on_stack.h b/libbcachefs/bkey_on_stack.h deleted file mode 100644 index f607a0cb..00000000 --- a/libbcachefs/bkey_on_stack.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BCACHEFS_BKEY_ON_STACK_H -#define _BCACHEFS_BKEY_ON_STACK_H - -#include "bcachefs.h" - -struct bkey_on_stack { 
- struct bkey_i *k; - u64 onstack[12]; -}; - -static inline void bkey_on_stack_realloc(struct bkey_on_stack *s, - struct bch_fs *c, unsigned u64s) -{ - if (s->k == (void *) s->onstack && - u64s > ARRAY_SIZE(s->onstack)) { - s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); - memcpy(s->k, s->onstack, sizeof(s->onstack)); - } -} - -static inline void bkey_on_stack_reassemble(struct bkey_on_stack *s, - struct bch_fs *c, - struct bkey_s_c k) -{ - bkey_on_stack_realloc(s, c, k.k->u64s); - bkey_reassemble(s->k, k); -} - -static inline void bkey_on_stack_init(struct bkey_on_stack *s) -{ - s->k = (void *) s->onstack; -} - -static inline void bkey_on_stack_exit(struct bkey_on_stack *s, - struct bch_fs *c) -{ - if (s->k != (void *) s->onstack) - mempool_free(s->k, &c->large_bkey_pool); - s->k = NULL; -} - -#endif /* _BCACHEFS_BKEY_ON_STACK_H */ diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c index 99e0a401..2e1d9cd6 100644 --- a/libbcachefs/bkey_sort.c +++ b/libbcachefs/bkey_sort.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "bkey_sort.h" #include "bset.h" #include "extents.h" @@ -187,11 +187,11 @@ bch2_sort_repack_merge(struct bch_fs *c, bool filter_whiteouts) { struct bkey_packed *out = vstruct_last(dst), *k_packed; - struct bkey_on_stack k; + struct bkey_buf k; struct btree_nr_keys nr; memset(&nr, 0, sizeof(nr)); - bkey_on_stack_init(&k); + bch2_bkey_buf_init(&k); while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) { if (filter_whiteouts && bkey_whiteout(k_packed)) @@ -204,7 +204,7 @@ bch2_sort_repack_merge(struct bch_fs *c, * node; we have to make a copy of the entire key before calling * normalize */ - bkey_on_stack_realloc(&k, c, k_packed->u64s + BKEY_U64s); + bch2_bkey_buf_realloc(&k, c, k_packed->u64s + BKEY_U64s); bch2_bkey_unpack(src, k.k, k_packed); if (filter_whiteouts && @@ -215,7 +215,7 @@ bch2_sort_repack_merge(struct bch_fs *c, } dst->u64s = 
cpu_to_le16((u64 *) out - dst->_data); - bkey_on_stack_exit(&k, c); + bch2_bkey_buf_exit(&k, c); return nr; } @@ -315,11 +315,11 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, struct bkey l_unpacked, r_unpacked; struct bkey_s l, r; struct btree_nr_keys nr; - struct bkey_on_stack split; + struct bkey_buf split; unsigned i; memset(&nr, 0, sizeof(nr)); - bkey_on_stack_init(&split); + bch2_bkey_buf_init(&split); sort_iter_sort(iter, extent_sort_fix_overlapping_cmp); for (i = 0; i < iter->used;) { @@ -379,7 +379,7 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, /* * r wins, but it overlaps in the middle of l - split l: */ - bkey_on_stack_reassemble(&split, c, l.s_c); + bch2_bkey_buf_reassemble(&split, c, l.s_c); bch2_cut_back(bkey_start_pos(r.k), split.k); bch2_cut_front_s(r.k->p, l); @@ -398,7 +398,7 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, dst->u64s = cpu_to_le16((u64 *) out - dst->_data); - bkey_on_stack_exit(&split, c); + bch2_bkey_buf_exit(&split, c); return nr; } diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 09774f56..fda6540b 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_buf.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_iter.h" @@ -898,10 +899,12 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, struct btree *parent; struct btree_node_iter node_iter; struct bkey_packed *k; - BKEY_PADDED(k) tmp; + struct bkey_buf tmp; struct btree *ret = NULL; unsigned level = b->c.level; + bch2_bkey_buf_init(&tmp); + parent = btree_iter_node(iter, level + 1); if (!parent) return NULL; @@ -935,9 +938,9 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, if (!k) goto out; - bch2_bkey_unpack(parent, &tmp.k, k); + bch2_bkey_buf_unpack(&tmp, c, parent, k); - ret = bch2_btree_node_get(c, iter, &tmp.k, level, + ret = 
bch2_btree_node_get(c, iter, tmp.k, level, SIX_LOCK_intent, _THIS_IP_); if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) { @@ -957,7 +960,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, if (sib == btree_prev_sib) btree_node_unlock(iter, level); - ret = bch2_btree_node_get(c, iter, &tmp.k, level, + ret = bch2_btree_node_get(c, iter, tmp.k, level, SIX_LOCK_intent, _THIS_IP_); /* @@ -998,6 +1001,8 @@ out: bch2_btree_trans_verify_locks(trans); + bch2_bkey_buf_exit(&tmp, c); + return ret; } diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 6268ea63..6b06f607 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -8,7 +8,7 @@ #include "alloc_background.h" #include "alloc_foreground.h" #include "bkey_methods.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_locking.h" #include "btree_update_interior.h" #include "btree_io.h" @@ -132,6 +132,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, ptr->gen)) { g2->_mark.gen = g->_mark.gen = ptr->gen; g2->gen_valid = g->gen_valid = true; + set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); } if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, @@ -145,6 +146,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, g2->_mark.dirty_sectors = 0; g2->_mark.cached_sectors = 0; set_bit(BCH_FS_FIXED_GENS, &c->flags); + set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); } } } @@ -233,7 +235,6 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, if (max_stale > 64) bch2_btree_node_rewrite(c, iter, b->data->keys.seq, - BTREE_INSERT_USE_RESERVE| BTREE_INSERT_NOWAIT| BTREE_INSERT_GC_LOCK_HELD); else if (!bch2_btree_gc_rewrite_disabled && @@ -268,10 +269,12 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, struct btree_and_journal_iter iter; struct bkey_s_c k; struct bpos next_node_start = b->data->min_key; + struct bkey_buf tmp; u8 max_stale = 0; int ret = 0; 
bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); + bch2_bkey_buf_init(&tmp); while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { bch2_bkey_debugcheck(c, b, k); @@ -285,10 +288,9 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, if (b->c.level) { struct btree *child; - BKEY_PADDED(k) tmp; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bch2_bkey_buf_reassemble(&tmp, c, k); + k = bkey_i_to_s_c(tmp.k); bch2_btree_and_journal_iter_advance(&iter); @@ -300,7 +302,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, break; if (b->c.level > target_depth) { - child = bch2_btree_node_get_noiter(c, &tmp.k, + child = bch2_btree_node_get_noiter(c, tmp.k, b->c.btree_id, b->c.level - 1); ret = PTR_ERR_OR_ZERO(child); if (ret) @@ -318,6 +320,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, } } + bch2_bkey_buf_exit(&tmp, c); return ret; } @@ -570,7 +573,7 @@ static int bch2_gc_done(struct bch_fs *c, fsck_err(c, _msg ": got %llu, should be %llu" \ , ##__VA_ARGS__, dst->_f, src->_f); \ dst->_f = src->_f; \ - ret = 1; \ + set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \ } #define copy_stripe_field(_f, _msg, ...) \ if (dst->_f != src->_f) { \ @@ -581,7 +584,7 @@ static int bch2_gc_done(struct bch_fs *c, dst->_f, src->_f); \ dst->_f = src->_f; \ dst->dirty = true; \ - ret = 1; \ + set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \ } #define copy_bucket_field(_f) \ if (dst->b[b].mark._f != src->b[b].mark._f) { \ @@ -592,7 +595,7 @@ static int bch2_gc_done(struct bch_fs *c, bch2_data_types[dst->b[b].mark.data_type],\ dst->b[b].mark._f, src->b[b].mark._f); \ dst->b[b]._mark._f = src->b[b].mark._f; \ - ret = 1; \ + set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \ } #define copy_dev_field(_f, _msg, ...) 
\ copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__) @@ -930,10 +933,10 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - struct bkey_on_stack sk; + struct bkey_buf sk; int ret = 0; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, @@ -942,7 +945,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k))) { if (gc_btree_gens_key(c, k)) { - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); bch2_extent_normalize(c, bkey_i_to_s(sk.k)); bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); @@ -962,7 +965,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) } bch2_trans_exit(&trans); - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return ret; } @@ -1074,7 +1077,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter, } if (bch2_keylist_realloc(&keylist, NULL, 0, - (BKEY_U64s + BKEY_EXTENT_U64s_MAX) * nr_old_nodes)) { + BKEY_BTREE_PTR_U64s_MAX * nr_old_nodes)) { trace_btree_gc_coalesce_fail(c, BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC); return; diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 4dde972d..768fc85e 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1320,12 +1320,13 @@ static void bch2_btree_node_write_error(struct bch_fs *c, struct btree_write_bio *wbio) { struct btree *b = wbio->wbio.bio.bi_private; - __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; + struct bkey_buf k; struct bch_extent_ptr *ptr; struct btree_trans trans; struct btree_iter *iter; int ret; + bch2_bkey_buf_init(&k); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_node_iter(&trans, b->c.btree_id, b->key.k.p, @@ -1344,21 +1345,22 @@ retry: BUG_ON(!btree_node_hashed(b)); - bkey_copy(&tmp.k, 
&b->key); + bch2_bkey_buf_copy(&k, c, &b->key); - bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr, + bch2_bkey_drop_ptrs(bkey_i_to_s(k.k), ptr, bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); - if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&tmp.k))) + if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(k.k))) goto err; - ret = bch2_btree_node_update_key(c, iter, b, &tmp.k); + ret = bch2_btree_node_update_key(c, iter, b, k.k); if (ret == -EINTR) goto retry; if (ret) goto err; out: bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&k, c); bio_put(&wbio->wbio.bio); btree_node_write_done(c, b); return; @@ -1476,7 +1478,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, struct bset *i; struct btree_node *bn = NULL; struct btree_node_entry *bne = NULL; - BKEY_PADDED(key) k; + struct bkey_buf k; struct bch_extent_ptr *ptr; struct sort_iter sort_iter; struct nonce nonce; @@ -1487,6 +1489,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool validate_before_checksum = false; void *data; + bch2_bkey_buf_init(&k); + if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) return; @@ -1695,15 +1699,16 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, * just make all btree node writes FUA to keep things sane. 
*/ - bkey_copy(&k.key, &b->key); + bch2_bkey_buf_copy(&k, c, &b->key); - bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr) + bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(k.k)), ptr) ptr->offset += b->written; b->written += sectors_to_write; /* XXX: submitting IO with btree locks held: */ - bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, &k.key); + bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, k.k); + bch2_bkey_buf_exit(&k, c); return; err: set_btree_node_noevict(b); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 8c35e39e..4d825cac 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "bkey_methods.h" +#include "bkey_buf.h" #include "btree_cache.h" #include "btree_iter.h" #include "btree_key_cache.h" @@ -1048,27 +1049,31 @@ static void btree_iter_prefetch(struct btree_iter *iter) struct btree_iter_level *l = &iter->l[iter->level]; struct btree_node_iter node_iter = l->iter; struct bkey_packed *k; - BKEY_PADDED(k) tmp; + struct bkey_buf tmp; unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) ? (iter->level > 1 ? 0 : 2) : (iter->level > 1 ? 
1 : 16); bool was_locked = btree_node_locked(iter, iter->level); + bch2_bkey_buf_init(&tmp); + while (nr) { if (!bch2_btree_node_relock(iter, iter->level)) - return; + break; bch2_btree_node_iter_advance(&node_iter, l->b); k = bch2_btree_node_iter_peek(&node_iter, l->b); if (!k) break; - bch2_bkey_unpack(l->b, &tmp.k, k); - bch2_btree_node_prefetch(c, iter, &tmp.k, iter->level - 1); + bch2_bkey_buf_unpack(&tmp, c, l->b, k); + bch2_btree_node_prefetch(c, iter, tmp.k, iter->level - 1); } if (!was_locked) btree_node_unlock(iter, iter->level); + + bch2_bkey_buf_exit(&tmp, c); } static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, @@ -1100,30 +1105,34 @@ static __always_inline int btree_iter_down(struct btree_iter *iter, struct btree *b; unsigned level = iter->level - 1; enum six_lock_type lock_type = __btree_lock_want(iter, level); - BKEY_PADDED(k) tmp; + struct bkey_buf tmp; + int ret; EBUG_ON(!btree_node_locked(iter, iter->level)); - bch2_bkey_unpack(l->b, &tmp.k, + bch2_bkey_buf_init(&tmp); + bch2_bkey_buf_unpack(&tmp, c, l->b, bch2_btree_node_iter_peek(&l->iter, l->b)); - b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, trace_ip); - if (unlikely(IS_ERR(b))) - return PTR_ERR(b); + b = bch2_btree_node_get(c, iter, tmp.k, level, lock_type, trace_ip); + ret = PTR_ERR_OR_ZERO(b); + if (unlikely(ret)) + goto err; mark_btree_node_locked(iter, level, lock_type); btree_iter_node_set(iter, b); - if (tmp.k.k.type == KEY_TYPE_btree_ptr_v2 && - unlikely(b != btree_node_mem_ptr(&tmp.k))) + if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 && + unlikely(b != btree_node_mem_ptr(tmp.k))) btree_node_mem_ptr_set(iter, level + 1, b); if (iter->flags & BTREE_ITER_PREFETCH) btree_iter_prefetch(iter); iter->level = level; - - return 0; +err: + bch2_bkey_buf_exit(&tmp, c); + return ret; } static void btree_iter_up(struct btree_iter *iter) @@ -2124,9 +2133,12 @@ static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans, iter->flags &= 
~BTREE_ITER_USER_FLAGS; iter->flags |= flags & BTREE_ITER_USER_FLAGS; - if (iter->flags & BTREE_ITER_INTENT) - bch2_btree_iter_upgrade(iter, 1); - else + if (iter->flags & BTREE_ITER_INTENT) { + if (!iter->locks_want) { + __bch2_btree_iter_unlock(iter); + iter->locks_want = 1; + } + } else bch2_btree_iter_downgrade(iter); BUG_ON(iter->btree_id != btree_id); diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index 1a557b75..4357aefd 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -349,8 +349,6 @@ retry: BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_USE_ALLOC_RESERVE| BTREE_INSERT_JOURNAL_RESERVED| BTREE_INSERT_JOURNAL_RECLAIM); err: diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index dc7de271..631bf469 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -57,7 +57,7 @@ struct btree_write { struct btree_alloc { struct open_buckets ob; - BKEY_PADDED(k); + __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); }; struct btree_bkey_cached_common { diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index adb07043..a2513808 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -20,7 +20,6 @@ enum btree_insert_flags { __BTREE_INSERT_NOCHECK_RW, __BTREE_INSERT_LAZY_RW, __BTREE_INSERT_USE_RESERVE, - __BTREE_INSERT_USE_ALLOC_RESERVE, __BTREE_INSERT_JOURNAL_REPLAY, __BTREE_INSERT_JOURNAL_RESERVED, __BTREE_INSERT_JOURNAL_RECLAIM, @@ -43,7 +42,6 @@ enum btree_insert_flags { /* for copygc, or when merging btree nodes */ #define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE) -#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE) /* Insert is for journal replay - don't get journal reservations: */ #define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY) diff --git a/libbcachefs/btree_update_interior.c 
b/libbcachefs/btree_update_interior.c index 8f96756b..5bb65329 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -195,21 +195,18 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, { struct write_point *wp; struct btree *b; - BKEY_PADDED(k) tmp; + __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; struct open_buckets ob = { .nr = 0 }; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; unsigned nr_reserve; enum alloc_reserve alloc_reserve; - if (flags & BTREE_INSERT_USE_ALLOC_RESERVE) { + if (flags & BTREE_INSERT_USE_RESERVE) { nr_reserve = 0; - alloc_reserve = RESERVE_ALLOC; - } else if (flags & BTREE_INSERT_USE_RESERVE) { - nr_reserve = BTREE_NODE_RESERVE / 2; - alloc_reserve = RESERVE_BTREE; + alloc_reserve = RESERVE_BTREE_MOVINGGC; } else { nr_reserve = BTREE_NODE_RESERVE; - alloc_reserve = RESERVE_NONE; + alloc_reserve = RESERVE_BTREE; } mutex_lock(&c->btree_reserve_cache_lock); @@ -577,8 +574,6 @@ static void btree_update_nodes_written(struct btree_update *as) bch2_trans_init(&trans, c, 0, 512); ret = __bch2_trans_do(&trans, &as->disk_res, &journal_seq, BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_USE_ALLOC_RESERVE| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_JOURNAL_RECLAIM| BTREE_INSERT_JOURNAL_RESERVED, @@ -1232,6 +1227,9 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b, src = n; } + /* Also clear out the unwritten whiteouts area: */ + b->whiteout_u64s = 0; + i->u64s = cpu_to_le16((u64 *) dst - i->_data); set_btree_bset_end(b, b->set); @@ -1457,15 +1455,6 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, struct btree_update *as; struct closure cl; int ret = 0; - struct btree_insert_entry *i; - - /* - * We already have a disk reservation and open buckets pinned; this - * allocation must not block: - */ - trans_for_each_update(trans, i) - if (btree_node_type_needs_gc(i->iter->btree_id)) - flags |= BTREE_INSERT_USE_RESERVE; 
closure_init_stack(&cl); @@ -1926,10 +1915,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, retry: as = bch2_btree_update_start(iter->trans, iter->btree_id, parent ? btree_update_reserve_required(c, parent) : 0, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_USE_ALLOC_RESERVE, - &cl); + BTREE_INSERT_NOFAIL, &cl); if (IS_ERR(as)) { ret = PTR_ERR(as); diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 64734f91..c490df47 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -869,8 +869,8 @@ int __bch2_trans_commit(struct btree_trans *trans) trans_trigger_run = false; trans_for_each_update(trans, i) { - if (unlikely(i->iter->uptodate > BTREE_ITER_NEED_PEEK && - (ret = bch2_btree_iter_traverse(i->iter)))) { + ret = bch2_btree_iter_traverse(i->iter); + if (unlikely(ret)) { trace_trans_restart_traverse(trans->ip); goto out; } @@ -879,8 +879,8 @@ int __bch2_trans_commit(struct btree_trans *trans) * We're not using bch2_btree_iter_upgrade here because * we know trans->nounlock can't be set: */ - if (unlikely(i->iter->locks_want < 1 && - !__bch2_btree_iter_upgrade(i->iter, 1))) { + if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) && + !__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) { trace_trans_restart_upgrade(trans->ip); ret = -EINTR; goto out; @@ -1084,8 +1084,7 @@ int bch2_btree_delete_at(struct btree_trans *trans, bch2_trans_update(trans, iter, &k, 0); return bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE|flags); + BTREE_INSERT_NOFAIL|flags); } int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 1934b845..8bbf958d 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -2192,7 +2192,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) ca->mi.bucket_size / 
c->opts.btree_node_size); /* XXX: these should be tunable */ size_t reserve_none = max_t(size_t, 1, nbuckets >> 9); - size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7); + size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 6); size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12), btree_reserve * 2); bool resize = ca->buckets[0] != NULL; @@ -2209,7 +2209,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) * sizeof(unsigned long), GFP_KERNEL|__GFP_ZERO)) || - !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) || !init_fifo(&free[RESERVE_MOVINGGC], copygc_reserve, GFP_KERNEL) || !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) || diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c index aebf46bb..f63651d2 100644 --- a/libbcachefs/compress.c +++ b/libbcachefs/compress.c @@ -336,8 +336,19 @@ static int attempt_compress(struct bch_fs *c, ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace, ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams)); + /* + * ZSTD requires that when we decompress we pass in the exact + * compressed size - rounding it up to the nearest sector + * doesn't work, so we use the first 4 bytes of the buffer for + * that. + * + * Additionally, the ZSTD code seems to have a bug where it will + * write just past the end of the buffer - so subtract a fudge + * factor (7 bytes) from the dst buffer size to account for + * that. 
+ */ size_t len = ZSTD_compressCCtx(ctx, - dst + 4, dst_len - 4, + dst + 4, dst_len - 4 - 7, src, src_len, c->zstd_params); if (ZSTD_isError(len)) diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 8f39c4de..1c08f563 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -4,7 +4,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "bset.h" #include "btree_gc.h" #include "btree_update.h" @@ -200,6 +200,36 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) return false; } +/* Stripe bufs: */ + +static void ec_stripe_buf_free(struct ec_stripe_buf *stripe) +{ + unsigned i; + + for (i = 0; i < stripe->key.v.nr_blocks; i++) { + kvpfree(stripe->data[i], stripe->size << 9); + stripe->data[i] = NULL; + } +} + +static int ec_stripe_buf_alloc(struct ec_stripe_buf *stripe) +{ + unsigned i; + + memset(stripe->valid, 0xFF, sizeof(stripe->valid)); + + for (i = 0; i < stripe->key.v.nr_blocks; i++) { + stripe->data[i] = kvpmalloc(stripe->size << 9, GFP_KERNEL); + if (!stripe->data[i]) + goto err; + } + + return 0; +err: + ec_stripe_buf_free(stripe); + return -ENOMEM; +} + /* Checksumming: */ static void ec_generate_checksums(struct ec_stripe_buf *buf) @@ -287,14 +317,10 @@ static void ec_generate_ec(struct ec_stripe_buf *buf) raid_gen(nr_data, v->nr_redundant, bytes, buf->data); } -static unsigned __ec_nr_failed(struct ec_stripe_buf *buf, unsigned nr) -{ - return nr - bitmap_weight(buf->valid, nr); -} - static unsigned ec_nr_failed(struct ec_stripe_buf *buf) { - return __ec_nr_failed(buf, buf->key.v.nr_blocks); + return buf->key.v.nr_blocks - + bitmap_weight(buf->valid, buf->key.v.nr_blocks); } static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf) @@ -757,10 +783,10 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_extent e; - struct bkey_on_stack sk; + struct bkey_buf sk; int ret = 0, dev, idx; - bkey_on_stack_init(&sk); + 
bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); /* XXX this doesn't support the reflink btree */ @@ -787,7 +813,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, dev = s->key.v.ptrs[idx].dev; - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); e = bkey_i_to_s_extent(sk.k); bch2_bkey_drop_ptrs(e.s, ptr, ptr->dev != dev); @@ -800,8 +826,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, bch2_trans_update(&trans, iter, sk.k, 0); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE); + BTREE_INSERT_NOFAIL); if (ret == -EINTR) ret = 0; if (ret) @@ -809,7 +834,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, } bch2_trans_exit(&trans); - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return ret; } @@ -823,14 +848,13 @@ static void ec_stripe_create(struct ec_stripe_new *s) struct open_bucket *ob; struct bkey_i *k; struct stripe *m; - struct bch_stripe *v = &s->stripe.key.v; + struct bch_stripe *v = &s->new_stripe.key.v; unsigned i, nr_data = v->nr_blocks - v->nr_redundant; - struct closure cl; int ret; BUG_ON(s->h->s == s); - closure_init_stack(&cl); + closure_sync(&s->iodone); if (s->err) { if (s->err != -EROFS) @@ -838,6 +862,22 @@ static void ec_stripe_create(struct ec_stripe_new *s) goto err; } + if (s->have_existing_stripe) { + ec_validate_checksums(c, &s->existing_stripe); + + if (ec_do_recov(c, &s->existing_stripe)) { + bch_err(c, "error creating stripe: error reading existing stripe"); + goto err; + } + + for (i = 0; i < nr_data; i++) + if (stripe_blockcount_get(&s->existing_stripe.key.v, i)) + swap(s->new_stripe.data[i], + s->existing_stripe.data[i]); + + ec_stripe_buf_free(&s->existing_stripe); + } + BUG_ON(!s->allocated); if (!percpu_ref_tryget(&c->writes)) @@ -846,33 +886,31 @@ static void ec_stripe_create(struct ec_stripe_new *s) BUG_ON(bitmap_weight(s->blocks_allocated, s->blocks.nr) != s->blocks.nr); - ec_generate_ec(&s->stripe); + 
ec_generate_ec(&s->new_stripe); - ec_generate_checksums(&s->stripe); + ec_generate_checksums(&s->new_stripe); /* write p/q: */ for (i = nr_data; i < v->nr_blocks; i++) - ec_block_io(c, &s->stripe, REQ_OP_WRITE, i, &cl); + ec_block_io(c, &s->new_stripe, REQ_OP_WRITE, i, &s->iodone); + closure_sync(&s->iodone); - closure_sync(&cl); + if (ec_nr_failed(&s->new_stripe)) { + bch_err(c, "error creating stripe: error writing redundancy buckets"); + goto err_put_writes; + } - for (i = nr_data; i < v->nr_blocks; i++) - if (!test_bit(i, s->stripe.valid)) { - bch_err(c, "error creating stripe: error writing redundancy buckets"); - goto err_put_writes; - } - - ret = s->existing_stripe - ? bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i, + ret = s->have_existing_stripe + ? bch2_btree_insert(c, BTREE_ID_EC, &s->new_stripe.key.k_i, &s->res, NULL, BTREE_INSERT_NOFAIL) - : ec_stripe_bkey_insert(c, s, &s->stripe.key); + : ec_stripe_bkey_insert(c, s, &s->new_stripe.key); if (ret) { bch_err(c, "error creating stripe: error creating stripe key"); goto err_put_writes; } for_each_keylist_key(&s->keys, k) { - ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k); + ret = ec_stripe_update_ptrs(c, &s->new_stripe, &k->k); if (ret) { bch_err(c, "error creating stripe: error %i updating pointers", ret); break; @@ -880,14 +918,14 @@ static void ec_stripe_create(struct ec_stripe_new *s) } spin_lock(&c->ec_stripes_heap_lock); - m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset); + m = genradix_ptr(&c->stripes[0], s->new_stripe.key.k.p.offset); #if 0 pr_info("created a %s stripe %llu", - s->existing_stripe ? "existing" : "new", + s->have_existing_stripe ? 
"existing" : "new", s->stripe.key.k.p.offset); #endif BUG_ON(m->on_heap); - bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset); + bch2_stripes_heap_insert(c, m, s->new_stripe.key.k.p.offset); spin_unlock(&c->ec_stripes_heap_lock); err_put_writes: percpu_ref_put(&c->writes); @@ -903,8 +941,9 @@ err: bch2_keylist_free(&s->keys, s->inline_keys); - for (i = 0; i < s->stripe.key.v.nr_blocks; i++) - kvpfree(s->stripe.data[i], s->stripe.size << 9); + ec_stripe_buf_free(&s->existing_stripe); + ec_stripe_buf_free(&s->new_stripe); + closure_debug_destroy(&s->iodone); kfree(s); } @@ -981,7 +1020,7 @@ void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp) ca = bch_dev_bkey_exists(c, ob->ptr.dev); offset = ca->mi.bucket_size - ob->sectors_free; - return ob->ec->stripe.data[ob->ec_idx] + (offset << 9); + return ob->ec->new_stripe.data[ob->ec_idx] + (offset << 9); } void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp, @@ -1088,7 +1127,6 @@ static void ec_stripe_key_init(struct bch_fs *c, static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) { struct ec_stripe_new *s; - unsigned i; lockdep_assert_held(&h->lock); @@ -1097,6 +1135,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) return -ENOMEM; mutex_init(&s->lock); + closure_init(&s->iodone, NULL); atomic_set(&s->pin, 1); s->c = c; s->h = h; @@ -1106,32 +1145,20 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) bch2_keylist_init(&s->keys, s->inline_keys); - s->stripe.offset = 0; - s->stripe.size = h->blocksize; - memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid)); + s->new_stripe.offset = 0; + s->new_stripe.size = h->blocksize; - ec_stripe_key_init(c, &s->stripe.key, s->nr_data, + ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data, s->nr_parity, h->blocksize); - for (i = 0; i < s->stripe.key.v.nr_blocks; i++) { - s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL); - if (!s->stripe.data[i]) - goto 
err; - } - h->s = s; - return 0; -err: - for (i = 0; i < s->stripe.key.v.nr_blocks; i++) - kvpfree(s->stripe.data[i], s->stripe.size << 9); - kfree(s); - return -ENOMEM; } static struct ec_stripe_head * ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target, - unsigned algo, unsigned redundancy) + unsigned algo, unsigned redundancy, + bool copygc) { struct ec_stripe_head *h; struct bch_dev *ca; @@ -1147,6 +1174,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target, h->target = target; h->algo = algo; h->redundancy = redundancy; + h->copygc = copygc; rcu_read_lock(); h->devs = target_rw_devs(c, BCH_DATA_user, target); @@ -1178,9 +1206,10 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h) } struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c, - unsigned target, - unsigned algo, - unsigned redundancy) + unsigned target, + unsigned algo, + unsigned redundancy, + bool copygc) { struct ec_stripe_head *h; @@ -1191,21 +1220,21 @@ struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c, list_for_each_entry(h, &c->ec_stripe_head_list, list) if (h->target == target && h->algo == algo && - h->redundancy == redundancy) { + h->redundancy == redundancy && + h->copygc == copygc) { mutex_lock(&h->lock); goto found; } - h = ec_new_stripe_head_alloc(c, target, algo, redundancy); + h = ec_new_stripe_head_alloc(c, target, algo, redundancy, copygc); found: mutex_unlock(&c->ec_stripe_head_lock); return h; } -/* - * XXX: use a higher watermark for allocating open buckets here: - */ -static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h) +static enum bucket_alloc_ret +new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h, + struct closure *cl) { struct bch_devs_mask devs; struct open_bucket *ob; @@ -1213,12 +1242,12 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h) min_t(unsigned, h->nr_active_devs, BCH_BKEY_PTRS_MAX) - h->redundancy; bool have_cache = 
true; - int ret = 0; + enum bucket_alloc_ret ret = ALLOC_SUCCESS; devs = h->devs; for_each_set_bit(i, h->s->blocks_allocated, BCH_BKEY_PTRS_MAX) { - __clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d); + __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d); --nr_data; } @@ -1242,9 +1271,11 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h) h->redundancy, &nr_have, &have_cache, - RESERVE_NONE, + h->copygc + ? RESERVE_MOVINGGC + : RESERVE_NONE, 0, - NULL); + cl); if (ret) goto err; } @@ -1258,9 +1289,11 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h) nr_data, &nr_have, &have_cache, - RESERVE_NONE, + h->copygc + ? RESERVE_MOVINGGC + : RESERVE_NONE, 0, - NULL); + cl); if (ret) goto err; } @@ -1326,64 +1359,84 @@ static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *strip struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, unsigned target, unsigned algo, - unsigned redundancy) + unsigned redundancy, + bool copygc, + struct closure *cl) { - struct closure cl; struct ec_stripe_head *h; struct open_bucket *ob; unsigned i, data_idx = 0; s64 idx; int ret; - closure_init_stack(&cl); - - h = __bch2_ec_stripe_head_get(c, target, algo, redundancy); - if (!h) + h = __bch2_ec_stripe_head_get(c, target, algo, redundancy, copygc); + if (!h) { + bch_err(c, "no stripe head"); return NULL; + } if (!h->s) { if (ec_new_stripe_alloc(c, h)) { bch2_ec_stripe_head_put(c, h); + bch_err(c, "failed to allocate new stripe"); return NULL; } idx = get_existing_stripe(c, target, algo, redundancy); if (idx >= 0) { - h->s->existing_stripe = true; - h->s->existing_stripe_idx = idx; - if (get_stripe_key(c, idx, &h->s->stripe)) { - /* btree error */ + h->s->have_existing_stripe = true; + ret = get_stripe_key(c, idx, &h->s->existing_stripe); + if (ret) { + bch2_fs_fatal_error(c, "error reading stripe key: %i", ret); + bch2_ec_stripe_head_put(c, h); + return NULL; + } + + if 
(ec_stripe_buf_alloc(&h->s->existing_stripe)) { + /* + * this is a problem: we have deleted from the + * stripes heap already + */ BUG(); } - for (i = 0; i < h->s->stripe.key.v.nr_blocks; i++) - if (stripe_blockcount_get(&h->s->stripe.key.v, i)) { + for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) { + if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) __set_bit(i, h->s->blocks_allocated); - ec_block_io(c, &h->s->stripe, READ, i, &cl); - } + + ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); + } + + bkey_copy(&h->s->new_stripe.key.k_i, + &h->s->existing_stripe.key.k_i); + } + + if (ec_stripe_buf_alloc(&h->s->new_stripe)) { + BUG(); } } if (!h->s->allocated) { - if (!h->s->existing_stripe && + if (!h->s->have_existing_stripe && !h->s->res.sectors) { ret = bch2_disk_reservation_get(c, &h->s->res, - h->blocksize, - h->s->nr_parity, 0); + h->blocksize, + h->s->nr_parity, 0); if (ret) { - /* What should we do here? */ - bch_err(c, "unable to create new stripe: %i", ret); + /* + * This means we need to wait for copygc to + * empty out buckets from existing stripes: + */ bch2_ec_stripe_head_put(c, h); h = NULL; goto out; - } - } - if (new_stripe_alloc_buckets(c, h)) { + ret = new_stripe_alloc_buckets(c, h, cl); + if (ret) { bch2_ec_stripe_head_put(c, h); - h = NULL; + h = ERR_PTR(-ret); goto out; } @@ -1392,19 +1445,18 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, h->s->nr_data, data_idx); BUG_ON(data_idx >= h->s->nr_data); - h->s->stripe.key.v.ptrs[data_idx] = ob->ptr; + h->s->new_stripe.key.v.ptrs[data_idx] = ob->ptr; h->s->data_block_idx[i] = data_idx; data_idx++; } open_bucket_for_each(c, &h->s->parity, ob, i) - h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr; + h->s->new_stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr; //pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]); h->s->allocated = true; } out: - closure_sync(&cl); return h; } diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h index 
450bb1a1..97a263cf 100644 --- a/libbcachefs/ec.h +++ b/libbcachefs/ec.h @@ -88,6 +88,7 @@ struct ec_stripe_new { struct ec_stripe_head *h; struct mutex lock; struct list_head list; + struct closure iodone; /* counts in flight writes, stripe is created when pin == 0 */ atomic_t pin; @@ -98,8 +99,7 @@ struct ec_stripe_new { u8 nr_parity; bool allocated; bool pending; - bool existing_stripe; - u64 existing_stripe_idx; + bool have_existing_stripe; unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; @@ -111,7 +111,8 @@ struct ec_stripe_new { struct keylist keys; u64 inline_keys[BKEY_U64s * 8]; - struct ec_stripe_buf stripe; + struct ec_stripe_buf new_stripe; + struct ec_stripe_buf existing_stripe; }; struct ec_stripe_head { @@ -121,6 +122,7 @@ struct ec_stripe_head { unsigned target; unsigned algo; unsigned redundancy; + bool copygc; struct bch_devs_mask devs; unsigned nr_active_devs; @@ -145,8 +147,8 @@ void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *); int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *); void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *); -struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned, - unsigned, unsigned); +struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, + unsigned, unsigned, unsigned, bool, struct closure *); void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t); void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t); diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c index fd011df3..1faca4bc 100644 --- a/libbcachefs/extent_update.c +++ b/libbcachefs/extent_update.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_on_stack.h" #include "btree_update.h" #include "btree_update_interior.h" #include "buckets.h" diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 828ccf07..c0ae3123 100644 --- a/libbcachefs/extents.c +++ 
b/libbcachefs/extents.c @@ -665,7 +665,7 @@ bool bch2_bkey_is_incompressible(struct bkey_s_c k) } bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, - unsigned nr_replicas) + unsigned nr_replicas, bool compressed) { struct btree_trans trans; struct btree_iter *iter; @@ -683,7 +683,8 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) break; - if (nr_replicas > bch2_bkey_nr_ptrs_fully_allocated(k)) { + if (nr_replicas > bch2_bkey_replicas(c, k) || + (!compressed && bch2_bkey_sectors_compressed(k))) { ret = false; break; } @@ -693,6 +694,33 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, return ret; } +unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + unsigned replicas = 0; + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.ptr.cached) + continue; + + if (p.has_ec) { + struct stripe *s = + genradix_ptr(&c->stripes[0], p.ec.idx); + + WARN_ON(!s); + if (s) + replicas += s->nr_redundant; + } + + replicas++; + + } + + return replicas; +} + static unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded p) { diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index 74c7bb8f..ebe0a04c 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -538,7 +538,9 @@ unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c); unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c); bool bch2_bkey_is_incompressible(struct bkey_s_c); unsigned bch2_bkey_sectors_compressed(struct bkey_s_c); -bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned); +bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned, bool); + +unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c); unsigned bch2_bkey_durability(struct bch_fs *, struct 
bkey_s_c); void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s, diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 53c6660e..959eff4c 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -3,7 +3,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" #include "clock.h" @@ -791,7 +791,7 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, struct readpages_iter *readpages_iter) { struct bch_fs *c = trans->c; - struct bkey_on_stack sk; + struct bkey_buf sk; int flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE; int ret = 0; @@ -799,7 +799,7 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, rbio->c = c; rbio->start_time = local_clock(); - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); retry: while (1) { struct bkey_s_c k; @@ -817,7 +817,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); ret = bch2_read_indirect_extent(trans, &offset_into_extent, &sk); @@ -862,7 +862,7 @@ retry: bio_endio(&rbio->bio); } - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); } void bch2_readahead(struct readahead_control *ractl) @@ -1863,7 +1863,9 @@ static long bch2_dio_write_loop(struct dio_write *dio) dio->op.opts.data_replicas, 0); if (unlikely(ret) && !bch2_check_range_allocated(c, dio->op.pos, - bio_sectors(bio), dio->op.opts.data_replicas)) + bio_sectors(bio), + dio->op.opts.data_replicas, + dio->op.opts.compression != 0)) goto err; task_io_account_write(bio->bi_iter.bi_size); @@ -2414,7 +2416,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; - struct bkey_on_stack copy; + struct bkey_buf copy; struct btree_trans trans; struct btree_iter *src, *dst; loff_t shift, 
new_size; @@ -2424,7 +2426,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, if ((offset | len) & (block_bytes(c) - 1)) return -EINVAL; - bkey_on_stack_init(&copy); + bch2_bkey_buf_init(&copy); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); /* @@ -2512,7 +2514,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0) break; reassemble: - bkey_on_stack_reassemble(&copy, c, k); + bch2_bkey_buf_reassemble(&copy, c, k); if (insert && bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) @@ -2589,7 +2591,7 @@ bkey_err: } err: bch2_trans_exit(&trans); - bkey_on_stack_exit(&copy, c); + bch2_bkey_buf_exit(&copy, c); bch2_pagecache_block_put(&inode->ei_pagecache_lock); inode_unlock(&inode->v); return ret; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index e3edca4d..9ce03172 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -3,7 +3,7 @@ #include "bcachefs.h" #include "acl.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" #include "chardev.h" @@ -886,7 +886,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - struct bkey_on_stack cur, prev; + struct bkey_buf cur, prev; struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); unsigned offset_into_extent, sectors; bool have_extent = false; @@ -899,8 +899,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (start + len < start) return -EINVAL; - bkey_on_stack_init(&cur); - bkey_on_stack_init(&prev); + bch2_bkey_buf_init(&cur); + bch2_bkey_buf_init(&prev); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -919,7 +919,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - bkey_on_stack_reassemble(&cur, c, k); + bch2_bkey_buf_reassemble(&cur, c, k); ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &cur);
@@ -927,7 +927,7 @@ retry: break; k = bkey_i_to_s_c(cur.k); - bkey_on_stack_realloc(&prev, c, k.k->u64s); + bch2_bkey_buf_realloc(&prev, c, k.k->u64s); sectors = min(sectors, k.k->size - offset_into_extent); @@ -961,8 +961,8 @@ retry: FIEMAP_EXTENT_LAST); ret = bch2_trans_exit(&trans) ?: ret; - bkey_on_stack_exit(&cur, c); - bkey_on_stack_exit(&prev, c); + bch2_bkey_buf_exit(&cur, c); + bch2_bkey_buf_exit(&prev, c); return ret < 0 ? ret : 0; } @@ -1007,10 +1007,7 @@ static const struct file_operations bch_file_operations = { .open = generic_file_open, .fsync = bch2_fsync, .splice_read = generic_file_splice_read, - /* - * Broken, on v5.3: .splice_write = iter_file_splice_write, - */ .fallocate = bch2_fallocate_dispatch, .unlocked_ioctl = bch2_fs_file_ioctl, #ifdef CONFIG_COMPAT diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 39f872de..df0f00f1 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_update.h" #include "dirent.h" #include "error.h" @@ -464,11 +464,11 @@ static int check_extents(struct bch_fs *c) struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - struct bkey_on_stack prev; + struct bkey_buf prev; u64 i_sectors; int ret = 0; - bkey_on_stack_init(&prev); + bch2_bkey_buf_init(&prev); prev.k->k = KEY(0, 0, 0); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); @@ -500,7 +500,7 @@ retry: goto err; } } - bkey_on_stack_reassemble(&prev, c, k); + bch2_bkey_buf_reassemble(&prev, c, k); ret = walk_inode(&trans, &w, k.k->p.inode); if (ret) @@ -569,7 +569,7 @@ err: fsck_err: if (ret == -EINTR) goto retry; - bkey_on_stack_exit(&prev, c); + bch2_bkey_buf_exit(&prev, c); return bch2_trans_exit(&trans) ?: ret; } diff --git a/libbcachefs/io.c b/libbcachefs/io.c index abf204ef..4c4ba07c 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -9,7 +9,7 @@ #include "bcachefs.h" #include 
"alloc_background.h" #include "alloc_foreground.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "bset.h" #include "btree_update.h" #include "buckets.h" @@ -183,18 +183,23 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, /* Extent update path: */ -static int sum_sector_overwrites(struct btree_trans *trans, - struct btree_iter *extent_iter, - struct bkey_i *new, - bool *maybe_extending, - s64 *i_sectors_delta, - s64 *disk_sectors_delta) +int bch2_sum_sector_overwrites(struct btree_trans *trans, + struct btree_iter *extent_iter, + struct bkey_i *new, + bool *maybe_extending, + bool *should_check_enospc, + s64 *i_sectors_delta, + s64 *disk_sectors_delta) { + struct bch_fs *c = trans->c; struct btree_iter *iter; struct bkey_s_c old; + unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new)); + bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new)); int ret = 0; *maybe_extending = true; + *should_check_enospc = false; *i_sectors_delta = 0; *disk_sectors_delta = 0; @@ -213,6 +218,11 @@ static int sum_sector_overwrites(struct btree_trans *trans, (int) (bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new)) - bch2_bkey_nr_ptrs_fully_allocated(old)); + if (!*should_check_enospc && + (new_replicas > bch2_bkey_replicas(c, old) || + (!new_compressed && bch2_bkey_sectors_compressed(old)))) + *should_check_enospc = true; + if (bkey_cmp(old.k->p, new->k.p) >= 0) { /* * Check if there's already data above where we're @@ -250,7 +260,7 @@ int bch2_extent_update(struct btree_trans *trans, { /* this must live until after bch2_trans_commit(): */ struct bkey_inode_buf inode_p; - bool extending = false; + bool extending = false, should_check_enospc; s64 i_sectors_delta = 0, disk_sectors_delta = 0; int ret; @@ -258,8 +268,9 @@ int bch2_extent_update(struct btree_trans *trans, if (ret) return ret; - ret = sum_sector_overwrites(trans, iter, k, + ret = bch2_sum_sector_overwrites(trans, iter, k, &extending, + &should_check_enospc, 
&i_sectors_delta, &disk_sectors_delta); if (ret) @@ -269,7 +280,8 @@ int bch2_extent_update(struct btree_trans *trans, disk_sectors_delta > (s64) disk_res->sectors) { ret = bch2_disk_reservation_add(trans->c, disk_res, disk_sectors_delta - disk_res->sectors, - 0); + !should_check_enospc + ? BCH_DISK_RESERVATION_NOFAIL : 0); if (ret) return ret; } @@ -320,8 +332,7 @@ int bch2_extent_update(struct btree_trans *trans, ret = bch2_trans_commit(trans, disk_res, journal_seq, BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE); + BTREE_INSERT_NOFAIL); if (ret) return ret; @@ -404,14 +415,14 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, int bch2_write_index_default(struct bch_write_op *op) { struct bch_fs *c = op->c; - struct bkey_on_stack sk; + struct bkey_buf sk; struct keylist *keys = &op->insert_keys; struct bkey_i *k = bch2_keylist_front(keys); struct btree_trans trans; struct btree_iter *iter; int ret; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -423,7 +434,7 @@ int bch2_write_index_default(struct bch_write_op *op) k = bch2_keylist_front(keys); - bkey_on_stack_realloc(&sk, c, k->k.u64s); + bch2_bkey_buf_realloc(&sk, c, k->k.u64s); bkey_copy(sk.k, k); bch2_cut_front(iter->pos, sk.k); @@ -440,7 +451,7 @@ int bch2_write_index_default(struct bch_write_op *op) } while (!bch2_keylist_empty(keys)); bch2_trans_exit(&trans); - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return ret; } @@ -1617,14 +1628,14 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio { struct btree_trans trans; struct btree_iter *iter; - struct bkey_on_stack sk; + struct bkey_buf sk; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, 
BTREE_ID_EXTENTS, @@ -1636,7 +1647,7 @@ retry: if (bkey_err(k)) goto err; - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); @@ -1657,7 +1668,7 @@ retry: out: bch2_rbio_done(rbio); bch2_trans_exit(&trans); - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return; err: rbio->bio.bi_status = BLK_STS_IOERR; @@ -1670,14 +1681,14 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, { struct btree_trans trans; struct btree_iter *iter; - struct bkey_on_stack sk; + struct bkey_buf sk; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -1687,7 +1698,7 @@ retry: BTREE_ITER_SLOTS, k, ret) { unsigned bytes, sectors, offset_into_extent; - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); @@ -1736,7 +1747,7 @@ err: rbio->bio.bi_status = BLK_STS_IOERR; out: bch2_trans_exit(&trans); - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); bch2_rbio_done(rbio); } @@ -1807,17 +1818,6 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, if ((ret = bkey_err(k))) goto out; - /* - * going to be temporarily appending another checksum entry: - */ - new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + - BKEY_EXTENT_U64s_MAX * 8); - if ((ret = PTR_ERR_OR_ZERO(new))) - goto out; - - bkey_reassemble(new, k); - k = bkey_i_to_s_c(new); - if (bversion_cmp(k.k->version, rbio->version) || !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) goto out; @@ -1836,6 +1836,16 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, goto out; } + /* + * going to be temporarily appending another checksum entry: + */ + new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + + sizeof(struct bch_extent_crc128)); + if ((ret = 
PTR_ERR_OR_ZERO(new))) + goto out; + + bkey_reassemble(new, k); + if (!bch2_bkey_narrow_crcs(new, new_crc)) goto out; @@ -2002,7 +2012,7 @@ static void bch2_read_endio(struct bio *bio) int __bch2_read_indirect_extent(struct btree_trans *trans, unsigned *offset_into_extent, - struct bkey_on_stack *orig_k) + struct bkey_buf *orig_k) { struct btree_iter *iter; struct bkey_s_c k; @@ -2029,7 +2039,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans, } *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); - bkey_on_stack_reassemble(orig_k, trans->c, k); + bch2_bkey_buf_reassemble(orig_k, trans->c, k); err: bch2_trans_iter_put(trans, iter); return ret; @@ -2208,7 +2218,11 @@ get_bio: bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); - if (pick.ptr.cached) + /* + * If it's being moved internally, we don't want to flag it as a cache + * hit: + */ + if (pick.ptr.cached && !(flags & BCH_READ_NODECODE)) bch2_bucket_io_time_reset(trans, pick.ptr.dev, PTR_BUCKET_NR(ca, &pick.ptr), READ); @@ -2290,7 +2304,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) { struct btree_trans trans; struct btree_iter *iter; - struct bkey_on_stack sk; + struct bkey_buf sk; struct bkey_s_c k; unsigned flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE| @@ -2304,7 +2318,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) rbio->c = c; rbio->start_time = local_clock(); - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -2327,7 +2341,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &sk); @@ -2364,7 +2378,7 @@ retry: } out: bch2_trans_exit(&trans); - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return; err: if (ret == -EINTR) diff --git a/libbcachefs/io.h b/libbcachefs/io.h 
index e6aac594..04f6baa1 100644 --- a/libbcachefs/io.h +++ b/libbcachefs/io.h @@ -3,7 +3,7 @@ #define _BCACHEFS_IO_H #include "checksum.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "io_types.h" #define to_wbio(_bio) \ @@ -60,6 +60,8 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) : op->c->wq; } +int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *, + struct bkey_i *, bool *, bool *, s64 *, s64 *); int bch2_extent_update(struct btree_trans *, struct btree_iter *, struct bkey_i *, struct disk_reservation *, u64 *, u64, s64 *); @@ -112,11 +114,11 @@ struct cache_promote_op; struct extent_ptr_decoded; int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, - struct bkey_on_stack *); + struct bkey_buf *); static inline int bch2_read_indirect_extent(struct btree_trans *trans, unsigned *offset_into_extent, - struct bkey_on_stack *k) + struct bkey_buf *k) { return k->k->k.type == KEY_TYPE_reflink_p ? __bch2_read_indirect_extent(trans, offset_into_extent, k) diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index d5442482..69e487bc 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -777,7 +777,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, } } else { rcu_read_lock(); - ob = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, + ob = bch2_bucket_alloc(c, ca, RESERVE_NONE, false, cl); rcu_read_unlock(); if (IS_ERR(ob)) { @@ -1095,7 +1095,7 @@ int bch2_fs_journal_init(struct journal *j) /* Btree roots: */ j->entry_u64s_reserved += - BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX); + BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_BTREE_PTR_U64s_MAX); atomic64_set(&j->reservations.counter, ((union journal_res_state) diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 0e6fbe2f..2a344a04 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -577,8 +577,15 @@ reread: if (bch2_dev_io_err_on(ret, ca, "journal read error: sector 
%llu", offset) || - bch2_meta_read_fault("journal")) - return -EIO; + bch2_meta_read_fault("journal")) { + /* + * We don't error out of the recovery process + * here, since the relevant journal entry may be + * found on a different device, and missing or + * no journal entries will be handled later + */ + return 0; + } j = buf->data; } @@ -990,6 +997,8 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w, done: rcu_read_unlock(); + BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX); + return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS; } @@ -1050,9 +1059,13 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) return; memcpy(new_buf, buf->data, buf->buf_size); - kvpfree(buf->data, buf->buf_size); - buf->data = new_buf; - buf->buf_size = new_size; + + spin_lock(&j->lock); + swap(buf->data, new_buf); + swap(buf->buf_size, new_size); + spin_unlock(&j->lock); + + kvpfree(new_buf, new_size); } static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j) diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h index 67ee47eb..9953663e 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal_types.h @@ -20,7 +20,7 @@ struct journal_buf { struct jset *data; - BKEY_PADDED(key); + __BKEY_PADDED(key, BCH_REPLICAS_MAX); struct closure_waitlist wait; diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index 96c8690a..6241ff0c 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -4,7 +4,7 @@ */ #include "bcachefs.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_update.h" #include "btree_update_interior.h" #include "buckets.h" @@ -41,10 +41,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - struct bkey_on_stack sk; + struct bkey_buf sk; int ret = 0; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 
BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, @@ -57,7 +57,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags continue; } - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k), dev_idx, flags, false); @@ -90,7 +90,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags } ret = bch2_trans_exit(&trans) ?: ret; - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); BUG_ON(ret == -EINTR); @@ -109,6 +109,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) struct btree_iter *iter; struct closure cl; struct btree *b; + struct bkey_buf k; unsigned id; int ret; @@ -116,28 +117,28 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) if (flags & BCH_FORCE_IF_METADATA_LOST) return -EINVAL; + bch2_bkey_buf_init(&k); bch2_trans_init(&trans, c, 0, 0); closure_init_stack(&cl); for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH, b) { - __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; retry: if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key), dev_idx)) continue; - bkey_copy(&tmp.k, &b->key); + bch2_bkey_buf_copy(&k, c, &b->key); - ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.k), + ret = drop_dev_ptrs(c, bkey_i_to_s(k.k), dev_idx, flags, true); if (ret) { bch_err(c, "Cannot drop device without losing data"); goto err; } - ret = bch2_btree_node_update_key(c, iter, b, &tmp.k); + ret = bch2_btree_node_update_key(c, iter, b, k.k); if (ret == -EINTR) { b = bch2_btree_iter_peek_node(iter); goto retry; @@ -157,6 +158,7 @@ retry: ret = 0; err: ret = bch2_trans_exit(&trans) ?: ret; + bch2_bkey_buf_exit(&k, c); BUG_ON(ret == -EINTR); diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 6633d21f..9505eab9 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -2,7 +2,7 @@ #include "bcachefs.h" #include 
"alloc_foreground.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_gc.h" #include "btree_update.h" #include "btree_update_interior.h" @@ -61,8 +61,13 @@ static int bch2_migrate_index_update(struct bch_write_op *op) struct migrate_write *m = container_of(op, struct migrate_write, op); struct keylist *keys = &op->insert_keys; + struct bkey_buf _new, _insert; int ret = 0; + bch2_bkey_buf_init(&_new); + bch2_bkey_buf_init(&_insert); + bch2_bkey_buf_realloc(&_insert, c, U8_MAX); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, m->btree_id, @@ -73,21 +78,18 @@ static int bch2_migrate_index_update(struct bch_write_op *op) struct bkey_s_c k; struct bkey_i *insert; struct bkey_i_extent *new; - BKEY_PADDED(k) _new, _insert; const union bch_extent_entry *entry; struct extent_ptr_decoded p; bool did_work = false; - int nr; + bool extending = false, should_check_enospc; + s64 i_sectors_delta = 0, disk_sectors_delta = 0; bch2_trans_reset(&trans, 0); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); - if (ret) { - if (ret == -EINTR) - continue; - break; - } + if (ret) + goto err; new = bkey_i_to_extent(bch2_keylist_front(keys)); @@ -95,11 +97,11 @@ static int bch2_migrate_index_update(struct bch_write_op *op) !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset)) goto nomatch; - bkey_reassemble(&_insert.k, k); - insert = &_insert.k; + bkey_reassemble(_insert.k, k); + insert = _insert.k; - bkey_copy(&_new.k, bch2_keylist_front(keys)); - new = bkey_i_to_extent(&_new.k); + bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys)); + new = bkey_i_to_extent(_new.k); bch2_cut_front(iter->pos, &new->k_i); bch2_cut_front(iter->pos, insert); @@ -144,23 +146,21 @@ static int bch2_migrate_index_update(struct bch_write_op *op) op->opts.background_target, op->opts.data_replicas); - /* - * If we're not fully overwriting @k, and it's compressed, we - * need a reservation for all the pointers in @insert - */ - nr = 
bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(insert)) - - m->nr_ptrs_reserved; + ret = bch2_sum_sector_overwrites(&trans, iter, insert, + &extending, + &should_check_enospc, + &i_sectors_delta, + &disk_sectors_delta); + if (ret) + goto err; - if (insert->k.size < k.k->size && - bch2_bkey_sectors_compressed(k) && - nr > 0) { + if (disk_sectors_delta > (s64) op->res.sectors) { ret = bch2_disk_reservation_add(c, &op->res, - keylist_sectors(keys) * nr, 0); + disk_sectors_delta - op->res.sectors, + !should_check_enospc + ? BCH_DISK_RESERVATION_NOFAIL : 0); if (ret) goto out; - - m->nr_ptrs_reserved += nr; - goto next; } bch2_trans_update(&trans, iter, insert, 0); @@ -168,8 +168,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op) ret = bch2_trans_commit(&trans, &op->res, op_journal_seq(op), BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE| m->data_opts.btree_insert_flags); +err: if (!ret) atomic_long_inc(&c->extent_migrate_done); if (ret == -EINTR) @@ -197,6 +197,8 @@ nomatch: } out: bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&_insert, c); + bch2_bkey_buf_exit(&_new, c); BUG_ON(ret == -EINTR); return ret; } @@ -516,7 +518,7 @@ static int __bch2_move_data(struct bch_fs *c, { bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); - struct bkey_on_stack sk; + struct bkey_buf sk; struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; @@ -525,7 +527,7 @@ static int __bch2_move_data(struct bch_fs *c, u64 delay, cur_inum = U64_MAX; int ret = 0, ret2; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); stats->data_type = BCH_DATA_user; @@ -605,13 +607,19 @@ peek: } /* unlock before doing IO: */ - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k, data_cmd, data_opts); if (ret2) { + if (ret2 == -EINTR) { +
bch2_trans_reset(&trans, 0); + bch2_trans_cond_resched(&trans); + continue; + } + if (ret2 == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ bch2_move_ctxt_wait_for_io(ctxt); @@ -633,7 +641,7 @@ next_nondata: } out: ret = bch2_trans_exit(&trans) ?: ret; - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return ret; } diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index 2c5daed5..efa7f38e 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -200,6 +200,11 @@ static int bch2_copygc(struct bch_fs *c) return -1; } + /* + * Our btree node allocations also come out of RESERVE_MOVINGGC: + */ + sectors_to_move = (sectors_to_move * 3) / 4; + for (i = h->data; i < h->data + h->used; i++) sectors_to_move += i->sectors * i->replicas; diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 1883a1fa..5a43682c 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_buf.h" #include "alloc_background.h" #include "btree_gc.h" #include "btree_update.h" @@ -224,28 +225,29 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b if (b->c.level) { struct btree *child; - BKEY_PADDED(k) tmp; + struct bkey_buf tmp; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bch2_bkey_buf_init(&tmp); + bch2_bkey_buf_reassemble(&tmp, c, k); + k = bkey_i_to_s_c(tmp.k); bch2_btree_and_journal_iter_advance(&iter); - if (b->c.level > 0) { - child = bch2_btree_node_get_noiter(c, &tmp.k, - b->c.btree_id, b->c.level - 1); - ret = PTR_ERR_OR_ZERO(child); - if (ret) - break; + child = bch2_btree_node_get_noiter(c, tmp.k, + b->c.btree_id, b->c.level - 1); + bch2_bkey_buf_exit(&tmp, c); - ret = (node_fn ? 
node_fn(c, b) : 0) ?: - bch2_btree_and_journal_walk_recurse(c, child, - journal_keys, btree_id, node_fn, key_fn); - six_unlock_read(&child->c.lock); + ret = PTR_ERR_OR_ZERO(child); + if (ret) + break; - if (ret) - break; - } + ret = (node_fn ? node_fn(c, b) : 0) ?: + bch2_btree_and_journal_walk_recurse(c, child, + journal_keys, btree_id, node_fn, key_fn); + six_unlock_read(&child->c.lock); + + if (ret) + break; } else { bch2_btree_and_journal_iter_advance(&iter); } @@ -936,7 +938,7 @@ int bch2_fs_recovery(struct bch_fs *c) struct bch_sb_field_clean *clean = NULL; struct jset *last_journal_entry = NULL; u64 blacklist_seq, journal_seq; - bool write_sb = false, need_write_alloc = false; + bool write_sb = false; int ret; if (c->sb.clean) @@ -1082,10 +1084,8 @@ use_clean: bch_info(c, "starting metadata mark and sweep"); err = "error in mark and sweep"; ret = bch2_gc(c, &c->journal_keys, true, true); - if (ret < 0) - goto err; if (ret) - need_write_alloc = true; + goto err; bch_verbose(c, "mark and sweep done"); } @@ -1095,10 +1095,8 @@ use_clean: bch_info(c, "starting mark and sweep"); err = "error in mark and sweep"; ret = bch2_gc(c, &c->journal_keys, true, false); - if (ret < 0) - goto err; if (ret) - need_write_alloc = true; + goto err; bch_verbose(c, "mark and sweep done"); } @@ -1122,7 +1120,8 @@ use_clean: goto err; bch_verbose(c, "journal replay done"); - if (need_write_alloc && !c->opts.nochanges) { + if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) && + !c->opts.nochanges) { /* * note that even when filesystem was clean there might be work * to do here, if we ran gc (because of fsck) which recalculated @@ -1137,8 +1136,6 @@ use_clean: goto err; } bch_verbose(c, "alloc write done"); - - set_bit(BCH_FS_ALLOC_WRITTEN, &c->flags); } if (!c->sb.clean) { diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 8abcbfb3..930547de 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include 
"bcachefs.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_update.h" #include "extents.h" #include "inode.h" @@ -198,8 +198,7 @@ s64 bch2_remap_range(struct bch_fs *c, struct btree_trans trans; struct btree_iter *dst_iter, *src_iter; struct bkey_s_c src_k; - BKEY_PADDED(k) new_dst; - struct bkey_on_stack new_src; + struct bkey_buf new_dst, new_src; struct bpos dst_end = dst_start, src_end = src_start; struct bpos dst_want, src_want; u64 src_done, dst_done; @@ -216,7 +215,8 @@ s64 bch2_remap_range(struct bch_fs *c, dst_end.offset += remap_sectors; src_end.offset += remap_sectors; - bkey_on_stack_init(&new_src); + bch2_bkey_buf_init(&new_dst); + bch2_bkey_buf_init(&new_src); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start, @@ -257,7 +257,7 @@ s64 bch2_remap_range(struct bch_fs *c, break; if (src_k.k->type != KEY_TYPE_reflink_p) { - bkey_on_stack_reassemble(&new_src, c, src_k); + bch2_bkey_buf_reassemble(&new_src, c, src_k); src_k = bkey_i_to_s_c(new_src.k); bch2_cut_front(src_iter->pos, new_src.k); @@ -275,7 +275,7 @@ s64 bch2_remap_range(struct bch_fs *c, struct bkey_s_c_reflink_p src_p = bkey_s_c_to_reflink_p(src_k); struct bkey_i_reflink_p *dst_p = - bkey_reflink_p_init(&new_dst.k); + bkey_reflink_p_init(new_dst.k); u64 offset = le64_to_cpu(src_p.v->idx) + (src_iter->pos.offset - @@ -286,12 +286,12 @@ s64 bch2_remap_range(struct bch_fs *c, BUG(); } - new_dst.k.k.p = dst_iter->pos; - bch2_key_resize(&new_dst.k.k, + new_dst.k->k.p = dst_iter->pos; + bch2_key_resize(&new_dst.k->k, min(src_k.k->p.offset - src_iter->pos.offset, dst_end.offset - dst_iter->pos.offset)); - ret = bch2_extent_update(&trans, dst_iter, &new_dst.k, + ret = bch2_extent_update(&trans, dst_iter, new_dst.k, NULL, journal_seq, new_i_size, i_sectors_delta); if (ret) @@ -333,7 +333,8 @@ err: } while (ret2 == -EINTR); ret = bch2_trans_exit(&trans) ?: ret; - bkey_on_stack_exit(&new_src, c); + 
bch2_bkey_buf_exit(&new_src, c); + bch2_bkey_buf_exit(&new_dst, c); percpu_ref_put(&c->writes); diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index cc13fc25..bfae0d71 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -798,7 +798,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) pr_buf(out, "free_inc: %zu/%zu\n" - "free[RESERVE_BTREE]: %zu/%zu\n" "free[RESERVE_MOVINGGC]: %zu/%zu\n" "free[RESERVE_NONE]: %zu/%zu\n" "buckets:\n" @@ -827,7 +826,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) "open_buckets_user: %u\n" "btree reserve cache: %u\n", fifo_used(&ca->free_inc), ca->free_inc.size, - fifo_used(&ca->free[RESERVE_BTREE]), ca->free[RESERVE_BTREE].size, fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size, fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size, ca->mi.nbuckets - ca->mi.first_bucket,