From ff5e165532a2eed87700649d03f91a612a58e92a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Mar 2018 03:54:50 -0400 Subject: [PATCH] Update bcachefs sources to 9fc6ccd865 bcachefs: fix copygc_pred() --- .bcachefs_revision | 2 +- libbcachefs/alloc.c | 9 +++------ libbcachefs/btree_cache.c | 1 - libbcachefs/buckets.c | 13 +++++++++++++ libbcachefs/buckets_types.h | 1 + libbcachefs/extents.c | 38 ++++++++++++++++++------------------- libbcachefs/io.c | 4 ++-- libbcachefs/journal.c | 6 +++++- libbcachefs/move.c | 32 +++++++++++++++---------------- libbcachefs/movinggc.c | 37 ++++++++++++++++++++++++++---------- 10 files changed, 87 insertions(+), 56 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 333e97a3..641ae5fe 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -f7ccf513908be42581e41b48b8b078a441a6a804 +9fc6ccd8659598d4ca885220a795889071b619f4 diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c index 22dcaeb2..ede44f73 100644 --- a/libbcachefs/alloc.c +++ b/libbcachefs/alloc.c @@ -1253,7 +1253,9 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c, if (!ca) continue; - if (have_cache_dev && !ca->mi.durability) + if (!ca->mi.durability && + (have_cache_dev || + wp->type != BCH_DATA_USER)) continue; ob = bch2_bucket_alloc(c, ca, reserve, @@ -1534,11 +1536,6 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, wp->first_ptr++; } - ret = open_bucket_add_buckets(c, target, wp, devs_have, - nr_replicas, reserve, cl); - if (ret && ret != -EROFS) - goto err; - if (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) { ret = open_bucket_add_buckets(c, target, wp, devs_have, nr_replicas, reserve, cl); diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 7eae4d20..469f8565 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -738,7 +738,6 @@ retry: return ERR_PTR(-EIO); } - EBUG_ON(!b->written); EBUG_ON(b->btree_id != iter->btree_id || BTREE_NODE_LEVEL(b->data) != level || bkey_cmp(b->data->max_key, k->k.p)); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 4ea89a97..864de940 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -285,6 +285,17 @@ static inline int is_unavailable_bucket(struct bucket_mark m) return !is_available_bucket(m); } +static inline int is_fragmented_bucket(struct bucket_mark m, + struct bch_dev *ca) +{ + if (!m.owned_by_allocator && + m.data_type == BCH_DATA_USER && + bucket_sectors_used(m)) + return max_t(int, 0, (int) ca->mi.bucket_size - + bucket_sectors_used(m)); + return 0; +} + static inline enum bch_data_type bucket_type(struct bucket_mark m) { return m.cached_sectors && !m.dirty_sectors @@ -361,6 +372,8 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, dev_usage->sectors[new.data_type] += new.dirty_sectors; dev_usage->sectors[BCH_DATA_CACHED] += (int) new.cached_sectors - (int) old.cached_sectors; + dev_usage->sectors_fragmented += + is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca); if (!is_available_bucket(old) && is_available_bucket(new)) bch2_wake_allocator(ca); diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h index 6f52a109..a0256e13 100644 --- a/libbcachefs/buckets_types.h +++ b/libbcachefs/buckets_types.h @@ -53,6 +53,7 @@ struct bch_dev_usage { /* _compressed_ sectors: */ u64 sectors[BCH_DATA_NR]; + u64 sectors_fragmented; }; /* kill, switch to bch_data_type? */ diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index ed33f9bf..f73e7562 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -552,8 +552,9 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf, ? bch_dev_bkey_exists(c, ptr->dev) : NULL; - p("ptr: %u:%llu gen %u%s", ptr->dev, + p("ptr: %u:%llu gen %u%s%s", ptr->dev, (u64) ptr->offset, ptr->gen, + ptr->cached ? " cached" : "", ca && ptr_stale(ca, ptr) ? " stale" : ""); break; @@ -2018,33 +2019,32 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) void bch2_extent_mark_replicas_cached(struct bch_fs *c, struct bkey_s_extent e, - unsigned nr_desired_replicas, - unsigned target) + unsigned target, + unsigned nr_desired_replicas) { struct bch_extent_ptr *ptr; int extra = bch2_extent_durability(c, e.c) - nr_desired_replicas; - if (extra <= 0) - return; + if (target && extra > 0) + extent_for_each_ptr(e, ptr) { + int n = bch2_extent_ptr_durability(c, ptr); - extent_for_each_ptr(e, ptr) { - int n = bch2_extent_ptr_durability(c, ptr); - - if (n && n <= extra && - !dev_in_target(c->devs[ptr->dev], target)) { - ptr->cached = true; - extra -= n; + if (n && n <= extra && + !dev_in_target(c->devs[ptr->dev], target)) { + ptr->cached = true; + extra -= n; + } } - } - extent_for_each_ptr(e, ptr) { - int n = bch2_extent_ptr_durability(c, ptr); + if (extra > 0) + extent_for_each_ptr(e, ptr) { + int n = bch2_extent_ptr_durability(c, ptr); - if (n && n <= extra) { - ptr->cached = true; - extra -= n; + if (n && n <= extra) { + ptr->cached = true; + extra -= n; + } } - } } /* diff --git a/libbcachefs/io.c b/libbcachefs/io.c index d1fb89c5..7ee9c392 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -732,11 +732,11 @@ static void __bch2_write(struct closure *cl) int ret; do { - if (op->open_buckets_nr + op->nr_replicas > + /* +1 for possible cache device: */ + if (op->open_buckets_nr + op->nr_replicas + 1 > ARRAY_SIZE(op->open_buckets)) continue_at(cl, bch2_write_index, index_update_wq(op)); - /* for the device pointers and 1 for the chksum */ if (bch2_keylist_realloc(&op->insert_keys, op->inline_keys, ARRAY_SIZE(op->inline_keys), diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index ea8a6282..e5000767 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -2110,6 +2110,9 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w, if (!ca) continue; + if (!ca->mi.durability) + continue; + ja = &ca->journal; if (!ja->nr) continue; @@ -2139,7 +2142,8 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w, ja->buckets[ja->cur_idx]), .dev = ca->dev_idx, }); - replicas++; + + replicas += ca->mi.durability; } rcu_read_unlock(); diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 616844ae..07d2e2c8 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -67,8 +67,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op) BKEY_PADDED(k) _new, _insert; struct bch_extent_ptr *ptr; struct bch_extent_crc_unpacked crc; - unsigned nr_dirty; bool did_work = false; + int nr; if (btree_iter_err(k)) { ret = bch2_btree_iter_unlock(&iter); @@ -133,22 +133,21 @@ static int bch2_migrate_index_update(struct bch_write_op *op) * has fewer replicas than when we last looked at it - meaning * we need to get a disk reservation here: */ - nr_dirty = bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)); - if (m->nr_ptrs_reserved < nr_dirty) { - unsigned sectors = (nr_dirty - m->nr_ptrs_reserved) * - keylist_sectors(keys); - + nr = bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) - + (bch2_extent_nr_dirty_ptrs(k) + m->nr_ptrs_reserved); + if (nr > 0) { /* * can't call bch2_disk_reservation_add() with btree * locks held, at least not without a song and dance */ bch2_btree_iter_unlock(&iter); - ret = bch2_disk_reservation_add(c, &op->res, sectors, 0); + ret = bch2_disk_reservation_add(c, &op->res, + keylist_sectors(keys) * nr, 0); if (ret) goto out; - m->nr_ptrs_reserved = nr_dirty; + m->nr_ptrs_reserved += nr; goto next; } @@ -226,7 +225,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, m->data_cmd = data_cmd; m->data_opts = data_opts; - m->nr_ptrs_reserved = bch2_extent_nr_dirty_ptrs(k); + m->nr_ptrs_reserved = 0; bch2_write_op_init(&m->op, c, io_opts); m->op.compression_type = @@ -249,19 +248,20 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, m->op.index_update_fn = bch2_migrate_index_update; switch (data_cmd) { - case DATA_ADD_REPLICAS: - if (m->nr_ptrs_reserved < io_opts.data_replicas) { - m->op.nr_replicas = io_opts.data_replicas - m->nr_ptrs_reserved; + case DATA_ADD_REPLICAS: { + int nr = (int) io_opts.data_replicas - + bch2_extent_nr_dirty_ptrs(k); + + if (nr > 0) { + m->op.nr_replicas = m->nr_ptrs_reserved = nr; ret = bch2_disk_reservation_get(c, &m->op.res, - k.k->size, - m->op.nr_replicas, 0); + k.k->size, m->op.nr_replicas, 0); if (ret) return ret; - - m->nr_ptrs_reserved = io_opts.data_replicas; } break; + } case DATA_REWRITE: break; case DATA_PROMOTE: diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index 2aa58b55..3b4a5292 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -72,9 +72,9 @@ static bool __copygc_pred(struct bch_dev *ca, if (ptr) { struct copygc_heap_entry search = { .offset = ptr->offset }; - size_t i = eytzinger0_find_le(h->data, h->used, - sizeof(h->data[0]), - bucket_offset_cmp, &search); + ssize_t i = eytzinger0_find_le(h->data, h->used, + sizeof(h->data[0]), + bucket_offset_cmp, &search); return (i >= 0 && ptr->offset < h->data[i].offset + ca->mi.bucket_size && @@ -213,8 +213,9 @@ static int bch2_copygc_thread(void *arg) struct bch_dev *ca = arg; struct bch_fs *c = ca->fs; struct io_clock *clock = &c->io_clock[WRITE]; + struct bch_dev_usage usage; unsigned long last; - u64 available, want, next; + u64 available, fragmented, reserve, next; set_freezable(); @@ -223,16 +224,32 @@ static int bch2_copygc_thread(void *arg) break; last = atomic_long_read(&clock->now); + + reserve = div64_u64((ca->mi.nbuckets - ca->mi.first_bucket) * + ca->mi.bucket_size * + c->opts.gc_reserve_percent, 200); + + usage = bch2_dev_usage_read(c, ca); + /* * don't start copygc until less than half the gc reserve is * available: */ - available = dev_buckets_available(c, ca); - want = div64_u64((ca->mi.nbuckets - ca->mi.first_bucket) * - c->opts.gc_reserve_percent, 200); - if (available > want) { - next = last + (available - want) * - ca->mi.bucket_size; + available = __dev_buckets_available(ca, usage) * + ca->mi.bucket_size; + if (available > reserve) { + next = last + available - reserve; + bch2_kthread_io_clock_wait(clock, next); + continue; + } + + /* + * don't start copygc until there's more than half the copygc + * reserve of fragmented space: + */ + fragmented = usage.sectors_fragmented; + if (fragmented < reserve) { + next = last + reserve - fragmented; bch2_kthread_io_clock_wait(clock, next); continue; }