diff --git a/.bcachefs_revision b/.bcachefs_revision index d01cd450..f836d05b 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -069e88fae5fdce2aea08c9e192cf6ac5c7ed492d +6bb1ba5c94c225a95cb59cb9670b558bcb1b4f81 diff --git a/cmd_migrate.c b/cmd_migrate.c index fa467306..23d8142c 100644 --- a/cmd_migrate.c +++ b/cmd_migrate.c @@ -599,7 +599,7 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path, darray_free(s.extents); genradix_free(&s.hardlinks); - bch2_alloc_write(c, false); + bch2_alloc_write_all(c, false); } static void find_superblock_space(ranges extents, diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index bf3611e7..ed919b42 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -38,6 +38,15 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = { #undef x }; +struct bkey_alloc_buf { + struct bkey_i k; + struct bch_alloc_v3 v; + +#define x(_name, _bits) + _bits / 8 + u8 _pad[0 + BCH_ALLOC_FIELDS_V2()]; +#undef x +} __attribute__((packed, aligned(8))); + /* Persistent alloc info: */ static inline u64 alloc_field_v1_get(const struct bch_alloc *a, @@ -244,13 +253,26 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) return ret; } -void bch2_alloc_pack(struct bch_fs *c, - struct bkey_alloc_buf *dst, - const struct bkey_alloc_unpacked src) +static void bch2_alloc_pack(struct bch_fs *c, + struct bkey_alloc_buf *dst, + const struct bkey_alloc_unpacked src) { bch2_alloc_pack_v3(dst, src); } +int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_alloc_unpacked *u, unsigned trigger_flags) +{ + struct bkey_alloc_buf *a; + + a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf)); + if (IS_ERR(a)) + return PTR_ERR(a); + + bch2_alloc_pack(trans->c, a, *u); + return bch2_trans_update(trans, iter, &a->k, trigger_flags); +} + static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) { unsigned i, bytes = offsetof(struct bch_alloc, data); @@ -371,11 +393,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bkey_s_c k; - struct bch_dev *ca; - struct bucket *g; - struct bucket_mark m; struct bkey_alloc_unpacked old_u, new_u; - struct bkey_alloc_buf a; int ret; retry: bch2_trans_begin(trans); @@ -390,20 +408,13 @@ retry: if (ret) goto err; - old_u = bch2_alloc_unpack(k); - - percpu_down_read(&c->mark_lock); - ca = bch_dev_bkey_exists(c, iter->pos.inode); - g = bucket(ca, iter->pos.offset); - m = READ_ONCE(g->mark); - new_u = alloc_mem_to_key(iter, g, m); - percpu_up_read(&c->mark_lock); + old_u = bch2_alloc_unpack(k); + new_u = alloc_mem_to_key(c, iter); if (!bkey_alloc_unpacked_cmp(old_u, new_u)) return 0; - bch2_alloc_pack(c, &a, new_u); - ret = bch2_trans_update(trans, iter, &a.k, + ret = bch2_alloc_write(trans, iter, &new_u, BTREE_TRIGGER_NORUN) ?: bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL|flags); @@ -413,7 +424,7 @@ err: return ret; } -int bch2_alloc_write(struct bch_fs *c, unsigned flags) +int bch2_alloc_write_all(struct bch_fs *c, unsigned flags) { struct btree_trans trans; struct btree_iter iter; @@ -450,10 +461,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, size_t bucket_nr, int rw) { struct bch_fs *c = trans->c; - struct bch_dev *ca = bch_dev_bkey_exists(c, dev); struct btree_iter iter; - struct bucket *g; - struct bkey_alloc_buf *a; struct bkey_alloc_unpacked u; u64 *time, now; int ret = 0; @@ -466,15 +474,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, if (ret) goto out; - a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf)); - ret = PTR_ERR_OR_ZERO(a); - if (ret) - goto out; - - percpu_down_read(&c->mark_lock); - g = bucket(ca, bucket_nr); - u = alloc_mem_to_key(&iter, g, READ_ONCE(g->mark)); - percpu_up_read(&c->mark_lock); + u = alloc_mem_to_key(c, &iter); time = rw == READ ? &u.read_time : &u.write_time; now = atomic64_read(&c->io_clock[rw].now); @@ -483,8 +483,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, *time = now; - bch2_alloc_pack(c, a, u); - ret = bch2_trans_update(trans, &iter, &a->k, 0) ?: + ret = bch2_alloc_write(trans, &iter, &u, 0) ?: bch2_trans_commit(trans, NULL, NULL, 0); out: bch2_trans_iter_exit(trans, &iter); @@ -752,10 +751,7 @@ static int bucket_invalidate_btree(struct btree_trans *trans, struct bch_dev *ca, u64 b) { struct bch_fs *c = trans->c; - struct bkey_alloc_buf *a; struct bkey_alloc_unpacked u; - struct bucket *g; - struct bucket_mark m; struct btree_iter iter; int ret; @@ -765,20 +761,11 @@ static int bucket_invalidate_btree(struct btree_trans *trans, BTREE_ITER_CACHED_NOFILL| BTREE_ITER_INTENT); - a = bch2_trans_kmalloc(trans, sizeof(*a)); - ret = PTR_ERR_OR_ZERO(a); - if (ret) - goto err; - ret = bch2_btree_iter_traverse(&iter); if (ret) goto err; - percpu_down_read(&c->mark_lock); - g = bucket(ca, b); - m = READ_ONCE(g->mark); - u = alloc_mem_to_key(&iter, g, m); - percpu_up_read(&c->mark_lock); + u = alloc_mem_to_key(c, &iter); u.gen++; u.data_type = 0; @@ -787,9 +774,8 @@ static int bucket_invalidate_btree(struct btree_trans *trans, u.read_time = atomic64_read(&c->io_clock[READ].now); u.write_time = atomic64_read(&c->io_clock[WRITE].now); - bch2_alloc_pack(c, a, u); - ret = bch2_trans_update(trans, &iter, &a->k, - BTREE_TRIGGER_BUCKET_INVALIDATE); + ret = bch2_alloc_write(trans, &iter, &u, + BTREE_TRIGGER_BUCKET_INVALIDATE); err: bch2_trans_iter_exit(trans, &iter); return ret; diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index b1efc149..e3cdb8bc 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -4,7 +4,9 @@ #include "bcachefs.h" #include "alloc_types.h" +#include "buckets.h" #include "debug.h" +#include "super.h" extern const char * const bch2_allocator_states[]; @@ -20,15 +22,6 @@ struct bkey_alloc_unpacked { #undef x }; -struct bkey_alloc_buf { - struct bkey_i k; - struct bch_alloc_v3 v; - -#define x(_name, _bits) + _bits / 8 - u8 _pad[0 + BCH_ALLOC_FIELDS_V2()]; -#undef x -} __attribute__((packed, aligned(8))); - /* How out of date a pointer gen is allowed to be: */ #define BUCKET_GC_GEN_MAX 96U @@ -46,28 +39,37 @@ static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l, } struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c); -void bch2_alloc_pack(struct bch_fs *, struct bkey_alloc_buf *, - const struct bkey_alloc_unpacked); +int bch2_alloc_write(struct btree_trans *, struct btree_iter *, + struct bkey_alloc_unpacked *, unsigned); int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); static inline struct bkey_alloc_unpacked -alloc_mem_to_key(struct btree_iter *iter, - struct bucket *g, struct bucket_mark m) +alloc_mem_to_key(struct bch_fs *c, struct btree_iter *iter) { - return (struct bkey_alloc_unpacked) { + struct bch_dev *ca; + struct bucket *g; + struct bkey_alloc_unpacked ret; + + percpu_down_read(&c->mark_lock); + ca = bch_dev_bkey_exists(c, iter->pos.inode); + g = bucket(ca, iter->pos.offset); + ret = (struct bkey_alloc_unpacked) { .dev = iter->pos.inode, .bucket = iter->pos.offset, - .gen = m.gen, + .gen = g->mark.gen, .oldest_gen = g->oldest_gen, - .data_type = m.data_type, - .dirty_sectors = m.dirty_sectors, - .cached_sectors = m.cached_sectors, + .data_type = g->mark.data_type, + .dirty_sectors = g->mark.dirty_sectors, + .cached_sectors = g->mark.cached_sectors, .read_time = g->io_time[READ], .write_time = g->io_time[WRITE], .stripe = g->stripe, .stripe_redundancy = g->stripe_redundancy, }; + percpu_up_read(&c->mark_lock); + + return ret; } #define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9) @@ -137,7 +139,7 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_stop(struct bch_dev *); int bch2_dev_allocator_start(struct bch_dev *); -int bch2_alloc_write(struct bch_fs *, unsigned); +int bch2_alloc_write_all(struct bch_fs *, unsigned); void bch2_fs_allocator_background_init(struct bch_fs *); void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index fee1fc58..5c01f056 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -321,8 +321,12 @@ BCH_DEBUG_PARAMS_DEBUG() #define BCH_TIME_STATS() \ x(btree_node_mem_alloc) \ x(btree_node_split) \ + x(btree_node_compact) \ + x(btree_node_merge) \ x(btree_node_sort) \ x(btree_node_read) \ + x(btree_interior_update_foreground) \ + x(btree_interior_update_total) \ x(btree_gc) \ x(btree_lock_contended_read) \ x(btree_lock_contended_intent) \ @@ -330,8 +334,8 @@ BCH_DEBUG_PARAMS_DEBUG() x(data_write) \ x(data_read) \ x(data_promote) \ - x(journal_write) \ - x(journal_delay) \ + x(journal_flush_write) \ + x(journal_noflush_write) \ x(journal_flush_seq) \ x(blocked_journal) \ x(blocked_allocate) \ diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index a36b0e60..91c69a9f 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1849,6 +1849,7 @@ int bch2_gc_gens(struct bch_fs *c) struct bch_dev *ca; struct bucket_array *buckets; struct bucket *g; + u64 start_time = local_clock(); unsigned i; int ret; @@ -1892,6 +1893,8 @@ int bch2_gc_gens(struct bch_fs *c) c->gc_gens_pos = POS_MIN; c->gc_count++; + + bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time); err: up_read(&c->gc_lock); return ret; diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index dfff9725..d895d4ef 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -455,15 +455,23 @@ static void bch2_btree_update_free(struct btree_update *as) bch2_disk_reservation_put(c, &as->disk_res); bch2_btree_reserve_put(as); + bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total], + as->start_time); + mutex_lock(&c->btree_interior_update_lock); list_del(&as->unwritten_list); list_del(&as->list); - mutex_unlock(&c->btree_interior_update_lock); closure_debug_destroy(&as->cl); mempool_free(as, &c->btree_interior_update_pool); + /* + * Have to do the wakeup with btree_interior_update_lock still held, + * since being on btree_interior_update_list is our ref on @c: + */ closure_wake_up(&c->btree_interior_update_wait); + + mutex_unlock(&c->btree_interior_update_lock); } static void btree_update_will_delete_key(struct btree_update *as, @@ -902,6 +910,9 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as, static void bch2_btree_update_done(struct btree_update *as) { + struct bch_fs *c = as->c; + u64 start_time = as->start_time; + BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE); if (as->took_gc_lock) @@ -912,6 +923,9 @@ static void bch2_btree_update_done(struct btree_update *as) continue_at(&as->cl, btree_update_set_nodes_written, as->c->btree_interior_update_worker); + + bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_foreground], + start_time); } static struct btree_update * @@ -921,6 +935,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, struct bch_fs *c = trans->c; struct btree_update *as; struct closure cl; + u64 start_time = local_clock(); int disk_res_flags = (flags & BTREE_INSERT_NOFAIL) ? BCH_DISK_RESERVATION_NOFAIL : 0; int journal_flags = 0; @@ -960,6 +975,7 @@ retry: memset(as, 0, sizeof(*as)); closure_init(&as->cl, NULL); as->c = c; + as->start_time = start_time; as->mode = BTREE_INTERIOR_NO_UPDATE; as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD); as->btree_id = path->btree_id; @@ -1452,7 +1468,9 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans, bch2_trans_verify_locks(trans); - bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split], + bch2_time_stats_update(&c->times[n2 + ? BCH_TIME_btree_node_split + : BCH_TIME_btree_node_compact], start_time); } @@ -1573,6 +1591,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, struct btree *b, *m, *n, *prev, *next, *parent; struct bpos sib_pos; size_t sib_u64s; + u64 start_time = local_clock(); int ret = 0; BUG_ON(!path->should_be_locked); @@ -1710,6 +1729,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, six_unlock_intent(&n->c.lock); bch2_btree_update_done(as); + + bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time); out: err: bch2_path_put(trans, sib_path, true); diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h index 8e03bd98..d4574161 100644 --- a/libbcachefs/btree_update_interior.h +++ b/libbcachefs/btree_update_interior.h @@ -35,6 +35,7 @@ bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *, struct btree_update { struct closure cl; struct bch_fs *c; + u64 start_time; struct list_head list; struct list_head unwritten_list; diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 10837a62..295942e7 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -1440,6 +1440,8 @@ retry: (k = bch2_btree_iter_peek(&iter)).k) && !(ret = bkey_err(k)) && bkey_cmp(iter.pos, end) < 0) { + struct disk_reservation disk_res = + bch2_disk_reservation_init(trans->c, 0); struct bkey_i delete; bkey_init(&delete.k); @@ -1474,8 +1476,9 @@ retry: } ret = bch2_trans_update(trans, &iter, &delete, 0) ?: - bch2_trans_commit(trans, NULL, journal_seq, + bch2_trans_commit(trans, &disk_res, journal_seq, BTREE_INSERT_NOFAIL); + bch2_disk_reservation_put(trans->c, &disk_res); if (ret) break; } diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 5ff4e911..762366f5 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -977,7 +977,6 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans, update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, gc); } - return 0; } @@ -1123,7 +1122,6 @@ static int bch2_mark_stripe(struct btree_trans *trans, */ m->alive = true; m->sectors = le16_to_cpu(new_s->sectors); - m->algorithm = new_s->algorithm; m->nr_blocks = new_s->nr_blocks; m->nr_redundant = new_s->nr_redundant; @@ -1483,23 +1481,16 @@ need_mark: /* trans_mark: */ -static struct bkey_alloc_buf * -bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter, +static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter, const struct bch_extent_ptr *ptr, struct bkey_alloc_unpacked *u) { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); - struct bucket *g; - struct bkey_alloc_buf *a; struct bkey_i *update = btree_trans_peek_updates(trans, BTREE_ID_alloc, pos); int ret; - a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf)); - if (IS_ERR(a)) - return a; - bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos, BTREE_ITER_CACHED| BTREE_ITER_CACHED_NOFILL| @@ -1507,34 +1498,27 @@ bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter ret = bch2_btree_iter_traverse(iter); if (ret) { bch2_trans_iter_exit(trans, iter); - return ERR_PTR(ret); + return ret; } - if (update && !bpos_cmp(update->k.p, pos)) { - *u = bch2_alloc_unpack(bkey_i_to_s_c(update)); - } else { - percpu_down_read(&c->mark_lock); - g = bucket(ca, pos.offset); - *u = alloc_mem_to_key(iter, g, READ_ONCE(g->mark)); - percpu_up_read(&c->mark_lock); - } + *u = update && !bpos_cmp(update->k.p, pos) + ? bch2_alloc_unpack(bkey_i_to_s_c(update)) + : alloc_mem_to_key(c, iter); - return a; + return 0; } static int bch2_trans_mark_pointer(struct btree_trans *trans, struct bkey_s_c k, struct extent_ptr_decoded p, s64 sectors, enum bch_data_type data_type) { - struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_alloc_unpacked u; - struct bkey_alloc_buf *a; int ret; - a = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u); - if (IS_ERR(a)) - return PTR_ERR(a); + ret = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u); + if (ret) + return ret; ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type, u.gen, &u.data_type, @@ -1542,8 +1526,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, if (ret) goto out; - bch2_alloc_pack(c, a, u); - ret = bch2_trans_update(trans, &iter, &a->k, 0); + ret = bch2_alloc_write(trans, &iter, &u, 0); if (ret) goto out; out: @@ -1673,7 +1656,6 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, { struct bch_fs *c = trans->c; const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; - struct bkey_alloc_buf *a; struct btree_iter iter; struct bkey_alloc_unpacked u; enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant @@ -1684,9 +1666,9 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, if (deleting) sectors = -sectors; - a = bch2_trans_start_alloc_update(trans, &iter, ptr, &u); - if (IS_ERR(a)) - return PTR_ERR(a); + ret = bch2_trans_start_alloc_update(trans, &iter, ptr, &u); + if (ret) + return ret; ret = check_bucket_ref(c, s.s_c, ptr, sectors, data_type, u.gen, u.data_type, @@ -1736,8 +1718,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, if (data_type) u.data_type = !deleting ? data_type : 0; - bch2_alloc_pack(c, a, u); - ret = bch2_trans_update(trans, &iter, &a->k, 0); + ret = bch2_alloc_write(trans, &iter, &u, 0); if (ret) goto err; err: @@ -1985,7 +1966,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_alloc_unpacked u; - struct bkey_alloc_buf *a; struct bch_extent_ptr ptr = { .dev = ca->dev_idx, .offset = bucket_to_sector(ca, b), @@ -1998,9 +1978,9 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, if (b >= ca->mi.nbuckets) return 0; - a = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u); - if (IS_ERR(a)) - return PTR_ERR(a); + ret = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u); + if (ret) + return ret; if (u.data_type && u.data_type != type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, @@ -2017,8 +1997,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, u.data_type = type; u.dirty_sectors = sectors; - bch2_alloc_pack(c, a, u); - ret = bch2_trans_update(trans, &iter, &a->k, 0); + ret = bch2_alloc_write(trans, &iter, &u, 0); if (ret) goto out; out: diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index f1839990..033ded88 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -143,8 +143,8 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, } /* returns blocknr in stripe that we matched: */ -static int bkey_matches_stripe(struct bch_stripe *s, - struct bkey_s_c k) +static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s, + struct bkey_s_c k, unsigned *block) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; @@ -153,10 +153,12 @@ static int bkey_matches_stripe(struct bch_stripe *s, bkey_for_each_ptr(ptrs, ptr) for (i = 0; i < nr_data; i++) if (__bch2_ptr_matches_stripe(&s->ptrs[i], ptr, - le16_to_cpu(s->sectors))) - return i; + le16_to_cpu(s->sectors))) { + *block = i; + return ptr; + } - return -1; + return NULL; } static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) @@ -834,6 +836,7 @@ retry: (k = bch2_btree_iter_peek(&iter)).k && !(ret = bkey_err(k)) && bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) { + const struct bch_extent_ptr *ptr_c; struct bch_extent_ptr *ptr, *ec_ptr = NULL; if (extent_has_stripe_ptr(k, s->key.k.p.offset)) { @@ -841,8 +844,12 @@ retry: continue; } - block = bkey_matches_stripe(&s->key.v, k); - if (block < 0) { + ptr_c = bkey_matches_stripe(&s->key.v, k, &block); + /* + * It doesn't generally make sense to erasure code cached ptrs: + * XXX: should we be incrementing a counter? + */ + if (!ptr_c || ptr_c->cached) { bch2_btree_iter_advance(&iter); continue; } diff --git a/libbcachefs/ec_types.h b/libbcachefs/ec_types.h index d9623ba6..edd93da6 100644 --- a/libbcachefs/ec_types.h +++ b/libbcachefs/ec_types.h @@ -25,7 +25,6 @@ struct stripe { struct gc_stripe { u16 sectors; - u8 algorithm; u8 nr_blocks; u8 nr_redundant; diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c index 5f3429e9..d543480b 100644 --- a/libbcachefs/fs-common.c +++ b/libbcachefs/fs-common.c @@ -329,6 +329,7 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, bool ret = false; for (id = 0; id < Inode_opt_nr; id++) { + /* Skip attributes that were explicitly set on this inode */ if (dst_u->bi_fields_set & (1 << id)) continue; diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 5bcdfe3c..4ad843fa 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -1280,7 +1280,7 @@ static void bch2_writepage_io_done(struct closure *cl) * racing with fallocate can cause us to add fewer sectors than * expected - but we shouldn't add more sectors than expected: */ - BUG_ON(io->op.i_sectors_delta > 0); + WARN_ON(io->op.i_sectors_delta > 0); /* * (error (due to going RO) halfway through a page can screw that up diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 5a3c9eff..a9ca81ec 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -1111,7 +1111,7 @@ again: */ wp = bch2_alloc_sectors_start(c, op->target, - op->opts.erasure_code, + op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED), op->write_point, &op->devs_have, op->nr_replicas, diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 268f3ea4..ff8b81fa 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -626,6 +626,12 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq) u64 start_time = local_clock(); int ret, ret2; + /* + * Don't update time_stats when @seq is already flushed: + */ + if (seq <= j->flushed_seq_ondisk) + return 0; + ret = wait_event_interruptible(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL))); if (!ret) diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 37abfb18..80e0dd31 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -1238,7 +1238,9 @@ static void journal_write_done(struct closure *cl) u64 v, seq; int err = 0; - bch2_time_stats_update(j->write_time, j->write_start_time); + bch2_time_stats_update(!JSET_NO_FLUSH(w->data) + ? j->flush_write_time + : j->noflush_write_time, j->write_start_time); if (!w->devs_written.nr) { bch_err(c, "unable to write journal to sufficient devices"); diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h index 66b1707a..54cc69bd 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal_types.h @@ -271,8 +271,8 @@ struct journal { u64 nr_flush_writes; u64 nr_noflush_writes; - struct time_stats *write_time; - struct time_stats *delay_time; + struct time_stats *flush_write_time; + struct time_stats *noflush_write_time; struct time_stats *blocked_time; struct time_stats *flush_seq_time; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index f0495451..f73be9cb 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -394,10 +394,14 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, unsigned compressed_sectors = 0; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) - if (p.ptr.dev == data_opts.rewrite_dev && - !p.ptr.cached && - crc_is_compressed(p.crc)) - compressed_sectors += p.crc.compressed_size; + if (p.ptr.dev == data_opts.rewrite_dev) { + if (p.ptr.cached) + m->op.flags |= BCH_WRITE_CACHED; + + if (!p.ptr.cached && + crc_is_compressed(p.crc)) + compressed_sectors += p.crc.compressed_size; + } if (compressed_sectors) { ret = bch2_disk_reservation_add(c, &m->op.res, diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index a955ef20..e81e07a3 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -291,7 +291,7 @@ void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c, pr_buf(out, "%lli", v); break; case BCH_OPT_SECTORS: - bch2_hprint(out, v); + bch2_hprint(out, v << 9); break; case BCH_OPT_STR: if (flags & OPT_SHOW_FULL_LIST) diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index e2eb9b3f..87114277 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -81,9 +81,9 @@ enum opt_type { */ #ifdef __KERNEL__ -#define RATELIMIT_ERRORS true +#define RATELIMIT_ERRORS_DEFAULT true #else -#define RATELIMIT_ERRORS false +#define RATELIMIT_ERRORS_DEFAULT false #endif #define BCH_OPTS() \ @@ -288,7 +288,7 @@ enum opt_type { x(ratelimit_errors, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, RATELIMIT_ERRORS, \ + NO_SB_OPT, RATELIMIT_ERRORS_DEFAULT, \ NULL, "Ratelimit error messages during fsck") \ x(nochanges, u8, \ OPT_FS|OPT_MOUNT, \ diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 460b1ba2..29fe6260 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -1238,7 +1238,7 @@ use_clean: */ bch_verbose(c, "writing allocation info"); err = "error writing out alloc info"; - ret = bch2_alloc_write(c, BTREE_INSERT_LAZY_RW); + ret = bch2_alloc_write_all(c, BTREE_INSERT_LAZY_RW); if (ret) { bch_err(c, "error writing alloc info"); goto err; diff --git a/libbcachefs/super.c b/libbcachefs/super.c index f673efed..505e559b 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -722,10 +722,10 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->rebalance.enabled = 1; c->promote_whole_extents = true; - c->journal.write_time = &c->times[BCH_TIME_journal_write]; - c->journal.delay_time = &c->times[BCH_TIME_journal_delay]; - c->journal.blocked_time = &c->times[BCH_TIME_blocked_journal]; - c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq]; + c->journal.flush_write_time = &c->times[BCH_TIME_journal_flush_write]; + c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write]; + c->journal.blocked_time = &c->times[BCH_TIME_blocked_journal]; + c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq]; bch2_fs_btree_cache_init_early(&c->btree_cache);