From 17c5215c1c542dd7b6b4f891a0da16d8c98e0591 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 9 Feb 2019 19:54:14 -0500 Subject: [PATCH] Update bcachefs sources to 09a5465430 bcachefs: Don't need to walk inodes on clean shutdown --- .bcachefs_revision | 2 +- libbcachefs/alloc_background.c | 5 +- libbcachefs/bcachefs.h | 4 + libbcachefs/bcachefs_format.h | 26 ++++- libbcachefs/btree_gc.c | 22 +--- libbcachefs/btree_gc.h | 3 +- libbcachefs/btree_types.h | 1 + libbcachefs/btree_update_interior.c | 1 - libbcachefs/buckets.c | 37 ++++++- libbcachefs/buckets.h | 1 + libbcachefs/fsck.c | 19 ++-- libbcachefs/journal.c | 57 +++++++++-- libbcachefs/journal.h | 4 + libbcachefs/journal_io.c | 92 ++++++++++------- libbcachefs/journal_types.h | 11 ++ libbcachefs/recovery.c | 136 +++++++++++++++++++++---- libbcachefs/replicas.c | 78 ++++++++++++-- libbcachefs/replicas.h | 11 ++ libbcachefs/super-io.c | 153 ++++++++++++++++++++++------ libbcachefs/super-io.h | 4 + libbcachefs/super.c | 19 +++- libbcachefs/sysfs.c | 49 +++++---- libbcachefs/util.h | 23 +++++ 23 files changed, 588 insertions(+), 170 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 8eca0593..39d11479 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -99750eab4d583132cf61f071082c7cf21f5295c0 +09a546543006b60d44c4c51e7b40cd3ec7837a5e diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 2552d457..ce42202f 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -1256,7 +1256,8 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca) { - closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full); + if (ca->alloc_thread) + closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full); } /* stop allocator thread: */ @@ -1534,6 +1535,8 @@ int bch2_fs_allocator_start(struct bch_fs *c) } } + set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); + return bch2_alloc_write(c, false, &wrote); } diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index f42b2f90..245d8322 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -474,6 +474,7 @@ enum { /* startup: */ BCH_FS_ALLOC_READ_DONE, BCH_FS_ALLOCATOR_STARTED, + BCH_FS_ALLOCATOR_RUNNING, BCH_FS_INITIAL_GC_DONE, BCH_FS_FSCK_DONE, BCH_FS_STARTED, @@ -541,6 +542,8 @@ struct bch_fs { struct bch_replicas_cpu replicas_gc; struct mutex replicas_gc_lock; + struct journal_entry_res replicas_journal_res; + struct bch_disk_groups_cpu __rcu *disk_groups; struct bch_opts opts; @@ -562,6 +565,7 @@ struct bch_fs { u32 time_base_hi; u32 time_precision; u64 features; + u64 compat; } sb; struct bch_sb_handle disk_sb; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 9245465d..d020cf74 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1274,6 +1274,10 @@ enum bch_sb_features { BCH_FEATURE_NR, }; +enum bch_sb_compat { + BCH_COMPAT_FEAT_ALLOC_INFO = 0, +}; + /* options: */ #define BCH_REPLICAS_MAX 4U @@ -1354,7 +1358,9 @@ static inline __u64 __bset_magic(struct bch_sb *sb) x(btree_root, 1) \ x(prio_ptrs, 2) \ x(blacklist, 3) \ - x(blacklist_v2, 4) + x(blacklist_v2, 4) \ + x(usage, 5) \ + x(data_usage, 6) enum { #define x(f, nr) BCH_JSET_ENTRY_##f = nr, @@ -1384,6 +1390,24 @@ struct jset_entry_blacklist_v2 { __le64 end; }; +enum { + FS_USAGE_RESERVED = 0, + FS_USAGE_INODES = 1, + FS_USAGE_KEY_VERSION = 2, + FS_USAGE_NR = 3 +}; + +struct 
jset_entry_usage { + struct jset_entry entry; + __le64 v; +} __attribute__((packed)); + +struct jset_entry_data_usage { + struct jset_entry entry; + __le64 v; + struct bch_replicas_entry r; +} __attribute__((packed)); + /* * On disk format for a journal entry: * seq is monotonically increasing; every journal entry has its own unique diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 433e8f22..b1f5e8b1 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -573,7 +573,8 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) percpu_down_write(&c->mark_lock); - if (initial) { + if (initial && + !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) { bch2_gc_done_nocheck(c); goto out; } @@ -815,9 +816,6 @@ out: bch2_gc_free(c); up_write(&c->gc_lock); - if (!ret && initial) - set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); - trace_gc_end(c); bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time); @@ -1245,19 +1243,3 @@ int bch2_gc_thread_start(struct bch_fs *c) wake_up_process(p); return 0; } - -/* Initial GC computes bucket marks during startup */ - -int bch2_initial_gc(struct bch_fs *c, struct list_head *journal) -{ - int ret = bch2_gc(c, journal, true); - - /* - * Skip past versions that might have possibly been used (as nonces), - * but hadn't had their pointers written: - */ - if (c->sb.encryption_type) - atomic64_add(1 << 16, &c->key_version); - - return ret; -} diff --git a/libbcachefs/btree_gc.h b/libbcachefs/btree_gc.h index 1905acfa..df51eb83 100644 --- a/libbcachefs/btree_gc.h +++ b/libbcachefs/btree_gc.h @@ -7,7 +7,6 @@ void bch2_coalesce(struct bch_fs *); int bch2_gc(struct bch_fs *, struct list_head *, bool); void bch2_gc_thread_stop(struct bch_fs *); int bch2_gc_thread_start(struct bch_fs *); -int bch2_initial_gc(struct bch_fs *, struct list_head *); void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned); /* @@ -109,7 +108,7 @@ static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos) do { seq = read_seqcount_begin(&c->gc_pos_lock); - ret = gc_pos_cmp(pos, c->gc_pos) < 0; + ret = gc_pos_cmp(pos, c->gc_pos) <= 0; } while (read_seqcount_retry(&c->gc_pos_lock, seq)); return ret; diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index dce4ed38..18596dc8 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -474,6 +474,7 @@ struct btree_root { __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); u8 level; u8 alive; + s8 error; }; /* diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 0f2fa6f7..33b5cf40 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -2117,7 +2117,6 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b) BUG_ON(btree_node_root(c, b)); __bch2_btree_set_root_inmem(c, b); - bch2_btree_set_root_ondisk(c, b, READ); } void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index ea71acb5..9f4872a9 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -113,6 +113,36 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c) } } +void bch2_fs_usage_initialize(struct bch_fs *c) +{ + struct bch_fs_usage *usage; + unsigned i, nr; + + percpu_down_write(&c->mark_lock); + nr = sizeof(struct bch_fs_usage) / sizeof(u64) + c->replicas.nr; + usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0], nr); + + for (i = 0; i < BCH_REPLICAS_MAX; i++) + usage->s.reserved += usage->persistent_reserved[i]; + + for (i = 0; 
i < c->replicas.nr; i++) { + struct bch_replicas_entry *e = + cpu_replicas_entry(&c->replicas, i); + + switch (e->data_type) { + case BCH_DATA_BTREE: + case BCH_DATA_USER: + usage->s.data += usage->data[i]; + break; + case BCH_DATA_CACHED: + usage->s.cached += usage->data[i]; + break; + } + } + + percpu_up_write(&c->mark_lock); +} + #define bch2_usage_read_raw(_stats) \ ({ \ typeof(*this_cpu_ptr(_stats)) _acc; \ @@ -814,7 +844,7 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, ret = bch2_mark_stripe(c, k, inserting, fs_usage, journal_seq, flags, gc); break; - case KEY_TYPE_alloc: + case KEY_TYPE_inode: if (inserting) fs_usage->s.nr_inodes++; else @@ -994,10 +1024,7 @@ void bch2_mark_update(struct btree_insert *trans, static u64 bch2_recalc_sectors_available(struct bch_fs *c) { - int cpu; - - for_each_possible_cpu(cpu) - per_cpu_ptr(c->pcpu, cpu)->sectors_available = 0; + percpu_u64_set(&c->pcpu->sectors_available, 0); return avail_factor(bch2_fs_sectors_free(c)); } diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 6f368172..19cf6525 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -247,6 +247,7 @@ static inline u64 bch2_fs_sectors_free(struct bch_fs *c) /* key/bucket marking: */ void bch2_bucket_seq_cleanup(struct bch_fs *); +void bch2_fs_usage_initialize(struct bch_fs *); void bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *, size_t, struct bucket_mark *); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 955ab8be..42bd2f7a 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -1186,6 +1186,11 @@ static int check_inode(struct bch_fs *c, } if (u.bi_flags & BCH_INODE_UNLINKED) { + fsck_err_on(c->sb.clean, c, + "filesystem marked clean, " + "but inode %llu unlinked", + u.bi_inum); + bch_verbose(c, "deleting inode %llu", u.bi_inum); ret = bch2_inode_rm(c, u.bi_inum); @@ -1388,16 +1393,13 @@ static int check_inodes_fast(struct bch_fs *c) (BCH_INODE_I_SIZE_DIRTY| BCH_INODE_I_SECTORS_DIRTY| BCH_INODE_UNLINKED)) { - fsck_err_on(c->sb.clean, c, - "filesystem marked clean but found inode %llu with flags %x", - inode.k->p.inode, inode.v->bi_flags); ret = check_inode(c, NULL, &iter, inode, NULL); BUG_ON(ret == -EINTR); if (ret) break; } } -fsck_err: + return bch2_btree_iter_unlock(&iter) ?: ret; } @@ -1459,9 +1461,10 @@ int bch2_fsck(struct bch_fs *c) if (c->opts.fsck) return bch2_fsck_full(c); - if (!c->sb.clean && - !(c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK))) - return bch2_fsck_inode_nlink(c); + if (c->sb.clean) + return 0; - return bch2_fsck_walk_inodes_only(c); + return c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK) + ? 
bch2_fsck_walk_inodes_only(c) + : bch2_fsck_inode_nlink(c); } diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 261149ad..8ff8cfa8 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -64,11 +64,6 @@ static void bch2_journal_buf_init(struct journal *j) buf->data->u64s = 0; } -static inline size_t journal_entry_u64s_reserve(struct journal_buf *buf) -{ - return BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX); -} - static inline bool journal_entry_empty(struct jset *j) { struct jset_entry *i; @@ -130,7 +125,7 @@ static enum { j->prev_buf_sectors = vstruct_blocks_plus(buf->data, c->block_bits, - journal_entry_u64s_reserve(buf)) * + buf->u64s_reserved) * c->opts.block_size; BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors); @@ -225,6 +220,7 @@ static int journal_entry_open(struct journal *j) return sectors; buf->disk_sectors = sectors; + buf->u64s_reserved = j->entry_u64s_reserved; sectors = min_t(unsigned, sectors, buf->size >> 9); j->cur_buf_sectors = sectors; @@ -233,11 +229,7 @@ static int journal_entry_open(struct journal *j) /* Subtract the journal header */ u64s -= sizeof(struct jset) / sizeof(u64); - /* - * Btree roots, prio pointers don't get added until right before we do - * the write: - */ - u64s -= journal_entry_u64s_reserve(buf); + u64s -= buf->u64s_reserved; u64s = max_t(ssize_t, 0L, u64s); BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL); @@ -436,6 +428,45 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, return ret; } +/* journal_entry_res: */ + +void bch2_journal_entry_res_resize(struct journal *j, + struct journal_entry_res *res, + unsigned new_u64s) +{ + union journal_res_state state; + int d = new_u64s - res->u64s; + + spin_lock(&j->lock); + + j->entry_u64s_reserved += d; + if (d <= 0) + goto out_unlock; + + j->cur_entry_u64s -= d; + smp_mb(); + state = READ_ONCE(j->reservations); + + if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL && + state.cur_entry_offset > j->cur_entry_u64s) { + j->cur_entry_u64s += d; + /* + * Not enough room in current journal entry, have to flush it: + */ + __journal_entry_close(j); + goto out; + } + + journal_cur_buf(j)->u64s_reserved += d; +out_unlock: + spin_unlock(&j->lock); +out: + res->u64s += d; + return; +} + +/* journal flushing: */ + u64 bch2_journal_last_unwritten_seq(struct journal *j) { u64 seq; @@ -1020,6 +1051,10 @@ int bch2_fs_journal_init(struct journal *j) j->write_delay_ms = 1000; j->reclaim_delay_ms = 100; + /* Btree roots: */ + j->entry_u64s_reserved += + BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX); + atomic64_set(&j->reservations.counter, ((union journal_res_state) { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index 3a083748..50d864a3 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -330,6 +330,10 @@ out: return 0; } +void bch2_journal_entry_res_resize(struct journal *, + struct journal_entry_res *, + unsigned); + u64 bch2_journal_last_unwritten_seq(struct journal *); int bch2_journal_open_seq_async(struct journal *, u64, struct closure *); diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 5cc0651c..0f1f8e15 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -284,6 +284,7 @@ static int journal_entry_validate_blacklist_v2(struct bch_fs *c, if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, c, "invalid journal seq blacklist entry: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); + goto out; } bl_entry = 
container_of(entry, struct jset_entry_blacklist_v2, entry); @@ -293,6 +294,49 @@ static int journal_entry_validate_blacklist_v2(struct bch_fs *c, "invalid journal seq blacklist entry: start > end")) { journal_entry_null_range(entry, vstruct_next(entry)); } +out: +fsck_err: + return ret; +} + +static int journal_entry_validate_usage(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + int write) +{ + struct jset_entry_usage *u = + container_of(entry, struct jset_entry_usage, entry); + unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); + int ret = 0; + + if (journal_entry_err_on(bytes < sizeof(*u), + c, + "invalid journal entry usage: bad size")) { + journal_entry_null_range(entry, vstruct_next(entry)); + return ret; + } + +fsck_err: + return ret; +} + +static int journal_entry_validate_data_usage(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + int write) +{ + struct jset_entry_data_usage *u = + container_of(entry, struct jset_entry_data_usage, entry); + unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); + int ret = 0; + + if (journal_entry_err_on(bytes < sizeof(*u) || + bytes < sizeof(*u) + u->r.nr_devs, + c, + "invalid journal entry usage: bad size")) { + journal_entry_null_range(entry, vstruct_next(entry)); + return ret; + } fsck_err: return ret; @@ -315,18 +359,10 @@ static const struct jset_entry_ops bch2_jset_entry_ops[] = { static int journal_entry_validate(struct bch_fs *c, struct jset *jset, struct jset_entry *entry, int write) { - int ret = 0; - - if (entry->type >= BCH_JSET_ENTRY_NR) { - journal_entry_err(c, "invalid journal entry type %u", - entry->type); - journal_entry_null_range(entry, vstruct_next(entry)); - return 0; - } - - ret = bch2_jset_entry_ops[entry->type].validate(c, jset, entry, write); -fsck_err: - return ret; + return entry->type < BCH_JSET_ENTRY_NR + ? 
bch2_jset_entry_ops[entry->type].validate(c, jset, + entry, write) + : 0; } static int jset_validate_entries(struct bch_fs *c, struct jset *jset, @@ -848,19 +884,6 @@ err: /* journal write: */ -static void bch2_journal_add_btree_root(struct journal_buf *buf, - enum btree_id id, struct bkey_i *k, - unsigned level) -{ - struct jset_entry *entry; - - entry = bch2_journal_add_entry_noreservation(buf, k->k.u64s); - entry->type = BCH_JSET_ENTRY_btree_root; - entry->btree_id = id; - entry->level = level; - memcpy_u64s(entry->_data, k, k->k.u64s); -} - static unsigned journal_dev_buckets_available(struct journal *j, struct journal_device *ja) { @@ -1191,25 +1214,26 @@ void bch2_journal_write(struct closure *cl) struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_dev *ca; struct journal_buf *w = journal_prev_buf(j); + struct jset_entry *start, *end; struct jset *jset; struct bio *bio; struct bch_extent_ptr *ptr; bool validate_before_checksum = false; - unsigned i, sectors, bytes; + unsigned i, sectors, bytes, u64s; journal_buf_realloc(j, w); jset = w->data; j->write_start_time = local_clock(); - mutex_lock(&c->btree_root_lock); - for (i = 0; i < BTREE_ID_NR; i++) { - struct btree_root *r = &c->btree_roots[i]; - if (r->alive) - bch2_journal_add_btree_root(w, i, &r->key, r->level); - } - c->btree_roots_dirty = false; - mutex_unlock(&c->btree_root_lock); + start = vstruct_last(w->data); + end = bch2_journal_super_entries_add_common(c, start); + u64s = (u64 *) end - (u64 *) start; + BUG_ON(u64s > j->entry_u64s_reserved); + + le32_add_cpu(&w->data->u64s, u64s); + BUG_ON(vstruct_sectors(jset, c->block_bits) > + w->disk_sectors); journal_write_compact(jset); diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h index a5933689..a91662f6 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal_types.h @@ -23,6 +23,7 @@ struct journal_buf { unsigned size; unsigned disk_sectors; + unsigned u64s_reserved; /* bloom filter: */ unsigned long has_inode[1024 / sizeof(unsigned long)]; }; @@ -154,6 +155,9 @@ struct journal { u64 seq_ondisk; u64 last_seq_ondisk; + /* Reserved space in journal entry to be used just prior to write */ + unsigned entry_u64s_reserved; + /* * FIFO of journal entries whose btree updates have not yet been * written out. 
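[Annotation, not part of the patch:] The journal_entry_res mechanism added here is worth a brief illustration. Each subsystem that must log state into every journal entry (in this patch: btree roots, and the new usage/data_usage entries sized by reserve_journal_replicas()) owns a journal_entry_res; the sum of all reservations lives in journal.entry_u64s_reserved, journal_entry_open() subtracts that sum from the space handed out to writers, and bch2_journal_entry_res_resize() adjusts one reservation at runtime, closing the current entry if it can no longer accommodate the growth. Below is a minimal userspace sketch of that accounting only; journal_model, entry_res_resize, and the sizes used are illustrative assumptions, not the real journal code.

#include <assert.h>
#include <stdio.h>

struct journal_entry_res { unsigned u64s; };

/* Illustrative stand-in for the relevant fields of struct journal: */
struct journal_model {
	unsigned entry_u64s_reserved;	/* sum of all per-entry reservations */
	unsigned cur_entry_u64s;	/* u64s still available to writers */
};

/*
 * Mirrors the accounting in bch2_journal_entry_res_resize(): growing a
 * reservation takes space away from the currently open entry (or forces
 * it closed); shrinking one just returns space to the pool, and the next
 * journal_entry_open() recomputes the writable size.
 */
static void entry_res_resize(struct journal_model *j,
			     struct journal_entry_res *res,
			     unsigned new_u64s)
{
	int d = (int) new_u64s - (int) res->u64s;

	j->entry_u64s_reserved += d;
	if (d > 0) {
		/* the real code closes the entry instead of asserting: */
		assert(j->cur_entry_u64s >= (unsigned) d);
		j->cur_entry_u64s -= d;
	}
	res->u64s = new_u64s;
}

int main(void)
{
	struct journal_model j = { .entry_u64s_reserved = 0,
				   .cur_entry_u64s     = 512 };
	struct journal_entry_res replicas_res = { 0 };

	/* e.g. the replicas table grew and now needs 24 u64s per entry: */
	entry_res_resize(&j, &replicas_res, 24);
	printf("reserved %u u64s, %u left for writers\n",
	       j.entry_u64s_reserved, j.cur_entry_u64s);
	return 0;
}

This is why bch2_mark_replicas_slowpath() in the replicas.c hunk calls bch2_journal_entry_res_resize() whenever a replicas entry is added: the per-entry usage information written by bch2_journal_super_entries_add_common() must always fit in the reserved tail of the entry being written.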
@@ -242,4 +246,11 @@ struct journal_device { struct closure read; }; +/* + * journal_entry_res - reserve space in every journal entry: + */ +struct journal_entry_res { + unsigned u64s; +}; + #endif /* _BCACHEFS_JOURNAL_TYPES_H */ diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index f5f3f94e..7e50547c 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -5,6 +5,7 @@ #include "btree_update.h" #include "btree_update_interior.h" #include "btree_io.h" +#include "buckets.h" #include "dirent.h" #include "ec.h" #include "error.h" @@ -12,16 +13,17 @@ #include "journal_io.h" #include "quota.h" #include "recovery.h" +#include "replicas.h" #include "super-io.h" #include #define QSTR(n) { { { .len = strlen(n) } }, .name = n } -struct bkey_i *btree_root_find(struct bch_fs *c, - struct bch_sb_field_clean *clean, - struct jset *j, - enum btree_id id, unsigned *level) +static struct bkey_i *btree_root_find(struct bch_fs *c, + struct bch_sb_field_clean *clean, + struct jset *j, + enum btree_id id, unsigned *level) { struct bkey_i *k; struct jset_entry *entry, *start, *end; @@ -49,6 +51,60 @@ found: return k; } +static int journal_replay_entry_early(struct bch_fs *c, + struct jset_entry *entry) +{ + int ret = 0; + + switch (entry->type) { + case BCH_JSET_ENTRY_btree_root: { + struct btree_root *r = &c->btree_roots[entry->btree_id]; + + if (entry->u64s) { + r->level = entry->level; + bkey_copy(&r->key, &entry->start[0]); + r->error = 0; + } else { + r->error = -EIO; + } + r->alive = true; + break; + } + case BCH_JSET_ENTRY_usage: { + struct jset_entry_usage *u = + container_of(entry, struct jset_entry_usage, entry); + + switch (entry->btree_id) { + case FS_USAGE_RESERVED: + if (entry->level < BCH_REPLICAS_MAX) + percpu_u64_set(&c->usage[0]-> + persistent_reserved[entry->level], + le64_to_cpu(u->v)); + break; + case FS_USAGE_INODES: + percpu_u64_set(&c->usage[0]->s.nr_inodes, + le64_to_cpu(u->v)); + break; + case FS_USAGE_KEY_VERSION: + atomic64_set(&c->key_version, + le64_to_cpu(u->v)); + break; + } + + break; + } + case BCH_JSET_ENTRY_data_usage: { + struct jset_entry_data_usage *u = + container_of(entry, struct jset_entry_data_usage, entry); + ret = bch2_replicas_set_usage(c, &u->r, + le64_to_cpu(u->v)); + break; + } + } + + return ret; +} + static int verify_superblock_clean(struct bch_fs *c, struct bch_sb_field_clean *clean, struct jset *j) @@ -107,7 +163,9 @@ static bool journal_empty(struct list_head *journal) list_for_each_entry(i, journal, list) { vstruct_for_each(&i->j, entry) { - if (entry->type == BCH_JSET_ENTRY_btree_root) + if (entry->type == BCH_JSET_ENTRY_btree_root || + entry->type == BCH_JSET_ENTRY_usage || + entry->type == BCH_JSET_ENTRY_data_usage) continue; if (entry->type == BCH_JSET_ENTRY_btree_keys && @@ -124,6 +182,7 @@ int bch2_fs_recovery(struct bch_fs *c) { const char *err = "cannot allocate memory"; struct bch_sb_field_clean *clean = NULL, *sb_clean = NULL; + struct jset_entry *entry; LIST_HEAD(journal); struct jset *j = NULL; unsigned i; @@ -176,28 +235,46 @@ int bch2_fs_recovery(struct bch_fs *c) fsck_err_on(clean && !journal_empty(&journal), c, "filesystem marked clean but journal not empty"); + err = "insufficient memory"; if (clean) { c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock); c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock); + + for (entry = clean->start; + entry != vstruct_end(&clean->field); + entry = vstruct_next(entry)) { + ret = journal_replay_entry_early(c, entry); + if (ret) + goto err; + } } else { + struct 
journal_replay *i; + c->bucket_clock[READ].hand = le16_to_cpu(j->read_clock); c->bucket_clock[WRITE].hand = le16_to_cpu(j->write_clock); + + list_for_each_entry(i, &journal, list) + vstruct_for_each(&i->j, entry) { + ret = journal_replay_entry_early(c, entry); + if (ret) + goto err; + } } - for (i = 0; i < BTREE_ID_NR; i++) { - unsigned level; - struct bkey_i *k; + bch2_fs_usage_initialize(c); - k = btree_root_find(c, clean, j, i, &level); - if (!k) + for (i = 0; i < BTREE_ID_NR; i++) { + struct btree_root *r = &c->btree_roots[i]; + + if (!r->alive) continue; err = "invalid btree root pointer"; - if (IS_ERR(k)) + if (r->error) goto err; err = "error reading btree root"; - if (bch2_btree_root_read(c, i, k, level)) { + if (bch2_btree_root_read(c, i, &r->key, r->level)) { if (i != BTREE_ID_ALLOC) goto err; @@ -214,21 +291,33 @@ int bch2_fs_recovery(struct bch_fs *c) if (ret) goto err; + bch_verbose(c, "starting stripes_read"); ret = bch2_stripes_read(c, &journal); if (ret) goto err; - pr_info("stripes_read done"); + bch_verbose(c, "stripes_read done"); set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); - bch_verbose(c, "starting mark and sweep:"); - err = "error in recovery"; - ret = bch2_initial_gc(c, &journal); - if (ret) - goto err; - bch_verbose(c, "mark and sweep done"); + if (!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) || + c->opts.fsck) { + bch_verbose(c, "starting mark and sweep:"); + err = "error in recovery"; + ret = bch2_gc(c, &journal, true); + if (ret) + goto err; + bch_verbose(c, "mark and sweep done"); + } clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags); + set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); + + /* + * Skip past versions that might have possibly been used (as nonces), + * but hadn't had their pointers written: + */ + if (c->sb.encryption_type && !c->sb.clean) + atomic64_add(1 << 16, &c->key_version); if (c->opts.noreplay) goto out; @@ -311,15 +400,22 @@ int bch2_fs_initialize(struct bch_fs *c) bch_notice(c, "initializing new filesystem"); + mutex_lock(&c->sb_lock); + for_each_online_member(ca, c, i) + bch2_mark_dev_superblock(c, ca, 0); + mutex_unlock(&c->sb_lock); + set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); for (i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc(c, i); - ret = bch2_initial_gc(c, &journal); + ret = bch2_gc(c, &journal, true); if (ret) goto err; + set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); + err = "unable to allocate journal buckets"; for_each_online_member(ca, c, i) if (bch2_dev_journal_alloc(ca)) { diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index 230f807b..4d0c9718 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -1,5 +1,6 @@ #include "bcachefs.h" +#include "journal.h" #include "replicas.h" #include "super-io.h" @@ -28,11 +29,6 @@ static void replicas_entry_sort(struct bch_replicas_entry *e) bubble_sort(e->devs, e->nr_devs, u8_cmp); } -#define for_each_cpu_replicas_entry(_r, _i) \ - for (_i = (_r)->entries; \ - (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\ - _i = (void *) (_i) + (_r)->entry_size) - static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) { eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL); @@ -301,6 +297,32 @@ err: return ret; } +static unsigned reserve_journal_replicas(struct bch_fs *c, + struct bch_replicas_cpu *r) +{ + struct bch_replicas_entry *e; + unsigned journal_res_u64s = 0; + + /* nr_inodes: */ + journal_res_u64s += + DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); + + /* key_version: */ + journal_res_u64s += + 
DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); + + /* persistent_reserved: */ + journal_res_u64s += + DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) * + BCH_REPLICAS_MAX; + + for_each_cpu_replicas_entry(r, e) + journal_res_u64s += + DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) + + e->nr_devs, sizeof(u64)); + return journal_res_u64s; +} + noinline static int bch2_mark_replicas_slowpath(struct bch_fs *c, struct bch_replicas_entry *new_entry) @@ -328,6 +350,10 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r); if (ret) goto err; + + bch2_journal_entry_res_resize(&c->journal, + &c->replicas_journal_res, + reserve_journal_replicas(c, &new_r)); } if (!new_r.entries && @@ -425,14 +451,12 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) struct bch_replicas_entry *e = cpu_replicas_entry(&c->replicas, i); struct bch_replicas_cpu n; - u64 v = 0; - int cpu; + u64 v; if (__replicas_has_entry(&c->replicas_gc, e)) continue; - for_each_possible_cpu(cpu) - v += *per_cpu_ptr(&c->usage[0]->data[i], cpu); + v = percpu_u64_get(&c->usage[0]->data[i]); if (!v) continue; @@ -510,6 +534,34 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) return 0; } +int bch2_replicas_set_usage(struct bch_fs *c, + struct bch_replicas_entry *r, + u64 sectors) +{ + int ret, idx = bch2_replicas_entry_idx(c, r); + + if (idx < 0) { + struct bch_replicas_cpu n; + + n = cpu_replicas_add_entry(&c->replicas, r); + if (!n.entries) + return -ENOMEM; + + ret = replicas_table_update(c, &n); + if (ret) + return ret; + + kfree(n.entries); + + idx = bch2_replicas_entry_idx(c, r); + BUG_ON(ret < 0); + } + + percpu_u64_set(&c->usage[0]->data[idx], sectors); + + return 0; +} + /* Replicas tracking - superblock: */ static int @@ -596,6 +648,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) bch2_cpu_replicas_sort(&new_r); percpu_down_write(&c->mark_lock); + ret = replicas_table_update(c, &new_r); percpu_up_write(&c->mark_lock); @@ -916,3 +969,10 @@ unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) return ret; } + +int bch2_fs_replicas_init(struct bch_fs *c) +{ + c->journal.entry_u64s_reserved += + reserve_journal_replicas(c, &c->replicas); + return 0; +} diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h index 0ac2b8e0..1607b7bd 100644 --- a/libbcachefs/replicas.h +++ b/libbcachefs/replicas.h @@ -56,6 +56,15 @@ unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *); int bch2_replicas_gc_end(struct bch_fs *, int); int bch2_replicas_gc_start(struct bch_fs *, unsigned); +int bch2_replicas_set_usage(struct bch_fs *, + struct bch_replicas_entry *, + u64); + +#define for_each_cpu_replicas_entry(_r, _i) \ + for (_i = (_r)->entries; \ + (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\ + _i = (void *) (_i) + (_r)->entry_size) + /* iterate over superblock replicas - used by userspace tools: */ #define replicas_entry_bytes(_i) \ @@ -79,4 +88,6 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *); extern const struct bch_sb_field_ops bch_sb_field_ops_replicas; extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0; +int bch2_fs_replicas_init(struct bch_fs *); + #endif /* _BCACHEFS_REPLICAS_H */ diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index c5eaf155..b88750ff 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -181,6 +181,7 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb, } } + f = bch2_sb_field_get(sb->sb, type); f 
= __bch2_sb_field_resize(sb, f, u64s); if (f) f->type = cpu_to_le32(type); @@ -362,6 +363,7 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.time_base_hi = le32_to_cpu(src->time_base_hi); c->sb.time_precision = le32_to_cpu(src->time_precision); c->sb.features = le64_to_cpu(src->features[0]); + c->sb.compat = le64_to_cpu(src->compat[0]); for_each_member_device(ca, c, i) ca->mi = bch2_mi_to_cpu(mi->members + i); @@ -881,29 +883,132 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write); } -void bch2_fs_mark_clean(struct bch_fs *c, bool clean) +static void bch2_fs_mark_dirty(struct bch_fs *c) { - struct bch_sb_field_clean *sb_clean; - unsigned u64s = sizeof(*sb_clean) / sizeof(u64); - struct jset_entry *entry; - struct btree_root *r; - mutex_lock(&c->sb_lock); - if (clean == BCH_SB_CLEAN(c->disk_sb.sb)) - goto out; + if (BCH_SB_CLEAN(c->disk_sb.sb) || + (c->disk_sb.sb->compat[0] & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) { + SET_BCH_SB_CLEAN(c->disk_sb.sb, false); + c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); + bch2_write_super(c); + } + mutex_unlock(&c->sb_lock); +} - SET_BCH_SB_CLEAN(c->disk_sb.sb, clean); - - if (!clean) - goto write_super; +struct jset_entry * +bch2_journal_super_entries_add_common(struct bch_fs *c, + struct jset_entry *entry) +{ + struct btree_root *r; + unsigned i; mutex_lock(&c->btree_root_lock); for (r = c->btree_roots; r < c->btree_roots + BTREE_ID_NR; r++) - if (r->alive) - u64s += jset_u64s(r->key.u64s); + if (r->alive) { + entry->u64s = r->key.u64s; + entry->btree_id = r - c->btree_roots; + entry->level = r->level; + entry->type = BCH_JSET_ENTRY_btree_root; + bkey_copy(&entry->start[0], &r->key); + + entry = vstruct_next(entry); + } + c->btree_roots_dirty = false; + + mutex_unlock(&c->btree_root_lock); + + percpu_down_read_preempt_disable(&c->mark_lock); + + { + u64 nr_inodes = percpu_u64_get(&c->usage[0]->s.nr_inodes); + struct jset_entry_usage *u = + container_of(entry, struct jset_entry_usage, entry); + + memset(u, 0, sizeof(*u)); + u->entry.u64s = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1; + u->entry.type = BCH_JSET_ENTRY_usage; + u->entry.btree_id = FS_USAGE_INODES; + u->v = cpu_to_le64(nr_inodes); + + entry = vstruct_next(entry); + } + + { + struct jset_entry_usage *u = + container_of(entry, struct jset_entry_usage, entry); + + memset(u, 0, sizeof(*u)); + u->entry.u64s = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1; + u->entry.type = BCH_JSET_ENTRY_usage; + u->entry.btree_id = FS_USAGE_KEY_VERSION; + u->v = cpu_to_le64(atomic64_read(&c->key_version)); + + entry = vstruct_next(entry); + } + + for (i = 0; i < BCH_REPLICAS_MAX; i++) { + struct jset_entry_usage *u = + container_of(entry, struct jset_entry_usage, entry); + u64 sectors = percpu_u64_get(&c->usage[0]->persistent_reserved[i]); + + if (!sectors) + continue; + + memset(u, 0, sizeof(*u)); + u->entry.u64s = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1; + u->entry.type = BCH_JSET_ENTRY_usage; + u->entry.btree_id = FS_USAGE_RESERVED; + u->entry.level = i; + u->v = sectors; + + entry = vstruct_next(entry); + } + + for (i = 0; i < c->replicas.nr; i++) { + struct bch_replicas_entry *e = + cpu_replicas_entry(&c->replicas, i); + u64 sectors = percpu_u64_get(&c->usage[0]->data[i]); + struct jset_entry_data_usage *u = + container_of(entry, struct jset_entry_data_usage, entry); + + memset(u, 0, sizeof(*u)); + u->entry.u64s = DIV_ROUND_UP(sizeof(*u) + e->nr_devs, + sizeof(u64)) - 1; + u->entry.type = 
BCH_JSET_ENTRY_data_usage; + u->v = cpu_to_le64(sectors); + memcpy(&u->r, e, replicas_entry_bytes(e)); + + entry = vstruct_next(entry); + } + + percpu_up_read_preempt_enable(&c->mark_lock); + + return entry; +} + +void bch2_fs_mark_clean(struct bch_fs *c, bool clean) +{ + struct bch_sb_field_clean *sb_clean; + struct jset_entry *entry; + unsigned u64s; + + if (!clean) { + bch2_fs_mark_dirty(c); + return; + } + + mutex_lock(&c->sb_lock); + if (BCH_SB_CLEAN(c->disk_sb.sb)) + goto out; + + SET_BCH_SB_CLEAN(c->disk_sb.sb, true); + + c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO; + + u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved; sb_clean = bch2_sb_resize_clean(&c->disk_sb, u64s); if (!sb_clean) { @@ -917,30 +1022,16 @@ void bch2_fs_mark_clean(struct bch_fs *c, bool clean) sb_clean->journal_seq = journal_cur_seq(&c->journal) - 1; entry = sb_clean->start; + entry = bch2_journal_super_entries_add_common(c, entry); + BUG_ON((void *) entry > vstruct_end(&sb_clean->field)); + memset(entry, 0, vstruct_end(&sb_clean->field) - (void *) entry); - for (r = c->btree_roots; - r < c->btree_roots + BTREE_ID_NR; - r++) - if (r->alive) { - entry->u64s = r->key.u64s; - entry->btree_id = r - c->btree_roots; - entry->level = r->level; - entry->type = BCH_JSET_ENTRY_btree_root; - bkey_copy(&entry->start[0], &r->key); - entry = vstruct_next(entry); - BUG_ON((void *) entry > vstruct_end(&sb_clean->field)); - } - - BUG_ON(entry != vstruct_end(&sb_clean->field)); - if (le16_to_cpu(c->disk_sb.sb->version) < bcachefs_metadata_version_bkey_renumber) bch2_sb_clean_renumber(sb_clean, WRITE); - mutex_unlock(&c->btree_root_lock); -write_super: bch2_write_super(c); out: mutex_unlock(&c->sb_lock); diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h index b493d628..c48294c8 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/super-io.h @@ -134,6 +134,10 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) /* BCH_SB_FIELD_clean: */ +struct jset_entry * +bch2_journal_super_entries_add_common(struct bch_fs *, + struct jset_entry *); + void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int); void bch2_fs_mark_clean(struct bch_fs *, bool); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 1835b535..a8eb1615 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -222,6 +222,9 @@ static void __bch2_fs_read_only(struct bch_fs *c) */ bch2_journal_flush_all_pins(&c->journal); + if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags)) + goto allocator_not_running; + do { ret = bch2_alloc_write(c, false, &wrote); if (ret) { @@ -250,10 +253,12 @@ static void __bch2_fs_read_only(struct bch_fs *c) closure_wait_event(&c->btree_interior_update_wait, !bch2_btree_interior_updates_nr_pending(c)); } while (wrote); - +allocator_not_running: for_each_member_device(ca, c, i) bch2_dev_allocator_stop(ca); + clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); + bch2_fs_journal_stop(&c->journal); /* XXX: mark super that alloc info is persistent */ @@ -380,6 +385,8 @@ const char *bch2_fs_read_write(struct bch_fs *c) goto err; } + set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); + err = "error starting btree GC thread"; if (bch2_gc_thread_start(c)) goto err; @@ -683,6 +690,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_io_clock_init(&c->io_clock[READ]) || bch2_io_clock_init(&c->io_clock[WRITE]) || bch2_fs_journal_init(&c->journal) || + bch2_fs_replicas_init(c) || bch2_fs_btree_cache_init(c) || bch2_fs_io_init(c) || 
bch2_fs_encryption_init(c) || @@ -1101,9 +1109,12 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) if (ret) return ret; - mutex_lock(&c->sb_lock); - bch2_mark_dev_superblock(ca->fs, ca, 0); - mutex_unlock(&c->sb_lock); + if (test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags) && + !percpu_u64_get(&ca->usage[0]->buckets[BCH_DATA_SB])) { + mutex_lock(&c->sb_lock); + bch2_mark_dev_superblock(ca->fs, ca, 0); + mutex_unlock(&c->sb_lock); + } bch2_dev_sysfs_online(c, ca); diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 40384e7e..7e3aebed 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -132,6 +132,7 @@ do { \ write_attribute(trigger_journal_flush); write_attribute(trigger_btree_coalesce); write_attribute(trigger_gc); +write_attribute(trigger_alloc_write); write_attribute(prune_cache); rw_attribute(btree_gc_periodic); @@ -239,27 +240,29 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) if (!fs_usage) return -ENOMEM; - pr_buf(&out, "capacity:\t\t%llu\n", c->capacity); + pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity); + + pr_buf(&out, "hidden:\t\t\t\t%llu\n", + fs_usage->s.hidden); + pr_buf(&out, "data:\t\t\t\t%llu\n", + fs_usage->s.data); + pr_buf(&out, "cached:\t\t\t\t%llu\n", + fs_usage->s.cached); + pr_buf(&out, "reserved:\t\t\t%llu\n", + fs_usage->s.reserved); + pr_buf(&out, "nr_inodes:\t\t\t%llu\n", + fs_usage->s.nr_inodes); + pr_buf(&out, "online reserved:\t\t%llu\n", + fs_usage->s.online_reserved); for (i = 0; i < ARRAY_SIZE(fs_usage->persistent_reserved); i++) { pr_buf(&out, "%u replicas:\n", i + 1); -#if 0 - for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++) - pr_buf(&out, "\t%s:\t\t%llu\n", - bch2_data_types[type], - stats.replicas[replicas].data[type]); - pr_buf(&out, "\terasure coded:\t%llu\n", - stats.replicas[replicas].ec_data); -#endif - pr_buf(&out, "\treserved:\t%llu\n", + pr_buf(&out, "\treserved:\t\t%llu\n", fs_usage->persistent_reserved[i]); } - pr_buf(&out, "online reserved:\t%llu\n", - fs_usage->s.online_reserved); - for (i = 0; i < c->replicas.nr; i++) { struct bch_replicas_entry *e = cpu_replicas_entry(&c->replicas, i); @@ -492,6 +495,12 @@ STORE(__bch2_fs) if (attr == &sysfs_trigger_gc) bch2_gc(c, NULL, false); + if (attr == &sysfs_trigger_alloc_write) { + bool wrote; + + bch2_alloc_write(c, false, &wrote); + } + if (attr == &sysfs_prune_cache) { struct shrink_control sc; @@ -584,6 +593,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_trigger_journal_flush, &sysfs_trigger_btree_coalesce, &sysfs_trigger_gc, + &sysfs_trigger_alloc_write, &sysfs_prune_cache, &sysfs_copy_gc_enabled, @@ -882,20 +892,15 @@ static const char * const bch2_rw[] = { static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf) { struct printbuf out = _PBUF(buf, PAGE_SIZE); - int rw, i, cpu; + int rw, i; for (rw = 0; rw < 2; rw++) { pr_buf(&out, "%s:\n", bch2_rw[rw]); - for (i = 1; i < BCH_DATA_NR; i++) { - u64 n = 0; - - for_each_possible_cpu(cpu) - n += per_cpu_ptr(ca->io_done, cpu)->sectors[rw][i]; - + for (i = 1; i < BCH_DATA_NR; i++) pr_buf(&out, "%-12s:%12llu\n", - bch2_data_types[i], n << 9); - } + bch2_data_types[i], + percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9); } return out.pos - buf; diff --git a/libbcachefs/util.h b/libbcachefs/util.h index fbfb2085..7e1729a4 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -698,6 +699,28 @@ do { \ } \ } while (0) +static inline u64 percpu_u64_get(u64 __percpu *src) +{ + u64 
ret = 0; + int cpu; + + for_each_possible_cpu(cpu) + ret += *per_cpu_ptr(src, cpu); + return ret; +} + +static inline void percpu_u64_set(u64 __percpu *dst, u64 src) +{ + int cpu; + + for_each_possible_cpu(cpu) + *per_cpu_ptr(dst, cpu) = 0; + + preempt_disable(); + *this_cpu_ptr(dst) = src; + preempt_enable(); +} + static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr) { unsigned i;