diff --git a/.bcachefs_revision b/.bcachefs_revision
index 81c781d7..eaa2d30d 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-8ffb42b3d09418642680d23401a7a71d6ff87e3a
+f67089dc9b3d98b8c6e8990d5525b4fc7ba770b0
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 1347b1fc..d91408c2 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -42,6 +42,14 @@ static inline unsigned btree_cache_can_free(struct btree_cache *bc)
 	return max_t(int, 0, bc->used - bc->reserve);
 }
 
+static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
+{
+	if (b->c.lock.readers)
+		list_move(&b->list, &bc->freed_pcpu);
+	else
+		list_move(&b->list, &bc->freed_nonpcpu);
+}
+
 static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 {
 	struct btree_cache *bc = &c->btree_cache;
@@ -58,7 +66,8 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 	b->aux_data = NULL;
 
 	bc->used--;
-	list_move(&b->list, &bc->freed);
+
+	btree_node_to_freedlist(bc, b);
 }
 
 static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
@@ -163,11 +172,6 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
 	b->c.level	= level;
 	b->c.btree_id	= id;
 
-	if (level)
-		six_lock_pcpu_alloc(&b->c.lock);
-	else
-		six_lock_pcpu_free_rcu(&b->c.lock);
-
 	mutex_lock(&bc->lock);
 	ret = __bch2_btree_node_hash_insert(bc, b);
 	if (!ret)
@@ -328,17 +332,13 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 	}
 restart:
 	list_for_each_entry_safe(b, t, &bc->live, list) {
-		touched++;
-
-		if (touched >= nr) {
-			/* Save position */
-			if (&t->list != &bc->live)
-				list_move_tail(&bc->live, &t->list);
-			break;
+		/* tweak this */
+		if (btree_node_accessed(b)) {
+			clear_btree_node_accessed(b);
+			goto touched;
 		}
 
-		if (!btree_node_accessed(b) &&
-		    !btree_node_reclaim(c, b)) {
+		if (!btree_node_reclaim(c, b)) {
 			/* can't call bch2_btree_node_hash_remove under lock */
 			freed++;
 			if (&t->list != &bc->live)
@@ -359,8 +359,18 @@ restart:
 		else if (!mutex_trylock(&bc->lock))
 			goto out;
 		goto restart;
-	} else
-		clear_btree_node_accessed(b);
+	} else {
+		continue;
+	}
+touched:
+		touched++;
+
+		if (touched >= nr) {
+			/* Save position */
+			if (&t->list != &bc->live)
+				list_move_tail(&bc->live, &t->list);
+			break;
+		}
 	}
 
 	mutex_unlock(&bc->lock);
@@ -427,8 +437,10 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 
 	BUG_ON(atomic_read(&c->btree_cache.dirty));
 
-	while (!list_empty(&bc->freed)) {
-		b = list_first_entry(&bc->freed, struct btree, list);
+	list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
+
+	while (!list_empty(&bc->freed_nonpcpu)) {
+		b = list_first_entry(&bc->freed_nonpcpu, struct btree, list);
 		list_del(&b->list);
 		six_lock_pcpu_free(&b->c.lock);
 		kfree(b);
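/*
 * [editor's note - illustrative, not part of the patch] The hunks above
 * split the btree node cache's single `freed` list by lock mode: once a
 * six lock has been given per-cpu reader counts it shouldn't be handed to
 * a user expecting a plain lock (or vice versa), so freed nodes are parked
 * on the list matching their mode and only reused by allocations wanting
 * that same mode. A minimal standalone sketch of the invariant, with
 * generic names rather than the bcachefs API:
 */
#include <linux/list.h>

struct freed_lists {
	struct list_head freed_pcpu;	/* locks with per-cpu reader counts */
	struct list_head freed_nonpcpu;	/* locks with a plain reader count */
};

/* a node is reused only in the lock mode it was freed in */
static inline struct list_head *freed_list(struct freed_lists *l, bool pcpu)
{
	return pcpu ? &l->freed_pcpu : &l->freed_nonpcpu;
}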
@@ -482,7 +494,8 @@ void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
 	mutex_init(&bc->lock);
 	INIT_LIST_HEAD(&bc->live);
 	INIT_LIST_HEAD(&bc->freeable);
-	INIT_LIST_HEAD(&bc->freed);
+	INIT_LIST_HEAD(&bc->freed_pcpu);
+	INIT_LIST_HEAD(&bc->freed_nonpcpu);
 }
 
 /*
@@ -557,55 +570,63 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
 	}
 }
 
-struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c, bool pcpu_read_locks)
 {
 	struct btree_cache *bc = &c->btree_cache;
-	struct btree *b;
+	struct list_head *freed = pcpu_read_locks
+		? &bc->freed_pcpu
+		: &bc->freed_nonpcpu;
+	struct btree *b, *b2;
 	u64 start_time = local_clock();
 	unsigned flags;
 
 	flags = memalloc_nofs_save();
 	mutex_lock(&bc->lock);
 
-	/*
-	 * btree_free() doesn't free memory; it sticks the node on the end of
-	 * the list. Check if there's any freed nodes there:
-	 */
-	list_for_each_entry(b, &bc->freeable, list)
-		if (!btree_node_reclaim(c, b))
-			goto got_node;
-
 	/*
 	 * We never free struct btree itself, just the memory that holds the on
 	 * disk node. Check the freed list before allocating a new one:
 	 */
-	list_for_each_entry(b, &bc->freed, list)
-		if (!btree_node_reclaim(c, b))
+	list_for_each_entry(b, freed, list)
+		if (!btree_node_reclaim(c, b)) {
+			list_del_init(&b->list);
 			goto got_node;
+		}
 
-	b = NULL;
+	b = __btree_node_mem_alloc(c);
+	if (!b)
+		goto err_locked;
+
+	if (pcpu_read_locks)
+		six_lock_pcpu_alloc(&b->c.lock);
+
+	BUG_ON(!six_trylock_intent(&b->c.lock));
+	BUG_ON(!six_trylock_write(&b->c.lock));
 got_node:
-	if (b)
-		list_del_init(&b->list);
+
+	/*
+	 * btree_free() doesn't free memory; it sticks the node on the end of
+	 * the list. Check if there's any freed nodes there:
+	 */
+	list_for_each_entry(b2, &bc->freeable, list)
+		if (!btree_node_reclaim(c, b2)) {
+			swap(b->data, b2->data);
+			swap(b->aux_data, b2->aux_data);
+			btree_node_to_freedlist(bc, b2);
+			six_unlock_write(&b2->c.lock);
+			six_unlock_intent(&b2->c.lock);
+			goto got_mem;
+		}
+
 	mutex_unlock(&bc->lock);
 
-	if (!b) {
-		b = __btree_node_mem_alloc(c);
-		if (!b)
-			goto err;
+	if (btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL))
+		goto err;
 
-		BUG_ON(!six_trylock_intent(&b->c.lock));
-		BUG_ON(!six_trylock_write(&b->c.lock));
-	}
-
-	if (!b->data) {
-		if (btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL))
-			goto err;
-
-		mutex_lock(&bc->lock);
-		bc->used++;
-		mutex_unlock(&bc->lock);
-	}
+	mutex_lock(&bc->lock);
+	bc->used++;
+got_mem:
+	mutex_unlock(&bc->lock);
 
 	BUG_ON(btree_node_hashed(b));
 	BUG_ON(btree_node_dirty(b));
@@ -627,17 +648,21 @@ out:
 	return b;
 err:
 	mutex_lock(&bc->lock);
-
-	if (b) {
-		list_add(&b->list, &bc->freed);
-		six_unlock_write(&b->c.lock);
-		six_unlock_intent(&b->c.lock);
-	}
-
+err_locked:
 	/* Try to cannibalize another cached btree node: */
 	if (bc->alloc_lock == current) {
-		b = btree_node_cannibalize(c);
-		list_del_init(&b->list);
+		b2 = btree_node_cannibalize(c);
+		if (b) {
+			swap(b->data, b2->data);
+			swap(b->aux_data, b2->aux_data);
+			btree_node_to_freedlist(bc, b2);
+			six_unlock_write(&b2->c.lock);
+			six_unlock_intent(&b2->c.lock);
+		} else {
+			b = b2;
+			list_del_init(&b->list);
+		}
+
 		mutex_unlock(&bc->lock);
 
 		bch2_btree_node_hash_remove(bc, b);
@@ -677,7 +702,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
 		return ERR_PTR(-EINTR);
 	}
 
-	b = bch2_btree_node_mem_alloc(c);
+	b = bch2_btree_node_mem_alloc(c, level != 0);
 
 	if (trans && b == ERR_PTR(-ENOMEM)) {
 		trans->memory_allocation_failure = true;
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index 2901f0dc..25906127 100644
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -22,7 +22,7 @@ void bch2_btree_cache_cannibalize_unlock(struct bch_fs *);
 int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
 
 struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
-struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *, bool);
 
 struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *,
 				  const struct bkey_i *, unsigned,
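/*
 * [editor's note - illustrative, not part of the patch] With the prototype
 * change above, callers decide the lock mode up front instead of
 * bch2_btree_node_hash_insert() converting it after the fact. Every call
 * site updated by this patch follows the same rule, worth spelling out
 * (hypothetical wrapper; `level != 0` is taken from the call sites):
 */
static inline struct btree *node_alloc_for_level(struct bch_fs *c,
						 unsigned level)
{
	/* interior nodes are read-locked far more often than leaves,
	 * so only they pay for per-cpu reader counts */
	return bch2_btree_node_mem_alloc(c, level != 0);
}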
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 08f5f6b8..4f0ad06a 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1546,7 +1546,7 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
 		closure_sync(&cl);
 	} while (ret);
 
-	b = bch2_btree_node_mem_alloc(c);
+	b = bch2_btree_node_mem_alloc(c, level != 0);
 	bch2_btree_cache_cannibalize_unlock(c);
 
 	BUG_ON(IS_ERR(b));
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index 167d1771..ee89b650 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -165,13 +165,13 @@ btree_key_cache_create(struct bch_fs *c,
 		}
 
 		was_new = false;
+	} else {
+		if (btree_id == BTREE_ID_subvolumes)
+			six_lock_pcpu_alloc(&ck->c.lock);
+		else
+			six_lock_pcpu_free(&ck->c.lock);
 	}
 
-	if (btree_id == BTREE_ID_subvolumes)
-		six_lock_pcpu_alloc(&ck->c.lock);
-	else
-		six_lock_pcpu_free(&ck->c.lock);
-
 	ck->c.level		= 0;
 	ck->c.btree_id		= btree_id;
 	ck->key.btree_id	= btree_id;
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index d87069c5..c18e388b 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -152,7 +152,8 @@ struct btree_cache {
 	struct mutex		lock;
 	struct list_head	live;
 	struct list_head	freeable;
-	struct list_head	freed;
+	struct list_head	freed_pcpu;
+	struct list_head	freed_nonpcpu;
 
 	/* Number of elements in live + freeable lists */
 	unsigned		used;
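/*
 * [editor's note - illustrative, not part of the patch] In
 * btree_key_cache_create() above, choosing the lock mode moves into the
 * branch where the bkey_cached entry was freshly allocated; an entry that
 * came off the reuse path (was_new = false) keeps whatever mode it already
 * had, presumably because its lock may still be visible to readers. The
 * shape of the pattern, with hypothetical helpers standing in for the real
 * allocation paths:
 */
	ck = alloc_new_entry(c);		/* hypothetical */
	if (!ck) {
		ck = recycle_entry(c);		/* hypothetical: lock mode untouched */
	} else if (btree_id == BTREE_ID_subvolumes) {
		/* shared-heavy btree: count readers per cpu */
		six_lock_pcpu_alloc(&ck->c.lock);
	} else {
		six_lock_pcpu_free(&ck->c.lock);
	}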
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index e2cf0f58..5834190d 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -181,6 +181,7 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
 static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
 					     struct disk_reservation *res,
 					     struct closure *cl,
+					     bool interior_node,
 					     unsigned flags)
 {
 	struct write_point *wp;
@@ -242,7 +243,7 @@ retry:
 	bch2_open_bucket_get(c, wp, &ob);
 	bch2_alloc_sectors_done(c, wp);
 mem_alloc:
-	b = bch2_btree_node_mem_alloc(c);
+	b = bch2_btree_node_mem_alloc(c, interior_node);
 	six_unlock_write(&b->c.lock);
 	six_unlock_intent(&b->c.lock);
 
@@ -260,12 +261,13 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 {
 	struct bch_fs *c = as->c;
 	struct btree *b;
+	struct prealloc_nodes *p = &as->prealloc_nodes[!!level];
 	int ret;
 
 	BUG_ON(level >= BTREE_MAX_DEPTH);
-	BUG_ON(!as->nr_prealloc_nodes);
+	BUG_ON(!p->nr);
 
-	b = as->prealloc_nodes[--as->nr_prealloc_nodes];
+	b = p->b[--p->nr];
 
 	six_lock_intent(&b->c.lock, NULL, NULL);
 	six_lock_write(&b->c.lock, NULL, NULL);
@@ -377,47 +379,54 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
 static void bch2_btree_reserve_put(struct btree_update *as)
 {
 	struct bch_fs *c = as->c;
+	struct prealloc_nodes *p;
 
 	mutex_lock(&c->btree_reserve_cache_lock);
 
-	while (as->nr_prealloc_nodes) {
-		struct btree *b = as->prealloc_nodes[--as->nr_prealloc_nodes];
+	for (p = as->prealloc_nodes;
+	     p < as->prealloc_nodes + ARRAY_SIZE(as->prealloc_nodes);
+	     p++) {
+		while (p->nr) {
+			struct btree *b = p->b[--p->nr];
 
-		six_lock_intent(&b->c.lock, NULL, NULL);
-		six_lock_write(&b->c.lock, NULL, NULL);
+			six_lock_intent(&b->c.lock, NULL, NULL);
+			six_lock_write(&b->c.lock, NULL, NULL);
 
-		if (c->btree_reserve_cache_nr <
-		    ARRAY_SIZE(c->btree_reserve_cache)) {
-			struct btree_alloc *a =
-				&c->btree_reserve_cache[c->btree_reserve_cache_nr++];
+			if (c->btree_reserve_cache_nr <
+			    ARRAY_SIZE(c->btree_reserve_cache)) {
+				struct btree_alloc *a =
+					&c->btree_reserve_cache[c->btree_reserve_cache_nr++];
 
-			a->ob = b->ob;
-			b->ob.nr = 0;
-			bkey_copy(&a->k, &b->key);
-		} else {
-			bch2_open_buckets_put(c, &b->ob);
+				a->ob = b->ob;
+				b->ob.nr = 0;
+				bkey_copy(&a->k, &b->key);
+			} else {
+				bch2_open_buckets_put(c, &b->ob);
+			}
+
+			__btree_node_free(c, b);
+			six_unlock_write(&b->c.lock);
+			six_unlock_intent(&b->c.lock);
 		}
-
-		__btree_node_free(c, b);
-		six_unlock_write(&b->c.lock);
-		six_unlock_intent(&b->c.lock);
 	}
 
 	mutex_unlock(&c->btree_reserve_cache_lock);
 }
 
-static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes,
+static int bch2_btree_reserve_get(struct btree_update *as,
+				  unsigned nr_nodes[2],
 				  unsigned flags)
 {
 	struct bch_fs *c = as->c;
 	struct closure cl;
 	struct btree *b;
+	unsigned interior;
 	int ret;
 
 	closure_init_stack(&cl);
retry:
-	BUG_ON(nr_nodes > BTREE_RESERVE_MAX);
+	BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX);
 
 	/*
 	 * Protects reaping from the btree node cache and using the btree node
@@ -430,16 +439,21 @@ retry:
 	if (ret)
 		goto err;
 
-	while (as->nr_prealloc_nodes < nr_nodes) {
-		b = __bch2_btree_node_alloc(c, &as->disk_res,
-					    flags & BTREE_INSERT_NOWAIT
-					    ? NULL : &cl, flags);
-		if (IS_ERR(b)) {
-			ret = PTR_ERR(b);
-			goto err;
-		}
+	for (interior = 0; interior < 2; interior++) {
+		struct prealloc_nodes *p = as->prealloc_nodes + interior;
 
-		as->prealloc_nodes[as->nr_prealloc_nodes++] = b;
+		while (p->nr < nr_nodes[interior]) {
+			b = __bch2_btree_node_alloc(c, &as->disk_res,
+						    flags & BTREE_INSERT_NOWAIT
+						    ? NULL : &cl,
+						    interior, flags);
+			if (IS_ERR(b)) {
+				ret = PTR_ERR(b);
+				goto err;
+			}
+
+			p->b[p->nr++] = b;
+		}
 	}
 
 	bch2_btree_cache_cannibalize_unlock(c);
@@ -452,7 +466,7 @@ err:
 	if (ret == -EAGAIN)
 		goto retry;
 
-	trace_btree_reserve_get_fail(c, nr_nodes, &cl);
+	trace_btree_reserve_get_fail(c, nr_nodes[0] + nr_nodes[1], &cl);
 	return ret;
 }
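/*
 * [editor's note - illustrative, not part of the patch] prealloc_nodes is
 * now a pair of pools indexed by !!level, matching the `interior_node` flag
 * threaded through __bch2_btree_node_alloc() above: slot 0 feeds leaf
 * allocations (plain six locks), slot 1 feeds interior nodes (per-cpu
 * reader locks). Sketch of the lookup bch2_btree_node_alloc() performs
 * (hypothetical helper name):
 */
static inline struct prealloc_nodes *update_pool(struct btree_update *as,
						 unsigned level)
{
	return &as->prealloc_nodes[!!level];	/* 0: leaves, 1: interior */
}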
@@ -947,13 +961,15 @@ static void bch2_btree_update_done(struct btree_update *as)
 
 static struct btree_update *
 bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
-			unsigned level, unsigned nr_nodes, unsigned flags)
+			unsigned level, bool split, unsigned flags)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_update *as;
 	u64 start_time = local_clock();
 	int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
 		? BCH_DISK_RESERVATION_NOFAIL : 0;
+	unsigned nr_nodes[2] = { 0, 0 };
+	unsigned update_level = level;
 	int journal_flags = 0;
 	int ret = 0;
 
@@ -964,10 +980,24 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 	if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
 		journal_flags |= JOURNAL_RES_GET_NONBLOCK;
 
-	/*
-	 * XXX: figure out how far we might need to split,
-	 * instead of locking/reserving all the way to the root:
-	 */
+	while (1) {
+		nr_nodes[!!update_level] += 1 + split;
+		update_level++;
+
+		if (!btree_path_node(path, update_level))
+			break;
+
+		/*
+		 * XXX: figure out how far we might need to split,
+		 * instead of locking/reserving all the way to the root:
+		 */
+		split = update_level + 1 < BTREE_MAX_DEPTH;
+	}
+
+	/* Might have to allocate a new root: */
+	if (update_level < BTREE_MAX_DEPTH)
+		nr_nodes[1] += 1;
+
 	if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) {
 		trace_trans_restart_iter_upgrade(trans->fn, _RET_IP_,
 						 path->btree_id, &path->pos);
@@ -1030,7 +1060,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 	}
 
 	ret = bch2_disk_reservation_get(c, &as->disk_res,
-			nr_nodes * btree_sectors(c),
+			(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
 			c->opts.metadata_replicas,
 			disk_res_flags);
 	if (ret)
@@ -1551,14 +1581,13 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
 		       struct btree_path *path,
 		       unsigned flags)
 {
-	struct bch_fs *c = trans->c;
 	struct btree *b = path_l(path)->b;
 	struct btree_update *as;
 	unsigned l;
 	int ret = 0;
 
 	as = bch2_btree_update_start(trans, path, path->level,
-		btree_update_reserve_required(c, b), flags);
+				     true, flags);
 	if (IS_ERR(as))
 		return PTR_ERR(as);
 
@@ -1669,11 +1698,10 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 		goto out;
 
 	parent = btree_node_parent(path, b);
-	as = bch2_btree_update_start(trans, path, level,
-			btree_update_reserve_required(c, parent) + 1,
-			flags|
+	as = bch2_btree_update_start(trans, path, level, false,
 			BTREE_INSERT_NOFAIL|
-			BTREE_INSERT_USE_RESERVE);
+			BTREE_INSERT_USE_RESERVE|
+			flags);
 	ret = PTR_ERR_OR_ZERO(as);
 	if (ret)
 		goto err;
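/*
 * [editor's note] Worked example of the new reserve sizing loop in
 * bch2_btree_update_start() above (assuming a 3-level tree, nodes at
 * levels 0-2, starting at level 0 with split == true, BTREE_MAX_DEPTH == 4):
 *
 *   level 0: nr_nodes[0] += 1 + 1  ->  { 2, 0 }  (node plus its new sibling)
 *   level 1: nr_nodes[1] += 1 + 1  ->  { 2, 2 }  (a split may ripple up)
 *   level 2: nr_nodes[1] += 1 + 1  ->  { 2, 4 }
 *   level 3 doesn't exist -> break; update_level == 3 < BTREE_MAX_DEPTH,
 *   so one more interior node is reserved for a possible new root:
 *
 *   nr_nodes == { 2, 5 }, i.e. 7 nodes sized to the actual path, replacing
 *   the old callers' btree_update_reserve_required() estimates.
 */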
@@ -1756,10 +1784,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 	parent = btree_node_parent(iter->path, b);
 	as = bch2_btree_update_start(trans, iter->path, b->c.level,
-			(parent
-			 ? btree_update_reserve_required(c, parent)
-			 : 0) + 1,
-			flags);
+			false, flags);
 	ret = PTR_ERR_OR_ZERO(as);
 	if (ret) {
 		trace_btree_gc_rewrite_node_fail(c, b);
@@ -1996,7 +2021,7 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
 			return -EINTR;
 		}
 
-		new_hash = bch2_btree_node_mem_alloc(c);
+		new_hash = bch2_btree_node_mem_alloc(c, false);
 	}
 
 	path->intent_ref++;
@@ -2072,7 +2097,7 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
 		closure_sync(&cl);
 	} while (ret);
 
-	b = bch2_btree_node_mem_alloc(c);
+	b = bch2_btree_node_mem_alloc(c, false);
 	bch2_btree_cache_cannibalize_unlock(c);
 
 	set_btree_node_fake(b);
diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h
index 8dc86fa6..e72eb879 100644
--- a/libbcachefs/btree_update_interior.h
+++ b/libbcachefs/btree_update_interior.h
@@ -76,8 +76,10 @@ struct btree_update {
 	struct journal_entry_pin	journal;
 
 	/* Preallocated nodes we reserve when we start the update: */
-	struct btree			*prealloc_nodes[BTREE_UPDATE_NODES_MAX];
-	unsigned			nr_prealloc_nodes;
+	struct prealloc_nodes {
+		struct btree		*b[BTREE_UPDATE_NODES_MAX];
+		unsigned		nr;
+	}				prealloc_nodes[2];
 
 	/* Nodes being freed: */
 	struct keylist			old_keys;
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 31dbd4c6..f6e96132 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -980,6 +980,9 @@ retry:
 		bch2_btree_iter_set_pos(&iter,
 			POS(iter.pos.inode, iter.pos.offset + sectors));
+
+		if (btree_trans_too_many_iters(&trans))
+			goto retry;
 	}
 	start = iter.pos.offset;
 	bch2_trans_iter_exit(&trans, &iter);
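/*
 * [editor's note - illustrative, not part of the patch] The fs.c hunk above
 * adds the usual bcachefs guard for long scans inside one btree_trans: once
 * the transaction has accumulated too many btree paths, jump back to the
 * enclosing retry label and resume from the current iterator position with
 * a fresh set of paths. Rough shape (surrounding loop reconstructed from
 * context, not copied from fs.c; `more_work`/`next_pos` are placeholders):
 */
retry:
	/* ... (re)start the scan ... */
	while (more_work) {
		/* process one extent, then advance */
		bch2_btree_iter_set_pos(&iter, next_pos);

		if (btree_trans_too_many_iters(&trans))
			goto retry;	/* path table nearly full: restart */
	}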
diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c
index 71bf26eb..15acbabc 100644
--- a/libbcachefs/opts.c
+++ b/libbcachefs/opts.c
@@ -96,6 +96,16 @@ const char * const bch2_d_types[BCH_DT_MAX] = {
 	[DT_SUBVOL]	= "subvol",
 };
 
+u64 BCH2_NO_SB_OPT(const struct bch_sb *sb)
+{
+	BUG();
+}
+
+void SET_BCH2_NO_SB_OPT(struct bch_sb *sb, u64 v)
+{
+	BUG();
+}
+
 void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src)
 {
#define x(_name, ...)						\
@@ -431,6 +441,22 @@ out:
 	return ret;
 }
 
+u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id)
+{
+	const struct bch_option *opt = bch2_opt_table + id;
+	u64 v;
+
+	v = opt->get_sb(sb);
+
+	if (opt->flags & OPT_SB_FIELD_ILOG2)
+		v = 1ULL << v;
+
+	if (opt->flags & OPT_SB_FIELD_SECTORS)
+		v <<= 9;
+
+	return v;
+}
+
 /*
  * Initial options from superblock - here we don't want any options undefined,
  * any options the superblock doesn't specify are set to 0:
@@ -444,16 +470,10 @@ int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb)
 		const struct bch_option *opt = bch2_opt_table + id;
 		u64 v;
 
-		if (opt->get_sb == NO_SB_OPT)
+		if (opt->get_sb == BCH2_NO_SB_OPT)
 			continue;
 
-		v = opt->get_sb(sb);
-
-		if (opt->flags & OPT_SB_FIELD_ILOG2)
-			v = 1ULL << v;
-
-		if (opt->flags & OPT_SB_FIELD_SECTORS)
-			v <<= 9;
+		v = bch2_opt_from_sb(sb, id);
 
 		ret = bch2_opt_validate(opt, "superblock option ", v);
 		if (ret)
@@ -467,7 +487,7 @@ int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb)
 
 void __bch2_opt_set_sb(struct bch_sb *sb, const struct bch_option *opt, u64 v)
 {
-	if (opt->set_sb == SET_NO_SB_OPT)
+	if (opt->set_sb == SET_BCH2_NO_SB_OPT)
 		return;
 
 	if (opt->flags & OPT_SB_FIELD_SECTORS)
@@ -481,7 +501,7 @@ void __bch2_opt_set_sb(struct bch_sb *sb, const struct bch_option *opt, u64 v)
 
 void bch2_opt_set_sb(struct bch_fs *c, const struct bch_option *opt, u64 v)
 {
-	if (opt->set_sb == SET_NO_SB_OPT)
+	if (opt->set_sb == SET_BCH2_NO_SB_OPT)
 		return;
 
 	mutex_lock(&c->sb_lock);
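/*
 * [editor's note - illustrative, not part of the patch] NO_SB_OPT was a
 * dummy LE64_BITMASK; it only existed so that `opt->get_sb == NO_SB_OPT`
 * could mark options with no superblock field. The replacement keeps that
 * pointer comparison but makes the sentinel a pair of real functions of the
 * matching type, which BUG() if ever actually called. Callers outside
 * opts.c can now also reuse the decoding in one place, e.g. (sketch,
 * assuming a valid sb and an option that has a superblock field):
 */
	if (bch2_opt_table[id].get_sb != BCH2_NO_SB_OPT) {
		u64 v = bch2_opt_from_sb(sb, id);
		/* v already has ILOG2/SECTORS decoding applied */
	}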
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index bafacf6b..9e68f100 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -42,7 +42,8 @@ static inline const char *bch2_d_type_str(unsigned d_type)
  */
 
 /* dummy option, for options that aren't stored in the superblock */
-LE64_BITMASK(NO_SB_OPT,		struct bch_sb, flags[0], 0, 0);
+u64 BCH2_NO_SB_OPT(const struct bch_sb *);
+void SET_BCH2_NO_SB_OPT(struct bch_sb *, u64);
 
 /* When can be set: */
 enum opt_flags {
@@ -202,7 +203,7 @@ enum opt_type {
 	x(btree_node_mem_ptr_optimization, u8,				\
 	  OPT_FS|OPT_MOUNT|OPT_RUNTIME,					\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			true,				\
+	  BCH2_NO_SB_OPT,		true,				\
 	  NULL,		"Stash pointer to in memory btree node in btree ptr")\
 	x(gc_reserve_percent,		u8,				\
 	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,			\
@@ -229,7 +230,7 @@ enum opt_type {
 	x(inline_data,			u8,				\
 	  OPT_FS|OPT_MOUNT|OPT_RUNTIME,					\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			true,				\
+	  BCH2_NO_SB_OPT,		true,				\
 	  NULL,		"Enable inline data extents")			\
 	x(acl,				u8,				\
 	  OPT_FS|OPT_FORMAT|OPT_MOUNT,					\
@@ -254,22 +255,22 @@ enum opt_type {
 	x(degraded,			u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Allow mounting in degraded mode")		\
 	x(very_degraded,		u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Allow mounting in when data will be missing")	\
 	x(discard,			u8,				\
 	  OPT_FS|OPT_MOUNT|OPT_DEVICE,					\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Enable discard/TRIM support")			\
 	x(verbose,			u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Extra debugging information during mount/recovery")\
 	x(journal_flush_delay,		u32,				\
 	  OPT_FS|OPT_MOUNT|OPT_RUNTIME,					\
@@ -291,48 +292,48 @@ enum opt_type {
 	x(fsck,				u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Run fsck on mount")				\
 	x(fix_errors,			u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Fix errors during fsck without asking")	\
 	x(ratelimit_errors,		u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			RATELIMIT_ERRORS_DEFAULT,	\
+	  BCH2_NO_SB_OPT,		RATELIMIT_ERRORS_DEFAULT,	\
 	  NULL,		"Ratelimit error messages during fsck")		\
 	x(nochanges,			u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Super read only mode - no writes at all will be issued,\n"\
 			"even if we have to replay the journal")	\
 	x(norecovery,			u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Don't replay the journal")			\
 	x(rebuild_replicas,		u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Rebuild the superblock replicas section")	\
 	x(keep_journal,			u8,				\
 	  0,								\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Don't free journal entries/keys after startup")\
 	x(read_entire_journal,		u8,				\
 	  0,								\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Read all journal entries, not just dirty ones")\
 	x(read_journal_only,		u8,				\
 	  0,								\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Only read the journal, skip the rest of recovery")\
 	x(journal_transaction_names,	u8,				\
 	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,			\
@@ -342,58 +343,58 @@ enum opt_type {
 	x(noexcl,			u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Don't open device in exclusive mode")		\
 	x(sb,				u64,				\
 	  OPT_MOUNT,							\
 	  OPT_UINT(0, S64_MAX),						\
-	  NO_SB_OPT,			BCH_SB_SECTOR,			\
+	  BCH2_NO_SB_OPT,		BCH_SB_SECTOR,			\
 	  "offset",	"Sector offset of superblock")			\
 	x(read_only,			u8,				\
 	  OPT_FS,							\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		NULL)						\
 	x(nostart,			u8,				\
 	  0,								\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Don\'t start filesystem, only open devices")	\
 	x(reconstruct_alloc,		u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Reconstruct alloc btree")			\
 	x(version_upgrade,		u8,				\
 	  OPT_FS|OPT_MOUNT,						\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Set superblock to latest version,\n"		\
 			"allowing any new features to be used")		\
 	x(buckets_nouse,		u8,				\
 	  0,								\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		"Allocate the buckets_nouse bitmap")		\
 	x(project,			u8,				\
 	  OPT_INODE,							\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH2_NO_SB_OPT,		false,				\
 	  NULL,		NULL)						\
 	x(fs_size,			u64,				\
 	  OPT_DEVICE,							\
 	  OPT_UINT(0, S64_MAX),						\
-	  NO_SB_OPT,			0,				\
+	  BCH2_NO_SB_OPT,		0,				\
 	  "size",	"Size of filesystem on device")			\
 	x(bucket,			u32,				\
 	  OPT_DEVICE,							\
 	  OPT_UINT(0, S64_MAX),						\
-	  NO_SB_OPT,			0,				\
+	  BCH2_NO_SB_OPT,		0,				\
 	  "size",	"Size of filesystem on device")			\
 	x(durability,			u8,				\
 	  OPT_DEVICE,							\
 	  OPT_UINT(0, BCH_REPLICAS_MAX),				\
-	  NO_SB_OPT,			1,				\
+	  BCH2_NO_SB_OPT,		1,				\
 	  "n",		"Data written to this device will be considered\n"\
 			"to have already been replicated n times")
 
@@ -475,6 +476,7 @@ bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id);
 u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id);
 void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64);
 
+u64 bch2_opt_from_sb(struct bch_sb *, enum bch_opt_id);
 int bch2_opts_from_sb(struct bch_opts *, struct bch_sb *);
 void __bch2_opt_set_sb(struct bch_sb *, const struct bch_option *, u64);
 void bch2_opt_set_sb(struct bch_fs *, const struct bch_option *, u64);
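/*
 * [editor's note - illustrative, not part of the patch] The super-io.c
 * changes below all apply one pattern: print the label, let pr_tab()
 * advance the printbuf to its first tabstop (defaulted to column 32 when
 * the caller hasn't set one), then print the value, giving two aligned
 * columns. The pattern, using calls taken from the hunks below:
 */
	if (!out->tabstops[0])
		out->tabstops[0] = 32;

	pr_buf(out, "Bucket size:");
	pr_tab(out);
	pr_units(out, bucket_size, bucket_size << 9);
	pr_newline(out);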
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 1a70adae..d87edaad 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -1045,45 +1045,56 @@ static void bch2_sb_members_to_text(struct printbuf *out, struct bch_sb *sb,
 		if (!bch2_member_exists(m))
 			continue;
 
-		pr_buf(out, "Device: %u", i);
+		pr_buf(out, "Device:");
+		pr_tab(out);
+		pr_buf(out, "%u", i);
 		pr_newline(out);
 
 		pr_indent_push(out, 2);
 
-		pr_buf(out, "UUID: ");
+		pr_buf(out, "UUID:");
+		pr_tab(out);
 		pr_uuid(out, m->uuid.b);
 		pr_newline(out);
 
-		pr_buf(out, "Size: ");
+		pr_buf(out, "Size:");
+		pr_tab(out);
 		pr_units(out, device_size, device_size << 9);
 		pr_newline(out);
 
-		pr_buf(out, "Bucket size: ");
+		pr_buf(out, "Bucket size:");
+		pr_tab(out);
 		pr_units(out, bucket_size, bucket_size << 9);
 		pr_newline(out);
 
-		pr_buf(out, "First bucket: %u",
-		       le16_to_cpu(m->first_bucket));
+		pr_buf(out, "First bucket:");
+		pr_tab(out);
+		pr_buf(out, "%u", le16_to_cpu(m->first_bucket));
 		pr_newline(out);
 
-		pr_buf(out, "Buckets: %llu",
-		       le64_to_cpu(m->nbuckets));
+		pr_buf(out, "Buckets:");
+		pr_tab(out);
+		pr_buf(out, "%llu", le64_to_cpu(m->nbuckets));
 		pr_newline(out);
 
-		pr_buf(out, "Last mount: ");
+		pr_buf(out, "Last mount:");
+		pr_tab(out);
 		if (m->last_mount)
 			pr_time(out, le64_to_cpu(m->last_mount));
 		else
 			pr_buf(out, "(never)");
 		pr_newline(out);
 
-		pr_buf(out, "State: %s",
+		pr_buf(out, "State:");
+		pr_tab(out);
+		pr_buf(out, "%s",
 		       BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR
 		       ? bch2_member_states[BCH_MEMBER_STATE(m)]
 		       : "unknown");
 		pr_newline(out);
 
-		pr_buf(out, "Group: ");
+		pr_buf(out, "Group:");
+		pr_tab(out);
 		if (BCH_MEMBER_GROUP(m)) {
 			unsigned idx = BCH_MEMBER_GROUP(m) - 1;
 
@@ -1097,7 +1108,8 @@ static void bch2_sb_members_to_text(struct printbuf *out, struct bch_sb *sb,
 		}
 		pr_newline(out);
 
-		pr_buf(out, "Data allowed: ");
+		pr_buf(out, "Data allowed:");
+		pr_tab(out);
 		if (BCH_MEMBER_DATA_ALLOWED(m))
 			bch2_flags_to_text(out, bch2_data_types,
 					   BCH_MEMBER_DATA_ALLOWED(m));
@@ -1105,15 +1117,17 @@ static void bch2_sb_members_to_text(struct printbuf *out, struct bch_sb *sb,
 			pr_buf(out, "(none)");
 		pr_newline(out);
 
-		pr_buf(out, "Has data: ");
+		pr_buf(out, "Has data:");
+		pr_tab(out);
 		if (data_have)
 			bch2_flags_to_text(out, bch2_data_types, data_have);
 		else
 			pr_buf(out, "(none)");
 		pr_newline(out);
 
-		pr_buf(out, "Discard: %llu",
-		       BCH_MEMBER_DISCARD(m));
+		pr_buf(out, "Discard:");
+		pr_tab(out);
+		pr_buf(out, "%llu", BCH_MEMBER_DISCARD(m));
 		pr_newline(out);
 
 		pr_indent_pop(out, 2);
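/*
 * [editor's note] With the 32-column tabstop and pr_indent_push(out, 2),
 * the member section above renders roughly as (values invented for
 * illustration):
 *
 *   Device:                         0
 *     UUID:                         00000000-0000-0000-0000-000000000000
 *     Size:                         100 GiB
 *     First bucket:                 0
 *     Buckets:                      819200
 *     Last mount:                   (never)
 */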
@@ -1449,6 +1463,9 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
 	const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR
 		? bch2_sb_field_ops[type] : NULL;
 
+	if (!out->tabstops[0])
+		out->tabstops[0] = 32;
+
 	if (ops)
 		pr_buf(out, "%s", bch2_sb_fields[type]);
 	else
@@ -1497,6 +1514,9 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
 	u64 fields_have = 0;
 	unsigned nr_devices = 0;
 
+	if (!out->tabstops[0])
+		out->tabstops[0] = 32;
+
 	mi = bch2_sb_get_members(sb);
 	if (mi) {
 		struct bch_member *m;
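/*
 * [editor's note] The bch2_sb_to_text() rewrite below drops a dozen
 * hand-maintained per-field prints ("Metadata replicas:", "Compression
 * type:", ...) in favour of one loop over bch2_opt_table: every option with
 * a superblock field (get_sb != BCH2_NO_SB_OPT) is rendered through
 * bch2_opt_to_text(), so future superblock options appear in the output
 * without touching this function. Rough shape of the new section (values
 * invented for illustration):
 *
 *   Options:
 *     metadata_replicas:            2
 *     compression:                  lz4
 */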
@@ -1507,137 +1527,105 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
 			nr_devices += bch2_member_exists(m);
 	}
 
-	pr_buf(out, "External UUID: ");
+	pr_buf(out, "External UUID:");
+	pr_tab(out);
 	pr_uuid(out, sb->user_uuid.b);
 	pr_newline(out);
 
-	pr_buf(out, "Internal UUID: ");
+	pr_buf(out, "Internal UUID:");
+	pr_tab(out);
 	pr_uuid(out, sb->uuid.b);
 	pr_newline(out);
 
-	pr_buf(out, "Device index: %u", sb->dev_idx);
+	pr_buf(out, "Device index:");
+	pr_tab(out);
+	pr_buf(out, "%u", sb->dev_idx);
 	pr_newline(out);
 
-	pr_buf(out, "Label: ");
+	pr_buf(out, "Label:");
+	pr_tab(out);
 	pr_buf(out, "%.*s", (int) sizeof(sb->label), sb->label);
 	pr_newline(out);
 
-	pr_buf(out, "Version: %u", le16_to_cpu(sb->version));
+	pr_buf(out, "Version:");
+	pr_tab(out);
+	pr_buf(out, "%u", le16_to_cpu(sb->version));
 	pr_newline(out);
 
-	pr_buf(out, "Oldest version on disk: %u", le16_to_cpu(sb->version_min));
+	pr_buf(out, "Oldest version on disk:");
+	pr_tab(out);
+	pr_buf(out, "%u", le16_to_cpu(sb->version_min));
 	pr_newline(out);
 
-	pr_buf(out, "Created: ");
+	pr_buf(out, "Created:");
+	pr_tab(out);
 	if (sb->time_base_lo)
 		pr_time(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
 	else
 		pr_buf(out, "(not set)");
 	pr_newline(out);
 
-	pr_buf(out, "Squence number: %llu", le64_to_cpu(sb->seq));
+	pr_buf(out, "Sequence number:");
+	pr_tab(out);
+	pr_buf(out, "%llu", le64_to_cpu(sb->seq));
 	pr_newline(out);
 
-	pr_buf(out, "Block_size: ");
-	pr_units(out, le16_to_cpu(sb->block_size),
-		 (u32) le16_to_cpu(sb->block_size) << 9);
+	pr_buf(out, "Superblock size:");
+	pr_tab(out);
+	pr_buf(out, "%zu", vstruct_bytes(sb));
 	pr_newline(out);
 
-	pr_buf(out, "Btree node size: ");
-	pr_units(out, BCH_SB_BTREE_NODE_SIZE(sb),
-		 BCH_SB_BTREE_NODE_SIZE(sb) << 9);
+	pr_buf(out, "Clean:");
+	pr_tab(out);
+	pr_buf(out, "%llu", BCH_SB_CLEAN(sb));
 	pr_newline(out);
 
-	pr_buf(out, "Error action: %s",
-	       BCH_SB_ERROR_ACTION(sb) < BCH_ON_ERROR_NR
-	       ? bch2_error_actions[BCH_SB_ERROR_ACTION(sb)]
-	       : "unknown");
+	pr_buf(out, "Devices:");
+	pr_tab(out);
+	pr_buf(out, "%u", nr_devices);
 	pr_newline(out);
 
-	pr_buf(out, "Clean: %llu", BCH_SB_CLEAN(sb));
+	pr_buf(out, "Sections:");
+	vstruct_for_each(sb, f)
+		fields_have |= 1 << le32_to_cpu(f->type);
+	pr_tab(out);
+	bch2_flags_to_text(out, bch2_sb_fields, fields_have);
 	pr_newline(out);
 
-	pr_buf(out, "Features: ");
+	pr_buf(out, "Features:");
+	pr_tab(out);
 	bch2_flags_to_text(out, bch2_sb_features,
 			   le64_to_cpu(sb->features[0]));
 	pr_newline(out);
 
-	pr_buf(out, "Compat features: ");
+	pr_buf(out, "Compat features:");
+	pr_tab(out);
 	bch2_flags_to_text(out, bch2_sb_compat,
 			   le64_to_cpu(sb->compat[0]));
 	pr_newline(out);
 
-	pr_buf(out, "Metadata replicas: %llu", BCH_SB_META_REPLICAS_WANT(sb));
 	pr_newline(out);
+	pr_buf(out, "Options:");
+	pr_newline(out);
+	pr_indent_push(out, 2);
+	{
+		enum bch_opt_id id;
 
-	pr_buf(out, "Data replicas: %llu", BCH_SB_DATA_REPLICAS_WANT(sb));
-	pr_newline(out);
+		for (id = 0; id < bch2_opts_nr; id++) {
+			const struct bch_option *opt = bch2_opt_table + id;
 
-	pr_buf(out, "Metadata checksum type: %s (%llu)",
-	       BCH_SB_META_CSUM_TYPE(sb) < BCH_CSUM_OPT_NR
-	       ? bch2_csum_opts[BCH_SB_META_CSUM_TYPE(sb)]
-	       : "unknown",
-	       BCH_SB_META_CSUM_TYPE(sb));
-	pr_newline(out);
+			if (opt->get_sb != BCH2_NO_SB_OPT) {
+				u64 v = bch2_opt_from_sb(sb, id);
 
-	pr_buf(out, "Data checksum type: %s (%llu)",
-	       BCH_SB_DATA_CSUM_TYPE(sb) < BCH_CSUM_OPT_NR
-	       ? bch2_csum_opts[BCH_SB_DATA_CSUM_TYPE(sb)]
-	       : "unknown",
-	       BCH_SB_DATA_CSUM_TYPE(sb));
-	pr_newline(out);
+				pr_buf(out, "%s:", opt->attr.name);
+				pr_tab(out);
+				bch2_opt_to_text(out, NULL, opt, v, OPT_HUMAN_READABLE|OPT_SHOW_FULL_LIST);
+				pr_newline(out);
+			}
+		}
+	}
 
-	pr_buf(out, "Compression type: %s (%llu)",
-	       BCH_SB_COMPRESSION_TYPE(sb) < BCH_COMPRESSION_OPT_NR
-	       ? bch2_compression_opts[BCH_SB_COMPRESSION_TYPE(sb)]
-	       : "unknown",
-	       BCH_SB_COMPRESSION_TYPE(sb));
-	pr_newline(out);
-
-	pr_buf(out, "Foreground write target: ");
-	bch2_sb_target_to_text(out, sb, BCH_SB_FOREGROUND_TARGET(sb));
-	pr_newline(out);
-
-	pr_buf(out, "Background write target: ");
-	bch2_sb_target_to_text(out, sb, BCH_SB_BACKGROUND_TARGET(sb));
-	pr_newline(out);
-
-	pr_buf(out, "Promote target: ");
-	bch2_sb_target_to_text(out, sb, BCH_SB_PROMOTE_TARGET(sb));
-	pr_newline(out);
-
-	pr_buf(out, "Metadata target: ");
-	bch2_sb_target_to_text(out, sb, BCH_SB_METADATA_TARGET(sb));
-	pr_newline(out);
-
-	pr_buf(out, "String hash type: %s (%llu)",
-	       BCH_SB_STR_HASH_TYPE(sb) < BCH_STR_HASH_NR
-	       ? bch2_str_hash_types[BCH_SB_STR_HASH_TYPE(sb)]
-	       : "unknown",
-	       BCH_SB_STR_HASH_TYPE(sb));
-	pr_newline(out);
-
-	pr_buf(out, "32 bit inodes: %llu", BCH_SB_INODE_32BIT(sb));
-	pr_newline(out);
-
-	pr_buf(out, "GC reserve percentage: %llu%%", BCH_SB_GC_RESERVE(sb));
-	pr_newline(out);
-
-	pr_buf(out, "Root reserve percentage: %llu%%", BCH_SB_ROOT_RESERVE(sb));
-	pr_newline(out);
-
-	pr_buf(out, "Devices: %u live, %u total",
-	       nr_devices, sb->nr_devices);
-	pr_newline(out);
-
-	pr_buf(out, "Sections: ");
-	vstruct_for_each(sb, f)
-		fields_have |= 1 << le32_to_cpu(f->type);
-	bch2_flags_to_text(out, bch2_sb_fields, fields_have);
-	pr_newline(out);
-
-	pr_buf(out, "Superblock size: %zu", vstruct_bytes(sb));
-	pr_newline(out);
+	pr_indent_pop(out, 2);
 
 	if (print_layout) {
 		pr_newline(out);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 9af8eb35..fb7f8d6d 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -1162,9 +1162,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
 	ca->mi = bch2_mi_to_cpu(member);
 	ca->uuid = member->uuid;
 
-	if (opt_defined(c->opts, discard))
-		ca->mi.discard = opt_get(c->opts, discard);
-
 	if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete,
 			    0, GFP_KERNEL) ||
 	    percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,