From 55e3496d06c9b112f93bb1dea942564f900c2f7d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 10 Dec 2021 13:32:35 -0500 Subject: [PATCH] Rename --group to --label Disk labels used to be called groups - not all uses had been converted. This renames --group to --label, and --label to --fs_label Signed-off-by: Kent Overstreet --- .bcachefs_revision | 2 +- cmd_device.c | 6 +- cmd_format.c | 18 ++-- libbcachefs.c | 6 +- libbcachefs.h | 2 +- libbcachefs/bcachefs.h | 3 +- libbcachefs/bcachefs_format.h | 3 + libbcachefs/btree_gc.c | 110 ++++++++++++++++-------- libbcachefs/btree_update.h | 4 +- libbcachefs/btree_update_leaf.c | 17 ++-- libbcachefs/buckets.c | 142 +++++++++++++++--------------- libbcachefs/dirent.c | 8 +- libbcachefs/ec.c | 147 +++++++------------------------- libbcachefs/ec.h | 3 +- libbcachefs/ec_types.h | 10 +++ libbcachefs/journal.c | 88 +++++++------------ libbcachefs/journal_io.c | 2 +- libbcachefs/journal_reclaim.c | 5 +- libbcachefs/journal_types.h | 2 - libbcachefs/move.c | 3 +- libbcachefs/movinggc.c | 21 ++--- libbcachefs/opts.h | 115 ++++++++++++++----------- libbcachefs/recovery.c | 3 +- libbcachefs/subvolume.c | 21 +++-- libbcachefs/super.c | 9 ++ libbcachefs/sysfs.c | 22 +---- libbcachefs/tests.c | 4 +- 27 files changed, 364 insertions(+), 412 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 236c0c17..d01cd450 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -50d6a25d9c0090d84ad9aadd29f76bc0abff5423 +069e88fae5fdce2aea08c9e192cf6ac5c7ed492d diff --git a/cmd_device.c b/cmd_device.c index 1d91ecdd..e7e33367 100644 --- a/cmd_device.c +++ b/cmd_device.c @@ -49,7 +49,7 @@ static void device_add_usage(void) " -S, --fs_size=size Size of filesystem on device\n" " -B, --bucket=size Bucket size\n" " -D, --discard Enable discards\n" - " -g, --group=group Disk group\n" + " -l, --label=label Disk label\n" " -f, --force Use device even if it appears to already be formatted\n" " -h, --help Display this help and exit\n" "\n" @@ -62,7 +62,7 @@ int cmd_device_add(int argc, char *argv[]) { "fs_size", required_argument, NULL, 'S' }, { "bucket", required_argument, NULL, 'B' }, { "discard", no_argument, NULL, 'D' }, - { "group", required_argument, NULL, 'g' }, + { "label", required_argument, NULL, 'l' }, { "force", no_argument, NULL, 'f' }, { "help", no_argument, NULL, 'h' }, { NULL } @@ -89,7 +89,7 @@ int cmd_device_add(int argc, char *argv[]) dev_opts.discard = true; break; case 'g': - dev_opts.group = strdup(optarg); + dev_opts.label = strdup(optarg); break; case 'f': force = true; diff --git a/cmd_format.c b/cmd_format.c index 3f96f5de..2c610af2 100644 --- a/cmd_format.c +++ b/cmd_format.c @@ -33,12 +33,12 @@ x(0, replicas, required_argument) \ x(0, encrypted, no_argument) \ x(0, no_passphrase, no_argument) \ -x('L', label, required_argument) \ +x('L', fs_label, required_argument) \ x('U', uuid, required_argument) \ x(0, fs_size, required_argument) \ x(0, superblock_size, required_argument) \ x(0, bucket_size, required_argument) \ -x('g', group, required_argument) \ +x('l', label, required_argument) \ x(0, discard, no_argument) \ x(0, data_allowed, required_argument) \ x(0, durability, required_argument) \ @@ -61,7 +61,7 @@ static void usage(void) " --replicas=# Sets both data and metadata replicas\n" " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n" " --no_passphrase Don't encrypt master encryption key\n" - " -L, --label=label\n" + " -L, --fs_label=label\n" " -U, --uuid=uuid\n" " --superblock_size=size\n" "\n" @@ -69,14 +69,14 @@ static void usage(void) bch2_opts_usage(OPT_DEVICE); - puts(" -g, --group=label Disk group\n" + puts(" -l, --label=label Disk label\n" "\n" " -f, --force\n" " -q, --quiet Only print errors\n" " -h, --help Display this help and exit\n" "\n" "Device specific options must come before corresponding devices, e.g.\n" - " bcachefs format --group cache /dev/sdb /dev/sdc\n" + " bcachefs format --label cache /dev/sdb /dev/sdc\n" "\n" "Report bugs to "); } @@ -147,7 +147,7 @@ int cmd_format(int argc, char *argv[]) case O_no_passphrase: no_passphrase = true; break; - case O_label: + case O_fs_label: case 'L': opts.label = optarg; break; @@ -176,9 +176,9 @@ int cmd_format(int argc, char *argv[]) dev_opts.bucket_size = hatoi_validate(optarg, "bucket size"); break; - case O_group: - case 'g': - dev_opts.group = optarg; + case O_label: + case 'l': + dev_opts.label = optarg; break; case O_discard: dev_opts.discard = true; diff --git a/libbcachefs.c b/libbcachefs.c index 34246dc9..beba6f33 100644 --- a/libbcachefs.c +++ b/libbcachefs.c @@ -247,15 +247,15 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs, SET_BCH_MEMBER_DURABILITY(m, i->durability + 1); } - /* Disk groups */ + /* Disk labels*/ for (i = devs; i < devs + nr_devs; i++) { struct bch_member *m = mi->members + (i - devs); int idx; - if (!i->group) + if (!i->label) continue; - idx = bch2_disk_path_find_or_create(&sb, i->group); + idx = bch2_disk_path_find_or_create(&sb, i->label); if (idx < 0) die("error creating disk path: %s", idx); diff --git a/libbcachefs.h b/libbcachefs.h index 7cdbf696..b5f9673f 100644 --- a/libbcachefs.h +++ b/libbcachefs.h @@ -54,7 +54,7 @@ struct dev_opts { char *path; u64 size; /* 512 byte sectors */ unsigned bucket_size; - const char *group; + const char *label; unsigned data_allowed; unsigned durability; bool discard; diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 0439f3e0..fee1fc58 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -826,7 +826,8 @@ struct bch_fs { struct mutex data_progress_lock; /* STRIPES: */ - GENRADIX(struct stripe) stripes[2]; + GENRADIX(struct stripe) stripes; + GENRADIX(struct gc_stripe) gc_stripes; ec_stripes_heap ec_stripes_heap; spinlock_t ec_stripes_heap_lock; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index b115bd1f..495f4d19 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1436,6 +1436,9 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16); LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28); LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29); LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30); +LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62); +LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63); +LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32); /* * Features: diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 4deb87f9..a36b0e60 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -597,7 +597,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, } if (p.has_ec) { - struct stripe *m = genradix_ptr(&c->stripes[true], p.ec.idx); + struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx); if (fsck_err_on(!m || !m->alive, c, "pointer to nonexistent stripe %llu\n" @@ -665,7 +665,7 @@ again: ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); bkey_extent_entry_for_each(ptrs, entry) { if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) { - struct stripe *m = genradix_ptr(&c->stripes[true], + struct gc_stripe *m = genradix_ptr(&c->gc_stripes, entry->stripe_ptr.idx); union bch_extent_entry *next_ptr; @@ -1132,7 +1132,8 @@ static void bch2_gc_free(struct bch_fs *c) struct bch_dev *ca; unsigned i; - genradix_free(&c->stripes[1]); + genradix_free(&c->reflink_gc_table); + genradix_free(&c->gc_stripes); for_each_member_device(ca, c, i) { kvpfree(rcu_dereference_protected(ca->buckets[1], 1), @@ -1191,35 +1192,6 @@ static int bch2_gc_done(struct bch_fs *c, #define copy_fs_field(_f, _msg, ...) \ copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__) - if (!metadata_only) { - struct genradix_iter iter = genradix_iter_init(&c->stripes[1], 0); - struct stripe *dst, *src; - - while ((src = genradix_iter_peek(&iter, &c->stripes[1]))) { - dst = genradix_ptr_alloc(&c->stripes[0], iter.pos, GFP_KERNEL); - - if (dst->alive != src->alive || - dst->sectors != src->sectors || - dst->algorithm != src->algorithm || - dst->nr_blocks != src->nr_blocks || - dst->nr_redundant != src->nr_redundant) { - bch_err(c, "unexpected stripe inconsistency at bch2_gc_done, confused"); - ret = -EINVAL; - goto fsck_err; - } - - for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++) - copy_stripe_field(block_sectors[i], - "block_sectors[%u]", i); - - dst->blocks_nonempty = 0; - for (i = 0; i < dst->nr_blocks; i++) - dst->blocks_nonempty += dst->block_sectors[i] != 0; - - genradix_iter_advance(&iter, &c->stripes[1]); - } - } - for (i = 0; i < ARRAY_SIZE(c->usage); i++) bch2_fs_usage_acc_to_base(c, i); @@ -1510,12 +1482,82 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial, fsck_err: bch2_trans_iter_exit(&trans, &iter); out: - genradix_free(&c->reflink_gc_table); c->reflink_gc_nr = 0; bch2_trans_exit(&trans); return ret; } +static int bch2_gc_stripes_done_initial_fn(struct btree_trans *trans, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct gc_stripe *m; + const struct bch_stripe *s; + char buf[200]; + unsigned i; + int ret = 0; + + if (k.k->type != KEY_TYPE_stripe) + return 0; + + s = bkey_s_c_to_stripe(k).v; + + m = genradix_ptr(&c->gc_stripes, k.k->p.offset); + + for (i = 0; i < s->nr_blocks; i++) + if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0)) + goto inconsistent; + return 0; +inconsistent: + if (fsck_err_on(true, c, + "stripe has wrong block sector count %u:\n" + " %s\n" + " should be %u", i, + (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf), + m ? m->block_sectors[i] : 0)) { + struct bkey_i_stripe *new; + + new = kmalloc(bkey_bytes(k.k), GFP_KERNEL); + if (!new) { + ret = -ENOMEM; + goto fsck_err; + } + + bkey_reassemble(&new->k_i, k); + + for (i = 0; i < new->v.nr_blocks; i++) + stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0); + + ret = bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i); + if (ret) + kfree(new); + } +fsck_err: + return ret; +} + +static int bch2_gc_stripes_done(struct bch_fs *c, bool initial, + bool metadata_only) +{ + struct btree_trans trans; + int ret = 0; + + if (metadata_only) + return 0; + + bch2_trans_init(&trans, c, 0, 0); + + if (initial) { + ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes, + bch2_gc_stripes_done_initial_fn); + } else { + BUG(); + } + + bch2_trans_exit(&trans); + return ret; +} + static int bch2_gc_reflink_start_initial_fn(struct btree_trans *trans, struct bkey_s_c k) { @@ -1551,7 +1593,6 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial, return 0; bch2_trans_init(&trans, c, 0, 0); - genradix_free(&c->reflink_gc_table); c->reflink_gc_nr = 0; if (initial) { @@ -1685,6 +1726,7 @@ out: percpu_down_write(&c->mark_lock); ret = bch2_gc_reflink_done(c, initial, metadata_only) ?: + bch2_gc_stripes_done(c, initial, metadata_only) ?: bch2_gc_done(c, initial, metadata_only); bch2_journal_unblock(&c->journal); diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 0268dd74..89f07e58 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -73,8 +73,8 @@ int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *, int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *, struct bkey_i *, bool); -int bch2_trans_update(struct btree_trans *, struct btree_iter *, - struct bkey_i *, enum btree_update_flags); +int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *, + struct bkey_i *, enum btree_update_flags); void bch2_trans_commit_hook(struct btree_trans *, struct btree_trans_commit_hook *); int __bch2_trans_commit(struct btree_trans *); diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 131fd4c1..10837a62 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -1300,8 +1300,8 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans, return ret; } -int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_i *k, enum btree_update_flags flags) +int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_i *k, enum btree_update_flags flags) { struct btree_insert_entry *i, n; @@ -1324,8 +1324,6 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, .ip_allocated = _RET_IP_, }; - __btree_path_get(n.path, true); - #ifdef CONFIG_BCACHEFS_DEBUG trans_for_each_update(trans, i) BUG_ON(i != trans->updates && @@ -1362,16 +1360,17 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, if (n.cached && !i->cached) { i->k = n.k; i->flags = n.flags; - - __btree_path_get(n.path, false); - } else { - bch2_path_put(trans, i->path, true); - *i = n; + return 0; } + + bch2_path_put(trans, i->path, true); + *i = n; } else array_insert_item(trans->updates, trans->nr_updates, i - trans->updates, n); + __btree_path_get(n.path, true); + return 0; } diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 4d55ef51..5ff4e911 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -949,38 +949,34 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans, bool gc = flags & BTREE_TRIGGER_GC; struct bch_fs *c = trans->c; struct bch_replicas_padded r; - struct stripe *m; - unsigned i, blocks_nonempty = 0; - m = genradix_ptr(&c->stripes[gc], p.idx); + if (!gc) { + BUG(); + } else { + struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.idx, GFP_KERNEL); - spin_lock(&c->ec_stripes_heap_lock); + if (!m) + return -ENOMEM; - if (!m || !m->alive) { + spin_lock(&c->ec_stripes_heap_lock); + + if (!m || !m->alive) { + spin_unlock(&c->ec_stripes_heap_lock); + bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", + (u64) p.idx); + bch2_inconsistent_error(c); + return -EIO; + } + + m->block_sectors[p.block] += sectors; + + r = m->r; spin_unlock(&c->ec_stripes_heap_lock); - bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", - (u64) p.idx); - bch2_inconsistent_error(c); - return -EIO; + + r.e.data_type = data_type; + update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, gc); } - m->block_sectors[p.block] += sectors; - - r = m->r; - - for (i = 0; i < m->nr_blocks; i++) - blocks_nonempty += m->block_sectors[i] != 0; - - if (m->blocks_nonempty != blocks_nonempty) { - m->blocks_nonempty = blocks_nonempty; - if (!gc) - bch2_stripes_heap_update(c, m, p.idx); - } - - spin_unlock(&c->ec_stripes_heap_lock); - - r.e.data_type = data_type; - update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, gc); return 0; } @@ -1077,67 +1073,70 @@ static int bch2_mark_stripe(struct btree_trans *trans, ? bkey_s_c_to_stripe(old).v : NULL; const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe ? bkey_s_c_to_stripe(new).v : NULL; - struct stripe *m = genradix_ptr(&c->stripes[gc], idx); unsigned i; int ret; BUG_ON(gc && old_s); - if (!m || (old_s && !m->alive)) { - char buf1[200], buf2[200]; + if (!gc) { + struct stripe *m = genradix_ptr(&c->stripes, idx); - bch2_bkey_val_to_text(&PBUF(buf1), c, old); - bch2_bkey_val_to_text(&PBUF(buf2), c, new); - bch_err_ratelimited(c, "error marking nonexistent stripe %zu while marking\n" - "old %s\n" - "new %s", idx, buf1, buf2); - bch2_inconsistent_error(c); - return -1; - } + if (!m || (old_s && !m->alive)) { + char buf1[200], buf2[200]; - if (!new_s) { - spin_lock(&c->ec_stripes_heap_lock); - bch2_stripes_heap_del(c, m, idx); - spin_unlock(&c->ec_stripes_heap_lock); + bch2_bkey_val_to_text(&PBUF(buf1), c, old); + bch2_bkey_val_to_text(&PBUF(buf2), c, new); + bch_err_ratelimited(c, "error marking nonexistent stripe %zu while marking\n" + "old %s\n" + "new %s", idx, buf1, buf2); + bch2_inconsistent_error(c); + return -1; + } - memset(m, 0, sizeof(*m)); + if (!new_s) { + spin_lock(&c->ec_stripes_heap_lock); + bch2_stripes_heap_del(c, m, idx); + spin_unlock(&c->ec_stripes_heap_lock); + + memset(m, 0, sizeof(*m)); + } else { + m->alive = true; + m->sectors = le16_to_cpu(new_s->sectors); + m->algorithm = new_s->algorithm; + m->nr_blocks = new_s->nr_blocks; + m->nr_redundant = new_s->nr_redundant; + m->blocks_nonempty = 0; + + for (i = 0; i < new_s->nr_blocks; i++) + m->blocks_nonempty += !!stripe_blockcount_get(new_s, i); + + spin_lock(&c->ec_stripes_heap_lock); + bch2_stripes_heap_update(c, m, idx); + spin_unlock(&c->ec_stripes_heap_lock); + } } else { + struct gc_stripe *m = genradix_ptr(&c->gc_stripes, idx); + + /* + * This will be wrong when we bring back runtime gc: we should + * be unmarking the old key and then marking the new key + */ m->alive = true; m->sectors = le16_to_cpu(new_s->sectors); m->algorithm = new_s->algorithm; m->nr_blocks = new_s->nr_blocks; m->nr_redundant = new_s->nr_redundant; - m->blocks_nonempty = 0; - - for (i = 0; i < new_s->nr_blocks; i++) { - m->block_sectors[i] = - stripe_blockcount_get(new_s, i); - m->blocks_nonempty += !!m->block_sectors[i]; + for (i = 0; i < new_s->nr_blocks; i++) m->ptrs[i] = new_s->ptrs[i]; - } bch2_bkey_to_replicas(&m->r.e, new); - if (!gc) { - spin_lock(&c->ec_stripes_heap_lock); - bch2_stripes_heap_update(c, m, idx); - spin_unlock(&c->ec_stripes_heap_lock); - } - } - - if (gc) { - /* - * This will be wrong when we bring back runtime gc: we should - * be unmarking the old key and then marking the new key - */ - /* * gc recalculates this field from stripe ptr * references: */ memset(m->block_sectors, 0, sizeof(m->block_sectors)); - m->blocks_nonempty = 0; for (i = 0; i < new_s->nr_blocks; i++) { ret = mark_stripe_bucket(trans, new, i, journal_seq, flags); @@ -1544,7 +1543,9 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, goto out; bch2_alloc_pack(c, a, u); - bch2_trans_update(trans, &iter, &a->k, 0); + ret = bch2_trans_update(trans, &iter, &a->k, 0); + if (ret) + goto out; out: bch2_trans_iter_exit(trans, &iter); return ret; @@ -1595,7 +1596,10 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, stripe_blockcount_set(&s->v, p.ec.block, stripe_blockcount_get(&s->v, p.ec.block) + sectors); - bch2_trans_update(trans, &iter, &s->k_i, 0); + + ret = bch2_trans_update(trans, &iter, &s->k_i, 0); + if (ret) + goto err; bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i)); r.e.data_type = data_type; @@ -1733,7 +1737,9 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, u.data_type = !deleting ? data_type : 0; bch2_alloc_pack(c, a, u); - bch2_trans_update(trans, &iter, &a->k, 0); + ret = bch2_trans_update(trans, &iter, &a->k, 0); + if (ret) + goto err; err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -2012,7 +2018,9 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, u.dirty_sectors = sectors; bch2_alloc_pack(c, a, u); - bch2_trans_update(trans, &iter, &a->k, 0); + ret = bch2_trans_update(trans, &iter, &a->k, 0); + if (ret) + goto out; out: bch2_trans_iter_exit(trans, &iter); return ret; diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index fe4a85a6..a165d08c 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -367,7 +367,9 @@ int bch2_dirent_rename(struct btree_trans *trans, } } - bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0); + ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0); + if (ret) + goto out; out_set_src: /* @@ -384,7 +386,9 @@ out_set_src: src_update_flags |= BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE; } - bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags); + ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags); + if (ret) + goto out; if (mode == BCH_RENAME_EXCHANGE) *src_offset = new_src->k.p.offset; diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 71d85c93..f1839990 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -545,11 +545,11 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) free_heap(&n); } - if (!genradix_ptr_alloc(&c->stripes[0], idx, gfp)) + if (!genradix_ptr_alloc(&c->stripes, idx, gfp)) return -ENOMEM; if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING && - !genradix_ptr_alloc(&c->stripes[1], idx, gfp)) + !genradix_ptr_alloc(&c->gc_stripes, idx, gfp)) return -ENOMEM; return 0; @@ -594,13 +594,13 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, { struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap); - genradix_ptr(&c->stripes[0], h->data[i].idx)->heap_idx = i; + genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i; } static void heap_verify_backpointer(struct bch_fs *c, size_t idx) { ec_stripes_heap *h = &c->ec_stripes_heap; - struct stripe *m = genradix_ptr(&c->stripes[0], idx); + struct stripe *m = genradix_ptr(&c->stripes, idx); BUG_ON(!m->alive); BUG_ON(m->heap_idx >= h->used); @@ -692,7 +692,7 @@ static void ec_stripe_delete_work(struct work_struct *work) break; } - bch2_stripes_heap_del(c, genradix_ptr(&c->stripes[0], idx), idx); + bch2_stripes_heap_del(c, genradix_ptr(&c->stripes, idx), idx); spin_unlock(&c->ec_stripes_heap_lock); if (ec_stripe_delete(c, idx)) @@ -702,22 +702,18 @@ static void ec_stripe_delete_work(struct work_struct *work) /* stripe creation: */ -static int ec_stripe_bkey_insert(struct bch_fs *c, +static int ec_stripe_bkey_insert(struct btree_trans *trans, struct bkey_i_stripe *stripe, struct disk_reservation *res) { - struct btree_trans trans; + struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; struct bpos min_pos = POS(0, 1); struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); int ret; - bch2_trans_init(&trans, c, 0, 0); -retry: - bch2_trans_begin(&trans); - - for_each_btree_key(&trans, iter, BTREE_ID_stripes, start_pos, + for_each_btree_key(trans, iter, BTREE_ID_stripes, start_pos, BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) { if (start_pos.offset) { @@ -738,29 +734,24 @@ retry: found_slot: start_pos = iter.pos; - ret = ec_stripe_mem_alloc(&trans, &iter); + ret = ec_stripe_mem_alloc(trans, &iter); if (ret) goto err; stripe->k.p = iter.pos; - ret = bch2_trans_update(&trans, &iter, &stripe->k_i, 0) ?: - bch2_trans_commit(&trans, res, NULL, - BTREE_INSERT_NOFAIL); + ret = bch2_trans_update(trans, &iter, &stripe->k_i, 0); + + c->ec_stripe_hint = start_pos.offset; err: - bch2_trans_iter_exit(&trans, &iter); - - if (ret == -EINTR) - goto retry; - - c->ec_stripe_hint = ret ? start_pos.offset : start_pos.offset + 1; - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); return ret; } static int ec_stripe_bkey_update(struct btree_trans *trans, - struct bkey_i_stripe *new) + struct bkey_i_stripe *new, + struct disk_reservation *res) { struct btree_iter iter; struct bkey_s_c k; @@ -947,10 +938,10 @@ static void ec_stripe_create(struct ec_stripe_new *s) goto err_put_writes; } - ret = s->have_existing_stripe - ? bch2_trans_do(c, &s->res, NULL, BTREE_INSERT_NOFAIL, - ec_stripe_bkey_update(&trans, &s->new_stripe.key)) - : ec_stripe_bkey_insert(c, &s->new_stripe.key, &s->res); + ret = bch2_trans_do(c, &s->res, NULL, BTREE_INSERT_NOFAIL, + s->have_existing_stripe + ? ec_stripe_bkey_update(&trans, &s->new_stripe.key, &s->res) + : ec_stripe_bkey_insert(&trans, &s->new_stripe.key, &s->res)); if (ret) { bch_err(c, "error creating stripe: error creating stripe key"); goto err_put_writes; @@ -965,7 +956,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) } spin_lock(&c->ec_stripes_heap_lock); - m = genradix_ptr(&c->stripes[0], s->new_stripe.key.k.p.offset); + m = genradix_ptr(&c->stripes, s->new_stripe.key.k.p.offset); BUG_ON(m->on_heap); bch2_stripes_heap_insert(c, m, s->new_stripe.key.k.p.offset); @@ -1381,7 +1372,7 @@ static s64 get_existing_stripe(struct bch_fs *c, continue; stripe_idx = h->data[heap_idx].idx; - m = genradix_ptr(&c->stripes[0], stripe_idx); + m = genradix_ptr(&c->stripes, stripe_idx); if (m->algorithm == head->algo && m->nr_redundant == head->redundancy && @@ -1555,85 +1546,11 @@ void bch2_stripes_heap_start(struct bch_fs *c) struct genradix_iter iter; struct stripe *m; - genradix_for_each(&c->stripes[0], iter, m) + genradix_for_each(&c->stripes, iter, m) if (m->alive) bch2_stripes_heap_insert(c, m, iter.pos); } -static int __bch2_stripe_write_key(struct btree_trans *trans, - struct btree_iter *iter, - struct stripe *m, - size_t idx, - struct bkey_i_stripe *new_key) -{ - const struct bch_stripe *v; - struct bkey_s_c k; - unsigned i; - int ret; - - bch2_btree_iter_set_pos(iter, POS(0, idx)); - - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - return ret; - - if (k.k->type != KEY_TYPE_stripe) - return -EIO; - - v = bkey_s_c_to_stripe(k).v; - for (i = 0; i < v->nr_blocks; i++) - if (m->block_sectors[i] != stripe_blockcount_get(v, i)) - goto write; - return 0; -write: - bkey_reassemble(&new_key->k_i, k); - - for (i = 0; i < new_key->v.nr_blocks; i++) - stripe_blockcount_set(&new_key->v, i, - m->block_sectors[i]); - - return bch2_trans_update(trans, iter, &new_key->k_i, 0); -} - -int bch2_stripes_write(struct bch_fs *c, unsigned flags) -{ - struct btree_trans trans; - struct btree_iter iter; - struct genradix_iter giter; - struct bkey_i_stripe *new_key; - struct stripe *m; - int ret = 0; - - new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL); - BUG_ON(!new_key); - - bch2_trans_init(&trans, c, 0, 0); - - bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes, POS_MIN, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - - genradix_for_each(&c->stripes[0], giter, m) { - if (!m->alive) - continue; - - ret = __bch2_trans_do(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL|flags, - __bch2_stripe_write_key(&trans, &iter, m, - giter.pos, new_key)); - - if (ret) - break; - } - bch2_trans_iter_exit(&trans, &iter); - - bch2_trans_exit(&trans); - - kfree(new_key); - - return ret; -} - static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k) { const struct bch_stripe *s; @@ -1651,7 +1568,7 @@ static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k) s = bkey_s_c_to_stripe(k).v; - m = genradix_ptr(&c->stripes[0], k.k->p.offset); + m = genradix_ptr(&c->stripes, k.k->p.offset); m->alive = true; m->sectors = le16_to_cpu(s->sectors); m->algorithm = s->algorithm; @@ -1659,14 +1576,8 @@ static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k) m->nr_redundant = s->nr_redundant; m->blocks_nonempty = 0; - for (i = 0; i < s->nr_blocks; i++) { - m->block_sectors[i] = - stripe_blockcount_get(s, i); - m->blocks_nonempty += !!m->block_sectors[i]; - m->ptrs[i] = s->ptrs[i]; - } - - bch2_bkey_to_replicas(&m->r.e, k); + for (i = 0; i < s->nr_blocks; i++) + m->blocks_nonempty += !!stripe_blockcount_get(s, i); spin_lock(&c->ec_stripes_heap_lock); bch2_stripes_heap_update(c, m, k.k->p.offset); @@ -1722,7 +1633,9 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) ret = genradix_prealloc(&c->stripes[gc], idx, GFP_KERNEL); #else for (i = 0; i < idx; i++) - if (!genradix_ptr_alloc(&c->stripes[gc], i, GFP_KERNEL)) + if (!gc + ? !genradix_ptr_alloc(&c->stripes, i, GFP_KERNEL) + : !genradix_ptr_alloc(&c->gc_stripes, i, GFP_KERNEL)) return -ENOMEM; #endif return 0; @@ -1736,7 +1649,7 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) spin_lock(&c->ec_stripes_heap_lock); for (i = 0; i < min_t(size_t, h->used, 20); i++) { - m = genradix_ptr(&c->stripes[0], h->data[i].idx); + m = genradix_ptr(&c->stripes, h->data[i].idx); pr_buf(out, "%zu %u/%u+%u\n", h->data[i].idx, h->data[i].blocks_nonempty, @@ -1794,7 +1707,7 @@ void bch2_fs_ec_exit(struct bch_fs *c) BUG_ON(!list_empty(&c->ec_stripe_new_list)); free_heap(&c->ec_stripes_heap); - genradix_free(&c->stripes[0]); + genradix_free(&c->stripes); bioset_exit(&c->ec_bioset); } diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h index eb16e140..46814107 100644 --- a/libbcachefs/ec.h +++ b/libbcachefs/ec.h @@ -108,7 +108,7 @@ static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s, le16_to_cpu(s->sectors)); } -static inline bool bch2_ptr_matches_stripe_m(const struct stripe *m, +static inline bool bch2_ptr_matches_stripe_m(const struct gc_stripe *m, struct extent_ptr_decoded p) { unsigned nr_data = m->nr_blocks - m->nr_redundant; @@ -216,7 +216,6 @@ void bch2_ec_flush_new_stripes(struct bch_fs *); void bch2_stripes_heap_start(struct bch_fs *); int bch2_stripes_read(struct bch_fs *); -int bch2_stripes_write(struct bch_fs *, unsigned); int bch2_ec_mem_alloc(struct bch_fs *, bool); diff --git a/libbcachefs/ec_types.h b/libbcachefs/ec_types.h index 3fc31222..d9623ba6 100644 --- a/libbcachefs/ec_types.h +++ b/libbcachefs/ec_types.h @@ -21,6 +21,16 @@ struct stripe { unsigned alive:1; /* does a corresponding key exist in stripes btree? */ unsigned on_heap:1; u8 blocks_nonempty; +}; + +struct gc_stripe { + u16 sectors; + u8 algorithm; + + u8 nr_blocks; + u8 nr_redundant; + + unsigned alive:1; /* does a corresponding key exist in stripes btree? */ u16 block_sectors[BCH_BKEY_PTRS_MAX]; struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX]; diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 14bea8a2..268f3ea4 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -311,7 +311,7 @@ static int journal_entry_open(struct journal *j) mod_delayed_work(c->io_complete_wq, &j->write_work, - msecs_to_jiffies(j->write_delay_ms)); + msecs_to_jiffies(c->opts.journal_flush_delay)); journal_wake(j); return 0; } @@ -1101,9 +1101,6 @@ int bch2_fs_journal_init(struct journal *j) lockdep_init_map(&j->res_map, "journal res", &res_key, 0); - j->write_delay_ms = 1000; - j->reclaim_delay_ms = 100; - atomic64_set(&j->reservations.counter, ((union journal_res_state) { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); @@ -1135,44 +1132,29 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) struct bch_fs *c = container_of(j, struct bch_fs, journal); union journal_res_state s; struct bch_dev *ca; + unsigned long now = jiffies; unsigned i; rcu_read_lock(); s = READ_ONCE(j->reservations); - pr_buf(out, - "active journal entries:\t%llu\n" - "seq:\t\t\t%llu\n" - "last_seq:\t\t%llu\n" - "last_seq_ondisk:\t%llu\n" - "flushed_seq_ondisk:\t%llu\n" - "prereserved:\t\t%u/%u\n" - "each entry reserved:\t%u\n" - "nr flush writes:\t%llu\n" - "nr noflush writes:\t%llu\n" - "nr direct reclaim:\t%llu\n" - "nr background reclaim:\t%llu\n" - "reclaim kicked:\t\t%u\n" - "reclaim runs in:\t%u ms\n" - "current entry sectors:\t%u\n" - "current entry error:\t%u\n" - "current entry:\t\t", - fifo_used(&j->pin), - journal_cur_seq(j), - journal_last_seq(j), - j->last_seq_ondisk, - j->flushed_seq_ondisk, - j->prereserved.reserved, - j->prereserved.remaining, - j->entry_u64s_reserved, - j->nr_flush_writes, - j->nr_noflush_writes, - j->nr_direct_reclaim, - j->nr_background_reclaim, - j->reclaim_kicked, - jiffies_to_msecs(j->next_reclaim - jiffies), - j->cur_entry_sectors, - j->cur_entry_error); + pr_buf(out, "active journal entries:\t%llu\n", fifo_used(&j->pin)); + pr_buf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j)); + pr_buf(out, "last_seq:\t\t%llu\n", journal_last_seq(j)); + pr_buf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk); + pr_buf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk); + pr_buf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining); + pr_buf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved); + pr_buf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes); + pr_buf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes); + pr_buf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim); + pr_buf(out, "nr background reclaim:\t%llu\n", j->nr_background_reclaim); + pr_buf(out, "reclaim kicked:\t\t%u\n", j->reclaim_kicked); + pr_buf(out, "reclaim runs in:\t%u ms\n", time_after(j->next_reclaim, now) + ? jiffies_to_msecs(j->next_reclaim - jiffies) : 0); + pr_buf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors); + pr_buf(out, "current entry error:\t%u\n", j->cur_entry_error); + pr_buf(out, "current entry:\t\t"); switch (s.cur_entry_offset) { case JOURNAL_ENTRY_ERROR_VAL: @@ -1182,15 +1164,11 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) pr_buf(out, "closed\n"); break; default: - pr_buf(out, "%u/%u\n", - s.cur_entry_offset, - j->cur_entry_u64s); + pr_buf(out, "%u/%u\n", s.cur_entry_offset, j->cur_entry_u64s); break; } - pr_buf(out, - "current entry:\t\tidx %u refcount %u\n", - s.idx, journal_state_count(s, s.idx)); + pr_buf(out, "current entry:\t\tidx %u refcount %u\n", s.idx, journal_state_count(s, s.idx)); i = s.idx; while (i != s.unwritten_idx) { @@ -1230,22 +1208,14 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) if (!ja->nr) continue; - pr_buf(out, - "dev %u:\n" - "\tnr\t\t%u\n" - "\tbucket size\t%u\n" - "\tavailable\t%u:%u\n" - "\tdiscard_idx\t%u\n" - "\tdirty_ondisk\t%u (seq %llu)\n" - "\tdirty_idx\t%u (seq %llu)\n" - "\tcur_idx\t\t%u (seq %llu)\n", - i, ja->nr, ca->mi.bucket_size, - bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), - ja->sectors_free, - ja->discard_idx, - ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk], - ja->dirty_idx, ja->bucket_seq[ja->dirty_idx], - ja->cur_idx, ja->bucket_seq[ja->cur_idx]); + pr_buf(out, "dev %u:\n", i); + pr_buf(out, "\tnr\t\t%u\n", ja->nr); + pr_buf(out, "\tbucket size\t%u\n", ca->mi.bucket_size); + pr_buf(out, "\tavailable\t%u:%u\n", bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), ja->sectors_free); + pr_buf(out, "\tdiscard_idx\t%u\n", ja->discard_idx); + pr_buf(out, "\tdirty_ondisk\t%u (seq %llu)\n", ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk]); + pr_buf(out, "\tdirty_idx\t%u (seq %llu)\n", ja->dirty_idx, ja->bucket_seq[ja->dirty_idx]); + pr_buf(out, "\tcur_idx\t\t%u (seq %llu)\n", ja->cur_idx, ja->bucket_seq[ja->cur_idx]); } rcu_read_unlock(); diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 5c8304e0..37abfb18 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -1398,7 +1398,7 @@ void bch2_journal_write(struct closure *cl) spin_lock(&j->lock); if (c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush) && !w->must_flush && - (jiffies - j->last_flush_write) < msecs_to_jiffies(j->write_delay_ms) && + (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) && test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)) { w->noflush = true; SET_JSET_NO_FLUSH(jset, true); diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index ca482c67..ab9a6d96 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -637,7 +637,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct) * make sure to flush at least one journal pin: */ if (time_after(jiffies, j->last_flushed + - msecs_to_jiffies(j->reclaim_delay_ms))) + msecs_to_jiffies(c->opts.journal_reclaim_delay))) min_nr = 1; if (j->prereserved.reserved * 4 > j->prereserved.remaining) @@ -686,6 +686,7 @@ int bch2_journal_reclaim(struct journal *j) static int bch2_journal_reclaim_thread(void *arg) { struct journal *j = arg; + struct bch_fs *c = container_of(j, struct bch_fs, journal); unsigned long delay, now; int ret = 0; @@ -703,7 +704,7 @@ static int bch2_journal_reclaim_thread(void *arg) mutex_unlock(&j->reclaim_lock); now = jiffies; - delay = msecs_to_jiffies(j->reclaim_delay_ms); + delay = msecs_to_jiffies(c->opts.journal_reclaim_delay); j->next_reclaim = j->last_flushed + delay; if (!time_in_range(j->next_reclaim, now, now + delay)) diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h index d4845132..66b1707a 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal_types.h @@ -262,8 +262,6 @@ struct journal { struct mutex discard_lock; bool can_discard; - unsigned write_delay_ms; - unsigned reclaim_delay_ms; unsigned long last_flush_write; u64 res_get_blocked_start; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 64e39c10..f0495451 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -767,8 +767,7 @@ static int __bch2_move_data(struct bch_fs *c, if (rate) bch2_ratelimit_increment(rate, k.k->size); next: - atomic64_add(k.k->size * bch2_bkey_nr_ptrs_allocated(k), - &stats->sectors_seen); + atomic64_add(k.k->size, &stats->sectors_seen); next_nondata: bch2_btree_iter_advance(&iter); } diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index 5c9eafc0..7b7eee9b 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -139,7 +139,7 @@ static int bch2_copygc(struct bch_fs *c) struct copygc_heap_entry e, *i; struct bucket_array *buckets; struct bch_move_stats move_stats; - u64 sectors_to_move = 0, sectors_not_moved = 0; + u64 sectors_to_move = 0, sectors_to_write = 0, sectors_not_moved = 0; u64 sectors_reserved = 0; u64 buckets_to_move, buckets_not_moved = 0; struct bch_dev *ca; @@ -205,22 +205,23 @@ static int bch2_copygc(struct bch_fs *c) up_read(&ca->bucket_lock); } + /* + * Our btree node allocations also come out of RESERVE_MOVINGGC: + */ + sectors_reserved = (sectors_reserved * 3) / 4; if (!sectors_reserved) { bch2_fs_fatal_error(c, "stuck, ran out of copygc reserve!"); return -1; } - /* - * Our btree node allocations also come out of RESERVE_MOVINGGC: - */ - sectors_to_move = (sectors_to_move * 3) / 4; + for (i = h->data; i < h->data + h->used; i++) { + sectors_to_move += i->sectors; + sectors_to_write += i->sectors * i->replicas; + } - for (i = h->data; i < h->data + h->used; i++) - sectors_to_move += i->sectors * i->replicas; - - while (sectors_to_move > sectors_reserved) { + while (sectors_to_write > sectors_reserved) { BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL)); - sectors_to_move -= e.sectors * e.replicas; + sectors_to_write -= e.sectors * e.replicas; } buckets_to_move = h->used; diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index afb1bb2a..e2eb9b3f 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -45,11 +45,12 @@ LE64_BITMASK(NO_SB_OPT, struct bch_sb, flags[0], 0, 0); /* When can be set: */ enum opt_mode { - OPT_FORMAT = (1 << 0), - OPT_MOUNT = (1 << 1), - OPT_RUNTIME = (1 << 2), - OPT_INODE = (1 << 3), - OPT_DEVICE = (1 << 4), + OPT_FS = (1 << 0), /* Filesystem option */ + OPT_DEVICE = (1 << 1), /* Device option */ + OPT_INODE = (1 << 2), /* Inode option */ + OPT_FORMAT = (1 << 3), /* May be specified at format time */ + OPT_MOUNT = (1 << 4), /* May be specified at mount time */ + OPT_RUNTIME = (1 << 5), /* May be specified at runtime */ }; enum opt_type { @@ -87,216 +88,226 @@ enum opt_type { #define BCH_OPTS() \ x(block_size, u16, \ - OPT_FORMAT, \ + OPT_FS|OPT_FORMAT, \ OPT_SECTORS(1, 128), \ BCH_SB_BLOCK_SIZE, 8, \ "size", NULL) \ x(btree_node_size, u16, \ - OPT_FORMAT, \ + OPT_FS|OPT_FORMAT, \ OPT_SECTORS(1, 512), \ BCH_SB_BTREE_NODE_SIZE, 512, \ "size", "Btree node size, default 256k") \ x(errors, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_STR(bch2_error_actions), \ BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \ NULL, "Action to take on filesystem error") \ x(metadata_replicas, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_UINT(1, BCH_REPLICAS_MAX), \ BCH_SB_META_REPLICAS_WANT, 1, \ "#", "Number of metadata replicas") \ x(data_replicas, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_UINT(1, BCH_REPLICAS_MAX), \ BCH_SB_DATA_REPLICAS_WANT, 1, \ "#", "Number of data replicas") \ x(metadata_replicas_required, u8, \ - OPT_FORMAT|OPT_MOUNT, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT, \ OPT_UINT(1, BCH_REPLICAS_MAX), \ BCH_SB_META_REPLICAS_REQ, 1, \ "#", NULL) \ x(data_replicas_required, u8, \ - OPT_FORMAT|OPT_MOUNT, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT, \ OPT_UINT(1, BCH_REPLICAS_MAX), \ BCH_SB_DATA_REPLICAS_REQ, 1, \ "#", NULL) \ x(metadata_checksum, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_STR(bch2_csum_opts), \ BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \ NULL, NULL) \ x(data_checksum, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_STR(bch2_csum_opts), \ BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \ NULL, NULL) \ x(compression, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_STR(bch2_compression_opts), \ BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \ NULL, NULL) \ x(background_compression, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_STR(bch2_compression_opts), \ BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \ NULL, NULL) \ x(str_hash, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_STR(bch2_str_hash_opts), \ BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_OPT_siphash, \ NULL, "Hash function for directory entries and xattrs")\ x(metadata_target, u16, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_FN(bch2_opt_target), \ BCH_SB_METADATA_TARGET, 0, \ "(target)", "Device or disk group for metadata writes") \ x(foreground_target, u16, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_FN(bch2_opt_target), \ BCH_SB_FOREGROUND_TARGET, 0, \ "(target)", "Device or disk group for foreground writes") \ x(background_target, u16, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_FN(bch2_opt_target), \ BCH_SB_BACKGROUND_TARGET, 0, \ "(target)", "Device or disk group to move data to in the background")\ x(promote_target, u16, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_FN(bch2_opt_target), \ BCH_SB_PROMOTE_TARGET, 0, \ "(target)", "Device or disk group to promote data to on read")\ x(erasure_code, u16, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ BCH_SB_ERASURE_CODE, false, \ NULL, "Enable erasure coding (DO NOT USE YET)") \ x(inodes_32bit, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ BCH_SB_INODE_32BIT, true, \ NULL, "Constrain inode numbers to 32 bits") \ x(shard_inode_numbers, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ BCH_SB_SHARD_INUMS, true, \ NULL, "Shard new inode numbers by CPU id") \ x(inodes_use_key_cache, u8, \ - OPT_FORMAT|OPT_MOUNT, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT, \ OPT_BOOL(), \ BCH_SB_INODES_USE_KEY_CACHE, true, \ NULL, "Use the btree key cache for the inodes btree") \ x(btree_node_mem_ptr_optimization, u8, \ - OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ NO_SB_OPT, true, \ NULL, "Stash pointer to in memory btree node in btree ptr")\ x(gc_reserve_percent, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_UINT(5, 21), \ BCH_SB_GC_RESERVE, 8, \ "%", "Percentage of disk space to reserve for copygc")\ x(gc_reserve_bytes, u64, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_SECTORS(0, U64_MAX), \ BCH_SB_GC_RESERVE_BYTES, 0, \ "%", "Amount of disk space to reserve for copygc\n" \ "Takes precedence over gc_reserve_percent if set")\ x(root_reserve_percent, u8, \ - OPT_FORMAT|OPT_MOUNT, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT, \ OPT_UINT(0, 100), \ BCH_SB_ROOT_RESERVE, 0, \ "%", "Percentage of disk space to reserve for superuser")\ x(wide_macs, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ BCH_SB_128_BIT_MACS, false, \ NULL, "Store full 128 bits of cryptographic MACs, instead of 80")\ x(inline_data, u8, \ - OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ NO_SB_OPT, true, \ NULL, "Enable inline data extents") \ x(acl, u8, \ - OPT_FORMAT|OPT_MOUNT, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT, \ OPT_BOOL(), \ BCH_SB_POSIX_ACL, true, \ NULL, "Enable POSIX acls") \ x(usrquota, u8, \ - OPT_FORMAT|OPT_MOUNT, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT, \ OPT_BOOL(), \ BCH_SB_USRQUOTA, false, \ NULL, "Enable user quotas") \ x(grpquota, u8, \ - OPT_FORMAT|OPT_MOUNT, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT, \ OPT_BOOL(), \ BCH_SB_GRPQUOTA, false, \ NULL, "Enable group quotas") \ x(prjquota, u8, \ - OPT_FORMAT|OPT_MOUNT, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT, \ OPT_BOOL(), \ BCH_SB_PRJQUOTA, false, \ NULL, "Enable project quotas") \ x(degraded, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Allow mounting in degraded mode") \ x(very_degraded, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Allow mounting in when data will be missing") \ x(discard, u8, \ - OPT_MOUNT|OPT_DEVICE, \ + OPT_FS|OPT_MOUNT|OPT_DEVICE, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Enable discard/TRIM support") \ x(verbose, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Extra debugging information during mount/recovery")\ + x(journal_flush_delay, u32, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ + OPT_UINT(0, U32_MAX), \ + BCH_SB_JOURNAL_FLUSH_DELAY, 1000, \ + NULL, "Delay in milliseconds before automatic journal commits")\ x(journal_flush_disabled, u8, \ - OPT_MOUNT|OPT_RUNTIME, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH_SB_JOURNAL_FLUSH_DISABLED,false, \ NULL, "Disable journal flush on sync/fsync\n" \ "If enabled, writes can be lost, but only since the\n"\ "last journal write (default 1 second)") \ + x(journal_reclaim_delay, u32, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ + OPT_UINT(0, U32_MAX), \ + BCH_SB_JOURNAL_RECLAIM_DELAY, 100, \ + NULL, "Delay in milliseconds before automatic journal reclaim")\ x(fsck, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Run fsck on mount") \ x(fix_errors, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Fix errors during fsck without asking") \ x(ratelimit_errors, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, RATELIMIT_ERRORS, \ NULL, "Ratelimit error messages during fsck") \ x(nochanges, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Super read only mode - no writes at all will be issued,\n"\ "even if we have to replay the journal") \ x(norecovery, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Don't replay the journal") \ x(rebuild_replicas, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Rebuild the superblock replicas section") \ x(keep_journal, u8, \ - OPT_MOUNT, \ + 0, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Don't free journal entries/keys after startup")\ @@ -306,7 +317,7 @@ enum opt_type { NO_SB_OPT, false, \ NULL, "Read all journal entries, not just dirty ones")\ x(noexcl, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Don't open device in exclusive mode") \ @@ -316,7 +327,7 @@ enum opt_type { NO_SB_OPT, BCH_SB_SECTOR, \ "offset", "Sector offset of superblock") \ x(read_only, u8, \ - 0, \ + OPT_FS, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, NULL) \ @@ -326,12 +337,12 @@ enum opt_type { NO_SB_OPT, false, \ NULL, "Don\'t start filesystem, only open devices") \ x(reconstruct_alloc, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Reconstruct alloc btree") \ x(version_upgrade, u8, \ - OPT_MOUNT, \ + OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ NULL, "Set superblock to latest version,\n" \ diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index c3b4d116..460b1ba2 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -1238,8 +1238,7 @@ use_clean: */ bch_verbose(c, "writing allocation info"); err = "error writing out alloc info"; - ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW) ?: - bch2_alloc_write(c, BTREE_INSERT_LAZY_RW); + ret = bch2_alloc_write(c, BTREE_INSERT_LAZY_RW); if (ret) { bch_err(c, "error writing alloc info"); goto err; diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 7e909a11..8aeb2e41 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -488,7 +488,7 @@ static int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, n = bch2_trans_kmalloc(trans, sizeof(*n)); ret = PTR_ERR_OR_ZERO(n); if (ret) - return ret; + goto err; bkey_snapshot_init(&n->k_i); n->k.p = iter.pos; @@ -498,11 +498,10 @@ static int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, n->v.pad = 0; SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); - bch2_trans_update(trans, &iter, &n->k_i, 0); - - ret = bch2_mark_snapshot(trans, bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); + ret = bch2_trans_update(trans, &iter, &n->k_i, 0) ?: + bch2_mark_snapshot(trans, bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); if (ret) - break; + goto err; new_snapids[i] = iter.pos.offset; } @@ -536,7 +535,9 @@ static int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, n->v.children[0] = cpu_to_le32(new_snapids[0]); n->v.children[1] = cpu_to_le32(new_snapids[1]); SET_BCH_SNAPSHOT_SUBVOL(&n->v, false); - bch2_trans_update(trans, &iter, &n->k_i, 0); + ret = bch2_trans_update(trans, &iter, &n->k_i, 0); + if (ret) + goto err; } err: bch2_trans_iter_exit(trans, &iter); @@ -1049,7 +1050,9 @@ found_slot: if (src_subvolid) { src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]); - bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0); + ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0); + if (ret) + goto err; } new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol)); @@ -1064,7 +1067,9 @@ found_slot: SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro); SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0); new_subvol->k.p = dst_iter.pos; - bch2_trans_update(trans, &dst_iter, &new_subvol->k_i, 0); + ret = bch2_trans_update(trans, &dst_iter, &new_subvol->k_i, 0); + if (ret) + goto err; *new_subvolid = new_subvol->k.p.offset; *new_snapshotid = new_nodes[0]; diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 3744b6d5..f673efed 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -744,6 +744,15 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid); + /* Compat: */ + if (sb->version <= bcachefs_metadata_version_inode_v2 && + !BCH_SB_JOURNAL_FLUSH_DELAY(sb)) + SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000); + + if (sb->version <= bcachefs_metadata_version_inode_v2 && + !BCH_SB_JOURNAL_RECLAIM_DELAY(sb)) + SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 100); + c->opts = bch2_opts_default; bch2_opts_apply(&c->opts, bch2_opts_from_sb(sb)); bch2_opts_apply(&c->opts, opts); diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 864be860..d5d32bf1 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -140,8 +140,6 @@ rw_attribute(gc_gens_pos); read_attribute(uuid); read_attribute(minor); read_attribute(bucket_size); -read_attribute(block_size); -read_attribute(btree_node_size); read_attribute(first_bucket); read_attribute(nbuckets); read_attribute(durability); @@ -178,9 +176,6 @@ read_attribute(read_realloc_races); read_attribute(extent_migrate_done); read_attribute(extent_migrate_raced); -rw_attribute(journal_write_delay_ms); -rw_attribute(journal_reclaim_delay_ms); - rw_attribute(discard); rw_attribute(cache_replacement_policy); rw_attribute(label); @@ -357,11 +352,6 @@ SHOW(bch2_fs) sysfs_print(minor, c->minor); sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b); - sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms); - sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); - - sysfs_print(block_size, block_bytes(c)); - sysfs_print(btree_node_size, btree_bytes(c)); sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c)); sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c)); @@ -475,9 +465,6 @@ STORE(bch2_fs) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); - sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms); - sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms); - if (attr == &sysfs_btree_gc_periodic) { ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic) ?: (ssize_t) size; @@ -564,14 +551,9 @@ SYSFS_OPS(bch2_fs); struct attribute *bch2_fs_files[] = { &sysfs_minor, - &sysfs_block_size, - &sysfs_btree_node_size, &sysfs_btree_cache_size, &sysfs_btree_avg_write_size, - &sysfs_journal_write_delay_ms, - &sysfs_journal_reclaim_delay_ms, - &sysfs_promote_whole_extents, &sysfs_compression_stats, @@ -703,7 +685,7 @@ int bch2_opts_create_sysfs_files(struct kobject *kobj) for (i = bch2_opt_table; i < bch2_opt_table + bch2_opts_nr; i++) { - if (!(i->mode & (OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME))) + if (!(i->mode & OPT_FS)) continue; ret = sysfs_create_file(kobj, &i->attr); @@ -846,7 +828,6 @@ SHOW(bch2_dev) sysfs_printf(uuid, "%pU\n", ca->uuid.b); sysfs_print(bucket_size, bucket_bytes(ca)); - sysfs_print(block_size, block_bytes(c)); sysfs_print(first_bucket, ca->mi.first_bucket); sysfs_print(nbuckets, ca->mi.nbuckets); sysfs_print(durability, ca->mi.durability); @@ -978,7 +959,6 @@ SYSFS_OPS(bch2_dev); struct attribute *bch2_dev_files[] = { &sysfs_uuid, &sysfs_bucket_size, - &sysfs_block_size, &sysfs_first_bucket, &sysfs_nbuckets, &sysfs_durability, diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c index d5a74f4d..dfd8c43d 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/tests.c @@ -579,10 +579,10 @@ static int rand_mixed_trans(struct btree_trans *trans, if (!(i & 3) && k.k) { bkey_cookie_init(&cookie->k_i); cookie->k.p = iter->pos; - bch2_trans_update(trans, iter, &cookie->k_i, 0); + ret = bch2_trans_update(trans, iter, &cookie->k_i, 0); } - return 0; + return ret; } static int rand_mixed(struct bch_fs *c, u64 nr)