From cdf17bffadb3346ea4424357b5bb85de852231e9 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sat, 25 Nov 2017 06:16:39 -0500
Subject: [PATCH] disk groups

---
Review fixes folded into this revision:
 - cmd_format.c: the usage examples still passed --tier, which this patch
   removes; both devices now use --group.
 - libbcachefs.c, parse_target(): gi is NULL when no device was given a
   group, so a target that is not a device path dereferenced NULL; it is
   now reported as an invalid target.
 - libbcachefs.c, bch2_sb_print_members(): "group" was printed
   uninitialized for devices without a group, the 1-based member group
   value was used as a 0-based index, the label copy was not NUL
   terminated, gi could be NULL, and the fallback string lacked its
   closing parenthesis.

Note: line breaks and leading indentation were reconstructed from a
whitespace-collapsed copy of the patch, and the index lines below still
name the blobs of the original revision, so this copy may need whitespace
fuzz to apply.

 cmd_device.c  |   8 ++-
 cmd_format.c  |  37 ++++++++-----
 cmd_migrate.c |   2 +-
 libbcachefs.c | 152 ++++++++++++++++++++++++++++++++++++++++++--------
 libbcachefs.h |   6 ++-
 5 files changed, 162 insertions(+), 43 deletions(-)

diff --git a/cmd_device.c b/cmd_device.c
index 143f21d2..428d3047 100644
--- a/cmd_device.c
+++ b/cmd_device.c
@@ -41,7 +41,7 @@ int cmd_device_add(int argc, char *argv[])
 		{ "fs_size", required_argument, NULL, 'S' },
 		{ "bucket", required_argument, NULL, 'B' },
 		{ "discard", no_argument, NULL, 'D' },
-		{ "tier", required_argument, NULL, 't' },
+		{ "group", required_argument, NULL, 'g' },
 		{ "force", no_argument, NULL, 'f' },
 		{ "help", no_argument, NULL, 'h' },
 		{ NULL }
@@ -67,10 +67,8 @@ int cmd_device_add(int argc, char *argv[])
 		case 'D':
 			dev_opts.discard = true;
 			break;
-		case 't':
-			if (kstrtouint(optarg, 10, &dev_opts.tier) ||
-			    dev_opts.tier >= BCH_TIER_MAX)
-				die("invalid tier");
+		case 'g':
+			dev_opts.group = strdup(optarg);
 			break;
 		case 'f':
 			force = true;
diff --git a/cmd_format.c b/cmd_format.c
index b6889ac1..fff2d77b 100644
--- a/cmd_format.c
+++ b/cmd_format.c
@@ -40,6 +40,9 @@ x(0, compression_type, "(none|lz4|gzip)", NULL) \
 x(0, replicas, "#", NULL) \
 x(0, data_replicas, "#", NULL) \
 x(0, metadata_replicas, "#", NULL) \
+x(0, foreground_target, "target", NULL) \
+x(0, background_target, "target", NULL) \
+x(0, promote_target, "target", NULL) \
 x(0, encrypted, NULL, "Enable whole filesystem encryption (chacha20/poly1305)")\
 x(0, no_passphrase, NULL, "Don't encrypt master encryption key")\
 x('e', error_action, "(continue|remount-ro|panic)", NULL) \
@@ -50,11 +53,11 @@ t("") \
 t("Device specific options:") \
 x(0, fs_size, "size", "Size of filesystem on device")\
 x(0, bucket_size, "size", "Bucket size") \
-x('t', tier, "#", "Higher tier indicates slower devices")\
+x('g', group, "label", "Disk group")\
 x(0, discard, NULL, NULL) \
 x(0, data_allowed, "journal,btree,data", "Allowed types of data on this device")\
 t("Device specific options must come before corresponding devices, e.g.") \
-t(" bcachefs format --tier 0 /dev/sdb --tier 1 /dev/sdc") \
+t(" bcachefs format --group cache /dev/sdb --group disk /dev/sdc") \
 t("") \
 x('q', quiet, NULL, "Only print errors") \
 x('h', help, NULL, "Display this help and exit")
@@ -91,13 +94,13 @@ static void usage(void)
 	     " --fs_size=size Size of filesystem on device\n"
 	     " --bucket=size Bucket size\n"
 	     " --discard Enable discards\n"
-	     " -t, --tier=# Higher tier (e.g. 1) indicates slower devices\n"
+	     " -g, --group=label Disk group\n"
 	     "\n"
 	     " -q, --quiet Only print errors\n"
 	     " -h, --help Display this help and exit\n"
 	     "\n"
 	     "Device specific options must come before corresponding devices, e.g.\n"
-	     " bcachefs format --tier 0 /dev/sdb --tier 1 /dev/sdc\n"
+	     " bcachefs format --group cache /dev/sdb --group disk /dev/sdc\n"
 	     "\n"
 	     "Report bugs to ");
 }
@@ -177,20 +180,32 @@ int cmd_format(int argc, char *argv[])
 			break;
 		case O_data_replicas:
 			if (kstrtouint(optarg, 10, &opts.data_replicas) ||
-			    opts.data_replicas >= BCH_REPLICAS_MAX)
+			    !opts.data_replicas ||
+			    opts.data_replicas > BCH_REPLICAS_MAX)
 				die("invalid replicas");
 			break;
 		case O_metadata_replicas:
 			if (kstrtouint(optarg, 10, &opts.meta_replicas) ||
-			    opts.meta_replicas >= BCH_REPLICAS_MAX)
+			    !opts.meta_replicas ||
+			    opts.meta_replicas > BCH_REPLICAS_MAX)
 				die("invalid replicas");
 			break;
 		case O_replicas:
 			if (kstrtouint(optarg, 10, &opts.data_replicas) ||
-			    opts.data_replicas >= BCH_REPLICAS_MAX)
+			    !opts.data_replicas ||
+			    opts.data_replicas > BCH_REPLICAS_MAX)
 				die("invalid replicas");
 			opts.meta_replicas = opts.data_replicas;
 			break;
+		case O_foreground_target:
+			opts.foreground_target = strdup(optarg);
+			break;
+		case O_background_target:
+			opts.background_target = strdup(optarg);
+			break;
+		case O_promote_target:
+			opts.promote_target = strdup(optarg);
+			break;
 		case O_encrypted:
 			opts.encrypted = true;
 			break;
@@ -226,11 +241,9 @@ int cmd_format(int argc, char *argv[])
 			dev_opts.bucket_size =
 				hatoi_validate(optarg, "bucket size");
 			break;
-		case O_tier:
-		case 't':
-			if (kstrtouint(optarg, 10, &dev_opts.tier) ||
-			    dev_opts.tier >= BCH_TIER_MAX)
-				die("invalid tier");
+		case O_group:
+		case 'g':
+			dev_opts.group = strdup(optarg);
 			break;
 		case O_discard:
 			dev_opts.discard = true;
diff --git a/cmd_migrate.c b/cmd_migrate.c
index 4ba3538d..a42d11ec 100644
--- a/cmd_migrate.c
+++ b/cmd_migrate.c
@@ -267,7 +267,7 @@ static void write_data(struct bch_fs *c,
 	o.op.wbio.bio.bi_iter.bi_size = len;
 	bch2_bio_map(&o.op.wbio.bio, buf);
 
-	bch2_write_op_init(&o.op, c);
+	bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
 	o.op.write_point = writepoint_hashed(0);
 	o.op.nr_replicas = 1;
 	o.op.pos = POS(dst_inode->bi_inum, dst_offset >> 9);
diff --git a/libbcachefs.c b/libbcachefs.c
index bdf2924f..9baaff04 100644
--- a/libbcachefs.c
+++ b/libbcachefs.c
@@ -124,12 +124,45 @@ void bch2_pick_bucket_size(struct format_opts opts, struct dev_opts *dev)
 
 }
 
+static unsigned parse_target(struct dev_opts *devs, size_t nr_devs,
+			     struct bch_sb_field_disk_groups *gi,
+			     const char *s)
+{
+	struct bch_disk_group *g;
+	struct dev_opts *i;
+
+	if (!s)
+		return 0;
+
+	for (i = devs; i < devs + nr_devs; i++)
+		if (!strcmp(s, i->path))
+			return dev_to_target(i - devs);
+
+	/* gi is NULL when no device was given a group: no labels to match */
+	if (!gi)
+		die("Invalid target %s", s);
+
+	for (g = gi->entries;
+	     g < gi->entries + disk_groups_nr(gi);
+	     g++) {
+		unsigned len = strnlen(g->label, sizeof(g->label));
+
+		if (len == strlen(s) &&
+		    !memcmp(s, g->label, len))
+			return group_to_target(g - gi->entries);
+	}
+
+	die("Invalid target %s", s);
+	return 0;
+}
+
 struct bch_sb *bch2_format(struct format_opts opts,
 			   struct dev_opts *devs, size_t nr_devs)
 {
 	struct bch_sb *sb;
 	struct dev_opts *i;
 	struct bch_sb_field_members *mi;
+	struct bch_sb_field_disk_groups *gi = NULL;
 	unsigned u64s;
 
 	/* calculate block size: */
@@ -164,6 +197,8 @@ struct bch_sb *bch2_format(struct format_opts opts,
 	sb = calloc(1, sizeof(*sb) +
 		    sizeof(struct bch_sb_field_members) +
 		    sizeof(struct bch_member) * nr_devs +
+		    sizeof(struct bch_sb_field_disk_groups) +
+		    sizeof(struct bch_disk_group) * nr_devs +
 		    sizeof(struct bch_sb_field_crypt));
 
 	sb->version = cpu_to_le64(BCH_SB_VERSION_MAX);
@@ -201,6 +236,71 @@ struct bch_sb *bch2_format(struct format_opts opts,
 	sb->time_base_lo = cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
 	sb->time_precision = cpu_to_le32(1);
 
+	mi = vstruct_end(sb);
+	u64s = (sizeof(struct bch_sb_field_members) +
+		sizeof(struct bch_member) * nr_devs) / sizeof(u64);
+
+	le32_add_cpu(&sb->u64s, u64s);
+	le32_add_cpu(&mi->field.u64s, u64s);
+	mi->field.type = BCH_SB_FIELD_members;
+
+	/* Member info: */
+	for (i = devs; i < devs + nr_devs; i++) {
+		struct bch_member *m = mi->members + (i - devs);
+
+		uuid_generate(m->uuid.b);
+		m->nbuckets = cpu_to_le64(i->nbuckets);
+		m->first_bucket = 0;
+		m->bucket_size = cpu_to_le16(i->bucket_size);
+
+		SET_BCH_MEMBER_REPLACEMENT(m, CACHE_REPLACEMENT_LRU);
+		SET_BCH_MEMBER_DISCARD(m, i->discard);
+		SET_BCH_MEMBER_DATA_ALLOWED(m, i->data_allowed);
+	}
+
+	/* Disk groups */
+	for (i = devs; i < devs + nr_devs; i++) {
+		struct bch_member *m = mi->members + (i - devs);
+		struct bch_disk_group *g;
+		size_t len;
+		int idx;
+
+		if (!i->group)
+			continue;
+
+		len = min_t(size_t, strlen(i->group) + 1, BCH_SB_LABEL_SIZE);
+
+		if (!gi) {
+			gi = vstruct_end(sb);
+			u64s = sizeof(*gi) / sizeof(u64);
+			le32_add_cpu(&sb->u64s, u64s);
+			le32_add_cpu(&gi->field.u64s, u64s);
+			gi->field.type = BCH_SB_FIELD_disk_groups;
+		}
+
+		idx = __bch2_disk_group_find(gi, i->group);
+		if (idx >= 0) {
+			g = gi->entries + idx;
+		} else {
+			u64s = sizeof(*g) / sizeof(u64);
+			g = vstruct_end(&gi->field);
+			le32_add_cpu(&sb->u64s, u64s);
+			le32_add_cpu(&gi->field.u64s, u64s);
+			memcpy(g->label, i->group, len);
+			SET_BCH_GROUP_DATA_ALLOWED(g, ~0);
+		}
+
+		SET_BCH_MEMBER_GROUP(m, (g - gi->entries) + 1);
+	}
+
+	SET_BCH_SB_FOREGROUND_TARGET(sb,
+		parse_target(devs, nr_devs, gi, opts.foreground_target));
+	SET_BCH_SB_BACKGROUND_TARGET(sb,
+		parse_target(devs, nr_devs, gi, opts.background_target));
+	SET_BCH_SB_PROMOTE_TARGET(sb,
+		parse_target(devs, nr_devs, gi, opts.promote_target));
+
+	/* Crypt: */
 	if (opts.encrypted) {
 		struct bch_sb_field_crypt *crypt = vstruct_end(sb);
 
@@ -214,28 +314,6 @@ struct bch_sb *bch2_format(struct format_opts opts,
 		SET_BCH_SB_ENCRYPTION_TYPE(sb, 1);
 	}
 
-	mi = vstruct_end(sb);
-	u64s = (sizeof(struct bch_sb_field_members) +
-		sizeof(struct bch_member) * nr_devs) / sizeof(u64);
-
-	le32_add_cpu(&sb->u64s, u64s);
-	mi->field.u64s = cpu_to_le32(u64s);
-	mi->field.type = BCH_SB_FIELD_members;
-
-	for (i = devs; i < devs + nr_devs; i++) {
-		struct bch_member *m = mi->members + (i - devs);
-
-		uuid_generate(m->uuid.b);
-		m->nbuckets = cpu_to_le64(i->nbuckets);
-		m->first_bucket = 0;
-		m->bucket_size = cpu_to_le16(i->bucket_size);
-
-		SET_BCH_MEMBER_TIER(m, i->tier);
-		SET_BCH_MEMBER_REPLACEMENT(m, CACHE_REPLACEMENT_LRU);
-		SET_BCH_MEMBER_DISCARD(m, i->discard);
-		SET_BCH_MEMBER_DATA_ALLOWED(m, i->data_allowed);
-	}
-
 	for (i = devs; i < devs + nr_devs; i++) {
 		sb->dev_idx = i - devs;
 
@@ -355,6 +433,7 @@ static void bch2_sb_print_members(struct bch_sb *sb, struct bch_sb_field *f,
 				  enum units units)
 {
 	struct bch_sb_field_members *mi = field_to_type(f, members);
+	struct bch_sb_field_disk_groups *gi = bch2_sb_get_disk_groups(sb);
 	unsigned i;
 
 	for (i = 0; i < sb->nr_devices; i++) {
@@ -363,11 +442,28 @@ static void bch2_sb_print_members(struct bch_sb *sb, struct bch_sb_field *f,
 		char member_uuid_str[40];
 		char data_allowed_str[100];
 		char data_has_str[100];
+		char group[64];
 
 		if (!bch2_member_exists(m))
 			continue;
 
 		uuid_unparse(m->uuid.b, member_uuid_str);
+
+		/*
+		 * bch2_format() stores the member's group as index + 1, with 0
+		 * meaning "no group".  A full-length label has no terminating
+		 * NUL, so bound the copy by the label size.
+		 */
+		if (!BCH_MEMBER_GROUP(m))
+			snprintf(group, sizeof(group), "(none)");
+		else if (gi && BCH_MEMBER_GROUP(m) <= disk_groups_nr(gi))
+			snprintf(group, sizeof(group), "%.*s",
+				 (int) BCH_SB_LABEL_SIZE,
+				 (const char *) gi->entries[BCH_MEMBER_GROUP(m) - 1].label);
+		else
+			snprintf(group, sizeof(group),
+				 "(bad disk groups section)");
+
 		bch2_scnprint_flag_list(data_allowed_str,
 					sizeof(data_allowed_str),
 					bch2_data_types,
@@ -390,7 +486,7 @@ static void bch2_sb_print_members(struct bch_sb *sb, struct bch_sb_field *f,
 		       " Buckets: %llu\n"
 		       " Last mount: %s\n"
 		       " State: %s\n"
-		       " Tier: %llu\n"
+		       " Group: %s\n"
 		       " Data allowed: %s\n"
 
 		       " Has data: %s\n"
@@ -409,7 +505,7 @@ static void bch2_sb_print_members(struct bch_sb *sb, struct bch_sb_field *f,
 		       ? bch2_dev_state[BCH_MEMBER_STATE(m)]
 		       : "unknown",
 
-		       BCH_MEMBER_TIER(m),
+		       group,
 		       data_allowed_str,
 
 		       data_has_str,
@@ -540,6 +636,10 @@ void bch2_sb_print(struct bch_sb *sb, bool print_layout,
 	       "Data checksum type: %s (%llu)\n"
 	       "Compression type: %s (%llu)\n"
 
+	       "Foreground write target: %llu\n"
+	       "Background write target: %llu\n"
+	       "Promote target: %llu\n"
+
 	       "String hash type: %s (%llu)\n"
 	       "32 bit inodes: %llu\n"
 	       "GC reserve percentage: %llu%%\n"
@@ -579,6 +679,10 @@ void bch2_sb_print(struct bch_sb *sb, bool print_layout,
 	       : "unknown",
 	       BCH_SB_COMPRESSION_TYPE(sb),
 
+	       BCH_SB_FOREGROUND_TARGET(sb),
+	       BCH_SB_BACKGROUND_TARGET(sb),
+	       BCH_SB_PROMOTE_TARGET(sb),
+
 	       BCH_SB_STR_HASH_TYPE(sb) < BCH_STR_HASH_NR
 	       ? bch2_str_hash_types[BCH_SB_STR_HASH_TYPE(sb)]
 	       : "unknown",
diff --git a/libbcachefs.h b/libbcachefs.h
index c7c31dd2..d27b4e8f 100644
--- a/libbcachefs.h
+++ b/libbcachefs.h
@@ -25,6 +25,10 @@ struct format_opts {
 	unsigned meta_replicas_required;
 	unsigned data_replicas_required;
 
+	const char *foreground_target;
+	const char *background_target;
+	const char *promote_target;
+
 	unsigned meta_csum_type;
 	unsigned data_csum_type;
 	unsigned compression_type;
@@ -52,7 +56,7 @@ struct dev_opts {
 	char *path;
 	u64 size; /* 512 byte sectors */
 	unsigned bucket_size;
-	unsigned tier;
+	const char *group;
 	unsigned data_allowed;
 	bool discard;
 