Update bcachefs sources to e99d29e402 bcachefs: zstd support, compression refactoring

Kent Overstreet 2018-02-16 15:36:33 -05:00
parent 8d95f4a437
commit 807abf36c1
40 changed files with 926 additions and 606 deletions

.bcachefs_revision

@@ -1 +1 @@
-d5e561b3cc023dd247d2b3d08b680709ec21b477
+e99d29e40210f6d9b7ec9e5b7aee1e48ae7655c5

Makefile

@@ -9,6 +9,7 @@ CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall \
     -D_GNU_SOURCE \
     -D_LGPL_SOURCE \
     -DRCU_MEMBARRIER \
+    -DZSTD_STATIC_LINKING_ONLY \
     -DNO_BCACHEFS_CHARDEV \
     -DNO_BCACHEFS_FS \
     -DNO_BCACHEFS_SYSFS \
@@ -31,9 +32,15 @@ ifdef D
 endif
 
 PKGCONFIG_LIBS="blkid uuid liburcu libsodium zlib"
+PKGCONFIG_LIBS_STATIC="libzstd"
 CFLAGS+=`pkg-config --cflags ${PKGCONFIG_LIBS}`
-LDLIBS+=`pkg-config --libs ${PKGCONFIG_LIBS}` \
-    -lm -lpthread -lrt -lscrypt -lkeyutils -laio
+LDLIBS+=`pkg-config --libs ${PKGCONFIG_LIBS}`
+CFLAGS+=`pkg-config --static --cflags ${PKGCONFIG_LIBS_STATIC}`
+LDLIBS+=`pkg-config --static --libs ${PKGCONFIG_LIBS_STATIC}`
+LDLIBS+=-lm -lpthread -lrt -lscrypt -lkeyutils -laio
 
 ifeq ($(PREFIX),/usr)
 ROOT_SBINDIR=/sbin

cmd_migrate.c

@@ -344,8 +344,8 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
         die("error reserving space in new filesystem: %s",
             strerror(-ret));
 
-    bch2_check_mark_super(c, BCH_DATA_USER,
-                  bch2_bkey_devs(extent_i_to_s_c(e).s_c));
+    bch2_mark_bkey_replicas(c, BCH_DATA_USER,
+                    extent_i_to_s_c(e).s_c);
 
     ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
                 &res, NULL, NULL, 0);

debian/control

@@ -5,7 +5,7 @@ Priority: optional
 Standards-Version: 3.9.5
 Build-Depends: debhelper (>= 9), pkg-config, libblkid-dev, uuid-dev,
  libscrypt-dev, libsodium-dev, libkeyutils-dev, liburcu-dev, zlib1g-dev,
- libattr1-dev, libaio-dev
+ libattr1-dev, libaio-dev, libzstd-dev
 Homepage: http://bcache.evilpiepirate.org/
 
 Package: bcachefs-tools

libbcachefs/alloc.c

@@ -1201,43 +1201,56 @@ out:
     return ob - c->open_buckets;
 }
 
+static int __dev_alloc_cmp(struct bch_fs *c,
+               struct write_point *wp,
+               unsigned l, unsigned r)
+{
+    struct bch_dev *ca_l = rcu_dereference(c->devs[l]);
+    struct bch_dev *ca_r = rcu_dereference(c->devs[r]);
+
+    if (ca_l && ca_r && ca_l->mi.tier != ca_r->mi.tier)
+        return ((ca_l->mi.tier > ca_r->mi.tier) -
+            (ca_l->mi.tier < ca_r->mi.tier));
+
+    return ((wp->next_alloc[l] > wp->next_alloc[r]) -
+        (wp->next_alloc[l] < wp->next_alloc[r]));
+}
+
+#define dev_alloc_cmp(l, r) __dev_alloc_cmp(c, wp, l, r)
+
 struct dev_alloc_list bch2_wp_alloc_list(struct bch_fs *c,
                      struct write_point *wp,
                      struct bch_devs_mask *devs)
 {
     struct dev_alloc_list ret = { .nr = 0 };
-    struct bch_dev *ca, *ca2;
-    unsigned i, j;
+    struct bch_dev *ca;
+    unsigned i;
 
-    for_each_member_device_rcu(ca, c, i, devs) {
-        for (j = 0; j < ret.nr; j++) {
-            unsigned idx = ret.devs[j];
-
-            ca2 = rcu_dereference(c->devs[idx]);
-            if (!ca2)
-                break;
-
-            if (ca->mi.tier < ca2->mi.tier)
-                break;
-
-            if (ca->mi.tier == ca2->mi.tier &&
-                wp->next_alloc[i] < wp->next_alloc[idx])
-                break;
-        }
-
-        array_insert_item(ret.devs, ret.nr, j, i);
-    }
+    for_each_member_device_rcu(ca, c, i, devs)
+        ret.devs[ret.nr++] = i;
 
+    bubble_sort(ret.devs, ret.nr, dev_alloc_cmp);
     return ret;
 }
 
 void bch2_wp_rescale(struct bch_fs *c, struct bch_dev *ca,
              struct write_point *wp)
 {
-    unsigned i;
+    u64 *v = wp->next_alloc + ca->dev_idx;
+    u64 free_space = dev_buckets_free(c, ca);
+    u64 free_space_inv = free_space
+        ? div64_u64(1ULL << 48, free_space)
+        : 1ULL << 48;
+    u64 scale = *v / 4;
 
-    for (i = 0; i < ARRAY_SIZE(wp->next_alloc); i++)
-        wp->next_alloc[i] >>= 1;
+    if (*v + free_space_inv >= *v)
+        *v += free_space_inv;
+    else
+        *v = U64_MAX;
+
+    for (v = wp->next_alloc;
+         v < wp->next_alloc + ARRAY_SIZE(wp->next_alloc); v++)
+        *v = *v < scale ? 0 : *v - scale;
 }
 
 static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
@@ -1249,7 +1262,6 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
 {
     enum bucket_alloc_ret ret = NO_DEVICES;
     struct dev_alloc_list devs_sorted;
-    u64 buckets_free;
     unsigned i;
 
     BUG_ON(nr_replicas > ARRAY_SIZE(wp->ptrs));
@@ -1281,13 +1293,6 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
         BUG_ON(wp->nr_ptrs >= ARRAY_SIZE(wp->ptrs));
         wp->ptrs[wp->nr_ptrs++] = c->open_buckets + ob;
 
-        buckets_free = dev_buckets_free(c, ca);
-        if (buckets_free)
-            wp->next_alloc[ca->dev_idx] +=
-                div64_u64(U64_MAX, buckets_free *
-                      ca->mi.bucket_size);
-        else
-            wp->next_alloc[ca->dev_idx] = U64_MAX;
-
         bch2_wp_rescale(c, ca, wp);
 
         __clear_bit(ca->dev_idx, devs->d);
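
[Editor's note: the rewritten bch2_wp_rescale() above is a stride-scheduler style weighting: each allocation bumps the chosen device's next_alloc counter by 2^48 / free_space (saturating at U64_MAX), then every counter decays by a quarter of the winner's value, and bch2_wp_alloc_list() simply sorts devices by tier and counter. A self-contained toy model of the scoring, with hypothetical names (not the bcachefs API):

    #include <stdint.h>
    #include <stddef.h>

    struct toy_dev {
        uint64_t free_space;   /* buckets free */
        uint64_t next_alloc;   /* scheduling counter */
    };

    /* Prefer the device with the smallest counter: */
    static size_t toy_pick(const struct toy_dev *d, size_t nr)
    {
        size_t best = 0;
        for (size_t i = 1; i < nr; i++)
            if (d[i].next_alloc < d[best].next_alloc)
                best = i;
        return best;
    }

    /* Charge an allocation: the counter grows inversely to free
     * space, saturating instead of wrapping, as bch2_wp_rescale()
     * does above. */
    static void toy_charge(struct toy_dev *d)
    {
        uint64_t inv = d->free_space
            ? (UINT64_C(1) << 48) / d->free_space
            : UINT64_C(1) << 48;

        d->next_alloc = (d->next_alloc + inv >= d->next_alloc)
            ? d->next_alloc + inv
            : UINT64_MAX;
    }

Over many calls, a device with twice the free space is charged half as much per allocation, so it wins roughly twice as often.]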

libbcachefs/bcachefs.h

@@ -194,6 +194,7 @@
 #include <linux/shrinker.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <linux/zstd.h>
 
 #include "bcachefs_format.h"
 #include "bset.h"
@@ -231,6 +232,12 @@ do {                            \
         bch_info(c, fmt, ##__VA_ARGS__);    \
 } while (0)
 
+#define pr_verbose_init(opts, fmt, ...)         \
+do {                            \
+    if (opt_get(opts, verbose_init))        \
+        pr_info(fmt, ##__VA_ARGS__);        \
+} while (0)
+
 /* Parameters that are useful for debugging, but should always be compiled in: */
 #define BCH_DEBUG_PARAMS_ALWAYS()           \
     BCH_DEBUG_PARAM(key_merging_disabled,       \
@@ -646,10 +653,10 @@ struct bch_fs {
     struct mutex        bio_bounce_pages_lock;
     mempool_t       bio_bounce_pages;
 
-    mempool_t       lz4_workspace_pool;
-    void            *zlib_workspace;
-    struct mutex        zlib_workspace_lock;
     mempool_t       compression_bounce[2];
+    mempool_t       compress_workspace[BCH_COMPRESSION_NR];
+    mempool_t       decompress_workspace;
+    ZSTD_parameters     zstd_params;
 
     struct crypto_shash *sha256;
     struct crypto_skcipher  *chacha20;

libbcachefs/bcachefs_format.h

@@ -6,7 +6,6 @@
  */
 
 #include <asm/types.h>
-#include <linux/compiler.h>
 #include <asm/byteorder.h>
 #include <linux/uuid.h>
 
@@ -370,7 +369,8 @@ enum bch_compression_type {
     BCH_COMPRESSION_LZ4_OLD     = 1,
     BCH_COMPRESSION_GZIP        = 2,
     BCH_COMPRESSION_LZ4     = 3,
-    BCH_COMPRESSION_NR      = 4,
+    BCH_COMPRESSION_ZSTD        = 4,
+    BCH_COMPRESSION_NR      = 5,
 };
 
 enum bch_extent_entry_type {
@@ -1082,6 +1082,7 @@ LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28);
 enum bch_sb_features {
     BCH_FEATURE_LZ4         = 0,
     BCH_FEATURE_GZIP        = 1,
+    BCH_FEATURE_ZSTD        = 2,
 };
 
 /* options: */
@@ -1109,11 +1110,17 @@ enum bch_str_hash_opts {
     BCH_STR_HASH_NR         = 3,
 };
 
+#define BCH_COMPRESSION_TYPES()     \
+    x(NONE)             \
+    x(LZ4)              \
+    x(GZIP)             \
+    x(ZSTD)
+
 enum bch_compression_opts {
-    BCH_COMPRESSION_OPT_NONE    = 0,
-    BCH_COMPRESSION_OPT_LZ4     = 1,
-    BCH_COMPRESSION_OPT_GZIP    = 2,
-    BCH_COMPRESSION_OPT_NR      = 3,
+#define x(t) BCH_COMPRESSION_OPT_##t,
+    BCH_COMPRESSION_TYPES()
+#undef x
+    BCH_COMPRESSION_OPT_NR
 };
 
 /*
@@ -1322,8 +1329,10 @@ struct btree_node {
     };
 } __attribute__((packed, aligned(8)));
 
 LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4);
 LE64_BITMASK(BTREE_NODE_LEVEL,  struct btree_node, flags, 4, 8);
+/* 8-32 unused */
+LE64_BITMASK(BTREE_NODE_SEQ,    struct btree_node, flags, 32, 64);
 
 struct btree_node_entry {
     struct bch_csum     csum;
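
[Editor's note: BCH_COMPRESSION_TYPES() above is an x-macro: one master list expanded under different definitions of x(), so this enum and the lookup tables added in checksum.h and compress.c later in this commit cannot drift out of sync. A minimal standalone illustration of the pattern, with toy names:

    #include <stdio.h>

    #define COLORS() \
        x(RED)   \
        x(GREEN) \
        x(BLUE)

    /* Expand once as an enum... */
    enum color {
    #define x(c) COLOR_##c,
        COLORS()
    #undef x
        COLOR_NR
    };

    /* ...and again as a parallel name table: */
    static const char * const color_names[] = {
    #define x(c) [COLOR_##c] = #c,
        COLORS()
    #undef x
    };

    int main(void)
    {
        for (int i = 0; i < COLOR_NR; i++)
            printf("%d = %s\n", i, color_names[i]);
        return 0;
    }
]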

libbcachefs/btree_cache.c

@@ -373,19 +373,23 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
 {
     struct btree_cache *bc = &c->btree_cache;
     unsigned i;
-    int ret;
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
 
     ret = rhashtable_init(&bc->table, &bch_btree_cache_params);
     if (ret)
-        return ret;
+        goto out;
 
     bc->table_init_done = true;
 
     bch2_recalc_btree_reserve(c);
 
     for (i = 0; i < bc->reserve; i++)
-        if (!btree_node_mem_alloc(c, GFP_KERNEL))
-            return -ENOMEM;
+        if (!btree_node_mem_alloc(c, GFP_KERNEL)) {
+            ret = -ENOMEM;
+            goto out;
+        }
 
     list_splice_init(&bc->live, &bc->freeable);
 
@@ -393,12 +397,16 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
     mutex_init(&c->verify_lock);
 
     c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
-    if (!c->verify_ondisk)
-        return -ENOMEM;
+    if (!c->verify_ondisk) {
+        ret = -ENOMEM;
+        goto out;
+    }
 
     c->verify_data = btree_node_mem_alloc(c, GFP_KERNEL);
-    if (!c->verify_data)
-        return -ENOMEM;
+    if (!c->verify_data) {
+        ret = -ENOMEM;
+        goto out;
+    }
 
     list_del_init(&c->verify_data->list);
 #endif
@@ -408,8 +416,9 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
     bc->shrink.seeks = 4;
     bc->shrink.batch = btree_pages(c) * 2;
     register_shrinker(&bc->shrink);
-
-    return 0;
+out:
+    pr_verbose_init(c->opts, "ret %i", ret);
+    return ret;
 }
 
 void bch2_fs_btree_cache_init_early(struct btree_cache *bc)

libbcachefs/btree_gc.c

@@ -148,14 +148,13 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
 {
     enum bch_data_type data_type = type == BKEY_TYPE_BTREE
         ? BCH_DATA_BTREE : BCH_DATA_USER;
-    struct bch_devs_list devs = bch2_bkey_devs(k);
     int ret = 0;
 
     if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
-        fsck_err_on(!bch2_sb_has_replicas(c, data_type, devs), c,
+        fsck_err_on(!bch2_bkey_replicas_marked(c, data_type, k), c,
             "superblock not marked as containing replicas (type %u)",
             data_type)) {
-        ret = bch2_check_mark_super(c, data_type, devs);
+        ret = bch2_mark_bkey_replicas(c, data_type, k);
         if (ret)
             return ret;
     }

libbcachefs/btree_io.c

@@ -1135,6 +1135,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
         unsigned sectors, whiteout_u64s = 0;
         struct nonce nonce;
         struct bch_csum csum;
+        bool first = !b->written;
 
         if (!b->written) {
             i = &b->data->keys;
@@ -1194,10 +1195,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
         }
 
         if (ret) {
-            btree_err_on(!b->written,
+            btree_err_on(first,
                      BTREE_ERR_FIXABLE, c, b, i,
                      "first btree node bset has blacklisted journal seq");
-            if (b->written)
+            if (!first)
                 continue;
         }

libbcachefs/btree_update_interior.c

@@ -430,6 +430,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
     n->data->min_key    = b->data->min_key;
     n->data->max_key    = b->data->max_key;
     n->data->format     = format;
+    SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
 
     btree_node_set_format(n, format);
 
@@ -559,8 +560,8 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
             goto err_free;
         }
 
-        ret = bch2_check_mark_super(c, BCH_DATA_BTREE,
-                bch2_bkey_devs(bkey_i_to_s_c(&b->key)));
+        ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+                          bkey_i_to_s_c(&b->key));
         if (ret)
             goto err_free;
 
@@ -1225,6 +1226,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
     n2->data->max_key   = n1->data->max_key;
     n2->data->format    = n1->format;
+    SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data));
     n2->key.k.p = n1->key.k.p;
 
     btree_node_set_format(n2, n2->data->format);
@@ -2019,8 +2021,8 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
             goto err;
         }
 
-        ret = bch2_check_mark_super(c, BCH_DATA_BTREE,
-                bch2_extent_devs(extent_i_to_s_c(new_key)));
+        ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+                          extent_i_to_s_c(new_key).s_c);
         if (ret)
             goto err_free_update;

libbcachefs/btree_update_leaf.c

@@ -272,15 +272,10 @@ static void multi_unlock_write(struct btree_insert *trans)
         bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
 }
 
-static inline void btree_trans_sort(struct btree_insert *trans)
+static inline int btree_trans_cmp(struct btree_insert_entry l,
+                  struct btree_insert_entry r)
 {
-    int i, end = trans->nr;
-
-    while (--end > 0)
-        for (i = 0; i < end; i++)
-            if (btree_iter_cmp(trans->entries[i].iter,
-                       trans->entries[i + 1].iter) > 0)
-                swap(trans->entries[i], trans->entries[i + 1]);
+    return btree_iter_cmp(l.iter, r.iter);
 }
 
 /* Normal update interface: */
@@ -313,7 +308,7 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
                     bkey_i_to_s_c(i->k)));
     }
 
-    btree_trans_sort(trans);
+    bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
 
     if (unlikely(!percpu_ref_tryget(&c->writes)))
         return -EROFS;

libbcachefs/checksum.c

@@ -219,12 +219,16 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
         crypto_alloc_skcipher("chacha20", 0, 0);
     int ret;
 
-    if (!chacha20)
+    if (!chacha20) {
+        pr_err("error requesting chacha20 module: %li", PTR_ERR(chacha20));
         return PTR_ERR(chacha20);
+    }
 
     ret = crypto_skcipher_setkey(chacha20, (void *) key, sizeof(*key));
-    if (ret)
+    if (ret) {
+        pr_err("crypto_skcipher_setkey() error: %i", ret);
         goto err;
+    }
 
     do_encrypt(chacha20, nonce, buf, len);
 err:
@@ -567,7 +571,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c,
 
     ret = bch2_request_key(c->disk_sb, &user_key);
     if (ret) {
-        bch_err(c, "error requesting encryption key");
+        bch_err(c, "error requesting encryption key: %i", ret);
         goto err;
     }
 
@@ -594,13 +598,19 @@ static int bch2_alloc_ciphers(struct bch_fs *c)
 {
     if (!c->chacha20)
         c->chacha20 = crypto_alloc_skcipher("chacha20", 0, 0);
-    if (IS_ERR(c->chacha20))
+    if (IS_ERR(c->chacha20)) {
+        bch_err(c, "error requesting chacha20 module: %li",
+            PTR_ERR(c->chacha20));
         return PTR_ERR(c->chacha20);
+    }
 
     if (!c->poly1305)
         c->poly1305 = crypto_alloc_shash("poly1305", 0, 0);
-    if (IS_ERR(c->poly1305))
+    if (IS_ERR(c->poly1305)) {
+        bch_err(c, "error requesting poly1305 module: %li",
+            PTR_ERR(c->poly1305));
         return PTR_ERR(c->poly1305);
+    }
 
     return 0;
 }
@@ -660,7 +670,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
     if (keyed) {
         ret = bch2_request_key(c->disk_sb, &user_key);
         if (ret) {
-            bch_err(c, "error requesting encryption key");
+            bch_err(c, "error requesting encryption key: %i", ret);
             goto err;
         }
 
@@ -707,27 +717,35 @@ int bch2_fs_encryption_init(struct bch_fs *c)
 {
     struct bch_sb_field_crypt *crypt;
     struct bch_key key;
-    int ret;
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
 
     c->sha256 = crypto_alloc_shash("sha256", 0, 0);
-    if (IS_ERR(c->sha256))
-        return PTR_ERR(c->sha256);
+    if (IS_ERR(c->sha256)) {
+        bch_err(c, "error requesting sha256 module");
+        ret = PTR_ERR(c->sha256);
+        goto out;
+    }
 
     crypt = bch2_sb_get_crypt(c->disk_sb);
     if (!crypt)
-        return 0;
+        goto out;
 
     ret = bch2_alloc_ciphers(c);
     if (ret)
-        return ret;
+        goto out;
 
     ret = bch2_decrypt_sb_key(c, crypt, &key);
     if (ret)
-        goto err;
+        goto out;
 
     ret = crypto_skcipher_setkey(c->chacha20,
             (void *) &key.key, sizeof(key.key));
-err:
+    if (ret)
+        goto out;
+out:
     memzero_explicit(&key, sizeof(key));
+    pr_verbose_init(c->opts, "ret %i", ret);
     return ret;
 }

libbcachefs/checksum.h

@@ -91,20 +91,11 @@ static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c)
     return bch2_csum_opt_to_type(c->opts.metadata_checksum, false);
 }
 
-static inline enum bch_compression_type
-bch2_compression_opt_to_type(enum bch_compression_opts type)
-{
-    switch (type) {
-    case BCH_COMPRESSION_OPT_NONE:
-        return BCH_COMPRESSION_NONE;
-    case BCH_COMPRESSION_OPT_LZ4:
-        return BCH_COMPRESSION_LZ4;
-    case BCH_COMPRESSION_OPT_GZIP:
-        return BCH_COMPRESSION_GZIP;
-    default:
-        BUG();
-    }
-}
+static const unsigned bch2_compression_opt_to_type[] = {
+#define x(t) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_##t,
+    BCH_COMPRESSION_TYPES()
+#undef x
+};
 
 static inline bool bch2_checksum_type_valid(const struct bch_fs *c,
                         unsigned type)

libbcachefs/compress.c

@@ -8,6 +8,7 @@
 #include "lz4.h"
 
 #include <linux/lz4.h>
 #include <linux/zlib.h>
+#include <linux/zstd.h>
 
 /* Bounce buffer: */
 struct bbuf {
@@ -151,6 +152,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
     struct bbuf src_data = { NULL };
     size_t src_len = src->bi_iter.bi_size;
     size_t dst_len = crc.uncompressed_size << 9;
+    void *workspace;
     int ret;
 
     src_data = bio_map_or_bounce(c, src, READ);
@@ -159,57 +161,64 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
     case BCH_COMPRESSION_LZ4_OLD:
         ret = bch2_lz4_decompress(src_data.b, &src_len,
                       dst_data, dst_len);
-        if (ret) {
-            ret = -EIO;
+        if (ret)
             goto err;
-        }
         break;
     case BCH_COMPRESSION_LZ4:
         ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
                           src_len, dst_len, dst_len);
-        if (ret != dst_len) {
-            ret = -EIO;
+        if (ret != dst_len)
             goto err;
-        }
         break;
     case BCH_COMPRESSION_GZIP: {
-        void *workspace;
-        z_stream strm;
-
-        workspace = kmalloc(zlib_inflate_workspacesize(),
-                    GFP_NOIO|__GFP_NOWARN);
-        if (!workspace) {
-            mutex_lock(&c->zlib_workspace_lock);
-            workspace = c->zlib_workspace;
-        }
-
-        strm.next_in    = src_data.b;
-        strm.avail_in   = src_len;
-        strm.next_out   = dst_data;
-        strm.avail_out  = dst_len;
+        z_stream strm = {
+            .next_in    = src_data.b,
+            .avail_in   = src_len,
+            .next_out   = dst_data,
+            .avail_out  = dst_len,
+        };
+
+        workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
+
         zlib_set_workspace(&strm, workspace);
         zlib_inflateInit2(&strm, -MAX_WBITS);
-
         ret = zlib_inflate(&strm, Z_FINISH);
 
-        if (workspace == c->zlib_workspace)
-            mutex_unlock(&c->zlib_workspace_lock);
-        else
-            kfree(workspace);
+        mempool_free(workspace, &c->decompress_workspace);
 
-        if (ret != Z_STREAM_END) {
-            ret = -EIO;
+        if (ret != Z_STREAM_END)
             goto err;
-        }
         break;
     }
+    case BCH_COMPRESSION_ZSTD: {
+        ZSTD_DCtx *ctx;
+        size_t len;
+
+        workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
+        ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
+
+        src_len = le32_to_cpup(src_data.b);
+
+        len = ZSTD_decompressDCtx(ctx,
+                      dst_data,       dst_len,
+                      src_data.b + 4, src_len);
+
+        mempool_free(workspace, &c->decompress_workspace);
+
+        if (len != dst_len)
+            goto err;
+        break;
+    }
     default:
         BUG();
     }
     ret = 0;
-err:
+out:
     bio_unmap_or_unbounce(c, src_data);
     return ret;
+err:
+    ret = -EIO;
+    goto out;
 }
 
 int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
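
[Editor's note: unlike LZ4 and gzip, ZSTD_decompressDCtx() needs the exact compressed size, so the write side (attempt_compress(), below) prefixes the payload with its length as a little-endian u32 and the read side recovers it with le32_to_cpup() before decompressing from src_data.b + 4. A userspace sketch of the same framing against the public libzstd API (the kernel wrappers above differ):

    #include <stdint.h>
    #include <zstd.h>

    static void put_le32(void *p, uint32_t v)
    {
        uint8_t *b = p;
        b[0] = v; b[1] = v >> 8; b[2] = v >> 16; b[3] = v >> 24;
    }

    static uint32_t get_le32(const void *p)
    {
        const uint8_t *b = p;
        return (uint32_t) b[0]       | (uint32_t) b[1] << 8 |
               (uint32_t) b[2] << 16 | (uint32_t) b[3] << 24;
    }

    /* Returns bytes written to dst (4-byte header + payload), 0 on error. */
    static size_t frame_compress(void *dst, size_t dst_len,
                                 const void *src, size_t src_len)
    {
        if (dst_len < 4)
            return 0;

        size_t len = ZSTD_compress((char *) dst + 4, dst_len - 4,
                                   src, src_len, 0);
        if (ZSTD_isError(len))
            return 0;

        put_le32(dst, (uint32_t) len);
        return len + 4;
    }

    static size_t frame_decompress(void *dst, size_t dst_len, const void *src)
    {
        return ZSTD_decompress(dst, dst_len,
                               (const char *) src + 4, get_le32(src));
    }
]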
@@ -282,114 +291,130 @@ err:
     return ret;
 }
 
+static int attempt_compress(struct bch_fs *c,
+                void *workspace,
+                void *dst, size_t dst_len,
+                void *src, size_t src_len,
+                unsigned compression_type)
+{
+    switch (compression_type) {
+    case BCH_COMPRESSION_LZ4: {
+        int len = src_len;
+        int ret = LZ4_compress_destSize(
+                src,        dst,
+                &len,       dst_len,
+                workspace);
+
+        if (len < src_len)
+            return -len;
+
+        return ret;
+    }
+    case BCH_COMPRESSION_GZIP: {
+        z_stream strm = {
+            .next_in    = src,
+            .avail_in   = src_len,
+            .next_out   = dst,
+            .avail_out  = dst_len,
+        };
+
+        zlib_set_workspace(&strm, workspace);
+        zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+                  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
+                  Z_DEFAULT_STRATEGY);
+
+        if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
+            return 0;
+
+        if (zlib_deflateEnd(&strm) != Z_OK)
+            return 0;
+
+        return strm.total_out;
+    }
+    case BCH_COMPRESSION_ZSTD: {
+        ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
+            ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));
+
+        size_t len = ZSTD_compressCCtx(ctx,
+                dst + 4,    dst_len - 4,
+                src,        src_len,
+                c->zstd_params);
+        if (ZSTD_isError(len))
+            return 0;
+
+        *((__le32 *) dst) = cpu_to_le32(len);
+        return len + 4;
+    }
+    default:
+        BUG();
+    }
+}
+
 static unsigned __bio_compress(struct bch_fs *c,
                    struct bio *dst, size_t *dst_len,
                    struct bio *src, size_t *src_len,
                    unsigned compression_type)
 {
     struct bbuf src_data = { NULL }, dst_data = { NULL };
+    void *workspace;
     unsigned pad;
     int ret = 0;
 
     /* If it's only one block, don't bother trying to compress: */
     if (bio_sectors(src) <= c->opts.block_size)
-        goto err;
+        return 0;
 
     dst_data = bio_map_or_bounce(c, dst, WRITE);
     src_data = bio_map_or_bounce(c, src, READ);
 
-    switch (compression_type) {
-    case BCH_COMPRESSION_LZ4_OLD:
-        compression_type = BCH_COMPRESSION_LZ4;
-
-    case BCH_COMPRESSION_LZ4: {
-        void *workspace;
-        int len = src->bi_iter.bi_size;
-
-        workspace = mempool_alloc(&c->lz4_workspace_pool, GFP_NOIO);
-
-        while (1) {
-            if (len <= block_bytes(c)) {
-                ret = 0;
-                break;
-            }
-
-            ret = LZ4_compress_destSize(
-                    src_data.b, dst_data.b,
-                    &len,       dst->bi_iter.bi_size,
-                    workspace);
-            if (ret >= len) {
-                /* uncompressible: */
-                ret = 0;
-                break;
-            }
-
-            if (!(len & (block_bytes(c) - 1)))
-                break;
-
-            len = round_down(len, block_bytes(c));
-        }
-        mempool_free(workspace, &c->lz4_workspace_pool);
-
-        if (!ret)
-            goto err;
-
-        *src_len = len;
-        *dst_len = ret;
-        ret = 0;
-        break;
-    }
-    case BCH_COMPRESSION_GZIP: {
-        void *workspace;
-        z_stream strm;
-
-        workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS,
-                                   DEF_MEM_LEVEL),
-                    GFP_NOIO|__GFP_NOWARN);
-        if (!workspace) {
-            mutex_lock(&c->zlib_workspace_lock);
-            workspace = c->zlib_workspace;
-        }
-
-        strm.next_in    = src_data.b;
-        strm.avail_in   = min(src->bi_iter.bi_size,
-                      dst->bi_iter.bi_size);
-        strm.next_out   = dst_data.b;
-        strm.avail_out  = dst->bi_iter.bi_size;
-        zlib_set_workspace(&strm, workspace);
-        zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
-                  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
-                  Z_DEFAULT_STRATEGY);
-
-        ret = zlib_deflate(&strm, Z_FINISH);
-        if (ret != Z_STREAM_END) {
-            ret = -EIO;
-            goto zlib_err;
-        }
-
-        ret = zlib_deflateEnd(&strm);
-        if (ret != Z_OK) {
-            ret = -EIO;
-            goto zlib_err;
-        }
-
-        ret = 0;
-zlib_err:
-        if (workspace == c->zlib_workspace)
-            mutex_unlock(&c->zlib_workspace_lock);
-        else
-            kfree(workspace);
-
-        if (ret)
-            goto err;
-
-        *dst_len = strm.total_out;
-        *src_len = strm.total_in;
-        break;
-    }
-    default:
-        BUG();
-    }
+    workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO);
+
+    *src_len = src->bi_iter.bi_size;
+    *dst_len = dst->bi_iter.bi_size;
+
+    /*
+     * XXX: this algorithm sucks when the compression code doesn't tell us
+     * how much would fit, like LZ4 does:
+     */
+    while (1) {
+        if (*src_len <= block_bytes(c)) {
+            ret = -1;
+            break;
+        }
+
+        ret = attempt_compress(c, workspace,
+                       dst_data.b, *dst_len,
+                       src_data.b, *src_len,
+                       compression_type);
+        if (ret > 0) {
+            *dst_len = ret;
+            ret = 0;
+            break;
+        }
+
+        /* Didn't fit: should we retry with a smaller amount? */
+        if (*src_len <= *dst_len) {
+            ret = -1;
+            break;
+        }
+
+        /*
+         * If ret is negative, it's a hint as to how much data would fit
+         */
+        BUG_ON(-ret >= *src_len);
+
+        if (ret < 0)
+            *src_len = -ret;
+        else
+            *src_len -= (*src_len - *dst_len) / 2;
+
+        *src_len = round_down(*src_len, block_bytes(c));
+    }
+
+    mempool_free(workspace, &c->compress_workspace[compression_type]);
+
+    if (ret)
+        goto err;
 
     /* Didn't get smaller: */
     if (round_up(*dst_len, block_bytes(c)) >= *src_len)
         goto err;
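
[Editor's note: the while loop above is what replaces each compressor's private retry logic. When an attempt fails without a hint, the input shrinks by half of the input/output gap per round, always rounded down to a whole block: with a 128 KiB input and a 32 KiB output budget the attempt sizes run 128K, 80K, 56K, 44K, ... converging on the budget. LZ4 short-circuits this via the negative return from attempt_compress(), which reports exactly how much input it consumed.]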
@@ -429,6 +454,9 @@ unsigned bch2_bio_compress(struct bch_fs *c,
     /* Don't generate a bigger output than input: */
     dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
 
+    if (compression_type == BCH_COMPRESSION_LZ4_OLD)
+        compression_type = BCH_COMPRESSION_LZ4;
+
     compression_type =
         __bio_compress(c, dst, dst_len, src, src_len, compression_type);
@@ -437,81 +465,147 @@ unsigned bch2_bio_compress(struct bch_fs *c,
     return compression_type;
 }
 
+#define BCH_FEATURE_NONE    0
+
+static const unsigned bch2_compression_opt_to_feature[] = {
+#define x(t) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
+    BCH_COMPRESSION_TYPES()
+#undef x
+};
+
+#undef BCH_FEATURE_NONE
+
 /* doesn't write superblock: */
 int bch2_check_set_has_compressed_data(struct bch_fs *c,
                        unsigned compression_type)
 {
-    switch (compression_type) {
-    case BCH_COMPRESSION_OPT_NONE:
-        return 0;
-    case BCH_COMPRESSION_OPT_LZ4:
-        if (bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4))
-            return 0;
-
-        bch2_sb_set_feature(c->disk_sb, BCH_FEATURE_LZ4);
-        break;
-    case BCH_COMPRESSION_OPT_GZIP:
-        if (bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
-            return 0;
-
-        bch2_sb_set_feature(c->disk_sb, BCH_FEATURE_GZIP);
-        break;
-    default:
-        BUG();
-    }
+    unsigned f;
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
+
+    BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
+
+    if (!compression_type)
+        goto out;
+
+    f = bch2_compression_opt_to_feature[compression_type];
+    if (bch2_sb_test_feature(c->disk_sb, f))
+        goto out;
 
-    return bch2_fs_compress_init(c);
+    bch2_sb_set_feature(c->disk_sb, f);
+    ret = bch2_fs_compress_init(c);
+out:
+    pr_verbose_init(c->opts, "ret %i", ret);
+    return ret;
 }
 
 void bch2_fs_compress_exit(struct bch_fs *c)
 {
-    vfree(c->zlib_workspace);
-    mempool_exit(&c->lz4_workspace_pool);
+    unsigned i;
+
+    mempool_exit(&c->decompress_workspace);
+    for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
+        mempool_exit(&c->compress_workspace[i]);
     mempool_exit(&c->compression_bounce[WRITE]);
     mempool_exit(&c->compression_bounce[READ]);
 }
-#define COMPRESSION_WORKSPACE_SIZE                  \
-    max_t(size_t, zlib_inflate_workspacesize(),        \
-          zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL))
+static void *mempool_kvpmalloc(gfp_t gfp_mask, void *pool_data)
+{
+    size_t size = (size_t) pool_data;
+    return kvpmalloc(size, gfp_mask);
+}
+
+void mempool_kvpfree(void *element, void *pool_data)
+{
+    size_t size = (size_t) pool_data;
+    kvpfree(element, size);
+}
+
+static int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+    return !mempool_initialized(pool)
+        ? mempool_init(pool, min_nr, mempool_kvpmalloc,
+                   mempool_kvpfree, (void *) size)
+        : 0;
+}
 int bch2_fs_compress_init(struct bch_fs *c)
 {
-    unsigned order = get_order(c->sb.encoded_extent_max << 9);
-    int ret;
+    size_t max_extent = c->sb.encoded_extent_max << 9;
+    size_t order = get_order(max_extent);
+    size_t decompress_workspace_size = 0;
+    bool decompress_workspace_needed;
+    ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0);
+    struct {
+        unsigned    feature;
+        unsigned    type;
+        size_t      compress_workspace;
+        size_t      decompress_workspace;
+    } compression_types[] = {
+        { BCH_FEATURE_LZ4, BCH_COMPRESSION_LZ4, LZ4_MEM_COMPRESS, 0 },
+        { BCH_FEATURE_GZIP, BCH_COMPRESSION_GZIP,
+            zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
+            zlib_inflate_workspacesize(), },
+        { BCH_FEATURE_ZSTD, BCH_COMPRESSION_ZSTD,
+            ZSTD_CCtxWorkspaceBound(params.cParams),
+            ZSTD_DCtxWorkspaceBound() },
+    }, *i;
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
+
+    c->zstd_params = params;
+
+    for (i = compression_types;
+         i < compression_types + ARRAY_SIZE(compression_types);
+         i++)
+        if (bch2_sb_test_feature(c->disk_sb, i->feature))
+            goto have_compressed;
 
-    if (!bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
-        !bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
-        return 0;
+    goto out;
+have_compressed:
 
     if (!mempool_initialized(&c->compression_bounce[READ])) {
         ret = mempool_init_page_pool(&c->compression_bounce[READ],
                          1, order);
         if (ret)
-            return ret;
+            goto out;
     }
 
     if (!mempool_initialized(&c->compression_bounce[WRITE])) {
         ret = mempool_init_page_pool(&c->compression_bounce[WRITE],
                          1, order);
         if (ret)
-            return ret;
+            goto out;
     }
 
-    if (!mempool_initialized(&c->lz4_workspace_pool) &&
-        bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4)) {
-        ret = mempool_init_kmalloc_pool(&c->lz4_workspace_pool,
-                        1, LZ4_MEM_COMPRESS);
+    for (i = compression_types;
+         i < compression_types + ARRAY_SIZE(compression_types);
+         i++) {
+        decompress_workspace_size =
+            max(decompress_workspace_size, i->decompress_workspace);
+
+        if (!bch2_sb_test_feature(c->disk_sb, i->feature))
+            continue;
+
+        if (i->decompress_workspace)
+            decompress_workspace_needed = true;
+
+        ret = mempool_init_kvpmalloc_pool(
+                &c->compress_workspace[i->type],
+                1, i->compress_workspace);
         if (ret)
-            return ret;
+            goto out;
     }
 
-    if (!c->zlib_workspace &&
-        bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP)) {
-        c->zlib_workspace = vmalloc(COMPRESSION_WORKSPACE_SIZE);
-        if (!c->zlib_workspace)
-            return -ENOMEM;
-    }
+    ret = mempool_init_kmalloc_pool(
+            &c->decompress_workspace,
+            1, decompress_workspace_size);
+    if (ret)
+        goto out;
+out:
+    pr_verbose_init(c->opts, "ret %i", ret);
 
-    return 0;
+    return ret;
 }
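
[Editor's note: mempool_kvpmalloc()/mempool_kvpfree() above reuse one callback pair for pools of any element size by smuggling the size through the opaque pool_data pointer. A cut-down userspace analogue of the trick (hypothetical types standing in for the kernel's mempool API):

    #include <stdlib.h>
    #include <stdint.h>

    typedef void *(*pool_alloc_fn)(void *pool_data);
    typedef void  (*pool_free_fn)(void *element, void *pool_data);

    /* pool_data carries the element size, not a pointer to memory: */
    static void *size_pool_alloc(void *pool_data)
    {
        return malloc((size_t) (uintptr_t) pool_data);
    }

    static void size_pool_free(void *element, void *pool_data)
    {
        (void) pool_data;
        free(element);
    }

    /* A pool of 4 KiB elements would be registered with
     * pool_data = (void *) (uintptr_t) 4096. */
]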

libbcachefs/extents.c

@@ -694,7 +694,7 @@ static void btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
         goto err;
     }
 
-    if (!bch2_sb_has_replicas(c, BCH_DATA_BTREE, bch2_extent_devs(e))) {
+    if (!bch2_bkey_replicas_marked(c, BCH_DATA_BTREE, e.s_c)) {
         bch2_bkey_val_to_text(c, btree_node_type(b),
                       buf, sizeof(buf), k);
         bch2_fs_bug(c,
@@ -1834,7 +1834,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
     }
 
     if (!bkey_extent_is_cached(e.k) &&
-        !bch2_sb_has_replicas(c, BCH_DATA_USER, bch2_extent_devs(e))) {
+        !bch2_bkey_replicas_marked(c, BCH_DATA_USER, e.s_c)) {
         bch2_bkey_val_to_text(c, btree_node_type(b),
                       buf, sizeof(buf), e.s_c);
         bch2_fs_bug(c,
@@ -2013,17 +2013,18 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
 }
 
 void bch2_extent_mark_replicas_cached(struct bch_fs *c,
-                      struct bkey_s_extent e)
+                      struct bkey_s_extent e,
+                      unsigned nr_desired_replicas)
 {
     struct bch_extent_ptr *ptr;
     unsigned tier = 0, nr_cached = 0;
     unsigned nr_good = bch2_extent_nr_good_ptrs(c, e.c);
     bool have_higher_tier;
 
-    if (nr_good <= c->opts.data_replicas)
+    if (nr_good <= nr_desired_replicas)
         return;
 
-    nr_cached = nr_good - c->opts.data_replicas;
+    nr_cached = nr_good - nr_desired_replicas;
 
     do {
         have_higher_tier = false;

libbcachefs/extents.h

@@ -38,7 +38,8 @@ bch2_insert_fixup_extent(struct btree_insert *,
              struct btree_insert_entry *);
 
 bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
-void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent);
+void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
+                      unsigned);
 
 const struct bch_extent_ptr *
 bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
@@ -430,6 +431,18 @@ static inline struct bch_devs_list bch2_extent_dirty_devs(struct bkey_s_c_extent
     return ret;
 }
 
+static inline struct bch_devs_list bch2_extent_cached_devs(struct bkey_s_c_extent e)
+{
+    struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+    const struct bch_extent_ptr *ptr;
+
+    extent_for_each_ptr(e, ptr)
+        if (ptr->cached)
+            ret.devs[ret.nr++] = ptr->dev;
+
+    return ret;
+}
+
 static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
 {
     switch (k.k->type) {
@@ -441,6 +454,28 @@ static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
     }
 }
 
+static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k)
+{
+    switch (k.k->type) {
+    case BCH_EXTENT:
+    case BCH_EXTENT_CACHED:
+        return bch2_extent_dirty_devs(bkey_s_c_to_extent(k));
+    default:
+        return (struct bch_devs_list) { .nr = 0 };
+    }
+}
+
+static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
+{
+    switch (k.k->type) {
+    case BCH_EXTENT:
+    case BCH_EXTENT_CACHED:
+        return bch2_extent_cached_devs(bkey_s_c_to_extent(k));
+    default:
+        return (struct bch_devs_list) { .nr = 0 };
+    }
+}
+
 bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent,
                  struct bch_extent_crc_unpacked);
 bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked);

libbcachefs/fs-io.c

@@ -452,14 +452,18 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
             ret = bch2_btree_insert_at(wop->c, &wop->res,
                     &hook.hook, op_journal_seq(wop),
-                    BTREE_INSERT_NOFAIL|BTREE_INSERT_ATOMIC,
+                    BTREE_INSERT_NOFAIL|
+                    BTREE_INSERT_ATOMIC|
+                    BTREE_INSERT_USE_RESERVE,
                     BTREE_INSERT_ENTRY(&extent_iter, k),
                     BTREE_INSERT_ENTRY_EXTRA_RES(&inode_iter,
                             &hook.inode_p.inode.k_i, 2));
         } else {
             ret = bch2_btree_insert_at(wop->c, &wop->res,
                     &hook.hook, op_journal_seq(wop),
-                    BTREE_INSERT_NOFAIL|BTREE_INSERT_ATOMIC,
+                    BTREE_INSERT_NOFAIL|
+                    BTREE_INSERT_ATOMIC|
+                    BTREE_INSERT_USE_RESERVE,
                     BTREE_INSERT_ENTRY(&extent_iter, k));
         }
 
@@ -502,7 +506,7 @@ static inline void bch2_fswrite_op_init(struct bchfs_write_op *op,
     bch2_write_op_init(&op->op, c);
     op->op.csum_type = bch2_data_checksum_type(c, opts.data_checksum);
-    op->op.compression_type = bch2_compression_opt_to_type(opts.compression);
+    op->op.compression_type = bch2_compression_opt_to_type[opts.compression];
     op->op.devs = c->fastest_devs;
     op->op.index_update_fn = bchfs_write_index_update;
     op_journal_seq_set(&op->op, &inode->ei_journal_seq);
@@ -2692,6 +2696,10 @@ void bch2_fs_fsio_exit(struct bch_fs *c)
 
 int bch2_fs_fsio_init(struct bch_fs *c)
 {
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
+
     if (bioset_init(&c->writepage_bioset,
             4, offsetof(struct bch_writepage_io, op.op.wbio.bio),
             BIOSET_NEED_BVECS) ||
@@ -2701,9 +2709,10 @@ int bch2_fs_fsio_init(struct bch_fs *c)
         bioset_init(&c->dio_write_bioset,
             4, offsetof(struct dio_write, iop.op.wbio.bio),
             BIOSET_NEED_BVECS))
-        return -ENOMEM;
+        ret = -ENOMEM;
 
-    return 0;
+    pr_verbose_init(c->opts, "ret %i", ret);
+    return ret;
 }
 
 #endif /* NO_BCACHEFS_FS */

libbcachefs/io.c

@@ -209,17 +209,6 @@ static void bch2_write_done(struct closure *cl)
     closure_return(cl);
 }
 
-static u64 keylist_sectors(struct keylist *keys)
-{
-    struct bkey_i *k;
-    u64 ret = 0;
-
-    for_each_keylist_key(keys, k)
-        ret += k->k.size;
-
-    return ret;
-}
-
 int bch2_write_index_default(struct bch_write_op *op)
 {
     struct keylist *keys = &op->insert_keys;
@@ -232,7 +221,8 @@ int bch2_write_index_default(struct bch_write_op *op)
 
     ret = bch2_btree_insert_list_at(&iter, keys, &op->res,
                     NULL, op_journal_seq(op),
-                    BTREE_INSERT_NOFAIL);
+                    BTREE_INSERT_NOFAIL|
+                    BTREE_INSERT_USE_RESERVE);
     bch2_btree_iter_unlock(&iter);
 
     return ret;
@@ -268,8 +258,7 @@ static void bch2_write_index(struct closure *cl)
         }
 
         if (!(op->flags & BCH_WRITE_NOMARK_REPLICAS)) {
-            ret = bch2_check_mark_super(c, BCH_DATA_USER,
-                            bch2_extent_devs(e.c));
+            ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, e.s_c);
             if (ret)
                 goto err;
         }
@@ -910,18 +899,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
     swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
     rbio->promote = NULL;
 
-    bch2_write_op_init(&op->write.op, c);
-    op->write.op.csum_type = bch2_data_checksum_type(c, rbio->opts.data_checksum);
-    op->write.op.compression_type =
-        bch2_compression_opt_to_type(rbio->opts.compression);
-
-    op->write.move_dev = -1;
-    op->write.op.devs = c->fastest_devs;
-    op->write.op.write_point = writepoint_hashed((unsigned long) current);
-    op->write.op.flags |= BCH_WRITE_ALLOC_NOWAIT;
-    op->write.op.flags |= BCH_WRITE_CACHED;
-
-    bch2_migrate_write_init(&op->write, rbio);
+    bch2_migrate_read_done(&op->write, rbio);
 
     closure_init(cl, NULL);
     closure_call(&op->write.op.cl, bch2_write, c->wq, cl);
@@ -932,13 +910,16 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
  * XXX: multiple promotes can race with each other, wastefully. Keep a list of
  * outstanding promotes?
  */
-static struct promote_op *promote_alloc(struct bch_read_bio *rbio)
+static struct promote_op *promote_alloc(struct bch_read_bio *rbio,
+                    struct bkey_s_c k)
 {
+    struct bch_fs *c = rbio->c;
     struct promote_op *op;
     struct bio *bio;
     /* data might have to be decompressed in the write path: */
     unsigned pages = DIV_ROUND_UP(rbio->pick.crc.uncompressed_size,
                       PAGE_SECTORS);
+    int ret;
 
     BUG_ON(!rbio->bounce);
     BUG_ON(pages < rbio->bio.bi_vcnt);
@@ -954,6 +935,14 @@ static struct promote_op *promote_alloc(struct bch_read_bio *rbio)
     memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
            sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
 
+    ret = bch2_migrate_write_init(c, &op->write, c->fastest_devs,
+            writepoint_hashed((unsigned long) current),
+            rbio->opts,
+            DATA_PROMOTE,
+            (struct data_opts) { 0 },
+            k);
+    BUG_ON(ret);
+
     return op;
 }
 
@@ -1407,7 +1396,7 @@ noclone:
     rbio->pick      = *pick;
     rbio->pos       = pos;
    rbio->version       = e.k->version;
-   rbio->promote       = promote ? promote_alloc(rbio) : NULL;
+   rbio->promote       = promote ? promote_alloc(rbio, e.s_c) : NULL;
    INIT_WORK(&rbio->work, NULL);
 
    bio_set_dev(&rbio->bio, pick->ca->disk_sb.bdev);
bio_set_dev(&rbio->bio, pick->ca->disk_sb.bdev); bio_set_dev(&rbio->bio, pick->ca->disk_sb.bdev);

libbcachefs/io.h

@@ -70,7 +70,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c)
     op->error = 0;
     op->csum_type = bch2_data_checksum_type(c, c->opts.data_checksum);
     op->compression_type =
-        bch2_compression_opt_to_type(c->opts.compression);
+        bch2_compression_opt_to_type[c->opts.compression];
     op->nr_replicas = 0;
     op->nr_replicas_required = c->opts.data_replicas_required;
     op->alloc_reserve = RESERVE_NONE;

libbcachefs/journal.c

@@ -1046,12 +1046,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 
         if (!degraded &&
             (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
-             fsck_err_on(!bch2_sb_has_replicas(c, BCH_DATA_JOURNAL,
+             fsck_err_on(!bch2_replicas_marked(c, BCH_DATA_JOURNAL,
                                i->devs), c,
                  "superblock not marked as containing replicas (type %u)",
                  BCH_DATA_JOURNAL))) {
-            ret = bch2_check_mark_super(c, BCH_DATA_JOURNAL,
-                            i->devs);
+            ret = bch2_mark_replicas(c, BCH_DATA_JOURNAL, i->devs);
             if (ret)
                 return ret;
         }
@@ -2232,7 +2231,7 @@ static void journal_write_done(struct closure *cl)
         goto err;
     }
 
-    if (bch2_check_mark_super(c, BCH_DATA_JOURNAL, devs))
+    if (bch2_mark_replicas(c, BCH_DATA_JOURNAL, devs))
         goto err;
 out:
     __bch2_time_stats_update(j->write_time, j->write_start_time);
@@ -2851,7 +2850,7 @@ int bch2_journal_flush_device(struct journal *j, int dev_idx)
         seq++;
 
         spin_unlock(&j->lock);
-        ret = bch2_check_mark_super(c, BCH_DATA_JOURNAL, devs);
+        ret = bch2_mark_replicas(c, BCH_DATA_JOURNAL, devs);
         spin_lock(&j->lock);
     }
     spin_unlock(&j->lock);
@@ -2946,7 +2945,11 @@ void bch2_fs_journal_exit(struct journal *j)
 
 int bch2_fs_journal_init(struct journal *j)
 {
+    struct bch_fs *c = container_of(j, struct bch_fs, journal);
     static struct lock_class_key res_key;
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
 
     spin_lock_init(&j->lock);
     spin_lock_init(&j->err_lock);
@@ -2972,12 +2975,15 @@ int bch2_fs_journal_init(struct journal *j)
 
     if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
         !(j->buf[0].data = kvpmalloc(j->buf[0].size, GFP_KERNEL)) ||
-        !(j->buf[1].data = kvpmalloc(j->buf[1].size, GFP_KERNEL)))
-        return -ENOMEM;
+        !(j->buf[1].data = kvpmalloc(j->buf[1].size, GFP_KERNEL))) {
+        ret = -ENOMEM;
+        goto out;
+    }
 
     j->pin.front = j->pin.back = 1;
+out:
+    pr_verbose_init(c->opts, "ret %i", ret);
 
-    return 0;
+    return ret;
 }
 
 /* debug: */

libbcachefs/keylist.h

@@ -58,6 +58,17 @@ static inline struct bkey_i *bch2_keylist_front(struct keylist *l)
 #define keylist_single(k)                   \
     ((struct keylist) { .keys = k, .top = bkey_next(k) })
 
+static inline u64 keylist_sectors(struct keylist *keys)
+{
+    struct bkey_i *k;
+    u64 ret = 0;
+
+    for_each_keylist_key(keys, k)
+        ret += k->k.size;
+
+    return ret;
+}
+
 #ifdef CONFIG_BCACHEFS_DEBUG
 void bch2_verify_keylist_sorted(struct keylist *);
 #else

libbcachefs/migrate.c

@@ -13,118 +13,6 @@
 #include "move.h"
 #include "super-io.h"
 
-static bool migrate_pred(void *arg, struct bkey_s_c_extent e)
-{
-    struct bch_dev *ca = arg;
-
-    return bch2_extent_has_device(e, ca->dev_idx);
-}
-
-#define MAX_DATA_OFF_ITER   10
-
-static int bch2_dev_usrdata_migrate(struct bch_fs *c, struct bch_dev *ca,
-                    int flags)
-{
-    struct btree_iter iter;
-    struct bkey_s_c k;
-    struct bch_move_stats stats;
-    unsigned pass = 0;
-    int ret = 0;
-
-    if (!(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_USER)))
-        return 0;
-
-    /*
-     * XXX: we should be able to do this in one pass, but bch2_move_data()
-     * can spuriously fail to move an extent due to racing with other move
-     * operations
-     */
-    do {
-        memset(&stats, 0, sizeof(stats));
-
-        ret = bch2_move_data(c, NULL,
-                     SECTORS_IN_FLIGHT_PER_DEVICE,
-                     NULL,
-                     writepoint_hashed((unsigned long) current),
-                     0,
-                     ca->dev_idx,
-                     POS_MIN, POS_MAX,
-                     migrate_pred, ca,
-                     &stats);
-        if (ret) {
-            bch_err(c, "error migrating data: %i", ret);
-            return ret;
-        }
-    } while (atomic64_read(&stats.keys_moved) && pass++ < MAX_DATA_OFF_ITER);
-
-    if (atomic64_read(&stats.keys_moved)) {
-        bch_err(c, "unable to migrate all data in %d iterations",
-            MAX_DATA_OFF_ITER);
-        return -1;
-    }
-
-    mutex_lock(&c->replicas_gc_lock);
-    bch2_replicas_gc_start(c, 1 << BCH_DATA_USER);
-
-    for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_PREFETCH, k) {
-        ret = bch2_check_mark_super(c, BCH_DATA_USER, bch2_bkey_devs(k));
-        if (ret) {
-            bch_err(c, "error migrating data %i from check_mark_super()", ret);
-            break;
-        }
-    }
-
-    bch2_replicas_gc_end(c, ret);
-    mutex_unlock(&c->replicas_gc_lock);
-    return ret;
-}
-
-static int bch2_dev_metadata_migrate(struct bch_fs *c, struct bch_dev *ca,
-                     int flags)
-{
-    struct btree_iter iter;
-    struct btree *b;
-    int ret = 0;
-    unsigned id;
-
-    if (!(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_BTREE)))
-        return 0;
-
-    mutex_lock(&c->replicas_gc_lock);
-    bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
-
-    for (id = 0; id < BTREE_ID_NR; id++) {
-        for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
-            struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
-
-            if (!bch2_extent_has_device(e, ca->dev_idx))
-                continue;
-
-            ret = bch2_btree_node_rewrite(c, &iter, b->data->keys.seq, 0);
-            if (ret) {
-                bch2_btree_iter_unlock(&iter);
-                goto err;
-            }
-        }
-        ret = bch2_btree_iter_unlock(&iter);
-        if (ret)
-            goto err;
-    }
-err:
-    bch2_replicas_gc_end(c, ret);
-    mutex_unlock(&c->replicas_gc_lock);
-    return ret;
-}
-
-int bch2_dev_data_migrate(struct bch_fs *c, struct bch_dev *ca, int flags)
-{
-    BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW &&
-           bch2_dev_is_online(ca));
-
-    return bch2_dev_usrdata_migrate(c, ca, flags) ?:
-        bch2_dev_metadata_migrate(c, ca, flags);
-}
-
 static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
              unsigned dev_idx, int flags, bool metadata)
 {
@@ -152,7 +40,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
     int ret = 0;
 
     mutex_lock(&c->replicas_gc_lock);
-    bch2_replicas_gc_start(c, 1 << BCH_DATA_USER);
+    bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
 
     bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
                  POS_MIN, BTREE_ITER_PREFETCH);
@@ -161,8 +49,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
            !(ret = btree_iter_err(k))) {
         if (!bkey_extent_is_data(k.k) ||
             !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
-            ret = bch2_check_mark_super(c, BCH_DATA_USER,
-                            bch2_bkey_devs(k));
+            ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, k);
             if (ret)
                 break;
             bch2_btree_iter_next(&iter);
@@ -183,8 +70,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
          */
         bch2_extent_normalize(c, e.s);
 
-        ret = bch2_check_mark_super(c, BCH_DATA_USER,
-                bch2_bkey_devs(bkey_i_to_s_c(&tmp.key)));
+        ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER,
+                          bkey_i_to_s_c(&tmp.key));
         if (ret)
             break;
 
@@ -240,8 +127,8 @@ retry:
                     dev_idx)) {
             bch2_btree_iter_set_locks_want(&iter, 0);
 
-            ret = bch2_check_mark_super(c, BCH_DATA_BTREE,
-                    bch2_bkey_devs(bkey_i_to_s_c(&b->key)));
+            ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+                              bkey_i_to_s_c(&b->key));
             if (ret)
                 goto err;
         } else {

libbcachefs/migrate.h

@@ -1,7 +1,6 @@
 #ifndef _BCACHEFS_MIGRATE_H
 #define _BCACHEFS_MIGRATE_H
 
-int bch2_dev_data_migrate(struct bch_fs *, struct bch_dev *, int);
 int bch2_dev_data_drop(struct bch_fs *, unsigned, int);
 
 #endif /* _BCACHEFS_MIGRATE_H */

libbcachefs/move.c

@@ -58,6 +58,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
         BKEY_PADDED(k) _new, _insert;
         struct bch_extent_ptr *ptr;
         struct bch_extent_crc_unpacked crc;
+        unsigned nr_dirty;
         bool did_work = false;
 
         if (btree_iter_err(k)) {
@@ -71,6 +72,11 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                     m->ptr, m->offset))
             goto nomatch;
 
+        if (m->data_cmd == DATA_REWRITE &&
+            !bch2_extent_has_device(bkey_s_c_to_extent(k),
+                        m->data_opts.rewrite_dev))
+            goto nomatch;
+
         bkey_reassemble(&_insert.k, k);
         insert = bkey_i_to_extent(&_insert.k);
 
@@ -81,11 +87,12 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
         bch2_cut_back(new->k.p, &insert->k);
         bch2_cut_back(insert->k.p, &new->k);
 
-        if (m->move_dev >= 0 &&
-            (ptr = (struct bch_extent_ptr *)
-             bch2_extent_has_device(extent_i_to_s_c(insert),
-                        m->move_dev)))
+        if (m->data_cmd == DATA_REWRITE) {
+            ptr = (struct bch_extent_ptr *)
+                bch2_extent_has_device(extent_i_to_s_c(insert),
+                               m->data_opts.rewrite_dev);
             bch2_extent_drop_ptr(extent_i_to_s(insert), ptr);
+        }
 
         extent_for_each_ptr_crc(extent_i_to_s(new), ptr, crc) {
             if (bch2_extent_has_device(extent_i_to_s_c(insert), ptr->dev)) {
@@ -108,10 +115,35 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
         bch2_extent_narrow_crcs(insert,
                 (struct bch_extent_crc_unpacked) { 0 });
         bch2_extent_normalize(c, extent_i_to_s(insert).s);
-        bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert));
+        bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert),
+                         c->opts.data_replicas);
 
-        ret = bch2_check_mark_super(c, BCH_DATA_USER,
-                bch2_extent_devs(extent_i_to_s_c(insert)));
+        /*
+         * It's possible we race, and for whatever reason the extent now
+         * has fewer replicas than when we last looked at it - meaning
+         * we need to get a disk reservation here:
+         */
+        nr_dirty = bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i));
+        if (m->nr_ptrs_reserved < nr_dirty) {
+            unsigned sectors = (nr_dirty - m->nr_ptrs_reserved) *
+                    keylist_sectors(keys);
+
+            /*
+             * can't call bch2_disk_reservation_add() with btree
+             * locks held, at least not without a song and dance
+             */
+            bch2_btree_iter_unlock(&iter);
+
+            ret = bch2_disk_reservation_add(c, &op->res, sectors, 0);
+            if (ret)
+                goto out;
+
+            m->nr_ptrs_reserved = nr_dirty;
+            goto next;
+        }
+
+        ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER,
+                          extent_i_to_s_c(insert).s_c);
         if (ret)
             break;
 
@@ -119,7 +151,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                 NULL, op_journal_seq(op),
                 BTREE_INSERT_ATOMIC|
                 BTREE_INSERT_NOFAIL|
-                m->btree_insert_flags,
+                BTREE_INSERT_USE_RESERVE|
+                m->data_opts.btree_insert_flags,
                 BTREE_INSERT_ENTRY(&iter, &insert->k_i));
         if (!ret)
             atomic_long_inc(&c->extent_migrate_done);
@@ -150,8 +183,7 @@ out:
     return ret;
 }
 
-void bch2_migrate_write_init(struct migrate_write *m,
-                 struct bch_read_bio *rbio)
+void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio)
 {
     /* write bio must own pages: */
     BUG_ON(!m->op.wbio.bio.bi_vcnt);
@@ -162,16 +194,39 @@ void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio)
     m->op.pos   = rbio->pos;
     m->op.version   = rbio->version;
     m->op.crc   = rbio->pick.crc;
+    m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
 
     if (bch2_csum_type_is_encryption(m->op.crc.csum_type)) {
         m->op.nonce = m->op.crc.nonce + m->op.crc.offset;
         m->op.csum_type = m->op.crc.csum_type;
     }
 
-    if (m->move_dev >= 0)
-        bch2_dev_list_drop_dev(&m->op.devs_have, m->move_dev);
+    if (m->data_cmd == DATA_REWRITE)
+        bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
+}
 
-    if (m->btree_insert_flags & BTREE_INSERT_USE_RESERVE)
+int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
+                struct bch_devs_mask *devs,
+                struct write_point_specifier wp,
+                struct bch_io_opts io_opts,
+                enum data_cmd data_cmd,
+                struct data_opts data_opts,
+                struct bkey_s_c k)
+{
+    int ret;
+
+    m->data_cmd = data_cmd;
+    m->data_opts    = data_opts;
+    m->nr_ptrs_reserved = bch2_extent_nr_dirty_ptrs(k);
+
+    bch2_write_op_init(&m->op, c);
+    m->op.csum_type = bch2_data_checksum_type(c, io_opts.data_checksum);
+    m->op.compression_type =
+        bch2_compression_opt_to_type[io_opts.compression];
+    m->op.devs  = devs;
+    m->op.write_point = wp;
+
+    if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE)
         m->op.alloc_reserve = RESERVE_MOVINGGC;
 
     m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS|
@@ -180,10 +235,35 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
                BCH_WRITE_DATA_ENCODED|
                BCH_WRITE_NOMARK_REPLICAS;
 
-    m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
     m->op.nr_replicas   = 1;
     m->op.nr_replicas_required = 1;
     m->op.index_update_fn   = bch2_migrate_index_update;
+
+    switch (data_cmd) {
+    case DATA_ADD_REPLICAS:
+        if (m->nr_ptrs_reserved < c->opts.data_replicas) {
+            m->op.nr_replicas = c->opts.data_replicas - m->nr_ptrs_reserved;
+
+            ret = bch2_disk_reservation_get(c, &m->op.res,
+                            k.k->size,
+                            m->op.nr_replicas, 0);
+            if (ret)
+                return ret;
+
+            m->nr_ptrs_reserved = c->opts.data_replicas;
+        }
+        break;
+    case DATA_REWRITE:
+        break;
+    case DATA_PROMOTE:
+        m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
+        m->op.flags |= BCH_WRITE_CACHED;
+        break;
+    default:
+        BUG();
+    }
+
+    return 0;
 }
static void move_free(struct closure *cl) static void move_free(struct closure *cl)
@ -210,7 +290,7 @@ static void move_write(struct closure *cl)
struct moving_io *io = container_of(cl, struct moving_io, cl); struct moving_io *io = container_of(cl, struct moving_io, cl);
if (likely(!io->rbio.bio.bi_status)) { if (likely(!io->rbio.bio.bi_status)) {
bch2_migrate_write_init(&io->write, &io->rbio); bch2_migrate_read_done(&io->write, &io->rbio);
closure_call(&io->write.op.cl, bch2_write, NULL, cl); closure_call(&io->write.op.cl, bch2_write, NULL, cl);
} }
@ -238,19 +318,19 @@ static void move_read_endio(struct bio *bio)
} }
static int bch2_move_extent(struct bch_fs *c, static int bch2_move_extent(struct bch_fs *c,
struct moving_context *ctxt, struct moving_context *ctxt,
struct bch_devs_mask *devs, struct bch_devs_mask *devs,
struct write_point_specifier wp, struct write_point_specifier wp,
int btree_insert_flags, struct bch_io_opts io_opts,
int move_device, struct bkey_s_c_extent e,
struct bch_io_opts opts, enum data_cmd data_cmd,
struct bkey_s_c_extent e) struct data_opts data_opts)
{ {
struct extent_pick_ptr pick; struct extent_pick_ptr pick;
struct moving_io *io; struct moving_io *io;
const struct bch_extent_ptr *ptr; const struct bch_extent_ptr *ptr;
struct bch_extent_crc_unpacked crc; struct bch_extent_crc_unpacked crc;
unsigned sectors = e.k->size, pages, nr_good; unsigned sectors = e.k->size, pages;
int ret = -ENOMEM; int ret = -ENOMEM;
bch2_extent_pick_ptr(c, e.s_c, NULL, &pick); bch2_extent_pick_ptr(c, e.s_c, NULL, &pick);
@ -279,7 +359,7 @@ static int bch2_move_extent(struct bch_fs *c,
if (bio_alloc_pages(&io->write.op.wbio.bio, GFP_KERNEL)) if (bio_alloc_pages(&io->write.op.wbio.bio, GFP_KERNEL))
goto err_free; goto err_free;
io->rbio.opts = opts; io->rbio.opts = io_opts;
bio_init(&io->rbio.bio, io->bi_inline_vecs, pages); bio_init(&io->rbio.bio, io->bi_inline_vecs, pages);
bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
io->rbio.bio.bi_iter.bi_size = sectors << 9; io->rbio.bio.bi_iter.bi_size = sectors << 9;
@ -288,27 +368,10 @@ static int bch2_move_extent(struct bch_fs *c,
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(e.k); io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(e.k);
io->rbio.bio.bi_end_io = move_read_endio; io->rbio.bio.bi_end_io = move_read_endio;
io->write.btree_insert_flags = btree_insert_flags; ret = bch2_migrate_write_init(c, &io->write, devs, wp,
io->write.move_dev = move_device; io_opts, data_cmd, data_opts, e.s_c);
if (ret)
bch2_write_op_init(&io->write.op, c); goto err_free_pages;
io->write.op.csum_type = bch2_data_checksum_type(c, opts.data_checksum);
io->write.op.compression_type =
bch2_compression_opt_to_type(opts.compression);
io->write.op.devs = devs;
io->write.op.write_point = wp;
if (move_device < 0 &&
((nr_good = bch2_extent_nr_good_ptrs(c, e)) <
c->opts.data_replicas)) {
io->write.op.nr_replicas = c->opts.data_replicas - nr_good;
ret = bch2_disk_reservation_get(c, &io->write.op.res,
e.k->size,
io->write.op.nr_replicas, 0);
if (ret)
goto err_free_pages;
}
atomic64_inc(&ctxt->stats->keys_moved); atomic64_inc(&ctxt->stats->keys_moved);
atomic64_add(e.k->size, &ctxt->stats->sectors_moved); atomic64_add(e.k->size, &ctxt->stats->sectors_moved);
@ -369,8 +432,6 @@ int bch2_move_data(struct bch_fs *c,
unsigned sectors_in_flight, unsigned sectors_in_flight,
struct bch_devs_mask *devs, struct bch_devs_mask *devs,
struct write_point_specifier wp, struct write_point_specifier wp,
int btree_insert_flags,
int move_device,
struct bpos start, struct bpos start,
struct bpos end, struct bpos end,
move_pred_fn pred, void *arg, move_pred_fn pred, void *arg,
@ -378,12 +439,14 @@ int bch2_move_data(struct bch_fs *c,
{ {
bool kthread = (current->flags & PF_KTHREAD) != 0; bool kthread = (current->flags & PF_KTHREAD) != 0;
struct moving_context ctxt = { .stats = stats }; struct moving_context ctxt = { .stats = stats };
struct bch_io_opts opts = bch2_opts_to_inode_opts(c->opts); struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
BKEY_PADDED(k) tmp; BKEY_PADDED(k) tmp;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_s_c_extent e; struct bkey_s_c_extent e;
struct data_opts data_opts;
enum data_cmd data_cmd;
u64 cur_inum = U64_MAX; u64 cur_inum = U64_MAX;
int ret = 0; int ret = 0, ret2;
closure_init_stack(&ctxt.cl); closure_init_stack(&ctxt.cl);
INIT_LIST_HEAD(&ctxt.reads); INIT_LIST_HEAD(&ctxt.reads);
@ -430,28 +493,44 @@ peek:
/* don't hold btree locks while looking up inode: */ /* don't hold btree locks while looking up inode: */
bch2_btree_iter_unlock(&stats->iter); bch2_btree_iter_unlock(&stats->iter);
opts = bch2_opts_to_inode_opts(c->opts); io_opts = bch2_opts_to_inode_opts(c->opts);
if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode)) if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
bch2_io_opts_apply(&opts, bch2_inode_opts_get(&inode)); bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode));
cur_inum = k.k->p.inode; cur_inum = k.k->p.inode;
goto peek; goto peek;
} }
if (!pred(arg, e)) switch ((data_cmd = pred(c, arg, BKEY_TYPE_EXTENTS, e,
&io_opts, &data_opts))) {
case DATA_SKIP:
goto next; goto next;
case DATA_SCRUB:
BUG();
case DATA_ADD_REPLICAS:
case DATA_REWRITE:
case DATA_PROMOTE:
break;
default:
BUG();
}
/* unlock before doing IO: */ /* unlock before doing IO: */
bkey_reassemble(&tmp.k, k); bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k); k = bkey_i_to_s_c(&tmp.k);
bch2_btree_iter_unlock(&stats->iter); bch2_btree_iter_unlock(&stats->iter);
if (bch2_move_extent(c, &ctxt, devs, wp, ret2 = bch2_move_extent(c, &ctxt, devs, wp, io_opts,
btree_insert_flags, bkey_s_c_to_extent(k),
move_device, opts, data_cmd, data_opts);
bkey_s_c_to_extent(k))) { if (ret2) {
/* memory allocation failure, wait for some IO to finish */ if (ret2 == -ENOMEM) {
bch2_move_ctxt_wait_for_io(&ctxt); /* memory allocation failure, wait for some IO to finish */
continue; bch2_move_ctxt_wait_for_io(&ctxt);
continue;
}
/* XXX signal failure */
goto next;
} }
if (rate) if (rate)
@ -486,11 +565,11 @@ static int bch2_gc_data_replicas(struct bch_fs *c)
int ret; int ret;
mutex_lock(&c->replicas_gc_lock); mutex_lock(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, 1 << BCH_DATA_USER); bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_PREFETCH, k) { BTREE_ITER_PREFETCH, k) {
ret = bch2_check_mark_super(c, BCH_DATA_USER, bch2_bkey_devs(k)); ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, k);
if (ret) if (ret)
break; break;
} }
@ -514,8 +593,8 @@ static int bch2_gc_btree_replicas(struct bch_fs *c)
for (id = 0; id < BTREE_ID_NR; id++) { for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
ret = bch2_check_mark_super(c, BCH_DATA_BTREE, ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
bch2_bkey_devs(bkey_i_to_s_c(&b->key))); bkey_i_to_s_c(&b->key));
bch2_btree_iter_cond_resched(&iter); bch2_btree_iter_cond_resched(&iter);
} }
@ -534,18 +613,35 @@ static int bch2_move_btree(struct bch_fs *c,
void *arg, void *arg,
struct bch_move_stats *stats) struct bch_move_stats *stats)
{ {
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree *b; struct btree *b;
unsigned id; unsigned id;
struct data_opts data_opts;
enum data_cmd cmd;
int ret = 0; int ret = 0;
stats->data_type = BCH_DATA_BTREE; stats->data_type = BCH_DATA_BTREE;
for (id = 0; id < BTREE_ID_NR; id++) { for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
if (pred(arg, bkey_i_to_s_c_extent(&b->key))) switch ((cmd = pred(c, arg, BKEY_TYPE_BTREE,
ret = bch2_btree_node_rewrite(c, &stats->iter, bkey_i_to_s_c_extent(&b->key),
b->data->keys.seq, 0) ?: ret; &io_opts,
&data_opts))) {
case DATA_SKIP:
goto next;
case DATA_SCRUB:
BUG();
case DATA_ADD_REPLICAS:
case DATA_REWRITE:
break;
default:
BUG();
}
ret = bch2_btree_node_rewrite(c, &stats->iter,
b->data->keys.seq, 0) ?: ret;
next:
bch2_btree_iter_cond_resched(&stats->iter); bch2_btree_iter_cond_resched(&stats->iter);
} }
@ -556,32 +652,48 @@ static int bch2_move_btree(struct bch_fs *c,
} }
#if 0 #if 0
static bool scrub_data_pred(void *arg, struct bkey_s_c_extent e) static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
enum bkey_type type,
struct bkey_s_c_extent e,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{ {
return DATA_SCRUB;
} }
#endif #endif
static bool rereplicate_metadata_pred(void *arg, struct bkey_s_c_extent e) static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
enum bkey_type type,
struct bkey_s_c_extent e,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{ {
struct bch_fs *c = arg;
unsigned nr_good = bch2_extent_nr_good_ptrs(c, e); unsigned nr_good = bch2_extent_nr_good_ptrs(c, e);
unsigned replicas = type == BKEY_TYPE_BTREE
? c->opts.metadata_replicas
: c->opts.data_replicas;
return nr_good && nr_good < c->opts.metadata_replicas; if (!nr_good || nr_good >= replicas)
return DATA_SKIP;
data_opts->btree_insert_flags = 0;
return DATA_ADD_REPLICAS;
} }
static bool rereplicate_data_pred(void *arg, struct bkey_s_c_extent e) static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
{ enum bkey_type type,
struct bch_fs *c = arg; struct bkey_s_c_extent e,
unsigned nr_good = bch2_extent_nr_good_ptrs(c, e); struct bch_io_opts *io_opts,
struct data_opts *data_opts)
return nr_good && nr_good < c->opts.data_replicas;
}
static bool migrate_pred(void *arg, struct bkey_s_c_extent e)
{ {
struct bch_ioctl_data *op = arg; struct bch_ioctl_data *op = arg;
return bch2_extent_has_device(e, op->migrate.dev); if (!bch2_extent_has_device(e, op->migrate.dev))
return DATA_SKIP;
data_opts->btree_insert_flags = 0;
data_opts->rewrite_dev = op->migrate.dev;
return DATA_REWRITE;
} }
int bch2_data_job(struct bch_fs *c, int bch2_data_job(struct bch_fs *c,
@ -595,16 +707,15 @@ int bch2_data_job(struct bch_fs *c,
stats->data_type = BCH_DATA_JOURNAL; stats->data_type = BCH_DATA_JOURNAL;
ret = bch2_journal_flush_device(&c->journal, -1); ret = bch2_journal_flush_device(&c->journal, -1);
ret = bch2_move_btree(c, rereplicate_metadata_pred, c, stats) ?: ret; ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
ret = bch2_gc_btree_replicas(c) ?: ret; ret = bch2_gc_btree_replicas(c) ?: ret;
ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE, ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE,
NULL, NULL,
writepoint_hashed((unsigned long) current), writepoint_hashed((unsigned long) current),
0, -1,
op.start, op.start,
op.end, op.end,
rereplicate_data_pred, c, stats) ?: ret; rereplicate_pred, c, stats) ?: ret;
ret = bch2_gc_data_replicas(c) ?: ret; ret = bch2_gc_data_replicas(c) ?: ret;
break; break;
case BCH_DATA_OP_MIGRATE: case BCH_DATA_OP_MIGRATE:
@ -620,7 +731,6 @@ int bch2_data_job(struct bch_fs *c,
ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE, ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE,
NULL, NULL,
writepoint_hashed((unsigned long) current), writepoint_hashed((unsigned long) current),
0, -1,
op.start, op.start,
op.end, op.end,
migrate_pred, &op, stats) ?: ret; migrate_pred, &op, stats) ?: ret;

View File

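With the btree_insert_flags / move_device arguments gone, per-extent policy now lives entirely in the pred callback. A minimal sketch of a caller under the reworked interface (the pred, its policy, and the stats variable are illustrative, not code from this commit):

static enum data_cmd add_replicas_pred(struct bch_fs *c, void *arg,
				       enum bkey_type type,
				       struct bkey_s_c_extent e,
				       struct bch_io_opts *io_opts,
				       struct data_opts *data_opts)
{
	/* hypothetical policy: add replicas for every extent we see */
	data_opts->btree_insert_flags = 0;
	return DATA_ADD_REPLICAS;
}

	struct bch_move_stats stats = { 0 };

	ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE,
			     NULL,
			     writepoint_hashed((unsigned long) current),
			     POS_MIN, POS_MAX,
			     add_replicas_pred, NULL, &stats);
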
@@ -8,23 +8,47 @@
 struct bch_read_bio;
 struct moving_context;
 
+enum data_cmd {
+	DATA_SKIP,
+	DATA_SCRUB,
+	DATA_ADD_REPLICAS,
+	DATA_REWRITE,
+	DATA_PROMOTE,
+};
+
+struct data_opts {
+	unsigned rewrite_dev;
+	int btree_insert_flags;
+};
+
 struct migrate_write {
+	enum data_cmd data_cmd;
+	struct data_opts data_opts;
+
+	unsigned nr_ptrs_reserved;
+
 	struct moving_context *ctxt;
 
 	/* what we read: */
 	struct bch_extent_ptr ptr;
 	u64 offset;
 
-	int move_dev;
-	int btree_insert_flags;
 	struct bch_write_op op;
 };
 
-void bch2_migrate_write_init(struct migrate_write *, struct bch_read_bio *);
+void bch2_migrate_read_done(struct migrate_write *, struct bch_read_bio *);
+int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
+			    struct bch_devs_mask *,
+			    struct write_point_specifier,
+			    struct bch_io_opts,
+			    enum data_cmd, struct data_opts,
+			    struct bkey_s_c);
 
 #define SECTORS_IN_FLIGHT_PER_DEVICE 2048
 
-typedef bool (*move_pred_fn)(void *, struct bkey_s_c_extent);
+typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
+				      enum bkey_type, struct bkey_s_c_extent,
+				      struct bch_io_opts *, struct data_opts *);
 
 struct bch_move_stats {
 	enum bch_data_type data_type;
@@ -39,7 +63,7 @@ struct bch_move_stats {
 int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
 		   unsigned, struct bch_devs_mask *,
 		   struct write_point_specifier,
-		   int, int, struct bpos, struct bpos,
+		   struct bpos, struct bpos,
 		   move_pred_fn, void *,
 		   struct bch_move_stats *);

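The rename makes the two-phase setup explicit: bch2_migrate_write_init() runs when the move is queued, with the per-extent policy already decided by the pred, while bch2_migrate_read_done() runs only after the read completes, once pos/version/crc are known. Per moving_io the flow is roughly (a sketch condensed from move.c above, not new code):

	/* queueing the move, before the read is issued: */
	ret = bch2_migrate_write_init(c, &io->write, devs, wp,
				      io_opts, data_cmd, data_opts, e.s_c);

	/* later, in move_write(), once the read has completed: */
	bch2_migrate_read_done(&io->write, &io->rbio);
	closure_call(&io->write.op.cl, bch2_write, NULL, cl);
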
@@ -61,9 +61,9 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
 	return (l->offset > r->offset) - (l->offset < r->offset);
 }
 
-static bool copygc_pred(void *arg, struct bkey_s_c_extent e)
+static bool __copygc_pred(struct bch_dev *ca,
+			  struct bkey_s_c_extent e)
 {
-	struct bch_dev *ca = arg;
 	copygc_heap *h = &ca->copygc_heap;
 	const struct bch_extent_ptr *ptr =
 		bch2_extent_has_device(e, ca->dev_idx);
@@ -83,6 +83,22 @@ static bool copygc_pred(void *arg, struct bkey_s_c_extent e)
 	return false;
 }
 
+static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
+				 enum bkey_type type,
+				 struct bkey_s_c_extent e,
+				 struct bch_io_opts *io_opts,
+				 struct data_opts *data_opts)
+{
+	struct bch_dev *ca = arg;
+
+	if (!__copygc_pred(ca, e))
+		return DATA_SKIP;
+
+	data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE;
+	data_opts->rewrite_dev = ca->dev_idx;
+	return DATA_REWRITE;
+}
+
 static bool have_copygc_reserve(struct bch_dev *ca)
 {
 	bool ret;
@@ -165,8 +181,6 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
 			 SECTORS_IN_FLIGHT_PER_DEVICE,
 			 &ca->self,
 			 writepoint_ptr(&ca->copygc_write_point),
-			 BTREE_INSERT_USE_RESERVE,
-			 ca->dev_idx,
 			 POS_MIN, POS_MAX,
 			 copygc_pred, ca,
 			 &move_stats);

@@ -22,6 +22,7 @@ const char * const bch2_compression_types[] = {
 	"none",
 	"lz4",
 	"gzip",
+	"zstd",
 	NULL
 };

@@ -73,10 +73,10 @@ enum opt_type {
 	BCH_OPT(errors, u8, OPT_RUNTIME, \
 		OPT_STR(bch2_error_actions), \
 		BCH_SB_ERROR_ACTION, BCH_ON_ERROR_RO) \
-	BCH_OPT(metadata_replicas, u8, OPT_MOUNT, \
+	BCH_OPT(metadata_replicas, u8, OPT_RUNTIME, \
 		OPT_UINT(1, BCH_REPLICAS_MAX), \
 		BCH_SB_META_REPLICAS_WANT, 1) \
-	BCH_OPT(data_replicas, u8, OPT_MOUNT, \
+	BCH_OPT(data_replicas, u8, OPT_RUNTIME, \
 		OPT_UINT(1, BCH_REPLICAS_MAX), \
 		BCH_SB_DATA_REPLICAS_WANT, 1) \
 	BCH_OPT(metadata_replicas_required, u8, OPT_MOUNT, \
@@ -127,6 +127,9 @@ enum opt_type {
 	BCH_OPT(verbose_recovery, u8, OPT_MOUNT, \
 		OPT_BOOL(), \
 		NO_SB_OPT, false) \
+	BCH_OPT(verbose_init, u8, OPT_MOUNT, \
+		OPT_BOOL(), \
+		NO_SB_OPT, false) \
 	BCH_OPT(journal_flush_disabled, u8, OPT_RUNTIME, \
 		OPT_BOOL(), \
 		NO_SB_OPT, false) \

@@ -74,13 +74,6 @@ static inline unsigned __next_qtype(unsigned i, unsigned qtypes)
 	     _i < QTYP_NR); \
 	     _i++)
 
-static inline unsigned enabled_qtypes(struct bch_fs *c)
-{
-	return ((c->opts.usrquota << QTYP_USR)|
-		(c->opts.grpquota << QTYP_GRP)|
-		(c->opts.prjquota << QTYP_PRJ));
-}
-
 static bool ignore_hardlimit(struct bch_memquota_type *q)
 {
 	if (capable(CAP_SYS_RESOURCE))
@@ -478,7 +471,7 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
 	if ((uflags & FS_QUOTA_GDQ_ENFD) && !c->opts.grpquota)
 		return -EINVAL;
 
-	if (uflags & FS_QUOTA_PDQ_ENFD)
+	if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota)
 		return -EINVAL;
 
 	mutex_lock(&c->sb_lock);
@@ -487,10 +480,9 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
 	if (uflags & FS_QUOTA_GDQ_ENFD)
 		SET_BCH_SB_GRPQUOTA(c->disk_sb, true);
 
-#if 0
 	if (uflags & FS_QUOTA_PDQ_ENFD)
 		SET_BCH_SB_PRJQUOTA(c->disk_sb, true);
-#endif
 
 	bch2_write_super(c);
 	mutex_unlock(&c->sb_lock);

@@ -20,6 +20,13 @@ static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u)
 	};
 }
 
+static inline unsigned enabled_qtypes(struct bch_fs *c)
+{
+	return ((c->opts.usrquota << QTYP_USR)|
+		(c->opts.grpquota << QTYP_GRP)|
+		(c->opts.prjquota << QTYP_PRJ));
+}
+
 #ifdef CONFIG_BCACHEFS_QUOTA
 
 int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters,

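enabled_qtypes() packs the usrquota/grpquota/prjquota mount options into a bitmask with one bit per quota type, which is what lets the call sites in super.c test "any quotas enabled" with a single expression. An illustrative use (not code from this commit):

	/* any quota type enabled? */
	if (enabled_qtypes(c))
		ret = bch2_fs_quota_read(c);

	/* or test one type specifically: */
	if (enabled_qtypes(c) & (1 << QTYP_USR)) {
		/* user quotas are enabled */
	}
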
@@ -43,7 +43,6 @@
  *     https://131002.net/siphash/
  */
 
-#include <linux/compiler.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 #include <linux/bitops.h>

@@ -546,6 +546,8 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
 	__le64 *i;
 	int ret;
 
+	pr_verbose_init(*opts, "");
+
 	memset(sb, 0, sizeof(*sb));
 	sb->mode = FMODE_READ;
 
@@ -566,8 +568,10 @@
 			opt_set(*opts, nochanges, true);
 	}
 
-	if (IS_ERR(sb->bdev))
-		return PTR_ERR(sb->bdev);
+	if (IS_ERR(sb->bdev)) {
+		ret = PTR_ERR(sb->bdev);
+		goto out;
+	}
 
 	err = "cannot allocate memory";
 	ret = __bch2_super_realloc(sb, 0);
@@ -638,12 +642,14 @@ got_super:
 	if (sb->mode & FMODE_WRITE)
 		bdev_get_queue(sb->bdev)->backing_dev_info->capabilities
 			|= BDI_CAP_STABLE_WRITES;
+	ret = 0;
+out:
+	pr_verbose_init(*opts, "ret %i", ret);
+	return ret;
 
-	return 0;
 err:
 	bch2_free_super(sb);
 	pr_err("error reading superblock: %s", err);
-	return ret;
+	goto out;
 }
 
 /* write superblock: */
@@ -744,17 +750,15 @@ void bch2_write_super(struct bch_fs *c)
 	nr_wrote = dev_mask_nr(&sb_written);
 
 	can_mount_with_written =
-		bch2_have_enough_devs(c,
-				      __bch2_replicas_status(c, sb_written),
-				      BCH_FORCE_IF_DEGRADED);
+		bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
+				      BCH_FORCE_IF_DEGRADED);
 
 	for (i = 0; i < ARRAY_SIZE(sb_written.d); i++)
 		sb_written.d[i] = ~sb_written.d[i];
 
 	can_mount_without_written =
-		bch2_have_enough_devs(c,
-				      __bch2_replicas_status(c, sb_written),
-				      BCH_FORCE_IF_DEGRADED);
+		bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
+				      BCH_FORCE_IF_DEGRADED);
 
 	/*
 	 * If we would be able to mount _without_ the devices we successfully
@@ -1052,7 +1056,7 @@ static bool replicas_has_entry(struct bch_replicas_cpu *r,
 }
 
 noinline
-static int bch2_check_mark_super_slowpath(struct bch_fs *c,
+static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 				struct bch_replicas_cpu_entry new_entry,
 				unsigned max_dev)
 {
@@ -1109,9 +1113,9 @@ err:
 	return ret;
 }
 
-int bch2_check_mark_super(struct bch_fs *c,
-			  enum bch_data_type data_type,
-			  struct bch_devs_list devs)
+int bch2_mark_replicas(struct bch_fs *c,
+		       enum bch_data_type data_type,
+		       struct bch_devs_list devs)
 {
 	struct bch_replicas_cpu_entry search;
 	struct bch_replicas_cpu *r, *gc_r;
@@ -1121,6 +1125,8 @@ int bch2_check_mark_super(struct bch_fs *c,
 	if (!devs.nr)
 		return 0;
 
+	BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
+
 	devlist_to_replicas(devs, data_type, &search, &max_dev);
 
 	rcu_read_lock();
@@ -1131,7 +1137,23 @@ int bch2_check_mark_super(struct bch_fs *c,
 	rcu_read_unlock();
 
 	return likely(marked) ? 0
-		: bch2_check_mark_super_slowpath(c, search, max_dev);
+		: bch2_mark_replicas_slowpath(c, search, max_dev);
+}
+
+int bch2_mark_bkey_replicas(struct bch_fs *c,
+			    enum bch_data_type data_type,
+			    struct bkey_s_c k)
+{
+	struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+	unsigned i;
+	int ret;
+
+	for (i = 0; i < cached.nr; i++)
+		if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
+					      bch2_dev_list_single(cached.devs[i]))))
+			return ret;
+
+	return bch2_mark_replicas(c, data_type, bch2_bkey_dirty_devs(k));
 }
 
 int bch2_replicas_gc_end(struct bch_fs *c, int err)
@@ -1417,7 +1439,7 @@ int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t
 
 /* Query replicas: */
 
-bool bch2_sb_has_replicas(struct bch_fs *c,
+bool bch2_replicas_marked(struct bch_fs *c,
 			  enum bch_data_type data_type,
 			  struct bch_devs_list devs)
 {
@@ -1438,6 +1460,21 @@ bool bch2_sb_has_replicas(struct bch_fs *c,
 	return ret;
 }
 
+bool bch2_bkey_replicas_marked(struct bch_fs *c,
+			       enum bch_data_type data_type,
+			       struct bkey_s_c k)
+{
+	struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+	unsigned i;
+
+	for (i = 0; i < cached.nr; i++)
+		if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
+					  bch2_dev_list_single(cached.devs[i])))
+			return false;
+
+	return bch2_replicas_marked(c, data_type, bch2_bkey_dirty_devs(k));
+}
+
 struct replicas_status __bch2_replicas_status(struct bch_fs *c,
 					      struct bch_devs_mask online_devs)
 {
@@ -1495,29 +1532,26 @@ struct replicas_status bch2_replicas_status(struct bch_fs *c)
 	return __bch2_replicas_status(c, bch2_online_devs(c));
 }
 
-bool bch2_have_enough_devs(struct bch_fs *c,
-			   struct replicas_status s,
-			   unsigned flags)
+static bool have_enough_devs(struct replicas_status s,
+			     enum bch_data_type type,
+			     bool force_if_degraded,
+			     bool force_if_lost)
 {
-	if ((s.replicas[BCH_DATA_JOURNAL].nr_offline ||
-	     s.replicas[BCH_DATA_BTREE].nr_offline) &&
-	    !(flags & BCH_FORCE_IF_METADATA_DEGRADED))
-		return false;
-
-	if ((!s.replicas[BCH_DATA_JOURNAL].nr_online ||
-	     !s.replicas[BCH_DATA_BTREE].nr_online) &&
-	    !(flags & BCH_FORCE_IF_METADATA_LOST))
-		return false;
-
-	if (s.replicas[BCH_DATA_USER].nr_offline &&
-	    !(flags & BCH_FORCE_IF_DATA_DEGRADED))
-		return false;
-
-	if (!s.replicas[BCH_DATA_USER].nr_online &&
-	    !(flags & BCH_FORCE_IF_DATA_LOST))
-		return false;
-
-	return true;
+	return (!s.replicas[type].nr_offline || force_if_degraded) &&
+		(s.replicas[type].nr_online || force_if_lost);
+}
+
+bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
+{
+	return (have_enough_devs(s, BCH_DATA_JOURNAL,
+				 flags & BCH_FORCE_IF_METADATA_DEGRADED,
+				 flags & BCH_FORCE_IF_METADATA_LOST) &&
+		have_enough_devs(s, BCH_DATA_BTREE,
+				 flags & BCH_FORCE_IF_METADATA_DEGRADED,
+				 flags & BCH_FORCE_IF_METADATA_LOST) &&
+		have_enough_devs(s, BCH_DATA_USER,
+				 flags & BCH_FORCE_IF_DATA_DEGRADED,
+				 flags & BCH_FORCE_IF_DATA_LOST));
 }
 
 unsigned bch2_replicas_online(struct bch_fs *c, bool meta)

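With the bch_fs argument dropped, bch2_have_enough_devs() now pairs directly with a replicas_status snapshot; a typical check matching the call sites in this diff (surrounding code illustrative):

	struct replicas_status s = bch2_replicas_status(c);

	/* e.g. allow a degraded mount, but not one that loses data: */
	if (!bch2_have_enough_devs(s, BCH_FORCE_IF_DEGRADED))
		return false;
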
@@ -139,10 +139,14 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
 
 /* BCH_SB_FIELD_replicas: */
 
-bool bch2_sb_has_replicas(struct bch_fs *, enum bch_data_type,
-			  struct bch_devs_list);
-int bch2_check_mark_super(struct bch_fs *, enum bch_data_type,
-			  struct bch_devs_list);
+bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
+			  struct bch_devs_list);
+bool bch2_bkey_replicas_marked(struct bch_fs *, enum bch_data_type,
+			       struct bkey_s_c);
+int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
+		       struct bch_devs_list);
+int bch2_mark_bkey_replicas(struct bch_fs *, enum bch_data_type,
+			    struct bkey_s_c);
 
 int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *, char *, size_t);
 int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *, char *, size_t);
@@ -157,7 +161,7 @@ struct replicas_status {
 
 struct replicas_status __bch2_replicas_status(struct bch_fs *,
 					      struct bch_devs_mask);
 struct replicas_status bch2_replicas_status(struct bch_fs *);
-bool bch2_have_enough_devs(struct bch_fs *, struct replicas_status, unsigned);
+bool bch2_have_enough_devs(struct replicas_status, unsigned);
 
 unsigned bch2_replicas_online(struct bch_fs *, bool);
 unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);

@@ -507,9 +507,11 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	struct bch_fs *c;
 	unsigned i, iter_size;
 
+	pr_verbose_init(opts, "");
+
 	c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO);
 	if (!c)
-		return NULL;
+		goto out;
 
 	__module_get(THIS_MODULE);
@@ -539,7 +541,6 @@
 	mutex_init(&c->btree_interior_update_lock);
 
 	mutex_init(&c->bio_bounce_pages_lock);
-	mutex_init(&c->zlib_workspace_lock);
 
 	bio_list_init(&c->btree_write_error_list);
 	spin_lock_init(&c->btree_write_error_lock);
@@ -646,10 +647,13 @@
 	kobject_init(&c->internal, &bch2_fs_internal_ktype);
 	kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
 	kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
+out:
+	pr_verbose_init(opts, "ret %i", c ? 0 : -ENOMEM);
 	return c;
 err:
 	bch2_fs_free(c);
-	return NULL;
+	c = NULL;
+	goto out;
 }
 
 static const char *__bch2_fs_online(struct bch_fs *c)
@@ -809,7 +813,7 @@ static const char *__bch2_fs_start(struct bch_fs *c)
 			goto err;
 		bch_verbose(c, "fsck done");
 
-		if (c->opts.usrquota || c->opts.grpquota) {
+		if (enabled_qtypes(c)) {
 			bch_verbose(c, "reading quotas:");
 			ret = bch2_fs_quota_read(c);
 			if (ret)
@@ -864,7 +868,7 @@
 				    NULL, NULL, NULL, 0))
 			goto err;
 
-		if (c->opts.usrquota || c->opts.grpquota) {
+		if (enabled_qtypes(c)) {
 			ret = bch2_fs_quota_read(c);
 			if (ret)
 				goto err;
@@ -1084,14 +1088,17 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
 static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
 {
 	struct bch_member *member;
-	struct bch_dev *ca;
+	struct bch_dev *ca = NULL;
+	int ret = 0;
+
+	pr_verbose_init(c->opts, "");
 
 	if (bch2_fs_init_fault("dev_alloc"))
-		return -ENOMEM;
+		goto err;
 
 	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
 	if (!ca)
-		return -ENOMEM;
+		goto err;
 
 	kobject_init(&ca->kobj, &bch2_dev_ktype);
 	init_completion(&ca->ref_completion);
@@ -1133,11 +1140,14 @@
 	if (bch2_dev_sysfs_online(c, ca))
 		pr_warn("error creating sysfs objects");
+out:
+	pr_verbose_init(c->opts, "ret %i", ret);
+	return ret;
 
-	return 0;
 err:
-	bch2_dev_free(ca);
-	return -ENOMEM;
+	if (ca)
+		bch2_dev_free(ca);
+	ret = -ENOMEM;
+	goto out;
 }
 
 static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
@@ -1240,7 +1250,8 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
 		/* do we have enough devices to write to? */
 		for_each_member_device(ca2, c, i)
-			nr_rw += ca2->mi.state == BCH_MEMBER_STATE_RW;
+			if (ca2 != ca)
+				nr_rw += ca2->mi.state == BCH_MEMBER_STATE_RW;
 
 		required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED)
 			       ? c->opts.metadata_replicas
@@ -1249,7 +1260,7 @@
 			       ? c->opts.data_replicas
 			       : c->opts.data_replicas_required);
 
-		return nr_rw - 1 <= required;
+		return nr_rw >= required;
 	case BCH_MEMBER_STATE_FAILED:
 	case BCH_MEMBER_STATE_SPARE:
 		if (ca->mi.state != BCH_MEMBER_STATE_RW &&
@@ -1262,7 +1273,7 @@
 
 		s = __bch2_replicas_status(c, new_online_devs);
 
-		return bch2_have_enough_devs(c, s, flags);
+		return bch2_have_enough_devs(s, flags);
 	default:
 		BUG();
 	}
@@ -1299,7 +1310,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)
 
 	s = bch2_replicas_status(c);
 
-	return bch2_have_enough_devs(c, s, flags);
+	return bch2_have_enough_devs(s, flags);
 }
 
 static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
@@ -1346,12 +1357,8 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
 	if (!bch2_dev_state_allowed(c, ca, new_state, flags))
 		return -EINVAL;
 
-	if (new_state == BCH_MEMBER_STATE_RW) {
-		if (__bch2_dev_read_write(c, ca))
-			return -ENOMEM;
-	} else {
+	if (new_state != BCH_MEMBER_STATE_RW)
 		__bch2_dev_read_only(c, ca);
-	}
 
 	bch_notice(ca, "%s", bch2_dev_state[new_state]);
@@ -1361,6 +1368,9 @@
 	bch2_write_super(c);
 	mutex_unlock(&c->sb_lock);
 
+	if (new_state == BCH_MEMBER_STATE_RW)
+		return __bch2_dev_read_write(c, ca) ? -ENOMEM : 0;
+
 	return 0;
 }
@@ -1701,11 +1711,17 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
 	const char *err;
 	int ret = -ENOMEM;
 
-	if (!nr_devices)
-		return ERR_PTR(-EINVAL);
+	pr_verbose_init(opts, "");
 
-	if (!try_module_get(THIS_MODULE))
-		return ERR_PTR(-ENODEV);
+	if (!nr_devices) {
+		c = ERR_PTR(-EINVAL);
+		goto out2;
+	}
+
+	if (!try_module_get(THIS_MODULE)) {
+		c = ERR_PTR(-ENODEV);
+		goto out2;
+	}
 
 	sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL);
 	if (!sb)
@@ -1760,8 +1776,11 @@
 	if (err)
 		goto err_print;
 
+out:
 	kfree(sb);
 	module_put(THIS_MODULE);
+out2:
+	pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c));
 	return c;
 err_print:
 	pr_err("bch_fs_open err opening %s: %s",
@@ -1770,12 +1789,10 @@ err_print:
 err:
 	if (c)
 		bch2_fs_stop(c);
 
 	for (i = 0; i < nr_devices; i++)
 		bch2_free_super(&sb[i]);
-	kfree(sb);
-	module_put(THIS_MODULE);
-	return ERR_PTR(ret);
+	c = ERR_PTR(ret);
+	goto out;
 }
 
 static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb,

@@ -67,6 +67,11 @@ static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs,
 	devs->devs[devs->nr++] = dev;
 }
 
+static inline struct bch_devs_list bch2_dev_list_single(unsigned dev)
+{
+	return (struct bch_devs_list) { .nr = 1, .devs[0] = dev };
+}
+
 static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter,
 					      const struct bch_devs_mask *mask)
 {

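bch2_dev_list_single() exists so each cached pointer can be recorded as its own single-device replicas entry, as in bch2_mark_bkey_replicas() and bch2_bkey_replicas_marked() above; a sketch of the idiom (device index illustrative):

	struct bch_devs_list d = bch2_dev_list_single(ca->dev_idx);

	ret = bch2_mark_replicas(c, BCH_DATA_CACHED, d);
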
@@ -15,7 +15,7 @@ struct bch_devs_mask {
 
 struct bch_devs_list {
 	u8 nr;
-	u8 devs[BCH_REPLICAS_MAX];
+	u8 devs[BCH_REPLICAS_MAX + 1];
 };
 
 struct bch_member_cpu {

@@ -164,6 +164,8 @@ read_attribute(extent_migrate_raced);
 rw_attribute(journal_write_delay_ms);
 rw_attribute(journal_reclaim_delay_ms);
 
+rw_attribute(writeback_pages_max);
+
 rw_attribute(discard);
 rw_attribute(cache_replacement_policy);
@@ -310,6 +312,8 @@ SHOW(bch2_fs)
 	sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms);
 	sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
 
+	sysfs_print(writeback_pages_max, c->writeback_pages_max);
+
 	sysfs_print(block_size, block_bytes(c));
 	sysfs_print(btree_node_size, btree_bytes(c));
 	sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
@@ -370,6 +374,9 @@ STORE(__bch2_fs)
 	sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
 	sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
 
+	if (attr == &sysfs_writeback_pages_max)
+		c->writeback_pages_max = strtoul_restrict_or_return(buf, 1, UINT_MAX);
+
 	if (attr == &sysfs_btree_gc_periodic) {
 		ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
 			?: (ssize_t) size;
@@ -459,6 +466,8 @@ struct attribute *bch2_fs_files[] = {
 	&sysfs_journal_write_delay_ms,
 	&sysfs_journal_reclaim_delay_ms,
 
+	&sysfs_writeback_pages_max,
+
 	&sysfs_tiering_percent,
 
 	&sysfs_compression_stats,

@@ -14,10 +14,9 @@
 #include <linux/kthread.h>
 #include <trace/events/bcachefs.h>
 
-static bool tiering_pred(void *arg, struct bkey_s_c_extent e)
+static bool __tiering_pred(struct bch_fs *c, struct bch_tier *tier,
+			   struct bkey_s_c_extent e)
 {
-	struct bch_tier *tier = arg;
-	struct bch_fs *c = container_of(tier, struct bch_fs, tiers[tier->idx]);
 	const struct bch_extent_ptr *ptr;
 	unsigned replicas = 0;
@@ -33,6 +32,21 @@ static bool tiering_pred(void *arg, struct bkey_s_c_extent e)
 	return replicas < c->opts.data_replicas;
 }
 
+static enum data_cmd tiering_pred(struct bch_fs *c, void *arg,
+				  enum bkey_type type,
+				  struct bkey_s_c_extent e,
+				  struct bch_io_opts *io_opts,
+				  struct data_opts *data_opts)
+{
+	struct bch_tier *tier = arg;
+
+	if (!__tiering_pred(c, tier, e))
+		return DATA_SKIP;
+
+	data_opts->btree_insert_flags = 0;
+	return DATA_ADD_REPLICAS;
+}
+
 static int bch2_tiering_thread(void *arg)
 {
 	struct bch_tier *tier = arg;
@@ -90,8 +104,6 @@ static int bch2_tiering_thread(void *arg)
 				SECTORS_IN_FLIGHT_PER_DEVICE * nr_devices,
 				&tier->devs,
 				writepoint_ptr(&tier->wp),
-				0,
-				-1,
 				POS_MIN, POS_MAX,
 				tiering_pred, tier,
 				&move_stats);

@@ -817,4 +817,19 @@ do { \
 #define array_remove_item(_array, _nr, _pos) \
 	array_remove_items(_array, _nr, _pos, 1)
 
+#define bubble_sort(_base, _nr, _cmp) \
+do { \
+	ssize_t _i, _end; \
+	bool _swapped = true; \
+ \
+	for (_end = (ssize_t) (_nr) - 1; _end > 0 && _swapped; --_end) {\
+		_swapped = false; \
+		for (_i = 0; _i < _end; _i++) \
+			if (_cmp((_base)[_i], (_base)[_i + 1]) > 0) { \
+				swap((_base)[_i], (_base)[_i + 1]); \
+				_swapped = true; \
+			} \
+	} \
+} while (0)
+
 #endif /* _BCACHEFS_UTIL_H */
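
A minimal usage sketch of the new bubble_sort() macro: unlike sort(), the comparator is applied to array elements by value rather than through pointers, and the sort is stable since only adjacent out-of-order elements are swapped (names here are illustrative, not from the commit):

static int cmp_u8(u8 l, u8 r)
{
	return (l > r) - (l < r);
}

	u8 a[] = { 3, 1, 2 };

	bubble_sort(a, ARRAY_SIZE(a), cmp_u8);
	/* a[] is now { 1, 2, 3 } */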