mirror of https://github.com/koverstreet/bcachefs-tools.git

commit 807abf36c1 (parent 8d95f4a437)

    Update bcachefs sources to e99d29e402 bcachefs: zstd support, compression refactoring
@@ -1 +1 @@
-d5e561b3cc023dd247d2b3d08b680709ec21b477
+e99d29e40210f6d9b7ec9e5b7aee1e48ae7655c5
Makefile | 11

@@ -9,6 +9,7 @@ CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall \
     -D_GNU_SOURCE \
     -D_LGPL_SOURCE \
     -DRCU_MEMBARRIER \
+    -DZSTD_STATIC_LINKING_ONLY \
     -DNO_BCACHEFS_CHARDEV \
     -DNO_BCACHEFS_FS \
     -DNO_BCACHEFS_SYSFS \
@@ -31,9 +32,15 @@ ifdef D
 endif
 
 PKGCONFIG_LIBS="blkid uuid liburcu libsodium zlib"
+PKGCONFIG_LIBS_STATIC="libzstd"
+
 CFLAGS+=`pkg-config --cflags ${PKGCONFIG_LIBS}`
-LDLIBS+=`pkg-config --libs ${PKGCONFIG_LIBS}` \
-    -lm -lpthread -lrt -lscrypt -lkeyutils -laio
+LDLIBS+=`pkg-config --libs ${PKGCONFIG_LIBS}`
+
+CFLAGS+=`pkg-config --static --cflags ${PKGCONFIG_LIBS_STATIC}`
+LDLIBS+=`pkg-config --static --libs ${PKGCONFIG_LIBS_STATIC}`
+
+LDLIBS+=-lm -lpthread -lrt -lscrypt -lkeyutils -laio
 
 ifeq ($(PREFIX),/usr)
     ROOT_SBINDIR=/sbin
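Note on the build change above: libzstd is queried separately (and with pkg-config --static) because the workspace-sizing functions this commit relies on are only exposed when ZSTD_STATIC_LINKING_ONLY is defined, hence the new -DZSTD_STATIC_LINKING_ONLY in CFLAGS. A minimal standalone probe, assuming a system libzstd; ZSTD_compressBound() is stable API and visible either way:

    /* Hypothetical probe: the advanced zstd API is gated behind this define. */
    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>

    int main(void)
    {
        /* worst-case output size for a 128KiB input */
        printf("%zu\n", ZSTD_compressBound(128 * 1024));
        return 0;
    }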
@@ -344,8 +344,8 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
         die("error reserving space in new filesystem: %s",
             strerror(-ret));
 
-    bch2_check_mark_super(c, BCH_DATA_USER,
-                  bch2_bkey_devs(extent_i_to_s_c(e).s_c));
+    bch2_mark_bkey_replicas(c, BCH_DATA_USER,
+                extent_i_to_s_c(e).s_c);
 
     ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
                 &res, NULL, NULL, 0);
debian/control (vendored) | 2

@@ -5,7 +5,7 @@ Priority: optional
 Standards-Version: 3.9.5
 Build-Depends: debhelper (>= 9), pkg-config, libblkid-dev, uuid-dev,
  libscrypt-dev, libsodium-dev, libkeyutils-dev, liburcu-dev, zlib1g-dev,
- libattr1-dev, libaio-dev
+ libattr1-dev, libaio-dev, libzstd-dev
 Homepage: http://bcache.evilpiepirate.org/
 
 Package: bcachefs-tools
@@ -1201,43 +1201,56 @@ out:
     return ob - c->open_buckets;
 }
 
+static int __dev_alloc_cmp(struct bch_fs *c,
+               struct write_point *wp,
+               unsigned l, unsigned r)
+{
+    struct bch_dev *ca_l = rcu_dereference(c->devs[l]);
+    struct bch_dev *ca_r = rcu_dereference(c->devs[r]);
+
+    if (ca_l && ca_r && ca_l->mi.tier != ca_r->mi.tier)
+        return ((ca_l->mi.tier > ca_r->mi.tier) -
+            (ca_l->mi.tier < ca_r->mi.tier));
+
+    return ((wp->next_alloc[l] > wp->next_alloc[r]) -
+        (wp->next_alloc[l] < wp->next_alloc[r]));
+}
+
+#define dev_alloc_cmp(l, r) __dev_alloc_cmp(c, wp, l, r)
+
 struct dev_alloc_list bch2_wp_alloc_list(struct bch_fs *c,
                      struct write_point *wp,
                      struct bch_devs_mask *devs)
 {
     struct dev_alloc_list ret = { .nr = 0 };
-    struct bch_dev *ca, *ca2;
-    unsigned i, j;
+    struct bch_dev *ca;
+    unsigned i;
 
-    for_each_member_device_rcu(ca, c, i, devs) {
-        for (j = 0; j < ret.nr; j++) {
-            unsigned idx = ret.devs[j];
-
-            ca2 = rcu_dereference(c->devs[idx]);
-            if (!ca2)
-                break;
-
-            if (ca->mi.tier < ca2->mi.tier)
-                break;
-
-            if (ca->mi.tier == ca2->mi.tier &&
-                wp->next_alloc[i] < wp->next_alloc[idx])
-                break;
-        }
-
-        array_insert_item(ret.devs, ret.nr, j, i);
-    }
+    for_each_member_device_rcu(ca, c, i, devs)
+        ret.devs[ret.nr++] = i;
 
+    bubble_sort(ret.devs, ret.nr, dev_alloc_cmp);
     return ret;
 }
 
 void bch2_wp_rescale(struct bch_fs *c, struct bch_dev *ca,
              struct write_point *wp)
 {
-    unsigned i;
+    u64 *v = wp->next_alloc + ca->dev_idx;
+    u64 free_space = dev_buckets_free(c, ca);
+    u64 free_space_inv = free_space
+        ? div64_u64(1ULL << 48, free_space)
+        : 1ULL << 48;
+    u64 scale = *v / 4;
 
-    for (i = 0; i < ARRAY_SIZE(wp->next_alloc); i++)
-        wp->next_alloc[i] >>= 1;
+    if (*v + free_space_inv >= *v)
+        *v += free_space_inv;
+    else
+        *v = U64_MAX;
+
+    for (v = wp->next_alloc;
+         v < wp->next_alloc + ARRAY_SIZE(wp->next_alloc); v++)
+        *v = *v < scale ? 0 : *v - scale;
 }
 
 static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
@@ -1249,7 +1262,6 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
 {
     enum bucket_alloc_ret ret = NO_DEVICES;
     struct dev_alloc_list devs_sorted;
-    u64 buckets_free;
     unsigned i;
 
     BUG_ON(nr_replicas > ARRAY_SIZE(wp->ptrs));
@@ -1281,13 +1293,6 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
         BUG_ON(wp->nr_ptrs >= ARRAY_SIZE(wp->ptrs));
         wp->ptrs[wp->nr_ptrs++] = c->open_buckets + ob;
 
-        buckets_free = dev_buckets_free(c, ca);
-        if (buckets_free)
-            wp->next_alloc[ca->dev_idx] +=
-                div64_u64(U64_MAX, buckets_free *
-                      ca->mi.bucket_size);
-        else
-            wp->next_alloc[ca->dev_idx] = U64_MAX;
         bch2_wp_rescale(c, ca, wp);
 
         __clear_bit(ca->dev_idx, devs->d);
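Note on the allocator change above: each write point keeps a per-device next_alloc counter that grows by an amount inversely proportional to the device's free space, and devices are ordered by (tier, next_alloc) with the shared bubble_sort() helper, so emptier devices sort first and writes stripe toward them. A toy model of the ranking, under assumed types (not the filesystem code):

    #include <stddef.h>
    #include <stdint.h>

    struct dev { unsigned tier; uint64_t next_alloc; };

    /* lower tier first; within a tier, smaller fill counter first */
    static int dev_cmp(const struct dev *l, const struct dev *r)
    {
        if (l->tier != r->tier)
            return l->tier > r->tier ? 1 : -1;
        return (l->next_alloc > r->next_alloc) -
               (l->next_alloc < r->next_alloc);
    }

    static void sort_devs(struct dev *d, size_t nr)
    {
        /* bubble sort is fine here: nr is the (small) device count */
        for (size_t end = nr; end > 1; end--)
            for (size_t i = 0; i + 1 < end; i++)
                if (dev_cmp(&d[i], &d[i + 1]) > 0) {
                    struct dev t = d[i]; d[i] = d[i + 1]; d[i + 1] = t;
                }
    }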
@@ -194,6 +194,7 @@
 #include <linux/shrinker.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <linux/zstd.h>
 
 #include "bcachefs_format.h"
 #include "bset.h"
@@ -231,6 +232,12 @@ do {                            \
         bch_info(c, fmt, ##__VA_ARGS__);        \
 } while (0)
 
+#define pr_verbose_init(opts, fmt, ...)            \
+do {                            \
+    if (opt_get(opts, verbose_init))        \
+        pr_info(fmt, ##__VA_ARGS__);        \
+} while (0)
+
 /* Parameters that are useful for debugging, but should always be compiled in: */
 #define BCH_DEBUG_PARAMS_ALWAYS()            \
     BCH_DEBUG_PARAM(key_merging_disabled,        \
@@ -646,10 +653,10 @@ struct bch_fs {
     struct mutex        bio_bounce_pages_lock;
     mempool_t        bio_bounce_pages;
 
-    mempool_t        lz4_workspace_pool;
-    void            *zlib_workspace;
-    struct mutex        zlib_workspace_lock;
     mempool_t        compression_bounce[2];
+    mempool_t        compress_workspace[BCH_COMPRESSION_NR];
+    mempool_t        decompress_workspace;
+    ZSTD_parameters        zstd_params;
 
     struct crypto_shash    *sha256;
     struct crypto_skcipher    *chacha20;
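Note: pr_verbose_init() only prints when the verbose_init option is set, and the rest of this commit rewrites init functions into a single-exit shape so that entry and final status are each logged once. A sketch of that shape, with do_allocations() as a hypothetical stand-in:

    static int some_subsystem_init(struct bch_fs *c)
    {
        int ret = 0;

        pr_verbose_init(c->opts, "");            /* entry trace */

        ret = do_allocations(c);                 /* hypothetical helper */
        if (ret)
            goto out;                            /* no early returns */
    out:
        pr_verbose_init(c->opts, "ret %i", ret); /* one exit trace */
        return ret;
    }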
@@ -6,7 +6,6 @@
  */
 
 #include <asm/types.h>
-#include <linux/compiler.h>
 #include <asm/byteorder.h>
 #include <linux/uuid.h>
 
@@ -370,7 +369,8 @@ enum bch_compression_type {
     BCH_COMPRESSION_LZ4_OLD    = 1,
     BCH_COMPRESSION_GZIP    = 2,
     BCH_COMPRESSION_LZ4    = 3,
-    BCH_COMPRESSION_NR    = 4,
+    BCH_COMPRESSION_ZSTD    = 4,
+    BCH_COMPRESSION_NR    = 5,
 };
 
 enum bch_extent_entry_type {
@@ -1082,6 +1082,7 @@ LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28);
 enum bch_sb_features {
     BCH_FEATURE_LZ4        = 0,
     BCH_FEATURE_GZIP    = 1,
+    BCH_FEATURE_ZSTD    = 2,
 };
 
 /* options: */
@@ -1109,11 +1110,17 @@ enum bch_str_hash_opts {
     BCH_STR_HASH_NR        = 3,
 };
 
+#define BCH_COMPRESSION_TYPES()        \
+    x(NONE)                \
+    x(LZ4)                \
+    x(GZIP)                \
+    x(ZSTD)
+
 enum bch_compression_opts {
-    BCH_COMPRESSION_OPT_NONE    = 0,
-    BCH_COMPRESSION_OPT_LZ4        = 1,
-    BCH_COMPRESSION_OPT_GZIP    = 2,
-    BCH_COMPRESSION_OPT_NR        = 3,
+#define x(t) BCH_COMPRESSION_OPT_##t,
+    BCH_COMPRESSION_TYPES()
+#undef x
+    BCH_COMPRESSION_OPT_NR
 };
 
 /*
@@ -1322,8 +1329,10 @@ struct btree_node {
     };
 } __attribute__((packed, aligned(8)));
 
-LE64_BITMASK(BTREE_NODE_ID,    struct btree_node, flags, 0, 4);
-LE64_BITMASK(BTREE_NODE_LEVEL,    struct btree_node, flags, 4, 8);
+LE64_BITMASK(BTREE_NODE_ID,    struct btree_node, flags,  0,  4);
+LE64_BITMASK(BTREE_NODE_LEVEL,    struct btree_node, flags,  4,  8);
+/* 8-32 unused */
+LE64_BITMASK(BTREE_NODE_SEQ,    struct btree_node, flags, 32, 64);
 
 struct btree_node_entry {
     struct bch_csum        csum;
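Note: BCH_COMPRESSION_TYPES() is an x-macro; each consumer redefines x() and expands the list, so the option enum here and the lookup tables added later in this commit are all generated from one list and cannot drift apart. Expanded by hand, the enum above is equivalent to:

    enum bch_compression_opts {
        BCH_COMPRESSION_OPT_NONE,   /* = 0 */
        BCH_COMPRESSION_OPT_LZ4,    /* = 1 */
        BCH_COMPRESSION_OPT_GZIP,   /* = 2 */
        BCH_COMPRESSION_OPT_ZSTD,   /* = 3 */
        BCH_COMPRESSION_OPT_NR      /* = 4 */
    };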
@@ -373,19 +373,23 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
 {
     struct btree_cache *bc = &c->btree_cache;
     unsigned i;
-    int ret;
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
 
     ret = rhashtable_init(&bc->table, &bch_btree_cache_params);
     if (ret)
-        return ret;
+        goto out;
 
     bc->table_init_done = true;
 
     bch2_recalc_btree_reserve(c);
 
     for (i = 0; i < bc->reserve; i++)
-        if (!btree_node_mem_alloc(c, GFP_KERNEL))
-            return -ENOMEM;
+        if (!btree_node_mem_alloc(c, GFP_KERNEL)) {
+            ret = -ENOMEM;
+            goto out;
+        }
 
     list_splice_init(&bc->live, &bc->freeable);
 
@@ -393,12 +397,16 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
     mutex_init(&c->verify_lock);
 
     c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
-    if (!c->verify_ondisk)
-        return -ENOMEM;
+    if (!c->verify_ondisk) {
+        ret = -ENOMEM;
+        goto out;
+    }
 
     c->verify_data = btree_node_mem_alloc(c, GFP_KERNEL);
-    if (!c->verify_data)
-        return -ENOMEM;
+    if (!c->verify_data) {
+        ret = -ENOMEM;
+        goto out;
+    }
 
     list_del_init(&c->verify_data->list);
 #endif
@@ -408,8 +416,9 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
     bc->shrink.seeks    = 4;
     bc->shrink.batch    = btree_pages(c) * 2;
     register_shrinker(&bc->shrink);
-
-    return 0;
+out:
+    pr_verbose_init(c->opts, "ret %i", ret);
+    return ret;
 }
 
 void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
@@ -148,14 +148,13 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
 {
     enum bch_data_type data_type = type == BKEY_TYPE_BTREE
         ? BCH_DATA_BTREE : BCH_DATA_USER;
-    struct bch_devs_list devs = bch2_bkey_devs(k);
     int ret = 0;
 
     if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
-        fsck_err_on(!bch2_sb_has_replicas(c, data_type, devs), c,
+        fsck_err_on(!bch2_bkey_replicas_marked(c, data_type, k), c,
             "superblock not marked as containing replicas (type %u)",
             data_type)) {
-        ret = bch2_check_mark_super(c, data_type, devs);
+        ret = bch2_mark_bkey_replicas(c, data_type, k);
         if (ret)
             return ret;
     }
@@ -1135,6 +1135,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
         unsigned sectors, whiteout_u64s = 0;
         struct nonce nonce;
         struct bch_csum csum;
+        bool first = !b->written;
 
         if (!b->written) {
             i = &b->data->keys;
@@ -1194,10 +1195,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
         }
 
         if (ret) {
-            btree_err_on(!b->written,
+            btree_err_on(first,
                      BTREE_ERR_FIXABLE, c, b, i,
                      "first btree node bset has blacklisted journal seq");
-            if (b->written)
+            if (!first)
                 continue;
         }
 
@@ -430,6 +430,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
     n->data->min_key    = b->data->min_key;
     n->data->max_key    = b->data->max_key;
     n->data->format        = format;
+    SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
 
     btree_node_set_format(n, format);
 
@@ -559,8 +560,8 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
             goto err_free;
         }
 
-        ret = bch2_check_mark_super(c, BCH_DATA_BTREE,
-                        bch2_bkey_devs(bkey_i_to_s_c(&b->key)));
+        ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+                          bkey_i_to_s_c(&b->key));
         if (ret)
             goto err_free;
 
@@ -1225,6 +1226,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
 
     n2->data->max_key    = n1->data->max_key;
     n2->data->format    = n1->format;
+    SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data));
     n2->key.k.p = n1->key.k.p;
 
     btree_node_set_format(n2, n2->data->format);
@@ -2019,8 +2021,8 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
             goto err;
     }
 
-    ret = bch2_check_mark_super(c, BCH_DATA_BTREE,
-                bch2_extent_devs(extent_i_to_s_c(new_key)));
+    ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+                      extent_i_to_s_c(new_key).s_c);
     if (ret)
         goto err_free_update;
 
@@ -272,15 +272,10 @@ static void multi_unlock_write(struct btree_insert *trans)
         bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
 }
 
-static inline void btree_trans_sort(struct btree_insert *trans)
+static inline int btree_trans_cmp(struct btree_insert_entry l,
+                  struct btree_insert_entry r)
 {
-    int i, end = trans->nr;
-
-    while (--end > 0)
-        for (i = 0; i < end; i++)
-            if (btree_iter_cmp(trans->entries[i].iter,
-                       trans->entries[i + 1].iter) > 0)
-                swap(trans->entries[i], trans->entries[i + 1]);
+    return btree_iter_cmp(l.iter, r.iter);
 }
 
 /* Normal update interface: */
@@ -313,7 +308,7 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
                        bkey_i_to_s_c(i->k)));
     }
 
-    btree_trans_sort(trans);
+    bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
 
     if (unlikely(!percpu_ref_tryget(&c->writes)))
         return -EROFS;
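Note: the open-coded sort in btree_trans_sort() is replaced by the shared bubble_sort() helper with a small comparator — the same helper the allocator change above uses. A sketch of a bubble_sort() macro of this shape (the real definition lives in the shared utility headers and may differ in detail):

    #define bubble_sort(_base, _nr, _cmp)                               \
    do {                                                                \
        ssize_t _i, _end;                                               \
        for (_end = (ssize_t) (_nr) - 1; _end > 0; --_end)              \
            for (_i = 0; _i < _end; ++_i)                               \
                if (_cmp((_base)[_i], (_base)[_i + 1]) > 0)             \
                    swap((_base)[_i], (_base)[_i + 1]);                 \
    } while (0)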
@@ -219,12 +219,16 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
         crypto_alloc_skcipher("chacha20", 0, 0);
     int ret;
 
-    if (!chacha20)
+    if (!chacha20) {
+        pr_err("error requesting chacha20 module: %li", PTR_ERR(chacha20));
         return PTR_ERR(chacha20);
+    }
 
     ret = crypto_skcipher_setkey(chacha20, (void *) key, sizeof(*key));
-    if (ret)
+    if (ret) {
+        pr_err("crypto_skcipher_setkey() error: %i", ret);
         goto err;
+    }
 
     do_encrypt(chacha20, nonce, buf, len);
 err:
@@ -567,7 +571,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c,
 
     ret = bch2_request_key(c->disk_sb, &user_key);
     if (ret) {
-        bch_err(c, "error requesting encryption key");
+        bch_err(c, "error requesting encryption key: %i", ret);
         goto err;
     }
 
@@ -594,13 +598,19 @@ static int bch2_alloc_ciphers(struct bch_fs *c)
 {
     if (!c->chacha20)
         c->chacha20 = crypto_alloc_skcipher("chacha20", 0, 0);
-    if (IS_ERR(c->chacha20))
+    if (IS_ERR(c->chacha20)) {
+        bch_err(c, "error requesting chacha20 module: %li",
+            PTR_ERR(c->chacha20));
         return PTR_ERR(c->chacha20);
+    }
 
     if (!c->poly1305)
         c->poly1305 = crypto_alloc_shash("poly1305", 0, 0);
-    if (IS_ERR(c->poly1305))
+    if (IS_ERR(c->poly1305)) {
+        bch_err(c, "error requesting poly1305 module: %li",
+            PTR_ERR(c->poly1305));
         return PTR_ERR(c->poly1305);
+    }
 
     return 0;
 }
@@ -660,7 +670,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
     if (keyed) {
         ret = bch2_request_key(c->disk_sb, &user_key);
         if (ret) {
-            bch_err(c, "error requesting encryption key");
+            bch_err(c, "error requesting encryption key: %i", ret);
             goto err;
         }
 
@@ -707,27 +717,35 @@ int bch2_fs_encryption_init(struct bch_fs *c)
 {
     struct bch_sb_field_crypt *crypt;
     struct bch_key key;
-    int ret;
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
 
     c->sha256 = crypto_alloc_shash("sha256", 0, 0);
-    if (IS_ERR(c->sha256))
-        return PTR_ERR(c->sha256);
+    if (IS_ERR(c->sha256)) {
+        bch_err(c, "error requesting sha256 module");
+        ret = PTR_ERR(c->sha256);
+        goto out;
+    }
 
     crypt = bch2_sb_get_crypt(c->disk_sb);
     if (!crypt)
-        return 0;
+        goto out;
 
     ret = bch2_alloc_ciphers(c);
     if (ret)
-        return ret;
+        goto out;
 
     ret = bch2_decrypt_sb_key(c, crypt, &key);
     if (ret)
-        goto err;
+        goto out;
 
     ret = crypto_skcipher_setkey(c->chacha20,
             (void *) &key.key, sizeof(key.key));
-err:
+    if (ret)
+        goto out;
+out:
     memzero_explicit(&key, sizeof(key));
+    pr_verbose_init(c->opts, "ret %i", ret);
     return ret;
 }
@@ -91,20 +91,11 @@ static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c)
     return bch2_csum_opt_to_type(c->opts.metadata_checksum, false);
 }
 
-static inline enum bch_compression_type
-bch2_compression_opt_to_type(enum bch_compression_opts type)
-{
-    switch (type) {
-    case BCH_COMPRESSION_OPT_NONE:
-        return BCH_COMPRESSION_NONE;
-    case BCH_COMPRESSION_OPT_LZ4:
-        return BCH_COMPRESSION_LZ4;
-    case BCH_COMPRESSION_OPT_GZIP:
-        return BCH_COMPRESSION_GZIP;
-    default:
-        BUG();
-    }
-}
+static const unsigned bch2_compression_opt_to_type[] = {
+#define x(t) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_##t,
+    BCH_COMPRESSION_TYPES()
+#undef x
+};
 
 static inline bool bch2_checksum_type_valid(const struct bch_fs *c,
                        unsigned type)
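Note: the switch becomes a const lookup table indexed by the option enum, generated from the same BCH_COMPRESSION_TYPES() list, so adding zstd needed no edit here beyond the list itself. After expansion the table is equivalent to:

    static const unsigned bch2_compression_opt_to_type[] = {
        [BCH_COMPRESSION_OPT_NONE] = BCH_COMPRESSION_NONE,
        [BCH_COMPRESSION_OPT_LZ4]  = BCH_COMPRESSION_LZ4,
        [BCH_COMPRESSION_OPT_GZIP] = BCH_COMPRESSION_GZIP,
        [BCH_COMPRESSION_OPT_ZSTD] = BCH_COMPRESSION_ZSTD,
    };

    /* call sites index instead of call: */
    op->compression_type = bch2_compression_opt_to_type[opts.compression];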
@@ -8,6 +8,7 @@
 #include "lz4.h"
 #include <linux/lz4.h>
 #include <linux/zlib.h>
+#include <linux/zstd.h>
 
 /* Bounce buffer: */
 struct bbuf {
@@ -151,6 +152,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
     struct bbuf src_data = { NULL };
     size_t src_len = src->bi_iter.bi_size;
     size_t dst_len = crc.uncompressed_size << 9;
+    void *workspace;
     int ret;
 
     src_data = bio_map_or_bounce(c, src, READ);
@@ -159,57 +161,64 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
     case BCH_COMPRESSION_LZ4_OLD:
         ret = bch2_lz4_decompress(src_data.b, &src_len,
                       dst_data, dst_len);
-        if (ret) {
-            ret = -EIO;
+        if (ret)
             goto err;
-        }
         break;
     case BCH_COMPRESSION_LZ4:
         ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
                           src_len, dst_len, dst_len);
-        if (ret != dst_len) {
-            ret = -EIO;
+        if (ret != dst_len)
             goto err;
-        }
         break;
     case BCH_COMPRESSION_GZIP: {
-        void *workspace;
-        z_stream strm;
+        z_stream strm = {
+            .next_in    = src_data.b,
+            .avail_in    = src_len,
+            .next_out    = dst_data,
+            .avail_out    = dst_len,
+        };
 
-        workspace = kmalloc(zlib_inflate_workspacesize(),
-                    GFP_NOIO|__GFP_NOWARN);
-        if (!workspace) {
-            mutex_lock(&c->zlib_workspace_lock);
-            workspace = c->zlib_workspace;
-        }
+        workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
 
-        strm.next_in    = src_data.b;
-        strm.avail_in    = src_len;
-        strm.next_out    = dst_data;
-        strm.avail_out    = dst_len;
         zlib_set_workspace(&strm, workspace);
         zlib_inflateInit2(&strm, -MAX_WBITS);
 
         ret = zlib_inflate(&strm, Z_FINISH);
 
-        if (workspace == c->zlib_workspace)
-            mutex_unlock(&c->zlib_workspace_lock);
-        else
-            kfree(workspace);
+        mempool_free(workspace, &c->decompress_workspace);
 
-        if (ret != Z_STREAM_END) {
-            ret = -EIO;
+        if (ret != Z_STREAM_END)
             goto err;
-        }
         break;
     }
+    case BCH_COMPRESSION_ZSTD: {
+        ZSTD_DCtx *ctx;
+        size_t len;
+
+        workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
+        ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
+
+        src_len = le32_to_cpup(src_data.b);
+
+        len = ZSTD_decompressDCtx(ctx,
+                dst_data,    dst_len,
+                src_data.b + 4, src_len);
+
+        mempool_free(workspace, &c->decompress_workspace);
+
+        if (len != dst_len)
+            goto err;
+        break;
+    }
     default:
         BUG();
     }
     ret = 0;
-err:
+out:
     bio_unmap_or_unbounce(c, src_data);
     return ret;
+err:
+    ret = -EIO;
+    goto out;
 }
 
 int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
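Note on the zstd on-disk format above: the compressed payload is stored behind a 4-byte little-endian length (the compress side writes cpu_to_le32(len) at dst and the frame at dst + 4; the decompress side reads it back with le32_to_cpup()), since extents only record sector-granular sizes and zstd needs the exact compressed length. A self-contained userspace sketch of the same framing, using only the stable libzstd API and assuming a little-endian host:

    #include <stdint.h>
    #include <string.h>
    #include <zstd.h>

    /* returns total bytes written (4-byte header + frame), or 0 on failure */
    static size_t frame_compress(void *dst, size_t dst_len,
                                 const void *src, size_t src_len)
    {
        size_t len;
        uint32_t hdr;

        if (dst_len < 4)
            return 0;
        len = ZSTD_compress((char *) dst + 4, dst_len - 4, src, src_len, 0);
        if (ZSTD_isError(len))
            return 0;

        hdr = (uint32_t) len;           /* the kernel code uses cpu_to_le32() */
        memcpy(dst, &hdr, 4);
        return len + 4;
    }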
@@ -282,114 +291,130 @@ err:
     return ret;
 }
 
+static int attempt_compress(struct bch_fs *c,
+                void *workspace,
+                void *dst, size_t dst_len,
+                void *src, size_t src_len,
+                unsigned compression_type)
+{
+    switch (compression_type) {
+    case BCH_COMPRESSION_LZ4: {
+        int len = src_len;
+        int ret = LZ4_compress_destSize(
+                src,        dst,
+                &len,        dst_len,
+                workspace);
+
+        if (len < src_len)
+            return -len;
+
+        return ret;
+    }
+    case BCH_COMPRESSION_GZIP: {
+        z_stream strm = {
+            .next_in    = src,
+            .avail_in    = src_len,
+            .next_out    = dst,
+            .avail_out    = dst_len,
+        };
+
+        zlib_set_workspace(&strm, workspace);
+        zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+                  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
+                  Z_DEFAULT_STRATEGY);
+
+        if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
+            return 0;
+
+        if (zlib_deflateEnd(&strm) != Z_OK)
+            return 0;
+
+        return strm.total_out;
+    }
+    case BCH_COMPRESSION_ZSTD: {
+        ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
+            ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));
+
+        size_t len = ZSTD_compressCCtx(ctx,
+                dst + 4,    dst_len - 4,
+                src,        src_len,
+                c->zstd_params);
+        if (ZSTD_isError(len))
+            return 0;
+
+        *((__le32 *) dst) = cpu_to_le32(len);
+        return len + 4;
+    }
+    default:
+        BUG();
+    }
+}
+
 static unsigned __bio_compress(struct bch_fs *c,
                    struct bio *dst, size_t *dst_len,
                    struct bio *src, size_t *src_len,
                    unsigned compression_type)
 {
     struct bbuf src_data = { NULL }, dst_data = { NULL };
+    void *workspace;
     unsigned pad;
     int ret = 0;
 
     /* If it's only one block, don't bother trying to compress: */
     if (bio_sectors(src) <= c->opts.block_size)
-        goto err;
+        return 0;
 
     dst_data = bio_map_or_bounce(c, dst, WRITE);
     src_data = bio_map_or_bounce(c, src, READ);
 
-    switch (compression_type) {
-    case BCH_COMPRESSION_LZ4_OLD:
-        compression_type = BCH_COMPRESSION_LZ4;
-
-    case BCH_COMPRESSION_LZ4: {
-        void *workspace;
-        int len = src->bi_iter.bi_size;
-
-        workspace = mempool_alloc(&c->lz4_workspace_pool, GFP_NOIO);
-
-        while (1) {
-            if (len <= block_bytes(c)) {
-                ret = 0;
-                break;
-            }
-
-            ret = LZ4_compress_destSize(
-                    src_data.b,    dst_data.b,
-                    &len,        dst->bi_iter.bi_size,
-                    workspace);
-            if (ret >= len) {
-                /* uncompressible: */
-                ret = 0;
-                break;
-            }
-
-            if (!(len & (block_bytes(c) - 1)))
-                break;
-            len = round_down(len, block_bytes(c));
-        }
-        mempool_free(workspace, &c->lz4_workspace_pool);
-
-        if (!ret)
-            goto err;
-
-        *src_len = len;
-        *dst_len = ret;
-        ret = 0;
-        break;
-    }
-    case BCH_COMPRESSION_GZIP: {
-        void *workspace;
-        z_stream strm;
-
-        workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS,
-                                   DEF_MEM_LEVEL),
-                    GFP_NOIO|__GFP_NOWARN);
-        if (!workspace) {
-            mutex_lock(&c->zlib_workspace_lock);
-            workspace = c->zlib_workspace;
-        }
-
-        strm.next_in    = src_data.b;
-        strm.avail_in    = min(src->bi_iter.bi_size,
-                      dst->bi_iter.bi_size);
-        strm.next_out    = dst_data.b;
-        strm.avail_out    = dst->bi_iter.bi_size;
-        zlib_set_workspace(&strm, workspace);
-        zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
-                  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
-                  Z_DEFAULT_STRATEGY);
-
-        ret = zlib_deflate(&strm, Z_FINISH);
-        if (ret != Z_STREAM_END) {
-            ret = -EIO;
-            goto zlib_err;
-        }
-
-        ret = zlib_deflateEnd(&strm);
-        if (ret != Z_OK) {
-            ret = -EIO;
-            goto zlib_err;
-        }
-
-        ret = 0;
-zlib_err:
-        if (workspace == c->zlib_workspace)
-            mutex_unlock(&c->zlib_workspace_lock);
-        else
-            kfree(workspace);
-
-        if (ret)
-            goto err;
-
-        *dst_len = strm.total_out;
-        *src_len = strm.total_in;
-        break;
-    }
-    default:
-        BUG();
+    workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO);
+
+    *src_len = src->bi_iter.bi_size;
+    *dst_len = dst->bi_iter.bi_size;
+
+    /*
+     * XXX: this algorithm sucks when the compression code doesn't tell us
+     * how much would fit, like LZ4 does:
+     */
+    while (1) {
+        if (*src_len <= block_bytes(c)) {
+            ret = -1;
+            break;
+        }
+
+        ret = attempt_compress(c, workspace,
+                       dst_data.b,    *dst_len,
+                       src_data.b,    *src_len,
+                       compression_type);
+        if (ret > 0) {
+            *dst_len = ret;
+            ret = 0;
+            break;
+        }
+
+        /* Didn't fit: should we retry with a smaller amount? */
+        if (*src_len <= *dst_len) {
+            ret = -1;
+            break;
+        }
+
+        /*
+         * If ret is negative, it's a hint as to how much data would fit
+         */
+        BUG_ON(-ret >= *src_len);
+
+        if (ret < 0)
+            *src_len = -ret;
+        else
+            *src_len -= (*src_len - *dst_len) / 2;
+        *src_len = round_down(*src_len, block_bytes(c));
     }
 
+    mempool_free(workspace, &c->compress_workspace[compression_type]);
+
+    if (ret)
+        goto err;
+
     /* Didn't get smaller: */
     if (round_up(*dst_len, block_bytes(c)) >= *src_len)
         goto err;
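Note: attempt_compress() gives all three algorithms one return convention: positive = compressed size, zero = failed or didn't help, negative = minus the number of source bytes that would have fit (only LZ4_compress_destSize() can report this, which is what the XXX comment is complaining about for the other codecs). A hypothetical trace of the retry loop, with made-up sizes and block_bytes(c) = 4096:

    /*
     *   *src_len = 131072, *dst_len = 65536
     *   attempt_compress() -> -98304: only 98304 src bytes fit
     *   *src_len = round_down(98304, 4096) = 98304, retry
     *   attempt_compress() -> 52000: success, compressed size
     *   *dst_len = 52000, ret = 0, done
     */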
@@ -429,6 +454,9 @@ unsigned bch2_bio_compress(struct bch_fs *c,
     /* Don't generate a bigger output than input: */
     dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
 
+    if (compression_type == BCH_COMPRESSION_LZ4_OLD)
+        compression_type = BCH_COMPRESSION_LZ4;
+
     compression_type =
         __bio_compress(c, dst, dst_len, src, src_len, compression_type);
 
@@ -437,81 +465,147 @@ unsigned bch2_bio_compress(struct bch_fs *c,
     return compression_type;
 }
 
+#define BCH_FEATURE_NONE    0
+
+static const unsigned bch2_compression_opt_to_feature[] = {
+#define x(t) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
+    BCH_COMPRESSION_TYPES()
+#undef x
+};
+
+#undef BCH_FEATURE_NONE
+
 /* doesn't write superblock: */
 int bch2_check_set_has_compressed_data(struct bch_fs *c,
                        unsigned compression_type)
 {
-    switch (compression_type) {
-    case BCH_COMPRESSION_OPT_NONE:
-        return 0;
-    case BCH_COMPRESSION_OPT_LZ4:
-        if (bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4))
-            return 0;
-
-        bch2_sb_set_feature(c->disk_sb, BCH_FEATURE_LZ4);
-        break;
-    case BCH_COMPRESSION_OPT_GZIP:
-        if (bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
-            return 0;
-
-        bch2_sb_set_feature(c->disk_sb, BCH_FEATURE_GZIP);
-        break;
-    default:
-        BUG();
-    }
+    unsigned f;
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
+
+    BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
+
+    if (!compression_type)
+        goto out;
+
+    f = bch2_compression_opt_to_feature[compression_type];
+    if (bch2_sb_test_feature(c->disk_sb, f))
+        goto out;
 
-    return bch2_fs_compress_init(c);
+    bch2_sb_set_feature(c->disk_sb, f);
+    ret = bch2_fs_compress_init(c);
+out:
+    pr_verbose_init(c->opts, "ret %i", ret);
+    return ret;
 }
 
 void bch2_fs_compress_exit(struct bch_fs *c)
 {
-    vfree(c->zlib_workspace);
-    mempool_exit(&c->lz4_workspace_pool);
+    unsigned i;
+
+    mempool_exit(&c->decompress_workspace);
+    for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
+        mempool_exit(&c->compress_workspace[i]);
     mempool_exit(&c->compression_bounce[WRITE]);
     mempool_exit(&c->compression_bounce[READ]);
 }
 
-#define COMPRESSION_WORKSPACE_SIZE                    \
-    max_t(size_t, zlib_inflate_workspacesize(),            \
-          zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL))
+static void *mempool_kvpmalloc(gfp_t gfp_mask, void *pool_data)
+{
+    size_t size = (size_t) pool_data;
+
+    return kvpmalloc(size, gfp_mask);
+}
+
+void mempool_kvpfree(void *element, void *pool_data)
+{
+    size_t size = (size_t) pool_data;
+
+    kvpfree(element, size);
+}
+
+static int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+    return !mempool_initialized(pool)
+        ? mempool_init(pool, min_nr, mempool_kvpmalloc,
+                   mempool_kvpfree, (void *) size)
+        : 0;
+}
 
 int bch2_fs_compress_init(struct bch_fs *c)
 {
-    unsigned order = get_order(c->sb.encoded_extent_max << 9);
-    int ret;
+    size_t max_extent = c->sb.encoded_extent_max << 9;
+    size_t order = get_order(max_extent);
+    size_t decompress_workspace_size = 0;
+    bool decompress_workspace_needed;
+    ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0);
+    struct {
+        unsigned    feature;
+        unsigned    type;
+        size_t        compress_workspace;
+        size_t        decompress_workspace;
+    } compression_types[] = {
+        { BCH_FEATURE_LZ4, BCH_COMPRESSION_LZ4, LZ4_MEM_COMPRESS, 0 },
+        { BCH_FEATURE_GZIP, BCH_COMPRESSION_GZIP,
+            zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
+            zlib_inflate_workspacesize(), },
+        { BCH_FEATURE_ZSTD, BCH_COMPRESSION_ZSTD,
+            ZSTD_CCtxWorkspaceBound(params.cParams),
+            ZSTD_DCtxWorkspaceBound() },
+    }, *i;
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
+
+    c->zstd_params = params;
 
-    if (!bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
-        !bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
-        return 0;
+    for (i = compression_types;
+         i < compression_types + ARRAY_SIZE(compression_types);
+         i++)
+        if (bch2_sb_test_feature(c->disk_sb, i->feature))
+            goto have_compressed;
+
+    goto out;
+have_compressed:
 
     if (!mempool_initialized(&c->compression_bounce[READ])) {
         ret = mempool_init_page_pool(&c->compression_bounce[READ],
                          1, order);
         if (ret)
-            return ret;
+            goto out;
     }
 
     if (!mempool_initialized(&c->compression_bounce[WRITE])) {
         ret = mempool_init_page_pool(&c->compression_bounce[WRITE],
                          1, order);
         if (ret)
-            return ret;
+            goto out;
     }
 
-    if (!mempool_initialized(&c->lz4_workspace_pool) &&
-        bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4)) {
-        ret = mempool_init_kmalloc_pool(&c->lz4_workspace_pool,
-                        1, LZ4_MEM_COMPRESS);
+    for (i = compression_types;
+         i < compression_types + ARRAY_SIZE(compression_types);
+         i++) {
+        decompress_workspace_size =
+            max(decompress_workspace_size, i->decompress_workspace);
+
+        if (!bch2_sb_test_feature(c->disk_sb, i->feature))
+            continue;
+
+        if (i->decompress_workspace)
+            decompress_workspace_needed = true;
+
+        ret = mempool_init_kvpmalloc_pool(
+                &c->compress_workspace[i->type],
+                1, i->compress_workspace);
         if (ret)
-            return ret;
+            goto out;
     }
 
-    if (!c->zlib_workspace &&
-        bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP)) {
-        c->zlib_workspace = vmalloc(COMPRESSION_WORKSPACE_SIZE);
-        if (!c->zlib_workspace)
-            return -ENOMEM;
-    }
-
-    return 0;
+    ret = mempool_init_kmalloc_pool(
+            &c->decompress_workspace,
+            1, decompress_workspace_size);
+    if (ret)
+        goto out;
+out:
+    pr_verbose_init(c->opts, "ret %i", ret);
+    return ret;
 }
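Note: mempool_init_kvpmalloc_pool() passes the element size through the mempool's pool_data pointer, so one pair of alloc/free callbacks serves every workspace size in the compression_types[] table; per-type compress pools are sized to each algorithm's worst-case bound, while a single decompress pool is sized to the maximum of the enabled types, since decompression keeps no per-type persistent state. Call sites then reduce to the usual mempool pattern:

    workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO);
    /* ... compress into the bounce buffer ... */
    mempool_free(workspace, &c->compress_workspace[compression_type]);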
@@ -694,7 +694,7 @@ static void btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
         goto err;
     }
 
-    if (!bch2_sb_has_replicas(c, BCH_DATA_BTREE, bch2_extent_devs(e))) {
+    if (!bch2_bkey_replicas_marked(c, BCH_DATA_BTREE, e.s_c)) {
         bch2_bkey_val_to_text(c, btree_node_type(b),
                       buf, sizeof(buf), k);
         bch2_fs_bug(c,
@@ -1834,7 +1834,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
     }
 
     if (!bkey_extent_is_cached(e.k) &&
-        !bch2_sb_has_replicas(c, BCH_DATA_USER, bch2_extent_devs(e))) {
+        !bch2_bkey_replicas_marked(c, BCH_DATA_USER, e.s_c)) {
         bch2_bkey_val_to_text(c, btree_node_type(b),
                       buf, sizeof(buf), e.s_c);
         bch2_fs_bug(c,
@@ -2013,17 +2013,18 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
 }
 
 void bch2_extent_mark_replicas_cached(struct bch_fs *c,
-                      struct bkey_s_extent e)
+                      struct bkey_s_extent e,
+                      unsigned nr_desired_replicas)
 {
     struct bch_extent_ptr *ptr;
     unsigned tier = 0, nr_cached = 0;
     unsigned nr_good = bch2_extent_nr_good_ptrs(c, e.c);
     bool have_higher_tier;
 
-    if (nr_good <= c->opts.data_replicas)
+    if (nr_good <= nr_desired_replicas)
         return;
 
-    nr_cached = nr_good - c->opts.data_replicas;
+    nr_cached = nr_good - nr_desired_replicas;
 
     do {
         have_higher_tier = false;
@@ -38,7 +38,8 @@ bch2_insert_fixup_extent(struct btree_insert *,
              struct btree_insert_entry *);
 
 bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
-void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent);
+void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
+                      unsigned);
 
 const struct bch_extent_ptr *
 bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
@@ -430,6 +431,18 @@ static inline struct bch_devs_list bch2_extent_dirty_devs(struct bkey_s_c_extent
     return ret;
 }
 
+static inline struct bch_devs_list bch2_extent_cached_devs(struct bkey_s_c_extent e)
+{
+    struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+    const struct bch_extent_ptr *ptr;
+
+    extent_for_each_ptr(e, ptr)
+        if (ptr->cached)
+            ret.devs[ret.nr++] = ptr->dev;
+
+    return ret;
+}
+
 static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
 {
     switch (k.k->type) {
@@ -441,6 +454,28 @@ static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
     }
 }
 
+static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k)
+{
+    switch (k.k->type) {
+    case BCH_EXTENT:
+    case BCH_EXTENT_CACHED:
+        return bch2_extent_dirty_devs(bkey_s_c_to_extent(k));
+    default:
+        return (struct bch_devs_list) { .nr = 0 };
+    }
+}
+
+static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
+{
+    switch (k.k->type) {
+    case BCH_EXTENT:
+    case BCH_EXTENT_CACHED:
+        return bch2_extent_cached_devs(bkey_s_c_to_extent(k));
+    default:
+        return (struct bch_devs_list) { .nr = 0 };
+    }
+}
+
 bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent,
                  struct bch_extent_crc_unpacked);
 bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked);
@@ -452,14 +452,18 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
 
             ret = bch2_btree_insert_at(wop->c, &wop->res,
                     &hook.hook, op_journal_seq(wop),
-                    BTREE_INSERT_NOFAIL|BTREE_INSERT_ATOMIC,
+                    BTREE_INSERT_NOFAIL|
+                    BTREE_INSERT_ATOMIC|
+                    BTREE_INSERT_USE_RESERVE,
                     BTREE_INSERT_ENTRY(&extent_iter, k),
                     BTREE_INSERT_ENTRY_EXTRA_RES(&inode_iter,
                         &hook.inode_p.inode.k_i, 2));
         } else {
             ret = bch2_btree_insert_at(wop->c, &wop->res,
                     &hook.hook, op_journal_seq(wop),
-                    BTREE_INSERT_NOFAIL|BTREE_INSERT_ATOMIC,
+                    BTREE_INSERT_NOFAIL|
+                    BTREE_INSERT_ATOMIC|
+                    BTREE_INSERT_USE_RESERVE,
                     BTREE_INSERT_ENTRY(&extent_iter, k));
         }
 
@@ -502,7 +506,7 @@ static inline void bch2_fswrite_op_init(struct bchfs_write_op *op,
 
     bch2_write_op_init(&op->op, c);
     op->op.csum_type = bch2_data_checksum_type(c, opts.data_checksum);
-    op->op.compression_type = bch2_compression_opt_to_type(opts.compression);
+    op->op.compression_type = bch2_compression_opt_to_type[opts.compression];
     op->op.devs = c->fastest_devs;
     op->op.index_update_fn = bchfs_write_index_update;
     op_journal_seq_set(&op->op, &inode->ei_journal_seq);
@@ -2692,6 +2696,10 @@ void bch2_fs_fsio_exit(struct bch_fs *c)
 
 int bch2_fs_fsio_init(struct bch_fs *c)
 {
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
+
     if (bioset_init(&c->writepage_bioset,
             4, offsetof(struct bch_writepage_io, op.op.wbio.bio),
             BIOSET_NEED_BVECS) ||
@@ -2701,9 +2709,10 @@ int bch2_fs_fsio_init(struct bch_fs *c)
         bioset_init(&c->dio_write_bioset,
             4, offsetof(struct dio_write, iop.op.wbio.bio),
             BIOSET_NEED_BVECS))
-        return -ENOMEM;
+        ret = -ENOMEM;
 
-    return 0;
+    pr_verbose_init(c->opts, "ret %i", ret);
+    return ret;
 }
 
 #endif /* NO_BCACHEFS_FS */
@@ -209,17 +209,6 @@ static void bch2_write_done(struct closure *cl)
     closure_return(cl);
 }
 
-static u64 keylist_sectors(struct keylist *keys)
-{
-    struct bkey_i *k;
-    u64 ret = 0;
-
-    for_each_keylist_key(keys, k)
-        ret += k->k.size;
-
-    return ret;
-}
-
 int bch2_write_index_default(struct bch_write_op *op)
 {
     struct keylist *keys = &op->insert_keys;
@@ -232,7 +221,8 @@ int bch2_write_index_default(struct bch_write_op *op)
 
     ret = bch2_btree_insert_list_at(&iter, keys, &op->res,
                     NULL, op_journal_seq(op),
-                    BTREE_INSERT_NOFAIL);
+                    BTREE_INSERT_NOFAIL|
+                    BTREE_INSERT_USE_RESERVE);
     bch2_btree_iter_unlock(&iter);
 
     return ret;
@@ -268,8 +258,7 @@ static void bch2_write_index(struct closure *cl)
     }
 
     if (!(op->flags & BCH_WRITE_NOMARK_REPLICAS)) {
-        ret = bch2_check_mark_super(c, BCH_DATA_USER,
-                        bch2_extent_devs(e.c));
+        ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, e.s_c);
         if (ret)
             goto err;
     }
@@ -910,18 +899,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
     swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
     rbio->promote = NULL;
 
-    bch2_write_op_init(&op->write.op, c);
-    op->write.op.csum_type = bch2_data_checksum_type(c, rbio->opts.data_checksum);
-    op->write.op.compression_type =
-        bch2_compression_opt_to_type(rbio->opts.compression);
-
-    op->write.move_dev = -1;
-    op->write.op.devs = c->fastest_devs;
-    op->write.op.write_point = writepoint_hashed((unsigned long) current);
-    op->write.op.flags |= BCH_WRITE_ALLOC_NOWAIT;
-    op->write.op.flags |= BCH_WRITE_CACHED;
-
-    bch2_migrate_write_init(&op->write, rbio);
+    bch2_migrate_read_done(&op->write, rbio);
 
     closure_init(cl, NULL);
     closure_call(&op->write.op.cl, bch2_write, c->wq, cl);
@@ -932,13 +910,16 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
  * XXX: multiple promotes can race with each other, wastefully. Keep a list of
  * outstanding promotes?
  */
-static struct promote_op *promote_alloc(struct bch_read_bio *rbio)
+static struct promote_op *promote_alloc(struct bch_read_bio *rbio,
+                    struct bkey_s_c k)
 {
     struct bch_fs *c = rbio->c;
     struct promote_op *op;
     struct bio *bio;
+    /* data might have to be decompressed in the write path: */
     unsigned pages = DIV_ROUND_UP(rbio->pick.crc.uncompressed_size,
                       PAGE_SECTORS);
+    int ret;
 
     BUG_ON(!rbio->bounce);
     BUG_ON(pages < rbio->bio.bi_vcnt);
@@ -954,6 +935,14 @@ static struct promote_op *promote_alloc(struct bch_read_bio *rbio)
     memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
            sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
 
+    ret = bch2_migrate_write_init(c, &op->write, c->fastest_devs,
+                      writepoint_hashed((unsigned long) current),
+                      rbio->opts,
+                      DATA_PROMOTE,
+                      (struct data_opts) { 0 },
+                      k);
+    BUG_ON(ret);
+
     return op;
 }
 
@@ -1407,7 +1396,7 @@ noclone:
     rbio->pick        = *pick;
     rbio->pos        = pos;
     rbio->version        = e.k->version;
-    rbio->promote        = promote ? promote_alloc(rbio) : NULL;
+    rbio->promote        = promote ? promote_alloc(rbio, e.s_c) : NULL;
     INIT_WORK(&rbio->work, NULL);
 
     bio_set_dev(&rbio->bio, pick->ca->disk_sb.bdev);
@@ -70,7 +70,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c)
     op->error        = 0;
     op->csum_type        = bch2_data_checksum_type(c, c->opts.data_checksum);
     op->compression_type    =
-        bch2_compression_opt_to_type(c->opts.compression);
+        bch2_compression_opt_to_type[c->opts.compression];
     op->nr_replicas        = 0;
     op->nr_replicas_required = c->opts.data_replicas_required;
     op->alloc_reserve    = RESERVE_NONE;
@@ -1046,12 +1046,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 
         if (!degraded &&
             (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
-             fsck_err_on(!bch2_sb_has_replicas(c, BCH_DATA_JOURNAL,
+             fsck_err_on(!bch2_replicas_marked(c, BCH_DATA_JOURNAL,
                                i->devs), c,
                  "superblock not marked as containing replicas (type %u)",
                  BCH_DATA_JOURNAL))) {
-            ret = bch2_check_mark_super(c, BCH_DATA_JOURNAL,
-                            i->devs);
+            ret = bch2_mark_replicas(c, BCH_DATA_JOURNAL, i->devs);
             if (ret)
                 return ret;
         }
@@ -2232,7 +2231,7 @@ static void journal_write_done(struct closure *cl)
         goto err;
     }
 
-    if (bch2_check_mark_super(c, BCH_DATA_JOURNAL, devs))
+    if (bch2_mark_replicas(c, BCH_DATA_JOURNAL, devs))
         goto err;
 out:
     __bch2_time_stats_update(j->write_time, j->write_start_time);
@@ -2851,7 +2850,7 @@ int bch2_journal_flush_device(struct journal *j, int dev_idx)
         seq++;
 
         spin_unlock(&j->lock);
-        ret = bch2_check_mark_super(c, BCH_DATA_JOURNAL, devs);
+        ret = bch2_mark_replicas(c, BCH_DATA_JOURNAL, devs);
         spin_lock(&j->lock);
     }
     spin_unlock(&j->lock);
@@ -2946,7 +2945,11 @@ void bch2_fs_journal_exit(struct journal *j)
 
 int bch2_fs_journal_init(struct journal *j)
 {
+    struct bch_fs *c = container_of(j, struct bch_fs, journal);
     static struct lock_class_key res_key;
+    int ret = 0;
+
+    pr_verbose_init(c->opts, "");
 
     spin_lock_init(&j->lock);
     spin_lock_init(&j->err_lock);
@@ -2972,12 +2975,15 @@ int bch2_fs_journal_init(struct journal *j)
 
     if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
         !(j->buf[0].data = kvpmalloc(j->buf[0].size, GFP_KERNEL)) ||
-        !(j->buf[1].data = kvpmalloc(j->buf[1].size, GFP_KERNEL)))
-        return -ENOMEM;
+        !(j->buf[1].data = kvpmalloc(j->buf[1].size, GFP_KERNEL))) {
+        ret = -ENOMEM;
+        goto out;
+    }
 
     j->pin.front = j->pin.back = 1;
-
-    return 0;
+out:
+    pr_verbose_init(c->opts, "ret %i", ret);
+    return ret;
 }
 
 /* debug: */
@@ -58,6 +58,17 @@ static inline struct bkey_i *bch2_keylist_front(struct keylist *l)
 #define keylist_single(k)                    \
     ((struct keylist) { .keys = k, .top = bkey_next(k) })
 
+static inline u64 keylist_sectors(struct keylist *keys)
+{
+    struct bkey_i *k;
+    u64 ret = 0;
+
+    for_each_keylist_key(keys, k)
+        ret += k->k.size;
+
+    return ret;
+}
+
 #ifdef CONFIG_BCACHEFS_DEBUG
 void bch2_verify_keylist_sorted(struct keylist *);
 #else
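Note: keylist_sectors() moves out of io.c into this header because the migrate path now uses it to size disk reservations when an extent turns out to need more dirty pointers than were reserved (see the nr_dirty handling in the move.c changes below), along the lines of:

    /* sketch: reserve for the replicas we're short, across the whole keylist */
    unsigned sectors = (nr_dirty - m->nr_ptrs_reserved) * keylist_sectors(keys);
    ret = bch2_disk_reservation_add(c, &op->res, sectors, 0);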
@@ -13,118 +13,6 @@
 #include "move.h"
 #include "super-io.h"
 
-static bool migrate_pred(void *arg, struct bkey_s_c_extent e)
-{
-    struct bch_dev *ca = arg;
-
-    return bch2_extent_has_device(e, ca->dev_idx);
-}
-
-#define MAX_DATA_OFF_ITER    10
-
-static int bch2_dev_usrdata_migrate(struct bch_fs *c, struct bch_dev *ca,
-                    int flags)
-{
-    struct btree_iter iter;
-    struct bkey_s_c k;
-    struct bch_move_stats stats;
-    unsigned pass = 0;
-    int ret = 0;
-
-    if (!(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_USER)))
-        return 0;
-
-    /*
-     * XXX: we should be able to do this in one pass, but bch2_move_data()
-     * can spuriously fail to move an extent due to racing with other move
-     * operations
-     */
-    do {
-        memset(&stats, 0, sizeof(stats));
-
-        ret = bch2_move_data(c, NULL,
-                     SECTORS_IN_FLIGHT_PER_DEVICE,
-                     NULL,
-                     writepoint_hashed((unsigned long) current),
-                     0,
-                     ca->dev_idx,
-                     POS_MIN, POS_MAX,
-                     migrate_pred, ca,
-                     &stats);
-        if (ret) {
-            bch_err(c, "error migrating data: %i", ret);
-            return ret;
-        }
-    } while (atomic64_read(&stats.keys_moved) && pass++ < MAX_DATA_OFF_ITER);
-
-    if (atomic64_read(&stats.keys_moved)) {
-        bch_err(c, "unable to migrate all data in %d iterations",
-            MAX_DATA_OFF_ITER);
-        return -1;
-    }
-
-    mutex_lock(&c->replicas_gc_lock);
-    bch2_replicas_gc_start(c, 1 << BCH_DATA_USER);
-
-    for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_PREFETCH, k) {
-        ret = bch2_check_mark_super(c, BCH_DATA_USER, bch2_bkey_devs(k));
-        if (ret) {
-            bch_err(c, "error migrating data %i from check_mark_super()", ret);
-            break;
-        }
-    }
-
-    bch2_replicas_gc_end(c, ret);
-    mutex_unlock(&c->replicas_gc_lock);
-    return ret;
-}
-
-static int bch2_dev_metadata_migrate(struct bch_fs *c, struct bch_dev *ca,
-                     int flags)
-{
-    struct btree_iter iter;
-    struct btree *b;
-    int ret = 0;
-    unsigned id;
-
-    if (!(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_BTREE)))
-        return 0;
-
-    mutex_lock(&c->replicas_gc_lock);
-    bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
-
-    for (id = 0; id < BTREE_ID_NR; id++) {
-        for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
-            struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
-
-            if (!bch2_extent_has_device(e, ca->dev_idx))
-                continue;
-
-            ret = bch2_btree_node_rewrite(c, &iter, b->data->keys.seq, 0);
-            if (ret) {
-                bch2_btree_iter_unlock(&iter);
-                goto err;
-            }
-        }
-        ret = bch2_btree_iter_unlock(&iter);
-        if (ret)
-            goto err;
-    }
-err:
-    bch2_replicas_gc_end(c, ret);
-    mutex_unlock(&c->replicas_gc_lock);
-    return ret;
-}
-
-int bch2_dev_data_migrate(struct bch_fs *c, struct bch_dev *ca, int flags)
-{
-    BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW &&
-           bch2_dev_is_online(ca));
-
-    return bch2_dev_usrdata_migrate(c, ca, flags) ?:
-        bch2_dev_metadata_migrate(c, ca, flags);
-}
-
 static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
              unsigned dev_idx, int flags, bool metadata)
 {
@@ -152,7 +40,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
     int ret = 0;
 
     mutex_lock(&c->replicas_gc_lock);
-    bch2_replicas_gc_start(c, 1 << BCH_DATA_USER);
+    bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
 
     bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
                  POS_MIN, BTREE_ITER_PREFETCH);
@@ -161,8 +49,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
            !(ret = btree_iter_err(k))) {
         if (!bkey_extent_is_data(k.k) ||
             !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
-            ret = bch2_check_mark_super(c, BCH_DATA_USER,
-                            bch2_bkey_devs(k));
+            ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, k);
             if (ret)
                 break;
             bch2_btree_iter_next(&iter);
@@ -183,8 +70,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
          */
         bch2_extent_normalize(c, e.s);
 
-        ret = bch2_check_mark_super(c, BCH_DATA_USER,
-                        bch2_bkey_devs(bkey_i_to_s_c(&tmp.key)));
+        ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER,
+                          bkey_i_to_s_c(&tmp.key));
         if (ret)
             break;
 
@@ -240,8 +127,8 @@ retry:
                       dev_idx)) {
             bch2_btree_iter_set_locks_want(&iter, 0);
 
-            ret = bch2_check_mark_super(c, BCH_DATA_BTREE,
-                    bch2_bkey_devs(bkey_i_to_s_c(&b->key)));
+            ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+                              bkey_i_to_s_c(&b->key));
             if (ret)
                 goto err;
         } else {
@@ -1,7 +1,6 @@
 #ifndef _BCACHEFS_MIGRATE_H
 #define _BCACHEFS_MIGRATE_H
 
-int bch2_dev_data_migrate(struct bch_fs *, struct bch_dev *, int);
 int bch2_dev_data_drop(struct bch_fs *, unsigned, int);
 
 #endif /* _BCACHEFS_MIGRATE_H */
@ -58,6 +58,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
|
||||
BKEY_PADDED(k) _new, _insert;
|
||||
struct bch_extent_ptr *ptr;
|
||||
struct bch_extent_crc_unpacked crc;
|
||||
unsigned nr_dirty;
|
||||
bool did_work = false;
|
||||
|
||||
if (btree_iter_err(k)) {
|
||||
@ -71,6 +72,11 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
|
||||
m->ptr, m->offset))
|
||||
goto nomatch;
|
||||
|
||||
if (m->data_cmd == DATA_REWRITE &&
|
||||
!bch2_extent_has_device(bkey_s_c_to_extent(k),
|
||||
m->data_opts.rewrite_dev))
|
||||
goto nomatch;
|
||||
|
||||
bkey_reassemble(&_insert.k, k);
|
||||
insert = bkey_i_to_extent(&_insert.k);
|
||||
|
||||
@ -81,11 +87,12 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
|
||||
bch2_cut_back(new->k.p, &insert->k);
|
||||
bch2_cut_back(insert->k.p, &new->k);
|
||||
|
||||
if (m->move_dev >= 0 &&
|
||||
(ptr = (struct bch_extent_ptr *)
|
||||
bch2_extent_has_device(extent_i_to_s_c(insert),
|
||||
m->move_dev)))
|
||||
if (m->data_cmd == DATA_REWRITE) {
|
||||
ptr = (struct bch_extent_ptr *)
|
||||
bch2_extent_has_device(extent_i_to_s_c(insert),
|
||||
m->data_opts.rewrite_dev);
|
||||
bch2_extent_drop_ptr(extent_i_to_s(insert), ptr);
|
||||
}
|
||||
|
||||
extent_for_each_ptr_crc(extent_i_to_s(new), ptr, crc) {
|
||||
if (bch2_extent_has_device(extent_i_to_s_c(insert), ptr->dev)) {
|
||||
@ -108,10 +115,35 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
|
||||
bch2_extent_narrow_crcs(insert,
|
||||
(struct bch_extent_crc_unpacked) { 0 });
|
||||
bch2_extent_normalize(c, extent_i_to_s(insert).s);
|
||||
bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert));
|
||||
bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert),
|
||||
c->opts.data_replicas);
|
||||
|
||||
ret = bch2_check_mark_super(c, BCH_DATA_USER,
|
||||
bch2_extent_devs(extent_i_to_s_c(insert)));
|
||||
/*
|
||||
* It's possible we race, and for whatever reason the extent now
|
||||
* has fewer replicas than when we last looked at it - meaning
|
||||
* we need to get a disk reservation here:
|
||||
*/
|
||||
nr_dirty = bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i));
|
||||
if (m->nr_ptrs_reserved < nr_dirty) {
|
||||
unsigned sectors = (nr_dirty - m->nr_ptrs_reserved) *
|
||||
keylist_sectors(keys);
|
||||
|
||||
/*
|
||||
* can't call bch2_disk_reservation_add() with btree
|
||||
* locks held, at least not without a song and dance
|
||||
*/
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
ret = bch2_disk_reservation_add(c, &op->res, sectors, 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
m->nr_ptrs_reserved = nr_dirty;
|
||||
goto next;
|
||||
}
|
||||
|
||||
ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER,
|
||||
extent_i_to_s_c(insert).s_c);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
@ -119,7 +151,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
|
||||
NULL, op_journal_seq(op),
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
m->btree_insert_flags,
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
m->data_opts.btree_insert_flags,
|
||||
BTREE_INSERT_ENTRY(&iter, &insert->k_i));
|
||||
if (!ret)
|
||||
atomic_long_inc(&c->extent_migrate_done);
|
||||
@ -150,8 +183,7 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_migrate_write_init(struct migrate_write *m,
|
||||
struct bch_read_bio *rbio)
|
||||
void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio)
|
||||
{
|
||||
/* write bio must own pages: */
|
||||
BUG_ON(!m->op.wbio.bio.bi_vcnt);
|
||||
@ -162,16 +194,39 @@ void bch2_migrate_write_init(struct migrate_write *m,
|
||||
m->op.pos = rbio->pos;
|
||||
m->op.version = rbio->version;
|
||||
m->op.crc = rbio->pick.crc;
|
||||
m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
|
||||
|
||||
if (bch2_csum_type_is_encryption(m->op.crc.csum_type)) {
|
||||
m->op.nonce = m->op.crc.nonce + m->op.crc.offset;
|
||||
m->op.csum_type = m->op.crc.csum_type;
|
||||
}
|
||||
|
||||
if (m->move_dev >= 0)
|
||||
bch2_dev_list_drop_dev(&m->op.devs_have, m->move_dev);
|
||||
if (m->data_cmd == DATA_REWRITE)
|
||||
bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
|
||||
}
|
||||
|
||||
if (m->btree_insert_flags & BTREE_INSERT_USE_RESERVE)
|
||||
int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
|
||||
struct bch_devs_mask *devs,
|
||||
struct write_point_specifier wp,
|
||||
struct bch_io_opts io_opts,
|
||||
enum data_cmd data_cmd,
|
||||
struct data_opts data_opts,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
int ret;
|
||||
|
||||
m->data_cmd = data_cmd;
|
||||
m->data_opts = data_opts;
|
||||
m->nr_ptrs_reserved = bch2_extent_nr_dirty_ptrs(k);
|
||||
|
||||
bch2_write_op_init(&m->op, c);
|
||||
m->op.csum_type = bch2_data_checksum_type(c, io_opts.data_checksum);
|
||||
m->op.compression_type =
|
||||
bch2_compression_opt_to_type[io_opts.compression];
|
||||
m->op.devs = devs;
|
||||
m->op.write_point = wp;
|
||||
|
||||
if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE)
|
||||
m->op.alloc_reserve = RESERVE_MOVINGGC;
|
||||
|
||||
m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS|
|
||||
@ -180,10 +235,35 @@ void bch2_migrate_write_init(struct migrate_write *m,
|
||||
BCH_WRITE_DATA_ENCODED|
|
||||
BCH_WRITE_NOMARK_REPLICAS;
|
||||
|
||||
m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
|
||||
m->op.nr_replicas = 1;
|
||||
m->op.nr_replicas_required = 1;
|
||||
m->op.index_update_fn = bch2_migrate_index_update;
|
||||
|
||||
switch (data_cmd) {
|
||||
case DATA_ADD_REPLICAS:
|
||||
if (m->nr_ptrs_reserved < c->opts.data_replicas) {
|
||||
m->op.nr_replicas = c->opts.data_replicas - m->nr_ptrs_reserved;
|
||||
|
||||
ret = bch2_disk_reservation_get(c, &m->op.res,
|
||||
k.k->size,
|
||||
m->op.nr_replicas, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
m->nr_ptrs_reserved = c->opts.data_replicas;
|
||||
}
|
||||
break;
|
||||
case DATA_REWRITE:
|
||||
break;
|
||||
case DATA_PROMOTE:
|
||||
m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
|
||||
m->op.flags |= BCH_WRITE_CACHED;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}

 static void move_free(struct closure *cl)
@@ -210,7 +290,7 @@ static void move_write(struct closure *cl)
 	struct moving_io *io = container_of(cl, struct moving_io, cl);

 	if (likely(!io->rbio.bio.bi_status)) {
-		bch2_migrate_write_init(&io->write, &io->rbio);
+		bch2_migrate_read_done(&io->write, &io->rbio);
 		closure_call(&io->write.op.cl, bch2_write, NULL, cl);
 	}

@@ -238,19 +318,19 @@ static void move_read_endio(struct bio *bio)
 }

 static int bch2_move_extent(struct bch_fs *c,
-			    struct moving_context *ctxt,
-			    struct bch_devs_mask *devs,
-			    struct write_point_specifier wp,
-			    int btree_insert_flags,
-			    int move_device,
-			    struct bch_io_opts opts,
-			    struct bkey_s_c_extent e)
+			    struct moving_context *ctxt,
+			    struct bch_devs_mask *devs,
+			    struct write_point_specifier wp,
+			    struct bch_io_opts io_opts,
+			    struct bkey_s_c_extent e,
+			    enum data_cmd data_cmd,
+			    struct data_opts data_opts)
 {
 	struct extent_pick_ptr pick;
 	struct moving_io *io;
 	const struct bch_extent_ptr *ptr;
 	struct bch_extent_crc_unpacked crc;
-	unsigned sectors = e.k->size, pages, nr_good;
+	unsigned sectors = e.k->size, pages;
 	int ret = -ENOMEM;

 	bch2_extent_pick_ptr(c, e.s_c, NULL, &pick);
@@ -279,7 +359,7 @@ static int bch2_move_extent(struct bch_fs *c,
 	if (bio_alloc_pages(&io->write.op.wbio.bio, GFP_KERNEL))
 		goto err_free;

-	io->rbio.opts = opts;
+	io->rbio.opts = io_opts;
 	bio_init(&io->rbio.bio, io->bi_inline_vecs, pages);
 	bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
 	io->rbio.bio.bi_iter.bi_size = sectors << 9;
@@ -288,27 +368,10 @@ static int bch2_move_extent(struct bch_fs *c,
 	io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(e.k);
 	io->rbio.bio.bi_end_io = move_read_endio;

-	io->write.btree_insert_flags = btree_insert_flags;
-	io->write.move_dev = move_device;
-
-	bch2_write_op_init(&io->write.op, c);
-	io->write.op.csum_type = bch2_data_checksum_type(c, opts.data_checksum);
-	io->write.op.compression_type =
-		bch2_compression_opt_to_type(opts.compression);
-	io->write.op.devs = devs;
-	io->write.op.write_point = wp;
-
-	if (move_device < 0 &&
-	    ((nr_good = bch2_extent_nr_good_ptrs(c, e)) <
-	     c->opts.data_replicas)) {
-		io->write.op.nr_replicas = c->opts.data_replicas - nr_good;
-
-		ret = bch2_disk_reservation_get(c, &io->write.op.res,
-						e.k->size,
-						io->write.op.nr_replicas, 0);
-		if (ret)
-			goto err_free_pages;
-	}
+	ret = bch2_migrate_write_init(c, &io->write, devs, wp,
+				      io_opts, data_cmd, data_opts, e.s_c);
+	if (ret)
+		goto err_free_pages;

 	atomic64_inc(&ctxt->stats->keys_moved);
 	atomic64_add(e.k->size, &ctxt->stats->sectors_moved);
@@ -369,8 +432,6 @@ int bch2_move_data(struct bch_fs *c,
 		   unsigned sectors_in_flight,
 		   struct bch_devs_mask *devs,
 		   struct write_point_specifier wp,
-		   int btree_insert_flags,
-		   int move_device,
 		   struct bpos start,
 		   struct bpos end,
 		   move_pred_fn pred, void *arg,
@@ -378,12 +439,14 @@ int bch2_move_data(struct bch_fs *c,
 {
 	bool kthread = (current->flags & PF_KTHREAD) != 0;
 	struct moving_context ctxt = { .stats = stats };
-	struct bch_io_opts opts = bch2_opts_to_inode_opts(c->opts);
+	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
 	BKEY_PADDED(k) tmp;
 	struct bkey_s_c k;
 	struct bkey_s_c_extent e;
+	struct data_opts data_opts;
+	enum data_cmd data_cmd;
 	u64 cur_inum = U64_MAX;
-	int ret = 0;
+	int ret = 0, ret2;

 	closure_init_stack(&ctxt.cl);
 	INIT_LIST_HEAD(&ctxt.reads);
@@ -430,28 +493,44 @@ peek:
 			/* don't hold btree locks while looking up inode: */
 			bch2_btree_iter_unlock(&stats->iter);

-			opts = bch2_opts_to_inode_opts(c->opts);
+			io_opts = bch2_opts_to_inode_opts(c->opts);
 			if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
-				bch2_io_opts_apply(&opts, bch2_inode_opts_get(&inode));
+				bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode));
 			cur_inum = k.k->p.inode;
 			goto peek;
 		}

-		if (!pred(arg, e))
+		switch ((data_cmd = pred(c, arg, BKEY_TYPE_EXTENTS, e,
+					 &io_opts, &data_opts))) {
+		case DATA_SKIP:
 			goto next;
+		case DATA_SCRUB:
+			BUG();
+		case DATA_ADD_REPLICAS:
+		case DATA_REWRITE:
+		case DATA_PROMOTE:
+			break;
+		default:
+			BUG();
+		}

 		/* unlock before doing IO: */
 		bkey_reassemble(&tmp.k, k);
 		k = bkey_i_to_s_c(&tmp.k);
 		bch2_btree_iter_unlock(&stats->iter);

-		if (bch2_move_extent(c, &ctxt, devs, wp,
-				     btree_insert_flags,
-				     move_device, opts,
-				     bkey_s_c_to_extent(k))) {
-			/* memory allocation failure, wait for some IO to finish */
-			bch2_move_ctxt_wait_for_io(&ctxt);
-			continue;
+		ret2 = bch2_move_extent(c, &ctxt, devs, wp, io_opts,
+					bkey_s_c_to_extent(k),
+					data_cmd, data_opts);
+		if (ret2) {
+			if (ret2 == -ENOMEM) {
+				/* memory allocation failure, wait for some IO to finish */
+				bch2_move_ctxt_wait_for_io(&ctxt);
+				continue;
+			}
+
+			/* XXX signal failure */
+			goto next;
 		}

 		if (rate)
@@ -486,11 +565,11 @@ static int bch2_gc_data_replicas(struct bch_fs *c)
 	int ret;

 	mutex_lock(&c->replicas_gc_lock);
-	bch2_replicas_gc_start(c, 1 << BCH_DATA_USER);
+	bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));

 	for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
 			   BTREE_ITER_PREFETCH, k) {
-		ret = bch2_check_mark_super(c, BCH_DATA_USER, bch2_bkey_devs(k));
+		ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, k);
 		if (ret)
 			break;
 	}
@@ -514,8 +593,8 @@ static int bch2_gc_btree_replicas(struct bch_fs *c)

 	for (id = 0; id < BTREE_ID_NR; id++) {
 		for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
-			ret = bch2_check_mark_super(c, BCH_DATA_BTREE,
-						    bch2_bkey_devs(bkey_i_to_s_c(&b->key)));
+			ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
						      bkey_i_to_s_c(&b->key));

 			bch2_btree_iter_cond_resched(&iter);
 		}
@@ -534,18 +613,35 @@ static int bch2_move_btree(struct bch_fs *c,
 			   void *arg,
 			   struct bch_move_stats *stats)
 {
+	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
 	struct btree *b;
 	unsigned id;
+	struct data_opts data_opts;
+	enum data_cmd cmd;
 	int ret = 0;

 	stats->data_type = BCH_DATA_BTREE;

 	for (id = 0; id < BTREE_ID_NR; id++) {
 		for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
-			if (pred(arg, bkey_i_to_s_c_extent(&b->key)))
-				ret = bch2_btree_node_rewrite(c, &stats->iter,
-							      b->data->keys.seq, 0) ?: ret;
+			switch ((cmd = pred(c, arg, BKEY_TYPE_BTREE,
+					    bkey_i_to_s_c_extent(&b->key),
+					    &io_opts,
+					    &data_opts))) {
+			case DATA_SKIP:
+				goto next;
+			case DATA_SCRUB:
+				BUG();
+			case DATA_ADD_REPLICAS:
+			case DATA_REWRITE:
+				break;
+			default:
+				BUG();
+			}
+
+			ret = bch2_btree_node_rewrite(c, &stats->iter,
+						      b->data->keys.seq, 0) ?: ret;
+next:
 			bch2_btree_iter_cond_resched(&stats->iter);
 		}

@@ -556,32 +652,48 @@ static int bch2_move_btree(struct bch_fs *c,
 }

 #if 0
-static bool scrub_data_pred(void *arg, struct bkey_s_c_extent e)
+static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
+				enum bkey_type type,
+				struct bkey_s_c_extent e,
+				struct bch_io_opts *io_opts,
+				struct data_opts *data_opts)
 {
 	return DATA_SCRUB;
 }
 #endif

-static bool rereplicate_metadata_pred(void *arg, struct bkey_s_c_extent e)
+static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
+				      enum bkey_type type,
+				      struct bkey_s_c_extent e,
+				      struct bch_io_opts *io_opts,
+				      struct data_opts *data_opts)
 {
-	struct bch_fs *c = arg;
 	unsigned nr_good = bch2_extent_nr_good_ptrs(c, e);
+	unsigned replicas = type == BKEY_TYPE_BTREE
+		? c->opts.metadata_replicas
+		: c->opts.data_replicas;

-	return nr_good && nr_good < c->opts.metadata_replicas;
+	if (!nr_good || nr_good >= replicas)
+		return DATA_SKIP;
+
+	data_opts->btree_insert_flags = 0;
+	return DATA_ADD_REPLICAS;
 }

-static bool rereplicate_data_pred(void *arg, struct bkey_s_c_extent e)
-{
-	struct bch_fs *c = arg;
-	unsigned nr_good = bch2_extent_nr_good_ptrs(c, e);
-
-	return nr_good && nr_good < c->opts.data_replicas;
-}
-
-static bool migrate_pred(void *arg, struct bkey_s_c_extent e)
+static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
+				  enum bkey_type type,
+				  struct bkey_s_c_extent e,
+				  struct bch_io_opts *io_opts,
+				  struct data_opts *data_opts)
 {
 	struct bch_ioctl_data *op = arg;

-	return bch2_extent_has_device(e, op->migrate.dev);
+	if (!bch2_extent_has_device(e, op->migrate.dev))
+		return DATA_SKIP;
+
+	data_opts->btree_insert_flags = 0;
+	data_opts->rewrite_dev = op->migrate.dev;
+	return DATA_REWRITE;
 }

 int bch2_data_job(struct bch_fs *c,
@@ -595,16 +707,15 @@ int bch2_data_job(struct bch_fs *c,
 		stats->data_type = BCH_DATA_JOURNAL;
 		ret = bch2_journal_flush_device(&c->journal, -1);

-		ret = bch2_move_btree(c, rereplicate_metadata_pred, c, stats) ?: ret;
+		ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
 		ret = bch2_gc_btree_replicas(c) ?: ret;

 		ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE,
 				     NULL,
 				     writepoint_hashed((unsigned long) current),
-				     0, -1,
 				     op.start,
 				     op.end,
-				     rereplicate_data_pred, c, stats) ?: ret;
+				     rereplicate_pred, c, stats) ?: ret;
 		ret = bch2_gc_data_replicas(c) ?: ret;
 		break;
 	case BCH_DATA_OP_MIGRATE:
@@ -620,7 +731,6 @@ int bch2_data_job(struct bch_fs *c,
 		ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE,
 				     NULL,
 				     writepoint_hashed((unsigned long) current),
-				     0, -1,
 				     op.start,
 				     op.end,
 				     migrate_pred, &op, stats) ?: ret;

@@ -8,23 +8,47 @@
 struct bch_read_bio;
 struct moving_context;

+enum data_cmd {
+	DATA_SKIP,
+	DATA_SCRUB,
+	DATA_ADD_REPLICAS,
+	DATA_REWRITE,
+	DATA_PROMOTE,
+};
+
+struct data_opts {
+	unsigned	rewrite_dev;
+	int		btree_insert_flags;
+};
+
 struct migrate_write {
+	enum data_cmd		data_cmd;
+	struct data_opts	data_opts;
+
+	unsigned		nr_ptrs_reserved;
+
 	struct moving_context	*ctxt;

 	/* what we read: */
 	struct bch_extent_ptr	ptr;
 	u64			offset;

-	int			move_dev;
-	int			btree_insert_flags;
 	struct bch_write_op	op;
 };

-void bch2_migrate_write_init(struct migrate_write *, struct bch_read_bio *);
+void bch2_migrate_read_done(struct migrate_write *, struct bch_read_bio *);
+int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
+			    struct bch_devs_mask *,
+			    struct write_point_specifier,
+			    struct bch_io_opts,
+			    enum data_cmd, struct data_opts,
+			    struct bkey_s_c);

 #define SECTORS_IN_FLIGHT_PER_DEVICE	2048

-typedef bool (*move_pred_fn)(void *, struct bkey_s_c_extent);
+typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
+				      enum bkey_type, struct bkey_s_c_extent,
+				      struct bch_io_opts *, struct data_opts *);
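
Under the new typedef, a predicate both decides whether to move a key and fills in how it should be moved. A minimal sketch of a predicate that rewrites everything it sees (hypothetical, not part of this commit; it compiles only against the bcachefs-tools headers above):

static enum data_cmd rewrite_all_pred(struct bch_fs *c, void *arg,
				      enum bkey_type type,
				      struct bkey_s_c_extent e,
				      struct bch_io_opts *io_opts,
				      struct data_opts *data_opts)
{
	/* never skip: add replicas with default btree insert flags */
	data_opts->btree_insert_flags = 0;
	return DATA_ADD_REPLICAS;
}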

 struct bch_move_stats {
 	enum bch_data_type	data_type;
@@ -39,7 +63,7 @@ struct bch_move_stats {
 int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
 		   unsigned, struct bch_devs_mask *,
 		   struct write_point_specifier,
-		   int, int, struct bpos, struct bpos,
+		   struct bpos, struct bpos,
 		   move_pred_fn, void *,
 		   struct bch_move_stats *);

@@ -61,9 +61,9 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
 	return (l->offset > r->offset) - (l->offset < r->offset);
 }

-static bool copygc_pred(void *arg, struct bkey_s_c_extent e)
+static bool __copygc_pred(struct bch_dev *ca,
+			  struct bkey_s_c_extent e)
 {
-	struct bch_dev *ca = arg;
 	copygc_heap *h = &ca->copygc_heap;
 	const struct bch_extent_ptr *ptr =
 		bch2_extent_has_device(e, ca->dev_idx);
@@ -83,6 +83,22 @@ static bool copygc_pred(void *arg, struct bkey_s_c_extent e)
 	return false;
 }

+static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
+				 enum bkey_type type,
+				 struct bkey_s_c_extent e,
+				 struct bch_io_opts *io_opts,
+				 struct data_opts *data_opts)
+{
+	struct bch_dev *ca = arg;
+
+	if (!__copygc_pred(ca, e))
+		return DATA_SKIP;
+
+	data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE,
+	data_opts->rewrite_dev = ca->dev_idx;
+	return DATA_REWRITE;
+}
+
 static bool have_copygc_reserve(struct bch_dev *ca)
 {
 	bool ret;
@@ -165,8 +181,6 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
 			 SECTORS_IN_FLIGHT_PER_DEVICE,
 			 &ca->self,
 			 writepoint_ptr(&ca->copygc_write_point),
-			 BTREE_INSERT_USE_RESERVE,
-			 ca->dev_idx,
 			 POS_MIN, POS_MAX,
 			 copygc_pred, ca,
 			 &move_stats);

@@ -22,6 +22,7 @@ const char * const bch2_compression_types[] = {
 	"none",
 	"lz4",
 	"gzip",
+	"zstd",
 	NULL
 };
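
The table stays NULL-terminated, so option parsing can resolve a compression name such as "zstd" to an index with a linear scan. A standalone sketch of that lookup (plain C, independent of the tree's actual option-parsing helpers):

#include <stdio.h>
#include <string.h>

static const char * const compression_types[] = {
	"none", "lz4", "gzip", "zstd", NULL
};

/* return the index of name in a NULL-terminated string table, or -1 */
static int lookup_string(const char * const list[], const char *name)
{
	for (int i = 0; list[i]; i++)
		if (!strcmp(list[i], name))
			return i;
	return -1;
}

int main(void)
{
	printf("zstd -> %d\n", lookup_string(compression_types, "zstd"));
	return 0;
}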

@@ -73,10 +73,10 @@ enum opt_type {
 	BCH_OPT(errors,			u8,	OPT_RUNTIME,		\
 		OPT_STR(bch2_error_actions),				\
 		BCH_SB_ERROR_ACTION,	BCH_ON_ERROR_RO)		\
-	BCH_OPT(metadata_replicas,	u8,	OPT_MOUNT,		\
+	BCH_OPT(metadata_replicas,	u8,	OPT_RUNTIME,		\
 		OPT_UINT(1, BCH_REPLICAS_MAX),				\
 		BCH_SB_META_REPLICAS_WANT, 1)				\
-	BCH_OPT(data_replicas,		u8,	OPT_MOUNT,		\
+	BCH_OPT(data_replicas,		u8,	OPT_RUNTIME,		\
 		OPT_UINT(1, BCH_REPLICAS_MAX),				\
 		BCH_SB_DATA_REPLICAS_WANT, 1)				\
 	BCH_OPT(metadata_replicas_required, u8,	OPT_MOUNT,		\
@@ -127,6 +127,9 @@ enum opt_type {
 	BCH_OPT(verbose_recovery,	u8,	OPT_MOUNT,		\
 		OPT_BOOL(),						\
 		NO_SB_OPT,		false)				\
+	BCH_OPT(verbose_init,		u8,	OPT_MOUNT,		\
+		OPT_BOOL(),						\
+		NO_SB_OPT,		false)				\
 	BCH_OPT(journal_flush_disabled,	u8,	OPT_RUNTIME,		\
 		OPT_BOOL(),						\
 		NO_SB_OPT,		false)				\

@@ -74,13 +74,6 @@ static inline unsigned __next_qtype(unsigned i, unsigned qtypes)
 	      _i < QTYP_NR);						\
 	     _i++)

-static inline unsigned enabled_qtypes(struct bch_fs *c)
-{
-	return ((c->opts.usrquota << QTYP_USR)|
-		(c->opts.grpquota << QTYP_GRP)|
-		(c->opts.prjquota << QTYP_PRJ));
-}
-
 static bool ignore_hardlimit(struct bch_memquota_type *q)
 {
 	if (capable(CAP_SYS_RESOURCE))
@@ -478,7 +471,7 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
 	if ((uflags & FS_QUOTA_GDQ_ENFD) && !c->opts.grpquota)
 		return -EINVAL;

-	if (uflags & FS_QUOTA_PDQ_ENFD)
+	if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota)
 		return -EINVAL;

 	mutex_lock(&c->sb_lock);
@@ -487,10 +480,9 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)

 	if (uflags & FS_QUOTA_GDQ_ENFD)
 		SET_BCH_SB_GRPQUOTA(c->disk_sb, true);
-#if 0
+
 	if (uflags & FS_QUOTA_PDQ_ENFD)
 		SET_BCH_SB_PRJQUOTA(c->disk_sb, true);
-#endif

 	bch2_write_super(c);
 	mutex_unlock(&c->sb_lock);

@@ -20,6 +20,13 @@ static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u)
 	};
 }

+static inline unsigned enabled_qtypes(struct bch_fs *c)
+{
+	return ((c->opts.usrquota << QTYP_USR)|
+		(c->opts.grpquota << QTYP_GRP)|
+		(c->opts.prjquota << QTYP_PRJ));
+}
+
 #ifdef CONFIG_BCACHEFS_QUOTA

 int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters,
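
Moving enabled_qtypes() into the header lets both the quota code and the mount path test "any quota type enabled" with a single truth test on a small bitmask. A standalone sketch of the same bit-packing (assuming QTYP_USR/GRP/PRJ are 0, 1 and 2, as in the tree):

#include <stdio.h>

enum { QTYP_USR = 0, QTYP_GRP = 1, QTYP_PRJ = 2 };

/* pack the three boolean quota options into one bitmask */
static unsigned enabled_qtypes(unsigned usr, unsigned grp, unsigned prj)
{
	return (usr << QTYP_USR) | (grp << QTYP_GRP) | (prj << QTYP_PRJ);
}

int main(void)
{
	/* usrquota and prjquota on, grpquota off -> 0b101 */
	printf("mask %#x\n", enabled_qtypes(1, 0, 1));
	return 0;
}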

@@ -43,7 +43,6 @@
  *     https://131002.net/siphash/
  */

 #include <linux/compiler.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 #include <linux/bitops.h>

@@ -546,6 +546,8 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
 	__le64 *i;
 	int ret;

+	pr_verbose_init(*opts, "");
+
 	memset(sb, 0, sizeof(*sb));
 	sb->mode = FMODE_READ;

@@ -566,8 +568,10 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
 		opt_set(*opts, nochanges, true);
 	}

-	if (IS_ERR(sb->bdev))
-		return PTR_ERR(sb->bdev);
+	if (IS_ERR(sb->bdev)) {
+		ret = PTR_ERR(sb->bdev);
+		goto out;
+	}

 	err = "cannot allocate memory";
 	ret = __bch2_super_realloc(sb, 0);
@@ -638,12 +642,14 @@ got_super:
 	if (sb->mode & FMODE_WRITE)
 		bdev_get_queue(sb->bdev)->backing_dev_info->capabilities
 			|= BDI_CAP_STABLE_WRITES;

-	return 0;
+	ret = 0;
+out:
+	pr_verbose_init(*opts, "ret %i", ret);
+	return ret;
 err:
 	bch2_free_super(sb);
 	pr_err("error reading superblock: %s", err);
-	return ret;
+	goto out;
 }

 /* write superblock: */
@@ -744,17 +750,15 @@ void bch2_write_super(struct bch_fs *c)
 	nr_wrote = dev_mask_nr(&sb_written);

 	can_mount_with_written =
-		bch2_have_enough_devs(c,
-				      __bch2_replicas_status(c, sb_written),
-				      BCH_FORCE_IF_DEGRADED);
+		bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
+				      BCH_FORCE_IF_DEGRADED);

 	for (i = 0; i < ARRAY_SIZE(sb_written.d); i++)
 		sb_written.d[i] = ~sb_written.d[i];

 	can_mount_without_written =
-		bch2_have_enough_devs(c,
-				      __bch2_replicas_status(c, sb_written),
-				      BCH_FORCE_IF_DEGRADED);
+		bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
+				      BCH_FORCE_IF_DEGRADED);

 	/*
 	 * If we would be able to mount _without_ the devices we successfully
@@ -1052,7 +1056,7 @@ static bool replicas_has_entry(struct bch_replicas_cpu *r,
 }

 noinline
-static int bch2_check_mark_super_slowpath(struct bch_fs *c,
+static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 					  struct bch_replicas_cpu_entry new_entry,
 					  unsigned max_dev)
 {
@@ -1109,9 +1113,9 @@ err:
 	return ret;
 }

-int bch2_check_mark_super(struct bch_fs *c,
-			  enum bch_data_type data_type,
-			  struct bch_devs_list devs)
+int bch2_mark_replicas(struct bch_fs *c,
+		       enum bch_data_type data_type,
+		       struct bch_devs_list devs)
 {
 	struct bch_replicas_cpu_entry search;
 	struct bch_replicas_cpu *r, *gc_r;
@@ -1121,6 +1125,8 @@ int bch2_check_mark_super(struct bch_fs *c,
 	if (!devs.nr)
 		return 0;

+	BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
+
 	devlist_to_replicas(devs, data_type, &search, &max_dev);

 	rcu_read_lock();
@@ -1131,7 +1137,23 @@ int bch2_check_mark_super(struct bch_fs *c,
 	rcu_read_unlock();

 	return likely(marked) ? 0
-		: bch2_check_mark_super_slowpath(c, search, max_dev);
+		: bch2_mark_replicas_slowpath(c, search, max_dev);
+}
+
+int bch2_mark_bkey_replicas(struct bch_fs *c,
+			    enum bch_data_type data_type,
+			    struct bkey_s_c k)
+{
+	struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+	unsigned i;
+	int ret;
+
+	for (i = 0; i < cached.nr; i++)
+		if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
+					      bch2_dev_list_single(cached.devs[i]))))
+			return ret;
+
+	return bch2_mark_replicas(c, data_type, bch2_bkey_dirty_devs(k));
 }
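
bch2_mark_bkey_replicas() marks each cached pointer as its own single-device entry but all dirty pointers as one grouped entry. A standalone sketch of that control flow (mark() is a hypothetical stand-in for bch2_mark_replicas, with simplified types):

#include <stdio.h>

struct devs_list { unsigned nr; unsigned devs[4]; };

/* stand-in for bch2_mark_replicas(): pretend to mark one replicas entry */
static int mark(const char *what, struct devs_list d)
{
	printf("mark %s entry with %u dev(s)\n", what, d.nr);
	return 0;
}

static int mark_bkey_replicas(struct devs_list cached, struct devs_list dirty)
{
	int ret;

	/* each cached pointer becomes its own single-device entry: */
	for (unsigned i = 0; i < cached.nr; i++) {
		struct devs_list one = { .nr = 1, .devs[0] = cached.devs[i] };
		if ((ret = mark("cached", one)))
			return ret;
	}

	/* dirty pointers are marked as one grouped entry: */
	return mark("dirty", dirty);
}

int main(void)
{
	struct devs_list cached = { .nr = 2, .devs = { 0, 3 } },
			 dirty  = { .nr = 1, .devs = { 1 } };
	return mark_bkey_replicas(cached, dirty);
}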

 int bch2_replicas_gc_end(struct bch_fs *c, int err)
@@ -1417,7 +1439,7 @@ int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t

 /* Query replicas: */

-bool bch2_sb_has_replicas(struct bch_fs *c,
+bool bch2_replicas_marked(struct bch_fs *c,
 			  enum bch_data_type data_type,
 			  struct bch_devs_list devs)
 {
@@ -1438,6 +1460,21 @@ bool bch2_sb_has_replicas(struct bch_fs *c,
 	return ret;
 }

+bool bch2_bkey_replicas_marked(struct bch_fs *c,
+			       enum bch_data_type data_type,
+			       struct bkey_s_c k)
+{
+	struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+	unsigned i;
+
+	for (i = 0; i < cached.nr; i++)
+		if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
+					  bch2_dev_list_single(cached.devs[i])))
+			return false;
+
+	return bch2_replicas_marked(c, data_type, bch2_bkey_dirty_devs(k));
+}
+
 struct replicas_status __bch2_replicas_status(struct bch_fs *c,
 					      struct bch_devs_mask online_devs)
 {
@@ -1495,29 +1532,26 @@ struct replicas_status bch2_replicas_status(struct bch_fs *c)
 	return __bch2_replicas_status(c, bch2_online_devs(c));
 }

-bool bch2_have_enough_devs(struct bch_fs *c,
-			   struct replicas_status s,
-			   unsigned flags)
+static bool have_enough_devs(struct replicas_status s,
+			     enum bch_data_type type,
+			     bool force_if_degraded,
+			     bool force_if_lost)
 {
-	if ((s.replicas[BCH_DATA_JOURNAL].nr_offline ||
-	     s.replicas[BCH_DATA_BTREE].nr_offline) &&
-	    !(flags & BCH_FORCE_IF_METADATA_DEGRADED))
-		return false;
+	return (!s.replicas[type].nr_offline || force_if_degraded) &&
+	       (s.replicas[type].nr_online || force_if_lost);
+}

-	if ((!s.replicas[BCH_DATA_JOURNAL].nr_online ||
-	     !s.replicas[BCH_DATA_BTREE].nr_online) &&
-	    !(flags & BCH_FORCE_IF_METADATA_LOST))
-		return false;
-
-	if (s.replicas[BCH_DATA_USER].nr_offline &&
-	    !(flags & BCH_FORCE_IF_DATA_DEGRADED))
-		return false;
-
-	if (!s.replicas[BCH_DATA_USER].nr_online &&
-	    !(flags & BCH_FORCE_IF_DATA_LOST))
-		return false;
-
-	return true;
+bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
+{
+	return (have_enough_devs(s, BCH_DATA_JOURNAL,
+				 flags & BCH_FORCE_IF_METADATA_DEGRADED,
+				 flags & BCH_FORCE_IF_METADATA_LOST) &&
+		have_enough_devs(s, BCH_DATA_BTREE,
+				 flags & BCH_FORCE_IF_METADATA_DEGRADED,
+				 flags & BCH_FORCE_IF_METADATA_LOST) &&
+		have_enough_devs(s, BCH_DATA_USER,
+				 flags & BCH_FORCE_IF_DATA_DEGRADED,
+				 flags & BCH_FORCE_IF_DATA_LOST));
 }
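
The refactor collapses the old four-branch check into one rule applied per data type: offline replicas are tolerated only with the corresponding "degraded" flag, and zero online replicas only with the "lost" flag. A standalone sketch of that rule (simplified types; the field names mirror struct replicas_status):

#include <stdbool.h>
#include <stdio.h>

struct status { unsigned nr_online, nr_offline; };

/* one rule per data type, replacing four hand-written branches */
static bool have_enough(struct status s,
			bool force_if_degraded, bool force_if_lost)
{
	return (!s.nr_offline || force_if_degraded) &&
	       (s.nr_online || force_if_lost);
}

int main(void)
{
	struct status s = { .nr_online = 1, .nr_offline = 1 };

	printf("degraded allowed: %d\n", have_enough(s, true, false));
	printf("strict: %d\n", have_enough(s, false, false));
	return 0;
}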

 unsigned bch2_replicas_online(struct bch_fs *c, bool meta)

@@ -139,10 +139,14 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)

 /* BCH_SB_FIELD_replicas: */

-bool bch2_sb_has_replicas(struct bch_fs *, enum bch_data_type,
-			  struct bch_devs_list);
-int bch2_check_mark_super(struct bch_fs *, enum bch_data_type,
-			  struct bch_devs_list);
+bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
+			  struct bch_devs_list);
+bool bch2_bkey_replicas_marked(struct bch_fs *, enum bch_data_type,
+			       struct bkey_s_c);
+int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
+		       struct bch_devs_list);
+int bch2_mark_bkey_replicas(struct bch_fs *, enum bch_data_type,
+			    struct bkey_s_c);

 int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *, char *, size_t);
 int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *, char *, size_t);
@@ -157,7 +161,7 @@ struct replicas_status {
 struct replicas_status __bch2_replicas_status(struct bch_fs *,
 					      struct bch_devs_mask);
 struct replicas_status bch2_replicas_status(struct bch_fs *);
-bool bch2_have_enough_devs(struct bch_fs *, struct replicas_status, unsigned);
+bool bch2_have_enough_devs(struct replicas_status, unsigned);

 unsigned bch2_replicas_online(struct bch_fs *, bool);
 unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);

@@ -507,9 +507,11 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	struct bch_fs *c;
 	unsigned i, iter_size;

+	pr_verbose_init(opts, "");
+
 	c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO);
 	if (!c)
-		return NULL;
+		goto out;

 	__module_get(THIS_MODULE);

@@ -539,7 +541,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	mutex_init(&c->btree_interior_update_lock);

 	mutex_init(&c->bio_bounce_pages_lock);
-	mutex_init(&c->zlib_workspace_lock);

 	bio_list_init(&c->btree_write_error_list);
 	spin_lock_init(&c->btree_write_error_lock);
@@ -646,10 +647,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	kobject_init(&c->internal, &bch2_fs_internal_ktype);
 	kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
 	kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
+out:
+	pr_verbose_init(opts, "ret %i", c ? 0 : -ENOMEM);
 	return c;
 err:
 	bch2_fs_free(c);
-	return NULL;
+	c = NULL;
+	goto out;
 }

 static const char *__bch2_fs_online(struct bch_fs *c)
@@ -809,7 +813,7 @@ static const char *__bch2_fs_start(struct bch_fs *c)
 		goto err;
 	bch_verbose(c, "fsck done");

-	if (c->opts.usrquota || c->opts.grpquota) {
+	if (enabled_qtypes(c)) {
 		bch_verbose(c, "reading quotas:");
 		ret = bch2_fs_quota_read(c);
 		if (ret)
@@ -864,7 +868,7 @@ static const char *__bch2_fs_start(struct bch_fs *c)
 					  NULL, NULL, NULL, 0))
 			goto err;

-		if (c->opts.usrquota || c->opts.grpquota) {
+		if (enabled_qtypes(c)) {
 			ret = bch2_fs_quota_read(c);
 			if (ret)
 				goto err;
@@ -1084,14 +1088,17 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
 static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
 {
 	struct bch_member *member;
-	struct bch_dev *ca;
+	struct bch_dev *ca = NULL;
+	int ret = 0;
+
+	pr_verbose_init(c->opts, "");

 	if (bch2_fs_init_fault("dev_alloc"))
-		return -ENOMEM;
+		goto err;

 	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
 	if (!ca)
-		return -ENOMEM;
+		goto err;

 	kobject_init(&ca->kobj, &bch2_dev_ktype);
 	init_completion(&ca->ref_completion);
@@ -1133,11 +1140,14 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)

 	if (bch2_dev_sysfs_online(c, ca))
 		pr_warn("error creating sysfs objects");

-	return 0;
+out:
+	pr_verbose_init(c->opts, "ret %i", ret);
+	return ret;
 err:
-	bch2_dev_free(ca);
-	return -ENOMEM;
+	if (ca)
+		bch2_dev_free(ca);
+	ret = -ENOMEM;
+	goto out;
 }

 static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
@@ -1240,7 +1250,8 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,

 		/* do we have enough devices to write to? */
 		for_each_member_device(ca2, c, i)
-			nr_rw += ca2->mi.state == BCH_MEMBER_STATE_RW;
+			if (ca2 != ca)
+				nr_rw += ca2->mi.state == BCH_MEMBER_STATE_RW;

 		required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED)
 			       ? c->opts.metadata_replicas
@@ -1249,7 +1260,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
 			       ? c->opts.data_replicas
 			       : c->opts.data_replicas_required);

-		return nr_rw - 1 <= required;
+		return nr_rw >= required;
 	case BCH_MEMBER_STATE_FAILED:
 	case BCH_MEMBER_STATE_SPARE:
 		if (ca->mi.state != BCH_MEMBER_STATE_RW &&
@@ -1262,7 +1273,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,

 		s = __bch2_replicas_status(c, new_online_devs);

-		return bch2_have_enough_devs(c, s, flags);
+		return bch2_have_enough_devs(s, flags);
 	default:
 		BUG();
 	}
@@ -1299,7 +1310,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)

 	s = bch2_replicas_status(c);

-	return bch2_have_enough_devs(c, s, flags);
+	return bch2_have_enough_devs(s, flags);
 }

 static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
@@ -1346,12 +1357,8 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
 	if (!bch2_dev_state_allowed(c, ca, new_state, flags))
 		return -EINVAL;

-	if (new_state == BCH_MEMBER_STATE_RW) {
-		if (__bch2_dev_read_write(c, ca))
-			return -ENOMEM;
-	} else {
+	if (new_state != BCH_MEMBER_STATE_RW)
 		__bch2_dev_read_only(c, ca);
-	}

 	bch_notice(ca, "%s", bch2_dev_state[new_state]);

@@ -1361,6 +1368,9 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
 	bch2_write_super(c);
 	mutex_unlock(&c->sb_lock);

+	if (new_state == BCH_MEMBER_STATE_RW)
+		return __bch2_dev_read_write(c, ca) ? -ENOMEM : 0;
+
 	return 0;
 }

@@ -1701,11 +1711,17 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
 	const char *err;
 	int ret = -ENOMEM;

-	if (!nr_devices)
-		return ERR_PTR(-EINVAL);
+	pr_verbose_init(opts, "");

-	if (!try_module_get(THIS_MODULE))
-		return ERR_PTR(-ENODEV);
+	if (!nr_devices) {
+		c = ERR_PTR(-EINVAL);
+		goto out2;
+	}
+
+	if (!try_module_get(THIS_MODULE)) {
+		c = ERR_PTR(-ENODEV);
+		goto out2;
+	}

 	sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL);
 	if (!sb)
@@ -1760,8 +1776,11 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
 	if (err)
 		goto err_print;

+out:
 	kfree(sb);
 	module_put(THIS_MODULE);
+out2:
+	pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c));
 	return c;
 err_print:
 	pr_err("bch_fs_open err opening %s: %s",
@@ -1770,12 +1789,10 @@ err_print:
 err:
 	if (c)
 		bch2_fs_stop(c);

 	for (i = 0; i < nr_devices; i++)
 		bch2_free_super(&sb[i]);
-	kfree(sb);
-	module_put(THIS_MODULE);
-	return ERR_PTR(ret);
+	c = ERR_PTR(ret);
+	goto out;
 }

 static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb,

@@ -67,6 +67,11 @@ static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs,
 	devs->devs[devs->nr++] = dev;
 }

+static inline struct bch_devs_list bch2_dev_list_single(unsigned dev)
+{
+	return (struct bch_devs_list) { .nr = 1, .devs[0] = dev };
+}
+
 static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter,
 					      const struct bch_devs_mask *mask)
 {

@@ -15,7 +15,7 @@ struct bch_devs_mask {

 struct bch_devs_list {
 	u8			nr;
-	u8			devs[BCH_REPLICAS_MAX];
+	u8			devs[BCH_REPLICAS_MAX + 1];
 };

 struct bch_member_cpu {

@@ -164,6 +164,8 @@ read_attribute(extent_migrate_raced);
 rw_attribute(journal_write_delay_ms);
 rw_attribute(journal_reclaim_delay_ms);

+rw_attribute(writeback_pages_max);
+
 rw_attribute(discard);
 rw_attribute(cache_replacement_policy);

@@ -310,6 +312,8 @@ SHOW(bch2_fs)
 	sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms);
 	sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);

+	sysfs_print(writeback_pages_max, c->writeback_pages_max);
+
 	sysfs_print(block_size, block_bytes(c));
 	sysfs_print(btree_node_size, btree_bytes(c));
 	sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
@@ -370,6 +374,9 @@ STORE(__bch2_fs)
 	sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
 	sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);

+	if (attr == &sysfs_writeback_pages_max)
+		c->writeback_pages_max = strtoul_restrict_or_return(buf, 1, UINT_MAX);
+
 	if (attr == &sysfs_btree_gc_periodic) {
 		ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
 			?: (ssize_t) size;
@@ -459,6 +466,8 @@ struct attribute *bch2_fs_files[] = {
 	&sysfs_journal_write_delay_ms,
 	&sysfs_journal_reclaim_delay_ms,

+	&sysfs_writeback_pages_max,
+
 	&sysfs_tiering_percent,

 	&sysfs_compression_stats,

@@ -14,10 +14,9 @@
 #include <linux/kthread.h>
 #include <trace/events/bcachefs.h>

-static bool tiering_pred(void *arg, struct bkey_s_c_extent e)
+static bool __tiering_pred(struct bch_fs *c, struct bch_tier *tier,
+			   struct bkey_s_c_extent e)
 {
-	struct bch_tier *tier = arg;
-	struct bch_fs *c = container_of(tier, struct bch_fs, tiers[tier->idx]);
 	const struct bch_extent_ptr *ptr;
 	unsigned replicas = 0;

@@ -33,6 +32,21 @@ static bool tiering_pred(void *arg, struct bkey_s_c_extent e)
 	return replicas < c->opts.data_replicas;
 }

+static enum data_cmd tiering_pred(struct bch_fs *c, void *arg,
+				  enum bkey_type type,
+				  struct bkey_s_c_extent e,
+				  struct bch_io_opts *io_opts,
+				  struct data_opts *data_opts)
+{
+	struct bch_tier *tier = arg;
+
+	if (!__tiering_pred(c, tier, e))
+		return DATA_SKIP;
+
+	data_opts->btree_insert_flags = 0;
+	return DATA_ADD_REPLICAS;
+}
+
 static int bch2_tiering_thread(void *arg)
 {
 	struct bch_tier *tier = arg;
@@ -90,8 +104,6 @@ static int bch2_tiering_thread(void *arg)
 					  SECTORS_IN_FLIGHT_PER_DEVICE * nr_devices,
 					  &tier->devs,
 					  writepoint_ptr(&tier->wp),
-					  0,
-					  -1,
 					  POS_MIN, POS_MAX,
 					  tiering_pred, tier,
 					  &move_stats);

@@ -817,4 +817,19 @@ do {									\
 #define array_remove_item(_array, _nr, _pos)				\
 	array_remove_items(_array, _nr, _pos, 1)

+#define bubble_sort(_base, _nr, _cmp)					\
+do {									\
+	ssize_t _i, _end;						\
+	bool _swapped = true;						\
+									\
+	for (_end = (ssize_t) (_nr) - 1; _end > 0 && _swapped; --_end) {\
+		_swapped = false;					\
+		for (_i = 0; _i < _end; _i++)				\
+			if (_cmp((_base)[_i], (_base)[_i + 1]) > 0) {	\
+				swap((_base)[_i], (_base)[_i + 1]);	\
+				_swapped = true;			\
+			}						\
+	}								\
+} while (0)

 #endif /* _BCACHEFS_UTIL_H */