Update bcachefs sources to e14d7c7195 bcachefs: Compression levels

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Author: Kent Overstreet <kent.overstreet@linux.dev>
Date:   2023-07-10 20:31:34 -04:00
parent 1c156d5c46
commit c8bec83e30
39 changed files with 645 additions and 237 deletions

View File

@ -1 +1 @@
04f2d2ae5bd69eecd0b4bb700efdc665b09745a1
e14d7c7195b974bbaf400f9c3f2bdaa94fc8d372

View File

@ -117,7 +117,7 @@ int cmd_dump(int argc, char *argv[])
opt_set(opts, norecovery, true);
opt_set(opts, degraded, true);
opt_set(opts, errors, BCH_ON_ERROR_continue);
opt_set(opts, fix_errors, FSCK_OPT_NO);
opt_set(opts, fix_errors, FSCK_FIX_no);
while ((opt = getopt(argc, argv, "o:fjvh")) != -1)
switch (opt) {

View File

@ -37,7 +37,7 @@ int cmd_fsck(int argc, char *argv[])
opt_set(opts, degraded, true);
opt_set(opts, fsck, true);
opt_set(opts, fix_errors, FSCK_OPT_ASK);
opt_set(opts, fix_errors, FSCK_FIX_ask);
while ((opt = getopt_long(argc, argv,
"apynfo:rvh",
@ -45,14 +45,14 @@ int cmd_fsck(int argc, char *argv[])
switch (opt) {
case 'a': /* outdated alias for -p */
case 'p':
opt_set(opts, fix_errors, FSCK_OPT_YES);
opt_set(opts, fix_errors, FSCK_FIX_yes);
break;
case 'y':
opt_set(opts, fix_errors, FSCK_OPT_YES);
opt_set(opts, fix_errors, FSCK_FIX_yes);
break;
case 'n':
opt_set(opts, nochanges, true);
opt_set(opts, fix_errors, FSCK_OPT_NO);
opt_set(opts, fix_errors, FSCK_FIX_no);
break;
case 'f':
/* force check, even if filesystem marked clean: */

View File

@ -234,7 +234,7 @@ int cmd_list_journal(int argc, char *argv[])
opt_set(opts, norecovery, true);
opt_set(opts, degraded, true);
opt_set(opts, errors, BCH_ON_ERROR_continue);
opt_set(opts, fix_errors, FSCK_OPT_YES);
opt_set(opts, fix_errors, FSCK_FIX_yes);
opt_set(opts, keep_journal, true);
opt_set(opts, read_journal_only, true);

View File

@ -44,6 +44,7 @@ struct user_namespace;
#define FMODE_32BITHASH ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH ((__force fmode_t)0x400)
#define FMODE_BUFFERED ((__force fmode_t)0x800)
struct inode {
unsigned long i_ino;

View File

@ -2,4 +2,9 @@
#define LZ4_compress_destSize(src, dst, srclen, dstlen, workspace) \
LZ4_compress_destSize(src, dst, srclen, dstlen)
#define LZ4_compress_HC(src, dst, srclen, dstlen, level, workspace) -1
#define LZ4_MEM_COMPRESS 0
#define LZ4HC_MEM_COMPRESS 0
#define LZ4HC_MIN_CLEVEL 0

View File

@ -12,6 +12,7 @@
#define rcu_access_pointer(p) READ_ONCE(p)
#define kfree_rcu(ptr, rcu_head) kfree(ptr) /* XXX */
#define kvfree_rcu(ptr) kfree(ptr) /* XXX */
#define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v)

View File

@ -213,7 +213,7 @@ static inline struct kmem_cache *kmem_cache_create(size_t obj_size)
#define vfree(p) free(p)
static inline void *__vmalloc(unsigned long size, gfp_t flags)
static inline void *__vmalloc_noprof(unsigned long size, gfp_t flags)
{
unsigned i;
void *p;
@ -234,6 +234,7 @@ static inline void *__vmalloc(unsigned long size, gfp_t flags)
return p;
}
#define __vmalloc __vmalloc_noprof
static inline void *vmalloc_exec(unsigned long size, gfp_t gfp_mask)
{

View File

@ -604,8 +604,7 @@ struct bch_opts bch2_parse_opts(struct bch_opt_strs strs)
u64 v;
for (i = 0; i < bch2_opts_nr; i++) {
if (!strs.by_id[i] ||
bch2_opt_table[i].type == BCH_OPT_FN)
if (!strs.by_id[i])
continue;
ret = bch2_opt_parse(NULL,

View File

@ -774,9 +774,10 @@ struct bch_fs {
struct mutex sb_lock;
/* snapshot.c: */
GENRADIX(struct snapshot_t) snapshots;
struct bch_snapshot_table __rcu *snapshot_table;
struct snapshot_table __rcu *snapshots;
size_t snapshot_table_size;
struct mutex snapshot_table_lock;
struct work_struct snapshot_delete_work;
struct work_struct snapshot_wait_for_pagecache_and_delete_work;
snapshot_id_list snapshots_unlinked;

View File

@ -695,7 +695,7 @@ struct bch_reservation {
/* Maximum size (in u64s) a single pointer could be: */
#define BKEY_EXTENT_PTR_U64s_MAX\
((sizeof(struct bch_extent_crc128) + \
sizeof(struct bch_extent_ptr)) / sizeof(u64))
sizeof(struct bch_extent_ptr)) / sizeof(__u64))
/* Maximum possible size of an entire extent value: */
#define BKEY_EXTENT_VAL_U64s_MAX \
@ -707,7 +707,7 @@ struct bch_reservation {
/* Btree pointers don't carry around checksums: */
#define BKEY_BTREE_PTR_VAL_U64s_MAX \
((sizeof(struct bch_btree_ptr_v2) + \
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64))
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
#define BKEY_BTREE_PTR_U64s_MAX \
(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
@ -749,7 +749,7 @@ struct bch_inode_v3 {
} __packed __aligned(8);
#define INODEv3_FIELDS_START_INITIAL 6
#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(u64))
#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64))
struct bch_inode_generation {
struct bch_val v;
@ -916,7 +916,7 @@ struct bch_dirent {
#define DT_SUBVOL 16
#define BCH_DT_MAX 17
#define BCH_NAME_MAX ((unsigned) (U8_MAX * sizeof(u64) - \
#define BCH_NAME_MAX ((unsigned) (U8_MAX * sizeof(__u64) - \
sizeof(struct bkey) - \
offsetof(struct bch_dirent, d_name)))
@ -1009,7 +1009,7 @@ struct bch_alloc_v4 {
} __packed __aligned(8);
#define BCH_ALLOC_V4_U64s_V0 6
#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(u64))
#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64))
BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
@ -1289,10 +1289,10 @@ struct bch_key {
};
#define BCH_KEY_MAGIC \
(((u64) 'b' << 0)|((u64) 'c' << 8)| \
((u64) 'h' << 16)|((u64) '*' << 24)| \
((u64) '*' << 32)|((u64) 'k' << 40)| \
((u64) 'e' << 48)|((u64) 'y' << 56))
(((__u64) 'b' << 0)|((__u64) 'c' << 8)| \
((__u64) 'h' << 16)|((__u64) '*' << 24)| \
((__u64) '*' << 32)|((__u64) 'k' << 40)| \
((__u64) 'e' << 48)|((__u64) 'y' << 56))
struct bch_encrypted_key {
__le64 magic;
@ -1747,7 +1747,7 @@ LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62);
LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63);
LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);
LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8);
LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_LO,struct bch_sb, flags[1], 4, 8);
LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9);
LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10);
@ -1767,7 +1767,7 @@ LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40);
LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52);
LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64);
LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE,
LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO,
struct bch_sb, flags[2], 0, 4);
LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64);
@ -1783,11 +1783,36 @@ LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34);
LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54);
LE64_BITMASK(BCH_SB_VERSION_UPGRADE, struct bch_sb, flags[4], 54, 56);
/* flags[4] 56-64 unused: */
LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_HI,struct bch_sb, flags[4], 56, 60);
LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
struct bch_sb, flags[4], 60, 64);
LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
struct bch_sb, flags[5], 0, 16);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
return BCH_SB_COMPRESSION_TYPE_LO(sb) | (BCH_SB_COMPRESSION_TYPE_HI(sb) << 4);
}
static inline void SET_BCH_SB_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
{
SET_BCH_SB_COMPRESSION_TYPE_LO(sb, v);
SET_BCH_SB_COMPRESSION_TYPE_HI(sb, v >> 4);
}
static inline __u64 BCH_SB_BACKGROUND_COMPRESSION_TYPE(const struct bch_sb *sb)
{
return BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb) |
(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb) << 4);
}
static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
{
SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb, v);
SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb, v >> 4);
}
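
The option field in flags[1] is only four bits wide, so the widened compression option (4-bit type plus the new 4-bit level) is split across two bitmasks: the low nibble stays where it was and the high nibble lands in the previously unused bits of flags[4], with the accessors above recombining them. A minimal standalone sketch of the nibble round-trip (the real code goes through LE64_BITMASK; treating 0x73 as "type 3, level 7" is an illustration, not a claim about the real option indices):

#include <assert.h>
#include <stdint.h>

/* Stand-in for the two 4-bit superblock fields (illustration only,
 * not the real LE64_BITMASK plumbing). */
struct sb_model { uint64_t lo:4, hi:4; };

int main(void)
{
	uint64_t v = 0x73;	/* hypothetically: type 3, level 7 */
	struct sb_model sb = { .lo = v & 15, .hi = v >> 4 };

	/* the getter recombines the nibbles exactly as above */
	assert((sb.lo | (sb.hi << 4)) == v);
	return 0;
}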
/*
* Features:
*
@ -2272,7 +2297,7 @@ static inline __u64 BTREE_NODE_ID(struct btree_node *n)
return BTREE_NODE_ID_LO(n) | (BTREE_NODE_ID_HI(n) << 4);
}
static inline void SET_BTREE_NODE_ID(struct btree_node *n, u64 v)
static inline void SET_BTREE_NODE_ID(struct btree_node *n, __u64 v)
{
SET_BTREE_NODE_ID_LO(n, v);
SET_BTREE_NODE_ID_HI(n, v >> 4);

View File

@ -1811,7 +1811,7 @@ again:
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) ||
(BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) &&
c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations &&
c->opts.fix_errors != FSCK_OPT_NO)) {
c->opts.fix_errors != FSCK_FIX_no)) {
bch_info(c, "Starting topology repair pass");
ret = bch2_repair_topology(c);
if (ret)

View File

@ -311,7 +311,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
!(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
i->k->k.p.snapshot &&
bch2_snapshot_internal_node(trans->c, i->k->k.p.snapshot));
bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
}
static noinline int
@ -1229,7 +1229,7 @@ static inline int check_pos_snapshot_overwritten(struct btree_trans *trans,
struct bpos pos)
{
if (!btree_type_has_snapshots(id) ||
!snapshot_t(trans->c, pos.snapshot)->children[0])
bch2_snapshot_is_leaf(trans->c, pos.snapshot))
return 0;
return __check_pos_snapshot_overwritten(trans, id, pos);

View File

@ -129,6 +129,9 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
keys = wb->keys[s.idx];
nr = s.nr;
if (race_fault())
goto slowpath;
/*
* We first sort so that we can detect and skip redundant updates, and
* then we attempt to flush in sorted btree order, as this is most

View File

@ -120,12 +120,6 @@ static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c)
return bch2_csum_opt_to_type(c->opts.metadata_checksum, false);
}
static const unsigned bch2_compression_opt_to_type[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
BCH_COMPRESSION_OPTS()
#undef x
};
static inline bool bch2_checksum_type_valid(const struct bch_fs *c,
unsigned type)
{

View File

@ -296,21 +296,32 @@ static int attempt_compress(struct bch_fs *c,
void *workspace,
void *dst, size_t dst_len,
void *src, size_t src_len,
enum bch_compression_type compression_type)
struct bch_compression_opt compression)
{
enum bch_compression_type compression_type =
__bch2_compression_opt_to_type[compression.type];
switch (compression_type) {
case BCH_COMPRESSION_TYPE_lz4: {
int len = src_len;
int ret = LZ4_compress_destSize(
src, dst,
&len, dst_len,
workspace);
case BCH_COMPRESSION_TYPE_lz4:
if (compression.level < LZ4HC_MIN_CLEVEL) {
int len = src_len;
int ret = LZ4_compress_destSize(
src, dst,
&len, dst_len,
workspace);
if (len < src_len)
return -len;
if (len < src_len)
return -len;
return ret;
} else {
int ret = LZ4_compress_HC(
src, dst,
src_len, dst_len,
compression.level,
workspace);
return ret;
}
return ret ?: -1;
}
case BCH_COMPRESSION_TYPE_gzip: {
z_stream strm = {
.next_in = src,
@ -320,7 +331,11 @@ static int attempt_compress(struct bch_fs *c,
};
zlib_set_workspace(&strm, workspace);
zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
zlib_deflateInit2(&strm,
compression.level
? clamp_t(unsigned, compression.level,
Z_BEST_SPEED, Z_BEST_COMPRESSION)
: Z_DEFAULT_COMPRESSION,
Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
Z_DEFAULT_STRATEGY);
@ -333,8 +348,14 @@ static int attempt_compress(struct bch_fs *c,
return strm.total_out;
}
case BCH_COMPRESSION_TYPE_zstd: {
/*
* rescale:
* zstd max compression level is 22, our max level is 15
*/
unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
zstd_cctx_workspace_bound(&c->zstd_params.cParams));
zstd_cctx_workspace_bound(&params.cParams));
/*
* ZSTD requires that when we decompress we pass in the exact
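
Taken together, the backends map the new 0-15 option level onto their native scales: lz4 stays on the fast destSize path below LZ4HC_MIN_CLEVEL and switches to the HC encoder at or above it, zlib clamps the level into its native 1-9 range (0 meaning Z_DEFAULT_COMPRESSION), and zstd rescales by 3/2 so option level 15 hits zstd's maximum of 22. A standalone sketch of the arithmetic; LZ4HC_MIN_CLEVEL = 3 is the in-kernel value and an assumption here:

#include <stdio.h>

#define LZ4HC_MIN_CLEVEL	3	/* assumed in-kernel value */
#define Z_BEST_SPEED		1
#define Z_BEST_COMPRESSION	9
#define ZSTD_MAX_CLEVEL		22	/* per the comment above */

static unsigned clamp(unsigned v, unsigned lo, unsigned hi)
{
	return v < lo ? lo : v > hi ? hi : v;
}

static unsigned min_u(unsigned a, unsigned b) { return a < b ? a : b; }

int main(void)
{
	for (unsigned level = 1; level <= 15; level++)
		printf("level %2u: lz4 %s  zlib %u  zstd %u\n", level,
		       level < LZ4HC_MIN_CLEVEL ? "fast" : "HC  ",
		       clamp(level, Z_BEST_SPEED, Z_BEST_COMPRESSION),
		       min_u(level * 3 / 2, ZSTD_MAX_CLEVEL));
	return 0;
}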
@ -365,10 +386,12 @@ static int attempt_compress(struct bch_fs *c,
static unsigned __bio_compress(struct bch_fs *c,
struct bio *dst, size_t *dst_len,
struct bio *src, size_t *src_len,
enum bch_compression_type compression_type)
struct bch_compression_opt compression)
{
struct bbuf src_data = { NULL }, dst_data = { NULL };
void *workspace;
enum bch_compression_type compression_type =
__bch2_compression_opt_to_type[compression.type];
unsigned pad;
int ret = 0;
@ -400,7 +423,7 @@ static unsigned __bio_compress(struct bch_fs *c,
ret = attempt_compress(c, workspace,
dst_data.b, *dst_len,
src_data.b, *src_len,
compression_type);
compression);
if (ret > 0) {
*dst_len = ret;
ret = 0;
@ -447,22 +470,24 @@ static unsigned __bio_compress(struct bch_fs *c,
BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
BUG_ON(*dst_len & (block_bytes(c) - 1));
BUG_ON(*src_len & (block_bytes(c) - 1));
ret = compression_type;
out:
bio_unmap_or_unbounce(c, src_data);
bio_unmap_or_unbounce(c, dst_data);
return compression_type;
return ret;
err:
compression_type = BCH_COMPRESSION_TYPE_incompressible;
ret = BCH_COMPRESSION_TYPE_incompressible;
goto out;
}
unsigned bch2_bio_compress(struct bch_fs *c,
struct bio *dst, size_t *dst_len,
struct bio *src, size_t *src_len,
unsigned compression_type)
unsigned compression_opt)
{
unsigned orig_dst = dst->bi_iter.bi_size;
unsigned orig_src = src->bi_iter.bi_size;
unsigned compression_type;
/* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
@ -470,11 +495,9 @@ unsigned bch2_bio_compress(struct bch_fs *c,
/* Don't generate a bigger output than input: */
dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
if (compression_type == BCH_COMPRESSION_TYPE_lz4_old)
compression_type = BCH_COMPRESSION_TYPE_lz4;
compression_type =
__bio_compress(c, dst, dst_len, src, src_len, compression_type);
__bio_compress(c, dst, dst_len, src, src_len,
bch2_compression_decode(compression_opt));
dst->bi_iter.bi_size = orig_dst;
src->bi_iter.bi_size = orig_src;
@ -521,8 +544,10 @@ static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
}
int bch2_check_set_has_compressed_data(struct bch_fs *c,
unsigned compression_type)
unsigned compression_opt)
{
unsigned compression_type = bch2_compression_decode(compression_opt).type;
BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
return compression_type
@ -546,14 +571,16 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
size_t decompress_workspace_size = 0;
bool decompress_workspace_needed;
ZSTD_parameters params = zstd_get_params(0, c->opts.encoded_extent_max);
ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
c->opts.encoded_extent_max);
struct {
unsigned feature;
unsigned type;
size_t compress_workspace;
size_t decompress_workspace;
unsigned feature;
enum bch_compression_type type;
size_t compress_workspace;
size_t decompress_workspace;
} compression_types[] = {
{ BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 },
{ BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) },
{ BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
zlib_inflate_workspacesize(), },
@ -612,16 +639,74 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
return 0;
}
static u64 compression_opt_to_feature(unsigned v)
{
unsigned type = bch2_compression_decode(v).type;
return 1ULL << bch2_compression_opt_to_feature[type];
}
int bch2_fs_compress_init(struct bch_fs *c)
{
u64 f = c->sb.features;
if (c->opts.compression)
f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression];
if (c->opts.background_compression)
f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression];
f |= compression_opt_to_feature(c->opts.compression);
f |= compression_opt_to_feature(c->opts.background_compression);
return __bch2_fs_compress_init(c, f);
}
int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
struct printbuf *err)
{
char *val = kstrdup(_val, GFP_KERNEL);
char *p = val, *type_str, *level_str;
struct bch_compression_opt opt = { 0 };
int ret;
if (!val)
return -ENOMEM;
type_str = strsep(&p, ":");
level_str = p;
ret = match_string(bch2_compression_opts, -1, type_str);
if (ret < 0 && err)
prt_str(err, "invalid compression type");
if (ret < 0)
goto err;
opt.type = ret;
if (level_str) {
unsigned level;
ret = kstrtouint(level_str, 10, &level);
if (!ret && !opt.type && level)
ret = -EINVAL;
if (!ret && level > 15)
ret = -EINVAL;
if (ret < 0 && err)
prt_str(err, "invalid compression level");
if (ret < 0)
goto err;
opt.level = level;
}
*res = bch2_compression_encode(opt);
err:
kfree(val);
return ret;
}
void bch2_opt_compression_to_text(struct printbuf *out,
struct bch_fs *c,
struct bch_sb *sb,
u64 v)
{
struct bch_compression_opt opt = bch2_compression_decode(v);
prt_str(out, bch2_compression_opts[opt.type]);
if (opt.level)
prt_printf(out, ":%u", opt.level);
}

View File

@ -4,6 +4,35 @@
#include "extents_types.h"
struct bch_compression_opt {
u8 type:4,
level:4;
};
static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
{
return (struct bch_compression_opt) {
.type = v & 15,
.level = v >> 4,
};
}
static inline unsigned bch2_compression_encode(struct bch_compression_opt opt)
{
return opt.type|(opt.level << 4);
}
static const unsigned __bch2_compression_opt_to_type[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
BCH_COMPRESSION_OPTS()
#undef x
};
static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
{
return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
}
int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
struct bch_extent_crc_unpacked *);
int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
@ -15,4 +44,12 @@ int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned);
void bch2_fs_compress_exit(struct bch_fs *);
int bch2_fs_compress_init(struct bch_fs *);
int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
#define bch2_opt_compression (struct bch_opt_fn) { \
.parse = bch2_opt_compression_parse, \
.to_text = bch2_opt_compression_to_text, \
}
#endif /* _BCACHEFS_COMPRESS_H */

View File

@ -455,9 +455,7 @@ int bch2_data_update_init(struct btree_trans *trans,
BCH_WRITE_DATA_ENCODED|
BCH_WRITE_MOVE|
m->data_opts.write_flags;
m->op.compression_type =
bch2_compression_opt_to_type[io_opts.background_compression ?:
io_opts.compression];
m->op.compression_opt = io_opts.background_compression ?: io_opts.compression;
m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;
bkey_for_each_ptr(ptrs, ptr)

View File

@ -460,30 +460,37 @@ int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
return ret;
}
int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v)
int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res,
struct printbuf *err)
{
struct bch_dev *ca;
int g;
if (!strlen(buf) || !strcmp(buf, "none")) {
*v = 0;
if (!val)
return -EINVAL;
if (!c)
return 0;
if (!strlen(val) || !strcmp(val, "none")) {
*res = 0;
return 0;
}
/* Is it a device? */
ca = bch2_dev_lookup(c, buf);
ca = bch2_dev_lookup(c, val);
if (!IS_ERR(ca)) {
*v = dev_to_target(ca->dev_idx);
*res = dev_to_target(ca->dev_idx);
percpu_ref_put(&ca->ref);
return 0;
}
mutex_lock(&c->sb_lock);
g = bch2_disk_path_find(&c->disk_sb, buf);
g = bch2_disk_path_find(&c->disk_sb, val);
mutex_unlock(&c->sb_lock);
if (g >= 0) {
*v = group_to_target(g);
*res = group_to_target(g);
return 0;
}

View File

@ -85,9 +85,14 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *);
void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned);
int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *);
int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
#define bch2_opt_target (struct bch_opt_fn) { \
.parse = bch2_opt_target_parse, \
.to_text = bch2_opt_target_to_text, \
}
int bch2_sb_disk_groups_to_cpu(struct bch_fs *);
int __bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);

View File

@ -204,7 +204,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
prt_str(out, ", continuing");
ret = -BCH_ERR_fsck_ignore;
}
} else if (c->opts.fix_errors == FSCK_OPT_EXIT) {
} else if (c->opts.fix_errors == FSCK_FIX_exit) {
prt_str(out, ", exiting");
ret = -BCH_ERR_fsck_errors_not_fixed;
} else if (flags & FSCK_CAN_FIX) {
@ -212,7 +212,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
? s->fix
: c->opts.fix_errors;
if (fix == FSCK_OPT_ASK) {
if (fix == FSCK_FIX_ask) {
int ask;
prt_str(out, ": fix?");
@ -223,13 +223,13 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
if (ask >= YN_ALLNO && s)
s->fix = ask == YN_ALLNO
? FSCK_OPT_NO
: FSCK_OPT_YES;
? FSCK_FIX_no
: FSCK_FIX_yes;
ret = ask & 1
? -BCH_ERR_fsck_fix
: -BCH_ERR_fsck_ignore;
} else if (fix == FSCK_OPT_YES ||
} else if (fix == FSCK_FIX_yes ||
(c->opts.nochanges &&
!(flags & FSCK_CAN_IGNORE))) {
prt_str(out, ", fixing");
@ -244,7 +244,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
}
if (ret == -BCH_ERR_fsck_ignore &&
(c->opts.fix_errors == FSCK_OPT_EXIT ||
(c->opts.fix_errors == FSCK_FIX_exit ||
!(flags & FSCK_CAN_IGNORE)))
ret = -BCH_ERR_fsck_errors_not_fixed;

View File

@ -91,13 +91,6 @@ do { \
* be able to repair:
*/
enum fsck_err_opts {
FSCK_OPT_EXIT,
FSCK_OPT_YES,
FSCK_OPT_NO,
FSCK_OPT_ASK,
};
struct fsck_err_state {
struct list_head list;
const char *fmt;

View File

@ -35,6 +35,8 @@
#include <trace/events/writeback.h>
static void bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned);
struct folio_vec {
struct folio *fv_folio;
size_t fv_offset;
@ -1972,7 +1974,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
darray_for_each(folios, fi) {
struct folio *f = *fi;
u64 f_len = min(end, folio_end_pos(f)) - f_pos;
unsigned f_copied = copy_folio_from_iter_atomic(f, f_offset, f_len, iter);
unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
if (!f_copied) {
folios_trunc(&folios, fi);
@ -3373,6 +3375,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
struct quota_res quota_res = { 0 };
struct bkey_s_c k;
unsigned sectors;
bool is_allocation;
u64 hole_start, hole_end;
u32 snapshot;
bch2_trans_begin(&trans);
@ -3388,6 +3392,10 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
if ((ret = bkey_err(k)))
goto bkey_err;
hole_start = iter.pos.offset;
hole_end = bpos_min(k.k->p, end_pos).offset;
is_allocation = bkey_extent_is_allocation(k.k);
/* already reserved */
if (bkey_extent_is_reservation(k) &&
bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
@ -3401,17 +3409,26 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
continue;
}
/*
* XXX: for nocow mode, we should promote shared extents to
* unshared here
*/
if (!(mode & FALLOC_FL_ZERO_RANGE)) {
ret = drop_locks_do(&trans,
(bch2_clamp_data_hole(&inode->v,
&hole_start,
&hole_end,
opts.data_replicas), 0));
bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
sectors = bpos_min(k.k->p, end_pos).offset - iter.pos.offset;
if (ret)
goto bkey_err;
if (!bkey_extent_is_allocation(k.k)) {
if (hole_start == hole_end)
continue;
}
sectors = hole_end - hole_start;
if (!is_allocation) {
ret = bch2_quota_reservation_add(c, inode,
&quota_res,
sectors, true);
&quota_res, sectors, true);
if (unlikely(ret))
goto bkey_err;
}
@ -3423,15 +3440,15 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
goto bkey_err;
i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
drop_locks_do(&trans,
(mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0));
bkey_err:
bch2_quota_reservation_put(c, inode, &quota_res);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
ret = 0;
}
bch2_trans_unlock(&trans); /* lock ordering, before taking pagecache locks: */
mark_pagecache_reserved(inode, start_sector, iter.pos.offset);
if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) {
struct quota_res quota_res = { 0 };
s64 i_sectors_delta = 0;
@ -3679,14 +3696,16 @@ err:
/* fseek: */
static int folio_data_offset(struct folio *folio, loff_t pos)
static int folio_data_offset(struct folio *folio, loff_t pos,
unsigned min_replicas)
{
struct bch_folio *s = bch2_folio(folio);
unsigned i, sectors = folio_sectors(folio);
if (s)
for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
if (s->s[i].state >= SECTOR_dirty)
if (s->s[i].state >= SECTOR_dirty &&
s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
return i << SECTOR_SHIFT;
return -1;
@ -3694,7 +3713,8 @@ static int folio_data_offset(struct folio *folio, loff_t pos)
static loff_t bch2_seek_pagecache_data(struct inode *vinode,
loff_t start_offset,
loff_t end_offset)
loff_t end_offset,
unsigned min_replicas)
{
struct folio_batch fbatch;
pgoff_t start_index = start_offset >> PAGE_SHIFT;
@ -3713,7 +3733,8 @@ static loff_t bch2_seek_pagecache_data(struct inode *vinode,
folio_lock(folio);
offset = folio_data_offset(folio,
max(folio_pos(folio), start_offset));
max(folio_pos(folio), start_offset),
min_replicas);
if (offset >= 0) {
ret = clamp(folio_pos(folio) + offset,
start_offset, end_offset);
@ -3775,7 +3796,7 @@ err:
if (next_data > offset)
next_data = bch2_seek_pagecache_data(&inode->v,
offset, next_data);
offset, next_data, 0);
if (next_data >= isize)
return -ENXIO;
@ -3783,7 +3804,8 @@ err:
return vfs_setpos(file, next_data, MAX_LFS_FILESIZE);
}
static bool folio_hole_offset(struct address_space *mapping, loff_t *offset)
static bool folio_hole_offset(struct address_space *mapping, loff_t *offset,
unsigned min_replicas)
{
struct folio *folio;
struct bch_folio *s;
@ -3800,7 +3822,8 @@ static bool folio_hole_offset(struct address_space *mapping, loff_t *offset)
sectors = folio_sectors(folio);
for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
if (s->s[i].state < SECTOR_dirty) {
if (s->s[i].state < SECTOR_dirty ||
s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
*offset = max(*offset,
folio_pos(folio) + (i << SECTOR_SHIFT));
goto unlock;
@ -3815,18 +3838,34 @@ unlock:
static loff_t bch2_seek_pagecache_hole(struct inode *vinode,
loff_t start_offset,
loff_t end_offset)
loff_t end_offset,
unsigned min_replicas)
{
struct address_space *mapping = vinode->i_mapping;
loff_t offset = start_offset;
while (offset < end_offset &&
!folio_hole_offset(mapping, &offset))
!folio_hole_offset(mapping, &offset, min_replicas))
;
return min(offset, end_offset);
}
static void bch2_clamp_data_hole(struct inode *inode,
u64 *hole_start,
u64 *hole_end,
unsigned min_replicas)
{
*hole_start = bch2_seek_pagecache_hole(inode,
*hole_start << 9, *hole_end << 9, min_replicas) >> 9;
if (*hole_start == *hole_end)
return;
*hole_end = bch2_seek_pagecache_data(inode,
*hole_start << 9, *hole_end << 9, min_replicas) >> 9;
}
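
The new bch2_clamp_data_hole() shrinks a candidate fallocate hole so it never overlaps sectors that are already dirty in the pagecache: first advance hole_start to the next pagecache hole, then pull hole_end back to the next pagecache data (the helpers above work in bytes, hence the <<9/>>9 sector conversions). A toy single-file model of the two-step clamp:

#include <stdio.h>

enum { HOLE, DATA };	/* stand-in for the per-sector SECTOR_* states */

static void clamp_data_hole(const int *s, unsigned *hole_start, unsigned *hole_end)
{
	/* bch2_seek_pagecache_hole(): skip leading pagecache data */
	while (*hole_start < *hole_end && s[*hole_start] == DATA)
		(*hole_start)++;

	/* bch2_seek_pagecache_data(): stop before the next pagecache data */
	unsigned i = *hole_start;
	while (i < *hole_end && s[i] == HOLE)
		i++;
	*hole_end = i;
}

int main(void)
{
	int sectors[8] = { DATA, DATA, HOLE, HOLE, DATA, HOLE, HOLE, HOLE };
	unsigned start = 0, end = 8;

	clamp_data_hole(sectors, &start, &end);
	printf("clamped hole: [%u, %u)\n", start, end);	/* [2, 4) */
	return 0;
}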
static loff_t bch2_seek_hole(struct file *file, u64 offset)
{
struct bch_inode_info *inode = file_bch_inode(file);
@ -3856,12 +3895,12 @@ retry:
BTREE_ITER_SLOTS, k, ret) {
if (k.k->p.inode != inode->v.i_ino) {
next_hole = bch2_seek_pagecache_hole(&inode->v,
offset, MAX_LFS_FILESIZE);
offset, MAX_LFS_FILESIZE, 0);
break;
} else if (!bkey_extent_is_data(k.k)) {
next_hole = bch2_seek_pagecache_hole(&inode->v,
max(offset, bkey_start_offset(k.k) << 9),
k.k->p.offset << 9);
k.k->p.offset << 9, 0);
if (next_hole < k.k->p.offset << 9)
break;

View File

@ -894,7 +894,7 @@ static int check_inode(struct btree_trans *trans,
* particular is not atomic, so on the internal snapshot nodes
* we can see inodes marked for deletion after a clean shutdown
*/
if (bch2_snapshot_internal_node(c, k.k->p.snapshot))
if (bch2_snapshot_is_internal_node(c, k.k->p.snapshot))
return 0;
if (!bkey_is_inode(k.k))
@ -2122,6 +2122,8 @@ int bch2_check_directory_structure(struct bch_fs *c)
return ret;
}
/* check_nlink pass: */
struct nlink_table {
size_t nr;
size_t size;

View File

@ -1078,7 +1078,7 @@ static enum prep_encoded_ret {
/* Can we just write the entire extent as is? */
if (op->crc.uncompressed_size == op->crc.live_size &&
op->crc.compressed_size <= wp->sectors_free &&
(op->crc.compression_type == op->compression_type ||
(op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) ||
op->incompressible)) {
if (!crc_is_compressed(op->crc) &&
op->csum_type != op->crc.csum_type &&
@ -1126,7 +1126,7 @@ static enum prep_encoded_ret {
/*
* If we want to compress the data, it has to be decrypted:
*/
if ((op->compression_type ||
if ((op->compression_opt ||
bch2_csum_type_is_encryption(op->crc.csum_type) !=
bch2_csum_type_is_encryption(op->csum_type)) &&
bch2_write_decrypt(op))
@ -1173,7 +1173,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
}
if (ec_buf ||
op->compression_type ||
op->compression_opt ||
(op->csum_type &&
!(op->flags & BCH_WRITE_PAGES_STABLE)) ||
(bch2_csum_type_is_encryption(op->csum_type) &&
@ -1196,16 +1196,16 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
dst->bi_iter.bi_size < c->opts.encoded_extent_max)
break;
BUG_ON(op->compression_type &&
BUG_ON(op->compression_opt &&
(op->flags & BCH_WRITE_DATA_ENCODED) &&
bch2_csum_type_is_encryption(op->crc.csum_type));
BUG_ON(op->compression_type && !bounce);
BUG_ON(op->compression_opt && !bounce);
crc.compression_type = op->incompressible
? BCH_COMPRESSION_TYPE_incompressible
: op->compression_type
: op->compression_opt
? bch2_bio_compress(c, dst, &dst_len, src, &src_len,
op->compression_type)
op->compression_opt)
: 0;
if (!crc_is_compressed(crc)) {
dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);

View File

@ -86,7 +86,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
op->written = 0;
op->error = 0;
op->csum_type = bch2_data_checksum_type(c, opts);
op->compression_type = bch2_compression_opt_to_type[opts.compression];
op->compression_opt = opts.compression;
op->nr_replicas = 0;
op->nr_replicas_required = c->opts.data_replicas_required;
op->watermark = BCH_WATERMARK_normal;

View File

@ -115,8 +115,8 @@ struct bch_write_op {
u16 flags;
s16 error; /* dio write path expects it to hold -ERESTARTSYS... */
unsigned compression_opt:8;
unsigned csum_type:4;
unsigned compression_type:4;
unsigned nr_replicas:4;
unsigned nr_replicas_required:4;
unsigned watermark:3;

View File

@ -5,6 +5,7 @@
#include "bcachefs.h"
#include "compress.h"
#include "disk_groups.h"
#include "error.h"
#include "opts.h"
#include "super-io.h"
#include "util.h"
@ -16,6 +17,11 @@ const char * const bch2_error_actions[] = {
NULL
};
const char * const bch2_fsck_fix_opts[] = {
BCH_FIX_ERRORS_OPTS()
NULL
};
const char * const bch2_version_upgrade_opts[] = {
BCH_VERSION_UPGRADE_OPTS()
NULL
@ -89,6 +95,37 @@ const char * const bch2_fs_usage_types[] = {
#undef x
int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
struct printbuf *err)
{
if (!val) {
*res = FSCK_FIX_yes;
} else {
int ret = match_string(bch2_fsck_fix_opts, -1, val);
if (ret < 0 && err)
prt_str(err, "fix_errors: invalid selection");
if (ret < 0)
return ret;
*res = ret;
}
return 0;
}
void bch2_opt_fix_errors_to_text(struct printbuf *out,
struct bch_fs *c,
struct bch_sb *sb,
u64 v)
{
prt_str(out, bch2_fsck_fix_opts[v]);
}
static const struct bch_opt_fn bch2_opt_fix_errors = {
.parse = bch2_opt_fix_errors_parse,
.to_text = bch2_opt_fix_errors_to_text,
};
const char * const bch2_d_types[BCH_DT_MAX] = {
[DT_UNKNOWN] = "unknown",
[DT_FIFO] = "fifo",
@ -167,11 +204,9 @@ const struct bch_option bch2_opt_table[] = {
#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \
.min = _min, .max = _max
#define OPT_STR(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = ARRAY_SIZE(_choices),\
.min = 0, .max = ARRAY_SIZE(_choices), \
.choices = _choices
#define OPT_FN(_fn) .type = BCH_OPT_FN, \
.parse = _fn##_parse, \
.to_text = _fn##_to_text
#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \
[Opt_##_name] = { \
@ -267,15 +302,26 @@ int bch2_opt_parse(struct bch_fs *c,
switch (opt->type) {
case BCH_OPT_BOOL:
ret = kstrtou64(val, 10, res);
if (val) {
ret = kstrtou64(val, 10, res);
} else {
ret = 0;
*res = 1;
}
if (ret < 0 || (*res != 0 && *res != 1)) {
if (err)
prt_printf(err, "%s: must be bool",
opt->attr.name);
prt_printf(err, "%s: must be bool", opt->attr.name);
return ret;
}
break;
case BCH_OPT_UINT:
if (!val) {
prt_printf(err, "%s: required value",
opt->attr.name);
return -EINVAL;
}
ret = opt->flags & OPT_HUMAN_READABLE
? bch2_strtou64_h(val, res)
: kstrtou64(val, 10, res);
@ -287,6 +333,12 @@ int bch2_opt_parse(struct bch_fs *c,
}
break;
case BCH_OPT_STR:
if (!val) {
prt_printf(err, "%s: required value",
opt->attr.name);
return -EINVAL;
}
ret = match_string(opt->choices, -1, val);
if (ret < 0) {
if (err)
@ -298,10 +350,7 @@ int bch2_opt_parse(struct bch_fs *c,
*res = ret;
break;
case BCH_OPT_FN:
if (!c)
return 0;
ret = opt->parse(c, val, res);
ret = opt->fn.parse(c, val, res, err);
if (ret < 0) {
if (err)
prt_printf(err, "%s: parse error",
@ -341,10 +390,10 @@ void bch2_opt_to_text(struct printbuf *out,
if (flags & OPT_SHOW_FULL_LIST)
prt_string_option(out, opt->choices, v);
else
prt_printf(out, "%s", opt->choices[v]);
prt_str(out, opt->choices[v]);
break;
case BCH_OPT_FN:
opt->to_text(out, c, sb, v);
opt->fn.to_text(out, c, sb, v);
break;
default:
BUG();
@ -405,31 +454,19 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
name = strsep(&opt, "=");
val = opt;
if (val) {
id = bch2_mount_opt_lookup(name);
if (id < 0)
goto bad_opt;
id = bch2_mount_opt_lookup(name);
ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
if (ret < 0)
goto bad_val;
} else {
id = bch2_mount_opt_lookup(name);
v = 1;
if (id < 0 &&
!strncmp("no", name, 2)) {
id = bch2_mount_opt_lookup(name + 2);
v = 0;
}
if (id < 0)
goto bad_opt;
if (bch2_opt_table[id].type != BCH_OPT_BOOL)
goto no_val;
/* Check for the form "noopt", negation of a boolean opt: */
if (id < 0 &&
!val &&
!strncmp("no", name, 2)) {
id = bch2_mount_opt_lookup(name + 2);
val = "0";
}
if (id < 0)
goto bad_opt;
if (!(bch2_opt_table[id].flags & OPT_MOUNT))
goto bad_opt;
@ -442,6 +479,10 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
!IS_ENABLED(CONFIG_BCACHEFS_QUOTA))
goto bad_opt;
ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
if (ret < 0)
goto bad_val;
bch2_opt_set_by_id(opts, id, v);
}
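
The reworked loop treats an unknown option beginning with "no" as the negation of a boolean option, by re-looking-up the name without the prefix and parsing the value "0". A standalone sketch of just that lookup dance (the option table and lookup helper are stand-ins, not the real bch2_mount_opt_lookup()):

#include <stdio.h>
#include <string.h>

static const char * const bool_opts[] = { "verbose", "degraded", NULL };

static int opt_lookup(const char *name)
{
	for (int i = 0; bool_opts[i]; i++)
		if (!strcmp(bool_opts[i], name))
			return i;
	return -1;
}

int main(void)
{
	const char *name = "nodegraded";	/* e.g. from "-o nodegraded" */
	const char *val = NULL;
	int id = opt_lookup(name);

	/* check for "noopt", the negation of a boolean opt: */
	if (id < 0 && !val && !strncmp("no", name, 2)) {
		id = opt_lookup(name + 2);
		val = "0";
	}

	if (id >= 0)
		printf("option %s = %s\n", bool_opts[id], val ?: "1");
	return 0;
}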
@ -456,10 +497,6 @@ bad_val:
pr_err("Invalid mount option %s", err.buf);
ret = -1;
goto out;
no_val:
pr_err("Mount option %s requires a value", name);
ret = -1;
goto out;
out:
kfree(copied_opts_start);
printbuf_exit(&err);

View File

@ -8,7 +8,10 @@
#include <linux/sysfs.h>
#include "bcachefs_format.h"
struct bch_fs;
extern const char * const bch2_error_actions[];
extern const char * const bch2_fsck_fix_opts[];
extern const char * const bch2_version_upgrade_opts[];
extern const char * const bch2_sb_features[];
extern const char * const bch2_sb_compat[];
@ -67,6 +70,11 @@ enum opt_type {
BCH_OPT_FN,
};
struct bch_opt_fn {
int (*parse)(struct bch_fs *, const char *, u64 *, struct printbuf *);
void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
};
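
Parse and print hooks now travel together: OPT_FN(_fn) stores a struct bch_opt_fn in the option table and bch2_opt_parse()/bch2_opt_to_text() dispatch through opt->fn, with bch2_opt_compression and bch2_opt_target (above) built as named compound literals. A minimal standalone model of the pattern, with hypothetical hooks:

#include <stdio.h>

struct opt_fn {
	int  (*parse)(const char *, unsigned *);
	void (*to_text)(unsigned);
};

static int hex_parse(const char *s, unsigned *res)
{
	return sscanf(s, "%x", res) == 1 ? 0 : -1;
}

static void hex_to_text(unsigned v)
{
	printf("0x%x\n", v);
}

/* a named compound literal, like bch2_opt_compression in compress.h: */
#define opt_hex (struct opt_fn) {		\
	.parse	 = hex_parse,			\
	.to_text = hex_to_text,			\
}

int main(void)
{
	struct opt_fn fn = opt_hex;	/* what OPT_FN(_fn) stores as .fn */
	unsigned v;

	if (!fn.parse("7f", &v))
		fn.to_text(v);		/* prints 0x7f */
	return 0;
}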
/**
* x(name, shortopt, type, in mem type, mode, sb_opt)
*
@ -98,6 +106,18 @@ enum opt_type {
#define BCACHEFS_VERBOSE_DEFAULT false
#endif
#define BCH_FIX_ERRORS_OPTS() \
x(exit, 0) \
x(yes, 1) \
x(no, 2) \
x(ask, 3)
enum fsck_err_opts {
#define x(t, n) FSCK_FIX_##t,
BCH_FIX_ERRORS_OPTS()
#undef x
};
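
One x-macro list now drives both the FSCK_FIX_* enum here and the bch2_fsck_fix_opts[] string table in opts.c, keeping string indices and enum values in sync so match_string() can return the enum directly. A standalone sketch of the expansion (the string-table x() definition is inferred, not shown in this diff):

#include <stdio.h>
#include <string.h>

#define BCH_FIX_ERRORS_OPTS()	\
	x(exit,	0)		\
	x(yes,	1)		\
	x(no,	2)		\
	x(ask,	3)

enum fsck_err_opts {
#define x(t, n) FSCK_FIX_##t,
	BCH_FIX_ERRORS_OPTS()
#undef x
};

static const char * const fsck_fix_opts[] = {
#define x(t, n) #t,
	BCH_FIX_ERRORS_OPTS()
#undef x
	NULL
};

int main(void)
{
	/* what parsing fix_errors=ask resolves to: */
	for (unsigned i = 0; fsck_fix_opts[i]; i++)
		if (!strcmp(fsck_fix_opts[i], "ask"))
			printf("fix_errors=ask -> %u (FSCK_FIX_ask = %u)\n",
			       i, FSCK_FIX_ask);
	return 0;
}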
#define BCH_OPTS() \
x(block_size, u16, \
OPT_FS|OPT_FORMAT| \
@ -154,12 +174,12 @@ enum opt_type {
NULL, NULL) \
x(compression, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_STR(bch2_compression_opts), \
OPT_FN(bch2_opt_compression), \
BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \
NULL, NULL) \
x(background_compression, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_STR(bch2_compression_opts), \
OPT_FN(bch2_opt_compression), \
BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \
NULL, NULL) \
x(str_hash, u8, \
@ -318,8 +338,8 @@ enum opt_type {
NULL, "Run fsck on mount") \
x(fix_errors, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
OPT_FN(bch2_opt_fix_errors), \
BCH2_NO_SB_OPT, FSCK_FIX_exit, \
NULL, "Fix errors during fsck without asking") \
x(ratelimit_errors, u8, \
OPT_FS|OPT_MOUNT, \
@ -495,8 +515,8 @@ struct bch_option {
u64 min, max;
const char * const *choices;
int (*parse)(struct bch_fs *, const char *, u64 *);
void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
struct bch_opt_fn fn;
const char *hint;
const char *help;

View File

@ -5,6 +5,7 @@
#include "btree_iter.h"
#include "buckets.h"
#include "clock.h"
#include "compress.h"
#include "disk_groups.h"
#include "errcode.h"
#include "extents.h"
@ -45,7 +46,7 @@ static bool rebalance_pred(struct bch_fs *c, void *arg,
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
if (!p.ptr.cached &&
p.crc.compression_type !=
bch2_compression_opt_to_type[io_opts->background_compression])
bch2_compression_opt_to_type(io_opts->background_compression))
data_opts->rewrite_ptrs |= 1U << i;
i++;
}

View File

@ -1162,12 +1162,9 @@ static void check_version_upgrade(struct bch_fs *c)
prt_str(&buf, " incomplete\n");
}
prt_str(&buf, "Doing ");
if (BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version))
prt_str(&buf, "incompatible");
else
prt_str(&buf, "compatible");
prt_str(&buf, "version upgrade from ");
prt_printf(&buf, "Doing %s version upgrade from ",
BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version)
? "incompatible" : "compatible");
bch2_version_to_text(&buf, old_version);
prt_str(&buf, " to ");
bch2_version_to_text(&buf, new_version);
@ -1178,7 +1175,7 @@ static void check_version_upgrade(struct bch_fs *c)
prt_str(&buf, "fsck required");
c->recovery_passes_explicit |= recovery_passes;
c->opts.fix_errors = FSCK_OPT_YES;
c->opts.fix_errors = FSCK_FIX_yes;
}
bch_info(c, "%s", buf.buf);

View File

@ -12,9 +12,9 @@
static int bch2_subvolume_delete(struct btree_trans *, u32);
static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor)
static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
{
struct snapshot_t *s = snapshot_t(c, id);
const struct snapshot_t *s = __snapshot_t(t, id);
if (s->skip[2] <= ancestor)
return s->skip[2];
@ -27,22 +27,83 @@ static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor)
bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
struct snapshot_table *t;
EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);
rcu_read_lock();
t = rcu_dereference(c->snapshots);
while (id && id < ancestor)
id = get_ancestor_below(c, id, ancestor);
id = get_ancestor_below(t, id, ancestor);
rcu_read_unlock();
return id == ancestor;
}
static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
{
struct snapshot_table *t;
rcu_read_lock();
t = rcu_dereference(c->snapshots);
while (id && id < ancestor)
id = snapshot_t(c, id)->parent;
id = __snapshot_t(t, id)->parent;
rcu_read_unlock();
return id == ancestor;
}
static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
{
u32 depth;
rcu_read_lock();
depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
rcu_read_unlock();
return depth;
}
static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
size_t new_size;
struct snapshot_table *new, *old;
new_size = max(16UL, roundup_pow_of_two(idx + 1));
new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
if (!new)
return NULL;
old = c->snapshots;
if (old)
memcpy(new->s,
rcu_dereference_protected(c->snapshots, true)->s,
sizeof(new->s[0]) * c->snapshot_table_size);
rcu_assign_pointer(c->snapshots, new);
c->snapshot_table_size = new_size;
if (old)
kvfree_rcu(old);
return &rcu_dereference_protected(c->snapshots, true)->s[idx];
}
static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
lockdep_assert_held(&c->snapshot_table_lock);
if (likely(idx < c->snapshot_table_size))
return &rcu_dereference_protected(c->snapshots, true)->s[idx];
return __snapshot_t_mut(c, id);
}
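
The snapshots genradix becomes a flat, RCU-protected array: readers take only rcu_read_lock(), while writers hold snapshot_table_lock and go through snapshot_t_mut(), which copies the whole table into a larger allocation and publishes it with rcu_assign_pointer() when the index is out of range. A single-threaded standalone model of the copy-and-grow step (the U32_MAX - id indexing suggests snapshot IDs are allocated from the top down; that reading is an inference here):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct snap  { uint32_t parent; };
struct table { size_t size; struct snap s[]; };

static struct table *tbl;	/* __rcu in the real code */

static struct snap *snap_mut(uint32_t id)
{
	size_t idx = UINT32_MAX - id;	/* newest ids get the lowest slots */

	if (!tbl || idx >= tbl->size) {
		size_t new_size = 16;	/* max(16, roundup_pow_of_two(idx + 1)) */
		while (idx >= new_size)
			new_size *= 2;

		struct table *new = calloc(1, sizeof(*new) +
					   new_size * sizeof(new->s[0]));
		if (!new)
			return NULL;
		new->size = new_size;
		if (tbl)
			memcpy(new->s, tbl->s, tbl->size * sizeof(new->s[0]));
		/* rcu_assign_pointer() + kvfree_rcu(old) in the real code;
		 * the old table is simply leaked in this model */
		tbl = new;
	}
	return &tbl->s[idx];
}

int main(void)
{
	snap_mut(UINT32_MAX - 3)->parent = UINT32_MAX - 1;
	printf("table size: %zu\n", tbl->size);	/* 16 */
	return 0;
}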
/* Snapshot tree: */
void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
@ -202,12 +263,15 @@ int bch2_mark_snapshot(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct snapshot_t *t;
int ret = 0;
t = genradix_ptr_alloc(&c->snapshots,
U32_MAX - new.k->p.offset,
GFP_KERNEL);
if (!t)
return -BCH_ERR_ENOMEM_mark_snapshot;
mutex_lock(&c->snapshot_table_lock);
t = snapshot_t_mut(c, new.k->p.offset);
if (!t) {
ret = -BCH_ERR_ENOMEM_mark_snapshot;
goto err;
}
if (new.k->type == KEY_TYPE_snapshot) {
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
@ -231,8 +295,9 @@ int bch2_mark_snapshot(struct btree_trans *trans,
t->subvol = 0;
t->tree = 0;
}
return 0;
err:
mutex_unlock(&c->snapshot_table_lock);
return ret;
}
static int snapshot_lookup(struct btree_trans *trans, u32 id,
@ -285,9 +350,14 @@ static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
nr_live += ret;
}
snapshot_t(c, id)->equiv = nr_live == 1
? snapshot_t(c, child[live_idx])->equiv
mutex_lock(&c->snapshot_table_lock);
snapshot_t_mut(c, id)->equiv = nr_live == 1
? snapshot_t_mut(c, child[live_idx])->equiv
: id;
mutex_unlock(&c->snapshot_table_lock);
return 0;
}
@ -505,16 +575,18 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans,
static u32 snapshot_rand_ancestor_get(struct bch_fs *c, u32 id)
{
struct snapshot_t *s;
const struct snapshot_t *s;
if (!id)
return 0;
rcu_read_lock();
s = snapshot_t(c, id);
if (!s->parent)
return id;
if (s->parent)
id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
rcu_read_unlock();
return bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
return id;
}
static int snapshot_rand_ancestor_good(struct btree_trans *trans,
@ -613,9 +685,7 @@ static int check_snapshot(struct btree_trans *trans,
struct bch_snapshot v;
struct bkey_i_snapshot *u;
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
struct snapshot_t *parent = parent_id
? snapshot_t(c, parent_id)
: NULL;
u32 real_depth;
struct printbuf buf = PRINTBUF;
bool should_have_subvol;
u32 i, id;
@ -706,16 +776,18 @@ static int check_snapshot(struct btree_trans *trans,
}
ret = 0;
if (fsck_err_on(le32_to_cpu(s.depth) != (parent ? parent->depth + 1 : 0), c,
real_depth = bch2_snapshot_depth(c, parent_id);
if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, c,
"snapshot with incorrect depth fields, should be %u:\n %s",
parent->depth + 1,
real_depth,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
goto err;
u->v.depth = cpu_to_le32(parent ? parent->depth + 1 : 0);
u->v.depth = cpu_to_le32(real_depth);
s = u->v;
}
@ -799,9 +871,13 @@ static int check_subvol(struct btree_trans *trans,
if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
u32 snapshot_tree = snapshot_t(c, snapshot_root)->tree;
u32 snapshot_tree;
struct bch_snapshot_tree st;
rcu_read_lock();
snapshot_tree = snapshot_t(c, snapshot_root)->tree;
rcu_read_unlock();
ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
@ -845,7 +921,7 @@ int bch2_check_subvols(struct bch_fs *c)
void bch2_fs_snapshots_exit(struct bch_fs *c)
{
genradix_free(&c->snapshots);
kfree(c->snapshots);
}
int bch2_snapshots_read(struct bch_fs *c)
@ -987,7 +1063,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
struct bkey_i_snapshot *n;
struct bkey_s_c k;
unsigned i, j;
u32 depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
u32 depth = bch2_snapshot_depth(c, parent);
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
@ -1126,7 +1202,7 @@ static int snapshot_delete_key(struct btree_trans *trans,
struct bpos *last_pos)
{
struct bch_fs *c = trans->c;
u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv;
u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
if (!bkey_eq(k.k->p, *last_pos))
equiv_seen->nr = 0;

View File

@ -32,17 +32,31 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
.min_val_size = 24, \
})
static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
{
return genradix_ptr(&c->snapshots, U32_MAX - id);
return &t->s[U32_MAX - id];
}
static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
{
return __snapshot_t(rcu_dereference(c->snapshots), id);
}
static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{
return snapshot_t(c, id)->parent;
}
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{
rcu_read_lock();
id = __bch2_snapshot_parent_early(c, id);
rcu_read_unlock();
return id;
}
static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
#ifdef CONFIG_BCACHEFS_DEBUG
u32 parent = snapshot_t(c, id)->parent;
@ -59,10 +73,21 @@ static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
#endif
}
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
rcu_read_lock();
id = __bch2_snapshot_parent(c, id);
rcu_read_unlock();
return id;
}
static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
{
rcu_read_lock();
while (n--)
id = bch2_snapshot_parent(c, id);
id = __bch2_snapshot_parent(c, id);
rcu_read_unlock();
return id;
}
@ -71,37 +96,60 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
{
u32 parent;
while ((parent = bch2_snapshot_parent(c, id)))
rcu_read_lock();
while ((parent = __bch2_snapshot_parent(c, id)))
id = parent;
rcu_read_unlock();
return id;
}
static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
{
return snapshot_t(c, id)->equiv;
}
static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
{
return id == snapshot_t(c, id)->equiv;
rcu_read_lock();
id = __bch2_snapshot_equiv(c, id);
rcu_read_unlock();
return id;
}
static inline u32 bch2_snapshot_internal_node(struct bch_fs *c, u32 id)
static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
{
struct snapshot_t *s = snapshot_t(c, id);
return id == bch2_snapshot_equiv(c, id);
}
return s->children[0] || s->children[1];
static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
{
const struct snapshot_t *s;
bool ret;
rcu_read_lock();
s = snapshot_t(c, id);
ret = s->children[0];
rcu_read_unlock();
return ret;
}
static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
{
return !bch2_snapshot_is_internal_node(c, id);
}
static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
{
struct snapshot_t *s;
u32 parent = bch2_snapshot_parent(c, id);
const struct snapshot_t *s;
u32 parent = __bch2_snapshot_parent(c, id);
if (!parent)
return 0;
s = snapshot_t(c, bch2_snapshot_parent(c, id));
s = snapshot_t(c, __bch2_snapshot_parent(c, id));
if (id == s->children[0])
return s->children[1];
if (id == s->children[1])
@ -113,9 +161,15 @@ bool bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32);
static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
{
struct snapshot_t *t = snapshot_t(c, id);
const struct snapshot_t *t;
bool ret;
return (t->children[0]|t->children[1]) != 0;
rcu_read_lock();
t = snapshot_t(c, id);
ret = (t->children[0]|t->children[1]) != 0;
rcu_read_unlock();
return ret;
}
static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id)

View File

@ -16,6 +16,10 @@ struct snapshot_t {
u32 equiv;
};
struct snapshot_table {
struct snapshot_t s[0];
};
typedef struct {
u32 subvol;
u64 inum;

View File

@ -658,11 +658,18 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
struct printbuf err = PRINTBUF;
__le64 *i;
int ret;
#ifndef __KERNEL__
retry:
#endif
memset(sb, 0, sizeof(*sb));
sb->mode = FMODE_READ;
sb->have_bio = true;
#ifndef __KERNEL__
if (opt_get(*opts, direct_io) == false)
sb->mode |= FMODE_BUFFERED;
#endif
if (!opt_get(*opts, noexcl))
sb->mode |= FMODE_EXCL;
@ -747,7 +754,13 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
got_super:
if (le16_to_cpu(sb->sb->block_size) << 9 <
bdev_logical_block_size(sb->bdev)) {
bdev_logical_block_size(sb->bdev) &&
opt_get(*opts, direct_io)) {
#ifndef __KERNEL__
opt_set(*opts, direct_io, false);
bch2_free_super(sb);
goto retry;
#endif
prt_printf(&err, "block size (%u) smaller than device block size (%u)",
le16_to_cpu(sb->sb->block_size) << 9,
bdev_logical_block_size(sb->bdev));

View File

@ -344,6 +344,19 @@ static int bch2_fs_read_write_late(struct bch_fs *c)
{
int ret;
/*
* Data move operations can't run until after check_snapshots has
* completed, and bch2_snapshot_is_ancestor() is available.
*
* Ideally we'd start copygc/rebalance earlier instead of waiting for
* all of recovery/fsck to complete:
*/
ret = bch2_copygc_start(c);
if (ret) {
bch_err(c, "error starting copygc thread");
return ret;
}
ret = bch2_rebalance_start(c);
if (ret) {
bch_err(c, "error starting rebalance thread");
@ -403,12 +416,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
return ret;
}
ret = bch2_copygc_start(c);
if (ret) {
bch_err(c, "error starting copygc thread");
return ret;
}
if (!early) {
ret = bch2_fs_read_write_late(c);
if (ret)

View File

@ -64,7 +64,7 @@ static inline void *vpmalloc_noprof(size_t size, gfp_t gfp_mask)
{
return (void *) get_free_pages_noprof(gfp_mask|__GFP_NOWARN,
get_order(size)) ?:
__vmalloc(size, gfp_mask);
__vmalloc_noprof(size, gfp_mask);
}
#define vpmalloc(_size, _gfp) alloc_hooks(vpmalloc_noprof(_size, _gfp))

View File

@ -183,16 +183,19 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
else if (mode & FMODE_WRITE)
flags = O_WRONLY;
if (!(mode & FMODE_BUFFERED))
flags |= O_DIRECT;
#if 0
/* using O_EXCL doesn't work with opening twice for an O_SYNC fd: */
if (mode & FMODE_EXCL)
flags |= O_EXCL;
#endif
buffered_fd = open(path, flags);
buffered_fd = open(path, flags & ~O_DIRECT);
if (buffered_fd < 0)
return ERR_PTR(-errno);
fd = open(path, flags|O_DIRECT);
fd = open(path, flags);
if (fd < 0)
fd = dup(buffered_fd);
if (fd < 0) {
@ -200,9 +203,9 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
return ERR_PTR(-errno);
}
sync_fd = open(path, flags|O_DIRECT|O_SYNC);
sync_fd = open(path, flags|O_SYNC);
if (sync_fd < 0)
sync_fd = open(path, flags|O_SYNC);
sync_fd = open(path, (flags & ~O_DIRECT)|O_SYNC);
if (sync_fd < 0) {
close(fd);
close(buffered_fd);
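
In the userspace shim, the new FMODE_BUFFERED flag (added to the fs.h shim earlier in this commit) lets bch2_read_super() retry without O_DIRECT when the superblock's block size is smaller than the device's logical block size, since direct I/O must be logical-block aligned. A hedged sketch of the open-with-fallback pattern this implements (the helper name and shape are illustrative, not this file's API):

#define _GNU_SOURCE	/* for O_DIRECT */
#include <fcntl.h>
#include <unistd.h>

/* Prefer O_DIRECT, degrade to buffered if the kernel or filesystem
 * refuses it, much as the dup(buffered_fd) fallback above does. */
static int open_maybe_direct(const char *path, int flags, int want_direct)
{
	int fd = -1;

	if (want_direct)
		fd = open(path, flags | O_DIRECT);
	if (fd < 0)
		fd = open(path, flags);
	return fd;
}

int main(int argc, char **argv)
{
	int fd = open_maybe_direct(argc > 1 ? argv[1] : "/dev/null", O_RDONLY, 1);

	if (fd >= 0)
		close(fd);
	return fd < 0;
}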

View File

@ -138,7 +138,7 @@ fn cmd_list_inner(opt: Cli) -> anyhow::Result<()> {
opt_set!(fs_opts, errors, bcachefs::bch_error_actions::BCH_ON_ERROR_continue as u8);
if opt.fsck {
opt_set!(fs_opts, fix_errors, bcachefs::fsck_err_opts::FSCK_OPT_YES as u8);
opt_set!(fs_opts, fix_errors, bcachefs::fsck_err_opts::FSCK_FIX_yes as u8);
opt_set!(fs_opts, norecovery, 0);
}