Update bcachefs sources to e14d7c7195 bcachefs: Compression levels

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2023-07-10 20:31:34 -04:00
parent 1c156d5c46
commit c8bec83e30
39 changed files with 645 additions and 237 deletions

View File

@ -1 +1 @@
04f2d2ae5bd69eecd0b4bb700efdc665b09745a1 e14d7c7195b974bbaf400f9c3f2bdaa94fc8d372

View File

@ -117,7 +117,7 @@ int cmd_dump(int argc, char *argv[])
opt_set(opts, norecovery, true); opt_set(opts, norecovery, true);
opt_set(opts, degraded, true); opt_set(opts, degraded, true);
opt_set(opts, errors, BCH_ON_ERROR_continue); opt_set(opts, errors, BCH_ON_ERROR_continue);
opt_set(opts, fix_errors, FSCK_OPT_NO); opt_set(opts, fix_errors, FSCK_FIX_no);
while ((opt = getopt(argc, argv, "o:fjvh")) != -1) while ((opt = getopt(argc, argv, "o:fjvh")) != -1)
switch (opt) { switch (opt) {

View File

@ -37,7 +37,7 @@ int cmd_fsck(int argc, char *argv[])
opt_set(opts, degraded, true); opt_set(opts, degraded, true);
opt_set(opts, fsck, true); opt_set(opts, fsck, true);
opt_set(opts, fix_errors, FSCK_OPT_ASK); opt_set(opts, fix_errors, FSCK_FIX_ask);
while ((opt = getopt_long(argc, argv, while ((opt = getopt_long(argc, argv,
"apynfo:rvh", "apynfo:rvh",
@ -45,14 +45,14 @@ int cmd_fsck(int argc, char *argv[])
switch (opt) { switch (opt) {
case 'a': /* outdated alias for -p */ case 'a': /* outdated alias for -p */
case 'p': case 'p':
opt_set(opts, fix_errors, FSCK_OPT_YES); opt_set(opts, fix_errors, FSCK_FIX_yes);
break; break;
case 'y': case 'y':
opt_set(opts, fix_errors, FSCK_OPT_YES); opt_set(opts, fix_errors, FSCK_FIX_yes);
break; break;
case 'n': case 'n':
opt_set(opts, nochanges, true); opt_set(opts, nochanges, true);
opt_set(opts, fix_errors, FSCK_OPT_NO); opt_set(opts, fix_errors, FSCK_FIX_no);
break; break;
case 'f': case 'f':
/* force check, even if filesystem marked clean: */ /* force check, even if filesystem marked clean: */
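
For reference, the flags now map onto the renamed enum as:

/* -a / -p -> FSCK_FIX_yes (automatic repair)
 * -y      -> FSCK_FIX_yes
 * -n      -> FSCK_FIX_no, plus nochanges
 * default -> FSCK_FIX_ask, set before the getopt loop */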

View File

@ -234,7 +234,7 @@ int cmd_list_journal(int argc, char *argv[])
opt_set(opts, norecovery, true); opt_set(opts, norecovery, true);
opt_set(opts, degraded, true); opt_set(opts, degraded, true);
opt_set(opts, errors, BCH_ON_ERROR_continue); opt_set(opts, errors, BCH_ON_ERROR_continue);
opt_set(opts, fix_errors, FSCK_OPT_YES); opt_set(opts, fix_errors, FSCK_FIX_yes);
opt_set(opts, keep_journal, true); opt_set(opts, keep_journal, true);
opt_set(opts, read_journal_only,true); opt_set(opts, read_journal_only,true);

View File

@ -44,6 +44,7 @@ struct user_namespace;
#define FMODE_32BITHASH ((__force fmode_t)0x200) #define FMODE_32BITHASH ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */ /* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH ((__force fmode_t)0x400) #define FMODE_64BITHASH ((__force fmode_t)0x400)
#define FMODE_BUFFERED ((__force fmode_t)0x800)
struct inode { struct inode {
unsigned long i_ino; unsigned long i_ino;

View File

@ -2,4 +2,9 @@
#define LZ4_compress_destSize(src, dst, srclen, dstlen, workspace) \ #define LZ4_compress_destSize(src, dst, srclen, dstlen, workspace) \
LZ4_compress_destSize(src, dst, srclen, dstlen) LZ4_compress_destSize(src, dst, srclen, dstlen)
#define LZ4_compress_HC(src, dst, srclen, dstlen, level, workspace) -1
#define LZ4_MEM_COMPRESS 0 #define LZ4_MEM_COMPRESS 0
#define LZ4HC_MEM_COMPRESS 0
#define LZ4HC_MIN_CLEVEL 0

View File

@ -12,6 +12,7 @@
#define rcu_access_pointer(p) READ_ONCE(p) #define rcu_access_pointer(p) READ_ONCE(p)
#define kfree_rcu(ptr, rcu_head) kfree(ptr) /* XXX */ #define kfree_rcu(ptr, rcu_head) kfree(ptr) /* XXX */
#define kvfree_rcu(ptr) kfree(ptr) /* XXX */
#define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v) #define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v)

View File

@ -213,7 +213,7 @@ static inline struct kmem_cache *kmem_cache_create(size_t obj_size)
#define vfree(p) free(p) #define vfree(p) free(p)
static inline void *__vmalloc(unsigned long size, gfp_t flags) static inline void *__vmalloc_noprof(unsigned long size, gfp_t flags)
{ {
unsigned i; unsigned i;
void *p; void *p;
@ -234,6 +234,7 @@ static inline void *__vmalloc(unsigned long size, gfp_t flags)
return p; return p;
} }
#define __vmalloc __vmalloc_noprof
static inline void *vmalloc_exec(unsigned long size, gfp_t gfp_mask) static inline void *vmalloc_exec(unsigned long size, gfp_t gfp_mask)
{ {

View File

@ -604,8 +604,7 @@ struct bch_opts bch2_parse_opts(struct bch_opt_strs strs)
u64 v; u64 v;
for (i = 0; i < bch2_opts_nr; i++) { for (i = 0; i < bch2_opts_nr; i++) {
if (!strs.by_id[i] || if (!strs.by_id[i])
bch2_opt_table[i].type == BCH_OPT_FN)
continue; continue;
ret = bch2_opt_parse(NULL, ret = bch2_opt_parse(NULL,

View File

@ -774,9 +774,10 @@ struct bch_fs {
struct mutex sb_lock; struct mutex sb_lock;
/* snapshot.c: */ /* snapshot.c: */
GENRADIX(struct snapshot_t) snapshots; struct snapshot_table __rcu *snapshots;
struct bch_snapshot_table __rcu *snapshot_table; size_t snapshot_table_size;
struct mutex snapshot_table_lock; struct mutex snapshot_table_lock;
struct work_struct snapshot_delete_work; struct work_struct snapshot_delete_work;
struct work_struct snapshot_wait_for_pagecache_and_delete_work; struct work_struct snapshot_wait_for_pagecache_and_delete_work;
snapshot_id_list snapshots_unlinked; snapshot_id_list snapshots_unlinked;

View File

@ -695,7 +695,7 @@ struct bch_reservation {
/* Maximum size (in u64s) a single pointer could be: */ /* Maximum size (in u64s) a single pointer could be: */
#define BKEY_EXTENT_PTR_U64s_MAX\ #define BKEY_EXTENT_PTR_U64s_MAX\
((sizeof(struct bch_extent_crc128) + \ ((sizeof(struct bch_extent_crc128) + \
sizeof(struct bch_extent_ptr)) / sizeof(u64)) sizeof(struct bch_extent_ptr)) / sizeof(__u64))
/* Maximum possible size of an entire extent value: */ /* Maximum possible size of an entire extent value: */
#define BKEY_EXTENT_VAL_U64s_MAX \ #define BKEY_EXTENT_VAL_U64s_MAX \
@ -707,7 +707,7 @@ struct bch_reservation {
/* Btree pointers don't carry around checksums: */ /* Btree pointers don't carry around checksums: */
#define BKEY_BTREE_PTR_VAL_U64s_MAX \ #define BKEY_BTREE_PTR_VAL_U64s_MAX \
((sizeof(struct bch_btree_ptr_v2) + \ ((sizeof(struct bch_btree_ptr_v2) + \
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64)) sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
#define BKEY_BTREE_PTR_U64s_MAX \ #define BKEY_BTREE_PTR_U64s_MAX \
(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
@ -749,7 +749,7 @@ struct bch_inode_v3 {
} __packed __aligned(8); } __packed __aligned(8);
#define INODEv3_FIELDS_START_INITIAL 6 #define INODEv3_FIELDS_START_INITIAL 6
#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(u64)) #define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64))
struct bch_inode_generation { struct bch_inode_generation {
struct bch_val v; struct bch_val v;
@ -916,7 +916,7 @@ struct bch_dirent {
#define DT_SUBVOL 16 #define DT_SUBVOL 16
#define BCH_DT_MAX 17 #define BCH_DT_MAX 17
#define BCH_NAME_MAX ((unsigned) (U8_MAX * sizeof(u64) - \ #define BCH_NAME_MAX ((unsigned) (U8_MAX * sizeof(__u64) - \
sizeof(struct bkey) - \ sizeof(struct bkey) - \
offsetof(struct bch_dirent, d_name))) offsetof(struct bch_dirent, d_name)))
@ -1009,7 +1009,7 @@ struct bch_alloc_v4 {
} __packed __aligned(8); } __packed __aligned(8);
#define BCH_ALLOC_V4_U64s_V0 6 #define BCH_ALLOC_V4_U64s_V0 6
#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(u64)) #define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64))
BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1) BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2) BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
@ -1289,10 +1289,10 @@ struct bch_key {
}; };
#define BCH_KEY_MAGIC \ #define BCH_KEY_MAGIC \
(((u64) 'b' << 0)|((u64) 'c' << 8)| \ (((__u64) 'b' << 0)|((__u64) 'c' << 8)| \
((u64) 'h' << 16)|((u64) '*' << 24)| \ ((__u64) 'h' << 16)|((__u64) '*' << 24)| \
((u64) '*' << 32)|((u64) 'k' << 40)| \ ((__u64) '*' << 32)|((__u64) 'k' << 40)| \
((u64) 'e' << 48)|((u64) 'y' << 56)) ((__u64) 'e' << 48)|((__u64) 'y' << 56))
struct bch_encrypted_key { struct bch_encrypted_key {
__le64 magic; __le64 magic;
@ -1747,7 +1747,7 @@ LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62);
LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63); LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63);
LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4); LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);
LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8); LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_LO,struct bch_sb, flags[1], 4, 8);
LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9); LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9);
LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10); LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10);
@ -1767,7 +1767,7 @@ LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40);
LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52); LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52);
LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64); LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64);
LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE, LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO,
struct bch_sb, flags[2], 0, 4); struct bch_sb, flags[2], 0, 4);
LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64); LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64);
@ -1783,11 +1783,36 @@ LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34);
LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54); LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54);
LE64_BITMASK(BCH_SB_VERSION_UPGRADE, struct bch_sb, flags[4], 54, 56); LE64_BITMASK(BCH_SB_VERSION_UPGRADE, struct bch_sb, flags[4], 54, 56);
/* flags[4] 56-64 unused: */ LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_HI,struct bch_sb, flags[4], 56, 60);
LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
struct bch_sb, flags[4], 60, 64);
LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
struct bch_sb, flags[5], 0, 16); struct bch_sb, flags[5], 0, 16);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
return BCH_SB_COMPRESSION_TYPE_LO(sb) | (BCH_SB_COMPRESSION_TYPE_HI(sb) << 4);
}
static inline void SET_BCH_SB_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
{
SET_BCH_SB_COMPRESSION_TYPE_LO(sb, v);
SET_BCH_SB_COMPRESSION_TYPE_HI(sb, v >> 4);
}
static inline __u64 BCH_SB_BACKGROUND_COMPRESSION_TYPE(const struct bch_sb *sb)
{
return BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb) |
(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb) << 4);
}
static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
{
SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb, v);
SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb, v >> 4);
}
/* /*
* Features: * Features:
* *
@ -2272,7 +2297,7 @@ static inline __u64 BTREE_NODE_ID(struct btree_node *n)
return BTREE_NODE_ID_LO(n) | (BTREE_NODE_ID_HI(n) << 4); return BTREE_NODE_ID_LO(n) | (BTREE_NODE_ID_HI(n) << 4);
} }
static inline void SET_BTREE_NODE_ID(struct btree_node *n, u64 v) static inline void SET_BTREE_NODE_ID(struct btree_node *n, __u64 v)
{ {
SET_BTREE_NODE_ID_LO(n, v); SET_BTREE_NODE_ID_LO(n, v);
SET_BTREE_NODE_ID_HI(n, v >> 4); SET_BTREE_NODE_ID_HI(n, v >> 4);
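
The LO/HI pairs above exist because the compression option grew from a 4-bit type to an 8-bit type-plus-level value, while the original superblock field only reserved 4 bits; the spare nibble lands in the previously unused tail of flags[4]. A standalone sketch of the same split (simplified struct, but the same bit ranges as the accessors above):

#include <assert.h>
#include <stdint.h>

struct sb { uint64_t flags1, flags4; };

static void set_compression_type(struct sb *sb, uint64_t v)
{
	/* low nibble -> flags[1] bits 4-8, high nibble -> flags[4] bits 56-60 */
	sb->flags1 = (sb->flags1 & ~(0xfULL << 4))  | ((v & 0xf) << 4);
	sb->flags4 = (sb->flags4 & ~(0xfULL << 56)) | (((v >> 4) & 0xf) << 56);
}

static uint64_t compression_type(const struct sb *sb)
{
	return ((sb->flags1 >> 4) & 0xf) | (((sb->flags4 >> 56) & 0xf) << 4);
}

int main(void)
{
	struct sb sb = {};

	for (uint64_t v = 0; v < 256; v++) {
		set_compression_type(&sb, v);
		assert(compression_type(&sb) == v);
	}
	return 0;
}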

View File

@ -1811,7 +1811,7 @@ again:
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) || if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) ||
(BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) && (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) &&
c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations && c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations &&
c->opts.fix_errors != FSCK_OPT_NO)) { c->opts.fix_errors != FSCK_FIX_no)) {
bch_info(c, "Starting topology repair pass"); bch_info(c, "Starting topology repair pass");
ret = bch2_repair_topology(c); ret = bch2_repair_topology(c);
if (ret) if (ret)

View File

@ -311,7 +311,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
!(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) && !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) && test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
i->k->k.p.snapshot && i->k->k.p.snapshot &&
bch2_snapshot_internal_node(trans->c, i->k->k.p.snapshot)); bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
} }
static noinline int static noinline int
@ -1229,7 +1229,7 @@ static inline int check_pos_snapshot_overwritten(struct btree_trans *trans,
struct bpos pos) struct bpos pos)
{ {
if (!btree_type_has_snapshots(id) || if (!btree_type_has_snapshots(id) ||
!snapshot_t(trans->c, pos.snapshot)->children[0]) bch2_snapshot_is_leaf(trans->c, pos.snapshot))
return 0; return 0;
return __check_pos_snapshot_overwritten(trans, id, pos); return __check_pos_snapshot_overwritten(trans, id, pos);

View File

@ -129,6 +129,9 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
keys = wb->keys[s.idx]; keys = wb->keys[s.idx];
nr = s.nr; nr = s.nr;
if (race_fault())
goto slowpath;
/* /*
* We first sort so that we can detect and skip redundant updates, and * We first sort so that we can detect and skip redundant updates, and
* then we attempt to flush in sorted btree order, as this is most * then we attempt to flush in sorted btree order, as this is most

View File

@ -120,12 +120,6 @@ static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c)
return bch2_csum_opt_to_type(c->opts.metadata_checksum, false); return bch2_csum_opt_to_type(c->opts.metadata_checksum, false);
} }
static const unsigned bch2_compression_opt_to_type[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
BCH_COMPRESSION_OPTS()
#undef x
};
static inline bool bch2_checksum_type_valid(const struct bch_fs *c, static inline bool bch2_checksum_type_valid(const struct bch_fs *c,
unsigned type) unsigned type)
{ {

View File

@ -296,21 +296,32 @@ static int attempt_compress(struct bch_fs *c,
void *workspace, void *workspace,
void *dst, size_t dst_len, void *dst, size_t dst_len,
void *src, size_t src_len, void *src, size_t src_len,
enum bch_compression_type compression_type) struct bch_compression_opt compression)
{ {
enum bch_compression_type compression_type =
__bch2_compression_opt_to_type[compression.type];
switch (compression_type) { switch (compression_type) {
case BCH_COMPRESSION_TYPE_lz4: { case BCH_COMPRESSION_TYPE_lz4:
int len = src_len; if (compression.level < LZ4HC_MIN_CLEVEL) {
int ret = LZ4_compress_destSize( int len = src_len;
src, dst, int ret = LZ4_compress_destSize(
&len, dst_len, src, dst,
workspace); &len, dst_len,
workspace);
if (len < src_len)
return -len;
if (len < src_len) return ret;
return -len; } else {
int ret = LZ4_compress_HC(
src, dst,
src_len, dst_len,
compression.level,
workspace);
return ret; return ret ?: -1;
} }
case BCH_COMPRESSION_TYPE_gzip: { case BCH_COMPRESSION_TYPE_gzip: {
z_stream strm = { z_stream strm = {
.next_in = src, .next_in = src,
@ -320,7 +331,11 @@ static int attempt_compress(struct bch_fs *c,
}; };
zlib_set_workspace(&strm, workspace); zlib_set_workspace(&strm, workspace);
zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION, zlib_deflateInit2(&strm,
compression.level
? clamp_t(unsigned, compression.level,
Z_BEST_SPEED, Z_BEST_COMPRESSION)
: Z_DEFAULT_COMPRESSION,
Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
Z_DEFAULT_STRATEGY); Z_DEFAULT_STRATEGY);
@ -333,8 +348,14 @@ static int attempt_compress(struct bch_fs *c,
return strm.total_out; return strm.total_out;
} }
case BCH_COMPRESSION_TYPE_zstd: { case BCH_COMPRESSION_TYPE_zstd: {
/*
* rescale:
* zstd max compression level is 22, our max level is 15
*/
unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
ZSTD_CCtx *ctx = zstd_init_cctx(workspace, ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
zstd_cctx_workspace_bound(&c->zstd_params.cParams)); zstd_cctx_workspace_bound(&params.cParams));
/* /*
* ZSTD requires that when we decompress we pass in the exact * ZSTD requires that when we decompress we pass in the exact
@ -365,10 +386,12 @@ static int attempt_compress(struct bch_fs *c,
static unsigned __bio_compress(struct bch_fs *c, static unsigned __bio_compress(struct bch_fs *c,
struct bio *dst, size_t *dst_len, struct bio *dst, size_t *dst_len,
struct bio *src, size_t *src_len, struct bio *src, size_t *src_len,
enum bch_compression_type compression_type) struct bch_compression_opt compression)
{ {
struct bbuf src_data = { NULL }, dst_data = { NULL }; struct bbuf src_data = { NULL }, dst_data = { NULL };
void *workspace; void *workspace;
enum bch_compression_type compression_type =
__bch2_compression_opt_to_type[compression.type];
unsigned pad; unsigned pad;
int ret = 0; int ret = 0;
@ -400,7 +423,7 @@ static unsigned __bio_compress(struct bch_fs *c,
ret = attempt_compress(c, workspace, ret = attempt_compress(c, workspace,
dst_data.b, *dst_len, dst_data.b, *dst_len,
src_data.b, *src_len, src_data.b, *src_len,
compression_type); compression);
if (ret > 0) { if (ret > 0) {
*dst_len = ret; *dst_len = ret;
ret = 0; ret = 0;
@ -447,22 +470,24 @@ static unsigned __bio_compress(struct bch_fs *c,
BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size); BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
BUG_ON(*dst_len & (block_bytes(c) - 1)); BUG_ON(*dst_len & (block_bytes(c) - 1));
BUG_ON(*src_len & (block_bytes(c) - 1)); BUG_ON(*src_len & (block_bytes(c) - 1));
ret = compression_type;
out: out:
bio_unmap_or_unbounce(c, src_data); bio_unmap_or_unbounce(c, src_data);
bio_unmap_or_unbounce(c, dst_data); bio_unmap_or_unbounce(c, dst_data);
return compression_type; return ret;
err: err:
compression_type = BCH_COMPRESSION_TYPE_incompressible; ret = BCH_COMPRESSION_TYPE_incompressible;
goto out; goto out;
} }
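
Taken together, the attempt_compress() changes above map the user-visible 0-15 level onto each backend's native scale. A summary (Z_BEST_SPEED/Z_BEST_COMPRESSION are zlib's usual 1/9, and zstd_max_clevel() is typically 22):

/* lz4:  level <  LZ4HC_MIN_CLEVEL -> LZ4_compress_destSize() fast path
 *       level >= LZ4HC_MIN_CLEVEL -> LZ4_compress_HC() at that level
 * gzip: level 0    -> Z_DEFAULT_COMPRESSION
 *       level 1-15 -> clamped into [Z_BEST_SPEED, Z_BEST_COMPRESSION]
 * zstd: level      -> min(level * 3 / 2, zstd_max_clevel()), so 15 -> 22 */
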
unsigned bch2_bio_compress(struct bch_fs *c, unsigned bch2_bio_compress(struct bch_fs *c,
struct bio *dst, size_t *dst_len, struct bio *dst, size_t *dst_len,
struct bio *src, size_t *src_len, struct bio *src, size_t *src_len,
unsigned compression_type) unsigned compression_opt)
{ {
unsigned orig_dst = dst->bi_iter.bi_size; unsigned orig_dst = dst->bi_iter.bi_size;
unsigned orig_src = src->bi_iter.bi_size; unsigned orig_src = src->bi_iter.bi_size;
unsigned compression_type;
/* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */ /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size, src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
@ -470,11 +495,9 @@ unsigned bch2_bio_compress(struct bch_fs *c,
/* Don't generate a bigger output than input: */ /* Don't generate a bigger output than input: */
dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
if (compression_type == BCH_COMPRESSION_TYPE_lz4_old)
compression_type = BCH_COMPRESSION_TYPE_lz4;
compression_type = compression_type =
__bio_compress(c, dst, dst_len, src, src_len, compression_type); __bio_compress(c, dst, dst_len, src, src_len,
bch2_compression_decode(compression_opt));
dst->bi_iter.bi_size = orig_dst; dst->bi_iter.bi_size = orig_dst;
src->bi_iter.bi_size = orig_src; src->bi_iter.bi_size = orig_src;
@ -521,8 +544,10 @@ static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
} }
int bch2_check_set_has_compressed_data(struct bch_fs *c, int bch2_check_set_has_compressed_data(struct bch_fs *c,
unsigned compression_type) unsigned compression_opt)
{ {
unsigned compression_type = bch2_compression_decode(compression_opt).type;
BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature)); BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
return compression_type return compression_type
@ -546,14 +571,16 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{ {
size_t decompress_workspace_size = 0; size_t decompress_workspace_size = 0;
bool decompress_workspace_needed; bool decompress_workspace_needed;
ZSTD_parameters params = zstd_get_params(0, c->opts.encoded_extent_max); ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
c->opts.encoded_extent_max);
struct { struct {
unsigned feature; unsigned feature;
unsigned type; enum bch_compression_type type;
size_t compress_workspace; size_t compress_workspace;
size_t decompress_workspace; size_t decompress_workspace;
} compression_types[] = { } compression_types[] = {
{ BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 }, { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) },
{ BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip, { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
zlib_inflate_workspacesize(), }, zlib_inflate_workspacesize(), },
@ -612,16 +639,74 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
return 0; return 0;
} }
static u64 compression_opt_to_feature(unsigned v)
{
unsigned type = bch2_compression_decode(v).type;
return 1ULL << bch2_compression_opt_to_feature[type];
}
int bch2_fs_compress_init(struct bch_fs *c) int bch2_fs_compress_init(struct bch_fs *c)
{ {
u64 f = c->sb.features; u64 f = c->sb.features;
if (c->opts.compression) f |= compression_opt_to_feature(c->opts.compression);
f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression]; f |= compression_opt_to_feature(c->opts.background_compression);
if (c->opts.background_compression)
f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression];
return __bch2_fs_compress_init(c, f); return __bch2_fs_compress_init(c, f);
}
int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
struct printbuf *err)
{
char *val = kstrdup(_val, GFP_KERNEL);
char *p = val, *type_str, *level_str;
struct bch_compression_opt opt = { 0 };
int ret;
if (!val)
return -ENOMEM;
type_str = strsep(&p, ":");
level_str = p;
ret = match_string(bch2_compression_opts, -1, type_str);
if (ret < 0 && err)
prt_str(err, "invalid compression type");
if (ret < 0)
goto err;
opt.type = ret;
ret = 0;
if (level_str) {
unsigned level;
ret = kstrtouint(level_str, 10, &level);
if (!ret && !opt.type && level)
ret = -EINVAL;
if (!ret && level > 15)
ret = -EINVAL;
if (ret < 0 && err)
prt_str(err, "invalid compression level");
if (ret < 0)
goto err;
opt.level = level;
}
*res = bch2_compression_encode(opt);
err:
kfree(val);
return ret;
}
void bch2_opt_compression_to_text(struct printbuf *out,
struct bch_fs *c,
struct bch_sb *sb,
u64 v)
{
struct bch_compression_opt opt = bch2_compression_decode(v);
prt_str(out, bch2_compression_opts[opt.type]);
if (opt.level)
prt_printf(out, ":%u", opt.level);
} }
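
For illustration, the "type[:level]" grammar that bch2_opt_compression_parse() accepts can be exercised with a small userspace re-creation; everything below is a sketch, and the table indices only happen to follow the none/lz4/gzip/zstd order of bch2_compression_opts:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const char * const types[] = { "none", "lz4", "gzip", "zstd", NULL };

static int parse_compression(const char *_val, unsigned *res)
{
	char *val = strdup(_val), *p = val;
	char *type_str;
	unsigned type, level = 0;
	int ret = -1;

	if (!val)
		return -1;
	type_str = strsep(&p, ":");

	for (type = 0; types[type]; type++)
		if (!strcmp(type_str, types[type])) {
			ret = 0;
			break;
		}

	if (!ret && p) {
		char *end;

		level = strtoul(p, &end, 10);
		if (*end || level > 15 || (!type && level))
			ret = -1;	/* bad number, > 15, or "none:N" */
	}

	if (!ret)
		*res = type | (level << 4);	/* bch2_compression_encode() */
	free(val);
	return ret;
}

int main(void)
{
	unsigned v;

	if (!parse_compression("zstd:15", &v))
		printf("encoded: 0x%x\n", v);	/* encoded: 0xf3 */
	return 0;
}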

View File

@ -4,6 +4,35 @@
#include "extents_types.h" #include "extents_types.h"
struct bch_compression_opt {
u8 type:4,
level:4;
};
static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
{
return (struct bch_compression_opt) {
.type = v & 15,
.level = v >> 4,
};
}
static inline unsigned bch2_compression_encode(struct bch_compression_opt opt)
{
return opt.type|(opt.level << 4);
}
static const unsigned __bch2_compression_opt_to_type[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
BCH_COMPRESSION_OPTS()
#undef x
};
static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
{
return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
}
int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
struct bch_extent_crc_unpacked *); struct bch_extent_crc_unpacked *);
int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
@ -15,4 +44,12 @@ int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned);
void bch2_fs_compress_exit(struct bch_fs *); void bch2_fs_compress_exit(struct bch_fs *);
int bch2_fs_compress_init(struct bch_fs *); int bch2_fs_compress_init(struct bch_fs *);
int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
#define bch2_opt_compression (struct bch_opt_fn) { \
.parse = bch2_opt_compression_parse, \
.to_text = bch2_opt_compression_to_text, \
}
#endif /* _BCACHEFS_COMPRESS_H */ #endif /* _BCACHEFS_COMPRESS_H */
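
A quick round trip through the new helpers, using zstd (slot 3, assuming the none/lz4/gzip/zstd option order) at level 7:

	struct bch_compression_opt opt = { .type = 3, .level = 7 };	/* "zstd:7" */
	unsigned v = bch2_compression_encode(opt);	/* 3 | (7 << 4) == 0x73 */

	opt = bch2_compression_decode(v);		/* .type == 3, .level == 7 */

The whole thing fits in 8 bits, which is why the compression entries in opts.h can stay u8.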

View File

@ -455,9 +455,7 @@ int bch2_data_update_init(struct btree_trans *trans,
BCH_WRITE_DATA_ENCODED| BCH_WRITE_DATA_ENCODED|
BCH_WRITE_MOVE| BCH_WRITE_MOVE|
m->data_opts.write_flags; m->data_opts.write_flags;
m->op.compression_type = m->op.compression_opt = io_opts.background_compression ?: io_opts.compression;
bch2_compression_opt_to_type[io_opts.background_compression ?:
io_opts.compression];
m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;
bkey_for_each_ptr(ptrs, ptr) bkey_for_each_ptr(ptrs, ptr)

View File

@ -460,30 +460,37 @@ int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
return ret; return ret;
} }
int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v) int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res,
struct printbuf *err)
{ {
struct bch_dev *ca; struct bch_dev *ca;
int g; int g;
if (!strlen(buf) || !strcmp(buf, "none")) { if (!val)
*v = 0; return -EINVAL;
if (!c)
return 0;
if (!strlen(val) || !strcmp(val, "none")) {
*res = 0;
return 0; return 0;
} }
/* Is it a device? */ /* Is it a device? */
ca = bch2_dev_lookup(c, buf); ca = bch2_dev_lookup(c, val);
if (!IS_ERR(ca)) { if (!IS_ERR(ca)) {
*v = dev_to_target(ca->dev_idx); *res = dev_to_target(ca->dev_idx);
percpu_ref_put(&ca->ref); percpu_ref_put(&ca->ref);
return 0; return 0;
} }
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
g = bch2_disk_path_find(&c->disk_sb, buf); g = bch2_disk_path_find(&c->disk_sb, val);
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
if (g >= 0) { if (g >= 0) {
*v = group_to_target(g); *res = group_to_target(g);
return 0; return 0;
} }

View File

@ -85,9 +85,14 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *);
void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned); void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned);
int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *); int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
#define bch2_opt_target (struct bch_opt_fn) { \
.parse = bch2_opt_target_parse, \
.to_text = bch2_opt_target_to_text, \
}
int bch2_sb_disk_groups_to_cpu(struct bch_fs *); int bch2_sb_disk_groups_to_cpu(struct bch_fs *);
int __bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *); int __bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);
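
With the opts.c rework later in this commit, OPT_FN(_fn) expands to `.type = BCH_OPT_FN, .fn = _fn`, so the compound literal above drops straight into the option table and supplies both callbacks; roughly:

	/* what OPT_FN(bch2_opt_target) contributes to a bch2_opt_table[] entry: */
	.type = BCH_OPT_FN,
	.fn   = (struct bch_opt_fn) {
		.parse   = bch2_opt_target_parse,
		.to_text = bch2_opt_target_to_text,
	},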

View File

@ -204,7 +204,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
prt_str(out, ", continuing"); prt_str(out, ", continuing");
ret = -BCH_ERR_fsck_ignore; ret = -BCH_ERR_fsck_ignore;
} }
} else if (c->opts.fix_errors == FSCK_OPT_EXIT) { } else if (c->opts.fix_errors == FSCK_FIX_exit) {
prt_str(out, ", exiting"); prt_str(out, ", exiting");
ret = -BCH_ERR_fsck_errors_not_fixed; ret = -BCH_ERR_fsck_errors_not_fixed;
} else if (flags & FSCK_CAN_FIX) { } else if (flags & FSCK_CAN_FIX) {
@ -212,7 +212,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
? s->fix ? s->fix
: c->opts.fix_errors; : c->opts.fix_errors;
if (fix == FSCK_OPT_ASK) { if (fix == FSCK_FIX_ask) {
int ask; int ask;
prt_str(out, ": fix?"); prt_str(out, ": fix?");
@ -223,13 +223,13 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
if (ask >= YN_ALLNO && s) if (ask >= YN_ALLNO && s)
s->fix = ask == YN_ALLNO s->fix = ask == YN_ALLNO
? FSCK_OPT_NO ? FSCK_FIX_no
: FSCK_OPT_YES; : FSCK_FIX_yes;
ret = ask & 1 ret = ask & 1
? -BCH_ERR_fsck_fix ? -BCH_ERR_fsck_fix
: -BCH_ERR_fsck_ignore; : -BCH_ERR_fsck_ignore;
} else if (fix == FSCK_OPT_YES || } else if (fix == FSCK_FIX_yes ||
(c->opts.nochanges && (c->opts.nochanges &&
!(flags & FSCK_CAN_IGNORE))) { !(flags & FSCK_CAN_IGNORE))) {
prt_str(out, ", fixing"); prt_str(out, ", fixing");
@ -244,7 +244,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
} }
if (ret == -BCH_ERR_fsck_ignore && if (ret == -BCH_ERR_fsck_ignore &&
(c->opts.fix_errors == FSCK_OPT_EXIT || (c->opts.fix_errors == FSCK_FIX_exit ||
!(flags & FSCK_CAN_IGNORE))) !(flags & FSCK_CAN_IGNORE)))
ret = -BCH_ERR_fsck_errors_not_fixed; ret = -BCH_ERR_fsck_errors_not_fixed;
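
The `ask & 1` test above leans on the encoding of the YN_* answers (a sketch; in bcachefs these live in util.h):

	enum { YN_NO = 0, YN_YES = 1, YN_ALLNO = 2, YN_ALLYES = 3 };

Odd values (YN_YES, YN_ALLYES) mean fix, even values mean ignore, and answers >= YN_ALLNO additionally record a sticky per-error choice in s->fix, as the FSCK_FIX_no/FSCK_FIX_yes assignment above shows.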

View File

@ -91,13 +91,6 @@ do { \
* be able to repair: * be able to repair:
*/ */
enum fsck_err_opts {
FSCK_OPT_EXIT,
FSCK_OPT_YES,
FSCK_OPT_NO,
FSCK_OPT_ASK,
};
struct fsck_err_state { struct fsck_err_state {
struct list_head list; struct list_head list;
const char *fmt; const char *fmt;

View File

@ -35,6 +35,8 @@
#include <trace/events/writeback.h> #include <trace/events/writeback.h>
static void bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned);
struct folio_vec { struct folio_vec {
struct folio *fv_folio; struct folio *fv_folio;
size_t fv_offset; size_t fv_offset;
@ -1972,7 +1974,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
darray_for_each(folios, fi) { darray_for_each(folios, fi) {
struct folio *f = *fi; struct folio *f = *fi;
u64 f_len = min(end, folio_end_pos(f)) - f_pos; u64 f_len = min(end, folio_end_pos(f)) - f_pos;
unsigned f_copied = copy_folio_from_iter_atomic(f, f_offset, f_len, iter); unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
if (!f_copied) { if (!f_copied) {
folios_trunc(&folios, fi); folios_trunc(&folios, fi);
@ -3373,6 +3375,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
struct quota_res quota_res = { 0 }; struct quota_res quota_res = { 0 };
struct bkey_s_c k; struct bkey_s_c k;
unsigned sectors; unsigned sectors;
bool is_allocation;
u64 hole_start, hole_end;
u32 snapshot; u32 snapshot;
bch2_trans_begin(&trans); bch2_trans_begin(&trans);
@ -3388,6 +3392,10 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
if ((ret = bkey_err(k))) if ((ret = bkey_err(k)))
goto bkey_err; goto bkey_err;
hole_start = iter.pos.offset;
hole_end = bpos_min(k.k->p, end_pos).offset;
is_allocation = bkey_extent_is_allocation(k.k);
/* already reserved */ /* already reserved */
if (bkey_extent_is_reservation(k) && if (bkey_extent_is_reservation(k) &&
bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
@ -3401,17 +3409,26 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
continue; continue;
} }
/* if (!(mode & FALLOC_FL_ZERO_RANGE)) {
* XXX: for nocow mode, we should promote shared extents to ret = drop_locks_do(&trans,
* unshared here (bch2_clamp_data_hole(&inode->v,
*/ &hole_start,
&hole_end,
opts.data_replicas), 0));
bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
sectors = bpos_min(k.k->p, end_pos).offset - iter.pos.offset; if (ret)
goto bkey_err;
if (!bkey_extent_is_allocation(k.k)) { if (hole_start == hole_end)
continue;
}
sectors = hole_end - hole_start;
if (!is_allocation) {
ret = bch2_quota_reservation_add(c, inode, ret = bch2_quota_reservation_add(c, inode,
&quota_res, &quota_res, sectors, true);
sectors, true);
if (unlikely(ret)) if (unlikely(ret))
goto bkey_err; goto bkey_err;
} }
@ -3423,15 +3440,15 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
goto bkey_err; goto bkey_err;
i_sectors_acct(c, inode, &quota_res, i_sectors_delta); i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
drop_locks_do(&trans,
(mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0));
bkey_err: bkey_err:
bch2_quota_reservation_put(c, inode, &quota_res); bch2_quota_reservation_put(c, inode, &quota_res);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
ret = 0; ret = 0;
} }
bch2_trans_unlock(&trans); /* lock ordering, before taking pagecache locks: */
mark_pagecache_reserved(inode, start_sector, iter.pos.offset);
if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) { if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) {
struct quota_res quota_res = { 0 }; struct quota_res quota_res = { 0 };
s64 i_sectors_delta = 0; s64 i_sectors_delta = 0;
@ -3679,14 +3696,16 @@ err:
/* fseek: */ /* fseek: */
static int folio_data_offset(struct folio *folio, loff_t pos) static int folio_data_offset(struct folio *folio, loff_t pos,
unsigned min_replicas)
{ {
struct bch_folio *s = bch2_folio(folio); struct bch_folio *s = bch2_folio(folio);
unsigned i, sectors = folio_sectors(folio); unsigned i, sectors = folio_sectors(folio);
if (s) if (s)
for (i = folio_pos_to_s(folio, pos); i < sectors; i++) for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
if (s->s[i].state >= SECTOR_dirty) if (s->s[i].state >= SECTOR_dirty &&
s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
return i << SECTOR_SHIFT; return i << SECTOR_SHIFT;
return -1; return -1;
@ -3694,7 +3713,8 @@ static int folio_data_offset(struct folio *folio, loff_t pos)
static loff_t bch2_seek_pagecache_data(struct inode *vinode, static loff_t bch2_seek_pagecache_data(struct inode *vinode,
loff_t start_offset, loff_t start_offset,
loff_t end_offset) loff_t end_offset,
unsigned min_replicas)
{ {
struct folio_batch fbatch; struct folio_batch fbatch;
pgoff_t start_index = start_offset >> PAGE_SHIFT; pgoff_t start_index = start_offset >> PAGE_SHIFT;
@ -3713,7 +3733,8 @@ static loff_t bch2_seek_pagecache_data(struct inode *vinode,
folio_lock(folio); folio_lock(folio);
offset = folio_data_offset(folio, offset = folio_data_offset(folio,
max(folio_pos(folio), start_offset)); max(folio_pos(folio), start_offset),
min_replicas);
if (offset >= 0) { if (offset >= 0) {
ret = clamp(folio_pos(folio) + offset, ret = clamp(folio_pos(folio) + offset,
start_offset, end_offset); start_offset, end_offset);
@ -3775,7 +3796,7 @@ err:
if (next_data > offset) if (next_data > offset)
next_data = bch2_seek_pagecache_data(&inode->v, next_data = bch2_seek_pagecache_data(&inode->v,
offset, next_data); offset, next_data, 0);
if (next_data >= isize) if (next_data >= isize)
return -ENXIO; return -ENXIO;
@ -3783,7 +3804,8 @@ err:
return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); return vfs_setpos(file, next_data, MAX_LFS_FILESIZE);
} }
static bool folio_hole_offset(struct address_space *mapping, loff_t *offset) static bool folio_hole_offset(struct address_space *mapping, loff_t *offset,
unsigned min_replicas)
{ {
struct folio *folio; struct folio *folio;
struct bch_folio *s; struct bch_folio *s;
@ -3800,7 +3822,8 @@ static bool folio_hole_offset(struct address_space *mapping, loff_t *offset)
sectors = folio_sectors(folio); sectors = folio_sectors(folio);
for (i = folio_pos_to_s(folio, *offset); i < sectors; i++) for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
if (s->s[i].state < SECTOR_dirty) { if (s->s[i].state < SECTOR_dirty ||
s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
*offset = max(*offset, *offset = max(*offset,
folio_pos(folio) + (i << SECTOR_SHIFT)); folio_pos(folio) + (i << SECTOR_SHIFT));
goto unlock; goto unlock;
@ -3815,18 +3838,34 @@ unlock:
static loff_t bch2_seek_pagecache_hole(struct inode *vinode, static loff_t bch2_seek_pagecache_hole(struct inode *vinode,
loff_t start_offset, loff_t start_offset,
loff_t end_offset) loff_t end_offset,
unsigned min_replicas)
{ {
struct address_space *mapping = vinode->i_mapping; struct address_space *mapping = vinode->i_mapping;
loff_t offset = start_offset; loff_t offset = start_offset;
while (offset < end_offset && while (offset < end_offset &&
!folio_hole_offset(mapping, &offset)) !folio_hole_offset(mapping, &offset, min_replicas))
; ;
return min(offset, end_offset); return min(offset, end_offset);
} }
static void bch2_clamp_data_hole(struct inode *inode,
u64 *hole_start,
u64 *hole_end,
unsigned min_replicas)
{
*hole_start = bch2_seek_pagecache_hole(inode,
*hole_start << 9, *hole_end << 9, min_replicas) >> 9;
if (*hole_start == *hole_end)
return;
*hole_end = bch2_seek_pagecache_data(inode,
*hole_start << 9, *hole_end << 9, min_replicas) >> 9;
}
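
bch2_clamp_data_hole() works in 512-byte sectors (hence the <<9/>>9 conversions) and shrinks the candidate hole to its leading subrange that is also a hole in the pagecache: hole_start is first advanced past any pagecache data, then hole_end is pulled back to the next data offset. For example, if the extent btree reports [32, 96) as unallocated but sectors [40, 48) are dirty in the pagecache, the range is clamped to [32, 40) and __bchfs_fallocate() picks up again at sector 40 on the next loop iteration.
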
static loff_t bch2_seek_hole(struct file *file, u64 offset) static loff_t bch2_seek_hole(struct file *file, u64 offset)
{ {
struct bch_inode_info *inode = file_bch_inode(file); struct bch_inode_info *inode = file_bch_inode(file);
@ -3856,12 +3895,12 @@ retry:
BTREE_ITER_SLOTS, k, ret) { BTREE_ITER_SLOTS, k, ret) {
if (k.k->p.inode != inode->v.i_ino) { if (k.k->p.inode != inode->v.i_ino) {
next_hole = bch2_seek_pagecache_hole(&inode->v, next_hole = bch2_seek_pagecache_hole(&inode->v,
offset, MAX_LFS_FILESIZE); offset, MAX_LFS_FILESIZE, 0);
break; break;
} else if (!bkey_extent_is_data(k.k)) { } else if (!bkey_extent_is_data(k.k)) {
next_hole = bch2_seek_pagecache_hole(&inode->v, next_hole = bch2_seek_pagecache_hole(&inode->v,
max(offset, bkey_start_offset(k.k) << 9), max(offset, bkey_start_offset(k.k) << 9),
k.k->p.offset << 9); k.k->p.offset << 9, 0);
if (next_hole < k.k->p.offset << 9) if (next_hole < k.k->p.offset << 9)
break; break;

View File

@ -894,7 +894,7 @@ static int check_inode(struct btree_trans *trans,
* particular is not atomic, so on the internal snapshot nodes * particular is not atomic, so on the internal snapshot nodes
* we can see inodes marked for deletion after a clean shutdown * we can see inodes marked for deletion after a clean shutdown
*/ */
if (bch2_snapshot_internal_node(c, k.k->p.snapshot)) if (bch2_snapshot_is_internal_node(c, k.k->p.snapshot))
return 0; return 0;
if (!bkey_is_inode(k.k)) if (!bkey_is_inode(k.k))
@ -2122,6 +2122,8 @@ int bch2_check_directory_structure(struct bch_fs *c)
return ret; return ret;
} }
/* check_nlink pass: */
struct nlink_table { struct nlink_table {
size_t nr; size_t nr;
size_t size; size_t size;

View File

@ -1078,7 +1078,7 @@ static enum prep_encoded_ret {
/* Can we just write the entire extent as is? */ /* Can we just write the entire extent as is? */
if (op->crc.uncompressed_size == op->crc.live_size && if (op->crc.uncompressed_size == op->crc.live_size &&
op->crc.compressed_size <= wp->sectors_free && op->crc.compressed_size <= wp->sectors_free &&
(op->crc.compression_type == op->compression_type || (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) ||
op->incompressible)) { op->incompressible)) {
if (!crc_is_compressed(op->crc) && if (!crc_is_compressed(op->crc) &&
op->csum_type != op->crc.csum_type && op->csum_type != op->crc.csum_type &&
@ -1126,7 +1126,7 @@ static enum prep_encoded_ret {
/* /*
* If we want to compress the data, it has to be decrypted: * If we want to compress the data, it has to be decrypted:
*/ */
if ((op->compression_type || if ((op->compression_opt ||
bch2_csum_type_is_encryption(op->crc.csum_type) != bch2_csum_type_is_encryption(op->crc.csum_type) !=
bch2_csum_type_is_encryption(op->csum_type)) && bch2_csum_type_is_encryption(op->csum_type)) &&
bch2_write_decrypt(op)) bch2_write_decrypt(op))
@ -1173,7 +1173,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
} }
if (ec_buf || if (ec_buf ||
op->compression_type || op->compression_opt ||
(op->csum_type && (op->csum_type &&
!(op->flags & BCH_WRITE_PAGES_STABLE)) || !(op->flags & BCH_WRITE_PAGES_STABLE)) ||
(bch2_csum_type_is_encryption(op->csum_type) && (bch2_csum_type_is_encryption(op->csum_type) &&
@ -1196,16 +1196,16 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
dst->bi_iter.bi_size < c->opts.encoded_extent_max) dst->bi_iter.bi_size < c->opts.encoded_extent_max)
break; break;
BUG_ON(op->compression_type && BUG_ON(op->compression_opt &&
(op->flags & BCH_WRITE_DATA_ENCODED) && (op->flags & BCH_WRITE_DATA_ENCODED) &&
bch2_csum_type_is_encryption(op->crc.csum_type)); bch2_csum_type_is_encryption(op->crc.csum_type));
BUG_ON(op->compression_type && !bounce); BUG_ON(op->compression_opt && !bounce);
crc.compression_type = op->incompressible crc.compression_type = op->incompressible
? BCH_COMPRESSION_TYPE_incompressible ? BCH_COMPRESSION_TYPE_incompressible
: op->compression_type : op->compression_opt
? bch2_bio_compress(c, dst, &dst_len, src, &src_len, ? bch2_bio_compress(c, dst, &dst_len, src, &src_len,
op->compression_type) op->compression_opt)
: 0; : 0;
if (!crc_is_compressed(crc)) { if (!crc_is_compressed(crc)) {
dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);

View File

@ -86,7 +86,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
op->written = 0; op->written = 0;
op->error = 0; op->error = 0;
op->csum_type = bch2_data_checksum_type(c, opts); op->csum_type = bch2_data_checksum_type(c, opts);
op->compression_type = bch2_compression_opt_to_type[opts.compression]; op->compression_opt = opts.compression;
op->nr_replicas = 0; op->nr_replicas = 0;
op->nr_replicas_required = c->opts.data_replicas_required; op->nr_replicas_required = c->opts.data_replicas_required;
op->watermark = BCH_WATERMARK_normal; op->watermark = BCH_WATERMARK_normal;

View File

@ -115,8 +115,8 @@ struct bch_write_op {
u16 flags; u16 flags;
s16 error; /* dio write path expects it to hold -ERESTARTSYS... */ s16 error; /* dio write path expects it to hold -ERESTARTSYS... */
unsigned compression_opt:8;
unsigned csum_type:4; unsigned csum_type:4;
unsigned compression_type:4;
unsigned nr_replicas:4; unsigned nr_replicas:4;
unsigned nr_replicas_required:4; unsigned nr_replicas_required:4;
unsigned watermark:3; unsigned watermark:3;
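
For context on the width change: the op now carries the full encoded option rather than a type, since bch2_compression_encode() packs a 4-bit type and a 4-bit level:

	/* was: unsigned compression_type:4 -- a bch_compression_type value
	 * now: compression_opt:8 -- type | (level << 4), decoded at use */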

View File

@ -5,6 +5,7 @@
#include "bcachefs.h" #include "bcachefs.h"
#include "compress.h" #include "compress.h"
#include "disk_groups.h" #include "disk_groups.h"
#include "error.h"
#include "opts.h" #include "opts.h"
#include "super-io.h" #include "super-io.h"
#include "util.h" #include "util.h"
@ -16,6 +17,11 @@ const char * const bch2_error_actions[] = {
NULL NULL
}; };
const char * const bch2_fsck_fix_opts[] = {
BCH_FIX_ERRORS_OPTS()
NULL
};
const char * const bch2_version_upgrade_opts[] = { const char * const bch2_version_upgrade_opts[] = {
BCH_VERSION_UPGRADE_OPTS() BCH_VERSION_UPGRADE_OPTS()
NULL NULL
@ -89,6 +95,37 @@ const char * const bch2_fs_usage_types[] = {
#undef x #undef x
int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
struct printbuf *err)
{
if (!val) {
*res = FSCK_FIX_yes;
} else {
int ret = match_string(bch2_fsck_fix_opts, -1, val);
if (ret < 0 && err)
prt_str(err, "fix_errors: invalid selection");
if (ret < 0)
return ret;
*res = ret;
}
return 0;
}
void bch2_opt_fix_errors_to_text(struct printbuf *out,
struct bch_fs *c,
struct bch_sb *sb,
u64 v)
{
prt_str(out, bch2_fsck_fix_opts[v]);
}
static const struct bch_opt_fn bch2_opt_fix_errors = {
.parse = bch2_opt_fix_errors_parse,
.to_text = bch2_opt_fix_errors_to_text,
};
const char * const bch2_d_types[BCH_DT_MAX] = { const char * const bch2_d_types[BCH_DT_MAX] = {
[DT_UNKNOWN] = "unknown", [DT_UNKNOWN] = "unknown",
[DT_FIFO] = "fifo", [DT_FIFO] = "fifo",
@ -167,11 +204,9 @@ const struct bch_option bch2_opt_table[] = {
#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \ #define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \
.min = _min, .max = _max .min = _min, .max = _max
#define OPT_STR(_choices) .type = BCH_OPT_STR, \ #define OPT_STR(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = ARRAY_SIZE(_choices),\ .min = 0, .max = ARRAY_SIZE(_choices), \
.choices = _choices .choices = _choices
#define OPT_FN(_fn) .type = BCH_OPT_FN, \ #define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
.parse = _fn##_parse, \
.to_text = _fn##_to_text
#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \ #define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \
[Opt_##_name] = { \ [Opt_##_name] = { \
@ -267,15 +302,26 @@ int bch2_opt_parse(struct bch_fs *c,
switch (opt->type) { switch (opt->type) {
case BCH_OPT_BOOL: case BCH_OPT_BOOL:
ret = kstrtou64(val, 10, res); if (val) {
ret = kstrtou64(val, 10, res);
} else {
ret = 0;
*res = 1;
}
if (ret < 0 || (*res != 0 && *res != 1)) { if (ret < 0 || (*res != 0 && *res != 1)) {
if (err) if (err)
prt_printf(err, "%s: must be bool", prt_printf(err, "%s: must be bool", opt->attr.name);
opt->attr.name);
return ret; return ret;
} }
break; break;
case BCH_OPT_UINT: case BCH_OPT_UINT:
if (!val) {
prt_printf(err, "%s: required value",
opt->attr.name);
return -EINVAL;
}
ret = opt->flags & OPT_HUMAN_READABLE ret = opt->flags & OPT_HUMAN_READABLE
? bch2_strtou64_h(val, res) ? bch2_strtou64_h(val, res)
: kstrtou64(val, 10, res); : kstrtou64(val, 10, res);
@ -287,6 +333,12 @@ int bch2_opt_parse(struct bch_fs *c,
} }
break; break;
case BCH_OPT_STR: case BCH_OPT_STR:
if (!val) {
prt_printf(err, "%s: required value",
opt->attr.name);
return -EINVAL;
}
ret = match_string(opt->choices, -1, val); ret = match_string(opt->choices, -1, val);
if (ret < 0) { if (ret < 0) {
if (err) if (err)
@ -298,10 +350,7 @@ int bch2_opt_parse(struct bch_fs *c,
*res = ret; *res = ret;
break; break;
case BCH_OPT_FN: case BCH_OPT_FN:
if (!c) ret = opt->fn.parse(c, val, res, err);
return 0;
ret = opt->parse(c, val, res);
if (ret < 0) { if (ret < 0) {
if (err) if (err)
prt_printf(err, "%s: parse error", prt_printf(err, "%s: parse error",
@ -341,10 +390,10 @@ void bch2_opt_to_text(struct printbuf *out,
if (flags & OPT_SHOW_FULL_LIST) if (flags & OPT_SHOW_FULL_LIST)
prt_string_option(out, opt->choices, v); prt_string_option(out, opt->choices, v);
else else
prt_printf(out, "%s", opt->choices[v]); prt_str(out, opt->choices[v]);
break; break;
case BCH_OPT_FN: case BCH_OPT_FN:
opt->to_text(out, c, sb, v); opt->fn.to_text(out, c, sb, v);
break; break;
default: default:
BUG(); BUG();
@ -405,31 +454,19 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
name = strsep(&opt, "="); name = strsep(&opt, "=");
val = opt; val = opt;
if (val) { id = bch2_mount_opt_lookup(name);
id = bch2_mount_opt_lookup(name);
if (id < 0)
goto bad_opt;
ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err); /* Check for the form "noopt", negation of a boolean opt: */
if (ret < 0) if (id < 0 &&
goto bad_val; !val &&
} else { !strncmp("no", name, 2)) {
id = bch2_mount_opt_lookup(name); id = bch2_mount_opt_lookup(name + 2);
v = 1; val = "0";
if (id < 0 &&
!strncmp("no", name, 2)) {
id = bch2_mount_opt_lookup(name + 2);
v = 0;
}
if (id < 0)
goto bad_opt;
if (bch2_opt_table[id].type != BCH_OPT_BOOL)
goto no_val;
} }
if (id < 0)
goto bad_opt;
if (!(bch2_opt_table[id].flags & OPT_MOUNT)) if (!(bch2_opt_table[id].flags & OPT_MOUNT))
goto bad_opt; goto bad_opt;
@ -442,6 +479,10 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
!IS_ENABLED(CONFIG_BCACHEFS_QUOTA)) !IS_ENABLED(CONFIG_BCACHEFS_QUOTA))
goto bad_opt; goto bad_opt;
ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
if (ret < 0)
goto bad_val;
bch2_opt_set_by_id(opts, id, v); bch2_opt_set_by_id(opts, id, v);
} }
@ -456,10 +497,6 @@ bad_val:
pr_err("Invalid mount option %s", err.buf); pr_err("Invalid mount option %s", err.buf);
ret = -1; ret = -1;
goto out; goto out;
no_val:
pr_err("Mount option %s requires a value", name);
ret = -1;
goto out;
out: out:
kfree(copied_opts_start); kfree(copied_opts_start);
printbuf_exit(&err); printbuf_exit(&err);
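
Examples of what the reworked mount-option loop now accepts (derived from the code above):

/* "degraded"            -> val == NULL, BCH_OPT_BOOL parses as 1
 * "nodegraded"          -> lookup fails, retried as "degraded" with val "0"
 * "fix_errors"          -> OPT_FN with val == NULL -> FSCK_FIX_yes
 * "fix_errors=ask"      -> OPT_FN parse -> FSCK_FIX_ask
 * "compression=zstd:7"  -> OPT_FN parse via bch2_opt_compression_parse() */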

View File

@ -8,7 +8,10 @@
#include <linux/sysfs.h> #include <linux/sysfs.h>
#include "bcachefs_format.h" #include "bcachefs_format.h"
struct bch_fs;
extern const char * const bch2_error_actions[]; extern const char * const bch2_error_actions[];
extern const char * const bch2_fsck_fix_opts[];
extern const char * const bch2_version_upgrade_opts[]; extern const char * const bch2_version_upgrade_opts[];
extern const char * const bch2_sb_features[]; extern const char * const bch2_sb_features[];
extern const char * const bch2_sb_compat[]; extern const char * const bch2_sb_compat[];
@ -67,6 +70,11 @@ enum opt_type {
BCH_OPT_FN, BCH_OPT_FN,
}; };
struct bch_opt_fn {
int (*parse)(struct bch_fs *, const char *, u64 *, struct printbuf *);
void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
};
/** /**
* x(name, shortopt, type, in mem type, mode, sb_opt) * x(name, shortopt, type, in mem type, mode, sb_opt)
* *
@ -98,6 +106,18 @@ enum opt_type {
#define BCACHEFS_VERBOSE_DEFAULT false #define BCACHEFS_VERBOSE_DEFAULT false
#endif #endif
#define BCH_FIX_ERRORS_OPTS() \
x(exit, 0) \
x(yes, 1) \
x(no, 2) \
x(ask, 3)
enum fsck_err_opts {
#define x(t, n) FSCK_FIX_##t,
BCH_FIX_ERRORS_OPTS()
#undef x
};
#define BCH_OPTS() \ #define BCH_OPTS() \
x(block_size, u16, \ x(block_size, u16, \
OPT_FS|OPT_FORMAT| \ OPT_FS|OPT_FORMAT| \
@ -154,12 +174,12 @@ enum opt_type {
NULL, NULL) \ NULL, NULL) \
x(compression, u8, \ x(compression, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_STR(bch2_compression_opts), \ OPT_FN(bch2_opt_compression), \
BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \ BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \
NULL, NULL) \ NULL, NULL) \
x(background_compression, u8, \ x(background_compression, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_STR(bch2_compression_opts), \ OPT_FN(bch2_opt_compression), \
BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \ BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \
NULL, NULL) \ NULL, NULL) \
x(str_hash, u8, \ x(str_hash, u8, \
@ -318,8 +338,8 @@ enum opt_type {
NULL, "Run fsck on mount") \ NULL, "Run fsck on mount") \
x(fix_errors, u8, \ x(fix_errors, u8, \
OPT_FS|OPT_MOUNT, \ OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \ OPT_FN(bch2_opt_fix_errors), \
BCH2_NO_SB_OPT, false, \ BCH2_NO_SB_OPT, FSCK_FIX_exit, \
NULL, "Fix errors during fsck without asking") \ NULL, "Fix errors during fsck without asking") \
x(ratelimit_errors, u8, \ x(ratelimit_errors, u8, \
OPT_FS|OPT_MOUNT, \ OPT_FS|OPT_MOUNT, \
@ -495,8 +515,8 @@ struct bch_option {
u64 min, max; u64 min, max;
const char * const *choices; const char * const *choices;
int (*parse)(struct bch_fs *, const char *, u64 *);
void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); struct bch_opt_fn fn;
const char *hint; const char *hint;
const char *help; const char *help;
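
The BCH_FIX_ERRORS_OPTS() x-macro is expanded twice: here for the enum, and in opts.c for the string table. A sketch of the two expansions:

	enum fsck_err_opts {
		FSCK_FIX_exit, FSCK_FIX_yes, FSCK_FIX_no, FSCK_FIX_ask,
	};

	const char * const bch2_fsck_fix_opts[] = {
		"exit", "yes", "no", "ask",
		NULL
	};

Keeping both in one list guarantees that the index match_string() returns in bch2_opt_fix_errors_parse() is exactly the corresponding FSCK_FIX_* value.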

View File

@ -5,6 +5,7 @@
#include "btree_iter.h" #include "btree_iter.h"
#include "buckets.h" #include "buckets.h"
#include "clock.h" #include "clock.h"
#include "compress.h"
#include "disk_groups.h" #include "disk_groups.h"
#include "errcode.h" #include "errcode.h"
#include "extents.h" #include "extents.h"
@ -45,7 +46,7 @@ static bool rebalance_pred(struct bch_fs *c, void *arg,
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
if (!p.ptr.cached && if (!p.ptr.cached &&
p.crc.compression_type != p.crc.compression_type !=
bch2_compression_opt_to_type[io_opts->background_compression]) bch2_compression_opt_to_type(io_opts->background_compression))
data_opts->rewrite_ptrs |= 1U << i; data_opts->rewrite_ptrs |= 1U << i;
i++; i++;
} }
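
Note that rebalance matches on compression type alone: bch2_compression_opt_to_type() discards the level nibble, and extents record only a compression_type, so changing just the level of background_compression does not by itself mark existing extents for rewrite.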

View File

@ -1162,12 +1162,9 @@ static void check_version_upgrade(struct bch_fs *c)
prt_str(&buf, " incomplete\n"); prt_str(&buf, " incomplete\n");
} }
prt_str(&buf, "Doing "); prt_printf(&buf, "Doing %s version upgrade from ",
if (BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version)) BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version)
prt_str(&buf, "incompatible"); ? "incompatible" : "compatible");
else
prt_str(&buf, "compatible");
prt_str(&buf, "version upgrade from ");
bch2_version_to_text(&buf, old_version); bch2_version_to_text(&buf, old_version);
prt_str(&buf, " to "); prt_str(&buf, " to ");
bch2_version_to_text(&buf, new_version); bch2_version_to_text(&buf, new_version);
@ -1178,7 +1175,7 @@ static void check_version_upgrade(struct bch_fs *c)
prt_str(&buf, "fsck required"); prt_str(&buf, "fsck required");
c->recovery_passes_explicit |= recovery_passes; c->recovery_passes_explicit |= recovery_passes;
c->opts.fix_errors = FSCK_OPT_YES; c->opts.fix_errors = FSCK_FIX_yes;
} }
bch_info(c, "%s", buf.buf); bch_info(c, "%s", buf.buf);

View File

@ -12,9 +12,9 @@
static int bch2_subvolume_delete(struct btree_trans *, u32); static int bch2_subvolume_delete(struct btree_trans *, u32);
static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor) static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
{ {
struct snapshot_t *s = snapshot_t(c, id); const struct snapshot_t *s = __snapshot_t(t, id);
if (s->skip[2] <= ancestor) if (s->skip[2] <= ancestor)
return s->skip[2]; return s->skip[2];
@ -27,22 +27,83 @@ static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor)
bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{ {
struct snapshot_table *t;
EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots); EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);
rcu_read_lock();
t = rcu_dereference(c->snapshots);
while (id && id < ancestor) while (id && id < ancestor)
id = get_ancestor_below(c, id, ancestor); id = get_ancestor_below(t, id, ancestor);
rcu_read_unlock();
return id == ancestor; return id == ancestor;
} }
static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
{ {
struct snapshot_table *t;
rcu_read_lock();
t = rcu_dereference(c->snapshots);
while (id && id < ancestor) while (id && id < ancestor)
id = snapshot_t(c, id)->parent; id = __snapshot_t(t, id)->parent;
rcu_read_unlock();
return id == ancestor; return id == ancestor;
} }
static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
{
u32 depth;
rcu_read_lock();
depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
rcu_read_unlock();
return depth;
}
static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
size_t new_size;
struct snapshot_table *new, *old;
new_size = max(16UL, roundup_pow_of_two(idx + 1));
new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
if (!new)
return NULL;
old = c->snapshots;
if (old)
memcpy(new->s,
rcu_dereference_protected(c->snapshots, true)->s,
sizeof(new->s[0]) * c->snapshot_table_size);
rcu_assign_pointer(c->snapshots, new);
c->snapshot_table_size = new_size;
if (old)
kvfree_rcu(old);
return &rcu_dereference_protected(c->snapshots, true)->s[idx];
}
static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
lockdep_assert_held(&c->snapshot_table_lock);
if (likely(idx < c->snapshot_table_size))
return &rcu_dereference_protected(c->snapshots, true)->s[idx];
return __snapshot_t_mut(c, id);
}
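
The mutable-table helpers follow the usual RCU copy-and-publish shape, which is what makes the lockless readers above (bch2_snapshot_is_ancestor() and friends) safe. A condensed sketch of both sides:

	/* writer, under c->snapshot_table_lock (see the lockdep assert): */
	new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
	memcpy(new->s, old->s, old_bytes);	/* carry over existing entries */
	rcu_assign_pointer(c->snapshots, new);	/* publish; implies a barrier */
	kvfree_rcu(old);			/* freed only after readers drain */

	/* reader, no lock: */
	rcu_read_lock();
	t = rcu_dereference(c->snapshots);
	/* ... index t->s[] ... */
	rcu_read_unlock();
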
/* Snapshot tree: */ /* Snapshot tree: */
void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
@ -202,12 +263,15 @@ int bch2_mark_snapshot(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct snapshot_t *t; struct snapshot_t *t;
int ret = 0;
t = genradix_ptr_alloc(&c->snapshots, mutex_lock(&c->snapshot_table_lock);
U32_MAX - new.k->p.offset,
GFP_KERNEL); t = snapshot_t_mut(c, new.k->p.offset);
if (!t) if (!t) {
return -BCH_ERR_ENOMEM_mark_snapshot; ret = -BCH_ERR_ENOMEM_mark_snapshot;
goto err;
}
if (new.k->type == KEY_TYPE_snapshot) { if (new.k->type == KEY_TYPE_snapshot) {
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
@ -231,8 +295,9 @@ int bch2_mark_snapshot(struct btree_trans *trans,
t->subvol = 0; t->subvol = 0;
t->tree = 0; t->tree = 0;
} }
err:
return 0; mutex_unlock(&c->snapshot_table_lock);
return ret;
} }
static int snapshot_lookup(struct btree_trans *trans, u32 id, static int snapshot_lookup(struct btree_trans *trans, u32 id,
@ -285,9 +350,14 @@ static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
nr_live += ret; nr_live += ret;
} }
snapshot_t(c, id)->equiv = nr_live == 1 mutex_lock(&c->snapshot_table_lock);
? snapshot_t(c, child[live_idx])->equiv
snapshot_t_mut(c, id)->equiv = nr_live == 1
? snapshot_t_mut(c, child[live_idx])->equiv
: id; : id;
mutex_unlock(&c->snapshot_table_lock);
return 0; return 0;
} }
@ -505,16 +575,18 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans,
static u32 snapshot_rand_ancestor_get(struct bch_fs *c, u32 id) static u32 snapshot_rand_ancestor_get(struct bch_fs *c, u32 id)
{ {
struct snapshot_t *s; const struct snapshot_t *s;
if (!id) if (!id)
return 0; return 0;
rcu_read_lock();
s = snapshot_t(c, id); s = snapshot_t(c, id);
if (!s->parent) if (s->parent)
return id; id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
rcu_read_unlock();
return bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)); return id;
} }
static int snapshot_rand_ancestor_good(struct btree_trans *trans, static int snapshot_rand_ancestor_good(struct btree_trans *trans,
@ -613,9 +685,7 @@ static int check_snapshot(struct btree_trans *trans,
struct bch_snapshot v; struct bch_snapshot v;
struct bkey_i_snapshot *u; struct bkey_i_snapshot *u;
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
struct snapshot_t *parent = parent_id u32 real_depth;
? snapshot_t(c, parent_id)
: NULL;
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
bool should_have_subvol; bool should_have_subvol;
u32 i, id; u32 i, id;
@ -706,16 +776,18 @@ static int check_snapshot(struct btree_trans *trans,
} }
ret = 0; ret = 0;
if (fsck_err_on(le32_to_cpu(s.depth) != (parent ? parent->depth + 1 : 0), c, real_depth = bch2_snapshot_depth(c, parent_id);
if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, c,
"snapshot with incorrect depth fields, should be %u:\n %s", "snapshot with incorrect depth fields, should be %u:\n %s",
parent->depth + 1, real_depth,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u); ret = PTR_ERR_OR_ZERO(u);
if (ret) if (ret)
goto err; goto err;
u->v.depth = cpu_to_le32(parent ? parent->depth + 1 : 0); u->v.depth = cpu_to_le32(real_depth);
s = u->v; s = u->v;
} }
@ -799,9 +871,13 @@ static int check_subvol(struct btree_trans *trans,
if (!BCH_SUBVOLUME_SNAP(subvol.v)) { if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot)); u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
u32 snapshot_tree = snapshot_t(c, snapshot_root)->tree; u32 snapshot_tree;
struct bch_snapshot_tree st; struct bch_snapshot_tree st;
rcu_read_lock();
snapshot_tree = snapshot_t(c, snapshot_root)->tree;
rcu_read_unlock();
ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st); ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
@ -845,7 +921,7 @@ int bch2_check_subvols(struct bch_fs *c)
void bch2_fs_snapshots_exit(struct bch_fs *c) void bch2_fs_snapshots_exit(struct bch_fs *c)
{ {
genradix_free(&c->snapshots); kfree(c->snapshots);
} }
int bch2_snapshots_read(struct bch_fs *c) int bch2_snapshots_read(struct bch_fs *c)
@ -987,7 +1063,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
struct bkey_i_snapshot *n; struct bkey_i_snapshot *n;
struct bkey_s_c k; struct bkey_s_c k;
unsigned i, j; unsigned i, j;
u32 depth = parent ? snapshot_t(c, parent)->depth + 1 : 0; u32 depth = bch2_snapshot_depth(c, parent);
int ret; int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
@ -1126,7 +1202,7 @@ static int snapshot_delete_key(struct btree_trans *trans,
struct bpos *last_pos) struct bpos *last_pos)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv; u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
if (!bkey_eq(k.k->p, *last_pos)) if (!bkey_eq(k.k->p, *last_pos))
equiv_seen->nr = 0; equiv_seen->nr = 0;
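
This file's central change replaces the genradix snapshot table with a flat array published via RCU: readers fetch c->snapshots once under rcu_read_lock(), while writers resize under snapshot_table_lock and publish the new table with rcu_assign_pointer(), deferring the free of the old one with kvfree_rcu(). Below is a minimal single-writer sketch of the same grow-and-publish pattern in portable C11. Acquire/release atomics stand in for the RCU primitives, the old table is deliberately leaked where the kernel waits for a grace period, and all names are invented:

#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct entry { uint32_t parent, equiv; };

struct table {
        size_t          size;
        struct entry    s[];            /* one allocation, like snapshot_table */
};

static _Atomic(struct table *) cur_table;

/* Reader side: one acquire load, then plain indexing, no locks. */
static const struct entry *lookup(size_t idx)
{
        struct table *t = atomic_load_explicit(&cur_table, memory_order_acquire);

        return t && idx < t->size ? &t->s[idx] : NULL;
}

/* Writer side: caller is assumed to hold the single writer lock. */
static struct entry *grow_and_get(size_t idx)
{
        struct table *old = atomic_load_explicit(&cur_table, memory_order_relaxed);
        size_t new_size = old ? old->size : 16;
        struct table *new;

        while (new_size <= idx)
                new_size *= 2;

        new = calloc(1, sizeof(*new) + new_size * sizeof(new->s[0]));
        if (!new)
                return NULL;

        new->size = new_size;
        if (old)
                memcpy(new->s, old->s, old->size * sizeof(new->s[0]));

        /* Release store: readers observe either the old table or the fully
         * initialized new one, never a partially copied table. */
        atomic_store_explicit(&cur_table, new, memory_order_release);

        /* 'old' is leaked here; kvfree_rcu() would free it once every
         * pre-existing reader has finished. */
        return &new->s[idx];
}

The real table is indexed by U32_MAX - id, which suggests snapshot IDs are handed out counting down from U32_MAX so that recently created IDs land at small, dense indices.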

View File

@@ -32,17 +32,31 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
         .min_val_size   = 24,                   \
 })
 
-static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
+static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
 {
-        return genradix_ptr(&c->snapshots, U32_MAX - id);
+        return &t->s[U32_MAX - id];
 }
 
-static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
+static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
+{
+        return __snapshot_t(rcu_dereference(c->snapshots), id);
+}
+
+static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
 {
         return snapshot_t(c, id)->parent;
 }
 
-static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
+static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
+{
+        rcu_read_lock();
+        id = __bch2_snapshot_parent_early(c, id);
+        rcu_read_unlock();
+
+        return id;
+}
+
+static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
         u32 parent = snapshot_t(c, id)->parent;
@@ -59,10 +73,21 @@ static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
 #endif
 }
 
+static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
+{
+        rcu_read_lock();
+        id = __bch2_snapshot_parent(c, id);
+        rcu_read_unlock();
+
+        return id;
+}
+
 static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
 {
+        rcu_read_lock();
         while (n--)
-                id = bch2_snapshot_parent(c, id);
+                id = __bch2_snapshot_parent(c, id);
+        rcu_read_unlock();
 
         return id;
 }
@@ -71,37 +96,60 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
 {
         u32 parent;
 
-        while ((parent = bch2_snapshot_parent(c, id)))
+        rcu_read_lock();
+        while ((parent = __bch2_snapshot_parent(c, id)))
                 id = parent;
+        rcu_read_unlock();
 
         return id;
 }
 
-static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
+static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
 {
         return snapshot_t(c, id)->equiv;
 }
 
-static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
+static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
 {
-        return id == snapshot_t(c, id)->equiv;
+        rcu_read_lock();
+        id = __bch2_snapshot_equiv(c, id);
+        rcu_read_unlock();
+
+        return id;
 }
 
-static inline u32 bch2_snapshot_internal_node(struct bch_fs *c, u32 id)
+static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
 {
-        struct snapshot_t *s = snapshot_t(c, id);
+        return id == bch2_snapshot_equiv(c, id);
+}
 
-        return s->children[0] || s->children[1];
+static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
+{
+        const struct snapshot_t *s;
+        bool ret;
+
+        rcu_read_lock();
+        s = snapshot_t(c, id);
+        ret = s->children[0];
+        rcu_read_unlock();
+
+        return ret;
+}
+
+static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
+{
+        return !bch2_snapshot_is_internal_node(c, id);
 }
 
 static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
 {
-        struct snapshot_t *s;
-        u32 parent = bch2_snapshot_parent(c, id);
+        const struct snapshot_t *s;
+        u32 parent = __bch2_snapshot_parent(c, id);
 
         if (!parent)
                 return 0;
 
-        s = snapshot_t(c, bch2_snapshot_parent(c, id));
+        s = snapshot_t(c, __bch2_snapshot_parent(c, id));
         if (id == s->children[0])
                 return s->children[1];
         if (id == s->children[1])
@@ -113,9 +161,15 @@ bool bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32);
 
 static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
 {
-        struct snapshot_t *t = snapshot_t(c, id);
+        const struct snapshot_t *t;
+        bool ret;
 
-        return (t->children[0]|t->children[1]) != 0;
+        rcu_read_lock();
+        t = snapshot_t(c, id);
+        ret = (t->children[0]|t->children[1]) != 0;
+        rcu_read_unlock();
+
+        return ret;
 }
 
 static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id)
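
Two conventions in this header deserve a note: the double-underscore variants assume the caller already holds rcu_read_lock(), so loops such as bch2_snapshot_nth_parent() take the lock once rather than per hop, and the plain snapshot_t() now returns a const pointer, so a stray write that bypasses snapshot_table_lock fails to compile. A hypothetical misuse, for illustration:

        snapshot_t(c, id)->equiv = 0;           /* compile error: assignment
                                                 * into a read-only object */

        mutex_lock(&c->snapshot_table_lock);    /* OK: mutation goes through
                                                 * the _mut path in the .c file */
        snapshot_t_mut(c, id)->equiv = 0;
        mutex_unlock(&c->snapshot_table_lock);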

View File

@@ -16,6 +16,10 @@ struct snapshot_t {
         u32                     equiv;
 };
 
+struct snapshot_table {
+        struct snapshot_t       s[0];
+};
+
 typedef struct {
         u32             subvol;
         u64             inum;
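
The new struct keeps its entries in a zero-length trailing array, so header and entries are a single allocation sized with struct_size(new, s, new_size), as seen in __snapshot_t_mut() above (kvzalloc falls back from kmalloc to vmalloc for large tables). A userspace sketch of the sizing arithmetic, with stand-in types and a standard C flexible array member in place of the s[0] extension:

#include <stdint.h>
#include <stdlib.h>

struct entry_x { uint32_t parent, equiv; };     /* stand-in fields */

struct table_x {
        size_t          nr;
        struct entry_x  s[];    /* C99 spelling of the s[0] idiom */
};

/* struct_size(t, s, n) is sizeof(*t) + n * sizeof(t->s[0]), with overflow
 * checking in the kernel; the plain form is shown here. */
static struct table_x *table_alloc(size_t nr)
{
        struct table_x *t = calloc(1, sizeof(struct table_x) +
                                      nr * sizeof(struct entry_x));
        if (t)
                t->nr = nr;
        return t;
}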

View File

@@ -658,11 +658,18 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
         struct printbuf err = PRINTBUF;
         __le64 *i;
         int ret;
+#ifndef __KERNEL__
+retry:
+#endif
 
         memset(sb, 0, sizeof(*sb));
         sb->mode        = FMODE_READ;
         sb->have_bio    = true;
 
+#ifndef __KERNEL__
+        if (opt_get(*opts, direct_io) == false)
+                sb->mode |= FMODE_BUFFERED;
+#endif
+
         if (!opt_get(*opts, noexcl))
                 sb->mode |= FMODE_EXCL;
@@ -747,7 +754,13 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
 got_super:
         if (le16_to_cpu(sb->sb->block_size) << 9 <
-            bdev_logical_block_size(sb->bdev)) {
+            bdev_logical_block_size(sb->bdev) &&
+            opt_get(*opts, direct_io)) {
+#ifndef __KERNEL__
+                opt_set(*opts, direct_io, false);
+                bch2_free_super(sb);
+                goto retry;
+#endif
                 prt_printf(&err, "block size (%u) smaller than device block size (%u)",
                            le16_to_cpu(sb->sb->block_size) << 9,
                            bdev_logical_block_size(sb->bdev));
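
The retry exists because O_DIRECT I/O must be aligned to the device's logical block size; a filesystem formatted with a smaller block size can still be read through the page cache, so the userspace build reopens in buffered mode rather than failing. A standalone sketch of the same probe-and-fallback, where BLKSSZGET is the standard ioctl and everything else is invented:

#define _GNU_SOURCE             /* O_DIRECT */
#include <fcntl.h>
#include <linux/fs.h>           /* BLKSSZGET */
#include <sys/ioctl.h>
#include <unistd.h>

/* Open 'path' with O_DIRECT if 'fs_block_size' (bytes) permits it,
 * otherwise fall back to a buffered open. */
static int open_for_sb(const char *path, unsigned fs_block_size)
{
        int fd = open(path, O_RDONLY|O_DIRECT);
        int lbs;

        if (fd >= 0 &&
            (ioctl(fd, BLKSSZGET, &lbs) || fs_block_size < (unsigned) lbs)) {
                /* direct I/O can't do sub-logical-block reads: fall back */
                close(fd);
                fd = -1;
        }

        return fd >= 0 ? fd : open(path, O_RDONLY);
}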

View File

@@ -344,6 +344,19 @@ static int bch2_fs_read_write_late(struct bch_fs *c)
 {
         int ret;
 
+        /*
+         * Data move operations can't run until after check_snapshots has
+         * completed, and bch2_snapshot_is_ancestor() is available.
+         *
+         * Ideally we'd start copygc/rebalance earlier instead of waiting for
+         * all of recovery/fsck to complete:
+         */
+        ret = bch2_copygc_start(c);
+        if (ret) {
+                bch_err(c, "error starting copygc thread");
+                return ret;
+        }
+
         ret = bch2_rebalance_start(c);
         if (ret) {
                 bch_err(c, "error starting rebalance thread");
@@ -403,12 +416,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
                 return ret;
         }
 
-        ret = bch2_copygc_start(c);
-        if (ret) {
-                bch_err(c, "error starting copygc thread");
-                return ret;
-        }
-
         if (!early) {
                 ret = bch2_fs_read_write_late(c);
                 if (ret)
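
The move is forced by the assertion added in the first hunk of this commit, reproduced below: copygc consults snapshot ancestry to decide what data is still live, so it may only start once the check_snapshots recovery pass has run. __bch2_fs_read_write(early = true) runs during recovery, while bch2_fs_read_write_late() runs only after recovery and fsck complete.

        /* from bch2_snapshot_is_ancestor(), earlier in this commit: */
        EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots);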

View File

@@ -64,7 +64,7 @@ static inline void *vpmalloc_noprof(size_t size, gfp_t gfp_mask)
 {
         return (void *) get_free_pages_noprof(gfp_mask|__GFP_NOWARN,
                                               get_order(size)) ?:
-                __vmalloc(size, gfp_mask);
+                __vmalloc_noprof(size, gfp_mask);
 }
 #define vpmalloc(_size, _gfp)   alloc_hooks(vpmalloc_noprof(_size, _gfp))
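
The fix is a single suffix, but it matters: as the #define on the last line shows, plain-named allocators in this scheme are alloc_hooks() wrappers around their _noprof twins, so calling __vmalloc() from inside a _noprof helper would route the vmalloc fallback through the accounting hooks again and charge it to the wrong call site. A toy model of the convention, with all names invented:

#include <stdio.h>
#include <stdlib.h>

/* The plain name records the allocation site exactly once, at the outermost
 * caller; _noprof helpers must therefore call only other _noprof variants. */
#define alloc_hooks(_do)        ({ printf("charged to %s\n", __func__); (_do); })

static inline void *big_alloc_noprof(size_t size)
{
        return malloc(size);    /* stand-in for the real _noprof primitives */
}

#define big_alloc(_size)        alloc_hooks(big_alloc_noprof(_size))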

View File

@@ -183,16 +183,19 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
         else if (mode & FMODE_WRITE)
                 flags = O_WRONLY;
 
+        if (!(mode & FMODE_BUFFERED))
+                flags |= O_DIRECT;
+
 #if 0
         /* using O_EXCL doesn't work with opening twice for an O_SYNC fd: */
         if (mode & FMODE_EXCL)
                 flags |= O_EXCL;
 #endif
-        buffered_fd = open(path, flags);
+        buffered_fd = open(path, flags & ~O_DIRECT);
         if (buffered_fd < 0)
                 return ERR_PTR(-errno);
 
-        fd = open(path, flags|O_DIRECT);
+        fd = open(path, flags);
         if (fd < 0)
                 fd = dup(buffered_fd);
         if (fd < 0) {
@@ -200,9 +203,9 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
                 return ERR_PTR(-errno);
         }
 
-        sync_fd = open(path, flags|O_DIRECT|O_SYNC);
+        sync_fd = open(path, flags|O_SYNC);
         if (sync_fd < 0)
-                sync_fd = open(path, flags|O_SYNC);
+                sync_fd = open(path, (flags & ~O_DIRECT)|O_SYNC);
         if (sync_fd < 0) {
                 close(fd);
                 close(buffered_fd);
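
Read together with the super-io.c hunk above, the O_DIRECT decision is now made in one place and every subsequent open() derives from it. The flow, summarized:

        /*
         * direct_io=false option  -> sb->mode |= FMODE_BUFFERED  (bch2_read_super)
         * FMODE_BUFFERED not set  -> flags |= O_DIRECT           (blkdev_get_by_path)
         * buffered_fd             -> always opened with flags & ~O_DIRECT
         * sync_fd                 -> tries flags|O_SYNC first, then retries
         *                            with O_DIRECT masked out
         */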

View File

@@ -138,7 +138,7 @@ fn cmd_list_inner(opt: Cli) -> anyhow::Result<()> {
     opt_set!(fs_opts, errors, bcachefs::bch_error_actions::BCH_ON_ERROR_continue as u8);
 
     if opt.fsck {
-        opt_set!(fs_opts, fix_errors, bcachefs::fsck_err_opts::FSCK_OPT_YES as u8);
+        opt_set!(fs_opts, fix_errors, bcachefs::fsck_err_opts::FSCK_FIX_yes as u8);
         opt_set!(fs_opts, norecovery, 0);
     }