Update bcachefs sources to 70b5fb5daf bcachefs: Fix error reporting from bch2_journal_flush_seq

Kent Overstreet 2021-11-16 09:12:15 -05:00
parent 8d86f9c15d
commit 9a058f1428
34 changed files with 769 additions and 492 deletions

View File

@ -1 +1 @@
6afa1fcb13a8c66b1cafa08027f484a3f846c52d
70b5fb5dafe66482c0d09a37bd547f56ef645bc4

View File

@ -340,7 +340,7 @@ btree_err:
if (unlikely(ret))
goto err;
bch2_inode_update_after_write(c, inode, &inode_u,
bch2_inode_update_after_write(&trans, inode, &inode_u,
ATTR_CTIME|ATTR_MODE);
set_cached_acl(&inode->v, type, acl);

View File

@ -218,8 +218,8 @@
#define bch2_fmt(_c, fmt) "bcachefs (%s): " fmt "\n", ((_c)->name)
#define bch2_fmt_inum(_c, _inum, fmt) "bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum)
#else
#define bch2_fmt(_c, fmt) fmt "\n"
#define bch2_fmt_inum(_c, _inum, fmt) "inum %llu: " fmt "\n", (_inum)
#define bch2_fmt(_c, fmt) "%s: " fmt "\n", ((_c)->name)
#define bch2_fmt_inum(_c, _inum, fmt) "%s inum %llu: " fmt "\n", ((_c)->name), (_inum)
#endif
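
In non-kernel builds the log prefix now includes the filesystem name as well, matching the kernel-side format minus the "bcachefs" prefix. A minimal sketch of the expansion, assuming bch_info() wraps printk() the way the surrounding macros do:

	bch_info(c, "mounted");
	/* -> printk(KERN_INFO "%s: " "mounted" "\n", c->name), e.g. "sda1: mounted" */
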
#define bch_info(c, fmt, ...) \
@ -495,6 +495,7 @@ struct bch_dev {
enum {
/* startup: */
BCH_FS_INITIALIZED,
BCH_FS_ALLOC_READ_DONE,
BCH_FS_ALLOC_CLEAN,
BCH_FS_ALLOCATOR_RUNNING,

View File

@ -1443,7 +1443,7 @@ LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE, struct bch_sb, flags[3], 29, 30);
* journal_seq_blacklist_v3: gates BCH_SB_FIELD_journal_seq_blacklist
* reflink: gates KEY_TYPE_reflink
* inline_data: gates KEY_TYPE_inline_data
* new_siphash: gates BCH_STR_HASH_SIPHASH
* new_siphash: gates BCH_STR_HASH_siphash
* new_extent_overwrite: gates BTREE_NODE_NEW_EXTENT_OVERWRITE
*/
#define BCH_SB_FEATURES() \
@ -1519,12 +1519,17 @@ enum bch_error_actions {
BCH_ON_ERROR_NR
};
#define BCH_STR_HASH_TYPES() \
x(crc32c, 0) \
x(crc64, 1) \
x(siphash_old, 2) \
x(siphash, 3)
enum bch_str_hash_type {
BCH_STR_HASH_CRC32C = 0,
BCH_STR_HASH_CRC64 = 1,
BCH_STR_HASH_SIPHASH_OLD = 2,
BCH_STR_HASH_SIPHASH = 3,
BCH_STR_HASH_NR = 4,
#define x(t, n) BCH_STR_HASH_##t = n,
BCH_STR_HASH_TYPES()
#undef x
BCH_STR_HASH_NR
};
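
Generating the enum from one x-macro list means derived tables can't drift out of sync with the constants. A sketch of how the same list also yields a printable name table (array name illustrative; the real tables are added to opts.c later in this diff):

	const char * const str_hash_type_names[] = {
	#define x(t, n) [n] = #t,
		BCH_STR_HASH_TYPES()	/* -> [0] = "crc32c", [1] = "crc64", ... */
	#undef x
		NULL
	};
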
#define BCH_STR_HASH_OPTS() \
@ -1539,34 +1544,39 @@ enum bch_str_hash_opts {
BCH_STR_HASH_OPT_NR
};
#define BCH_CSUM_TYPES() \
x(none, 0) \
x(crc32c_nonzero, 1) \
x(crc64_nonzero, 2) \
x(chacha20_poly1305_80, 3) \
x(chacha20_poly1305_128, 4) \
x(crc32c, 5) \
x(crc64, 6) \
x(xxhash, 7)
enum bch_csum_type {
BCH_CSUM_NONE = 0,
BCH_CSUM_CRC32C_NONZERO = 1,
BCH_CSUM_CRC64_NONZERO = 2,
BCH_CSUM_CHACHA20_POLY1305_80 = 3,
BCH_CSUM_CHACHA20_POLY1305_128 = 4,
BCH_CSUM_CRC32C = 5,
BCH_CSUM_CRC64 = 6,
BCH_CSUM_XXHASH = 7,
BCH_CSUM_NR = 8,
#define x(t, n) BCH_CSUM_##t = n,
BCH_CSUM_TYPES()
#undef x
BCH_CSUM_NR
};
static const unsigned bch_crc_bytes[] = {
[BCH_CSUM_NONE] = 0,
[BCH_CSUM_CRC32C_NONZERO] = 4,
[BCH_CSUM_CRC32C] = 4,
[BCH_CSUM_CRC64_NONZERO] = 8,
[BCH_CSUM_CRC64] = 8,
[BCH_CSUM_XXHASH] = 8,
[BCH_CSUM_CHACHA20_POLY1305_80] = 10,
[BCH_CSUM_CHACHA20_POLY1305_128] = 16,
[BCH_CSUM_none] = 0,
[BCH_CSUM_crc32c_nonzero] = 4,
[BCH_CSUM_crc32c] = 4,
[BCH_CSUM_crc64_nonzero] = 8,
[BCH_CSUM_crc64] = 8,
[BCH_CSUM_xxhash] = 8,
[BCH_CSUM_chacha20_poly1305_80] = 10,
[BCH_CSUM_chacha20_poly1305_128] = 16,
};
static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type)
{
switch (type) {
case BCH_CSUM_CHACHA20_POLY1305_80:
case BCH_CSUM_CHACHA20_POLY1305_128:
case BCH_CSUM_chacha20_poly1305_80:
case BCH_CSUM_chacha20_poly1305_128:
return true;
default:
return false;

View File

@ -301,7 +301,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
list_for_each_entry_safe(b, t, &bc->freeable, list) {
touched++;
if (freed >= nr)
if (touched >= nr)
break;
if (++i > 3 &&
@ -316,7 +316,7 @@ restart:
list_for_each_entry_safe(b, t, &bc->live, list) {
touched++;
if (freed >= nr) {
if (touched >= nr) {
/* Save position */
if (&t->list != &bc->live)
list_move_tail(&bc->live, &t->list);

View File

@ -49,7 +49,7 @@ static inline int __btree_path_cmp(const struct btree_path *l,
unsigned r_level)
{
return cmp_int(l->btree_id, r_btree_id) ?:
cmp_int(l->cached, r_cached) ?:
cmp_int((int) l->cached, (int) r_cached) ?:
bpos_cmp(l->pos, r_pos) ?:
-cmp_int(l->level, r_level);
}
@ -760,6 +760,43 @@ out:
return ret;
}
void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
struct bpos pos, bool key_cache)
{
struct btree_path *path;
unsigned idx;
char buf[100];
trans_for_each_path_inorder(trans, path, idx) {
int cmp = cmp_int(path->btree_id, id) ?:
cmp_int(path->cached, key_cache);
if (cmp > 0)
break;
if (cmp < 0)
continue;
if (!(path->nodes_locked & 1) ||
!path->should_be_locked)
continue;
if (!key_cache) {
if (bkey_cmp(pos, path->l[0].b->data->min_key) >= 0 &&
bkey_cmp(pos, path->l[0].b->key.k.p) <= 0)
return;
} else {
if (!bkey_cmp(pos, path->pos))
return;
}
}
bch2_dump_trans_paths_updates(trans);
panic("not locked: %s %s%s\n",
bch2_btree_ids[id],
(bch2_bpos_to_text(&PBUF(buf), pos), buf),
key_cache ? " cached" : "");
}
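
A sketch of the intended usage, mirroring the fs-io.c caller added later in this commit: after committing an inode update, assert that some btree path still holds the lock covering that key:

	/* illustrative caller: */
	bch2_assert_pos_locked(trans, BTREE_ID_inodes,
			       POS(0, inode_u.bi_inum), false);
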
#else
static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
@ -1647,19 +1684,19 @@ static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btr
return NULL;
}
static bool have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
{
struct btree_path *next;
next = prev_btree_path(trans, path);
if (next && path_l(next)->b == path_l(path)->b)
return true;
if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
return next;
next = next_btree_path(trans, path);
if (next && path_l(next)->b == path_l(path)->b)
return true;
if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
return next;
return false;
return NULL;
}
static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path)
@ -1686,11 +1723,20 @@ void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool inte
(dup = have_path_at_pos(trans, path))) {
dup->preserve = true;
path->preserve = false;
goto free;
}
if (!path->preserve &&
have_node_at_pos(trans, path))
__bch2_path_free(trans, path);
(dup = have_node_at_pos(trans, path)))
goto free;
return;
free:
if (path->should_be_locked &&
!btree_node_locked(dup, path->level))
return;
dup->should_be_locked |= path->should_be_locked;
__bch2_path_free(trans, path);
}
noinline __cold
@ -1704,11 +1750,13 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
btree_trans_verify_sorted(trans);
trans_for_each_path_inorder(trans, path, idx)
printk(KERN_ERR "path: idx %u ref %u:%u%s btree %s pos %s %pS\n",
printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree %s pos %s locks %u %pS\n",
path->idx, path->ref, path->intent_ref,
path->preserve ? " preserve" : "",
path->should_be_locked ? " S" : "",
path->preserve ? " P" : "",
bch2_btree_ids[path->btree_id],
(bch2_bpos_to_text(&PBUF(buf1), path->pos), buf1),
path->nodes_locked,
#ifdef CONFIG_BCACHEFS_DEBUG
(void *) path->ip_allocated
#else

View File

@ -140,9 +140,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bke
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_trans_verify_paths(struct btree_trans *);
void bch2_trans_verify_locks(struct btree_trans *);
void bch2_assert_pos_locked(struct btree_trans *, enum btree_id,
struct bpos, bool);
#else
static inline void bch2_trans_verify_paths(struct btree_trans *trans) {}
static inline void bch2_trans_verify_locks(struct btree_trans *trans) {}
static inline void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
struct bpos pos, bool key_cache) {}
#endif
void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
@ -227,8 +231,6 @@ static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos
iter->k.p.offset = iter->pos.offset = new_pos.offset;
iter->k.p.snapshot = iter->pos.snapshot = new_pos.snapshot;
iter->k.size = 0;
if (iter->path->ref == 1)
iter->path->should_be_locked = false;
}
static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *iter)

View File

@ -338,7 +338,8 @@ struct btree_insert_entry {
enum btree_id btree_id:8;
u8 level;
bool cached:1;
bool trans_triggers_run:1;
bool insert_trigger_run:1;
bool overwrite_trigger_run:1;
struct bkey_i *k;
struct btree_path *path;
unsigned long ip_allocated;

View File

@ -815,10 +815,112 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
return 0;
}
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
{
struct bkey _deleted = KEY(0, 0, 0);
struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL };
struct bkey_s_c old;
struct bkey unpacked;
struct btree_insert_entry *i = NULL, *btree_id_start = trans->updates;
bool trans_trigger_run;
unsigned btree_id = 0;
int ret = 0;
/*
*
* For a given btree, this algorithm runs insert triggers before
* overwrite triggers: this is so that when extents are being moved
* (e.g. by FALLOC_FL_INSERT_RANGE), we don't drop references before
* they are re-added.
*/
for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
while (btree_id_start < trans->updates + trans->nr_updates &&
btree_id_start->btree_id < btree_id)
btree_id_start++;
/*
* Running triggers will append more updates to the list of updates as
* we're walking it:
*/
do {
trans_trigger_run = false;
for (i = btree_id_start;
i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
i++) {
if (i->insert_trigger_run ||
(i->flags & BTREE_TRIGGER_NORUN) ||
!(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)))
continue;
BUG_ON(i->overwrite_trigger_run);
i->insert_trigger_run = true;
trans_trigger_run = true;
old = bch2_btree_path_peek_slot(i->path, &unpacked);
_deleted.p = i->path->pos;
if (old.k->type == i->k->k.type &&
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
i->overwrite_trigger_run = true;
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(i->k),
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|i->flags);
} else {
ret = bch2_trans_mark_key(trans, deleted, bkey_i_to_s_c(i->k),
BTREE_TRIGGER_INSERT|i->flags);
}
if (ret == -EINTR)
trace_trans_restart_mark(trans->ip, _RET_IP_,
i->btree_id, &i->path->pos);
if (ret)
return ret;
}
} while (trans_trigger_run);
do {
trans_trigger_run = false;
for (i = btree_id_start;
i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
i++) {
if (i->overwrite_trigger_run ||
(i->flags & BTREE_TRIGGER_NORUN) ||
!(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)))
continue;
BUG_ON(!i->insert_trigger_run);
i->overwrite_trigger_run = true;
trans_trigger_run = true;
old = bch2_btree_path_peek_slot(i->path, &unpacked);
_deleted.p = i->path->pos;
ret = bch2_trans_mark_key(trans, old, deleted,
BTREE_TRIGGER_OVERWRITE|i->flags);
if (ret == -EINTR)
trace_trans_restart_mark(trans->ip, _RET_IP_,
i->btree_id, &i->path->pos);
if (ret)
return ret;
}
} while (trans_trigger_run);
}
trans_for_each_update(trans, i)
BUG_ON(!(i->flags & BTREE_TRIGGER_NORUN) &&
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
(!i->insert_trigger_run || !i->overwrite_trigger_run));
return 0;
}
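
Why insert triggers must run before overwrite triggers: when an extent is moved (e.g. FALLOC_FL_INSERT_RANGE rewrites it at a shifted offset), the old and new keys reference the same buckets. A hedged sketch of the refcount arithmetic, with positions illustrative:

	/*
	 * updates queued by an insert-range style move:
	 *   insert:    extent [16..24) -> bucket B	B.refcount++
	 *   overwrite: extent [8..16)  -> bucket B	B.refcount--
	 *
	 * running all insert triggers first keeps B referenced throughout;
	 * interleaving could drop the count to zero and free B before the
	 * new key re-references it.
	 */
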
int __bch2_trans_commit(struct btree_trans *trans)
{
struct btree_insert_entry *i = NULL;
bool trans_trigger_run;
unsigned u64s;
int ret = 0;
@ -853,30 +955,9 @@ int __bch2_trans_commit(struct btree_trans *trans)
i->btree_id, i->k->k.p);
#endif
/*
* Running triggers will append more updates to the list of updates as
* we're walking it:
*/
do {
trans_trigger_run = false;
trans_for_each_update(trans, i) {
if ((BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
!i->trans_triggers_run) {
i->trans_triggers_run = true;
trans_trigger_run = true;
ret = bch2_trans_mark_update(trans, i->path,
i->k, i->flags);
if (unlikely(ret)) {
if (ret == -EINTR)
trace_trans_restart_mark(trans->ip, _RET_IP_,
i->btree_id, &i->path->pos);
goto out;
}
}
}
} while (trans_trigger_run);
ret = bch2_trans_commit_run_triggers(trans);
if (ret)
goto out;
trans_for_each_update(trans, i) {
BUG_ON(!i->path->should_be_locked);
@ -1285,7 +1366,7 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
if (i < trans->updates + trans->nr_updates &&
!btree_insert_entry_cmp(&n, i)) {
BUG_ON(i->trans_triggers_run);
BUG_ON(i->insert_trigger_run || i->overwrite_trigger_run);
/*
* This is a hack to ensure that inode creates update the btree,

View File

@ -117,6 +117,8 @@ static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
unsigned journal_seq,
bool gc)
{
BUG_ON(!gc && !journal_seq);
return this_cpu_ptr(gc
? ca->usage_gc
: ca->usage[journal_seq & JOURNAL_BUF_MASK]);
@ -142,6 +144,8 @@ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
unsigned journal_seq,
bool gc)
{
BUG_ON(!gc && !journal_seq);
return this_cpu_ptr(gc
? c->usage_gc
: c->usage[journal_seq & JOURNAL_BUF_MASK]);
@ -360,6 +364,13 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
struct bch_fs_usage *fs_usage;
struct bch_dev_usage *u;
/*
* Hack for bch2_fs_initialize path, where we're first marking sb and
* journal non-transactionally:
*/
if (!journal_seq && !test_bit(BCH_FS_INITIALIZED, &c->flags))
journal_seq = 1;
percpu_rwsem_assert_held(&c->mark_lock);
preempt_disable();
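
Context for the new BUG_ON()s: outside of gc, usage deltas are staged per in-flight journal buffer, indexed by the low bits of the journal sequence number:

	/*
	 *	idx = journal_seq & JOURNAL_BUF_MASK;
	 *	u   = this_cpu_ptr(c->usage[idx]);
	 *
	 * journal_seq == 0 means "no transaction"; it would silently alias
	 * buffer 0, hence the BUG_ON() - and the bch2_fs_initialize hack of
	 * substituting seq 1 for the early non-transactional sb/journal marks.
	 */
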
@ -1866,41 +1877,6 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
}
}
int bch2_trans_mark_update(struct btree_trans *trans,
struct btree_path *path,
struct bkey_i *new,
unsigned flags)
{
struct bkey _deleted = KEY(0, 0, 0);
struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL };
struct bkey_s_c old;
struct bkey unpacked;
int ret;
_deleted.p = path->pos;
if (unlikely(flags & BTREE_TRIGGER_NORUN))
return 0;
if (!btree_node_type_needs_gc(path->btree_id))
return 0;
old = bch2_btree_path_peek_slot(path, &unpacked);
if (old.k->type == new->k.type &&
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
} else {
ret = bch2_trans_mark_key(trans, deleted, bkey_i_to_s_c(new),
BTREE_TRIGGER_INSERT|flags) ?:
bch2_trans_mark_key(trans, old, deleted,
BTREE_TRIGGER_OVERWRITE|flags);
}
return ret;
}
static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
struct bch_dev *ca, size_t b,
enum bch_data_type type,

View File

@ -233,8 +233,6 @@ int bch2_mark_update(struct btree_trans *, struct btree_path *,
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
struct bkey_s_c, unsigned);
int bch2_trans_mark_update(struct btree_trans *, struct btree_path *,
struct bkey_i *, unsigned);
void bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,

View File

@ -35,18 +35,18 @@ struct bch2_checksum_state {
static void bch2_checksum_init(struct bch2_checksum_state *state)
{
switch (state->type) {
case BCH_CSUM_NONE:
case BCH_CSUM_CRC32C:
case BCH_CSUM_CRC64:
case BCH_CSUM_none:
case BCH_CSUM_crc32c:
case BCH_CSUM_crc64:
state->seed = 0;
break;
case BCH_CSUM_CRC32C_NONZERO:
case BCH_CSUM_crc32c_nonzero:
state->seed = U32_MAX;
break;
case BCH_CSUM_CRC64_NONZERO:
case BCH_CSUM_crc64_nonzero:
state->seed = U64_MAX;
break;
case BCH_CSUM_XXHASH:
case BCH_CSUM_xxhash:
xxh64_reset(&state->h64state, 0);
break;
default:
@ -57,15 +57,15 @@ static void bch2_checksum_init(struct bch2_checksum_state *state)
static u64 bch2_checksum_final(const struct bch2_checksum_state *state)
{
switch (state->type) {
case BCH_CSUM_NONE:
case BCH_CSUM_CRC32C:
case BCH_CSUM_CRC64:
case BCH_CSUM_none:
case BCH_CSUM_crc32c:
case BCH_CSUM_crc64:
return state->seed;
case BCH_CSUM_CRC32C_NONZERO:
case BCH_CSUM_crc32c_nonzero:
return state->seed ^ U32_MAX;
case BCH_CSUM_CRC64_NONZERO:
case BCH_CSUM_crc64_nonzero:
return state->seed ^ U64_MAX;
case BCH_CSUM_XXHASH:
case BCH_CSUM_xxhash:
return xxh64_digest(&state->h64state);
default:
BUG();
@ -75,17 +75,17 @@ static u64 bch2_checksum_final(const struct bch2_checksum_state *state)
static void bch2_checksum_update(struct bch2_checksum_state *state, const void *data, size_t len)
{
switch (state->type) {
case BCH_CSUM_NONE:
case BCH_CSUM_none:
return;
case BCH_CSUM_CRC32C_NONZERO:
case BCH_CSUM_CRC32C:
case BCH_CSUM_crc32c_nonzero:
case BCH_CSUM_crc32c:
state->seed = crc32c(state->seed, data, len);
break;
case BCH_CSUM_CRC64_NONZERO:
case BCH_CSUM_CRC64:
case BCH_CSUM_crc64_nonzero:
case BCH_CSUM_crc64:
state->seed = crc64_be(state->seed, data, len);
break;
case BCH_CSUM_XXHASH:
case BCH_CSUM_xxhash:
xxh64_update(&state->h64state, data, len);
break;
default:
@ -161,12 +161,12 @@ struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type,
struct nonce nonce, const void *data, size_t len)
{
switch (type) {
case BCH_CSUM_NONE:
case BCH_CSUM_CRC32C_NONZERO:
case BCH_CSUM_CRC64_NONZERO:
case BCH_CSUM_CRC32C:
case BCH_CSUM_XXHASH:
case BCH_CSUM_CRC64: {
case BCH_CSUM_none:
case BCH_CSUM_crc32c_nonzero:
case BCH_CSUM_crc64_nonzero:
case BCH_CSUM_crc32c:
case BCH_CSUM_xxhash:
case BCH_CSUM_crc64: {
struct bch2_checksum_state state;
state.type = type;
@ -177,8 +177,8 @@ struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type,
return (struct bch_csum) { .lo = cpu_to_le64(bch2_checksum_final(&state)) };
}
case BCH_CSUM_CHACHA20_POLY1305_80:
case BCH_CSUM_CHACHA20_POLY1305_128: {
case BCH_CSUM_chacha20_poly1305_80:
case BCH_CSUM_chacha20_poly1305_128: {
SHASH_DESC_ON_STACK(desc, c->poly1305);
u8 digest[POLY1305_DIGEST_SIZE];
struct bch_csum ret = { 0 };
@ -212,13 +212,13 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
struct bio_vec bv;
switch (type) {
case BCH_CSUM_NONE:
case BCH_CSUM_none:
return (struct bch_csum) { 0 };
case BCH_CSUM_CRC32C_NONZERO:
case BCH_CSUM_CRC64_NONZERO:
case BCH_CSUM_CRC32C:
case BCH_CSUM_XXHASH:
case BCH_CSUM_CRC64: {
case BCH_CSUM_crc32c_nonzero:
case BCH_CSUM_crc64_nonzero:
case BCH_CSUM_crc32c:
case BCH_CSUM_xxhash:
case BCH_CSUM_crc64: {
struct bch2_checksum_state state;
state.type = type;
@ -238,8 +238,8 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
return (struct bch_csum) { .lo = cpu_to_le64(bch2_checksum_final(&state)) };
}
case BCH_CSUM_CHACHA20_POLY1305_80:
case BCH_CSUM_CHACHA20_POLY1305_128: {
case BCH_CSUM_chacha20_poly1305_80:
case BCH_CSUM_chacha20_poly1305_128: {
SHASH_DESC_ON_STACK(desc, c->poly1305);
u8 digest[POLY1305_DIGEST_SIZE];
struct bch_csum ret = { 0 };

View File

@ -13,9 +13,9 @@ static inline bool bch2_checksum_mergeable(unsigned type)
{
switch (type) {
case BCH_CSUM_NONE:
case BCH_CSUM_CRC32C:
case BCH_CSUM_CRC64:
case BCH_CSUM_none:
case BCH_CSUM_crc32c:
case BCH_CSUM_crc64:
return true;
default:
return false;
@ -78,13 +78,13 @@ static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type,
{
switch (type) {
case BCH_CSUM_OPT_none:
return BCH_CSUM_NONE;
return BCH_CSUM_none;
case BCH_CSUM_OPT_crc32c:
return data ? BCH_CSUM_CRC32C : BCH_CSUM_CRC32C_NONZERO;
return data ? BCH_CSUM_crc32c : BCH_CSUM_crc32c_nonzero;
case BCH_CSUM_OPT_crc64:
return data ? BCH_CSUM_CRC64 : BCH_CSUM_CRC64_NONZERO;
return data ? BCH_CSUM_crc64 : BCH_CSUM_crc64_nonzero;
case BCH_CSUM_OPT_xxhash:
return BCH_CSUM_XXHASH;
return BCH_CSUM_xxhash;
default:
BUG();
}
@ -95,8 +95,8 @@ static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c,
{
if (c->sb.encryption_type)
return c->opts.wide_macs
? BCH_CSUM_CHACHA20_POLY1305_128
: BCH_CSUM_CHACHA20_POLY1305_80;
? BCH_CSUM_chacha20_poly1305_128
: BCH_CSUM_chacha20_poly1305_80;
return bch2_csum_opt_to_type(opt, true);
}
@ -104,7 +104,7 @@ static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c,
static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c)
{
if (c->sb.encryption_type)
return BCH_CSUM_CHACHA20_POLY1305_128;
return BCH_CSUM_chacha20_poly1305_128;
return bch2_csum_opt_to_type(c->opts.metadata_checksum, false);
}

View File

@ -197,8 +197,8 @@ static void dirent_copy_target(struct bkey_i_dirent *dst,
dst->v.d_type = src.v->d_type;
}
static int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
struct bkey_s_c_dirent d, subvol_inum *target)
int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
struct bkey_s_c_dirent d, subvol_inum *target)
{
struct bch_subvolume s;
int ret = 0;
@ -418,16 +418,15 @@ int __bch2_dirent_lookup_trans(struct btree_trans *trans,
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret) {
bch2_trans_iter_exit(trans, iter);
return ret;
}
if (ret)
goto err;
d = bkey_s_c_to_dirent(k);
ret = bch2_dirent_read_target(trans, dir, d, inum);
if (ret > 0)
ret = -ENOENT;
err:
if (ret)
bch2_trans_iter_exit(trans, iter);
@ -448,10 +447,10 @@ retry:
ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info,
name, inum, 0);
bch2_trans_iter_exit(&trans, &iter);
if (ret == -EINTR)
goto retry;
if (!ret)
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
return ret;
}

View File

@ -29,6 +29,9 @@ static inline unsigned dirent_val_u64s(unsigned len)
sizeof(u64));
}
int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
struct bkey_s_c_dirent, subvol_inum *);
int bch2_dirent_create(struct btree_trans *, subvol_inum,
const struct bch_hash_info *, u8,
const struct qstr *, u64, u64 *, int);

View File

@ -1154,7 +1154,7 @@ static void ec_stripe_key_init(struct bch_fs *c,
s->v.nr_blocks = nr_data + nr_parity;
s->v.nr_redundant = nr_parity;
s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max);
s->v.csum_type = BCH_CSUM_CRC32C;
s->v.csum_type = BCH_CSUM_crc32c;
s->v.pad = 0;
while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {

View File

@ -969,12 +969,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
case BCH_EXTENT_ENTRY_crc128:
crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u",
pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s",
crc.compressed_size,
crc.uncompressed_size,
crc.offset, crc.nonce,
crc.csum_type,
crc.compression_type);
bch2_csum_types[crc.csum_type],
bch2_compression_types[crc.compression_type]);
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
ec = &entry->stripe_ptr;

View File

@ -1172,16 +1172,16 @@ static int __bch2_writepage(struct page *page,
do_io:
s = bch2_page_state_create(page, __GFP_NOFAIL);
ret = bch2_get_page_disk_reservation(c, inode, page, true);
if (ret) {
SetPageError(page);
mapping_set_error(page->mapping, ret);
unlock_page(page);
return 0;
}
/*
* Things get really hairy with errors during writeback:
*/
ret = bch2_get_page_disk_reservation(c, inode, page, false);
BUG_ON(ret);
/* Before unlocking the page, get copy of reservations: */
spin_lock(&s->lock);
orig = *s;
spin_unlock(&s->lock);
for (i = 0; i < PAGE_SECTORS; i++) {
if (s->s[i].state < SECTOR_DIRTY)
@ -1214,7 +1214,7 @@ do_io:
offset = 0;
while (1) {
unsigned sectors = 1, dirty_sectors = 0, reserved_sectors = 0;
unsigned sectors = 0, dirty_sectors = 0, reserved_sectors = 0;
u64 sector;
while (offset < PAGE_SECTORS &&
@ -1224,16 +1224,15 @@ do_io:
if (offset == PAGE_SECTORS)
break;
sector = ((u64) page->index << PAGE_SECTOR_SHIFT) + offset;
while (offset + sectors < PAGE_SECTORS &&
orig.s[offset + sectors].state >= SECTOR_DIRTY)
orig.s[offset + sectors].state >= SECTOR_DIRTY) {
reserved_sectors += orig.s[offset + sectors].replicas_reserved;
dirty_sectors += orig.s[offset + sectors].state == SECTOR_DIRTY;
sectors++;
for (i = offset; i < offset + sectors; i++) {
reserved_sectors += orig.s[i].replicas_reserved;
dirty_sectors += orig.s[i].state == SECTOR_DIRTY;
}
BUG_ON(!sectors);
sector = ((u64) page->index << PAGE_SECTOR_SHIFT) + offset;
if (w->io &&
(w->io->op.res.nr_replicas != nr_replicas_this_write ||
@ -2189,12 +2188,13 @@ int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int ret, ret2 = 0;
int ret, ret2, ret3;
ret = file_write_and_wait_range(file, start, end);
ret2 = bch2_flush_inode(c, inode_inum(inode));
ret2 = sync_inode_metadata(&inode->v, 1);
ret3 = bch2_flush_inode(c, inode_inum(inode));
return ret ?: ret2;
return ret ?: ret2 ?: ret3;
}
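
The return statement relies on the GNU ?: extension, which yields its left operand when nonzero, so chaining reports the first failure while still attempting every step:

	int err = ret ?: ret2 ?: ret3;
	/* == ret ? ret : (ret2 ? ret2 : ret3): the first nonzero error wins */
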
/* truncate: */
@ -2299,6 +2299,14 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
s->s[i].state = SECTOR_UNALLOCATED;
}
/*
* Caller needs to know whether this page will be written out by
* writeback - doing an i_size update if necessary - or whether it will
* be responsible for the i_size update:
*/
ret = s->s[(min_t(u64, inode->v.i_size - (index << PAGE_SHIFT),
PAGE_SIZE) - 1) >> 9].state >= SECTOR_DIRTY;
zero_user_segment(page, start_offset, end_offset);
/*
@ -2307,8 +2315,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
* XXX: because we aren't currently tracking whether the page has actual
* data in it (vs. just 0s, or only partially written) this is wrong. ick.
*/
ret = bch2_get_page_disk_reservation(c, inode, page, false);
BUG_ON(ret);
BUG_ON(bch2_get_page_disk_reservation(c, inode, page, false));
/*
* This removes any writeable userspace mappings; we need to force
@ -2330,6 +2337,20 @@ static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from)
from, round_up(from, PAGE_SIZE));
}
static int bch2_truncate_pages(struct bch_inode_info *inode,
loff_t start, loff_t end)
{
int ret = __bch2_truncate_page(inode, start >> PAGE_SHIFT,
start, end);
if (ret >= 0 &&
start >> PAGE_SHIFT != end >> PAGE_SHIFT)
ret = __bch2_truncate_page(inode,
end >> PAGE_SHIFT,
start, end);
return ret;
}
static int bch2_extend(struct user_namespace *mnt_userns,
struct bch_inode_info *inode,
struct bch_inode_unpacked *inode_u,
@ -2420,7 +2441,7 @@ int bch2_truncate(struct user_namespace *mnt_userns,
iattr->ia_valid &= ~ATTR_SIZE;
ret = bch2_truncate_page(inode, iattr->ia_size);
if (unlikely(ret))
if (unlikely(ret < 0))
goto err;
/*
@ -2486,48 +2507,39 @@ static int inode_update_times_fn(struct bch_inode_info *inode,
static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
u64 discard_start = round_up(offset, block_bytes(c)) >> 9;
u64 discard_end = round_down(offset + len, block_bytes(c)) >> 9;
u64 end = offset + len;
u64 block_start = round_up(offset, block_bytes(c));
u64 block_end = round_down(end, block_bytes(c));
bool truncated_last_page;
int ret = 0;
inode_lock(&inode->v);
inode_dio_wait(&inode->v);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
ret = __bch2_truncate_page(inode,
offset >> PAGE_SHIFT,
offset, offset + len);
if (unlikely(ret))
ret = bch2_truncate_pages(inode, offset, end);
if (unlikely(ret < 0))
goto err;
if (offset >> PAGE_SHIFT !=
(offset + len) >> PAGE_SHIFT) {
ret = __bch2_truncate_page(inode,
(offset + len) >> PAGE_SHIFT,
offset, offset + len);
if (unlikely(ret))
goto err;
}
truncated_last_page = ret;
truncate_pagecache_range(&inode->v, offset, offset + len - 1);
truncate_pagecache_range(&inode->v, offset, end - 1);
if (discard_start < discard_end) {
if (block_start < block_end) {
s64 i_sectors_delta = 0;
ret = bch2_fpunch(c, inode_inum(inode),
discard_start, discard_end,
block_start >> 9, block_end >> 9,
&i_sectors_delta);
i_sectors_acct(c, inode, NULL, i_sectors_delta);
}
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
ATTR_MTIME|ATTR_CTIME) ?: ret;
if (end >= inode->v.i_size && !truncated_last_page) {
ret = bch2_write_inode_size(c, inode, inode->v.i_size,
ATTR_MTIME|ATTR_CTIME);
} else {
ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
ATTR_MTIME|ATTR_CTIME);
}
mutex_unlock(&inode->ei_update_lock);
err:
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
return ret;
}
@ -2547,31 +2559,18 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
if ((offset | len) & (block_bytes(c) - 1))
return -EINVAL;
/*
* We need i_mutex to keep the page cache consistent with the extents
* btree, and the btree consistent with i_size - we don't need outside
* locking for the extents btree itself, because we're using linked
* iterators
*/
inode_lock(&inode->v);
inode_dio_wait(&inode->v);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
if (insert) {
ret = -EFBIG;
if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len)
goto err;
return -EFBIG;
ret = -EINVAL;
if (offset >= inode->v.i_size)
goto err;
return -EINVAL;
src_start = U64_MAX;
shift = len;
} else {
ret = -EINVAL;
if (offset + len >= inode->v.i_size)
goto err;
return -EINVAL;
src_start = offset + len;
shift = -len;
@ -2581,7 +2580,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
if (ret)
goto err;
return ret;
if (insert) {
i_size_write(&inode->v, new_size);
@ -2598,7 +2597,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
i_sectors_acct(c, inode, NULL, i_sectors_delta);
if (ret)
goto err;
return ret;
}
bch2_bkey_buf_init(&copy);
@ -2711,18 +2710,19 @@ reassemble:
bch2_bkey_buf_exit(&copy, c);
if (ret)
goto err;
return ret;
mutex_lock(&inode->ei_update_lock);
if (!insert) {
i_size_write(&inode->v, new_size);
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode_size(c, inode, new_size,
ATTR_MTIME|ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
} else {
/* We need an inode update to update bi_journal_seq for fsync: */
ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
ATTR_MTIME|ATTR_CTIME);
}
err:
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
mutex_unlock(&inode->ei_update_lock);
return ret;
}
@ -2817,6 +2817,17 @@ bkey_err:
if (ret == -EINTR)
ret = 0;
}
if (ret == -ENOSPC && (mode & FALLOC_FL_ZERO_RANGE)) {
struct quota_res quota_res = { 0 };
s64 i_sectors_delta = 0;
bch2_fpunch_at(&trans, &iter, inode_inum(inode),
end_sector, &i_sectors_delta);
i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
bch2_quota_reservation_put(c, inode, &quota_res);
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
return ret;
@ -2825,77 +2836,58 @@ bkey_err:
static long bchfs_fallocate(struct bch_inode_info *inode, int mode,
loff_t offset, loff_t len)
{
struct address_space *mapping = inode->v.i_mapping;
struct bch_fs *c = inode->v.i_sb->s_fs_info;
loff_t end = offset + len;
loff_t block_start = round_down(offset, block_bytes(c));
loff_t block_end = round_up(end, block_bytes(c));
int ret;
inode_lock(&inode->v);
inode_dio_wait(&inode->v);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
u64 end = offset + len;
u64 block_start = round_down(offset, block_bytes(c));
u64 block_end = round_up(end, block_bytes(c));
bool truncated_last_page = false;
int ret, ret2 = 0;
if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) {
ret = inode_newsize_ok(&inode->v, end);
if (ret)
goto err;
return ret;
}
if (mode & FALLOC_FL_ZERO_RANGE) {
ret = __bch2_truncate_page(inode,
offset >> PAGE_SHIFT,
offset, end);
ret = bch2_truncate_pages(inode, offset, end);
if (unlikely(ret < 0))
return ret;
if (!ret &&
offset >> PAGE_SHIFT != end >> PAGE_SHIFT)
ret = __bch2_truncate_page(inode,
end >> PAGE_SHIFT,
offset, end);
if (unlikely(ret))
goto err;
truncated_last_page = ret;
truncate_pagecache_range(&inode->v, offset, end - 1);
block_start = round_up(offset, block_bytes(c));
block_end = round_down(end, block_bytes(c));
}
ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9);
if (ret)
goto err;
/*
* Do we need to extend the file?
*
* If we zeroed up to the end of the file, we dropped whatever writes
* were going to write out the current i_size, so we have to extend
* manually even if FL_KEEP_SIZE was set:
* On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update,
* so that the VFS cache i_size is consistent with the btree i_size:
*/
if (ret &&
!(ret == -ENOSPC && (mode & FALLOC_FL_ZERO_RANGE)))
return ret;
if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size)
end = inode->v.i_size;
if (end >= inode->v.i_size &&
(!(mode & FALLOC_FL_KEEP_SIZE) ||
(mode & FALLOC_FL_ZERO_RANGE))) {
/*
* Sync existing appends before extending i_size,
* as in bch2_extend():
*/
ret = filemap_write_and_wait_range(mapping,
inode->ei_inode.bi_size, S64_MAX);
if (ret)
goto err;
if (mode & FALLOC_FL_KEEP_SIZE)
end = inode->v.i_size;
else
i_size_write(&inode->v, end);
(((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) ||
!(mode & FALLOC_FL_KEEP_SIZE))) {
spin_lock(&inode->v.i_lock);
i_size_write(&inode->v, end);
spin_unlock(&inode->v.i_lock);
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode_size(c, inode, end, 0);
ret2 = bch2_write_inode_size(c, inode, end, 0);
mutex_unlock(&inode->ei_update_lock);
}
err:
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
return ret;
return ret ?: ret2;
}
long bch2_fallocate_dispatch(struct file *file, int mode,
@ -2908,6 +2900,10 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
if (!percpu_ref_tryget(&c->writes))
return -EROFS;
inode_lock(&inode->v);
inode_dio_wait(&inode->v);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE)))
ret = bchfs_fallocate(inode, mode, offset, len);
else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
@ -2919,6 +2915,9 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
else
ret = -EOPNOTSUPP;
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
percpu_ref_put(&c->writes);
return ret;

View File

@ -36,7 +36,7 @@
static struct kmem_cache *bch2_inode_cache;
static void bch2_vfs_inode_init(struct bch_fs *, subvol_inum,
static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
struct bch_inode_info *,
struct bch_inode_unpacked *);
@ -92,11 +92,19 @@ void bch2_pagecache_block_get(struct pagecache_lock *lock)
__pagecache_lock_get(lock, -1);
}
void bch2_inode_update_after_write(struct bch_fs *c,
void bch2_inode_update_after_write(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
unsigned fields)
{
struct bch_fs *c = trans->c;
BUG_ON(bi->bi_inum != inode->v.i_ino);
bch2_assert_pos_locked(trans, BTREE_ID_inodes,
POS(0, bi->bi_inum),
0 && c->opts.inodes_use_key_cache);
set_nlink(&inode->v, bch2_inode_nlink_get(bi));
i_uid_write(&inode->v, bi->bi_uid);
i_gid_write(&inode->v, bi->bi_gid);
@ -125,6 +133,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
int ret;
bch2_trans_init(&trans, c, 0, 512);
trans.ip = _RET_IP_;
retry:
bch2_trans_begin(&trans);
@ -139,7 +148,7 @@ retry:
* this is important for inode updates via bchfs_write_index_update
*/
if (!ret)
bch2_inode_update_after_write(c, inode, &inode_u, fields);
bch2_inode_update_after_write(&trans, inode, &inode_u, fields);
bch2_trans_iter_exit(&trans, &iter);
@ -214,6 +223,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
{
struct bch_inode_unpacked inode_u;
struct bch_inode_info *inode;
struct btree_trans trans;
int ret;
inode = to_bch_ei(iget5_locked(c->vfs_sb,
@ -226,14 +236,19 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
if (!(inode->v.i_state & I_NEW))
return &inode->v;
ret = bch2_inode_find_by_inum(c, inum, &inode_u);
bch2_trans_init(&trans, c, 8, 0);
ret = lockrestart_do(&trans,
bch2_inode_find_by_inum_trans(&trans, inum, &inode_u));
if (!ret)
bch2_vfs_inode_init(&trans, inum, inode, &inode_u);
bch2_trans_exit(&trans);
if (ret) {
iget_failed(&inode->v);
return ERR_PTR(ret);
}
bch2_vfs_inode_init(c, inum, inode, &inode_u);
unlock_new_inode(&inode->v);
return &inode->v;
@ -305,7 +320,7 @@ err_before_quota:
}
if (!(flags & BCH_CREATE_TMPFILE)) {
bch2_inode_update_after_write(c, dir, &dir_u,
bch2_inode_update_after_write(&trans, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
mutex_unlock(&dir->ei_update_lock);
}
@ -313,7 +328,8 @@ err_before_quota:
inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
inum.inum = inode_u.bi_inum;
bch2_vfs_inode_init(c, inum, inode, &inode_u);
bch2_iget5_set(&inode->v, &inum);
bch2_vfs_inode_init(&trans, inum, inode, &inode_u);
set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
@ -428,11 +444,9 @@ static int __bch2_link(struct bch_fs *c,
&dentry->d_name));
if (likely(!ret)) {
BUG_ON(inode_u.bi_inum != inode->v.i_ino);
bch2_inode_update_after_write(c, dir, &dir_u,
bch2_inode_update_after_write(&trans, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME);
bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME);
}
bch2_trans_exit(&trans);
@ -480,11 +494,9 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
deleting_snapshot));
if (likely(!ret)) {
BUG_ON(inode_u.bi_inum != inode->v.i_ino);
bch2_inode_update_after_write(c, dir, &dir_u,
bch2_inode_update_after_write(&trans, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
bch2_inode_update_after_write(c, inode, &inode_u,
bch2_inode_update_after_write(&trans, inode, &inode_u,
ATTR_MTIME);
}
@ -612,18 +624,18 @@ static int bch2_rename2(struct user_namespace *mnt_userns,
BUG_ON(dst_inode &&
dst_inode->v.i_ino != dst_inode_u.bi_inum);
bch2_inode_update_after_write(c, src_dir, &src_dir_u,
bch2_inode_update_after_write(&trans, src_dir, &src_dir_u,
ATTR_MTIME|ATTR_CTIME);
if (src_dir != dst_dir)
bch2_inode_update_after_write(c, dst_dir, &dst_dir_u,
bch2_inode_update_after_write(&trans, dst_dir, &dst_dir_u,
ATTR_MTIME|ATTR_CTIME);
bch2_inode_update_after_write(c, src_inode, &src_inode_u,
bch2_inode_update_after_write(&trans, src_inode, &src_inode_u,
ATTR_CTIME);
if (dst_inode)
bch2_inode_update_after_write(c, dst_inode, &dst_inode_u,
bch2_inode_update_after_write(&trans, dst_inode, &dst_inode_u,
ATTR_CTIME);
err:
bch2_trans_exit(&trans);
@ -741,7 +753,7 @@ btree_err:
if (unlikely(ret))
goto err_trans;
bch2_inode_update_after_write(c, inode, &inode_u, attr->ia_valid);
bch2_inode_update_after_write(&trans, inode, &inode_u, attr->ia_valid);
if (acl)
set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
@ -1110,53 +1122,237 @@ static const struct address_space_operations bch_address_space_operations = {
.error_remove_page = generic_error_remove_page,
};
#if 0
struct bcachefs_fid {
u64 inum;
u32 subvol;
u32 gen;
} __packed;
struct bcachefs_fid_with_parent {
struct bcachefs_fid fid;
struct bcachefs_fid dir;
} __packed;
static int bcachefs_fid_valid(int fh_len, int fh_type)
{
switch (fh_type) {
case FILEID_BCACHEFS_WITHOUT_PARENT:
return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32);
case FILEID_BCACHEFS_WITH_PARENT:
return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32);
default:
return false;
}
}
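
For reference, with the __packed layouts above the handle lengths work out as follows (the exportfs API counts fh_len in u32 words):

	/* sizeof(struct bcachefs_fid)             = 8 + 4 + 4 = 16 bytes -> fh_len 4
	 * sizeof(struct bcachefs_fid_with_parent) = 16 + 16   = 32 bytes -> fh_len 8 */
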
static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
{
return (struct bcachefs_fid) {
.inum = inode->ei_inode.bi_inum,
.subvol = inode->ei_subvol,
.gen = inode->ei_inode.bi_generation,
};
}
static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
struct inode *vdir)
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_inode_info *dir = to_bch_ei(vdir);
if (*len < sizeof(struct bcachefs_fid_with_parent) / sizeof(u32))
return FILEID_INVALID;
if (!S_ISDIR(inode->v.i_mode) && dir) {
struct bcachefs_fid_with_parent *fid = (void *) fh;
fid->fid = bch2_inode_to_fid(inode);
fid->dir = bch2_inode_to_fid(dir);
*len = sizeof(*fid) / sizeof(u32);
return FILEID_BCACHEFS_WITH_PARENT;
} else {
struct bcachefs_fid *fid = (void *) fh;
*fid = bch2_inode_to_fid(inode);
*len = sizeof(*fid) / sizeof(u32);
return FILEID_BCACHEFS_WITHOUT_PARENT;
}
}
static struct inode *bch2_nfs_get_inode(struct super_block *sb,
u64 ino, u32 generation)
struct bcachefs_fid fid)
{
struct bch_fs *c = sb->s_fs_info;
struct inode *vinode;
if (ino < BCACHEFS_ROOT_INO)
return ERR_PTR(-ESTALE);
vinode = bch2_vfs_inode_get(c, ino);
if (IS_ERR(vinode))
return ERR_CAST(vinode);
if (generation && vinode->i_generation != generation) {
/* we didn't find the right inode.. */
struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) {
.subvol = fid.subvol,
.inum = fid.inum,
});
if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) {
iput(vinode);
return ERR_PTR(-ESTALE);
vinode = ERR_PTR(-ESTALE);
}
return vinode;
}
static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *fid,
static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid,
int fh_len, int fh_type)
{
return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
bch2_nfs_get_inode);
struct bcachefs_fid *fid = (void *) _fid;
if (!bcachefs_fid_valid(fh_len, fh_type))
return NULL;
return d_obtain_alias(bch2_nfs_get_inode(sb, *fid));
}
static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *fid,
static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid,
int fh_len, int fh_type)
{
return generic_fh_to_parent(sb, fid, fh_len, fh_type,
bch2_nfs_get_inode);
struct bcachefs_fid_with_parent *fid = (void *) _fid;
if (!bcachefs_fid_valid(fh_len, fh_type) ||
fh_type != FILEID_BCACHEFS_WITH_PARENT)
return NULL;
return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir));
}
static struct dentry *bch2_get_parent(struct dentry *child)
{
struct bch_inode_info *inode = to_bch_ei(child->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
subvol_inum parent_inum = {
.subvol = inode->ei_inode.bi_parent_subvol ?:
inode->ei_subvol,
.inum = inode->ei_inode.bi_dir,
};
if (!parent_inum.inum)
return NULL;
return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum));
}
static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child)
{
struct bch_inode_info *inode = to_bch_ei(child->d_inode);
struct bch_inode_info *dir = to_bch_ei(parent->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans trans;
struct btree_iter iter1;
struct btree_iter iter2;
struct bkey_s_c k;
struct bkey_s_c_dirent d;
struct bch_inode_unpacked inode_u;
subvol_inum target;
u32 snapshot;
unsigned name_len;
int ret;
if (!S_ISDIR(dir->v.i_mode))
return -EINVAL;
bch2_trans_init(&trans, c, 0, 0);
bch2_trans_iter_init(&trans, &iter1, BTREE_ID_dirents,
POS(dir->ei_inode.bi_inum, 0), 0);
bch2_trans_iter_init(&trans, &iter2, BTREE_ID_dirents,
POS(dir->ei_inode.bi_inum, 0), 0);
retry:
bch2_trans_begin(&trans);
ret = bch2_subvolume_get_snapshot(&trans, dir->ei_subvol, &snapshot);
if (ret)
goto err;
bch2_btree_iter_set_snapshot(&iter1, snapshot);
bch2_btree_iter_set_snapshot(&iter2, snapshot);
ret = bch2_inode_find_by_inum_trans(&trans, inode_inum(inode), &inode_u);
if (ret)
goto err;
if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
k = bch2_btree_iter_peek_slot(&iter1);
ret = bkey_err(k);
if (ret)
goto err;
if (k.k->type != KEY_TYPE_dirent) {
ret = -ENOENT;
goto err;
}
d = bkey_s_c_to_dirent(k);
ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
if (ret > 0)
ret = -ENOENT;
if (ret)
goto err;
if (target.subvol == inode->ei_subvol &&
target.inum == inode->ei_inode.bi_inum)
goto found;
} else {
/*
* File with multiple hardlinks and our backref is to the wrong
* directory - linear search:
*/
for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
if (k.k->p.inode > dir->ei_inode.bi_inum)
break;
if (k.k->type != KEY_TYPE_dirent)
continue;
d = bkey_s_c_to_dirent(k);
ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target);
if (ret < 0)
break;
if (ret)
continue;
if (target.subvol == inode->ei_subvol &&
target.inum == inode->ei_inode.bi_inum)
goto found;
}
}
ret = -ENOENT;
goto err;
found:
name_len = min_t(unsigned, bch2_dirent_name_bytes(d), NAME_MAX);
memcpy(name, d.v->d_name, name_len);
name[name_len] = '\0';
err:
if (ret == -EINTR)
goto retry;
bch2_trans_iter_exit(&trans, &iter1);
bch2_trans_iter_exit(&trans, &iter2);
bch2_trans_exit(&trans);
return ret;
}
#endif
static const struct export_operations bch_export_ops = {
//.fh_to_dentry = bch2_fh_to_dentry,
//.fh_to_parent = bch2_fh_to_parent,
//.get_parent = bch2_get_parent,
.encode_fh = bch2_encode_fh,
.fh_to_dentry = bch2_fh_to_dentry,
.fh_to_parent = bch2_fh_to_parent,
.get_parent = bch2_get_parent,
.get_name = bch2_get_name,
};
static void bch2_vfs_inode_init(struct bch_fs *c, subvol_inum inum,
static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi)
{
bch2_inode_update_after_write(c, inode, bi, ~0);
bch2_inode_update_after_write(trans, inode, bi, ~0);
inode->v.i_blocks = bi->bi_sectors;
inode->v.i_ino = bi->bi_inum;
@ -1655,6 +1851,8 @@ got_sb:
sb->s_flags |= SB_POSIXACL;
#endif
sb->s_shrink.seeks = 0;
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
if (IS_ERR(vinode)) {
bch_err(c, "error mounting: error getting root inode %i",

View File

@ -173,7 +173,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *, subvol_inum);
typedef int (*inode_set_fn)(struct bch_inode_info *,
struct bch_inode_unpacked *, void *);
void bch2_inode_update_after_write(struct bch_fs *,
void bch2_inode_update_after_write(struct btree_trans *,
struct bch_inode_info *,
struct bch_inode_unpacked *,
unsigned);

View File

@ -220,6 +220,7 @@ int bch2_inode_unpack(struct bkey_s_c k,
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_journal_seq = 0;
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
@ -722,9 +723,9 @@ err:
return ret;
}
static int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
subvol_inum inum,
struct bch_inode_unpacked *inode)
int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
subvol_inum inum,
struct bch_inode_unpacked *inode)
{
struct btree_iter iter;
int ret;

View File

@ -89,6 +89,8 @@ int bch2_inode_create(struct btree_trans *, struct btree_iter *,
int bch2_inode_rm(struct bch_fs *, subvol_inum, bool);
int bch2_inode_find_by_inum_trans(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *);
int bch2_inode_find_by_inum(struct bch_fs *, subvol_inum,
struct bch_inode_unpacked *);

View File

@ -187,7 +187,6 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
int bch2_sum_sector_overwrites(struct btree_trans *trans,
struct btree_iter *extent_iter,
struct bkey_i *new,
bool *maybe_extending,
bool *usage_increasing,
s64 *i_sectors_delta,
s64 *disk_sectors_delta)
@ -199,7 +198,6 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new));
int ret = 0;
*maybe_extending = true;
*usage_increasing = false;
*i_sectors_delta = 0;
*disk_sectors_delta = 0;
@ -226,31 +224,8 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
(!new_compressed && bch2_bkey_sectors_compressed(old))))
*usage_increasing = true;
if (bkey_cmp(old.k->p, new->k.p) >= 0) {
/*
* Check if there's already data above where we're
* going to be writing to - this means we're definitely
* not extending the file:
*
* Note that it's not sufficient to check if there's
* data up to the sector offset we're going to be
* writing to, because i_size could be up to one block
* less:
*/
if (!bkey_cmp(old.k->p, new->k.p)) {
old = bch2_btree_iter_next(&iter);
ret = bkey_err(old);
if (ret)
break;
}
if (old.k && !bkey_err(old) &&
old.k->p.inode == extent_iter->pos.inode &&
bkey_extent_is_data(old.k))
*maybe_extending = false;
if (bkey_cmp(old.k->p, new->k.p) >= 0)
break;
}
}
bch2_trans_iter_exit(trans, &iter);
@ -267,12 +242,10 @@ int bch2_extent_update(struct btree_trans *trans,
s64 *i_sectors_delta_total,
bool check_enospc)
{
/* this must live until after bch2_trans_commit(): */
struct bkey_inode_buf inode_p;
struct btree_iter inode_iter;
struct bch_inode_unpacked inode_u;
struct bpos next_pos;
bool extending = false, usage_increasing;
bool usage_increasing;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
int ret;
@ -290,84 +263,51 @@ int bch2_extent_update(struct btree_trans *trans,
if (ret)
return ret;
new_i_size = min(k->k.p.offset << 9, new_i_size);
next_pos = k->k.p;
ret = bch2_sum_sector_overwrites(trans, iter, k,
&extending,
&usage_increasing,
&i_sectors_delta,
&disk_sectors_delta);
if (ret)
return ret;
if (!usage_increasing)
check_enospc = false;
if (disk_res &&
disk_sectors_delta > (s64) disk_res->sectors) {
ret = bch2_disk_reservation_add(trans->c, disk_res,
disk_sectors_delta - disk_res->sectors,
!check_enospc
!check_enospc || !usage_increasing
? BCH_DISK_RESERVATION_NOFAIL : 0);
if (ret)
return ret;
}
new_i_size = extending
? min(k->k.p.offset << 9, new_i_size)
: 0;
ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum,
BTREE_ITER_INTENT);
if (ret)
return ret;
/*
* XXX:
* writeback can race a bit with truncate, because truncate
* first updates the inode then truncates the pagecache. This is
* ugly, but lets us preserve the invariant that the in memory
* i_size is always >= the on disk i_size.
*
BUG_ON(new_i_size > inode_u.bi_size &&
(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY));
*/
BUG_ON(new_i_size > inode_u.bi_size && !extending);
if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
new_i_size > inode_u.bi_size)
inode_u.bi_size = new_i_size;
else
new_i_size = 0;
inode_u.bi_sectors += i_sectors_delta;
if (i_sectors_delta || new_i_size) {
bch2_inode_pack(trans->c, &inode_p, &inode_u);
inode_p.inode.k.p.snapshot = iter->snapshot;
ret = bch2_trans_update(trans, &inode_iter,
&inode_p.inode.k_i, 0);
}
ret = bch2_trans_update(trans, iter, k, 0) ?:
bch2_inode_write(trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(trans, disk_res, journal_seq,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL);
bch2_trans_iter_exit(trans, &inode_iter);
if (ret)
return ret;
next_pos = k->k.p;
ret = bch2_trans_update(trans, iter, k, 0) ?:
bch2_trans_commit(trans, disk_res, journal_seq,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL);
BUG_ON(ret == -ENOSPC);
if (ret)
return ret;
bch2_btree_iter_set_pos(iter, next_pos);
if (i_sectors_delta_total)
*i_sectors_delta_total += i_sectors_delta;
bch2_btree_iter_set_pos(iter, next_pos);
return 0;
}
@ -385,26 +325,31 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
int ret = 0, ret2 = 0;
u32 snapshot;
while (1) {
while (!ret || ret == -EINTR) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
struct bkey_i delete;
if (ret)
ret2 = ret;
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto btree_err;
continue;
bch2_btree_iter_set_snapshot(iter, snapshot);
k = bch2_btree_iter_peek(iter);
if (bkey_cmp(iter->pos, end_pos) >= 0)
if (bkey_cmp(iter->pos, end_pos) >= 0) {
bch2_btree_iter_set_pos(iter, end_pos);
break;
}
ret = bkey_err(k);
if (ret)
goto btree_err;
continue;
bkey_init(&delete.k);
delete.k.p = iter->pos;
@ -417,18 +362,8 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
&disk_res, NULL,
0, i_sectors_delta, false);
bch2_disk_reservation_put(c, &disk_res);
btree_err:
if (ret == -EINTR) {
ret2 = ret;
ret = 0;
}
if (ret)
break;
}
if (bkey_cmp(iter->pos, end_pos) > 0)
bch2_btree_iter_set_pos(iter, end_pos);
return ret ?: ret2;
}
@ -2104,7 +2039,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
if (crc_is_compressed(pick.crc) ||
(pick.crc.csum_type != BCH_CSUM_NONE &&
(pick.crc.csum_type != BCH_CSUM_none &&
(bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
(bch2_csum_type_is_encryption(pick.crc.csum_type) &&
(flags & BCH_READ_USER_MAPPED)) ||

View File

@ -56,7 +56,7 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
}
int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
struct bkey_i *, bool *, bool *, s64 *, s64 *);
struct bkey_i *, bool *, s64 *, s64 *);
int bch2_extent_update(struct btree_trans *, subvol_inum,
struct btree_iter *, struct bkey_i *,
struct disk_reservation *, u64 *, u64, s64 *, bool);

View File

@ -107,7 +107,12 @@ void bch2_journal_halt(struct journal *j)
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);
j->err_seq = journal_cur_seq(j);
/*
* XXX: we're not using j->lock here because this can be called from
* interrupt context; this can race with journal_write_done()
*/
if (!j->err_seq)
j->err_seq = journal_cur_seq(j);
journal_wake(j);
closure_wake_up(&journal_cur_buf(j)->wait);
}
@ -551,7 +556,10 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
spin_lock(&j->lock);
BUG_ON(seq > journal_cur_seq(j));
if (WARN_ONCE(seq > journal_cur_seq(j),
"requested to flush journal seq %llu, but currently at %llu",
seq, journal_cur_seq(j)))
goto out;
/* Recheck under lock: */
if (j->err_seq && seq >= j->err_seq) {

View File

@ -1259,14 +1259,15 @@ static void journal_write_done(struct closure *cl)
if (seq >= j->pin.front)
journal_seq_pin(j, seq)->devs = w->devs_written;
j->seq_ondisk = seq;
if (err && (!j->err_seq || seq < j->err_seq))
j->err_seq = seq;
if (!err) {
j->seq_ondisk = seq;
if (!JSET_NO_FLUSH(w->data)) {
j->flushed_seq_ondisk = seq;
j->last_seq_ondisk = w->last_seq;
}
if (!JSET_NO_FLUSH(w->data)) {
j->flushed_seq_ondisk = seq;
j->last_seq_ondisk = w->last_seq;
}
} else if (!j->err_seq || seq < j->err_seq)
j->err_seq = seq;
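
This hunk is the core of the commit: previously seq_ondisk and the flushed sequence numbers advanced even when the write errored, so waiters observed success. A hedged sketch of what a flush waiter such as bch2_journal_flush_seq effectively checks:

	/*
	 *	if (j->err_seq && seq >= j->err_seq)
	 *		return -EIO;		// write failed: report it
	 *	if (j->flushed_seq_ondisk >= seq)
	 *		return 0;		// durably on disk
	 *
	 * so flushed_seq_ondisk may only advance on successful flush writes.
	 */
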
/*
* Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard

View File

@ -646,6 +646,9 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
if (fifo_free(&j->pin) <= 32)
min_nr = 1;
if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
min_nr = 1;
trace_journal_reclaim_start(c,
min_nr,
j->prereserved.reserved,

View File

@ -160,7 +160,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
struct extent_ptr_decoded p;
struct bpos next_pos;
bool did_work = false;
bool extending = false, should_check_enospc;
bool should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
bch2_trans_begin(&trans);
@ -226,7 +226,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
op->opts.data_replicas);
ret = bch2_sum_sector_overwrites(&trans, &iter, insert,
&extending,
&should_check_enospc,
&i_sectors_delta,
&disk_sectors_delta);

View File

@ -31,17 +31,32 @@ const char * const bch2_btree_ids[] = {
NULL
};
const char * const bch2_csum_types[] = {
BCH_CSUM_TYPES()
NULL
};
const char * const bch2_csum_opts[] = {
BCH_CSUM_OPTS()
NULL
};
const char * const bch2_compression_types[] = {
BCH_COMPRESSION_TYPES()
NULL
};
const char * const bch2_compression_opts[] = {
BCH_COMPRESSION_OPTS()
NULL
};
const char * const bch2_str_hash_types[] = {
BCH_STR_HASH_TYPES()
NULL
};
const char * const bch2_str_hash_opts[] = {
BCH_STR_HASH_OPTS()
NULL
};
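
These tables presumably rely on a stringifying x(t, n) definition in scope (the #define/#undef lines fall outside the hunk context shown here); conceptually each expands like:

	/* with #define x(t, n) #t, in scope:
	 *	bch2_csum_types[] = { "none", "crc32c_nonzero", "crc64_nonzero",
	 *			      "chacha20_poly1305_80", "chacha20_poly1305_128",
	 *			      "crc32c", "crc64", "xxhash", NULL };
	 * which is what lets bch2_bkey_ptrs_to_text() print names instead of
	 * raw numbers in the extents.c hunk above. */
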

View File

@ -12,9 +12,12 @@ extern const char * const bch2_error_actions[];
extern const char * const bch2_sb_features[];
extern const char * const bch2_sb_compat[];
extern const char * const bch2_btree_ids[];
extern const char * const bch2_csum_types[];
extern const char * const bch2_csum_opts[];
extern const char * const bch2_compression_types[];
extern const char * const bch2_compression_opts[];
extern const char * const bch2_str_hash_types[];
extern const char * const bch2_str_hash_opts[];
extern const char * const bch2_data_types[];
extern const char * const bch2_cache_replacement_policies[];
extern const char * const bch2_member_states[];
@ -140,7 +143,7 @@ enum opt_type {
NULL, NULL) \
x(str_hash, u8, \
OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_STR(bch2_str_hash_types), \
OPT_STR(bch2_str_hash_opts), \
BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_OPT_siphash, \
NULL, "Hash function for directory entries and xattrs")\
x(metadata_target, u16, \

View File

@ -518,57 +518,38 @@ static void replay_now_at(struct journal *j, u64 seq)
}
static int __bch2_journal_replay_key(struct btree_trans *trans,
enum btree_id id, unsigned level,
struct bkey_i *k)
struct journal_key *k)
{
struct btree_iter iter;
unsigned iter_flags =
BTREE_ITER_INTENT|
BTREE_ITER_NOT_EXTENTS;
int ret;
bch2_trans_node_iter_init(trans, &iter, id, k->k.p,
BTREE_MAX_DEPTH, level,
BTREE_ITER_INTENT|
BTREE_ITER_NOT_EXTENTS);
if (!k->level && k->btree_id == BTREE_ID_alloc)
iter_flags |= BTREE_ITER_CACHED|BTREE_ITER_CACHED_NOFILL;
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level,
iter_flags);
ret = bch2_btree_iter_traverse(&iter) ?:
bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN);
bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
{
unsigned commit_flags = BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW;
unsigned commit_flags =
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_RESERVED;
if (!k->allocated)
commit_flags |= BTREE_INSERT_JOURNAL_REPLAY;
return bch2_trans_do(c, NULL, NULL, commit_flags,
__bch2_journal_replay_key(&trans, k->btree_id, k->level, k->k));
}
static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k)
{
struct btree_iter iter;
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, k->k.p,
BTREE_ITER_CACHED|
BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&iter) ?:
bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
static int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
{
return bch2_trans_do(c, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_JOURNAL_REPLAY,
__bch2_alloc_replay_key(&trans, k));
__bch2_journal_replay_key(&trans, k));
}
static int journal_sort_seq_cmp(const void *_l, const void *_r)
@ -606,7 +587,7 @@ static int bch2_journal_replay(struct bch_fs *c,
if (!i->level && i->btree_id == BTREE_ID_alloc) {
j->replay_journal_seq = keys.journal_seq_base + i->journal_seq;
ret = bch2_alloc_replay_key(c, i->k);
ret = bch2_journal_replay_key(c, i);
if (ret)
goto err;
}
@ -1050,6 +1031,8 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->sb.clean)
bch_info(c, "recovering from clean shutdown, journal seq %llu",
le64_to_cpu(clean->journal_seq));
else
bch_info(c, "recovering from unclean shutdown");
if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) {
bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported");
@ -1068,7 +1051,6 @@ int bch2_fs_recovery(struct bch_fs *c)
bch_err(c, "filesystem may have incompatible bkey formats; run fsck from the compat branch to fix");
ret = -EINVAL;
goto err;
}
if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) {
@ -1498,7 +1480,7 @@ int bch2_fs_initialize(struct bch_fs *c)
}
err = "error writing first journal entry";
ret = bch2_journal_meta(&c->journal);
ret = bch2_journal_flush(&c->journal);
if (ret)
goto err;

View File

@ -44,7 +44,10 @@ void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c,
{
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
pr_buf(out, "idx %llu", le64_to_cpu(p.v->idx));
pr_buf(out, "idx %llu front_pad %u back_pad %u",
le64_to_cpu(p.v->idx),
le32_to_cpu(p.v->front_pad),
le32_to_cpu(p.v->back_pad));
}
bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
@ -347,7 +350,8 @@ s64 bch2_remap_range(struct bch_fs *c,
inode_u.bi_size < new_i_size) {
inode_u.bi_size = new_i_size;
ret2 = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL, NULL, 0);
bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
}
bch2_trans_iter_exit(&trans, &inode_iter);

View File

@ -20,13 +20,13 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
{
switch (opt) {
case BCH_STR_HASH_OPT_crc32c:
return BCH_STR_HASH_CRC32C;
return BCH_STR_HASH_crc32c;
case BCH_STR_HASH_OPT_crc64:
return BCH_STR_HASH_CRC64;
return BCH_STR_HASH_crc64;
case BCH_STR_HASH_OPT_siphash:
return c->sb.features & (1ULL << BCH_FEATURE_new_siphash)
? BCH_STR_HASH_SIPHASH
: BCH_STR_HASH_SIPHASH_OLD;
? BCH_STR_HASH_siphash
: BCH_STR_HASH_siphash_old;
default:
BUG();
}
@ -51,7 +51,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
.siphash_key = { .k0 = bi->bi_hash_seed }
};
if (unlikely(info.type == BCH_STR_HASH_SIPHASH_OLD)) {
if (unlikely(info.type == BCH_STR_HASH_siphash_old)) {
SHASH_DESC_ON_STACK(desc, c->sha256);
u8 digest[SHA256_DIGEST_SIZE];
@ -77,16 +77,16 @@ static inline void bch2_str_hash_init(struct bch_str_hash_ctx *ctx,
const struct bch_hash_info *info)
{
switch (info->type) {
case BCH_STR_HASH_CRC32C:
case BCH_STR_HASH_crc32c:
ctx->crc32c = crc32c(~0, &info->siphash_key.k0,
sizeof(info->siphash_key.k0));
break;
case BCH_STR_HASH_CRC64:
case BCH_STR_HASH_crc64:
ctx->crc64 = crc64_be(~0, &info->siphash_key.k0,
sizeof(info->siphash_key.k0));
break;
case BCH_STR_HASH_SIPHASH_OLD:
case BCH_STR_HASH_SIPHASH:
case BCH_STR_HASH_siphash_old:
case BCH_STR_HASH_siphash:
SipHash24_Init(&ctx->siphash, &info->siphash_key);
break;
default:
@ -99,14 +99,14 @@ static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx,
const void *data, size_t len)
{
switch (info->type) {
case BCH_STR_HASH_CRC32C:
case BCH_STR_HASH_crc32c:
ctx->crc32c = crc32c(ctx->crc32c, data, len);
break;
case BCH_STR_HASH_CRC64:
case BCH_STR_HASH_crc64:
ctx->crc64 = crc64_be(ctx->crc64, data, len);
break;
case BCH_STR_HASH_SIPHASH_OLD:
case BCH_STR_HASH_SIPHASH:
case BCH_STR_HASH_siphash_old:
case BCH_STR_HASH_siphash:
SipHash24_Update(&ctx->siphash, data, len);
break;
default:
@ -118,12 +118,12 @@ static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx,
const struct bch_hash_info *info)
{
switch (info->type) {
case BCH_STR_HASH_CRC32C:
case BCH_STR_HASH_crc32c:
return ctx->crc32c;
case BCH_STR_HASH_CRC64:
case BCH_STR_HASH_crc64:
return ctx->crc64 >> 1;
case BCH_STR_HASH_SIPHASH_OLD:
case BCH_STR_HASH_SIPHASH:
case BCH_STR_HASH_siphash_old:
case BCH_STR_HASH_siphash:
return SipHash24_End(&ctx->siphash) >> 1;
default:
BUG();

View File

@ -441,8 +441,16 @@ int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src)
if (BCH_SB_HAS_ERRORS(c->disk_sb.sb))
set_bit(BCH_FS_ERROR, &c->flags);
else
clear_bit(BCH_FS_ERROR, &c->flags);
if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb))
set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags);
else
clear_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags);
if (BCH_SB_INITIALIZED(c->disk_sb.sb))
set_bit(BCH_FS_INITIALIZED, &c->flags);
ret = bch2_sb_replicas_to_cpu_replicas(c);
if (ret)