Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-23 00:00:02 +03:00)

Update bcachefs sources to bee7b5a4fa21 bcachefs: Pin btree cache in ram for random access in fsck

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

parent 9e6d9560d0
commit 7a716b76b5
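
This update pulls in the upstream change that lets fsck pin the btree nodes it will revisit randomly (backpointers, alloc info) in the btree node cache, rather than only estimating how many nodes fit in RAM. The mechanism, visible in the bch2_get_btree_in_memory_pos() and __btree_node_reclaim() hunks below, is to record a per-btree bitmask plus a start/end bbpos range in struct btree_cache and have reclaim refuse to evict nodes inside that range. The following is only a rough, standalone sketch of the reclaim-side check, using simplified stand-in types rather than the real bcachefs definitions:

	/* Illustrative sketch only; bbpos/pin_range here are simplified stand-ins. */
	#include <stdbool.h>
	#include <stdint.h>

	struct bbpos { uint32_t btree; uint64_t pos; };

	struct pin_range {
		uint64_t	leaf_mask;	/* one bit per btree id, leaf nodes */
		uint64_t	interior_mask;	/* one bit per btree id, interior nodes */
		struct bbpos	start, end;	/* range of nodes currently pinned */
	};

	static int bbpos_cmp(struct bbpos a, struct bbpos b)
	{
		if (a.btree != b.btree)
			return a.btree < b.btree ? -1 : 1;
		if (a.pos != b.pos)
			return a.pos < b.pos ? -1 : 1;
		return 0;
	}

	/* Returns true if reclaim should skip this node because fsck pinned it. */
	static bool node_is_pinned(const struct pin_range *r,
				   unsigned btree_id, unsigned level, struct bbpos pos)
	{
		uint64_t mask = level ? r->interior_mask : r->leaf_mask;

		return (mask & (1ULL << btree_id)) &&
		       bbpos_cmp(r->start, pos) < 0 &&
		       bbpos_cmp(r->end, pos) >= 0;
	}

In the actual sources the masks and range live in struct btree_cache and the check returns -BCH_ERR_ENOMEM_btree_node_reclaim, so the shrinker simply skips pinned nodes, as shown in the btree_cache.c hunk below.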
@@ -1 +1 @@
-50847e296b34efabe199e408ec4d72f10a866c39
+bee7b5a4fa2135c9ec9d1c9424018ee494500bb5
@@ -2,6 +2,12 @@
 #define _LINUX_SORT_H
 
 #include <stdlib.h>
+#include <linux/types.h>
+
+void sort_r(void *base, size_t num, size_t size,
+	    cmp_r_func_t cmp_func,
+	    swap_r_func_t swap_func,
+	    const void *priv);
 
 static inline void sort(void *base, size_t num, size_t size,
 			int (*cmp_func)(const void *, const void *),
@@ -129,8 +129,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
 	printbuf_exit(&buf);
 
 	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
-		bch2_inconsistent_error(c);
-		return -EIO;
+		return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
 	} else {
 		return 0;
 	}
@@ -553,60 +552,61 @@ static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
 	};
 }
 
-static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+static u64 mem_may_pin_bytes(struct bch_fs *c)
 {
 	struct sysinfo i;
-	u64 mem_bytes;
 
 	si_meminfo(&i);
-	mem_bytes = i.totalram * i.mem_unit;
-	return div_u64(mem_bytes >> 1, c->opts.btree_node_size);
+
+	u64 mem_bytes = i.totalram * i.mem_unit;
+	return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
+}
+
+static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+{
+	return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
 }
 
 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
-					unsigned btree_leaf_mask,
-					unsigned btree_interior_mask,
+					u64 btree_leaf_mask,
+					u64 btree_interior_mask,
 					struct bbpos start, struct bbpos *end)
 {
-	struct btree_iter iter;
-	struct bkey_s_c k;
-	size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
-	enum btree_id btree;
+	struct bch_fs *c = trans->c;
+	s64 mem_may_pin = mem_may_pin_bytes(c);
 	int ret = 0;
 
-	for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
-		unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
+	btree_interior_mask |= btree_leaf_mask;
+
+	c->btree_cache.pinned_nodes_leaf_mask = btree_leaf_mask;
+	c->btree_cache.pinned_nodes_interior_mask = btree_interior_mask;
+	c->btree_cache.pinned_nodes_start = start;
+	c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;
+
+	for (enum btree_id btree = start.btree;
+	     btree < BTREE_ID_NR && !ret;
+	     btree++) {
+		unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
+		struct btree_iter iter;
+		struct btree *b;
 
 		if (!((1U << btree) & btree_leaf_mask) &&
 		    !((1U << btree) & btree_interior_mask))
 			continue;
 
-		bch2_trans_node_iter_init(trans, &iter, btree,
-					  btree == start.btree ? start.pos : POS_MIN,
-					  0, depth, 0);
-		/*
-		 * for_each_btree_key_contineu() doesn't check the return value
-		 * from bch2_btree_iter_advance(), which is needed when
-		 * iterating over interior nodes where we'll see keys at
-		 * SPOS_MAX:
-		 */
-		do {
-			k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
-			ret = bkey_err(k);
-			if (!k.k || ret)
-				break;
-
-			--btree_nodes;
-			if (!btree_nodes) {
-				*end = BBPOS(btree, k.k->p);
+		__for_each_btree_node(trans, iter, btree,
+				      btree == start.btree ? start.pos : POS_MIN,
+				      0, depth, BTREE_ITER_PREFETCH, b, ret) {
+			mem_may_pin -= btree_buf_bytes(b);
+			if (mem_may_pin <= 0) {
+				c->btree_cache.pinned_nodes_end = *end =
+					BBPOS(btree, b->key.k.p);
 				bch2_trans_iter_exit(trans, &iter);
 				return 0;
 			}
-		} while (bch2_btree_iter_advance(&iter));
+		}
 		bch2_trans_iter_exit(trans, &iter);
 	}
 
-	*end = BBPOS_MAX;
 	return ret;
 }
 
@@ -664,62 +664,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
 	return 0;
 }
 
-static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
-					 struct bpos bucket)
-{
-	return bch2_dev_exists2(c, bucket.inode)
-		? bucket_pos_to_bp(c, bucket, 0)
-		: bucket;
-}
-
-static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
-					struct bpos start, struct bpos *end)
-{
-	struct btree_iter alloc_iter;
-	struct btree_iter bp_iter;
-	struct bkey_s_c alloc_k, bp_k;
-	size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
-	bool alloc_end = false, bp_end = false;
-	int ret = 0;
-
-	bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
-				  start, 0, 1, 0);
-	bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
-				  bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
-	while (1) {
-		alloc_k = !alloc_end
-			? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
-			: bkey_s_c_null;
-		bp_k = !bp_end
-			? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
-			: bkey_s_c_null;
-
-		ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
-		if ((!alloc_k.k && !bp_k.k) || ret) {
-			*end = SPOS_MAX;
-			break;
-		}
-
-		--btree_nodes;
-		if (!btree_nodes) {
-			*end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
-			break;
-		}
-
-		if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
-		    bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
-			if (!bch2_btree_iter_advance(&alloc_iter))
-				alloc_end = true;
-		} else {
-			if (!bch2_btree_iter_advance(&bp_iter))
-				bp_end = true;
-		}
-	}
-	bch2_trans_iter_exit(trans, &bp_iter);
-	bch2_trans_iter_exit(trans, &alloc_iter);
-	return ret;
-}
-
 int bch2_check_extents_to_backpointers(struct bch_fs *c)
 {
 	struct btree_trans *trans = bch2_trans_get(c);
@@ -730,10 +674,16 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 	bkey_init(&s.last_flushed.k->k);
 
 	while (1) {
-		ret = bch2_get_alloc_in_memory_pos(trans, s.bucket_start, &s.bucket_end);
+		struct bbpos end;
+		ret = bch2_get_btree_in_memory_pos(trans,
+						   BIT_ULL(BTREE_ID_backpointers),
+						   BIT_ULL(BTREE_ID_backpointers),
+						   BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
 		if (ret)
 			break;
 
+		s.bucket_end = end.pos;
+
 		if (bpos_eq(s.bucket_start, POS_MIN) &&
 		    !bpos_eq(s.bucket_end, SPOS_MAX))
 			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
@@ -761,6 +711,9 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 	bch2_trans_put(trans);
 	bch2_bkey_buf_exit(&s.last_flushed, c);
 
+	c->btree_cache.pinned_nodes_leaf_mask = 0;
+	c->btree_cache.pinned_nodes_interior_mask = 0;
+
 	bch_err_fn(c, ret);
 	return ret;
 }
@@ -866,6 +819,9 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
 	}
 	bch2_trans_put(trans);
 
+	c->btree_cache.pinned_nodes_leaf_mask = 0;
+	c->btree_cache.pinned_nodes_interior_mask = 0;
+
 	bch_err_fn(c, ret);
 	return ret;
 }
@@ -13,6 +13,6 @@ static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos)
 }
 
 #define BBPOS_MIN	BBPOS(0, POS_MIN)
-#define BBPOS_MAX	BBPOS(BTREE_ID_NR - 1, POS_MAX)
+#define BBPOS_MAX	BBPOS(BTREE_ID_NR - 1, SPOS_MAX)
 
 #endif /* _BCACHEFS_BBPOS_TYPES_H */
@@ -505,6 +505,7 @@ enum gc_phase {
 	GC_PHASE_BTREE_deleted_inodes,
 	GC_PHASE_BTREE_logged_ops,
 	GC_PHASE_BTREE_rebalance_work,
+	GC_PHASE_BTREE_subvolume_children,
 
 	GC_PHASE_PENDING_DELETE,
 };
@@ -840,7 +840,9 @@ struct bch_sb_field_downgrade {
 	x(snapshot_skiplists, BCH_VERSION(1, 1)) \
 	x(deleted_inodes, BCH_VERSION(1, 2)) \
 	x(rebalance_work, BCH_VERSION(1, 3)) \
-	x(member_seq, BCH_VERSION(1, 4))
+	x(member_seq, BCH_VERSION(1, 4)) \
+	x(subvolume_fs_parent, BCH_VERSION(1, 5)) \
+	x(btree_subvolume_children, BCH_VERSION(1, 6))
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
@@ -1488,7 +1490,9 @@ enum btree_id_flags {
 	  BIT_ULL(KEY_TYPE_logged_op_truncate)| \
 	  BIT_ULL(KEY_TYPE_logged_op_finsert)) \
 	x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
-	  BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))
+	  BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \
+	x(subvolume_children, 19, 0, \
+	  BIT_ULL(KEY_TYPE_set))
 
 enum btree_id {
 #define x(name, nr, ...) BTREE_ID_##name = nr,
@@ -78,6 +78,7 @@ bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
 
 enum btree_update_flags {
 	__BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END,
+	__BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE,
 	__BTREE_UPDATE_NOJOURNAL,
 	__BTREE_UPDATE_KEY_CACHE_RECLAIM,
 
@@ -91,6 +92,8 @@ enum btree_update_flags {
 };
 
 #define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
+#define BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE \
+	(1U << __BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE)
 #define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL)
 #define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
 
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bbpos.h"
 #include "bkey_buf.h"
 #include "btree_cache.h"
 #include "btree_io.h"
@@ -208,6 +209,18 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 	int ret = 0;
 
 	lockdep_assert_held(&bc->lock);
+
+	struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
+
+	u64 mask = b->c.level
+		? bc->pinned_nodes_interior_mask
+		: bc->pinned_nodes_leaf_mask;
+
+	if ((mask & BIT_ULL(b->c.btree_id)) &&
+	    bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
+	    bbpos_cmp(bc->pinned_nodes_end, pos) >= 0)
+		return -BCH_ERR_ENOMEM_btree_node_reclaim;
+
 wait_on_io:
 	if (b->flags & ((1U << BTREE_NODE_dirty)|
 			(1U << BTREE_NODE_read_in_flight)|
@@ -905,7 +918,7 @@ retry:
 
 	if (unlikely(btree_node_read_error(b))) {
 		six_unlock_type(&b->c.lock, lock_type);
-		return ERR_PTR(-EIO);
+		return ERR_PTR(-BCH_ERR_btree_node_read_error);
 	}
 
 	EBUG_ON(b->c.btree_id != path->btree_id);
@@ -996,7 +1009,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
 
 	if (unlikely(btree_node_read_error(b))) {
 		six_unlock_type(&b->c.lock, lock_type);
-		return ERR_PTR(-EIO);
+		return ERR_PTR(-BCH_ERR_btree_node_read_error);
 	}
 
 	EBUG_ON(b->c.btree_id != path->btree_id);
@@ -1079,7 +1092,7 @@ lock_node:
 
 	if (unlikely(btree_node_read_error(b))) {
 		six_unlock_read(&b->c.lock);
-		b = ERR_PTR(-EIO);
+		b = ERR_PTR(-BCH_ERR_btree_node_read_error);
 		goto out;
 	}
 
@@ -407,7 +407,7 @@ again:
 			printbuf_reset(&buf);
 			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
 
-			if (mustfix_fsck_err_on(ret == -EIO, c,
+			if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), c,
 						btree_node_unreadable,
 						"Topology repair: unreadable btree node at btree %s level %u:\n"
 						"  %s",
@@ -979,7 +979,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
 					false);
 		ret = PTR_ERR_OR_ZERO(child);
 
-		if (ret == -EIO) {
+		if (bch2_err_matches(ret, EIO)) {
 			bch2_topology_error(c);
 
 			if (__fsck_err(c,
@@ -581,8 +581,7 @@ static int __btree_err(int ret,
 		break;
 	case -BCH_ERR_btree_node_read_err_bad_node:
 		bch2_print_string_as_lines(KERN_ERR, out.buf);
-		bch2_topology_error(c);
-		ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: -EIO;
+		ret = bch2_topology_error(c);
 		break;
 	case -BCH_ERR_btree_node_read_err_incompatible:
 		bch2_print_string_as_lines(KERN_ERR, out.buf);
@@ -1737,7 +1736,7 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
 		list_move(&b->list, &c->btree_cache.freeable);
 		mutex_unlock(&c->btree_cache.lock);
 
-		ret = -EIO;
+		ret = -BCH_ERR_btree_node_read_error;
 		goto err;
 	}
 
@@ -1841,7 +1840,7 @@ static void btree_node_write_work(struct work_struct *work)
 				bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
 
 	if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) {
-		ret = -BCH_ERR_btree_write_all_failed;
+		ret = -BCH_ERR_btree_node_write_all_failed;
 		goto err;
 	}
 
@@ -2303,7 +2303,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
 		     btree_iter_path(trans, iter)->level);
 
 	if (iter->flags & BTREE_ITER_WITH_JOURNAL)
-		return bkey_s_c_err(-EIO);
+		return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported);
 
 	bch2_btree_iter_verify(iter);
 	bch2_btree_iter_verify_entry_exit(iter);
@@ -2501,6 +2501,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 		k = bch2_btree_iter_peek_upto(&iter2, end);
 
 		if (k.k && !bkey_err(k)) {
+			swap(iter->key_cache_path, iter2.key_cache_path);
 			iter->k = iter2.k;
 			k.k = &iter->k;
 		}
@@ -2760,6 +2761,9 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
 	struct btree_trans *trans = src->trans;
 
 	*dst = *src;
+#ifdef TRACK_PATH_ALLOCATED
+	dst->ip_allocated = _RET_IP_;
+#endif
 	if (src->path)
 		__btree_path_get(trans->paths + src->path, src->flags & BTREE_ITER_INTENT);
 	if (src->update_path)
@@ -6,6 +6,7 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 
+#include "bbpos_types.h"
 #include "btree_key_cache_types.h"
 #include "buckets_types.h"
 #include "errcode.h"
@@ -173,6 +174,11 @@ struct btree_cache {
 	 */
 	struct task_struct	*alloc_lock;
 	struct closure_waitlist	alloc_wait;
+
+	struct bbpos		pinned_nodes_start;
+	struct bbpos		pinned_nodes_end;
+	u64			pinned_nodes_leaf_mask;
+	u64			pinned_nodes_interior_mask;
 };
 
 struct btree_node_iter {
@@ -654,6 +660,7 @@ const char *bch2_btree_node_type_str(enum btree_node_type);
 	 BIT_ULL(BKEY_TYPE_inodes)| \
 	 BIT_ULL(BKEY_TYPE_stripes)| \
 	 BIT_ULL(BKEY_TYPE_reflink)| \
+	 BIT_ULL(BKEY_TYPE_subvolumes)| \
 	 BIT_ULL(BKEY_TYPE_btree))
 
 #define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS \
@@ -727,7 +734,7 @@ struct btree_root {
 	__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
 	u8			level;
 	u8			alive;
-	s8			error;
+	s16			error;
 };
 
 enum btree_gc_coalesce_fail_reason {
@@ -82,40 +82,169 @@ static noinline int extent_back_merge(struct btree_trans *trans,
 	return 0;
 }
 
-/*
- * When deleting, check if we need to emit a whiteout (because we're overwriting
- * something in an ancestor snapshot)
- */
-static int need_whiteout_for_snapshot(struct btree_trans *trans,
-				      enum btree_id btree_id, struct bpos pos)
+static struct bkey_s_c peek_slot_including_whiteouts(struct btree_trans *trans, struct btree_iter *iter,
+						     enum btree_id btree, struct bpos pos)
 {
-	struct btree_iter iter;
 	struct bkey_s_c k;
-	u32 snapshot = pos.snapshot;
 	int ret;
 
-	if (!bch2_snapshot_parent(trans->c, pos.snapshot))
-		return 0;
-
-	pos.snapshot++;
-
-	for_each_btree_key_norestart(trans, iter, btree_id, pos,
+	for_each_btree_key_norestart(trans, *iter, btree, pos,
				     BTREE_ITER_ALL_SNAPSHOTS|
				     BTREE_ITER_NOPRESERVE, k, ret) {
 		if (!bkey_eq(k.k->p, pos))
 			break;
-
-		if (bch2_snapshot_is_ancestor(trans->c, snapshot,
-					      k.k->p.snapshot)) {
-			ret = !bkey_whiteout(k.k);
-			break;
-		}
-	}
+		if (bch2_snapshot_is_ancestor(trans->c, pos.snapshot, k.k->p.snapshot))
+			return k;
+	}
+	bch2_trans_iter_exit(trans, iter);
+
+	return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
+}
+
+/*
+ * When deleting, check if we need to emit a whiteout (because we're overwriting
+ * something in an ancestor snapshot)
+ */
+static int need_whiteout_for_snapshot(struct btree_trans *trans, enum btree_id btree, struct bpos pos)
+{
+	pos.snapshot = bch2_snapshot_parent(trans->c, pos.snapshot);
+	if (!pos.snapshot)
+		return 0;
+
+	struct btree_iter iter;
+	struct bkey_s_c k = peek_slot_including_whiteouts(trans, &iter, btree, pos);
+	int ret = bkey_err(k) ?: k.k && !bkey_whiteout(k.k);
 	bch2_trans_iter_exit(trans, &iter);
 
 	return ret;
 }
 
+/*
+ * We're overwriting a key at @pos in snapshot @snapshot, so we need to insert a
+ * whiteout: that might be in @snapshot, or if there are overwites in sibling
+ * snapshots, find the common ancestor where @pos is overwritten in every
+ * descendent and insert the whiteout there - which might be at @pos.
+ */
+static int delete_interior_snapshot_key(struct btree_trans *trans,
+					enum btree_id btree,
+					struct bpos whiteout, bool deleting,
+					struct bpos overwrite, bool old_is_whiteout)
+{
+	struct bch_fs *c = trans->c;
+	struct bpos orig_whiteout = whiteout, sib = whiteout;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	int ret;
+
+	sib.snapshot = bch2_snapshot_sibling(c, sib.snapshot);
+
+	for_each_btree_key_norestart(trans, iter, btree, sib,
+				     BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_INTENT, k, ret) {
+		BUG_ON(bpos_gt(k.k->p, overwrite));
+
+		if (bpos_lt(k.k->p, sib)) /* unrelated branch - skip */
+			continue;
+		if (bpos_gt(k.k->p, sib)) /* did not find @sib */
+			break;
+
+		/* @overwrite is also written in @sib, now check parent */
+		whiteout.snapshot = bch2_snapshot_parent(c, whiteout.snapshot);
+		if (bpos_eq(whiteout, overwrite))
+			break;
+
+		sib = whiteout;
+		sib.snapshot = bch2_snapshot_sibling(c, sib.snapshot);
+	}
+
+	if (ret)
+		goto err;
+
+	if (!deleting && bpos_eq(whiteout, orig_whiteout))
+		goto out;
+
+	if (!bpos_eq(iter.pos, whiteout)) {
+		bch2_trans_iter_exit(trans, &iter);
+		bch2_trans_iter_init(trans, &iter, btree, whiteout, BTREE_ITER_INTENT);
+		k = bch2_btree_iter_peek_slot(&iter);
+		ret = bkey_err(k);
+		if (ret)
+			goto err;
+	}
+
+	iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
+	iter.flags |= BTREE_ITER_FILTER_SNAPSHOTS;
+
+	struct bkey_i *delete = bch2_trans_kmalloc(trans, sizeof(*delete));
+	ret = PTR_ERR_OR_ZERO(delete);
+	if (ret)
+		goto err;
+
+	bkey_init(&delete->k);
+	delete->k.p = whiteout;
+
+	ret = !bpos_eq(whiteout, overwrite)
+		? !old_is_whiteout
+		: need_whiteout_for_snapshot(trans, btree, whiteout);
+	if (ret < 0)
+		goto err;
+	if (ret)
+		delete->k.type = KEY_TYPE_whiteout;
+
+	ret = bch2_trans_update(trans, &iter, delete,
+				BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+				BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE);
+out:
+err:
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
+/*
+ * We're overwriting a key in a snapshot that has ancestors: if we're
+ * overwriting a key in a different snapshot, we need to check if it is now
+ * fully overritten and can be deleted, and if we're deleting a key in the
+ * current snapshot we need to check if we need to leave a whiteout.
+ */
+static noinline int
+overwrite_interior_snapshot_key(struct btree_trans *trans,
+				struct btree_iter *iter,
+				struct bkey_i *k)
+{
+	struct bkey_s_c old = bch2_btree_iter_peek_slot(iter);
+
+	int ret = bkey_err(old);
+	if (ret)
+		return ret;
+
+	if (!bkey_deleted(old.k)) {
+		if (old.k->p.snapshot != k->k.p.snapshot) {
+			/*
+			 * We're overwriting a key in a different snapshot:
+			 * check if it's also been overwritten in siblings
+			 */
+			ret = delete_interior_snapshot_key(trans, iter->btree_id,
+							   k->k.p, bkey_deleted(&k->k),
+							   old.k->p, bkey_whiteout(old.k));
+			if (ret)
+				return ret;
+			if (bkey_deleted(&k->k))
+				return 1;
+		} else if (bkey_deleted(&k->k)) {
+			/*
+			 * We're deleting a key in the current snapshot:
+			 * check if we need to leave a whiteout
+			 */
+			ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
+			if (unlikely(ret < 0))
+				return ret;
+			if (ret)
+				k->k.type = KEY_TYPE_whiteout;
+		}
+	}
+
+	return 0;
+}
+
 int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
 				     enum btree_id id,
 				     struct bpos old_pos,
|
|||||||
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
|
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
|
||||||
struct bkey_i *k, enum btree_update_flags flags)
|
struct bkey_i *k, enum btree_update_flags flags)
|
||||||
{
|
{
|
||||||
btree_path_idx_t path_idx = iter->update_path ?: iter->path;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (iter->flags & BTREE_ITER_IS_EXTENTS)
|
if (iter->flags & BTREE_ITER_IS_EXTENTS)
|
||||||
return bch2_trans_update_extent(trans, iter, k, flags);
|
return bch2_trans_update_extent(trans, iter, k, flags);
|
||||||
|
|
||||||
if (bkey_deleted(&k->k) &&
|
if (!(flags & (BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE|
|
||||||
!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
|
BTREE_UPDATE_KEY_CACHE_RECLAIM)) &&
|
||||||
(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
|
(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
|
||||||
ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
|
bch2_snapshot_parent(trans->c, k->k.p.snapshot)) {
|
||||||
if (unlikely(ret < 0))
|
int ret = overwrite_interior_snapshot_key(trans, iter, k);
|
||||||
return ret;
|
|
||||||
|
|
||||||
if (ret)
|
if (ret)
|
||||||
k->k.type = KEY_TYPE_whiteout;
|
return ret < 0 ? ret : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Ensure that updates to cached btrees go to the key cache:
|
* Ensure that updates to cached btrees go to the key cache:
|
||||||
*/
|
*/
|
||||||
|
btree_path_idx_t path_idx = iter->update_path ?: iter->path;
|
||||||
struct btree_path *path = trans->paths + path_idx;
|
struct btree_path *path = trans->paths + path_idx;
|
||||||
|
|
||||||
if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
|
if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
|
||||||
!path->cached &&
|
!path->cached &&
|
||||||
!path->level &&
|
!path->level &&
|
||||||
btree_id_cached(trans->c, path->btree_id)) {
|
btree_id_cached(trans->c, path->btree_id)) {
|
||||||
ret = bch2_trans_update_get_key_cache(trans, iter, path);
|
int ret = bch2_trans_update_get_key_cache(trans, iter, path);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
@ -789,6 +915,27 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
|||||||
|
|
||||||
int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
|
int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
|
||||||
struct bpos pos, bool set)
|
struct bpos pos, bool set)
|
||||||
|
{
|
||||||
|
struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
|
||||||
|
int ret = PTR_ERR_OR_ZERO(k);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
bkey_init(&k->k);
|
||||||
|
k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
|
||||||
|
k->k.p = pos;
|
||||||
|
|
||||||
|
struct btree_iter iter;
|
||||||
|
bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
|
||||||
|
|
||||||
|
ret = bch2_btree_iter_traverse(&iter) ?:
|
||||||
|
bch2_trans_update(trans, &iter, k, 0);
|
||||||
|
bch2_trans_iter_exit(trans, &iter);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
|
||||||
|
struct bpos pos, bool set)
|
||||||
{
|
{
|
||||||
struct bkey_i k;
|
struct bkey_i k;
|
||||||
|
|
||||||
|
@ -63,11 +63,12 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
|
|||||||
struct bpos, struct bpos, unsigned, u64 *);
|
struct bpos, struct bpos, unsigned, u64 *);
|
||||||
|
|
||||||
int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
|
int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
|
||||||
|
int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool);
|
||||||
|
|
||||||
static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
|
static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
|
||||||
enum btree_id btree, struct bpos pos)
|
enum btree_id btree, struct bpos pos)
|
||||||
{
|
{
|
||||||
return bch2_btree_bit_mod(trans, btree, pos, false);
|
return bch2_btree_bit_mod_buffered(trans, btree, pos, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
|
int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
|
||||||
|
@ -1844,8 +1844,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
|||||||
__func__, buf1.buf, buf2.buf);
|
__func__, buf1.buf, buf2.buf);
|
||||||
printbuf_exit(&buf1);
|
printbuf_exit(&buf1);
|
||||||
printbuf_exit(&buf2);
|
printbuf_exit(&buf2);
|
||||||
bch2_topology_error(c);
|
ret = bch2_topology_error(c);
|
||||||
ret = -EIO;
|
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1053,7 +1053,8 @@ int bch2_trigger_extent(struct btree_trans *trans,
|
|||||||
(int) bch2_bkey_needs_rebalance(c, old);
|
(int) bch2_bkey_needs_rebalance(c, old);
|
||||||
|
|
||||||
if (mod) {
|
if (mod) {
|
||||||
int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0);
|
int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
|
||||||
|
new.k->p, mod > 0);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -201,17 +201,17 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
|
|||||||
}
|
}
|
||||||
|
|
||||||
int bch2_dirent_create_snapshot(struct btree_trans *trans,
|
int bch2_dirent_create_snapshot(struct btree_trans *trans,
|
||||||
u64 dir, u32 snapshot,
|
u32 dir_subvol, u64 dir, u32 snapshot,
|
||||||
const struct bch_hash_info *hash_info,
|
const struct bch_hash_info *hash_info,
|
||||||
u8 type, const struct qstr *name, u64 dst_inum,
|
u8 type, const struct qstr *name, u64 dst_inum,
|
||||||
u64 *dir_offset,
|
u64 *dir_offset,
|
||||||
bch_str_hash_flags_t str_hash_flags)
|
bch_str_hash_flags_t str_hash_flags)
|
||||||
{
|
{
|
||||||
subvol_inum zero_inum = { 0 };
|
subvol_inum dir_inum = { .subvol = dir_subvol, .inum = dir };
|
||||||
struct bkey_i_dirent *dirent;
|
struct bkey_i_dirent *dirent;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
dirent = dirent_create_key(trans, zero_inum, type, name, dst_inum);
|
dirent = dirent_create_key(trans, dir_inum, type, name, dst_inum);
|
||||||
ret = PTR_ERR_OR_ZERO(dirent);
|
ret = PTR_ERR_OR_ZERO(dirent);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
@ -220,7 +220,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
|
|||||||
dirent->k.p.snapshot = snapshot;
|
dirent->k.p.snapshot = snapshot;
|
||||||
|
|
||||||
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
|
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
|
||||||
zero_inum, snapshot,
|
dir_inum, snapshot,
|
||||||
&dirent->k_i, str_hash_flags,
|
&dirent->k_i, str_hash_flags,
|
||||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
|
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
|
||||||
*dir_offset = dirent->k.p.offset;
|
*dir_offset = dirent->k.p.offset;
|
||||||
@ -522,7 +522,7 @@ int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot)
|
|||||||
SPOS(dir, 0, snapshot),
|
SPOS(dir, 0, snapshot),
|
||||||
POS(dir, U64_MAX), 0, k, ret)
|
POS(dir, U64_MAX), 0, k, ret)
|
||||||
if (k.k->type == KEY_TYPE_dirent) {
|
if (k.k->type == KEY_TYPE_dirent) {
|
||||||
ret = -ENOTEMPTY;
|
ret = -BCH_ERR_ENOTEMPTY_dir_not_empty;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
bch2_trans_iter_exit(trans, &iter);
|
bch2_trans_iter_exit(trans, &iter);
|
||||||
|
@ -35,7 +35,7 @@ static inline unsigned dirent_val_u64s(unsigned len)
|
|||||||
int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
|
int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
|
||||||
struct bkey_s_c_dirent, subvol_inum *);
|
struct bkey_s_c_dirent, subvol_inum *);
|
||||||
|
|
||||||
int bch2_dirent_create_snapshot(struct btree_trans *, u64, u32,
|
int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
|
||||||
const struct bch_hash_info *, u8,
|
const struct bch_hash_info *, u8,
|
||||||
const struct qstr *, u64, u64 *,
|
const struct qstr *, u64, u64 *,
|
||||||
bch_str_hash_flags_t);
|
bch_str_hash_flags_t);
|
||||||
|
@ -109,6 +109,8 @@
|
|||||||
x(ENOENT, ENOENT_dirent_doesnt_match_inode) \
|
x(ENOENT, ENOENT_dirent_doesnt_match_inode) \
|
||||||
x(ENOENT, ENOENT_dev_not_found) \
|
x(ENOENT, ENOENT_dev_not_found) \
|
||||||
x(ENOENT, ENOENT_dev_idx_not_found) \
|
x(ENOENT, ENOENT_dev_idx_not_found) \
|
||||||
|
x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \
|
||||||
|
x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \
|
||||||
x(0, open_buckets_empty) \
|
x(0, open_buckets_empty) \
|
||||||
x(0, freelist_empty) \
|
x(0, freelist_empty) \
|
||||||
x(BCH_ERR_freelist_empty, no_buckets_found) \
|
x(BCH_ERR_freelist_empty, no_buckets_found) \
|
||||||
@ -178,6 +180,7 @@
|
|||||||
x(EINVAL, opt_parse_error) \
|
x(EINVAL, opt_parse_error) \
|
||||||
x(EINVAL, remove_with_metadata_missing_unimplemented)\
|
x(EINVAL, remove_with_metadata_missing_unimplemented)\
|
||||||
x(EINVAL, remove_would_lose_data) \
|
x(EINVAL, remove_would_lose_data) \
|
||||||
|
x(EINVAL, btree_iter_with_journal_not_supported) \
|
||||||
x(EROFS, erofs_trans_commit) \
|
x(EROFS, erofs_trans_commit) \
|
||||||
x(EROFS, erofs_no_writes) \
|
x(EROFS, erofs_no_writes) \
|
||||||
x(EROFS, erofs_journal_err) \
|
x(EROFS, erofs_journal_err) \
|
||||||
@ -227,7 +230,10 @@
|
|||||||
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
|
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
|
||||||
x(EIO, btree_node_read_err) \
|
x(EIO, btree_node_read_err) \
|
||||||
x(EIO, sb_not_downgraded) \
|
x(EIO, sb_not_downgraded) \
|
||||||
x(EIO, btree_write_all_failed) \
|
x(EIO, btree_node_write_all_failed) \
|
||||||
|
x(EIO, btree_node_read_error) \
|
||||||
|
x(EIO, btree_node_read_validate_error) \
|
||||||
|
x(EIO, btree_need_topology_repair) \
|
||||||
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
|
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
|
||||||
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
|
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
|
||||||
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
|
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
#include "bcachefs.h"
|
#include "bcachefs.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
|
#include "recovery.h"
|
||||||
#include "super.h"
|
#include "super.h"
|
||||||
#include <linux/thread_with_file.h>
|
#include <linux/thread_with_file.h>
|
||||||
|
|
||||||
@ -25,11 +26,16 @@ bool bch2_inconsistent_error(struct bch_fs *c)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_topology_error(struct bch_fs *c)
|
int bch2_topology_error(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
set_bit(BCH_FS_topology_error, &c->flags);
|
set_bit(BCH_FS_topology_error, &c->flags);
|
||||||
if (!test_bit(BCH_FS_fsck_running, &c->flags))
|
if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
|
||||||
bch2_inconsistent_error(c);
|
bch2_inconsistent_error(c);
|
||||||
|
return -BCH_ERR_btree_need_topology_repair;
|
||||||
|
} else {
|
||||||
|
return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?:
|
||||||
|
-BCH_ERR_btree_node_read_validate_error;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_fatal_error(struct bch_fs *c)
|
void bch2_fatal_error(struct bch_fs *c)
|
||||||
|
@ -30,7 +30,7 @@ struct work_struct;
|
|||||||
|
|
||||||
bool bch2_inconsistent_error(struct bch_fs *);
|
bool bch2_inconsistent_error(struct bch_fs *);
|
||||||
|
|
||||||
void bch2_topology_error(struct bch_fs *);
|
int bch2_topology_error(struct bch_fs *);
|
||||||
|
|
||||||
#define bch2_fs_inconsistent(c, ...) \
|
#define bch2_fs_inconsistent(c, ...) \
|
||||||
({ \
|
({ \
|
||||||
|
@ -107,6 +107,7 @@ int bch2_create_trans(struct btree_trans *trans,
|
|||||||
u32 new_subvol, dir_snapshot;
|
u32 new_subvol, dir_snapshot;
|
||||||
|
|
||||||
ret = bch2_subvolume_create(trans, new_inode->bi_inum,
|
ret = bch2_subvolume_create(trans, new_inode->bi_inum,
|
||||||
|
dir.subvol,
|
||||||
snapshot_src.subvol,
|
snapshot_src.subvol,
|
||||||
&new_subvol, &snapshot,
|
&new_subvol, &snapshot,
|
||||||
(flags & BCH_CREATE_SNAPSHOT_RO) != 0);
|
(flags & BCH_CREATE_SNAPSHOT_RO) != 0);
|
||||||
@ -242,7 +243,7 @@ int bch2_unlink_trans(struct btree_trans *trans,
|
|||||||
struct bch_inode_unpacked *dir_u,
|
struct bch_inode_unpacked *dir_u,
|
||||||
struct bch_inode_unpacked *inode_u,
|
struct bch_inode_unpacked *inode_u,
|
||||||
const struct qstr *name,
|
const struct qstr *name,
|
||||||
bool deleting_snapshot)
|
bool deleting_subvol)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
struct btree_iter dir_iter = { NULL };
|
struct btree_iter dir_iter = { NULL };
|
||||||
@ -270,18 +271,25 @@ int bch2_unlink_trans(struct btree_trans *trans,
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) {
|
if (!deleting_subvol && S_ISDIR(inode_u->bi_mode)) {
|
||||||
ret = bch2_empty_dir_trans(trans, inum);
|
ret = bch2_empty_dir_trans(trans, inum);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (deleting_snapshot && !inode_u->bi_subvol) {
|
if (deleting_subvol && !inode_u->bi_subvol) {
|
||||||
ret = -BCH_ERR_ENOENT_not_subvol;
|
ret = -BCH_ERR_ENOENT_not_subvol;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (deleting_snapshot || inode_u->bi_subvol) {
|
if (inode_u->bi_subvol) {
|
||||||
|
/* Recursive subvolume destroy not allowed (yet?) */
|
||||||
|
ret = bch2_subvol_has_children(trans, inode_u->bi_subvol);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (deleting_subvol || inode_u->bi_subvol) {
|
||||||
ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
|
ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
@ -503,7 +503,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
|
|||||||
bch2_subvol_is_ro(c, inode->ei_subvol) ?:
|
bch2_subvol_is_ro(c, inode->ei_subvol) ?:
|
||||||
__bch2_link(c, inode, dir, dentry);
|
__bch2_link(c, inode, dir, dentry);
|
||||||
if (unlikely(ret))
|
if (unlikely(ret))
|
||||||
return ret;
|
return bch2_err_class(ret);
|
||||||
|
|
||||||
ihold(&inode->v);
|
ihold(&inode->v);
|
||||||
d_instantiate(dentry, &inode->v);
|
d_instantiate(dentry, &inode->v);
|
||||||
@ -555,8 +555,9 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
|
|||||||
struct bch_inode_info *dir= to_bch_ei(vdir);
|
struct bch_inode_info *dir= to_bch_ei(vdir);
|
||||||
struct bch_fs *c = dir->v.i_sb->s_fs_info;
|
struct bch_fs *c = dir->v.i_sb->s_fs_info;
|
||||||
|
|
||||||
return bch2_subvol_is_ro(c, dir->ei_subvol) ?:
|
int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
|
||||||
__bch2_unlink(vdir, dentry, false);
|
__bch2_unlink(vdir, dentry, false);
|
||||||
|
return bch2_err_class(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_symlink(struct mnt_idmap *idmap,
|
static int bch2_symlink(struct mnt_idmap *idmap,
|
||||||
@ -591,7 +592,7 @@ static int bch2_symlink(struct mnt_idmap *idmap,
|
|||||||
return 0;
|
return 0;
|
||||||
err:
|
err:
|
||||||
iput(&inode->v);
|
iput(&inode->v);
|
||||||
return ret;
|
return bch2_err_class(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_mkdir(struct mnt_idmap *idmap,
|
static int bch2_mkdir(struct mnt_idmap *idmap,
|
||||||
|
@ -252,7 +252,7 @@ create_lostfound:
|
|||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
ret = bch2_dirent_create_snapshot(trans,
|
ret = bch2_dirent_create_snapshot(trans,
|
||||||
root_inode.bi_inum, snapshot, &root_hash_info,
|
0, root_inode.bi_inum, snapshot, &root_hash_info,
|
||||||
mode_to_type(lostfound->bi_mode),
|
mode_to_type(lostfound->bi_mode),
|
||||||
&lostfound_str,
|
&lostfound_str,
|
||||||
lostfound->bi_inum,
|
lostfound->bi_inum,
|
||||||
@ -275,9 +275,24 @@ static int reattach_inode(struct btree_trans *trans,
|
|||||||
char name_buf[20];
|
char name_buf[20];
|
||||||
struct qstr name;
|
struct qstr name;
|
||||||
u64 dir_offset = 0;
|
u64 dir_offset = 0;
|
||||||
|
u32 dirent_snapshot = inode_snapshot;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
ret = lookup_lostfound(trans, inode_snapshot, &lostfound);
|
if (inode->bi_subvol) {
|
||||||
|
inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;
|
||||||
|
|
||||||
|
u64 root_inum;
|
||||||
|
ret = subvol_lookup(trans, inode->bi_parent_subvol,
|
||||||
|
&dirent_snapshot, &root_inum);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol);
|
||||||
|
} else {
|
||||||
|
snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
@ -291,14 +306,16 @@ static int reattach_inode(struct btree_trans *trans,
|
|||||||
|
|
||||||
dir_hash = bch2_hash_info_init(trans->c, &lostfound);
|
dir_hash = bch2_hash_info_init(trans->c, &lostfound);
|
||||||
|
|
||||||
snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
|
|
||||||
name = (struct qstr) QSTR(name_buf);
|
name = (struct qstr) QSTR(name_buf);
|
||||||
|
|
||||||
ret = bch2_dirent_create_snapshot(trans,
|
ret = bch2_dirent_create_snapshot(trans,
|
||||||
lostfound.bi_inum, inode_snapshot,
|
inode->bi_parent_subvol, lostfound.bi_inum,
|
||||||
|
dirent_snapshot,
|
||||||
&dir_hash,
|
&dir_hash,
|
||||||
inode_d_type(inode),
|
inode_d_type(inode),
|
||||||
&name, inode->bi_inum, &dir_offset,
|
&name,
|
||||||
|
inode->bi_subvol ?: inode->bi_inum,
|
||||||
|
&dir_offset,
|
||||||
BCH_HASH_SET_MUST_CREATE);
|
BCH_HASH_SET_MUST_CREATE);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
@ -564,13 +581,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static struct inode_walker_entry *
|
static struct inode_walker_entry *
|
||||||
lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w,
|
lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
|
||||||
u32 snapshot, bool is_whiteout)
|
|
||||||
{
|
{
|
||||||
|
bool is_whiteout = k.k->type == KEY_TYPE_whiteout;
|
||||||
|
u32 snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
|
||||||
|
|
||||||
struct inode_walker_entry *i;
|
struct inode_walker_entry *i;
|
||||||
|
|
||||||
snapshot = bch2_snapshot_equiv(c, snapshot);
|
|
||||||
|
|
||||||
__darray_for_each(w->inodes, i)
|
__darray_for_each(w->inodes, i)
|
||||||
if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
|
if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
|
||||||
goto found;
|
goto found;
|
||||||
@ -581,20 +597,24 @@ found:
|
|||||||
|
|
||||||
if (snapshot != i->snapshot && !is_whiteout) {
|
if (snapshot != i->snapshot && !is_whiteout) {
|
||||||
struct inode_walker_entry new = *i;
|
struct inode_walker_entry new = *i;
|
||||||
size_t pos;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
new.snapshot = snapshot;
|
new.snapshot = snapshot;
|
||||||
new.count = 0;
|
new.count = 0;
|
||||||
|
|
||||||
bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
|
struct printbuf buf = PRINTBUF;
|
||||||
w->last_pos.inode, snapshot, i->snapshot);
|
bch2_bkey_val_to_text(&buf, c, k);
|
||||||
|
|
||||||
|
bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
|
||||||
|
"unexpected because we should always update the inode when we update a key in that inode\n"
|
||||||
|
"%s",
|
||||||
|
w->last_pos.inode, snapshot, i->snapshot, buf.buf);
|
||||||
|
printbuf_exit(&buf);
|
||||||
|
|
||||||
while (i > w->inodes.data && i[-1].snapshot > snapshot)
|
while (i > w->inodes.data && i[-1].snapshot > snapshot)
|
||||||
--i;
|
--i;
|
||||||
|
|
||||||
pos = i - w->inodes.data;
|
size_t pos = i - w->inodes.data;
|
||||||
ret = darray_insert_item(&w->inodes, pos, new);
|
int ret = darray_insert_item(&w->inodes, pos, new);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ERR_PTR(ret);
|
return ERR_PTR(ret);
|
||||||
|
|
||||||
@ -605,21 +625,21 @@ found:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
|
static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
|
||||||
struct inode_walker *w, struct bpos pos,
|
struct inode_walker *w,
|
||||||
bool is_whiteout)
|
struct bkey_s_c k)
|
||||||
{
|
{
|
||||||
if (w->last_pos.inode != pos.inode) {
|
if (w->last_pos.inode != k.k->p.inode) {
|
||||||
int ret = get_inodes_all_snapshots(trans, w, pos.inode);
|
int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ERR_PTR(ret);
|
return ERR_PTR(ret);
|
||||||
} else if (bkey_cmp(w->last_pos, pos)) {
|
} else if (bkey_cmp(w->last_pos, k.k->p)) {
|
||||||
darray_for_each(w->inodes, i)
|
darray_for_each(w->inodes, i)
|
||||||
i->seen_this_pos = false;
|
i->seen_this_pos = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
w->last_pos = pos;
|
w->last_pos = k.k->p;
|
||||||
|
|
||||||
return lookup_inode_for_snapshot(trans->c, w, pos.snapshot, is_whiteout);
|
return lookup_inode_for_snapshot(trans->c, w, k);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __get_visible_inodes(struct btree_trans *trans,
|
static int __get_visible_inodes(struct btree_trans *trans,
|
||||||
@ -767,6 +787,43 @@ fsck_err:
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
|
||||||
|
struct btree_iter *iter,
|
||||||
|
struct bpos pos)
|
||||||
|
{
|
||||||
|
return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
|
||||||
|
struct btree_iter *iter,
|
||||||
|
struct bch_inode_unpacked *inode,
|
||||||
|
u32 *snapshot)
|
||||||
|
{
|
||||||
|
if (inode->bi_subvol) {
|
||||||
|
u64 inum;
|
||||||
|
int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum);
|
||||||
|
if (ret)
|
||||||
|
return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) });
|
||||||
|
}
|
||||||
|
|
||||||
|
return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
|
||||||
|
struct bkey_s_c_dirent d)
|
||||||
|
{
|
||||||
|
return inode->bi_dir == d.k->p.inode &&
|
||||||
|
inode->bi_dir_offset == d.k->p.offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
|
||||||
|
struct bch_inode_unpacked *inode)
|
||||||
|
{
|
||||||
|
return d.v->d_type == DT_SUBVOL
|
||||||
|
? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
|
||||||
|
: le64_to_cpu(d.v->d_inum) == inode->bi_inum;
|
||||||
|
}
|
||||||
|
|
||||||
static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
|
static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
|
||||||
{
|
{
|
||||||
struct btree_iter iter;
|
struct btree_iter iter;
|
||||||
@ -779,6 +836,49 @@ static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
|
|||||||
return k.k->type == KEY_TYPE_set;
|
return k.k->type == KEY_TYPE_set;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k,
|
||||||
|
struct bch_inode_unpacked *inode,
|
||||||
|
u32 inode_snapshot, bool *write_inode)
|
||||||
|
{
|
||||||
|
struct bch_fs *c = trans->c;
|
||||||
|
struct printbuf buf = PRINTBUF;
|
||||||
|
|
||||||
|
struct btree_iter dirent_iter = {};
|
||||||
|
struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
|
||||||
|
int ret = bkey_err(d);
|
||||||
|
if (ret && !bch2_err_matches(ret, ENOENT))
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
if (fsck_err_on(ret,
|
||||||
|
c, inode_points_to_missing_dirent,
|
||||||
|
"inode points to missing dirent\n%s",
|
||||||
|
(bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) ||
|
||||||
|
fsck_err_on(!ret && !dirent_points_to_inode(d, inode),
|
||||||
|
c, inode_points_to_wrong_dirent,
|
||||||
|
"inode points to dirent that does not point back:\n%s",
|
||||||
|
(bch2_bkey_val_to_text(&buf, c, inode_k),
|
||||||
|
prt_newline(&buf),
|
||||||
|
bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
|
||||||
|
/*
|
||||||
|
* We just clear the backpointer fields for now. If we find a
|
||||||
|
* dirent that points to this inode in check_dirents(), we'll
|
||||||
|
* update it then; then when we get to check_path() if the
|
||||||
|
* backpointer is still 0 we'll reattach it.
|
||||||
|
*/
|
||||||
|
inode->bi_dir = 0;
|
||||||
|
inode->bi_dir_offset = 0;
|
||||||
|
inode->bi_flags &= ~BCH_INODE_backptr_untrusted;
|
||||||
|
*write_inode = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
fsck_err:
|
||||||
|
bch2_trans_iter_exit(trans, &dirent_iter);
|
||||||
|
printbuf_exit(&buf);
|
||||||
|
bch_err_fn(c, ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static int check_inode(struct btree_trans *trans,
|
static int check_inode(struct btree_trans *trans,
|
||||||
struct btree_iter *iter,
|
struct btree_iter *iter,
|
||||||
struct bkey_s_c k,
|
struct bkey_s_c k,
|
||||||
@ -923,6 +1023,22 @@ static int check_inode(struct btree_trans *trans,
|
|||||||
do_update = true;
|
do_update = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (u.bi_dir || u.bi_dir_offset) {
|
||||||
|
ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fsck_err_on(u.bi_parent_subvol &&
|
||||||
|
(u.bi_subvol == 0 ||
|
||||||
|
u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
|
||||||
|
c, inode_bi_parent_nonzero,
|
||||||
|
"inode %llu:%u has subvol %u but nonzero parent subvol %u",
|
||||||
|
u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) {
|
||||||
|
u.bi_parent_subvol = 0;
|
||||||
|
do_update = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (u.bi_subvol) {
|
if (u.bi_subvol) {
|
||||||
struct bch_subvolume s;
|
struct bch_subvolume s;
|
||||||
|
|
||||||
@ -980,28 +1096,6 @@ int bch2_check_inodes(struct bch_fs *c)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
|
|
||||||
struct btree_iter *iter,
|
|
||||||
struct bpos pos)
|
|
||||||
{
|
|
||||||
return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
|
|
||||||
struct bkey_s_c_dirent d)
|
|
||||||
{
|
|
||||||
return inode->bi_dir == d.k->p.inode &&
|
|
||||||
inode->bi_dir_offset == d.k->p.offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
|
|
||||||
struct bch_inode_unpacked *inode)
|
|
||||||
{
|
|
||||||
return d.v->d_type == DT_SUBVOL
|
|
||||||
? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
|
|
||||||
: le64_to_cpu(d.v->d_inum) == inode->bi_inum;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
|
static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
@ -1310,7 +1404,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
|
|||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
i = walk_inode(trans, inode, equiv, k.k->type == KEY_TYPE_whiteout);
|
i = walk_inode(trans, inode, k);
|
||||||
ret = PTR_ERR_OR_ZERO(i);
|
ret = PTR_ERR_OR_ZERO(i);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
@ -1489,28 +1583,27 @@ fsck_err:
|
|||||||
return ret ?: trans_was_restarted(trans, restart_count);
|
return ret ?: trans_was_restarted(trans, restart_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int check_inode_backpointer(struct btree_trans *trans,
|
static int check_dirent_inode_dirent(struct btree_trans *trans,
|
||||||
struct btree_iter *iter,
|
struct btree_iter *iter,
|
||||||
struct bkey_s_c_dirent d,
|
struct bkey_s_c_dirent d,
|
||||||
struct bch_inode_unpacked *target,
|
struct bch_inode_unpacked *target,
|
||||||
u32 target_snapshot)
|
u32 target_snapshot)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
struct btree_iter bp_iter = { NULL };
|
|
||||||
struct printbuf buf = PRINTBUF;
|
struct printbuf buf = PRINTBUF;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
|
if (inode_points_to_dirent(target, d))
|
||||||
|
return 0;
|
||||||
|
|
||||||
if (!target->bi_dir &&
|
if (!target->bi_dir &&
|
||||||
!target->bi_dir_offset) {
|
!target->bi_dir_offset) {
|
||||||
target->bi_dir = d.k->p.inode;
|
target->bi_dir = d.k->p.inode;
|
||||||
target->bi_dir_offset = d.k->p.offset;
|
target->bi_dir_offset = d.k->p.offset;
|
||||||
|
return __bch2_fsck_write_inode(trans, target, target_snapshot);
|
||||||
ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
|
|
||||||
if (ret)
|
|
||||||
goto err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!inode_points_to_dirent(target, d)) {
|
struct btree_iter bp_iter = { NULL };
|
||||||
struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
|
struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
|
||||||
SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
|
SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
|
||||||
ret = bkey_err(bp_dirent);
|
ret = bkey_err(bp_dirent);
|
||||||
@ -1520,14 +1613,33 @@ static int check_inode_backpointer(struct btree_trans *trans,
|
|||||||
bool backpointer_exists = !ret;
|
bool backpointer_exists = !ret;
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
|
||||||
|
if (fsck_err_on(!backpointer_exists,
|
||||||
|
c, inode_wrong_backpointer,
|
||||||
|
"inode %llu:%u has wrong backpointer:\n"
|
||||||
|
"got %llu:%llu\n"
|
||||||
|
"should be %llu:%llu",
|
||||||
|
target->bi_inum, target_snapshot,
|
||||||
|
target->bi_dir,
|
||||||
|
target->bi_dir_offset,
|
||||||
|
d.k->p.inode,
|
||||||
|
d.k->p.offset)) {
|
||||||
|
target->bi_dir = d.k->p.inode;
|
||||||
|
target->bi_dir_offset = d.k->p.offset;
|
||||||
|
ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
bch2_bkey_val_to_text(&buf, c, d.s_c);
|
bch2_bkey_val_to_text(&buf, c, d.s_c);
|
||||||
prt_newline(&buf);
|
prt_newline(&buf);
|
||||||
if (backpointer_exists)
|
if (backpointer_exists)
|
||||||
bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
|
bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
|
||||||
|
|
||||||
if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists,
|
if (fsck_err_on(backpointer_exists &&
|
||||||
|
(S_ISDIR(target->bi_mode) ||
|
||||||
|
target->bi_subvol),
|
||||||
c, inode_dir_multiple_links,
|
c, inode_dir_multiple_links,
|
||||||
"directory %llu:%u with multiple links\n%s",
|
"%s %llu:%u with multiple links\n%s",
|
||||||
|
S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
|
||||||
target->bi_inum, target_snapshot, buf.buf)) {
|
target->bi_inum, target_snapshot, buf.buf)) {
|
||||||
ret = __remove_dirent(trans, d.k->p);
|
ret = __remove_dirent(trans, d.k->p);
|
||||||
goto out;
|
goto out;
|
||||||
@ -1544,30 +1656,10 @@ static int check_inode_backpointer(struct btree_trans *trans,
|
|||||||
target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
|
target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
|
||||||
target->bi_nlink++;
|
target->bi_nlink++;
|
||||||
target->bi_flags &= ~BCH_INODE_unlinked;
|
target->bi_flags &= ~BCH_INODE_unlinked;
|
||||||
|
|
||||||
ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
|
ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fsck_err_on(!backpointer_exists,
|
|
||||||
c, inode_wrong_backpointer,
|
|
||||||
"inode %llu:%u has wrong backpointer:\n"
|
|
||||||
"got %llu:%llu\n"
|
|
||||||
"should be %llu:%llu",
|
|
||||||
target->bi_inum, target_snapshot,
|
|
||||||
target->bi_dir,
|
|
||||||
target->bi_dir_offset,
|
|
||||||
d.k->p.inode,
|
|
||||||
d.k->p.offset)) {
|
|
||||||
target->bi_dir = d.k->p.inode;
|
|
||||||
target->bi_dir_offset = d.k->p.offset;
|
|
||||||
|
|
||||||
ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
|
|
||||||
if (ret)
|
|
||||||
goto err;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
out:
|
out:
|
||||||
err:
|
err:
|
||||||
fsck_err:
|
fsck_err:
|
||||||
@@ -1588,7 +1680,7 @@ static int check_dirent_target(struct btree_trans *trans,
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
 
-	ret = check_inode_backpointer(trans, iter, d, target, target_snapshot);
+	ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot);
 	if (ret)
 		goto err;
 
@@ -1606,28 +1698,13 @@ static int check_dirent_target(struct btree_trans *trans,
 
 		bkey_reassemble(&n->k_i, d.s_c);
 		n->v.d_type = inode_d_type(target);
-		ret = bch2_trans_update(trans, iter, &n->k_i, 0);
-		if (ret)
-			goto err;
-		d = dirent_i_to_s_c(n);
+		if (n->v.d_type == DT_SUBVOL) {
+			n->v.d_parent_subvol = target->bi_parent_subvol;
+			n->v.d_child_subvol = target->bi_subvol;
+		} else {
+			n->v.d_inum = target->bi_inum;
 		}
 
-	if (fsck_err_on(d.v->d_type == DT_SUBVOL &&
-			target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol),
-			c, dirent_d_parent_subvol_wrong,
-			"dirent has wrong d_parent_subvol field: got %u, should be %u",
-			le32_to_cpu(d.v->d_parent_subvol),
-			target->bi_parent_subvol)) {
-		n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
-		ret = PTR_ERR_OR_ZERO(n);
-		if (ret)
-			goto err;
-
-		bkey_reassemble(&n->k_i, d.s_c);
-		n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
-
 		ret = bch2_trans_update(trans, iter, &n->k_i, 0);
 		if (ret)
 			goto err;
@@ -1641,45 +1718,113 @@ fsck_err:
 	return ret;
 }
 
-static int check_subvol_dirent(struct btree_trans *trans, struct btree_iter *iter,
+/* find a subvolume that's a descendent of @snapshot: */
+static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	int ret;
+
+	for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) {
+		if (k.k->type != KEY_TYPE_subvolume)
+			continue;
+
+		struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
+		if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) {
+			bch2_trans_iter_exit(trans, &iter);
+			*subvolid = k.k->p.offset;
+			goto found;
+		}
+	}
+	if (!ret)
+		ret = -ENOENT;
+found:
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
+static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter,
 				  struct bkey_s_c_dirent d)
 {
 	struct bch_fs *c = trans->c;
+	struct btree_iter subvol_iter = {};
 	struct bch_inode_unpacked subvol_root;
+	u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
 	u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
-	u32 target_snapshot;
-	u64 target_inum;
+	u32 parent_snapshot;
+	u64 parent_inum;
+	struct printbuf buf = PRINTBUF;
 	int ret = 0;
 
-	ret = subvol_lookup(trans, target_subvol,
-			    &target_snapshot, &target_inum);
+	ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum);
 	if (ret && !bch2_err_matches(ret, ENOENT))
 		return ret;
 
-	if (fsck_err_on(ret, c, dirent_to_missing_subvol,
-			"dirent points to missing subvolume %u",
-			le32_to_cpu(d.v->d_child_subvol)))
-		return __remove_dirent(trans, d.k->p);
+	if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
+			"dirent parent_subvol points to missing subvolume\n%s",
+			(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
+	    fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot),
+			c, dirent_not_visible_in_parent_subvol,
+			"dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
+			parent_snapshot,
+			(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+		u32 new_parent_subvol;
+		ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
+		if (ret)
+			goto err;
 
-	ret = lookup_inode(trans, target_inum,
-			   &subvol_root, &target_snapshot);
-	if (ret && !bch2_err_matches(ret, ENOENT))
-		return ret;
+		struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
+		ret = PTR_ERR_OR_ZERO(new_dirent);
+		if (ret)
+			goto err;
 
-	if (fsck_err_on(ret, c, subvol_to_missing_root,
-			"subvolume %u points to missing subvolume root %llu",
-			target_subvol,
-			target_inum)) {
-		bch_err(c, "repair not implemented yet");
-		return -EINVAL;
+		new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol);
 	}
 
-	if (fsck_err_on(subvol_root.bi_subvol != target_subvol,
-			c, subvol_root_wrong_bi_subvol,
-			"subvol root %llu has wrong bi_subvol field: got %u, should be %u",
+	struct bkey_s_c_subvolume s =
+		bch2_bkey_get_iter_typed(trans, &subvol_iter,
+					 BTREE_ID_subvolumes, POS(0, target_subvol),
+					 0, subvolume);
+	ret = bkey_err(s.s_c);
+	if (ret && !bch2_err_matches(ret, ENOENT))
+		return ret;
+
+	if (ret) {
+		if (fsck_err(c, dirent_to_missing_subvol,
+			     "dirent points to missing subvolume\n%s",
+			     (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
+			return __remove_dirent(trans, d.k->p);
+		ret = 0;
+		goto out;
+	}
+
+	if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol,
+			c, subvol_fs_path_parent_wrong,
+			"subvol with wrong fs_path_parent, should be be %u\n%s",
+			parent_subvol,
+			(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
+		struct bkey_i_subvolume *n =
+			bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume);
+		ret = PTR_ERR_OR_ZERO(n);
+		if (ret)
+			goto err;
+
+		n->v.fs_path_parent = le32_to_cpu(parent_subvol);
+	}
+
+	u64 target_inum = le64_to_cpu(s.v->inode);
+	u32 target_snapshot = le32_to_cpu(s.v->snapshot);
+
+	ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
+	if (ret && !bch2_err_matches(ret, ENOENT))
+		return ret;
+
+	if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol,
+			c, inode_bi_parent_wrong,
+			"subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
 			target_inum,
-			subvol_root.bi_subvol, target_subvol)) {
-		subvol_root.bi_subvol = target_subvol;
+			subvol_root.bi_parent_subvol, parent_subvol)) {
+		subvol_root.bi_parent_subvol = parent_subvol;
 		ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
 		if (ret)
 			return ret;
@@ -1689,7 +1834,11 @@ static int check_subvol_dirent(struct btree_trans *trans, struct btree_iter *ite
 			   target_snapshot);
 	if (ret)
 		return ret;
+out:
+err:
 fsck_err:
+	bch2_trans_iter_exit(trans, &subvol_iter);
+	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -1731,7 +1880,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 
 	BUG_ON(!btree_iter_path(trans, iter)->should_be_locked);
 
-	i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout);
+	i = walk_inode(trans, dir, k);
 	ret = PTR_ERR_OR_ZERO(i);
 	if (ret < 0)
 		goto err;
@@ -1777,7 +1926,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 	d = bkey_s_c_to_dirent(k);
 
 	if (d.v->d_type == DT_SUBVOL) {
-		ret = check_subvol_dirent(trans, iter, d);
+		ret = check_dirent_to_subvol(trans, iter, d);
 		if (ret)
 			goto err;
 	} else {
@@ -1858,7 +2007,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
 	if (ret)
 		return ret;
 
-	i = walk_inode(trans, inode, k.k->p, k.k->type == KEY_TYPE_whiteout);
+	i = walk_inode(trans, inode, k);
 	ret = PTR_ERR_OR_ZERO(i);
 	if (ret)
 		return ret;
@@ -1997,62 +2146,52 @@ static int path_down(struct bch_fs *c, pathbuf *p,
  *
  * XXX: we should also be verifying that inodes are in the right subvolumes
  */
-static int check_path(struct btree_trans *trans,
-		      pathbuf *p,
-		      struct bch_inode_unpacked *inode,
-		      u32 snapshot)
+static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
 {
 	struct bch_fs *c = trans->c;
+	struct btree_iter inode_iter = {};
+	struct bch_inode_unpacked inode;
+	struct printbuf buf = PRINTBUF;
+	u32 snapshot = bch2_snapshot_equiv(c, inode_k.k->p.snapshot);
 	int ret = 0;
 
-	snapshot = bch2_snapshot_equiv(c, snapshot);
 	p->nr = 0;
 
-	while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
-		 inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
+	BUG_ON(bch2_inode_unpack(inode_k, &inode));
+
+	while (!(inode.bi_inum == BCACHEFS_ROOT_INO &&
+		 inode.bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
 		struct btree_iter dirent_iter;
 		struct bkey_s_c_dirent d;
 		u32 parent_snapshot = snapshot;
 
-		if (inode->bi_subvol) {
-			u64 inum;
-
-			ret = subvol_lookup(trans, inode->bi_parent_subvol,
-					    &parent_snapshot, &inum);
-			if (ret)
-				break;
-		}
-
-		d = dirent_get_by_pos(trans, &dirent_iter,
-				      SPOS(inode->bi_dir, inode->bi_dir_offset,
-					   parent_snapshot));
+		d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
 		ret = bkey_err(d.s_c);
 		if (ret && !bch2_err_matches(ret, ENOENT))
 			break;
 
-		if (!ret && !dirent_points_to_inode(d, inode)) {
+		if (!ret && !dirent_points_to_inode(d, &inode)) {
 			bch2_trans_iter_exit(trans, &dirent_iter);
 			ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
 		}
 
 		if (bch2_err_matches(ret, ENOENT)) {
+			ret = 0;
 			if (fsck_err(c, inode_unreachable,
-				     "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu",
-				     inode->bi_inum, snapshot,
-				     bch2_d_type_str(inode_d_type(inode)),
-				     inode->bi_nlink,
-				     inode->bi_dir,
-				     inode->bi_dir_offset))
-				ret = reattach_inode(trans, inode, snapshot);
-			break;
+				     "unreachable inode\n%s",
+				     (printbuf_reset(&buf),
+				      bch2_bkey_val_to_text(&buf, c, inode_k),
+				      buf.buf)))
+				ret = reattach_inode(trans, &inode, snapshot);
+			goto out;
 		}
 
 		bch2_trans_iter_exit(trans, &dirent_iter);
 
-		if (!S_ISDIR(inode->bi_mode))
+		if (!S_ISDIR(inode.bi_mode))
 			break;
 
-		ret = path_down(c, p, inode->bi_inum, snapshot);
+		ret = path_down(c, p, inode.bi_inum, snapshot);
 		if (ret) {
 			bch_err(c, "memory allocation failure");
 			return ret;
@@ -2060,7 +2199,12 @@ static int check_path(struct btree_trans *trans,
 
 		snapshot = parent_snapshot;
 
-		ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
+		bch2_trans_iter_exit(trans, &inode_iter);
+		inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
+					     SPOS(0, inode.bi_dir, snapshot), 0);
+		ret = bkey_err(inode_k) ?:
+			!bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
+			: bch2_inode_unpack(inode_k, &inode);
 		if (ret) {
 			/* Should have been caught in dirents pass */
 			if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -2068,30 +2212,35 @@ static int check_path(struct btree_trans *trans,
 				break;
 		}
 
-		if (path_is_dup(p, inode->bi_inum, snapshot)) {
+		snapshot = inode_k.k->p.snapshot;
+
+		if (path_is_dup(p, inode.bi_inum, snapshot)) {
 			/* XXX print path */
 			bch_err(c, "directory structure loop");
 
 			darray_for_each(*p, i)
 				pr_err("%llu:%u", i->inum, i->snapshot);
-			pr_err("%llu:%u", inode->bi_inum, snapshot);
+			pr_err("%llu:%u", inode.bi_inum, snapshot);
 
 			if (!fsck_err(c, dir_loop, "directory structure loop"))
 				return 0;
 
-			ret = remove_backpointer(trans, inode);
+			ret = remove_backpointer(trans, &inode);
 			if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
 				bch_err_msg(c, ret, "removing dirent");
 			if (ret)
 				break;
 
-			ret = reattach_inode(trans, inode, snapshot);
+			ret = reattach_inode(trans, &inode, snapshot);
 			if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-				bch_err_msg(c, ret, "reattaching inode %llu", inode->bi_inum);
+				bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
 			break;
 		}
 	}
+out:
 fsck_err:
+	bch2_trans_iter_exit(trans, &inode_iter);
+	printbuf_exit(&buf);
 	bch_err_fn(c, ret);
 	return ret;
 }
@@ -2103,7 +2252,6 @@ fsck_err:
  */
 int bch2_check_directory_structure(struct bch_fs *c)
 {
-	struct bch_inode_unpacked u;
 	pathbuf path = { 0, };
 	int ret;
 
@@ -2116,12 +2264,10 @@ int bch2_check_directory_structure(struct bch_fs *c)
 			if (!bkey_is_inode(k.k))
 				continue;
 
-			BUG_ON(bch2_inode_unpack(k, &u));
-
-			if (u.bi_flags & BCH_INODE_unlinked)
+			if (bch2_inode_flags(k) & BCH_INODE_unlinked)
 				continue;
 
-			check_path(trans, &path, &u, iter.pos.snapshot);
+			check_path(trans, &path, k);
 		})));
 	darray_exit(&path);
 
@@ -620,7 +620,8 @@ int bch2_trigger_inode(struct btree_trans *trans,
 		bool old_deleted = bkey_is_deleted_inode(old);
 		bool new_deleted = bkey_is_deleted_inode(new.s_c);
 		if (old_deleted != new_deleted) {
-			int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted);
+			int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes,
+							      new.k->p, new_deleted);
 			if (ret)
 				return ret;
 		}
@@ -1169,7 +1170,7 @@ fsck_err:
 	bch2_trans_iter_exit(trans, &inode_iter);
 	return ret;
 delete:
-	ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);
+	ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false);
 	goto out;
 }
 
@@ -177,6 +177,20 @@ static inline u8 inode_d_type(struct bch_inode_unpacked *inode)
 	return inode->bi_subvol ? DT_SUBVOL : mode_to_type(inode->bi_mode);
 }
 
+static inline u32 bch2_inode_flags(struct bkey_s_c k)
+{
+	switch (k.k->type) {
+	case KEY_TYPE_inode:
+		return le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags);
+	case KEY_TYPE_inode_v2:
+		return le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags);
+	case KEY_TYPE_inode_v3:
+		return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags);
+	default:
+		return 0;
+	}
+}
+
 /* i_nlink: */
 
 static inline unsigned nlink_bias(umode_t mode)
@@ -44,7 +44,7 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
 			  u64 dev_bucket, u64 time, bool set)
 {
 	return time
-		? bch2_btree_bit_mod(trans, BTREE_ID_lru,
+		? bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru,
 				     lru_pos(lru_id, dev_bucket, time), set)
 		: 0;
 }
@@ -332,6 +332,11 @@ enum fsck_err_opts {
 	  OPT_BOOL(), \
 	  BCH2_NO_SB_OPT, false, \
 	  NULL, "Run fsck on mount") \
+	x(fsck_memory_usage_percent, u8, \
+	  OPT_FS|OPT_MOUNT, \
+	  OPT_UINT(20, 70), \
+	  BCH2_NO_SB_OPT, 50, \
+	  NULL, "Maximum percentage of system ram fsck is allowed to pin")\
 	x(fix_errors, u8, \
 	  OPT_FS|OPT_MOUNT, \
 	  OPT_FN(bch2_opt_fix_errors), \
@@ -264,7 +264,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
 			bkey_copy(&r->key, (struct bkey_i *) entry->start);
 			r->error = 0;
 		} else {
-			r->error = -EIO;
+			r->error = -BCH_ERR_btree_node_read_error;
 		}
 		r->alive = true;
 		break;
@@ -34,6 +34,7 @@
 	x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
 	x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
 	x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
+	x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \
 	x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \
 	x(fs_upgrade_for_subvolumes, 22, 0) \
 	x(resume_logged_ops, 23, PASS_ALWAYS) \
@@ -46,7 +46,13 @@
 	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
 	  BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \
 	x(rebalance_work, \
-	  BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))
+	  BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) \
+	x(subvolume_fs_parent, \
+	  BIT_ULL(BCH_RECOVERY_PASS_check_dirents), \
+	  BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \
+	x(btree_subvolume_children, \
+	  BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \
+	  BCH_FSCK_ERR_subvol_children_not_set)
 
 #define DOWNGRADE_TABLE()
 
@@ -231,7 +231,7 @@
 	x(dirent_name_dot_or_dotdot, 223) \
 	x(dirent_name_has_slash, 224) \
 	x(dirent_d_type_wrong, 225) \
-	x(dirent_d_parent_subvol_wrong, 226) \
+	x(inode_bi_parent_wrong, 226) \
 	x(dirent_in_missing_dir_inode, 227) \
 	x(dirent_in_non_dir_inode, 228) \
 	x(dirent_to_missing_inode, 229) \
@@ -253,7 +253,16 @@
 	x(reflink_p_front_pad_bad, 245) \
 	x(journal_entry_dup_same_device, 246) \
 	x(inode_bi_subvol_missing, 247) \
-	x(inode_bi_subvol_wrong, 248)
+	x(inode_bi_subvol_wrong, 248) \
+	x(inode_points_to_missing_dirent, 249) \
+	x(inode_points_to_wrong_dirent, 250) \
+	x(inode_bi_parent_nonzero, 251) \
+	x(dirent_to_missing_parent_subvol, 252) \
+	x(dirent_not_visible_in_parent_subvol, 253) \
+	x(subvol_fs_path_parent_wrong, 254) \
+	x(subvol_root_fs_path_parent_nonzero, 255) \
+	x(subvol_children_not_set, 256) \
+	x(subvol_children_bad, 257)
 
 enum bch_sb_error_id {
 #define x(t, n) BCH_FSCK_ERR_##t = n,
@@ -13,13 +13,26 @@
 
 static int bch2_subvolume_delete(struct btree_trans *, u32);
 
+static struct bpos subvolume_children_pos(struct bkey_s_c k)
+{
+	if (k.k->type != KEY_TYPE_subvolume)
+		return POS_MIN;
+
+	struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
+	if (!s.v->fs_path_parent)
+		return POS_MIN;
+	return POS(le32_to_cpu(s.v->fs_path_parent), s.k->p.offset);
+}
+
 static int check_subvol(struct btree_trans *trans,
 			struct btree_iter *iter,
 			struct bkey_s_c k)
 {
 	struct bch_fs *c = trans->c;
 	struct bkey_s_c_subvolume subvol;
+	struct btree_iter subvol_children_iter = {};
 	struct bch_snapshot snapshot;
+	struct printbuf buf = PRINTBUF;
 	unsigned snapid;
 	int ret = 0;
 
@@ -42,6 +55,42 @@ static int check_subvol(struct btree_trans *trans,
 		return ret ?: -BCH_ERR_transaction_restart_nested;
 	}
 
+	if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL &&
+			subvol.v->fs_path_parent,
+			c, subvol_root_fs_path_parent_nonzero,
+			"root subvolume has nonzero fs_path_parent\n%s",
+			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+		struct bkey_i_subvolume *n =
+			bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
+		ret = PTR_ERR_OR_ZERO(n);
+		if (ret)
+			goto err;
+
+		n->v.fs_path_parent = 0;
+	}
+
+	if (subvol.v->fs_path_parent) {
+		struct bpos pos = subvolume_children_pos(k);
+
+		struct bkey_s_c subvol_children_k =
+			bch2_bkey_get_iter(trans, &subvol_children_iter,
+					   BTREE_ID_subvolume_children, pos, 0);
+		ret = bkey_err(subvol_children_k);
+		if (ret)
+			goto err;
+
+		if (fsck_err_on(subvol_children_k.k->type != KEY_TYPE_set,
+				c, subvol_children_not_set,
+				"subvolume not set in subvolume_children btree at %llu:%llu\n%s",
+				pos.inode, pos.offset,
+				(printbuf_reset(&buf),
+				 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+			ret = bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, true);
+			if (ret)
+				goto err;
+		}
+	}
+
 	struct bch_inode_unpacked inode;
 	struct btree_iter inode_iter = {};
 	ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode,
@@ -102,9 +151,10 @@ static int check_subvol(struct btree_trans *trans,
 			SET_BCH_SUBVOLUME_SNAP(&s->v, true);
 		}
 	}
 
 err:
 fsck_err:
+	bch2_trans_iter_exit(trans, &subvol_children_iter);
+	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -119,6 +169,42 @@ int bch2_check_subvols(struct bch_fs *c)
 	return ret;
 }
 
+static int check_subvol_child(struct btree_trans *trans,
+			      struct btree_iter *child_iter,
+			      struct bkey_s_c child_k)
+{
+	struct bch_fs *c = trans->c;
+	struct bch_subvolume s;
+	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, child_k.k->p.offset),
+					  0, subvolume, &s);
+	if (ret && !bch2_err_matches(ret, ENOENT))
+		return ret;
+
+	if (fsck_err_on(ret ||
+			le32_to_cpu(s.fs_path_parent) != child_k.k->p.inode,
+			c, subvol_children_bad,
+			"incorrect entry in subvolume_children btree %llu:%llu",
+			child_k.k->p.inode, child_k.k->p.offset)) {
+		ret = bch2_btree_delete_at(trans, child_iter, 0);
+		if (ret)
+			goto err;
+	}
+err:
+fsck_err:
+	return ret;
+}
+
+int bch2_check_subvol_children(struct bch_fs *c)
+{
+	int ret = bch2_trans_run(c,
+		for_each_btree_key_commit(trans, iter,
+				BTREE_ID_subvolume_children, POS_MIN, BTREE_ITER_PREFETCH, k,
+				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+			check_subvol_child(trans, &iter, k)));
+	bch_err_fn(c, ret);
+	return 0;
+}
+
 /* Subvolumes: */
 
 int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k,
@@ -143,8 +229,50 @@ void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
 		   le64_to_cpu(s.v->inode),
 		   le32_to_cpu(s.v->snapshot));
 
-	if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent))
-		prt_printf(out, " parent %u", le32_to_cpu(s.v->parent));
+	if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, creation_parent)) {
+		prt_printf(out, " creation_parent %u", le32_to_cpu(s.v->creation_parent));
+		prt_printf(out, " fs_parent %u", le32_to_cpu(s.v->fs_path_parent));
+	}
 }
 
+static int subvolume_children_mod(struct btree_trans *trans, struct bpos pos, bool set)
+{
+	return !bpos_eq(pos, POS_MIN)
+		? bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, set)
+		: 0;
+}
+
+int bch2_subvolume_trigger(struct btree_trans *trans,
+			   enum btree_id btree_id, unsigned level,
+			   struct bkey_s_c old, struct bkey_s new,
+			   unsigned flags)
+{
+	if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+		struct bpos children_pos_old = subvolume_children_pos(old);
+		struct bpos children_pos_new = subvolume_children_pos(new.s_c);
+
+		if (!bpos_eq(children_pos_old, children_pos_new)) {
+			int ret = subvolume_children_mod(trans, children_pos_old, false) ?:
+				  subvolume_children_mod(trans, children_pos_new, true);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol)
+{
+	struct btree_iter iter;
+
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolume_children, POS(subvol, 0), 0);
+	struct bkey_s_c k = bch2_btree_iter_peek(&iter);
+	bch2_trans_iter_exit(trans, &iter);
+
+	return bkey_err(k) ?: k.k && k.k->p.inode == subvol
+		? -BCH_ERR_ENOTEMPTY_subvol_not_empty
+		: 0;
+}
+
 static __always_inline int
@@ -228,8 +356,8 @@ static int bch2_subvolume_reparent(struct btree_trans *trans,
 	if (k.k->type != KEY_TYPE_subvolume)
 		return 0;
 
-	if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) &&
-	    le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent)
+	if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, creation_parent) &&
+	    le32_to_cpu(bkey_s_c_to_subvolume(k).v->creation_parent) != old_parent)
 		return 0;
 
 	s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
@@ -237,7 +365,7 @@ static int bch2_subvolume_reparent(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
-	s->v.parent = cpu_to_le32(new_parent);
+	s->v.creation_parent = cpu_to_le32(new_parent);
 	return 0;
 }
 
@@ -260,7 +388,7 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d
 			BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
 			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
 		bch2_subvolume_reparent(trans, &iter, k,
-					subvolid_to_delete, le32_to_cpu(s.parent)));
+					subvolid_to_delete, le32_to_cpu(s.creation_parent)));
 }
 
 /*
@@ -391,6 +519,7 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
 }
 
 int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
+			  u32 parent_subvolid,
 			  u32 src_subvolid,
 			  u32 *new_subvolid,
 			  u32 *new_snapshotid,
@@ -450,7 +579,8 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
 	new_subvol->v.flags = 0;
 	new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]);
 	new_subvol->v.inode = cpu_to_le64(inode);
-	new_subvol->v.parent = cpu_to_le32(src_subvolid);
+	new_subvol->v.creation_parent = cpu_to_le32(src_subvolid);
+	new_subvol->v.fs_path_parent = cpu_to_le32(parent_subvolid);
 	new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c));
 	new_subvol->v.otime.hi = 0;
 
@@ -7,17 +7,22 @@
 enum bkey_invalid_flags;
 
 int bch2_check_subvols(struct bch_fs *);
+int bch2_check_subvol_children(struct bch_fs *);
 
 int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c,
 			   enum bkey_invalid_flags, struct printbuf *);
 void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned,
+			   struct bkey_s_c, struct bkey_s, unsigned);
 
 #define bch2_bkey_ops_subvolume ((struct bkey_ops) { \
 	.key_invalid = bch2_subvolume_invalid, \
 	.val_to_text = bch2_subvolume_to_text, \
+	.trigger = bch2_subvolume_trigger, \
 	.min_val_size = 16, \
 })
 
+int bch2_subvol_has_children(struct btree_trans *, u32);
 int bch2_subvolume_get(struct btree_trans *, unsigned,
 		       bool, int, struct bch_subvolume *);
 int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
@@ -29,8 +34,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *);
 void bch2_delete_dead_snapshots_async(struct bch_fs *);
 
 int bch2_subvolume_unlink(struct btree_trans *, u32);
-int bch2_subvolume_create(struct btree_trans *, u64, u32,
-			  u32 *, u32 *, bool);
+int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);
 
 int bch2_fs_subvolumes_init(struct bch_fs *);
 
@@ -19,8 +19,8 @@ struct bch_subvolume {
 	 * This is _not_ necessarily the subvolume of the directory containing
 	 * this subvolume:
 	 */
-	__le32		parent;
-	__le32		pad;
+	__le32		creation_parent;
+	__le32		fs_path_parent;
 	bch_le128	otime;
 };
 
@@ -102,6 +102,8 @@ EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);
  * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update()
  * @s: mean and variance number of samples and their sums
  * @x: new value to include in the &mean_and_variance_weighted
+ * @initted: caller must track whether this is the first use or not
+ * @weight: ewma weight
  *
  * see linked pdf: function derived from equations 140-143 where alpha = 2^w.
  * values are stored bitshifted for performance and added precision.
@@ -132,6 +134,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
 /**
  * mean_and_variance_weighted_get_mean() - get mean from @s
  * @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
  */
 s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s,
					u8 weight)
@@ -143,6 +146,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
 /**
  * mean_and_variance_weighted_get_variance() -- get variance from @s
  * @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
 */
 u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s,
					u8 weight)
@@ -155,6 +159,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);
 /**
  * mean_and_variance_weighted_get_stddev() - get standard deviation from @s
  * @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
 */
 u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s,
					u8 weight)