Update bcachefs sources to bee7b5a4fa21 bcachefs: Pin btree cache in ram for random access in fsck

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-02-09 21:30:46 -05:00
parent 9e6d9560d0
commit 7a716b76b5
36 changed files with 867 additions and 384 deletions

View File

@ -1 +1 @@
50847e296b34efabe199e408ec4d72f10a866c39 bee7b5a4fa2135c9ec9d1c9424018ee494500bb5

View File

@ -2,6 +2,12 @@
#define _LINUX_SORT_H #define _LINUX_SORT_H
#include <stdlib.h> #include <stdlib.h>
#include <linux/types.h>
void sort_r(void *base, size_t num, size_t size,
cmp_r_func_t cmp_func,
swap_r_func_t swap_func,
const void *priv);
static inline void sort(void *base, size_t num, size_t size, static inline void sort(void *base, size_t num, size_t size,
int (*cmp_func)(const void *, const void *), int (*cmp_func)(const void *, const void *),

View File

@ -129,8 +129,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
printbuf_exit(&buf); printbuf_exit(&buf);
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
bch2_inconsistent_error(c); return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
return -EIO;
} else { } else {
return 0; return 0;
} }
@ -553,60 +552,61 @@ static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
}; };
} }
static size_t btree_nodes_fit_in_ram(struct bch_fs *c) static u64 mem_may_pin_bytes(struct bch_fs *c)
{ {
struct sysinfo i; struct sysinfo i;
u64 mem_bytes;
si_meminfo(&i); si_meminfo(&i);
mem_bytes = i.totalram * i.mem_unit;
return div_u64(mem_bytes >> 1, c->opts.btree_node_size); u64 mem_bytes = i.totalram * i.mem_unit;
return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
}
static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
{
return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
} }
static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
unsigned btree_leaf_mask, u64 btree_leaf_mask,
unsigned btree_interior_mask, u64 btree_interior_mask,
struct bbpos start, struct bbpos *end) struct bbpos start, struct bbpos *end)
{ {
struct btree_iter iter; struct bch_fs *c = trans->c;
struct bkey_s_c k; s64 mem_may_pin = mem_may_pin_bytes(c);
size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
enum btree_id btree;
int ret = 0; int ret = 0;
for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) { btree_interior_mask |= btree_leaf_mask;
unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
c->btree_cache.pinned_nodes_leaf_mask = btree_leaf_mask;
c->btree_cache.pinned_nodes_interior_mask = btree_interior_mask;
c->btree_cache.pinned_nodes_start = start;
c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;
for (enum btree_id btree = start.btree;
btree < BTREE_ID_NR && !ret;
btree++) {
unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
struct btree_iter iter;
struct btree *b;
if (!((1U << btree) & btree_leaf_mask) && if (!((1U << btree) & btree_leaf_mask) &&
!((1U << btree) & btree_interior_mask)) !((1U << btree) & btree_interior_mask))
continue; continue;
bch2_trans_node_iter_init(trans, &iter, btree, __for_each_btree_node(trans, iter, btree,
btree == start.btree ? start.pos : POS_MIN, btree == start.btree ? start.pos : POS_MIN,
0, depth, 0); 0, depth, BTREE_ITER_PREFETCH, b, ret) {
/* mem_may_pin -= btree_buf_bytes(b);
* for_each_btree_key_contineu() doesn't check the return value if (mem_may_pin <= 0) {
* from bch2_btree_iter_advance(), which is needed when c->btree_cache.pinned_nodes_end = *end =
* iterating over interior nodes where we'll see keys at BBPOS(btree, b->key.k.p);
* SPOS_MAX:
*/
do {
k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
ret = bkey_err(k);
if (!k.k || ret)
break;
--btree_nodes;
if (!btree_nodes) {
*end = BBPOS(btree, k.k->p);
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return 0; return 0;
} }
} while (bch2_btree_iter_advance(&iter)); }
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
} }
*end = BBPOS_MAX;
return ret; return ret;
} }
@ -664,62 +664,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
return 0; return 0;
} }
static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
struct bpos bucket)
{
return bch2_dev_exists2(c, bucket.inode)
? bucket_pos_to_bp(c, bucket, 0)
: bucket;
}
static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
struct bpos start, struct bpos *end)
{
struct btree_iter alloc_iter;
struct btree_iter bp_iter;
struct bkey_s_c alloc_k, bp_k;
size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
bool alloc_end = false, bp_end = false;
int ret = 0;
bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
start, 0, 1, 0);
bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
while (1) {
alloc_k = !alloc_end
? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
: bkey_s_c_null;
bp_k = !bp_end
? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
: bkey_s_c_null;
ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
if ((!alloc_k.k && !bp_k.k) || ret) {
*end = SPOS_MAX;
break;
}
--btree_nodes;
if (!btree_nodes) {
*end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
break;
}
if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
if (!bch2_btree_iter_advance(&alloc_iter))
alloc_end = true;
} else {
if (!bch2_btree_iter_advance(&bp_iter))
bp_end = true;
}
}
bch2_trans_iter_exit(trans, &bp_iter);
bch2_trans_iter_exit(trans, &alloc_iter);
return ret;
}
int bch2_check_extents_to_backpointers(struct bch_fs *c) int bch2_check_extents_to_backpointers(struct bch_fs *c)
{ {
struct btree_trans *trans = bch2_trans_get(c); struct btree_trans *trans = bch2_trans_get(c);
@ -730,10 +674,16 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
bkey_init(&s.last_flushed.k->k); bkey_init(&s.last_flushed.k->k);
while (1) { while (1) {
ret = bch2_get_alloc_in_memory_pos(trans, s.bucket_start, &s.bucket_end); struct bbpos end;
ret = bch2_get_btree_in_memory_pos(trans,
BIT_ULL(BTREE_ID_backpointers),
BIT_ULL(BTREE_ID_backpointers),
BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
if (ret) if (ret)
break; break;
s.bucket_end = end.pos;
if ( bpos_eq(s.bucket_start, POS_MIN) && if ( bpos_eq(s.bucket_start, POS_MIN) &&
!bpos_eq(s.bucket_end, SPOS_MAX)) !bpos_eq(s.bucket_end, SPOS_MAX))
bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
@ -761,6 +711,9 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
bch2_trans_put(trans); bch2_trans_put(trans);
bch2_bkey_buf_exit(&s.last_flushed, c); bch2_bkey_buf_exit(&s.last_flushed, c);
c->btree_cache.pinned_nodes_leaf_mask = 0;
c->btree_cache.pinned_nodes_interior_mask = 0;
bch_err_fn(c, ret); bch_err_fn(c, ret);
return ret; return ret;
} }
@ -866,6 +819,9 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
} }
bch2_trans_put(trans); bch2_trans_put(trans);
c->btree_cache.pinned_nodes_leaf_mask = 0;
c->btree_cache.pinned_nodes_interior_mask = 0;
bch_err_fn(c, ret); bch_err_fn(c, ret);
return ret; return ret;
} }

View File

@ -13,6 +13,6 @@ static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos)
} }
#define BBPOS_MIN BBPOS(0, POS_MIN) #define BBPOS_MIN BBPOS(0, POS_MIN)
#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX) #define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, SPOS_MAX)
#endif /* _BCACHEFS_BBPOS_TYPES_H */ #endif /* _BCACHEFS_BBPOS_TYPES_H */

View File

@ -505,6 +505,7 @@ enum gc_phase {
GC_PHASE_BTREE_deleted_inodes, GC_PHASE_BTREE_deleted_inodes,
GC_PHASE_BTREE_logged_ops, GC_PHASE_BTREE_logged_ops,
GC_PHASE_BTREE_rebalance_work, GC_PHASE_BTREE_rebalance_work,
GC_PHASE_BTREE_subvolume_children,
GC_PHASE_PENDING_DELETE, GC_PHASE_PENDING_DELETE,
}; };

View File

@ -840,7 +840,9 @@ struct bch_sb_field_downgrade {
x(snapshot_skiplists, BCH_VERSION(1, 1)) \ x(snapshot_skiplists, BCH_VERSION(1, 1)) \
x(deleted_inodes, BCH_VERSION(1, 2)) \ x(deleted_inodes, BCH_VERSION(1, 2)) \
x(rebalance_work, BCH_VERSION(1, 3)) \ x(rebalance_work, BCH_VERSION(1, 3)) \
x(member_seq, BCH_VERSION(1, 4)) x(member_seq, BCH_VERSION(1, 4)) \
x(subvolume_fs_parent, BCH_VERSION(1, 5)) \
x(btree_subvolume_children, BCH_VERSION(1, 6))
enum bcachefs_metadata_version { enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9, bcachefs_metadata_version_min = 9,
@ -1488,7 +1490,9 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_logged_op_truncate)| \ BIT_ULL(KEY_TYPE_logged_op_truncate)| \
BIT_ULL(KEY_TYPE_logged_op_finsert)) \ BIT_ULL(KEY_TYPE_logged_op_finsert)) \
x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \ x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \
x(subvolume_children, 19, 0, \
BIT_ULL(KEY_TYPE_set))
enum btree_id { enum btree_id {
#define x(name, nr, ...) BTREE_ID_##name = nr, #define x(name, nr, ...) BTREE_ID_##name = nr,

View File

@ -78,6 +78,7 @@ bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
enum btree_update_flags { enum btree_update_flags {
__BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END, __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END,
__BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE,
__BTREE_UPDATE_NOJOURNAL, __BTREE_UPDATE_NOJOURNAL,
__BTREE_UPDATE_KEY_CACHE_RECLAIM, __BTREE_UPDATE_KEY_CACHE_RECLAIM,
@ -91,6 +92,8 @@ enum btree_update_flags {
}; };
#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) #define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
#define BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE \
(1U << __BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE)
#define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL) #define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL)
#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM) #define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "bbpos.h"
#include "bkey_buf.h" #include "bkey_buf.h"
#include "btree_cache.h" #include "btree_cache.h"
#include "btree_io.h" #include "btree_io.h"
@ -208,6 +209,18 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
int ret = 0; int ret = 0;
lockdep_assert_held(&bc->lock); lockdep_assert_held(&bc->lock);
struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
u64 mask = b->c.level
? bc->pinned_nodes_interior_mask
: bc->pinned_nodes_leaf_mask;
if ((mask & BIT_ULL(b->c.btree_id)) &&
bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
bbpos_cmp(bc->pinned_nodes_end, pos) >= 0)
return -BCH_ERR_ENOMEM_btree_node_reclaim;
wait_on_io: wait_on_io:
if (b->flags & ((1U << BTREE_NODE_dirty)| if (b->flags & ((1U << BTREE_NODE_dirty)|
(1U << BTREE_NODE_read_in_flight)| (1U << BTREE_NODE_read_in_flight)|
@ -905,7 +918,7 @@ retry:
if (unlikely(btree_node_read_error(b))) { if (unlikely(btree_node_read_error(b))) {
six_unlock_type(&b->c.lock, lock_type); six_unlock_type(&b->c.lock, lock_type);
return ERR_PTR(-EIO); return ERR_PTR(-BCH_ERR_btree_node_read_error);
} }
EBUG_ON(b->c.btree_id != path->btree_id); EBUG_ON(b->c.btree_id != path->btree_id);
@ -996,7 +1009,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
if (unlikely(btree_node_read_error(b))) { if (unlikely(btree_node_read_error(b))) {
six_unlock_type(&b->c.lock, lock_type); six_unlock_type(&b->c.lock, lock_type);
return ERR_PTR(-EIO); return ERR_PTR(-BCH_ERR_btree_node_read_error);
} }
EBUG_ON(b->c.btree_id != path->btree_id); EBUG_ON(b->c.btree_id != path->btree_id);
@ -1079,7 +1092,7 @@ lock_node:
if (unlikely(btree_node_read_error(b))) { if (unlikely(btree_node_read_error(b))) {
six_unlock_read(&b->c.lock); six_unlock_read(&b->c.lock);
b = ERR_PTR(-EIO); b = ERR_PTR(-BCH_ERR_btree_node_read_error);
goto out; goto out;
} }

View File

@ -407,7 +407,7 @@ again:
printbuf_reset(&buf); printbuf_reset(&buf);
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
if (mustfix_fsck_err_on(ret == -EIO, c, if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), c,
btree_node_unreadable, btree_node_unreadable,
"Topology repair: unreadable btree node at btree %s level %u:\n" "Topology repair: unreadable btree node at btree %s level %u:\n"
" %s", " %s",
@ -979,7 +979,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
false); false);
ret = PTR_ERR_OR_ZERO(child); ret = PTR_ERR_OR_ZERO(child);
if (ret == -EIO) { if (bch2_err_matches(ret, EIO)) {
bch2_topology_error(c); bch2_topology_error(c);
if (__fsck_err(c, if (__fsck_err(c,

View File

@ -581,8 +581,7 @@ static int __btree_err(int ret,
break; break;
case -BCH_ERR_btree_node_read_err_bad_node: case -BCH_ERR_btree_node_read_err_bad_node:
bch2_print_string_as_lines(KERN_ERR, out.buf); bch2_print_string_as_lines(KERN_ERR, out.buf);
bch2_topology_error(c); ret = bch2_topology_error(c);
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: -EIO;
break; break;
case -BCH_ERR_btree_node_read_err_incompatible: case -BCH_ERR_btree_node_read_err_incompatible:
bch2_print_string_as_lines(KERN_ERR, out.buf); bch2_print_string_as_lines(KERN_ERR, out.buf);
@ -1737,7 +1736,7 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
list_move(&b->list, &c->btree_cache.freeable); list_move(&b->list, &c->btree_cache.freeable);
mutex_unlock(&c->btree_cache.lock); mutex_unlock(&c->btree_cache.lock);
ret = -EIO; ret = -BCH_ERR_btree_node_read_error;
goto err; goto err;
} }
@ -1841,7 +1840,7 @@ static void btree_node_write_work(struct work_struct *work)
bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) { if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) {
ret = -BCH_ERR_btree_write_all_failed; ret = -BCH_ERR_btree_node_write_all_failed;
goto err; goto err;
} }

View File

@ -2303,7 +2303,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
btree_iter_path(trans, iter)->level); btree_iter_path(trans, iter)->level);
if (iter->flags & BTREE_ITER_WITH_JOURNAL) if (iter->flags & BTREE_ITER_WITH_JOURNAL)
return bkey_s_c_err(-EIO); return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported);
bch2_btree_iter_verify(iter); bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify_entry_exit(iter);
@ -2501,6 +2501,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
k = bch2_btree_iter_peek_upto(&iter2, end); k = bch2_btree_iter_peek_upto(&iter2, end);
if (k.k && !bkey_err(k)) { if (k.k && !bkey_err(k)) {
swap(iter->key_cache_path, iter2.key_cache_path);
iter->k = iter2.k; iter->k = iter2.k;
k.k = &iter->k; k.k = &iter->k;
} }
@ -2760,6 +2761,9 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
struct btree_trans *trans = src->trans; struct btree_trans *trans = src->trans;
*dst = *src; *dst = *src;
#ifdef TRACK_PATH_ALLOCATED
dst->ip_allocated = _RET_IP_;
#endif
if (src->path) if (src->path)
__btree_path_get(trans->paths + src->path, src->flags & BTREE_ITER_INTENT); __btree_path_get(trans->paths + src->path, src->flags & BTREE_ITER_INTENT);
if (src->update_path) if (src->update_path)

View File

@ -6,6 +6,7 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/rhashtable.h> #include <linux/rhashtable.h>
#include "bbpos_types.h"
#include "btree_key_cache_types.h" #include "btree_key_cache_types.h"
#include "buckets_types.h" #include "buckets_types.h"
#include "errcode.h" #include "errcode.h"
@ -173,6 +174,11 @@ struct btree_cache {
*/ */
struct task_struct *alloc_lock; struct task_struct *alloc_lock;
struct closure_waitlist alloc_wait; struct closure_waitlist alloc_wait;
struct bbpos pinned_nodes_start;
struct bbpos pinned_nodes_end;
u64 pinned_nodes_leaf_mask;
u64 pinned_nodes_interior_mask;
}; };
struct btree_node_iter { struct btree_node_iter {
@ -654,6 +660,7 @@ const char *bch2_btree_node_type_str(enum btree_node_type);
BIT_ULL(BKEY_TYPE_inodes)| \ BIT_ULL(BKEY_TYPE_inodes)| \
BIT_ULL(BKEY_TYPE_stripes)| \ BIT_ULL(BKEY_TYPE_stripes)| \
BIT_ULL(BKEY_TYPE_reflink)| \ BIT_ULL(BKEY_TYPE_reflink)| \
BIT_ULL(BKEY_TYPE_subvolumes)| \
BIT_ULL(BKEY_TYPE_btree)) BIT_ULL(BKEY_TYPE_btree))
#define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS \ #define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS \
@ -727,7 +734,7 @@ struct btree_root {
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
u8 level; u8 level;
u8 alive; u8 alive;
s8 error; s16 error;
}; };
enum btree_gc_coalesce_fail_reason { enum btree_gc_coalesce_fail_reason {

View File

@ -82,40 +82,169 @@ static noinline int extent_back_merge(struct btree_trans *trans,
return 0; return 0;
} }
/* static struct bkey_s_c peek_slot_including_whiteouts(struct btree_trans *trans, struct btree_iter *iter,
* When deleting, check if we need to emit a whiteout (because we're overwriting enum btree_id btree, struct bpos pos)
* something in an ancestor snapshot)
*/
static int need_whiteout_for_snapshot(struct btree_trans *trans,
enum btree_id btree_id, struct bpos pos)
{ {
struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
u32 snapshot = pos.snapshot;
int ret; int ret;
if (!bch2_snapshot_parent(trans->c, pos.snapshot)) for_each_btree_key_norestart(trans, *iter, btree, pos,
return 0;
pos.snapshot++;
for_each_btree_key_norestart(trans, iter, btree_id, pos,
BTREE_ITER_ALL_SNAPSHOTS| BTREE_ITER_ALL_SNAPSHOTS|
BTREE_ITER_NOPRESERVE, k, ret) { BTREE_ITER_NOPRESERVE, k, ret) {
if (!bkey_eq(k.k->p, pos)) if (!bkey_eq(k.k->p, pos))
break; break;
if (bch2_snapshot_is_ancestor(trans->c, pos.snapshot, k.k->p.snapshot))
if (bch2_snapshot_is_ancestor(trans->c, snapshot, return k;
k.k->p.snapshot)) {
ret = !bkey_whiteout(k.k);
break;
}
} }
bch2_trans_iter_exit(trans, iter);
return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
}
/*
* When deleting, check if we need to emit a whiteout (because we're overwriting
* something in an ancestor snapshot)
*/
static int need_whiteout_for_snapshot(struct btree_trans *trans, enum btree_id btree, struct bpos pos)
{
pos.snapshot = bch2_snapshot_parent(trans->c, pos.snapshot);
if (!pos.snapshot)
return 0;
struct btree_iter iter;
struct bkey_s_c k = peek_slot_including_whiteouts(trans, &iter, btree, pos);
int ret = bkey_err(k) ?: k.k && !bkey_whiteout(k.k);
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret; return ret;
} }
/*
* We're overwriting a key at @pos in snapshot @snapshot, so we need to insert a
* whiteout: that might be in @snapshot, or if there are overwites in sibling
* snapshots, find the common ancestor where @pos is overwritten in every
* descendent and insert the whiteout there - which might be at @pos.
*/
static int delete_interior_snapshot_key(struct btree_trans *trans,
enum btree_id btree,
struct bpos whiteout, bool deleting,
struct bpos overwrite, bool old_is_whiteout)
{
struct bch_fs *c = trans->c;
struct bpos orig_whiteout = whiteout, sib = whiteout;
struct btree_iter iter;
struct bkey_s_c k;
int ret;
sib.snapshot = bch2_snapshot_sibling(c, sib.snapshot);
for_each_btree_key_norestart(trans, iter, btree, sib,
BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_INTENT, k, ret) {
BUG_ON(bpos_gt(k.k->p, overwrite));
if (bpos_lt(k.k->p, sib)) /* unrelated branch - skip */
continue;
if (bpos_gt(k.k->p, sib)) /* did not find @sib */
break;
/* @overwrite is also written in @sib, now check parent */
whiteout.snapshot = bch2_snapshot_parent(c, whiteout.snapshot);
if (bpos_eq(whiteout, overwrite))
break;
sib = whiteout;
sib.snapshot = bch2_snapshot_sibling(c, sib.snapshot);
}
if (ret)
goto err;
if (!deleting && bpos_eq(whiteout, orig_whiteout))
goto out;
if (!bpos_eq(iter.pos, whiteout)) {
bch2_trans_iter_exit(trans, &iter);
bch2_trans_iter_init(trans, &iter, btree, whiteout, BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)
goto err;
}
iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
iter.flags |= BTREE_ITER_FILTER_SNAPSHOTS;
struct bkey_i *delete = bch2_trans_kmalloc(trans, sizeof(*delete));
ret = PTR_ERR_OR_ZERO(delete);
if (ret)
goto err;
bkey_init(&delete->k);
delete->k.p = whiteout;
ret = !bpos_eq(whiteout, overwrite)
? !old_is_whiteout
: need_whiteout_for_snapshot(trans, btree, whiteout);
if (ret < 0)
goto err;
if (ret)
delete->k.type = KEY_TYPE_whiteout;
ret = bch2_trans_update(trans, &iter, delete,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE);
out:
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
/*
* We're overwriting a key in a snapshot that has ancestors: if we're
* overwriting a key in a different snapshot, we need to check if it is now
* fully overritten and can be deleted, and if we're deleting a key in the
* current snapshot we need to check if we need to leave a whiteout.
*/
static noinline int
overwrite_interior_snapshot_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *k)
{
struct bkey_s_c old = bch2_btree_iter_peek_slot(iter);
int ret = bkey_err(old);
if (ret)
return ret;
if (!bkey_deleted(old.k)) {
if (old.k->p.snapshot != k->k.p.snapshot) {
/*
* We're overwriting a key in a different snapshot:
* check if it's also been overwritten in siblings
*/
ret = delete_interior_snapshot_key(trans, iter->btree_id,
k->k.p, bkey_deleted(&k->k),
old.k->p, bkey_whiteout(old.k));
if (ret)
return ret;
if (bkey_deleted(&k->k))
return 1;
} else if (bkey_deleted(&k->k)) {
/*
* We're deleting a key in the current snapshot:
* check if we need to leave a whiteout
*/
ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
if (unlikely(ret < 0))
return ret;
if (ret)
k->k.type = KEY_TYPE_whiteout;
}
}
return 0;
}
int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
enum btree_id id, enum btree_id id,
struct bpos old_pos, struct bpos old_pos,
@ -503,32 +632,29 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_i *k, enum btree_update_flags flags) struct bkey_i *k, enum btree_update_flags flags)
{ {
btree_path_idx_t path_idx = iter->update_path ?: iter->path;
int ret;
if (iter->flags & BTREE_ITER_IS_EXTENTS) if (iter->flags & BTREE_ITER_IS_EXTENTS)
return bch2_trans_update_extent(trans, iter, k, flags); return bch2_trans_update_extent(trans, iter, k, flags);
if (bkey_deleted(&k->k) && if (!(flags & (BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE|
!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) && BTREE_UPDATE_KEY_CACHE_RECLAIM)) &&
(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) { (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p); bch2_snapshot_parent(trans->c, k->k.p.snapshot)) {
if (unlikely(ret < 0)) int ret = overwrite_interior_snapshot_key(trans, iter, k);
return ret;
if (ret) if (ret)
k->k.type = KEY_TYPE_whiteout; return ret < 0 ? ret : 0;
} }
/* /*
* Ensure that updates to cached btrees go to the key cache: * Ensure that updates to cached btrees go to the key cache:
*/ */
btree_path_idx_t path_idx = iter->update_path ?: iter->path;
struct btree_path *path = trans->paths + path_idx; struct btree_path *path = trans->paths + path_idx;
if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) && if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
!path->cached && !path->cached &&
!path->level && !path->level &&
btree_id_cached(trans->c, path->btree_id)) { btree_id_cached(trans->c, path->btree_id)) {
ret = bch2_trans_update_get_key_cache(trans, iter, path); int ret = bch2_trans_update_get_key_cache(trans, iter, path);
if (ret) if (ret)
return ret; return ret;
@ -789,6 +915,27 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
struct bpos pos, bool set) struct bpos pos, bool set)
{
struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
int ret = PTR_ERR_OR_ZERO(k);
if (ret)
return ret;
bkey_init(&k->k);
k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
k->k.p = pos;
struct btree_iter iter;
bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&iter) ?:
bch2_trans_update(trans, &iter, k, 0);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
struct bpos pos, bool set)
{ {
struct bkey_i k; struct bkey_i k;

View File

@ -63,11 +63,12 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
struct bpos, struct bpos, unsigned, u64 *); struct bpos, struct bpos, unsigned, u64 *);
int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool);
static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans, static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
enum btree_id btree, struct bpos pos) enum btree_id btree, struct bpos pos)
{ {
return bch2_btree_bit_mod(trans, btree, pos, false); return bch2_btree_bit_mod_buffered(trans, btree, pos, false);
} }
int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id, int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,

View File

@ -1844,8 +1844,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
__func__, buf1.buf, buf2.buf); __func__, buf1.buf, buf2.buf);
printbuf_exit(&buf1); printbuf_exit(&buf1);
printbuf_exit(&buf2); printbuf_exit(&buf2);
bch2_topology_error(c); ret = bch2_topology_error(c);
ret = -EIO;
goto err; goto err;
} }

View File

@ -1053,7 +1053,8 @@ int bch2_trigger_extent(struct btree_trans *trans,
(int) bch2_bkey_needs_rebalance(c, old); (int) bch2_bkey_needs_rebalance(c, old);
if (mod) { if (mod) {
int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0); int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
new.k->p, mod > 0);
if (ret) if (ret)
return ret; return ret;
} }

View File

@ -201,17 +201,17 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
} }
int bch2_dirent_create_snapshot(struct btree_trans *trans, int bch2_dirent_create_snapshot(struct btree_trans *trans,
u64 dir, u32 snapshot, u32 dir_subvol, u64 dir, u32 snapshot,
const struct bch_hash_info *hash_info, const struct bch_hash_info *hash_info,
u8 type, const struct qstr *name, u64 dst_inum, u8 type, const struct qstr *name, u64 dst_inum,
u64 *dir_offset, u64 *dir_offset,
bch_str_hash_flags_t str_hash_flags) bch_str_hash_flags_t str_hash_flags)
{ {
subvol_inum zero_inum = { 0 }; subvol_inum dir_inum = { .subvol = dir_subvol, .inum = dir };
struct bkey_i_dirent *dirent; struct bkey_i_dirent *dirent;
int ret; int ret;
dirent = dirent_create_key(trans, zero_inum, type, name, dst_inum); dirent = dirent_create_key(trans, dir_inum, type, name, dst_inum);
ret = PTR_ERR_OR_ZERO(dirent); ret = PTR_ERR_OR_ZERO(dirent);
if (ret) if (ret)
return ret; return ret;
@ -220,7 +220,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
dirent->k.p.snapshot = snapshot; dirent->k.p.snapshot = snapshot;
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
zero_inum, snapshot, dir_inum, snapshot,
&dirent->k_i, str_hash_flags, &dirent->k_i, str_hash_flags,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
*dir_offset = dirent->k.p.offset; *dir_offset = dirent->k.p.offset;
@ -522,7 +522,7 @@ int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot)
SPOS(dir, 0, snapshot), SPOS(dir, 0, snapshot),
POS(dir, U64_MAX), 0, k, ret) POS(dir, U64_MAX), 0, k, ret)
if (k.k->type == KEY_TYPE_dirent) { if (k.k->type == KEY_TYPE_dirent) {
ret = -ENOTEMPTY; ret = -BCH_ERR_ENOTEMPTY_dir_not_empty;
break; break;
} }
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);

View File

@ -35,7 +35,7 @@ static inline unsigned dirent_val_u64s(unsigned len)
int bch2_dirent_read_target(struct btree_trans *, subvol_inum, int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
struct bkey_s_c_dirent, subvol_inum *); struct bkey_s_c_dirent, subvol_inum *);
int bch2_dirent_create_snapshot(struct btree_trans *, u64, u32, int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
const struct bch_hash_info *, u8, const struct bch_hash_info *, u8,
const struct qstr *, u64, u64 *, const struct qstr *, u64, u64 *,
bch_str_hash_flags_t); bch_str_hash_flags_t);

View File

@ -109,6 +109,8 @@
x(ENOENT, ENOENT_dirent_doesnt_match_inode) \ x(ENOENT, ENOENT_dirent_doesnt_match_inode) \
x(ENOENT, ENOENT_dev_not_found) \ x(ENOENT, ENOENT_dev_not_found) \
x(ENOENT, ENOENT_dev_idx_not_found) \ x(ENOENT, ENOENT_dev_idx_not_found) \
x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \
x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \
x(0, open_buckets_empty) \ x(0, open_buckets_empty) \
x(0, freelist_empty) \ x(0, freelist_empty) \
x(BCH_ERR_freelist_empty, no_buckets_found) \ x(BCH_ERR_freelist_empty, no_buckets_found) \
@ -178,6 +180,7 @@
x(EINVAL, opt_parse_error) \ x(EINVAL, opt_parse_error) \
x(EINVAL, remove_with_metadata_missing_unimplemented)\ x(EINVAL, remove_with_metadata_missing_unimplemented)\
x(EINVAL, remove_would_lose_data) \ x(EINVAL, remove_would_lose_data) \
x(EINVAL, btree_iter_with_journal_not_supported) \
x(EROFS, erofs_trans_commit) \ x(EROFS, erofs_trans_commit) \
x(EROFS, erofs_no_writes) \ x(EROFS, erofs_no_writes) \
x(EROFS, erofs_journal_err) \ x(EROFS, erofs_journal_err) \
@ -227,7 +230,10 @@
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
x(EIO, btree_node_read_err) \ x(EIO, btree_node_read_err) \
x(EIO, sb_not_downgraded) \ x(EIO, sb_not_downgraded) \
x(EIO, btree_write_all_failed) \ x(EIO, btree_node_write_all_failed) \
x(EIO, btree_node_read_error) \
x(EIO, btree_node_read_validate_error) \
x(EIO, btree_need_topology_repair) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "error.h" #include "error.h"
#include "recovery.h"
#include "super.h" #include "super.h"
#include <linux/thread_with_file.h> #include <linux/thread_with_file.h>
@ -25,11 +26,16 @@ bool bch2_inconsistent_error(struct bch_fs *c)
} }
} }
void bch2_topology_error(struct bch_fs *c) int bch2_topology_error(struct bch_fs *c)
{ {
set_bit(BCH_FS_topology_error, &c->flags); set_bit(BCH_FS_topology_error, &c->flags);
if (!test_bit(BCH_FS_fsck_running, &c->flags)) if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
bch2_inconsistent_error(c); bch2_inconsistent_error(c);
return -BCH_ERR_btree_need_topology_repair;
} else {
return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?:
-BCH_ERR_btree_node_read_validate_error;
}
} }
void bch2_fatal_error(struct bch_fs *c) void bch2_fatal_error(struct bch_fs *c)

View File

@ -30,7 +30,7 @@ struct work_struct;
bool bch2_inconsistent_error(struct bch_fs *); bool bch2_inconsistent_error(struct bch_fs *);
void bch2_topology_error(struct bch_fs *); int bch2_topology_error(struct bch_fs *);
#define bch2_fs_inconsistent(c, ...) \ #define bch2_fs_inconsistent(c, ...) \
({ \ ({ \

View File

@ -107,6 +107,7 @@ int bch2_create_trans(struct btree_trans *trans,
u32 new_subvol, dir_snapshot; u32 new_subvol, dir_snapshot;
ret = bch2_subvolume_create(trans, new_inode->bi_inum, ret = bch2_subvolume_create(trans, new_inode->bi_inum,
dir.subvol,
snapshot_src.subvol, snapshot_src.subvol,
&new_subvol, &snapshot, &new_subvol, &snapshot,
(flags & BCH_CREATE_SNAPSHOT_RO) != 0); (flags & BCH_CREATE_SNAPSHOT_RO) != 0);
@ -242,7 +243,7 @@ int bch2_unlink_trans(struct btree_trans *trans,
struct bch_inode_unpacked *dir_u, struct bch_inode_unpacked *dir_u,
struct bch_inode_unpacked *inode_u, struct bch_inode_unpacked *inode_u,
const struct qstr *name, const struct qstr *name,
bool deleting_snapshot) bool deleting_subvol)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter dir_iter = { NULL }; struct btree_iter dir_iter = { NULL };
@ -270,18 +271,25 @@ int bch2_unlink_trans(struct btree_trans *trans,
if (ret) if (ret)
goto err; goto err;
if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) { if (!deleting_subvol && S_ISDIR(inode_u->bi_mode)) {
ret = bch2_empty_dir_trans(trans, inum); ret = bch2_empty_dir_trans(trans, inum);
if (ret) if (ret)
goto err; goto err;
} }
if (deleting_snapshot && !inode_u->bi_subvol) { if (deleting_subvol && !inode_u->bi_subvol) {
ret = -BCH_ERR_ENOENT_not_subvol; ret = -BCH_ERR_ENOENT_not_subvol;
goto err; goto err;
} }
if (deleting_snapshot || inode_u->bi_subvol) { if (inode_u->bi_subvol) {
/* Recursive subvolume destroy not allowed (yet?) */
ret = bch2_subvol_has_children(trans, inode_u->bi_subvol);
if (ret)
goto err;
}
if (deleting_subvol || inode_u->bi_subvol) {
ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol); ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
if (ret) if (ret)
goto err; goto err;

View File

@ -503,7 +503,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
bch2_subvol_is_ro(c, inode->ei_subvol) ?: bch2_subvol_is_ro(c, inode->ei_subvol) ?:
__bch2_link(c, inode, dir, dentry); __bch2_link(c, inode, dir, dentry);
if (unlikely(ret)) if (unlikely(ret))
return ret; return bch2_err_class(ret);
ihold(&inode->v); ihold(&inode->v);
d_instantiate(dentry, &inode->v); d_instantiate(dentry, &inode->v);
@ -555,8 +555,9 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
struct bch_inode_info *dir= to_bch_ei(vdir); struct bch_inode_info *dir= to_bch_ei(vdir);
struct bch_fs *c = dir->v.i_sb->s_fs_info; struct bch_fs *c = dir->v.i_sb->s_fs_info;
return bch2_subvol_is_ro(c, dir->ei_subvol) ?: int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
__bch2_unlink(vdir, dentry, false); __bch2_unlink(vdir, dentry, false);
return bch2_err_class(ret);
} }
static int bch2_symlink(struct mnt_idmap *idmap, static int bch2_symlink(struct mnt_idmap *idmap,
@ -591,7 +592,7 @@ static int bch2_symlink(struct mnt_idmap *idmap,
return 0; return 0;
err: err:
iput(&inode->v); iput(&inode->v);
return ret; return bch2_err_class(ret);
} }
static int bch2_mkdir(struct mnt_idmap *idmap, static int bch2_mkdir(struct mnt_idmap *idmap,

View File

@ -252,7 +252,7 @@ create_lostfound:
goto err; goto err;
ret = bch2_dirent_create_snapshot(trans, ret = bch2_dirent_create_snapshot(trans,
root_inode.bi_inum, snapshot, &root_hash_info, 0, root_inode.bi_inum, snapshot, &root_hash_info,
mode_to_type(lostfound->bi_mode), mode_to_type(lostfound->bi_mode),
&lostfound_str, &lostfound_str,
lostfound->bi_inum, lostfound->bi_inum,
@ -275,9 +275,24 @@ static int reattach_inode(struct btree_trans *trans,
char name_buf[20]; char name_buf[20];
struct qstr name; struct qstr name;
u64 dir_offset = 0; u64 dir_offset = 0;
u32 dirent_snapshot = inode_snapshot;
int ret; int ret;
ret = lookup_lostfound(trans, inode_snapshot, &lostfound); if (inode->bi_subvol) {
inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;
u64 root_inum;
ret = subvol_lookup(trans, inode->bi_parent_subvol,
&dirent_snapshot, &root_inum);
if (ret)
return ret;
snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol);
} else {
snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
}
ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
if (ret) if (ret)
return ret; return ret;
@ -291,14 +306,16 @@ static int reattach_inode(struct btree_trans *trans,
dir_hash = bch2_hash_info_init(trans->c, &lostfound); dir_hash = bch2_hash_info_init(trans->c, &lostfound);
snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
name = (struct qstr) QSTR(name_buf); name = (struct qstr) QSTR(name_buf);
ret = bch2_dirent_create_snapshot(trans, ret = bch2_dirent_create_snapshot(trans,
lostfound.bi_inum, inode_snapshot, inode->bi_parent_subvol, lostfound.bi_inum,
dirent_snapshot,
&dir_hash, &dir_hash,
inode_d_type(inode), inode_d_type(inode),
&name, inode->bi_inum, &dir_offset, &name,
inode->bi_subvol ?: inode->bi_inum,
&dir_offset,
BCH_HASH_SET_MUST_CREATE); BCH_HASH_SET_MUST_CREATE);
if (ret) if (ret)
return ret; return ret;
@ -564,13 +581,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
} }
static struct inode_walker_entry * static struct inode_walker_entry *
lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
u32 snapshot, bool is_whiteout)
{ {
bool is_whiteout = k.k->type == KEY_TYPE_whiteout;
u32 snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
struct inode_walker_entry *i; struct inode_walker_entry *i;
snapshot = bch2_snapshot_equiv(c, snapshot);
__darray_for_each(w->inodes, i) __darray_for_each(w->inodes, i)
if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot)) if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
goto found; goto found;
@ -581,20 +597,24 @@ found:
if (snapshot != i->snapshot && !is_whiteout) { if (snapshot != i->snapshot && !is_whiteout) {
struct inode_walker_entry new = *i; struct inode_walker_entry new = *i;
size_t pos;
int ret;
new.snapshot = snapshot; new.snapshot = snapshot;
new.count = 0; new.count = 0;
bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u", struct printbuf buf = PRINTBUF;
w->last_pos.inode, snapshot, i->snapshot); bch2_bkey_val_to_text(&buf, c, k);
bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
"unexpected because we should always update the inode when we update a key in that inode\n"
"%s",
w->last_pos.inode, snapshot, i->snapshot, buf.buf);
printbuf_exit(&buf);
while (i > w->inodes.data && i[-1].snapshot > snapshot) while (i > w->inodes.data && i[-1].snapshot > snapshot)
--i; --i;
pos = i - w->inodes.data; size_t pos = i - w->inodes.data;
ret = darray_insert_item(&w->inodes, pos, new); int ret = darray_insert_item(&w->inodes, pos, new);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
@ -605,21 +625,21 @@ found:
} }
static struct inode_walker_entry *walk_inode(struct btree_trans *trans, static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
struct inode_walker *w, struct bpos pos, struct inode_walker *w,
bool is_whiteout) struct bkey_s_c k)
{ {
if (w->last_pos.inode != pos.inode) { if (w->last_pos.inode != k.k->p.inode) {
int ret = get_inodes_all_snapshots(trans, w, pos.inode); int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
} else if (bkey_cmp(w->last_pos, pos)) { } else if (bkey_cmp(w->last_pos, k.k->p)) {
darray_for_each(w->inodes, i) darray_for_each(w->inodes, i)
i->seen_this_pos = false; i->seen_this_pos = false;
} }
w->last_pos = pos; w->last_pos = k.k->p;
return lookup_inode_for_snapshot(trans->c, w, pos.snapshot, is_whiteout); return lookup_inode_for_snapshot(trans->c, w, k);
} }
static int __get_visible_inodes(struct btree_trans *trans, static int __get_visible_inodes(struct btree_trans *trans,
@ -767,6 +787,43 @@ fsck_err:
goto out; goto out;
} }
static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
struct btree_iter *iter,
struct bpos pos)
{
return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
}
static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode,
u32 *snapshot)
{
if (inode->bi_subvol) {
u64 inum;
int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum);
if (ret)
return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) });
}
return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
}
static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
struct bkey_s_c_dirent d)
{
return inode->bi_dir == d.k->p.inode &&
inode->bi_dir_offset == d.k->p.offset;
}
static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
struct bch_inode_unpacked *inode)
{
return d.v->d_type == DT_SUBVOL
? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
: le64_to_cpu(d.v->d_inum) == inode->bi_inum;
}
static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
{ {
struct btree_iter iter; struct btree_iter iter;
@ -779,6 +836,49 @@ static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
return k.k->type == KEY_TYPE_set; return k.k->type == KEY_TYPE_set;
} }
static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k,
struct bch_inode_unpacked *inode,
u32 inode_snapshot, bool *write_inode)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
struct btree_iter dirent_iter = {};
struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
int ret = bkey_err(d);
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
if (fsck_err_on(ret,
c, inode_points_to_missing_dirent,
"inode points to missing dirent\n%s",
(bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) ||
fsck_err_on(!ret && !dirent_points_to_inode(d, inode),
c, inode_points_to_wrong_dirent,
"inode points to dirent that does not point back:\n%s",
(bch2_bkey_val_to_text(&buf, c, inode_k),
prt_newline(&buf),
bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
/*
* We just clear the backpointer fields for now. If we find a
* dirent that points to this inode in check_dirents(), we'll
* update it then; then when we get to check_path() if the
* backpointer is still 0 we'll reattach it.
*/
inode->bi_dir = 0;
inode->bi_dir_offset = 0;
inode->bi_flags &= ~BCH_INODE_backptr_untrusted;
*write_inode = true;
}
ret = 0;
fsck_err:
bch2_trans_iter_exit(trans, &dirent_iter);
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
static int check_inode(struct btree_trans *trans, static int check_inode(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,
struct bkey_s_c k, struct bkey_s_c k,
@ -923,6 +1023,22 @@ static int check_inode(struct btree_trans *trans,
do_update = true; do_update = true;
} }
if (u.bi_dir || u.bi_dir_offset) {
ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
if (ret)
goto err;
}
if (fsck_err_on(u.bi_parent_subvol &&
(u.bi_subvol == 0 ||
u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
c, inode_bi_parent_nonzero,
"inode %llu:%u has subvol %u but nonzero parent subvol %u",
u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) {
u.bi_parent_subvol = 0;
do_update = true;
}
if (u.bi_subvol) { if (u.bi_subvol) {
struct bch_subvolume s; struct bch_subvolume s;
@ -980,28 +1096,6 @@ int bch2_check_inodes(struct bch_fs *c)
return ret; return ret;
} }
static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
struct btree_iter *iter,
struct bpos pos)
{
return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
}
static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
struct bkey_s_c_dirent d)
{
return inode->bi_dir == d.k->p.inode &&
inode->bi_dir_offset == d.k->p.offset;
}
static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
struct bch_inode_unpacked *inode)
{
return d.v->d_type == DT_SUBVOL
? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
: le64_to_cpu(d.v->d_inum) == inode->bi_inum;
}
static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
@ -1310,7 +1404,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
goto err; goto err;
} }
i = walk_inode(trans, inode, equiv, k.k->type == KEY_TYPE_whiteout); i = walk_inode(trans, inode, k);
ret = PTR_ERR_OR_ZERO(i); ret = PTR_ERR_OR_ZERO(i);
if (ret) if (ret)
goto err; goto err;
@ -1489,85 +1583,83 @@ fsck_err:
return ret ?: trans_was_restarted(trans, restart_count); return ret ?: trans_was_restarted(trans, restart_count);
} }
static int check_inode_backpointer(struct btree_trans *trans, static int check_dirent_inode_dirent(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,
struct bkey_s_c_dirent d, struct bkey_s_c_dirent d,
struct bch_inode_unpacked *target, struct bch_inode_unpacked *target,
u32 target_snapshot) u32 target_snapshot)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter bp_iter = { NULL };
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
int ret = 0; int ret = 0;
if (inode_points_to_dirent(target, d))
return 0;
if (!target->bi_dir && if (!target->bi_dir &&
!target->bi_dir_offset) { !target->bi_dir_offset) {
target->bi_dir = d.k->p.inode; target->bi_dir = d.k->p.inode;
target->bi_dir_offset = d.k->p.offset; target->bi_dir_offset = d.k->p.offset;
return __bch2_fsck_write_inode(trans, target, target_snapshot);
}
struct btree_iter bp_iter = { NULL };
struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
ret = bkey_err(bp_dirent);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
bool backpointer_exists = !ret;
ret = 0;
if (fsck_err_on(!backpointer_exists,
c, inode_wrong_backpointer,
"inode %llu:%u has wrong backpointer:\n"
"got %llu:%llu\n"
"should be %llu:%llu",
target->bi_inum, target_snapshot,
target->bi_dir,
target->bi_dir_offset,
d.k->p.inode,
d.k->p.offset)) {
target->bi_dir = d.k->p.inode;
target->bi_dir_offset = d.k->p.offset;
ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
goto out;
}
bch2_bkey_val_to_text(&buf, c, d.s_c);
prt_newline(&buf);
if (backpointer_exists)
bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
if (fsck_err_on(backpointer_exists &&
(S_ISDIR(target->bi_mode) ||
target->bi_subvol),
c, inode_dir_multiple_links,
"%s %llu:%u with multiple links\n%s",
S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
target->bi_inum, target_snapshot, buf.buf)) {
ret = __remove_dirent(trans, d.k->p);
goto out;
}
/*
* hardlinked file with nlink 0:
* We're just adjusting nlink here so check_nlinks() will pick
* it up, it ignores inodes with nlink 0
*/
if (fsck_err_on(backpointer_exists && !target->bi_nlink,
c, inode_multiple_links_but_nlink_0,
"inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
target->bi_nlink++;
target->bi_flags &= ~BCH_INODE_unlinked;
ret = __bch2_fsck_write_inode(trans, target, target_snapshot); ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
if (ret) if (ret)
goto err; goto err;
} }
if (!inode_points_to_dirent(target, d)) {
struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
ret = bkey_err(bp_dirent);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
bool backpointer_exists = !ret;
ret = 0;
bch2_bkey_val_to_text(&buf, c, d.s_c);
prt_newline(&buf);
if (backpointer_exists)
bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists,
c, inode_dir_multiple_links,
"directory %llu:%u with multiple links\n%s",
target->bi_inum, target_snapshot, buf.buf)) {
ret = __remove_dirent(trans, d.k->p);
goto out;
}
/*
* hardlinked file with nlink 0:
* We're just adjusting nlink here so check_nlinks() will pick
* it up, it ignores inodes with nlink 0
*/
if (fsck_err_on(backpointer_exists && !target->bi_nlink,
c, inode_multiple_links_but_nlink_0,
"inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
target->bi_nlink++;
target->bi_flags &= ~BCH_INODE_unlinked;
ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
if (ret)
goto err;
}
if (fsck_err_on(!backpointer_exists,
c, inode_wrong_backpointer,
"inode %llu:%u has wrong backpointer:\n"
"got %llu:%llu\n"
"should be %llu:%llu",
target->bi_inum, target_snapshot,
target->bi_dir,
target->bi_dir_offset,
d.k->p.inode,
d.k->p.offset)) {
target->bi_dir = d.k->p.inode;
target->bi_dir_offset = d.k->p.offset;
ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
if (ret)
goto err;
}
}
out: out:
err: err:
fsck_err: fsck_err:
@ -1588,7 +1680,7 @@ static int check_dirent_target(struct btree_trans *trans,
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
int ret = 0; int ret = 0;
ret = check_inode_backpointer(trans, iter, d, target, target_snapshot); ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot);
if (ret) if (ret)
goto err; goto err;
@ -1606,27 +1698,12 @@ static int check_dirent_target(struct btree_trans *trans,
bkey_reassemble(&n->k_i, d.s_c); bkey_reassemble(&n->k_i, d.s_c);
n->v.d_type = inode_d_type(target); n->v.d_type = inode_d_type(target);
if (n->v.d_type == DT_SUBVOL) {
ret = bch2_trans_update(trans, iter, &n->k_i, 0); n->v.d_parent_subvol = target->bi_parent_subvol;
if (ret) n->v.d_child_subvol = target->bi_subvol;
goto err; } else {
n->v.d_inum = target->bi_inum;
d = dirent_i_to_s_c(n); }
}
if (fsck_err_on(d.v->d_type == DT_SUBVOL &&
target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol),
c, dirent_d_parent_subvol_wrong,
"dirent has wrong d_parent_subvol field: got %u, should be %u",
le32_to_cpu(d.v->d_parent_subvol),
target->bi_parent_subvol)) {
n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
ret = PTR_ERR_OR_ZERO(n);
if (ret)
goto err;
bkey_reassemble(&n->k_i, d.s_c);
n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
ret = bch2_trans_update(trans, iter, &n->k_i, 0); ret = bch2_trans_update(trans, iter, &n->k_i, 0);
if (ret) if (ret)
@ -1641,45 +1718,113 @@ fsck_err:
return ret; return ret;
} }
static int check_subvol_dirent(struct btree_trans *trans, struct btree_iter *iter, /* find a subvolume that's a descendent of @snapshot: */
struct bkey_s_c_dirent d) static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret;
for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) {
if (k.k->type != KEY_TYPE_subvolume)
continue;
struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) {
bch2_trans_iter_exit(trans, &iter);
*subvolid = k.k->p.offset;
goto found;
}
}
if (!ret)
ret = -ENOENT;
found:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c_dirent d)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter subvol_iter = {};
struct bch_inode_unpacked subvol_root; struct bch_inode_unpacked subvol_root;
u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
u32 target_snapshot; u32 parent_snapshot;
u64 target_inum; u64 parent_inum;
struct printbuf buf = PRINTBUF;
int ret = 0; int ret = 0;
ret = subvol_lookup(trans, target_subvol, ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum);
&target_snapshot, &target_inum);
if (ret && !bch2_err_matches(ret, ENOENT)) if (ret && !bch2_err_matches(ret, ENOENT))
return ret; return ret;
if (fsck_err_on(ret, c, dirent_to_missing_subvol, if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
"dirent points to missing subvolume %u", "dirent parent_subvol points to missing subvolume\n%s",
le32_to_cpu(d.v->d_child_subvol))) (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
return __remove_dirent(trans, d.k->p); fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot),
c, dirent_not_visible_in_parent_subvol,
"dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
parent_snapshot,
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
u32 new_parent_subvol;
ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
if (ret)
goto err;
ret = lookup_inode(trans, target_inum, struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
&subvol_root, &target_snapshot); ret = PTR_ERR_OR_ZERO(new_dirent);
if (ret && !bch2_err_matches(ret, ENOENT)) if (ret)
return ret; goto err;
if (fsck_err_on(ret, c, subvol_to_missing_root, new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol);
"subvolume %u points to missing subvolume root %llu",
target_subvol,
target_inum)) {
bch_err(c, "repair not implemented yet");
return -EINVAL;
} }
if (fsck_err_on(subvol_root.bi_subvol != target_subvol, struct bkey_s_c_subvolume s =
c, subvol_root_wrong_bi_subvol, bch2_bkey_get_iter_typed(trans, &subvol_iter,
"subvol root %llu has wrong bi_subvol field: got %u, should be %u", BTREE_ID_subvolumes, POS(0, target_subvol),
0, subvolume);
ret = bkey_err(s.s_c);
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
if (ret) {
if (fsck_err(c, dirent_to_missing_subvol,
"dirent points to missing subvolume\n%s",
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
return __remove_dirent(trans, d.k->p);
ret = 0;
goto out;
}
if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol,
c, subvol_fs_path_parent_wrong,
"subvol with wrong fs_path_parent, should be be %u\n%s",
parent_subvol,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
struct bkey_i_subvolume *n =
bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume);
ret = PTR_ERR_OR_ZERO(n);
if (ret)
goto err;
n->v.fs_path_parent = le32_to_cpu(parent_subvol);
}
u64 target_inum = le64_to_cpu(s.v->inode);
u32 target_snapshot = le32_to_cpu(s.v->snapshot);
ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol,
c, inode_bi_parent_wrong,
"subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
target_inum, target_inum,
subvol_root.bi_subvol, target_subvol)) { subvol_root.bi_parent_subvol, parent_subvol)) {
subvol_root.bi_subvol = target_subvol; subvol_root.bi_parent_subvol = parent_subvol;
ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
if (ret) if (ret)
return ret; return ret;
@ -1689,7 +1834,11 @@ static int check_subvol_dirent(struct btree_trans *trans, struct btree_iter *ite
target_snapshot); target_snapshot);
if (ret) if (ret)
return ret; return ret;
out:
err:
fsck_err: fsck_err:
bch2_trans_iter_exit(trans, &subvol_iter);
printbuf_exit(&buf);
return ret; return ret;
} }
@ -1731,7 +1880,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
BUG_ON(!btree_iter_path(trans, iter)->should_be_locked); BUG_ON(!btree_iter_path(trans, iter)->should_be_locked);
i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout); i = walk_inode(trans, dir, k);
ret = PTR_ERR_OR_ZERO(i); ret = PTR_ERR_OR_ZERO(i);
if (ret < 0) if (ret < 0)
goto err; goto err;
@ -1777,7 +1926,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
d = bkey_s_c_to_dirent(k); d = bkey_s_c_to_dirent(k);
if (d.v->d_type == DT_SUBVOL) { if (d.v->d_type == DT_SUBVOL) {
ret = check_subvol_dirent(trans, iter, d); ret = check_dirent_to_subvol(trans, iter, d);
if (ret) if (ret)
goto err; goto err;
} else { } else {
@ -1858,7 +2007,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
if (ret) if (ret)
return ret; return ret;
i = walk_inode(trans, inode, k.k->p, k.k->type == KEY_TYPE_whiteout); i = walk_inode(trans, inode, k);
ret = PTR_ERR_OR_ZERO(i); ret = PTR_ERR_OR_ZERO(i);
if (ret) if (ret)
return ret; return ret;
@ -1997,62 +2146,52 @@ static int path_down(struct bch_fs *c, pathbuf *p,
* *
* XXX: we should also be verifying that inodes are in the right subvolumes * XXX: we should also be verifying that inodes are in the right subvolumes
*/ */
static int check_path(struct btree_trans *trans, static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
pathbuf *p,
struct bch_inode_unpacked *inode,
u32 snapshot)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter inode_iter = {};
struct bch_inode_unpacked inode;
struct printbuf buf = PRINTBUF;
u32 snapshot = bch2_snapshot_equiv(c, inode_k.k->p.snapshot);
int ret = 0; int ret = 0;
snapshot = bch2_snapshot_equiv(c, snapshot);
p->nr = 0; p->nr = 0;
while (!(inode->bi_inum == BCACHEFS_ROOT_INO && BUG_ON(bch2_inode_unpack(inode_k, &inode));
inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
while (!(inode.bi_inum == BCACHEFS_ROOT_INO &&
inode.bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
struct btree_iter dirent_iter; struct btree_iter dirent_iter;
struct bkey_s_c_dirent d; struct bkey_s_c_dirent d;
u32 parent_snapshot = snapshot; u32 parent_snapshot = snapshot;
if (inode->bi_subvol) { d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
u64 inum;
ret = subvol_lookup(trans, inode->bi_parent_subvol,
&parent_snapshot, &inum);
if (ret)
break;
}
d = dirent_get_by_pos(trans, &dirent_iter,
SPOS(inode->bi_dir, inode->bi_dir_offset,
parent_snapshot));
ret = bkey_err(d.s_c); ret = bkey_err(d.s_c);
if (ret && !bch2_err_matches(ret, ENOENT)) if (ret && !bch2_err_matches(ret, ENOENT))
break; break;
if (!ret && !dirent_points_to_inode(d, inode)) { if (!ret && !dirent_points_to_inode(d, &inode)) {
bch2_trans_iter_exit(trans, &dirent_iter); bch2_trans_iter_exit(trans, &dirent_iter);
ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
} }
if (bch2_err_matches(ret, ENOENT)) { if (bch2_err_matches(ret, ENOENT)) {
if (fsck_err(c, inode_unreachable, ret = 0;
"unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu", if (fsck_err(c, inode_unreachable,
inode->bi_inum, snapshot, "unreachable inode\n%s",
bch2_d_type_str(inode_d_type(inode)), (printbuf_reset(&buf),
inode->bi_nlink, bch2_bkey_val_to_text(&buf, c, inode_k),
inode->bi_dir, buf.buf)))
inode->bi_dir_offset)) ret = reattach_inode(trans, &inode, snapshot);
ret = reattach_inode(trans, inode, snapshot); goto out;
break;
} }
bch2_trans_iter_exit(trans, &dirent_iter); bch2_trans_iter_exit(trans, &dirent_iter);
if (!S_ISDIR(inode->bi_mode)) if (!S_ISDIR(inode.bi_mode))
break; break;
ret = path_down(c, p, inode->bi_inum, snapshot); ret = path_down(c, p, inode.bi_inum, snapshot);
if (ret) { if (ret) {
bch_err(c, "memory allocation failure"); bch_err(c, "memory allocation failure");
return ret; return ret;
@ -2060,7 +2199,12 @@ static int check_path(struct btree_trans *trans,
snapshot = parent_snapshot; snapshot = parent_snapshot;
ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot); bch2_trans_iter_exit(trans, &inode_iter);
inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
SPOS(0, inode.bi_dir, snapshot), 0);
ret = bkey_err(inode_k) ?:
!bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
: bch2_inode_unpack(inode_k, &inode);
if (ret) { if (ret) {
/* Should have been caught in dirents pass */ /* Should have been caught in dirents pass */
if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
@ -2068,30 +2212,35 @@ static int check_path(struct btree_trans *trans,
break; break;
} }
if (path_is_dup(p, inode->bi_inum, snapshot)) { snapshot = inode_k.k->p.snapshot;
if (path_is_dup(p, inode.bi_inum, snapshot)) {
/* XXX print path */ /* XXX print path */
bch_err(c, "directory structure loop"); bch_err(c, "directory structure loop");
darray_for_each(*p, i) darray_for_each(*p, i)
pr_err("%llu:%u", i->inum, i->snapshot); pr_err("%llu:%u", i->inum, i->snapshot);
pr_err("%llu:%u", inode->bi_inum, snapshot); pr_err("%llu:%u", inode.bi_inum, snapshot);
if (!fsck_err(c, dir_loop, "directory structure loop")) if (!fsck_err(c, dir_loop, "directory structure loop"))
return 0; return 0;
ret = remove_backpointer(trans, inode); ret = remove_backpointer(trans, &inode);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err_msg(c, ret, "removing dirent"); bch_err_msg(c, ret, "removing dirent");
if (ret) if (ret)
break; break;
ret = reattach_inode(trans, inode, snapshot); ret = reattach_inode(trans, &inode, snapshot);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err_msg(c, ret, "reattaching inode %llu", inode->bi_inum); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
break; break;
} }
} }
out:
fsck_err: fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
printbuf_exit(&buf);
bch_err_fn(c, ret); bch_err_fn(c, ret);
return ret; return ret;
} }
@ -2103,7 +2252,6 @@ fsck_err:
*/ */
int bch2_check_directory_structure(struct bch_fs *c) int bch2_check_directory_structure(struct bch_fs *c)
{ {
struct bch_inode_unpacked u;
pathbuf path = { 0, }; pathbuf path = { 0, };
int ret; int ret;
@ -2116,12 +2264,10 @@ int bch2_check_directory_structure(struct bch_fs *c)
if (!bkey_is_inode(k.k)) if (!bkey_is_inode(k.k))
continue; continue;
BUG_ON(bch2_inode_unpack(k, &u)); if (bch2_inode_flags(k) & BCH_INODE_unlinked)
if (u.bi_flags & BCH_INODE_unlinked)
continue; continue;
check_path(trans, &path, &u, iter.pos.snapshot); check_path(trans, &path, k);
}))); })));
darray_exit(&path); darray_exit(&path);

View File

@ -620,7 +620,8 @@ int bch2_trigger_inode(struct btree_trans *trans,
bool old_deleted = bkey_is_deleted_inode(old); bool old_deleted = bkey_is_deleted_inode(old);
bool new_deleted = bkey_is_deleted_inode(new.s_c); bool new_deleted = bkey_is_deleted_inode(new.s_c);
if (old_deleted != new_deleted) { if (old_deleted != new_deleted) {
int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted); int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes,
new.k->p, new_deleted);
if (ret) if (ret)
return ret; return ret;
} }
@ -1169,7 +1170,7 @@ fsck_err:
bch2_trans_iter_exit(trans, &inode_iter); bch2_trans_iter_exit(trans, &inode_iter);
return ret; return ret;
delete: delete:
ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false); ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false);
goto out; goto out;
} }

View File

@ -177,6 +177,20 @@ static inline u8 inode_d_type(struct bch_inode_unpacked *inode)
return inode->bi_subvol ? DT_SUBVOL : mode_to_type(inode->bi_mode); return inode->bi_subvol ? DT_SUBVOL : mode_to_type(inode->bi_mode);
} }
/*
 * Extract the inode flags word from any on-disk inode key version.
 *
 * inode_v2/v3 store bi_flags as a 64-bit little-endian field; the u32
 * return type keeps only the low 32 bits (callers here test low-bit
 * flags such as BCH_INODE_unlinked — NOTE(review): confirm no caller
 * needs flags above bit 31).
 *
 * Returns 0 for non-inode key types.
 */
static inline u32 bch2_inode_flags(struct bkey_s_c k)
{
	switch (k.k->type) {
	case KEY_TYPE_inode:
		return le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags);
	case KEY_TYPE_inode_v2:
		/* truncates: only low 32 bits of the 64-bit field survive */
		return le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags);
	case KEY_TYPE_inode_v3:
		return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags);
	default:
		return 0;
	}
}
/* i_nlink: */ /* i_nlink: */
static inline unsigned nlink_bias(umode_t mode) static inline unsigned nlink_bias(umode_t mode)

View File

@ -44,8 +44,8 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
u64 dev_bucket, u64 time, bool set) u64 dev_bucket, u64 time, bool set)
{ {
return time return time
? bch2_btree_bit_mod(trans, BTREE_ID_lru, ? bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru,
lru_pos(lru_id, dev_bucket, time), set) lru_pos(lru_id, dev_bucket, time), set)
: 0; : 0;
} }

View File

@ -332,6 +332,11 @@ enum fsck_err_opts {
OPT_BOOL(), \ OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \ BCH2_NO_SB_OPT, false, \
NULL, "Run fsck on mount") \ NULL, "Run fsck on mount") \
x(fsck_memory_usage_percent, u8, \
OPT_FS|OPT_MOUNT, \
OPT_UINT(20, 70), \
BCH2_NO_SB_OPT, 50, \
NULL, "Maximum percentage of system ram fsck is allowed to pin")\
x(fix_errors, u8, \ x(fix_errors, u8, \
OPT_FS|OPT_MOUNT, \ OPT_FS|OPT_MOUNT, \
OPT_FN(bch2_opt_fix_errors), \ OPT_FN(bch2_opt_fix_errors), \

View File

@ -264,7 +264,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
bkey_copy(&r->key, (struct bkey_i *) entry->start); bkey_copy(&r->key, (struct bkey_i *) entry->start);
r->error = 0; r->error = 0;
} else { } else {
r->error = -EIO; r->error = -BCH_ERR_btree_node_read_error;
} }
r->alive = true; r->alive = true;
break; break;

View File

@ -34,6 +34,7 @@
x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \
x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \
x(fs_upgrade_for_subvolumes, 22, 0) \ x(fs_upgrade_for_subvolumes, 22, 0) \
x(resume_logged_ops, 23, PASS_ALWAYS) \ x(resume_logged_ops, 23, PASS_ALWAYS) \

View File

@ -46,7 +46,13 @@
BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \ BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \
x(rebalance_work, \ x(rebalance_work, \
BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) \
x(subvolume_fs_parent, \
BIT_ULL(BCH_RECOVERY_PASS_check_dirents), \
BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \
x(btree_subvolume_children, \
BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \
BCH_FSCK_ERR_subvol_children_not_set)
#define DOWNGRADE_TABLE() #define DOWNGRADE_TABLE()

View File

@ -231,7 +231,7 @@
x(dirent_name_dot_or_dotdot, 223) \ x(dirent_name_dot_or_dotdot, 223) \
x(dirent_name_has_slash, 224) \ x(dirent_name_has_slash, 224) \
x(dirent_d_type_wrong, 225) \ x(dirent_d_type_wrong, 225) \
x(dirent_d_parent_subvol_wrong, 226) \ x(inode_bi_parent_wrong, 226) \
x(dirent_in_missing_dir_inode, 227) \ x(dirent_in_missing_dir_inode, 227) \
x(dirent_in_non_dir_inode, 228) \ x(dirent_in_non_dir_inode, 228) \
x(dirent_to_missing_inode, 229) \ x(dirent_to_missing_inode, 229) \
@ -253,7 +253,16 @@
x(reflink_p_front_pad_bad, 245) \ x(reflink_p_front_pad_bad, 245) \
x(journal_entry_dup_same_device, 246) \ x(journal_entry_dup_same_device, 246) \
x(inode_bi_subvol_missing, 247) \ x(inode_bi_subvol_missing, 247) \
x(inode_bi_subvol_wrong, 248) x(inode_bi_subvol_wrong, 248) \
x(inode_points_to_missing_dirent, 249) \
x(inode_points_to_wrong_dirent, 250) \
x(inode_bi_parent_nonzero, 251) \
x(dirent_to_missing_parent_subvol, 252) \
x(dirent_not_visible_in_parent_subvol, 253) \
x(subvol_fs_path_parent_wrong, 254) \
x(subvol_root_fs_path_parent_nonzero, 255) \
x(subvol_children_not_set, 256) \
x(subvol_children_bad, 257)
enum bch_sb_error_id { enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n, #define x(t, n) BCH_FSCK_ERR_##t = n,

View File

@ -13,13 +13,26 @@
static int bch2_subvolume_delete(struct btree_trans *, u32); static int bch2_subvolume_delete(struct btree_trans *, u32);
/*
 * Map a subvolume key to its entry position in the subvolume_children
 * btree: inode = parent subvolume id (fs_path_parent), offset = child
 * subvolume id.
 *
 * Returns POS_MIN as a "no entry" sentinel for non-subvolume keys and
 * for subvolumes with no fs_path_parent; subvolume_children_mod()
 * treats POS_MIN as a no-op.
 */
static struct bpos subvolume_children_pos(struct bkey_s_c k)
{
	if (k.k->type != KEY_TYPE_subvolume)
		return POS_MIN;

	struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
	if (!s.v->fs_path_parent)
		return POS_MIN;
	return POS(le32_to_cpu(s.v->fs_path_parent), s.k->p.offset);
}
static int check_subvol(struct btree_trans *trans, static int check_subvol(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,
struct bkey_s_c k) struct bkey_s_c k)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_s_c_subvolume subvol; struct bkey_s_c_subvolume subvol;
struct btree_iter subvol_children_iter = {};
struct bch_snapshot snapshot; struct bch_snapshot snapshot;
struct printbuf buf = PRINTBUF;
unsigned snapid; unsigned snapid;
int ret = 0; int ret = 0;
@ -42,6 +55,42 @@ static int check_subvol(struct btree_trans *trans,
return ret ?: -BCH_ERR_transaction_restart_nested; return ret ?: -BCH_ERR_transaction_restart_nested;
} }
if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL &&
subvol.v->fs_path_parent,
c, subvol_root_fs_path_parent_nonzero,
"root subvolume has nonzero fs_path_parent\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
struct bkey_i_subvolume *n =
bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
ret = PTR_ERR_OR_ZERO(n);
if (ret)
goto err;
n->v.fs_path_parent = 0;
}
if (subvol.v->fs_path_parent) {
struct bpos pos = subvolume_children_pos(k);
struct bkey_s_c subvol_children_k =
bch2_bkey_get_iter(trans, &subvol_children_iter,
BTREE_ID_subvolume_children, pos, 0);
ret = bkey_err(subvol_children_k);
if (ret)
goto err;
if (fsck_err_on(subvol_children_k.k->type != KEY_TYPE_set,
c, subvol_children_not_set,
"subvolume not set in subvolume_children btree at %llu:%llu\n%s",
pos.inode, pos.offset,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, true);
if (ret)
goto err;
}
}
struct bch_inode_unpacked inode; struct bch_inode_unpacked inode;
struct btree_iter inode_iter = {}; struct btree_iter inode_iter = {};
ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode, ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode,
@ -102,9 +151,10 @@ static int check_subvol(struct btree_trans *trans,
SET_BCH_SUBVOLUME_SNAP(&s->v, true); SET_BCH_SUBVOLUME_SNAP(&s->v, true);
} }
} }
err: err:
fsck_err: fsck_err:
bch2_trans_iter_exit(trans, &subvol_children_iter);
printbuf_exit(&buf);
return ret; return ret;
} }
@ -119,6 +169,42 @@ int bch2_check_subvols(struct bch_fs *c)
return ret; return ret;
} }
/*
 * Fsck: validate one entry in the subvolume_children btree.
 *
 * An entry at (parent, child) is correct iff subvolume @child exists and
 * its fs_path_parent equals @parent; anything else is deleted (with the
 * user's/fsck policy's consent via fsck_err_on).
 *
 * @trans:      btree transaction
 * @child_iter: iterator positioned at the entry being checked
 * @child_k:    the subvolume_children key (pos.inode = parent subvolid,
 *              pos.offset = child subvolid)
 */
static int check_subvol_child(struct btree_trans *trans,
			      struct btree_iter *child_iter,
			      struct bkey_s_c child_k)
{
	struct bch_fs *c = trans->c;
	struct bch_subvolume s;
	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, child_k.k->p.offset),
					  0, subvolume, &s);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	/*
	 * "ret ||" short-circuits, so @s is only read when the lookup
	 * succeeded. NOTE(review): if the fix is declined, a lingering
	 * ENOENT-class @ret is returned to the caller — confirm that is
	 * intended rather than returning 0.
	 */
	if (fsck_err_on(ret ||
			le32_to_cpu(s.fs_path_parent) != child_k.k->p.inode,
			c, subvol_children_bad,
			"incorrect entry in subvolume_children btree %llu:%llu",
			child_k.k->p.inode, child_k.k->p.offset)) {
		ret = bch2_btree_delete_at(trans, child_iter, 0);
		if (ret)
			goto err;
	}
err:
fsck_err:
	return ret;
}
/*
 * Fsck pass: walk the entire subvolume_children btree and repair (delete)
 * entries that don't correspond to a live subvolume's fs_path_parent.
 *
 * Returns 0 on success or a negative bch2 error code.
 */
int bch2_check_subvol_children(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_subvolume_children, POS_MIN, BTREE_ITER_PREFETCH, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_subvol_child(trans, &iter, k)));
	bch_err_fn(c, ret);
	/*
	 * Propagate the error instead of unconditionally returning 0:
	 * the original "return 0" logged @ret but swallowed it, so a
	 * failed pass reported success to the recovery machinery.
	 * This matches the sibling bch2_check_subvols(), which returns ret.
	 */
	return ret;
}
/* Subvolumes: */ /* Subvolumes: */
int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k,
@ -143,8 +229,50 @@ void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
le64_to_cpu(s.v->inode), le64_to_cpu(s.v->inode),
le32_to_cpu(s.v->snapshot)); le32_to_cpu(s.v->snapshot));
if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent)) if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, creation_parent)) {
prt_printf(out, " parent %u", le32_to_cpu(s.v->parent)); prt_printf(out, " creation_parent %u", le32_to_cpu(s.v->creation_parent));
prt_printf(out, " fs_parent %u", le32_to_cpu(s.v->fs_path_parent));
}
}
/*
 * Set or clear the subvolume_children btree entry at @pos.
 *
 * POS_MIN is the "no entry" sentinel produced by subvolume_children_pos()
 * and is a no-op here.
 */
static int subvolume_children_mod(struct btree_trans *trans, struct bpos pos, bool set)
{
	if (bpos_eq(pos, POS_MIN))
		return 0;

	return bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, set);
}
int bch2_subvolume_trigger(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_s new,
unsigned flags)
{
if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
struct bpos children_pos_old = subvolume_children_pos(old);
struct bpos children_pos_new = subvolume_children_pos(new.s_c);
if (!bpos_eq(children_pos_old, children_pos_new)) {
int ret = subvolume_children_mod(trans, children_pos_old, false) ?:
subvolume_children_mod(trans, children_pos_new, true);
if (ret)
return ret;
}
}
return 0;
}
int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol)
{
struct btree_iter iter;
bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolume_children, POS(subvol, 0), 0);
struct bkey_s_c k = bch2_btree_iter_peek(&iter);
bch2_trans_iter_exit(trans, &iter);
return bkey_err(k) ?: k.k && k.k->p.inode == subvol
? -BCH_ERR_ENOTEMPTY_subvol_not_empty
: 0;
} }
static __always_inline int static __always_inline int
@ -228,8 +356,8 @@ static int bch2_subvolume_reparent(struct btree_trans *trans,
if (k.k->type != KEY_TYPE_subvolume) if (k.k->type != KEY_TYPE_subvolume)
return 0; return 0;
if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) && if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, creation_parent) &&
le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent) le32_to_cpu(bkey_s_c_to_subvolume(k).v->creation_parent) != old_parent)
return 0; return 0;
s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
@ -237,7 +365,7 @@ static int bch2_subvolume_reparent(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
s->v.parent = cpu_to_le32(new_parent); s->v.creation_parent = cpu_to_le32(new_parent);
return 0; return 0;
} }
@ -260,7 +388,7 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d
BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_subvolume_reparent(trans, &iter, k, bch2_subvolume_reparent(trans, &iter, k,
subvolid_to_delete, le32_to_cpu(s.parent))); subvolid_to_delete, le32_to_cpu(s.creation_parent)));
} }
/* /*
@ -391,6 +519,7 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
} }
int bch2_subvolume_create(struct btree_trans *trans, u64 inode, int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
u32 parent_subvolid,
u32 src_subvolid, u32 src_subvolid,
u32 *new_subvolid, u32 *new_subvolid,
u32 *new_snapshotid, u32 *new_snapshotid,
@ -447,12 +576,13 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
if (ret) if (ret)
goto err; goto err;
new_subvol->v.flags = 0; new_subvol->v.flags = 0;
new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]); new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]);
new_subvol->v.inode = cpu_to_le64(inode); new_subvol->v.inode = cpu_to_le64(inode);
new_subvol->v.parent = cpu_to_le32(src_subvolid); new_subvol->v.creation_parent = cpu_to_le32(src_subvolid);
new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c)); new_subvol->v.fs_path_parent = cpu_to_le32(parent_subvolid);
new_subvol->v.otime.hi = 0; new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c));
new_subvol->v.otime.hi = 0;
SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro); SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0); SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);

View File

@ -7,17 +7,22 @@
enum bkey_invalid_flags; enum bkey_invalid_flags;
int bch2_check_subvols(struct bch_fs *); int bch2_check_subvols(struct bch_fs *);
int bch2_check_subvol_children(struct bch_fs *);
int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *); enum bkey_invalid_flags, struct printbuf *);
void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_s, unsigned);
#define bch2_bkey_ops_subvolume ((struct bkey_ops) { \ #define bch2_bkey_ops_subvolume ((struct bkey_ops) { \
.key_invalid = bch2_subvolume_invalid, \ .key_invalid = bch2_subvolume_invalid, \
.val_to_text = bch2_subvolume_to_text, \ .val_to_text = bch2_subvolume_to_text, \
.trigger = bch2_subvolume_trigger, \
.min_val_size = 16, \ .min_val_size = 16, \
}) })
int bch2_subvol_has_children(struct btree_trans *, u32);
int bch2_subvolume_get(struct btree_trans *, unsigned, int bch2_subvolume_get(struct btree_trans *, unsigned,
bool, int, struct bch_subvolume *); bool, int, struct bch_subvolume *);
int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
@ -29,8 +34,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *);
void bch2_delete_dead_snapshots_async(struct bch_fs *); void bch2_delete_dead_snapshots_async(struct bch_fs *);
int bch2_subvolume_unlink(struct btree_trans *, u32); int bch2_subvolume_unlink(struct btree_trans *, u32);
int bch2_subvolume_create(struct btree_trans *, u64, u32, int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);
u32 *, u32 *, bool);
int bch2_fs_subvolumes_init(struct bch_fs *); int bch2_fs_subvolumes_init(struct bch_fs *);

View File

@ -19,8 +19,8 @@ struct bch_subvolume {
* This is _not_ necessarily the subvolume of the directory containing * This is _not_ necessarily the subvolume of the directory containing
* this subvolume: * this subvolume:
*/ */
__le32 parent; __le32 creation_parent;
__le32 pad; __le32 fs_path_parent;
bch_le128 otime; bch_le128 otime;
}; };

View File

@ -102,6 +102,8 @@ EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);
* mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update() * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update()
* @s: mean and variance number of samples and their sums * @s: mean and variance number of samples and their sums
* @x: new value to include in the &mean_and_variance_weighted * @x: new value to include in the &mean_and_variance_weighted
* @initted: caller must track whether this is the first use or not
* @weight: ewma weight
* *
* see linked pdf: function derived from equations 140-143 where alpha = 2^w. * see linked pdf: function derived from equations 140-143 where alpha = 2^w.
* values are stored bitshifted for performance and added precision. * values are stored bitshifted for performance and added precision.
@ -132,6 +134,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
/** /**
* mean_and_variance_weighted_get_mean() - get mean from @s * mean_and_variance_weighted_get_mean() - get mean from @s
* @s: mean and variance number of samples and their sums * @s: mean and variance number of samples and their sums
* @weight: ewma weight
*/ */
s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s, s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s,
u8 weight) u8 weight)
@ -143,6 +146,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
/** /**
* mean_and_variance_weighted_get_variance() -- get variance from @s * mean_and_variance_weighted_get_variance() -- get variance from @s
* @s: mean and variance number of samples and their sums * @s: mean and variance number of samples and their sums
* @weight: ewma weight
*/ */
u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s, u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s,
u8 weight) u8 weight)
@ -155,6 +159,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);
/** /**
* mean_and_variance_weighted_get_stddev() - get standard deviation from @s * mean_and_variance_weighted_get_stddev() - get standard deviation from @s
* @s: mean and variance number of samples and their sums * @s: mean and variance number of samples and their sums
* @weight: ewma weight
*/ */
u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s, u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s,
u8 weight) u8 weight)