Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-02 00:00:03 +03:00)
Update bcachefs sources to 5e73602f6c bcachefs: Fix for fsck hanging
Commit 868abec1ab (parent 612f6b9ab7)
@@ -1 +1 @@
-79847e4824278463f7eb826dfd78221979e29a8b
+5e73602f6c2569e7b81b3ea658502ac9b546cb61
@@ -91,6 +91,7 @@ do {									\
 } while (0)
 
 #define wait_event_killable(wq, condition)	({wait_event(wq, condition); 0; })
+#define wait_event_interruptible(wq, condition)	({wait_event(wq, condition); 0; })
 
 #define __wait_event_timeout(wq, condition, timeout)			\
 	___wait_event(wq, ___wait_cond_timeout(condition),		\
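The tools build runs kernel code in userspace, where no signal delivery interrupts a wait, so the shim above maps wait_event_interruptible() onto a plain wait_event() that evaluates to 0 — the kernel API's "not interrupted" result. A minimal sketch of how a caller sees this (the caller and condition variable are hypothetical; wait_queue_head_t is assumed to be the shim's queue type):

/* Hypothetical caller: under the userspace shim, ret is always 0, so the
 * error branch (kernel: -ERESTARTSYS when a signal arrives) is never taken. */
wait_queue_head_t wq;		/* assumed shim type from include/linux/wait.h */
int flush_done;			/* hypothetical condition */

int example_wait(void)
{
	int ret = wait_event_interruptible(wq, flush_done);
	if (ret)
		return ret;	/* unreachable in the shim */
	return 0;
}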
@@ -14,6 +14,9 @@
 #define BCH_FORCE_IF_DATA_DEGRADED	(1 << 2)
 #define BCH_FORCE_IF_METADATA_DEGRADED	(1 << 3)
 
+#define BCH_FORCE_IF_LOST			\
+	(BCH_FORCE_IF_DATA_LOST|		\
+	 BCH_FORCE_IF_METADATA_LOST)
 #define BCH_FORCE_IF_DEGRADED			\
 	(BCH_FORCE_IF_DATA_DEGRADED|		\
	 BCH_FORCE_IF_METADATA_DEGRADED)
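These composite masks exist so a single user-supplied flags word can be compared against the per-entry damage flags computed later in this commit (see the new bch2_have_enough_devs() below). A small sketch of that subsumption check:

#include <stdbool.h>

/* Mirrors the "if (dflags & ~flags)" test bch2_have_enough_devs() gains
 * later in this commit: refuse iff the damage requires a force bit the
 * user didn't pass. */
static bool mount_allowed(unsigned dflags, unsigned flags)
{
	return !(dflags & ~flags);
}

/* e.g. mount_allowed(BCH_FORCE_IF_DATA_DEGRADED, BCH_FORCE_IF_DEGRADED)
 * is true, since BCH_FORCE_IF_DEGRADED includes both *_DEGRADED bits. */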
@@ -516,12 +516,7 @@ static void bch2_btree_iter_verify_level(struct btree_iter *iter,
 	if (!bch2_btree_node_relock(iter, level))
 		return;
 
-	/*
-	 * Ideally this invariant would always be true, and hopefully in the
-	 * future it will be, but for now set_pos_same_leaf() breaks it:
-	 */
-	BUG_ON(iter->uptodate < BTREE_ITER_NEED_TRAVERSE &&
-	       !btree_iter_pos_in_node(iter, l->b));
+	BUG_ON(!btree_iter_pos_in_node(iter, l->b));
 
 	/*
 	 * node iterators don't use leaf node iterator:
@@ -1457,36 +1452,6 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
 
 /* Iterate across keys (in leaf nodes only) */
 
-void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_pos)
-{
-	struct btree_iter_level *l = &iter->l[0];
-
-	EBUG_ON(iter->level != 0);
-	EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0);
-	EBUG_ON(!btree_node_locked(iter, 0));
-	EBUG_ON(bkey_cmp(new_pos, l->b->key.k.p) > 0);
-
-	bkey_init(&iter->k);
-	iter->k.p = iter->pos = new_pos;
-	btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
-
-	btree_iter_advance_to_pos(iter, l, -1);
-
-	/*
-	 * XXX:
-	 * keeping a node locked that's outside (even just outside) iter->pos
-	 * breaks __bch2_btree_node_lock(). This seems to only affect
-	 * bch2_btree_node_get_sibling so for now it's fixed there, but we
-	 * should try to get rid of this corner case.
-	 *
-	 * (this behaviour is currently needed for BTREE_INSERT_NOUNLOCK)
-	 */
-
-	if (bch2_btree_node_iter_end(&l->iter) &&
-	    btree_iter_pos_after_node(iter, l->b))
-		btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
-}
-
 static void btree_iter_pos_changed(struct btree_iter *iter, int cmp)
 {
 	unsigned l = iter->level;
@@ -1552,40 +1517,57 @@ void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
 	btree_iter_pos_changed(iter, cmp);
 }
 
+static inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter)
+{
+	struct bpos pos = iter->k.p;
+
+	if (unlikely(!bkey_cmp(pos, POS_MAX)))
+		return false;
+
+	if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+		pos = bkey_successor(pos);
+	bch2_btree_iter_set_pos(iter, pos);
+	return true;
+}
+
+static inline bool bch2_btree_iter_rewind_pos(struct btree_iter *iter)
+{
+	struct bpos pos = bkey_start_pos(&iter->k);
+
+	if (unlikely(!bkey_cmp(pos, POS_MIN)))
+		return false;
+
+	if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+		pos = bkey_predecessor(pos);
+	bch2_btree_iter_set_pos(iter, pos);
+	return true;
+}
+
 static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
 {
-	struct btree_iter_level *l = &iter->l[0];
-	bool ret;
+	struct bpos next_pos = iter->l[0].b->key.k.p;
+	bool ret = bkey_cmp(next_pos, POS_MAX) != 0;
 
-	bkey_init(&iter->k);
-	iter->k.p = iter->pos = l->b->key.k.p;
-
-	ret = bkey_cmp(iter->pos, POS_MAX) != 0;
 	if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
-		iter->k.p = iter->pos = bkey_successor(iter->pos);
+		next_pos = bkey_successor(next_pos);
 
-	btree_iter_pos_changed(iter, 1);
+	bch2_btree_iter_set_pos(iter, next_pos);
 	return ret;
 }
 
 static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
 {
-	struct btree_iter_level *l = &iter->l[0];
-	bool ret;
+	struct bpos next_pos = iter->l[0].b->data->min_key;
+	bool ret = bkey_cmp(next_pos, POS_MIN) != 0;
 
-	bkey_init(&iter->k);
-	iter->k.p = iter->pos = l->b->data->min_key;
-	iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
-
-	ret = bkey_cmp(iter->pos, POS_MIN) != 0;
 	if (ret) {
-		iter->k.p = iter->pos = bkey_predecessor(iter->pos);
+		next_pos = bkey_predecessor(next_pos);
 
 		if (iter->flags & BTREE_ITER_IS_EXTENTS)
-			iter->k.p = iter->pos = bkey_predecessor(iter->pos);
+			next_pos = bkey_predecessor(next_pos);
 	}
 
-	btree_iter_pos_changed(iter, -1);
+	bch2_btree_iter_set_pos(iter, next_pos);
 	return ret;
 }
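With the new helpers above, the advance/rewind decision (extents keep iter->k.p, non-extents step to the successor or predecessor, POS_MAX/POS_MIN terminate) lives in one place, and bch2_btree_iter_next() below reduces to advance-then-peek. A minimal iteration sketch over these interfaces (transaction setup and error handling elided; it compiles only against the bcachefs headers):

/* Sketch only: walk keys forward with the functions touched in this hunk,
 * assuming a traversed iterator from the enclosing transaction. */
static void walk_keys(struct btree_iter *iter)
{
	struct bkey_s_c k;

	for (k = bch2_btree_iter_peek(iter);
	     k.k && !bkey_err(k);
	     k = bch2_btree_iter_next(iter)) {
		/* process k; bch2_btree_iter_next() now advances via
		 * bch2_btree_iter_advance_pos() and returns bkey_s_c_null
		 * once the position reaches POS_MAX */
	}
}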
@@ -1651,8 +1633,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 	 * iter->pos should always be equal to the key we just
 	 * returned - except extents can straddle iter->pos:
 	 */
-	if (!(iter->flags & BTREE_ITER_IS_EXTENTS) ||
-	    bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+	if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
 		iter->pos = bkey_start_pos(k.k);
 
 	iter->uptodate = BTREE_ITER_UPTODATE;
@@ -1667,14 +1648,9 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
  */
 struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
 {
-	if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+	if (!bch2_btree_iter_advance_pos(iter))
 		return bkey_s_c_null;
 
-	bch2_btree_iter_set_pos(iter,
-		(iter->flags & BTREE_ITER_IS_EXTENTS)
-		? iter->k.p
-		: bkey_successor(iter->k.p));
-
 	return bch2_btree_iter_peek(iter);
 }
 
@@ -1726,10 +1702,7 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
 		k = __bch2_btree_iter_peek_with_updates(iter);
 
 		if (k.k && bkey_deleted(k.k)) {
-			bch2_btree_iter_set_pos(iter,
-				(iter->flags & BTREE_ITER_IS_EXTENTS)
-				? iter->k.p
-				: bkey_successor(iter->k.p));
+			bch2_btree_iter_advance_pos(iter);
 			continue;
 		}
 
@@ -1744,8 +1717,7 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
 	 * iter->pos should always be equal to the key we just
 	 * returned - except extents can straddle iter->pos:
 	 */
-	if (!(iter->flags & BTREE_ITER_IS_EXTENTS) ||
-	    bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+	if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
 		iter->pos = bkey_start_pos(k.k);
 
 	iter->uptodate = BTREE_ITER_UPTODATE;
@@ -1754,14 +1726,9 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
 
 struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter)
 {
-	if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+	if (!bch2_btree_iter_advance_pos(iter))
 		return bkey_s_c_null;
 
-	bch2_btree_iter_set_pos(iter,
-		(iter->flags & BTREE_ITER_IS_EXTENTS)
-		? iter->k.p
-		: bkey_successor(iter->k.p));
-
 	return bch2_btree_iter_peek_with_updates(iter);
 }
 
@@ -1789,7 +1756,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
 			return bkey_s_c_err(ret);
 
 		k = __btree_iter_peek(iter, l);
-		if (!k.k || bkey_cmp(bkey_start_pos(k.k), pos) > 0)
+		if (!k.k ||
+		    ((iter->flags & BTREE_ITER_IS_EXTENTS)
+		     ? bkey_cmp(bkey_start_pos(k.k), pos) >= 0
+		     : bkey_cmp(bkey_start_pos(k.k), pos) > 0))
 			k = __btree_iter_prev(iter, l);
 
 		if (likely(k.k))
@@ -1800,8 +1770,13 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
 	}
 
 	EBUG_ON(bkey_cmp(bkey_start_pos(k.k), pos) > 0);
 	iter->pos = bkey_start_pos(k.k);
+
+	/* Extents can straddle iter->pos: */
+	if (bkey_cmp(k.k->p, pos) < 0)
+		iter->pos = k.k->p;
 	iter->uptodate = BTREE_ITER_UPTODATE;
 
+	bch2_btree_iter_verify_level(iter, 0);
 	return k;
 }
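The straddle handling above covers extents, whose keys span [bkey_start_pos(k), k.p): with iterator position pos = 12 and a preceding extent covering [5, 10), the returned key lies entirely before pos, so iter->pos is pulled forward from the key's start to k.p = 10. A one-dimensional sketch of that adjustment (plain integers stand in for struct bpos; illustrative only):

#include <stdint.h>

/* After peek_prev() the position is the key's start, unless the whole
 * extent lies strictly before the search position, in which case it moves
 * to the extent's end — mirroring "if (bkey_cmp(k.k->p, pos) < 0)". */
static uint64_t prev_iter_pos(uint64_t start, uint64_t end, uint64_t pos)
{
	uint64_t ret = start;	/* iter->pos = bkey_start_pos(k.k) */
	if (end < pos)		/* extent entirely before pos */
		ret = end;	/* iter->pos = k.k->p */
	return ret;
}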
@@ -1811,16 +1786,9 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
  */
 struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
 {
-	struct bpos pos = bkey_start_pos(&iter->k);
-
-	EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
-	bch2_btree_iter_checks(iter);
-
-	if (unlikely(!bkey_cmp(pos, POS_MIN)))
+	if (!bch2_btree_iter_rewind_pos(iter))
 		return bkey_s_c_null;
 
-	bch2_btree_iter_set_pos(iter, bkey_predecessor(pos));
-
 	return bch2_btree_iter_peek_prev(iter);
 }
 
@@ -1926,14 +1894,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 
 struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
 {
-	if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+	if (!bch2_btree_iter_advance_pos(iter))
 		return bkey_s_c_null;
 
-	bch2_btree_iter_set_pos(iter,
-		(iter->flags & BTREE_ITER_IS_EXTENTS)
-		? iter->k.p
-		: bkey_successor(iter->k.p));
-
 	return bch2_btree_iter_peek_slot(iter);
 }
 
@@ -174,7 +174,6 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
 
 struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *);
 
-void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
 void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
 void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
 
@@ -951,12 +951,8 @@ retry:
 
 	trans_for_each_iter(trans, iter)
 		if ((trans->iters_live & (1ULL << iter->idx)) &&
-		    (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT)) {
-			if (trans->flags & BTREE_INSERT_NOUNLOCK)
-				bch2_btree_iter_set_pos_same_leaf(iter, iter->pos_after_commit);
-			else
-				bch2_btree_iter_set_pos(iter, iter->pos_after_commit);
-		}
+		    (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT))
+			bch2_btree_iter_set_pos(iter, iter->pos_after_commit);
 out:
 	bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres);
 
@@ -2440,7 +2440,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
 	struct address_space *mapping = inode->v.i_mapping;
 	struct bkey_buf copy;
 	struct btree_trans trans;
-	struct btree_iter *src, *dst;
+	struct btree_iter *src, *dst, *del;
 	loff_t shift, new_size;
 	u64 src_start;
 	int ret;
@@ -2510,6 +2510,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
 			POS(inode->v.i_ino, src_start >> 9),
 			BTREE_ITER_INTENT);
 	dst = bch2_trans_copy_iter(&trans, src);
+	del = bch2_trans_copy_iter(&trans, src);
 
 	while (1) {
 		struct disk_reservation disk_res =
@@ -2530,8 +2531,6 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
 		if (!k.k || k.k->p.inode != inode->v.i_ino)
 			break;
 
-		BUG_ON(bkey_cmp(src->pos, bkey_start_pos(k.k)));
-
 		if (insert &&
 		    bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
 			break;
@@ -2563,6 +2562,7 @@ reassemble:
 		delete.k.p = copy.k->k.p;
 		delete.k.size = copy.k->k.size;
 		delete.k.p.offset -= shift >> 9;
+		bch2_btree_iter_set_pos(del, bkey_start_pos(&delete.k));
 
 		next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
 
@@ -2583,9 +2583,7 @@ reassemble:
 			BUG_ON(ret);
 		}
 
-		bch2_btree_iter_set_pos(src, bkey_start_pos(&delete.k));
-
-		ret = bch2_trans_update(&trans, src, &delete, trigger_flags) ?:
+		ret = bch2_trans_update(&trans, del, &delete, trigger_flags) ?:
 			bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?:
 			bch2_trans_commit(&trans, &disk_res,
 					  &inode->ei_journal_seq,
@@ -193,7 +193,7 @@ static int hash_redo_key(const struct bch_hash_desc desc,
 	bch2_trans_update(trans, k_iter, &delete, 0);
 
 	return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
-			     tmp, BCH_HASH_SET_MUST_CREATE);
+			     tmp, 0);
 }
 
 static int fsck_hash_delete_at(struct btree_trans *trans,
@@ -1072,6 +1072,11 @@ static void inc_link(struct bch_fs *c, nlink_table *links,
 	if (inum < range_start || inum >= *range_end)
 		return;
 
+	if (inum - range_start >= SIZE_MAX / sizeof(struct nlink)) {
+		*range_end = inum;
+		return;
+	}
+
 	link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
 	if (!link) {
 		bch_verbose(c, "allocation failed during fsck - will need another pass");
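The new guard bounds the genradix index before it becomes a byte offset: the radix tree computes roughly (inum - range_start) * sizeof(struct nlink), which can overflow size_t for very large inode numbers. Clamping *range_end stops the current range there and leaves the remaining inodes for the next pass, a robustness fix pulled in alongside the fsck-hang fix this sync is named for. The invariant, restated standalone (names are illustrative):

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>

/* True iff index * entry_size cannot overflow a size_t byte offset;
 * mirrors "inum - range_start >= SIZE_MAX / sizeof(struct nlink)". */
static bool nlink_index_safe(uint64_t inum, uint64_t range_start,
			     size_t entry_size)
{
	return inum - range_start < SIZE_MAX / entry_size;
}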
@@ -1353,16 +1358,17 @@ peek_nlinks:	link = genradix_iter_peek(&nlinks_iter, links);
 			break;
 
 		nlinks_pos = range_start + nlinks_iter.pos;
-		if (iter->pos.offset > nlinks_pos) {
+
+		if (link && nlinks_pos < iter->pos.offset) {
 			/* Should have been caught by dirents pass: */
-			need_fsck_err_on(link && link->count, c,
+			need_fsck_err_on(link->count, c,
 				"missing inode %llu (nlink %u)",
 				nlinks_pos, link->count);
 			genradix_iter_advance(&nlinks_iter, links);
 			goto peek_nlinks;
 		}
 
-		if (iter->pos.offset < nlinks_pos || !link)
+		if (!link || nlinks_pos > iter->pos.offset)
 			link = &zero_links;
 
 		if (k.k && k.k->type == KEY_TYPE_inode) {
@@ -479,7 +479,7 @@ int bch2_inode_create(struct btree_trans *trans,
 	u64 min, max, start, *hint;
 	int ret;
 
-	unsigned cpu = raw_smp_processor_id();
+	u64 cpu = raw_smp_processor_id();
 	unsigned bits = (c->opts.inodes_32bit
 			? 31 : 63) - c->inode_shard_bits;
 
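Widening cpu from unsigned to u64 matters because the allocator mixes the CPU id into the upper bits of a 64-bit inode hint with a shift that can reach past bit 31 (bits goes up to 63 - inode_shard_bits); left-shifting a 32-bit unsigned by 32 or more is undefined in C and would drop the shard entirely. A standalone illustration (an assumed simplification of the hint calculation, not the actual helper):

#include <stdint.h>

/* With a 32-bit cpu, "cpu << bits" is undefined for bits >= 32; making the
 * shifted operand u64 keeps the shard id in the hint's upper bits. */
static uint64_t shard_hint(uint64_t cpu, unsigned bits)
{
	return cpu << bits;	/* well-defined for bits < 64 */
}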
@@ -575,6 +575,8 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
 
 	spin_lock(&j->lock);
 
+	BUG_ON(seq > journal_cur_seq(j));
+
 	/* Recheck under lock: */
 	if (j->err_seq && seq >= j->err_seq) {
 		ret = -EIO;
@@ -640,9 +642,10 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq)
 	u64 start_time = local_clock();
 	int ret, ret2;
 
-	ret = wait_event_killable(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL)));
+	ret = wait_event_interruptible(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL)));
 
-	bch2_time_stats_update(j->flush_seq_time, start_time);
+	if (!ret)
+		bch2_time_stats_update(j->flush_seq_time, start_time);
 
 	return ret ?: ret2 < 0 ? ret2 : 0;
 }
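Two behavioural changes here: an interrupted wait no longer pollutes the flush-time statistics, and the combined return value prefers the wait error, then a negative async result, then success. The return uses the GNU C "a ?: b" shorthand; restated as a plain function:

/* Equivalent of "return ret ?: ret2 < 0 ? ret2 : 0;" without the GNU
 * extension: wait errors win, then async flush errors, else success. */
static int combine_results(int wait_ret, int flush_ret)
{
	if (wait_ret)
		return wait_ret;
	return flush_ret < 0 ? flush_ret : 0;
}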
@@ -1158,6 +1161,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
 	       "seq:\t\t\t%llu\n"
 	       "last_seq:\t\t%llu\n"
 	       "last_seq_ondisk:\t%llu\n"
+	       "flushed_seq_ondisk:\t%llu\n"
 	       "prereserved:\t\t%u/%u\n"
 	       "nr flush writes:\t%llu\n"
 	       "nr noflush writes:\t%llu\n"
@@ -1170,6 +1174,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
 	       journal_cur_seq(j),
 	       journal_last_seq(j),
 	       j->last_seq_ondisk,
+	       j->flushed_seq_ondisk,
 	       j->prereserved.reserved,
 	       j->prereserved.remaining,
 	       j->nr_flush_writes,
@@ -222,6 +222,11 @@ enum opt_type {
 	  OPT_BOOL(),						\
 	  NO_SB_OPT, false,					\
 	  NULL, "Allow mounting in degraded mode")		\
+	x(very_degraded,		u8,			\
+	  OPT_MOUNT,						\
+	  OPT_BOOL(),						\
+	  NO_SB_OPT, false,					\
+	  NULL, "Allow mounting in when data will be missing")	\
 	x(discard,			u8,			\
 	  OPT_MOUNT|OPT_DEVICE,					\
 	  OPT_BOOL(),						\
@@ -1088,6 +1088,13 @@ int bch2_fs_recovery(struct bch_fs *c)
 		bch_info(c, "recovering from clean shutdown, journal seq %llu",
 			 le64_to_cpu(clean->journal_seq));
 
+	if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) {
+		bch_info(c, "alloc_v2 feature bit not set, fsck required");
+		c->opts.fsck = true;
+		c->opts.fix_errors = FSCK_OPT_YES;
+		c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_alloc_v2;
+	}
+
 	if (!c->replicas.entries ||
 	    c->opts.rebuild_replicas) {
 		bch_info(c, "building replicas info");
@@ -958,94 +958,48 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
 
 /* Query replicas: */
 
-struct replicas_status __bch2_replicas_status(struct bch_fs *c,
-					      struct bch_devs_mask online_devs)
+bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
+			   unsigned flags, bool print)
 {
-	struct bch_sb_field_members *mi;
 	struct bch_replicas_entry *e;
-	unsigned i, nr_online, nr_offline;
-	struct replicas_status ret;
-
-	memset(&ret, 0, sizeof(ret));
-
-	for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
-		ret.replicas[i].redundancy = INT_MAX;
-
-	mi = bch2_sb_get_members(c->disk_sb.sb);
+	bool ret = true;
 
 	percpu_down_read(&c->mark_lock);
 
 	for_each_cpu_replicas_entry(&c->replicas, e) {
-		if (e->data_type >= ARRAY_SIZE(ret.replicas))
-			panic("e %p data_type %u\n", e, e->data_type);
+		unsigned i, nr_online = 0, dflags = 0;
+		bool metadata = e->data_type < BCH_DATA_user;
 
-		nr_online = nr_offline = 0;
+		for (i = 0; i < e->nr_devs; i++)
+			nr_online += test_bit(e->devs[i], devs.d);
 
-		for (i = 0; i < e->nr_devs; i++) {
-			BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi,
-						e->devs[i]));
-
-			if (test_bit(e->devs[i], online_devs.d))
-				nr_online++;
-			else
-				nr_offline++;
-		}
+		if (nr_online < e->nr_required)
+			dflags |= metadata
+				? BCH_FORCE_IF_METADATA_LOST
+				: BCH_FORCE_IF_DATA_LOST;
 
-		ret.replicas[e->data_type].redundancy =
-			min(ret.replicas[e->data_type].redundancy,
-			    (int) nr_online - (int) e->nr_required);
+		if (nr_online < e->nr_devs)
+			dflags |= metadata
+				? BCH_FORCE_IF_METADATA_DEGRADED
+				: BCH_FORCE_IF_DATA_DEGRADED;
 
-		ret.replicas[e->data_type].nr_offline =
-			max(ret.replicas[e->data_type].nr_offline,
-			    nr_offline);
+		if (dflags & ~flags) {
+			if (print) {
+				char buf[100];
+
+				bch2_replicas_entry_to_text(&PBUF(buf), e);
+				bch_err(c, "insufficient devices online (%u) for replicas entry %s",
+					nr_online, buf);
+			}
+			ret = false;
+			break;
+		}
 	}
 
 	percpu_up_read(&c->mark_lock);
 
-	for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
-		if (ret.replicas[i].redundancy == INT_MAX)
-			ret.replicas[i].redundancy = 0;
-
 	return ret;
 }
 
-struct replicas_status bch2_replicas_status(struct bch_fs *c)
-{
-	return __bch2_replicas_status(c, bch2_online_devs(c));
-}
-
-static bool have_enough_devs(struct replicas_status s,
-			     enum bch_data_type type,
-			     bool force_if_degraded,
-			     bool force_if_lost)
-{
-	return (!s.replicas[type].nr_offline || force_if_degraded) &&
-		(s.replicas[type].redundancy >= 0 || force_if_lost);
-}
-
-bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
-{
-	return (have_enough_devs(s, BCH_DATA_journal,
-				 flags & BCH_FORCE_IF_METADATA_DEGRADED,
-				 flags & BCH_FORCE_IF_METADATA_LOST) &&
-		have_enough_devs(s, BCH_DATA_btree,
-				 flags & BCH_FORCE_IF_METADATA_DEGRADED,
-				 flags & BCH_FORCE_IF_METADATA_LOST) &&
-		have_enough_devs(s, BCH_DATA_user,
-				 flags & BCH_FORCE_IF_DATA_DEGRADED,
-				 flags & BCH_FORCE_IF_DATA_LOST));
-}
-
-int bch2_replicas_online(struct bch_fs *c, bool meta)
-{
-	struct replicas_status s = bch2_replicas_status(c);
-
-	return (meta
-		? min(s.replicas[BCH_DATA_journal].redundancy,
-		      s.replicas[BCH_DATA_btree].redundancy)
-		: s.replicas[BCH_DATA_user].redundancy) + 1;
-}
-
 unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct bch_replicas_entry *e;
@@ -39,19 +39,9 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e,
 	e->devs[0] = dev;
 }
 
-struct replicas_status {
-	struct {
-		int		redundancy;
-		unsigned	nr_offline;
-	} replicas[BCH_DATA_NR];
-};
+bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask,
+			   unsigned, bool);
 
-struct replicas_status __bch2_replicas_status(struct bch_fs *,
-					      struct bch_devs_mask);
-struct replicas_status bch2_replicas_status(struct bch_fs *);
-bool bch2_have_enough_devs(struct replicas_status, unsigned);
-
-int bch2_replicas_online(struct bch_fs *, bool);
 unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
 
 int bch2_replicas_gc_end(struct bch_fs *, int);
@@ -767,15 +767,13 @@ int bch2_write_super(struct bch_fs *c)
 	nr_wrote = dev_mask_nr(&sb_written);
 
 	can_mount_with_written =
-		bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
-				      BCH_FORCE_IF_DEGRADED);
+		bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false);
 
 	for (i = 0; i < ARRAY_SIZE(sb_written.d); i++)
 		sb_written.d[i] = ~sb_written.d[i];
 
 	can_mount_without_written =
-		bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
-				      BCH_FORCE_IF_DEGRADED);
+		bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false);
 
 	/*
 	 * If we would be able to mount _without_ the devices we successfully
@@ -786,6 +784,7 @@ int bch2_write_super(struct bch_fs *c)
 	 * mount with the devices we did successfully write to:
 	 */
 	if (bch2_fs_fatal_err_on(!nr_wrote ||
+				 !can_mount_with_written ||
 				 (can_mount_without_written &&
 				  !can_mount_with_written), c,
 		"Unable to write superblock to sufficient devices"))
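The added !can_mount_with_written term makes the failure condition explicit rather than implied by the parenthesized clause. Restated as a standalone predicate (a sketch, not the actual helper):

#include <stdbool.h>

/* Superblock write is fatal if nothing was written, if the devices we did
 * write aren't enough to mount, or if the complement of the written set
 * could mount while the written set could not (stale devices would win). */
static bool sb_write_fatal(unsigned nr_wrote, bool can_mount_with,
			   bool can_mount_without)
{
	return !nr_wrote ||
	       !can_mount_with ||
	       (can_mount_without && !can_mount_with);
}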
@@ -1264,7 +1264,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
 			    enum bch_member_state new_state, int flags)
 {
 	struct bch_devs_mask new_online_devs;
-	struct replicas_status s;
 	struct bch_dev *ca2;
 	int i, nr_rw = 0, required;
 
@@ -1300,9 +1299,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
 		new_online_devs = bch2_online_devs(c);
 		__clear_bit(ca->dev_idx, new_online_devs.d);
 
-		s = __bch2_replicas_status(c, new_online_devs);
-
-		return bch2_have_enough_devs(s, flags);
+		return bch2_have_enough_devs(c, new_online_devs, flags, false);
 	default:
 		BUG();
 	}
@@ -1310,14 +1307,18 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
 
 static bool bch2_fs_may_start(struct bch_fs *c)
 {
-	struct replicas_status s;
 	struct bch_sb_field_members *mi;
 	struct bch_dev *ca;
-	unsigned i, flags = c->opts.degraded
-		? BCH_FORCE_IF_DEGRADED
-		: 0;
+	unsigned i, flags = 0;
+
+	if (c->opts.very_degraded)
+		flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
 
-	if (!c->opts.degraded) {
+	if (c->opts.degraded)
+		flags |= BCH_FORCE_IF_DEGRADED;
+
+	if (!c->opts.degraded &&
+	    !c->opts.very_degraded) {
 		mutex_lock(&c->sb_lock);
 		mi = bch2_sb_get_members(c->disk_sb.sb);
 
@@ -1337,9 +1338,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)
 		mutex_unlock(&c->sb_lock);
 	}
 
-	s = bch2_replicas_status(c);
-
-	return bch2_have_enough_devs(s, flags);
+	return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
 }
 
 static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
@@ -199,9 +199,6 @@ read_attribute(new_stripes);
 
 rw_attribute(pd_controllers_update_seconds);
 
-read_attribute(meta_replicas_have);
-read_attribute(data_replicas_have);
-
 read_attribute(io_timers_read);
 read_attribute(io_timers_write);
 
@@ -347,9 +344,6 @@ SHOW(bch2_fs)
 
 	sysfs_print(promote_whole_extents, c->promote_whole_extents);
 
-	sysfs_printf(meta_replicas_have, "%i", bch2_replicas_online(c, true));
-	sysfs_printf(data_replicas_have, "%i", bch2_replicas_online(c, false));
-
 	/* Debugging: */
 
 	if (attr == &sysfs_alloc_debug)
@@ -520,9 +514,6 @@ struct attribute *bch2_fs_files[] = {
 	&sysfs_btree_node_size,
 	&sysfs_btree_cache_size,
 
-	&sysfs_meta_replicas_have,
-	&sysfs_data_replicas_have,
-
 	&sysfs_journal_write_delay_ms,
 	&sysfs_journal_reclaim_delay_ms,
 