mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-23 00:00:02 +03:00)

Update bcachefs sources to 8fd009dd76 bcachefs: Rip out code for storing backpointers in alloc keys

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

This commit is contained in:
parent d22c79d2ff
commit 7f102ee83d
@@ -1 +1 @@
0342eebf85b7be76f01bacec8f958c6e6039535b
8fd009dd764dabd79e2b42e1c85812a08ad1d6c0
@@ -113,17 +113,40 @@ static inline void *bio_data(struct bio *bio)

#define __bio_kunmap_atomic(addr) kunmap_atomic(addr)

static inline struct bio_vec *bio_next_segment(const struct bio *bio,
static inline struct bio_vec bio_iter_all_peek(const struct bio *bio,
					       struct bvec_iter_all *iter)
{
	if (iter->idx >= bio->bi_vcnt)
		return NULL;
	if (WARN_ON(iter->idx >= bio->bi_vcnt))
		return (struct bio_vec) { NULL };

	return &bio->bi_io_vec[iter->idx];
	return bvec_iter_all_peek(bio->bi_io_vec, iter);
}

static inline void bio_iter_all_advance(const struct bio *bio,
					struct bvec_iter_all *iter,
					unsigned bytes)
{
	bvec_iter_all_advance(bio->bi_io_vec, iter, bytes);

	WARN_ON(iter->idx > bio->bi_vcnt ||
		(iter->idx == bio->bi_vcnt && iter->done));
}

#define bio_for_each_segment_all_continue(bvl, bio, iter)		\
	for (;								\
	     iter.idx < bio->bi_vcnt &&					\
	     ((bvl = bio_iter_all_peek(bio, &iter)), true);		\
	     bio_iter_all_advance((bio), &iter, bvl.bv_len))

/*
 * drivers should _never_ use the all version - the bio may have been split
 * before it got to the driver and the driver won't own all of it
 */
#define bio_for_each_segment_all(bvl, bio, iter)			\
	for ((iter).idx = 0; (bvl = bio_next_segment((bio), &(iter))); (iter).idx++)
	for (bvec_iter_all_init(&iter);					\
	     iter.idx < (bio)->bi_vcnt &&				\
	     ((bvl = bio_iter_all_peek((bio), &iter)), true);		\
	     bio_iter_all_advance((bio), &iter, bvl.bv_len))

static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
				    unsigned bytes)
@@ -43,10 +43,6 @@ struct bvec_iter {
						   current bvec */
};

struct bvec_iter_all {
	int		idx;
};

/*
 * various member access, note that bio_data should of course not be used
 * on highmem page vectors
@@ -98,4 +94,52 @@ static inline void bvec_iter_advance(const struct bio_vec *bv,
	     ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1);		\
	     bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))

/*
 * bvec_iter_all: for advancing over individual pages in a bio, as it was when
 * it was first created:
 */
struct bvec_iter_all {
	int		idx;
	unsigned	done;
};

static inline void bvec_iter_all_init(struct bvec_iter_all *iter_all)
{
	iter_all->done = 0;
	iter_all->idx = 0;
}

static inline struct bio_vec __bvec_iter_all_peek(const struct bio_vec *bvec,
						  const struct bvec_iter_all *iter)
{
	struct bio_vec bv = bvec[iter->idx];

	BUG_ON(iter->done >= bv.bv_len);

	bv.bv_offset	+= iter->done;
	bv.bv_len	-= iter->done;
	return bv;
}

static inline struct bio_vec bvec_iter_all_peek(const struct bio_vec *bvec,
						const struct bvec_iter_all *iter)
{
	struct bio_vec bv = __bvec_iter_all_peek(bvec, iter);

	bv.bv_len = min_t(unsigned, PAGE_SIZE - bv.bv_offset, bv.bv_len);
	return bv;
}

static inline void bvec_iter_all_advance(const struct bio_vec *bvec,
					 struct bvec_iter_all *iter,
					 unsigned bytes)
{
	iter->done += bytes;

	while (iter->done && iter->done >= bvec[iter->idx].bv_len) {
		iter->done -= bvec[iter->idx].bv_len;
		iter->idx++;
	}
}

#endif /* __LINUX_BVEC_ITER_H */
@@ -831,10 +831,35 @@ DEFINE_EVENT(transaction_event, trans_restart_injected,
	TP_ARGS(trans, caller_ip)
);

DEFINE_EVENT(transaction_event, trans_restart_split_race,
TRACE_EVENT(trans_restart_split_race,
	TP_PROTO(struct btree_trans *trans,
		 unsigned long caller_ip),
	TP_ARGS(trans, caller_ip)
		 unsigned long caller_ip,
		 struct btree *b),
	TP_ARGS(trans, caller_ip, b),

	TP_STRUCT__entry(
		__array(char,		trans_fn, 32	)
		__field(unsigned long,	caller_ip	)
		__field(u8,		level		)
		__field(u16,		written		)
		__field(u16,		blocks		)
		__field(u16,		u64s_remaining	)
	),

	TP_fast_assign(
		strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
		__entry->caller_ip	= caller_ip;
		__entry->level		= b->c.level;
		__entry->written	= b->written;
		__entry->blocks		= btree_blocks(trans->c);
		__entry->u64s_remaining	= bch_btree_keys_u64s_remaining(trans->c, b);
	),

	TP_printk("%s %pS l=%u written %u/%u u64s remaining %u",
		  __entry->trans_fn, (void *) __entry->caller_ip,
		  __entry->level,
		  __entry->written, __entry->blocks,
		  __entry->u64s_remaining)
);

DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim,
@@ -451,6 +451,8 @@ void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)

		if (src < dst)
			memset(src, 0, dst - src);

		SET_BCH_ALLOC_V4_NR_BACKPOINTERS(out, 0);
	} else {
		struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);

@@ -476,38 +478,26 @@ static noinline struct bkey_i_alloc_v4 *
__bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{
	struct bkey_i_alloc_v4 *ret;
	if (k.k->type == KEY_TYPE_alloc_v4) {
		struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
		unsigned bytes = sizeof(struct bkey_i_alloc_v4) +
			BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) *
			sizeof(struct bch_backpointer);
		void *src, *dst;

		/*
		 * Reserve space for one more backpointer here:
		 * Not sketchy at doing it this way, nope...
		 */
		ret = bch2_trans_kmalloc(trans, bytes + sizeof(struct bch_backpointer));
	ret = bch2_trans_kmalloc(trans, sizeof(struct bkey_i_alloc_v4));
	if (IS_ERR(ret))
		return ret;

	if (k.k->type == KEY_TYPE_alloc_v4) {
		void *src, *dst;

		bkey_reassemble(&ret->k_i, k);

		src = alloc_v4_backpointers(&ret->v);
		SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
		dst = alloc_v4_backpointers(&ret->v);

		memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
			sizeof(struct bch_backpointer));
		if (src < dst)
			memset(src, 0, dst - src);

		SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v, 0);
		set_alloc_v4_u64s(ret);
	} else {
		ret = bch2_trans_kmalloc(trans, sizeof(struct bkey_i_alloc_v4) +
					 sizeof(struct bch_backpointer));
		if (IS_ERR(ret))
			return ret;

		bkey_alloc_v4_init(&ret->k_i);
		ret->k.p = k.k->p;
		bch2_alloc_to_v4(k, &ret->v);

@@ -517,8 +507,12 @@ __bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)

static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
{
	struct bkey_s_c_alloc_v4 a;

	if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
	    BCH_ALLOC_V4_BACKPOINTERS_START(bkey_s_c_to_alloc_v4(k).v) == BCH_ALLOC_V4_U64s) {
	    ((a = bkey_s_c_to_alloc_v4(k), true) &&
	     BCH_ALLOC_V4_BACKPOINTERS_START(a.v) == BCH_ALLOC_V4_U64s &&
	     BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) == 0)) {
		/*
		 * Reserve space for one more backpointer here:
		 * Not sketchy at doing it this way, nope...

@@ -962,10 +956,17 @@ struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, s
		struct bpos next;

		bch2_trans_copy_iter(&iter2, iter);
		k = bch2_btree_iter_peek_upto(&iter2,
				bkey_min(bkey_min(end,
						  iter->path->l[0].b->key.k.p),
					 POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1)));

		if (!bpos_eq(iter->path->l[0].b->key.k.p, SPOS_MAX))
			end = bkey_min(end, bpos_nosnap_successor(iter->path->l[0].b->key.k.p));

		end = bkey_min(end, POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1));

		/*
		 * btree node min/max is a closed interval, upto takes a half
		 * open interval:
		 */
		k = bch2_btree_iter_peek_upto(&iter2, end);
		next = iter2.pos;
		bch2_trans_iter_exit(iter->trans, &iter2);

@@ -1760,7 +1761,7 @@ static void bch2_do_discards_work(struct work_struct *work)
void bch2_do_discards(struct bch_fs *c)
{
	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
	    !queue_work(system_long_wq, &c->discard_work))
	    !queue_work(c->write_ref_wq, &c->discard_work))
		bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}

@@ -1886,11 +1887,12 @@ err:
void bch2_do_invalidates(struct bch_fs *c)
{
	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
	    !queue_work(system_long_wq, &c->invalidate_work))
	    !queue_work(c->write_ref_wq, &c->invalidate_work))
		bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}

static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
				   unsigned long *last_updated)
{
	struct btree_trans trans;
	struct btree_iter iter;

@@ -1910,6 +1912,12 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
	 * freespace/need_discard/need_gc_gens btrees as needed:
	 */
	while (1) {
		if (*last_updated + HZ * 10 < jiffies) {
			bch_info(ca, "%s: currently at %llu/%llu",
				 __func__, iter.pos.offset, ca->mi.nbuckets);
			*last_updated = jiffies;
		}

		bch2_trans_begin(&trans);

		if (bkey_ge(iter.pos, end)) {

@@ -1989,6 +1997,7 @@ int bch2_fs_freespace_init(struct bch_fs *c)
	unsigned i;
	int ret = 0;
	bool doing_init = false;
	unsigned long last_updated = jiffies;

	/*
	 * We can crash during the device add path, so we need to check this on

@@ -2004,7 +2013,7 @@ int bch2_fs_freespace_init(struct bch_fs *c)
		doing_init = true;
	}

	ret = bch2_dev_freespace_init(c, ca);
	ret = bch2_dev_freespace_init(c, ca, &last_updated);
	if (ret) {
		percpu_ref_put(&ca->ref);
		return ret;
@@ -345,17 +345,17 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc

	if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
		struct bch_backpointer bp;
		u64 bp_offset = 0;
		struct bpos bp_pos = POS_MIN;

		ret = bch2_get_next_backpointer(trans, POS(ca->dev_idx, b), -1,
						&bp_offset, &bp,
						&bp_pos, &bp,
						BTREE_ITER_NOPRESERVE);
		if (ret) {
			ob = ERR_PTR(ret);
			goto err;
		}

		if (bp_offset != U64_MAX) {
		if (!bkey_eq(bp_pos, POS_MAX)) {
			/*
			 * Bucket may have data in it - we don't call
			 * bc2h_trans_inconnsistent() because fsck hasn't
@ -69,6 +69,10 @@ void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer
|
||||
|
||||
void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
prt_str(out, "bucket=");
|
||||
bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
|
||||
prt_str(out, " ");
|
||||
|
||||
bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
|
||||
}
|
||||
|
||||
@ -81,117 +85,6 @@ void bch2_backpointer_swab(struct bkey_s k)
|
||||
bch2_bpos_swab(&bp.v->pos);
|
||||
}
|
||||
|
||||
#define BACKPOINTER_OFFSET_MAX ((1ULL << 40) - 1)
|
||||
|
||||
static inline int backpointer_cmp(struct bch_backpointer l, struct bch_backpointer r)
|
||||
{
|
||||
return cmp_int(l.bucket_offset, r.bucket_offset);
|
||||
}
|
||||
|
||||
static int bch2_backpointer_del_by_offset(struct btree_trans *trans,
|
||||
struct bpos bucket,
|
||||
u64 bp_offset,
|
||||
struct bch_backpointer bp)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
if (bp_offset < BACKPOINTER_OFFSET_MAX) {
|
||||
struct bch_backpointer *bps;
|
||||
struct bkey_i_alloc_v4 *a;
|
||||
unsigned i, nr;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
|
||||
bucket,
|
||||
BTREE_ITER_INTENT|
|
||||
BTREE_ITER_SLOTS|
|
||||
BTREE_ITER_WITH_UPDATES);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type != KEY_TYPE_alloc_v4) {
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
a = bch2_alloc_to_v4_mut(trans, k);
|
||||
ret = PTR_ERR_OR_ZERO(a);
|
||||
if (ret)
|
||||
goto err;
|
||||
bps = alloc_v4_backpointers(&a->v);
|
||||
nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
if (bps[i].bucket_offset == bp_offset)
|
||||
goto found;
|
||||
if (bps[i].bucket_offset > bp_offset)
|
||||
break;
|
||||
}
|
||||
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
found:
|
||||
if (memcmp(&bps[i], &bp, sizeof(bp))) {
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
array_remove_item(bps, nr, i);
|
||||
SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
|
||||
set_alloc_v4_u64s(a);
|
||||
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
|
||||
} else {
|
||||
bp_offset -= BACKPOINTER_OFFSET_MAX;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_backpointers,
|
||||
bucket_pos_to_bp(c, bucket, bp_offset),
|
||||
BTREE_ITER_INTENT|
|
||||
BTREE_ITER_SLOTS|
|
||||
BTREE_ITER_WITH_UPDATES);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type != KEY_TYPE_backpointer ||
|
||||
memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_btree_delete_at(trans, &iter, 0);
|
||||
}
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool bch2_bucket_backpointer_del(struct btree_trans *trans,
|
||||
struct bkey_i_alloc_v4 *a,
|
||||
struct bch_backpointer bp)
|
||||
{
|
||||
struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
|
||||
unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
int cmp = backpointer_cmp(bps[i], bp) ?:
|
||||
memcmp(&bps[i], &bp, sizeof(bp));
|
||||
if (!cmp) {
|
||||
array_remove_item(bps, nr, i);
|
||||
SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
|
||||
set_alloc_v4_u64s(a);
|
||||
return true;
|
||||
}
|
||||
if (cmp >= 0)
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static noinline int backpointer_mod_err(struct btree_trans *trans,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c bp_k,
|
||||
@ -245,7 +138,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
|
||||
struct bkey_i_alloc_v4 *a,
|
||||
struct bpos bucket,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c orig_k,
|
||||
bool insert)
|
||||
@ -262,7 +155,7 @@ int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
|
||||
return ret;
|
||||
|
||||
bkey_backpointer_init(&bp_k->k_i);
|
||||
bp_k->k.p = bucket_pos_to_bp(c, a->k.p, bp.bucket_offset);
|
||||
bp_k->k.p = bucket_pos_to_bp(c, bucket, bp.bucket_offset);
|
||||
bp_k->v = bp;
|
||||
|
||||
if (!insert) {
|
||||
@ -271,7 +164,7 @@ int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
|
||||
bucket_pos_to_bp(c, a->k.p, bp.bucket_offset),
|
||||
bp_k->k.p,
|
||||
BTREE_ITER_INTENT|
|
||||
BTREE_ITER_SLOTS|
|
||||
BTREE_ITER_WITH_UPDATES);
|
||||
@ -298,94 +191,62 @@ err:
|
||||
/*
|
||||
* Find the next backpointer >= *bp_offset:
|
||||
*/
|
||||
int __bch2_get_next_backpointer(struct btree_trans *trans,
|
||||
int bch2_get_next_backpointer(struct btree_trans *trans,
|
||||
struct bpos bucket, int gen,
|
||||
u64 *bp_offset,
|
||||
struct bpos *bp_pos_ret,
|
||||
struct bch_backpointer *dst,
|
||||
struct bpos *bp_pos,
|
||||
struct bch_backpointer *bp,
|
||||
unsigned iter_flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bpos bp_pos, bp_end_pos;
|
||||
struct btree_iter alloc_iter, bp_iter = { NULL };
|
||||
struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
|
||||
struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_c_alloc_v4 a;
|
||||
size_t i;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
if (*bp_offset == U64_MAX)
|
||||
return 0;
|
||||
|
||||
bp_pos = bucket_pos_to_bp(c, bucket,
|
||||
max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
|
||||
bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
|
||||
if (bpos_ge(*bp_pos, bp_end_pos))
|
||||
goto done;
|
||||
|
||||
if (gen >= 0) {
|
||||
bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
|
||||
bucket, BTREE_ITER_CACHED);
|
||||
bucket, BTREE_ITER_CACHED|iter_flags);
|
||||
k = bch2_btree_iter_peek_slot(&alloc_iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (k.k->type != KEY_TYPE_alloc_v4)
|
||||
if (k.k->type != KEY_TYPE_alloc_v4 ||
|
||||
bkey_s_c_to_alloc_v4(k).v->gen != gen)
|
||||
goto done;
|
||||
|
||||
a = bkey_s_c_to_alloc_v4(k);
|
||||
if (gen >= 0 && a.v->gen != gen)
|
||||
goto done;
|
||||
|
||||
for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); i++) {
|
||||
if (alloc_v4_backpointers_c(a.v)[i].bucket_offset < *bp_offset)
|
||||
continue;
|
||||
|
||||
*dst = alloc_v4_backpointers_c(a.v)[i];
|
||||
*bp_offset = dst->bucket_offset;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));
|
||||
|
||||
for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
|
||||
bp_pos, 0, k, ret) {
|
||||
*bp_pos, iter_flags, k, ret) {
|
||||
if (bpos_ge(k.k->p, bp_end_pos))
|
||||
break;
|
||||
|
||||
if (k.k->type != KEY_TYPE_backpointer)
|
||||
continue;
|
||||
|
||||
*dst = *bkey_s_c_to_backpointer(k).v;
|
||||
*bp_offset = dst->bucket_offset + BACKPOINTER_OFFSET_MAX;
|
||||
*bp_pos_ret = k.k->p;
|
||||
*bp_pos = k.k->p;
|
||||
*bp = *bkey_s_c_to_backpointer(k).v;
|
||||
goto out;
|
||||
}
|
||||
done:
|
||||
*bp_offset = U64_MAX;
|
||||
*bp_pos = SPOS_MAX;
|
||||
out:
|
||||
bch2_trans_iter_exit(trans, &bp_iter);
|
||||
bch2_trans_iter_exit(trans, &alloc_iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_get_next_backpointer(struct btree_trans *trans,
|
||||
struct bpos bucket, int gen,
|
||||
u64 *bp_offset,
|
||||
struct bch_backpointer *dst,
|
||||
unsigned iter_flags)
|
||||
{
|
||||
struct bpos bp_pos;
|
||||
|
||||
return __bch2_get_next_backpointer(trans, bucket, gen,
|
||||
bp_offset, &bp_pos,
|
||||
dst, iter_flags);
|
||||
}
|
||||
|
||||
static void backpointer_not_found(struct btree_trans *trans,
|
||||
struct bpos bucket,
|
||||
u64 bp_offset,
|
||||
struct bpos bp_pos,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c k,
|
||||
const char *thing_it_points_to)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
|
||||
|
||||
if (likely(!bch2_backpointers_no_use_write_buffer))
|
||||
return;
|
||||
@ -396,14 +257,9 @@ static void backpointer_not_found(struct btree_trans *trans,
|
||||
bch2_bpos_to_text(&buf, bucket);
|
||||
prt_printf(&buf, "\n ");
|
||||
|
||||
if (bp_offset >= BACKPOINTER_OFFSET_MAX) {
|
||||
struct bpos bp_pos =
|
||||
bucket_pos_to_bp(c, bucket,
|
||||
bp_offset - BACKPOINTER_OFFSET_MAX);
|
||||
prt_printf(&buf, "backpointer pos: ");
|
||||
bch2_bpos_to_text(&buf, bp_pos);
|
||||
prt_printf(&buf, "\n ");
|
||||
}
|
||||
|
||||
bch2_backpointer_to_text(&buf, &bp);
|
||||
prt_printf(&buf, "\n ");
|
||||
@ -418,11 +274,12 @@ static void backpointer_not_found(struct btree_trans *trans,
|
||||
|
||||
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bpos bucket,
|
||||
u64 bp_offset,
|
||||
struct bch_backpointer bp)
|
||||
struct bpos bp_pos,
|
||||
struct bch_backpointer bp,
|
||||
unsigned iter_flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
|
||||
struct bkey_s_c k;
|
||||
|
||||
bch2_trans_node_iter_init(trans, iter,
|
||||
@ -430,7 +287,7 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
|
||||
bp.pos,
|
||||
0,
|
||||
min(bp.level, c->btree_roots[bp.btree_id].level),
|
||||
0);
|
||||
iter_flags);
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
if (bkey_err(k)) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
@ -455,7 +312,7 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
|
||||
* been written out yet - backpointer_get_node() checks for
|
||||
* this:
|
||||
*/
|
||||
b = bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
|
||||
b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
|
||||
if (!IS_ERR_OR_NULL(b))
|
||||
return bkey_i_to_s_c(&b->key);
|
||||
|
||||
@ -466,7 +323,7 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
|
||||
return bkey_s_c_null;
|
||||
}
|
||||
|
||||
backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
|
||||
backpointer_not_found(trans, bp_pos, bp, k, "extent");
|
||||
}
|
||||
|
||||
return bkey_s_c_null;
|
||||
@ -474,11 +331,11 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
|
||||
|
||||
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bpos bucket,
|
||||
u64 bp_offset,
|
||||
struct bpos bp_pos,
|
||||
struct bch_backpointer bp)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
|
||||
struct btree *b;
|
||||
|
||||
BUG_ON(!bp.level);
|
||||
@ -501,7 +358,7 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
|
||||
if (b && btree_node_will_make_reachable(b)) {
|
||||
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
|
||||
} else {
|
||||
backpointer_not_found(trans, bucket, bp_offset, bp,
|
||||
backpointer_not_found(trans, bp_pos, bp,
|
||||
bkey_i_to_s_c(&b->key), "btree node");
|
||||
b = NULL;
|
||||
}
|
||||
@ -570,7 +427,7 @@ struct bpos_level {
|
||||
};
|
||||
|
||||
static int check_bp_exists(struct btree_trans *trans,
|
||||
struct bpos bucket_pos,
|
||||
struct bpos bucket,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c orig_k,
|
||||
struct bpos bucket_start,
|
||||
@ -578,40 +435,20 @@ static int check_bp_exists(struct btree_trans *trans,
|
||||
struct bpos_level *last_flushed)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter alloc_iter, bp_iter = { NULL };
|
||||
struct btree_iter bp_iter = { NULL };
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct bkey_s_c alloc_k, bp_k;
|
||||
struct bkey_s_c bp_k;
|
||||
int ret;
|
||||
|
||||
if (bpos_lt(bucket_pos, bucket_start) ||
|
||||
bpos_gt(bucket_pos, bucket_end))
|
||||
if (bpos_lt(bucket, bucket_start) ||
|
||||
bpos_gt(bucket, bucket_end))
|
||||
return 0;
|
||||
|
||||
bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, bucket_pos, 0);
|
||||
alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
|
||||
ret = bkey_err(alloc_k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (alloc_k.k->type == KEY_TYPE_alloc_v4) {
|
||||
struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(alloc_k);
|
||||
const struct bch_backpointer *bps = alloc_v4_backpointers_c(a.v);
|
||||
unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(a.v);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
int cmp = backpointer_cmp(bps[i], bp) ?:
|
||||
memcmp(&bps[i], &bp, sizeof(bp));
|
||||
if (!cmp)
|
||||
goto out;
|
||||
if (cmp >= 0)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (!bch2_dev_bucket_exists(c, bucket))
|
||||
goto missing;
|
||||
}
|
||||
|
||||
bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
|
||||
bucket_pos_to_bp(c, bucket_pos, bp.bucket_offset),
|
||||
bucket_pos_to_bp(c, bucket, bp.bucket_offset),
|
||||
0);
|
||||
bp_k = bch2_btree_iter_peek_slot(&bp_iter);
|
||||
ret = bkey_err(bp_k);
|
||||
@ -635,11 +472,9 @@ out:
|
||||
err:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &bp_iter);
|
||||
bch2_trans_iter_exit(trans, &alloc_iter);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
missing:
|
||||
|
||||
prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
|
||||
bch2_btree_ids[bp.btree_id], bp.level);
|
||||
bch2_bkey_val_to_text(&buf, c, orig_k);
|
||||
@ -648,12 +483,8 @@ missing:
|
||||
|
||||
if (c->sb.version < bcachefs_metadata_version_backpointers ||
|
||||
c->opts.reconstruct_alloc ||
|
||||
fsck_err(c, "%s", buf.buf)) {
|
||||
struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, alloc_k);
|
||||
|
||||
ret = PTR_ERR_OR_ZERO(a) ?:
|
||||
bch2_bucket_backpointer_mod(trans, a, bp, orig_k, true);
|
||||
}
|
||||
fsck_err(c, "%s", buf.buf))
|
||||
ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
|
||||
|
||||
goto out;
|
||||
}
|
||||
@ -952,53 +783,40 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
|
||||
}
|
||||
|
||||
static int check_one_backpointer(struct btree_trans *trans,
|
||||
struct bpos bucket,
|
||||
u64 *bp_offset,
|
||||
struct bbpos start,
|
||||
struct bbpos end,
|
||||
struct bkey_s_c_backpointer bp,
|
||||
struct bpos *last_flushed_pos)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bch_backpointer bp;
|
||||
struct bbpos pos;
|
||||
struct bpos bp_pos;
|
||||
struct bbpos pos = bp_to_bbpos(*bp.v);
|
||||
struct bkey_s_c k;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret;
|
||||
|
||||
ret = __bch2_get_next_backpointer(trans, bucket, -1, bp_offset, &bp_pos, &bp, 0);
|
||||
if (ret || *bp_offset == U64_MAX)
|
||||
return ret;
|
||||
|
||||
pos = bp_to_bbpos(bp);
|
||||
if (bbpos_cmp(pos, start) < 0 ||
|
||||
bbpos_cmp(pos, end) > 0)
|
||||
return 0;
|
||||
|
||||
k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
|
||||
k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
|
||||
ret = bkey_err(k);
|
||||
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
|
||||
return 0;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!k.k && !bpos_eq(*last_flushed_pos, bp_pos)) {
|
||||
*last_flushed_pos = bp_pos;
|
||||
if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
|
||||
*last_flushed_pos = bp.k->p;
|
||||
ret = bch2_btree_write_buffer_flush_sync(trans) ?:
|
||||
-BCH_ERR_transaction_restart_write_buffer_flush;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fsck_err_on(!k.k, c,
|
||||
"backpointer for %llu:%llu:%llu (btree pos %llu:%llu) points to missing extent\n %s",
|
||||
bucket.inode, bucket.offset, (u64) bp.bucket_offset,
|
||||
bp_pos.inode, bp_pos.offset,
|
||||
(bch2_backpointer_to_text(&buf, &bp), buf.buf))) {
|
||||
ret = bch2_backpointer_del_by_offset(trans, bucket, *bp_offset, bp);
|
||||
if (ret == -ENOENT)
|
||||
bch_err(c, "backpointer at %llu not found", *bp_offset);
|
||||
}
|
||||
"backpointer for missing extent\n %s",
|
||||
(bch2_backpointer_k_to_text(&buf, c, bp.s_c), buf.buf)))
|
||||
return bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
|
||||
out:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
@ -1013,25 +831,13 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bpos last_flushed_pos = SPOS_MAX;
|
||||
int ret = 0;
|
||||
|
||||
for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
u64 bp_offset = 0;
|
||||
|
||||
while (!(ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
check_one_backpointer(trans, iter.pos, &bp_offset,
|
||||
start, end, &last_flushed_pos))) &&
|
||||
bp_offset < U64_MAX)
|
||||
bp_offset++;
|
||||
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret < 0 ? ret : 0;
|
||||
return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
|
||||
POS_MIN, BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
|
||||
check_one_backpointer(trans, start, end,
|
||||
bkey_s_c_to_backpointer(k),
|
||||
&last_flushed_pos));
|
||||
}
|
||||
|
||||
int bch2_check_backpointers_to_extents(struct bch_fs *c)
|
||||
|
@ -53,16 +53,11 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool bch2_bucket_backpointer_del(struct btree_trans *,
|
||||
struct bkey_i_alloc_v4 *,
|
||||
struct bch_backpointer);
|
||||
|
||||
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *,
|
||||
struct bkey_i_alloc_v4 *,
|
||||
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bpos,
|
||||
struct bch_backpointer, struct bkey_s_c, bool);
|
||||
|
||||
static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
|
||||
struct bkey_i_alloc_v4 *a,
|
||||
struct bpos bucket,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c orig_k,
|
||||
bool insert)
|
||||
@ -71,13 +66,8 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
|
||||
struct bkey_i_backpointer *bp_k;
|
||||
int ret;
|
||||
|
||||
if (!insert &&
|
||||
unlikely(BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v)) &&
|
||||
bch2_bucket_backpointer_del(trans, a, bp))
|
||||
return 0;
|
||||
|
||||
if (unlikely(bch2_backpointers_no_use_write_buffer))
|
||||
return bch2_bucket_backpointer_mod_nowritebuffer(trans, a, bp, orig_k, insert);
|
||||
return bch2_bucket_backpointer_mod_nowritebuffer(trans, bucket, bp, orig_k, insert);
|
||||
|
||||
bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
|
||||
ret = PTR_ERR_OR_ZERO(bp_k);
|
||||
@ -85,7 +75,7 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
|
||||
return ret;
|
||||
|
||||
bkey_backpointer_init(&bp_k->k_i);
|
||||
bp_k->k.p = bucket_pos_to_bp(c, a->k.p, bp.bucket_offset);
|
||||
bp_k->k.p = bucket_pos_to_bp(c, bucket, bp.bucket_offset);
|
||||
bp_k->v = bp;
|
||||
|
||||
if (!insert) {
|
||||
@ -126,11 +116,12 @@ static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
|
||||
}
|
||||
|
||||
int bch2_get_next_backpointer(struct btree_trans *, struct bpos, int,
|
||||
u64 *, struct bch_backpointer *, unsigned);
|
||||
struct bpos *, struct bch_backpointer *, unsigned);
|
||||
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *,
|
||||
struct bpos, u64, struct bch_backpointer);
|
||||
struct bpos, struct bch_backpointer,
|
||||
unsigned);
|
||||
struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *,
|
||||
struct bpos, u64, struct bch_backpointer);
|
||||
struct bpos, struct bch_backpointer);
|
||||
|
||||
int bch2_check_btree_backpointers(struct bch_fs *);
|
||||
int bch2_check_extents_to_backpointers(struct bch_fs *);
|
||||
|
@ -629,18 +629,6 @@ struct btree_path_buf {
|
||||
|
||||
#define REPLICAS_DELTA_LIST_MAX (1U << 16)
|
||||
|
||||
struct snapshot_t {
|
||||
u32 parent;
|
||||
u32 children[2];
|
||||
u32 subvol; /* Nonzero only if a subvolume points to this node: */
|
||||
u32 equiv;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
u32 subvol;
|
||||
u64 inum;
|
||||
} subvol_inum;
|
||||
|
||||
#define BCACHEFS_ROOT_SUBVOL_INUM \
|
||||
((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
|
||||
|
||||
@ -808,6 +796,12 @@ struct bch_fs {
|
||||
struct workqueue_struct *btree_io_complete_wq;
|
||||
/* copygc needs its own workqueue for index updates.. */
|
||||
struct workqueue_struct *copygc_wq;
|
||||
/*
|
||||
* Use a dedicated wq for write ref holder tasks. Required to avoid
|
||||
* dependency problems with other wq tasks that can block on ref
|
||||
* draining, such as read-only transition.
|
||||
*/
|
||||
struct workqueue_struct *write_ref_wq;
|
||||
|
||||
/* ALLOCATION */
|
||||
struct bch_devs_mask rw_devs[BCH_DATA_NR];
|
||||
@ -937,6 +931,7 @@ struct bch_fs {
|
||||
/* COPYGC */
|
||||
struct task_struct *copygc_thread;
|
||||
struct write_point copygc_write_point;
|
||||
s64 copygc_wait_at;
|
||||
s64 copygc_wait;
|
||||
bool copygc_running;
|
||||
wait_queue_head_t copygc_running_wq;
|
||||
@ -971,6 +966,10 @@ struct bch_fs {
|
||||
reflink_gc_table reflink_gc_table;
|
||||
size_t reflink_gc_nr;
|
||||
|
||||
/* fs.c */
|
||||
struct list_head vfs_inodes_list;
|
||||
struct mutex vfs_inodes_lock;
|
||||
|
||||
/* VFS IO PATH - fs-io.c */
|
||||
struct bio_set writepage_bioset;
|
||||
struct bio_set dio_write_bioset;
|
||||
|
@ -1554,7 +1554,8 @@ struct bch_sb_field_journal_seq_blacklist {
|
||||
x(unwritten_extents, 24) \
|
||||
x(bucket_gens, 25) \
|
||||
x(lru_v2, 26) \
|
||||
x(fragmentation_lru, 27)
|
||||
x(fragmentation_lru, 27) \
|
||||
x(no_bps_in_alloc_keys, 28)
|
||||
|
||||
enum bcachefs_metadata_version {
|
||||
bcachefs_metadata_version_min = 9,
|
||||
|
@ -572,15 +572,15 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
|
||||
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
|
||||
enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
|
||||
|
||||
if (c->opts.reconstruct_alloc ||
|
||||
fsck_err_on(!g->gen_valid, c,
|
||||
"bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
|
||||
if (!g->gen_valid &&
|
||||
(c->opts.reconstruct_alloc ||
|
||||
fsck_err(c, "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
|
||||
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
|
||||
p.ptr.gen,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
|
||||
if (!p.ptr.cached) {
|
||||
g->gen_valid = true;
|
||||
g->gen = p.ptr.gen;
|
||||
@ -589,14 +589,15 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
|
||||
}
|
||||
}
|
||||
|
||||
if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, c,
|
||||
"bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
|
||||
if (gen_cmp(p.ptr.gen, g->gen) > 0 &&
|
||||
(c->opts.reconstruct_alloc ||
|
||||
fsck_err(c, "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
|
||||
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
|
||||
p.ptr.gen, g->gen,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
|
||||
if (!p.ptr.cached) {
|
||||
g->gen_valid = true;
|
||||
g->gen = p.ptr.gen;
|
||||
@ -609,25 +610,26 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
|
||||
}
|
||||
}
|
||||
|
||||
if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, c,
|
||||
"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
|
||||
if (gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX &&
|
||||
(c->opts.reconstruct_alloc ||
|
||||
fsck_err(c, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
|
||||
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
|
||||
p.ptr.gen,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
|
||||
do_update = true;
|
||||
|
||||
if (fsck_err_on(!p.ptr.cached &&
|
||||
gen_cmp(p.ptr.gen, g->gen) < 0, c,
|
||||
"bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
|
||||
if (!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0 &&
|
||||
(c->opts.reconstruct_alloc ||
|
||||
fsck_err(c, "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
|
||||
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
|
||||
p.ptr.gen, g->gen,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
|
||||
do_update = true;
|
||||
|
||||
if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
|
||||
@ -757,7 +759,7 @@ found:
|
||||
if (level)
|
||||
bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
|
||||
|
||||
if (c->opts.verbose) {
|
||||
if (0) {
|
||||
printbuf_reset(&buf);
|
||||
bch2_bkey_val_to_text(&buf, c, *k);
|
||||
bch_info(c, "updated %s", buf.buf);
|
||||
|
@ -2722,12 +2722,12 @@ static inline void btree_path_list_add(struct btree_trans *trans,
|
||||
|
||||
void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
|
||||
{
|
||||
if (iter->path)
|
||||
bch2_path_put(trans, iter->path,
|
||||
iter->flags & BTREE_ITER_INTENT);
|
||||
if (iter->update_path)
|
||||
bch2_path_put_nokeep(trans, iter->update_path,
|
||||
iter->flags & BTREE_ITER_INTENT);
|
||||
if (iter->path)
|
||||
bch2_path_put(trans, iter->path,
|
||||
iter->flags & BTREE_ITER_INTENT);
|
||||
if (iter->key_cache_path)
|
||||
bch2_path_put(trans, iter->key_cache_path,
|
||||
iter->flags & BTREE_ITER_INTENT);
|
||||
|
@ -60,6 +60,7 @@ enum btree_insert_flags {
|
||||
int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
|
||||
unsigned, unsigned);
|
||||
int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
|
||||
int bch2_btree_delete_at_buffered(struct btree_trans *, enum btree_id, struct bpos);
|
||||
|
||||
int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
|
||||
struct bkey_i *, enum btree_update_flags);
|
||||
@ -94,8 +95,8 @@ void bch2_trans_commit_hook(struct btree_trans *,
|
||||
struct btree_trans_commit_hook *);
|
||||
int __bch2_trans_commit(struct btree_trans *, unsigned);
|
||||
|
||||
int bch2_trans_log_msg(struct btree_trans *, const char *, ...);
|
||||
int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
|
||||
int bch2_journal_log_msg(struct bch_fs *, const char *, ...);
|
||||
|
||||
/**
|
||||
* bch2_trans_commit - insert keys at given iterator positions
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "btree_iter.h"
|
||||
#include "btree_locking.h"
|
||||
#include "buckets.h"
|
||||
#include "clock.h"
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "journal.h"
|
||||
@ -363,6 +364,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as,
|
||||
BUG_ON(ret);
|
||||
|
||||
trace_and_count(c, btree_node_alloc, c, b);
|
||||
bch2_increment_clock(c, btree_sectors(c), WRITE);
|
||||
return b;
|
||||
}
|
||||
|
||||
@ -686,7 +688,8 @@ err:
|
||||
bch2_trans_unlock(&trans);
|
||||
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
|
||||
mark_btree_node_locked(&trans, path, b->c.level, SIX_LOCK_intent);
|
||||
bch2_btree_path_level_init(&trans, path, b);
|
||||
path->l[b->c.level].lock_seq = b->c.lock.state.seq;
|
||||
path->l[b->c.level].b = b;
|
||||
|
||||
bch2_btree_node_lock_write_nofail(&trans, path, &b->c);
|
||||
|
||||
@ -1677,7 +1680,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
|
||||
BUG_ON(!as || as->b);
|
||||
bch2_verify_keylist_sorted(keys);
|
||||
|
||||
if (!(local_clock() & 63))
|
||||
if ((local_clock() & 63) == 63)
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
|
||||
|
||||
ret = bch2_btree_node_lock_write(trans, path, &b->c);
|
||||
@ -1717,7 +1720,7 @@ split:
|
||||
* bch2_btree_path_upgrade() and allocating more nodes:
|
||||
*/
|
||||
if (b->c.level >= as->update_level) {
|
||||
trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_);
|
||||
trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
|
||||
}
|
||||
|
||||
|
@ -622,14 +622,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
|
||||
|
||||
prefetch(&trans->c->journal.flags);
|
||||
|
||||
h = trans->hooks;
|
||||
while (h) {
|
||||
ret = h->fn(trans, h);
|
||||
if (ret)
|
||||
return ret;
|
||||
h = h->next;
|
||||
}
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
/* Multiple inserts might go to same leaf: */
|
||||
if (!same_leaf_as_prev(trans, i))
|
||||
@ -696,6 +688,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
|
||||
goto revert_fs_usage;
|
||||
}
|
||||
|
||||
h = trans->hooks;
|
||||
while (h) {
|
||||
ret = h->fn(trans, h);
|
||||
if (ret)
|
||||
goto revert_fs_usage;
|
||||
h = h->next;
|
||||
}
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) {
|
||||
ret = run_one_mem_trigger(trans, i, i->flags);
|
||||
@ -1426,10 +1426,15 @@ int bch2_trans_update_extent(struct btree_trans *trans,
|
||||
update->k.p = k.k->p;
|
||||
update->k.p.snapshot = insert->k.p.snapshot;
|
||||
|
||||
if (insert->k.p.snapshot != k.k->p.snapshot ||
|
||||
(btree_type_has_snapshots(btree_id) &&
|
||||
need_whiteout_for_snapshot(trans, btree_id, update->k.p)))
|
||||
if (insert->k.p.snapshot != k.k->p.snapshot) {
|
||||
update->k.type = KEY_TYPE_whiteout;
|
||||
} else if (btree_type_has_snapshots(btree_id)) {
|
||||
ret = need_whiteout_for_snapshot(trans, btree_id, update->k.p);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
if (ret)
|
||||
update->k.type = KEY_TYPE_whiteout;
|
||||
}
|
||||
|
||||
ret = bch2_btree_insert_nonextent(trans, btree_id, update,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
|
||||
@ -1797,6 +1802,20 @@ int bch2_btree_delete_at(struct btree_trans *trans,
|
||||
return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
|
||||
}
|
||||
|
||||
int bch2_btree_delete_at_buffered(struct btree_trans *trans,
|
||||
enum btree_id btree, struct bpos pos)
|
||||
{
|
||||
struct bkey_i *k;
|
||||
|
||||
k = bch2_trans_kmalloc(trans, sizeof(*k));
|
||||
if (IS_ERR(k))
|
||||
return PTR_ERR(k);
|
||||
|
||||
bkey_init(&k->k);
|
||||
k->k.p = pos;
|
||||
return bch2_trans_update_buffered(trans, btree, k);
|
||||
}
|
||||
|
||||
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
|
||||
struct bpos start, struct bpos end,
|
||||
unsigned update_flags,
|
||||
@ -1919,14 +1938,19 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_trans_log_msg(struct btree_trans *trans, const char *fmt, ...)
|
||||
static int
|
||||
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
|
||||
va_list args)
|
||||
{
|
||||
va_list args;
|
||||
int ret;
|
||||
|
||||
va_start(args, fmt);
|
||||
ret = __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args);
|
||||
va_end(args);
|
||||
if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
|
||||
ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
|
||||
} else {
|
||||
ret = bch2_trans_do(c, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|commit_flags,
|
||||
__bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1937,16 +1961,22 @@ int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
|
||||
int ret;
|
||||
|
||||
va_start(args, fmt);
|
||||
|
||||
if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
|
||||
ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
|
||||
} else {
|
||||
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
|
||||
__bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args));
|
||||
}
|
||||
|
||||
ret = __bch2_fs_log_msg(c, 0, fmt, args);
|
||||
va_end(args);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use for logging messages during recovery to enable reserved space and avoid
|
||||
* blocking.
|
||||
*/
|
||||
int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
int ret;
|
||||
|
||||
va_start(args, fmt);
|
||||
ret = __bch2_fs_log_msg(c, JOURNAL_WATERMARK_reserved, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
@ -109,9 +109,9 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
|
||||
struct journal *j = &c->journal;
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct journal_entry_pin pin;
|
||||
struct btree_write_buffered_key *i, *dst, *keys;
|
||||
struct btree_write_buffered_key *i, *keys;
|
||||
struct btree_iter iter = { NULL };
|
||||
size_t nr = 0, skipped = 0, fast = 0;
|
||||
size_t nr = 0, skipped = 0, fast = 0, slowpath = 0;
|
||||
bool write_locked = false;
|
||||
union btree_write_buffer_state s;
|
||||
int ret = 0;
|
||||
@ -135,15 +135,13 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
|
||||
*
|
||||
* However, since we're not flushing in the order they appear in the
|
||||
* journal we won't be able to drop our journal pin until everything is
|
||||
* flushed - which means this could deadlock the journal, if we weren't
|
||||
* passing BTREE_INSERT_JORUNAL_RECLAIM. This causes the update to fail
|
||||
* flushed - which means this could deadlock the journal if we weren't
|
||||
* passing BTREE_INSERT_JOURNAL_RECLAIM. This causes the update to fail
|
||||
* if it would block taking a journal reservation.
|
||||
*
|
||||
* If that happens, we sort them by the order they appeared in the
|
||||
* journal - after dropping redundant entries - and then restart
|
||||
* flushing, this time dropping journal pins as we go.
|
||||
* If that happens, simply skip the key so we can optimistically insert
|
||||
* as many keys as possible in the fast path.
|
||||
*/
|
||||
|
||||
sort(keys, nr, sizeof(keys[0]),
|
||||
btree_write_buffered_key_cmp, NULL);
|
||||
|
||||
@ -152,6 +150,7 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
|
||||
i[0].btree == i[1].btree &&
|
||||
bpos_eq(i[0].k.k.p, i[1].k.k.p)) {
|
||||
skipped++;
|
||||
i->journal_seq = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -177,8 +176,14 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
|
||||
bch2_trans_begin(trans);
|
||||
} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
|
||||
|
||||
if (ret == -BCH_ERR_journal_reclaim_would_deadlock) {
|
||||
slowpath++;
|
||||
continue;
|
||||
}
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
i->journal_seq = 0;
|
||||
}
|
||||
|
||||
if (write_locked)
|
||||
@ -187,7 +192,7 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
|
||||
|
||||
trace_write_buffer_flush(trans, nr, skipped, fast, wb->size);
|
||||
|
||||
if (ret == -BCH_ERR_journal_reclaim_would_deadlock)
|
||||
if (slowpath)
|
||||
goto slowpath;
|
||||
|
||||
bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
|
||||
@ -198,23 +203,19 @@ out:
|
||||
slowpath:
|
||||
trace_write_buffer_flush_slowpath(trans, i - keys, nr);
|
||||
|
||||
dst = keys;
|
||||
for (; i < keys + nr; i++) {
|
||||
if (i + 1 < keys + nr &&
|
||||
i[0].btree == i[1].btree &&
|
||||
bpos_eq(i[0].k.k.p, i[1].k.k.p))
|
||||
continue;
|
||||
|
||||
*dst = *i;
|
||||
dst++;
|
||||
}
|
||||
nr = dst - keys;
|
||||
|
||||
/*
|
||||
* Now sort the rest by journal seq and bump the journal pin as we go.
|
||||
* The slowpath zapped the seq of keys that were successfully flushed so
|
||||
* we can skip those here.
|
||||
*/
|
||||
sort(keys, nr, sizeof(keys[0]),
|
||||
btree_write_buffered_journal_cmp,
|
||||
NULL);
|
||||
|
||||
for (i = keys; i < keys + nr; i++) {
|
||||
if (!i->journal_seq)
|
||||
continue;
|
||||
|
||||
if (i->journal_seq > pin.seq) {
|
||||
struct journal_entry_pin pin2;
|
||||
|
||||
|
@ -1407,17 +1407,17 @@ static inline int bch2_trans_mark_pointer(struct btree_trans *trans,
|
||||
bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
|
||||
struct btree_iter iter;
|
||||
struct bkey_i_alloc_v4 *a;
|
||||
struct bpos bucket_pos;
|
||||
struct bpos bucket;
|
||||
struct bch_backpointer bp;
|
||||
s64 sectors;
|
||||
int ret;
|
||||
|
||||
bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket_pos, &bp);
|
||||
bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp);
|
||||
sectors = bp.bucket_len;
|
||||
if (!insert)
|
||||
sectors = -sectors;
|
||||
|
||||
a = bch2_trans_start_alloc_update(trans, &iter, bucket_pos);
|
||||
a = bch2_trans_start_alloc_update(trans, &iter, bucket);
|
||||
if (IS_ERR(a))
|
||||
return PTR_ERR(a);
|
||||
|
||||
@ -1428,7 +1428,7 @@ static inline int bch2_trans_mark_pointer(struct btree_trans *trans,
|
||||
goto err;
|
||||
|
||||
if (!p.ptr.cached) {
|
||||
ret = bch2_bucket_backpointer_mod(trans, a, bp, k, insert);
|
||||
ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
@ -19,11 +19,11 @@ struct { \
|
||||
|
||||
typedef DARRAY(void) darray_void;
|
||||
|
||||
static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more)
|
||||
static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp)
|
||||
{
|
||||
if (d->nr + more > d->size) {
|
||||
size_t new_size = roundup_pow_of_two(d->nr + more);
|
||||
void *data = krealloc_array(d->data, new_size, t_size, GFP_KERNEL);
|
||||
void *data = krealloc_array(d->data, new_size, t_size, gfp);
|
||||
|
||||
if (!data)
|
||||
return -ENOMEM;
|
||||
@ -35,20 +35,30 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define darray_make_room_gfp(_d, _more, _gfp) \
|
||||
__darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
|
||||
|
||||
#define darray_make_room(_d, _more) \
|
||||
__darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more))
|
||||
darray_make_room_gfp(_d, _more, GFP_KERNEL)
|
||||
|
||||
#define darray_top(_d) ((_d).data[(_d).nr])
|
||||
|
||||
#define darray_push(_d, _item) \
|
||||
#define darray_push_gfp(_d, _item, _gfp) \
|
||||
({ \
|
||||
int _ret = darray_make_room((_d), 1); \
|
||||
int _ret = darray_make_room_gfp((_d), 1, _gfp); \
|
||||
\
|
||||
if (!_ret) \
|
||||
(_d)->data[(_d)->nr++] = (_item); \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
#define darray_push(_d, _item) darray_push_gfp(_d, _item, GFP_KERNEL)
|
||||
|
||||
#define darray_pop(_d) ((_d)->data[--(_d)->nr])
|
||||
|
||||
#define darray_first(_d) ((_d).data[0])
|
||||
#define darray_last(_d) ((_d).data[(_d).nr - 1])
|
||||
|
||||
#define darray_insert_item(_d, _pos, _item) \
|
||||
({ \
|
||||
size_t pos = (_pos); \
|
||||
|
@ -163,7 +163,11 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
if (((1U << i) & m->data_opts.rewrite_ptrs) &&
|
||||
(ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
|
||||
!ptr->cached) {
|
||||
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr);
|
||||
/*
|
||||
* See comment below:
|
||||
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
|
||||
*/
|
||||
rewrites_found |= 1U << i;
|
||||
}
|
||||
i++;
|
||||
@ -205,7 +209,14 @@ restart_drop_extra_replicas:
|
||||
if (!p.ptr.cached &&
|
||||
durability - ptr_durability >= m->op.opts.data_replicas) {
|
||||
durability -= ptr_durability;
|
||||
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), &entry->ptr);
|
||||
/*
|
||||
* Currently, we're dropping unneeded replicas
|
||||
* instead of marking them as cached, since
|
||||
* cached data in stripe buckets prevents them
|
||||
* from being reused:
|
||||
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
|
||||
*/
|
||||
goto restart_drop_extra_replicas;
|
||||
}
|
||||
}
|
||||
|
@ -826,7 +826,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
|
||||
void bch2_do_stripe_deletes(struct bch_fs *c)
|
||||
{
|
||||
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) &&
|
||||
!schedule_work(&c->ec_stripe_delete_work))
|
||||
!queue_work(c->write_ref_wq, &c->ec_stripe_delete_work))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
|
||||
}
|
||||
|
||||
@ -887,7 +887,7 @@ err:
|
||||
static int ec_stripe_update_extent(struct btree_trans *trans,
|
||||
struct bpos bucket, u8 gen,
|
||||
struct ec_stripe_buf *s,
|
||||
u64 *bp_offset)
|
||||
struct bpos *bp_pos)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_backpointer bp;
|
||||
@ -900,10 +900,10 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
|
||||
int ret, dev, block;
|
||||
|
||||
ret = bch2_get_next_backpointer(trans, bucket, gen,
|
||||
bp_offset, &bp, BTREE_ITER_CACHED);
|
||||
bp_pos, &bp, BTREE_ITER_CACHED);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (*bp_offset == U64_MAX)
|
||||
if (bpos_eq(*bp_pos, SPOS_MAX))
|
||||
return 0;
|
||||
|
||||
if (bp.level) {
|
||||
@ -911,7 +911,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
|
||||
struct btree_iter node_iter;
|
||||
struct btree *b;
|
||||
|
||||
b = bch2_backpointer_get_node(trans, &node_iter, bucket, *bp_offset, bp);
|
||||
b = bch2_backpointer_get_node(trans, &node_iter, *bp_pos, bp);
|
||||
bch2_trans_iter_exit(trans, &node_iter);
|
||||
|
||||
if (!b)
|
||||
@ -925,7 +925,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
|
||||
k = bch2_backpointer_get_key(trans, &iter, *bp_pos, bp, BTREE_ITER_INTENT);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -984,7 +984,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_extent_ptr bucket = s->key.v.ptrs[block];
|
||||
struct bpos bucket_pos = PTR_BUCKET_POS(c, &bucket);
|
||||
u64 bp_offset = 0;
|
||||
struct bpos bp_pos = POS_MIN;
|
||||
int ret = 0;
|
||||
|
||||
while (1) {
|
||||
@ -992,13 +992,13 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
ec_stripe_update_extent(trans, bucket_pos, bucket.gen,
|
||||
s, &bp_offset));
|
||||
s, &bp_pos));
|
||||
if (ret)
|
||||
break;
|
||||
if (bp_offset == U64_MAX)
|
||||
if (bkey_eq(bp_pos, POS_MAX))
|
||||
break;
|
||||
|
||||
bp_offset++;
|
||||
bp_pos = bpos_nosnap_successor(bp_pos);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
libbcachefs/fs-io.c: 1148 lines changed (file diff suppressed because it is too large)
@ -451,19 +451,20 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
|
||||
return ret;
|
||||
|
||||
if (path.dentry->d_sb->s_fs_info != c) {
|
||||
path_put(&path);
|
||||
return -EXDEV;
|
||||
ret = -EXDEV;
|
||||
goto err;
|
||||
}
|
||||
|
||||
dir = path.dentry->d_parent->d_inode;
|
||||
|
||||
ret = __bch2_unlink(dir, path.dentry, true);
|
||||
if (!ret) {
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
fsnotify_rmdir(dir, path.dentry);
|
||||
d_delete(path.dentry);
|
||||
}
|
||||
err:
|
||||
path_put(&path);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
libbcachefs/fs.c: 114 lines changed
@ -105,6 +105,11 @@ retry:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;

bch2_fs_fatal_err_on(ret == -ENOENT, c,
"inode %u:%llu not found when updating",
inode_inum(inode).subvol,
inode_inum(inode).inum);

bch2_trans_exit(&trans);
return ret < 0 ? ret : 0;
}
@ -201,6 +206,10 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
return ERR_PTR(ret);
}

mutex_lock(&c->vfs_inodes_lock);
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock);

unlock_new_inode(&inode->v);

return &inode->v;
@ -314,6 +323,9 @@ err_before_quota:

inode = old;
} else {
mutex_lock(&c->vfs_inodes_lock);
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock);
/*
* we really don't want insert_inode_locked2() to be setting
* I_NEW...
@ -447,14 +459,22 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
inode_inum(dir), &dir_u,
&inode_u, &dentry->d_name,
deleting_snapshot));
if (unlikely(ret))
goto err;

if (likely(!ret)) {
bch2_inode_update_after_write(&trans, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
bch2_inode_update_after_write(&trans, inode, &inode_u,
ATTR_MTIME);
}

if (inode_u.bi_subvol) {
/*
* Subvolume deletion is asynchronous, but we still want to tell
* the VFS that it's been deleted here:
*/
set_nlink(&inode->v, 0);
}
err:
bch2_trans_exit(&trans);
bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);

@ -1349,6 +1369,8 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
inode->v.i_op = &bch_special_inode_operations;
break;
}

mapping_set_large_folios(inode->v.i_mapping);
}

static struct inode *bch2_alloc_inode(struct super_block *sb)
@ -1362,6 +1384,7 @@ static struct inode *bch2_alloc_inode(struct super_block *sb)
inode_init_once(&inode->v);
mutex_init(&inode->ei_update_lock);
two_state_lock_init(&inode->ei_pagecache_lock);
INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
mutex_init(&inode->ei_quota_lock);

return &inode->v;
@ -1426,53 +1449,78 @@ static void bch2_evict_inode(struct inode *vinode)
KEY_TYPE_QUOTA_WARN);
bch2_inode_rm(c, inode_inum(inode));
}

mutex_lock(&c->vfs_inodes_lock);
list_del_init(&inode->ei_vfs_inode_list);
mutex_unlock(&c->vfs_inodes_lock);
}

void bch2_evict_subvolume_inodes(struct bch_fs *c,
snapshot_id_list *s)
void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
{
struct super_block *sb = c->vfs_sb;
struct inode *inode;
struct bch_inode_info *inode, **i;
DARRAY(struct bch_inode_info *) grabbed;
bool clean_pass = false, this_pass_clean;

spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
(inode->i_state & I_FREEING))
continue;
/*
* Initially, we scan for inodes without I_DONTCACHE, then mark them to
* be pruned with d_mark_dontcache().
*
* Once we've had a clean pass where we didn't find any inodes without
* I_DONTCACHE, we wait for them to be freed:
*/

d_mark_dontcache(inode);
d_prune_aliases(inode);
}
spin_unlock(&sb->s_inode_list_lock);
darray_init(&grabbed);
darray_make_room(&grabbed, 1024);
again:
cond_resched();
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) ||
(inode->i_state & I_FREEING))
this_pass_clean = true;

mutex_lock(&c->vfs_inodes_lock);
list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
if (!snapshot_list_has_id(s, inode->ei_subvol))
continue;

if (!(inode->i_state & I_DONTCACHE)) {
d_mark_dontcache(inode);
d_prune_aliases(inode);
}
if (!(inode->v.i_state & I_DONTCACHE) &&
!(inode->v.i_state & I_FREEING)) {
this_pass_clean = false;

d_mark_dontcache(&inode->v);
d_prune_aliases(&inode->v);

/*
* If i_count was zero, we have to take and release a
* ref in order for I_DONTCACHE to be noticed and the
* inode to be dropped;
*/

if (!atomic_read(&inode->v.i_count) &&
igrab(&inode->v) &&
darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN))
break;
} else if (clean_pass && this_pass_clean) {
wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);

spin_lock(&inode->i_lock);
if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) &&
!(inode->i_state & I_FREEING)) {
wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW);
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inode_list_lock);
mutex_unlock(&c->vfs_inodes_lock);

schedule();
finish_wait(wq, &wait.wq_entry);
goto again;
}

spin_unlock(&inode->i_lock);
}
spin_unlock(&sb->s_inode_list_lock);
mutex_unlock(&c->vfs_inodes_lock);

darray_for_each(grabbed, i)
iput(&(*i)->v);
grabbed.nr = 0;

if (!clean_pass || !this_pass_clean) {
clean_pass = this_pass_clean;
goto again;
}

darray_exit(&grabbed);
}

static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
@ -13,6 +13,7 @@

struct bch_inode_info {
struct inode v;
struct list_head ei_vfs_inode_list;
unsigned long ei_flags;

struct mutex ei_update_lock;
@ -803,9 +803,6 @@ retry:

bch2_inode_unpack(k, &inode_u);

/* Subvolume root? */
BUG_ON(inode_u.bi_subvol);

bkey_inode_generation_init(&delete.k_i);
delete.k.p = iter.pos;
delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
@ -151,11 +151,11 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
{
struct bvec_iter_all iter;
struct bio_vec *bv;
struct bio_vec bv;

bio_for_each_segment_all(bv, bio, iter)
if (bv->bv_page != ZERO_PAGE(0))
mempool_free(bv->bv_page, &c->bio_bounce_pages);
if (bv.bv_page != ZERO_PAGE(0))
mempool_free(bv.bv_page, &c->bio_bounce_pages);
bio->bi_vcnt = 0;
}

@ -385,6 +385,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
struct open_buckets open_buckets;
struct bkey_s_c k;
struct bkey_buf old, new;
unsigned sectors_allocated;
bool have_reservation = false;
bool unwritten = opts.nocow &&
c->sb.version >= bcachefs_metadata_version_unwritten_extents;
@ -395,6 +396,8 @@ int bch2_extent_fallocate(struct btree_trans *trans,
closure_init_stack(&cl);
open_buckets.nr = 0;
retry:
sectors_allocated = 0;

k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
@ -451,15 +454,16 @@ retry:
opts.data_replicas,
opts.data_replicas,
RESERVE_none, 0, &cl, &wp);
if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) {
if (ret) {
bch2_trans_unlock(trans);
closure_sync(&cl);
if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
goto retry;
}
if (ret)
return ret;
}

sectors = min(sectors, wp->sectors_free);
sectors_allocated = sectors;

bch2_key_resize(&e->k, sectors);

@ -486,6 +490,9 @@ out:
goto retry;
}

if (!ret && sectors_allocated)
bch2_increment_clock(c, sectors_allocated, WRITE);

bch2_open_buckets_put(c, &open_buckets);
bch2_disk_reservation_put(c, &disk_res);
bch2_bkey_buf_exit(&new, c);
@ -1475,7 +1482,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_ptrs_c ptrs;
const struct bch_extent_ptr *ptr, *ptr2;
const struct bch_extent_ptr *ptr;
struct {
struct bpos b;
unsigned gen;
@ -1530,11 +1537,12 @@ retry:
bucket_to_u64(buckets[nr_buckets].b));

prefetch(buckets[nr_buckets].l);
nr_buckets++;

if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
goto err_get_ioref;

nr_buckets++;

if (ptr->unwritten)
op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
}
@ -1625,12 +1633,8 @@ err:
}
return;
err_get_ioref:
bkey_for_each_ptr(ptrs, ptr2) {
if (ptr2 == ptr)
break;

percpu_ref_put(&bch_dev_bkey_exists(c, ptr2->dev)->io_ref);
}
for (i = 0; i < nr_buckets; i++)
percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);

/* Fall back to COW path: */
goto out;
@ -1639,9 +1643,8 @@ err_bucket_stale:
bch2_bucket_nocow_unlock(&c->nocow_locks,
buckets[i].b,
BUCKET_NOCOW_LOCK_UPDATE);

bkey_for_each_ptr(ptrs, ptr2)
percpu_ref_put(&bch_dev_bkey_exists(c, ptr2->dev)->io_ref);
for (i = 0; i < nr_buckets; i++)
percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);

/* We can retry this: */
ret = BCH_ERR_transaction_restart;
@ -1889,6 +1892,7 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
prt_str(out, "pos: ");
bch2_bpos_to_text(out, op->pos);
prt_newline(out);
printbuf_indent_add(out, 2);

prt_str(out, "started: ");
bch2_pr_time_units(out, local_clock() - op->start_time);
@ -1897,6 +1901,11 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
prt_str(out, "flags: ");
prt_bitflags(out, bch2_write_flags, op->flags);
prt_newline(out);

prt_printf(out, "ref: %u", closure_nr_remaining(&op->cl));
prt_newline(out);

printbuf_indent_sub(out, 2);
}

/* Cache promotion on read */
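The bch2_bio_free_pages_pool() change above follows the new by-value bio_for_each_segment_all() / bvec_iter_all API: callers declare a struct bio_vec rather than a pointer and use '.' instead of '->'. A hypothetical caller, shown only as a sketch of the new calling convention:

/* Sketch: count zero pages in a bio with the by-value iterator. */
static unsigned count_zero_pages(struct bio *bio)
{
	struct bvec_iter_all iter;
	struct bio_vec bv;
	unsigned nr = 0;

	bio_for_each_segment_all(bv, bio, iter)
		if (bv.bv_page == ZERO_PAGE(0))
			nr++;

	return nr;
}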
@ -76,6 +76,67 @@ static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
p->devs.nr = 0;
}

/*
* Detect stuck journal conditions and trigger shutdown. Technically the journal
* can end up stuck for a variety of reasons, such as a blocked I/O, journal
* reservation lockup, etc. Since this is a fatal error with potentially
* unpredictable characteristics, we want to be fairly conservative before we
* decide to shut things down.
*
* Consider the journal stuck when it appears full with no ability to commit
* btree transactions, to discard journal buckets, nor acquire priority
* (reserved watermark) reservation.
*/
static inline bool
journal_error_check_stuck(struct journal *j, int error, unsigned flags)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
bool stuck = false;
struct printbuf buf = PRINTBUF;

if (!(error == JOURNAL_ERR_journal_full ||
error == JOURNAL_ERR_journal_pin_full) ||
nr_unwritten_journal_entries(j) ||
(flags & JOURNAL_WATERMARK_MASK) != JOURNAL_WATERMARK_reserved)
return stuck;

spin_lock(&j->lock);

if (j->can_discard) {
spin_unlock(&j->lock);
return stuck;
}

stuck = true;

/*
* The journal shutdown path will set ->err_seq, but do it here first to
* serialize against concurrent failures and avoid duplicate error
* reports.
*/
if (j->err_seq) {
spin_unlock(&j->lock);
return stuck;
}
j->err_seq = journal_cur_seq(j);
spin_unlock(&j->lock);

bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)",
bch2_journal_errors[error]);
bch2_journal_debug_to_text(&buf, j);
bch_err(c, "%s", buf.buf);

printbuf_reset(&buf);
bch2_journal_pins_to_text(&buf, j);
bch_err(c, "Journal pins:\n%s", buf.buf);
printbuf_exit(&buf);

bch2_fatal_error(c);
dump_stack();

return stuck;
}

/* journal entry close/open: */

void __bch2_journal_buf_put(struct journal *j)
@ -163,6 +224,7 @@ void bch2_journal_halt(struct journal *j)
__journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL);
if (!j->err_seq)
j->err_seq = journal_cur_seq(j);
journal_wake(j);
spin_unlock(&j->lock);
}

@ -363,6 +425,12 @@ retry:

spin_lock(&j->lock);

/* check once more in case somebody else shut things down... */
if (bch2_journal_error(j)) {
spin_unlock(&j->lock);
return -BCH_ERR_erofs_journal_err;
}

/*
* Recheck after taking the lock, so we don't race with another thread
* that just did journal_entry_open() and call journal_entry_close()
@ -410,28 +478,8 @@ unlock:

if (!ret)
goto retry;

if ((ret == JOURNAL_ERR_journal_full ||
ret == JOURNAL_ERR_journal_pin_full) &&
!can_discard &&
!nr_unwritten_journal_entries(j) &&
(flags & JOURNAL_WATERMARK_MASK) == JOURNAL_WATERMARK_reserved) {
struct printbuf buf = PRINTBUF;

bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (ret %s)",
bch2_journal_errors[ret]);

bch2_journal_debug_to_text(&buf, j);
bch_err(c, "%s", buf.buf);

printbuf_reset(&buf);
bch2_journal_pins_to_text(&buf, j);
bch_err(c, "Journal pins:\n%s", buf.buf);

printbuf_exit(&buf);
bch2_fatal_error(c);
dump_stack();
}
if (journal_error_check_stuck(j, ret, flags))
ret = -BCH_ERR_journal_res_get_blocked;

/*
* Journal is full - can't rely on reclaim from work item due to
@ -210,24 +210,7 @@ void bch2_journal_space_available(struct journal *j)
clean = j->space[journal_space_clean].total;
total = j->space[journal_space_total].total;

if (!clean_ondisk &&
journal_cur_seq(j) == j->seq_ondisk) {
struct printbuf buf = PRINTBUF;

__bch2_journal_debug_to_text(&buf, j);
bch_err(c, "journal stuck\n%s", buf.buf);
printbuf_exit(&buf);

/*
* Hack: bch2_fatal_error() calls bch2_journal_halt() which
* takes journal lock:
*/
spin_unlock(&j->lock);
bch2_fatal_error(c);
spin_lock(&j->lock);

ret = JOURNAL_ERR_journal_stuck;
} else if (!j->space[journal_space_discarded].next_entry)
if (!j->space[journal_space_discarded].next_entry)
ret = JOURNAL_ERR_journal_full;

if ((j->space[journal_space_clean_ondisk].next_entry <
@ -148,7 +148,8 @@ static int bch2_check_lru_key(struct btree_trans *trans,
goto out;
}

if (fsck_err(c, "incorrect lru entry: lru %s time %llu\n"
if (c->opts.reconstruct_alloc ||
fsck_err(c, "incorrect lru entry: lru %s time %llu\n"
" %s\n"
" for %s",
bch2_lru_types[type],
@ -627,9 +627,12 @@ void bch2_verify_bucket_evacuated(struct btree_trans *trans, struct bpos bucket,
struct bkey_s_c k;
struct printbuf buf = PRINTBUF;
struct bch_backpointer bp;
u64 bp_offset = 0;
struct bpos bp_pos = POS_MIN;
unsigned nr_bps = 0;
int ret;

bch2_trans_begin(trans);

bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
bucket, BTREE_ITER_CACHED);
again:
@ -650,6 +653,7 @@ again:
}
}

set_btree_iter_dontneed(&iter);
bch2_trans_iter_exit(trans, &iter);
return;
failed_to_evacuate:
@ -665,17 +669,16 @@ failed_to_evacuate:
bch2_trans_begin(trans);

ret = bch2_get_next_backpointer(trans, bucket, gen,
&bp_offset, &bp,
&bp_pos, &bp,
BTREE_ITER_CACHED);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
if (bp_offset == U64_MAX)
if (bkey_eq(bp_pos, POS_MAX))
break;

k = bch2_backpointer_get_key(trans, &iter,
bucket, bp_offset, bp);
k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
@ -686,6 +689,10 @@ failed_to_evacuate:
prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, k);
bch2_trans_iter_exit(trans, &iter);

if (++nr_bps > 10)
break;
bp_pos = bpos_nosnap_successor(bp_pos);
}

bch2_print_string_as_lines(KERN_ERR, buf.buf);
@ -709,11 +716,17 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
struct data_update_opts data_opts;
unsigned dirty_sectors, bucket_size;
u64 fragmentation;
u64 bp_offset = 0, cur_inum = U64_MAX;
u64 cur_inum = U64_MAX;
struct bpos bp_pos = POS_MIN;
int ret = 0;

bch2_bkey_buf_init(&sk);

/*
* We're not run in a context that handles transaction restarts:
*/
bch2_trans_begin(trans);

bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
bucket, BTREE_ITER_CACHED);
ret = lockrestart_do(trans,
@ -740,13 +753,13 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
bch2_trans_begin(trans);

ret = bch2_get_next_backpointer(trans, bucket, gen,
&bp_offset, &bp,
&bp_pos, &bp,
BTREE_ITER_CACHED);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
goto err;
if (bp_offset == U64_MAX)
if (bkey_eq(bp_pos, POS_MAX))
break;

if (!bp.level) {
@ -754,8 +767,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
struct bkey_s_c k;
unsigned i = 0;

k = bch2_backpointer_get_key(trans, &iter,
bucket, bp_offset, bp);
k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
@ -810,8 +822,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
} else {
struct btree *b;

b = bch2_backpointer_get_node(trans, &iter,
bucket, bp_offset, bp);
b = bch2_backpointer_get_node(trans, &iter, bp_pos, bp);
ret = PTR_ERR_OR_ZERO(b);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
continue;
@ -839,7 +850,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
}
}
next:
bp_offset++;
bp_pos = bpos_nosnap_successor(bp_pos);
}

trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret);
@ -16,9 +16,20 @@ struct bch_move_stats {
atomic64_t sectors_raced;
};

struct move_bucket_in_flight {
struct move_bucket_key {
struct bpos bucket;
u8 gen;
};

struct move_bucket {
struct move_bucket_key k;
unsigned sectors;
};

struct move_bucket_in_flight {
struct move_bucket_in_flight *next;
struct rhash_head hash;
struct move_bucket bucket;
atomic_t count;
};
@ -34,8 +34,51 @@
#include <linux/sort.h>
#include <linux/wait.h>

struct buckets_in_flight {
struct rhashtable table;
struct move_bucket_in_flight *first;
struct move_bucket_in_flight *last;
size_t nr;
size_t sectors;
};

static const struct rhashtable_params bch_move_bucket_params = {
.head_offset = offsetof(struct move_bucket_in_flight, hash),
.key_offset = offsetof(struct move_bucket_in_flight, bucket.k),
.key_len = sizeof(struct move_bucket_key),
};

static struct move_bucket_in_flight *
move_bucket_in_flight_add(struct buckets_in_flight *list, struct move_bucket b)
{
struct move_bucket_in_flight *new = kzalloc(sizeof(*new), GFP_KERNEL);
int ret;

if (!new)
return ERR_PTR(-ENOMEM);

new->bucket = b;

ret = rhashtable_lookup_insert_fast(&list->table, &new->hash,
bch_move_bucket_params);
if (ret) {
kfree(new);
return ERR_PTR(ret);
}

if (!list->first)
list->first = new;
else
list->last->next = new;

list->last = new;
list->nr++;
list->sectors += b.sectors;
return new;
}

static int bch2_bucket_is_movable(struct btree_trans *trans,
struct bpos bucket, u64 time, u8 *gen)
struct move_bucket *b, u64 time)
{
struct btree_iter iter;
struct bkey_s_c k;
@ -43,10 +86,13 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
const struct bch_alloc_v4 *a;
int ret;

if (bch2_bucket_is_open(trans->c, bucket.inode, bucket.offset))
if (bch2_bucket_is_open(trans->c,
b->k.bucket.inode,
b->k.bucket.offset))
return 0;

bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_CACHED);
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
b->k.bucket, BTREE_ITER_CACHED);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
bch2_trans_iter_exit(trans, &iter);
@ -55,12 +101,14 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
return ret;

a = bch2_alloc_to_v4(k, &_a);
*gen = a->gen;
b->k.gen = a->gen;
b->sectors = a->dirty_sectors;

ret = data_type_movable(a->data_type) &&
a->fragmentation_lru &&
a->fragmentation_lru <= time;

if (ret) {
if (!ret) {
struct printbuf buf = PRINTBUF;

bch2_bkey_val_to_text(&buf, trans->c, k);
@ -71,41 +119,16 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
return ret;
}

typedef FIFO(struct move_bucket_in_flight) move_buckets_in_flight;

struct move_bucket {
struct bpos bucket;
u8 gen;
};

typedef DARRAY(struct move_bucket) move_buckets;

static int move_bucket_cmp(const void *_l, const void *_r)
{
const struct move_bucket *l = _l;
const struct move_bucket *r = _r;

return bkey_cmp(l->bucket, r->bucket);
}

static bool bucket_in_flight(move_buckets *buckets_sorted, struct move_bucket b)
{
return bsearch(&b,
buckets_sorted->data,
buckets_sorted->nr,
sizeof(buckets_sorted->data[0]),
move_bucket_cmp) != NULL;
}

static void move_buckets_wait(struct btree_trans *trans,
struct moving_context *ctxt,
move_buckets_in_flight *buckets_in_flight,
size_t nr, bool verify_evacuated)
struct buckets_in_flight *list,
bool flush)
{
while (!fifo_empty(buckets_in_flight)) {
struct move_bucket_in_flight *i = &fifo_peek_front(buckets_in_flight);
struct move_bucket_in_flight *i;
int ret;

if (fifo_used(buckets_in_flight) > nr)
while ((i = list->first)) {
if (flush)
move_ctxt_wait_event(ctxt, trans, !atomic_read(&i->count));

if (atomic_read(&i->count))
@ -116,66 +139,82 @@ static void move_buckets_wait(struct btree_trans *trans,
* reads, which inits another btree_trans; this one must be
* unlocked:
*/
if (verify_evacuated)
bch2_verify_bucket_evacuated(trans, i->bucket, i->gen);
buckets_in_flight->front++;
bch2_verify_bucket_evacuated(trans, i->bucket.k.bucket, i->bucket.k.gen);

list->first = i->next;
if (!list->first)
list->last = NULL;

list->nr--;
list->sectors -= i->bucket.sectors;

ret = rhashtable_remove_fast(&list->table, &i->hash,
bch_move_bucket_params);
BUG_ON(ret);
kfree(i);
}

bch2_trans_unlock(trans);
}

static bool bucket_in_flight(struct buckets_in_flight *list,
struct move_bucket_key k)
{
return rhashtable_lookup_fast(&list->table, &k, bch_move_bucket_params);
}

typedef DARRAY(struct move_bucket) move_buckets;

static int bch2_copygc_get_buckets(struct btree_trans *trans,
struct moving_context *ctxt,
move_buckets_in_flight *buckets_in_flight,
struct buckets_in_flight *buckets_in_flight,
move_buckets *buckets)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
move_buckets buckets_sorted = { 0 };
struct move_bucket_in_flight *i;
struct bkey_s_c k;
size_t fifo_iter, nr_to_get;
size_t nr_to_get = max(16UL, buckets_in_flight->nr / 4);
size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
int ret;

move_buckets_wait(trans, ctxt, buckets_in_flight, buckets_in_flight->size / 2, true);
move_buckets_wait(trans, ctxt, buckets_in_flight, false);

nr_to_get = max(16UL, fifo_used(buckets_in_flight) / 4);

fifo_for_each_entry_ptr(i, buckets_in_flight, fifo_iter) {
ret = darray_push(&buckets_sorted, ((struct move_bucket) {i->bucket, i->gen}));
if (ret) {
bch_err(trans->c, "error allocating move_buckets_sorted");
goto err;
}
}

sort(buckets_sorted.data,
buckets_sorted.nr,
sizeof(buckets_sorted.data[0]),
move_bucket_cmp,
NULL);
ret = bch2_btree_write_buffer_flush(trans);
if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()",
__func__, bch2_err_str(ret)))
return ret;

ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru,
lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
0, k, ({
struct move_bucket b = { .bucket = u64_to_bucket(k.k->p.offset) };
struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
int ret = 0;

if (!bucket_in_flight(&buckets_sorted, b) &&
bch2_bucket_is_movable(trans, b.bucket, lru_pos_time(k.k->p), &b.gen))
ret = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
saw++;

if (!bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p)))
not_movable++;
else if (bucket_in_flight(buckets_in_flight, b.k))
in_flight++;
else {
ret = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
if (ret >= 0)
sectors += b.sectors;
}
ret;
}));
err:
darray_exit(&buckets_sorted);

pr_debug("have: %zu (%zu) saw %zu in flight %zu not movable %zu got %zu (%zu)/%zu buckets ret %i",
buckets_in_flight->nr, buckets_in_flight->sectors,
saw, in_flight, not_movable, buckets->nr, sectors, nr_to_get, ret);

return ret < 0 ? ret : 0;
}

static int bch2_copygc(struct btree_trans *trans,
struct moving_context *ctxt,
move_buckets_in_flight *buckets_in_flight)
struct buckets_in_flight *buckets_in_flight)
{
struct bch_fs *c = trans->c;
struct data_update_opts data_opts = {
@ -187,11 +226,6 @@ static int bch2_copygc(struct btree_trans *trans,
u64 moved = atomic64_read(&ctxt->stats->sectors_moved);
int ret = 0;

ret = bch2_btree_write_buffer_flush(trans);
if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()",
__func__, bch2_err_str(ret)))
return ret;

ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets);
if (ret)
goto err;
@ -200,12 +234,17 @@ static int bch2_copygc(struct btree_trans *trans,
if (unlikely(freezing(current)))
break;

f = fifo_push_ref(buckets_in_flight);
f->bucket = i->bucket;
f->gen = i->gen;
atomic_set(&f->count, 0);
f = move_bucket_in_flight_add(buckets_in_flight, *i);
ret = PTR_ERR_OR_ZERO(f);
if (ret == -EEXIST) /* rare race: copygc_get_buckets returned same bucket more than once */
continue;
if (ret == -ENOMEM) { /* flush IO, continue later */
ret = 0;
break;
}

ret = __bch2_evacuate_bucket(trans, ctxt, f, f->bucket, f->gen, data_opts);
ret = __bch2_evacuate_bucket(trans, ctxt, f, f->bucket.k.bucket,
f->bucket.k.gen, data_opts);
if (ret)
goto err;
}
@ -269,6 +308,12 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
atomic64_read(&c->io_clock[WRITE].now)) << 9);
prt_newline(out);

prt_printf(out, "Currently waiting since: ");
prt_human_readable_u64(out, max(0LL,
atomic64_read(&c->io_clock[WRITE].now) -
c->copygc_wait_at) << 9);
prt_newline(out);

prt_printf(out, "Currently calculated wait: ");
prt_human_readable_u64(out, bch2_copygc_wait_amount(c));
prt_newline(out);
@ -281,13 +326,17 @@ static int bch2_copygc_thread(void *arg)
struct moving_context ctxt;
struct bch_move_stats move_stats;
struct io_clock *clock = &c->io_clock[WRITE];
move_buckets_in_flight move_buckets;
struct buckets_in_flight move_buckets;
u64 last, wait;
int ret = 0;

if (!init_fifo(&move_buckets, 1 << 14, GFP_KERNEL)) {
bch_err(c, "error allocating copygc buckets in flight");
return -ENOMEM;
memset(&move_buckets, 0, sizeof(move_buckets));

ret = rhashtable_init(&move_buckets.table, &bch_move_bucket_params);
if (ret) {
bch_err(c, "error allocating copygc buckets in flight: %s",
bch2_err_str(ret));
return ret;
}

set_freezable();
@ -303,12 +352,12 @@ static int bch2_copygc_thread(void *arg)
cond_resched();

if (!c->copy_gc_enabled) {
move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
move_buckets_wait(&trans, &ctxt, &move_buckets, true);
kthread_wait_freezable(c->copy_gc_enabled);
}

if (unlikely(freezing(current))) {
move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
move_buckets_wait(&trans, &ctxt, &move_buckets, true);
__refrigerator(false);
continue;
}
@ -317,9 +366,10 @@ static int bch2_copygc_thread(void *arg)
wait = bch2_copygc_wait_amount(c);

if (wait > clock->max_slop) {
move_buckets_wait(&trans, &ctxt, &move_buckets, 0, true);
trace_and_count(c, copygc_wait, c, wait, last + wait);
c->copygc_wait_at = last;
c->copygc_wait = last + wait;
move_buckets_wait(&trans, &ctxt, &move_buckets, true);
trace_and_count(c, copygc_wait, c, wait, last + wait);
bch2_kthread_io_clock_wait(clock, last + wait,
MAX_SCHEDULE_TIMEOUT);
continue;
@ -334,9 +384,9 @@ static int bch2_copygc_thread(void *arg)
wake_up(&c->copygc_running_wq);
}

move_buckets_wait(&trans, &ctxt, &move_buckets, true);
bch2_trans_exit(&trans);
bch2_moving_ctxt_exit(&ctxt);
free_fifo(&move_buckets);

return 0;
}
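The copygc hunks above replace the FIFO of in-flight buckets with a set backed by the kernel rhashtable API, keyed on move_bucket_key. A condensed, illustrative lifecycle sketch (names taken from the hunks above; error handling trimmed, not a verbatim excerpt):

/* Sketch: track one bucket in the in-flight set, then drop it. */
static int buckets_in_flight_example(struct buckets_in_flight *list,
				     struct move_bucket b)
{
	struct move_bucket_in_flight *f;
	int ret;

	/* done once, e.g. in bch2_copygc_thread() */
	ret = rhashtable_init(&list->table, &bch_move_bucket_params);
	if (ret)
		return ret;

	if (bucket_in_flight(list, b.k))	/* rhashtable_lookup_fast() */
		return 0;			/* already being evacuated */

	f = move_bucket_in_flight_add(list, b);	/* hash insert + list append */
	if (IS_ERR(f))
		return PTR_ERR(f);

	/* ... evacuate; when i->count drops to zero, move_buckets_wait()
	 * unhashes, unlinks and frees the entry, as shown above: */
	ret = rhashtable_remove_fast(&list->table, &f->hash,
				     bch_move_bucket_params);
	kfree(f);
	return ret;
}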
@ -476,6 +476,26 @@ void bch2_journal_keys_free(struct journal_keys *keys)
keys->nr = keys->gap = keys->size = 0;
}

static void __journal_keys_sort(struct journal_keys *keys)
{
struct journal_key *src, *dst;

sort(keys->d, keys->nr, sizeof(keys->d[0]), journal_sort_key_cmp, NULL);

src = dst = keys->d;
while (src < keys->d + keys->nr) {
while (src + 1 < keys->d + keys->nr &&
src[0].btree_id == src[1].btree_id &&
src[0].level == src[1].level &&
bpos_eq(src[0].k->k.p, src[1].k->k.p))
src++;

*dst++ = *src++;
}

keys->nr = dst - keys->d;
}

static int journal_keys_sort(struct bch_fs *c)
{
struct genradix_iter iter;
@ -483,8 +503,7 @@ static int journal_keys_sort(struct bch_fs *c)
struct jset_entry *entry;
struct bkey_i *k;
struct journal_keys *keys = &c->journal_keys;
struct journal_key *src, *dst;
size_t nr_keys = 0;
size_t nr_keys = 0, nr_read = 0;

genradix_for_each(&c->journal_entries, iter, _i) {
i = *_i;
@ -503,10 +522,20 @@ static int journal_keys_sort(struct bch_fs *c)

keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
if (!keys->d) {
bch_err(c, "Failed to allocate buffer for sorted journal keys (%zu keys)",
bch_err(c, "Failed to allocate buffer for sorted journal keys (%zu keys); trying slowpath",
nr_keys);

do {
keys->size >>= 1;
keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
} while (!keys->d && keys->size > nr_keys / 8);

if (!keys->d) {
bch_err(c, "Failed to allocate %zu size buffer for sorted journal keys; exiting",
keys->size);
return -BCH_ERR_ENOMEM_journal_keys_sort;
}
}

genradix_for_each(&c->journal_entries, iter, _i) {
i = *_i;
@ -514,7 +543,17 @@ static int journal_keys_sort(struct bch_fs *c)
if (!i || i->ignore)
continue;

for_each_jset_key(k, entry, &i->j)
for_each_jset_key(k, entry, &i->j) {
if (keys->nr == keys->size) {
__journal_keys_sort(keys);

if (keys->nr > keys->size * 7 / 8) {
bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu/%zu",
keys->nr, keys->size, nr_read, nr_keys);
return -BCH_ERR_ENOMEM_journal_keys_sort;
}
}

keys->d[keys->nr++] = (struct journal_key) {
.btree_id = entry->btree_id,
.level = entry->level,
@ -522,23 +561,15 @@ static int journal_keys_sort(struct bch_fs *c)
.journal_seq = le64_to_cpu(i->j.seq),
.journal_offset = k->_data - i->j._data,
};

nr_read++;
}
}

sort(keys->d, keys->nr, sizeof(keys->d[0]), journal_sort_key_cmp, NULL);

src = dst = keys->d;
while (src < keys->d + keys->nr) {
while (src + 1 < keys->d + keys->nr &&
src[0].btree_id == src[1].btree_id &&
src[0].level == src[1].level &&
bpos_eq(src[0].k->k.p, src[1].k->k.p))
src++;

*dst++ = *src++;
}

keys->nr = dst - keys->d;
__journal_keys_sort(keys);
keys->gap = keys->nr;

bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_keys, keys->nr);
return 0;
}

@ -614,7 +645,7 @@ static int bch2_journal_replay(struct bch_fs *c, u64 start_seq, u64 end_seq)
journal_sort_seq_cmp, NULL);

if (keys->nr) {
ret = bch2_fs_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)",
ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)",
keys->nr, start_seq, end_seq);
if (ret)
goto err;
@ -649,7 +680,7 @@ static int bch2_journal_replay(struct bch_fs *c, u64 start_seq, u64 end_seq)
ret = bch2_journal_error(j);

if (keys->nr && !ret)
bch2_fs_log_msg(c, "journal replay finished");
bch2_journal_log_msg(c, "journal replay finished");
err:
kvfree(keys_sorted);
return ret;
@ -1103,14 +1134,11 @@ int bch2_fs_recovery(struct bch_fs *c)
}

if (!c->opts.nochanges) {
if (c->sb.version < bcachefs_metadata_version_lru_v2) {
bch_info(c, "version prior to backpointers, upgrade and fsck required");
if (c->sb.version < bcachefs_metadata_version_no_bps_in_alloc_keys) {
bch_info(c, "version prior to no_bps_in_alloc_keys, upgrade and fsck required");
c->opts.version_upgrade = true;
c->opts.fsck = true;
c->opts.fix_errors = FSCK_OPT_YES;
} else if (c->sb.version < bcachefs_metadata_version_fragmentation_lru) {
bch_info(c, "version prior to backpointers, upgrade required");
c->opts.version_upgrade = true;
}
}

@ -1213,7 +1241,7 @@ use_clean:
journal_seq += 8;

if (blacklist_seq != journal_seq) {
ret = bch2_fs_log_msg(c, "blacklisting entries %llu-%llu",
ret = bch2_journal_log_msg(c, "blacklisting entries %llu-%llu",
blacklist_seq, journal_seq) ?:
bch2_journal_seq_blacklist_add(c,
blacklist_seq, journal_seq);
@ -1223,14 +1251,14 @@ use_clean:
}
}

ret = bch2_fs_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
journal_seq, last_seq, blacklist_seq - 1) ?:
bch2_fs_journal_start(&c->journal, journal_seq);
if (ret)
goto err;

if (c->opts.reconstruct_alloc)
bch2_fs_log_msg(c, "dropping alloc info");
bch2_journal_log_msg(c, "dropping alloc info");

/*
* Skip past versions that might have possibly been used (as nonces),
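__journal_keys_sort() sorts and then compacts duplicates, keeping only the newest key for each (btree id, level, position). A tiny standalone illustration of that compaction idea (simplified key type, not bcachefs code):

#include <stdio.h>
#include <stdlib.h>

struct key { int pos; int seq; };	/* stand-ins for (btree pos, journal seq) */

static int cmp(const void *l, const void *r)
{
	const struct key *a = l, *b = r;
	return a->pos != b->pos ? a->pos - b->pos : a->seq - b->seq;
}

int main(void)
{
	struct key d[] = { {2, 1}, {1, 3}, {2, 5}, {1, 2} };
	size_t nr = 4, i;
	struct key *src, *dst;

	qsort(d, nr, sizeof(d[0]), cmp);

	/* keep only the last (newest) entry of each run of equal positions */
	src = dst = d;
	while (src < d + nr) {
		while (src + 1 < d + nr && src[0].pos == src[1].pos)
			src++;
		*dst++ = *src++;
	}
	nr = dst - d;

	for (i = 0; i < nr; i++)
		printf("pos %d seq %d\n", d[i].pos, d[i].seq);
	/* prints: pos 1 seq 3, then pos 2 seq 5 */
	return 0;
}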
@ -714,7 +714,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work)
void bch2_delete_dead_snapshots_async(struct bch_fs *c)
{
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
!queue_work(system_long_wq, &c->snapshot_delete_work))
!queue_work(c->write_ref_wq, &c->snapshot_delete_work))
bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
}

@ -926,7 +926,7 @@ int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
return -EROFS;

if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
return 0;
}
@ -6,4 +6,16 @@

typedef DARRAY(u32) snapshot_id_list;

struct snapshot_t {
u32 parent;
u32 children[2];
u32 subvol; /* Nonzero only if a subvolume points to this node: */
u32 equiv;
};

typedef struct {
u32 subvol;
u64 inum;
} subvol_inum;

#endif /* _BCACHEFS_SUBVOLUME_TYPES_H */
@ -494,6 +494,8 @@ static void __bch2_fs_free(struct bch_fs *c)
kfree(c->journal_seq_blacklist_table);
kfree(c->unused_inode_hints);

if (c->write_ref_wq)
destroy_workqueue(c->write_ref_wq);
if (c->io_complete_wq)
destroy_workqueue(c->io_complete_wq);
if (c->copygc_wq)
@ -709,6 +711,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)

sema_init(&c->io_in_flight, 128);

INIT_LIST_HEAD(&c->vfs_inodes_list);
mutex_init(&c->vfs_inodes_lock);

c->copy_gc_enabled = 1;
c->rebalance.enabled = 1;
c->promote_whole_extents = true;
@ -784,6 +789,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->io_complete_wq = alloc_workqueue("bcachefs_io",
WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) ||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
WQ_FREEZABLE, 0)) ||
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
@ -1738,6 +1745,10 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);

ret = bch2_fs_freespace_init(c);
if (ret)
bch_err(c, "device add error: error initializing free space: %s", bch2_err_str(ret));

up_write(&c->state_lock);
return 0;
err:
@ -2,8 +2,10 @@
#include "bcachefs.h"
#include "alloc_types.h"
#include "buckets.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_locking.h"
#include "btree_update_interior.h"
#include "keylist.h"
#include "opts.h"
@ -240,36 +240,6 @@ bool bch2_is_zero(const void *_p, size_t n)
return true;
}

static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v)
{
unsigned i = 0;

while (i < ARRAY_SIZE(q->entries)) {
struct bch2_quantile_entry *e = q->entries + i;

if (unlikely(!e->step)) {
e->m = v;
e->step = max_t(unsigned, v / 2, 1024);
} else if (e->m > v) {
e->m = e->m >= e->step
? e->m - e->step
: 0;
} else if (e->m < v) {
e->m = e->m + e->step > e->m
? e->m + e->step
: U32_MAX;
}

if ((e->m > v ? e->m - v : v - e->m) < e->step)
e->step = max_t(unsigned, e->step / 2, 1);

if (v >= e->m)
break;

i = eytzinger0_child(i, v > e->m);
}
}

void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits)
{
while (nr_bits)
@ -343,6 +313,36 @@ int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task)
/* time stats: */

#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v)
{
unsigned i = 0;

while (i < ARRAY_SIZE(q->entries)) {
struct bch2_quantile_entry *e = q->entries + i;

if (unlikely(!e->step)) {
e->m = v;
e->step = max_t(unsigned, v / 2, 1024);
} else if (e->m > v) {
e->m = e->m >= e->step
? e->m - e->step
: 0;
} else if (e->m < v) {
e->m = e->m + e->step > e->m
? e->m + e->step
: U32_MAX;
}

if ((e->m > v ? e->m - v : v - e->m) < e->step)
e->step = max_t(unsigned, e->step / 2, 1);

if (v >= e->m)
break;

i = eytzinger0_child(i, v > e->m);
}
}

static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
u64 start, u64 end)
{
@ -168,10 +168,10 @@ struct bio *bio_split(struct bio *bio, int sectors,
void bio_free_pages(struct bio *bio)
{
struct bvec_iter_all iter;
struct bio_vec *bvec;
struct bio_vec bvec;

bio_for_each_segment_all(bvec, bio, iter)
__free_page(bvec->bv_page);
__free_page(bvec.bv_page);
}

void bio_advance(struct bio *bio, unsigned bytes)