Update bcachefs sources to 5242db9aec bcachefs: Fix bch2_check_fix_ptrs()

Kent Overstreet 2022-01-05 19:39:57 -05:00
parent 931ed5a709
commit 47c554c31a
9 changed files with 393 additions and 368 deletions


@@ -1 +1 @@
-50ac18afbb522a3103cecff9aaf9519d4eb5e908
+5242db9aec10220b6ee7162ba7bec173417348cf


@@ -38,6 +38,15 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
#undef x
};
struct bkey_alloc_buf {
struct bkey_i k;
struct bch_alloc_v3 v;
#define x(_name, _bits) + _bits / 8
u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
#undef x
} __attribute__((packed, aligned(8)));
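The _pad sizing above is the x-macro accumulation trick: BCH_ALLOC_FIELDS_V2() expands each of its x(name, bits) entries using the locally defined x, so the array length works out to the summed width of all fields in bytes. A minimal standalone sketch of the same pattern, using hypothetical fields:
/* hypothetical field list in the style of BCH_ALLOC_FIELDS_V2() */
#define EXAMPLE_FIELDS()		\
	x(gen,		 8)		\
	x(data_type,	 8)		\
	x(dirty_sectors, 32)

struct example_buf {
	/* each x(name, bits) expands to "+ bits / 8": 1 + 1 + 4 = 6 bytes */
#define x(_name, _bits)	+ _bits / 8
	unsigned char	pad[0 EXAMPLE_FIELDS()];
#undef x
};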
/* Persistent alloc info: */
static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
@@ -244,25 +253,24 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
return ret;
}
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans,
static void bch2_alloc_pack(struct bch_fs *c,
struct bkey_alloc_buf *dst,
const struct bkey_alloc_unpacked src)
{
struct bkey_alloc_buf *dst;
dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
if (!IS_ERR(dst))
bch2_alloc_pack_v3(dst, src);
return dst;
}
int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_alloc_unpacked *u, unsigned trigger_flags)
{
struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u);
struct bkey_alloc_buf *a;
return PTR_ERR_OR_ZERO(a) ?:
bch2_trans_update(trans, iter, &a->k, trigger_flags);
a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
if (IS_ERR(a))
return PTR_ERR(a);
bch2_alloc_pack(trans->c, a, *u);
return bch2_trans_update(trans, iter, &a->k, trigger_flags);
}
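Both versions above rely on the kernel ERR_PTR convention: bch2_trans_kmalloc() encodes an errno in the returned pointer on failure rather than returning NULL, which is what PTR_ERR_OR_ZERO() and the IS_ERR()/PTR_ERR() pair test for. A small sketch of the idiom, with a hypothetical allocator:
#include <linux/err.h>
#include <linux/slab.h>

/* hypothetical allocator following the ERR_PTR convention */
static void *alloc_or_errptr(size_t size)
{
	void *p = kmalloc(size, GFP_KERNEL);

	/* encode -ENOMEM in the pointer itself instead of returning NULL */
	return p ?: ERR_PTR(-ENOMEM);
}

static int caller(void)
{
	void *p = alloc_or_errptr(64);

	if (IS_ERR(p))
		return PTR_ERR(p);	/* decode the errno back out */

	/* ... use p ... */
	kfree(p);
	return 0;
}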
static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
@@ -332,7 +340,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
#undef x
}
int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
int bch2_alloc_read(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_iter iter;
@@ -343,43 +351,108 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
int ret;
bch2_trans_init(&trans, c, 0, 0);
down_read(&c->gc_lock);
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
if (!bkey_is_alloc(k.k))
continue;
ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = __bucket(ca, k.k->p.offset, gc);
g = bucket(ca, k.k->p.offset);
u = bch2_alloc_unpack(k);
if (!gc)
*bucket_gen(ca, k.k->p.offset) = u.gen;
g->_mark.gen = u.gen;
g->io_time[READ] = u.read_time;
g->io_time[WRITE] = u.write_time;
g->oldest_gen = !gc ? u.oldest_gen : u.gen;
g->gen_valid = 1;
if (!gc ||
(metadata_only &&
(u.data_type == BCH_DATA_user ||
u.data_type == BCH_DATA_cached ||
u.data_type == BCH_DATA_parity))) {
g->_mark.data_type = u.data_type;
g->_mark.dirty_sectors = u.dirty_sectors;
g->_mark.cached_sectors = u.cached_sectors;
g->_mark.stripe = u.stripe != 0;
g->stripe = u.stripe;
g->stripe_redundancy = u.stripe_redundancy;
}
g->io_time[READ] = u.read_time;
g->io_time[WRITE] = u.write_time;
g->oldest_gen = u.oldest_gen;
g->gen_valid = 1;
}
bch2_trans_iter_exit(&trans, &iter);
up_read(&c->gc_lock);
bch2_trans_exit(&trans);
if (ret)
if (ret) {
bch_err(c, "error reading alloc info: %i", ret);
return ret;
}
return 0;
}
static int bch2_alloc_write_key(struct btree_trans *trans,
struct btree_iter *iter,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_s_c k;
struct bkey_alloc_unpacked old_u, new_u;
int ret;
retry:
bch2_trans_begin(trans);
ret = bch2_btree_key_cache_flush(trans,
BTREE_ID_alloc, iter->pos);
if (ret)
goto err;
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
old_u = bch2_alloc_unpack(k);
new_u = alloc_mem_to_key(c, iter);
if (!bkey_alloc_unpacked_cmp(old_u, new_u))
return 0;
ret = bch2_alloc_write(trans, iter, &new_u,
BTREE_TRIGGER_NORUN) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|flags);
err:
if (ret == -EINTR)
goto retry;
return ret;
}
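The retry label is the standard bcachefs transaction-restart pattern: -EINTR from the btree layer means the transaction lost its locks and must be replayed from the top, starting with bch2_trans_begin(). Schematically, with a hypothetical do_update() standing in for the key-cache flush and alloc write above:
/* schematic restart loop; do_update() is a stand-in for the real work */
static int commit_with_restart(struct btree_trans *trans)
{
	int ret;
retry:
	bch2_trans_begin(trans);	/* reset to a clean, relockable state */

	ret = do_update(trans) ?:
	      bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
	if (ret == -EINTR)
		goto retry;		/* lock conflict: replay the transaction */
	return ret;
}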
int bch2_alloc_write_all(struct bch_fs *c, unsigned flags)
{
struct btree_trans trans;
struct btree_iter iter;
struct bch_dev *ca;
unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
for_each_member_device(ca, c, i) {
bch2_btree_iter_set_pos(&iter,
POS(ca->dev_idx, ca->mi.first_bucket));
while (iter.pos.offset < ca->mi.nbuckets) {
ret = bch2_alloc_write_key(&trans, &iter, flags);
if (ret) {
percpu_ref_put(&ca->ref);
goto err;
}
bch2_btree_iter_advance(&iter);
}
}
err:
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
return ret;
}
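Note the explicit percpu_ref_put() on the error path: for_each_member_device() takes a percpu ref on each device as it iterates and only drops it when advancing to the next one, so breaking out of the loop early leaves the current device's ref held. Schematically, with a hypothetical work():
/* sketch: early exit from a ref-holding iterator; work() is hypothetical */
for_each_member_device(ca, c, i) {	/* iterator takes ca->ref each pass */
	ret = work(ca);
	if (ret) {
		percpu_ref_put(&ca->ref);	/* drop the ref it won't drop for us */
		goto err;
	}
}					/* normal advance puts the ref itself */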
@@ -390,20 +463,19 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_alloc_unpacked u;
u64 *time, now;
int ret = 0;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr),
BTREE_ITER_CACHED|
BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
ret = bch2_btree_iter_traverse(&iter);
if (ret)
goto out;
u = bch2_alloc_unpack(k);
u = alloc_mem_to_key(c, &iter);
time = rw == READ ? &u.read_time : &u.write_time;
now = atomic64_read(&c->io_clock[rw].now);
@@ -586,34 +658,56 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
return nr;
}
/*
* returns sequence number of most recent journal entry that updated this
* bucket:
*/
static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
{
if (m.journal_seq_valid) {
u64 journal_seq = atomic64_read(&c->journal.seq);
u64 bucket_seq = journal_seq;
bucket_seq &= ~((u64) U16_MAX);
bucket_seq |= m.journal_seq;
if (bucket_seq > journal_seq)
bucket_seq -= 1 << 16;
return bucket_seq;
} else {
return 0;
}
}
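A bucket_mark only has room for the low 16 bits of the journal sequence number; the helper splices those bits into the current 64-bit sequence and, if the result would lie in the future, backs off by one 64K window. A worked example with illustrative numbers:
/*
 * journal.seq   = 0x12345	current 64-bit sequence
 * m.journal_seq = 0x9999	low 16 bits stored in the bucket
 *
 * bucket_seq = (0x12345 & ~0xffff) | 0x9999 = 0x19999
 * 0x19999 > 0x12345, so the stored bits came from the previous
 * 64K window: bucket_seq -= 1 << 16, giving 0x9999.
 */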
static int bucket_invalidate_btree(struct btree_trans *trans,
struct bch_dev *ca, u64 b,
struct bkey_alloc_unpacked *u)
struct bch_dev *ca, u64 b)
{
struct bch_fs *c = trans->c;
struct bkey_alloc_unpacked u;
struct btree_iter iter;
struct bkey_s_c k;
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
POS(ca->dev_idx, b),
BTREE_ITER_CACHED|
BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
ret = bch2_btree_iter_traverse(&iter);
if (ret)
goto err;
*u = bch2_alloc_unpack(k);
u->gen++;
u->data_type = 0;
u->dirty_sectors = 0;
u->cached_sectors = 0;
u->read_time = atomic64_read(&c->io_clock[READ].now);
u->write_time = atomic64_read(&c->io_clock[WRITE].now);
u = alloc_mem_to_key(c, &iter);
ret = bch2_alloc_write(trans, &iter, u,
u.gen++;
u.data_type = 0;
u.dirty_sectors = 0;
u.cached_sectors = 0;
u.read_time = atomic64_read(&c->io_clock[READ].now);
u.write_time = atomic64_read(&c->io_clock[WRITE].now);
ret = bch2_alloc_write(trans, &iter, &u,
BTREE_TRIGGER_BUCKET_INVALIDATE);
err:
bch2_trans_iter_exit(trans, &iter);
@@ -623,23 +717,21 @@ err:
static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
u64 *journal_seq, unsigned flags)
{
struct bkey_alloc_unpacked u;
struct bucket *g;
struct bucket_mark m;
size_t b;
int ret = 0;
/*
* If the read-only path is trying to shut down, we can't be generating
* new btree updates:
*/
if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags))
return 1;
BUG_ON(!ca->alloc_heap.used ||
!ca->alloc_heap.data[0].nr);
b = ca->alloc_heap.data[0].bucket;
/* first, put on free_inc and mark as owned by allocator: */
percpu_down_read(&c->mark_lock);
g = bucket(ca, b);
m = READ_ONCE(g->mark);
BUG_ON(m.dirty_sectors);
bch2_mark_alloc_bucket(c, ca, b, true);
@@ -648,15 +740,38 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
BUG_ON(!fifo_push(&ca->free_inc, b));
spin_unlock(&c->freelist_lock);
/*
* If we're not invalidating cached data, we only increment the bucket
* gen in memory here, the incremented gen will be updated in the btree
* by bch2_trans_mark_pointer():
*/
if (!m.cached_sectors &&
!bucket_needs_journal_commit(m, c->journal.last_seq_ondisk)) {
BUG_ON(m.data_type);
bucket_cmpxchg(g, m, m.gen++);
*bucket_gen(ca, b) = m.gen;
percpu_up_read(&c->mark_lock);
goto out;
}
percpu_up_read(&c->mark_lock);
/*
* If the read-only path is trying to shut down, we can't be generating
* new btree updates:
*/
if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) {
ret = 1;
goto out;
}
ret = bch2_trans_do(c, NULL, journal_seq,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_RESERVED|
flags,
bucket_invalidate_btree(&trans, ca, b, &u));
bucket_invalidate_btree(&trans, ca, b));
out:
if (!ret) {
/* remove from alloc_heap: */
struct alloc_heap_entry e, *top = ca->alloc_heap.data;
@@ -672,7 +787,7 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
* bucket (i.e. deleting the last reference) before writing to
* this bucket again:
*/
*journal_seq = max(*journal_seq, u.journal_seq);
*journal_seq = max(*journal_seq, bucket_journal_seq(c, m));
} else {
size_t b2;


@@ -38,23 +38,40 @@ static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
;
}
struct bkey_alloc_buf {
struct bkey_i k;
struct bch_alloc_v3 v;
#define x(_name, _bits) + _bits / 8
u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
#undef x
} __attribute__((packed, aligned(8)));
struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *,
const struct bkey_alloc_unpacked);
int bch2_alloc_write(struct btree_trans *, struct btree_iter *,
struct bkey_alloc_unpacked *, unsigned);
int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
static inline struct bkey_alloc_unpacked
alloc_mem_to_key(struct bch_fs *c, struct btree_iter *iter)
{
struct bch_dev *ca;
struct bucket *g;
struct bkey_alloc_unpacked ret;
percpu_down_read(&c->mark_lock);
ca = bch_dev_bkey_exists(c, iter->pos.inode);
g = bucket(ca, iter->pos.offset);
ret = (struct bkey_alloc_unpacked) {
.dev = iter->pos.inode,
.bucket = iter->pos.offset,
.gen = g->mark.gen,
.oldest_gen = g->oldest_gen,
.data_type = g->mark.data_type,
.dirty_sectors = g->mark.dirty_sectors,
.cached_sectors = g->mark.cached_sectors,
.read_time = g->io_time[READ],
.write_time = g->io_time[WRITE],
.stripe = g->stripe,
.stripe_redundancy = g->stripe_redundancy,
};
percpu_up_read(&c->mark_lock);
return ret;
}
#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -84,7 +101,7 @@ static inline bool bkey_is_alloc(const struct bkey *k)
k->type == KEY_TYPE_alloc_v3;
}
int bch2_alloc_read(struct bch_fs *, bool, bool);
int bch2_alloc_read(struct bch_fs *);
static inline void bch2_wake_allocator(struct bch_dev *ca)
{
@@ -122,6 +139,7 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_stop(struct bch_dev *);
int bch2_dev_allocator_start(struct bch_dev *);
int bch2_alloc_write_all(struct bch_fs *, unsigned);
void bch2_fs_allocator_background_init(struct bch_fs *);
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */


@@ -534,6 +534,7 @@ enum {
/* misc: */
BCH_FS_NEED_ANOTHER_GC,
BCH_FS_DELETED_NODES,
BCH_FS_NEED_ALLOC_WRITE,
BCH_FS_REBUILD_REPLICAS,
BCH_FS_HOLD_BTREE_WRITES,
};


@@ -9,7 +9,6 @@
#include "alloc_foreground.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update_interior.h"
#include "btree_io.h"
@@ -534,6 +533,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
struct bucket *g2 = PTR_BUCKET(ca, &p.ptr);
enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
if (fsck_err_on(!g->gen_valid, c,
@@ -544,8 +544,9 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
p.ptr.gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
if (!p.ptr.cached) {
g->_mark.gen = p.ptr.gen;
g->gen_valid = true;
g2->_mark.gen = g->_mark.gen = p.ptr.gen;
g2->gen_valid = g->gen_valid = true;
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
} else {
do_update = true;
}
@@ -559,12 +560,13 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
p.ptr.gen, g->mark.gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
if (!p.ptr.cached) {
g->_mark.gen = p.ptr.gen;
g->gen_valid = true;
g->_mark.data_type = 0;
g->_mark.dirty_sectors = 0;
g->_mark.cached_sectors = 0;
g2->_mark.gen = g->_mark.gen = p.ptr.gen;
g2->gen_valid = g->gen_valid = true;
g2->_mark.data_type = 0;
g2->_mark.dirty_sectors = 0;
g2->_mark.cached_sectors = 0;
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
} else {
do_update = true;
}
@@ -601,8 +603,9 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
bch2_data_types[data_type],
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
if (data_type == BCH_DATA_btree) {
g->_mark.data_type = data_type;
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
g2->_mark.data_type = g->_mark.data_type = data_type;
g2->gen_valid = g->gen_valid = true;
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
} else {
do_update = true;
}
@@ -1166,14 +1169,13 @@ static int bch2_gc_done(struct bch_fs *c,
unsigned i, dev;
int ret = 0;
percpu_down_write(&c->mark_lock);
#define copy_field(_f, _msg, ...) \
if (dst->_f != src->_f) { \
if (verify) \
fsck_err(c, _msg ": got %llu, should be %llu" \
, ##__VA_ARGS__, dst->_f, src->_f); \
dst->_f = src->_f; \
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
}
#define copy_stripe_field(_f, _msg, ...) \
if (dst->_f != src->_f) { \
@@ -1183,6 +1185,18 @@ static int bch2_gc_done(struct bch_fs *c,
iter.pos, ##__VA_ARGS__, \
dst->_f, src->_f); \
dst->_f = src->_f; \
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
}
#define copy_bucket_field(_f) \
if (dst->b[b]._f != src->b[b]._f) { \
if (verify) \
fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f \
": got %u, should be %u", dev, b, \
dst->b[b].mark.gen, \
bch2_data_types[dst->b[b].mark.data_type],\
dst->b[b]._f, src->b[b]._f); \
dst->b[b]._f = src->b[b]._f; \
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
}
#define copy_dev_field(_f, _msg, ...) \
copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
@@ -1193,6 +1207,23 @@ static int bch2_gc_done(struct bch_fs *c,
bch2_fs_usage_acc_to_base(c, i);
for_each_member_device(ca, c, dev) {
struct bucket_array *dst = __bucket_array(ca, 0);
struct bucket_array *src = __bucket_array(ca, 1);
size_t b;
for (b = 0; b < src->nbuckets; b++) {
copy_bucket_field(_mark.gen);
copy_bucket_field(_mark.data_type);
copy_bucket_field(_mark.stripe);
copy_bucket_field(_mark.dirty_sectors);
copy_bucket_field(_mark.cached_sectors);
copy_bucket_field(stripe_redundancy);
copy_bucket_field(stripe);
dst->b[b].oldest_gen = src->b[b].oldest_gen;
}
{
struct bch_dev_usage *dst = ca->usage_base;
struct bch_dev_usage *src = (void *)
bch2_acc_percpu_u64s((void *) ca->usage_gc,
@@ -1206,6 +1237,7 @@ static int bch2_gc_done(struct bch_fs *c,
copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]);
copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
}
}
};
{
@@ -1246,6 +1278,7 @@ static int bch2_gc_done(struct bch_fs *c,
#undef copy_fs_field
#undef copy_dev_field
#undef copy_bucket_field
#undef copy_stripe_field
#undef copy_field
fsck_err:
@@ -1253,8 +1286,6 @@ fsck_err:
percpu_ref_put(&ca->ref);
if (ret)
bch_err(c, "%s: ret %i", __func__, ret);
percpu_up_write(&c->mark_lock);
return ret;
}
@@ -1277,6 +1308,15 @@ static int bch2_gc_start(struct bch_fs *c,
BUG_ON(ca->buckets[1]);
BUG_ON(ca->usage_gc);
ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) +
ca->mi.nbuckets * sizeof(struct bucket),
GFP_KERNEL|__GFP_ZERO);
if (!ca->buckets[1]) {
percpu_ref_put(&ca->ref);
bch_err(c, "error allocating ca->buckets[gc]");
return -ENOMEM;
}
ca->usage_gc = alloc_percpu(struct bch_dev_usage);
if (!ca->usage_gc) {
bch_err(c, "error allocating ca->usage_gc");
@@ -1285,184 +1325,39 @@ static int bch2_gc_start(struct bch_fs *c,
}
}
return 0;
}
static int bch2_alloc_write_key(struct btree_trans *trans,
struct btree_iter *iter,
bool initial, bool metadata_only)
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
struct bucket *g;
struct bkey_s_c k;
struct bkey_alloc_unpacked old_u, new_u, gc_u;
struct bkey_alloc_buf *a;
int ret;
percpu_down_write(&c->mark_lock);
/*
* For this to be correct at runtime, we'll need to figure out a way for
* it to actually lock the key in the btree key cache:
*/
/*
* indicate to stripe code that we need to allocate for the gc stripes
* radix tree, too
*/
gc_pos_set(c, gc_phase(GC_PHASE_START));
if (!initial) {
ret = bch2_btree_key_cache_flush(trans,
BTREE_ID_alloc, iter->pos);
if (ret)
return ret;
}
for_each_member_device(ca, c, i) {
struct bucket_array *dst = __bucket_array(ca, 1);
struct bucket_array *src = __bucket_array(ca, 0);
size_t b;
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
return ret;
dst->first_bucket = src->first_bucket;
dst->nbuckets = src->nbuckets;
old_u = new_u = bch2_alloc_unpack(k);
for (b = 0; b < src->nbuckets; b++) {
struct bucket *d = &dst->b[b];
struct bucket *s = &src->b[b];
percpu_down_read(&c->mark_lock);
g = gc_bucket(ca, iter->pos.offset);
gc_u = (struct bkey_alloc_unpacked) {
.dev = iter->pos.inode,
.bucket = iter->pos.offset,
.gen = g->mark.gen,
.oldest_gen = g->oldest_gen,
.data_type = g->mark.data_type,
.dirty_sectors = g->mark.dirty_sectors,
.cached_sectors = g->mark.cached_sectors,
.read_time = g->io_time[READ],
.write_time = g->io_time[WRITE],
.stripe = g->stripe,
.stripe_redundancy = g->stripe_redundancy,
};
percpu_up_read(&c->mark_lock);
d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
d->gen_valid = s->gen_valid;
if (metadata_only &&
gc_u.data_type != BCH_DATA_sb &&
gc_u.data_type != BCH_DATA_journal &&
gc_u.data_type != BCH_DATA_btree)
return 0;
if (!bkey_alloc_unpacked_cmp(old_u, gc_u) ||
gen_after(old_u.gen, gc_u.gen))
return 0;
#define copy_bucket_field(_f) \
if (fsck_err_on(new_u._f != gc_u._f, c, \
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
": got %u, should be %u", \
iter->pos.inode, iter->pos.offset, \
new_u.gen, \
bch2_data_types[new_u.data_type], \
new_u._f, gc_u._f)) \
new_u._f = gc_u._f; \
copy_bucket_field(gen);
copy_bucket_field(data_type);
copy_bucket_field(stripe);
copy_bucket_field(dirty_sectors);
copy_bucket_field(cached_sectors);
copy_bucket_field(stripe_redundancy);
copy_bucket_field(stripe);
#undef copy_bucket_field
new_u.oldest_gen = gc_u.oldest_gen;
if (!bkey_alloc_unpacked_cmp(old_u, new_u))
return 0;
a = bch2_alloc_pack(trans, new_u);
if (IS_ERR(a))
return PTR_ERR(a);
ret = initial
? bch2_journal_key_insert(c, BTREE_ID_alloc, 0, &a->k)
: bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN);
fsck_err:
return ret;
(s->mark.data_type == BCH_DATA_user ||
s->mark.data_type == BCH_DATA_cached))
d->_mark = s->mark;
}
};
static int bch2_gc_alloc_done(struct bch_fs *c, bool initial, bool metadata_only)
{
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
for_each_member_device(ca, c, i) {
for_each_btree_key(&trans, iter, BTREE_ID_alloc,
POS(ca->dev_idx, ca->mi.first_bucket),
BTREE_ITER_SLOTS|
BTREE_ITER_PREFETCH, k, ret) {
if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
break;
ret = __bch2_trans_do(&trans, NULL, NULL,
BTREE_INSERT_LAZY_RW,
bch2_alloc_write_key(&trans, &iter,
initial, metadata_only));
if (ret)
break;
}
bch2_trans_iter_exit(&trans, &iter);
if (ret) {
bch_err(c, "error writing alloc info: %i", ret);
percpu_ref_put(&ca->ref);
break;
}
}
bch2_trans_exit(&trans);
return ret;
}
static int bch2_gc_alloc_start(struct bch_fs *c, bool initial, bool metadata_only)
{
struct bch_dev *ca;
unsigned i;
for_each_member_device(ca, c, i) {
struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
ca->mi.nbuckets * sizeof(struct bucket),
GFP_KERNEL|__GFP_ZERO);
if (!buckets) {
percpu_ref_put(&ca->ref);
percpu_up_write(&c->mark_lock);
bch_err(c, "error allocating ca->buckets[gc]");
return -ENOMEM;
}
buckets->first_bucket = ca->mi.first_bucket;
buckets->nbuckets = ca->mi.nbuckets;
rcu_assign_pointer(ca->buckets[1], buckets);
};
return bch2_alloc_read(c, true, metadata_only);
}
static void bch2_gc_alloc_reset(struct bch_fs *c, bool initial, bool metadata_only)
{
struct bch_dev *ca;
unsigned i;
for_each_member_device(ca, c, i) {
struct bucket_array *buckets = __bucket_array(ca, true);
struct bucket *g;
for_each_bucket(g, buckets) {
if (metadata_only &&
(g->mark.data_type == BCH_DATA_user ||
g->mark.data_type == BCH_DATA_cached ||
g->mark.data_type == BCH_DATA_parity))
continue;
g->_mark.dirty_sectors = 0;
g->_mark.cached_sectors = 0;
}
};
return 0;
}
static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
@@ -1535,55 +1430,6 @@ fsck_err:
return ret;
}
static void bch2_gc_reflink_reset(struct bch_fs *c, bool initial,
bool metadata_only)
{
struct genradix_iter iter;
struct reflink_gc *r;
genradix_for_each(&c->reflink_gc_table, iter, r)
r->refcount = 0;
}
static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
bool metadata_only)
{
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct reflink_gc *r;
int ret = 0;
if (metadata_only)
return 0;
bch2_trans_init(&trans, c, 0, 0);
c->reflink_gc_nr = 0;
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
const __le64 *refcount = bkey_refcount_c(k);
if (!refcount)
continue;
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
GFP_KERNEL);
if (!r) {
ret = -ENOMEM;
break;
}
r->offset = k.k->p.offset;
r->size = k.k->size;
r->refcount = 0;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
return ret;
}
static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
bool metadata_only)
{
@@ -1647,10 +1493,43 @@ fsck_err:
return ret;
}
static void bch2_gc_stripes_reset(struct bch_fs *c, bool initial,
static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
bool metadata_only)
{
genradix_free(&c->gc_stripes);
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct reflink_gc *r;
int ret = 0;
if (metadata_only)
return 0;
bch2_trans_init(&trans, c, 0, 0);
c->reflink_gc_nr = 0;
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
const __le64 *refcount = bkey_refcount_c(k);
if (!refcount)
continue;
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
GFP_KERNEL);
if (!r) {
ret = -ENOMEM;
break;
}
r->offset = k.k->p.offset;
r->size = k.k->size;
r->refcount = 0;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
return ret;
}
/**
@@ -1686,14 +1565,11 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
/* flush interior btree updates: */
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
again:
ret = bch2_gc_start(c, metadata_only) ?:
bch2_gc_alloc_start(c, initial, metadata_only) ?:
bch2_gc_reflink_start(c, initial, metadata_only);
if (ret)
goto out;
again:
gc_pos_set(c, gc_phase(GC_PHASE_START));
bch2_mark_superblocks(c);
@@ -1731,40 +1607,40 @@ again:
if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
(!iter && bch2_test_restart_gc)) {
if (iter++ > 2) {
bch_info(c, "Unable to fix bucket gens, looping");
ret = -EINVAL;
goto out;
}
/*
* XXX: make sure gens we fixed got saved
*/
if (iter++ <= 2) {
bch_info(c, "Second GC pass needed, restarting:");
clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
bch2_gc_stripes_reset(c, initial, metadata_only);
bch2_gc_alloc_reset(c, initial, metadata_only);
bch2_gc_reflink_reset(c, initial, metadata_only);
percpu_down_write(&c->mark_lock);
bch2_gc_free(c);
percpu_up_write(&c->mark_lock);
/* flush fsck errors, reset counters */
bch2_flush_fsck_errs(c);
goto again;
}
bch_info(c, "Unable to fix bucket gens, looping");
ret = -EINVAL;
}
out:
if (!ret) {
bch2_journal_block(&c->journal);
ret = bch2_gc_stripes_done(c, initial, metadata_only) ?:
bch2_gc_reflink_done(c, initial, metadata_only) ?:
bch2_gc_alloc_done(c, initial, metadata_only) ?:
percpu_down_write(&c->mark_lock);
ret = bch2_gc_reflink_done(c, initial, metadata_only) ?:
bch2_gc_stripes_done(c, initial, metadata_only) ?:
bch2_gc_done(c, initial, metadata_only);
bch2_journal_unblock(&c->journal);
} else {
percpu_down_write(&c->mark_lock);
}
percpu_down_write(&c->mark_lock);
/* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));


@@ -2182,23 +2182,6 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
return ret;
}
static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
enum btree_id btree_id,
struct bpos pos)
{
struct btree_insert_entry *i;
trans_for_each_update(trans, i)
if ((cmp_int(btree_id, i->btree_id) ?:
bpos_cmp(pos, i->k->k.p)) <= 0) {
if (btree_id == i->btree_id)
return i->k;
break;
}
return NULL;
}
static noinline
struct bkey_i *__btree_trans_peek_journal(struct btree_trans *trans,
struct btree_path *path)


@@ -135,4 +135,21 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
(_i) < (_trans)->updates + (_trans)->nr_updates; \
(_i)++)
static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
enum btree_id btree_id,
struct bpos pos)
{
struct btree_insert_entry *i;
trans_for_each_update(trans, i)
if ((cmp_int(btree_id, i->btree_id) ?:
bpos_cmp(pos, i->k->k.p)) <= 0) {
if (btree_id == i->btree_id)
return i->k;
break;
}
return NULL;
}
#endif /* _BCACHEFS_BTREE_UPDATE_H */


@@ -1459,22 +1459,24 @@ static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bkey_s_c k;
struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
struct bkey_i *update = btree_trans_peek_updates(trans, BTREE_ID_alloc, pos);
int ret;
bch2_trans_iter_init(trans, iter, BTREE_ID_alloc,
POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)),
BTREE_ITER_WITH_UPDATES|
bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
BTREE_ITER_CACHED|
BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
ret = bch2_btree_iter_traverse(iter);
if (ret) {
bch2_trans_iter_exit(trans, iter);
return ret;
}
*u = bch2_alloc_unpack(k);
*u = update && !bpos_cmp(update->k.p, pos)
? bch2_alloc_unpack(bkey_i_to_s_c(update))
: alloc_mem_to_key(c, iter);
return 0;
}
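Because the iterator is opened with BTREE_ITER_CACHED_NOFILL, nothing is read from the btree here; the unpacked value comes either from an update already queued in this transaction or from the in-memory bucket array. Checking btree_trans_peek_updates() first gives read-your-own-writes semantics, roughly:
/*
 * Illustrative sequence:
 *  1. an earlier trigger in this transaction updates the alloc key
 *     for bucket B and queues it in trans->updates (uncommitted);
 *  2. bch2_trans_start_alloc_update() runs for the same bucket;
 *  3. alloc_mem_to_key() would return the pre-transaction state, so
 *     the queued key from step 1 takes precedence; hence the
 *     bpos_cmp() against the peeked update before falling back.
 */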


@@ -1095,11 +1095,7 @@ use_clean:
bch_verbose(c, "starting alloc read");
err = "error reading allocation information";
down_read(&c->gc_lock);
ret = bch2_alloc_read(c, false, false);
up_read(&c->gc_lock);
ret = bch2_alloc_read(c);
if (ret)
goto err;
bch_verbose(c, "alloc read done");
@@ -1157,6 +1153,23 @@ use_clean:
if (c->opts.verbose || !c->sb.clean)
bch_info(c, "journal replay done");
if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
!c->opts.nochanges) {
/*
* note that even when filesystem was clean there might be work
* to do here, if we ran gc (because of fsck) which recalculated
* oldest_gen:
*/
bch_verbose(c, "writing allocation info");
err = "error writing out alloc info";
ret = bch2_alloc_write_all(c, BTREE_INSERT_LAZY_RW);
if (ret) {
bch_err(c, "error writing alloc info");
goto err;
}
bch_verbose(c, "alloc write done");
}
if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
bch2_fs_lazy_rw(c);