mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-23 00:00:02 +03:00)
Update bcachefs sources to aa439f3b94 bcachefs: btree_gc no longer uses main in-memory bucket array

parent 8a632ea60d
commit c50379128b
@@ -1 +1 @@
42284b8b2bb980c80140b640de7cb12bc1e4541c
aa439f3b94eb3141f9b6d71f780300e7fef44af9
@@ -605,8 +605,6 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,

darray_free(s.extents);
genradix_free(&s.hardlinks);

bch2_alloc_write_all(c, false);
}

static void find_superblock_space(ranges extents,
@@ -38,15 +38,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
#undef x
};

struct bkey_alloc_buf {
struct bkey_i k;
struct bch_alloc_v3 v;

#define x(_name, _bits) + _bits / 8
u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
#undef x
} __attribute__((packed, aligned(8)));

/* Persistent alloc info: */

static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
@@ -253,24 +244,25 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
return ret;
}

static void bch2_alloc_pack(struct bch_fs *c,
struct bkey_alloc_buf *dst,
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans,
const struct bkey_alloc_unpacked src)
{
struct bkey_alloc_buf *dst;

dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
if (!IS_ERR(dst))
bch2_alloc_pack_v3(dst, src);

return dst;
}

int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_alloc_unpacked *u, unsigned trigger_flags)
{
struct bkey_alloc_buf *a;
struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u);

a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
if (IS_ERR(a))
return PTR_ERR(a);

bch2_alloc_pack(trans->c, a, *u);
return bch2_trans_update(trans, iter, &a->k, trigger_flags);
return PTR_ERR_OR_ZERO(a) ?:
bch2_trans_update(trans, iter, &a->k, trigger_flags);
}

static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
@@ -340,119 +332,54 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
#undef x
}

static int bch2_alloc_read_fn(struct btree_trans *trans, struct bkey_s_c k)
int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
{
struct bch_fs *c = trans->c;
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bch_dev *ca;
struct bucket *g;
struct bkey_alloc_unpacked u;
int ret;

if (!bkey_is_alloc(k.k))
return 0;
bch2_trans_init(&trans, c, 0, 0);

for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
ca = bch_dev_bkey_exists(c, k.k->p.inode);
g = bucket(ca, k.k->p.offset);
g = __bucket(ca, k.k->p.offset, gc);
u = bch2_alloc_unpack(k);

if (!gc)
*bucket_gen(ca, k.k->p.offset) = u.gen;

g->_mark.gen = u.gen;
g->io_time[READ] = u.read_time;
g->io_time[WRITE] = u.write_time;
g->oldest_gen = !gc ? u.oldest_gen : u.gen;
g->gen_valid = 1;

if (!gc ||
(metadata_only &&
(u.data_type == BCH_DATA_user ||
u.data_type == BCH_DATA_cached ||
u.data_type == BCH_DATA_parity))) {
g->_mark.data_type = u.data_type;
g->_mark.dirty_sectors = u.dirty_sectors;
g->_mark.cached_sectors = u.cached_sectors;
g->_mark.stripe = u.stripe != 0;
g->stripe = u.stripe;
g->stripe_redundancy = u.stripe_redundancy;
g->io_time[READ] = u.read_time;
g->io_time[WRITE] = u.write_time;
g->oldest_gen = u.oldest_gen;
g->gen_valid = 1;

return 0;
}

int bch2_alloc_read(struct bch_fs *c)
{
struct btree_trans trans;
int ret;

bch2_trans_init(&trans, c, 0, 0);
down_read(&c->gc_lock);
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_alloc, bch2_alloc_read_fn);
up_read(&c->gc_lock);
bch2_trans_exit(&trans);
if (ret) {
bch_err(c, "error reading alloc info: %i", ret);
return ret;
}

return 0;
}

static int bch2_alloc_write_key(struct btree_trans *trans,
struct btree_iter *iter,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_s_c k;
struct bkey_alloc_unpacked old_u, new_u;
int ret;
retry:
bch2_trans_begin(trans);

ret = bch2_btree_key_cache_flush(trans,
BTREE_ID_alloc, iter->pos);
if (ret)
goto err;

k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;

old_u = bch2_alloc_unpack(k);
new_u = alloc_mem_to_key(c, iter);

if (!bkey_alloc_unpacked_cmp(old_u, new_u))
return 0;

ret = bch2_alloc_write(trans, iter, &new_u,
BTREE_TRIGGER_NORUN) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|flags);
err:
if (ret == -EINTR)
goto retry;
return ret;
}

int bch2_alloc_write_all(struct bch_fs *c, unsigned flags)
{
struct btree_trans trans;
struct btree_iter iter;
struct bch_dev *ca;
unsigned i;
int ret = 0;

bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);

for_each_member_device(ca, c, i) {
bch2_btree_iter_set_pos(&iter,
POS(ca->dev_idx, ca->mi.first_bucket));

while (iter.pos.offset < ca->mi.nbuckets) {
ret = bch2_alloc_write_key(&trans, &iter, flags);
if (ret) {
percpu_ref_put(&ca->ref);
goto err;
}
bch2_btree_iter_advance(&iter);
}
}
err:
bch2_trans_iter_exit(&trans, &iter);

bch2_trans_exit(&trans);

if (ret)
bch_err(c, "error reading alloc info: %i", ret);

return ret;
}
@@ -463,19 +390,20 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_alloc_unpacked u;
u64 *time, now;
int ret = 0;

bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr),
BTREE_ITER_CACHED|
BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&iter);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)
goto out;

u = alloc_mem_to_key(c, &iter);
u = bch2_alloc_unpack(k);

time = rw == READ ? &u.read_time : &u.write_time;
now = atomic64_read(&c->io_clock[rw].now);
@@ -664,20 +592,20 @@ static int bucket_invalidate_btree(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
int ret;

bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
POS(ca->dev_idx, b),
BTREE_ITER_CACHED|
BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_INTENT);

ret = bch2_btree_iter_traverse(&iter);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)
goto err;

*u = alloc_mem_to_key(c, &iter);

*u = bch2_alloc_unpack(k);
u->gen++;
u->data_type = 0;
u->dirty_sectors = 0;
@@ -859,8 +787,7 @@ static void discard_one_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
static bool allocator_thread_running(struct bch_dev *ca)
{
unsigned state = ca->mi.state == BCH_MEMBER_STATE_rw &&
test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags) &&
test_bit(BCH_FS_ALLOC_REPLAY_DONE, &ca->fs->flags)
test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags)
? ALLOCATOR_running
: ALLOCATOR_stopped;
alloc_thread_set_state(ca, state);
@@ -38,40 +38,23 @@ static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
;
}

struct bkey_alloc_buf {
struct bkey_i k;
struct bch_alloc_v3 v;

#define x(_name, _bits) + _bits / 8
u8 _pad[0 + BCH_ALLOC_FIELDS_V2()];
#undef x
} __attribute__((packed, aligned(8)));

struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *,
const struct bkey_alloc_unpacked);
int bch2_alloc_write(struct btree_trans *, struct btree_iter *,
struct bkey_alloc_unpacked *, unsigned);

int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);

static inline struct bkey_alloc_unpacked
alloc_mem_to_key(struct bch_fs *c, struct btree_iter *iter)
{
struct bch_dev *ca;
struct bucket *g;
struct bkey_alloc_unpacked ret;

percpu_down_read(&c->mark_lock);
ca = bch_dev_bkey_exists(c, iter->pos.inode);
g = bucket(ca, iter->pos.offset);
ret = (struct bkey_alloc_unpacked) {
.dev = iter->pos.inode,
.bucket = iter->pos.offset,
.gen = g->mark.gen,
.oldest_gen = g->oldest_gen,
.data_type = g->mark.data_type,
.dirty_sectors = g->mark.dirty_sectors,
.cached_sectors = g->mark.cached_sectors,
.read_time = g->io_time[READ],
.write_time = g->io_time[WRITE],
.stripe = g->stripe,
.stripe_redundancy = g->stripe_redundancy,
};
percpu_up_read(&c->mark_lock);

return ret;
}

#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9)

const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -101,7 +84,7 @@ static inline bool bkey_is_alloc(const struct bkey *k)
k->type == KEY_TYPE_alloc_v3;
}

int bch2_alloc_read(struct bch_fs *);
int bch2_alloc_read(struct bch_fs *, bool, bool);

static inline void bch2_wake_allocator(struct bch_dev *ca)
{
@@ -139,7 +122,6 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_stop(struct bch_dev *);
int bch2_dev_allocator_start(struct bch_dev *);

int bch2_alloc_write_all(struct bch_fs *, unsigned);
void bch2_fs_allocator_background_init(struct bch_fs *);

#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
@@ -510,8 +510,6 @@ enum {
BCH_FS_INITIAL_GC_DONE,
BCH_FS_INITIAL_GC_UNFIXED,
BCH_FS_TOPOLOGY_REPAIR_DONE,
BCH_FS_ALLOC_REPLAY_DONE,
BCH_FS_BTREE_INTERIOR_REPLAY_DONE,
BCH_FS_FSCK_DONE,
BCH_FS_STARTED,
BCH_FS_RW,
@@ -531,7 +529,6 @@ enum {
/* misc: */
BCH_FS_NEED_ANOTHER_GC,
BCH_FS_DELETED_NODES,
BCH_FS_NEED_ALLOC_WRITE,
BCH_FS_REBUILD_REPLICAS,
BCH_FS_HOLD_BTREE_WRITES,
};
@@ -860,7 +857,6 @@ struct bch_fs {
u64 reflink_hint;
reflink_gc_table reflink_gc_table;
size_t reflink_gc_nr;
size_t reflink_gc_idx;

/* VFS IO PATH - fs-io.c */
struct bio_set writepage_bioset;
@@ -1427,6 +1427,7 @@ LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);

/*
* Features:
@ -9,6 +9,7 @@
|
||||
#include "alloc_foreground.h"
|
||||
#include "bkey_methods.h"
|
||||
#include "bkey_buf.h"
|
||||
#include "btree_key_cache.h"
|
||||
#include "btree_locking.h"
|
||||
#include "btree_update_interior.h"
|
||||
#include "btree_io.h"
|
||||
@ -505,7 +506,6 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
|
||||
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
|
||||
struct bucket *g2 = PTR_BUCKET(ca, &p.ptr);
|
||||
enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
|
||||
|
||||
if (fsck_err_on(!g->gen_valid, c,
|
||||
@ -516,9 +516,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
p.ptr.gen,
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
|
||||
if (!p.ptr.cached) {
|
||||
g2->_mark.gen = g->_mark.gen = p.ptr.gen;
|
||||
g2->gen_valid = g->gen_valid = true;
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
|
||||
g->_mark.gen = p.ptr.gen;
|
||||
g->gen_valid = true;
|
||||
} else {
|
||||
do_update = true;
|
||||
}
|
||||
@ -532,9 +531,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
|
||||
p.ptr.gen, g->mark.gen,
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
|
||||
g2->_mark.data_type = g->_mark.data_type = data_type;
|
||||
g2->gen_valid = g->gen_valid = true;
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
|
||||
g->_mark.data_type = data_type;
|
||||
g->gen_valid = true;
|
||||
}
|
||||
|
||||
if (fsck_err_on(gen_cmp(p.ptr.gen, g->mark.gen) > 0, c,
|
||||
@ -545,13 +543,12 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
p.ptr.gen, g->mark.gen,
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
|
||||
if (!p.ptr.cached) {
|
||||
g2->_mark.gen = g->_mark.gen = p.ptr.gen;
|
||||
g2->gen_valid = g->gen_valid = true;
|
||||
g2->_mark.data_type = 0;
|
||||
g2->_mark.dirty_sectors = 0;
|
||||
g2->_mark.cached_sectors = 0;
|
||||
g->_mark.gen = p.ptr.gen;
|
||||
g->gen_valid = true;
|
||||
g->_mark.data_type = 0;
|
||||
g->_mark.dirty_sectors = 0;
|
||||
g->_mark.cached_sectors = 0;
|
||||
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
|
||||
} else {
|
||||
do_update = true;
|
||||
}
|
||||
@ -588,9 +585,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
bch2_data_types[data_type],
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
|
||||
if (data_type == BCH_DATA_btree) {
|
||||
g2->_mark.data_type = g->_mark.data_type = data_type;
|
||||
g2->gen_valid = g->gen_valid = true;
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
|
||||
g->_mark.data_type = data_type;
|
||||
g->gen_valid = true;
|
||||
} else {
|
||||
do_update = true;
|
||||
}
|
||||
@ -691,11 +687,17 @@ found:
|
||||
}
|
||||
|
||||
ret = bch2_journal_key_insert_take(c, btree_id, level, new);
|
||||
|
||||
if (ret)
|
||||
kfree(new);
|
||||
else
|
||||
else {
|
||||
bch2_bkey_val_to_text(&PBUF(buf), c, *k);
|
||||
bch_info(c, "updated %s", buf);
|
||||
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(new));
|
||||
bch_info(c, "new key %s", buf);
|
||||
*k = bkey_i_to_s_c(new);
|
||||
}
|
||||
}
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
@ -1145,13 +1147,14 @@ static int bch2_gc_done(struct bch_fs *c,
|
||||
unsigned i, dev;
|
||||
int ret = 0;
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
|
||||
#define copy_field(_f, _msg, ...) \
|
||||
if (dst->_f != src->_f) { \
|
||||
if (verify) \
|
||||
fsck_err(c, _msg ": got %llu, should be %llu" \
|
||||
, ##__VA_ARGS__, dst->_f, src->_f); \
|
||||
dst->_f = src->_f; \
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
|
||||
}
|
||||
#define copy_stripe_field(_f, _msg, ...) \
|
||||
if (dst->_f != src->_f) { \
|
||||
@ -1161,18 +1164,6 @@ static int bch2_gc_done(struct bch_fs *c,
|
||||
iter.pos, ##__VA_ARGS__, \
|
||||
dst->_f, src->_f); \
|
||||
dst->_f = src->_f; \
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
|
||||
}
|
||||
#define copy_bucket_field(_f) \
|
||||
if (dst->b[b]._f != src->b[b]._f) { \
|
||||
if (verify) \
|
||||
fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f \
|
||||
": got %u, should be %u", dev, b, \
|
||||
dst->b[b].mark.gen, \
|
||||
bch2_data_types[dst->b[b].mark.data_type],\
|
||||
dst->b[b]._f, src->b[b]._f); \
|
||||
dst->b[b]._f = src->b[b]._f; \
|
||||
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
|
||||
}
|
||||
#define copy_dev_field(_f, _msg, ...) \
|
||||
copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
|
||||
@ -1183,23 +1174,6 @@ static int bch2_gc_done(struct bch_fs *c,
|
||||
bch2_fs_usage_acc_to_base(c, i);
|
||||
|
||||
for_each_member_device(ca, c, dev) {
|
||||
struct bucket_array *dst = __bucket_array(ca, 0);
|
||||
struct bucket_array *src = __bucket_array(ca, 1);
|
||||
size_t b;
|
||||
|
||||
for (b = 0; b < src->nbuckets; b++) {
|
||||
copy_bucket_field(_mark.gen);
|
||||
copy_bucket_field(_mark.data_type);
|
||||
copy_bucket_field(_mark.stripe);
|
||||
copy_bucket_field(_mark.dirty_sectors);
|
||||
copy_bucket_field(_mark.cached_sectors);
|
||||
copy_bucket_field(stripe_redundancy);
|
||||
copy_bucket_field(stripe);
|
||||
|
||||
dst->b[b].oldest_gen = src->b[b].oldest_gen;
|
||||
}
|
||||
|
||||
{
|
||||
struct bch_dev_usage *dst = ca->usage_base;
|
||||
struct bch_dev_usage *src = (void *)
|
||||
bch2_acc_percpu_u64s((void *) ca->usage_gc,
|
||||
@ -1213,7 +1187,6 @@ static int bch2_gc_done(struct bch_fs *c,
|
||||
copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]);
|
||||
copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
{
|
||||
@ -1254,7 +1227,6 @@ static int bch2_gc_done(struct bch_fs *c,
|
||||
|
||||
#undef copy_fs_field
|
||||
#undef copy_dev_field
|
||||
#undef copy_bucket_field
|
||||
#undef copy_stripe_field
|
||||
#undef copy_field
|
||||
fsck_err:
|
||||
@ -1262,6 +1234,8 @@ fsck_err:
|
||||
percpu_ref_put(&ca->ref);
|
||||
if (ret)
|
||||
bch_err(c, "%s: ret %i", __func__, ret);
|
||||
|
||||
percpu_up_write(&c->mark_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1284,15 +1258,6 @@ static int bch2_gc_start(struct bch_fs *c,
|
||||
BUG_ON(ca->buckets[1]);
|
||||
BUG_ON(ca->usage_gc);
|
||||
|
||||
ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) +
|
||||
ca->mi.nbuckets * sizeof(struct bucket),
|
||||
GFP_KERNEL|__GFP_ZERO);
|
||||
if (!ca->buckets[1]) {
|
||||
percpu_ref_put(&ca->ref);
|
||||
bch_err(c, "error allocating ca->buckets[gc]");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ca->usage_gc = alloc_percpu(struct bch_dev_usage);
|
||||
if (!ca->usage_gc) {
|
||||
bch_err(c, "error allocating ca->usage_gc");
|
||||
@ -1301,94 +1266,165 @@ static int bch2_gc_start(struct bch_fs *c,
|
||||
}
|
||||
}
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
|
||||
/*
|
||||
* indicate to stripe code that we need to allocate for the gc stripes
|
||||
* radix tree, too
|
||||
*/
|
||||
gc_pos_set(c, gc_phase(GC_PHASE_START));
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
struct bucket_array *dst = __bucket_array(ca, 1);
|
||||
struct bucket_array *src = __bucket_array(ca, 0);
|
||||
size_t b;
|
||||
|
||||
dst->first_bucket = src->first_bucket;
|
||||
dst->nbuckets = src->nbuckets;
|
||||
|
||||
for (b = 0; b < src->nbuckets; b++) {
|
||||
struct bucket *d = &dst->b[b];
|
||||
struct bucket *s = &src->b[b];
|
||||
|
||||
d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
|
||||
d->gen_valid = s->gen_valid;
|
||||
|
||||
if (metadata_only &&
|
||||
(s->mark.data_type == BCH_DATA_user ||
|
||||
s->mark.data_type == BCH_DATA_cached))
|
||||
d->_mark = s->mark;
|
||||
}
|
||||
};
|
||||
|
||||
percpu_up_write(&c->mark_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_done_initial_fn(struct btree_trans *trans,
|
||||
struct bkey_s_c k)
|
||||
static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
bool initial, bool metadata_only)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct reflink_gc *r;
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
char buf[200];
|
||||
int ret = 0;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
|
||||
struct bucket *g;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_alloc_unpacked old_u, new_u, gc_u;
|
||||
struct bkey_alloc_buf *a;
|
||||
int ret;
|
||||
|
||||
if (!refcount)
|
||||
/*
|
||||
* For this to be correct at runtime, we'll need to figure out a way for
|
||||
* it to actually lock the key in the btree key cache:
|
||||
*/
|
||||
|
||||
if (!initial) {
|
||||
ret = bch2_btree_key_cache_flush(trans,
|
||||
BTREE_ID_alloc, iter->pos);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
old_u = new_u = bch2_alloc_unpack(k);
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
g = gc_bucket(ca, iter->pos.offset);
|
||||
gc_u = (struct bkey_alloc_unpacked) {
|
||||
.dev = iter->pos.inode,
|
||||
.bucket = iter->pos.offset,
|
||||
.gen = g->mark.gen,
|
||||
.oldest_gen = g->oldest_gen,
|
||||
.data_type = g->mark.data_type,
|
||||
.dirty_sectors = g->mark.dirty_sectors,
|
||||
.cached_sectors = g->mark.cached_sectors,
|
||||
.read_time = g->io_time[READ],
|
||||
.write_time = g->io_time[WRITE],
|
||||
.stripe = g->stripe,
|
||||
.stripe_redundancy = g->stripe_redundancy,
|
||||
};
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
if (metadata_only &&
|
||||
gc_u.data_type != BCH_DATA_sb &&
|
||||
gc_u.data_type != BCH_DATA_journal &&
|
||||
gc_u.data_type != BCH_DATA_btree)
|
||||
return 0;
|
||||
|
||||
r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
|
||||
if (!r)
|
||||
return -ENOMEM;
|
||||
if (!bkey_alloc_unpacked_cmp(old_u, gc_u) ||
|
||||
gen_after(old_u.gen, gc_u.gen))
|
||||
return 0;
|
||||
|
||||
if (!r ||
|
||||
r->offset != k.k->p.offset ||
|
||||
r->size != k.k->size) {
|
||||
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
|
||||
return -EINVAL;
|
||||
}
|
||||
#define copy_bucket_field(_f) \
|
||||
if (fsck_err_on(new_u._f != gc_u._f, c, \
|
||||
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
|
||||
": got %u, should be %u", \
|
||||
iter->pos.inode, iter->pos.offset, \
|
||||
new_u.gen, \
|
||||
bch2_data_types[new_u.data_type], \
|
||||
new_u._f, gc_u._f)) \
|
||||
new_u._f = gc_u._f; \
|
||||
|
||||
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
|
||||
"reflink key has wrong refcount:\n"
|
||||
" %s\n"
|
||||
" should be %u",
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
|
||||
r->refcount)) {
|
||||
struct bkey_i *new;
|
||||
copy_bucket_field(gen);
|
||||
copy_bucket_field(data_type);
|
||||
copy_bucket_field(stripe);
|
||||
copy_bucket_field(dirty_sectors);
|
||||
copy_bucket_field(cached_sectors);
|
||||
copy_bucket_field(stripe_redundancy);
|
||||
copy_bucket_field(stripe);
|
||||
#undef copy_bucket_field
|
||||
|
||||
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
||||
if (!new) {
|
||||
ret = -ENOMEM;
|
||||
goto fsck_err;
|
||||
}
|
||||
new_u.oldest_gen = gc_u.oldest_gen;
|
||||
|
||||
bkey_reassemble(new, k);
|
||||
if (!bkey_alloc_unpacked_cmp(old_u, new_u))
|
||||
return 0;
|
||||
|
||||
if (!r->refcount) {
|
||||
new->k.type = KEY_TYPE_deleted;
|
||||
new->k.size = 0;
|
||||
} else {
|
||||
*bkey_refcount(new) = cpu_to_le64(r->refcount);
|
||||
}
|
||||
a = bch2_alloc_pack(trans, new_u);
|
||||
if (IS_ERR(a))
|
||||
return PTR_ERR(a);
|
||||
|
||||
ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
|
||||
kfree(new);
|
||||
}
|
||||
ret = initial
|
||||
? bch2_journal_key_insert(c, BTREE_ID_alloc, 0, &a->k)
|
||||
: bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN);
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_alloc_done(struct bch_fs *c, bool initial, bool metadata_only)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_alloc,
|
||||
POS(ca->dev_idx, ca->mi.first_bucket),
|
||||
BTREE_ITER_SLOTS|
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
|
||||
break;
|
||||
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW,
|
||||
bch2_alloc_write_key(&trans, &iter,
|
||||
initial, metadata_only));
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
if (ret) {
|
||||
bch_err(c, "error writing alloc info: %i", ret);
|
||||
percpu_ref_put(&ca->ref);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_alloc_start(struct bch_fs *c, bool initial, bool metadata_only)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
|
||||
ca->mi.nbuckets * sizeof(struct bucket),
|
||||
GFP_KERNEL|__GFP_ZERO);
|
||||
if (!buckets) {
|
||||
percpu_ref_put(&ca->ref);
|
||||
percpu_up_write(&c->mark_lock);
|
||||
bch_err(c, "error allocating ca->buckets[gc]");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
buckets->first_bucket = ca->mi.first_bucket;
|
||||
buckets->nbuckets = ca->mi.nbuckets;
|
||||
rcu_assign_pointer(ca->buckets[1], buckets);
|
||||
};
|
||||
|
||||
return bch2_alloc_read(c, true, metadata_only);
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||
bool metadata_only)
|
||||
{
|
||||
@ -1405,14 +1441,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
if (initial) {
|
||||
c->reflink_gc_idx = 0;
|
||||
|
||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
|
||||
bch2_gc_reflink_done_initial_fn);
|
||||
goto out;
|
||||
}
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
@ -1420,7 +1448,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||
if (!refcount)
|
||||
continue;
|
||||
|
||||
r = genradix_ptr(&c->reflink_gc_table, idx);
|
||||
r = genradix_ptr(&c->reflink_gc_table, idx++);
|
||||
if (!r ||
|
||||
r->offset != k.k->p.offset ||
|
||||
r->size != k.k->size) {
|
||||
@ -1450,7 +1478,9 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||
else
|
||||
*bkey_refcount(new) = cpu_to_le64(r->refcount);
|
||||
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||
ret = initial
|
||||
? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, new)
|
||||
: __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||
__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
|
||||
kfree(new);
|
||||
|
||||
@ -1460,104 +1490,11 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||
}
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
out:
|
||||
c->reflink_gc_nr = 0;
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_stripes_done_initial_fn(struct btree_trans *trans,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct gc_stripe *m;
|
||||
const struct bch_stripe *s;
|
||||
char buf[200];
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
return 0;
|
||||
|
||||
s = bkey_s_c_to_stripe(k).v;
|
||||
|
||||
m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
|
||||
|
||||
for (i = 0; i < s->nr_blocks; i++)
|
||||
if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
|
||||
goto inconsistent;
|
||||
return 0;
|
||||
inconsistent:
|
||||
if (fsck_err_on(true, c,
|
||||
"stripe has wrong block sector count %u:\n"
|
||||
" %s\n"
|
||||
" should be %u", i,
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
|
||||
m ? m->block_sectors[i] : 0)) {
|
||||
struct bkey_i_stripe *new;
|
||||
|
||||
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
||||
if (!new) {
|
||||
ret = -ENOMEM;
|
||||
goto fsck_err;
|
||||
}
|
||||
|
||||
bkey_reassemble(&new->k_i, k);
|
||||
|
||||
for (i = 0; i < new->v.nr_blocks; i++)
|
||||
stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
|
||||
|
||||
ret = bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i);
|
||||
kfree(new);
|
||||
}
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
|
||||
bool metadata_only)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
int ret = 0;
|
||||
|
||||
if (metadata_only)
|
||||
return 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
if (initial) {
|
||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
|
||||
bch2_gc_stripes_done_initial_fn);
|
||||
} else {
|
||||
BUG();
|
||||
}
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_start_initial_fn(struct btree_trans *trans,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
|
||||
struct bch_fs *c = trans->c;
|
||||
struct reflink_gc *r;
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
|
||||
if (!refcount)
|
||||
return 0;
|
||||
|
||||
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
|
||||
GFP_KERNEL);
|
||||
if (!r)
|
||||
return -ENOMEM;
|
||||
|
||||
r->offset = k.k->p.offset;
|
||||
r->size = k.k->size;
|
||||
r->refcount = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
|
||||
bool metadata_only)
|
||||
{
|
||||
@ -1573,12 +1510,6 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
c->reflink_gc_nr = 0;
|
||||
|
||||
if (initial) {
|
||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
|
||||
bch2_gc_reflink_start_initial_fn);
|
||||
goto out;
|
||||
}
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
@ -1598,7 +1529,70 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
|
||||
r->refcount = 0;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
out:
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
|
||||
bool metadata_only)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct gc_stripe *m;
|
||||
const struct bch_stripe *s;
|
||||
char buf[200];
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
if (metadata_only)
|
||||
return 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
continue;
|
||||
|
||||
s = bkey_s_c_to_stripe(k).v;
|
||||
m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
|
||||
|
||||
for (i = 0; i < s->nr_blocks; i++)
|
||||
if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
|
||||
goto inconsistent;
|
||||
continue;
|
||||
inconsistent:
|
||||
if (fsck_err_on(true, c,
|
||||
"stripe has wrong block sector count %u:\n"
|
||||
" %s\n"
|
||||
" should be %u", i,
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
|
||||
m ? m->block_sectors[i] : 0)) {
|
||||
struct bkey_i_stripe *new;
|
||||
|
||||
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
||||
if (!new) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
bkey_reassemble(&new->k_i, k);
|
||||
|
||||
for (i = 0; i < new->v.nr_blocks; i++)
|
||||
stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
|
||||
|
||||
ret = initial
|
||||
? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i)
|
||||
: __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||
__bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i));
|
||||
kfree(new);
|
||||
}
|
||||
}
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
@ -1638,10 +1632,13 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
|
||||
!bch2_btree_interior_updates_nr_pending(c));
|
||||
again:
|
||||
ret = bch2_gc_start(c, metadata_only) ?:
|
||||
bch2_gc_alloc_start(c, initial, metadata_only) ?:
|
||||
bch2_gc_reflink_start(c, initial, metadata_only);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
gc_pos_set(c, gc_phase(GC_PHASE_START));
|
||||
|
||||
bch2_mark_superblocks(c);
|
||||
|
||||
if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) &&
|
||||
@ -1702,16 +1699,15 @@ out:
|
||||
if (!ret) {
|
||||
bch2_journal_block(&c->journal);
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
ret = bch2_gc_reflink_done(c, initial, metadata_only) ?:
|
||||
bch2_gc_stripes_done(c, initial, metadata_only) ?:
|
||||
ret = bch2_gc_stripes_done(c, initial, metadata_only) ?:
|
||||
bch2_gc_reflink_done(c, initial, metadata_only) ?:
|
||||
bch2_gc_alloc_done(c, initial, metadata_only) ?:
|
||||
bch2_gc_done(c, initial, metadata_only);
|
||||
|
||||
bch2_journal_unblock(&c->journal);
|
||||
} else {
|
||||
percpu_down_write(&c->mark_lock);
|
||||
}
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
/* Indicates that gc is no longer in progress: */
|
||||
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "journal.h"
|
||||
#include "recovery.h"
|
||||
#include "replicas.h"
|
||||
#include "subvolume.h"
|
||||
|
||||
@ -1077,6 +1078,7 @@ static inline bool btree_path_advance_to_pos(struct btree_path *path,
|
||||
static void btree_path_verify_new_node(struct btree_trans *trans,
|
||||
struct btree_path *path, struct btree *b)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_path_level *l;
|
||||
unsigned plevel;
|
||||
bool parent_locked;
|
||||
@ -1085,6 +1087,9 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
|
||||
if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
|
||||
return;
|
||||
|
||||
if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
|
||||
return;
|
||||
|
||||
plevel = b->c.level + 1;
|
||||
if (!btree_path_node(path, plevel))
|
||||
return;
|
||||
@ -1105,7 +1110,7 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
|
||||
char buf4[100];
|
||||
struct bkey uk = bkey_unpack_key(b, k);
|
||||
|
||||
bch2_dump_btree_node(trans->c, l->b);
|
||||
bch2_dump_btree_node(c, l->b);
|
||||
bch2_bpos_to_text(&PBUF(buf1), path->pos);
|
||||
bch2_bkey_to_text(&PBUF(buf2), &uk);
|
||||
bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
|
||||
@ -1296,6 +1301,41 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
|
||||
struct btree_and_journal_iter *jiter)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_buf tmp;
|
||||
unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
|
||||
? (path->level > 1 ? 0 : 2)
|
||||
: (path->level > 1 ? 1 : 16);
|
||||
bool was_locked = btree_node_locked(path, path->level);
|
||||
int ret = 0;
|
||||
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
|
||||
while (nr && !ret) {
|
||||
if (!bch2_btree_node_relock(trans, path, path->level))
|
||||
break;
|
||||
|
||||
bch2_btree_and_journal_iter_advance(jiter);
|
||||
k = bch2_btree_and_journal_iter_peek(jiter);
|
||||
if (!k.k)
|
||||
break;
|
||||
|
||||
bch2_bkey_buf_reassemble(&tmp, c, k);
|
||||
ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
|
||||
path->level - 1);
|
||||
}
|
||||
|
||||
if (!was_locked)
|
||||
btree_node_unlock(path, path->level);
|
||||
|
||||
bch2_bkey_buf_exit(&tmp, c);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned plevel, struct btree *b)
|
||||
@ -1318,6 +1358,30 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
|
||||
btree_node_unlock(path, plevel);
|
||||
}
|
||||
|
||||
static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned flags,
|
||||
struct bkey_buf *out)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_path_level *l = path_l(path);
|
||||
struct btree_and_journal_iter jiter;
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
__bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
|
||||
|
||||
k = bch2_btree_and_journal_iter_peek(&jiter);
|
||||
|
||||
bch2_bkey_buf_reassemble(out, c, k);
|
||||
|
||||
if (flags & BTREE_ITER_PREFETCH)
|
||||
ret = btree_path_prefetch_j(trans, path, &jiter);
|
||||
|
||||
bch2_btree_and_journal_iter_exit(&jiter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __always_inline int btree_path_down(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned flags,
|
||||
@ -1328,15 +1392,29 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
|
||||
struct btree *b;
|
||||
unsigned level = path->level - 1;
|
||||
enum six_lock_type lock_type = __btree_lock_want(path, level);
|
||||
bool replay_done = test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
|
||||
struct bkey_buf tmp;
|
||||
int ret;
|
||||
|
||||
EBUG_ON(!btree_node_locked(path, path->level));
|
||||
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
|
||||
if (unlikely(!replay_done)) {
|
||||
ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
|
||||
if (ret)
|
||||
goto err;
|
||||
} else {
|
||||
bch2_bkey_buf_unpack(&tmp, c, l->b,
|
||||
bch2_btree_node_iter_peek(&l->iter, l->b));
|
||||
|
||||
if (flags & BTREE_ITER_PREFETCH) {
|
||||
ret = btree_path_prefetch(trans, path);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
|
||||
ret = PTR_ERR_OR_ZERO(b);
|
||||
if (unlikely(ret))
|
||||
@ -1345,13 +1423,10 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
|
||||
mark_btree_node_locked(path, level, lock_type);
|
||||
btree_path_level_init(trans, path, b);
|
||||
|
||||
if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 &&
|
||||
if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
|
||||
unlikely(b != btree_node_mem_ptr(tmp.k)))
|
||||
btree_node_mem_ptr_set(trans, path, level + 1, b);
|
||||
|
||||
if (flags & BTREE_ITER_PREFETCH)
|
||||
ret = btree_path_prefetch(trans, path);
|
||||
|
||||
if (btree_node_read_locked(path, level + 1))
|
||||
btree_node_unlock(path, level + 1);
|
||||
path->level = level;
|
||||
@ -2107,6 +2182,59 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
|
||||
enum btree_id btree_id,
|
||||
struct bpos pos)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
if ((cmp_int(btree_id, i->btree_id) ?:
|
||||
bpos_cmp(pos, i->k->k.p)) <= 0) {
|
||||
if (btree_id == i->btree_id)
|
||||
return i->k;
|
||||
break;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static noinline
|
||||
struct bkey_i *__btree_trans_peek_journal(struct btree_trans *trans,
|
||||
struct btree_path *path)
|
||||
{
|
||||
struct journal_keys *keys = &trans->c->journal_keys;
|
||||
size_t idx = bch2_journal_key_search(keys, path->btree_id,
|
||||
path->level, path->pos);
|
||||
|
||||
while (idx < keys->nr && keys->d[idx].overwritten)
|
||||
idx++;
|
||||
|
||||
return (idx < keys->nr &&
|
||||
keys->d[idx].btree_id == path->btree_id &&
|
||||
keys->d[idx].level == path->level)
|
||||
? keys->d[idx].k
|
||||
: NULL;
|
||||
}
|
||||
|
||||
static noinline
|
||||
struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_i *next_journal =
|
||||
__btree_trans_peek_journal(trans, iter->path);
|
||||
|
||||
if (next_journal &&
|
||||
bpos_cmp(next_journal->k.p,
|
||||
k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
|
||||
iter->k = next_journal->k;
|
||||
k = bkey_i_to_s_c(next_journal);
|
||||
}
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch2_btree_iter_peek: returns first key greater than or equal to iterator's
|
||||
* current position
|
||||
@ -2117,7 +2245,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
struct bpos search_key = btree_iter_search_key(iter);
|
||||
struct bkey_i *next_update;
|
||||
struct bkey_s_c k;
|
||||
int ret, cmp;
|
||||
int ret;
|
||||
|
||||
EBUG_ON(iter->path->cached || iter->path->level);
|
||||
bch2_btree_iter_verify(iter);
|
||||
@ -2136,19 +2264,14 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
goto out;
|
||||
}
|
||||
|
||||
k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
|
||||
|
||||
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
|
||||
k = btree_trans_peek_journal(trans, iter, k);
|
||||
|
||||
next_update = iter->flags & BTREE_ITER_WITH_UPDATES
|
||||
? btree_trans_peek_updates(trans, iter->btree_id, search_key)
|
||||
: NULL;
|
||||
k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
|
||||
|
||||
/* * In the btree, deleted keys sort before non deleted: */
|
||||
if (k.k && bkey_deleted(k.k) &&
|
||||
(!next_update ||
|
||||
bpos_cmp(k.k->p, next_update->k.p) <= 0)) {
|
||||
search_key = k.k->p;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (next_update &&
|
||||
bpos_cmp(next_update->k.p,
|
||||
k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
|
||||
@ -2156,6 +2279,20 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
k = bkey_i_to_s_c(next_update);
|
||||
}
|
||||
|
||||
if (k.k && bkey_deleted(k.k)) {
|
||||
/*
|
||||
* If we've got a whiteout, and it's after the search
|
||||
* key, advance the search key to the whiteout instead
|
||||
* of just after the whiteout - it might be a btree
|
||||
* whiteout, with a real key at the same position, since
|
||||
* in the btree deleted keys sort before non deleted.
|
||||
*/
|
||||
search_key = bpos_cmp(search_key, k.k->p)
|
||||
? k.k->p
|
||||
: bpos_successor(k.k->p);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (likely(k.k)) {
|
||||
/*
|
||||
* We can never have a key in a leaf node at POS_MAX, so
|
||||
@ -2199,14 +2336,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
|
||||
iter->pos.snapshot = iter->snapshot;
|
||||
|
||||
cmp = bpos_cmp(k.k->p, iter->path->pos);
|
||||
if (cmp) {
|
||||
iter->path = bch2_btree_path_make_mut(trans, iter->path,
|
||||
iter->path = btree_path_set_pos(trans, iter->path, k.k->p,
|
||||
iter->flags & BTREE_ITER_INTENT,
|
||||
btree_iter_ip_allocated(iter));
|
||||
iter->path->pos = k.k->p;
|
||||
btree_path_check_sort(trans, iter->path, cmp);
|
||||
}
|
||||
BUG_ON(!iter->path->nodes_locked);
|
||||
out:
|
||||
iter->path->should_be_locked = true;
|
||||
|
||||
@ -2247,6 +2380,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
|
||||
|
||||
EBUG_ON(iter->path->cached || iter->path->level);
|
||||
EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
|
||||
|
||||
if (iter->flags & BTREE_ITER_WITH_JOURNAL)
|
||||
return bkey_s_c_err(-EIO);
|
||||
|
||||
bch2_btree_iter_verify(iter);
|
||||
bch2_btree_iter_verify_entry_exit(iter);
|
||||
|
||||
@ -2397,17 +2534,24 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
!(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
|
||||
struct bkey_i *next_update;
|
||||
|
||||
next_update = iter->flags & BTREE_ITER_WITH_UPDATES
|
||||
? btree_trans_peek_updates(trans, iter->btree_id, search_key)
|
||||
: NULL;
|
||||
|
||||
if (next_update &&
|
||||
if ((iter->flags & BTREE_ITER_WITH_UPDATES) &&
|
||||
(next_update = btree_trans_peek_updates(trans,
|
||||
iter->btree_id, search_key)) &&
|
||||
!bpos_cmp(next_update->k.p, iter->pos)) {
|
||||
iter->k = next_update->k;
|
||||
k = bkey_i_to_s_c(next_update);
|
||||
} else {
|
||||
k = bch2_btree_path_peek_slot(iter->path, &iter->k);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
|
||||
(next_update = __btree_trans_peek_journal(trans, iter->path)) &&
|
||||
!bpos_cmp(next_update->k.p, iter->pos)) {
|
||||
iter->k = next_update->k;
|
||||
k = bkey_i_to_s_c(next_update);
|
||||
goto out;
|
||||
}
|
||||
|
||||
k = bch2_btree_path_peek_slot(iter->path, &iter->k);
|
||||
} else {
|
||||
struct bpos next;
|
||||
|
||||
@ -2451,7 +2595,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
k = (struct bkey_s_c) { &iter->k, NULL };
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
iter->path->should_be_locked = true;
|
||||
|
||||
bch2_btree_iter_verify_entry_exit(iter);
|
||||
@ -2618,6 +2762,9 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
|
||||
btree_type_has_snapshots(btree_id))
|
||||
flags |= BTREE_ITER_FILTER_SNAPSHOTS;
|
||||
|
||||
if (!test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags))
|
||||
flags |= BTREE_ITER_WITH_JOURNAL;
|
||||
|
||||
iter->trans = trans;
|
||||
iter->path = NULL;
|
||||
iter->btree_id = btree_id;
|
||||
|
@ -16,8 +16,7 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
|
||||
size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
|
||||
size_t max_dirty = 4096 + (nr_keys * 3) / 4;
|
||||
|
||||
return nr_dirty > max_dirty &&
|
||||
test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
|
||||
return nr_dirty > max_dirty;
|
||||
}
|
||||
|
||||
int bch2_btree_key_cache_journal_flush(struct journal *,
|
||||
|
@ -207,10 +207,11 @@ struct btree_node_iter {
|
||||
#define BTREE_ITER_CACHED_NOFILL (1 << 8)
|
||||
#define BTREE_ITER_CACHED_NOCREATE (1 << 9)
|
||||
#define BTREE_ITER_WITH_UPDATES (1 << 10)
|
||||
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
|
||||
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
|
||||
#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 13)
|
||||
#define BTREE_ITER_NOPRESERVE (1 << 14)
|
||||
#define BTREE_ITER_WITH_JOURNAL (1 << 11)
|
||||
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
|
||||
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 13)
|
||||
#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 14)
|
||||
#define BTREE_ITER_NOPRESERVE (1 << 15)
|
||||
|
||||
enum btree_path_uptodate {
|
||||
BTREE_ITER_UPTODATE = 0,
|
||||
|
@ -135,21 +135,4 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
|
||||
(_i) < (_trans)->updates + (_trans)->nr_updates; \
|
||||
(_i)++)
|
||||
|
||||
static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
|
||||
enum btree_id btree_id,
|
||||
struct bpos pos)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
if ((cmp_int(btree_id, i->btree_id) ?:
|
||||
bpos_cmp(pos, i->k->k.p)) <= 0) {
|
||||
if (btree_id == i->btree_id)
|
||||
return i->k;
|
||||
break;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_UPDATE_H */
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "journal.h"
|
||||
#include "journal_reclaim.h"
|
||||
#include "keylist.h"
|
||||
#include "recovery.h"
|
||||
#include "replicas.h"
|
||||
#include "super-io.h"
|
||||
|
||||
@ -44,7 +45,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
|
||||
|
||||
BUG_ON(!b->c.level);
|
||||
|
||||
if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
|
||||
if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
|
||||
return;
|
||||
|
||||
bch2_btree_node_iter_init_from_start(&iter, b);
|
||||
@ -1146,6 +1147,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
|
||||
BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
|
||||
!btree_ptr_sectors_written(insert));
|
||||
|
||||
if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
|
||||
bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p);
|
||||
|
||||
invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?:
|
||||
bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert));
|
||||
if (invalid) {
|
||||
@ -1847,9 +1851,6 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
struct async_btree_rewrite *a;
|
||||
|
||||
if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
|
||||
return;
|
||||
|
||||
if (!percpu_ref_tryget(&c->writes))
|
||||
return;
|
||||
|
||||
|
@ -206,9 +206,6 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
|
||||
int old_live_u64s = b->nr.live_u64s;
|
||||
int live_u64s_added, u64s_added;
|
||||
|
||||
EBUG_ON(!insert->level &&
|
||||
!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags));
|
||||
|
||||
if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
|
||||
&insert_l(insert)->iter, insert->k)))
|
||||
return false;
|
||||
@ -476,12 +473,12 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
|
||||
JOURNAL_RES_GET_NONBLOCK);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
trans->journal_res.seq = c->journal.replay_journal_seq;
|
||||
}
|
||||
|
||||
if (unlikely(trans->journal_transaction_names))
|
||||
journal_transaction_name(trans);
|
||||
} else {
|
||||
trans->journal_res.seq = c->journal.replay_journal_seq;
|
||||
}
|
||||
|
||||
if (unlikely(trans->extra_journal_entry_u64s)) {
|
||||
memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
|
||||
|
@ -1458,24 +1458,22 @@ static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
|
||||
struct bkey_i *update = btree_trans_peek_updates(trans, BTREE_ID_alloc, pos);
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
|
||||
bch2_trans_iter_init(trans, iter, BTREE_ID_alloc,
|
||||
POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)),
|
||||
BTREE_ITER_WITH_UPDATES|
|
||||
BTREE_ITER_CACHED|
|
||||
BTREE_ITER_CACHED_NOFILL|
|
||||
BTREE_ITER_INTENT);
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
*u = update && !bpos_cmp(update->k.p, pos)
|
||||
? bch2_alloc_unpack(bkey_i_to_s_c(update))
|
||||
: alloc_mem_to_key(c, iter);
|
||||
|
||||
*u = bch2_alloc_unpack(k);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1558,20 +1558,26 @@ void bch2_stripes_heap_start(struct bch_fs *c)
|
||||
bch2_stripes_heap_insert(c, m, iter.pos);
|
||||
}
|
||||
|
||||
static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k)
|
||||
int bch2_stripes_read(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
const struct bch_stripe *s;
|
||||
struct bch_fs *c = trans->c;
|
||||
struct stripe *m;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
return 0;
|
||||
continue;
|
||||
|
||||
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
|
||||
if (ret)
|
||||
return ret;
|
||||
break;
|
||||
|
||||
s = bkey_s_c_to_stripe(k).v;
|
||||
|
||||
@ -1589,19 +1595,11 @@ static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k)
|
||||
spin_lock(&c->ec_stripes_heap_lock);
|
||||
bch2_stripes_heap_update(c, m, k.k->p.offset);
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
int bch2_stripes_read(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
|
||||
bch2_stripes_read_fn);
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
if (ret)
|
||||
bch_err(c, "error reading stripes: %i", ret);
|
||||
|
||||
|
@ -489,9 +489,6 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
||||
u64 seq;
|
||||
int err;
|
||||
|
||||
if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
|
||||
return 0;
|
||||
|
||||
lockdep_assert_held(&j->reclaim_lock);
|
||||
|
||||
while (1) {
|
||||
@ -692,8 +689,6 @@ static int bch2_journal_reclaim_thread(void *arg)
|
||||
|
||||
set_freezable();
|
||||
|
||||
kthread_wait_freezable(test_bit(JOURNAL_RECLAIM_STARTED, &j->flags));
|
||||
|
||||
j->last_flushed = jiffies;
|
||||
|
||||
while (!ret && !kthread_should_stop()) {
|
||||
|
@ -148,7 +148,6 @@ enum journal_space_from {
|
||||
enum {
|
||||
JOURNAL_REPLAY_DONE,
|
||||
JOURNAL_STARTED,
|
||||
JOURNAL_RECLAIM_STARTED,
|
||||
JOURNAL_NEED_WRITE,
|
||||
JOURNAL_MAY_GET_UNRESERVED,
|
||||
JOURNAL_MAY_SKIP_FLUSH,
|
||||
|
@ -330,9 +330,9 @@ enum opt_type {
|
||||
NO_SB_OPT, false, \
|
||||
NULL, "Read all journal entries, not just dirty ones")\
|
||||
x(journal_transaction_names, u8, \
|
||||
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
|
||||
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
|
||||
OPT_BOOL(), \
|
||||
NO_SB_OPT, false, \
|
||||
BCH_SB_JOURNAL_TRANSACTION_NAMES, false, \
|
||||
NULL, "Log transaction function names in journal") \
|
||||
x(noexcl, u8, \
|
||||
OPT_FS|OPT_MOUNT, \
|
||||
|
@ -59,21 +59,19 @@ static void zero_out_btree_mem_ptr(struct journal_keys *keys)
|
||||
static int __journal_key_cmp(enum btree_id l_btree_id,
|
||||
unsigned l_level,
|
||||
struct bpos l_pos,
|
||||
struct journal_key *r)
|
||||
const struct journal_key *r)
|
||||
{
|
||||
return (cmp_int(l_btree_id, r->btree_id) ?:
|
||||
cmp_int(l_level, r->level) ?:
|
||||
bpos_cmp(l_pos, r->k->k.p));
|
||||
}
|
||||
|
||||
static int journal_key_cmp(struct journal_key *l, struct journal_key *r)
|
||||
static int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)
|
||||
{
|
||||
return (cmp_int(l->btree_id, r->btree_id) ?:
|
||||
cmp_int(l->level, r->level) ?:
|
||||
bpos_cmp(l->k->k.p, r->k->k.p));
|
||||
return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r);
|
||||
}
|
||||
|
||||
static size_t journal_key_search(struct journal_keys *journal_keys,
|
||||
size_t bch2_journal_key_search(struct journal_keys *journal_keys,
|
||||
enum btree_id id, unsigned level,
|
||||
struct bpos pos)
|
||||
{
|
||||
@ -116,11 +114,18 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
.btree_id = id,
.level = level,
.k = k,
.allocated = true
.allocated = true,
/*
* Ensure these keys are done last by journal replay, to unblock
* journal reclaim:
*/
.journal_seq = U32_MAX,
};
struct journal_keys *keys = &c->journal_keys;
struct journal_iter *iter;
unsigned idx = journal_key_search(keys, id, level, k->k.p);
size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);

BUG_ON(test_bit(BCH_FS_RW, &c->flags));

if (idx < keys->nr &&
journal_key_cmp(&n, &keys->d[idx]) == 0) {

@ -157,6 +162,11 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
return 0;
}

/*
* Can only be used from the recovery thread while we're still RO - can't be
* used once we've got RW, as journal_keys is at that point used by multiple
* threads:
*/
int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_i *k)
{

@ -189,7 +199,7 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
unsigned level, struct bpos pos)
{
struct journal_keys *keys = &c->journal_keys;
size_t idx = journal_key_search(keys, btree, level, pos);
size_t idx = bch2_journal_key_search(keys, btree, level, pos);

if (idx < keys->nr &&
keys->d[idx].btree_id == btree &&

@ -200,15 +210,18 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,

static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
{
struct journal_key *k = iter->idx - iter->keys->nr
? iter->keys->d + iter->idx : NULL;
struct journal_key *k = iter->keys->d + iter->idx;

if (k &&
while (k < iter->keys->d + iter->keys->nr &&
k->btree_id == iter->btree_id &&
k->level == iter->level)
k->level == iter->level) {
if (!k->overwritten)
return k->k;

iter->idx = iter->keys->nr;
iter->idx++;
k = iter->keys->d + iter->idx;
}

return NULL;
}
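The rewritten bch2_journal_iter_peek() above no longer returns whatever sits at idx; it walks forward within the current (btree_id, level) run and skips keys flagged as overwritten. A rough standalone sketch of that loop shape, using hypothetical simplified types rather than the real journal_iter/journal_key structures:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct jkey {
	unsigned	btree_id;
	unsigned	level;
	int		value;
	bool		overwritten;
};

struct jiter {
	struct jkey	*d;
	size_t		nr;
	size_t		idx;
	unsigned	btree_id;
	unsigned	level;
};

static struct jkey *jiter_peek(struct jiter *iter)
{
	struct jkey *k = iter->d + iter->idx;

	/* Walk forward within the current (btree_id, level) run... */
	while (k < iter->d + iter->nr &&
	       k->btree_id == iter->btree_id &&
	       k->level == iter->level) {
		/* ...returning the first key that hasn't been overwritten: */
		if (!k->overwritten)
			return k;

		iter->idx++;
		k = iter->d + iter->idx;
	}

	return NULL;
}

int main(void)
{
	struct jkey keys[] = {
		{ 0, 0, 1, true  },	/* overwritten, skipped */
		{ 0, 0, 2, false },	/* first live key */
		{ 1, 0, 3, false },	/* different btree, out of range */
	};
	struct jiter iter = { .d = keys, .nr = 3 };
	struct jkey *k = jiter_peek(&iter);

	printf("%d\n", k ? k->value : -1);	/* prints 2 */
	return 0;
}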
@ -231,8 +244,7 @@ static void bch2_journal_iter_init(struct bch_fs *c,
iter->btree_id = id;
iter->level = level;
iter->keys = &c->journal_keys;
iter->idx = journal_key_search(&c->journal_keys, id, level, pos);
list_add(&iter->list, &c->journal_iters);
iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);
}

static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)

@ -318,106 +330,33 @@ void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
bch2_journal_iter_exit(&iter->journal);
}

void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
struct bch_fs *c,
struct btree *b)
struct btree *b,
struct btree_node_iter node_iter,
struct bpos pos)
{
memset(iter, 0, sizeof(*iter));

iter->b = b;
bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b);
bch2_journal_iter_init(c, &iter->journal,
b->c.btree_id, b->c.level, b->data->min_key);
iter->node_iter = node_iter;
bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, pos);
INIT_LIST_HEAD(&iter->journal.list);
}

/* Walk btree, overlaying keys from the journal: */

static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
struct btree_and_journal_iter iter)
/*
* this version is used by btree_gc before filesystem has gone RW and
* multithreaded, so uses the journal_iters list:
*/
void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
struct bch_fs *c,
struct btree *b)
{
unsigned i = 0, nr = b->c.level > 1 ? 2 : 16;
struct bkey_s_c k;
struct bkey_buf tmp;
struct btree_node_iter node_iter;

BUG_ON(!b->c.level);

bch2_bkey_buf_init(&tmp);

while (i < nr &&
(k = bch2_btree_and_journal_iter_peek(&iter)).k) {
bch2_bkey_buf_reassemble(&tmp, c, k);

bch2_btree_node_prefetch(c, NULL, NULL, tmp.k,
b->c.btree_id, b->c.level - 1);

bch2_btree_and_journal_iter_advance(&iter);
i++;
}

bch2_bkey_buf_exit(&tmp, c);
}

static int bch2_btree_and_journal_walk_recurse(struct btree_trans *trans, struct btree *b,
enum btree_id btree_id,
btree_walk_key_fn key_fn)
{
struct bch_fs *c = trans->c;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bkey_buf tmp;
struct btree *child;
int ret = 0;

bch2_bkey_buf_init(&tmp);
bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);

while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
if (b->c.level) {
bch2_bkey_buf_reassemble(&tmp, c, k);

child = bch2_btree_node_get_noiter(c, tmp.k,
b->c.btree_id, b->c.level - 1,
false);

ret = PTR_ERR_OR_ZERO(child);
if (ret)
break;

btree_and_journal_iter_prefetch(c, b, iter);

ret = bch2_btree_and_journal_walk_recurse(trans, child,
btree_id, key_fn);
six_unlock_read(&child->c.lock);
} else {
ret = key_fn(trans, k);
}

if (ret)
break;

bch2_btree_and_journal_iter_advance(&iter);
}

bch2_btree_and_journal_iter_exit(&iter);
bch2_bkey_buf_exit(&tmp, c);
return ret;
}

int bch2_btree_and_journal_walk(struct btree_trans *trans, enum btree_id btree_id,
btree_walk_key_fn key_fn)
{
struct bch_fs *c = trans->c;
struct btree *b = c->btree_roots[btree_id].b;
int ret = 0;

if (btree_node_fake(b))
return 0;

six_lock_read(&b->c.lock, NULL, NULL);
ret = bch2_btree_and_journal_walk_recurse(trans, b, btree_id, key_fn);
six_unlock_read(&b->c.lock);

return ret;
bch2_btree_node_iter_init_from_start(&node_iter, b);
__bch2_btree_and_journal_iter_init_node_iter(iter, c, b, node_iter, b->data->min_key);
list_add(&iter->journal.list, &c->journal_iters);
}

/* sort and dedup all keys in the journal: */
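The change above splits node-iterator setup into a generic __bch2_btree_and_journal_iter_init_node_iter() that takes an explicit node iterator and start position, plus the old entry point as a thin wrapper that starts from the node's min key and registers the iterator on c->journal_iters for the pre-RW btree_gc path. A compact sketch of that generic-init-plus-wrapper shape, with made-up stand-in types rather than the real bcachefs structures:

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_add(struct list_head *n, struct list_head *head)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

struct node { long min_key; };

struct walk_iter {
	struct node		*b;
	long			pos;	/* stand-in for btree_node_iter + bpos */
	struct list_head	list;
};

struct fs { struct list_head iters; };

/* Generic form: the caller picks the starting position: */
static void __walk_iter_init(struct walk_iter *iter, struct fs *c,
			     struct node *b, long pos)
{
	(void)c;
	iter->b = b;
	iter->pos = pos;
	list_init(&iter->list);
}

/*
 * Wrapper for the pre-RW (btree_gc) path: always start from the node's
 * min key and register the iterator on the filesystem-wide list:
 */
static void walk_iter_init(struct walk_iter *iter, struct fs *c, struct node *b)
{
	__walk_iter_init(iter, c, b, b->min_key);
	list_add(&iter->list, &c->iters);
}

int main(void)
{
	struct fs c;
	struct node b = { .min_key = 42 };
	struct walk_iter it;

	list_init(&c.iters);
	walk_iter_init(&it, &c, &b);
	printf("%ld\n", it.pos);	/* prints 42 */
	return 0;
}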
@ -442,9 +381,7 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
const struct journal_key *l = _l;
const struct journal_key *r = _r;

return cmp_int(l->btree_id, r->btree_id) ?:
cmp_int(l->level, r->level) ?:
bpos_cmp(l->k->k.p, r->k->k.p) ?:
return journal_key_cmp(l, r) ?:
cmp_int(l->journal_seq, r->journal_seq) ?:
cmp_int(l->journal_offset, r->journal_offset);
}

@ -537,7 +474,7 @@ static void replay_now_at(struct journal *j, u64 seq)
bch2_journal_pin_put(j, j->replay_journal_seq++);
}

static int __bch2_journal_replay_key(struct btree_trans *trans,
static int bch2_journal_replay_key(struct btree_trans *trans,
struct journal_key *k)
{
struct btree_iter iter;

@ -546,45 +483,32 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
BTREE_ITER_NOT_EXTENTS;
int ret;

/* Must be checked with btree locked: */
if (k->overwritten)
return 0;

if (!k->level && k->btree_id == BTREE_ID_alloc)
iter_flags |= BTREE_ITER_CACHED|BTREE_ITER_CACHED_NOFILL;
iter_flags |= BTREE_ITER_CACHED;

bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level,
iter_flags);
ret = bch2_btree_iter_traverse(&iter) ?:
bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN);
ret = bch2_btree_iter_traverse(&iter);
if (ret)
goto out;

/* Must be checked with btree locked: */
if (k->overwritten)
goto out;

ret = bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN);
out:
bch2_trans_iter_exit(trans, &iter);
return ret;
}

static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
{
unsigned commit_flags =
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_RESERVED;

if (!k->allocated)
commit_flags |= BTREE_INSERT_JOURNAL_REPLAY;

return bch2_trans_do(c, NULL, NULL, commit_flags,
__bch2_journal_replay_key(&trans, k));
}

static int journal_sort_seq_cmp(const void *_l, const void *_r)
{
const struct journal_key *l = *((const struct journal_key **)_l);
const struct journal_key *r = *((const struct journal_key **)_r);

return cmp_int(r->level, l->level) ?:
cmp_int(l->journal_seq, r->journal_seq) ?:
cmp_int(l->btree_id, r->btree_id) ?:
bpos_cmp(l->k->k.p, r->k->k.p);
return cmp_int(l->journal_seq, r->journal_seq);
}

static int bch2_journal_replay(struct bch_fs *c)

@ -592,10 +516,7 @@ static int bch2_journal_replay(struct bch_fs *c)
struct journal_keys *keys = &c->journal_keys;
struct journal_key **keys_sorted, *k;
struct journal *j = &c->journal;
struct bch_dev *ca;
unsigned idx;
size_t i;
u64 seq;
int ret;

keys_sorted = kmalloc_array(sizeof(*keys_sorted), keys->nr, GFP_KERNEL);

@ -609,91 +530,40 @@ static int bch2_journal_replay(struct bch_fs *c)
sizeof(keys_sorted[0]),
journal_sort_seq_cmp, NULL);

if (keys->nr)
if (keys->nr) {
bch_verbose(c, "starting journal replay, %zu keys", keys->nr);
replay_now_at(j, keys->journal_seq_base);
}

seq = j->replay_journal_seq;

/*
* First replay updates to the alloc btree - these will only update the
* btree key cache:
*/
for (i = 0; i < keys->nr; i++) {
k = keys_sorted[i];

cond_resched();

if (!k->level && k->btree_id == BTREE_ID_alloc) {
j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
ret = bch2_journal_replay_key(c, k);
if (ret)
goto err;
}
}

/* Now we can start the allocator threads: */
set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
for_each_member_device(ca, c, idx)
bch2_wake_allocator(ca);

/*
* Next replay updates to interior btree nodes:
*/
for (i = 0; i < keys->nr; i++) {
k = keys_sorted[i];

cond_resched();

if (k->level) {
j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
ret = bch2_journal_replay_key(c, k);
if (ret)
goto err;
}
}

/*
* Now that the btree is in a consistent state, we can start journal
* reclaim (which will be flushing entries from the btree key cache back
* to the btree:
*/
set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
set_bit(JOURNAL_RECLAIM_STARTED, &j->flags);
journal_reclaim_kick(j);

j->replay_journal_seq = seq;

/*
* Now replay leaf node updates:
*/
for (i = 0; i < keys->nr; i++) {
k = keys_sorted[i];

cond_resched();

if (k->level || k->btree_id == BTREE_ID_alloc)
continue;

if (!k->allocated)
replay_now_at(j, keys->journal_seq_base + k->journal_seq);

ret = bch2_journal_replay_key(c, k);
if (ret)
ret = bch2_trans_do(c, NULL, NULL,
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_RESERVED|
(!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0),
bch2_journal_replay_key(&trans, k));
if (ret) {
bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
ret, bch2_btree_ids[k->btree_id], k->level);
goto err;
}
}

replay_now_at(j, j->replay_journal_seq_end);
j->replay_journal_seq = 0;

bch2_journal_set_replay_done(j);
bch2_journal_flush_all_pins(j);
kfree(keys_sorted);

return bch2_journal_error(j);
ret = bch2_journal_error(j);
err:
bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
ret, bch2_btree_ids[k->btree_id], k->level);
kfree(keys_sorted);

return ret;
}
@ -1217,7 +1087,11 @@ use_clean:

bch_verbose(c, "starting alloc read");
err = "error reading allocation information";
ret = bch2_alloc_read(c);

down_read(&c->gc_lock);
ret = bch2_alloc_read(c, false, false);
up_read(&c->gc_lock);

if (ret)
goto err;
bch_verbose(c, "alloc read done");

@ -1231,6 +1105,13 @@ use_clean:

set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);

/*
* If we're not running fsck, this ensures bch2_fsck_err() calls are
* instead interpreted as bch2_inconsistent_err() calls:
*/
if (!c->opts.fsck)
set_bit(BCH_FS_FSCK_DONE, &c->flags);

if (c->opts.fsck ||
!(c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)) ||
!(c->sb.compat & (1ULL << BCH_COMPAT_alloc_metadata)) ||

@ -1265,24 +1146,8 @@ use_clean:
ret = bch2_journal_replay(c);
if (ret)
goto err;
bch_verbose(c, "journal replay done");

if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
!c->opts.nochanges) {
/*
* note that even when filesystem was clean there might be work
* to do here, if we ran gc (because of fsck) which recalculated
* oldest_gen:
*/
bch_verbose(c, "writing allocation info");
err = "error writing out alloc info";
ret = bch2_alloc_write_all(c, BTREE_INSERT_LAZY_RW);
if (ret) {
bch_err(c, "error writing alloc info");
goto err;
}
bch_verbose(c, "alloc write done");
}
if (c->opts.verbose || !c->sb.clean)
bch_info(c, "journal replay done");

if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
bch2_fs_lazy_rw(c);

@ -1430,14 +1295,11 @@ int bch2_fs_initialize(struct bch_fs *c)

set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
set_bit(BCH_FS_FSCK_DONE, &c->flags);

for (i = 0; i < BTREE_ID_NR; i++)
bch2_btree_root_alloc(c, i);

set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
set_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);

err = "unable to allocate journal buckets";
for_each_online_member(ca, c, i) {
ret = bch2_dev_journal_alloc(ca);

@ -31,6 +31,9 @@ struct btree_and_journal_iter {
} last;
};

size_t bch2_journal_key_search(struct journal_keys *, enum btree_id,
unsigned, struct bpos);

int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id,
unsigned, struct bkey_i *);
int bch2_journal_key_insert(struct bch_fs *, enum btree_id,

@ -45,14 +48,13 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *
struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);

void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
struct bch_fs *, struct btree *,
struct btree_node_iter, struct bpos);
void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
struct bch_fs *,
struct btree *);

typedef int (*btree_walk_key_fn)(struct btree_trans *, struct bkey_s_c);

int bch2_btree_and_journal_walk(struct btree_trans *, enum btree_id, btree_walk_key_fn);

void bch2_journal_keys_free(struct journal_keys *);
void bch2_journal_entries_free(struct list_head *);

@ -752,11 +752,24 @@ int bch2_write_super(struct bch_fs *c)
closure_sync(cl);

for_each_online_member(ca, c, i) {
if (!ca->sb_write_error &&
ca->disk_sb.seq !=
le64_to_cpu(ca->sb_read_scratch->seq)) {
if (ca->sb_write_error)
continue;

if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
bch2_fs_fatal_error(c,
"Superblock modified by another process");
"Superblock write was silently dropped! (seq %llu expected %llu)",
le64_to_cpu(ca->sb_read_scratch->seq),
ca->disk_sb.seq);
percpu_ref_put(&ca->io_ref);
ret = -EROFS;
goto out;
}

if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
bch2_fs_fatal_error(c,
"Superblock modified by another process (seq %llu expected %llu)",
le64_to_cpu(ca->sb_read_scratch->seq),
ca->disk_sb.seq);
percpu_ref_put(&ca->io_ref);
ret = -EROFS;
goto out;
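The rewritten check above distinguishes two failures by comparing the sequence number read back from disk with the one just written: a lower value means our write was silently dropped, a higher value means another process modified the superblock underneath us. A small standalone sketch of that comparison, with stand-in types and no real I/O:

#include <stdint.h>
#include <stdio.h>

enum sb_check { SB_OK, SB_WRITE_DROPPED, SB_MODIFIED_ELSEWHERE };

static enum sb_check check_sb_seq(uint64_t seq_read_back, uint64_t seq_written)
{
	if (seq_read_back < seq_written)
		return SB_WRITE_DROPPED;	/* our write never made it to disk */
	if (seq_read_back > seq_written)
		return SB_MODIFIED_ELSEWHERE;	/* someone else bumped the seq */
	return SB_OK;
}

int main(void)
{
	printf("%d %d %d\n",
	       check_sb_seq(10, 10),	/* 0: ok */
	       check_sb_seq(9, 10),	/* 1: dropped write */
	       check_sb_seq(11, 10));	/* 2: concurrent modification */
	return 0;
}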