Update bcachefs sources to aa439f3b94 bcachefs: btree_gc no longer uses main in-memory bucket array

Kent Overstreet 2022-01-01 21:22:24 -05:00
parent 8a632ea60d
commit c50379128b
21 changed files with 679 additions and 784 deletions
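With this update, mark-and-sweep gc keeps its own shadow bucket array (ca->buckets[1]), populated straight from the alloc btree via bch2_alloc_read(c, true, metadata_only), instead of copying and then diffing the main in-memory bucket array. A rough sketch of the lookup split, assuming simplified types (the real __bucket()/gc_bucket() in the bcachefs sources go through bucket_array() and RCU):

	struct bucket_array {
		size_t		first_bucket;
		size_t		nbuckets;
		struct bucket	b[];
	};

	static inline struct bucket *__bucket(struct bch_dev *ca, size_t b, bool gc)
	{
		/* ca->buckets[0] is the main array, ca->buckets[1] the gc shadow: */
		struct bucket_array *buckets = rcu_dereference_check(ca->buckets[gc], 1);

		return buckets->b + b;
	}

	static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
	{
		return __bucket(ca, b, true);
	}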

View File

@@ -1 +1 @@
-42284b8b2bb980c80140b640de7cb12bc1e4541c
+aa439f3b94eb3141f9b6d71f780300e7fef44af9

View File

@@ -605,8 +605,6 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
 
 	darray_free(s.extents);
 	genradix_free(&s.hardlinks);
-
-	bch2_alloc_write_all(c, false);
 }
 
 static void find_superblock_space(ranges extents,

View File

@@ -38,15 +38,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
 #undef x
 };
 
-struct bkey_alloc_buf {
-	struct bkey_i		k;
-	struct bch_alloc_v3	v;
-
-#define x(_name, _bits)		+ _bits / 8
-	u8		_pad[0 + BCH_ALLOC_FIELDS_V2()];
-#undef  x
-} __attribute__((packed, aligned(8)));
-
 /* Persistent alloc info: */
 
 static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
@@ -253,24 +244,25 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
 	return ret;
 }
 
-static void bch2_alloc_pack(struct bch_fs *c,
-			    struct bkey_alloc_buf *dst,
-			    const struct bkey_alloc_unpacked src)
+struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans,
+				       const struct bkey_alloc_unpacked src)
 {
-	bch2_alloc_pack_v3(dst, src);
+	struct bkey_alloc_buf *dst;
+
+	dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
+	if (!IS_ERR(dst))
+		bch2_alloc_pack_v3(dst, src);
+
+	return dst;
 }
 
 int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter,
 		     struct bkey_alloc_unpacked *u, unsigned trigger_flags)
 {
-	struct bkey_alloc_buf *a;
-
-	a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf));
-	if (IS_ERR(a))
-		return PTR_ERR(a);
-
-	bch2_alloc_pack(trans->c, a, *u);
-	return bch2_trans_update(trans, iter, &a->k, trigger_flags);
+	struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u);
+
+	return PTR_ERR_OR_ZERO(a) ?:
+		bch2_trans_update(trans, iter, &a->k, trigger_flags);
 }
 
 static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
@@ -340,119 +332,54 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
 #undef  x
 }
 
-static int bch2_alloc_read_fn(struct btree_trans *trans, struct bkey_s_c k)
-{
-	struct bch_fs *c = trans->c;
-	struct bch_dev *ca;
-	struct bucket *g;
-	struct bkey_alloc_unpacked u;
-
-	if (!bkey_is_alloc(k.k))
-		return 0;
-
-	ca = bch_dev_bkey_exists(c, k.k->p.inode);
-	g = bucket(ca, k.k->p.offset);
-	u = bch2_alloc_unpack(k);
-
-	*bucket_gen(ca, k.k->p.offset) = u.gen;
-	g->_mark.gen		= u.gen;
-	g->_mark.data_type	= u.data_type;
-	g->_mark.dirty_sectors	= u.dirty_sectors;
-	g->_mark.cached_sectors	= u.cached_sectors;
-	g->_mark.stripe		= u.stripe != 0;
-	g->stripe		= u.stripe;
-	g->stripe_redundancy	= u.stripe_redundancy;
-	g->io_time[READ]	= u.read_time;
-	g->io_time[WRITE]	= u.write_time;
-	g->oldest_gen		= u.oldest_gen;
-	g->gen_valid		= 1;
-
-	return 0;
-}
-
-int bch2_alloc_read(struct bch_fs *c)
-{
-	struct btree_trans trans;
-	int ret;
-
-	bch2_trans_init(&trans, c, 0, 0);
-	down_read(&c->gc_lock);
-	ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_alloc, bch2_alloc_read_fn);
-	up_read(&c->gc_lock);
-	bch2_trans_exit(&trans);
-	if (ret) {
-		bch_err(c, "error reading alloc info: %i", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int bch2_alloc_write_key(struct btree_trans *trans,
-				struct btree_iter *iter,
-				unsigned flags)
-{
-	struct bch_fs *c = trans->c;
-	struct bkey_s_c k;
-	struct bkey_alloc_unpacked old_u, new_u;
-	int ret;
-retry:
-	bch2_trans_begin(trans);
-
-	ret = bch2_btree_key_cache_flush(trans,
-			BTREE_ID_alloc, iter->pos);
-	if (ret)
-		goto err;
-
-	k = bch2_btree_iter_peek_slot(iter);
-	ret = bkey_err(k);
-	if (ret)
-		goto err;
-
-	old_u = bch2_alloc_unpack(k);
-	new_u = alloc_mem_to_key(c, iter);
-
-	if (!bkey_alloc_unpacked_cmp(old_u, new_u))
-		return 0;
-
-	ret   = bch2_alloc_write(trans, iter, &new_u,
-				 BTREE_TRIGGER_NORUN) ?:
-		bch2_trans_commit(trans, NULL, NULL,
-				  BTREE_INSERT_NOFAIL|flags);
-err:
-	if (ret == -EINTR)
-		goto retry;
-	return ret;
-}
-
-int bch2_alloc_write_all(struct bch_fs *c, unsigned flags)
+int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only)
 {
 	struct btree_trans trans;
 	struct btree_iter iter;
+	struct bkey_s_c k;
 	struct bch_dev *ca;
-	unsigned i;
-	int ret = 0;
+	struct bucket *g;
+	struct bkey_alloc_unpacked u;
+	int ret;
 
-	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-	bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN,
-			     BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+	bch2_trans_init(&trans, c, 0, 0);
 
-	for_each_member_device(ca, c, i) {
-		bch2_btree_iter_set_pos(&iter,
-			POS(ca->dev_idx, ca->mi.first_bucket));
+	for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+			   BTREE_ITER_PREFETCH, k, ret) {
+		ca = bch_dev_bkey_exists(c, k.k->p.inode);
+		g = __bucket(ca, k.k->p.offset, gc);
+		u = bch2_alloc_unpack(k);
 
-		while (iter.pos.offset < ca->mi.nbuckets) {
-			ret = bch2_alloc_write_key(&trans, &iter, flags);
-			if (ret) {
-				percpu_ref_put(&ca->ref);
-				goto err;
-			}
-			bch2_btree_iter_advance(&iter);
-		}
-	}
-err:
+		if (!gc)
+			*bucket_gen(ca, k.k->p.offset) = u.gen;
+
+		g->_mark.gen		= u.gen;
+		g->io_time[READ]	= u.read_time;
+		g->io_time[WRITE]	= u.write_time;
+		g->oldest_gen		= !gc ? u.oldest_gen : u.gen;
+		g->gen_valid		= 1;
+
+		if (!gc ||
+		    (metadata_only &&
+		     (u.data_type == BCH_DATA_user ||
+		      u.data_type == BCH_DATA_cached ||
+		      u.data_type == BCH_DATA_parity))) {
+			g->_mark.data_type	= u.data_type;
+			g->_mark.dirty_sectors	= u.dirty_sectors;
+			g->_mark.cached_sectors	= u.cached_sectors;
+			g->_mark.stripe		= u.stripe != 0;
+			g->stripe		= u.stripe;
+			g->stripe_redundancy	= u.stripe_redundancy;
+		}
+	}
 	bch2_trans_iter_exit(&trans, &iter);
+
 	bch2_trans_exit(&trans);
+
+	if (ret)
+		bch_err(c, "error reading alloc info: %i", ret);
+
 	return ret;
 }
@@ -463,19 +390,20 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
+	struct bkey_s_c k;
 	struct bkey_alloc_unpacked u;
 	u64 *time, now;
 	int ret = 0;
 
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr),
 			     BTREE_ITER_CACHED|
-			     BTREE_ITER_CACHED_NOFILL|
 			     BTREE_ITER_INTENT);
-	ret = bch2_btree_iter_traverse(&iter);
+	k = bch2_btree_iter_peek_slot(&iter);
+	ret = bkey_err(k);
 	if (ret)
 		goto out;
 
-	u = alloc_mem_to_key(c, &iter);
+	u = bch2_alloc_unpack(k);
 
 	time = rw == READ ? &u.read_time : &u.write_time;
 	now = atomic64_read(&c->io_clock[rw].now);
@@ -664,20 +592,20 @@ static int bucket_invalidate_btree(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
+	struct bkey_s_c k;
 	int ret;
 
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
 			     POS(ca->dev_idx, b),
 			     BTREE_ITER_CACHED|
-			     BTREE_ITER_CACHED_NOFILL|
 			     BTREE_ITER_INTENT);
-	ret = bch2_btree_iter_traverse(&iter);
+	k = bch2_btree_iter_peek_slot(&iter);
+	ret = bkey_err(k);
 	if (ret)
 		goto err;
 
-	*u = alloc_mem_to_key(c, &iter);
+	*u = bch2_alloc_unpack(k);
 
 	u->gen++;
 	u->data_type		= 0;
 	u->dirty_sectors	= 0;
@@ -859,8 +787,7 @@ static void discard_one_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
 static bool allocator_thread_running(struct bch_dev *ca)
 {
 	unsigned state = ca->mi.state == BCH_MEMBER_STATE_rw &&
-		test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags) &&
-		test_bit(BCH_FS_ALLOC_REPLAY_DONE, &ca->fs->flags)
+		test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags)
 		? ALLOCATOR_running
 		: ALLOCATOR_stopped;
 	alloc_thread_set_state(ca, state);
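bch2_alloc_pack() now returns a buffer allocated with bch2_trans_kmalloc(), which lives until the transaction restarts or exits, so callers never free it and the error check folds into the ?: chain seen in bch2_alloc_write() above. A minimal sketch of a caller using this API (the function and its purpose here are illustrative, not from this commit):

	static int set_bucket_data_type(struct btree_trans *trans,
					struct btree_iter *iter,
					unsigned data_type)
	{
		struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
		struct bkey_alloc_unpacked u;
		int ret = bkey_err(k);

		if (ret)
			return ret;

		u = bch2_alloc_unpack(k);
		u.data_type = data_type;

		/* bch2_alloc_write() packs into transaction memory and queues
		 * the update; there is nothing to free on the error path: */
		return bch2_alloc_write(trans, iter, &u, 0);
	}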

View File

@@ -38,40 +38,23 @@ static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
 	;
 }
 
+struct bkey_alloc_buf {
+	struct bkey_i		k;
+	struct bch_alloc_v3	v;
+
+#define x(_name, _bits)		+ _bits / 8
+	u8		_pad[0 + BCH_ALLOC_FIELDS_V2()];
+#undef  x
+} __attribute__((packed, aligned(8)));
+
 struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
+struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *,
+				       const struct bkey_alloc_unpacked);
 int bch2_alloc_write(struct btree_trans *, struct btree_iter *,
 		     struct bkey_alloc_unpacked *, unsigned);
 
 int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
 
-static inline struct bkey_alloc_unpacked
-alloc_mem_to_key(struct bch_fs *c, struct btree_iter *iter)
-{
-	struct bch_dev *ca;
-	struct bucket *g;
-	struct bkey_alloc_unpacked ret;
-
-	percpu_down_read(&c->mark_lock);
-	ca	= bch_dev_bkey_exists(c, iter->pos.inode);
-	g	= bucket(ca, iter->pos.offset);
-	ret	= (struct bkey_alloc_unpacked) {
-		.dev		= iter->pos.inode,
-		.bucket		= iter->pos.offset,
-		.gen		= g->mark.gen,
-		.oldest_gen	= g->oldest_gen,
-		.data_type	= g->mark.data_type,
-		.dirty_sectors	= g->mark.dirty_sectors,
-		.cached_sectors	= g->mark.cached_sectors,
-		.read_time	= g->io_time[READ],
-		.write_time	= g->io_time[WRITE],
-		.stripe		= g->stripe,
-		.stripe_redundancy = g->stripe_redundancy,
-	};
-	percpu_up_read(&c->mark_lock);
-
-	return ret;
-}
-
 #define ALLOC_SCAN_BATCH(ca)	max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
 
 const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -101,7 +84,7 @@ static inline bool bkey_is_alloc(const struct bkey *k)
 		k->type == KEY_TYPE_alloc_v3;
 }
 
-int bch2_alloc_read(struct bch_fs *);
+int bch2_alloc_read(struct bch_fs *, bool, bool);
 
 static inline void bch2_wake_allocator(struct bch_dev *ca)
 {
@@ -139,7 +122,6 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_stop(struct bch_dev *);
 int bch2_dev_allocator_start(struct bch_dev *);
 
-int bch2_alloc_write_all(struct bch_fs *, unsigned);
 void bch2_fs_allocator_background_init(struct bch_fs *);
 
 #endif /* _BCACHEFS_ALLOC_BACKGROUND_H */

View File

@@ -510,8 +510,6 @@ enum {
 	BCH_FS_INITIAL_GC_DONE,
 	BCH_FS_INITIAL_GC_UNFIXED,
 	BCH_FS_TOPOLOGY_REPAIR_DONE,
-	BCH_FS_ALLOC_REPLAY_DONE,
-	BCH_FS_BTREE_INTERIOR_REPLAY_DONE,
 	BCH_FS_FSCK_DONE,
 	BCH_FS_STARTED,
 	BCH_FS_RW,
@@ -531,7 +529,6 @@ enum {
 	/* misc: */
 	BCH_FS_NEED_ANOTHER_GC,
 	BCH_FS_DELETED_NODES,
-	BCH_FS_NEED_ALLOC_WRITE,
 	BCH_FS_REBUILD_REPLICAS,
 	BCH_FS_HOLD_BTREE_WRITES,
 };
@@ -860,7 +857,6 @@ struct bch_fs {
 	u64			reflink_hint;
 	reflink_gc_table	reflink_gc_table;
 	size_t			reflink_gc_nr;
-	size_t			reflink_gc_idx;
 
 	/* VFS IO PATH - fs-io.c */
 	struct bio_set		writepage_bioset;

View File

@@ -1427,6 +1427,7 @@ LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
 LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
 LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
 LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
+LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
 
 /*
  * Features:
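The new superblock field carves bit 32 out of the little-endian flags[4] word. LE64_BITMASK() generates endian-safe get/set accessors for such a bit range; roughly (a hedged sketch of the mechanism, not the exact macro from bcachefs_format.h):

	#define EXAMPLE_LE64_BITMASK(name, type, field, offset, end)		\
	static inline __u64 name(const type *k)					\
	{									\
		/* pull the field to CPU order, shift the range down, mask: */	\
		return (__le64_to_cpu(k->field) >> offset) &			\
			~(~0ULL << (end - offset));				\
	}									\
										\
	static inline void SET_##name(type *k, __u64 v)				\
	{									\
		__u64 f = __le64_to_cpu(k->field);				\
										\
		f &= ~(~(~0ULL << (end - offset)) << offset);			\
		f |= (v & ~(~0ULL << (end - offset))) << offset;		\
		k->field = __cpu_to_le64(f);					\
	}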

View File

@@ -9,6 +9,7 @@
 #include "alloc_foreground.h"
 #include "bkey_methods.h"
 #include "bkey_buf.h"
+#include "btree_key_cache.h"
 #include "btree_locking.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
@@ -505,7 +506,6 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
 	bkey_for_each_ptr_decode(k->k, ptrs, p, entry) {
 		struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
 		struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
-		struct bucket *g2 = PTR_BUCKET(ca, &p.ptr);
 		enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr);
 
 		if (fsck_err_on(!g->gen_valid, c,
@@ -516,9 +516,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
 				p.ptr.gen,
 				(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
 			if (!p.ptr.cached) {
-				g2->_mark.gen	= g->_mark.gen		= p.ptr.gen;
-				g2->gen_valid	= g->gen_valid		= true;
-				set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
+				g->_mark.gen	= p.ptr.gen;
+				g->gen_valid	= true;
 			} else {
 				do_update = true;
 			}
@@ -532,9 +531,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
 				bch2_data_types[ptr_data_type(k->k, &p.ptr)],
 				p.ptr.gen, g->mark.gen,
 				(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
-			g2->_mark.data_type	= g->_mark.data_type	= data_type;
-			g2->gen_valid		= g->gen_valid		= true;
-			set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
+			g->_mark.data_type	= data_type;
+			g->gen_valid		= true;
 		}
 
 		if (fsck_err_on(gen_cmp(p.ptr.gen, g->mark.gen) > 0, c,
@@ -545,13 +543,12 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
 				p.ptr.gen, g->mark.gen,
 				(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
 			if (!p.ptr.cached) {
-				g2->_mark.gen	= g->_mark.gen	= p.ptr.gen;
-				g2->gen_valid	= g->gen_valid	= true;
-				g2->_mark.data_type		= 0;
-				g2->_mark.dirty_sectors		= 0;
-				g2->_mark.cached_sectors	= 0;
+				g->_mark.gen		= p.ptr.gen;
+				g->gen_valid		= true;
+				g->_mark.data_type	= 0;
+				g->_mark.dirty_sectors	= 0;
+				g->_mark.cached_sectors	= 0;
 				set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
-				set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
 			} else {
 				do_update = true;
 			}
@@ -588,9 +585,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
 			bch2_data_types[data_type],
 			(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
 			if (data_type == BCH_DATA_btree) {
-				g2->_mark.data_type	= g->_mark.data_type	= data_type;
-				g2->gen_valid		= g->gen_valid		= true;
-				set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
+				g->_mark.data_type	= data_type;
+				g->gen_valid		= true;
 			} else {
 				do_update = true;
 			}
@@ -691,10 +687,16 @@ found:
 		}
 
 		ret = bch2_journal_key_insert_take(c, btree_id, level, new);
 		if (ret)
 			kfree(new);
-		else
+		else {
+			bch2_bkey_val_to_text(&PBUF(buf), c, *k);
+			bch_info(c, "updated %s", buf);
+			bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(new));
+			bch_info(c, "new key %s", buf);
 			*k = bkey_i_to_s_c(new);
+		}
 	}
 fsck_err:
 	return ret;
@@ -1145,13 +1147,14 @@ static int bch2_gc_done(struct bch_fs *c,
 	unsigned i, dev;
 	int ret = 0;
 
+	percpu_down_write(&c->mark_lock);
+
 #define copy_field(_f, _msg, ...)					\
 	if (dst->_f != src->_f) {					\
 		if (verify)						\
 			fsck_err(c, _msg ": got %llu, should be %llu"	\
 				, ##__VA_ARGS__, dst->_f, src->_f);	\
 		dst->_f = src->_f;					\
-		set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);		\
 	}
 #define copy_stripe_field(_f, _msg, ...)				\
 	if (dst->_f != src->_f) {					\
@@ -1161,18 +1164,6 @@ static int bch2_gc_done(struct bch_fs *c,
 				iter.pos, ##__VA_ARGS__,		\
 				dst->_f, src->_f);			\
 		dst->_f = src->_f;					\
-		set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);		\
-	}
-#define copy_bucket_field(_f)						\
-	if (dst->b[b]._f != src->b[b]._f) {				\
-		if (verify)						\
-			fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f	\
-				": got %u, should be %u", dev, b,	\
-				dst->b[b].mark.gen,			\
-				bch2_data_types[dst->b[b].mark.data_type],\
-				dst->b[b]._f, src->b[b]._f);		\
-		dst->b[b]._f = src->b[b]._f;				\
-		set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);		\
 	}
 #define copy_dev_field(_f, _msg, ...)					\
 	copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__)
@@ -1183,36 +1174,18 @@ static int bch2_gc_done(struct bch_fs *c,
 		bch2_fs_usage_acc_to_base(c, i);
 
 	for_each_member_device(ca, c, dev) {
-		struct bucket_array *dst = __bucket_array(ca, 0);
-		struct bucket_array *src = __bucket_array(ca, 1);
-		size_t b;
-
-		for (b = 0; b < src->nbuckets; b++) {
-			copy_bucket_field(_mark.gen);
-			copy_bucket_field(_mark.data_type);
-			copy_bucket_field(_mark.stripe);
-			copy_bucket_field(_mark.dirty_sectors);
-			copy_bucket_field(_mark.cached_sectors);
-			copy_bucket_field(stripe_redundancy);
-			copy_bucket_field(stripe);
-
-			dst->b[b].oldest_gen = src->b[b].oldest_gen;
-		}
-
-		{
-			struct bch_dev_usage *dst = ca->usage_base;
-			struct bch_dev_usage *src = (void *)
-				bch2_acc_percpu_u64s((void *) ca->usage_gc,
-						     dev_usage_u64s());
-
-			copy_dev_field(buckets_ec,		"buckets_ec");
-			copy_dev_field(buckets_unavailable,	"buckets_unavailable");
-
-			for (i = 0; i < BCH_DATA_NR; i++) {
-				copy_dev_field(d[i].buckets,	"%s buckets", bch2_data_types[i]);
-				copy_dev_field(d[i].sectors,	"%s sectors", bch2_data_types[i]);
-				copy_dev_field(d[i].fragmented,	"%s fragmented", bch2_data_types[i]);
-			}
-		}
+		struct bch_dev_usage *dst = ca->usage_base;
+		struct bch_dev_usage *src = (void *)
+			bch2_acc_percpu_u64s((void *) ca->usage_gc,
+					     dev_usage_u64s());
+
+		copy_dev_field(buckets_ec,		"buckets_ec");
+		copy_dev_field(buckets_unavailable,	"buckets_unavailable");
+
+		for (i = 0; i < BCH_DATA_NR; i++) {
+			copy_dev_field(d[i].buckets,	"%s buckets", bch2_data_types[i]);
+			copy_dev_field(d[i].sectors,	"%s sectors", bch2_data_types[i]);
+			copy_dev_field(d[i].fragmented,	"%s fragmented", bch2_data_types[i]);
+		}
 	};
@@ -1254,7 +1227,6 @@ static int bch2_gc_done(struct bch_fs *c,
 
 #undef copy_fs_field
 #undef copy_dev_field
-#undef copy_bucket_field
 #undef copy_stripe_field
 #undef copy_field
 fsck_err:
@@ -1262,6 +1234,8 @@ fsck_err:
 		percpu_ref_put(&ca->ref);
 	if (ret)
 		bch_err(c, "%s: ret %i", __func__, ret);
+
+	percpu_up_write(&c->mark_lock);
 	return ret;
 }
@@ -1284,15 +1258,6 @@ static int bch2_gc_start(struct bch_fs *c,
 		BUG_ON(ca->buckets[1]);
 		BUG_ON(ca->usage_gc);
 
-		ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) +
-				ca->mi.nbuckets * sizeof(struct bucket),
-				GFP_KERNEL|__GFP_ZERO);
-		if (!ca->buckets[1]) {
-			percpu_ref_put(&ca->ref);
-			bch_err(c, "error allocating ca->buckets[gc]");
-			return -ENOMEM;
-		}
-
 		ca->usage_gc = alloc_percpu(struct bch_dev_usage);
 		if (!ca->usage_gc) {
 			bch_err(c, "error allocating ca->usage_gc");
@@ -1301,94 +1266,165 @@ static int bch2_gc_start(struct bch_fs *c,
 		}
 	}
 
-	percpu_down_write(&c->mark_lock);
-
-	/*
-	 * indicate to stripe code that we need to allocate for the gc stripes
-	 * radix tree, too
-	 */
-	gc_pos_set(c, gc_phase(GC_PHASE_START));
-
-	for_each_member_device(ca, c, i) {
-		struct bucket_array *dst = __bucket_array(ca, 1);
-		struct bucket_array *src = __bucket_array(ca, 0);
-		size_t b;
-
-		dst->first_bucket	= src->first_bucket;
-		dst->nbuckets		= src->nbuckets;
-
-		for (b = 0; b < src->nbuckets; b++) {
-			struct bucket *d = &dst->b[b];
-			struct bucket *s = &src->b[b];
-
-			d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
-			d->gen_valid = s->gen_valid;
-
-			if (metadata_only &&
-			    (s->mark.data_type == BCH_DATA_user ||
-			     s->mark.data_type == BCH_DATA_cached))
-				d->_mark = s->mark;
-		}
-	};
-
-	percpu_up_write(&c->mark_lock);
-
 	return 0;
 }
 
-static int bch2_gc_reflink_done_initial_fn(struct btree_trans *trans,
-					   struct bkey_s_c k)
+static int bch2_alloc_write_key(struct btree_trans *trans,
+				struct btree_iter *iter,
+				bool initial, bool metadata_only)
 {
 	struct bch_fs *c = trans->c;
-	struct reflink_gc *r;
-	const __le64 *refcount = bkey_refcount_c(k);
-	char buf[200];
-	int ret = 0;
+	struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
+	struct bucket *g;
+	struct bkey_s_c k;
+	struct bkey_alloc_unpacked old_u, new_u, gc_u;
+	struct bkey_alloc_buf *a;
+	int ret;
 
-	if (!refcount)
+	/*
+	 * For this to be correct at runtime, we'll need to figure out a way for
+	 * it to actually lock the key in the btree key cache:
+	 */
+	if (!initial) {
+		ret = bch2_btree_key_cache_flush(trans,
+				BTREE_ID_alloc, iter->pos);
+		if (ret)
+			return ret;
+	}
+
+	k = bch2_btree_iter_peek_slot(iter);
+	ret = bkey_err(k);
+	if (ret)
+		return ret;
+
+	old_u = new_u = bch2_alloc_unpack(k);
+
+	percpu_down_read(&c->mark_lock);
+	g = gc_bucket(ca, iter->pos.offset);
+	gc_u = (struct bkey_alloc_unpacked) {
+		.dev		= iter->pos.inode,
+		.bucket		= iter->pos.offset,
+		.gen		= g->mark.gen,
+		.oldest_gen	= g->oldest_gen,
+		.data_type	= g->mark.data_type,
+		.dirty_sectors	= g->mark.dirty_sectors,
+		.cached_sectors	= g->mark.cached_sectors,
+		.read_time	= g->io_time[READ],
+		.write_time	= g->io_time[WRITE],
+		.stripe		= g->stripe,
+		.stripe_redundancy = g->stripe_redundancy,
+	};
+	percpu_up_read(&c->mark_lock);
+
+	if (metadata_only &&
+	    gc_u.data_type != BCH_DATA_sb &&
+	    gc_u.data_type != BCH_DATA_journal &&
+	    gc_u.data_type != BCH_DATA_btree)
 		return 0;
 
-	r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
-	if (!r)
-		return -ENOMEM;
+	if (!bkey_alloc_unpacked_cmp(old_u, gc_u) ||
+	    gen_after(old_u.gen, gc_u.gen))
+		return 0;
 
-	if (!r ||
-	    r->offset != k.k->p.offset ||
-	    r->size != k.k->size) {
-		bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
-		return -EINVAL;
-	}
+#define copy_bucket_field(_f)						\
+	if (fsck_err_on(new_u._f != gc_u._f, c,				\
+			"bucket %llu:%llu gen %u data type %s has wrong " #_f	\
+			": got %u, should be %u",			\
+			iter->pos.inode, iter->pos.offset,		\
+			new_u.gen,					\
+			bch2_data_types[new_u.data_type],		\
+			new_u._f, gc_u._f))				\
+		new_u._f = gc_u._f;					\
 
-	if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
-			"reflink key has wrong refcount:\n"
-			"  %s\n"
-			"  should be %u",
-			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
-			r->refcount)) {
-		struct bkey_i *new;
+	copy_bucket_field(gen);
+	copy_bucket_field(data_type);
+	copy_bucket_field(stripe);
+	copy_bucket_field(dirty_sectors);
+	copy_bucket_field(cached_sectors);
+	copy_bucket_field(stripe_redundancy);
+	copy_bucket_field(stripe);
+#undef copy_bucket_field
 
-		new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
-		if (!new) {
-			ret = -ENOMEM;
-			goto fsck_err;
-		}
+	new_u.oldest_gen = gc_u.oldest_gen;
 
-		bkey_reassemble(new, k);
+	if (!bkey_alloc_unpacked_cmp(old_u, new_u))
+		return 0;
 
-		if (!r->refcount) {
-			new->k.type = KEY_TYPE_deleted;
-			new->k.size = 0;
-		} else {
-			*bkey_refcount(new) = cpu_to_le64(r->refcount);
-		}
+	a = bch2_alloc_pack(trans, new_u);
+	if (IS_ERR(a))
+		return PTR_ERR(a);
 
-		ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
-		kfree(new);
-	}
+	ret = initial
+		? bch2_journal_key_insert(c, BTREE_ID_alloc, 0, &a->k)
+		: bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN);
 fsck_err:
 	return ret;
 }
 
+static int bch2_gc_alloc_done(struct bch_fs *c, bool initial, bool metadata_only)
+{
+	struct btree_trans trans;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	struct bch_dev *ca;
+	unsigned i;
+	int ret = 0;
+
+	bch2_trans_init(&trans, c, 0, 0);
+
+	for_each_member_device(ca, c, i) {
+		for_each_btree_key(&trans, iter, BTREE_ID_alloc,
+				   POS(ca->dev_idx, ca->mi.first_bucket),
+				   BTREE_ITER_SLOTS|
+				   BTREE_ITER_PREFETCH, k, ret) {
+			if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
+				break;
+
+			ret = __bch2_trans_do(&trans, NULL, NULL,
+					      BTREE_INSERT_LAZY_RW,
+					bch2_alloc_write_key(&trans, &iter,
+							     initial, metadata_only));
+			if (ret)
+				break;
+		}
+		bch2_trans_iter_exit(&trans, &iter);
+
+		if (ret) {
+			bch_err(c, "error writing alloc info: %i", ret);
+			percpu_ref_put(&ca->ref);
+			break;
+		}
+	}
+
+	bch2_trans_exit(&trans);
+	return ret;
+}
+
+static int bch2_gc_alloc_start(struct bch_fs *c, bool initial, bool metadata_only)
+{
+	struct bch_dev *ca;
+	unsigned i;
+
+	for_each_member_device(ca, c, i) {
+		struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
+				ca->mi.nbuckets * sizeof(struct bucket),
+				GFP_KERNEL|__GFP_ZERO);
+		if (!buckets) {
+			percpu_ref_put(&ca->ref);
+			percpu_up_write(&c->mark_lock);
+			bch_err(c, "error allocating ca->buckets[gc]");
+			return -ENOMEM;
+		}
+
+		buckets->first_bucket	= ca->mi.first_bucket;
+		buckets->nbuckets	= ca->mi.nbuckets;
+		rcu_assign_pointer(ca->buckets[1], buckets);
+	};
+
+	return bch2_alloc_read(c, true, metadata_only);
+}
+
 static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
 				bool metadata_only)
 {
@@ -1405,14 +1441,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
 
 	bch2_trans_init(&trans, c, 0, 0);
 
-	if (initial) {
-		c->reflink_gc_idx = 0;
-
-		ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
-				bch2_gc_reflink_done_initial_fn);
-		goto out;
-	}
-
 	for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
 			   BTREE_ITER_PREFETCH, k, ret) {
 		const __le64 *refcount = bkey_refcount_c(k);
@@ -1420,7 +1448,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
 		if (!refcount)
 			continue;
 
-		r = genradix_ptr(&c->reflink_gc_table, idx);
+		r = genradix_ptr(&c->reflink_gc_table, idx++);
 		if (!r ||
 		    r->offset != k.k->p.offset ||
 		    r->size != k.k->size) {
@@ -1450,7 +1478,9 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
 		else
 			*bkey_refcount(new) = cpu_to_le64(r->refcount);
 
-		ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+		ret = initial
+			? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, new)
+			: __bch2_trans_do(&trans, NULL, NULL, 0,
 				__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
 		kfree(new);
@@ -1460,104 +1490,11 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
 	}
 fsck_err:
 	bch2_trans_iter_exit(&trans, &iter);
-out:
 	c->reflink_gc_nr = 0;
 	bch2_trans_exit(&trans);
 	return ret;
 }
 
-static int bch2_gc_stripes_done_initial_fn(struct btree_trans *trans,
-					   struct bkey_s_c k)
-{
-	struct bch_fs *c = trans->c;
-	struct gc_stripe *m;
-	const struct bch_stripe *s;
-	char buf[200];
-	unsigned i;
-	int ret = 0;
-
-	if (k.k->type != KEY_TYPE_stripe)
-		return 0;
-
-	s = bkey_s_c_to_stripe(k).v;
-
-	m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
-
-	for (i = 0; i < s->nr_blocks; i++)
-		if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
-			goto inconsistent;
-	return 0;
-inconsistent:
-	if (fsck_err_on(true, c,
-			"stripe has wrong block sector count %u:\n"
-			"  %s\n"
-			"  should be %u", i,
-			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
-			m ? m->block_sectors[i] : 0)) {
-		struct bkey_i_stripe *new;
-
-		new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
-		if (!new) {
-			ret = -ENOMEM;
-			goto fsck_err;
-		}
-
-		bkey_reassemble(&new->k_i, k);
-
-		for (i = 0; i < new->v.nr_blocks; i++)
-			stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
-
-		ret = bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i);
-		kfree(new);
-	}
-fsck_err:
-	return ret;
-}
-
-static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
-				bool metadata_only)
-{
-	struct btree_trans trans;
-	int ret = 0;
-
-	if (metadata_only)
-		return 0;
-
-	bch2_trans_init(&trans, c, 0, 0);
-
-	if (initial) {
-		ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
-				bch2_gc_stripes_done_initial_fn);
-	} else {
-		BUG();
-	}
-
-	bch2_trans_exit(&trans);
-	return ret;
-}
-
-static int bch2_gc_reflink_start_initial_fn(struct btree_trans *trans,
-					    struct bkey_s_c k)
-{
-	struct bch_fs *c = trans->c;
-	struct reflink_gc *r;
-	const __le64 *refcount = bkey_refcount_c(k);
-
-	if (!refcount)
-		return 0;
-
-	r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
-			       GFP_KERNEL);
-	if (!r)
-		return -ENOMEM;
-
-	r->offset	= k.k->p.offset;
-	r->size		= k.k->size;
-	r->refcount	= 0;
-	return 0;
-}
-
 static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
 				 bool metadata_only)
 {
@@ -1573,12 +1510,6 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
 	bch2_trans_init(&trans, c, 0, 0);
 	c->reflink_gc_nr = 0;
 
-	if (initial) {
-		ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
-				bch2_gc_reflink_start_initial_fn);
-		goto out;
-	}
-
 	for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
 			   BTREE_ITER_PREFETCH, k, ret) {
 		const __le64 *refcount = bkey_refcount_c(k);
@@ -1598,7 +1529,70 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
 		r->refcount	= 0;
 	}
 	bch2_trans_iter_exit(&trans, &iter);
-out:
+
+	bch2_trans_exit(&trans);
+	return ret;
+}
+
+static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
+				bool metadata_only)
+{
+	struct btree_trans trans;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	struct gc_stripe *m;
+	const struct bch_stripe *s;
+	char buf[200];
+	unsigned i;
+	int ret = 0;
+
+	if (metadata_only)
+		return 0;
+
+	bch2_trans_init(&trans, c, 0, 0);
+
+	for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
+			   BTREE_ITER_PREFETCH, k, ret) {
+		if (k.k->type != KEY_TYPE_stripe)
+			continue;
+
+		s = bkey_s_c_to_stripe(k).v;
+
+		m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
+
+		for (i = 0; i < s->nr_blocks; i++)
+			if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
+				goto inconsistent;
+		continue;
+inconsistent:
+		if (fsck_err_on(true, c,
+				"stripe has wrong block sector count %u:\n"
+				"  %s\n"
+				"  should be %u", i,
+				(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
+				m ? m->block_sectors[i] : 0)) {
+			struct bkey_i_stripe *new;
+
+			new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
+			if (!new) {
+				ret = -ENOMEM;
+				break;
+			}
+
+			bkey_reassemble(&new->k_i, k);
+
+			for (i = 0; i < new->v.nr_blocks; i++)
+				stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
+
+			ret = initial
+				? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i)
+				: __bch2_trans_do(&trans, NULL, NULL, 0,
+					__bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i));
+			kfree(new);
+		}
+	}
+fsck_err:
+	bch2_trans_iter_exit(&trans, &iter);
+
 	bch2_trans_exit(&trans);
 	return ret;
 }
@@ -1638,10 +1632,13 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
 	       !bch2_btree_interior_updates_nr_pending(c));
 again:
 	ret   = bch2_gc_start(c, metadata_only) ?:
+		bch2_gc_alloc_start(c, initial, metadata_only) ?:
 		bch2_gc_reflink_start(c, initial, metadata_only);
 	if (ret)
 		goto out;
 
+	gc_pos_set(c, gc_phase(GC_PHASE_START));
+
 	bch2_mark_superblocks(c);
 
 	if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb) &&
@@ -1702,16 +1699,15 @@ out:
 	if (!ret) {
 		bch2_journal_block(&c->journal);
 
-		percpu_down_write(&c->mark_lock);
-		ret   = bch2_gc_reflink_done(c, initial, metadata_only) ?:
-			bch2_gc_stripes_done(c, initial, metadata_only) ?:
+		ret   = bch2_gc_stripes_done(c, initial, metadata_only) ?:
+			bch2_gc_reflink_done(c, initial, metadata_only) ?:
+			bch2_gc_alloc_done(c, initial, metadata_only) ?:
 			bch2_gc_done(c, initial, metadata_only);
 
 		bch2_journal_unblock(&c->journal);
-	} else {
-		percpu_down_write(&c->mark_lock);
 	}
 
+	percpu_down_write(&c->mark_lock);
+
 	/* Indicates that gc is no longer in progress: */
 	__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
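Per-bucket repair thus moves out of bch2_gc_done() (which now only reconciles device/fs usage totals) and into bch2_alloc_write_key(), which compares the unpacked alloc key against gc's shadow bucket field by field. The shape of one such compare-and-repair step, with the copy_bucket_field() macro expanded by hand (a fragment; fsck_err_on() is the existing helper, usable inside a function that has a fsck_err: label, and it returns true when the error should be repaired):

	if (fsck_err_on(new_u.dirty_sectors != gc_u.dirty_sectors, c,
			"bucket %llu:%llu gen %u has wrong dirty_sectors"
			": got %u, should be %u",
			iter->pos.inode, iter->pos.offset, new_u.gen,
			new_u.dirty_sectors, gc_u.dirty_sectors))
		/* take gc's answer; the caller only writes the key back
		 * if any field actually changed: */
		new_u.dirty_sectors = gc_u.dirty_sectors;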

View File

@@ -12,6 +12,7 @@
 #include "error.h"
 #include "extents.h"
 #include "journal.h"
+#include "recovery.h"
 #include "replicas.h"
 #include "subvolume.h"
 
@@ -1077,6 +1078,7 @@ static inline bool btree_path_advance_to_pos(struct btree_path *path,
 static void btree_path_verify_new_node(struct btree_trans *trans,
 				       struct btree_path *path, struct btree *b)
 {
+	struct bch_fs *c = trans->c;
 	struct btree_path_level *l;
 	unsigned plevel;
 	bool parent_locked;
@@ -1085,6 +1087,9 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
 	if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
 		return;
 
+	if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+		return;
+
 	plevel = b->c.level + 1;
 	if (!btree_path_node(path, plevel))
 		return;
@@ -1105,7 +1110,7 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
 		char buf4[100];
 		struct bkey uk = bkey_unpack_key(b, k);
 
-		bch2_dump_btree_node(trans->c, l->b);
+		bch2_dump_btree_node(c, l->b);
 		bch2_bpos_to_text(&PBUF(buf1), path->pos);
 		bch2_bkey_to_text(&PBUF(buf2), &uk);
 		bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
@@ -1296,6 +1301,41 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path)
 	return ret;
 }
 
+static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
+				 struct btree_and_journal_iter *jiter)
+{
+	struct bch_fs *c = trans->c;
+	struct bkey_s_c k;
+	struct bkey_buf tmp;
+	unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
+		? (path->level > 1 ? 0 :  2)
+		: (path->level > 1 ? 1 : 16);
+	bool was_locked = btree_node_locked(path, path->level);
+	int ret = 0;
+
+	bch2_bkey_buf_init(&tmp);
+
+	while (nr && !ret) {
+		if (!bch2_btree_node_relock(trans, path, path->level))
+			break;
+
+		bch2_btree_and_journal_iter_advance(jiter);
+		k = bch2_btree_and_journal_iter_peek(jiter);
+		if (!k.k)
+			break;
+
+		bch2_bkey_buf_reassemble(&tmp, c, k);
+		ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
+					       path->level - 1);
+	}
+
+	if (!was_locked)
+		btree_node_unlock(path, path->level);
+
+	bch2_bkey_buf_exit(&tmp, c);
+	return ret;
+}
+
 static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
 					    struct btree_path *path,
 					    unsigned plevel, struct btree *b)
@@ -1318,6 +1358,30 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
 	btree_node_unlock(path, plevel);
 }
 
+static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
+						     struct btree_path *path,
+						     unsigned flags,
+						     struct bkey_buf *out)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_path_level *l = path_l(path);
+	struct btree_and_journal_iter jiter;
+	struct bkey_s_c k;
+	int ret = 0;
+
+	__bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
+
+	k = bch2_btree_and_journal_iter_peek(&jiter);
+
+	bch2_bkey_buf_reassemble(out, c, k);
+
+	if (flags & BTREE_ITER_PREFETCH)
+		ret = btree_path_prefetch_j(trans, path, &jiter);
+
+	bch2_btree_and_journal_iter_exit(&jiter);
+	return ret;
+}
+
 static __always_inline int btree_path_down(struct btree_trans *trans,
 					   struct btree_path *path,
 					   unsigned flags,
@@ -1328,14 +1392,28 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
 	struct btree *b;
 	unsigned level = path->level - 1;
 	enum six_lock_type lock_type = __btree_lock_want(path, level);
+	bool replay_done = test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
 	struct bkey_buf tmp;
 	int ret;
 
 	EBUG_ON(!btree_node_locked(path, path->level));
 
 	bch2_bkey_buf_init(&tmp);
-	bch2_bkey_buf_unpack(&tmp, c, l->b,
-			     bch2_btree_node_iter_peek(&l->iter, l->b));
+
+	if (unlikely(!replay_done)) {
+		ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
+		if (ret)
+			goto err;
+	} else {
+		bch2_bkey_buf_unpack(&tmp, c, l->b,
+				     bch2_btree_node_iter_peek(&l->iter, l->b));
+
+		if (flags & BTREE_ITER_PREFETCH) {
+			ret = btree_path_prefetch(trans, path);
+			if (ret)
+				goto err;
+		}
+	}
 
 	b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
 	ret = PTR_ERR_OR_ZERO(b);
@@ -1345,13 +1423,10 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
 	mark_btree_node_locked(path, level, lock_type);
 	btree_path_level_init(trans, path, b);
 
-	if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 &&
+	if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
 	    unlikely(b != btree_node_mem_ptr(tmp.k)))
 		btree_node_mem_ptr_set(trans, path, level + 1, b);
 
-	if (flags & BTREE_ITER_PREFETCH)
-		ret = btree_path_prefetch(trans, path);
-
 	if (btree_node_read_locked(path, level + 1))
 		btree_node_unlock(path, level + 1);
 	path->level = level;
@@ -2107,6 +2182,59 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
 	return ret;
 }
 
+static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
+						      enum btree_id btree_id,
+						      struct bpos pos)
+{
+	struct btree_insert_entry *i;
+
+	trans_for_each_update(trans, i)
+		if ((cmp_int(btree_id,	i->btree_id) ?:
+		     bpos_cmp(pos,	i->k->k.p)) <= 0) {
+			if (btree_id == i->btree_id)
+				return i->k;
+			break;
+		}
+
+	return NULL;
+}
+
+static noinline
+struct bkey_i *__btree_trans_peek_journal(struct btree_trans *trans,
+					  struct btree_path *path)
+{
+	struct journal_keys *keys = &trans->c->journal_keys;
+	size_t idx = bch2_journal_key_search(keys, path->btree_id,
+					     path->level, path->pos);
+
+	while (idx < keys->nr && keys->d[idx].overwritten)
+		idx++;
+
+	return (idx < keys->nr &&
+		keys->d[idx].btree_id	== path->btree_id &&
+		keys->d[idx].level	== path->level)
+		? keys->d[idx].k
+		: NULL;
+}
+
+static noinline
+struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
+					 struct btree_iter *iter,
+					 struct bkey_s_c k)
+{
+	struct bkey_i *next_journal =
+		__btree_trans_peek_journal(trans, iter->path);
+
+	if (next_journal &&
+	    bpos_cmp(next_journal->k.p,
+		     k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
+		iter->k = next_journal->k;
+		k = bkey_i_to_s_c(next_journal);
+	}
+
+	return k;
+}
+
 /**
  * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
  * current position
@@ -2117,7 +2245,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 	struct bpos search_key = btree_iter_search_key(iter);
 	struct bkey_i *next_update;
 	struct bkey_s_c k;
-	int ret, cmp;
+	int ret;
 
 	EBUG_ON(iter->path->cached || iter->path->level);
 	bch2_btree_iter_verify(iter);
@@ -2136,19 +2264,14 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 			goto out;
 		}
 
+		k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
+
+		if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
+			k = btree_trans_peek_journal(trans, iter, k);
+
 		next_update = iter->flags & BTREE_ITER_WITH_UPDATES
 			? btree_trans_peek_updates(trans, iter->btree_id, search_key)
 			: NULL;
-		k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
-
-		/*
-		 * In the btree, deleted keys sort before non deleted:
-		 */
-		if (k.k && bkey_deleted(k.k) &&
-		    (!next_update ||
-		     bpos_cmp(k.k->p, next_update->k.p) <= 0)) {
-			search_key = k.k->p;
-			continue;
-		}
-
 		if (next_update &&
 		    bpos_cmp(next_update->k.p,
 			     k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
@@ -2156,6 +2279,20 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 			k = bkey_i_to_s_c(next_update);
 		}
 
+		if (k.k && bkey_deleted(k.k)) {
+			/*
+			 * If we've got a whiteout, and it's after the search
+			 * key, advance the search key to the whiteout instead
+			 * of just after the whiteout - it might be a btree
+			 * whiteout, with a real key at the same position, since
+			 * in the btree deleted keys sort before non deleted.
+			 */
+			search_key = bpos_cmp(search_key, k.k->p)
+				? k.k->p
+				: bpos_successor(k.k->p);
+			continue;
+		}
+
 		if (likely(k.k)) {
 			/*
 			 * We can never have a key in a leaf node at POS_MAX, so
@@ -2199,14 +2336,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 	if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
 		iter->pos.snapshot = iter->snapshot;
 
-	cmp = bpos_cmp(k.k->p, iter->path->pos);
-	if (cmp) {
-		iter->path = bch2_btree_path_make_mut(trans, iter->path,
-					iter->flags & BTREE_ITER_INTENT,
-					btree_iter_ip_allocated(iter));
-		iter->path->pos = k.k->p;
-		btree_path_check_sort(trans, iter->path, cmp);
-	}
+	iter->path = btree_path_set_pos(trans, iter->path, k.k->p,
+					iter->flags & BTREE_ITER_INTENT,
+					btree_iter_ip_allocated(iter));
+	BUG_ON(!iter->path->nodes_locked);
 out:
 	iter->path->should_be_locked = true;
 
@@ -2247,6 +2380,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
 	EBUG_ON(iter->path->cached || iter->path->level);
 	EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
 
+	if (iter->flags & BTREE_ITER_WITH_JOURNAL)
+		return bkey_s_c_err(-EIO);
+
 	bch2_btree_iter_verify(iter);
 	bch2_btree_iter_verify_entry_exit(iter);
@@ -2397,17 +2534,24 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 	    !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
 		struct bkey_i *next_update;
 
-		next_update = iter->flags & BTREE_ITER_WITH_UPDATES
-			? btree_trans_peek_updates(trans, iter->btree_id, search_key)
-			: NULL;
-
-		if (next_update &&
+		if ((iter->flags & BTREE_ITER_WITH_UPDATES) &&
+		    (next_update = btree_trans_peek_updates(trans,
+						iter->btree_id, search_key)) &&
 		    !bpos_cmp(next_update->k.p, iter->pos)) {
 			iter->k = next_update->k;
 			k = bkey_i_to_s_c(next_update);
-		} else {
-			k = bch2_btree_path_peek_slot(iter->path, &iter->k);
+			goto out;
 		}
+
+		if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
+		    (next_update = __btree_trans_peek_journal(trans, iter->path)) &&
+		    !bpos_cmp(next_update->k.p, iter->pos)) {
+			iter->k = next_update->k;
+			k = bkey_i_to_s_c(next_update);
+			goto out;
+		}
+
+		k = bch2_btree_path_peek_slot(iter->path, &iter->k);
 	} else {
 		struct bpos next;
 
@@ -2451,7 +2595,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 			k = (struct bkey_s_c) { &iter->k, NULL };
 		}
 	}
-
+out:
 	iter->path->should_be_locked = true;
 
 	bch2_btree_iter_verify_entry_exit(iter);
@@ -2618,6 +2762,9 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
 	    btree_type_has_snapshots(btree_id))
 		flags |= BTREE_ITER_FILTER_SNAPSHOTS;
 
+	if (!test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags))
+		flags |= BTREE_ITER_WITH_JOURNAL;
+
 	iter->trans	= trans;
 	iter->path	= NULL;
 	iter->btree_id	= btree_id;
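The effect of BTREE_ITER_WITH_JOURNAL: until journal replay finishes, every iterator transparently overlays not-yet-replayed journal keys on top of the btree, which is what lets bch2_btree_and_journal_walk() users above switch to plain for_each_btree_key(). The core merge rule, sketched in isolation (simplified: btree_trans_peek_journal() above compares against the node's end key when there is no btree key, and also skips overwritten journal entries):

	/* Journal keys are newer than btree contents, so the journal key
	 * wins whenever it sorts at or before the next btree key: */
	static struct bkey_s_c overlay_peek(struct bkey_s_c btree_k,
					    struct bkey_i *journal_k)
	{
		if (journal_k &&
		    (!btree_k.k || bpos_cmp(journal_k->k.p, btree_k.k->p) <= 0))
			return bkey_i_to_s_c(journal_k);

		return btree_k;
	}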

View File

@@ -16,8 +16,7 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
 	size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
 	size_t max_dirty = 4096 + (nr_keys *  3) / 4;
 
-	return nr_dirty > max_dirty &&
-		test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
+	return nr_dirty > max_dirty;
 }
 
 int bch2_btree_key_cache_journal_flush(struct journal *,

View File

@@ -207,10 +207,11 @@ struct btree_node_iter {
 #define BTREE_ITER_CACHED_NOFILL	(1 << 8)
 #define BTREE_ITER_CACHED_NOCREATE	(1 << 9)
 #define BTREE_ITER_WITH_UPDATES		(1 << 10)
-#define __BTREE_ITER_ALL_SNAPSHOTS	(1 << 11)
-#define BTREE_ITER_ALL_SNAPSHOTS	(1 << 12)
-#define BTREE_ITER_FILTER_SNAPSHOTS	(1 << 13)
-#define BTREE_ITER_NOPRESERVE		(1 << 14)
+#define BTREE_ITER_WITH_JOURNAL		(1 << 11)
+#define __BTREE_ITER_ALL_SNAPSHOTS	(1 << 12)
+#define BTREE_ITER_ALL_SNAPSHOTS	(1 << 13)
+#define BTREE_ITER_FILTER_SNAPSHOTS	(1 << 14)
+#define BTREE_ITER_NOPRESERVE		(1 << 15)
 
 enum btree_path_uptodate {
 	BTREE_ITER_UPTODATE = 0,

View File

@@ -135,21 +135,4 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
 	     (_i) < (_trans)->updates + (_trans)->nr_updates;		\
 	     (_i)++)
 
-static inline struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
-						      enum btree_id btree_id,
-						      struct bpos pos)
-{
-	struct btree_insert_entry *i;
-
-	trans_for_each_update(trans, i)
-		if ((cmp_int(btree_id,	i->btree_id) ?:
-		     bpos_cmp(pos,	i->k->k.p)) <= 0) {
-			if (btree_id == i->btree_id)
-				return i->k;
-			break;
-		}
-
-	return NULL;
-}
-
 #endif /* _BCACHEFS_BTREE_UPDATE_H */

View File

@@ -16,6 +16,7 @@
 #include "journal.h"
 #include "journal_reclaim.h"
 #include "keylist.h"
+#include "recovery.h"
 #include "replicas.h"
 #include "super-io.h"
 
@@ -44,7 +45,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
 
 	BUG_ON(!b->c.level);
 
-	if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
+	if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
 		return;
 
 	bch2_btree_node_iter_init_from_start(&iter, b);
@@ -1146,6 +1147,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
 	BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
 	       !btree_ptr_sectors_written(insert));
 
+	if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
+		bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p);
+
 	invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?:
 		bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert));
 	if (invalid) {
@@ -1847,9 +1851,6 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
 {
 	struct async_btree_rewrite *a;
 
-	if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
-		return;
-
 	if (!percpu_ref_tryget(&c->writes))
 		return;

View File

@@ -206,9 +206,6 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
 	int old_live_u64s = b->nr.live_u64s;
 	int live_u64s_added, u64s_added;
 
-	EBUG_ON(!insert->level &&
-		!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags));
-
 	if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
 					&insert_l(insert)->iter, insert->k)))
 		return false;
@@ -476,13 +473,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
 					       JOURNAL_RES_GET_NONBLOCK);
 		if (ret)
 			return ret;
+
+		if (unlikely(trans->journal_transaction_names))
+			journal_transaction_name(trans);
 	} else {
 		trans->journal_res.seq = c->journal.replay_journal_seq;
 	}
 
-	if (unlikely(trans->journal_transaction_names))
-		journal_transaction_name(trans);
-
 	if (unlikely(trans->extra_journal_entry_u64s)) {
 		memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
 				  trans->extra_journal_entries,

View File

@@ -1458,24 +1458,22 @@ static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
 {
 	struct bch_fs *c = trans->c;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-	struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
-	struct bkey_i *update = btree_trans_peek_updates(trans, BTREE_ID_alloc, pos);
+	struct bkey_s_c k;
 	int ret;
 
-	bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
+	bch2_trans_iter_init(trans, iter, BTREE_ID_alloc,
+			     POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)),
+			     BTREE_ITER_WITH_UPDATES|
 			     BTREE_ITER_CACHED|
-			     BTREE_ITER_CACHED_NOFILL|
 			     BTREE_ITER_INTENT);
-	ret = bch2_btree_iter_traverse(iter);
+	k = bch2_btree_iter_peek_slot(iter);
+	ret = bkey_err(k);
 	if (ret) {
 		bch2_trans_iter_exit(trans, iter);
 		return ret;
 	}
 
-	*u = update && !bpos_cmp(update->k.p, pos)
-		? bch2_alloc_unpack(bkey_i_to_s_c(update))
-		: alloc_mem_to_key(c, iter);
+	*u = bch2_alloc_unpack(k);
 	return 0;
 }

View File

@@ -1558,50 +1558,48 @@ void bch2_stripes_heap_start(struct bch_fs *c)
 			bch2_stripes_heap_insert(c, m, iter.pos);
 }
 
-static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k)
-{
-	const struct bch_stripe *s;
-	struct bch_fs *c = trans->c;
-	struct stripe *m;
-	unsigned i;
-	int ret = 0;
-
-	if (k.k->type != KEY_TYPE_stripe)
-		return 0;
-
-	ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
-	if (ret)
-		return ret;
-
-	s = bkey_s_c_to_stripe(k).v;
-
-	m = genradix_ptr(&c->stripes, k.k->p.offset);
-	m->alive	= true;
-	m->sectors	= le16_to_cpu(s->sectors);
-	m->algorithm	= s->algorithm;
-	m->nr_blocks	= s->nr_blocks;
-	m->nr_redundant	= s->nr_redundant;
-	m->blocks_nonempty = 0;
-
-	for (i = 0; i < s->nr_blocks; i++)
-		m->blocks_nonempty += !!stripe_blockcount_get(s, i);
-
-	spin_lock(&c->ec_stripes_heap_lock);
-	bch2_stripes_heap_update(c, m, k.k->p.offset);
-	spin_unlock(&c->ec_stripes_heap_lock);
-
-	return ret;
-}
-
 int bch2_stripes_read(struct bch_fs *c)
 {
 	struct btree_trans trans;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	const struct bch_stripe *s;
+	struct stripe *m;
+	unsigned i;
 	int ret;
 
 	bch2_trans_init(&trans, c, 0, 0);
-	ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
-					  bch2_stripes_read_fn);
+
+	for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
+			   BTREE_ITER_PREFETCH, k, ret) {
+		if (k.k->type != KEY_TYPE_stripe)
+			continue;
+
+		ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
+		if (ret)
+			break;
+
+		s = bkey_s_c_to_stripe(k).v;
+
+		m = genradix_ptr(&c->stripes, k.k->p.offset);
+		m->alive	= true;
+		m->sectors	= le16_to_cpu(s->sectors);
+		m->algorithm	= s->algorithm;
+		m->nr_blocks	= s->nr_blocks;
+		m->nr_redundant	= s->nr_redundant;
+		m->blocks_nonempty = 0;
+
+		for (i = 0; i < s->nr_blocks; i++)
+			m->blocks_nonempty += !!stripe_blockcount_get(s, i);
+
+		spin_lock(&c->ec_stripes_heap_lock);
+		bch2_stripes_heap_update(c, m, k.k->p.offset);
+		spin_unlock(&c->ec_stripes_heap_lock);
+	}
+	bch2_trans_iter_exit(&trans, &iter);
+
 	bch2_trans_exit(&trans);
 	if (ret)
 		bch_err(c, "error reading stripes: %i", ret);

View File

@@ -489,9 +489,6 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
 	u64 seq;
 	int err;
 
-	if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
-		return 0;
-
 	lockdep_assert_held(&j->reclaim_lock);
 
 	while (1) {
@@ -692,8 +689,6 @@ static int bch2_journal_reclaim_thread(void *arg)
 
 	set_freezable();
 
-	kthread_wait_freezable(test_bit(JOURNAL_RECLAIM_STARTED, &j->flags));
-
 	j->last_flushed = jiffies;
 
 	while (!ret && !kthread_should_stop()) {

View File

@@ -148,7 +148,6 @@ enum journal_space_from {
 enum {
 	JOURNAL_REPLAY_DONE,
 	JOURNAL_STARTED,
-	JOURNAL_RECLAIM_STARTED,
 	JOURNAL_NEED_WRITE,
 	JOURNAL_MAY_GET_UNRESERVED,
 	JOURNAL_MAY_SKIP_FLUSH,

View File

@@ -330,9 +330,9 @@ enum opt_type {
 	  NO_SB_OPT,			false,				\
 	  NULL,		"Read all journal entries, not just dirty ones")\
 	x(journal_transaction_names,	u8,				\
-	  OPT_FS|OPT_MOUNT|OPT_RUNTIME,					\
+	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,			\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  BCH_SB_JOURNAL_TRANSACTION_NAMES, false,			\
 	  NULL,		"Log transaction function names in journal")	\
 	x(noexcl,			u8,				\
 	  OPT_FS|OPT_MOUNT,						\

View File

@@ -59,23 +59,21 @@ static void zero_out_btree_mem_ptr(struct journal_keys *keys)
 static int __journal_key_cmp(enum btree_id	l_btree_id,
 			     unsigned		l_level,
 			     struct bpos	l_pos,
-			    struct journal_key *r)
+			    const struct journal_key *r)
 {
 	return (cmp_int(l_btree_id,	r->btree_id) ?:
 		cmp_int(l_level,	r->level) ?:
 		bpos_cmp(l_pos,	r->k->k.p));
 }
 
-static int journal_key_cmp(struct journal_key *l, struct journal_key *r)
+static int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)
 {
-	return (cmp_int(l->btree_id,	r->btree_id) ?:
-		cmp_int(l->level,	r->level) ?:
-		bpos_cmp(l->k->k.p,	r->k->k.p));
+	return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r);
 }
 
-static size_t journal_key_search(struct journal_keys *journal_keys,
-				 enum btree_id id, unsigned level,
-				 struct bpos pos)
+size_t bch2_journal_key_search(struct journal_keys *journal_keys,
			       enum btree_id id, unsigned level,
			       struct bpos pos)
 {
 	size_t l = 0, r = journal_keys->nr, m;
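The hunk cuts off the body of bch2_journal_key_search(); the elided remainder is a standard lower-bound binary search over the sorted keys array. A sketch of that logic, assuming d[] is sorted in __journal_key_cmp() order:

	while (l < r) {
		m = l + (r - l) / 2;
		if (__journal_key_cmp(id, level, pos, &journal_keys->d[m]) > 0)
			l = m + 1;
		else
			r = m;
	}

	return l;	/* first index >= (id, level, pos), or nr if none */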
@@ -116,11 +114,18 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
 		.btree_id	= id,
 		.level		= level,
 		.k		= k,
-		.allocated	= true
+		.allocated	= true,
+		/*
+		 * Ensure these keys are done last by journal replay, to unblock
+		 * journal reclaim:
+		 */
+		.journal_seq	= U32_MAX,
 	};
 	struct journal_keys *keys = &c->journal_keys;
 	struct journal_iter *iter;
-	unsigned idx = journal_key_search(keys, id, level, k->k.p);
+	size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
+
+	BUG_ON(test_bit(BCH_FS_RW, &c->flags));
 
 	if (idx < keys->nr &&
 	    journal_key_cmp(&n, &keys->d[idx]) == 0) {
@@ -157,6 +162,11 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
 	return 0;
 }
 
+/*
+ * Can only be used from the recovery thread while we're still RO - can't be
+ * used once we've got RW, as journal_keys is at that point used by multiple
+ * threads:
+ */
 int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
 			    unsigned level, struct bkey_i *k)
 {
@@ -189,7 +199,7 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
 			       unsigned level, struct bpos pos)
 {
 	struct journal_keys *keys = &c->journal_keys;
-	size_t idx = journal_key_search(keys, btree, level, pos);
+	size_t idx = bch2_journal_key_search(keys, btree, level, pos);
 
 	if (idx < keys->nr &&
 	    keys->d[idx].btree_id == btree &&
@@ -200,15 +210,18 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
 
 static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
 {
-	struct journal_key *k = iter->idx - iter->keys->nr
-		? iter->keys->d + iter->idx : NULL;
+	struct journal_key *k = iter->keys->d + iter->idx;
 
-	if (k &&
-	    k->btree_id == iter->btree_id &&
-	    k->level == iter->level)
-		return k->k;
+	while (k < iter->keys->d + iter->keys->nr &&
+	       k->btree_id == iter->btree_id &&
+	       k->level == iter->level) {
+		if (!k->overwritten)
+			return k->k;
 
+		iter->idx++;
+		k = iter->keys->d + iter->idx;
+	}
+
+	iter->idx = iter->keys->nr;
 	return NULL;
 }
@@ -231,8 +244,7 @@ static void bch2_journal_iter_init(struct bch_fs *c,
 	iter->btree_id	= id;
 	iter->level	= level;
 	iter->keys	= &c->journal_keys;
-	iter->idx	= journal_key_search(&c->journal_keys, id, level, pos);
-
-	list_add(&iter->list, &c->journal_iters);
+	iter->idx	= bch2_journal_key_search(&c->journal_keys, id, level, pos);
 }
 
 static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
@@ -318,106 +330,33 @@ void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
 	bch2_journal_iter_exit(&iter->journal);
 }
 
-void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
-						struct bch_fs *c,
-						struct btree *b)
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
+						  struct bch_fs *c,
+						  struct btree *b,
+						  struct btree_node_iter node_iter,
+						  struct bpos pos)
 {
 	memset(iter, 0, sizeof(*iter));
 
 	iter->b = b;
-	bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b);
-	bch2_journal_iter_init(c, &iter->journal,
-			       b->c.btree_id, b->c.level, b->data->min_key);
+	iter->node_iter = node_iter;
+	bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, pos);
+	INIT_LIST_HEAD(&iter->journal.list);
 }
 
-/* Walk btree, overlaying keys from the journal: */
-
-static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
-					    struct btree_and_journal_iter iter)
+/*
+ * this version is used by btree_gc before filesystem has gone RW and
+ * multithreaded, so uses the journal_iters list:
+ */
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
+						struct bch_fs *c,
+						struct btree *b)
 {
-	unsigned i = 0, nr = b->c.level > 1 ? 2 : 16;
-	struct bkey_s_c k;
-	struct bkey_buf tmp;
-
-	BUG_ON(!b->c.level);
-
-	bch2_bkey_buf_init(&tmp);
-
-	while (i < nr &&
-	       (k = bch2_btree_and_journal_iter_peek(&iter)).k) {
-		bch2_bkey_buf_reassemble(&tmp, c, k);
-		bch2_btree_node_prefetch(c, NULL, NULL, tmp.k,
-					b->c.btree_id, b->c.level - 1);
-		bch2_btree_and_journal_iter_advance(&iter);
-		i++;
-	}
-
-	bch2_bkey_buf_exit(&tmp, c);
-}
-
-static int bch2_btree_and_journal_walk_recurse(struct btree_trans *trans, struct btree *b,
-					       enum btree_id btree_id,
-					       btree_walk_key_fn key_fn)
-{
-	struct bch_fs *c = trans->c;
-	struct btree_and_journal_iter iter;
-	struct bkey_s_c k;
-	struct bkey_buf tmp;
-	struct btree *child;
-	int ret = 0;
-
-	bch2_bkey_buf_init(&tmp);
-	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
-
-	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
-		if (b->c.level) {
-			bch2_bkey_buf_reassemble(&tmp, c, k);
-
-			child = bch2_btree_node_get_noiter(c, tmp.k,
-						b->c.btree_id, b->c.level - 1,
-						false);
-			ret = PTR_ERR_OR_ZERO(child);
-			if (ret)
-				break;
-
-			btree_and_journal_iter_prefetch(c, b, iter);
-
-			ret = bch2_btree_and_journal_walk_recurse(trans, child,
-								  btree_id, key_fn);
-			six_unlock_read(&child->c.lock);
-		} else {
-			ret = key_fn(trans, k);
-		}
-
-		if (ret)
-			break;
-
-		bch2_btree_and_journal_iter_advance(&iter);
-	}
-
-	bch2_btree_and_journal_iter_exit(&iter);
-	bch2_bkey_buf_exit(&tmp, c);
-
-	return ret;
-}
-
-int bch2_btree_and_journal_walk(struct btree_trans *trans, enum btree_id btree_id,
-				btree_walk_key_fn key_fn)
-{
-	struct bch_fs *c = trans->c;
-	struct btree *b = c->btree_roots[btree_id].b;
-	int ret = 0;
-
-	if (btree_node_fake(b))
-		return 0;
-
-	six_lock_read(&b->c.lock, NULL, NULL);
-	ret = bch2_btree_and_journal_walk_recurse(trans, b, btree_id, key_fn);
-	six_unlock_read(&b->c.lock);
-
-	return ret;
+	struct btree_node_iter node_iter;
+
+	bch2_btree_node_iter_init_from_start(&node_iter, b);
+	__bch2_btree_and_journal_iter_init_node_iter(iter, c, b, node_iter, b->data->min_key);
+	list_add(&iter->journal.list, &c->journal_iters);
 }
 
 /* sort and dedup all keys in the journal: */
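With the walk helpers gone, remaining callers drive the btree-and-journal iterator directly, following the loop shape the deleted walk code used (peek, process, advance, exit):

	struct btree_and_journal_iter iter;
	struct bkey_s_c k;

	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);

	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		/* k is the node's keys overlaid with pending journal keys */
		bch2_btree_and_journal_iter_advance(&iter);
	}

	bch2_btree_and_journal_iter_exit(&iter);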
@@ -442,9 +381,7 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
 	const struct journal_key *l = _l;
 	const struct journal_key *r = _r;
 
-	return cmp_int(l->btree_id,	r->btree_id) ?:
-		cmp_int(l->level,	r->level) ?:
-		bpos_cmp(l->k->k.p,	r->k->k.p) ?:
+	return journal_key_cmp(l, r) ?:
 		cmp_int(l->journal_seq, r->journal_seq) ?:
 		cmp_int(l->journal_offset, r->journal_offset);
 }
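These comparators lean on GCC's binary `a ?: b` extension, which evaluates to a when a is nonzero and to b otherwise, so a chain of cmp_int()/bpos_cmp() calls compares lexicographically: the first field that differs decides. Spelled out without the extension, journal_sort_key_cmp() is equivalent to:

	int cmp = journal_key_cmp(l, r);

	if (!cmp)
		cmp = cmp_int(l->journal_seq, r->journal_seq);
	if (!cmp)
		cmp = cmp_int(l->journal_offset, r->journal_offset);
	return cmp;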
@@ -537,8 +474,8 @@ static void replay_now_at(struct journal *j, u64 seq)
 		bch2_journal_pin_put(j, j->replay_journal_seq++);
 }
 
-static int __bch2_journal_replay_key(struct btree_trans *trans,
-				     struct journal_key *k)
+static int bch2_journal_replay_key(struct btree_trans *trans,
+				   struct journal_key *k)
 {
 	struct btree_iter iter;
 	unsigned iter_flags =
@@ -546,45 +483,32 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
 		BTREE_ITER_NOT_EXTENTS;
 	int ret;
 
-	/* Must be checked with btree locked: */
-	if (k->overwritten)
-		return 0;
-
 	if (!k->level && k->btree_id == BTREE_ID_alloc)
-		iter_flags |= BTREE_ITER_CACHED|BTREE_ITER_CACHED_NOFILL;
+		iter_flags |= BTREE_ITER_CACHED;
 
 	bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
 				  BTREE_MAX_DEPTH, k->level,
 				  iter_flags);
-	ret = bch2_btree_iter_traverse(&iter) ?:
-		bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN);
+	ret = bch2_btree_iter_traverse(&iter);
+	if (ret)
+		goto out;
+
+	/* Must be checked with btree locked: */
+	if (k->overwritten)
+		goto out;
+
+	ret = bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN);
+out:
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
 
-static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
-{
-	unsigned commit_flags =
-		BTREE_INSERT_LAZY_RW|
-		BTREE_INSERT_NOFAIL|
-		BTREE_INSERT_JOURNAL_RESERVED;
-
-	if (!k->allocated)
-		commit_flags |= BTREE_INSERT_JOURNAL_REPLAY;
-
-	return bch2_trans_do(c, NULL, NULL, commit_flags,
-			     __bch2_journal_replay_key(&trans, k));
-}
-
 static int journal_sort_seq_cmp(const void *_l, const void *_r)
 {
 	const struct journal_key *l = *((const struct journal_key **)_l);
 	const struct journal_key *r = *((const struct journal_key **)_r);
 
-	return cmp_int(r->level,	l->level) ?:
-		cmp_int(l->journal_seq, r->journal_seq) ?:
-		cmp_int(l->btree_id,	r->btree_id) ?:
-		bpos_cmp(l->k->k.p,	r->k->k.p);
+	return cmp_int(l->journal_seq, r->journal_seq);
 }
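Replay order is now a single sort by journal_seq, which is exactly what the .journal_seq = U32_MAX assignment in bch2_journal_key_insert_take() exploits: keys allocated during recovery compare greater than any real sequence number, so they are replayed last and never hold up journal reclaim. A toy check of the property:

	struct journal_key from_journal	= { .journal_seq = 1234 };
	struct journal_key allocated	= { .journal_seq = U32_MAX };

	/* allocated keys sort (and thus replay) after journaled ones: */
	BUG_ON(cmp_int(from_journal.journal_seq, allocated.journal_seq) >= 0);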
 static int bch2_journal_replay(struct bch_fs *c)
@@ -592,10 +516,7 @@ static int bch2_journal_replay(struct bch_fs *c)
 	struct journal_keys *keys = &c->journal_keys;
 	struct journal_key **keys_sorted, *k;
 	struct journal *j = &c->journal;
-	struct bch_dev *ca;
-	unsigned idx;
 	size_t i;
-	u64 seq;
 	int ret;
 
 	keys_sorted = kmalloc_array(sizeof(*keys_sorted), keys->nr, GFP_KERNEL);
@@ -609,76 +530,30 @@ static int bch2_journal_replay(struct bch_fs *c)
 	     sizeof(keys_sorted[0]),
 	     journal_sort_seq_cmp, NULL);
 
-	if (keys->nr)
+	if (keys->nr) {
+		bch_verbose(c, "starting journal replay, %zu keys", keys->nr);
 		replay_now_at(j, keys->journal_seq_base);
+	}
 
-	seq = j->replay_journal_seq;
-
-	/*
-	 * First replay updates to the alloc btree - these will only update the
-	 * btree key cache:
-	 */
-	for (i = 0; i < keys->nr; i++) {
-		k = keys_sorted[i];
-
-		cond_resched();
-
-		if (!k->level && k->btree_id == BTREE_ID_alloc) {
-			j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
-			ret = bch2_journal_replay_key(c, k);
-			if (ret)
-				goto err;
-		}
-	}
-
-	/* Now we can start the allocator threads: */
-	set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
-	for_each_member_device(ca, c, idx)
-		bch2_wake_allocator(ca);
-
-	/*
-	 * Next replay updates to interior btree nodes:
-	 */
 	for (i = 0; i < keys->nr; i++) {
 		k = keys_sorted[i];
 
 		cond_resched();
 
-		if (k->level) {
-			j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
-			ret = bch2_journal_replay_key(c, k);
-			if (ret)
-				goto err;
-		}
-	}
-
-	/*
-	 * Now that the btree is in a consistent state, we can start journal
-	 * reclaim (which will be flushing entries from the btree key cache back
-	 * to the btree:
-	 */
-	set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
-	set_bit(JOURNAL_RECLAIM_STARTED, &j->flags);
-	journal_reclaim_kick(j);
-
-	j->replay_journal_seq = seq;
-
-	/*
-	 * Now replay leaf node updates:
-	 */
-	for (i = 0; i < keys->nr; i++) {
-		k = keys_sorted[i];
-
-		cond_resched();
-
-		if (k->level || k->btree_id == BTREE_ID_alloc)
-			continue;
-
-		replay_now_at(j, keys->journal_seq_base + k->journal_seq);
+		if (!k->allocated)
+			replay_now_at(j, keys->journal_seq_base + k->journal_seq);
 
-		ret = bch2_journal_replay_key(c, k);
-		if (ret)
-			goto err;
+		ret = bch2_trans_do(c, NULL, NULL,
+				    BTREE_INSERT_LAZY_RW|
+				    BTREE_INSERT_NOFAIL|
+				    BTREE_INSERT_JOURNAL_RESERVED|
+				    (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0),
+			     bch2_journal_replay_key(&trans, k));
+		if (ret) {
+			bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
+				ret, bch2_btree_ids[k->btree_id], k->level);
+			goto err;
+		}
 	}
 
 	replay_now_at(j, j->replay_journal_seq_end);
@@ -686,14 +561,9 @@ static int bch2_journal_replay(struct bch_fs *c)
 
 	bch2_journal_set_replay_done(j);
 	bch2_journal_flush_all_pins(j);
-
-	kfree(keys_sorted);
-	return bch2_journal_error(j);
+	ret = bch2_journal_error(j);
 err:
-	bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
-		ret, bch2_btree_ids[k->btree_id], k->level);
 	kfree(keys_sorted);
 	return ret;
 }
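Each key is now committed through bch2_trans_do(), which wraps the expression it is given in a transaction commit loop. Roughly (this is the shape of the macro, not its literal body), each replayed key runs as:

	struct btree_trans trans;

	bch2_trans_init(&trans, c, 0, 0);
	do {
		bch2_trans_begin(&trans);
		ret =   bch2_journal_replay_key(&trans, k) ?:
			bch2_trans_commit(&trans, NULL, NULL, commit_flags);
	} while (ret == -EINTR);	/* transaction restart: retry */
	bch2_trans_exit(&trans);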
@@ -1217,7 +1087,11 @@ use_clean:
 
 	bch_verbose(c, "starting alloc read");
 	err = "error reading allocation information";
-	ret = bch2_alloc_read(c);
+
+	down_read(&c->gc_lock);
+	ret = bch2_alloc_read(c, false, false);
+	up_read(&c->gc_lock);
+
 	if (ret)
 		goto err;
 	bch_verbose(c, "alloc read done");
@@ -1231,6 +1105,13 @@ use_clean:
 
 	set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
 
+	/*
+	 * If we're not running fsck, this ensures bch2_fsck_err() calls are
+	 * instead interpreted as bch2_inconsistent_err() calls:
+	 */
+	if (!c->opts.fsck)
+		set_bit(BCH_FS_FSCK_DONE, &c->flags);
+
 	if (c->opts.fsck ||
 	    !(c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)) ||
 	    !(c->sb.compat & (1ULL << BCH_COMPAT_alloc_metadata)) ||
@@ -1265,24 +1146,8 @@ use_clean:
 	ret = bch2_journal_replay(c);
 	if (ret)
 		goto err;
-	bch_verbose(c, "journal replay done");
-
-	if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
-	    !c->opts.nochanges) {
-		/*
-		 * note that even when filesystem was clean there might be work
-		 * to do here, if we ran gc (because of fsck) which recalculated
-		 * oldest_gen:
-		 */
-		bch_verbose(c, "writing allocation info");
-		err = "error writing out alloc info";
-		ret = bch2_alloc_write_all(c, BTREE_INSERT_LAZY_RW);
-		if (ret) {
-			bch_err(c, "error writing alloc info");
-			goto err;
-		}
-		bch_verbose(c, "alloc write done");
-	}
+	if (c->opts.verbose || !c->sb.clean)
+		bch_info(c, "journal replay done");
 
 	if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
 		bch2_fs_lazy_rw(c);
@@ -1430,14 +1295,11 @@ int bch2_fs_initialize(struct bch_fs *c)
 	set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
 	set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+	set_bit(BCH_FS_FSCK_DONE, &c->flags);
 
 	for (i = 0; i < BTREE_ID_NR; i++)
 		bch2_btree_root_alloc(c, i);
 
-	set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
-	set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
-	set_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
-
 	err = "unable to allocate journal buckets";
 	for_each_online_member(ca, c, i) {
 		ret = bch2_dev_journal_alloc(ca);


@@ -31,6 +31,9 @@ struct btree_and_journal_iter {
 	}			last;
 };
 
+size_t bch2_journal_key_search(struct journal_keys *, enum btree_id,
+			       unsigned, struct bpos);
+
 int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id,
 				 unsigned, struct bkey_i *);
 int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
@@ -45,14 +48,13 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
 struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);
 void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
+						  struct bch_fs *, struct btree *,
+						  struct btree_node_iter, struct bpos);
 void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
 						struct bch_fs *,
 						struct btree *);
 
-typedef int (*btree_walk_key_fn)(struct btree_trans *, struct bkey_s_c);
-
-int bch2_btree_and_journal_walk(struct btree_trans *, enum btree_id, btree_walk_key_fn);
-
 void bch2_journal_keys_free(struct journal_keys *);
 void bch2_journal_entries_free(struct list_head *);


@@ -752,11 +752,24 @@ int bch2_write_super(struct bch_fs *c)
 	closure_sync(cl);
 
 	for_each_online_member(ca, c, i) {
-		if (!ca->sb_write_error &&
-		    ca->disk_sb.seq !=
-		    le64_to_cpu(ca->sb_read_scratch->seq)) {
-			bch2_fs_fatal_error(c,
-				"Superblock modified by another process");
+		if (ca->sb_write_error)
+			continue;
+
+		if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
+			bch2_fs_fatal_error(c,
+				"Superblock write was silently dropped! (seq %llu expected %llu)",
+				le64_to_cpu(ca->sb_read_scratch->seq),
+				ca->disk_sb.seq);
+			percpu_ref_put(&ca->io_ref);
+			ret = -EROFS;
+			goto out;
+		}
+
+		if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
+			bch2_fs_fatal_error(c,
+				"Superblock modified by another process (seq %llu expected %llu)",
+				le64_to_cpu(ca->sb_read_scratch->seq),
+				ca->disk_sb.seq);
 			percpu_ref_put(&ca->io_ref);
 			ret = -EROFS;
 			goto out;
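Rereading the superblock after the write lets the two comparisons distinguish failure modes: a sequence number lower than what we just wrote means the device silently dropped our write, while a higher one means another process wrote the superblock behind us. The decision as a standalone predicate (illustrative helper, not in the tree):

	/* Illustrative only: classify a reread superblock sequence number. */
	static const char *sb_seq_problem(u64 seq_on_disk, u64 seq_written)
	{
		if (seq_on_disk < seq_written)
			return "superblock write was silently dropped";
		if (seq_on_disk > seq_written)
			return "superblock modified by another process";
		return NULL;	/* sequence numbers match: the write landed */
	}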