mirror of https://github.com/koverstreet/bcachefs-tools.git
Update bcachefs sources to 09a5465430 bcachefs: Don't need to walk inodes on clean shutdown
parent 35fca2f044
commit 17c5215c1c
@@ -1 +1 @@
-99750eab4d583132cf61f071082c7cf21f5295c0
+09a546543006b60d44c4c51e7b40cd3ec7837a5e
@@ -1256,7 +1256,8 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
 
 void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca)
 {
-	closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full);
+	if (ca->alloc_thread)
+		closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full);
 }
 
 /* stop allocator thread: */
@@ -1534,6 +1535,8 @@ int bch2_fs_allocator_start(struct bch_fs *c)
 		}
 	}
 
+	set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
+
 	return bch2_alloc_write(c, false, &wrote);
 }
@@ -474,6 +474,7 @@ enum {
 	/* startup: */
 	BCH_FS_ALLOC_READ_DONE,
 	BCH_FS_ALLOCATOR_STARTED,
+	BCH_FS_ALLOCATOR_RUNNING,
 	BCH_FS_INITIAL_GC_DONE,
 	BCH_FS_FSCK_DONE,
 	BCH_FS_STARTED,
@@ -541,6 +542,8 @@ struct bch_fs {
 	struct bch_replicas_cpu	replicas_gc;
 	struct mutex		replicas_gc_lock;
 
+	struct journal_entry_res replicas_journal_res;
+
 	struct bch_disk_groups_cpu __rcu *disk_groups;
 
 	struct bch_opts		opts;
@@ -562,6 +565,7 @@ struct bch_fs {
 		u32		time_base_hi;
 		u32		time_precision;
 		u64		features;
+		u64		compat;
 	} sb;
 
 	struct bch_sb_handle	disk_sb;
@@ -1274,6 +1274,10 @@ enum bch_sb_features {
 	BCH_FEATURE_NR,
 };
 
+enum bch_sb_compat {
+	BCH_COMPAT_FEAT_ALLOC_INFO	= 0,
+};
+
 /* options: */
 
 #define BCH_REPLICAS_MAX		4U
@@ -1354,7 +1358,9 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
 	x(btree_root,		1)	\
 	x(prio_ptrs,		2)	\
 	x(blacklist,		3)	\
-	x(blacklist_v2,		4)
+	x(blacklist_v2,		4)	\
+	x(usage,		5)	\
+	x(data_usage,		6)
 
 enum {
 #define x(f, nr)	BCH_JSET_ENTRY_##f	= nr,
@@ -1384,6 +1390,24 @@ struct jset_entry_blacklist_v2 {
 	__le64			end;
 };
 
+enum {
+	FS_USAGE_RESERVED	= 0,
+	FS_USAGE_INODES		= 1,
+	FS_USAGE_KEY_VERSION	= 2,
+	FS_USAGE_NR		= 3
+};
+
+struct jset_entry_usage {
+	struct jset_entry	entry;
+	__le64			v;
+} __attribute__((packed));
+
+struct jset_entry_data_usage {
+	struct jset_entry	entry;
+	__le64			v;
+	struct bch_replicas_entry r;
+} __attribute__((packed));
+
 /*
  * On disk format for a journal entry:
  * seq is monotonically increasing; every journal entry has its own unique
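The two new entry types reuse the generic jset_entry header fields: btree_id carries the FS_USAGE_* value and, for FS_USAGE_RESERVED, level carries the persistent_reserved index, as the replay code later in this patch shows. A minimal decode sketch under those assumptions (the helper itself is illustrative, not part of the patch):

/* Illustrative only: pull the value out of a usage entry.  Field use
 * (btree_id = FS_USAGE_* type, level = reserved index) matches
 * journal_replay_entry_early() further down in this diff. */
static u64 usage_entry_value(struct jset_entry *entry)
{
	struct jset_entry_usage *u =
		container_of(entry, struct jset_entry_usage, entry);

	return le64_to_cpu(u->v);
}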
@@ -573,7 +573,8 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 
 	percpu_down_write(&c->mark_lock);
 
-	if (initial) {
+	if (initial &&
+	    !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) {
 		bch2_gc_done_nocheck(c);
 		goto out;
 	}
@@ -815,9 +816,6 @@ out:
 	bch2_gc_free(c);
 	up_write(&c->gc_lock);
 
-	if (!ret && initial)
-		set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
-
 	trace_gc_end(c);
 	bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
 
@@ -1245,19 +1243,3 @@ int bch2_gc_thread_start(struct bch_fs *c)
 	wake_up_process(p);
 	return 0;
 }
-
-/* Initial GC computes bucket marks during startup */
-
-int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
-{
-	int ret = bch2_gc(c, journal, true);
-
-	/*
-	 * Skip past versions that might have possibly been used (as nonces),
-	 * but hadn't had their pointers written:
-	 */
-	if (c->sb.encryption_type)
-		atomic64_add(1 << 16, &c->key_version);
-
-	return ret;
-}
@@ -7,7 +7,6 @@ void bch2_coalesce(struct bch_fs *);
 int bch2_gc(struct bch_fs *, struct list_head *, bool);
 void bch2_gc_thread_stop(struct bch_fs *);
 int bch2_gc_thread_start(struct bch_fs *);
-int bch2_initial_gc(struct bch_fs *, struct list_head *);
 void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
 
 /*
@@ -109,7 +108,7 @@ static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
 
 	do {
 		seq = read_seqcount_begin(&c->gc_pos_lock);
-		ret = gc_pos_cmp(pos, c->gc_pos) < 0;
+		ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
 	} while (read_seqcount_retry(&c->gc_pos_lock, seq));
 
 	return ret;
@@ -474,6 +474,7 @@ struct btree_root {
 	__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
 	u8			level;
 	u8			alive;
+	s8			error;
 };
 
 /*
@@ -2117,7 +2117,6 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
 	BUG_ON(btree_node_root(c, b));
 
 	__bch2_btree_set_root_inmem(c, b);
-	bch2_btree_set_root_ondisk(c, b, READ);
 }
 
 void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
@@ -113,6 +113,36 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c)
 	}
 }
 
+void bch2_fs_usage_initialize(struct bch_fs *c)
+{
+	struct bch_fs_usage *usage;
+	unsigned i, nr;
+
+	percpu_down_write(&c->mark_lock);
+	nr = sizeof(struct bch_fs_usage) / sizeof(u64) + c->replicas.nr;
+	usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0], nr);
+
+	for (i = 0; i < BCH_REPLICAS_MAX; i++)
+		usage->s.reserved += usage->persistent_reserved[i];
+
+	for (i = 0; i < c->replicas.nr; i++) {
+		struct bch_replicas_entry *e =
+			cpu_replicas_entry(&c->replicas, i);
+
+		switch (e->data_type) {
+		case BCH_DATA_BTREE:
+		case BCH_DATA_USER:
+			usage->s.data += usage->data[i];
+			break;
+		case BCH_DATA_CACHED:
+			usage->s.cached += usage->data[i];
+			break;
+		}
+	}
+
+	percpu_up_write(&c->mark_lock);
+}
+
 #define bch2_usage_read_raw(_stats)					\
 ({									\
 	typeof(*this_cpu_ptr(_stats)) _acc;				\
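bch2_fs_usage_initialize() rebuilds the summary fields (s.reserved, s.data, s.cached) from the raw per-replicas-entry counters after they have been loaded. bch2_acc_percpu_u64s() is not part of this diff; it presumably sums nr u64s across all CPUs and returns the accumulated copy. A hedged model of that helper:

/* Hedged model of bch2_acc_percpu_u64s(): fold every CPU's copy of an
 * array of nr u64s into one copy and return it.  Sketch only; the real
 * helper lives elsewhere in the tree, and preemption is ignored here. */
static u64 *acc_percpu_u64s_model(u64 __percpu *p, unsigned nr)
{
	u64 *ret = this_cpu_ptr(p);
	unsigned i;
	int cpu;

	for_each_possible_cpu(cpu) {
		u64 *src = per_cpu_ptr(p, cpu);

		if (src != ret)
			for (i = 0; i < nr; i++)
				ret[i] += src[i];
	}
	return ret;
}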
@@ -814,7 +844,7 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 		ret = bch2_mark_stripe(c, k, inserting,
 				       fs_usage, journal_seq, flags, gc);
 		break;
-	case KEY_TYPE_alloc:
+	case KEY_TYPE_inode:
 		if (inserting)
 			fs_usage->s.nr_inodes++;
 		else
@@ -994,10 +1024,7 @@ void bch2_mark_update(struct btree_insert *trans,
 
 static u64 bch2_recalc_sectors_available(struct bch_fs *c)
 {
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		per_cpu_ptr(c->pcpu, cpu)->sectors_available = 0;
+	percpu_u64_set(&c->pcpu->sectors_available, 0);
 
 	return avail_factor(bch2_fs_sectors_free(c));
 }
@@ -247,6 +247,7 @@ static inline u64 bch2_fs_sectors_free(struct bch_fs *c)
 /* key/bucket marking: */
 
 void bch2_bucket_seq_cleanup(struct bch_fs *);
+void bch2_fs_usage_initialize(struct bch_fs *);
 
 void bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *,
 			    size_t, struct bucket_mark *);
@@ -1186,6 +1186,11 @@ static int check_inode(struct bch_fs *c,
 	}
 
 	if (u.bi_flags & BCH_INODE_UNLINKED) {
+		fsck_err_on(c->sb.clean, c,
+			    "filesystem marked clean, "
+			    "but inode %llu unlinked",
+			    u.bi_inum);
+
 		bch_verbose(c, "deleting inode %llu", u.bi_inum);
 
 		ret = bch2_inode_rm(c, u.bi_inum);
@@ -1388,16 +1393,13 @@ static int check_inodes_fast(struct bch_fs *c)
 		    (BCH_INODE_I_SIZE_DIRTY|
 		     BCH_INODE_I_SECTORS_DIRTY|
 		     BCH_INODE_UNLINKED)) {
-			fsck_err_on(c->sb.clean, c,
-				"filesystem marked clean but found inode %llu with flags %x",
-				inode.k->p.inode, inode.v->bi_flags);
 			ret = check_inode(c, NULL, &iter, inode, NULL);
 			BUG_ON(ret == -EINTR);
 			if (ret)
 				break;
 		}
 	}
-fsck_err:
+
 	return bch2_btree_iter_unlock(&iter) ?: ret;
 }
 
@@ -1459,9 +1461,10 @@ int bch2_fsck(struct bch_fs *c)
 	if (c->opts.fsck)
 		return bch2_fsck_full(c);
 
-	if (!c->sb.clean &&
-	    !(c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK)))
-		return bch2_fsck_inode_nlink(c);
+	if (c->sb.clean)
+		return 0;
 
-	return bch2_fsck_walk_inodes_only(c);
+	return c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK)
+		? bch2_fsck_walk_inodes_only(c)
+		: bch2_fsck_inode_nlink(c);
 }
@@ -64,11 +64,6 @@ static void bch2_journal_buf_init(struct journal *j)
 	buf->data->u64s	= 0;
 }
 
-static inline size_t journal_entry_u64s_reserve(struct journal_buf *buf)
-{
-	return BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX);
-}
-
 static inline bool journal_entry_empty(struct jset *j)
 {
 	struct jset_entry *i;
@@ -130,7 +125,7 @@ static enum {
 
 	j->prev_buf_sectors =
 		vstruct_blocks_plus(buf->data, c->block_bits,
-				    journal_entry_u64s_reserve(buf)) *
+				    buf->u64s_reserved) *
 		c->opts.block_size;
 	BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
 
@@ -225,6 +220,7 @@ static int journal_entry_open(struct journal *j)
 		return sectors;
 
 	buf->disk_sectors	= sectors;
+	buf->u64s_reserved	= j->entry_u64s_reserved;
 
 	sectors = min_t(unsigned, sectors, buf->size >> 9);
 	j->cur_buf_sectors	= sectors;
@@ -233,11 +229,7 @@ static int journal_entry_open(struct journal *j)
 
 	/* Subtract the journal header */
 	u64s -= sizeof(struct jset) / sizeof(u64);
-	/*
-	 * Btree roots, prio pointers don't get added until right before we do
-	 * the write:
-	 */
-	u64s -= journal_entry_u64s_reserve(buf);
+	u64s -= buf->u64s_reserved;
 	u64s  = max_t(ssize_t, 0L, u64s);
 
 	BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL);
@@ -436,6 +428,45 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
 	return ret;
 }
 
+/* journal_entry_res: */
+
+void bch2_journal_entry_res_resize(struct journal *j,
+				   struct journal_entry_res *res,
+				   unsigned new_u64s)
+{
+	union journal_res_state state;
+	int d = new_u64s - res->u64s;
+
+	spin_lock(&j->lock);
+
+	j->entry_u64s_reserved += d;
+	if (d <= 0)
+		goto out_unlock;
+
+	j->cur_entry_u64s -= d;
+	smp_mb();
+	state = READ_ONCE(j->reservations);
+
+	if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL &&
+	    state.cur_entry_offset > j->cur_entry_u64s) {
+		j->cur_entry_u64s += d;
+		/*
+		 * Not enough room in current journal entry, have to flush it:
+		 */
+		__journal_entry_close(j);
+		goto out;
+	}
+
+	journal_cur_buf(j)->u64s_reserved += d;
+out_unlock:
+	spin_unlock(&j->lock);
+out:
+	res->u64s += d;
+	return;
+}
+
 /* journal flushing: */
 
 u64 bch2_journal_last_unwritten_seq(struct journal *j)
 {
 	u64 seq;
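bch2_journal_entry_res_resize() adjusts a standing reservation that is held back in every journal entry: the delta is charged to j->entry_u64s_reserved, and if the currently open entry can no longer honor a grown reservation, that entry is closed. A minimal caller sketch, modeled on the replicas hookup later in this patch (the wrapper name is illustrative):

/* Sketch: grow the filesystem's standing replicas reservation.
 * c->replicas_journal_res is the journal_entry_res field this patch
 * adds to struct bch_fs; new_u64s would come from
 * reserve_journal_replicas(). */
static void replicas_res_resize(struct bch_fs *c, unsigned new_u64s)
{
	bch2_journal_entry_res_resize(&c->journal,
				      &c->replicas_journal_res,
				      new_u64s);
}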
@@ -1020,6 +1051,10 @@ int bch2_fs_journal_init(struct journal *j)
 	j->write_delay_ms	= 1000;
 	j->reclaim_delay_ms	= 100;
 
+	/* Btree roots: */
+	j->entry_u64s_reserved +=
+		BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX);
+
 	atomic64_set(&j->reservations.counter,
 		((union journal_res_state)
 		 { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
@@ -330,6 +330,10 @@ out:
 	return 0;
 }
 
+void bch2_journal_entry_res_resize(struct journal *,
+				   struct journal_entry_res *,
+				   unsigned);
+
 u64 bch2_journal_last_unwritten_seq(struct journal *);
 int bch2_journal_open_seq_async(struct journal *, u64, struct closure *);
@@ -284,6 +284,7 @@ static int journal_entry_validate_blacklist_v2(struct bch_fs *c,
 	if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, c,
 		"invalid journal seq blacklist entry: bad size")) {
 		journal_entry_null_range(entry, vstruct_next(entry));
+		goto out;
 	}
 
 	bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry);
@@ -293,6 +294,49 @@ static int journal_entry_validate_blacklist_v2(struct bch_fs *c,
 		"invalid journal seq blacklist entry: start > end")) {
 		journal_entry_null_range(entry, vstruct_next(entry));
 	}
+out:
 fsck_err:
 	return ret;
 }
 
+static int journal_entry_validate_usage(struct bch_fs *c,
+					struct jset *jset,
+					struct jset_entry *entry,
+					int write)
+{
+	struct jset_entry_usage *u =
+		container_of(entry, struct jset_entry_usage, entry);
+	unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+	int ret = 0;
+
+	if (journal_entry_err_on(bytes < sizeof(*u),
+				 c,
+				 "invalid journal entry usage: bad size")) {
+		journal_entry_null_range(entry, vstruct_next(entry));
+		return ret;
+	}
+
+fsck_err:
+	return ret;
+}
+
+static int journal_entry_validate_data_usage(struct bch_fs *c,
+					     struct jset *jset,
+					     struct jset_entry *entry,
+					     int write)
+{
+	struct jset_entry_data_usage *u =
+		container_of(entry, struct jset_entry_data_usage, entry);
+	unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+	int ret = 0;
+
+	if (journal_entry_err_on(bytes < sizeof(*u) ||
+				 bytes < sizeof(*u) + u->r.nr_devs,
+				 c,
+				 "invalid journal entry usage: bad size")) {
+		journal_entry_null_range(entry, vstruct_next(entry));
+		return ret;
+	}
+
+fsck_err:
+	return ret;
+}
@@ -315,18 +359,10 @@ static const struct jset_entry_ops bch2_jset_entry_ops[] = {
 static int journal_entry_validate(struct bch_fs *c, struct jset *jset,
 				  struct jset_entry *entry, int write)
 {
-	int ret = 0;
-
-	if (entry->type >= BCH_JSET_ENTRY_NR) {
-		journal_entry_err(c, "invalid journal entry type %u",
-				  entry->type);
-		journal_entry_null_range(entry, vstruct_next(entry));
-		return 0;
-	}
-
-	ret = bch2_jset_entry_ops[entry->type].validate(c, jset, entry, write);
-fsck_err:
-	return ret;
+	return entry->type < BCH_JSET_ENTRY_NR
+		? bch2_jset_entry_ops[entry->type].validate(c, jset,
+							    entry, write)
+		: 0;
 }
 
 static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
@@ -848,19 +884,6 @@ err:
 
 /* journal write: */
 
-static void bch2_journal_add_btree_root(struct journal_buf *buf,
-					enum btree_id id, struct bkey_i *k,
-					unsigned level)
-{
-	struct jset_entry *entry;
-
-	entry = bch2_journal_add_entry_noreservation(buf, k->k.u64s);
-	entry->type	= BCH_JSET_ENTRY_btree_root;
-	entry->btree_id = id;
-	entry->level	= level;
-	memcpy_u64s(entry->_data, k, k->k.u64s);
-}
-
 static unsigned journal_dev_buckets_available(struct journal *j,
 					      struct journal_device *ja)
 {
@@ -1191,25 +1214,26 @@ void bch2_journal_write(struct closure *cl)
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bch_dev *ca;
 	struct journal_buf *w = journal_prev_buf(j);
+	struct jset_entry *start, *end;
 	struct jset *jset;
 	struct bio *bio;
 	struct bch_extent_ptr *ptr;
 	bool validate_before_checksum = false;
-	unsigned i, sectors, bytes;
+	unsigned i, sectors, bytes, u64s;
 
 	journal_buf_realloc(j, w);
 	jset = w->data;
 
 	j->write_start_time = local_clock();
-	mutex_lock(&c->btree_root_lock);
-	for (i = 0; i < BTREE_ID_NR; i++) {
-		struct btree_root *r = &c->btree_roots[i];
-
-		if (r->alive)
-			bch2_journal_add_btree_root(w, i, &r->key, r->level);
-	}
-	c->btree_roots_dirty = false;
-	mutex_unlock(&c->btree_root_lock);
+
+	start	= vstruct_last(w->data);
+	end	= bch2_journal_super_entries_add_common(c, start);
+	u64s	= (u64 *) end - (u64 *) start;
+	BUG_ON(u64s > j->entry_u64s_reserved);
+
+	le32_add_cpu(&w->data->u64s, u64s);
+	BUG_ON(vstruct_sectors(jset, c->block_bits) >
+	       w->disk_sectors);
 
 	journal_write_compact(jset);
@@ -23,6 +23,7 @@ struct journal_buf {
 
 	unsigned		size;
 	unsigned		disk_sectors;
+	unsigned		u64s_reserved;
 	/* bloom filter: */
 	unsigned long		has_inode[1024 / sizeof(unsigned long)];
 };
@@ -154,6 +155,9 @@ struct journal {
 	u64			seq_ondisk;
 	u64			last_seq_ondisk;
 
+	/* Reserved space in journal entry to be used just prior to write */
+	unsigned		entry_u64s_reserved;
+
 	/*
 	 * FIFO of journal entries whose btree updates have not yet been
 	 * written out.
@@ -242,4 +246,11 @@ struct journal_device {
 	struct closure		read;
 };
 
+/*
+ * journal_entry_res - reserve space in every journal entry:
+ */
+struct journal_entry_res {
+	unsigned		u64s;
+};
+
 #endif /* _BCACHEFS_JOURNAL_TYPES_H */
@@ -5,6 +5,7 @@
 #include "btree_update.h"
 #include "btree_update_interior.h"
+#include "btree_io.h"
 #include "buckets.h"
 #include "dirent.h"
 #include "ec.h"
 #include "error.h"
@@ -12,16 +13,17 @@
 #include "journal_io.h"
 #include "quota.h"
 #include "recovery.h"
+#include "replicas.h"
 #include "super-io.h"
 
 #include <linux/stat.h>
 
 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
 
-struct bkey_i *btree_root_find(struct bch_fs *c,
-			       struct bch_sb_field_clean *clean,
-			       struct jset *j,
-			       enum btree_id id, unsigned *level)
+static struct bkey_i *btree_root_find(struct bch_fs *c,
+				      struct bch_sb_field_clean *clean,
+				      struct jset *j,
+				      enum btree_id id, unsigned *level)
 {
 	struct bkey_i *k;
 	struct jset_entry *entry, *start, *end;
@@ -49,6 +51,60 @@ found:
 	return k;
 }
 
+static int journal_replay_entry_early(struct bch_fs *c,
+				      struct jset_entry *entry)
+{
+	int ret = 0;
+
+	switch (entry->type) {
+	case BCH_JSET_ENTRY_btree_root: {
+		struct btree_root *r = &c->btree_roots[entry->btree_id];
+
+		if (entry->u64s) {
+			r->level = entry->level;
+			bkey_copy(&r->key, &entry->start[0]);
+			r->error = 0;
+		} else {
+			r->error = -EIO;
+		}
+		r->alive = true;
+		break;
+	}
+	case BCH_JSET_ENTRY_usage: {
+		struct jset_entry_usage *u =
+			container_of(entry, struct jset_entry_usage, entry);
+
+		switch (entry->btree_id) {
+		case FS_USAGE_RESERVED:
+			if (entry->level < BCH_REPLICAS_MAX)
+				percpu_u64_set(&c->usage[0]->
+					       persistent_reserved[entry->level],
+					       le64_to_cpu(u->v));
+			break;
+		case FS_USAGE_INODES:
+			percpu_u64_set(&c->usage[0]->s.nr_inodes,
+				       le64_to_cpu(u->v));
+			break;
+		case FS_USAGE_KEY_VERSION:
+			atomic64_set(&c->key_version,
+				     le64_to_cpu(u->v));
+			break;
+		}
+
+		break;
+	}
+	case BCH_JSET_ENTRY_data_usage: {
+		struct jset_entry_data_usage *u =
+			container_of(entry, struct jset_entry_data_usage, entry);
+		ret = bch2_replicas_set_usage(c, &u->r,
+					      le64_to_cpu(u->v));
+		break;
+	}
+	}
+
+	return ret;
+}
+
 static int verify_superblock_clean(struct bch_fs *c,
 				   struct bch_sb_field_clean *clean,
 				   struct jset *j)
@@ -107,7 +163,9 @@ static bool journal_empty(struct list_head *journal)
 
 	list_for_each_entry(i, journal, list) {
 		vstruct_for_each(&i->j, entry) {
-			if (entry->type == BCH_JSET_ENTRY_btree_root)
+			if (entry->type == BCH_JSET_ENTRY_btree_root ||
+			    entry->type == BCH_JSET_ENTRY_usage ||
+			    entry->type == BCH_JSET_ENTRY_data_usage)
 				continue;
 
 			if (entry->type == BCH_JSET_ENTRY_btree_keys &&
@@ -124,6 +182,7 @@ int bch2_fs_recovery(struct bch_fs *c)
 {
 	const char *err = "cannot allocate memory";
 	struct bch_sb_field_clean *clean = NULL, *sb_clean = NULL;
+	struct jset_entry *entry;
 	LIST_HEAD(journal);
 	struct jset *j = NULL;
 	unsigned i;
@@ -176,28 +235,46 @@ int bch2_fs_recovery(struct bch_fs *c)
 	fsck_err_on(clean && !journal_empty(&journal), c,
 		    "filesystem marked clean but journal not empty");
 
+	err = "insufficient memory";
 	if (clean) {
 		c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock);
 		c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock);
+
+		for (entry = clean->start;
+		     entry != vstruct_end(&clean->field);
+		     entry = vstruct_next(entry)) {
+			ret = journal_replay_entry_early(c, entry);
+			if (ret)
+				goto err;
+		}
 	} else {
+		struct journal_replay *i;
+
 		c->bucket_clock[READ].hand = le16_to_cpu(j->read_clock);
 		c->bucket_clock[WRITE].hand = le16_to_cpu(j->write_clock);
+
+		list_for_each_entry(i, &journal, list)
+			vstruct_for_each(&i->j, entry) {
+				ret = journal_replay_entry_early(c, entry);
+				if (ret)
+					goto err;
+			}
 	}
 
-	for (i = 0; i < BTREE_ID_NR; i++) {
-		unsigned level;
-		struct bkey_i *k;
+	bch2_fs_usage_initialize(c);
 
-		k = btree_root_find(c, clean, j, i, &level);
-		if (!k)
+	for (i = 0; i < BTREE_ID_NR; i++) {
+		struct btree_root *r = &c->btree_roots[i];
+
+		if (!r->alive)
 			continue;
 
 		err = "invalid btree root pointer";
-		if (IS_ERR(k))
+		if (r->error)
 			goto err;
 
 		err = "error reading btree root";
-		if (bch2_btree_root_read(c, i, k, level)) {
+		if (bch2_btree_root_read(c, i, &r->key, r->level)) {
 			if (i != BTREE_ID_ALLOC)
 				goto err;
 
@@ -214,21 +291,33 @@ int bch2_fs_recovery(struct bch_fs *c)
 	if (ret)
 		goto err;
 
 	bch_verbose(c, "starting stripes_read");
 	ret = bch2_stripes_read(c, &journal);
 	if (ret)
 		goto err;
-	pr_info("stripes_read done");
+	bch_verbose(c, "stripes_read done");
 
 	set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
 
-	bch_verbose(c, "starting mark and sweep:");
-	err = "error in recovery";
-	ret = bch2_initial_gc(c, &journal);
-	if (ret)
-		goto err;
-	bch_verbose(c, "mark and sweep done");
+	if (!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
+	    c->opts.fsck) {
+		bch_verbose(c, "starting mark and sweep:");
+		err = "error in recovery";
+		ret = bch2_gc(c, &journal, true);
+		if (ret)
+			goto err;
+		bch_verbose(c, "mark and sweep done");
+	}
 
 	clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
+	set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+
+	/*
+	 * Skip past versions that might have possibly been used (as nonces),
+	 * but hadn't had their pointers written:
+	 */
+	if (c->sb.encryption_type && !c->sb.clean)
+		atomic64_add(1 << 16, &c->key_version);
 
 	if (c->opts.noreplay)
 		goto out;
@@ -311,15 +400,22 @@ int bch2_fs_initialize(struct bch_fs *c)
 
 	bch_notice(c, "initializing new filesystem");
 
+	mutex_lock(&c->sb_lock);
 	for_each_online_member(ca, c, i)
 		bch2_mark_dev_superblock(c, ca, 0);
+	mutex_unlock(&c->sb_lock);
+
+	set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
 
 	for (i = 0; i < BTREE_ID_NR; i++)
 		bch2_btree_root_alloc(c, i);
 
-	ret = bch2_initial_gc(c, &journal);
+	ret = bch2_gc(c, &journal, true);
 	if (ret)
 		goto err;
 
+	set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+
 	err = "unable to allocate journal buckets";
 	for_each_online_member(ca, c, i)
 		if (bch2_dev_journal_alloc(ca)) {
@@ -1,5 +1,6 @@
 
 #include "bcachefs.h"
+#include "journal.h"
 #include "replicas.h"
 #include "super-io.h"
 
@@ -28,11 +29,6 @@ static void replicas_entry_sort(struct bch_replicas_entry *e)
 	bubble_sort(e->devs, e->nr_devs, u8_cmp);
 }
 
-#define for_each_cpu_replicas_entry(_r, _i)				\
-	for (_i = (_r)->entries;					\
-	     (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
-	     _i = (void *) (_i) + (_r)->entry_size)
-
 static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
 {
 	eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
@@ -301,6 +297,32 @@ err:
 	return ret;
 }
 
+static unsigned reserve_journal_replicas(struct bch_fs *c,
+					 struct bch_replicas_cpu *r)
+{
+	struct bch_replicas_entry *e;
+	unsigned journal_res_u64s = 0;
+
+	/* nr_inodes: */
+	journal_res_u64s +=
+		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
+
+	/* key_version: */
+	journal_res_u64s +=
+		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
+
+	/* persistent_reserved: */
+	journal_res_u64s +=
+		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) *
+		BCH_REPLICAS_MAX;
+
+	for_each_cpu_replicas_entry(r, e)
+		journal_res_u64s +=
+			DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) +
+				     e->nr_devs, sizeof(u64));
+	return journal_res_u64s;
+}
+
 noinline
 static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 				       struct bch_replicas_entry *new_entry)
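The reservation is sized as one jset_entry_usage each for nr_inodes and key_version, BCH_REPLICAS_MAX more for persistent_reserved, plus one variable-length jset_entry_data_usage per replicas entry. A worked example, assuming sizeof(struct jset_entry_usage) == sizeof(struct jset_entry_data_usage) == 16 and a replicas table of three entries with two devices each:

unsigned u64s = 0;

u64s += DIV_ROUND_UP(16, 8);		/* nr_inodes:    2 u64s */
u64s += DIV_ROUND_UP(16, 8);		/* key_version:  2 u64s */
u64s += DIV_ROUND_UP(16, 8) * 4;	/* BCH_REPLICAS_MAX == 4: 8 u64s */
u64s += 3 * DIV_ROUND_UP(16 + 2, 8);	/* data_usage: 3 * 3 = 9 u64s */
/* total: 21 u64s held back in every journal entry */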
@@ -328,6 +350,10 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 		ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
 		if (ret)
 			goto err;
+
+		bch2_journal_entry_res_resize(&c->journal,
+				&c->replicas_journal_res,
+				reserve_journal_replicas(c, &new_r));
 	}
 
 	if (!new_r.entries &&
@@ -425,14 +451,12 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
 		struct bch_replicas_entry *e =
 			cpu_replicas_entry(&c->replicas, i);
 		struct bch_replicas_cpu n;
-		u64 v = 0;
-		int cpu;
+		u64 v;
 
 		if (__replicas_has_entry(&c->replicas_gc, e))
 			continue;
 
-		for_each_possible_cpu(cpu)
-			v += *per_cpu_ptr(&c->usage[0]->data[i], cpu);
+		v = percpu_u64_get(&c->usage[0]->data[i]);
 		if (!v)
 			continue;
 
@@ -510,6 +534,34 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
 	return 0;
 }
 
+int bch2_replicas_set_usage(struct bch_fs *c,
+			    struct bch_replicas_entry *r,
+			    u64 sectors)
+{
+	int ret, idx = bch2_replicas_entry_idx(c, r);
+
+	if (idx < 0) {
+		struct bch_replicas_cpu n;
+
+		n = cpu_replicas_add_entry(&c->replicas, r);
+		if (!n.entries)
+			return -ENOMEM;
+
+		ret = replicas_table_update(c, &n);
+		if (ret)
+			return ret;
+
+		kfree(n.entries);
+
+		idx = bch2_replicas_entry_idx(c, r);
+		BUG_ON(ret < 0);
+	}
+
+	percpu_u64_set(&c->usage[0]->data[idx], sectors);
+
+	return 0;
+}
+
 /* Replicas tracking - superblock: */
 
 static int
static int
|
||||
@ -596,6 +648,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
|
||||
bch2_cpu_replicas_sort(&new_r);
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
|
||||
ret = replicas_table_update(c, &new_r);
|
||||
percpu_up_write(&c->mark_lock);
|
||||
|
||||
@@ -916,3 +969,10 @@ unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
 
 	return ret;
 }
+
+int bch2_fs_replicas_init(struct bch_fs *c)
+{
+	c->journal.entry_u64s_reserved +=
+		reserve_journal_replicas(c, &c->replicas);
+	return 0;
+}
@@ -56,6 +56,15 @@ unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
 int bch2_replicas_gc_end(struct bch_fs *, int);
 int bch2_replicas_gc_start(struct bch_fs *, unsigned);
 
+int bch2_replicas_set_usage(struct bch_fs *,
+			    struct bch_replicas_entry *,
+			    u64);
+
+#define for_each_cpu_replicas_entry(_r, _i)				\
+	for (_i = (_r)->entries;					\
+	     (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
+	     _i = (void *) (_i) + (_r)->entry_size)
+
 /* iterate over superblock replicas - used by userspace tools: */
 
 #define replicas_entry_bytes(_i)					\
@@ -79,4 +88,6 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
 extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
 extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;
 
+int bch2_fs_replicas_init(struct bch_fs *);
+
 #endif /* _BCACHEFS_REPLICAS_H */
@@ -181,6 +181,7 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
 		}
 	}
 
+	f = bch2_sb_field_get(sb->sb, type);
 	f = __bch2_sb_field_resize(sb, f, u64s);
 	if (f)
 		f->type = cpu_to_le32(type);
@@ -362,6 +363,7 @@ static void bch2_sb_update(struct bch_fs *c)
 	c->sb.time_base_hi	= le32_to_cpu(src->time_base_hi);
 	c->sb.time_precision	= le32_to_cpu(src->time_precision);
 	c->sb.features		= le64_to_cpu(src->features[0]);
+	c->sb.compat		= le64_to_cpu(src->compat[0]);
 
 	for_each_member_device(ca, c, i)
 		ca->mi = bch2_mi_to_cpu(mi->members + i);
@@ -881,29 +883,132 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
 		bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write);
 }
 
-void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
+static void bch2_fs_mark_dirty(struct bch_fs *c)
 {
-	struct bch_sb_field_clean *sb_clean;
-	unsigned u64s = sizeof(*sb_clean) / sizeof(u64);
-	struct jset_entry *entry;
-	struct btree_root *r;
-
 	mutex_lock(&c->sb_lock);
-	if (clean == BCH_SB_CLEAN(c->disk_sb.sb))
-		goto out;
+	if (BCH_SB_CLEAN(c->disk_sb.sb) ||
+	    (c->disk_sb.sb->compat[0] & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) {
+		SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+		c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+		bch2_write_super(c);
+	}
+	mutex_unlock(&c->sb_lock);
+}
 
-	SET_BCH_SB_CLEAN(c->disk_sb.sb, clean);
-
-	if (!clean)
-		goto write_super;
+struct jset_entry *
+bch2_journal_super_entries_add_common(struct bch_fs *c,
+				      struct jset_entry *entry)
+{
+	struct btree_root *r;
+	unsigned i;
 
 	mutex_lock(&c->btree_root_lock);
 
 	for (r = c->btree_roots;
 	     r < c->btree_roots + BTREE_ID_NR;
 	     r++)
-		if (r->alive)
-			u64s += jset_u64s(r->key.u64s);
+		if (r->alive) {
+			entry->u64s	= r->key.u64s;
+			entry->btree_id	= r - c->btree_roots;
+			entry->level	= r->level;
+			entry->type	= BCH_JSET_ENTRY_btree_root;
+			bkey_copy(&entry->start[0], &r->key);
+
+			entry = vstruct_next(entry);
+		}
+	c->btree_roots_dirty = false;
+
+	mutex_unlock(&c->btree_root_lock);
+
+	percpu_down_read_preempt_disable(&c->mark_lock);
+
+	{
+		u64 nr_inodes = percpu_u64_get(&c->usage[0]->s.nr_inodes);
+		struct jset_entry_usage *u =
+			container_of(entry, struct jset_entry_usage, entry);
+
+		memset(u, 0, sizeof(*u));
+		u->entry.u64s	= DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;
+		u->entry.type	= BCH_JSET_ENTRY_usage;
+		u->entry.btree_id = FS_USAGE_INODES;
+		u->v		= cpu_to_le64(nr_inodes);
+
+		entry = vstruct_next(entry);
+	}
+
+	{
+		struct jset_entry_usage *u =
+			container_of(entry, struct jset_entry_usage, entry);
+
+		memset(u, 0, sizeof(*u));
+		u->entry.u64s	= DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;
+		u->entry.type	= BCH_JSET_ENTRY_usage;
+		u->entry.btree_id = FS_USAGE_KEY_VERSION;
+		u->v		= cpu_to_le64(atomic64_read(&c->key_version));
+
+		entry = vstruct_next(entry);
+	}
+
+	for (i = 0; i < BCH_REPLICAS_MAX; i++) {
+		struct jset_entry_usage *u =
+			container_of(entry, struct jset_entry_usage, entry);
+		u64 sectors = percpu_u64_get(&c->usage[0]->persistent_reserved[i]);
+
+		if (!sectors)
+			continue;
+
+		memset(u, 0, sizeof(*u));
+		u->entry.u64s	= DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;
+		u->entry.type	= BCH_JSET_ENTRY_usage;
+		u->entry.btree_id = FS_USAGE_RESERVED;
+		u->entry.level	= i;
+		u->v		= sectors;
+
+		entry = vstruct_next(entry);
+	}
+
+	for (i = 0; i < c->replicas.nr; i++) {
+		struct bch_replicas_entry *e =
+			cpu_replicas_entry(&c->replicas, i);
+		u64 sectors = percpu_u64_get(&c->usage[0]->data[i]);
+		struct jset_entry_data_usage *u =
+			container_of(entry, struct jset_entry_data_usage, entry);
+
+		memset(u, 0, sizeof(*u));
+		u->entry.u64s	= DIV_ROUND_UP(sizeof(*u) + e->nr_devs,
+					       sizeof(u64)) - 1;
+		u->entry.type	= BCH_JSET_ENTRY_data_usage;
+		u->v		= cpu_to_le64(sectors);
+		memcpy(&u->r, e, replicas_entry_bytes(e));
+
+		entry = vstruct_next(entry);
+	}
+
+	percpu_up_read_preempt_enable(&c->mark_lock);
+
+	return entry;
+}
+
+void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
+{
+	struct bch_sb_field_clean *sb_clean;
+	struct jset_entry *entry;
+	unsigned u64s;
+
+	if (!clean) {
+		bch2_fs_mark_dirty(c);
+		return;
+	}
+
+	mutex_lock(&c->sb_lock);
+	if (BCH_SB_CLEAN(c->disk_sb.sb))
+		goto out;
+
+	SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
+
+	c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
+
+	u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;
 
 	sb_clean = bch2_sb_resize_clean(&c->disk_sb, u64s);
 	if (!sb_clean) {
@@ -917,30 +1022,16 @@ void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
 	sb_clean->journal_seq = journal_cur_seq(&c->journal) - 1;
 
 	entry = sb_clean->start;
+	entry = bch2_journal_super_entries_add_common(c, entry);
+	BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
+
 	memset(entry, 0,
 	       vstruct_end(&sb_clean->field) - (void *) entry);
 
-	for (r = c->btree_roots;
-	     r < c->btree_roots + BTREE_ID_NR;
-	     r++)
-		if (r->alive) {
-			entry->u64s	= r->key.u64s;
-			entry->btree_id	= r - c->btree_roots;
-			entry->level	= r->level;
-			entry->type	= BCH_JSET_ENTRY_btree_root;
-			bkey_copy(&entry->start[0], &r->key);
-			entry = vstruct_next(entry);
-			BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
-		}
-
-	BUG_ON(entry != vstruct_end(&sb_clean->field));
-
 	if (le16_to_cpu(c->disk_sb.sb->version) <
 	    bcachefs_metadata_version_bkey_renumber)
 		bch2_sb_clean_renumber(sb_clean, WRITE);
 
-	mutex_unlock(&c->btree_root_lock);
-write_super:
 	bch2_write_super(c);
 out:
 	mutex_unlock(&c->sb_lock);
@@ -134,6 +134,10 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
 
 /* BCH_SB_FIELD_clean: */
 
+struct jset_entry *
+bch2_journal_super_entries_add_common(struct bch_fs *,
+				      struct jset_entry *);
+
 void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
 
 void bch2_fs_mark_clean(struct bch_fs *, bool);
@@ -222,6 +222,9 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 	 */
 	bch2_journal_flush_all_pins(&c->journal);
 
+	if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags))
+		goto allocator_not_running;
+
 	do {
 		ret = bch2_alloc_write(c, false, &wrote);
 		if (ret) {
@@ -250,10 +253,12 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 		closure_wait_event(&c->btree_interior_update_wait,
 				   !bch2_btree_interior_updates_nr_pending(c));
 	} while (wrote);
 
+allocator_not_running:
 	for_each_member_device(ca, c, i)
 		bch2_dev_allocator_stop(ca);
 
+	clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
+
 	bch2_fs_journal_stop(&c->journal);
 
 	/* XXX: mark super that alloc info is persistent */
@@ -380,6 +385,8 @@ const char *bch2_fs_read_write(struct bch_fs *c)
 		goto err;
 	}
 
+	set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
+
 	err = "error starting btree GC thread";
 	if (bch2_gc_thread_start(c))
 		goto err;
@@ -683,6 +690,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	    bch2_io_clock_init(&c->io_clock[READ]) ||
 	    bch2_io_clock_init(&c->io_clock[WRITE]) ||
 	    bch2_fs_journal_init(&c->journal) ||
+	    bch2_fs_replicas_init(c) ||
 	    bch2_fs_btree_cache_init(c) ||
 	    bch2_fs_io_init(c) ||
 	    bch2_fs_encryption_init(c) ||
@@ -1101,9 +1109,12 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
 	if (ret)
 		return ret;
 
-	mutex_lock(&c->sb_lock);
-	bch2_mark_dev_superblock(ca->fs, ca, 0);
-	mutex_unlock(&c->sb_lock);
+	if (test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags) &&
+	    !percpu_u64_get(&ca->usage[0]->buckets[BCH_DATA_SB])) {
+		mutex_lock(&c->sb_lock);
+		bch2_mark_dev_superblock(ca->fs, ca, 0);
+		mutex_unlock(&c->sb_lock);
+	}
 
 	bch2_dev_sysfs_online(c, ca);
@@ -132,6 +132,7 @@ do { \
 write_attribute(trigger_journal_flush);
 write_attribute(trigger_btree_coalesce);
 write_attribute(trigger_gc);
+write_attribute(trigger_alloc_write);
 write_attribute(prune_cache);
 rw_attribute(btree_gc_periodic);
 
@@ -239,27 +240,29 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
 	if (!fs_usage)
 		return -ENOMEM;
 
-	pr_buf(&out, "capacity:\t\t%llu\n", c->capacity);
+	pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity);
+
+	pr_buf(&out, "hidden:\t\t\t\t%llu\n",
+	       fs_usage->s.hidden);
+	pr_buf(&out, "data:\t\t\t\t%llu\n",
+	       fs_usage->s.data);
+	pr_buf(&out, "cached:\t\t\t\t%llu\n",
+	       fs_usage->s.cached);
+	pr_buf(&out, "reserved:\t\t\t%llu\n",
+	       fs_usage->s.reserved);
+	pr_buf(&out, "nr_inodes:\t\t\t%llu\n",
+	       fs_usage->s.nr_inodes);
+	pr_buf(&out, "online reserved:\t\t%llu\n",
+	       fs_usage->s.online_reserved);
 
 	for (i = 0;
 	     i < ARRAY_SIZE(fs_usage->persistent_reserved);
 	     i++) {
 		pr_buf(&out, "%u replicas:\n", i + 1);
 #if 0
 		for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
 			pr_buf(&out, "\t%s:\t\t%llu\n",
 			       bch2_data_types[type],
 			       stats.replicas[replicas].data[type]);
 		pr_buf(&out, "\terasure coded:\t%llu\n",
 		       stats.replicas[replicas].ec_data);
 #endif
-		pr_buf(&out, "\treserved:\t%llu\n",
+		pr_buf(&out, "\treserved:\t\t%llu\n",
 		       fs_usage->persistent_reserved[i]);
 	}
 
-	pr_buf(&out, "online reserved:\t%llu\n",
-	       fs_usage->s.online_reserved);
-
 	for (i = 0; i < c->replicas.nr; i++) {
 		struct bch_replicas_entry *e =
 			cpu_replicas_entry(&c->replicas, i);
@@ -492,6 +495,12 @@ STORE(__bch2_fs)
 	if (attr == &sysfs_trigger_gc)
 		bch2_gc(c, NULL, false);
 
+	if (attr == &sysfs_trigger_alloc_write) {
+		bool wrote;
+
+		bch2_alloc_write(c, false, &wrote);
+	}
+
 	if (attr == &sysfs_prune_cache) {
 		struct shrink_control sc;
 
@@ -584,6 +593,7 @@ struct attribute *bch2_fs_internal_files[] = {
 	&sysfs_trigger_journal_flush,
 	&sysfs_trigger_btree_coalesce,
 	&sysfs_trigger_gc,
+	&sysfs_trigger_alloc_write,
 	&sysfs_prune_cache,
 
 	&sysfs_copy_gc_enabled,
@@ -882,20 +892,15 @@ static const char * const bch2_rw[] = {
 static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf)
 {
 	struct printbuf out = _PBUF(buf, PAGE_SIZE);
-	int rw, i, cpu;
+	int rw, i;
 
 	for (rw = 0; rw < 2; rw++) {
 		pr_buf(&out, "%s:\n", bch2_rw[rw]);
 
-		for (i = 1; i < BCH_DATA_NR; i++) {
-			u64 n = 0;
-
-			for_each_possible_cpu(cpu)
-				n += per_cpu_ptr(ca->io_done, cpu)->sectors[rw][i];
-
+		for (i = 1; i < BCH_DATA_NR; i++)
 			pr_buf(&out, "%-12s:%12llu\n",
-			       bch2_data_types[i], n << 9);
-		}
+			       bch2_data_types[i],
+			       percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
 	}
 
 	return out.pos - buf;
@@ -11,6 +11,7 @@
 #include <linux/llist.h>
 #include <linux/log2.h>
+#include <linux/percpu.h>
 #include <linux/preempt.h>
 #include <linux/ratelimit.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static inline u64 percpu_u64_get(u64 __percpu *src)
|
||||
{
|
||||
u64 ret = 0;
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
ret += *per_cpu_ptr(src, cpu);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void percpu_u64_set(u64 __percpu *dst, u64 src)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
*per_cpu_ptr(dst, cpu) = 0;
|
||||
|
||||
preempt_disable();
|
||||
*this_cpu_ptr(dst) = src;
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr)
|
||||
{
|
||||
unsigned i;
|
||||
|
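percpu_u64_get() returns the sum of a u64 counter over all possible CPUs; percpu_u64_set() zeroes every CPU's slot and then stores the value on the local CPU, so a following get returns exactly what was written. A self-contained userspace model of those semantics (NR_CPUS and all names here are illustrative):

#include <stdint.h>

#define NR_CPUS 4

static uint64_t counter[NR_CPUS];

static uint64_t model_u64_get(void)
{
	uint64_t ret = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		ret += counter[cpu];	/* sum over every CPU's slot */
	return ret;
}

static void model_u64_set(uint64_t v, int this_cpu)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		counter[cpu] = 0;	/* clear all slots... */
	counter[this_cpu] = v;		/* ...then store on one CPU */
}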