mirror of https://github.com/koverstreet/bcachefs-tools.git

commit 03bc9d71b1
parent e394bd4ba3

    Update bcachefs sources to 3b4024f944
@@ -1 +1 @@
-da037866e669b09edc6b049ce09535d3456474cb
+3b4024f94489e4d8dc8eb7f1278754a2545f8026

@@ -754,7 +754,7 @@ struct bch_fs {
     unsigned            bucket_journal_seq;
 
     /* The rest of this all shows up in sysfs */
-    atomic_long_t       cache_read_races;
+    atomic_long_t       read_realloc_races;
 
     unsigned            foreground_write_ratelimit_enabled:1;
     unsigned            copy_gc_enabled:1;

@@ -1630,82 +1630,19 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
     }
 }
 
-/*
- * Write all dirty btree nodes to disk, including roots
- */
-void bch2_btree_flush(struct bch_fs *c)
+void bch2_btree_verify_flushed(struct bch_fs *c)
 {
-    struct closure cl;
-    struct btree *b;
     struct bucket_table *tbl;
     struct rhash_head *pos;
-    bool saw_dirty;
+    struct btree *b;
     unsigned i;
 
-    closure_init_stack(&cl);
-
     rcu_read_lock();
     tbl = rht_dereference_rcu(c->btree_cache_table.tbl,
                               &c->btree_cache_table);
 
-    do {
-        saw_dirty = false;
-        i = 0;
-restart:
-        tbl = rht_dereference_rcu(c->btree_cache_table.tbl,
-                                  &c->btree_cache_table);
-
-        for (; i < tbl->size; i++)
-            rht_for_each_entry_rcu(b, pos, tbl, i, hash) {
-                saw_dirty |= btree_node_dirty(b);
-
-                if (btree_node_dirty(b) &&
-                    btree_node_may_write(b)) {
-                    rcu_read_unlock();
-                    six_lock_read(&b->lock);
-                    bch2_btree_node_write_dirty(c, b, &cl, 1);
-                    six_unlock_read(&b->lock);
-                    rcu_read_lock();
-                    goto restart;
-                }
-            }
-    } while (saw_dirty);
-
     for (i = 0; i < tbl->size; i++)
         rht_for_each_entry_rcu(b, pos, tbl, i, hash)
             BUG_ON(btree_node_dirty(b));
     rcu_read_unlock();
-
-    closure_sync(&cl);
 }
-
-/**
- * bch_btree_node_flush_journal - flush any journal entries that contain keys
- * from this node
- *
- * The bset's journal sequence number is used for preserving ordering of index
- * updates across unclean shutdowns - it's used to ignore bsets newer than the
- * most recent journal entry.
- *
- * But when rewriting btree nodes we compact all the bsets in a btree node - and
- * if we compacted a bset that should be ignored with bsets we do need, that
- * would be bad. So to avoid that, prior to making the new node visible ensure
- * that the journal has been flushed so that all the bsets we compacted should
- * be visible.
- */
-void bch2_btree_node_flush_journal_entries(struct bch_fs *c,
-                                           struct btree *b,
-                                           struct closure *cl)
-{
-    int i = b->nsets;
-
-    /*
-     * Journal sequence numbers in the different bsets will always be in
-     * ascending order, we only need to flush the highest - except that the
-     * most recent bset might not have a journal sequence number yet, so we
-     * need to loop:
-     */
-    while (i--) {
-        u64 seq = le64_to_cpu(bset(b, &b->set[i])->journal_seq);
-
-        if (seq) {
-            bch2_journal_flush_seq_async(&c->journal, seq, cl);
-            break;
-        }
-    }
-}

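The deleted flush loop is an instance of a common kernel pattern worth spelling out: a blocking operation (the node write) is not allowed inside an RCU read-side critical section, so the walk drops the lock around the blocking work and then restarts the scan, since the table may have changed while the lock was gone. A minimal userspace sketch of that control flow, with a plain mutex standing in for RCU and all names hypothetical rather than bcachefs API:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stddef.h>

    #define TABLE_SIZE 128

    struct node { bool dirty; };

    static struct node table[TABLE_SIZE];  /* hypothetical node table */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static void write_node(struct node *n)
    {
        /* stands in for the blocking btree node write */
        n->dirty = false;
    }

    static void flush_all(void)
    {
        size_t i;
    restart:
        pthread_mutex_lock(&lock);
        for (i = 0; i < TABLE_SIZE; i++)
            if (table[i].dirty) {
                /*
                 * Can't block with the lock held: drop it, do the
                 * write, then restart the scan - the table may have
                 * changed while the lock was dropped.
                 */
                pthread_mutex_unlock(&lock);
                write_node(&table[i]);
                goto restart;
            }
        pthread_mutex_unlock(&lock);
    }

The real code additionally takes six_lock_read() on the node itself before writing it; the sketch only shows the drop-and-restart shape.
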
@@ -94,8 +94,6 @@ do {                                                            \
     }                                                           \
 } while (0)
 
-void bch2_btree_flush(struct bch_fs *);
-void bch2_btree_node_flush_journal_entries(struct bch_fs *, struct btree *,
-                                           struct closure *);
+void bch2_btree_verify_flushed(struct bch_fs *);
 
 #endif /* _BCACHE_BTREE_IO_H */

@@ -161,15 +161,14 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b,
 {
     trace_btree_node_free(c, b);
 
+    BUG_ON(btree_node_dirty(b));
     BUG_ON(b == btree_node_root(c, b));
     BUG_ON(b->ob);
     BUG_ON(!list_empty(&b->write_blocked));
 
+    six_lock_write(&b->lock);
+
     clear_btree_node_noevict(b);
 
-    if (btree_node_dirty(b))
-        bch2_btree_complete_write(c, b, btree_current_write(b));
-    clear_btree_node_dirty(b);
-    six_lock_write(&b->lock);
-
     bch2_btree_node_hash_remove(c, b);

@@ -192,6 +191,8 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
 
     b->ob = NULL;
 
+    clear_btree_node_dirty(b);
+
     __btree_node_free(c, b, NULL);
 
     bch2_open_bucket_put(c, ob);

@@ -890,7 +891,8 @@ bch2_btree_interior_update_alloc(struct bch_fs *c)
 
 static void btree_interior_update_free(struct closure *cl)
 {
-    struct btree_interior_update *as = container_of(cl, struct btree_interior_update, cl);
+    struct btree_interior_update *as =
+        container_of(cl, struct btree_interior_update, cl);
 
     mempool_free(as, &as->c->btree_interior_update_pool);
 }

@@ -910,9 +912,6 @@ static void btree_interior_update_nodes_reachable(struct closure *cl)
         bch2_btree_node_free_ondisk(c, &as->pending[i]);
     as->nr_pending = 0;
 
-    mutex_unlock(&c->btree_interior_update_lock);
-
-    mutex_lock(&c->btree_interior_update_lock);
     list_del(&as->list);
     mutex_unlock(&c->btree_interior_update_lock);
 

@@ -1039,6 +1038,15 @@ static void btree_interior_update_updated_btree(struct bch_fs *c,
                    system_freezable_wq);
 }
 
+static void btree_interior_update_reparent(struct btree_interior_update *as,
+                                           struct btree_interior_update *child)
+{
+    child->b = NULL;
+    child->mode = BTREE_INTERIOR_UPDATING_AS;
+    child->parent_as = as;
+    closure_get(&as->cl);
+}
+
 static void btree_interior_update_updated_root(struct bch_fs *c,
                                                struct btree_interior_update *as,
                                                enum btree_id btree_id)

@@ -1053,14 +1061,8 @@ static void btree_interior_update_updated_root(struct bch_fs *c,
      * Old root might not be persistent yet - if so, redirect its
      * btree_interior_update operation to point to us:
      */
-    if (r->as) {
-        BUG_ON(r->as->mode != BTREE_INTERIOR_UPDATING_ROOT);
-
-        r->as->b = NULL;
-        r->as->mode = BTREE_INTERIOR_UPDATING_AS;
-        r->as->parent_as = as;
-        closure_get(&as->cl);
-    }
+    if (r->as)
+        btree_interior_update_reparent(as, r->as);
 
     as->mode = BTREE_INTERIOR_UPDATING_ROOT;
     as->b = r->b;

@@ -1068,8 +1070,6 @@ static void btree_interior_update_updated_root(struct bch_fs *c,
 
     mutex_unlock(&c->btree_interior_update_lock);
 
-    bch2_journal_wait_on_seq(&c->journal, as->journal_seq, &as->cl);
-
     continue_at(&as->cl, btree_interior_update_nodes_written,
                 system_freezable_wq);
 }

@@ -1092,8 +1092,10 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
                                                struct btree_interior_update *as,
                                                struct btree *b)
 {
+    struct closure *cl, *cl_n;
     struct btree_interior_update *p, *n;
     struct pending_btree_node_free *d;
+    struct btree_write *w;
     struct bset_tree *t;
 
     /*

@@ -1107,41 +1109,8 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
     for_each_bset(b, t)
         as->journal_seq = max(as->journal_seq, bset(b, t)->journal_seq);
 
-    /*
-     * Does this node have unwritten data that has a pin on the journal?
-     *
-     * If so, transfer that pin to the btree_interior_update operation -
-     * note that if we're freeing multiple nodes, we only need to keep the
-     * oldest pin of any of the nodes we're freeing. We'll release the pin
-     * when the new nodes are persistent and reachable on disk:
-     */
-    bch2_journal_pin_add_if_older(&c->journal,
-                                  &b->writes[0].journal,
-                                  &as->journal, interior_update_flush);
-    bch2_journal_pin_add_if_older(&c->journal,
-                                  &b->writes[1].journal,
-                                  &as->journal, interior_update_flush);
-
     mutex_lock(&c->btree_interior_update_lock);
 
-    /*
-     * Does this node have any btree_interior_update operations preventing
-     * it from being written?
-     *
-     * If so, redirect them to point to this btree_interior_update: we can
-     * write out our new nodes, but we won't make them visible until those
-     * operations complete
-     */
-    list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) {
-        BUG_ON(p->mode != BTREE_INTERIOR_UPDATING_NODE);
-
-        p->mode = BTREE_INTERIOR_UPDATING_AS;
-        list_del(&p->write_blocked_list);
-        p->b = NULL;
-        p->parent_as = as;
-        closure_get(&as->cl);
-    }
-
     /* Add this node to the list of nodes being freed: */
     BUG_ON(as->nr_pending >= ARRAY_SIZE(as->pending));

@@ -1152,6 +1121,38 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
     d->level = b->level;
     bkey_copy(&d->key, &b->key);
 
+    /*
+     * Does this node have any btree_interior_update operations preventing
+     * it from being written?
+     *
+     * If so, redirect them to point to this btree_interior_update: we can
+     * write out our new nodes, but we won't make them visible until those
+     * operations complete
+     */
+    list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) {
+        list_del(&p->write_blocked_list);
+        btree_interior_update_reparent(as, p);
+    }
+
+    clear_btree_node_dirty(b);
+    w = btree_current_write(b);
+
+    llist_for_each_entry_safe(cl, cl_n, llist_del_all(&w->wait.list), list)
+        llist_add(&cl->list, &as->wait.list);
+
+    /*
+     * Does this node have unwritten data that has a pin on the journal?
+     *
+     * If so, transfer that pin to the btree_interior_update operation -
+     * note that if we're freeing multiple nodes, we only need to keep the
+     * oldest pin of any of the nodes we're freeing. We'll release the pin
+     * when the new nodes are persistent and reachable on disk:
+     */
+    bch2_journal_pin_add_if_older(&c->journal, &w->journal,
+                                  &as->journal, interior_update_flush);
+    bch2_journal_pin_drop(&c->journal, &w->journal);
+
+
     mutex_unlock(&c->btree_interior_update_lock);
 }

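The comment moved into place above states the invariant that makes this transfer cheap: a journal pin at sequence number s already keeps every entry at or after s alive, so when several nodes are freed only the oldest pin needs to survive. A toy model of that rule, with hypothetical types rather than the real bch2_journal_pin_add_if_older() machinery:

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical, simplified journal pin: holding a pin with sequence
     * number seq prevents journal entries >= seq from being reclaimed. */
    struct pin {
        uint64_t seq;
        bool     held;
    };

    /*
     * Transfer a node's pin to the interior-update operation, keeping
     * only the oldest sequence number seen so far: a pin at seq s keeps
     * everything >= s alive, so a younger pin adds nothing.
     */
    static void pin_add_if_older(const struct pin *node_pin,
                                 struct pin *update_pin)
    {
        if (node_pin->held &&
            (!update_pin->held || node_pin->seq < update_pin->seq)) {
            update_pin->seq  = node_pin->seq;
            update_pin->held = true;
        }
    }
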
@@ -199,21 +199,6 @@ static inline u64 bch2_fs_sectors_used(struct bch_fs *c)
     return min(c->capacity, __bch2_fs_sectors_used(c));
 }
 
-/* XXX: kill? */
-static inline u64 sectors_available(struct bch_fs *c)
-{
-    struct bch_dev *ca;
-    unsigned i;
-    u64 ret = 0;
-
-    rcu_read_lock();
-    for_each_member_device_rcu(ca, c, i)
-        ret += dev_buckets_available(ca) << ca->bucket_bits;
-    rcu_read_unlock();
-
-    return ret;
-}
-
 static inline bool is_available_bucket(struct bucket_mark mark)
 {
     return (!mark.owned_by_allocator &&

@@ -37,10 +37,10 @@ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
 
     path = strndup_user((const char __user *)
                         (unsigned long) dev, PATH_MAX);
-    if (!path)
-        return ERR_PTR(-ENOMEM);
+    if (IS_ERR(path))
+        return ERR_CAST(path);
 
-    bdev = lookup_bdev(strim(path));
+    bdev = lookup_bdev(path);
     kfree(path);
     if (IS_ERR(bdev))
         return ERR_CAST(bdev);

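The fix in this hunk hinges on calling conventions: strndup_user() reports failure with an encoded error pointer, never NULL, so the old `if (!path)` test could never fire and an error pointer could reach lookup_bdev() and kfree(). The kernel's error-pointer encoding (include/linux/err.h) looks essentially like this:

    #include <stdbool.h>

    #define MAX_ERRNO 4095

    /* The last page of the address space is reserved, so a negative
     * errno can be encoded directly in a pointer value. */
    static inline void *ERR_PTR(long error)
    {
        return (void *) error;
    }

    static inline long PTR_ERR(const void *ptr)
    {
        return (long) ptr;
    }

    static inline bool IS_ERR(const void *ptr)
    {
        return (unsigned long) ptr >= (unsigned long) -MAX_ERRNO;
    }

Callers therefore test IS_ERR() and propagate with ERR_CAST() or PTR_ERR(); a NULL check misses ERR_PTR(-ENOMEM) and ERR_PTR(-EFAULT) entirely.
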
@@ -1046,7 +1046,7 @@ static void bch2_read_endio(struct bio *bio)
     if (rbio->ptr.cached &&
         (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
          ptr_stale(rbio->ca, &rbio->ptr))) {
-        atomic_long_inc(&c->cache_read_races);
+        atomic_long_inc(&c->read_realloc_races);
 
         if (rbio->flags & BCH_READ_RETRY_IF_STALE)
             bch2_rbio_retry(c, rbio);

@@ -180,8 +180,10 @@ redo_peek:
             ret == -EINTR)
             goto redo_peek;
 
-        /* -EROFS or perhaps -ENOSPC - bail out: */
-        /* XXX warn here */
+        bch2_fs_fatal_error(c,
+            "error %i rewriting btree node with blacklisted journal seq",
+            ret);
+        bch2_journal_halt(j);
         return;
     }
 }

@@ -1018,6 +1020,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 
     fifo_for_each_entry_ptr(p, &j->pin, iter) {
         INIT_LIST_HEAD(&p->list);
+        INIT_LIST_HEAD(&p->flushed);
         atomic_set(&p->count, 0);
     }
 

@@ -1147,6 +1150,7 @@ static void __journal_entry_new(struct journal *j, int count)
                     &fifo_peek_back(&j->pin));
 
     INIT_LIST_HEAD(&p->list);
+    INIT_LIST_HEAD(&p->flushed);
     atomic_set(&p->count, count);
 }
 

@@ -1516,7 +1520,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
     j->replay_pin_list = NULL;
 
     if (did_replay) {
-        bch2_btree_flush(c);
+        bch2_journal_flush_pins(&c->journal, U64_MAX);
 
         /*
          * Write a new journal entry _before_ we start journalling new data -

|
||||
struct journal_entry_pin, list);
|
||||
if (ret) {
|
||||
/* must be list_del_init(), see bch2_journal_pin_drop() */
|
||||
list_del_init(&ret->list);
|
||||
list_move(&ret->list, &pin_list->flushed);
|
||||
*seq = journal_pin_seq(j, pin_list);
|
||||
break;
|
||||
}
|
||||
@ -1869,28 +1873,32 @@ journal_get_next_pin(struct journal *j, u64 seq_to_flush, u64 *seq)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool journal_has_pins(struct journal *j)
|
||||
static bool journal_flush_done(struct journal *j, u64 seq_to_flush)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
journal_reclaim_fast(j);
|
||||
ret = fifo_used(&j->pin) > 1 ||
|
||||
atomic_read(&fifo_peek_front(&j->pin).count) > 1;
|
||||
|
||||
ret = (fifo_used(&j->pin) == 1 &&
|
||||
atomic_read(&fifo_peek_front(&j->pin).count) == 1) ||
|
||||
last_seq(j) > seq_to_flush;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_journal_flush_pins(struct journal *j)
|
||||
void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
|
||||
{
|
||||
struct journal_entry_pin *pin;
|
||||
u64 seq;
|
||||
u64 pin_seq;
|
||||
|
||||
while ((pin = journal_get_next_pin(j, U64_MAX, &seq)))
|
||||
pin->flush(j, pin, seq);
|
||||
while ((pin = journal_get_next_pin(j, seq_to_flush, &pin_seq)))
|
||||
pin->flush(j, pin, pin_seq);
|
||||
|
||||
wait_event(j->wait, !journal_has_pins(j) || bch2_journal_error(j));
|
||||
wait_event(j->wait,
|
||||
journal_flush_done(j, seq_to_flush) ||
|
||||
bch2_journal_error(j));
|
||||
}
|
||||
|
||||
static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
|
||||
@ -2174,9 +2182,18 @@ static void journal_write_done(struct closure *cl)
|
||||
struct journal *j = container_of(cl, struct journal, io);
|
||||
struct journal_buf *w = journal_prev_buf(j);
|
||||
|
||||
__bch2_time_stats_update(j->write_time, j->write_start_time);
|
||||
|
||||
j->last_seq_ondisk = le64_to_cpu(w->data->last_seq);
|
||||
|
||||
__bch2_time_stats_update(j->write_time, j->write_start_time);
|
||||
/*
|
||||
* Updating last_seq_ondisk may let journal_reclaim_work() discard more
|
||||
* buckets:
|
||||
*
|
||||
* Must come before signaling write completion, for
|
||||
* bch2_fs_journal_stop():
|
||||
*/
|
||||
mod_delayed_work(system_freezable_wq, &j->reclaim_work, 0);
|
||||
|
||||
BUG_ON(!j->reservations.prev_buf_unwritten);
|
||||
atomic64_sub(((union journal_res_state) { .prev_buf_unwritten = 1 }).v,
|
||||
@ -2199,12 +2216,6 @@ static void journal_write_done(struct closure *cl)
|
||||
|
||||
closure_wake_up(&w->wait);
|
||||
wake_up(&j->wait);
|
||||
|
||||
/*
|
||||
* Updating last_seq_ondisk may let journal_reclaim_work() discard more
|
||||
* buckets:
|
||||
*/
|
||||
mod_delayed_work(system_freezable_wq, &j->reclaim_work, 0);
|
||||
}
|
||||
|
||||
static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
|
||||
@ -2345,8 +2356,12 @@ static void journal_write_work(struct work_struct *work)
|
||||
struct journal *j = container_of(to_delayed_work(work),
|
||||
struct journal, write_work);
|
||||
spin_lock(&j->lock);
|
||||
set_bit(JOURNAL_NEED_WRITE, &j->flags);
|
||||
if (!journal_entry_is_open(j)) {
|
||||
spin_unlock(&j->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
set_bit(JOURNAL_NEED_WRITE, &j->flags);
|
||||
if (journal_buf_switch(j, false) != JOURNAL_UNLOCKED)
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
@ -2505,6 +2520,8 @@ void bch2_journal_wait_on_seq(struct journal *j, u64 seq, struct closure *parent
|
||||
|
||||
void bch2_journal_flush_seq_async(struct journal *j, u64 seq, struct closure *parent)
|
||||
{
|
||||
struct journal_buf *buf;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
BUG_ON(seq > atomic64_read(&j->seq));
|
||||
@ -2517,8 +2534,9 @@ void bch2_journal_flush_seq_async(struct journal *j, u64 seq, struct closure *pa
|
||||
if (seq == atomic64_read(&j->seq)) {
|
||||
bool set_need_write = false;
|
||||
|
||||
if (parent &&
|
||||
!closure_wait(&journal_cur_buf(j)->wait, parent))
|
||||
buf = journal_cur_buf(j);
|
||||
|
||||
if (parent && !closure_wait(&buf->wait, parent))
|
||||
BUG();
|
||||
|
||||
if (!test_and_set_bit(JOURNAL_NEED_WRITE, &j->flags)) {
|
||||
@ -2529,7 +2547,7 @@ void bch2_journal_flush_seq_async(struct journal *j, u64 seq, struct closure *pa
|
||||
switch (journal_buf_switch(j, set_need_write)) {
|
||||
case JOURNAL_ENTRY_ERROR:
|
||||
if (parent)
|
||||
closure_wake_up(&journal_cur_buf(j)->wait);
|
||||
closure_wake_up(&buf->wait);
|
||||
break;
|
||||
case JOURNAL_ENTRY_CLOSED:
|
||||
/*
|
||||
@ -2545,7 +2563,9 @@ void bch2_journal_flush_seq_async(struct journal *j, u64 seq, struct closure *pa
|
||||
} else if (parent &&
|
||||
seq + 1 == atomic64_read(&j->seq) &&
|
||||
j->reservations.prev_buf_unwritten) {
|
||||
if (!closure_wait(&journal_prev_buf(j)->wait, parent))
|
||||
buf = journal_prev_buf(j);
|
||||
|
||||
if (!closure_wait(&buf->wait, parent))
|
||||
BUG();
|
||||
|
||||
smp_mb();
|
||||
@@ -2553,7 +2573,7 @@ void bch2_journal_flush_seq_async(struct journal *j, u64 seq, struct closure *pa
 
         /* check if raced with write completion (or failure) */
         if (!j->reservations.prev_buf_unwritten ||
             bch2_journal_error(j))
-            closure_wake_up(&journal_prev_buf(j)->wait);
+            closure_wake_up(&buf->wait);
     }
 
     spin_unlock(&j->lock);

|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
|
||||
{
|
||||
struct journal_entry_pin_list *pin_list;
|
||||
struct journal_entry_pin *pin;
|
||||
ssize_t ret = 0;
|
||||
unsigned i;
|
||||
|
||||
spin_lock_irq(&j->pin_lock);
|
||||
fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret,
|
||||
"%llu: count %u\n",
|
||||
journal_pin_seq(j, pin_list),
|
||||
atomic_read(&pin_list->count));
|
||||
|
||||
list_for_each_entry(pin, &pin_list->list, list)
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret,
|
||||
"\t%p %pf\n",
|
||||
pin, pin->flush);
|
||||
|
||||
if (!list_empty(&pin_list->flushed))
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret,
|
||||
"flushed:\n");
|
||||
|
||||
list_for_each_entry(pin, &pin_list->flushed, list)
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret,
|
||||
"\t%p %pf\n",
|
||||
pin, pin->flush);
|
||||
}
|
||||
spin_unlock_irq(&j->pin_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool bch2_journal_writing_to_device(struct bch_dev *ca)
|
||||
{
|
||||
struct journal *j = &ca->fs->journal;
|
||||
@ -2725,12 +2778,11 @@ static bool bch2_journal_writing_to_device(struct bch_dev *ca)
|
||||
|
||||
int bch2_journal_move(struct bch_dev *ca)
|
||||
{
|
||||
u64 last_flushed_seq;
|
||||
struct journal_device *ja = &ca->journal;
|
||||
struct bch_fs *c = ca->fs;
|
||||
struct journal *j = &c->journal;
|
||||
struct journal *j = &ca->fs->journal;
|
||||
u64 seq_to_flush = 0;
|
||||
unsigned i;
|
||||
int ret = 0; /* Success */
|
||||
int ret;
|
||||
|
||||
if (bch2_journal_writing_to_device(ca)) {
|
||||
/*
|
||||
@ -2744,16 +2796,10 @@ int bch2_journal_move(struct bch_dev *ca)
|
||||
BUG_ON(bch2_journal_writing_to_device(ca));
|
||||
}
|
||||
|
||||
/*
|
||||
* Flush all btree updates to backing store so that any
|
||||
* journal entries written to ca become stale and are no
|
||||
* longer needed.
|
||||
*/
|
||||
for (i = 0; i < ja->nr; i++)
|
||||
seq_to_flush = max(seq_to_flush, ja->bucket_seq[i]);
|
||||
|
||||
/*
|
||||
* XXX: switch to normal journal reclaim machinery
|
||||
*/
|
||||
bch2_btree_flush(c);
|
||||
bch2_journal_flush_pins(j, seq_to_flush);
|
||||
|
||||
/*
|
||||
* Force a meta-data journal entry to be written so that
|
||||
@ -2767,12 +2813,9 @@ int bch2_journal_move(struct bch_dev *ca)
|
||||
* the device
|
||||
*/
|
||||
spin_lock(&j->lock);
|
||||
last_flushed_seq = last_seq(j);
|
||||
ret = j->last_seq_ondisk > seq_to_flush ? 0 : -EIO;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
for (i = 0; i < ja->nr; i += 1)
|
||||
BUG_ON(ja->bucket_seq[i] > last_flushed_seq);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2786,7 +2829,7 @@ void bch2_fs_journal_stop(struct journal *j)
      * journal entries, then force a brand new empty journal entry to be
      * written:
      */
-    bch2_journal_flush_pins(j);
+    bch2_journal_flush_pins(j, U64_MAX);
     bch2_journal_flush_async(j, NULL);
     bch2_journal_meta(j);
 

@@ -141,7 +141,7 @@ void bch2_journal_pin_add_if_older(struct journal *,
                                    struct journal_entry_pin *,
                                    struct journal_entry_pin *,
                                    journal_pin_flush_fn);
-void bch2_journal_flush_pins(struct journal *);
+void bch2_journal_flush_pins(struct journal *, u64);
 
 struct closure;
 struct bch_fs;

@@ -354,6 +354,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
 }
 
 ssize_t bch2_journal_print_debug(struct journal *, char *);
+ssize_t bch2_journal_print_pins(struct journal *, char *);
 
 int bch2_dev_journal_alloc(struct bch_dev *);
 

@@ -38,6 +38,7 @@ struct journal_buf {
 
 struct journal_entry_pin_list {
     struct list_head    list;
+    struct list_head    flushed;
     atomic_t            count;
 };
 

@@ -211,7 +211,14 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 
     bch2_gc_thread_stop(c);
 
-    bch2_btree_flush(c);
+    /*
+     * Flush journal before stopping allocators, because flushing journal
+     * blacklist entries involves allocating new btree nodes:
+     */
+    bch2_journal_flush_pins(&c->journal, U64_MAX);
+
+    if (!bch2_journal_error(&c->journal))
+        bch2_btree_verify_flushed(c);
 
     for_each_member_device(ca, c, i)
         bch2_dev_allocator_stop(ca);

@@ -120,6 +120,7 @@ do {                                                            \
     return strtoi_h(buf, &var) ?: (ssize_t) size;               \
 } while (0)
 
+write_attribute(trigger_journal_flush);
 write_attribute(trigger_btree_coalesce);
 write_attribute(trigger_gc);
 write_attribute(prune_cache);

@@ -127,35 +128,25 @@ write_attribute(prune_cache);
 read_attribute(uuid);
 read_attribute(minor);
 read_attribute(bucket_size);
-read_attribute(bucket_size_bytes);
 read_attribute(block_size);
-read_attribute(block_size_bytes);
 read_attribute(btree_node_size);
-read_attribute(btree_node_size_bytes);
 read_attribute(first_bucket);
 read_attribute(nbuckets);
-read_attribute(tree_depth);
-read_attribute(root_usage_percent);
 read_attribute(read_priority_stats);
 read_attribute(write_priority_stats);
 read_attribute(fragmentation_stats);
 read_attribute(oldest_gen_stats);
 read_attribute(reserve_stats);
 read_attribute(btree_cache_size);
-read_attribute(cache_available_percent);
 read_attribute(compression_stats);
 read_attribute(written);
 read_attribute(btree_written);
 read_attribute(metadata_written);
 read_attribute(journal_debug);
-write_attribute(journal_flush);
+read_attribute(journal_pins);
 
 read_attribute(internal_uuid);
 
-read_attribute(btree_gc_running);
-
-read_attribute(btree_nodes);
-read_attribute(btree_used_percent);
 read_attribute(average_key_size);
 read_attribute(available_buckets);
 read_attribute(free_buckets);
 read_attribute(dirty_data);

@@ -168,10 +159,9 @@ read_attribute(meta_buckets);
 read_attribute(alloc_buckets);
 read_attribute(has_data);
 read_attribute(has_metadata);
 read_attribute(bset_tree_stats);
 read_attribute(alloc_debug);
 
-read_attribute(cache_read_races);
+read_attribute(read_realloc_races);
 
 rw_attribute(journal_write_delay_ms);
 rw_attribute(journal_reclaim_delay_ms);

@@ -221,73 +211,6 @@ static struct attribute sysfs_state_rw = {
     .mode = S_IRUGO
 };
 
-static int bch2_bset_print_stats(struct bch_fs *c, char *buf)
-{
-    struct bset_stats stats;
-    size_t nodes = 0;
-    struct btree *b;
-    struct bucket_table *tbl;
-    struct rhash_head *pos;
-    unsigned iter;
-
-    memset(&stats, 0, sizeof(stats));
-
-    rcu_read_lock();
-    for_each_cached_btree(b, c, tbl, iter, pos) {
-        bch2_btree_keys_stats(b, &stats);
-        nodes++;
-    }
-    rcu_read_unlock();
-
-    return snprintf(buf, PAGE_SIZE,
-            "btree nodes: %zu\n"
-            "written sets: %zu\n"
-            "written key bytes: %zu\n"
-            "unwritten sets: %zu\n"
-            "unwritten key bytes: %zu\n"
-            "no table sets: %zu\n"
-            "no table key bytes: %zu\n"
-            "floats: %zu\n"
-            "failed unpacked: %zu\n"
-            "failed prev: %zu\n"
-            "failed overflow: %zu\n",
-            nodes,
-            stats.sets[BSET_RO_AUX_TREE].nr,
-            stats.sets[BSET_RO_AUX_TREE].bytes,
-            stats.sets[BSET_RW_AUX_TREE].nr,
-            stats.sets[BSET_RW_AUX_TREE].bytes,
-            stats.sets[BSET_NO_AUX_TREE].nr,
-            stats.sets[BSET_NO_AUX_TREE].bytes,
-            stats.floats,
-            stats.failed_unpacked,
-            stats.failed_prev,
-            stats.failed_overflow);
-}
-
-static unsigned bch2_root_usage(struct bch_fs *c)
-{
-    unsigned bytes = 0;
-    struct bkey_packed *k;
-    struct btree *b;
-    struct btree_node_iter iter;
-
-    goto lock_root;
-
-    do {
-        six_unlock_read(&b->lock);
-lock_root:
-        b = c->btree_roots[BTREE_ID_EXTENTS].b;
-        six_lock_read(&b->lock);
-    } while (b != c->btree_roots[BTREE_ID_EXTENTS].b);
-
-    for_each_btree_node_key(b, k, &iter, btree_node_is_extents(b))
-        bytes += bkey_bytes(k);
-
-    six_unlock_read(&b->lock);
-
-    return (bytes * 100) / btree_bytes(c);
-}
-
 static size_t bch2_btree_cache_size(struct bch_fs *c)
 {
     size_t ret = 0;

@@ -301,27 +224,6 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
     return ret;
 }
 
-static unsigned bch2_fs_available_percent(struct bch_fs *c)
-{
-    return div64_u64((u64) sectors_available(c) * 100,
-                     c->capacity ?: 1);
-}
-
-#if 0
-static unsigned bch2_btree_used(struct bch_fs *c)
-{
-    return div64_u64(c->gc_stats.key_bytes * 100,
-                     (c->gc_stats.nodes ?: 1) * btree_bytes(c));
-}
-
-static unsigned bch2_average_key_size(struct bch_fs *c)
-{
-    return c->gc_stats.nkeys
-        ? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
-        : 0;
-}
-#endif
-
 static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
 {
     struct bch_fs_usage stats = bch2_fs_usage_read(c);

@@ -358,6 +260,9 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
             compressed_sectors_compressed = 0,
             compressed_sectors_uncompressed = 0;
 
+    if (!bch2_fs_running(c))
+        return -EPERM;
+
     for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, k)
         if (k.k->type == BCH_EXTENT) {
             struct bkey_s_c_extent e = bkey_s_c_to_extent(k);

@@ -402,29 +307,17 @@ SHOW(bch2_fs)
     struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
 
     sysfs_print(minor, c->minor);
+    sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
 
     sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms);
     sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
 
-    sysfs_hprint(block_size, block_bytes(c));
-    sysfs_print(block_size_bytes, block_bytes(c));
-    sysfs_hprint(btree_node_size, c->sb.btree_node_size << 9);
-    sysfs_print(btree_node_size_bytes, c->sb.btree_node_size << 9);
+    sysfs_print(block_size, block_bytes(c));
+    sysfs_print(btree_node_size, btree_bytes(c));
     sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
-    sysfs_print(cache_available_percent, bch2_fs_available_percent(c));
-
-    sysfs_print(btree_gc_running, c->gc_pos.phase != GC_PHASE_DONE);
-
-#if 0
-    /* XXX: reimplement */
-    sysfs_print(btree_used_percent, bch2_btree_used(c));
-    sysfs_print(btree_nodes, c->gc_stats.nodes);
-    sysfs_hprint(average_key_size, bch2_average_key_size(c));
-#endif
 
-    sysfs_print(cache_read_races,
-                atomic_long_read(&c->cache_read_races));
+    sysfs_print(read_realloc_races,
+                atomic_long_read(&c->read_realloc_races));
 
     sysfs_printf(foreground_write_ratelimit_enabled, "%i",
                  c->foreground_write_ratelimit_enabled);

|
||||
|
||||
/* Debugging: */
|
||||
|
||||
if (attr == &sysfs_journal_debug)
|
||||
return bch2_journal_print_debug(&c->journal, buf);
|
||||
|
||||
#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
|
||||
BCH_DEBUG_PARAMS()
|
||||
#undef BCH_DEBUG_PARAM
|
||||
|
||||
if (!bch2_fs_running(c))
|
||||
return -EPERM;
|
||||
|
||||
if (attr == &sysfs_bset_tree_stats)
|
||||
return bch2_bset_print_stats(c, buf);
|
||||
if (attr == &sysfs_alloc_debug)
|
||||
return show_fs_alloc_debug(c, buf);
|
||||
|
||||
sysfs_print(tree_depth, c->btree_roots[BTREE_ID_EXTENTS].b->level);
|
||||
sysfs_print(root_usage_percent, bch2_root_usage(c));
|
||||
if (attr == &sysfs_journal_debug)
|
||||
return bch2_journal_print_debug(&c->journal, buf);
|
||||
|
||||
if (attr == &sysfs_journal_pins)
|
||||
return bch2_journal_print_pins(&c->journal, buf);
|
||||
|
||||
if (attr == &sysfs_compression_stats)
|
||||
return bch2_compression_stats(c, buf);
|
||||
|
||||
sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
|
||||
#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
|
||||
BCH_DEBUG_PARAMS()
|
||||
#undef BCH_DEBUG_PARAM
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -519,17 +405,14 @@ STORE(__bch2_fs)
     if (!bch2_fs_running(c))
         return -EPERM;
 
-    if (attr == &sysfs_journal_flush) {
-        bch2_journal_meta_async(&c->journal, NULL);
+    /* Debugging: */
 
-        return size;
-    }
+    if (attr == &sysfs_trigger_journal_flush)
+        bch2_journal_meta_async(&c->journal, NULL);
 
     if (attr == &sysfs_trigger_btree_coalesce)
         bch2_coalesce(c);
 
-    /* Debugging: */
-
     if (attr == &sysfs_trigger_gc)
         bch2_gc(c);
 

@@ -557,28 +440,21 @@ STORE(bch2_fs)
 SYSFS_OPS(bch2_fs);
 
 struct attribute *bch2_fs_files[] = {
-    &sysfs_journal_write_delay_ms,
-    &sysfs_journal_reclaim_delay_ms,
-
     &sysfs_minor,
     &sysfs_block_size,
-    &sysfs_block_size_bytes,
     &sysfs_btree_node_size,
-    &sysfs_btree_node_size_bytes,
-    &sysfs_tree_depth,
-    &sysfs_root_usage_percent,
     &sysfs_btree_cache_size,
-    &sysfs_cache_available_percent,
-    &sysfs_compression_stats,
+    &sysfs_average_key_size,
 
-    &sysfs_average_key_size,
-
     &sysfs_meta_replicas_have,
     &sysfs_data_replicas_have,
 
+    &sysfs_journal_write_delay_ms,
+    &sysfs_journal_reclaim_delay_ms,
+
     &sysfs_foreground_target_percent,
     &sysfs_tiering_percent,
 
-    &sysfs_journal_flush,
+    &sysfs_compression_stats,
     NULL
 };

@@ -598,21 +474,17 @@ STORE(bch2_fs_internal)
 SYSFS_OPS(bch2_fs_internal);
 
 struct attribute *bch2_fs_internal_files[] = {
-    &sysfs_journal_debug,
-
     &sysfs_alloc_debug,
+    &sysfs_journal_debug,
+    &sysfs_journal_pins,
 
-    &sysfs_btree_gc_running,
-
-    &sysfs_btree_nodes,
-    &sysfs_btree_used_percent,
-
     &sysfs_bset_tree_stats,
-    &sysfs_cache_read_races,
+    &sysfs_read_realloc_races,
 
+    &sysfs_trigger_journal_flush,
     &sysfs_trigger_btree_coalesce,
     &sysfs_trigger_gc,
     &sysfs_prune_cache,
 
     &sysfs_foreground_write_ratelimit_enabled,
     &sysfs_copy_gc_enabled,
     &sysfs_tiering_enabled,

@@ -853,10 +725,8 @@ SHOW(bch2_dev)
 
     sysfs_printf(uuid, "%pU\n", ca->uuid.b);
 
-    sysfs_hprint(bucket_size, bucket_bytes(ca));
-    sysfs_print(bucket_size_bytes, bucket_bytes(ca));
-    sysfs_hprint(block_size, block_bytes(c));
-    sysfs_print(block_size_bytes, block_bytes(c));
+    sysfs_print(bucket_size, bucket_bytes(ca));
+    sysfs_print(block_size, block_bytes(c));
     sysfs_print(first_bucket, ca->mi.first_bucket);
     sysfs_print(nbuckets, ca->mi.nbuckets);
     sysfs_print(discard, ca->mi.discard);

@@ -979,35 +849,46 @@ SYSFS_OPS(bch2_dev);
 struct attribute *bch2_dev_files[] = {
     &sysfs_uuid,
     &sysfs_bucket_size,
-    &sysfs_bucket_size_bytes,
     &sysfs_block_size,
-    &sysfs_block_size_bytes,
     &sysfs_first_bucket,
     &sysfs_nbuckets,
 
+    /* settings: */
+    &sysfs_discard,
+    &sysfs_cache_replacement_policy,
+    &sysfs_tier,
+    &sysfs_state_rw,
+
+    &sysfs_has_data,
+    &sysfs_has_metadata,
+
+    /* io stats: */
+    &sysfs_written,
+    &sysfs_btree_written,
+    &sysfs_metadata_written,
+
+    /* alloc info - data: */
+    &sysfs_dirty_data,
+    &sysfs_dirty_bytes,
+    &sysfs_cached_data,
+    &sysfs_cached_bytes,
+
+    /* alloc info - buckets: */
+    &sysfs_available_buckets,
+    &sysfs_free_buckets,
+    &sysfs_dirty_buckets,
+    &sysfs_cached_buckets,
+    &sysfs_meta_buckets,
+    &sysfs_alloc_buckets,
+
+    /* alloc info - other stats: */
     &sysfs_read_priority_stats,
     &sysfs_write_priority_stats,
     &sysfs_fragmentation_stats,
     &sysfs_oldest_gen_stats,
     &sysfs_reserve_stats,
-    &sysfs_available_buckets,
-    &sysfs_free_buckets,
-    &sysfs_dirty_data,
-    &sysfs_dirty_bytes,
-    &sysfs_dirty_buckets,
-    &sysfs_cached_data,
-    &sysfs_cached_bytes,
-    &sysfs_cached_buckets,
-    &sysfs_meta_buckets,
-    &sysfs_alloc_buckets,
-    &sysfs_has_data,
-    &sysfs_has_metadata,
-    &sysfs_discard,
-    &sysfs_written,
-    &sysfs_btree_written,
-    &sysfs_metadata_written,
-    &sysfs_cache_replacement_policy,
-    &sysfs_tier,
-    &sysfs_state_rw,
 
+    /* debug: */
     &sysfs_alloc_debug,
 
     sysfs_pd_controller_files(copy_gc),