mirror of https://github.com/koverstreet/bcachefs-tools.git
synced 2025-12-09 00:00:17 +03:00

Update bcachefs sources to 2e9d7e867ec2 bcachefs: More reconcile counters/tracepoints

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

This commit is contained in:
parent 601931227c
commit eeaf00a23b
@@ -1 +1 @@
-00b91697cf0881853c8c646647d365645a26159c
+2e9d7e867ec236de11ebf725da352cd6bf7aaa86

@@ -253,6 +253,8 @@ void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
 	int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
 	BUG_ON(ret);
 
+	clear_btree_node_just_written(b);
+
 	/* Cause future lookups for this node to fail: */
 	b->hash_val = 0;
 

@@ -860,7 +862,6 @@ err:
 	/* Try to cannibalize another cached btree node: */
 	if (bc->alloc_lock == current) {
 		b2 = btree_node_cannibalize(c);
-		clear_btree_node_just_written(b2);
 		__bch2_btree_node_hash_remove(bc, b2);
 
 		if (b) {

@@ -403,7 +403,7 @@ static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned
 {
 #ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
 	if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) {
-		event_inc_trace(c, trans_restart_injected, buf, prt_str(&buf, trans->fn));
+		event_inc_trace(trans->c, trans_restart_injected, buf, prt_str(&buf, trans->fn));
 		return btree_trans_restart_ip(trans,
 					BCH_ERR_transaction_restart_fault_inject, ip);
 	}
 

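The fix passes trans->c explicitly: there is no local c in this function's scope, so the old line could not have compiled. The surrounding expression also encodes the injection rate: ~(~0ULL << n) is a mask of the low n = 10 + restart_count_this_trans bits (capped at 63 to avoid an undefined shift), so the branch is taken only when those low bits of the clock are all zero. A standalone model of that check, with a hypothetical helper name and a caller-supplied value standing in for ktime_get_ns():

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Sketch of the injection test above: fires with probability
     * 2^-(10 + restarts), i.e. roughly 1/1024 on a fresh transaction,
     * halving with each restart this transaction has already taken,
     * so fault injection cannot starve a retry loop.
     */
    static bool should_inject_restart(uint64_t clock, unsigned restarts)
    {
            unsigned n = restarts + 10;

            if (n > 63)             /* the min(63, ...) above avoids a UB shift */
                    n = 63;
            return !(clock & ~(~0ULL << n));
    }
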
@@ -1370,6 +1370,9 @@ static int bch2_extent_set_rb_pending(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 
+	event_add_trace(c, reconcile_set_pending, k.k->size, buf,
+			bch2_bkey_val_to_text(&buf, c, k));
+
 	if (rb_work_id(bch2_bkey_reconcile_opts(c, k)) == RECONCILE_WORK_pending)
 		return 0;
 

@@ -1442,12 +1445,17 @@ static int do_reconcile_extent(struct moving_context *ctxt,
 				struct per_snapshot_io_opts *snapshot_io_opts,
 				struct bpos work_pos)
 {
-	struct bbpos data_pos = rb_work_to_data_pos(work_pos);
-
+	struct btree_trans *trans = ctxt->trans;
+	struct bch_fs *c = trans->c;
+	struct bbpos data_pos = rb_work_to_data_pos(work_pos);
+
 	CLASS(btree_iter, iter)(trans, data_pos.btree, data_pos.pos, BTREE_ITER_all_snapshots);
 	struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter));
 	if (!k.k)
 		return 0;
 
+	event_add_trace(c, reconcile_data, k.k->size, buf,
+			bch2_bkey_val_to_text(&buf, c, k));
+
 	return __do_reconcile_extent(ctxt, snapshot_io_opts, &iter, k);
 }

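Both new call sites follow the commit's counter-plus-tracepoint pattern: event_add_trace(c, name, amount, buf, expr) bumps the persistent counter by amount and, when the tracepoint is live, renders the key into a printbuf for the trace string. The macro's definition is outside this diff; a plausible shape, offered only as a sketch with assumed helper names, would be:

    /*
     * Sketch - not the actual bcachefs macro. Assumed behaviour: always
     * account to the persistent counter; only pay for formatting when the
     * tracepoint is enabled. The _buf argument names the printbuf that the
     * caller's formatting expression writes into, which is why callers
     * pass "buf" alongside an expression that uses &buf.
     */
    #define event_add_trace(_c, _name, _amount, _buf, _expr)            \
    do {                                                                \
            count_event(_c, _name, _amount);    /* assumed counter helper */\
            if (trace_##_name##_enabled()) {                            \
                    CLASS(printbuf, _buf)();                            \
                    _expr;                                              \
                    trace_##_name(_c, _buf.buf);                        \
            }                                                           \
    } while (0)
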
@@ -1458,17 +1466,42 @@ static int do_reconcile_btree(struct moving_context *ctxt,
 			      struct bkey_s_c_backpointer bp)
 {
 	struct btree_trans *trans = ctxt->trans;
 	struct bch_fs *c = trans->c;
 
 	CLASS(btree_iter_uninit, iter)(trans);
 	struct bkey_s_c k = bkey_try(reconcile_bp_get_key(trans, &iter, bp));
+
 	if (!k.k)
 		return 0;
 
+	event_add_trace(c, reconcile_btree, btree_sectors(c), buf,
+			bch2_bkey_val_to_text(&buf, c, k));
+
 	return __do_reconcile_extent(ctxt, snapshot_io_opts, &iter, k);
 }
 
+static int update_reconcile_opts_scan(struct btree_trans *trans,
+				      struct per_snapshot_io_opts *snapshot_io_opts,
+				      struct bch_inode_opts *opts,
+				      struct btree_iter *iter,
+				      unsigned level,
+				      struct bkey_s_c k,
+				      struct reconcile_scan s)
+{
+	switch (s.type) {
+#define x(n) case RECONCILE_SCAN_##n:					\
+	event_add_trace(trans->c, reconcile_scan_##n, k.k->size,	\
+			buf, bch2_bkey_val_to_text(&buf, trans->c, k));	\
+	break;
+	RECONCILE_SCAN_TYPES()
+#undef x
+	}
+
+	return bch2_update_reconcile_opts(trans, snapshot_io_opts, opts, iter, level, k,
+					  SET_NEEDS_REBALANCE_opt_change);
+}
+
 static int do_reconcile_scan_bp(struct btree_trans *trans,
+				struct reconcile_scan s,
 				struct bkey_s_c_backpointer bp,
 				struct wb_maybe_flush *last_flushed)
 {

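update_reconcile_opts_scan() folds the per-scan-type accounting into one switch generated by the RECONCILE_SCAN_TYPES() x-macro: each x(n) expands to a case that bumps the matching reconcile_scan_* counter, so adding a scan type to the list automatically adds its counter case. The idiom in miniature, with a hypothetical three-entry list:

    #include <stdio.h>

    /* Hypothetical list; bcachefs's RECONCILE_SCAN_TYPES() works the same way. */
    #define SCAN_TYPES()    x(fs) x(metadata) x(device)

    enum scan_type {
    #define x(n)    SCAN_##n,
            SCAN_TYPES()
    #undef x
    };

    /* Re-expanding the same list generates one case per variant: */
    static void count_scan(enum scan_type t)
    {
            switch (t) {
    #define x(n)    case SCAN_##n:                          \
                    printf("counter scan_%s++\n", #n);      \
                    break;
            SCAN_TYPES()
    #undef x
            }
    }
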
@@ -1487,16 +1520,18 @@ static int do_reconcile_scan_bp(struct btree_trans *trans,
 	struct bch_inode_opts opts;
 	try(bch2_bkey_get_io_opts(trans, NULL, k, &opts));
 
-	return bch2_update_reconcile_opts(trans, NULL, &opts, &iter, bp.v->level, k,
-					  SET_NEEDS_REBALANCE_opt_change);
+	return update_reconcile_opts_scan(trans, NULL, &opts, &iter, bp.v->level, k, s);
 }
 
-static int do_reconcile_scan_indirect(struct btree_trans *trans,
+static int do_reconcile_scan_indirect(struct moving_context *ctxt,
+				      struct reconcile_scan s,
 				      struct disk_reservation *res,
 				      struct bkey_s_c_reflink_p p,
 				      struct per_snapshot_io_opts *snapshot_io_opts,
 				      struct bch_inode_opts *opts)
 {
+	struct btree_trans *trans = ctxt->trans;
+
 	u64 idx = REFLINK_P_IDX(p.v) - le32_to_cpu(p.v->front_pad);
 	u64 end = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad);
 	u32 restart_count = trans->restart_count;

@@ -1510,8 +1545,7 @@ static int do_reconcile_scan_indirect(struct btree_trans *trans,
 			break;
 
 		bch2_disk_reservation_put(trans->c, res);
-		bch2_update_reconcile_opts(trans, snapshot_io_opts, opts, &iter, 0, k,
-					   SET_NEEDS_REBALANCE_opt_change_indirect);
+		update_reconcile_opts_scan(trans, snapshot_io_opts, opts, &iter, 0, k, s);
 	})));
 
 	/* suppress trans_was_restarted() check */

@@ -1520,6 +1554,7 @@ static int do_reconcile_scan_indirect(struct btree_trans *trans,
 }
 
 static int do_reconcile_scan_btree(struct moving_context *ctxt,
+				   struct reconcile_scan s,
 				   struct per_snapshot_io_opts *snapshot_io_opts,
 				   enum btree_id btree, unsigned level,
 				   struct bpos start, struct bpos end)

@@ -1554,8 +1589,7 @@ static int do_reconcile_scan_btree(struct moving_context *ctxt,
 
 		struct bch_inode_opts opts;
 		ret = bch2_bkey_get_io_opts(trans, snapshot_io_opts, k, &opts) ?:
-			bch2_update_reconcile_opts(trans, snapshot_io_opts, &opts, &iter, level, k,
-						   SET_NEEDS_REBALANCE_opt_change);
+			update_reconcile_opts_scan(trans, snapshot_io_opts, &opts, &iter, level, k, s);
 root_err:
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 			continue;

@@ -1583,19 +1617,18 @@ root_err:
 
 			struct bch_inode_opts opts;
 			bch2_bkey_get_io_opts(trans, snapshot_io_opts, k, &opts) ?:
-			bch2_update_reconcile_opts(trans, snapshot_io_opts, &opts, &iter, level, k,
-						   SET_NEEDS_REBALANCE_opt_change) ?:
+			update_reconcile_opts_scan(trans, snapshot_io_opts, &opts, &iter, level, k, s) ?:
 			(start.inode &&
 			 k.k->type == KEY_TYPE_reflink_p &&
 			 REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)
-			 ? do_reconcile_scan_indirect(trans, &res.r, bkey_s_c_to_reflink_p(k),
+			 ? do_reconcile_scan_indirect(ctxt, s, &res.r, bkey_s_c_to_reflink_p(k),
						      snapshot_io_opts, &opts)
			 : 0) ?:
			bch2_trans_commit(trans, &res.r, NULL, BCH_TRANS_COMMIT_no_enospc);
		}));
 }
 
-static int do_reconcile_scan_fs(struct moving_context *ctxt,
+static int do_reconcile_scan_fs(struct moving_context *ctxt, struct reconcile_scan s,
 				struct per_snapshot_io_opts *snapshot_io_opts,
 				bool metadata)
 {

@@ -1614,8 +1647,8 @@ static int do_reconcile_scan_fs(struct moving_context *ctxt,
 				btree == BTREE_ID_reflink);
 
 		for (unsigned level = !scan_leaves; level < BTREE_MAX_DEPTH; level++)
-			try(do_reconcile_scan_btree(ctxt, snapshot_io_opts, btree, level,
-						    POS_MIN, SPOS_MAX));
+			try(do_reconcile_scan_btree(ctxt, s, snapshot_io_opts,
+						    btree, level, POS_MIN, SPOS_MAX));
 	}
 
 	return 0;

@@ -1635,9 +1668,9 @@ static int do_reconcile_scan(struct moving_context *ctxt,
 
 	struct reconcile_scan s = reconcile_scan_decode(c, cookie_pos.offset);
 	if (s.type == RECONCILE_SCAN_fs) {
-		try(do_reconcile_scan_fs(ctxt, snapshot_io_opts, false));
+		try(do_reconcile_scan_fs(ctxt, s, snapshot_io_opts, false));
 	} else if (s.type == RECONCILE_SCAN_metadata) {
-		try(do_reconcile_scan_fs(ctxt, snapshot_io_opts, true));
+		try(do_reconcile_scan_fs(ctxt, s, snapshot_io_opts, true));
 	} else if (s.type == RECONCILE_SCAN_device) {
 		r->scan_start = BBPOS(BTREE_ID_backpointers, POS(s.dev, 0));
 		r->scan_end   = BBPOS(BTREE_ID_backpointers, POS(s.dev, U64_MAX));

@@ -1659,13 +1692,13 @@ static int do_reconcile_scan(struct moving_context *ctxt,
 				continue;
 
 			bch2_disk_reservation_put(c, &res.r);
-			do_reconcile_scan_bp(trans, bkey_s_c_to_backpointer(k), &last_flushed);
+			do_reconcile_scan_bp(trans, s, bkey_s_c_to_backpointer(k), &last_flushed);
 		})));
 	} else if (s.type == RECONCILE_SCAN_inum) {
 		r->scan_start = BBPOS(BTREE_ID_extents, POS(s.inum, 0));
 		r->scan_end   = BBPOS(BTREE_ID_extents, POS(s.inum, U64_MAX));
 
-		try(do_reconcile_scan_btree(ctxt, snapshot_io_opts, BTREE_ID_extents, 0,
+		try(do_reconcile_scan_btree(ctxt, s, snapshot_io_opts, BTREE_ID_extents, 0,
					    r->scan_start.pos, r->scan_end.pos));
 	}
 

@@ -211,7 +211,8 @@ read_attribute(has_data);
 read_attribute(alloc_debug);
 read_attribute(usage_base);
 
-#define x(t, n, ...) read_attribute(t);
+#define x(t, n, ...)							\
+	static struct attribute sysfs_counter_##t = { .name = #t, .mode = 0644 };
 BCH_PERSISTENT_COUNTERS()
 #undef x
 

@@ -554,7 +555,7 @@ SHOW(bch2_fs_counters)
 	printbuf_tabstop_push(out, 32);
 
 #define x(t, n, f, ...)							\
-	if (attr == &sysfs_##t) {					\
+	if (attr == &sysfs_counter_##t) {				\
		counter = percpu_u64_get(&c->counters.now[BCH_COUNTER_##t]);\
		counter_since_mount = counter - c->counters.mount[BCH_COUNTER_##t];\
		if (f & TYPE_SECTORS) {					\

@@ -585,7 +586,7 @@ SYSFS_OPS(bch2_fs_counters);
 
 struct attribute *bch2_fs_counters_files[] = {
 #define x(t, ...) \
-	&sysfs_##t,
+	&sysfs_counter_##t,
 BCH_PERSISTENT_COUNTERS()
 #undef x
 	NULL

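These three sysfs hunks are one rename seen through the three expansions of BCH_PERSISTENT_COUNTERS(): the attribute definitions, the SHOW() handler's dispatch chain, and the NULL-terminated files table are all generated from the same list, so the sysfs_counter_ prefix lands everywhere at once and counter names can no longer collide with attributes declared via read_attribute(). Schematically, as a sketch with a stand-in two-entry list and a simplified struct attribute:

    /* Simplified stand-in; the kernel's struct attribute is in <linux/sysfs.h>. */
    struct attribute { const char *name; unsigned short mode; };

    /* Stand-in for BCH_PERSISTENT_COUNTERS(), which has many more entries. */
    #define COUNTERS()      x(reconcile_data) x(reconcile_btree)

    /* Expansion 1: one namespaced attribute per counter */
    #define x(t) static struct attribute sysfs_counter_##t = { .name = #t, .mode = 0644 };
    COUNTERS()
    #undef x

    /* Expansion 2 lives in the show handler: if (attr == &sysfs_counter_##t) ... */

    /* Expansion 3: the NULL-terminated table sysfs walks */
    static struct attribute *counter_files[] = {
    #define x(t)    &sysfs_counter_##t,
            COUNTERS()
    #undef x
            NULL
    };
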
@@ -6,16 +6,6 @@
 
 #include <linux/tracepoint.h>
 
-#define TRACE_BPOS_entries(name)				\
-	__field(u64,	name##_inode	)			\
-	__field(u64,	name##_offset	)			\
-	__field(u32,	name##_snapshot	)
-
-#define TRACE_BPOS_assign(dst, src)				\
-	__entry->dst##_inode		= (src).inode;		\
-	__entry->dst##_offset		= (src).offset;		\
-	__entry->dst##_snapshot		= (src).snapshot
-
 DECLARE_EVENT_CLASS(fs_str,
	TP_PROTO(struct bch_fs *c, const char *str),
	TP_ARGS(c, str),

@@ -605,7 +605,6 @@ void bch2_fs_journal_init_early(struct journal *j)
 	init_waitqueue_head(&j->reclaim_wait);
 	init_waitqueue_head(&j->pin_flush_wait);
 	mutex_init(&j->reclaim_lock);
-	mutex_init(&j->last_seq_ondisk_lock);
 	mutex_init(&j->discard_lock);
 
 	lockdep_init_map(&j->res_map, "journal res", &res_key, 0);

@@ -74,6 +74,7 @@ void bch2_journal_set_watermark(struct journal *j)
 	    track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin], low_on_pin) ||
 	    track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
		event_inc_trace(c, journal_full, buf, ({
+			guard(printbuf_atomic)(&buf);
			prt_printf(&buf, "low_on_space %u\n", low_on_space);
			prt_printf(&buf, "low_on_pin %u\n", low_on_pin);
			prt_printf(&buf, "low_on_wb %u\n", low_on_wb);

@@ -349,47 +350,40 @@ void bch2_journal_update_last_seq(struct journal *j)
 	}
 }
 
-void bch2_journal_update_last_seq_ondisk(struct journal *j, u64 last_seq_ondisk,
-					 bool clean)
+int bch2_journal_update_last_seq_ondisk(struct journal *j, u64 last_seq_ondisk,
+					darray_replicas_entry_refs *refs)
 {
-	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	union bch_replicas_padded replicas;
-	unsigned nr_refs = 0;
+	size_t dirty_entry_bytes = 0;
+
 	BUG_ON(last_seq_ondisk > j->pin.back);
 
-	scoped_guard(mutex, &j->last_seq_ondisk_lock) {
-		for (u64 seq = j->last_seq_ondisk;
-		     seq < (clean ? j->pin.back : last_seq_ondisk);
-		     seq++) {
-			struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
+	for (u64 seq = j->last_seq_ondisk; seq < last_seq_ondisk; seq++) {
+		struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
 
-			if (pin_list->devs.e.nr_devs) {
-				if (nr_refs &&
-				    !bch2_replicas_entry_eq(&replicas.e, &pin_list->devs.e)) {
-					bch2_replicas_entry_put_many(c, &replicas.e, nr_refs);
-					nr_refs = 0;
-				}
+		BUG_ON(atomic_read(&pin_list->count));
 
-				memcpy(&replicas, &pin_list->devs, replicas_entry_bytes(&pin_list->devs.e));
-				pin_list->devs.e.nr_devs = 0;
-				nr_refs++;
-			}
+		if (pin_list->devs.e.nr_devs) {
+			replicas_entry_refs *e = darray_find_p(*refs, i,
+					bch2_replicas_entry_eq(&i->replicas.e, &pin_list->devs.e));
 
+			if (e) {
+				e->nr_refs++;
+			} else {
+				try(darray_push_gfp(refs, ((replicas_entry_refs) {
+					.nr_refs	= 1,
+					.replicas	= pin_list->devs,
+				}), GFP_ATOMIC));
+			}
 
-			j->last_seq_ondisk = last_seq_ondisk;
-			if (WARN_ON(j->dirty_entry_bytes < pin_list->bytes))
-				pin_list->bytes = j->dirty_entry_bytes;
-
-			j->dirty_entry_bytes -= pin_list->bytes;
-			pin_list->bytes = 0;
-		}
+			dirty_entry_bytes += pin_list->bytes;
+			pin_list->bytes = 0;
+			pin_list->devs.e.nr_devs = 0;
+		}
 	}
 
-	if (nr_refs)
-		bch2_replicas_entry_put_many(c, &replicas.e, nr_refs);
+	scoped_guard(spinlock, &j->lock) {
+		if (WARN_ON(j->dirty_entry_bytes < dirty_entry_bytes))
+			dirty_entry_bytes = j->dirty_entry_bytes;
+		j->dirty_entry_bytes -= dirty_entry_bytes;
+	}
+
+	return 0;
 }
 
 bool __bch2_journal_pin_put(struct journal *j, u64 seq)

@@ -44,7 +44,16 @@ journal_seq_pin(struct journal *j, u64 seq)
 }
 
 void bch2_journal_update_last_seq(struct journal *);
-void bch2_journal_update_last_seq_ondisk(struct journal *, u64, bool);
+
+typedef struct {
+	unsigned			nr_refs;
+	union bch_replicas_padded	replicas;
+} replicas_entry_refs;
+
+DEFINE_DARRAY_PREALLOCATED(replicas_entry_refs, 16);
+
+int bch2_journal_update_last_seq_ondisk(struct journal *, u64,
+					darray_replicas_entry_refs *);
 
 bool __bch2_journal_pin_put(struct journal *, u64);
 void bch2_journal_pin_put(struct journal *, u64);

@@ -280,7 +280,6 @@ struct journal {
 	spinlock_t		err_lock;
 
 	struct mutex		reclaim_lock;
-	struct mutex		last_seq_ondisk_lock;
	/*
	 * Used for waiting until journal reclaim has freed up space in the
	 * journal:

@@ -184,20 +184,38 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
 	kvfree(new_buf);
 }
 
+static void replicas_refs_put(struct bch_fs *c, darray_replicas_entry_refs *refs)
+{
+	darray_for_each(*refs, i)
+		bch2_replicas_entry_put_many(c, &i->replicas.e, i->nr_refs);
+	refs->nr = 0;
+}
+
+static inline u64 last_uncompleted_write_seq(struct journal *j, u64 seq_completing)
+{
+	u64 seq = journal_last_unwritten_seq(j);
+
+	if (seq <= journal_cur_seq(j) &&
+	    (j->buf[seq & JOURNAL_BUF_MASK].write_done ||
+	     seq == seq_completing))
+		return seq;
+
+	return 0;
+}
+
 static CLOSURE_CALLBACK(journal_write_done)
 {
 	closure_type(w, struct journal_buf, io);
 	struct journal *j = container_of(w, struct journal, buf[w->idx]);
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	u64 seq = le64_to_cpu(w->data->seq);
-	u64 seq_wrote = seq;
+	u64 seq_wrote = le64_to_cpu(w->data->seq);
 	int err = 0;
 
 	bch2_time_stats_update(!JSET_NO_FLUSH(w->data)
			       ? j->flush_write_time
			       : j->noflush_write_time, j->write_start_time);
 
-	struct bch_replicas_entry_v1 *r = &journal_seq_pin(j, seq)->devs.e;
+	struct bch_replicas_entry_v1 *r = &journal_seq_pin(j, seq_wrote)->devs.e;
 	if (w->had_error) {
 		bch2_replicas_entry_put(c, r);
 		r->nr_devs = 0;

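replicas_refs_put() and the refs argument to bch2_journal_update_last_seq_ondisk() split reference dropping into collect and release phases: while scanning pin lists, the function only records which replicas entries need how many puts (GFP_ATOMIC pushes into a preallocated darray), and the caller drops the actual references later via replicas_refs_put(). Identical entries are coalesced, so N journal entries written to the same set of devices cost one put-many rather than N puts. A generic sketch of that find-or-bump aggregation, with stand-in types for the darray and replicas entries:

    #include <string.h>

    struct key   { int devs[4]; };
    struct entry { struct key k; unsigned nr_refs; };
    struct batch { struct entry e[16]; unsigned nr; };

    /* Find an existing slot for this key and bump it, else append one;
     * releasing the batch then costs one put of nr_refs per distinct key. */
    static int batch_add(struct batch *b, const struct key *k)
    {
            for (unsigned i = 0; i < b->nr; i++)
                    if (!memcmp(&b->e[i].k, k, sizeof(*k))) {
                            b->e[i].nr_refs++;
                            return 0;
                    }

            if (b->nr == 16)
                    return -1;      /* the real darray would grow instead */

            b->e[b->nr++] = (struct entry) { .k = *k, .nr_refs = 1 };
            return 0;
    }
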
@@ -230,59 +248,85 @@ static CLOSURE_CALLBACK(journal_write_done)
 
 	closure_debug_destroy(cl);
 
+	CLASS(darray_replicas_entry_refs, replicas_refs)();
+
 	spin_lock(&j->lock);
-	BUG_ON(seq < j->pin.front);
-	if (err && (!j->err_seq || seq < j->err_seq))
-		j->err_seq = seq;
+	BUG_ON(seq_wrote < j->pin.front);
+	if (err && (!j->err_seq || seq_wrote < j->err_seq))
+		j->err_seq = seq_wrote;
 
	if (!j->free_buf || j->free_buf_size < w->buf_size) {
		swap(j->free_buf,	w->data);
		swap(j->free_buf_size,	w->buf_size);
	}
 
-	if (w->data) {
-		void *buf = w->data;
-		w->data = NULL;
-		w->buf_size = 0;
-
-		spin_unlock(&j->lock);
-		kvfree(buf);
-		spin_lock(&j->lock);
-	}
+	/* kvfree can allocate memory, and can't be called under j->lock */
+	void *buf_to_free __free(kvfree) = w->data;
+	w->data = NULL;
+	w->buf_size = 0;
 
 	bool completed = false;
-	bool last_seq_ondisk_updated = false;
-again:
-	for (seq = journal_last_unwritten_seq(j);
-	     seq <= journal_cur_seq(j);
-	     seq++) {
+
+	u64 seq;
+	while ((seq = last_uncompleted_write_seq(j, seq_wrote))) {
 		w = j->buf + (seq & JOURNAL_BUF_MASK);
-		if (!w->write_done && seq != seq_wrote)
-			break;
 
		if (!j->err_seq && !w->noflush) {
			BUG_ON(w->empty && w->last_seq != seq);
 
			if (j->last_seq_ondisk < w->last_seq) {
-				spin_unlock(&j->lock);
+				bch2_journal_update_last_seq_ondisk(j,
+						w->last_seq + w->empty, &replicas_refs);
				/*
-				 * this needs to happen _before_ updating
-				 * j->flushed_seq_ondisk, for flushing to work
-				 * properly - when the flush completes replicas
-				 * refs need to have been dropped
+				 * bch2_journal_update_last_seq_ondisk()
+				 * can return an error if appending to
+				 * replicas_refs failed, but we don't
+				 * care - it's a preallocated darray so
+				 * it'll always be able to do some
+				 * work, and we have to retry anyways,
+				 * because we have to drop j->lock to
+				 * put the replicas refs before updating
+				 * j->flushed_seq_ondisk
				 */
-				bch2_journal_update_last_seq_ondisk(j, w->last_seq, w->empty);
-				last_seq_ondisk_updated = true;
-				spin_lock(&j->lock);
-				goto again;
+
+				/*
+				 * Do this before updating j->last_seq_ondisk,
+				 * or journal flushing breaks:
+				 */
+				if (replicas_refs.nr) {
+					spin_unlock(&j->lock);
+					replicas_refs_put(c, &replicas_refs);
+					spin_lock(&j->lock);
+					continue;
+				}
+
+				BUG_ON(j->last_seq > j->last_seq);
+				j->last_seq_ondisk = w->last_seq;
			}
 
+			/* replicas refs need to be put first */
			j->flushed_seq_ondisk = seq;
		}
 
-		j->seq_ondisk = seq;
-
		if (w->empty)
			j->last_empty_seq = seq;
+		j->seq_ondisk = seq;
 
+		closure_wake_up(&w->wait);
+		completed = true;
+	}
+
+	/*
+	 * Writes might complete out of order, but we have to do the completions
+	 * in order: if we complete out of order we note it here so the next
+	 * write completion will pick it up:
+	 */
+	j->buf[seq_wrote & JOURNAL_BUF_MASK].write_done = true;
+	j->pin.front = min(j->pin.back, j->last_seq_ondisk);
+
+	if (completed) {
		/*
		 * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
		 * more buckets:

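The restructured completion path makes the ordering rule explicit: journal writes may finish out of order, but completion work must run in sequence order, so each finishing write marks its buffer write_done and the while loop drains every consecutive completed buffer starting from the oldest unwritten one (last_uncompleted_write_seq() treats the currently-completing seq as done even though its flag is only set afterwards). The buf_to_free local tagged __free(kvfree), the cleanup.h scope-exit helper, frees the old buffer automatically once the variable goes out of scope after j->lock has been released, replacing the old unlock/kvfree/relock dance. A toy model of the in-order completion protocol, with hypothetical names:

    #include <stdbool.h>
    #include <stdint.h>

    #define NBUFS 4U        /* stand-in for the journal's buffer ring size */

    /* Hypothetical model: a ring of write buffers whose IO may finish in
     * any order, with completion work forced back into sequence order. */
    struct ring {
            uint64_t oldest;        /* oldest seq not yet completed */
            uint64_t next;          /* one past the newest issued seq */
            bool done[NBUFS];
    };

    static void on_write_done(struct ring *r, uint64_t seq_wrote)
    {
            r->done[seq_wrote % NBUFS] = true;

            /* drain every consecutive finished write, oldest first */
            while (r->oldest < r->next && r->done[r->oldest % NBUFS]) {
                    /* ... completion work for r->oldest ... */
                    r->done[r->oldest % NBUFS] = false;
                    r->oldest++;
            }
    }
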
@@ -293,13 +337,6 @@ again:
 		if (j->watermark != BCH_WATERMARK_stripe)
 			journal_reclaim_kick(&c->journal);
 
-		closure_wake_up(&w->wait);
-		completed = true;
-	}
-
-	j->buf[seq_wrote & JOURNAL_BUF_MASK].write_done = true;
-
-	if (completed) {
 		bch2_journal_update_last_seq(j);
 		bch2_journal_space_available(j);
 

@@ -308,8 +345,6 @@ again:
 		journal_wake(j);
 	}
 
-	j->pin.front = min(j->pin.back, j->last_seq_ondisk);
-
 	if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
 	    j->reservations.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
 		struct journal_buf *buf = journal_cur_buf(j);

@@ -38,13 +38,20 @@ enum bch_counters_flags {
 	x(data_update_key_fail,			38,	TYPE_SECTORS)	\
 	x(data_update_start_fail_obsolete,	39,	TYPE_COUNTER)	\
 	x(data_update_noop_obsolete,		92,	TYPE_COUNTER)	\
+	x(reconcile_scan_fs,			113,	TYPE_SECTORS)	\
+	x(reconcile_scan_metadata,		114,	TYPE_SECTORS)	\
+	x(reconcile_scan_pending,		115,	TYPE_SECTORS)	\
+	x(reconcile_scan_device,		116,	TYPE_SECTORS)	\
+	x(reconcile_scan_inum,			117,	TYPE_SECTORS)	\
+	x(reconcile_btree,			118,	TYPE_SECTORS)	\
+	x(reconcile_data,			119,	TYPE_SECTORS)	\
+	x(reconcile_set_pending,		83,	TYPE_SECTORS)	\
 	x(evacuate_bucket,			84,	TYPE_COUNTER)	\
 	x(stripe_create,			102,	TYPE_COUNTER)	\
 	x(stripe_create_fail,			103,	TYPE_COUNTER)	\
 	x(stripe_update_bucket,			104,	TYPE_COUNTER)	\
 	x(stripe_update_extent,			99,	TYPE_COUNTER)	\
 	x(stripe_update_extent_fail,		100,	TYPE_COUNTER)	\
-	x(reconcile_set_pending,		83,	TYPE_COUNTER)	\
 	x(copygc,				40,	TYPE_COUNTER)	\
 	x(copygc_wait_obsolete,			41,	TYPE_COUNTER)	\
 	x(bucket_invalidate,			3,	TYPE_COUNTER)	\

@@ -152,6 +152,10 @@ DEFINE_CLASS(_type, _type, darray_exit(&(_T)), (_type) {}, void)
 	typedef DARRAY(_type) darray_##_type;				\
 	DEFINE_DARRAY_CLASS(darray_##_type)
 
+#define DEFINE_DARRAY_PREALLOCATED(_type, _nr)				\
+	typedef DARRAY_PREALLOCATED(_type, _nr) darray_##_type;		\
+	DEFINE_DARRAY_CLASS(darray_##_type)
+
 #define DEFINE_DARRAY_NAMED(_name, _type)				\
 	typedef DARRAY(_type) _name;					\
 	DEFINE_DARRAY_CLASS(_name)

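DEFINE_DARRAY_PREALLOCATED() mirrors DEFINE_DARRAY(): it generates the typedef plus the CLASS() constructor/destructor for a darray with inline storage. The layout behind DARRAY_PREALLOCATED keeps the first _nr elements inside the struct itself, which is what lets journal completion build its 16-entry replicas_refs list with GFP_ATOMIC pushes that normally never touch the allocator. Roughly the shape, as a simplified sketch (see darray.h for the real definition):

    /*
     * Simplified sketch: data starts out pointing at the inline buffer,
     * so pushes below the inline capacity need no allocation; the array
     * only falls back to the allocator when it outgrows _nr elements.
     */
    #define DARRAY_PREALLOCATED(_type, _nr)             \
    struct {                                            \
            size_t nr, size;                            \
            _type *data;                                \
            _type preallocated[_nr];                    \
    }
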
@@ -2391,7 +2391,7 @@ int bch2_fs_vfs_init(struct bch_fs *c)
 	try(rhltable_init(&c->vfs_inodes_by_inum_table, &bch2_vfs_inodes_by_inum_params));
 
 	c->vfs_writeback_wq = alloc_workqueue("bcachefs_vfs_writeback",
-					      WQ_PERCPU|WQ_MEM_RECLAIM|WQ_FREEZABLE, 1);
+					      WQ_MEM_RECLAIM|WQ_FREEZABLE, 1);
 	if (!c->vfs_writeback_wq)
 		return bch_err_throw(c, ENOMEM_fs_other_alloc);