Update bcachefs sources to 454bd4f82d bcachefs: Fix for the stripes mark path and gc

This commit is contained in:
Kent Overstreet 2019-05-08 19:13:46 -04:00
parent b485aae1ba
commit 93bdfcb210
17 changed files with 177 additions and 97 deletions

View File

@ -1 +1 @@
6f603b8d79efa7d9ac04ea0c38ef1bbaa10fd678 454bd4f82d85bb42a86b8eb0172b13e86e5788a7

View File

@ -498,6 +498,78 @@ TRACE_EVENT(copygc,
__entry->buckets_moved, __entry->buckets_not_moved) __entry->buckets_moved, __entry->buckets_not_moved)
); );
/*
 * Event class shared by every trans_restart_* tracepoint.
 *
 * @c:  filesystem the restarted transaction belongs to. The first 16 bytes
 *      of c->name are copied into the record; they are stored but not
 *      included in the TP_printk() output.
 * @ip: code address identifying the transaction. Callers in this file pass
 *      trans->ip, which bch2_trans_init() sets to _RET_IP_.
 *
 * The address is printed with %ps, not %pf: @ip is a plain text address
 * (a return address), not a function pointer, so it must not go through the
 * function-descriptor dereference that %pf performs on some architectures.
 */
DECLARE_EVENT_CLASS(transaction_restart,
	TP_PROTO(struct bch_fs *c, unsigned long ip),
	TP_ARGS(c, ip),

	TP_STRUCT__entry(
		__array(char,			name,	16)
		__field(unsigned long,		ip	)
	),

	TP_fast_assign(
		memcpy(__entry->name, c->name, 16);
		__entry->ip = ip;
	),

	TP_printk("%ps", (void *) __entry->ip)
);
/*
 * One tracepoint per reason a btree transaction is restarted. Each is an
 * instance of the transaction_restart event class and takes the same
 * (struct bch_fs *c, unsigned long ip) arguments; callers pass trans->ip.
 */

/* Emitted just before a node lookup gives up and returns ERR_PTR(-EINTR). */
DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
);
/* Emitted when __bch2_btree_node_lock() declines to take the lock and returns false. */
DEFINE_EVENT(transaction_restart, trans_restart_would_deadlock,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
);
/* Emitted when -EINTR is returned because trans->iters_live was nonzero. */
DEFINE_EVENT(transaction_restart, trans_restart_iters_realloced,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
);
/* Emitted when bch2_trans_kmalloc() returns ERR_PTR(-EINTR) because old_bytes was nonzero. */
DEFINE_EVENT(transaction_restart, trans_restart_mem_realloced,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
);
/* Paired with the "(iter relock after journal res get blocked)" restart. */
DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
);
/* Paired with the "(iter relock after journal preres get blocked)" restart. */
DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
);
/* Paired with the "(iter relock after marking replicas)" restart. */
DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
);
/* Paired with the "(race)" restart taken when race_fault() is true. */
DEFINE_EVENT(transaction_restart, trans_restart_fault_inject,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
);
/* Paired with the "(split)" restart. */
DEFINE_EVENT(transaction_restart, trans_restart_btree_node_split,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
);
/* Paired with the "(traverse)" restart. */
DEFINE_EVENT(transaction_restart, trans_restart_traverse,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
);
/* Paired with the "(atomic)" restart. */
DEFINE_EVENT(transaction_restart, trans_restart_atomic,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
);
#endif /* _TRACE_BCACHE_H */ #endif /* _TRACE_BCACHE_H */
/* This part must be outside protection */ /* This part must be outside protection */

View File

@ -732,6 +732,8 @@ retry:
goto retry; goto retry;
trans_restart(); trans_restart();
trace_trans_restart_btree_node_reused(c,
iter->trans->ip);
return ERR_PTR(-EINTR); return ERR_PTR(-EINTR);
} }
} }

View File

@ -251,12 +251,15 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
} }
} }
if (ret) if (unlikely(!ret)) {
__btree_node_lock_type(iter->trans->c, b, type);
else
trans_restart(); trans_restart();
trace_trans_restart_would_deadlock(iter->trans->c,
iter->trans->ip);
return false;
}
return ret; __btree_node_lock_type(iter->trans->c, b, type);
return true;
} }
/* Btree iterator locking: */ /* Btree iterator locking: */
@ -1692,6 +1695,7 @@ success:
if (trans->iters_live) { if (trans->iters_live) {
trans_restart(); trans_restart();
trace_trans_restart_iters_realloced(trans->c, trans->ip);
return -EINTR; return -EINTR;
} }
@ -1859,6 +1863,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans,
if (old_bytes) { if (old_bytes) {
trans_restart(); trans_restart();
trace_trans_restart_mem_realloced(trans->c, trans->ip);
return ERR_PTR(-EINTR); return ERR_PTR(-EINTR);
} }
} }
@ -1935,6 +1940,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c)
memset(trans, 0, offsetof(struct btree_trans, iters_onstack)); memset(trans, 0, offsetof(struct btree_trans, iters_onstack));
trans->c = c; trans->c = c;
trans->ip = _RET_IP_;
trans->size = ARRAY_SIZE(trans->iters_onstack); trans->size = ARRAY_SIZE(trans->iters_onstack);
trans->iters = trans->iters_onstack; trans->iters = trans->iters_onstack;
trans->updates = trans->updates_onstack; trans->updates = trans->updates_onstack;

View File

@ -268,6 +268,7 @@ struct btree_insert_entry {
struct btree_trans { struct btree_trans {
struct bch_fs *c; struct bch_fs *c;
unsigned long ip;
size_t nr_restarts; size_t nr_restarts;
u64 commit_start; u64 commit_start;

View File

@ -439,6 +439,7 @@ static int bch2_trans_journal_preres_get(struct btree_trans *trans)
if (!bch2_btree_trans_relock(trans)) { if (!bch2_btree_trans_relock(trans)) {
trans_restart(" (iter relock after journal preres get blocked)"); trans_restart(" (iter relock after journal preres get blocked)");
trace_trans_restart_journal_preres_get(c, trans->ip);
return -EINTR; return -EINTR;
} }
@ -575,6 +576,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
if (race_fault()) { if (race_fault()) {
ret = -EINTR; ret = -EINTR;
trans_restart(" (race)"); trans_restart(" (race)");
trace_trans_restart_fault_inject(c, trans->ip);
goto out; goto out;
} }
@ -725,6 +727,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
*/ */
if (!ret || (flags & BTREE_INSERT_NOUNLOCK)) { if (!ret || (flags & BTREE_INSERT_NOUNLOCK)) {
trans_restart(" (split)"); trans_restart(" (split)");
trace_trans_restart_btree_node_split(c, trans->ip);
ret = -EINTR; ret = -EINTR;
} }
break; break;
@ -744,6 +747,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
return 0; return 0;
trans_restart(" (iter relock after marking replicas)"); trans_restart(" (iter relock after marking replicas)");
trace_trans_restart_mark_replicas(c, trans->ip);
ret = -EINTR; ret = -EINTR;
break; break;
case BTREE_INSERT_NEED_JOURNAL_RES: case BTREE_INSERT_NEED_JOURNAL_RES:
@ -757,6 +761,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
return 0; return 0;
trans_restart(" (iter relock after journal res get blocked)"); trans_restart(" (iter relock after journal res get blocked)");
trace_trans_restart_journal_res_get(c, trans->ip);
ret = -EINTR; ret = -EINTR;
break; break;
default: default:
@ -769,6 +774,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
if (ret2) { if (ret2) {
trans_restart(" (traverse)"); trans_restart(" (traverse)");
trace_trans_restart_traverse(c, trans->ip);
return ret2; return ret2;
} }
@ -780,6 +786,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
return 0; return 0;
trans_restart(" (atomic)"); trans_restart(" (atomic)");
trace_trans_restart_atomic(c, trans->ip);
} }
return ret; return ret;

View File

@ -983,7 +983,7 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
return -1; return -1;
} }
if (m->alive) if (!gc && m->alive)
bch2_stripes_heap_del(c, m, idx); bch2_stripes_heap_del(c, m, idx);
memset(m, 0, sizeof(*m)); memset(m, 0, sizeof(*m));

View File

@ -97,7 +97,7 @@ struct bch_fs_usage_short {
struct replicas_delta { struct replicas_delta {
s64 delta; s64 delta;
struct bch_replicas_entry r; struct bch_replicas_entry r;
}; } __packed;
struct replicas_delta_list { struct replicas_delta_list {
struct bch_fs_usage fs_usage; struct bch_fs_usage fs_usage;

View File

@ -1189,9 +1189,10 @@ static void bch2_writepage_io_done(struct closure *cl)
unsigned i; unsigned i;
if (io->op.op.error) { if (io->op.op.error) {
bio_for_each_segment_all(bvec, bio, i) bio_for_each_segment_all(bvec, bio, i) {
SetPageError(bvec->bv_page); SetPageError(bvec->bv_page);
set_bit(AS_EIO, &io->op.inode->v.i_mapping->flags); mapping_set_error(bvec->bv_page->mapping, -EIO);
}
} }
/* /*
@ -2079,10 +2080,9 @@ int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
if (ret) if (ret)
return ret; return ret;
out: out:
if (c->opts.journal_flush_disabled) if (!c->opts.journal_flush_disabled)
return 0; ret = bch2_journal_flush_seq(&c->journal,
inode->ei_journal_seq);
ret = bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq);
ret2 = file_check_and_advance_wb_err(file); ret2 = file_check_and_advance_wb_err(file);
return ret ?: ret2; return ret ?: ret2;

View File

@ -395,7 +395,7 @@ retry:
if (!tmpfile) { if (!tmpfile) {
bch2_inode_update_after_write(c, dir, &dir_u, bch2_inode_update_after_write(c, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME); ATTR_MTIME|ATTR_CTIME);
journal_seq_copy(dir, inode->ei_journal_seq); journal_seq_copy(dir, journal_seq);
mutex_unlock(&dir->ei_update_lock); mutex_unlock(&dir->ei_update_lock);
} }

View File

@ -47,10 +47,6 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS_MIN, BTREE_ITER_PREFETCH); POS_MIN, BTREE_ITER_PREFETCH);
mutex_lock(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
while ((k = bch2_btree_iter_peek(iter)).k && while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k))) { !(ret = bkey_err(k))) {
if (!bkey_extent_is_data(k.k) || if (!bkey_extent_is_data(k.k) ||
@ -96,13 +92,10 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
break; break;
} }
ret = bch2_trans_exit(&trans) ?: ret;
BUG_ON(ret == -EINTR); BUG_ON(ret == -EINTR);
bch2_trans_exit(&trans);
bch2_replicas_gc_end(c, ret);
mutex_unlock(&c->replicas_gc_lock);
return ret; return ret;
} }
@ -122,9 +115,6 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
bch2_trans_init(&trans, c); bch2_trans_init(&trans, c);
closure_init_stack(&cl); closure_init_stack(&cl);
mutex_lock(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
for (id = 0; id < BTREE_ID_NR; id++) { for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&trans, iter, id, POS_MIN, for_each_btree_node(&trans, iter, id, POS_MIN,
BTREE_ITER_PREFETCH, b) { BTREE_ITER_PREFETCH, b) {
@ -177,10 +167,9 @@ retry:
ret = 0; ret = 0;
err: err:
bch2_trans_exit(&trans); ret = bch2_trans_exit(&trans) ?: ret;
ret = bch2_replicas_gc_end(c, ret); BUG_ON(ret == -EINTR);
mutex_unlock(&c->replicas_gc_lock);
return ret; return ret;
} }
@ -188,5 +177,6 @@ err:
int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags) int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{ {
return bch2_dev_usrdata_drop(c, dev_idx, flags) ?: return bch2_dev_usrdata_drop(c, dev_idx, flags) ?:
bch2_dev_metadata_drop(c, dev_idx, flags); bch2_dev_metadata_drop(c, dev_idx, flags) ?:
bch2_replicas_gc2(c);
} }

View File

@ -620,64 +620,6 @@ out:
return ret; return ret;
} }
/*
 * Walk every key in the extents btree and call bch2_mark_bkey_replicas() on
 * it, bracketed by bch2_replicas_gc_start()/bch2_replicas_gc_end() for the
 * BCH_DATA_USER and BCH_DATA_CACHED types, all under c->replicas_gc_lock.
 *
 * Returns 0 on success; otherwise the error from bch2_trans_exit() if it
 * reports one, else the error that stopped the walk.
 */
static int bch2_gc_data_replicas(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
int ret;
bch2_trans_init(&trans, c);
mutex_lock(&c->replicas_gc_lock);
/* NOTE(review): the return value of bch2_replicas_gc_start() is not checked. */
bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
ret = bch2_mark_bkey_replicas(c, k);
/* Stop at the first key that fails to mark. */
if (ret)
break;
}
/* An error from tearing down the transaction takes precedence over the walk's. */
ret = bch2_trans_exit(&trans) ?: ret;
/* gc_end is handed the final status; its own return value is discarded here. */
bch2_replicas_gc_end(c, ret);
mutex_unlock(&c->replicas_gc_lock);
return ret;
}
/*
 * Walk every node of every btree and call bch2_mark_bkey_replicas() on the
 * node's key, bracketed by bch2_replicas_gc_start()/bch2_replicas_gc_end()
 * for BCH_DATA_BTREE, all under c->replicas_gc_lock.
 *
 * The walk stops at the first node that fails to mark, so that error is
 * what gets passed to bch2_replicas_gc_end() and returned, rather than
 * being overwritten by the result for a later node.
 *
 * Returns 0 on success, or the first error encountered (an error from
 * bch2_trans_iter_free() takes precedence over a marking error).
 */
static int bch2_gc_btree_replicas(struct bch_fs *c)
{
	struct btree_trans trans;
	struct btree_iter *iter;
	struct btree *b;
	unsigned id;
	int ret = 0;

	bch2_trans_init(&trans, c);

	mutex_lock(&c->replicas_gc_lock);
	bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);

	/* Don't start on the next btree once an error has been recorded. */
	for (id = 0; id < BTREE_ID_NR && !ret; id++) {
		for_each_btree_node(&trans, iter, id, POS_MIN,
				    BTREE_ITER_PREFETCH, b) {
			ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
			if (ret)
				break;

			bch2_trans_cond_resched(&trans);
		}

		/* Always release the iterator, even when the walk was cut short. */
		ret = bch2_trans_iter_free(&trans, iter) ?: ret;
	}

	bch2_trans_exit(&trans);
	bch2_replicas_gc_end(c, ret);
	mutex_unlock(&c->replicas_gc_lock);

	return ret;
}
static int bch2_move_btree(struct bch_fs *c, static int bch2_move_btree(struct bch_fs *c,
move_pred_fn pred, move_pred_fn pred,
void *arg, void *arg,
@ -804,14 +746,14 @@ int bch2_data_job(struct bch_fs *c,
bch2_journal_meta(&c->journal); bch2_journal_meta(&c->journal);
} }
ret = bch2_gc_btree_replicas(c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret;
ret = bch2_move_data(c, NULL, ret = bch2_move_data(c, NULL,
writepoint_hashed((unsigned long) current), writepoint_hashed((unsigned long) current),
op.start, op.start,
op.end, op.end,
rereplicate_pred, c, stats) ?: ret; rereplicate_pred, c, stats) ?: ret;
ret = bch2_gc_data_replicas(c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret;
break; break;
case BCH_DATA_OP_MIGRATE: case BCH_DATA_OP_MIGRATE:
if (op.migrate.dev >= c->sb.nr_devices) if (op.migrate.dev >= c->sb.nr_devices)
@ -821,14 +763,14 @@ int bch2_data_job(struct bch_fs *c,
ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret; ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
ret = bch2_gc_btree_replicas(c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret;
ret = bch2_move_data(c, NULL, ret = bch2_move_data(c, NULL,
writepoint_hashed((unsigned long) current), writepoint_hashed((unsigned long) current),
op.start, op.start,
op.end, op.end,
migrate_pred, &op, stats) ?: ret; migrate_pred, &op, stats) ?: ret;
ret = bch2_gc_data_replicas(c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret;
break; break;
default: default:
ret = -EINVAL; ret = -EINVAL;

View File

@ -219,7 +219,7 @@ enum opt_type {
x(fsck, u8, \ x(fsck, u8, \
OPT_MOUNT, \ OPT_MOUNT, \
OPT_BOOL(), \ OPT_BOOL(), \
NO_SB_OPT, true, \ NO_SB_OPT, false, \
NULL, "Run fsck on mount") \ NULL, "Run fsck on mount") \
x(fix_errors, u8, \ x(fix_errors, u8, \
OPT_MOUNT, \ OPT_MOUNT, \

View File

@ -567,6 +567,64 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
return 0; return 0;
} }
/*
 * Rebuild the replicas table so that it only contains entries still in use.
 *
 * An entry of c->replicas is kept if its data type is BCH_DATA_JOURNAL or
 * if its counter is nonzero in c->usage_base, c->usage[0] or c->usage[1].
 * The surviving entries are sorted, passed to
 * bch2_cpu_replicas_to_sb_replicas() and replicas_table_update(), and on
 * success bch2_write_super() is called.
 *
 * Returns 0 on success, -ENOMEM if the scratch table cannot be allocated,
 * -ENOSPC if bch2_cpu_replicas_to_sb_replicas() fails, or the error from
 * replicas_table_update().
 */
int bch2_replicas_gc2(struct bch_fs *c)
{
struct bch_replicas_cpu new = { 0 };
unsigned i, nr;
int ret = 0;
bch2_journal_meta(&c->journal);
retry:
/*
 * Size the scratch table from a snapshot taken before sb_lock and
 * mark_lock are acquired; the snapshot is re-validated under the locks.
 */
nr = READ_ONCE(c->replicas.nr);
new.entry_size = READ_ONCE(c->replicas.entry_size);
new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL);
if (!new.entries)
return -ENOMEM;
mutex_lock(&c->sb_lock);
percpu_down_write(&c->mark_lock);
/* The table changed shape while we were allocating: drop everything and retry. */
if (nr != c->replicas.nr ||
new.entry_size != c->replicas.entry_size) {
percpu_up_write(&c->mark_lock);
mutex_unlock(&c->sb_lock);
kfree(new.entries);
goto retry;
}
for (i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);
/* Keep journal entries unconditionally, and anything with a nonzero usage counter. */
if (e->data_type == BCH_DATA_JOURNAL ||
c->usage_base->replicas[i] ||
percpu_u64_get(&c->usage[0]->replicas[i]) ||
percpu_u64_get(&c->usage[1]->replicas[i]))
memcpy(cpu_replicas_entry(&new, new.nr++),
e, new.entry_size);
}
bch2_cpu_replicas_sort(&new);
if (bch2_cpu_replicas_to_sb_replicas(c, &new)) {
ret = -ENOSPC;
goto err;
}
ret = replicas_table_update(c, &new);
err:
/*
 * new.entries is freed on both the success and failure paths.
 * NOTE(review): presumably replicas_table_update() copies or swaps the
 * buffer so this does not free the live table - confirm.
 */
kfree(new.entries);
percpu_up_write(&c->mark_lock);
/* The superblock is written with sb_lock still held, after mark_lock is dropped. */
if (!ret)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
return ret;
}
int bch2_replicas_set_usage(struct bch_fs *c, int bch2_replicas_set_usage(struct bch_fs *c,
struct bch_replicas_entry *r, struct bch_replicas_entry *r,
u64 sectors) u64 sectors)

View File

@ -58,6 +58,7 @@ unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
int bch2_replicas_gc_end(struct bch_fs *, int); int bch2_replicas_gc_end(struct bch_fs *, int);
int bch2_replicas_gc_start(struct bch_fs *, unsigned); int bch2_replicas_gc_start(struct bch_fs *, unsigned);
int bch2_replicas_gc2(struct bch_fs *);
int bch2_replicas_set_usage(struct bch_fs *, int bch2_replicas_set_usage(struct bch_fs *,
struct bch_replicas_entry *, struct bch_replicas_entry *,

View File

@ -735,9 +735,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
sizeof(struct btree_node_iter_set); sizeof(struct btree_node_iter_set);
if (!(c->wq = alloc_workqueue("bcachefs", if (!(c->wq = alloc_workqueue("bcachefs",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcache_copygc", !(c->copygc_wq = alloc_workqueue("bcache_copygc",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->journal_reclaim_wq = alloc_workqueue("bcache_journal", !(c->journal_reclaim_wq = alloc_workqueue("bcache_journal",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
percpu_ref_init(&c->writes, bch2_writes_disabled, percpu_ref_init(&c->writes, bch2_writes_disabled,
@ -925,7 +925,8 @@ err:
break; break;
} }
BUG_ON(!ret); if (ret >= 0)
ret = -EIO;
goto out; goto out;
} }

View File

@ -142,10 +142,10 @@ void bch2_flags_to_text(struct printbuf *out,
nr++; nr++;
while (flags && (bit = __ffs(flags)) < nr) { while (flags && (bit = __ffs(flags)) < nr) {
pr_buf(out, "%s", list[bit]);
if (!first) if (!first)
pr_buf(out, ","); pr_buf(out, ",");
first = false; first = false;
pr_buf(out, "%s", list[bit]);
flags ^= 1 << bit; flags ^= 1 << bit;
} }
} }