Update bcachefs sources to 661faf58db bcachefs: fix a deadlock in journal replay error path

Kent Overstreet 2017-11-12 18:23:29 -05:00
parent 3347339bbd
commit 74cb922032
7 changed files with 61 additions and 28 deletions

.bcachefs_revision

@@ -1 +1 @@
-6a361fb68c8b0b7cd3bc0085b8d21b808fdc13eb
+661faf58dbcab87e512e64e7cb164905689e64c8

libbcachefs/alloc.c

@@ -1153,6 +1153,7 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
 	 * open_bucket_add_buckets expects new pointers at the head of
 	 * the list:
 	 */
+	BUG_ON(ob->nr_ptrs >= BCH_REPLICAS_MAX);
 	memmove(&ob->ptrs[1],
 		&ob->ptrs[0],
 		ob->nr_ptrs * sizeof(ob->ptrs[0]));
@@ -1239,12 +1240,15 @@ static int bch2_bucket_alloc_set(struct bch_fs *c, struct write_point *wp,
  * reference _after_ doing the index update that makes its allocation reachable.
  */
-static void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
+void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
 {
 	const struct bch_extent_ptr *ptr;
+	u8 new_ob;

-	lockdep_assert_held(&c->open_buckets_lock);
+	if (!atomic_dec_and_test(&ob->pin))
+		return;
+
+	spin_lock(&c->open_buckets_lock);

 	open_bucket_for_each_ptr(ob, ptr) {
 		struct bch_dev *ca = c->devs[ptr->dev];
@@ -1252,19 +1256,17 @@ static void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
 	}

 	ob->nr_ptrs = 0;

+	new_ob = ob->new_ob;
+	ob->new_ob = 0;
+
 	list_move(&ob->list, &c->open_buckets_free);
 	c->open_buckets_nr_free++;
-	closure_wake_up(&c->open_buckets_wait);
-}
-
-void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *b)
-{
-	if (atomic_dec_and_test(&b->pin)) {
-		spin_lock(&c->open_buckets_lock);
-		__bch2_open_bucket_put(c, b);
-		spin_unlock(&c->open_buckets_lock);
-	}
+	spin_unlock(&c->open_buckets_lock);
+
+	closure_wake_up(&c->open_buckets_wait);
+
+	if (new_ob)
+		bch2_open_bucket_put(c, c->open_buckets + new_ob);
 }

 static struct open_bucket *bch2_open_bucket_get(struct bch_fs *c,
@@ -1285,6 +1287,9 @@ static struct open_bucket *bch2_open_bucket_get(struct bch_fs *c,
 		atomic_set(&ret->pin, 1); /* XXX */
 		ret->has_full_ptrs = false;

+		BUG_ON(ret->new_ob);
+		BUG_ON(ret->nr_ptrs);
+
 		c->open_buckets_nr_free--;
 		trace_open_bucket_alloc(c, cl);
 	} else {
@@ -1333,16 +1338,33 @@ static void open_bucket_copy_unused_ptrs(struct bch_fs *c,
 					 struct open_bucket *new,
 					 struct open_bucket *old)
 {
-	unsigned i;
+	bool moved_ptr = false;
+	int i;

-	for (i = 0; i < old->nr_ptrs; i++)
+	for (i = old->nr_ptrs - 1; i >= 0; --i)
 		if (ob_ptr_sectors_free(c, old, &old->ptrs[i])) {
-			struct bch_extent_ptr tmp = old->ptrs[i];
-
-			tmp.offset += old->ptr_offset[i];
-			new->ptrs[new->nr_ptrs] = tmp;
-			new->ptr_offset[new->nr_ptrs] = 0;
+			BUG_ON(new->nr_ptrs >= BCH_REPLICAS_MAX);
+
+			new->ptrs[new->nr_ptrs] = old->ptrs[i];
+			new->ptr_offset[new->nr_ptrs] = old->ptr_offset[i];
 			new->nr_ptrs++;
+
+			old->nr_ptrs--;
+			memmove(&old->ptrs[i],
+				&old->ptrs[i + 1],
+				(old->nr_ptrs - i) * sizeof(old->ptrs[0]));
+			memmove(&old->ptr_offset[i],
+				&old->ptr_offset[i + 1],
+				(old->nr_ptrs - i) * sizeof(old->ptr_offset[0]));
+
+			moved_ptr = true;
 		}
+
+	if (moved_ptr) {
+		BUG_ON(old->new_ob);
+		atomic_inc(&new->pin);
+		old->new_ob = new - c->open_buckets;
+	}
 }
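
The new_ob chaining introduced above (declared as a u8 new_ob field in struct open_bucket below) works like this: when open_bucket_copy_unused_ptrs() migrates still-unused pointers from an old open_bucket to a new one, the old bucket takes a pin on the new bucket and records its index; bch2_open_bucket_put() then drops that pin once the old bucket's own pin count reaches zero. A minimal standalone sketch of this pin-chaining pattern, with hypothetical names and types rather than the bcachefs code:

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for struct open_bucket: a refcount plus the index of a
 * chained bucket (0 means none, like new_ob). */
struct bucket {
	atomic_int	pin;
	int		next;
};

static struct bucket buckets[8];

/* Dropping the last pin frees the bucket, then drops the pin it held
 * on its successor, mirroring how bch2_open_bucket_put() follows new_ob. */
static void bucket_put(int i)
{
	while (i && atomic_fetch_sub(&buckets[i].pin, 1) == 1) {
		int next = buckets[i].next;	/* was the last pin: follow chain */
		buckets[i].next = 0;
		i = next;
	}
}

/* Chaining pins the successor, as open_bucket_copy_unused_ptrs() does. */
static void bucket_chain(int old, int new)
{
	assert(!buckets[old].next);
	atomic_fetch_add(&buckets[new].pin, 1);
	buckets[old].next = new;
}

int main(void)
{
	atomic_init(&buckets[1].pin, 1);
	atomic_init(&buckets[2].pin, 1);
	bucket_chain(1, 2);	/* bucket 1 now pins bucket 2 */
	bucket_put(2);		/* kept alive by bucket 1's pin */
	bucket_put(1);		/* drops bucket 1, then bucket 2 */
	printf("bucket 2 pin: %d\n", atomic_load(&buckets[2].pin));	/* prints 0 */
	return 0;
}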

libbcachefs/alloc_types.h

@@ -52,6 +52,8 @@ struct open_bucket {
 	struct mutex		lock;
 	atomic_t		pin;
 	bool			has_full_ptrs;
+	u8			new_ob;
+
 	/*
 	 * recalculated every time we allocate from this open_bucket based on
 	 * how many pointers we're actually going to use:

libbcachefs/btree_cache.c

@@ -216,9 +216,6 @@ static unsigned long bch2_mca_scan(struct shrinker *shrink,
 	if (btree_shrinker_disabled(c))
 		return SHRINK_STOP;

-	if (c->btree_cache_alloc_lock)
-		return SHRINK_STOP;
-
 	/* Return -1 if we can't do anything right now */
 	if (sc->gfp_mask & __GFP_IO)
 		mutex_lock(&c->btree_cache_lock);
@@ -302,9 +299,6 @@ static unsigned long bch2_mca_count(struct shrinker *shrink,
 	if (btree_shrinker_disabled(c))
 		return 0;

-	if (c->btree_cache_alloc_lock)
-		return 0;
-
 	return mca_can_free(c) * btree_pages(c);
 }

libbcachefs/btree_io.c

@@ -1198,8 +1198,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 			goto err;
 		}

-		if (ret)
-			continue;
+		if (ret) {
+			btree_err_on(!b->written,
+				     BTREE_ERR_FIXABLE, c, b, i,
+				     "first btree node bset has blacklisted journal seq");
+			if (b->written)
+				continue;
+		}

 		__bch2_btree_node_iter_push(iter, b,
 					    i->start,
@@ -1267,6 +1272,7 @@ static void btree_node_read_work(struct work_struct *work)
 	goto start;
 	do {
+		bch_info(c, "retrying read");
 		bio_reset(bio);
 		bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META;
 		bio->bi_bdev = rb->pick.ca->disk_sb.bdev;
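
For readers unfamiliar with the bset change above: a blacklisted journal sequence number in a node's first bset is now flagged as a fixable error instead of being silently ignored, while blacklisted bsets encountered after data has already been accepted are still skipped. A rough standalone sketch of that skip-or-error decision, with hypothetical helpers rather than the bcachefs code:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for a bset: just the journal seq it was written under. */
struct bset { unsigned long journal_seq; };

/* Stand-in for the blacklist lookup the real code performs. */
static bool seq_blacklisted(unsigned long seq)
{
	return seq == 42;
}

static void read_bsets(const struct bset *sets, int n)
{
	unsigned written = 0;	/* like b->written: data accepted so far */

	for (int i = 0; i < n; i++) {
		if (seq_blacklisted(sets[i].journal_seq)) {
			if (!written)	/* first bset: fixable error, keep it */
				fprintf(stderr, "fixable: first bset blacklisted\n");
			if (written)	/* later bset: drop it */
				continue;
		}
		written++;	/* pretend we consumed this bset */
	}
}

int main(void)
{
	struct bset sets[] = { { 42 }, { 7 }, { 42 }, { 9 } };

	read_bsets(sets, 4);	/* logs once, keeps 3 of the 4 bsets */
	return 0;
}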

libbcachefs/journal.c

@@ -1496,6 +1496,8 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 	j->replay_pin_list = NULL;

+	bch2_journal_set_replay_done(j);
+
 	if (did_replay) {
 		bch2_journal_flush_pins(&c->journal, U64_MAX);
@@ -1511,8 +1513,6 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 			goto err;
 		}
 	}

-	bch2_journal_set_replay_done(j);
-
 err:
 	bch2_journal_entries_free(list);
 	return ret;
@@ -1834,6 +1834,14 @@ void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
 	while ((pin = journal_get_next_pin(j, seq_to_flush, &pin_seq)))
 		pin->flush(j, pin, pin_seq);

+	/*
+	 * If journal replay hasn't completed, the unreplayed journal entries
+	 * hold refs on their corresponding sequence numbers and thus this would
+	 * deadlock:
+	 */
+	if (!test_bit(JOURNAL_REPLAY_DONE, &j->flags))
+		return;
+
 	wait_event(j->wait,
 		   journal_flush_done(j, seq_to_flush) ||
 		   bch2_journal_error(j));
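
Taken together, the two journal.c changes set JOURNAL_REPLAY_DONE before the error path can reach bch2_journal_flush_pins(), and make the flush skip its wait entirely while replay is incomplete, since pins held by unreplayed entries are only released by replay itself. A toy illustration of guarding a wait that cannot make progress yet, with hypothetical names; the busy-wait merely stands in for wait_event():

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the journal state involved in the deadlock. */
struct journal {
	bool replay_done;	/* like JOURNAL_REPLAY_DONE */
	int  pins;		/* pins held by not-yet-replayed entries */
};

static void flush_pins(struct journal *j)
{
	/* ... flush whatever pins are currently flushable ... */

	/* The fix: before replay completes, unreplayed entries still hold
	 * pins that only replay will release, so waiting would deadlock. */
	if (!j->replay_done)
		return;

	while (j->pins)
		;	/* stand-in for wait_event(j->wait, ...) */
}

int main(void)
{
	struct journal j = { .replay_done = false, .pins = 3 };

	flush_pins(&j);	/* error path during replay: returns instead of hanging */
	puts("no deadlock");
	return 0;
}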

libbcachefs/super.c

@@ -395,6 +395,7 @@ static void bch2_fs_free(struct bch_fs *c)
 	mempool_exit(&c->btree_reserve_pool);
 	mempool_exit(&c->fill_iter);
 	percpu_ref_exit(&c->writes);
+	kfree(c->replicas);

 	if (c->copygc_wq)
 		destroy_workqueue(c->copygc_wq);