From 74cb92203293a8d5b16b078389f6b3dba5300e89 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 12 Nov 2017 18:23:29 -0500 Subject: [PATCH] Update bcachefs sources to 661faf58db bcachefs: fix a deadlock in journal replay error path --- .bcachefs_revision | 2 +- libbcachefs/alloc.c | 56 +++++++++++++++++++++++++++------------ libbcachefs/alloc_types.h | 2 ++ libbcachefs/btree_cache.c | 6 ----- libbcachefs/btree_io.c | 10 +++++-- libbcachefs/journal.c | 12 +++++++-- libbcachefs/super.c | 1 + 7 files changed, 61 insertions(+), 28 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index aa0579cc..668fea75 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -6a361fb68c8b0b7cd3bc0085b8d21b808fdc13eb +661faf58dbcab87e512e64e7cb164905689e64c8 diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c index 09cb6fb2..a1086576 100644 --- a/libbcachefs/alloc.c +++ b/libbcachefs/alloc.c @@ -1153,6 +1153,7 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c, * open_bucket_add_buckets expects new pointers at the head of * the list: */ + BUG_ON(ob->nr_ptrs >= BCH_REPLICAS_MAX); memmove(&ob->ptrs[1], &ob->ptrs[0], ob->nr_ptrs * sizeof(ob->ptrs[0])); @@ -1239,12 +1240,15 @@ static int bch2_bucket_alloc_set(struct bch_fs *c, struct write_point *wp, * reference _after_ doing the index update that makes its allocation reachable. */ -static void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) +void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) { const struct bch_extent_ptr *ptr; + u8 new_ob; - lockdep_assert_held(&c->open_buckets_lock); + if (!atomic_dec_and_test(&ob->pin)) + return; + spin_lock(&c->open_buckets_lock); open_bucket_for_each_ptr(ob, ptr) { struct bch_dev *ca = c->devs[ptr->dev]; @@ -1252,19 +1256,17 @@ static void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) } ob->nr_ptrs = 0; + new_ob = ob->new_ob; + ob->new_ob = 0; list_move(&ob->list, &c->open_buckets_free); c->open_buckets_nr_free++; - closure_wake_up(&c->open_buckets_wait); -} + spin_unlock(&c->open_buckets_lock); -void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *b) -{ - if (atomic_dec_and_test(&b->pin)) { - spin_lock(&c->open_buckets_lock); - __bch2_open_bucket_put(c, b); - spin_unlock(&c->open_buckets_lock); - } + closure_wake_up(&c->open_buckets_wait); + + if (new_ob) + bch2_open_bucket_put(c, c->open_buckets + new_ob); } static struct open_bucket *bch2_open_bucket_get(struct bch_fs *c, @@ -1285,6 +1287,9 @@ static struct open_bucket *bch2_open_bucket_get(struct bch_fs *c, atomic_set(&ret->pin, 1); /* XXX */ ret->has_full_ptrs = false; + BUG_ON(ret->new_ob); + BUG_ON(ret->nr_ptrs); + c->open_buckets_nr_free--; trace_open_bucket_alloc(c, cl); } else { @@ -1333,17 +1338,34 @@ static void open_bucket_copy_unused_ptrs(struct bch_fs *c, struct open_bucket *new, struct open_bucket *old) { - unsigned i; + bool moved_ptr = false; + int i; - for (i = 0; i < old->nr_ptrs; i++) + for (i = old->nr_ptrs - 1; i >= 0; --i) if (ob_ptr_sectors_free(c, old, &old->ptrs[i])) { - struct bch_extent_ptr tmp = old->ptrs[i]; + BUG_ON(new->nr_ptrs >= BCH_REPLICAS_MAX); - tmp.offset += old->ptr_offset[i]; - new->ptrs[new->nr_ptrs] = tmp; - new->ptr_offset[new->nr_ptrs] = 0; + new->ptrs[new->nr_ptrs] = old->ptrs[i]; + new->ptr_offset[new->nr_ptrs] = old->ptr_offset[i]; new->nr_ptrs++; + + old->nr_ptrs--; + memmove(&old->ptrs[i], + &old->ptrs[i + 1], + (old->nr_ptrs - i) * sizeof(old->ptrs[0])); + memmove(&old->ptr_offset[i], + &old->ptr_offset[i + 1], + (old->nr_ptrs - i) * sizeof(old->ptr_offset[0])); + + moved_ptr = true; } + + if (moved_ptr) { + BUG_ON(old->new_ob); + + atomic_inc(&new->pin); + old->new_ob = new - c->open_buckets; + } } static void verify_not_stale(struct bch_fs *c, const struct open_bucket *ob) diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h index d297430c..bee6d28d 100644 --- a/libbcachefs/alloc_types.h +++ b/libbcachefs/alloc_types.h @@ -52,6 +52,8 @@ struct open_bucket { struct mutex lock; atomic_t pin; bool has_full_ptrs; + u8 new_ob; + /* * recalculated every time we allocate from this open_bucket based on * how many pointers we're actually going to use: diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 0be372c4..4147545d 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -216,9 +216,6 @@ static unsigned long bch2_mca_scan(struct shrinker *shrink, if (btree_shrinker_disabled(c)) return SHRINK_STOP; - if (c->btree_cache_alloc_lock) - return SHRINK_STOP; - /* Return -1 if we can't do anything right now */ if (sc->gfp_mask & __GFP_IO) mutex_lock(&c->btree_cache_lock); @@ -302,9 +299,6 @@ static unsigned long bch2_mca_count(struct shrinker *shrink, if (btree_shrinker_disabled(c)) return 0; - if (c->btree_cache_alloc_lock) - return 0; - return mca_can_free(c) * btree_pages(c); } diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index b600842b..507a6a9d 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1198,8 +1198,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry goto err; } - if (ret) - continue; + if (ret) { + btree_err_on(!b->written, + BTREE_ERR_FIXABLE, c, b, i, + "first btree node bset has blacklisted journal seq"); + if (b->written) + continue; + } __bch2_btree_node_iter_push(iter, b, i->start, @@ -1267,6 +1272,7 @@ static void btree_node_read_work(struct work_struct *work) goto start; do { + bch_info(c, "retrying read"); bio_reset(bio); bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META; bio->bi_bdev = rb->pick.ca->disk_sb.bdev; diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 3ee9d39e..37b342b9 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -1496,6 +1496,8 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) j->replay_pin_list = NULL; + bch2_journal_set_replay_done(j); + if (did_replay) { bch2_journal_flush_pins(&c->journal, U64_MAX); @@ -1511,8 +1513,6 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) goto err; } } - - bch2_journal_set_replay_done(j); err: bch2_journal_entries_free(list); return ret; @@ -1834,6 +1834,14 @@ void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) while ((pin = journal_get_next_pin(j, seq_to_flush, &pin_seq))) pin->flush(j, pin, pin_seq); + /* + * If journal replay hasn't completed, the unreplayed journal entries + * hold refs on their corresponding sequence numbers and thus this would + * deadlock: + */ + if (!test_bit(JOURNAL_REPLAY_DONE, &j->flags)) + return; + wait_event(j->wait, journal_flush_done(j, seq_to_flush) || bch2_journal_error(j)); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 4af9075c..dfb95d0d 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -395,6 +395,7 @@ static void bch2_fs_free(struct bch_fs *c) mempool_exit(&c->btree_reserve_pool); mempool_exit(&c->fill_iter); percpu_ref_exit(&c->writes); + kfree(c->replicas); if (c->copygc_wq) destroy_workqueue(c->copygc_wq);