From e6e7d374e7ac3a90db9eda0b3eb6d113657afa1d Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Wed, 28 Aug 2019 11:09:35 -0400
Subject: [PATCH] Update bcachefs sources to bfb7133d71 bcachefs: Refactor
 bch2_alloc_write()

---
 .bcachefs_revision             |   2 +-
 cmd_migrate.c                  |   3 -
 libbcachefs/alloc_background.c | 195 ++++++++++++++-------------------
 libbcachefs/alloc_background.h |  11 ++
 libbcachefs/bcachefs.h         |   1 -
 libbcachefs/btree_gc.c         |   8 +-
 libbcachefs/buckets.c          |  38 +++----
 libbcachefs/buckets_types.h    |   1 -
 8 files changed, 116 insertions(+), 143 deletions(-)

diff --git a/.bcachefs_revision b/.bcachefs_revision
index d90017fa..97cf88b4 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-4ed63a3241fa1e6d7610607be033ef08bef1a43e
+bfb7133d71638b39411352729427c1bb14ca0b6e
diff --git a/cmd_migrate.c b/cmd_migrate.c
index 5c973498..7d6af443 100644
--- a/cmd_migrate.c
+++ b/cmd_migrate.c
@@ -318,7 +318,6 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
 		struct bkey_i_extent *e;
 		BKEY_PADDED(k) k;
 		u64 b = sector_to_bucket(ca, physical);
-		struct bucket_mark m;
 		struct disk_reservation res;
 		unsigned sectors;
 		int ret;
@@ -337,8 +336,6 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
 					.gen = bucket(ca, b)->mark.gen,
 				  });
 
-		bucket_cmpxchg(bucket(ca, b), m, m.dirty = true);
-
 		ret = bch2_disk_reservation_get(c, &res, sectors, 1,
 						BCH_DISK_RESERVATION_NOFAIL);
 		if (ret)
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 8b06f51d..7a457729 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -258,46 +258,68 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
 	return 0;
 }
 
-int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
+enum alloc_write_ret {
+	ALLOC_WROTE,	/* 0: bch2_trans_commit()'s result is returned as-is; callers treat 0 as "wrote" */
+	ALLOC_NOWROTE,	/* btree key already equals the in-memory bucket state; nothing committed */
+	ALLOC_END,	/* iterator position is not a valid device/bucket; caller stops iterating */
+};
+
+static int bch2_alloc_write_key(struct btree_trans *trans,
+				struct btree_iter *iter,
+				unsigned flags)
 {
-	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct bch_fs *c = trans->c;
+	struct bkey_s_c k;
 	struct bch_dev *ca;
+	struct bucket_array *ba;
+	struct bucket *g;
+	struct bucket_mark m;
+	struct bkey_alloc_unpacked old_u, new_u;
+	__BKEY_PADDED(k, 8) alloc_key; /* hack: the 8 must cover BKEY_ALLOC_VAL_U64s_MAX (see BUG_ON in bch2_alloc_write()) */
+	struct bkey_i_alloc *a;
 	int ret;
-
-	if (k->k.p.inode >= c->sb.nr_devices ||
-	    !c->devs[k->k.p.inode])
-		return 0;
-
-	ca = bch_dev_bkey_exists(c, k->k.p.inode);
-
-	if (k->k.p.offset >= ca->mi.nbuckets)
-		return 0;
-
-	bch2_trans_init(&trans, c, 0, 0);
-
-	iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p,
-				   BTREE_ITER_INTENT);
-
-	ret = bch2_btree_iter_traverse(iter);
+retry:
+	k = bch2_btree_iter_peek_slot(iter);
+	ret = bkey_err(k);
 	if (ret)
 		goto err;
 
-	/* check buckets_written with btree node locked: */
-	if (test_bit(k->k.p.offset, ca->buckets_written)) {
-		ret = 0;
-		goto err;
+	old_u = bch2_alloc_unpack(k);
+
+	if (iter->pos.inode >= c->sb.nr_devices ||
+	    !c->devs[iter->pos.inode])
+		return ALLOC_END;	/* no device at this inode number */
+
+	percpu_down_read(&c->mark_lock);
+	ca	= bch_dev_bkey_exists(c, iter->pos.inode);
+	ba	= bucket_array(ca);
+
+	if (iter->pos.offset >= ba->nbuckets) {
+		percpu_up_read(&c->mark_lock);
+		return ALLOC_END;	/* past this device's last bucket */
 	}
 
-	bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));
+	g	= &ba->b[iter->pos.offset];
+	m	= READ_ONCE(g->mark);
+	new_u	= alloc_mem_to_key(g, m);
+	percpu_up_read(&c->mark_lock);	/* new_u is a snapshot of the in-memory bucket taken under mark_lock */
 
-	ret = bch2_trans_commit(&trans, NULL, NULL,
+	if (!bkey_alloc_unpacked_cmp(old_u, new_u))
+		return ALLOC_NOWROTE;	/* btree key already matches the in-memory state */
+
+	a = bkey_alloc_init(&alloc_key.k);
+	a->k.p = iter->pos;
+	bch2_alloc_pack(a, new_u);
+
+	bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
+	ret = bch2_trans_commit(trans, NULL, NULL,
+				BTREE_INSERT_ATOMIC|
 				BTREE_INSERT_NOFAIL|
-				BTREE_INSERT_LAZY_RW|
-				BTREE_INSERT_JOURNAL_REPLAY|
-				BTREE_INSERT_NOMARK);
+				BTREE_INSERT_NOMARK|
+				flags);
 err:
-	bch2_trans_exit(&trans);
+	if (ret == -EINTR)
+		goto retry;	/* re-peek the slot and re-read the bucket before trying again */
 	return ret;
 }
 
@@ -305,16 +327,8 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
 {
 	struct btree_trans trans;
 	struct btree_iter *iter;
-	struct bucket_array *buckets;
 	struct bch_dev *ca;
-	struct bucket *g;
-	struct bucket_mark m, new;
-	struct bkey_alloc_unpacked old_u, new_u;
-	__BKEY_PADDED(k, 8) alloc_key; /* hack: */
-	struct bkey_i_alloc *a;
-	struct bkey_s_c k;
 	unsigned i;
-	size_t b;
 	int ret = 0;
 
 	BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
@@ -325,81 +339,24 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
 				   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
 	for_each_rw_member(ca, c, i) {
-		down_read(&ca->bucket_lock);
-restart:
-		buckets = bucket_array(ca);
+		unsigned first_bucket;
 
-		for (b = buckets->first_bucket;
-		     b < buckets->nbuckets;
-		     b++) {
-			if (!buckets->b[b].mark.dirty)
-				continue;
+		percpu_down_read(&c->mark_lock);
+		first_bucket = bucket_array(ca)->first_bucket;
+		percpu_up_read(&c->mark_lock);
 
-			bch2_btree_iter_set_pos(iter, POS(i, b));
-			k = bch2_btree_iter_peek_slot(iter);
-			ret = bkey_err(k);
-			if (ret)
-				goto err;
+		bch2_btree_iter_set_pos(iter, POS(i, first_bucket));
 
-			old_u = bch2_alloc_unpack(k);
-
-			percpu_down_read(&c->mark_lock);
-			g	= bucket(ca, b);
-			m	= READ_ONCE(g->mark);
-			new_u	= alloc_mem_to_key(g, m);
-			percpu_up_read(&c->mark_lock);
-
-			if (!m.dirty)
-				continue;
-
-			if ((flags & BTREE_INSERT_LAZY_RW) &&
-			    percpu_ref_is_zero(&c->writes)) {
-				up_read(&ca->bucket_lock);
-				bch2_trans_unlock(&trans);
-
-				ret = bch2_fs_read_write_early(c);
-				down_read(&ca->bucket_lock);
-
-				if (ret)
-					goto err;
-				goto restart;
-			}
-
-			a = bkey_alloc_init(&alloc_key.k);
-			a->k.p = iter->pos;
-			bch2_alloc_pack(a, new_u);
-
-			bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
-			ret = bch2_trans_commit(&trans, NULL, NULL,
-						BTREE_INSERT_NOFAIL|
-						BTREE_INSERT_NOMARK|
-						flags);
-err:
-			if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) {
-				bch_err(c, "error %i writing alloc info", ret);
-				printk(KERN_CONT "dev %llu bucket %llu\n",
-				       iter->pos.inode, iter->pos.offset);
-				printk(KERN_CONT "gen %u -> %u\n", old_u.gen, new_u.gen);
-#define x(_name, _bits)		printk(KERN_CONT #_name " %u -> %u\n", old_u._name, new_u._name);
-				BCH_ALLOC_FIELDS()
-#undef  x
-			}
-			if (ret)
+		while (1) {
+			ret = bch2_alloc_write_key(&trans, iter, flags);
+			if (ret < 0 || ret == ALLOC_END)
 				break;
-
-			new = m;
-			new.dirty = false;
-			atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);
-
-			if (ca->buckets_written)
-				set_bit(b, ca->buckets_written);
-
-			bch2_trans_cond_resched(&trans);
-			*wrote = true;
+			if (ret == ALLOC_WROTE)
+				*wrote = true;
+			bch2_btree_iter_next_slot(iter);
 		}
-		up_read(&ca->bucket_lock);
 
-		if (ret) {
+		if (ret < 0) {
 			percpu_ref_put(&ca->io_ref);
 			break;
 		}
@@ -407,7 +364,27 @@ err:
 
 	bch2_trans_exit(&trans);
 
-	return ret;
+	return ret < 0 ? ret : 0;
+}
+
+int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
+{
+	struct btree_trans trans;
+	struct btree_iter *iter;
+	int ret;
+
+	bch2_trans_init(&trans, c, 0, 0);
+
+	iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p,
+				   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);	/* only k's position is used here */
+
+	ret = bch2_alloc_write_key(&trans, iter,
+				   BTREE_INSERT_NOFAIL|
+				   BTREE_INSERT_LAZY_RW|
+				   BTREE_INSERT_JOURNAL_REPLAY|
+				   BTREE_INSERT_NOMARK);	/* value written is built from the in-memory bucket, not from k */
+	bch2_trans_exit(&trans);
+	return ret < 0 ? ret : 0;	/* positive ALLOC_* status codes are not errors */
 }
 
 /* Bucket IO clocks: */
@@ -954,10 +931,6 @@ retry:
 		if (!top->nr)
 			heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
 
-		/* with btree still locked: */
-		if (ca->buckets_written)
-			set_bit(b, ca->buckets_written);
-
 		/*
 		 * Make sure we flush the last journal entry that updated this
 		 * bucket (i.e. deleting the last reference) before writing to
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 134c6d81..501c4443 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -13,6 +13,17 @@ struct bkey_alloc_unpacked {
 #undef  x
 };
 
+/* returns true if not equal, i.e. l and r differ in gen or in any BCH_ALLOC_FIELDS() member */
+static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
+					   struct bkey_alloc_unpacked r)
+{
+	return l.gen != r.gen
+#define x(_name, _bits)	|| l._name != r._name
+	BCH_ALLOC_FIELDS()
+#undef  x
+	;
+}
+
 struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
 void bch2_alloc_pack(struct bkey_i_alloc *,
 		     const struct bkey_alloc_unpacked);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index c85d7766..ac797854 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -410,7 +410,6 @@ struct bch_dev {
 	 */
 	struct bucket_array __rcu *buckets[2];
 	unsigned long		*buckets_nouse;
-	unsigned long		*buckets_written;
 	struct rw_semaphore	bucket_lock;
 
 	struct bch_dev_usage __percpu *usage[2];
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index e43d48b8..6c2253ef 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -146,7 +146,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
 					"type %u gen %u",
 					k.k->type, ptr->gen)) {
 				g2->_mark.gen	= g->_mark.gen		= ptr->gen;
-				g2->_mark.dirty	= g->_mark.dirty	= true;
 				g2->gen_valid	= g->gen_valid		= true;
 			}
 
@@ -154,7 +153,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
 					"%u ptr gen in the future: %u > %u",
 					k.k->type, ptr->gen, g->mark.gen)) {
 				g2->_mark.gen	= g->_mark.gen		= ptr->gen;
-				g2->_mark.dirty	= g->_mark.dirty	= true;
 				g2->gen_valid	= g->gen_valid		= true;
 				set_bit(BCH_FS_FIXED_GENS, &c->flags);
 			}
@@ -526,7 +524,6 @@ static int bch2_gc_done(struct bch_fs *c,
 				": got %u, should be %u", i, b,		\
 				dst->b[b].mark._f, src->b[b].mark._f);	\
 		dst->b[b]._mark._f = src->b[b].mark._f;			\
-		dst->b[b]._mark.dirty = true;				\
 	}
 #define copy_dev_field(_f, _msg, ...)					\
 	copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__)
@@ -578,10 +575,7 @@ static int bch2_gc_done(struct bch_fs *c,
 			copy_bucket_field(dirty_sectors);
 			copy_bucket_field(cached_sectors);
 
-			if (dst->b[b].oldest_gen != src->b[b].oldest_gen) {
-				dst->b[b].oldest_gen = src->b[b].oldest_gen;
-				dst->b[b]._mark.dirty = true;
-			}
+			dst->b[b].oldest_gen = src->b[b].oldest_gen;
 		}
 	};
 
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index fc3519bc..16559e89 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -634,7 +634,6 @@ static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
 		BUG_ON(!is_available_bucket(new));
 
 		new.owned_by_allocator	= true;
-		new.dirty		= true;
 		new.data_type		= 0;
 		new.cached_sectors	= 0;
 		new.dirty_sectors	= 0;
@@ -774,7 +773,6 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 	       type != BCH_DATA_JOURNAL);
 
 	old = bucket_cmpxchg(g, new, ({
-		new.dirty	= true;
 		new.data_type	= type;
 		overflow = checked_add(new.dirty_sectors, sectors);
 	}));
@@ -849,7 +847,6 @@ static void bucket_set_stripe(struct bch_fs *c,
 		struct bucket_mark new, old;
 
 		old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
-			new.dirty			= true;
 			new.stripe			= enabled;
 			if (journal_seq) {
 				new.journal_seq_valid	= 1;
@@ -896,8 +893,6 @@ static bool bch2_mark_pointer(struct bch_fs *c,
 	do {
 		new.v.counter = old.v.counter = v;
 
-		new.dirty = true;
-
 		/*
 		 * Check this after reading bucket mark to guard against
 		 * the allocator invalidating a bucket after we've already
@@ -1416,8 +1411,6 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
 	struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
 	struct btree_iter *iter;
 	struct bkey_s_c k;
-	struct bucket *g;
-	struct bucket_mark m;
 	struct bkey_alloc_unpacked u;
 	struct bkey_i_alloc *a;
 	bool overflow;
@@ -1430,12 +1423,31 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
 		return ret;
 
 	if (!ret) {
+		/*
+		 * During journal replay, and if gc repairs alloc info at
+		 * runtime, the alloc info in the btree might not be up to date
+		 * yet - so, trust the in memory mark:
+		 */
+		struct bucket *g;
+		struct bucket_mark m;
+
 		percpu_down_read(&c->mark_lock);
 		g	= bucket(ca, iter->pos.offset);
 		m	= READ_ONCE(g->mark);
 		u	= alloc_mem_to_key(g, m);
 		percpu_up_read(&c->mark_lock);
 	} else {
+		/*
+		 * Unless we're already updating that key:
+		 */
+		if (k.k->type != KEY_TYPE_alloc) {
+			bch_err_ratelimited(c, "pointer to nonexistent bucket %u:%zu",
+					    p.ptr.dev,
+					    PTR_BUCKET_NR(ca, &p.ptr));
+			ret = -1;
+			goto out;
+		}
+
 		u = bch2_alloc_unpack(k);
 	}
 
@@ -1881,7 +1893,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 {
 	struct bucket_array *buckets = NULL, *old_buckets = NULL;
 	unsigned long *buckets_nouse = NULL;
-	unsigned long *buckets_written = NULL;
 	alloc_fifo	free[RESERVE_NR];
 	alloc_fifo	free_inc;
 	alloc_heap	alloc_heap;
@@ -1910,9 +1921,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	    !(buckets_nouse	= kvpmalloc(BITS_TO_LONGS(nbuckets) *
 					    sizeof(unsigned long),
 					    GFP_KERNEL|__GFP_ZERO)) ||
-	    !(buckets_written	= kvpmalloc(BITS_TO_LONGS(nbuckets) *
-					    sizeof(unsigned long),
-					    GFP_KERNEL|__GFP_ZERO)) ||
 	    !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
 	    !init_fifo(&free[RESERVE_MOVINGGC],
 		       copygc_reserve, GFP_KERNEL) ||
@@ -1944,16 +1952,12 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 		memcpy(buckets_nouse,
 		       ca->buckets_nouse,
 		       BITS_TO_LONGS(n) * sizeof(unsigned long));
-		memcpy(buckets_written,
-		       ca->buckets_written,
-		       BITS_TO_LONGS(n) * sizeof(unsigned long));
 	}
 
 	rcu_assign_pointer(ca->buckets[0], buckets);
 	buckets = old_buckets;
 
 	swap(ca->buckets_nouse, buckets_nouse);
-	swap(ca->buckets_written, buckets_written);
 
 	if (resize)
 		percpu_up_write(&c->mark_lock);
@@ -1993,8 +1997,6 @@ err:
 		free_fifo(&free[i]);
 	kvpfree(buckets_nouse,
 		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
-	kvpfree(buckets_written,
-		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
 	if (buckets)
 		call_rcu(&old_buckets->rcu, buckets_free_rcu);
 
@@ -2010,8 +2012,6 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
 	free_fifo(&ca->free_inc);
 	for (i = 0; i < RESERVE_NR; i++)
 		free_fifo(&ca->free[i]);
-	kvpfree(ca->buckets_written,
-		BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
 	kvpfree(ca->buckets_nouse,
 		BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
 	kvpfree(rcu_dereference_protected(ca->buckets[0], 1),
diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h
index e51d2979..94bd9da3 100644
--- a/libbcachefs/buckets_types.h
+++ b/libbcachefs/buckets_types.h
@@ -15,7 +15,6 @@ struct bucket_mark {
 	u8		gen;
 	u8		data_type:3,
 			owned_by_allocator:1,
-			dirty:1,
 			journal_seq_valid:1,
 			stripe:1;
 	u16		dirty_sectors;