Update bcachefs sources to 986543d24e bcachefs: fix bch2_invalidate_one_bucket2() during journal replay

Kent Overstreet 2019-03-13 17:00:36 -04:00
parent d4c9b18e21
commit 0894d54750
5 changed files with 99 additions and 44 deletions

@@ -1 +1 @@
00c04f8485db33178b98f67d7c106e3b49fb5b67
986543d24e08a0c0308472403b230d546e7ecbbb

@@ -938,7 +938,7 @@ retry:
else
memset(&u, 0, sizeof(u));
invalidating_cached_data = u.cached_sectors != 0;
invalidating_cached_data = m.cached_sectors != 0;
//BUG_ON(u.dirty_sectors);
u.data_type = 0;
@@ -946,20 +946,33 @@ retry:
u.cached_sectors = 0;
u.read_time = c->bucket_clock[READ].hand;
u.write_time = c->bucket_clock[WRITE].hand;
u.gen++;
/*
* The allocator has to start before journal replay is finished - thus,
* we have to trust the in memory bucket @m, not the version in the
* btree:
*/
u.gen = m.gen + 1;
a = bkey_alloc_init(&alloc_key.k);
a->k.p = iter->pos;
bch2_alloc_pack(a, u);
/*
* XXX:
* when using deferred btree updates, we have journal reclaim doing
* btree updates and thus requiring the allocator to make forward
* progress, and here the allocator is requiring space in the journal -
* so we need a journal pre-reservation:
*/
ret = bch2_btree_insert_at(c, NULL,
invalidating_cached_data ? journal_seq : NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
BTREE_INSERT_JOURNAL_RESERVED|
flags,
BTREE_INSERT_ENTRY(iter, &a->k_i));
if (ret == -EINTR)
@@ -975,6 +988,10 @@ retry:
if (!top->nr)
heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
/* with btree still locked: */
if (ca->buckets_written)
set_bit(b, ca->buckets_written);
/*
* Make sure we flush the last journal entry that updated this
* bucket (i.e. deleting the last reference) before writing to

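The two comments added in this hunk carry the substance of the fix: the allocator starts running before journal replay has finished, so the in-memory bucket mark @m is what has to be trusted, and the new generation becomes m.gen + 1 instead of an increment of the possibly stale alloc key read from the btree; the insert also gains BTREE_INSERT_JOURNAL_RESERVED because journal reclaim can itself depend on the allocator making forward progress. A standalone sketch of the generation-number part, with simplified stand-in types rather than the real bucket mark and unpacked alloc key:

#include <stdio.h>

/* simplified stand-ins, not the real bcachefs structures */
struct mem_bucket  { unsigned gen; unsigned cached_sectors; };
struct btree_alloc { unsigned gen; unsigned cached_sectors; };

/*
 * Pick the generation for a bucket being invalidated.  During journal
 * replay the btree copy of the alloc key may lag behind what the
 * allocator has already done, so the new gen is derived from the
 * in-memory mark.
 */
static unsigned invalidate_pick_gen(struct mem_bucket m, struct btree_alloc u)
{
    (void) u;
    return m.gen + 1;    /* not u.gen + 1 */
}

int main(void)
{
    struct mem_bucket  m = { .gen = 7, .cached_sectors = 64 };
    struct btree_alloc u = { .gen = 5 };    /* stale: replay not finished */

    printf("new gen: %u (bumping the btree copy would give %u)\n",
           invalidate_pick_gen(m, u), u.gen + 1);
    return 0;
}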
@@ -287,6 +287,8 @@ static int bch2_extent_update(struct btree_trans *trans,
bool direct,
s64 *total_delta)
{
struct bch_fs *c = trans->c;
struct btree_iter *inode_iter = NULL;
struct bch_inode_unpacked inode_u;
struct bkey_inode_buf inode_p;
bool allocating = false;
@@ -319,35 +321,62 @@ static int bch2_extent_update(struct btree_trans *trans,
/* XXX: inode->i_size locking */
if (i_sectors_delta ||
new_i_size > inode->ei_inode.bi_size) {
bch2_btree_iter_unlock(extent_iter);
mutex_lock(&inode->ei_update_lock);
if (c->opts.new_inode_updates) {
bch2_btree_iter_unlock(extent_iter);
mutex_lock(&inode->ei_update_lock);
if (!bch2_btree_iter_relock(extent_iter)) {
mutex_unlock(&inode->ei_update_lock);
return -EINTR;
if (!bch2_btree_iter_relock(extent_iter)) {
mutex_unlock(&inode->ei_update_lock);
return -EINTR;
}
inode_locked = true;
if (!inode->ei_inode_update)
inode->ei_inode_update =
bch2_deferred_update_alloc(c,
BTREE_ID_INODES, 64);
inode_u = inode->ei_inode;
inode_u.bi_sectors += i_sectors_delta;
/* XXX: this is slightly suspect */
if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
new_i_size > inode_u.bi_size) {
inode_u.bi_size = new_i_size;
extended = true;
}
bch2_inode_pack(&inode_p, &inode_u);
bch2_trans_update(trans,
BTREE_INSERT_DEFERRED(inode->ei_inode_update,
&inode_p.inode.k_i));
} else {
inode_iter = bch2_trans_get_iter(trans,
BTREE_ID_INODES,
POS(k->k.p.inode, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
if (IS_ERR(inode_iter))
return PTR_ERR(inode_iter);
ret = bch2_btree_iter_traverse(inode_iter);
if (ret)
goto err;
inode_u = inode->ei_inode;
inode_u.bi_sectors += i_sectors_delta;
/* XXX: this is slightly suspect */
if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
new_i_size > inode_u.bi_size) {
inode_u.bi_size = new_i_size;
extended = true;
}
bch2_inode_pack(&inode_p, &inode_u);
bch2_trans_update(trans,
BTREE_INSERT_ENTRY(inode_iter, &inode_p.inode.k_i));
}
inode_locked = true;
if (!inode->ei_inode_update)
inode->ei_inode_update =
bch2_deferred_update_alloc(trans->c,
BTREE_ID_INODES, 64);
inode_u = inode->ei_inode;
inode_u.bi_sectors += i_sectors_delta;
/* XXX: this is slightly suspect */
if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
new_i_size > inode_u.bi_size) {
inode_u.bi_size = new_i_size;
extended = true;
}
bch2_inode_pack(&inode_p, &inode_u);
bch2_trans_update(trans,
BTREE_INSERT_DEFERRED(inode->ei_inode_update,
&inode_p.inode.k_i));
}
ret = bch2_trans_commit(trans, disk_res,
@@ -376,11 +405,13 @@ static int bch2_extent_update(struct btree_trans *trans,
}
if (direct)
i_sectors_acct(trans->c, inode, quota_res, i_sectors_delta);
i_sectors_acct(c, inode, quota_res, i_sectors_delta);
if (total_delta)
*total_delta += i_sectors_delta;
err:
if (!IS_ERR_OR_NULL(inode_iter))
bch2_trans_iter_put(trans, inode_iter);
if (inode_locked)
mutex_unlock(&inode->ei_update_lock);

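Both branches of the rewritten i_size/i_sectors update in bch2_extent_update() apply the same inode delta and differ only in how the packed inode reaches the btree: with the new new_inode_updates option set, the update is queued through the lazily allocated inode->ei_inode_update deferred update, while the default path now takes its own BTREE_ID_INODES iterator in the same transaction and drops it with bch2_trans_iter_put() on the new err label. A condensed, standalone sketch of the shared delta logic (plain stand-in types, not the real bch_inode_unpacked):

#include <stdbool.h>
#include <stdint.h>

/* stand-in for the handful of inode fields the update touches */
struct inode_fields {
    uint64_t bi_sectors;
    uint64_t bi_size;
    bool     i_size_dirty;
};

/*
 * Apply the sector delta implied by the extent update and, unless i_size
 * is already marked dirty, extend it; returns true if i_size grew.
 */
static bool apply_extent_delta(struct inode_fields *u,
                               int64_t i_sectors_delta, uint64_t new_i_size)
{
    bool extended = false;

    u->bi_sectors += (uint64_t) i_sectors_delta;

    if (!u->i_size_dirty && new_i_size > u->bi_size) {
        u->bi_size = new_i_size;
        extended = true;
    }
    return extended;
}

Whichever branch runs, the result is packed with bch2_inode_pack() and added to the transaction, so the inode and extent updates still commit together in the single bch2_trans_commit() call.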
@@ -112,22 +112,24 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans,
lockdep_assert_held(&inode->ei_update_lock);
if (c->opts.new_inode_updates) {
/* XXX: Don't do this with btree locks held */
if (!inode->ei_inode_update)
inode->ei_inode_update =
bch2_deferred_update_alloc(c, BTREE_ID_INODES, 64);
#if 0
iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
POS(inode->v.i_ino, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
if (IS_ERR(iter))
return PTR_ERR(iter);
} else {
iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
POS(inode->v.i_ino, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
if (IS_ERR(iter))
return PTR_ERR(iter);
/* The btree node lock is our lock on the inode: */
ret = bch2_btree_iter_traverse(iter);
if (ret)
return ret;
}
/* The btree node lock is our lock on the inode: */
ret = bch2_btree_iter_traverse(iter);
if (ret)
return ret;
#endif
*inode_u = inode->ei_inode;
if (set) {

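Read together with the options hunk below (which adds new_inode_updates as a new option), this hunk appears to make the deferred-update path opt-in: ei_inode_update is allocated on first use only when the option is set, while the iterator-based path, where the btree node lock serves as the lock on the inode, is the default. A minimal standalone sketch of the allocate-once, reuse pattern (illustrative names and a plain malloc, not the real bch2_deferred_update_alloc()):

#include <stdlib.h>

/* illustrative stand-ins for the deferred (write-cached) btree update */
struct deferred_update {
    int      btree_id;
    unsigned u64s;
};

struct inode_info {
    struct deferred_update *ei_inode_update;
};

static struct deferred_update *deferred_update_alloc(int btree_id, unsigned u64s)
{
    struct deferred_update *d = malloc(sizeof(*d));

    if (d) {
        d->btree_id = btree_id;
        d->u64s     = u64s;
    }
    return d;
}

/*
 * Allocate the per-inode write-cache entry the first time an inode update
 * needs it, then reuse it for every later update of that inode.
 */
static struct deferred_update *inode_deferred_update(struct inode_info *inode)
{
    if (!inode->ei_inode_update)
        inode->ei_inode_update =
            deferred_update_alloc(/* BTREE_ID_INODES */ 1, 64);
    return inode->ei_inode_update;
}

The "XXX: Don't do this with btree locks held" comment is about that first allocation happening inside a btree transaction; the sketch only shows the caching, not the locking concern.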
@@ -288,7 +288,12 @@ enum opt_type {
OPT_UINT(0, BCH_REPLICAS_MAX), \
NO_SB_OPT, 1, \
"n", "Data written to this device will be considered\n"\
"to have already been replicated n times")
"to have already been replicated n times") \
x(new_inode_updates, u8, \
OPT_MOUNT, \
OPT_BOOL(), \
NO_SB_OPT, false, \
NULL, "Enable new btree write-cache for inode updates")
struct bch_opts {
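
The new option is a mount-time boolean (OPT_MOUNT, OPT_BOOL, default false, NO_SB_OPT), so the write-cached inode update paths in the earlier hunks are opt-in per mount rather than persisted in the superblock. A small sketch of how the flag is consumed, mirroring the c->opts.new_inode_updates checks above (stand-in struct names; the real bch_opts fields are generated from this x() list):

#include <stdbool.h>

/* stand-ins: the real bch_opts and bch_fs are defined elsewhere */
struct opts_sketch { bool new_inode_updates; };
struct fs_sketch   { struct opts_sketch opts; };

static void inode_update_path(struct fs_sketch *c)
{
    if (c->opts.new_inode_updates) {
        /* opt-in: queue the inode through the btree write cache
         * (deferred update), as in bch2_extent_update() above */
    } else {
        /* default: update the inode via a BTREE_ID_INODES iterator
         * in the same transaction */
    }
}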