diff --git a/.bcachefs_revision b/.bcachefs_revision index 539b02bd..eeed3190 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -00c04f8485db33178b98f67d7c106e3b49fb5b67 +986543d24e08a0c0308472403b230d546e7ecbbb diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 1297638b..62f639b8 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -938,7 +938,7 @@ retry: else memset(&u, 0, sizeof(u)); - invalidating_cached_data = u.cached_sectors != 0; + invalidating_cached_data = m.cached_sectors != 0; //BUG_ON(u.dirty_sectors); u.data_type = 0; @@ -946,20 +946,33 @@ retry: u.cached_sectors = 0; u.read_time = c->bucket_clock[READ].hand; u.write_time = c->bucket_clock[WRITE].hand; - u.gen++; + + /* + * The allocator has to start before journal replay is finished - thus, + * we have to trust the in memory bucket @m, not the version in the + * btree: + */ + u.gen = m.gen + 1; a = bkey_alloc_init(&alloc_key.k); a->k.p = iter->pos; bch2_alloc_pack(a, u); + /* + * XXX: + * when using deferred btree updates, we have journal reclaim doing + * btree updates and thus requiring the allocator to make forward + * progress, and here the allocator is requiring space in the journal - + * so we need a journal pre-reservation: + */ ret = bch2_btree_insert_at(c, NULL, invalidating_cached_data ? journal_seq : NULL, BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE| BTREE_INSERT_USE_ALLOC_RESERVE| - BTREE_INSERT_JOURNAL_RESERVED| flags, BTREE_INSERT_ENTRY(iter, &a->k_i)); if (ret == -EINTR) @@ -975,6 +988,10 @@ retry: if (!top->nr) heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL); + /* with btree still locked: */ + if (ca->buckets_written) + set_bit(b, ca->buckets_written); + /* * Make sure we flush the last journal entry that updated this * bucket (i.e. deleting the last reference) before writing to diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index cabd3e08..d3a03641 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -287,6 +287,8 @@ static int bch2_extent_update(struct btree_trans *trans, bool direct, s64 *total_delta) { + struct bch_fs *c = trans->c; + struct btree_iter *inode_iter = NULL; struct bch_inode_unpacked inode_u; struct bkey_inode_buf inode_p; bool allocating = false; @@ -319,35 +321,62 @@ static int bch2_extent_update(struct btree_trans *trans, /* XXX: inode->i_size locking */ if (i_sectors_delta || new_i_size > inode->ei_inode.bi_size) { - bch2_btree_iter_unlock(extent_iter); - mutex_lock(&inode->ei_update_lock); + if (c->opts.new_inode_updates) { + bch2_btree_iter_unlock(extent_iter); + mutex_lock(&inode->ei_update_lock); - if (!bch2_btree_iter_relock(extent_iter)) { - mutex_unlock(&inode->ei_update_lock); - return -EINTR; + if (!bch2_btree_iter_relock(extent_iter)) { + mutex_unlock(&inode->ei_update_lock); + return -EINTR; + } + + inode_locked = true; + + if (!inode->ei_inode_update) + inode->ei_inode_update = + bch2_deferred_update_alloc(c, + BTREE_ID_INODES, 64); + + inode_u = inode->ei_inode; + inode_u.bi_sectors += i_sectors_delta; + + /* XXX: this is slightly suspect */ + if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + new_i_size > inode_u.bi_size) { + inode_u.bi_size = new_i_size; + extended = true; + } + + bch2_inode_pack(&inode_p, &inode_u); + bch2_trans_update(trans, + BTREE_INSERT_DEFERRED(inode->ei_inode_update, + &inode_p.inode.k_i)); + } else { + inode_iter = bch2_trans_get_iter(trans, + BTREE_ID_INODES, + POS(k->k.p.inode, 0), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + if (IS_ERR(inode_iter)) + return PTR_ERR(inode_iter); + + ret = bch2_btree_iter_traverse(inode_iter); + if (ret) + goto err; + + inode_u = inode->ei_inode; + inode_u.bi_sectors += i_sectors_delta; + + /* XXX: this is slightly suspect */ + if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + new_i_size > inode_u.bi_size) { + inode_u.bi_size = new_i_size; + extended = true; + } + + bch2_inode_pack(&inode_p, &inode_u); + bch2_trans_update(trans, + BTREE_INSERT_ENTRY(inode_iter, &inode_p.inode.k_i)); } - - inode_locked = true; - - if (!inode->ei_inode_update) - inode->ei_inode_update = - bch2_deferred_update_alloc(trans->c, - BTREE_ID_INODES, 64); - - inode_u = inode->ei_inode; - inode_u.bi_sectors += i_sectors_delta; - - /* XXX: this is slightly suspect */ - if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - new_i_size > inode_u.bi_size) { - inode_u.bi_size = new_i_size; - extended = true; - } - - bch2_inode_pack(&inode_p, &inode_u); - bch2_trans_update(trans, - BTREE_INSERT_DEFERRED(inode->ei_inode_update, - &inode_p.inode.k_i)); } ret = bch2_trans_commit(trans, disk_res, @@ -376,11 +405,13 @@ static int bch2_extent_update(struct btree_trans *trans, } if (direct) - i_sectors_acct(trans->c, inode, quota_res, i_sectors_delta); + i_sectors_acct(c, inode, quota_res, i_sectors_delta); if (total_delta) *total_delta += i_sectors_delta; err: + if (!IS_ERR_OR_NULL(inode_iter)) + bch2_trans_iter_put(trans, inode_iter); if (inode_locked) mutex_unlock(&inode->ei_update_lock); diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 8c9fdc84..55fc88d3 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -112,22 +112,24 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, lockdep_assert_held(&inode->ei_update_lock); + if (c->opts.new_inode_updates) { /* XXX: Don't do this with btree locks held */ if (!inode->ei_inode_update) inode->ei_inode_update = bch2_deferred_update_alloc(c, BTREE_ID_INODES, 64); -#if 0 - iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, - POS(inode->v.i_ino, 0), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - if (IS_ERR(iter)) - return PTR_ERR(iter); + } else { + iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, + POS(inode->v.i_ino, 0), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + if (IS_ERR(iter)) + return PTR_ERR(iter); + + /* The btree node lock is our lock on the inode: */ + ret = bch2_btree_iter_traverse(iter); + if (ret) + return ret; + } - /* The btree node lock is our lock on the inode: */ - ret = bch2_btree_iter_traverse(iter); - if (ret) - return ret; -#endif *inode_u = inode->ei_inode; if (set) { diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index f2ce0e5f..a20a09ee 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -288,7 +288,12 @@ enum opt_type { OPT_UINT(0, BCH_REPLICAS_MAX), \ NO_SB_OPT, 1, \ "n", "Data written to this device will be considered\n"\ - "to have already been replicated n times") + "to have already been replicated n times") \ + x(new_inode_updates, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Enable new btree write-cache for inode updates") struct bch_opts {