From 82c7e8ff35ac0c501290cd4779e8ab2c553aa09d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Dec 2019 18:19:05 -0500 Subject: [PATCH] Update bcachefs sources to cfb41d25c7 bcachefs: Add an assertion to track down a heisenbug --- .bcachefs_revision | 2 +- cmd_migrate.c | 6 +- libbcachefs/acl.c | 1 - libbcachefs/alloc_background.c | 2 - libbcachefs/btree_iter.c | 5 - libbcachefs/btree_iter.h | 5 - libbcachefs/btree_types.h | 1 + libbcachefs/btree_update.h | 37 ++++--- libbcachefs/btree_update_interior.c | 2 + libbcachefs/btree_update_leaf.c | 156 ++++++++++++---------------- libbcachefs/buckets.c | 2 +- libbcachefs/clock.c | 45 +++++--- libbcachefs/clock.h | 6 +- libbcachefs/clock_types.h | 1 + libbcachefs/dirent.c | 12 --- libbcachefs/dirent.h | 2 - libbcachefs/ec.c | 11 +- libbcachefs/fs-io.c | 9 +- libbcachefs/fs.c | 6 -- libbcachefs/fsck.c | 125 ++++++++++++---------- libbcachefs/inode.c | 3 +- libbcachefs/io.c | 33 +++--- libbcachefs/migrate.c | 1 - libbcachefs/move.c | 1 - libbcachefs/movinggc.c | 52 +++++----- libbcachefs/recovery.c | 14 +-- libbcachefs/reflink.c | 6 +- libbcachefs/super-io.c | 4 +- libbcachefs/super.c | 2 +- libbcachefs/super.h | 1 - libbcachefs/sysfs.c | 12 +++ libbcachefs/xattr.c | 2 +- 32 files changed, 278 insertions(+), 289 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index ede4a3de..2dbaaff9 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -5d7142b75a6eb52f7398fd119971c14d76add6ba +cfb41d25c7a20e4c9b67a8d87f88b659412d5f3f diff --git a/cmd_migrate.c b/cmd_migrate.c index 14532b6a..998275a0 100644 --- a/cmd_migrate.c +++ b/cmd_migrate.c @@ -137,7 +137,7 @@ static void create_link(struct bch_fs *c, struct bch_inode_unpacked parent_u; struct bch_inode_unpacked inode; - int ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC, + int ret = bch2_trans_do(c, NULL, NULL, 0, bch2_link_trans(&trans, parent->bi_inum, inum, &parent_u, &inode, &qstr)); if (ret) @@ -153,7 +153,7 @@ static struct bch_inode_unpacked create_file(struct bch_fs *c, struct qstr qstr = QSTR(name); struct bch_inode_unpacked new_inode; - int ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC, + int ret = bch2_trans_do(c, NULL, NULL, 0, bch2_create_trans(&trans, parent->bi_inum, parent, &new_inode, &qstr, @@ -224,7 +224,7 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst, const struct xattr_handler *h = xattr_resolve_name(&attr); - int ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC, + int ret = bch2_trans_do(c, NULL, NULL, 0, bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr, val, val_size, h->flags, 0)); if (ret < 0) diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index dcd0dfe8..9dbc1d99 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -323,7 +323,6 @@ retry: ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); btree_err: if (ret == -EINTR) diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index e252a039..83caa05e 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -316,7 +316,6 @@ retry: bch2_trans_update(trans, iter, &a->k_i); ret = bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_NOMARK| flags); @@ -913,7 +912,6 @@ retry: */ ret = bch2_trans_commit(trans, NULL, invalidating_cached_data ? journal_seq : NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 918e6fe4..6f19304b 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -457,11 +457,6 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter, "cur key %s\n", iter->pos.inode, iter->pos.offset, buf); } - - BUG_ON(iter->uptodate == BTREE_ITER_UPTODATE && - btree_iter_type(iter) == BTREE_ITER_KEYS && - !bkey_whiteout(&iter->k) && - bch2_btree_node_iter_end(&l->iter)); } void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b) diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 4c503222..d750c4e5 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -299,11 +299,6 @@ static inline void bch2_trans_begin(struct btree_trans *trans) return bch2_trans_reset(trans, TRANS_RESET_ITERS|TRANS_RESET_MEM); } -static inline void bch2_trans_begin_updates(struct btree_trans *trans) -{ - return bch2_trans_reset(trans, TRANS_RESET_MEM); -} - void *bch2_trans_kmalloc(struct btree_trans *, size_t); void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); int bch2_trans_exit(struct btree_trans *); diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 0c0a3f35..2a5b70c7 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -278,6 +278,7 @@ struct btree_trans { struct disk_reservation *disk_res; unsigned flags; unsigned journal_u64s; + unsigned journal_preres_u64s; struct replicas_delta_list *fs_usage_deltas; struct btree_iter iters_onstack[2]; diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index ad8cbf3f..1534e937 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -16,7 +16,6 @@ void bch2_btree_journal_key(struct btree_trans *, struct btree_iter *, struct bkey_i *); enum { - __BTREE_INSERT_ATOMIC, __BTREE_INSERT_NOUNLOCK, __BTREE_INSERT_NOFAIL, __BTREE_INSERT_NOCHECK_RW, @@ -27,7 +26,6 @@ enum { __BTREE_INSERT_JOURNAL_RESERVED, __BTREE_INSERT_NOMARK_OVERWRITES, __BTREE_INSERT_NOMARK, - __BTREE_INSERT_NO_CLEAR_REPLICAS, __BTREE_INSERT_BUCKET_INVALIDATE, __BTREE_INSERT_NOWAIT, __BTREE_INSERT_GC_LOCK_HELD, @@ -35,12 +33,6 @@ enum { __BCH_HASH_SET_MUST_REPLACE, }; -/* - * Don't drop/retake locks before doing btree update, instead return -EINTR if - * we had to drop locks for any reason - */ -#define BTREE_INSERT_ATOMIC (1 << __BTREE_INSERT_ATOMIC) - /* * Don't drop locks _after_ successfully updating btree: */ @@ -67,8 +59,6 @@ enum { /* Don't call mark new key at all: */ #define BTREE_INSERT_NOMARK (1 << __BTREE_INSERT_NOMARK) -#define BTREE_INSERT_NO_CLEAR_REPLICAS (1 << __BTREE_INSERT_NO_CLEAR_REPLICAS) - #define BTREE_INSERT_BUCKET_INVALIDATE (1 << __BTREE_INSERT_BUCKET_INVALIDATE) /* Don't block on allocation failure (for new btree nodes: */ @@ -101,8 +91,7 @@ int __bch2_trans_commit(struct btree_trans *); * This is main entry point for btree updates. * * Return values: - * -EINTR: locking changed, this function should be called again. Only returned - * if passed BTREE_INSERT_ATOMIC. + * -EINTR: locking changed, this function should be called again. * -EROFS: filesystem read only * -EIO: journal or btree node IO error */ @@ -131,24 +120,34 @@ static inline void bch2_trans_update(struct btree_trans *trans, }; } -#define bch2_trans_do(_c, _journal_seq, _flags, _do) \ +#define __bch2_trans_do(_trans, _disk_res, _journal_seq, \ + _flags, _reset_flags, _do) \ ({ \ - struct btree_trans trans; \ int _ret; \ \ - bch2_trans_init(&trans, (_c), 0, 0); \ - \ do { \ - bch2_trans_begin(&trans); \ + bch2_trans_reset(_trans, _reset_flags); \ \ - _ret = (_do) ?: bch2_trans_commit(&trans, NULL, \ + _ret = (_do) ?: bch2_trans_commit(_trans, (_disk_res), \ (_journal_seq), (_flags)); \ } while (_ret == -EINTR); \ \ - bch2_trans_exit(&trans); \ _ret; \ }) +#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ +({ \ + struct btree_trans trans; \ + int _ret, _ret2; \ + \ + bch2_trans_init(&trans, (_c), 0, 0); \ + _ret = __bch2_trans_do(&trans, _disk_res, _journal_seq, _flags, \ + TRANS_RESET_MEM|TRANS_RESET_ITERS, _do); \ + _ret2 = bch2_trans_exit(&trans); \ + \ + _ret ?: _ret2; \ +}) + #define trans_for_each_update(_trans, _i) \ for ((_i) = (_trans)->updates; \ (_i) < (_trans)->updates + (_trans)->nr_updates; \ diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index f8a30cb3..5bb51864 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -1657,6 +1657,8 @@ void __bch2_foreground_maybe_merge(struct bch_fs *c, size_t sib_u64s; int ret = 0; + BUG_ON(!btree_node_locked(iter, level)); + closure_init_stack(&cl); retry: BUG_ON(!btree_node_locked(iter, level)); diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 46c0a1e7..d0aca7f9 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -298,8 +298,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) && bkey_cmp(i->k->k.p, i->iter->l[0].b->key.k.p) > 0); - EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) && - !(trans->flags & BTREE_INSERT_ATOMIC)); BUG_ON(debug_check_bkeys(c) && !bkey_deleted(&i->k->k) && @@ -517,44 +515,18 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, { struct btree_insert_entry *i; struct btree_iter *iter; - unsigned idx, u64s, journal_preres_u64s = 0; + unsigned idx; int ret; - /* - * note: running triggers will append more updates to the list of - * updates as we're walking it: - */ - trans_for_each_update(trans, i) { - /* we know trans->nounlock won't be set here: */ - if (unlikely(!(i->iter->locks_want < 1 - ? __bch2_btree_iter_upgrade(i->iter, 1) - : i->iter->uptodate <= BTREE_ITER_NEED_PEEK))) { - trace_trans_restart_upgrade(trans->ip); - return -EINTR; - } - - if (likely(!(trans->flags & BTREE_INSERT_NOMARK)) && - update_has_trans_triggers(i)) { - ret = bch2_trans_mark_update(trans, i->iter, i->k); - if (unlikely(ret)) { - if (ret == -EINTR) - trace_trans_restart_mark(trans->ip); - return ret; - } - } - - u64s = jset_u64s(i->k->k.u64s); - if (0) - journal_preres_u64s += u64s; - trans->journal_u64s += u64s; - } + trans_for_each_update(trans, i) + BUG_ON(!btree_node_intent_locked(i->iter, 0)); ret = bch2_journal_preres_get(&trans->c->journal, - &trans->journal_preres, journal_preres_u64s, + &trans->journal_preres, trans->journal_preres_u64s, JOURNAL_RES_GET_NONBLOCK); if (unlikely(ret == -EAGAIN)) ret = bch2_trans_journal_preres_get_cold(trans, - journal_preres_u64s); + trans->journal_preres_u64s); if (unlikely(ret)) return ret; @@ -641,8 +613,8 @@ int bch2_trans_commit_error(struct btree_trans *trans, /* * if the split succeeded without dropping locks the insert will - * still be atomic (in the BTREE_INSERT_ATOMIC sense, what the - * caller peeked() and is overwriting won't have changed) + * still be atomic (what the caller peeked() and is overwriting + * won't have changed) */ #if 0 /* @@ -713,13 +685,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, return ret2; } - /* - * BTREE_ITER_ATOMIC means we have to return -EINTR if we - * dropped locks: - */ - if (!(flags & BTREE_INSERT_ATOMIC)) - return 0; - trace_trans_restart_atomic(trans->ip); } @@ -749,40 +714,61 @@ int __bch2_trans_commit(struct btree_trans *trans) { struct btree_insert_entry *i = NULL; struct btree_iter *iter; - unsigned orig_nr_updates = trans->nr_updates; - unsigned orig_mem_top = trans->mem_top; + unsigned u64s; int ret = 0; if (!trans->nr_updates) goto out_noupdates; - /* for the sake of sanity: */ - EBUG_ON(trans->nr_updates > 1 && !(trans->flags & BTREE_INSERT_ATOMIC)); - if (trans->flags & BTREE_INSERT_GC_LOCK_HELD) lockdep_assert_held(&trans->c->gc_lock); memset(&trans->journal_preres, 0, sizeof(trans->journal_preres)); + trans->journal_u64s = 0; + trans->journal_preres_u64s = 0; + if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) && unlikely(!percpu_ref_tryget(&trans->c->writes))) { ret = bch2_trans_commit_get_rw_cold(trans); if (ret) return ret; } + + /* + * note: running triggers will append more updates to the list of + * updates as we're walking it: + */ + trans_for_each_update(trans, i) { + /* we know trans->nounlock won't be set here: */ + if (unlikely(!(i->iter->locks_want < 1 + ? __bch2_btree_iter_upgrade(i->iter, 1) + : i->iter->uptodate <= BTREE_ITER_NEED_PEEK))) { + trace_trans_restart_upgrade(trans->ip); + ret = -EINTR; + goto out; + } + + if (likely(!(trans->flags & BTREE_INSERT_NOMARK)) && + update_has_trans_triggers(i)) { + ret = bch2_trans_mark_update(trans, i->iter, i->k); + if (unlikely(ret)) { + if (ret == -EINTR) + trace_trans_restart_mark(trans->ip); + goto out; + } + } + + u64s = jset_u64s(i->k->k.u64s); + if (0) + trans->journal_preres_u64s += u64s; + trans->journal_u64s += u64s; + } retry: memset(&trans->journal_res, 0, sizeof(trans->journal_res)); - trans->journal_u64s = 0; ret = do_bch2_trans_commit(trans, &i); - if (trans->fs_usage_deltas) { - trans->fs_usage_deltas->used = 0; - memset(&trans->fs_usage_deltas->memset_start, 0, - (void *) &trans->fs_usage_deltas->memset_end - - (void *) &trans->fs_usage_deltas->memset_start); - } - /* make sure we didn't drop or screw up locks: */ bch2_btree_trans_verify_locks(trans); @@ -794,8 +780,6 @@ out: if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) percpu_ref_put(&trans->c->writes); out_noupdates: - EBUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR); - trans_for_each_iter_all(trans, iter) iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; @@ -806,22 +790,36 @@ out_noupdates: trans->nr_updates = 0; trans->mem_top = 0; + if (trans->fs_usage_deltas) { + trans->fs_usage_deltas->used = 0; + memset(&trans->fs_usage_deltas->memset_start, 0, + (void *) &trans->fs_usage_deltas->memset_end - + (void *) &trans->fs_usage_deltas->memset_start); + } + return ret; err: ret = bch2_trans_commit_error(trans, i, ret); - - /* can't loop if it was passed in and we changed it: */ - if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret) - ret = -EINTR; if (ret) goto out; - /* free updates and memory used by triggers, they'll be reexecuted: */ - trans->nr_updates = orig_nr_updates; - trans->mem_top = orig_mem_top; goto retry; } +static int __bch2_btree_insert(struct btree_trans *trans, + enum btree_id id, struct bkey_i *k) +{ + struct btree_iter *iter; + + iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k), + BTREE_ITER_INTENT); + if (IS_ERR(iter)) + return PTR_ERR(iter); + + bch2_trans_update(trans, iter, k); + return 0; +} + /** * bch2_btree_insert - insert keys into the extent btree * @c: pointer to struct bch_fs @@ -830,29 +828,12 @@ err: * @hook: insert callback */ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, - struct bkey_i *k, - struct disk_reservation *disk_res, - u64 *journal_seq, int flags) + struct bkey_i *k, + struct disk_reservation *disk_res, + u64 *journal_seq, int flags) { - struct btree_trans trans; - struct btree_iter *iter; - int ret; - - bch2_trans_init(&trans, c, 0, 0); -retry: - bch2_trans_begin(&trans); - - iter = bch2_trans_get_iter(&trans, id, bkey_start_pos(&k->k), - BTREE_ITER_INTENT); - - bch2_trans_update(&trans, iter, k); - - ret = bch2_trans_commit(&trans, disk_res, journal_seq, flags); - if (ret == -EINTR) - goto retry; - bch2_trans_exit(&trans); - - return ret; + return bch2_trans_do(c, disk_res, journal_seq, flags, + __bch2_btree_insert(&trans, id, k)); } int bch2_btree_delete_at_range(struct btree_trans *trans, @@ -868,6 +849,8 @@ retry: bkey_cmp(iter->pos, end) < 0) { struct bkey_i delete; + bch2_trans_reset(trans, TRANS_RESET_MEM); + bkey_init(&delete.k); /* @@ -897,7 +880,6 @@ retry: bch2_trans_update(trans, iter, &delete); ret = bch2_trans_commit(trans, NULL, journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL); if (ret) break; diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 8d223aa2..5a34dab0 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -161,7 +161,7 @@ struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c) struct bch_fs_usage *ret; unsigned bytes = fs_usage_u64s(c) * sizeof(u64); - ret = kzalloc(bytes, GFP_NOWAIT); + ret = kzalloc(bytes, GFP_NOWAIT|__GFP_NOWARN); if (ret) return ret; diff --git a/libbcachefs/clock.c b/libbcachefs/clock.c index f1826633..d9de0d13 100644 --- a/libbcachefs/clock.c +++ b/libbcachefs/clock.c @@ -18,6 +18,14 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) size_t i; spin_lock(&clock->timer_lock); + + if (time_after_eq((unsigned long) atomic_long_read(&clock->now), + timer->expire)) { + spin_unlock(&clock->timer_lock); + timer->fn(timer); + return; + } + for (i = 0; i < clock->timers.used; i++) if (clock->timers.data[i] == timer) goto out; @@ -135,28 +143,33 @@ static struct io_timer *get_expired_timer(struct io_clock *clock, return ret; } -void __bch2_increment_clock(struct io_clock *clock) +void __bch2_increment_clock(struct io_clock *clock, unsigned sectors) { struct io_timer *timer; - unsigned long now; - unsigned sectors; - - /* Buffer up one megabyte worth of IO in the percpu counter */ - preempt_disable(); - - if (this_cpu_read(*clock->pcpu_buf) < IO_CLOCK_PCPU_SECTORS) { - preempt_enable(); - return; - } - - sectors = this_cpu_xchg(*clock->pcpu_buf, 0); - preempt_enable(); - now = atomic_long_add_return(sectors, &clock->now); + unsigned long now = atomic_long_add_return(sectors, &clock->now); while ((timer = get_expired_timer(clock, now))) timer->fn(timer); } +ssize_t bch2_io_timers_show(struct io_clock *clock, char *buf) +{ + struct printbuf out = _PBUF(buf, PAGE_SIZE); + unsigned long now; + unsigned i; + + spin_lock(&clock->timer_lock); + now = atomic_long_read(&clock->now); + + for (i = 0; i < clock->timers.used; i++) + pr_buf(&out, "%pf:\t%li\n", + clock->timers.data[i]->fn, + clock->timers.data[i]->expire - now); + spin_unlock(&clock->timer_lock); + + return out.pos - buf; +} + void bch2_io_clock_exit(struct io_clock *clock) { free_heap(&clock->timers); @@ -168,6 +181,8 @@ int bch2_io_clock_init(struct io_clock *clock) atomic_long_set(&clock->now, 0); spin_lock_init(&clock->timer_lock); + clock->max_slop = IO_CLOCK_PCPU_SECTORS * num_possible_cpus(); + clock->pcpu_buf = alloc_percpu(*clock->pcpu_buf); if (!clock->pcpu_buf) return -ENOMEM; diff --git a/libbcachefs/clock.h b/libbcachefs/clock.h index bfbbca8a..da50afe2 100644 --- a/libbcachefs/clock.h +++ b/libbcachefs/clock.h @@ -7,7 +7,7 @@ void bch2_io_timer_del(struct io_clock *, struct io_timer *); void bch2_kthread_io_clock_wait(struct io_clock *, unsigned long, unsigned long); -void __bch2_increment_clock(struct io_clock *); +void __bch2_increment_clock(struct io_clock *, unsigned); static inline void bch2_increment_clock(struct bch_fs *c, unsigned sectors, int rw) @@ -16,7 +16,7 @@ static inline void bch2_increment_clock(struct bch_fs *c, unsigned sectors, if (unlikely(this_cpu_add_return(*clock->pcpu_buf, sectors) >= IO_CLOCK_PCPU_SECTORS)) - __bch2_increment_clock(clock); + __bch2_increment_clock(clock, this_cpu_xchg(*clock->pcpu_buf, 0)); } void bch2_io_clock_schedule_timeout(struct io_clock *, unsigned long); @@ -30,6 +30,8 @@ void bch2_io_clock_schedule_timeout(struct io_clock *, unsigned long); __ret; \ }) +ssize_t bch2_io_timers_show(struct io_clock *, char *); + void bch2_io_clock_exit(struct io_clock *); int bch2_io_clock_init(struct io_clock *); diff --git a/libbcachefs/clock_types.h b/libbcachefs/clock_types.h index 2b5e499e..92c740a4 100644 --- a/libbcachefs/clock_types.h +++ b/libbcachefs/clock_types.h @@ -28,6 +28,7 @@ typedef HEAP(struct io_timer *) io_timer_heap; struct io_clock { atomic_long_t now; u16 __percpu *pcpu_buf; + unsigned max_slop; spinlock_t timer_lock; io_timer_heap timers; diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 38017699..1bf53c55 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -281,18 +281,6 @@ int bch2_dirent_delete_at(struct btree_trans *trans, hash_info, iter); } -int bch2_dirent_delete(struct bch_fs *c, u64 dir_inum, - const struct bch_hash_info *hash_info, - const struct qstr *name, - u64 *journal_seq) -{ - return bch2_trans_do(c, journal_seq, - BTREE_INSERT_ATOMIC| - BTREE_INSERT_NOFAIL, - bch2_hash_delete(&trans, bch2_dirent_hash_desc, hash_info, - dir_inum, name)); -} - struct btree_iter * __bch2_dirent_lookup_trans(struct btree_trans *trans, u64 dir_inum, const struct bch_hash_info *hash_info, diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h index e6184dc7..34769371 100644 --- a/libbcachefs/dirent.h +++ b/libbcachefs/dirent.h @@ -36,8 +36,6 @@ int bch2_dirent_create(struct btree_trans *, u64, int bch2_dirent_delete_at(struct btree_trans *, const struct bch_hash_info *, struct btree_iter *); -int bch2_dirent_delete(struct bch_fs *, u64, const struct bch_hash_info *, - const struct qstr *, u64 *); enum bch_rename_mode { BCH_RENAME, diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 5287b5ee..045a3316 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -739,7 +739,6 @@ found_slot: bch2_trans_update(&trans, iter, &stripe->k_i); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL); err: if (ret == -EINTR) @@ -822,7 +821,6 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, bch2_trans_update(&trans, iter, sk.k); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE); if (ret == -EINTR) @@ -1259,8 +1257,13 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote) if (!m->dirty) continue; - ret = __bch2_stripe_write_key(&trans, iter, m, giter.pos, - new_key, flags); + do { + bch2_trans_reset(&trans, TRANS_RESET_MEM); + + ret = __bch2_stripe_write_key(&trans, iter, m, + giter.pos, new_key, flags); + } while (ret == -EINTR); + if (ret) break; diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 7f954a5d..50b82cb9 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -2493,9 +2493,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, struct bpos next_pos; struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); struct bpos atomic_end; - unsigned commit_flags = BTREE_INSERT_NOFAIL| - BTREE_INSERT_ATOMIC| - BTREE_INSERT_USE_RESERVE; + unsigned commit_flags = 0; k = insert ? bch2_btree_iter_peek_prev(src) @@ -2588,6 +2586,7 @@ reassemble: ret = bch2_trans_commit(&trans, &disk_res, &inode->ei_journal_seq, + BTREE_INSERT_NOFAIL| commit_flags); bch2_disk_reservation_put(c, &disk_res); bkey_err: @@ -2678,6 +2677,8 @@ static long bchfs_fallocate(struct bch_inode_info *inode, int mode, struct bkey_i_reservation reservation; struct bkey_s_c k; + bch2_trans_reset(&trans, TRANS_RESET_MEM); + k = bch2_btree_iter_peek_slot(iter); if ((ret = bkey_err(k))) goto bkey_err; @@ -2724,8 +2725,6 @@ static long bchfs_fallocate(struct bch_inode_info *inode, int mode, reservation.v.nr_replicas = disk_res.nr_replicas; } - bch2_trans_begin_updates(&trans); - ret = bch2_extent_update(&trans, iter, &reservation.k_i, &disk_res, &inode->ei_journal_seq, 0, &i_sectors_delta); diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 278c6d5b..f14f8805 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -140,7 +140,6 @@ retry: bch2_inode_write(&trans, iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); if (ret == -EINTR) @@ -269,7 +268,6 @@ retry: goto err_before_quota; ret = bch2_trans_commit(&trans, NULL, &journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); if (unlikely(ret)) { bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, @@ -393,7 +391,6 @@ static int __bch2_link(struct bch_fs *c, &dentry->d_name) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); } while (ret == -EINTR); @@ -450,7 +447,6 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) &inode_u, &dentry->d_name) ?: bch2_trans_commit(&trans, NULL, &dir->ei_journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); } while (ret == -EINTR); @@ -579,7 +575,6 @@ retry: mode) ?: bch2_trans_commit(&trans, NULL, &journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); if (ret == -EINTR) goto retry; @@ -712,7 +707,6 @@ retry: ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); btree_err: diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 3ae545b3..cd230dc1 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -37,8 +37,8 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) return ret ?: sectors; } -static int remove_dirent(struct btree_trans *trans, - struct bkey_s_c_dirent dirent) +static int __remove_dirent(struct btree_trans *trans, + struct bkey_s_c_dirent dirent) { struct bch_fs *c = trans->c; struct qstr name; @@ -49,31 +49,40 @@ static int remove_dirent(struct btree_trans *trans, char *buf; name.len = bch2_dirent_name_bytes(dirent); - buf = kmalloc(name.len + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; + buf = bch2_trans_kmalloc(trans, name.len + 1); + if (IS_ERR(buf)) + return PTR_ERR(buf); memcpy(buf, dirent.v->d_name, name.len); buf[name.len] = '\0'; name.name = buf; - /* Unlock so we don't deadlock, after copying name: */ - bch2_trans_unlock(trans); - - ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode); - if (ret) { + ret = bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode); + if (ret && ret != -EINTR) bch_err(c, "remove_dirent: err %i looking up directory inode", ret); - goto err; - } + if (ret) + return ret; dir_hash_info = bch2_hash_info_init(c, &dir_inode); - ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL); - if (ret) + ret = bch2_hash_delete(trans, bch2_dirent_hash_desc, + &dir_hash_info, dir_inum, &name); + if (ret && ret != -EINTR) bch_err(c, "remove_dirent: err %i deleting dirent", ret); -err: - kfree(buf); - return ret; + if (ret) + return ret; + + return 0; +} + +static int remove_dirent(struct btree_trans *trans, + struct bkey_s_c_dirent dirent) +{ + return __bch2_trans_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + TRANS_RESET_MEM, + __remove_dirent(trans, dirent)); } static int reattach_inode(struct bch_fs *c, @@ -88,8 +97,7 @@ static int reattach_inode(struct bch_fs *c, snprintf(name_buf, sizeof(name_buf), "%llu", inum); name = (struct qstr) QSTR(name_buf); - ret = bch2_trans_do(c, NULL, - BTREE_INSERT_ATOMIC| + ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW, bch2_link_trans(&trans, lostfound_inode->bi_inum, inum, &dir_u, &inode_u, &name)); @@ -171,27 +179,26 @@ static int hash_redo_key(const struct bch_hash_desc desc, struct btree_iter *k_iter, struct bkey_s_c k, u64 hashed) { + struct bkey_i delete; struct bkey_i *tmp; - int ret = 0; - tmp = kmalloc(bkey_bytes(k.k), GFP_KERNEL); - if (!tmp) - return -ENOMEM; + bch2_trans_reset(trans, TRANS_RESET_MEM); + + tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); bkey_reassemble(tmp, k); - ret = bch2_btree_delete_at(trans, k_iter, 0); - if (ret) - goto err; + bkey_init(&delete.k); + delete.k.p = k_iter->pos; + bch2_trans_update(trans, k_iter, &delete); - bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, - tmp, BCH_HASH_SET_MUST_CREATE); - ret = bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); -err: - kfree(tmp); - return ret; + return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, + tmp, BCH_HASH_SET_MUST_CREATE) ?: + bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); } static int fsck_hash_delete_at(struct btree_trans *trans, @@ -203,7 +210,6 @@ static int fsck_hash_delete_at(struct btree_trans *trans, retry: ret = bch2_hash_delete_at(trans, desc, info, iter) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); if (ret == -EINTR) { @@ -313,9 +319,11 @@ static int hash_check_key(struct btree_trans *trans, "hashed to %llu chain starts at %llu\n%s", desc.btree_id, k.k->p.offset, hashed, h->chain->pos.offset, - (bch2_bkey_val_to_text(&PBUF(buf), c, - k), buf))) { - ret = hash_redo_key(desc, trans, h, k_iter, k, hashed); + (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { + do { + ret = hash_redo_key(desc, trans, h, k_iter, k, hashed); + } while (ret == -EINTR); + if (ret) { bch_err(c, "hash_redo_key err %i", ret); return ret; @@ -376,11 +384,11 @@ static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h, if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)", buf, strlen(buf), d->v.d_name, len)) { - bch2_trans_update(trans, iter, &d->k_i); - - ret = bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); + ret = __bch2_trans_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + TRANS_RESET_MEM, + (bch2_trans_update(trans, iter, &d->k_i), 0)); if (ret) goto err; @@ -402,8 +410,11 @@ err_redo: k->k->p.offset, hash, h->chain->pos.offset, (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { - ret = hash_redo_key(bch2_dirent_hash_desc, trans, - h, iter, *k, hash); + do { + ret = hash_redo_key(bch2_dirent_hash_desc, trans, + h, iter, *k, hash); + } while (ret == -EINTR); + if (ret) bch_err(c, "hash_redo_key err %i", ret); else @@ -646,11 +657,11 @@ retry: bkey_reassemble(&n->k_i, d.s_c); n->v.d_type = mode_to_type(target.bi_mode); - bch2_trans_update(&trans, iter, &n->k_i); - - ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + TRANS_RESET_MEM, + (bch2_trans_update(&trans, iter, &n->k_i), 0)); kfree(n); if (ret) goto err; @@ -790,8 +801,7 @@ fsck_err: create_lostfound: bch2_inode_init_early(c, lostfound_inode); - ret = bch2_trans_do(c, NULL, - BTREE_INSERT_ATOMIC| + ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_create_trans(&trans, @@ -1261,12 +1271,13 @@ static int check_inode(struct btree_trans *trans, struct bkey_inode_buf p; bch2_inode_pack(&p, &u); - bch2_trans_update(trans, iter, &p.inode.k_i); - ret = bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); - if (ret && ret != -EINTR) + ret = __bch2_trans_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + TRANS_RESET_MEM, + (bch2_trans_update(trans, iter, &p.inode.k_i), 0)); + if (ret) bch_err(c, "error in fsck: error %i " "updating inode", ret); } diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index c0642ff4..227cfb57 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -496,7 +496,6 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) bch2_trans_update(&trans, iter, &delete.k_i); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL); } while (ret == -EINTR); @@ -533,7 +532,7 @@ int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr, int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, struct bch_inode_unpacked *inode) { - return bch2_trans_do(c, NULL, 0, + return bch2_trans_do(c, NULL, NULL, 0, bch2_inode_find_by_inum_trans(&trans, inode_nr, inode)); } diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 17ea38e4..86dee7e8 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -303,7 +303,6 @@ int bch2_extent_update(struct btree_trans *trans, ret = bch2_trans_commit(trans, disk_res, journal_seq, BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| - BTREE_INSERT_ATOMIC| BTREE_INSERT_USE_RESERVE); if (!ret && i_sectors_delta) *i_sectors_delta += delta; @@ -326,6 +325,8 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, bch2_disk_reservation_init(c, 0); struct bkey_i delete; + bch2_trans_reset(trans, TRANS_RESET_MEM); + ret = bkey_err(k); if (ret) goto btree_err; @@ -337,8 +338,6 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, bch2_key_resize(&delete.k, max_sectors); bch2_cut_back(end, &delete); - bch2_trans_begin_updates(trans); - ret = bch2_extent_update(trans, iter, &delete, &disk_res, journal_seq, 0, i_sectors_delta); @@ -400,14 +399,14 @@ int bch2_write_index_default(struct bch_write_op *op) BTREE_ITER_SLOTS|BTREE_ITER_INTENT); do { + bch2_trans_reset(&trans, TRANS_RESET_MEM); + k = bch2_keylist_front(keys); bkey_on_stack_realloc(&sk, c, k->k.u64s); bkey_copy(sk.k, k); bch2_cut_front(iter->pos, sk.k); - bch2_trans_begin_updates(&trans); - ret = bch2_extent_update(&trans, iter, sk.k, &op->res, op_journal_seq(op), op->new_i_size, &op->i_sectors_delta); @@ -501,12 +500,13 @@ static void bch2_write_done(struct closure *cl) bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); - if (op->end_io) - op->end_io(op); - if (cl->parent) - closure_return(cl); - else + if (op->end_io) { + EBUG_ON(cl->parent); closure_debug_destroy(cl); + op->end_io(op); + } else { + closure_return(cl); + } } /** @@ -1233,12 +1233,14 @@ void bch2_write(struct closure *cl) err: if (!(op->flags & BCH_WRITE_NOPUT_RESERVATION)) bch2_disk_reservation_put(c, &op->res); - if (op->end_io) - op->end_io(op); - if (cl->parent) - closure_return(cl); - else + + if (op->end_io) { + EBUG_ON(cl->parent); closure_debug_destroy(cl); + op->end_io(op); + } else { + closure_return(cl); + } } /* Cache promotion on read */ @@ -1738,7 +1740,6 @@ retry: bch2_trans_update(&trans, iter, new.k); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_NOWAIT); if (ret == -EINTR) diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index 4b59dcd0..db86420b 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -79,7 +79,6 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags bch2_trans_update(&trans, iter, sk.k); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL); /* diff --git a/libbcachefs/move.c b/libbcachefs/move.c index fad3cc4d..28030562 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -154,7 +154,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op) ret = bch2_trans_commit(&trans, &op->res, op_journal_seq(op), - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE| m->data_opts.btree_insert_flags); diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index abdeef20..e9cb2304 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -212,14 +212,36 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca) buckets_to_move, buckets_not_moved); } +/* + * Copygc runs when the amount of fragmented data is above some arbitrary + * threshold: + * + * The threshold at the limit - when the device is full - is the amount of space + * we reserved in bch2_recalc_capacity; we can't have more than that amount of + * disk space stranded due to fragmentation and store everything we have + * promised to store. + * + * But we don't want to be running copygc unnecessarily when the device still + * has plenty of free space - rather, we want copygc to smoothly run every so + * often and continually reduce the amount of fragmented space as the device + * fills up. So, we increase the threshold by half the current free space. + */ +unsigned long bch2_copygc_wait_amount(struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + struct bch_dev_usage usage = bch2_dev_usage_read(c, ca); + u64 fragmented_allowed = ca->copygc_threshold + + ((__dev_buckets_available(ca, usage) * ca->mi.bucket_size) >> 1); + + return max_t(s64, 0, fragmented_allowed - usage.sectors_fragmented); +} + static int bch2_copygc_thread(void *arg) { struct bch_dev *ca = arg; struct bch_fs *c = ca->fs; struct io_clock *clock = &c->io_clock[WRITE]; - struct bch_dev_usage usage; - unsigned long last; - u64 available, fragmented, reserve, next; + unsigned long last, wait; set_freezable(); @@ -228,28 +250,10 @@ static int bch2_copygc_thread(void *arg) break; last = atomic_long_read(&clock->now); + wait = bch2_copygc_wait_amount(ca); - reserve = ca->copygc_threshold; - - usage = bch2_dev_usage_read(c, ca); - - available = __dev_buckets_available(ca, usage) * - ca->mi.bucket_size; - if (available > reserve) { - next = last + available - reserve; - bch2_kthread_io_clock_wait(clock, next, - MAX_SCHEDULE_TIMEOUT); - continue; - } - - /* - * don't start copygc until there's more than half the copygc - * reserve of fragmented space: - */ - fragmented = usage.sectors_fragmented; - if (fragmented < reserve) { - next = last + reserve - fragmented; - bch2_kthread_io_clock_wait(clock, next, + if (wait > clock->max_slop) { + bch2_kthread_io_clock_wait(clock, last + wait, MAX_SCHEDULE_TIMEOUT); continue; } diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index e6b51131..9c90d2bb 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -309,14 +309,11 @@ retry: 0, -((s64) k->k.size), BCH_BUCKET_MARK_OVERWRITE) ?: bch2_trans_commit(&trans, &disk_res, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOMARK_OVERWRITES| - BTREE_INSERT_NO_CLEAR_REPLICAS); + BTREE_INSERT_NOMARK_OVERWRITES); } else { ret = bch2_trans_commit(&trans, &disk_res, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW| BTREE_INSERT_JOURNAL_REPLAY| @@ -989,11 +986,6 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_fs_journal_start(&c->journal, 1, &journal); bch2_journal_set_replay_done(&c->journal); - err = "error going read write"; - ret = __bch2_fs_read_write(c, true); - if (ret) - goto err; - bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); root_inode.bi_inum = BCACHEFS_ROOT_INO; @@ -1002,14 +994,14 @@ int bch2_fs_initialize(struct bch_fs *c) err = "error creating root directory"; ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed_inode.inode.k_i, - NULL, NULL, 0); + NULL, NULL, BTREE_INSERT_LAZY_RW); if (ret) goto err; bch2_inode_init_early(c, &lostfound_inode); err = "error creating lost+found"; - ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC, + ret = bch2_trans_do(c, NULL, NULL, 0, bch2_create_trans(&trans, BCACHEFS_ROOT_INO, &root_inode, &lostfound_inode, &lostfound, diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 53bd0e0e..5cad39fe 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -185,7 +185,8 @@ s64 bch2_remap_range(struct bch_fs *c, BTREE_ITER_INTENT); while (1) { - bch2_trans_begin_updates(&trans); + bch2_trans_reset(&trans, TRANS_RESET_MEM); + trans.mem_top = 0; if (fatal_signal_pending(current)) { @@ -287,8 +288,7 @@ err: inode_u.bi_size < new_i_size) { inode_u.bi_size = new_i_size; ret2 = bch2_inode_write(&trans, inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, journal_seq, - BTREE_INSERT_ATOMIC); + bch2_trans_commit(&trans, NULL, journal_seq, 0); } } while (ret2 == -EINTR); diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index daaeaf04..43927853 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -51,7 +51,9 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb, BUG_ON(get_order(__vstruct_bytes(struct bch_sb, sb_u64s)) > sb->page_order); - if (!f) { + if (!f && !u64s) { + /* nothing to do: */ + } else if (!f) { f = vstruct_last(sb->sb); memset(f, 0, sizeof(u64) * u64s); f->u64s = cpu_to_le32(u64s); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 43689bb8..05013b3d 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -404,7 +404,7 @@ static int bch2_fs_read_write_late(struct bch_fs *c) return 0; } -int __bch2_fs_read_write(struct bch_fs *c, bool early) +static int __bch2_fs_read_write(struct bch_fs *c, bool early) { struct bch_dev *ca; unsigned i; diff --git a/libbcachefs/super.h b/libbcachefs/super.h index 41992e89..4aa5dd79 100644 --- a/libbcachefs/super.h +++ b/libbcachefs/super.h @@ -219,7 +219,6 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); bool bch2_fs_emergency_read_only(struct bch_fs *); void bch2_fs_read_only(struct bch_fs *); -int __bch2_fs_read_write(struct bch_fs *, bool); int bch2_fs_read_write(struct bch_fs *); int bch2_fs_read_write_early(struct bch_fs *); diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index e7699afd..95e52784 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -18,6 +18,7 @@ #include "btree_update_interior.h" #include "btree_gc.h" #include "buckets.h" +#include "clock.h" #include "disk_groups.h" #include "ec.h" #include "inode.h" @@ -198,6 +199,9 @@ rw_attribute(pd_controllers_update_seconds); read_attribute(meta_replicas_have); read_attribute(data_replicas_have); +read_attribute(io_timers_read); +read_attribute(io_timers_write); + #ifdef CONFIG_BCACHEFS_TESTS write_attribute(perf_test); #endif /* CONFIG_BCACHEFS_TESTS */ @@ -404,6 +408,11 @@ SHOW(bch2_fs) if (attr == &sysfs_new_stripes) return bch2_new_stripes(c, buf); + if (attr == &sysfs_io_timers_read) + return bch2_io_timers_show(&c->io_clock[READ], buf); + if (attr == &sysfs_io_timers_write) + return bch2_io_timers_show(&c->io_clock[WRITE], buf); + #define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name); BCH_DEBUG_PARAMS() #undef BCH_DEBUG_PARAM @@ -581,6 +590,9 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_new_stripes, + &sysfs_io_timers_read, + &sysfs_io_timers_write, + &sysfs_internal_uuid, #define BCH_DEBUG_PARAM(name, description) &sysfs_##name, diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 9b8f6f1f..725a6f3e 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -327,7 +327,7 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler, struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - return bch2_trans_do(c, &inode->ei_journal_seq, BTREE_INSERT_ATOMIC, + return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0, bch2_xattr_set(&trans, inode->v.i_ino, &inode->ei_str_hash, name, value, size,