From 34c9be19b376a007041555a7c9a47dfef3d0b1e6 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Thu, 14 May 2020 21:46:09 -0400
Subject: [PATCH] Update bcachefs sources to e1f6739c4a bcachefs: Fix another
 iterator counting bug

---
 .bcachefs_revision                  |   2 +-
 Makefile                            |   8 +-
 include/linux/lockdep.h             |   2 +-
 include/trace/events/bcachefs.h     |  17 +++
 libbcachefs/alloc_background.c      |  16 ++-
 libbcachefs/bcachefs.h              |   1 -
 libbcachefs/bcachefs_format.h       |   1 -
 libbcachefs/bkey_methods.c          |  91 +++++++------
 libbcachefs/btree_cache.h           |   2 +-
 libbcachefs/btree_gc.c              |  12 +-
 libbcachefs/btree_io.c              |  16 +++
 libbcachefs/btree_locking.h         |   3 +-
 libbcachefs/btree_update_interior.c | 202 +++++++++++++++++++---------
 libbcachefs/btree_update_interior.h |   8 +-
 libbcachefs/checksum.c              |   1 +
 libbcachefs/compress.c              |  71 ++++++----
 libbcachefs/extent_update.c         |  39 ++++--
 libbcachefs/extents.c               |   2 +-
 libbcachefs/fs-io.c                 |  13 +-
 libbcachefs/io.c                    |  29 +++-
 libbcachefs/io.h                    |   1 +
 libbcachefs/journal.h               |   6 +-
 libbcachefs/journal_reclaim.c       |  17 ++-
 libbcachefs/move.c                  |  15 ++-
 libbcachefs/quota.c                 |  69 +++++----
 libbcachefs/rebalance.c             |  16 ++-
 libbcachefs/replicas.c              |  17 ++-
 libbcachefs/siphash.c               |   1 -
 libbcachefs/super.c                 |   3 +-
 linux/six.c                         |   2 +-
 30 files changed, 463 insertions(+), 220 deletions(-)

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 87373ae5..90fe0441 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-a27d7265e75f6d65c2b972ce4ac27abfc153c230
+e1f6739c4a9fee1db7d94a5087a253041542cb62
diff --git a/Makefile b/Makefile
index 0721fd35..9c762cbe 100644
--- a/Makefile
+++ b/Makefile
@@ -143,10 +143,16 @@ update-bcachefs-sources:
 	git rm -rf --ignore-unmatch libbcachefs
 	test -d libbcachefs || mkdir libbcachefs
 	cp $(LINUX_DIR)/fs/bcachefs/*.[ch] libbcachefs/
+	git add libbcachefs/*.[ch]
 	cp $(LINUX_DIR)/include/trace/events/bcachefs.h include/trace/events/
+	git add include/trace/events/bcachefs.h
+	cp $(LINUX_DIR)/kernel/locking/six.c linux/
+	git add linux/six.c
+	cp $(LINUX_DIR)/include/linux/six.h include/linux/
+	git add include/linux/six.h
 	$(RM) libbcachefs/*.mod.c
 	git -C $(LINUX_DIR) rev-parse HEAD | tee .bcachefs_revision
-	git add libbcachefs/*.[ch] include/trace/events/bcachefs.h .bcachefs_revision
+	git add .bcachefs_revision
 
 .PHONE: update-commit-bcachefs-sources
 update-commit-bcachefs-sources: update-bcachefs-sources
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index d95d8da3..1a7f024e 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -5,7 +5,7 @@ struct lock_class_key {};
 struct task_struct;
 
 # define lock_acquire(l, s, t, r, c, n, i)	do { } while (0)
-# define lock_release(l, n, i)			do { } while (0)
+# define lock_release(l, i)			do { } while (0)
 # define lock_set_class(l, n, k, s, i)		do { } while (0)
 # define lock_set_subclass(l, s, i)		do { } while (0)
 # define lockdep_set_current_reclaim_state(g)	do { } while (0)
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index d7e898b0..01a9cc73 100644
--- a/include/trace/events/bcachefs.h
+++ b/include/trace/events/bcachefs.h
@@ -499,6 +499,23 @@ TRACE_EVENT(copygc,
 		  __entry->buckets_moved, __entry->buckets_not_moved)
 );
 
+TRACE_EVENT(transaction_restart_ip,
+	TP_PROTO(unsigned long caller, unsigned long ip),
+	TP_ARGS(caller, ip),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,		caller			)
+		__field(unsigned long,		ip			)
+	),
+
+	TP_fast_assign(
+		__entry->caller		= caller;
+		__entry->ip		= ip;
+	),
+
+	TP_printk("%pF %pF", (void *) __entry->caller, (void *) __entry->ip)
+);
+
 DECLARE_EVENT_CLASS(transaction_restart,
 	TP_PROTO(unsigned long ip),
 	TP_ARGS(ip),
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index b2d1b8f9..c3794518 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -315,7 +315,9 @@ retry:
 	bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_NORUN);
 	ret = bch2_trans_commit(trans, NULL, NULL,
-				BTREE_INSERT_NOFAIL|flags);
+				BTREE_INSERT_NOFAIL|
+				BTREE_INSERT_USE_RESERVE|
+				flags);
 err:
 	if (ret == -EINTR)
 		goto retry;
 
@@ -1033,7 +1035,16 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
 		set_current_state(TASK_INTERRUPTIBLE);
 
 		spin_lock(&c->freelist_lock);
-		for (i = 0; i < RESERVE_NR; i++)
+		for (i = 0; i < RESERVE_NR; i++) {
+
+			/*
+			 * Don't strand buckets on the copygc freelist until
+			 * after recovery is finished:
+			 */
+			if (!test_bit(BCH_FS_STARTED, &c->flags) &&
+			    i == RESERVE_MOVINGGC)
+				continue;
+
 			if (fifo_push(&ca->free[i], bucket)) {
 				fifo_pop(&ca->free_inc, bucket);
 
@@ -1043,6 +1054,7 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
 				spin_unlock(&c->freelist_lock);
 				goto out;
 			}
+		}
 
 		if (ca->allocator_state != ALLOCATOR_BLOCKED_FULL) {
 			ca->allocator_state = ALLOCATOR_BLOCKED_FULL;
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index d9c09b4f..fa959376 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -179,7 +179,6 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__
 
-#include
 #include
 #include
 #include
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 0772c586..616863ef 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -72,7 +72,6 @@
  * inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost.
  */
 
-#include
 #include
 #include
 #include
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index c97e1e90..55ef4032 100644
--- a/libbcachefs/bkey_methods.c
+++ b/libbcachefs/bkey_methods.c
@@ -283,49 +283,64 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
 	const struct bkey_ops *ops;
 	struct bkey uk;
 	struct bkey_s u;
+	int i;
 
-	if (big_endian != CPU_BIG_ENDIAN)
-		bch2_bkey_swab_key(f, k);
+	/*
+	 * Do these operations in reverse order in the write path:
+	 */
 
-	if (version < bcachefs_metadata_version_bkey_renumber)
-		bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
+	for (i = 0; i < 4; i++)
+	switch (!write ? i : 3 - i) {
+	case 0:
+		if (big_endian != CPU_BIG_ENDIAN)
+			bch2_bkey_swab_key(f, k);
+		break;
+	case 1:
+		if (version < bcachefs_metadata_version_bkey_renumber)
+			bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
+		break;
+	case 2:
+		if (version < bcachefs_metadata_version_inode_btree_change &&
+		    btree_id == BTREE_ID_INODES) {
+			if (!bkey_packed(k)) {
+				struct bkey_i *u = packed_to_bkey(k);
+				swap(u->k.p.inode, u->k.p.offset);
+			} else if (f->bits_per_field[BKEY_FIELD_INODE] &&
+				   f->bits_per_field[BKEY_FIELD_OFFSET]) {
+				struct bkey_format tmp = *f, *in = f, *out = &tmp;
 
-	if (version < bcachefs_metadata_version_inode_btree_change &&
-	    btree_id == BTREE_ID_INODES) {
-		if (!bkey_packed(k)) {
-			struct bkey_i *u = packed_to_bkey(k);
-			swap(u->k.p.inode, u->k.p.offset);
-		} else if (f->bits_per_field[BKEY_FIELD_INODE] &&
-			   f->bits_per_field[BKEY_FIELD_OFFSET]) {
-			struct bkey_format tmp = *f, *in = f, *out = &tmp;
+				swap(tmp.bits_per_field[BKEY_FIELD_INODE],
+				     tmp.bits_per_field[BKEY_FIELD_OFFSET]);
+				swap(tmp.field_offset[BKEY_FIELD_INODE],
+				     tmp.field_offset[BKEY_FIELD_OFFSET]);
 
-			swap(tmp.bits_per_field[BKEY_FIELD_INODE],
-			     tmp.bits_per_field[BKEY_FIELD_OFFSET]);
-			swap(tmp.field_offset[BKEY_FIELD_INODE],
-			     tmp.field_offset[BKEY_FIELD_OFFSET]);
+				if (!write)
+					swap(in, out);
 
-			if (!write)
-				swap(in, out);
-
-			uk = __bch2_bkey_unpack_key(in, k);
-			swap(uk.p.inode, uk.p.offset);
-			BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
+				uk = __bch2_bkey_unpack_key(in, k);
+				swap(uk.p.inode, uk.p.offset);
+				BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
+			}
 		}
+		break;
+	case 3:
+		if (!bkey_packed(k)) {
+			u = bkey_i_to_s(packed_to_bkey(k));
+		} else {
+			uk = __bch2_bkey_unpack_key(f, k);
+			u.k = &uk;
+			u.v = bkeyp_val(f, k);
+		}
+
+		if (big_endian != CPU_BIG_ENDIAN)
+			bch2_bkey_swab_val(u);
+
+		ops = &bch2_bkey_ops[k->type];
+
+		if (ops->compat)
+			ops->compat(btree_id, version, big_endian, write, u);
+		break;
+	default:
+		BUG();
 	}
-
-	if (!bkey_packed(k)) {
-		u = bkey_i_to_s(packed_to_bkey(k));
-	} else {
-		uk = __bch2_bkey_unpack_key(f, k);
-		u.k = &uk;
-		u.v = bkeyp_val(f, k);
-	}
-
-	if (big_endian != CPU_BIG_ENDIAN)
-		bch2_bkey_swab_val(u);
-
-	ops = &bch2_bkey_ops[k->type];
-
-	if (ops->compat)
-		ops->compat(btree_id, version, big_endian, write, u);
 }
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index 132cc95a..98cca307 100644
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -94,7 +94,7 @@ static inline unsigned btree_blocks(struct bch_fs *c)
 	return c->opts.btree_node_size >> c->block_bits;
 }
 
-#define BTREE_SPLIT_THRESHOLD(c)		(btree_max_u64s(c) * 3 / 4)
+#define BTREE_SPLIT_THRESHOLD(c)		(btree_max_u64s(c) * 2 / 3)
 
 #define BTREE_FOREGROUND_MERGE_THRESHOLD(c)	(btree_max_u64s(c) * 1 / 3)
 #define BTREE_FOREGROUND_MERGE_HYSTERESIS(c)			\
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 674a1dac..146f2428 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -699,8 +699,10 @@ static int bch2_gc_start(struct bch_fs *c,
 
 	c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
 					 sizeof(u64), GFP_KERNEL);
-	if (!c->usage_gc)
+	if (!c->usage_gc) {
+		bch_err(c, "error allocating c->usage_gc");
 		return -ENOMEM;
+	}
 
 	for_each_member_device(ca, c, i) {
 		BUG_ON(ca->buckets[1]);
@@ -711,19 +713,23 @@ static int bch2_gc_start(struct bch_fs *c,
 			GFP_KERNEL|__GFP_ZERO);
 		if (!ca->buckets[1]) {
 			percpu_ref_put(&ca->ref);
+			bch_err(c, "error allocating ca->buckets[gc]");
 			return -ENOMEM;
 		}
 
 		ca->usage[1] = alloc_percpu(struct bch_dev_usage);
 		if (!ca->usage[1]) {
+			bch_err(c, "error allocating ca->usage[gc]");
 			percpu_ref_put(&ca->ref);
 			return -ENOMEM;
 		}
 	}
 
 	ret = bch2_ec_mem_alloc(c, true);
-	if (ret)
+	if (ret) {
+		bch_err(c, "error allocating ec gc mem");
 		return ret;
+	}
 
 	percpu_down_write(&c->mark_lock);
 
@@ -933,7 +939,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
 		return;
 	}
 
-	as = bch2_btree_update_start(c, iter->btree_id,
+	as = bch2_btree_update_start(iter->trans, iter->btree_id,
 			btree_update_reserve_required(c, parent) + nr_old_nodes,
 			BTREE_INSERT_NOFAIL|
 			BTREE_INSERT_USE_RESERVE,
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index ac8b9886..63063748 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -736,6 +736,17 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
 		struct btree_node *bn =
 			container_of(i, struct btree_node, keys);
 		/* These indicate that we read the wrong btree node: */
+
+		if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+			struct bch_btree_ptr_v2 *bp =
+				&bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+			/* XXX endianness */
+			btree_err_on(bp->seq != bn->keys.seq,
+				     BTREE_ERR_MUST_RETRY, c, b, NULL,
+				     "incorrect sequence number (wrong btree node)");
+		}
+
 		btree_err_on(BTREE_NODE_ID(bn) != b->btree_id,
 			     BTREE_ERR_MUST_RETRY, c, b, i,
 			     "incorrect btree id");
@@ -1626,6 +1637,11 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 	 * reflect that those writes were done and the data flushed from the
 	 * journal:
 	 *
+	 * Also on journal error, the pending write may have updates that were
+	 * never journalled (interior nodes, see btree_update_nodes_written()) -
+	 * it's critical that we don't do the write in that case otherwise we
+	 * will have updates visible that weren't in the journal:
+	 *
 	 * Make sure to update b->written so bch2_btree_init_next() doesn't
 	 * break:
 	 */
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index aaad2d28..9081d3fc 100644
--- a/libbcachefs/btree_locking.h
+++ b/libbcachefs/btree_locking.h
@@ -165,8 +165,7 @@ static inline bool btree_node_lock_increment(struct btree_iter *iter,
 	struct btree_iter *linked;
 
 	trans_for_each_iter(iter->trans, linked)
-		if (linked != iter &&
-		    linked->l[level].b == b &&
+		if (linked->l[level].b == b &&
 		    btree_node_locked_type(linked, level) >= want) {
 			six_lock_increment(&b->lock, want);
 			return true;
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index f6f2517d..75b70187 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -586,12 +586,12 @@ static void __bch2_btree_update_free(struct btree_update *as)
 	bch2_journal_pin_drop(&c->journal, &as->journal);
 	bch2_journal_pin_flush(&c->journal, &as->journal);
 
-	BUG_ON((as->nr_new_nodes || as->nr_pending) &&
-	       !bch2_journal_error(&c->journal));;
+	BUG_ON(as->nr_new_nodes || as->nr_pending);
 
 	if (as->reserve)
 		bch2_btree_reserve_put(c, as->reserve);
 
+	list_del(&as->unwritten_list);
 	list_del(&as->list);
 
 	closure_debug_destroy(&as->cl);
@@ -609,37 +609,28 @@ static void bch2_btree_update_free(struct btree_update *as)
 	mutex_unlock(&c->btree_interior_update_lock);
 }
 
-static void btree_update_nodes_reachable(struct btree_update *as, u64 seq)
+static inline bool six_trylock_intentwrite(struct six_lock *lock)
 {
-	struct bch_fs *c = as->c;
+	if (!six_trylock_intent(lock))
+		return false;
 
-	while (as->nr_new_nodes) {
-		struct btree *b = as->new_nodes[--as->nr_new_nodes];
-
-		BUG_ON(b->will_make_reachable != (unsigned long) as);
-		b->will_make_reachable = 0;
-
-		/*
-		 * b->will_make_reachable prevented it from being written, so
-		 * write it now if it needs to be written:
-		 */
-		btree_node_lock_type(c, b, SIX_LOCK_read);
-		bch2_btree_node_write_cond(c, b, btree_node_need_write(b));
-		six_unlock_read(&b->lock);
+	if (!six_trylock_write(lock)) {
+		six_unlock_intent(lock);
+		return false;
 	}
 
-	while (as->nr_pending)
-		bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending],
-					    seq);
+	return true;
 }
 
 static void btree_update_nodes_written(struct closure *cl)
 {
 	struct btree_update *as = container_of(cl, struct btree_update, cl);
+	struct btree *nodes_need_write[BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES + 1];
+	unsigned nr_nodes_need_write;
 	struct journal_res res = { 0 };
 	struct bch_fs *c = as->c;
+	struct btree_root *r;
 	struct btree *b;
-	struct bset *i;
 	int ret;
 
 	/*
@@ -650,6 +641,7 @@ static void btree_update_nodes_written(struct closure *cl)
 	mutex_lock(&c->btree_interior_update_lock);
 	as->nodes_written = true;
 again:
+	nr_nodes_need_write = 0;
 	as = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
 				      struct btree_update, unwritten_list);
 	if (!as || !as->nodes_written) {
@@ -658,31 +650,57 @@ again:
 	}
 
 	b = as->b;
-	if (b && !six_trylock_intent(&b->lock)) {
+	if (b && !six_trylock_intentwrite(&b->lock)) {
 		mutex_unlock(&c->btree_interior_update_lock);
+
 		btree_node_lock_type(c, b, SIX_LOCK_intent);
+		six_lock_write(&b->lock);
+
+		six_unlock_write(&b->lock);
 		six_unlock_intent(&b->lock);
+
 		mutex_lock(&c->btree_interior_update_lock);
 		goto again;
 	}
 
-	list_del(&as->unwritten_list);
-
 	ret = bch2_journal_res_get(&c->journal, &res, as->journal_u64s,
+				   JOURNAL_RES_GET_NONBLOCK|
 				   JOURNAL_RES_GET_RESERVED);
-	if (ret) {
-		BUG_ON(!bch2_journal_error(&c->journal));
-		/* can't unblock btree writes */
-		goto free_update;
+	if (ret == -EAGAIN) {
+		unsigned u64s = as->journal_u64s;
+
+		if (b) {
+			six_unlock_write(&b->lock);
+			six_unlock_intent(&b->lock);
+		}
+
+		mutex_unlock(&c->btree_interior_update_lock);
+
+		ret = bch2_journal_res_get(&c->journal, &res, u64s,
+					   JOURNAL_RES_GET_CHECK|
+					   JOURNAL_RES_GET_RESERVED);
+		if (!ret) {
+			mutex_lock(&c->btree_interior_update_lock);
+			goto again;
+		}
 	}
 
-	{
+	if (!ret) {
 		struct journal_buf *buf = &c->journal.buf[res.idx];
 		struct jset_entry *entry = vstruct_idx(buf->data, res.offset);
 
 		res.offset	+= as->journal_u64s;
 		res.u64s	-= as->journal_u64s;
 		memcpy_u64s(entry, as->journal_entries, as->journal_u64s);
+	} else {
+		/*
+		 * On journal error we have to run most of the normal path so
+		 * that shutdown works - unblocking btree node writes in
+		 * particular and writing them if needed - except for
+		 * journalling the update:
+		 */
+
+		BUG_ON(!bch2_journal_error(&c->journal));
 	}
 
 	switch (as->mode) {
@@ -690,26 +708,41 @@ again:
 		BUG();
 	case BTREE_INTERIOR_UPDATING_NODE:
 		/* @b is the node we did the final insert into: */
-		BUG_ON(!res.ref);
 
-		six_lock_write(&b->lock);
+		/*
+		 * On failure to get a journal reservation, we still have to
+		 * unblock the write and allow most of the write path to happen
+		 * so that shutdown works, but the i->journal_seq mechanism
+		 * won't work to prevent the btree write from being visible (we
+		 * didn't get a journal sequence number) - instead
+		 * __bch2_btree_node_write() doesn't do the actual write if
+		 * we're in journal error state:
+		 */
+
 		list_del(&as->write_blocked_list);
 
-		i = btree_bset_last(b);
-		i->journal_seq = cpu_to_le64(
-			max(res.seq,
-			    le64_to_cpu(i->journal_seq)));
+		if (!ret) {
+			struct bset *i = btree_bset_last(b);
+
+			i->journal_seq = cpu_to_le64(
+				max(res.seq,
+				    le64_to_cpu(i->journal_seq)));
+
+			bch2_btree_add_journal_pin(c, b, res.seq);
+		}
+
+		nodes_need_write[nr_nodes_need_write++] = b;
 
-		bch2_btree_add_journal_pin(c, b, res.seq);
 		six_unlock_write(&b->lock);
+		six_unlock_intent(&b->lock);
 		break;
 
 	case BTREE_INTERIOR_UPDATING_AS:
 		BUG_ON(b);
 		break;
 
-	case BTREE_INTERIOR_UPDATING_ROOT: {
-		struct btree_root *r = &c->btree_roots[as->btree_id];
+	case BTREE_INTERIOR_UPDATING_ROOT:
+		r = &c->btree_roots[as->btree_id];
 
 		BUG_ON(b);
 
@@ -721,25 +754,24 @@ again:
 		mutex_unlock(&c->btree_root_lock);
 		break;
 	}
-	}
 
 	bch2_journal_pin_drop(&c->journal, &as->journal);
 	bch2_journal_res_put(&c->journal, &res);
 	bch2_journal_preres_put(&c->journal, &as->journal_preres);
-free_update:
-	/* Do btree write after dropping journal res: */
-	if (b) {
-		/*
-		 * b->write_blocked prevented it from being written, so
-		 * write it now if it needs to be written:
-		 */
-		btree_node_write_if_need(c, b, SIX_LOCK_intent);
-		six_unlock_intent(&b->lock);
+
+	while (as->nr_new_nodes) {
+		b = as->new_nodes[--as->nr_new_nodes];
+
+		BUG_ON(b->will_make_reachable != (unsigned long) as);
+		b->will_make_reachable = 0;
+
+		nodes_need_write[nr_nodes_need_write++] = b;
 	}
 
-	if (!ret)
-		btree_update_nodes_reachable(as, res.seq);
+	while (as->nr_pending)
+		bch2_btree_node_free_ondisk(c,
+			&as->pending[--as->nr_pending], res.seq);
 
 	__bch2_btree_update_free(as);
 
 	/*
@@ -747,6 +779,22 @@ free_update:
 	 * nodes to be writeable:
 	 */
 	closure_wake_up(&c->btree_interior_update_wait);
+
+	/*
+	 * Can't take btree node locks while holding btree_interior_update_lock:
+	 * */
+	mutex_unlock(&c->btree_interior_update_lock);
+
+	/* Do btree writes after dropping journal res/locks: */
+	while (nr_nodes_need_write) {
+		b = nodes_need_write[--nr_nodes_need_write];
+
+		btree_node_lock_type(c, b, SIX_LOCK_read);
+		bch2_btree_node_write_cond(c, b, btree_node_need_write(b));
+		six_unlock_read(&b->lock);
+	}
+
+	mutex_lock(&c->btree_interior_update_lock);
 
 	goto again;
 }
@@ -949,17 +997,41 @@ void bch2_btree_update_done(struct btree_update *as)
 }
 
 struct btree_update *
-bch2_btree_update_start(struct bch_fs *c, enum btree_id id,
+bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
 			unsigned nr_nodes, unsigned flags,
 			struct closure *cl)
 {
+	struct bch_fs *c = trans->c;
+	struct journal_preres journal_preres = { 0 };
 	struct btree_reserve *reserve;
 	struct btree_update *as;
 	int ret;
 
+	ret = bch2_journal_preres_get(&c->journal, &journal_preres,
+				      BTREE_UPDATE_JOURNAL_RES,
+				      JOURNAL_RES_GET_NONBLOCK);
+	if (ret == -EAGAIN) {
+		if (flags & BTREE_INSERT_NOUNLOCK)
+			return ERR_PTR(-EINTR);
+
+		bch2_trans_unlock(trans);
+
+		ret = bch2_journal_preres_get(&c->journal, &journal_preres,
+					      BTREE_UPDATE_JOURNAL_RES, 0);
+		if (ret)
+			return ERR_PTR(ret);
+
+		if (!bch2_trans_relock(trans)) {
+			bch2_journal_preres_put(&c->journal, &journal_preres);
+			return ERR_PTR(-EINTR);
+		}
+	}
+
 	reserve = bch2_btree_reserve_get(c, nr_nodes, flags, cl);
-	if (IS_ERR(reserve))
+	if (IS_ERR(reserve)) {
+		bch2_journal_preres_put(&c->journal, &journal_preres);
 		return ERR_CAST(reserve);
+	}
 
 	as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO);
 	memset(as, 0, sizeof(*as));
@@ -969,18 +1041,11 @@ bch2_btree_update_start(struct bch_fs *c, enum btree_id id,
 	as->btree_id	= id;
 	as->reserve	= reserve;
 	INIT_LIST_HEAD(&as->write_blocked_list);
+	INIT_LIST_HEAD(&as->unwritten_list);
+	as->journal_preres = journal_preres;
 
 	bch2_keylist_init(&as->parent_keys, as->inline_keys);
 
-	ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
-				      ARRAY_SIZE(as->journal_entries), 0);
-	if (ret) {
-		bch2_btree_reserve_put(c, reserve);
-		closure_debug_destroy(&as->cl);
-		mempool_free(as, &c->btree_interior_update_pool);
-		return ERR_PTR(ret);
-	}
-
 	mutex_lock(&c->btree_interior_update_lock);
 	list_add_tail(&as->list, &c->btree_interior_update_list);
 	mutex_unlock(&c->btree_interior_update_lock);
@@ -1531,8 +1596,10 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
 	/* Hack, because gc and splitting nodes doesn't mix yet: */
 	if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
 	    !down_read_trylock(&c->gc_lock)) {
-		if (flags & BTREE_INSERT_NOUNLOCK)
+		if (flags & BTREE_INSERT_NOUNLOCK) {
+			trace_transaction_restart_ip(trans->ip, _THIS_IP_);
 			return -EINTR;
+		}
 
 		bch2_trans_unlock(trans);
 		down_read(&c->gc_lock);
@@ -1551,7 +1618,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
 		goto out;
 	}
 
-	as = bch2_btree_update_start(c, iter->btree_id,
+	as = bch2_btree_update_start(trans, iter->btree_id,
 		btree_update_reserve_required(c, b), flags,
 		!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
 	if (IS_ERR(as)) {
@@ -1560,6 +1627,8 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
 			BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
 			bch2_trans_unlock(trans);
 			ret = -EINTR;
+
+			trace_transaction_restart_ip(trans->ip, _THIS_IP_);
 		}
 		goto out;
 	}
@@ -1663,8 +1732,9 @@ retry:
 		goto err_unlock;
 	}
 
-	as = bch2_btree_update_start(c, iter->btree_id,
+	as = bch2_btree_update_start(trans, iter->btree_id,
 			btree_update_reserve_required(c, parent) + 1,
+			flags|
 			BTREE_INSERT_NOFAIL|
 			BTREE_INSERT_USE_RESERVE,
 			!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
@@ -1776,7 +1846,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
 	struct btree *n, *parent = btree_node_parent(iter, b);
 	struct btree_update *as;
 
-	as = bch2_btree_update_start(c, iter->btree_id,
+	as = bch2_btree_update_start(iter->trans, iter->btree_id,
 		(parent
 		 ? btree_update_reserve_required(c, parent)
 		 : 0) + 1,
@@ -2043,7 +2113,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
 		new_hash = bch2_btree_node_mem_alloc(c);
 	}
 
-	as = bch2_btree_update_start(c, iter->btree_id,
+	as = bch2_btree_update_start(iter->trans, iter->btree_id,
 		parent ? btree_update_reserve_required(c, parent) : 0,
 		BTREE_INSERT_NOFAIL|
 		BTREE_INSERT_USE_RESERVE|
diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h
index aef8adf8..2fddf5d3 100644
--- a/libbcachefs/btree_update_interior.h
+++ b/libbcachefs/btree_update_interior.h
@@ -32,6 +32,9 @@ struct pending_btree_node_free {
 	__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
 };
 
+#define BTREE_UPDATE_JOURNAL_RES		\
+	((BKEY_BTREE_PTR_U64s_MAX + 1) * (BTREE_MAX_DEPTH - 1) * 2)
+
 /*
  * Tracks an in progress split/rewrite of a btree node and the update to the
  * parent node:
@@ -105,8 +108,7 @@ struct btree_update {
 	unsigned			nr_new_nodes;
 
 	unsigned			journal_u64s;
-	u64				journal_entries[
-		(BKEY_BTREE_PTR_U64s_MAX + 1) * (BTREE_MAX_DEPTH - 1) * 2];
+	u64				journal_entries[BTREE_UPDATE_JOURNAL_RES];
 
 	/* Only here to reduce stack usage on recursive splits: */
 	struct keylist			parent_keys;
@@ -132,7 +134,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
 void bch2_btree_update_done(struct btree_update *);
 
 struct btree_update *
-bch2_btree_update_start(struct bch_fs *, enum btree_id, unsigned,
+bch2_btree_update_start(struct btree_trans *, enum btree_id, unsigned,
 			unsigned, struct closure *);
 
 void bch2_btree_interior_update_will_free_node(struct btree_update *,
diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c
index 6f1afa4a..3d88719b 100644
--- a/libbcachefs/checksum.c
+++ b/libbcachefs/checksum.c
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 #include
 
 static u64 bch2_checksum_init(unsigned type)
diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
index 0713286d..c56938f2 100644
--- a/libbcachefs/compress.c
+++ b/libbcachefs/compress.c
@@ -39,6 +39,24 @@ static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
 	BUG();
 }
 
+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
+{
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	void *expected_start = NULL;
+
+	__bio_for_each_bvec(bv, bio, iter, start) {
+		if (expected_start &&
+		    expected_start != page_address(bv.bv_page) + bv.bv_offset)
+			return false;
+
+		expected_start = page_address(bv.bv_page) +
+			bv.bv_offset + bv.bv_len;
+	}
+
+	return true;
+}
+
 static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
 				       struct bvec_iter start, int rw)
 {
@@ -48,27 +66,28 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
 	struct bbuf ret;
 	struct bio_vec bv;
 	struct bvec_iter iter;
 	unsigned nr_pages = 0;
 	struct page *stack_pages[16];
 	struct page **pages = NULL;
-	bool first = true;
-	unsigned prev_end = PAGE_SIZE;
 	void *data;
 
 	BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);
 
-#ifndef CONFIG_HIGHMEM
-	__bio_for_each_bvec(bv, bio, iter, start) {
-		if (bv.bv_len == start.bi_size)
-			return (struct bbuf) {
-				.b = page_address(bv.bv_page) + bv.bv_offset,
-				.type = BB_NONE, .rw = rw
-			};
-	}
-#endif
+	if (!IS_ENABLED(CONFIG_HIGHMEM) &&
+	    bio_phys_contig(bio, start))
+		return (struct bbuf) {
+			.b = page_address(bio_iter_page(bio, start)) +
+				bio_iter_offset(bio, start),
+			.type = BB_NONE, .rw = rw
+		};
+
+	/* check if we can map the pages contiguously: */
 	__bio_for_each_segment(bv, bio, iter, start) {
-		if ((!first && bv.bv_offset) ||
-		    prev_end != PAGE_SIZE)
+		if (iter.bi_size != start.bi_size &&
+		    bv.bv_offset)
+			goto bounce;
+
+		if (bv.bv_len < iter.bi_size &&
+		    bv.bv_offset + bv.bv_len < PAGE_SIZE)
 			goto bounce;
 
-		prev_end = bv.bv_offset + bv.bv_len;
 		nr_pages++;
 	}
@@ -172,20 +191,21 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
 	}
 	case BCH_COMPRESSION_TYPE_zstd: {
 		ZSTD_DCtx *ctx;
-		size_t len;
+		size_t real_src_len = le32_to_cpup(src_data.b);
+
+		if (real_src_len > src_len - 4)
+			goto err;
 
 		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
 		ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
 
-		src_len = le32_to_cpup(src_data.b);
-
-		len = ZSTD_decompressDCtx(ctx,
+		ret = ZSTD_decompressDCtx(ctx,
 				dst_data,	dst_len,
-				src_data.b + 4, src_len);
+				src_data.b + 4, real_src_len);
 
 		mempool_free(workspace, &c->decompress_workspace);
 
-		if (len != dst_len)
+		if (ret != dst_len)
 			goto err;
 		break;
 	}
@@ -264,7 +284,8 @@ int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
 	if (ret)
 		goto err;
 
-	if (dst_data.type != BB_NONE)
+	if (dst_data.type != BB_NONE &&
+	    dst_data.type != BB_VMAP)
 		memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
 err:
 	bio_unmap_or_unbounce(c, dst_data);
@@ -407,7 +428,8 @@ static unsigned __bio_compress(struct bch_fs *c,
 	memset(dst_data.b + *dst_len, 0, pad);
 	*dst_len += pad;
 
-	if (dst_data.type != BB_NONE)
+	if (dst_data.type != BB_NONE &&
+	    dst_data.type != BB_VMAP)
 		memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
 
 	BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
@@ -512,7 +534,6 @@ void bch2_fs_compress_exit(struct bch_fs *c)
 static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
 {
 	size_t max_extent = c->sb.encoded_extent_max << 9;
-	size_t order = get_order(max_extent);
 	size_t decompress_workspace_size = 0;
 	bool decompress_workspace_needed;
 	ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0);
@@ -547,14 +568,14 @@ have_compressed:
 
 	if (!mempool_initialized(&c->compression_bounce[READ])) {
 		ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
-						  1, order);
+						  1, max_extent);
 		if (ret)
 			goto out;
 	}
 
 	if (!mempool_initialized(&c->compression_bounce[WRITE])) {
 		ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
-						  1, order);
+						  1, max_extent);
 		if (ret)
 			goto out;
 	}
diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c
index 2a7d913b..fd011df3 100644
--- a/libbcachefs/extent_update.c
+++ b/libbcachefs/extent_update.c
@@ -34,16 +34,10 @@ static int count_iters_for_insert(struct btree_trans *trans,
 				  unsigned offset,
 				  struct bpos *end,
 				  unsigned *nr_iters,
-				  unsigned max_iters,
-				  bool overwrite)
+				  unsigned max_iters)
 {
-	int ret = 0;
+	int ret = 0, ret2 = 0;
 
-	/*
-	 * The extent update path requires an _additional_ iterator for each
-	 * extent we're inserting and overwriting:
-	 */
-	*nr_iters += 1;
 	if (*nr_iters >= max_iters) {
 		*end = bpos_min(*end, k.k->p);
 		ret = 1;
@@ -70,16 +64,20 @@ static int count_iters_for_insert(struct btree_trans *trans,
 
 		for_each_btree_key(trans, iter, BTREE_ID_REFLINK,
 				   POS(0, idx + offset),
-				   BTREE_ITER_SLOTS, r_k, ret) {
+				   BTREE_ITER_SLOTS, r_k, ret2) {
 			if (bkey_cmp(bkey_start_pos(r_k.k),
 				     POS(0, idx + sectors)) >= 0)
 				break;
 
+			/* extent_update_to_keys(), for the reflink_v update */
+			*nr_iters += 1;
+
 			*nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k);
 
 			if (*nr_iters >= max_iters) {
 				struct bpos pos = bkey_start_pos(k.k);
-				pos.offset += r_k.k->p.offset - idx;
+				pos.offset += min_t(u64, k.k->size,
+						    r_k.k->p.offset - idx);
 
 				*end = bpos_min(*end, pos);
 				ret = 1;
@@ -92,7 +90,7 @@ static int count_iters_for_insert(struct btree_trans *trans,
 		}
 	}
 
-	return ret;
+	return ret2 ?: ret;
 }
 
 #define EXTENT_ITERS_MAX	(BTREE_ITER_MAX / 3)
@@ -121,8 +119,11 @@ int bch2_extent_atomic_end(struct btree_iter *iter,
 
 	*end = bpos_min(insert->k.p, b->key.k.p);
 
+	/* extent_update_to_keys(): */
+	nr_iters += 1;
+
 	ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end,
-				     &nr_iters, EXTENT_ITERS_MAX / 2, false);
+				     &nr_iters, EXTENT_ITERS_MAX / 2);
 	if (ret < 0)
 		return ret;
 
@@ -139,8 +140,20 @@ int bch2_extent_atomic_end(struct btree_iter *iter,
 		offset = bkey_start_offset(&insert->k) -
 			bkey_start_offset(k.k);
 
+		/* extent_handle_overwrites(): */
+		switch (bch2_extent_overlap(&insert->k, k.k)) {
+		case BCH_EXTENT_OVERLAP_ALL:
+		case BCH_EXTENT_OVERLAP_FRONT:
+			nr_iters += 1;
+			break;
+		case BCH_EXTENT_OVERLAP_BACK:
+		case BCH_EXTENT_OVERLAP_MIDDLE:
+			nr_iters += 2;
+			break;
+		}
+
 		ret = count_iters_for_insert(trans, k, offset, end,
-					     &nr_iters, EXTENT_ITERS_MAX, true);
+					     &nr_iters, EXTENT_ITERS_MAX);
 		if (ret)
 			break;
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 3f66457d..d1a4ab04 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -180,7 +180,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k)
 		return;
 
 	bch2_fs_inconsistent_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
-				!bch2_bkey_replicas_marked(c, k, false), c,
+				!bch2_bkey_replicas_marked_locked(c, k, false), c,
 				"btree key bad (replicas not marked in superblock):\n%s",
 				(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
 
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 0aa3afad..7de61f7f 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -1239,7 +1239,8 @@ do_io:
 		if (w->io &&
 		    (w->io->op.res.nr_replicas != nr_replicas_this_write ||
 		     bio_full(&w->io->op.wbio.bio, PAGE_SIZE) ||
-		     w->io->op.wbio.bio.bi_iter.bi_size >= (256U << 20) ||
+		     w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >=
+		     (BIO_MAX_PAGES * PAGE_SIZE) ||
 		     bio_end_sector(&w->io->op.wbio.bio) != sector))
 			bch2_writepage_do_io(w);
 
@@ -1814,12 +1815,22 @@ static long bch2_dio_write_loop(struct dio_write *dio)
 		goto loop;
 
 	while (1) {
+		size_t extra = dio->iter.count -
+			min(BIO_MAX_PAGES * PAGE_SIZE, dio->iter.count);
+
 		if (kthread)
 			use_mm(dio->mm);
 		BUG_ON(current->faults_disabled_mapping);
 		current->faults_disabled_mapping = mapping;
 
+		/*
+		 * Don't issue more than 2MB at once, the bcachefs io path in
+		 * io.c can't bounce more than that:
+		 */
+
+		dio->iter.count -= extra;
 		ret = bio_iov_iter_get_pages(bio, &dio->iter);
+		dio->iter.count += extra;
 		current->faults_disabled_mapping = NULL;
 		if (kthread)
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 19059702..5801a036 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -588,7 +588,9 @@ static void bch2_write_index(struct closure *cl)
 
 	__bch2_write_index(op);
 
-	if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
+	if (!(op->flags & BCH_WRITE_DONE)) {
+		continue_at(cl, __bch2_write, index_update_wq(op));
+	} else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
 		bch2_journal_flush_seq_async(&c->journal,
 					     *op_journal_seq(op),
 					     cl);
@@ -1103,8 +1105,15 @@ again:
 		if (ret < 0)
 			goto err;
 
-		if (ret)
+		if (ret) {
 			skip_put = false;
+		} else {
+			/*
+			 * for the skip_put optimization this has to be set
+			 * before we submit the bio:
+			 */
+			op->flags |= BCH_WRITE_DONE;
+		}
 
 		bio->bi_end_io	= bch2_write_endio;
 		bio->bi_private	= &op->cl;
@@ -1127,16 +1136,30 @@ again:
 	return;
 err:
 	op->error = ret;
+	op->flags |= BCH_WRITE_DONE;
 
 	continue_at(cl, bch2_write_index, index_update_wq(op));
 	return;
flush_io:
+	/*
+	 * If the write can't all be submitted at once, we generally want to
+	 * block synchronously as that signals backpressure to the caller.
+	 *
+	 * However, if we're running out of a workqueue, we can't block here
+	 * because we'll be blocking other work items from completing:
+	 */
+	if (current->flags & PF_WQ_WORKER) {
+		continue_at(cl, bch2_write_index, index_update_wq(op));
+		return;
+	}
+
 	closure_sync(cl);
 
 	if (!bch2_keylist_empty(&op->insert_keys)) {
 		__bch2_write_index(op);
 
 		if (op->error) {
+			op->flags |= BCH_WRITE_DONE;
 			continue_at_nobarrier(cl, bch2_write_done, NULL);
 			return;
 		}
@@ -1182,6 +1205,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
 	bch2_keylist_push(&op->insert_keys);
 
 	op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
+	op->flags |= BCH_WRITE_DONE;
+
 	continue_at_nobarrier(cl, bch2_write_index, NULL);
 	return;
err:
diff --git a/libbcachefs/io.h b/libbcachefs/io.h
index e45dcf96..c4c84730 100644
--- a/libbcachefs/io.h
+++ b/libbcachefs/io.h
@@ -36,6 +36,7 @@ enum bch_write_flags {
 	/* Internal: */
 	BCH_WRITE_JOURNAL_SEQ_PTR	= (1 << 10),
 	BCH_WRITE_SKIP_CLOSURE_PUT	= (1 << 11),
+	BCH_WRITE_DONE			= (1 << 12),
 };
 
 static inline u64 *op_journal_seq(struct bch_write_op *op)
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index ec61137d..78f5fac0 100644
--- a/libbcachefs/journal.h
+++ b/libbcachefs/journal.h
@@ -269,7 +269,7 @@ static inline void bch2_journal_res_put(struct journal *j,
 	if (!res->ref)
 		return;
 
-	lock_release(&j->res_map, 0, _THIS_IP_);
+	lock_release(&j->res_map, _THIS_IP_);
 
 	while (res->u64s)
 		bch2_journal_add_entry(j, res,
@@ -344,7 +344,9 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
 	return ret;
out:
 	if (!(flags & JOURNAL_RES_GET_CHECK)) {
-		lock_acquire_shared(&j->res_map, 0, 0, NULL, _THIS_IP_);
+		lock_acquire_shared(&j->res_map, 0,
+				    (flags & JOURNAL_RES_GET_NONBLOCK) != 0,
+				    NULL, _THIS_IP_);
 		EBUG_ON(!res->ref);
 	}
 	return 0;
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index db3afd90..d34434f6 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -322,14 +322,12 @@ void bch2_journal_pin_drop(struct journal *j,
 	spin_unlock(&j->lock);
 }
 
-void __bch2_journal_pin_add(struct journal *j, u64 seq,
+static void bch2_journal_pin_add_locked(struct journal *j, u64 seq,
 			    struct journal_entry_pin *pin,
 			    journal_pin_flush_fn flush_fn)
 {
 	struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
 
-	spin_lock(&j->lock);
-
 	__journal_pin_drop(j, pin);
 
 	BUG_ON(!atomic_read(&pin_list->count));
@@ -339,7 +337,14 @@ void __bch2_journal_pin_add(struct journal *j, u64 seq,
 	pin->flush	= flush_fn;
 
 	list_add(&pin->list, flush_fn
 		 ? &pin_list->list
 		 : &pin_list->flushed);
+}
 
+void __bch2_journal_pin_add(struct journal *j, u64 seq,
+			    struct journal_entry_pin *pin,
+			    journal_pin_flush_fn flush_fn)
+{
+	spin_lock(&j->lock);
+	bch2_journal_pin_add_locked(j, seq, pin, flush_fn);
 	spin_unlock(&j->lock);
 
 	/*
@@ -354,9 +359,13 @@ void bch2_journal_pin_copy(struct journal *j,
 			   struct journal_entry_pin *src,
 			   journal_pin_flush_fn flush_fn)
 {
+	spin_lock(&j->lock);
+
 	if (journal_pin_active(src) &&
 	    (!journal_pin_active(dst) || src->seq < dst->seq))
-		__bch2_journal_pin_add(j, src->seq, dst, flush_fn);
+		bch2_journal_pin_add_locked(j, src->seq, dst, flush_fn);
+
+	spin_unlock(&j->lock);
 }
 
 /**
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 4afda95f..67e495bc 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -70,19 +70,26 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 			       BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
 	while (1) {
-		struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+		struct bkey_s_c k;
 		struct bkey_i *insert;
-		struct bkey_i_extent *new =
-			bkey_i_to_extent(bch2_keylist_front(keys));
+		struct bkey_i_extent *new;
 		BKEY_PADDED(k) _new, _insert;
 		const union bch_extent_entry *entry;
 		struct extent_ptr_decoded p;
 		bool did_work = false;
 		int nr;
 
+		bch2_trans_reset(&trans, 0);
+
+		k = bch2_btree_iter_peek_slot(iter);
 		ret = bkey_err(k);
-		if (ret)
+		if (ret) {
+			if (ret == -EINTR)
+				continue;
 			break;
+		}
+
+		new = bkey_i_to_extent(bch2_keylist_front(keys));
 
 		if (bversion_cmp(k.k->version, new->k.version) ||
 		    !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c
index e7787c50..d3032a46 100644
--- a/libbcachefs/quota.c
+++ b/libbcachefs/quota.c
@@ -710,13 +710,43 @@ found:
 	return ret;
 }
 
+static int bch2_set_quota_trans(struct btree_trans *trans,
+				struct bkey_i_quota *new_quota,
+				struct qc_dqblk *qdq)
+{
+	struct btree_iter *iter;
+	struct bkey_s_c k;
+	int ret;
+
+	iter = bch2_trans_get_iter(trans, BTREE_ID_QUOTAS, new_quota->k.p,
+				   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+	k = bch2_btree_iter_peek_slot(iter);
+
+	ret = bkey_err(k);
+	if (unlikely(ret))
+		return ret;
+
+	if (k.k->type == KEY_TYPE_quota)
+		new_quota->v = *bkey_s_c_to_quota(k).v;
+
+	if (qdq->d_fieldmask & QC_SPC_SOFT)
+		new_quota->v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9);
+	if (qdq->d_fieldmask & QC_SPC_HARD)
+		new_quota->v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9);
+
+	if (qdq->d_fieldmask & QC_INO_SOFT)
+		new_quota->v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit);
+	if (qdq->d_fieldmask & QC_INO_HARD)
+		new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
+
+	return bch2_trans_update(trans, iter, &new_quota->k_i, 0);
+}
+
 static int bch2_set_quota(struct super_block *sb, struct kqid qid,
 			  struct qc_dqblk *qdq)
 {
 	struct bch_fs *c = sb->s_fs_info;
 	struct btree_trans trans;
-	struct btree_iter *iter;
-	struct bkey_s_c k;
 	struct bkey_i_quota new_quota;
 	int ret;
 
@@ -728,41 +758,12 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
 
 	bch2_trans_init(&trans, c, 0, 0);
 
-	iter = bch2_trans_get_iter(&trans, BTREE_ID_QUOTAS, new_quota.k.p,
-				   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-	k = bch2_btree_iter_peek_slot(iter);
-
-	ret = bkey_err(k);
-	if (unlikely(ret))
-		return ret;
-
-	switch (k.k->type) {
-	case KEY_TYPE_quota:
-		new_quota.v = *bkey_s_c_to_quota(k).v;
-		break;
-	}
-
-	if (qdq->d_fieldmask & QC_SPC_SOFT)
-		new_quota.v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9);
-	if (qdq->d_fieldmask & QC_SPC_HARD)
-		new_quota.v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9);
-
-	if (qdq->d_fieldmask & QC_INO_SOFT)
-		new_quota.v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit);
-	if (qdq->d_fieldmask & QC_INO_HARD)
-		new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
-
-	bch2_trans_update(&trans, iter, &new_quota.k_i, 0);
-
-	ret = bch2_trans_commit(&trans, NULL, NULL, 0);
+	ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK,
+			    bch2_set_quota_trans(&trans, &new_quota, qdq)) ?:
+		__bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
 
 	bch2_trans_exit(&trans);
 
-	if (ret)
-		return ret;
-
-	ret = __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
-
 	return ret;
 }
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index ab193432..e15a2b1d 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -204,17 +204,21 @@ static int bch2_rebalance_thread(void *arg)
 			prev_run_time;
 
 		if (w.dev_most_full_percent < 20 &&
 		    throttle > 0) {
-			r->state = REBALANCE_THROTTLED;
 			r->throttled_until_iotime = io_start +
 				div_u64(w.dev_most_full_capacity *
 					(20 - w.dev_most_full_percent),
 					50);
-			r->throttled_until_cputime = start + throttle;
 
-			bch2_kthread_io_clock_wait(clock,
-					r->throttled_until_iotime,
-					throttle);
-			continue;
+			if (atomic_long_read(&clock->now) + clock->max_slop <
+			    r->throttled_until_iotime) {
+				r->throttled_until_cputime = start + throttle;
+				r->state = REBALANCE_THROTTLED;
+
+				bch2_kthread_io_clock_wait(clock,
+						r->throttled_until_iotime,
+						throttle);
+				continue;
+			}
 		}
 
 		/* minimum 1 mb/sec: */
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index be490857..67a7128f 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -299,8 +299,10 @@ static int replicas_table_update(struct bch_fs *c,
 					    GFP_NOIO)) ||
 	    !(new_scratch  = kmalloc(bytes, GFP_NOIO)) ||
 	    (c->usage_gc &&
-	     !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO))))
+	     !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) {
+		bch_err(c, "error updating replicas table: memory allocation failure");
 		goto err;
+	}
 
 	if (c->usage_base)
 		__replicas_table_update(new_base, new_r,
@@ -362,7 +364,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 				struct bch_replicas_entry *new_entry)
 {
 	struct bch_replicas_cpu new_r, new_gc;
-	int ret = -ENOMEM;
+	int ret = 0;
 
 	verify_replicas_entry(new_entry);
 
@@ -409,14 +411,16 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 		swap(new_gc, c->replicas_gc);
 	percpu_up_write(&c->mark_lock);
out:
-	ret = 0;
-err:
 	mutex_unlock(&c->sb_lock);
 
 	kfree(new_r.entries);
 	kfree(new_gc.entries);
 
 	return ret;
+err:
+	bch_err(c, "error adding replicas entry: memory allocation failure");
+	ret = -ENOMEM;
+	goto out;
 }
 
@@ -561,6 +565,7 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
 					 GFP_NOIO);
 	if (!c->replicas_gc.entries) {
 		mutex_unlock(&c->sb_lock);
+		bch_err(c, "error allocating c->replicas_gc");
 		return -ENOMEM;
 	}
 
@@ -586,8 +591,10 @@ retry:
 	nr		= READ_ONCE(c->replicas.nr);
 	new.entry_size	= READ_ONCE(c->replicas.entry_size);
 	new.entries	= kcalloc(nr, new.entry_size, GFP_KERNEL);
-	if (!new.entries)
+	if (!new.entries) {
+		bch_err(c, "error allocating c->replicas_gc");
 		return -ENOMEM;
+	}
 
 	mutex_lock(&c->sb_lock);
 	percpu_down_write(&c->mark_lock);
diff --git a/libbcachefs/siphash.c b/libbcachefs/siphash.c
index 4565a843..c062edb3 100644
--- a/libbcachefs/siphash.c
+++ b/libbcachefs/siphash.c
@@ -44,7 +44,6 @@
  * https://131002.net/siphash/
 */
 
-#include
 #include
 #include
 #include
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index d2c275ce..d3473897 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -889,6 +889,8 @@ int bch2_fs_start(struct bch_fs *c)
 	if (bch2_fs_init_fault("fs_start"))
 		goto err;
 
+	set_bit(BCH_FS_STARTED, &c->flags);
+
 	if (c->opts.read_only || c->opts.nochanges) {
 		bch2_fs_read_only(c);
 	} else {
@@ -900,7 +902,6 @@ int bch2_fs_start(struct bch_fs *c)
 			goto err;
 	}
 
-	set_bit(BCH_FS_STARTED, &c->flags);
 	print_mount_opts(c);
 	ret = 0;
out:
diff --git a/linux/six.c b/linux/six.c
index 9fa58b6f..c7781235 100644
--- a/linux/six.c
+++ b/linux/six.c
@@ -15,7 +15,7 @@
 #endif
 
 #define six_acquire(l, t)	lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
-#define six_release(l)		lock_release(l, 0, _RET_IP_)
+#define six_release(l)		lock_release(l, _RET_IP_)
 
 struct six_lock_vals {
 	/* Value we add to the lock in order to take the lock: */