diff --git a/.bcachefs_revision b/.bcachefs_revision index 4e609c13..4295df5d 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -39609901d28e0fcc35e0ec82a6bc3d2412b3cefe +ceaf9ded6efd90d5bd53c6c77d9469c5ef9bbda5 diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index 62d5d17d..6615c868 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -55,63 +55,6 @@ static bool bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k) return true; } -static void bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); - struct bpos bucket = PTR_BUCKET_POS(ca, ptr); - - bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); - } -} - -static noinline_for_stack -bool __bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struct bkey_ptrs_c ptrs, - const struct bch_extent_ptr *start) -{ - if (!ctxt) { - bkey_for_each_ptr(ptrs, ptr) { - if (ptr == start) - break; - - struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); - struct bpos bucket = PTR_BUCKET_POS(ca, ptr); - bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); - } - return false; - } - - __bkey_for_each_ptr(start, ptrs.end, ptr) { - struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); - struct bpos bucket = PTR_BUCKET_POS(ca, ptr); - - bool locked; - move_ctxt_wait_event(ctxt, - (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) || - list_empty(&ctxt->ios)); - if (!locked) { - bch2_trans_unlock(ctxt->trans); - bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0); - } - } - return true; -} - -static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struct bkey_ptrs_c ptrs) -{ - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); - struct bpos bucket = PTR_BUCKET_POS(ca, ptr); - - if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) - return __bkey_nocow_lock(c, ctxt, ptrs, ptr); - } - - return true; -} - noinline_for_stack static void trace_io_move_finish2(struct data_update *u, struct bkey_i *new, @@ -538,7 +481,7 @@ void bch2_data_update_exit(struct data_update *update) update->bvecs = NULL; if (c->opts.nocow_enabled) - bkey_nocow_unlock(c, k); + bch2_bkey_nocow_unlock(c, k, 0); bkey_put_dev_refs(c, k); bch2_disk_reservation_put(c, &update->op.res); bch2_bkey_buf_exit(&update->k, c); @@ -1018,10 +961,19 @@ int bch2_data_update_init(struct btree_trans *trans, goto out; } - if (c->opts.nocow_enabled && - !bkey_nocow_lock(c, ctxt, ptrs)) { - ret = bch_err_throw(c, nocow_lock_blocked); - goto out_put_dev_refs; + if (c->opts.nocow_enabled) { + if (!bch2_bkey_nocow_trylock(c, ptrs, 0)) { + bool locked = false; + if (ctxt) + move_ctxt_wait_event(ctxt, + (locked = bch2_bkey_nocow_trylock(c, ptrs, 0)) || + list_empty(&ctxt->ios)); + if (!locked) { + if (ctxt) + bch2_trans_unlock(ctxt->trans); + bch2_bkey_nocow_lock(c, ptrs, 0); + } + } } if (unwritten) { @@ -1039,8 +991,7 @@ int bch2_data_update_init(struct btree_trans *trans, return 0; out_nocow_unlock: if (c->opts.nocow_enabled) - bkey_nocow_unlock(c, k); -out_put_dev_refs: + bch2_bkey_nocow_unlock(c, k, 0); bkey_put_dev_refs(c, k); out: bch2_disk_reservation_put(c, &m->op.res); diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index cbf1eedd..db2dc5b0 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -367,7 +367,10 @@ x(BCH_ERR_nopromote, nopromote_enomem) \ x(0, invalid_snapshot_node) \ x(0, 
option_needs_open_fs) \ - x(0, remove_disk_accounting_entry) + x(0, remove_disk_accounting_entry) \ + x(0, nocow_trylock_fail) \ + x(BCH_ERR_nocow_trylock_fail, nocow_trylock_contended) \ + x(BCH_ERR_nocow_trylock_fail, nocow_trylock_bucket_full) \ enum bch_errcode { BCH_ERR_START = 2048, diff --git a/libbcachefs/io_write.c b/libbcachefs/io_write.c index 6a5da02c..ed4a9440 100644 --- a/libbcachefs/io_write.c +++ b/libbcachefs/io_write.c @@ -33,7 +33,6 @@ #include #include -#include #include #include @@ -1323,11 +1322,25 @@ static CLOSURE_CALLBACK(bch2_nocow_write_done) bch2_write_done(cl); } -struct bucket_to_lock { - struct bpos b; - unsigned gen; - struct nocow_lock_bucket *l; -}; +static bool bkey_get_dev_iorefs(struct bch_fs *c, struct bkey_ptrs_c ptrs) +{ + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE, + BCH_DEV_WRITE_REF_io_write); + if (unlikely(!ca)) { + bkey_for_each_ptr(ptrs, ptr2) { + if (ptr2 == ptr) + break; + enumerated_ref_put(&bch2_dev_have_ref(c, ptr2->dev)->io_ref[WRITE], + BCH_DEV_WRITE_REF_io_write); + } + + return false; + } + } + + return true; +} static void bch2_nocow_write(struct bch_write_op *op) { @@ -1335,15 +1348,14 @@ static void bch2_nocow_write(struct bch_write_op *op) struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; - DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets; + struct bkey_ptrs_c ptrs; u32 snapshot; - struct bucket_to_lock *stale_at; + const struct bch_extent_ptr *stale_at; int stale, ret; if (op->flags & BCH_WRITE_move) return; - darray_init(&buckets); trans = bch2_trans_get(c); retry: bch2_trans_begin(trans); @@ -1358,8 +1370,6 @@ retry: while (1) { struct bio *bio = &op->wbio.bio; - buckets.nr = 0; - ret = bch2_trans_relock(trans); if (ret) break; @@ -1381,50 +1391,42 @@ retry: break; /* Get iorefs before dropping btree locks: */ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + ptrs = bch2_bkey_ptrs_c(k); + if (!bkey_get_dev_iorefs(c, ptrs)) + goto out; + + /* Unlock before taking nocow locks, doing IO: */ + bkey_reassemble(op->insert_keys.top, k); + k = bkey_i_to_s_c(op->insert_keys.top); + ptrs = bch2_bkey_ptrs_c(k); + + bch2_trans_unlock(trans); + + bch2_bkey_nocow_lock(c, ptrs, BUCKET_NOCOW_LOCK_UPDATE); + + /* + * This could be handled better: If we're able to trylock the + * nocow locks with btree locks held we know dirty pointers + * can't be stale + */ bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE, - BCH_DEV_WRITE_REF_io_write); - if (unlikely(!ca)) - goto err_get_ioref; + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); - struct bpos b = PTR_BUCKET_POS(ca, ptr); - struct nocow_lock_bucket *l = - bucket_nocow_lock(&c->nocow_locks, bucket_to_u64(b)); - prefetch(l); - - /* XXX allocating memory with btree locks held - rare */ - darray_push_gfp(&buckets, ((struct bucket_to_lock) { - .b = b, .gen = ptr->gen, .l = l, - }), GFP_KERNEL|__GFP_NOFAIL); + int gen = bucket_gen_get(ca, PTR_BUCKET_NR(ca, ptr)); + stale = gen < 0 ? 
gen : gen_after(gen, ptr->gen); + if (unlikely(stale)) { + stale_at = ptr; + goto err_bucket_stale; + } if (ptr->unwritten) op->flags |= BCH_WRITE_convert_unwritten; } - /* Unlock before taking nocow locks, doing IO: */ - bkey_reassemble(op->insert_keys.top, k); - bch2_trans_unlock(trans); - bch2_cut_front(op->pos, op->insert_keys.top); if (op->flags & BCH_WRITE_convert_unwritten) bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top); - darray_for_each(buckets, i) { - struct bch_dev *ca = bch2_dev_have_ref(c, i->b.inode); - - __bch2_bucket_nocow_lock(&c->nocow_locks, i->l, - bucket_to_u64(i->b), - BUCKET_NOCOW_LOCK_UPDATE); - - int gen = bucket_gen_get(ca, i->b.offset); - stale = gen < 0 ? gen : gen_after(gen, i->gen); - if (unlikely(stale)) { - stale_at = i; - goto err_bucket_stale; - } - } - bio = &op->wbio.bio; if (k.k->p.offset < op->pos.offset + bio_sectors(bio)) { bio = bio_split(bio, k.k->p.offset - op->pos.offset, @@ -1458,8 +1460,6 @@ err: goto retry; bch2_trans_put(trans); - darray_exit(&buckets); - if (ret) { bch2_write_op_error(op, op->pos.offset, "%s(): btree lookup error: %s", __func__, bch2_err_str(ret)); @@ -1484,24 +1484,11 @@ err: continue_at(&op->cl, bch2_nocow_write_done, index_update_wq(op)); } return; -err_get_ioref: - darray_for_each(buckets, i) - enumerated_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE], - BCH_DEV_WRITE_REF_io_write); - - /* Fall back to COW path: */ - goto out; err_bucket_stale: - darray_for_each(buckets, i) { - bch2_bucket_nocow_unlock(&c->nocow_locks, i->b, BUCKET_NOCOW_LOCK_UPDATE); - if (i == stale_at) - break; - } - CLASS(printbuf, buf)(); if (bch2_fs_inconsistent_on(stale < 0, c, - "pointer to invalid bucket in nocow path on device %llu\n %s", - stale_at->b.inode, + "pointer to invalid bucket in nocow path on device %u\n %s", + stale_at->dev, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch_err_throw(c, data_write_invalid_ptr); } else { @@ -1509,7 +1496,13 @@ err_bucket_stale: ret = bch_err_throw(c, transaction_restart); } - goto err_get_ioref; + bch2_bkey_nocow_unlock(c, k, BUCKET_NOCOW_LOCK_UPDATE); + bkey_for_each_ptr(ptrs, ptr) + enumerated_ref_put(&bch2_dev_have_ref(c, ptr->dev)->io_ref[WRITE], + BCH_DEV_WRITE_REF_io_write); + + /* Fall back to COW path: */ + goto out; } static void __bch2_write(struct bch_write_op *op) diff --git a/libbcachefs/nocow_locking.c b/libbcachefs/nocow_locking.c index 71b17f18..c8907070 100644 --- a/libbcachefs/nocow_locking.c +++ b/libbcachefs/nocow_locking.c @@ -6,6 +6,16 @@ #include "nocow_locking.h" #include "util.h" +#include + +static bool nocow_bucket_empty(struct nocow_lock_bucket *l) +{ + for (unsigned i = 0; i < ARRAY_SIZE(l->b); i++) + if (atomic_read(&l->l[i])) + return false; + return true; +} + bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos bucket) { u64 dev_bucket = bucket_to_u64(bucket); @@ -20,14 +30,12 @@ bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos #define sign(v) (v < 0 ? -1 : v > 0 ? 1 : 0) -void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags) +void __bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, u64 dev_bucket, int flags) { - u64 dev_bucket = bucket_to_u64(bucket); struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); int lock_val = flags ? 
1 : -1; - unsigned i; - for (i = 0; i < ARRAY_SIZE(l->b); i++) + for (unsigned i = 0; i < ARRAY_SIZE(l->b); i++) if (l->b[i] == dev_bucket) { int v = atomic_sub_return(lock_val, &l->l[i]); @@ -40,8 +48,8 @@ void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos buc BUG(); } -bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, - u64 dev_bucket, int flags) +static int __bch2_bucket_nocow_trylock(struct bch_fs *c, struct nocow_lock_bucket *l, + u64 dev_bucket, int flags) { int v, lock_val = flags ? 1 : -1; unsigned i; @@ -58,32 +66,128 @@ bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, goto take_lock; } - return false; + return bch_err_throw(c, nocow_trylock_bucket_full); got_entry: v = atomic_read(&l->l[i]); if (lock_val > 0 ? v < 0 : v > 0) - return false; + return bch_err_throw(c, nocow_trylock_contended); take_lock: v = atomic_read(&l->l[i]); /* Overflow? */ if (v && sign(v + lock_val) != sign(v)) - return false; + return bch_err_throw(c, nocow_trylock_contended); atomic_add(lock_val, &l->l[i]); + return 0; +} + +static inline bool bch2_bucket_nocow_trylock(struct bch_fs *c, struct bpos bucket, int flags) +{ + struct bucket_nocow_lock_table *t = &c->nocow_locks; + u64 dev_bucket = bucket_to_u64(bucket); + struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); + + return !__bch2_bucket_nocow_trylock(c, l, dev_bucket, flags); +} + +void bch2_bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k, int flags) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); + struct bpos bucket = PTR_BUCKET_POS(ca, ptr); + + bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, flags); + } +} + +bool bch2_bkey_nocow_trylock(struct bch_fs *c, struct bkey_ptrs_c ptrs, int flags) +{ + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); + struct bpos bucket = PTR_BUCKET_POS(ca, ptr); + + if (unlikely(!bch2_bucket_nocow_trylock(c, bucket, flags))) { + bkey_for_each_ptr(ptrs, ptr2) { + if (ptr2 == ptr) + break; + + struct bch_dev *ca = bch2_dev_have_ref(c, ptr2->dev); + struct bpos bucket = PTR_BUCKET_POS(ca, ptr2); + bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, flags); + } + return false; + } + } + return true; } -void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, - struct nocow_lock_bucket *l, - u64 dev_bucket, int flags) +struct bucket_to_lock { + u64 b; + struct nocow_lock_bucket *l; +}; + +static inline int bucket_to_lock_cmp(struct bucket_to_lock l, + struct bucket_to_lock r) { - if (!__bch2_bucket_nocow_trylock(l, dev_bucket, flags)) { - struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks); + return cmp_int(l.l, r.l); +} + +void bch2_bkey_nocow_lock(struct bch_fs *c, struct bkey_ptrs_c ptrs, int flags) +{ + if (bch2_bkey_nocow_trylock(c, ptrs, flags)) + return; + + DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets; + darray_init(&buckets); + + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); + u64 b = bucket_to_u64(PTR_BUCKET_POS(ca, ptr)); + struct nocow_lock_bucket *l = + bucket_nocow_lock(&c->nocow_locks, b); + prefetch(l); + + /* XXX allocating memory with btree locks held - rare */ + darray_push_gfp(&buckets, ((struct bucket_to_lock) { .b = b, .l = l, }), + GFP_KERNEL|__GFP_NOFAIL); + } + + WARN_ON_ONCE(buckets.nr > NOCOW_LOCK_BUCKET_SIZE); + + bubble_sort(buckets.data, buckets.nr, bucket_to_lock_cmp); +retake_all: + darray_for_each(buckets, i) { + int ret = 
__bch2_bucket_nocow_trylock(c, i->l, i->b, flags); + if (!ret) + continue; + u64 start_time = local_clock(); - __closure_wait_event(&l->wait, __bch2_bucket_nocow_trylock(l, dev_bucket, flags)); + if (ret == -BCH_ERR_nocow_trylock_contended) + __closure_wait_event(&i->l->wait, + (ret = __bch2_bucket_nocow_trylock(c, i->l, i->b, flags)) != -BCH_ERR_nocow_trylock_contended); + if (!ret) { + bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time); + continue; + } + + BUG_ON(ret != -BCH_ERR_nocow_trylock_bucket_full); + + darray_for_each(buckets, i2) { + if (i2 == i) + break; + __bch2_bucket_nocow_unlock(&c->nocow_locks, i2->b, flags); + } + + __closure_wait_event(&i->l->wait, nocow_bucket_empty(i->l)); bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time); + goto retake_all; } + + darray_exit(&buckets); } void bch2_nocow_locks_to_text(struct printbuf *out, struct bucket_nocow_lock_table *t) diff --git a/libbcachefs/nocow_locking.h b/libbcachefs/nocow_locking.h index 48b8a003..972c9147 100644 --- a/libbcachefs/nocow_locking.h +++ b/libbcachefs/nocow_locking.h @@ -19,28 +19,18 @@ static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lo #define BUCKET_NOCOW_LOCK_UPDATE (1 << 0) bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos); -void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int); -bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *, u64, int); -void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, - struct nocow_lock_bucket *, u64, int); -static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, - struct bpos bucket, int flags) +void __bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, u64, int); + +static inline void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, + int flags) { - u64 dev_bucket = bucket_to_u64(bucket); - struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); - - __bch2_bucket_nocow_lock(t, l, dev_bucket, flags); + __bch2_bucket_nocow_unlock(t, bucket_to_u64(bucket), flags); } -static inline bool bch2_bucket_nocow_trylock(struct bucket_nocow_lock_table *t, - struct bpos bucket, int flags) -{ - u64 dev_bucket = bucket_to_u64(bucket); - struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); - - return __bch2_bucket_nocow_trylock(l, dev_bucket, flags); -} +void bch2_bkey_nocow_unlock(struct bch_fs *, struct bkey_s_c, int); +bool bch2_bkey_nocow_trylock(struct bch_fs *, struct bkey_ptrs_c, int); +void bch2_bkey_nocow_lock(struct bch_fs *, struct bkey_ptrs_c, int); void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *); diff --git a/libbcachefs/nocow_locking_types.h b/libbcachefs/nocow_locking_types.h index bd12bf67..3fed8e95 100644 --- a/libbcachefs/nocow_locking_types.h +++ b/libbcachefs/nocow_locking_types.h @@ -5,11 +5,13 @@ #define BUCKET_NOCOW_LOCKS_BITS 10 #define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS) +#define NOCOW_LOCK_BUCKET_SIZE 6 + struct nocow_lock_bucket { struct closure_waitlist wait; spinlock_t lock; - u64 b[4]; - atomic_t l[4]; + u64 b[NOCOW_LOCK_BUCKET_SIZE]; + atomic_t l[NOCOW_LOCK_BUCKET_SIZE]; } __aligned(SMP_CACHE_BYTES); struct bucket_nocow_lock_table { diff --git a/libbcachefs/replicas_format.h b/libbcachefs/replicas_format.h index b7eff904..898caf94 100644 --- a/libbcachefs/replicas_format.h +++ b/libbcachefs/replicas_format.h @@ -17,7 +17,8 @@ struct bch_replicas_entry_v1 { __u8 data_type; __u8 
nr_devs; __u8 nr_required; - __u8 devs[] __counted_by(nr_devs); + /* No counted_by: bch_replicas_cpu entries are all the size of the biggest entry */ + __u8 devs[]; } __packed; struct bch_sb_field_replicas {
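
Editor's note on the locking change above (commentary, not part of the patch): bch2_bkey_nocow_lock() now works in two phases. It first attempts bch2_bkey_nocow_trylock() across every pointer; if that fails, it collects the nocow lock-table entries for all pointers into a darray, sorts them by entry address (bucket_to_lock_cmp), and takes them in that fixed order — waiting on a contended entry (nocow_trylock_contended), or, when a hash bucket has no free slot (nocow_trylock_bucket_full), releasing everything already held, waiting for the bucket to drain, and retaking from the start. The sketch below is a minimal, self-contained illustration of that sorted-acquisition-plus-backoff discipline using pthreads; the names nocow_slot, lock_all and unlock_all are hypothetical, the back-off is deliberately simplified to "always drop everything and retry", and this is not the bcachefs implementation.

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

struct nocow_slot {
	pthread_mutex_t lock;
};

static int cmp_slot(const void *a, const void *b)
{
	uintptr_t l = (uintptr_t)*(struct nocow_slot * const *)a;
	uintptr_t r = (uintptr_t)*(struct nocow_slot * const *)b;

	return (l > r) - (l < r);	/* total order: slot address */
}

/* Take every slot in @slots without risking deadlock against another caller. */
static void lock_all(struct nocow_slot **slots, unsigned nr)
{
	/* Sort so that all threads acquire contended slots in the same order: */
	qsort(slots, nr, sizeof(*slots), cmp_slot);
retry:
	for (unsigned i = 0; i < nr; i++)
		if (pthread_mutex_trylock(&slots[i]->lock)) {
			/* Back off: drop everything taken so far... */
			for (unsigned j = 0; j < i; j++)
				pthread_mutex_unlock(&slots[j]->lock);
			/* ...block until the slot we lost is free, then retake all: */
			pthread_mutex_lock(&slots[i]->lock);
			pthread_mutex_unlock(&slots[i]->lock);
			goto retry;
		}
}

static void unlock_all(struct nocow_slot **slots, unsigned nr)
{
	for (unsigned i = 0; i < nr; i++)
		pthread_mutex_unlock(&slots[i]->lock);
}

int main(void)
{
	struct nocow_slot s[3] = {
		{ PTHREAD_MUTEX_INITIALIZER },
		{ PTHREAD_MUTEX_INITIALIZER },
		{ PTHREAD_MUTEX_INITIALIZER },
	};
	struct nocow_slot *ptrs[3] = { &s[2], &s[0], &s[1] };

	lock_all(ptrs, 3);	/* safe even if another thread locks { &s[1], &s[2] } */
	unlock_all(ptrs, 3);
	return 0;
}

The key property is the single global acquisition order established by the sort: it is what makes it safe, in the real bch2_bkey_nocow_lock(), to sleep on one contended entry while still holding the entries sorted before it.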