From 21a9c2ace04f6c699870b9222c3da9b8a9aaedf6 Mon Sep 17 00:00:00 2001
From: Alexander Miroshnichenko <alex@millerson.name>
Date: Sun, 9 Feb 2025 22:05:21 +0300
Subject: [PATCH] bcachefs: cherry-pick updates from master 81b5431
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 8bit

Signed-off-by: Alexander Miroshnichenko <alex@millerson.name>
---
 fs/bcachefs/Kconfig | 2 +
 fs/bcachefs/Makefile | 1 +
 fs/bcachefs/alloc_background.c | 12 +-
 fs/bcachefs/alloc_background.h | 2 +-
 fs/bcachefs/alloc_foreground.c | 25 +-
 fs/bcachefs/alloc_foreground.h | 17 +
 fs/bcachefs/alloc_types.h | 2 +
 fs/bcachefs/backpointers.c | 108 ++----
 fs/bcachefs/backpointers.h | 11 +-
 fs/bcachefs/bcachefs.h | 5 +-
 fs/bcachefs/bcachefs_ioctl.h | 29 +-
 fs/bcachefs/btree_gc.c | 18 +-
 fs/bcachefs/btree_io.c | 205 ++++++++++-
 fs/bcachefs/btree_io.h | 4 +
 fs/bcachefs/btree_update_interior.c | 20 ++
 fs/bcachefs/btree_update_interior.h | 4 +
 fs/bcachefs/chardev.c | 38 +-
 fs/bcachefs/clock.c | 25 +-
 fs/bcachefs/data_update.c | 220 +++++++++---
 fs/bcachefs/data_update.h | 17 +-
 fs/bcachefs/debug.c | 34 +-
 fs/bcachefs/ec.c | 25 +-
 fs/bcachefs/errcode.h | 6 +
 fs/bcachefs/error.c | 50 ++-
 fs/bcachefs/error.h | 4 +-
 fs/bcachefs/extents.c | 9 +-
 fs/bcachefs/extents.h | 2 +-
 fs/bcachefs/eytzinger.c | 76 ++--
 fs/bcachefs/eytzinger.h | 95 ++---
 fs/bcachefs/fs-io-buffered.c | 26 +-
 fs/bcachefs/fs-io-direct.c | 20 +-
 fs/bcachefs/fsck.c | 2 +-
 fs/bcachefs/io_misc.c | 3 +-
 fs/bcachefs/io_read.c | 515 ++++++++++++++--------------
 fs/bcachefs/io_read.h | 75 ++--
 fs/bcachefs/io_write.c | 95 ++---
 fs/bcachefs/io_write.h | 29 +-
 fs/bcachefs/io_write_types.h | 2 +-
 fs/bcachefs/journal.c | 123 +++++--
 fs/bcachefs/journal.h | 38 +-
 fs/bcachefs/journal_io.c | 30 +-
 fs/bcachefs/journal_seq_blacklist.c | 7 +-
 fs/bcachefs/journal_types.h | 19 +-
 fs/bcachefs/migrate.c | 26 +-
 fs/bcachefs/move.c | 418 ++++++++++++----------
 fs/bcachefs/move_types.h | 18 +-
 fs/bcachefs/progress.c | 63 ++++
 fs/bcachefs/progress.h | 29 ++
 fs/bcachefs/rebalance.c | 4 +-
 fs/bcachefs/recovery.c | 1 -
 fs/bcachefs/reflink.c | 21 +-
 fs/bcachefs/sb-counters.c | 90 +++--
 fs/bcachefs/sb-counters.h | 4 +
 fs/bcachefs/sb-counters_format.h | 30 +-
 fs/bcachefs/sb-members.h | 12 +
 fs/bcachefs/snapshot.c | 7 +-
 fs/bcachefs/snapshot.h | 1 +
 fs/bcachefs/sysfs.c | 5 -
 fs/bcachefs/trace.h | 76 +---
 fs/bcachefs/util.c | 210 +++++++++---
 fs/bcachefs/util.h | 2 -
 61 files changed, 1967 insertions(+), 1100 deletions(-)
 create mode 100644 fs/bcachefs/progress.c
 create mode 100644 fs/bcachefs/progress.h

diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index e8549d04dcb8..85eea7a4dea3 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -15,6 +15,7 @@ config BCACHEFS_FS
select ZLIB_INFLATE
select ZSTD_COMPRESS
select ZSTD_DECOMPRESS
+ select CRYPTO
select CRYPTO_SHA256
select CRYPTO_CHACHA20
select CRYPTO_POLY1305
@@ -24,6 +25,7 @@ config BCACHEFS_FS
select XXHASH
select SRCU
select SYMBOLIC_ERRNAME
+ select MIN_HEAP
help
The bcachefs filesystem - a modern, copy on write filesystem, with
support for multiple devices, compression, checksumming, etc.
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index d2689388d5e8..1cf17a16af9f 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -67,6 +67,7 @@ bcachefs-y := \
nocow_locking.o \
opts.o \
printbuf.o \
+ progress.o \
quota.o \
rebalance.o \
rcu_pending.o \
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 3ea809990ef1..a35455802280 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1897,7 +1897,10 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
if (ret)
goto out;

- count_event(c, bucket_discard);
+ if (!fastpath)
+ count_event(c, bucket_discard);
+ else
+ count_event(c, bucket_discard_fast);
out:
fsck_err:
if (discard_locked)
@@ -2090,6 +2093,13 @@ static int invalidate_one_bucket(struct btree_trans *trans,
if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v))
goto out;

+ /*
+ * Impossible since alloc_lru_idx_read() only returns nonzero if the
+ * bucket is supposed to be on the cached bucket LRU (i.e.
+ * BCH_DATA_cached)
+ *
+ * bch2_lru_validate() also disallows lru keys with lru_pos_time() == 0
+ */
BUG_ON(a->v.data_type != BCH_DATA_cached);
BUG_ON(a->v.dirty_sectors);

diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index de25ba4ee94b..c556ccaffe89 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -131,7 +131,7 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
if (a.stripe)
return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe;
if (bch2_bucket_sectors_dirty(a))
- return data_type;
+ return bucket_data_type(data_type);
if (a.cached_sectors)
return BCH_DATA_cached;
if (BCH_ALLOC_V4_NEED_DISCARD(&a))
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 5a781fb4c794..1759c15a7745 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -179,23 +179,6 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
closure_wake_up(&c->freelist_wait);
}

-static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
-{
- switch (watermark) {
- case BCH_WATERMARK_interior_updates:
- return 0;
- case BCH_WATERMARK_reclaim:
- return OPEN_BUCKETS_COUNT / 6;
- case BCH_WATERMARK_btree:
- case BCH_WATERMARK_btree_copygc:
- return OPEN_BUCKETS_COUNT / 4;
- case BCH_WATERMARK_copygc:
- return OPEN_BUCKETS_COUNT / 3;
- default:
- return OPEN_BUCKETS_COUNT / 2;
- }
-}
-
static inline bool may_alloc_bucket(struct bch_fs *c,
struct bpos bucket,
struct bucket_alloc_state *s)
@@ -239,7 +222,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *

spin_lock(&c->freelist_lock);

- if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(watermark))) {
+ if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) {
if (cl)
closure_wait(&c->open_buckets_wait, cl);

@@ -728,7 +711,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,

struct bch_dev_usage usage;
struct open_bucket *ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type,
- cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage);
+ cl, flags & BCH_WRITE_alloc_nowait, &usage);
if (!IS_ERR(ob))
bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
bch2_dev_put(ca);
@@ -1336,7 +1319,7 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
if (wp->data_type != BCH_DATA_user)
have_cache = true;

- if (target && !(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
+ if (target && !(flags & BCH_WRITE_only_specified_devs)) {
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
@@ -1426,7 +1409,7 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
if (cl && bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
ret = -BCH_ERR_bucket_alloc_blocked;

- if (cl && !(flags & BCH_WRITE_ALLOC_NOWAIT) &&
+ if (cl && !(flags & BCH_WRITE_alloc_nowait) &&
bch2_err_matches(ret, BCH_ERR_freelist_empty))
ret = -BCH_ERR_bucket_alloc_blocked;

diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index f25481a0d1a0..baf5dc163c8a 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -33,6 +33,23 @@ static inline struct bch_dev *ob_dev(struct bch_fs *c, struct open_bucket *ob)
return bch2_dev_have_ref(c, ob->dev);
}

+static inline unsigned bch2_open_buckets_reserved(enum bch_watermark watermark)
+{
+ switch (watermark) {
+ case BCH_WATERMARK_interior_updates:
+ return 0;
+ case BCH_WATERMARK_reclaim:
+ return OPEN_BUCKETS_COUNT / 6;
+ case BCH_WATERMARK_btree:
+ case BCH_WATERMARK_btree_copygc:
+ return OPEN_BUCKETS_COUNT / 4;
+ case BCH_WATERMARK_copygc:
+ return OPEN_BUCKETS_COUNT / 3;
+ default:
+ return OPEN_BUCKETS_COUNT / 2;
+ }
+}
+
struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
enum bch_watermark, enum bch_data_type,
struct closure *);
diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h
index 4aa8ee026cb8..8f79f46c2a78 100644
--- a/fs/bcachefs/alloc_types.h
+++ b/fs/bcachefs/alloc_types.h
@@ -90,6 +90,7 @@ struct dev_stripe_state {
x(stopped) \
x(waiting_io) \
x(waiting_work) \
+ x(runnable) \
x(running)

enum write_point_state {
@@ -125,6 +126,7 @@ struct write_point {
enum write_point_state state;
u64 last_state_change;
u64 time[WRITE_POINT_STATE_NR];
+ u64 last_runtime;
} __aligned(SMP_CACHE_BYTES);
};

diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
|
|
index ebeb6a5ff9d2..eb374d1970fe 100644
|
|
--- a/fs/bcachefs/backpointers.c
|
|
+++ b/fs/bcachefs/backpointers.c
|
|
@@ -11,6 +11,7 @@
|
|
#include "checksum.h"
|
|
#include "disk_accounting.h"
|
|
#include "error.h"
|
|
+#include "progress.h"
|
|
|
|
#include <linux/mm.h>
|
|
|
|
@@ -244,27 +245,31 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
|
|
if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c)))
|
|
return bkey_s_c_null;
|
|
|
|
- if (likely(!bp.v->level)) {
|
|
- bch2_trans_node_iter_init(trans, iter,
|
|
- bp.v->btree_id,
|
|
- bp.v->pos,
|
|
- 0, 0,
|
|
- iter_flags);
|
|
- struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
|
|
- if (bkey_err(k)) {
|
|
- bch2_trans_iter_exit(trans, iter);
|
|
- return k;
|
|
- }
|
|
+ bch2_trans_node_iter_init(trans, iter,
|
|
+ bp.v->btree_id,
|
|
+ bp.v->pos,
|
|
+ 0,
|
|
+ bp.v->level,
|
|
+ iter_flags);
|
|
+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
|
|
+ if (bkey_err(k)) {
|
|
+ bch2_trans_iter_exit(trans, iter);
|
|
+ return k;
|
|
+ }
|
|
|
|
- if (k.k &&
|
|
- extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
|
|
- return k;
|
|
+ if (k.k &&
|
|
+ extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
|
|
+ return k;
|
|
|
|
- bch2_trans_iter_exit(trans, iter);
|
|
+ bch2_trans_iter_exit(trans, iter);
|
|
+
|
|
+ if (!bp.v->level) {
|
|
int ret = backpointer_target_not_found(trans, bp, k, last_flushed);
|
|
return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
|
|
} else {
|
|
struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed);
|
|
+ if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node))
|
|
+ return bkey_s_c_null;
|
|
if (IS_ERR_OR_NULL(b))
|
|
return ((struct bkey_s_c) { .k = ERR_CAST(b) });
|
|
|
|
@@ -715,71 +720,6 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
|
|
return ret;
|
|
}
|
|
|
|
-struct progress_indicator_state {
|
|
- unsigned long next_print;
|
|
- u64 nodes_seen;
|
|
- u64 nodes_total;
|
|
- struct btree *last_node;
|
|
-};
|
|
-
|
|
-static inline void progress_init(struct progress_indicator_state *s,
|
|
- struct bch_fs *c,
|
|
- u64 btree_id_mask)
|
|
-{
|
|
- memset(s, 0, sizeof(*s));
|
|
-
|
|
- s->next_print = jiffies + HZ * 10;
|
|
-
|
|
- for (unsigned i = 0; i < BTREE_ID_NR; i++) {
|
|
- if (!(btree_id_mask & BIT_ULL(i)))
|
|
- continue;
|
|
-
|
|
- struct disk_accounting_pos acc = {
|
|
- .type = BCH_DISK_ACCOUNTING_btree,
|
|
- .btree.id = i,
|
|
- };
|
|
-
|
|
- u64 v;
|
|
- bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
|
|
- s->nodes_total += div64_ul(v, btree_sectors(c));
|
|
- }
|
|
-}
|
|
-
|
|
-static inline bool progress_update_p(struct progress_indicator_state *s)
|
|
-{
|
|
- bool ret = time_after_eq(jiffies, s->next_print);
|
|
-
|
|
- if (ret)
|
|
- s->next_print = jiffies + HZ * 10;
|
|
- return ret;
|
|
-}
|
|
-
|
|
-static void progress_update_iter(struct btree_trans *trans,
|
|
- struct progress_indicator_state *s,
|
|
- struct btree_iter *iter,
|
|
- const char *msg)
|
|
-{
|
|
- struct bch_fs *c = trans->c;
|
|
- struct btree *b = path_l(btree_iter_path(trans, iter))->b;
|
|
-
|
|
- s->nodes_seen += b != s->last_node;
|
|
- s->last_node = b;
|
|
-
|
|
- if (progress_update_p(s)) {
|
|
- struct printbuf buf = PRINTBUF;
|
|
- unsigned percent = s->nodes_total
|
|
- ? div64_u64(s->nodes_seen * 100, s->nodes_total)
|
|
- : 0;
|
|
-
|
|
- prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ",
|
|
- msg, percent, s->nodes_seen, s->nodes_total);
|
|
- bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos));
|
|
-
|
|
- bch_info(c, "%s", buf.buf);
|
|
- printbuf_exit(&buf);
|
|
- }
|
|
-}
|
|
-
|
|
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
|
struct extents_to_bp_state *s)
|
|
{
|
|
@@ -787,7 +727,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
|
struct progress_indicator_state progress;
|
|
int ret = 0;
|
|
|
|
- progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));
|
|
+ bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));
|
|
|
|
for (enum btree_id btree_id = 0;
|
|
btree_id < btree_id_nr_alive(c);
|
|
@@ -806,7 +746,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
|
BTREE_ITER_prefetch);
|
|
|
|
ret = for_each_btree_key_continue(trans, iter, 0, k, ({
|
|
- progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
|
|
+ bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
|
|
check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
|
|
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
|
}));
|
|
@@ -1206,11 +1146,11 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
|
|
|
|
bch2_bkey_buf_init(&last_flushed);
|
|
bkey_init(&last_flushed.k->k);
|
|
- progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));
|
|
+ bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));
|
|
|
|
int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers,
|
|
POS_MIN, BTREE_ITER_prefetch, k, ({
|
|
- progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
|
|
+ bch2_progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
|
|
check_one_backpointer(trans, start, end, k, &last_flushed);
|
|
}));
|
|
|
|
diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
|
|
index 060dad1521ee..7786731d4ada 100644
|
|
--- a/fs/bcachefs/backpointers.h
|
|
+++ b/fs/bcachefs/backpointers.h
|
|
@@ -1,6 +1,6 @@
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
-#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
|
|
-#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
|
|
+#ifndef _BCACHEFS_BACKPOINTERS_H
|
|
+#define _BCACHEFS_BACKPOINTERS_H
|
|
|
|
#include "btree_cache.h"
|
|
#include "btree_iter.h"
|
|
@@ -123,7 +123,12 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
|
|
return BCH_DATA_btree;
|
|
case KEY_TYPE_extent:
|
|
case KEY_TYPE_reflink_v:
|
|
- return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user;
|
|
+ if (p.has_ec)
|
|
+ return BCH_DATA_stripe;
|
|
+ if (p.ptr.cached)
|
|
+ return BCH_DATA_cached;
|
|
+ else
|
|
+ return BCH_DATA_user;
|
|
case KEY_TYPE_stripe: {
|
|
const struct bch_extent_ptr *ptr = &entry->ptr;
|
|
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
|
|
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
|
|
index 161cf2f05d2a..e8f4999806b6 100644
|
|
--- a/fs/bcachefs/bcachefs.h
|
|
+++ b/fs/bcachefs/bcachefs.h
|
|
@@ -444,6 +444,7 @@ BCH_DEBUG_PARAMS_DEBUG()
|
|
x(btree_node_sort) \
|
|
x(btree_node_read) \
|
|
x(btree_node_read_done) \
|
|
+ x(btree_node_write) \
|
|
x(btree_interior_update_foreground) \
|
|
x(btree_interior_update_total) \
|
|
x(btree_gc) \
|
|
@@ -456,6 +457,7 @@ BCH_DEBUG_PARAMS_DEBUG()
|
|
x(blocked_journal_low_on_space) \
|
|
x(blocked_journal_low_on_pin) \
|
|
x(blocked_journal_max_in_flight) \
|
|
+ x(blocked_journal_max_open) \
|
|
x(blocked_key_cache_flush) \
|
|
x(blocked_allocate) \
|
|
x(blocked_allocate_open_bucket) \
|
|
@@ -687,7 +689,8 @@ struct btree_trans_buf {
|
|
x(gc_gens) \
|
|
x(snapshot_delete_pagecache) \
|
|
x(sysfs) \
|
|
- x(btree_write_buffer)
|
|
+ x(btree_write_buffer) \
|
|
+ x(btree_node_scrub)
|
|
|
|
enum bch_write_ref {
|
|
#define x(n) BCH_WRITE_REF_##n,
|
|
diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h
|
|
index 3c23bdf788ce..52594e925eb7 100644
|
|
--- a/fs/bcachefs/bcachefs_ioctl.h
|
|
+++ b/fs/bcachefs/bcachefs_ioctl.h
|
|
@@ -87,6 +87,7 @@ struct bch_ioctl_incremental {
|
|
#define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline)
|
|
#define BCH_IOCTL_FSCK_ONLINE _IOW(0xbc, 20, struct bch_ioctl_fsck_online)
|
|
#define BCH_IOCTL_QUERY_ACCOUNTING _IOW(0xbc, 21, struct bch_ioctl_query_accounting)
|
|
+#define BCH_IOCTL_QUERY_COUNTERS _IOW(0xbc, 21, struct bch_ioctl_query_counters)
|
|
|
|
/* ioctl below act on a particular file, not the filesystem as a whole: */
|
|
|
|
@@ -213,6 +214,10 @@ struct bch_ioctl_data {
|
|
struct bpos end_pos;
|
|
|
|
union {
|
|
+ struct {
|
|
+ __u32 dev;
|
|
+ __u32 data_types;
|
|
+ } scrub;
|
|
struct {
|
|
__u32 dev;
|
|
__u32 pad;
|
|
@@ -229,6 +234,11 @@ enum bch_data_event {
|
|
BCH_DATA_EVENT_NR = 1,
|
|
};
|
|
|
|
+enum data_progress_data_type_special {
|
|
+ DATA_PROGRESS_DATA_TYPE_phys = 254,
|
|
+ DATA_PROGRESS_DATA_TYPE_done = 255,
|
|
+};
|
|
+
|
|
struct bch_ioctl_data_progress {
|
|
__u8 data_type;
|
|
__u8 btree_id;
|
|
@@ -237,11 +247,19 @@ struct bch_ioctl_data_progress {
|
|
|
|
__u64 sectors_done;
|
|
__u64 sectors_total;
|
|
+ __u64 sectors_error_corrected;
|
|
+ __u64 sectors_error_uncorrected;
|
|
} __packed __aligned(8);
|
|
|
|
+enum bch_ioctl_data_event_ret {
|
|
+ BCH_IOCTL_DATA_EVENT_RET_done = 1,
|
|
+ BCH_IOCTL_DATA_EVENT_RET_device_offline = 2,
|
|
+};
|
|
+
|
|
struct bch_ioctl_data_event {
|
|
__u8 type;
|
|
- __u8 pad[7];
|
|
+ __u8 ret;
|
|
+ __u8 pad[6];
|
|
union {
|
|
struct bch_ioctl_data_progress p;
|
|
__u64 pad2[15];
|
|
@@ -443,4 +461,13 @@ struct bch_ioctl_query_accounting {
|
|
struct bkey_i_accounting accounting[];
|
|
};
|
|
|
|
+#define BCH_IOCTL_QUERY_COUNTERS_MOUNT (1 << 0)
|
|
+
|
|
+struct bch_ioctl_query_counters {
|
|
+ __u16 nr;
|
|
+ __u16 flags;
|
|
+ __u32 pad;
|
|
+ __u64 d[];
|
|
+};
|
|
+
|
|
#endif /* _BCACHEFS_IOCTL_H */
|
|
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
|
|
index dd1d9b74076e..ff681e733598 100644
|
|
--- a/fs/bcachefs/btree_gc.c
|
|
+++ b/fs/bcachefs/btree_gc.c
|
|
@@ -27,6 +27,7 @@
|
|
#include "journal.h"
|
|
#include "keylist.h"
|
|
#include "move.h"
|
|
+#include "progress.h"
|
|
#include "recovery_passes.h"
|
|
#include "reflink.h"
|
|
#include "recovery.h"
|
|
@@ -656,7 +657,9 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
|
|
return ret;
|
|
}
|
|
|
|
-static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool initial)
|
|
+static int bch2_gc_btree(struct btree_trans *trans,
|
|
+ struct progress_indicator_state *progress,
|
|
+ enum btree_id btree, bool initial)
|
|
{
|
|
struct bch_fs *c = trans->c;
|
|
unsigned target_depth = btree_node_type_has_triggers(__btree_node_type(0, btree)) ? 0 : 1;
|
|
@@ -673,6 +676,7 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in
|
|
BTREE_ITER_prefetch);
|
|
|
|
ret = for_each_btree_key_continue(trans, iter, 0, k, ({
|
|
+ bch2_progress_update_iter(trans, progress, &iter, "check_allocations");
|
|
gc_pos_set(c, gc_pos_btree(btree, level, k.k->p));
|
|
bch2_gc_mark_key(trans, btree, level, &prev, &iter, k, initial);
|
|
}));
|
|
@@ -717,22 +721,24 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
|
|
static int bch2_gc_btrees(struct bch_fs *c)
|
|
{
|
|
struct btree_trans *trans = bch2_trans_get(c);
|
|
- enum btree_id ids[BTREE_ID_NR];
|
|
struct printbuf buf = PRINTBUF;
|
|
- unsigned i;
|
|
int ret = 0;
|
|
|
|
- for (i = 0; i < BTREE_ID_NR; i++)
|
|
+ struct progress_indicator_state progress;
|
|
+ bch2_progress_init(&progress, c, ~0ULL);
|
|
+
|
|
+ enum btree_id ids[BTREE_ID_NR];
|
|
+ for (unsigned i = 0; i < BTREE_ID_NR; i++)
|
|
ids[i] = i;
|
|
bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);
|
|
|
|
- for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
|
|
+ for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
|
|
unsigned btree = i < BTREE_ID_NR ? ids[i] : i;
|
|
|
|
if (IS_ERR_OR_NULL(bch2_btree_id_root(c, btree)->b))
|
|
continue;
|
|
|
|
- ret = bch2_gc_btree(trans, btree, true);
|
|
+ ret = bch2_gc_btree(trans, &progress, btree, true);
|
|
}
|
|
|
|
printbuf_exit(&buf);
|
|
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
|
|
index e371e60e3133..e71b278672b6 100644
|
|
--- a/fs/bcachefs/btree_io.c
|
|
+++ b/fs/bcachefs/btree_io.c
|
|
@@ -1,6 +1,7 @@
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include "bcachefs.h"
|
|
+#include "bkey_buf.h"
|
|
#include "bkey_methods.h"
|
|
#include "bkey_sort.h"
|
|
#include "btree_cache.h"
|
|
@@ -1352,7 +1353,7 @@ static void btree_node_read_work(struct work_struct *work)
|
|
|
|
can_retry = bch2_bkey_pick_read_device(c,
|
|
bkey_i_to_s_c(&b->key),
|
|
- &failed, &rb->pick) > 0;
|
|
+ &failed, &rb->pick, -1) > 0;
|
|
|
|
if (!bio->bi_status &&
|
|
!bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
|
|
@@ -1697,7 +1698,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
|
|
return;
|
|
|
|
ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
|
|
- NULL, &pick);
|
|
+ NULL, &pick, -1);
|
|
|
|
if (ret <= 0) {
|
|
struct printbuf buf = PRINTBUF;
|
|
@@ -1811,6 +1812,190 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
|
|
return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
|
|
}
|
|
|
|
+struct btree_node_scrub {
|
|
+ struct bch_fs *c;
|
|
+ struct bch_dev *ca;
|
|
+ void *buf;
|
|
+ bool used_mempool;
|
|
+ unsigned written;
|
|
+
|
|
+ enum btree_id btree;
|
|
+ unsigned level;
|
|
+ struct bkey_buf key;
|
|
+ __le64 seq;
|
|
+
|
|
+ struct work_struct work;
|
|
+ struct bio bio;
|
|
+};
|
|
+
|
|
+static bool btree_node_scrub_check(struct bch_fs *c, struct btree_node *data, unsigned ptr_written,
|
|
+ struct printbuf *err)
|
|
+{
|
|
+ unsigned written = 0;
|
|
+
|
|
+ if (le64_to_cpu(data->magic) != bset_magic(c)) {
|
|
+ prt_printf(err, "bad magic: want %llx, got %llx",
|
|
+ bset_magic(c), le64_to_cpu(data->magic));
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ while (written < (ptr_written ?: btree_sectors(c))) {
|
|
+ struct btree_node_entry *bne;
|
|
+ struct bset *i;
|
|
+ bool first = !written;
|
|
+
|
|
+ if (first) {
|
|
+ bne = NULL;
|
|
+ i = &data->keys;
|
|
+ } else {
|
|
+ bne = (void *) data + (written << 9);
|
|
+ i = &bne->keys;
|
|
+
|
|
+ if (!ptr_written && i->seq != data->keys.seq)
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ struct nonce nonce = btree_nonce(i, written << 9);
|
|
+ bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i));
|
|
+
|
|
+ if (first) {
|
|
+ if (good_csum_type) {
|
|
+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, data);
|
|
+ if (bch2_crc_cmp(data->csum, csum)) {
|
|
+ bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), data->csum, csum);
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ written += vstruct_sectors(data, c->block_bits);
|
|
+ } else {
|
|
+ if (good_csum_type) {
|
|
+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
|
|
+ if (bch2_crc_cmp(bne->csum, csum)) {
|
|
+ bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), bne->csum, csum);
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ written += vstruct_sectors(bne, c->block_bits);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
+static void btree_node_scrub_work(struct work_struct *work)
|
|
+{
|
|
+ struct btree_node_scrub *scrub = container_of(work, struct btree_node_scrub, work);
|
|
+ struct bch_fs *c = scrub->c;
|
|
+ struct printbuf err = PRINTBUF;
|
|
+
|
|
+ __bch2_btree_pos_to_text(&err, c, scrub->btree, scrub->level,
|
|
+ bkey_i_to_s_c(scrub->key.k));
|
|
+ prt_newline(&err);
|
|
+
|
|
+ if (!btree_node_scrub_check(c, scrub->buf, scrub->written, &err)) {
|
|
+ struct btree_trans *trans = bch2_trans_get(c);
|
|
+
|
|
+ struct btree_iter iter;
|
|
+ bch2_trans_node_iter_init(trans, &iter, scrub->btree,
|
|
+ scrub->key.k->k.p, 0, scrub->level - 1, 0);
|
|
+
|
|
+ struct btree *b;
|
|
+ int ret = lockrestart_do(trans, PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(&iter)));
|
|
+ if (ret)
|
|
+ goto err;
|
|
+
|
|
+ if (bkey_i_to_btree_ptr_v2(&b->key)->v.seq == scrub->seq) {
|
|
+ bch_err(c, "error validating btree node during scrub on %s at btree %s",
|
|
+ scrub->ca->name, err.buf);
|
|
+
|
|
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
|
|
+ }
|
|
+err:
|
|
+ bch2_trans_iter_exit(trans, &iter);
|
|
+ bch2_trans_begin(trans);
|
|
+ bch2_trans_put(trans);
|
|
+ }
|
|
+
|
|
+ printbuf_exit(&err);
|
|
+ bch2_bkey_buf_exit(&scrub->key, c);;
|
|
+ btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
|
|
+ percpu_ref_put(&scrub->ca->io_ref);
|
|
+ kfree(scrub);
|
|
+ bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
|
|
+}
|
|
+
|
|
+static void btree_node_scrub_endio(struct bio *bio)
|
|
+{
|
|
+ struct btree_node_scrub *scrub = container_of(bio, struct btree_node_scrub, bio);
|
|
+
|
|
+ queue_work(scrub->c->btree_read_complete_wq, &scrub->work);
|
|
+}
|
|
+
|
|
+int bch2_btree_node_scrub(struct btree_trans *trans,
|
|
+ enum btree_id btree, unsigned level,
|
|
+ struct bkey_s_c k, unsigned dev)
|
|
+{
|
|
+ if (k.k->type != KEY_TYPE_btree_ptr_v2)
|
|
+ return 0;
|
|
+
|
|
+ struct bch_fs *c = trans->c;
|
|
+
|
|
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_node_scrub))
|
|
+ return -BCH_ERR_erofs_no_writes;
|
|
+
|
|
+ struct extent_ptr_decoded pick;
|
|
+ int ret = bch2_bkey_pick_read_device(c, k, NULL, &pick, dev);
|
|
+ if (ret <= 0)
|
|
+ goto err;
|
|
+
|
|
+ struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
|
|
+ if (!ca) {
|
|
+ ret = -BCH_ERR_device_offline;
|
|
+ goto err;
|
|
+ }
|
|
+
|
|
+ bool used_mempool = false;
|
|
+ void *buf = btree_bounce_alloc(c, c->opts.btree_node_size, &used_mempool);
|
|
+
|
|
+ unsigned vecs = buf_pages(buf, c->opts.btree_node_size);
|
|
+
|
|
+ struct btree_node_scrub *scrub =
|
|
+ kzalloc(sizeof(*scrub) + sizeof(struct bio_vec) * vecs, GFP_KERNEL);
|
|
+ if (!scrub) {
|
|
+ ret = -ENOMEM;
|
|
+ goto err_free;
|
|
+ }
|
|
+
|
|
+ scrub->c = c;
|
|
+ scrub->ca = ca;
|
|
+ scrub->buf = buf;
|
|
+ scrub->used_mempool = used_mempool;
|
|
+ scrub->written = btree_ptr_sectors_written(k);
|
|
+
|
|
+ scrub->btree = btree;
|
|
+ scrub->level = level;
|
|
+ bch2_bkey_buf_init(&scrub->key);
|
|
+ bch2_bkey_buf_reassemble(&scrub->key, c, k);
|
|
+ scrub->seq = bkey_s_c_to_btree_ptr_v2(k).v->seq;
|
|
+
|
|
+ INIT_WORK(&scrub->work, btree_node_scrub_work);
|
|
+
|
|
+ bio_init(&scrub->bio, ca->disk_sb.bdev, scrub->bio.bi_inline_vecs, vecs, REQ_OP_READ);
|
|
+ bch2_bio_map(&scrub->bio, scrub->buf, c->opts.btree_node_size);
|
|
+ scrub->bio.bi_iter.bi_sector = pick.ptr.offset;
|
|
+ scrub->bio.bi_end_io = btree_node_scrub_endio;
|
|
+ submit_bio(&scrub->bio);
|
|
+ return 0;
|
|
+err_free:
|
|
+ btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
|
|
+ percpu_ref_put(&ca->io_ref);
|
|
+err:
|
|
+ bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
|
|
struct btree_write *w)
|
|
{
|
|
@@ -1831,7 +2016,7 @@ static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
|
|
bch2_journal_pin_drop(&c->journal, &w->journal);
|
|
}
|
|
|
|
-static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
|
+static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
|
|
{
|
|
struct btree_write *w = btree_prev_write(b);
|
|
unsigned long old, new;
|
|
@@ -1839,6 +2024,9 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
|
|
|
bch2_btree_complete_write(c, b, w);
|
|
|
|
+ if (start_time)
|
|
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_write], start_time);
|
|
+
|
|
old = READ_ONCE(b->flags);
|
|
do {
|
|
new = old;
|
|
@@ -1869,7 +2057,7 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
|
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
|
|
}
|
|
|
|
-static void btree_node_write_done(struct bch_fs *c, struct btree *b)
|
|
+static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
|
|
{
|
|
struct btree_trans *trans = bch2_trans_get(c);
|
|
|
|
@@ -1877,7 +2065,7 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)
|
|
|
|
/* we don't need transaction context anymore after we got the lock. */
|
|
bch2_trans_put(trans);
|
|
- __btree_node_write_done(c, b);
|
|
+ __btree_node_write_done(c, b, start_time);
|
|
six_unlock_read(&b->c.lock);
|
|
}
|
|
|
|
@@ -1887,6 +2075,7 @@ static void btree_node_write_work(struct work_struct *work)
|
|
container_of(work, struct btree_write_bio, work);
|
|
struct bch_fs *c = wbio->wbio.c;
|
|
struct btree *b = wbio->wbio.bio.bi_private;
|
|
+ u64 start_time = wbio->start_time;
|
|
int ret = 0;
|
|
|
|
btree_bounce_free(c,
|
|
@@ -1919,7 +2108,7 @@ static void btree_node_write_work(struct work_struct *work)
|
|
}
|
|
out:
|
|
bio_put(&wbio->wbio.bio);
|
|
- btree_node_write_done(c, b);
|
|
+ btree_node_write_done(c, b, start_time);
|
|
return;
|
|
err:
|
|
set_btree_node_noevict(b);
|
|
@@ -2023,6 +2212,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
|
|
bool validate_before_checksum = false;
|
|
enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
|
|
void *data;
|
|
+ u64 start_time = local_clock();
|
|
int ret;
|
|
|
|
if (flags & BTREE_WRITE_ALREADY_STARTED)
|
|
@@ -2231,6 +2421,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
|
|
wbio->data = data;
|
|
wbio->data_bytes = bytes;
|
|
wbio->sector_offset = b->written;
|
|
+ wbio->start_time = start_time;
|
|
wbio->wbio.c = c;
|
|
wbio->wbio.used_mempool = used_mempool;
|
|
wbio->wbio.first_btree_write = !b->written;
|
|
@@ -2258,7 +2449,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
|
|
b->written += sectors_to_write;
|
|
nowrite:
|
|
btree_bounce_free(c, bytes, used_mempool, data);
|
|
- __btree_node_write_done(c, b);
|
|
+ __btree_node_write_done(c, b, 0);
|
|
}
|
|
|
|
/*
|
|
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
|
|
index 6f9e4a6dacf7..dbf76d22c660 100644
|
|
--- a/fs/bcachefs/btree_io.h
|
|
+++ b/fs/bcachefs/btree_io.h
|
|
@@ -52,6 +52,7 @@ struct btree_write_bio {
|
|
void *data;
|
|
unsigned data_bytes;
|
|
unsigned sector_offset;
|
|
+ u64 start_time;
|
|
struct bch_write_bio wbio;
|
|
};
|
|
|
|
@@ -132,6 +133,9 @@ void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
|
|
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
|
|
const struct bkey_i *, unsigned);
|
|
|
|
+int bch2_btree_node_scrub(struct btree_trans *, enum btree_id, unsigned,
|
|
+ struct bkey_s_c, unsigned);
|
|
+
|
|
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
|
|
|
|
enum btree_write_flags {
|
|
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
|
|
index f4aeadbe53c1..ab111fec1701 100644
|
|
--- a/fs/bcachefs/btree_update_interior.c
|
|
+++ b/fs/bcachefs/btree_update_interior.c
|
|
@@ -2189,6 +2189,26 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
|
|
goto out;
|
|
}
|
|
|
|
+int bch2_btree_node_rewrite_key(struct btree_trans *trans,
|
|
+ enum btree_id btree, unsigned level,
|
|
+ struct bpos pos, unsigned flags)
|
|
+{
|
|
+ BUG_ON(!level);
|
|
+
|
|
+ /* Traverse one depth lower to get a pointer to the node itself: */
|
|
+ struct btree_iter iter;
|
|
+ bch2_trans_node_iter_init(trans, &iter, btree, pos, 0, level - 1, 0);
|
|
+ struct btree *b = bch2_btree_iter_peek_node(&iter);
|
|
+ int ret = PTR_ERR_OR_ZERO(b);
|
|
+ if (ret)
|
|
+ goto err;
|
|
+
|
|
+ ret = bch2_btree_node_rewrite(trans, &iter, b, flags);
|
|
+err:
|
|
+ bch2_trans_iter_exit(trans, &iter);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
struct async_btree_rewrite {
|
|
struct bch_fs *c;
|
|
struct work_struct work;
|
|
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
|
|
index 7930ffea3075..fa5a88f95d89 100644
|
|
--- a/fs/bcachefs/btree_update_interior.h
|
|
+++ b/fs/bcachefs/btree_update_interior.h
|
|
@@ -169,7 +169,11 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
|
|
|
|
int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
|
|
struct btree *, unsigned);
|
|
+int bch2_btree_node_rewrite_key(struct btree_trans *,
|
|
+ enum btree_id, unsigned,
|
|
+ struct bpos, unsigned);
|
|
void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
|
|
+
|
|
int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
|
|
struct btree *, struct bkey_i *,
|
|
unsigned, bool);
|
|
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
|
|
index 46e9e32105a9..57d55b3ddc71 100644
|
|
--- a/fs/bcachefs/chardev.c
|
|
+++ b/fs/bcachefs/chardev.c
|
|
@@ -11,6 +11,7 @@
|
|
#include "move.h"
|
|
#include "recovery_passes.h"
|
|
#include "replicas.h"
|
|
+#include "sb-counters.h"
|
|
#include "super-io.h"
|
|
#include "thread_with_file.h"
|
|
|
|
@@ -312,7 +313,12 @@ static int bch2_data_thread(void *arg)
|
|
struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
|
|
|
|
ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
|
|
- ctx->stats.data_type = U8_MAX;
|
|
+ if (ctx->thr.ret == -BCH_ERR_device_offline)
|
|
+ ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline;
|
|
+ else {
|
|
+ ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done;
|
|
+ ctx->stats.data_type = (int) DATA_PROGRESS_DATA_TYPE_done;
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
@@ -331,14 +337,30 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
|
|
struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
|
|
struct bch_fs *c = ctx->c;
|
|
struct bch_ioctl_data_event e = {
|
|
- .type = BCH_DATA_EVENT_PROGRESS,
|
|
- .p.data_type = ctx->stats.data_type,
|
|
- .p.btree_id = ctx->stats.pos.btree,
|
|
- .p.pos = ctx->stats.pos.pos,
|
|
- .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
|
|
- .p.sectors_total = bch2_fs_usage_read_short(c).used,
|
|
+ .type = BCH_DATA_EVENT_PROGRESS,
|
|
+ .ret = ctx->stats.ret,
|
|
+ .p.data_type = ctx->stats.data_type,
|
|
+ .p.btree_id = ctx->stats.pos.btree,
|
|
+ .p.pos = ctx->stats.pos.pos,
|
|
+ .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
|
|
+ .p.sectors_error_corrected = atomic64_read(&ctx->stats.sectors_error_corrected),
|
|
+ .p.sectors_error_uncorrected = atomic64_read(&ctx->stats.sectors_error_uncorrected),
|
|
};
|
|
|
|
+ if (ctx->arg.op == BCH_DATA_OP_scrub) {
|
|
+ struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev);
|
|
+ if (ca) {
|
|
+ struct bch_dev_usage u;
|
|
+ bch2_dev_usage_read_fast(ca, &u);
|
|
+ for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++)
|
|
+ if (ctx->arg.scrub.data_types & BIT(i))
|
|
+ e.p.sectors_total += u.d[i].sectors;
|
|
+ bch2_dev_put(ca);
|
|
+ }
|
|
+ } else {
|
|
+ e.p.sectors_total = bch2_fs_usage_read_short(c).used;
|
|
+ }
|
|
+
|
|
if (len < sizeof(e))
|
|
return -EINVAL;
|
|
|
|
@@ -710,6 +732,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
|
|
BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online);
|
|
case BCH_IOCTL_QUERY_ACCOUNTING:
|
|
return bch2_ioctl_query_accounting(c, arg);
|
|
+ case BCH_IOCTL_QUERY_COUNTERS:
|
|
+ return bch2_ioctl_query_counters(c, arg);
|
|
default:
|
|
return -ENOTTY;
|
|
}
|
|
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
|
|
index 1d6b691e8da6..1f8e035d7119 100644
|
|
--- a/fs/bcachefs/clock.c
|
|
+++ b/fs/bcachefs/clock.c
|
|
@@ -14,21 +14,13 @@ static inline bool io_timer_cmp(const void *l, const void *r, void __always_unus
|
|
return (*_l)->expire < (*_r)->expire;
|
|
}
|
|
|
|
-static inline void io_timer_swp(void *l, void *r, void __always_unused *args)
|
|
-{
|
|
- struct io_timer **_l = (struct io_timer **)l;
|
|
- struct io_timer **_r = (struct io_timer **)r;
|
|
-
|
|
- swap(*_l, *_r);
|
|
-}
|
|
+static const struct min_heap_callbacks callbacks = {
|
|
+ .less = io_timer_cmp,
|
|
+ .swp = NULL,
|
|
+};
|
|
|
|
void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
|
|
{
|
|
- const struct min_heap_callbacks callbacks = {
|
|
- .less = io_timer_cmp,
|
|
- .swp = io_timer_swp,
|
|
- };
|
|
-
|
|
spin_lock(&clock->timer_lock);
|
|
|
|
if (time_after_eq64((u64) atomic64_read(&clock->now), timer->expire)) {
|
|
@@ -48,11 +40,6 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
|
|
|
|
void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
|
|
{
|
|
- const struct min_heap_callbacks callbacks = {
|
|
- .less = io_timer_cmp,
|
|
- .swp = io_timer_swp,
|
|
- };
|
|
-
|
|
spin_lock(&clock->timer_lock);
|
|
|
|
for (size_t i = 0; i < clock->timers.nr; i++)
|
|
@@ -142,10 +129,6 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
|
|
static struct io_timer *get_expired_timer(struct io_clock *clock, u64 now)
|
|
{
|
|
struct io_timer *ret = NULL;
|
|
- const struct min_heap_callbacks callbacks = {
|
|
- .less = io_timer_cmp,
|
|
- .swp = io_timer_swp,
|
|
- };
|
|
|
|
if (clock->timers.nr &&
|
|
time_after_eq64(now, clock->timers.data[0]->expire)) {
|
|
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
|
|
index 337494facac6..c66ef8a1b5f2 100644
|
|
--- a/fs/bcachefs/data_update.c
|
|
+++ b/fs/bcachefs/data_update.c
|
|
@@ -20,6 +20,8 @@
|
|
#include "subvolume.h"
|
|
#include "trace.h"
|
|
|
|
+#include <linux/ioprio.h>
|
|
+
|
|
static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k)
|
|
{
|
|
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
|
@@ -33,7 +35,7 @@ static bool bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k)
|
|
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
|
|
|
bkey_for_each_ptr(ptrs, ptr) {
|
|
- if (!bch2_dev_tryget(c, ptr->dev)) {
|
|
+ if (unlikely(!bch2_dev_tryget(c, ptr->dev))) {
|
|
bkey_for_each_ptr(ptrs, ptr2) {
|
|
if (ptr2 == ptr)
|
|
break;
|
|
@@ -91,7 +93,7 @@ static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struc
|
|
return true;
|
|
}
|
|
|
|
-static noinline void trace_move_extent_finish2(struct data_update *u,
|
|
+static noinline void trace_io_move_finish2(struct data_update *u,
|
|
struct bkey_i *new,
|
|
struct bkey_i *insert)
|
|
{
|
|
@@ -111,11 +113,11 @@ static noinline void trace_move_extent_finish2(struct data_update *u,
|
|
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
|
|
prt_newline(&buf);
|
|
|
|
- trace_move_extent_finish(c, buf.buf);
|
|
+ trace_io_move_finish(c, buf.buf);
|
|
printbuf_exit(&buf);
|
|
}
|
|
|
|
-static void trace_move_extent_fail2(struct data_update *m,
|
|
+static void trace_io_move_fail2(struct data_update *m,
|
|
struct bkey_s_c new,
|
|
struct bkey_s_c wrote,
|
|
struct bkey_i *insert,
|
|
@@ -126,7 +128,7 @@ static void trace_move_extent_fail2(struct data_update *m,
|
|
struct printbuf buf = PRINTBUF;
|
|
unsigned rewrites_found = 0;
|
|
|
|
- if (!trace_move_extent_fail_enabled())
|
|
+ if (!trace_io_move_fail_enabled())
|
|
return;
|
|
|
|
prt_str(&buf, msg);
|
|
@@ -166,7 +168,7 @@ static void trace_move_extent_fail2(struct data_update *m,
|
|
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
|
|
}
|
|
|
|
- trace_move_extent_fail(c, buf.buf);
|
|
+ trace_io_move_fail(c, buf.buf);
|
|
printbuf_exit(&buf);
|
|
}
|
|
|
|
@@ -214,7 +216,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
|
new = bkey_i_to_extent(bch2_keylist_front(keys));
|
|
|
|
if (!bch2_extents_match(k, old)) {
|
|
- trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i),
|
|
+ trace_io_move_fail2(m, k, bkey_i_to_s_c(&new->k_i),
|
|
NULL, "no match:");
|
|
goto nowork;
|
|
}
|
|
@@ -254,7 +256,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
|
if (m->data_opts.rewrite_ptrs &&
|
|
!rewrites_found &&
|
|
bch2_bkey_durability(c, k) >= m->op.opts.data_replicas) {
|
|
- trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "no rewrites found:");
|
|
+ trace_io_move_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "no rewrites found:");
|
|
goto nowork;
|
|
}
|
|
|
|
@@ -271,7 +273,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
|
}
|
|
|
|
if (!bkey_val_u64s(&new->k)) {
|
|
- trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "new replicas conflicted:");
|
|
+ trace_io_move_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "new replicas conflicted:");
|
|
goto nowork;
|
|
}
|
|
|
|
@@ -384,9 +386,9 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
|
if (!ret) {
|
|
bch2_btree_iter_set_pos(&iter, next_pos);
|
|
|
|
- this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size);
|
|
- if (trace_move_extent_finish_enabled())
|
|
- trace_move_extent_finish2(m, &new->k_i, insert);
|
|
+ this_cpu_add(c->counters[BCH_COUNTER_io_move_finish], new->k.size);
|
|
+ if (trace_io_move_finish_enabled())
|
|
+ trace_io_move_finish2(m, &new->k_i, insert);
|
|
}
|
|
err:
|
|
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
@@ -408,7 +410,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
|
&m->stats->sectors_raced);
|
|
}
|
|
|
|
- count_event(c, move_extent_fail);
|
|
+ count_event(c, io_move_fail);
|
|
|
|
bch2_btree_iter_advance(&iter);
|
|
goto next;
|
|
@@ -426,14 +428,17 @@ int bch2_data_update_index_update(struct bch_write_op *op)
|
|
return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op));
|
|
}
|
|
|
|
-void bch2_data_update_read_done(struct data_update *m,
|
|
- struct bch_extent_crc_unpacked crc)
|
|
+void bch2_data_update_read_done(struct data_update *m)
|
|
{
|
|
+ m->read_done = true;
|
|
+
|
|
/* write bio must own pages: */
|
|
BUG_ON(!m->op.wbio.bio.bi_vcnt);
|
|
|
|
- m->op.crc = crc;
|
|
- m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9;
|
|
+ m->op.crc = m->rbio.pick.crc;
|
|
+ m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
|
|
+
|
|
+ this_cpu_add(m->op.c->counters[BCH_COUNTER_io_move_write], m->k.k->k.size);
|
|
|
|
closure_call(&m->op.cl, bch2_write, NULL, NULL);
|
|
}
|
|
@@ -443,31 +448,34 @@ void bch2_data_update_exit(struct data_update *update)
|
|
struct bch_fs *c = update->op.c;
|
|
struct bkey_s_c k = bkey_i_to_s_c(update->k.k);
|
|
|
|
+ bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
|
|
+ kfree(update->bvecs);
|
|
+ update->bvecs = NULL;
|
|
+
|
|
if (c->opts.nocow_enabled)
|
|
bkey_nocow_unlock(c, k);
|
|
bkey_put_dev_refs(c, k);
|
|
- bch2_bkey_buf_exit(&update->k, c);
|
|
bch2_disk_reservation_put(c, &update->op.res);
|
|
- bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
|
|
+ bch2_bkey_buf_exit(&update->k, c);
|
|
}
|
|
|
|
-static void bch2_update_unwritten_extent(struct btree_trans *trans,
|
|
- struct data_update *update)
|
|
+static int bch2_update_unwritten_extent(struct btree_trans *trans,
|
|
+ struct data_update *update)
|
|
{
|
|
struct bch_fs *c = update->op.c;
|
|
- struct bio *bio = &update->op.wbio.bio;
|
|
struct bkey_i_extent *e;
|
|
struct write_point *wp;
|
|
struct closure cl;
|
|
struct btree_iter iter;
|
|
struct bkey_s_c k;
|
|
- int ret;
|
|
+ int ret = 0;
|
|
|
|
closure_init_stack(&cl);
|
|
bch2_keylist_init(&update->op.insert_keys, update->op.inline_keys);
|
|
|
|
- while (bio_sectors(bio)) {
|
|
- unsigned sectors = bio_sectors(bio);
|
|
+ while (bpos_lt(update->op.pos, update->k.k->k.p)) {
|
|
+ unsigned sectors = update->k.k->k.p.offset -
|
|
+ update->op.pos.offset;
|
|
|
|
bch2_trans_begin(trans);
|
|
|
|
@@ -503,7 +511,7 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans,
|
|
bch_err_fn_ratelimited(c, ret);
|
|
|
|
if (ret)
|
|
- return;
|
|
+ break;
|
|
|
|
sectors = min(sectors, wp->sectors_free);
|
|
|
|
@@ -513,7 +521,6 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans,
|
|
bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
|
|
bch2_alloc_sectors_done(c, wp);
|
|
|
|
- bio_advance(bio, sectors << 9);
|
|
update->op.pos.offset += sectors;
|
|
|
|
extent_for_each_ptr(extent_i_to_s(e), ptr)
|
|
@@ -532,13 +539,16 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans,
|
|
bch2_trans_unlock(trans);
|
|
closure_sync(&cl);
|
|
}
|
|
+
|
|
+ return ret;
|
|
}
|
|
|
|
void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
|
|
struct bch_io_opts *io_opts,
|
|
struct data_update_opts *data_opts)
|
|
{
|
|
- printbuf_tabstop_push(out, 20);
|
|
+ if (!out->nr_tabstops)
|
|
+ printbuf_tabstop_push(out, 20);
|
|
|
|
prt_str_indented(out, "rewrite ptrs:\t");
|
|
bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
|
|
@@ -562,6 +572,7 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
|
|
|
|
prt_str_indented(out, "extra replicas:\t");
|
|
prt_u64(out, data_opts->extra_replicas);
|
|
+ prt_newline(out);
|
|
}
|
|
|
|
void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
|
|
@@ -573,6 +584,17 @@ void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
|
|
bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
|
|
}
|
|
|
|
+void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update *m)
|
|
+{
|
|
+ bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
|
|
+ prt_newline(out);
|
|
+ printbuf_indent_add(out, 2);
|
|
+ bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
|
|
+ prt_printf(out, "read_done:\t\%u\n", m->read_done);
|
|
+ bch2_write_op_to_text(out, &m->op);
|
|
+ printbuf_indent_sub(out, 2);
|
|
+}
|
|
+
|
|
int bch2_extent_drop_ptrs(struct btree_trans *trans,
|
|
struct btree_iter *iter,
|
|
struct bkey_s_c k,
|
|
@@ -616,12 +638,80 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
|
|
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
|
}
|
|
|
|
+static bool can_allocate_without_blocking(struct bch_fs *c,
|
|
+ struct data_update *m)
|
|
+{
|
|
+ if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(m->op.watermark)))
|
|
+ return false;
|
|
+
|
|
+ unsigned target = m->op.flags & BCH_WRITE_only_specified_devs
|
|
+ ? m->op.target
|
|
+ : 0;
|
|
+ struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_user, target);
|
|
+
|
|
+ darray_for_each(m->op.devs_have, i)
|
|
+ __clear_bit(*i, devs.d);
|
|
+
|
|
+ rcu_read_lock();
|
|
+ unsigned nr_replicas = 0, i;
|
|
+ for_each_set_bit(i, devs.d, BCH_SB_MEMBERS_MAX) {
|
|
+ struct bch_dev *ca = bch2_dev_rcu(c, i);
|
|
+
|
|
+ struct bch_dev_usage usage;
|
|
+ bch2_dev_usage_read_fast(ca, &usage);
|
|
+
|
|
+ if (!dev_buckets_free(ca, usage, m->op.watermark))
|
|
+ continue;
|
|
+
|
|
+ nr_replicas += ca->mi.durability;
|
|
+ if (nr_replicas >= m->op.nr_replicas)
|
|
+ break;
|
|
+ }
|
|
+ rcu_read_unlock();
|
|
+
|
|
+ return nr_replicas >= m->op.nr_replicas;
|
|
+}
|
|
+
|
|
+int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
|
|
+ struct bch_io_opts *io_opts)
|
|
+{
|
|
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(m->k.k));
|
|
+ const union bch_extent_entry *entry;
|
|
+ struct extent_ptr_decoded p;
|
|
+
|
|
+ /* write path might have to decompress data: */
|
|
+ unsigned buf_bytes = 0;
|
|
+ bkey_for_each_ptr_decode(&m->k.k->k, ptrs, p, entry)
|
|
+ buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9);
|
|
+
|
|
+ unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
|
|
+
|
|
+ m->bvecs = kmalloc_array(nr_vecs, sizeof*(m->bvecs), GFP_KERNEL);
|
|
+ if (!m->bvecs)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ bio_init(&m->rbio.bio, NULL, m->bvecs, nr_vecs, REQ_OP_READ);
|
|
+ bio_init(&m->op.wbio.bio, NULL, m->bvecs, nr_vecs, 0);
|
|
+
|
|
+ if (bch2_bio_alloc_pages(&m->op.wbio.bio, buf_bytes, GFP_KERNEL)) {
|
|
+ kfree(m->bvecs);
|
|
+ m->bvecs = NULL;
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ rbio_init(&m->rbio.bio, c, *io_opts, NULL);
|
|
+ m->rbio.bio.bi_iter.bi_size = buf_bytes;
|
|
+ m->rbio.bio.bi_iter.bi_sector = bkey_start_offset(&m->k.k->k);
|
|
+ m->op.wbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
int bch2_data_update_init(struct btree_trans *trans,
|
|
struct btree_iter *iter,
|
|
struct moving_context *ctxt,
|
|
struct data_update *m,
|
|
struct write_point_specifier wp,
|
|
- struct bch_io_opts io_opts,
|
|
+ struct bch_io_opts *io_opts,
|
|
struct data_update_opts data_opts,
|
|
enum btree_id btree_id,
|
|
struct bkey_s_c k)
|
|
@@ -639,16 +729,7 @@ int bch2_data_update_init(struct btree_trans *trans,
|
|
* snapshots table - just skip it, we can move it later.
|
|
*/
|
|
if (unlikely(k.k->p.snapshot && !bch2_snapshot_exists(c, k.k->p.snapshot)))
|
|
- return -BCH_ERR_data_update_done;
|
|
-
|
|
- if (!bkey_get_dev_refs(c, k))
|
|
- return -BCH_ERR_data_update_done;
|
|
-
|
|
- if (c->opts.nocow_enabled &&
|
|
- !bkey_nocow_lock(c, ctxt, k)) {
|
|
- bkey_put_dev_refs(c, k);
|
|
- return -BCH_ERR_nocow_lock_blocked;
|
|
- }
|
|
+ return -BCH_ERR_data_update_done_no_snapshot;
|
|
|
|
bch2_bkey_buf_init(&m->k);
|
|
bch2_bkey_buf_reassemble(&m->k, c, k);
|
|
@@ -657,18 +738,18 @@ int bch2_data_update_init(struct btree_trans *trans,
|
|
m->ctxt = ctxt;
|
|
m->stats = ctxt ? ctxt->stats : NULL;
|
|
|
|
- bch2_write_op_init(&m->op, c, io_opts);
|
|
+ bch2_write_op_init(&m->op, c, *io_opts);
|
|
m->op.pos = bkey_start_pos(k.k);
|
|
m->op.version = k.k->bversion;
|
|
m->op.target = data_opts.target;
|
|
m->op.write_point = wp;
|
|
m->op.nr_replicas = 0;
|
|
- m->op.flags |= BCH_WRITE_PAGES_STABLE|
|
|
- BCH_WRITE_PAGES_OWNED|
|
|
- BCH_WRITE_DATA_ENCODED|
|
|
- BCH_WRITE_MOVE|
|
|
+ m->op.flags |= BCH_WRITE_pages_stable|
|
|
+ BCH_WRITE_pages_owned|
|
|
+ BCH_WRITE_data_encoded|
|
|
+ BCH_WRITE_move|
|
|
m->data_opts.write_flags;
|
|
- m->op.compression_opt = io_opts.background_compression;
|
|
+ m->op.compression_opt = io_opts->background_compression;
|
|
m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;
|
|
|
|
unsigned durability_have = 0, durability_removing = 0;
|
|
@@ -706,7 +787,7 @@ int bch2_data_update_init(struct btree_trans *trans,
|
|
ptr_bit <<= 1;
|
|
}
|
|
|
|
- unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have));
|
|
+ unsigned durability_required = max(0, (int) (io_opts->data_replicas - durability_have));
|
|
|
|
/*
|
|
* If current extent durability is less than io_opts.data_replicas,
|
|
@@ -739,8 +820,16 @@ int bch2_data_update_init(struct btree_trans *trans,
|
|
m->data_opts.rewrite_ptrs = 0;
|
|
/* if iter == NULL, it's just a promote */
|
|
if (iter)
|
|
- ret = bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &m->data_opts);
|
|
- goto out;
|
|
+ ret = bch2_extent_drop_ptrs(trans, iter, k, io_opts, &m->data_opts);
|
|
+ if (!ret)
|
|
+ ret = -BCH_ERR_data_update_done_no_writes_needed;
|
|
+ goto out_bkey_buf_exit;
|
|
+ }
|
|
+
|
|
+ if ((m->op.flags & BCH_WRITE_alloc_nowait) &&
|
|
+ !can_allocate_without_blocking(c, m)) {
|
|
+ ret = -BCH_ERR_data_update_done_would_block;
|
|
+ goto out_bkey_buf_exit;
|
|
}
|
|
|
|
if (reserve_sectors) {
|
|
@@ -749,18 +838,41 @@ int bch2_data_update_init(struct btree_trans *trans,
|
|
? 0
|
|
: BCH_DISK_RESERVATION_NOFAIL);
|
|
if (ret)
|
|
- goto out;
|
|
+ goto out_bkey_buf_exit;
|
|
+ }
|
|
+
|
|
+ if (!bkey_get_dev_refs(c, k)) {
|
|
+ ret = -BCH_ERR_data_update_done_no_dev_refs;
|
|
+ goto out_put_disk_res;
|
|
+ }
|
|
+
|
|
+ if (c->opts.nocow_enabled &&
|
|
+ !bkey_nocow_lock(c, ctxt, k)) {
|
|
+ ret = -BCH_ERR_nocow_lock_blocked;
|
|
+ goto out_put_dev_refs;
|
|
}
|
|
|
|
if (bkey_extent_is_unwritten(k)) {
|
|
- bch2_update_unwritten_extent(trans, m);
|
|
- goto out;
|
|
+ ret = bch2_update_unwritten_extent(trans, m) ?:
|
|
+ -BCH_ERR_data_update_done_unwritten;
|
|
+ goto out_nocow_unlock;
|
|
}
|
|
|
|
+ ret = bch2_data_update_bios_init(m, c, io_opts);
|
|
+ if (ret)
|
|
+ goto out_nocow_unlock;
|
|
+
|
|
return 0;
|
|
-out:
|
|
- bch2_data_update_exit(m);
|
|
- return ret ?: -BCH_ERR_data_update_done;
|
|
+out_nocow_unlock:
|
|
+ if (c->opts.nocow_enabled)
|
|
+ bkey_nocow_unlock(c, k);
|
|
+out_put_dev_refs:
|
|
+ bkey_put_dev_refs(c, k);
|
|
+out_put_disk_res:
|
|
+ bch2_disk_reservation_put(c, &m->op.res);
|
|
+out_bkey_buf_exit:
|
|
+ bch2_bkey_buf_exit(&m->k, c);
|
|
+ return ret;
|
|
}
|
|
|
|
void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
|
|
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
|
|
index e4b50723428e..c194cbbf5b51 100644
|
|
--- a/fs/bcachefs/data_update.h
|
|
+++ b/fs/bcachefs/data_update.h
|
|
@@ -4,6 +4,7 @@
|
|
#define _BCACHEFS_DATA_UPDATE_H
|
|
|
|
#include "bkey_buf.h"
|
|
+#include "io_read.h"
|
|
#include "io_write_types.h"
|
|
|
|
struct moving_context;
|
|
@@ -15,6 +16,9 @@ struct data_update_opts {
|
|
u8 extra_replicas;
|
|
unsigned btree_insert_flags;
|
|
unsigned write_flags;
|
|
+
|
|
+ int read_dev;
|
|
+ bool scrub;
|
|
};
|
|
|
|
void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
|
|
@@ -22,20 +26,24 @@ void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
|
|
|
|
struct data_update {
|
|
/* extent being updated: */
|
|
+ bool read_done;
|
|
enum btree_id btree_id;
|
|
struct bkey_buf k;
|
|
struct data_update_opts data_opts;
|
|
struct moving_context *ctxt;
|
|
struct bch_move_stats *stats;
|
|
+
|
|
+ struct bch_read_bio rbio;
|
|
struct bch_write_op op;
|
|
+ struct bio_vec *bvecs;
|
|
};
|
|
|
|
void bch2_data_update_to_text(struct printbuf *, struct data_update *);
|
|
+void bch2_data_update_inflight_to_text(struct printbuf *, struct data_update *);
|
|
|
|
int bch2_data_update_index_update(struct bch_write_op *);
|
|
|
|
-void bch2_data_update_read_done(struct data_update *,
|
|
- struct bch_extent_crc_unpacked);
|
|
+void bch2_data_update_read_done(struct data_update *);
|
|
|
|
int bch2_extent_drop_ptrs(struct btree_trans *,
|
|
struct btree_iter *,
|
|
@@ -43,12 +51,15 @@ int bch2_extent_drop_ptrs(struct btree_trans *,
|
|
struct bch_io_opts *,
|
|
struct data_update_opts *);
|
|
|
|
+int bch2_data_update_bios_init(struct data_update *, struct bch_fs *,
|
|
+ struct bch_io_opts *);
|
|
+
|
|
void bch2_data_update_exit(struct data_update *);
|
|
int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
|
|
struct moving_context *,
|
|
struct data_update *,
|
|
struct write_point_specifier,
|
|
- struct bch_io_opts, struct data_update_opts,
|
|
+ struct bch_io_opts *, struct data_update_opts,
|
|
enum btree_id, struct bkey_s_c);
|
|
void bch2_data_update_opts_normalize(struct bkey_s_c, struct data_update_opts *);
|
|
|
|
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
|
|
index 55333e82d1fe..788af88f6979 100644
|
|
--- a/fs/bcachefs/debug.c
|
|
+++ b/fs/bcachefs/debug.c
|
|
@@ -7,6 +7,7 @@
|
|
*/
|
|
|
|
#include "bcachefs.h"
|
|
+#include "alloc_foreground.h"
|
|
#include "bkey_methods.h"
|
|
#include "btree_cache.h"
|
|
#include "btree_io.h"
|
|
@@ -190,7 +191,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
|
|
unsigned offset = 0;
|
|
int ret;
|
|
|
|
- if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) {
|
|
+ if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick, -1) <= 0) {
|
|
prt_printf(out, "error getting device to read from: invalid device\n");
|
|
return;
|
|
}
|
|
@@ -844,8 +845,11 @@ static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
|
|
seqmutex_unlock(&c->btree_trans_lock);
|
|
}
|
|
|
|
-static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
|
|
- size_t size, loff_t *ppos)
|
|
+typedef void (*fs_to_text_fn)(struct printbuf *, struct bch_fs *);
|
|
+
|
|
+static ssize_t bch2_simple_print(struct file *file, char __user *buf,
|
|
+ size_t size, loff_t *ppos,
|
|
+ fs_to_text_fn fn)
|
|
{
|
|
struct dump_iter *i = file->private_data;
|
|
struct bch_fs *c = i->c;
|
|
@@ -856,7 +860,7 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
|
|
i->ret = 0;
|
|
|
|
if (!i->iter) {
|
|
- btree_deadlock_to_text(&i->buf, c);
|
|
+ fn(&i->buf, c);
|
|
i->iter++;
|
|
}
|
|
|
|
@@ -869,6 +873,12 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
|
|
return ret ?: i->ret;
|
|
}
|
|
|
|
+static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
|
|
+ size_t size, loff_t *ppos)
|
|
+{
|
|
+ return bch2_simple_print(file, buf, size, ppos, btree_deadlock_to_text);
|
|
+}
|
|
+
|
|
static const struct file_operations btree_deadlock_ops = {
|
|
.owner = THIS_MODULE,
|
|
.open = bch2_dump_open,
|
|
@@ -876,6 +886,19 @@ static const struct file_operations btree_deadlock_ops = {
|
|
.read = bch2_btree_deadlock_read,
|
|
};
|
|
|
|
+static ssize_t bch2_write_points_read(struct file *file, char __user *buf,
|
|
+ size_t size, loff_t *ppos)
|
|
+{
|
|
+ return bch2_simple_print(file, buf, size, ppos, bch2_write_points_to_text);
|
|
+}
|
|
+
|
|
+static const struct file_operations write_points_ops = {
|
|
+ .owner = THIS_MODULE,
|
|
+ .open = bch2_dump_open,
|
|
+ .release = bch2_dump_release,
|
|
+ .read = bch2_write_points_read,
|
|
+};
|
|
+
|
|
void bch2_fs_debug_exit(struct bch_fs *c)
|
|
{
|
|
if (!IS_ERR_OR_NULL(c->fs_debug_dir))
|
|
@@ -927,6 +950,9 @@ void bch2_fs_debug_init(struct bch_fs *c)
|
|
debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir,
|
|
c->btree_debug, &btree_deadlock_ops);
|
|
|
|
+ debugfs_create_file("write_points", 0400, c->fs_debug_dir,
|
|
+ c->btree_debug, &write_points_ops);
|
|
+
|
|
c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
|
|
if (IS_ERR_OR_NULL(c->btree_debug_dir))
|
|
return;
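
With bch2_simple_print() factored out above, exposing another read-only dump in debugfs follows the same three-step pattern as the new "write_points" file. A sketch of what a further dump would look like; bch2_example_to_text() and the "example" file name are hypothetical:

        static ssize_t bch2_example_read(struct file *file, char __user *buf,
                                         size_t size, loff_t *ppos)
        {
                return bch2_simple_print(file, buf, size, ppos, bch2_example_to_text);
        }

        static const struct file_operations example_ops = {
                .owner          = THIS_MODULE,
                .open           = bch2_dump_open,
                .release        = bch2_dump_release,
                .read           = bch2_example_read,
        };

        /* then, in bch2_fs_debug_init(): */
        debugfs_create_file("example", 0400, c->fs_debug_dir,
                            c->btree_debug, &example_ops);
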
|
|
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
|
|
index b211e90ac54e..1aa56d28de33 100644
|
|
--- a/fs/bcachefs/ec.c
|
|
+++ b/fs/bcachefs/ec.c
|
|
@@ -1056,6 +1056,11 @@ static inline void ec_stripes_heap_swap(void *l, void *r, void *h)
|
|
ec_stripes_heap_set_backpointer(_h, j);
|
|
}
|
|
|
|
+static const struct min_heap_callbacks callbacks = {
|
|
+ .less = ec_stripes_heap_cmp,
|
|
+ .swp = ec_stripes_heap_swap,
|
|
+};
|
|
+
|
|
static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
|
|
{
|
|
ec_stripes_heap *h = &c->ec_stripes_heap;
|
|
@@ -1068,11 +1073,6 @@ static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
|
|
void bch2_stripes_heap_del(struct bch_fs *c,
|
|
struct stripe *m, size_t idx)
|
|
{
|
|
- const struct min_heap_callbacks callbacks = {
|
|
- .less = ec_stripes_heap_cmp,
|
|
- .swp = ec_stripes_heap_swap,
|
|
- };
|
|
-
|
|
mutex_lock(&c->ec_stripes_heap_lock);
|
|
heap_verify_backpointer(c, idx);
|
|
|
|
@@ -1083,11 +1083,6 @@ void bch2_stripes_heap_del(struct bch_fs *c,
|
|
void bch2_stripes_heap_insert(struct bch_fs *c,
|
|
struct stripe *m, size_t idx)
|
|
{
|
|
- const struct min_heap_callbacks callbacks = {
|
|
- .less = ec_stripes_heap_cmp,
|
|
- .swp = ec_stripes_heap_swap,
|
|
- };
|
|
-
|
|
mutex_lock(&c->ec_stripes_heap_lock);
|
|
BUG_ON(min_heap_full(&c->ec_stripes_heap));
|
|
|
|
@@ -1106,10 +1101,6 @@ void bch2_stripes_heap_insert(struct bch_fs *c,
|
|
void bch2_stripes_heap_update(struct bch_fs *c,
|
|
struct stripe *m, size_t idx)
|
|
{
|
|
- const struct min_heap_callbacks callbacks = {
|
|
- .less = ec_stripes_heap_cmp,
|
|
- .swp = ec_stripes_heap_swap,
|
|
- };
|
|
ec_stripes_heap *h = &c->ec_stripes_heap;
|
|
bool do_deletes;
|
|
size_t i;
|
|
@@ -1389,8 +1380,12 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
|
|
if (bp_k.k->type != KEY_TYPE_backpointer)
|
|
continue;
|
|
|
|
+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
|
|
+ if (bp.v->btree_id == BTREE_ID_stripes)
|
|
+ continue;
|
|
+
|
|
ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s,
|
|
- bkey_s_c_to_backpointer(bp_k), &last_flushed);
|
|
+ bp, &last_flushed);
|
|
}));
|
|
|
|
bch2_bkey_buf_exit(&last_flushed, c);
|
|
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
|
|
index 4590cd0c7c90..89df97810076 100644
|
|
--- a/fs/bcachefs/errcode.h
|
|
+++ b/fs/bcachefs/errcode.h
|
|
@@ -180,6 +180,11 @@
|
|
x(EINVAL, not_in_recovery) \
|
|
x(EINVAL, cannot_rewind_recovery) \
|
|
x(0, data_update_done) \
|
|
+ x(BCH_ERR_data_update_done, data_update_done_would_block) \
|
|
+ x(BCH_ERR_data_update_done, data_update_done_unwritten) \
|
|
+ x(BCH_ERR_data_update_done, data_update_done_no_writes_needed) \
|
|
+ x(BCH_ERR_data_update_done, data_update_done_no_snapshot) \
|
|
+ x(BCH_ERR_data_update_done, data_update_done_no_dev_refs) \
|
|
x(EINVAL, device_state_not_allowed) \
|
|
x(EINVAL, member_info_missing) \
|
|
x(EINVAL, mismatched_block_size) \
|
|
@@ -269,6 +274,7 @@
|
|
x(EIO, invalidate_stripe_to_dev) \
|
|
x(EIO, no_encryption_key) \
|
|
x(EIO, insufficient_journal_devices) \
|
|
+ x(EIO, device_offline) \
|
|
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
|
|
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
|
|
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
|
|
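
The five data_update_done_* codes above are declared with BCH_ERR_data_update_done as their parent, so callers that only test for the class keep working while error messages and tracepoints can still name the precise cause. A small sketch of the intended use (the local variable is illustrative):

        int ret = -BCH_ERR_data_update_done_no_snapshot;

        /* true: bch2_err_matches() walks up the parent chain */
        if (bch2_err_matches(ret, BCH_ERR_data_update_done)) {
                /* handled exactly like a plain data_update_done */
        }
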
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
|
|
index 038da6a61f6b..c8fc58fab958 100644
|
|
--- a/fs/bcachefs/error.c
|
|
+++ b/fs/bcachefs/error.c
|
|
@@ -530,35 +530,53 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
|
|
mutex_unlock(&c->fsck_error_msgs_lock);
|
|
}
|
|
|
|
-int bch2_inum_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum)
|
|
+int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
|
|
+ subvol_inum inum, u64 offset)
|
|
{
|
|
u32 restart_count = trans->restart_count;
|
|
int ret = 0;
|
|
|
|
- /* XXX: we don't yet attempt to print paths when we don't know the subvol */
|
|
- if (inum.subvol)
|
|
- ret = lockrestart_do(trans, bch2_inum_to_path(trans, inum, out));
|
|
+ if (inum.subvol) {
|
|
+ ret = bch2_inum_to_path(trans, inum, out);
|
|
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
+ return ret;
|
|
+ }
|
|
if (!inum.subvol || ret)
|
|
prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum);
|
|
+ prt_printf(out, " offset %llu: ", offset);
|
|
|
|
return trans_was_restarted(trans, restart_count);
|
|
}
|
|
|
|
-int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
|
|
- subvol_inum inum, u64 offset)
|
|
+void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out,
|
|
+ subvol_inum inum, u64 offset)
|
|
{
|
|
- int ret = bch2_inum_err_msg_trans(trans, out, inum);
|
|
- prt_printf(out, " offset %llu: ", offset);
|
|
- return ret;
|
|
+ bch2_trans_do(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset));
|
|
}
|
|
|
|
-void bch2_inum_err_msg(struct bch_fs *c, struct printbuf *out, subvol_inum inum)
|
|
+int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
|
|
+ struct bpos pos)
|
|
{
|
|
- bch2_trans_run(c, bch2_inum_err_msg_trans(trans, out, inum));
|
|
-}
|
|
+ struct bch_fs *c = trans->c;
|
|
+ int ret = 0;
|
|
|
|
-void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out,
|
|
- subvol_inum inum, u64 offset)
|
|
-{
|
|
- bch2_trans_run(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset));
|
|
+ if (!bch2_snapshot_is_leaf(c, pos.snapshot))
|
|
+ prt_str(out, "(multiple snapshots) ");
|
|
+
|
|
+ subvol_inum inum = {
|
|
+ .subvol = bch2_snapshot_tree_oldest_subvol(c, pos.snapshot),
|
|
+ .inum = pos.inode,
|
|
+ };
|
|
+
|
|
+ if (inum.subvol) {
|
|
+ ret = bch2_inum_to_path(trans, inum, out);
|
|
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ if (!inum.subvol || ret)
|
|
+ prt_printf(out, "inum %llu:%u", pos.inode, pos.snapshot);
|
|
+
|
|
+ prt_printf(out, " offset %llu: ", pos.offset << 8);
|
|
+ return 0;
|
|
}
|
|
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
|
|
index 7acf2a27ca28..76da0e88cee8 100644
|
|
--- a/fs/bcachefs/error.h
|
|
+++ b/fs/bcachefs/error.h
|
|
@@ -238,10 +238,10 @@ void bch2_io_error(struct bch_dev *, enum bch_member_error_type);
|
|
_ret; \
|
|
})
|
|
|
|
-int bch2_inum_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum);
|
|
int bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum, u64);
|
|
|
|
-void bch2_inum_err_msg(struct bch_fs *, struct printbuf *, subvol_inum);
|
|
void bch2_inum_offset_err_msg(struct bch_fs *, struct printbuf *, subvol_inum, u64);
|
|
|
|
+int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *, struct printbuf *, struct bpos);
|
|
+
|
|
#endif /* _BCACHEFS_ERROR_H */
|
|
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
|
|
index 05d5f71a7ca9..78a51d96bd2d 100644
|
|
--- a/fs/bcachefs/extents.c
|
|
+++ b/fs/bcachefs/extents.c
|
|
@@ -114,8 +114,9 @@ static inline bool ptr_better(struct bch_fs *c,
|
|
* other devices, it will still pick a pointer from avoid.
|
|
*/
|
|
int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
|
|
- struct bch_io_failures *failed,
|
|
- struct extent_ptr_decoded *pick)
|
|
+ struct bch_io_failures *failed,
|
|
+ struct extent_ptr_decoded *pick,
|
|
+ int dev)
|
|
{
|
|
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
|
const union bch_extent_entry *entry;
|
|
@@ -137,6 +138,10 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
|
|
break;
|
|
}
|
|
|
|
+ /* Are we being asked to read from a specific device? */
|
|
+ if (dev >= 0 && p.ptr.dev != dev)
|
|
+ continue;
|
|
+
|
|
/*
|
|
* If there are any dirty pointers it's an error if we can't
|
|
* read:
|
|
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
|
|
index 620b284aa34f..8fae6b23a341 100644
|
|
--- a/fs/bcachefs/extents.h
|
|
+++ b/fs/bcachefs/extents.h
|
|
@@ -404,7 +404,7 @@ void bch2_mark_io_failure(struct bch_io_failures *,
|
|
struct extent_ptr_decoded *);
|
|
int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
|
|
struct bch_io_failures *,
|
|
- struct extent_ptr_decoded *);
|
|
+ struct extent_ptr_decoded *, int);
|
|
|
|
/* KEY_TYPE_btree_ptr: */
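
bch2_bkey_pick_read_device() now takes a device index as its final argument: -1 keeps the old behaviour of picking any readable pointer, while a non-negative value skips every pointer that is not on that device, as the extents.c hunk above shows. A sketch of the two calling styles with illustrative variables:

        struct extent_ptr_decoded pick;
        int ret;

        /* normal read: let the picker choose the best replica */
        ret = bch2_bkey_pick_read_device(c, k, failed, &pick, -1);

        /* read the copy on one specific device, e.g. for verification */
        ret = bch2_bkey_pick_read_device(c, k, NULL, &pick, ca->dev_idx);
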
|
|
|
|
diff --git a/fs/bcachefs/eytzinger.c b/fs/bcachefs/eytzinger.c
|
|
index 2eaffe37b5e7..0e742555cb0a 100644
|
|
--- a/fs/bcachefs/eytzinger.c
|
|
+++ b/fs/bcachefs/eytzinger.c
|
|
@@ -148,89 +148,99 @@ static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *pr
|
|
return cmp(a, b, priv);
|
|
}
|
|
|
|
-static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
|
|
+static inline int eytzinger1_do_cmp(void *base1, size_t n, size_t size,
|
|
cmp_r_func_t cmp_func, const void *priv,
|
|
size_t l, size_t r)
|
|
{
|
|
- return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
|
|
- base + inorder_to_eytzinger0(r, n) * size,
|
|
+ return do_cmp(base1 + inorder_to_eytzinger1(l, n) * size,
|
|
+ base1 + inorder_to_eytzinger1(r, n) * size,
|
|
cmp_func, priv);
|
|
}
|
|
|
|
-static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
|
|
+static inline void eytzinger1_do_swap(void *base1, size_t n, size_t size,
|
|
swap_r_func_t swap_func, const void *priv,
|
|
size_t l, size_t r)
|
|
{
|
|
- do_swap(base + inorder_to_eytzinger0(l, n) * size,
|
|
- base + inorder_to_eytzinger0(r, n) * size,
|
|
+ do_swap(base1 + inorder_to_eytzinger1(l, n) * size,
|
|
+ base1 + inorder_to_eytzinger1(r, n) * size,
|
|
size, swap_func, priv);
|
|
}
|
|
|
|
-void eytzinger0_sort_r(void *base, size_t n, size_t size,
|
|
- cmp_r_func_t cmp_func,
|
|
- swap_r_func_t swap_func,
|
|
- const void *priv)
|
|
+static void eytzinger1_sort_r(void *base1, size_t n, size_t size,
|
|
+ cmp_r_func_t cmp_func,
|
|
+ swap_r_func_t swap_func,
|
|
+ const void *priv)
|
|
{
|
|
- int i, j, k;
|
|
+ unsigned i, j, k;
|
|
|
|
/* called from 'sort' without swap function, let's pick the default */
|
|
if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
|
|
swap_func = NULL;
|
|
|
|
if (!swap_func) {
|
|
- if (is_aligned(base, size, 8))
|
|
+ if (is_aligned(base1, size, 8))
|
|
swap_func = SWAP_WORDS_64;
|
|
- else if (is_aligned(base, size, 4))
|
|
+ else if (is_aligned(base1, size, 4))
|
|
swap_func = SWAP_WORDS_32;
|
|
else
|
|
swap_func = SWAP_BYTES;
|
|
}
|
|
|
|
/* heapify */
|
|
- for (i = n / 2 - 1; i >= 0; --i) {
|
|
+ for (i = n / 2; i >= 1; --i) {
|
|
/* Find the sift-down path all the way to the leaves. */
|
|
- for (j = i; k = j * 2 + 1, k + 1 < n;)
|
|
- j = eytzinger0_do_cmp(base, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
|
|
+ for (j = i; k = j * 2, k < n;)
|
|
+ j = eytzinger1_do_cmp(base1, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
|
|
|
|
/* Special case for the last leaf with no sibling. */
|
|
- if (j * 2 + 2 == n)
|
|
- j = j * 2 + 1;
|
|
+ if (j * 2 == n)
|
|
+ j *= 2;
|
|
|
|
/* Backtrack to the correct location. */
|
|
- while (j != i && eytzinger0_do_cmp(base, n, size, cmp_func, priv, i, j) >= 0)
|
|
- j = (j - 1) / 2;
|
|
+ while (j != i && eytzinger1_do_cmp(base1, n, size, cmp_func, priv, i, j) >= 0)
|
|
+ j /= 2;
|
|
|
|
/* Shift the element into its correct place. */
|
|
for (k = j; j != i;) {
|
|
- j = (j - 1) / 2;
|
|
- eytzinger0_do_swap(base, n, size, swap_func, priv, j, k);
|
|
+ j /= 2;
|
|
+ eytzinger1_do_swap(base1, n, size, swap_func, priv, j, k);
|
|
}
|
|
}
|
|
|
|
/* sort */
|
|
- for (i = n - 1; i > 0; --i) {
|
|
- eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
|
|
+ for (i = n; i > 1; --i) {
|
|
+ eytzinger1_do_swap(base1, n, size, swap_func, priv, 1, i);
|
|
|
|
/* Find the sift-down path all the way to the leaves. */
|
|
- for (j = 0; k = j * 2 + 1, k + 1 < i;)
|
|
- j = eytzinger0_do_cmp(base, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
|
|
+ for (j = 1; k = j * 2, k + 1 < i;)
|
|
+ j = eytzinger1_do_cmp(base1, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
|
|
|
|
/* Special case for the last leaf with no sibling. */
|
|
- if (j * 2 + 2 == i)
|
|
- j = j * 2 + 1;
|
|
+ if (j * 2 + 1 == i)
|
|
+ j *= 2;
|
|
|
|
/* Backtrack to the correct location. */
|
|
- while (j && eytzinger0_do_cmp(base, n, size, cmp_func, priv, 0, j) >= 0)
|
|
- j = (j - 1) / 2;
|
|
+ while (j >= 1 && eytzinger1_do_cmp(base1, n, size, cmp_func, priv, 1, j) >= 0)
|
|
+ j /= 2;
|
|
|
|
/* Shift the element into its correct place. */
|
|
- for (k = j; j;) {
|
|
- j = (j - 1) / 2;
|
|
- eytzinger0_do_swap(base, n, size, swap_func, priv, j, k);
|
|
+ for (k = j; j > 1;) {
|
|
+ j /= 2;
|
|
+ eytzinger1_do_swap(base1, n, size, swap_func, priv, j, k);
|
|
}
|
|
}
|
|
}
|
|
|
|
+void eytzinger0_sort_r(void *base, size_t n, size_t size,
|
|
+ cmp_r_func_t cmp_func,
|
|
+ swap_r_func_t swap_func,
|
|
+ const void *priv)
|
|
+{
|
|
+ void *base1 = base - size;
|
|
+
|
|
+ return eytzinger1_sort_r(base1, n, size, cmp_func, swap_func, priv);
|
|
+}
|
|
+
|
|
void eytzinger0_sort(void *base, size_t n, size_t size,
|
|
cmp_func_t cmp_func,
|
|
swap_func_t swap_func)
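
The heapsort above now runs on 1-based (eytzinger1) indices, which turns the parent/child arithmetic into plain shifts; the 0-based entry point only rebases the array pointer by one element. A short illustration of why that works, not code from the patch:

        /* 1-based complete-tree arithmetic used by eytzinger1_sort_r(): */
        static inline size_t eyt1_parent(size_t i) { return i / 2;     }
        static inline size_t eyt1_left(size_t i)   { return 2 * i;     }
        static inline size_t eyt1_right(size_t i)  { return 2 * i + 1; }

        /*
         * With base1 = base - size, element j (0-based) lives at
         * base + j * size == base1 + (j + 1) * size, so the 1-based
         * helpers can index base1 directly and no +1/-1 fixups are
         * needed inside the sift-down loops.
         */
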
|
|
diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h
|
|
index 0541192d7bc0..643c1f716061 100644
|
|
--- a/fs/bcachefs/eytzinger.h
|
|
+++ b/fs/bcachefs/eytzinger.h
|
|
@@ -6,6 +6,7 @@
|
|
#include <linux/log2.h>
|
|
|
|
#ifdef EYTZINGER_DEBUG
|
|
+#include <linux/bug.h>
|
|
#define EYTZINGER_BUG_ON(cond) BUG_ON(cond)
|
|
#else
|
|
#define EYTZINGER_BUG_ON(cond)
|
|
@@ -56,24 +57,14 @@ static inline unsigned eytzinger1_last(unsigned size)
|
|
return rounddown_pow_of_two(size + 1) - 1;
|
|
}
|
|
|
|
-/*
|
|
- * eytzinger1_next() and eytzinger1_prev() have the nice properties that
|
|
- *
|
|
- * eytzinger1_next(0) == eytzinger1_first())
|
|
- * eytzinger1_prev(0) == eytzinger1_last())
|
|
- *
|
|
- * eytzinger1_prev(eytzinger1_first()) == 0
|
|
- * eytzinger1_next(eytzinger1_last()) == 0
|
|
- */
|
|
-
|
|
static inline unsigned eytzinger1_next(unsigned i, unsigned size)
|
|
{
|
|
- EYTZINGER_BUG_ON(i > size);
|
|
+ EYTZINGER_BUG_ON(i == 0 || i > size);
|
|
|
|
if (eytzinger1_right_child(i) <= size) {
|
|
i = eytzinger1_right_child(i);
|
|
|
|
- i <<= __fls(size + 1) - __fls(i);
|
|
+ i <<= __fls(size) - __fls(i);
|
|
i >>= i > size;
|
|
} else {
|
|
i >>= ffz(i) + 1;
|
|
@@ -84,12 +75,12 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)
|
|
|
|
static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
|
|
{
|
|
- EYTZINGER_BUG_ON(i > size);
|
|
+ EYTZINGER_BUG_ON(i == 0 || i > size);
|
|
|
|
if (eytzinger1_left_child(i) <= size) {
|
|
i = eytzinger1_left_child(i) + 1;
|
|
|
|
- i <<= __fls(size + 1) - __fls(i);
|
|
+ i <<= __fls(size) - __fls(i);
|
|
i -= 1;
|
|
i >>= i > size;
|
|
} else {
|
|
@@ -243,73 +234,63 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
|
|
(_i) != -1; \
|
|
(_i) = eytzinger0_next((_i), (_size)))
|
|
|
|
+#define eytzinger0_for_each_prev(_i, _size) \
|
|
+ for (unsigned (_i) = eytzinger0_last((_size)); \
|
|
+ (_i) != -1; \
|
|
+ (_i) = eytzinger0_prev((_i), (_size)))
|
|
+
|
|
/* return greatest node <= @search, or -1 if not found */
|
|
static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
|
|
cmp_func_t cmp, const void *search)
|
|
{
|
|
- unsigned i, n = 0;
|
|
-
|
|
- if (!nr)
|
|
- return -1;
|
|
-
|
|
- do {
|
|
- i = n;
|
|
- n = eytzinger0_child(i, cmp(base + i * size, search) <= 0);
|
|
- } while (n < nr);
|
|
-
|
|
- if (n & 1) {
|
|
- /*
|
|
- * @i was greater than @search, return previous node:
|
|
- *
|
|
- * if @i was leftmost/smallest element,
|
|
- * eytzinger0_prev(eytzinger0_first())) returns -1, as expected
|
|
- */
|
|
- return eytzinger0_prev(i, nr);
|
|
- } else {
|
|
- return i;
|
|
- }
|
|
+ void *base1 = base - size;
|
|
+ unsigned n = 1;
|
|
+
|
|
+ while (n <= nr)
|
|
+ n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0);
|
|
+ n >>= __ffs(n) + 1;
|
|
+ return n - 1;
|
|
}
|
|
|
|
+/* return smallest node > @search, or -1 if not found */
|
|
static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
|
|
cmp_func_t cmp, const void *search)
|
|
{
|
|
- ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
|
|
+ void *base1 = base - size;
|
|
+ unsigned n = 1;
|
|
|
|
- /*
|
|
- * if eytitzinger0_find_le() returned -1 - no element was <= search - we
|
|
- * want to return the first element; next/prev identities mean this work
|
|
- * as expected
|
|
- *
|
|
- * similarly if find_le() returns last element, we should return -1;
|
|
- * identities mean this all works out:
|
|
- */
|
|
- return eytzinger0_next(idx, nr);
|
|
+ while (n <= nr)
|
|
+ n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0);
|
|
+ n >>= __ffs(n + 1) + 1;
|
|
+ return n - 1;
|
|
}
|
|
|
|
+/* return smallest node >= @search, or -1 if not found */
|
|
static inline int eytzinger0_find_ge(void *base, size_t nr, size_t size,
|
|
cmp_func_t cmp, const void *search)
|
|
{
|
|
- ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
|
|
-
|
|
- if (idx < nr && !cmp(base + idx * size, search))
|
|
- return idx;
|
|
+ void *base1 = base - size;
|
|
+ unsigned n = 1;
|
|
|
|
- return eytzinger0_next(idx, nr);
|
|
+ while (n <= nr)
|
|
+ n = eytzinger1_child(n, cmp(base1 + n * size, search) < 0);
|
|
+ n >>= __ffs(n + 1) + 1;
|
|
+ return n - 1;
|
|
}
|
|
|
|
#define eytzinger0_find(base, nr, size, _cmp, search) \
|
|
({ \
|
|
- void *_base = (base); \
|
|
+ size_t _size = (size); \
|
|
+ void *_base1 = (void *)(base) - _size; \
|
|
const void *_search = (search); \
|
|
size_t _nr = (nr); \
|
|
- size_t _size = (size); \
|
|
- size_t _i = 0; \
|
|
+ size_t _i = 1; \
|
|
int _res; \
|
|
\
|
|
- while (_i < _nr && \
|
|
- (_res = _cmp(_search, _base + _i * _size))) \
|
|
- _i = eytzinger0_child(_i, _res > 0); \
|
|
- _i; \
|
|
+ while (_i <= _nr && \
|
|
+ (_res = _cmp(_search, _base1 + _i * _size))) \
|
|
+ _i = eytzinger1_child(_i, _res > 0); \
|
|
+ _i - 1; \
|
|
})
|
|
|
|
void eytzinger0_sort_r(void *, size_t, size_t,
|
|
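
The rewritten searches above recover their result from the bits of the descent path instead of falling back to eytzinger0_prev()/eytzinger0_next(). A worked example for eytzinger0_find_le(), using an illustrative five-element array:

        /*
         * Sorted values {10, 20, 30, 40, 50}, nr = 5, eytzinger1 layout:
         *   node:  1   2   3   4   5
         *   value: 40  20  50  10  30
         *
         * find_le(35): starting from n = 1, go right (2n + 1) when the
         * element is <= the search key, left (2n) otherwise:
         *   n=1 (40 > 35, left) -> n=2 (20 <= 35, right) -> n=5 (30 <= 35, right)
         *   -> n=11, which is > nr, so the descent stops.
         *
         * n = 11 = 0b1011 encodes the path.  Dropping the trailing zeroes
         * (final left moves) and the last 1 bit (the last right move) yields
         * the last node whose element was <= the search key:
         *   n >>= __ffs(n) + 1   ->   11 >> 1 == 5; return 5 - 1 = 4 (value 30).
         *
         * If no element is <= the search key, n ends up a power of two, the
         * shift produces 0, and the function returns -1 as before.
         */
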
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
|
|
index ab1d5db2fa56..a1ccb9139b04 100644
|
|
--- a/fs/bcachefs/fs-io-buffered.c
|
|
+++ b/fs/bcachefs/fs-io-buffered.c
|
|
@@ -149,12 +149,10 @@ static void bchfs_read(struct btree_trans *trans,
|
|
struct bch_fs *c = trans->c;
|
|
struct btree_iter iter;
|
|
struct bkey_buf sk;
|
|
- int flags = BCH_READ_RETRY_IF_STALE|
|
|
- BCH_READ_MAY_PROMOTE;
|
|
+ int flags = BCH_READ_retry_if_stale|
|
|
+ BCH_READ_may_promote;
|
|
int ret = 0;
|
|
|
|
- rbio->c = c;
|
|
- rbio->start_time = local_clock();
|
|
rbio->subvol = inum.subvol;
|
|
|
|
bch2_bkey_buf_init(&sk);
|
|
@@ -211,14 +209,14 @@ static void bchfs_read(struct btree_trans *trans,
|
|
swap(rbio->bio.bi_iter.bi_size, bytes);
|
|
|
|
if (rbio->bio.bi_iter.bi_size == bytes)
|
|
- flags |= BCH_READ_LAST_FRAGMENT;
|
|
+ flags |= BCH_READ_last_fragment;
|
|
|
|
bch2_bio_page_state_set(&rbio->bio, k);
|
|
|
|
bch2_read_extent(trans, rbio, iter.pos,
|
|
data_btree, k, offset_into_extent, flags);
|
|
|
|
- if (flags & BCH_READ_LAST_FRAGMENT)
|
|
+ if (flags & BCH_READ_last_fragment)
|
|
break;
|
|
|
|
swap(rbio->bio.bi_iter.bi_size, bytes);
|
|
@@ -232,7 +230,8 @@ static void bchfs_read(struct btree_trans *trans,
|
|
|
|
if (ret) {
|
|
struct printbuf buf = PRINTBUF;
|
|
- bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9);
|
|
+ lockrestart_do(trans,
|
|
+ bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9));
|
|
prt_printf(&buf, "read error %i from btree lookup", ret);
|
|
bch_err_ratelimited(c, "%s", buf.buf);
|
|
printbuf_exit(&buf);
|
|
@@ -280,12 +279,13 @@ void bch2_readahead(struct readahead_control *ractl)
|
|
struct bch_read_bio *rbio =
|
|
rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ,
|
|
GFP_KERNEL, &c->bio_read),
|
|
- opts);
|
|
+ c,
|
|
+ opts,
|
|
+ bch2_readpages_end_io);
|
|
|
|
readpage_iter_advance(&readpages_iter);
|
|
|
|
rbio->bio.bi_iter.bi_sector = folio_sector(folio);
|
|
- rbio->bio.bi_end_io = bch2_readpages_end_io;
|
|
BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
|
|
|
|
bchfs_read(trans, rbio, inode_inum(inode),
|
|
@@ -323,10 +323,10 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping)
|
|
bch2_inode_opts_get(&opts, c, &inode->ei_inode);
|
|
|
|
rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read),
|
|
- opts);
|
|
+ c,
|
|
+ opts,
|
|
+ bch2_read_single_folio_end_io);
|
|
rbio->bio.bi_private = &done;
|
|
- rbio->bio.bi_end_io = bch2_read_single_folio_end_io;
|
|
-
|
|
rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
|
|
rbio->bio.bi_iter.bi_sector = folio_sector(folio);
|
|
BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
|
|
@@ -420,7 +420,7 @@ static void bch2_writepage_io_done(struct bch_write_op *op)
|
|
}
|
|
}
|
|
|
|
- if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) {
|
|
+ if (io->op.flags & BCH_WRITE_wrote_data_inline) {
|
|
bio_for_each_folio_all(fi, bio) {
|
|
struct bch_folio *s;
|
|
|
|
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
|
|
index 2089c36b5866..535bc5fcbcc0 100644
|
|
--- a/fs/bcachefs/fs-io-direct.c
|
|
+++ b/fs/bcachefs/fs-io-direct.c
|
|
@@ -73,6 +73,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
|
|
struct blk_plug plug;
|
|
loff_t offset = req->ki_pos;
|
|
bool sync = is_sync_kiocb(req);
|
|
+ bool split = false;
|
|
size_t shorten;
|
|
ssize_t ret;
|
|
|
|
@@ -99,8 +100,6 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
|
|
GFP_KERNEL,
|
|
&c->dio_read_bioset);
|
|
|
|
- bio->bi_end_io = bch2_direct_IO_read_endio;
|
|
-
|
|
dio = container_of(bio, struct dio_read, rbio.bio);
|
|
closure_init(&dio->cl, NULL);
|
|
|
|
@@ -133,12 +132,13 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
|
|
|
|
goto start;
|
|
while (iter->count) {
|
|
+ split = true;
|
|
+
|
|
bio = bio_alloc_bioset(NULL,
|
|
bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
|
|
REQ_OP_READ,
|
|
GFP_KERNEL,
|
|
&c->bio_read);
|
|
- bio->bi_end_io = bch2_direct_IO_read_split_endio;
|
|
start:
|
|
bio->bi_opf = REQ_OP_READ|REQ_SYNC;
|
|
bio->bi_iter.bi_sector = offset >> 9;
|
|
@@ -160,7 +160,15 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
|
|
if (iter->count)
|
|
closure_get(&dio->cl);
|
|
|
|
- bch2_read(c, rbio_init(bio, opts), inode_inum(inode));
|
|
+ struct bch_read_bio *rbio =
|
|
+ rbio_init(bio,
|
|
+ c,
|
|
+ opts,
|
|
+ split
|
|
+ ? bch2_direct_IO_read_split_endio
|
|
+ : bch2_direct_IO_read_endio);
|
|
+
|
|
+ bch2_read(c, rbio, inode_inum(inode));
|
|
}
|
|
|
|
blk_finish_plug(&plug);
|
|
@@ -511,8 +519,8 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
|
|
dio->op.devs_need_flush = &inode->ei_devs_need_flush;
|
|
|
|
if (sync)
|
|
- dio->op.flags |= BCH_WRITE_SYNC;
|
|
- dio->op.flags |= BCH_WRITE_CHECK_ENOSPC;
|
|
+ dio->op.flags |= BCH_WRITE_sync;
|
|
+ dio->op.flags |= BCH_WRITE_check_enospc;
|
|
|
|
ret = bch2_quota_reservation_add(c, inode, &dio->quota_res,
|
|
bio_sectors(bio), true);
|
|
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
|
|
index 8fcf7c8e5ede..53a421ff136d 100644
|
|
--- a/fs/bcachefs/fsck.c
|
|
+++ b/fs/bcachefs/fsck.c
|
|
@@ -450,7 +450,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
|
|
return ret;
|
|
|
|
struct bch_hash_info dir_hash = bch2_hash_info_init(c, &lostfound);
|
|
- struct qstr name = (struct qstr) QSTR(name_buf);
|
|
+ struct qstr name = QSTR(name_buf);
|
|
|
|
inode->bi_dir = lostfound.bi_inum;
|
|
|
|
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
|
|
index 5353979117b0..6b842c8d21be 100644
|
|
--- a/fs/bcachefs/io_misc.c
|
|
+++ b/fs/bcachefs/io_misc.c
|
|
@@ -115,7 +115,8 @@ int bch2_extent_fallocate(struct btree_trans *trans,
|
|
bch2_increment_clock(c, sectors_allocated, WRITE);
|
|
if (should_print_err(ret)) {
|
|
struct printbuf buf = PRINTBUF;
|
|
- bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9);
|
|
+ lockrestart_do(trans,
|
|
+ bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9));
|
|
prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret));
|
|
bch_err_ratelimited(c, "%s", buf.buf);
|
|
printbuf_exit(&buf);
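
bch2_inum_offset_err_msg_trans() no longer retries transaction restarts internally (see the error.c hunks earlier in this patch), so call sites that are not already inside a transaction loop wrap it in lockrestart_do(), as the hunk above does. The resulting pattern, with illustrative surrounding variables:

        struct printbuf buf = PRINTBUF;

        /* re-run automatically if the btree transaction has to restart */
        lockrestart_do(trans,
                bch2_inum_offset_err_msg_trans(trans, &buf, inum, offset_bytes));

        prt_printf(&buf, "some error: %s", bch2_err_str(ret));
        bch_err_ratelimited(c, "%s", buf.buf);
        printbuf_exit(&buf);
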
|
|
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
|
|
index 8c7b2d3d779d..821ff222b361 100644
|
|
--- a/fs/bcachefs/io_read.c
|
|
+++ b/fs/bcachefs/io_read.c
|
|
@@ -80,6 +80,7 @@ struct promote_op {
|
|
struct rhash_head hash;
|
|
struct bpos pos;
|
|
|
|
+ struct work_struct work;
|
|
struct data_update write;
|
|
struct bio_vec bi_inline_vecs[]; /* must be last */
|
|
};
|
|
@@ -96,6 +97,26 @@ static inline bool have_io_error(struct bch_io_failures *failed)
|
|
return failed && failed->nr;
|
|
}
|
|
|
|
+static bool ptr_being_rewritten(struct bch_read_bio *orig,
|
|
+ unsigned dev,
|
|
+ unsigned flags)
|
|
+{
|
|
+ if (!(flags & BCH_READ_data_update))
|
|
+ return false;
|
|
+
|
|
+ struct data_update *u = container_of(orig, struct data_update, rbio);
|
|
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(u->k.k));
|
|
+ unsigned i = 0;
|
|
+ bkey_for_each_ptr(ptrs, ptr) {
|
|
+ if (ptr->dev == dev &&
|
|
+ u->data_opts.rewrite_ptrs & BIT(i))
|
|
+ return true;
|
|
+ i++;
|
|
+ }
|
|
+
|
|
+ return false;
|
|
+}
|
|
+
|
|
static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
|
|
struct bpos pos,
|
|
struct bch_io_opts opts,
|
|
@@ -105,7 +126,7 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
|
|
if (!have_io_error(failed)) {
|
|
BUG_ON(!opts.promote_target);
|
|
|
|
- if (!(flags & BCH_READ_MAY_PROMOTE))
|
|
+ if (!(flags & BCH_READ_may_promote))
|
|
return -BCH_ERR_nopromote_may_not;
|
|
|
|
if (bch2_bkey_has_target(c, k, opts.promote_target))
|
|
@@ -125,98 +146,94 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
|
|
return 0;
|
|
}
|
|
|
|
-static void promote_free(struct bch_fs *c, struct promote_op *op)
|
|
+static noinline void promote_free(struct bch_read_bio *rbio)
|
|
{
|
|
- int ret;
|
|
+ struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
|
|
+ struct bch_fs *c = rbio->c;
|
|
+
|
|
+ int ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
|
|
+ bch_promote_params);
|
|
+ BUG_ON(ret);
|
|
|
|
bch2_data_update_exit(&op->write);
|
|
|
|
- ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
|
|
- bch_promote_params);
|
|
- BUG_ON(ret);
|
|
bch2_write_ref_put(c, BCH_WRITE_REF_promote);
|
|
kfree_rcu(op, rcu);
|
|
}
|
|
|
|
static void promote_done(struct bch_write_op *wop)
|
|
{
|
|
- struct promote_op *op =
|
|
- container_of(wop, struct promote_op, write.op);
|
|
- struct bch_fs *c = op->write.op.c;
|
|
+ struct promote_op *op = container_of(wop, struct promote_op, write.op);
|
|
+ struct bch_fs *c = op->write.rbio.c;
|
|
|
|
- bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
|
|
- op->start_time);
|
|
- promote_free(c, op);
|
|
+ bch2_time_stats_update(&c->times[BCH_TIME_data_promote], op->start_time);
|
|
+ promote_free(&op->write.rbio);
|
|
}
|
|
|
|
-static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
|
|
+static void promote_start_work(struct work_struct *work)
|
|
{
|
|
- struct bio *bio = &op->write.op.wbio.bio;
|
|
+ struct promote_op *op = container_of(work, struct promote_op, work);
|
|
|
|
- trace_and_count(op->write.op.c, read_promote, &rbio->bio);
|
|
+ bch2_data_update_read_done(&op->write);
|
|
+}
|
|
|
|
- /* we now own pages: */
|
|
- BUG_ON(!rbio->bounce);
|
|
- BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
|
|
+static noinline void promote_start(struct bch_read_bio *rbio)
|
|
+{
|
|
+ struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
|
|
|
|
- memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
|
|
- sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
|
|
- swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
|
|
+ trace_and_count(op->write.op.c, io_read_promote, &rbio->bio);
|
|
|
|
- bch2_data_update_read_done(&op->write, rbio->pick.crc);
|
|
+ INIT_WORK(&op->work, promote_start_work);
|
|
+ queue_work(rbio->c->write_ref_wq, &op->work);
|
|
}
|
|
|
|
-static struct promote_op *__promote_alloc(struct btree_trans *trans,
|
|
- enum btree_id btree_id,
|
|
- struct bkey_s_c k,
|
|
- struct bpos pos,
|
|
- struct extent_ptr_decoded *pick,
|
|
- struct bch_io_opts opts,
|
|
- unsigned sectors,
|
|
- struct bch_read_bio **rbio,
|
|
- struct bch_io_failures *failed)
|
|
+static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
|
|
+ enum btree_id btree_id,
|
|
+ struct bkey_s_c k,
|
|
+ struct bpos pos,
|
|
+ struct extent_ptr_decoded *pick,
|
|
+ unsigned sectors,
|
|
+ unsigned flags,
|
|
+ struct bch_read_bio *orig,
|
|
+ struct bch_io_failures *failed)
|
|
{
|
|
struct bch_fs *c = trans->c;
|
|
- struct promote_op *op = NULL;
|
|
- struct bio *bio;
|
|
- unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
|
|
int ret;
|
|
|
|
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
|
|
- return ERR_PTR(-BCH_ERR_nopromote_no_writes);
|
|
+ struct data_update_opts update_opts = { .write_flags = BCH_WRITE_alloc_nowait };
|
|
|
|
- op = kzalloc(struct_size(op, bi_inline_vecs, pages), GFP_KERNEL);
|
|
- if (!op) {
|
|
- ret = -BCH_ERR_nopromote_enomem;
|
|
- goto err;
|
|
- }
|
|
+ if (!have_io_error(failed)) {
|
|
+ update_opts.target = orig->opts.promote_target;
|
|
+ update_opts.extra_replicas = 1;
|
|
+ update_opts.write_flags |= BCH_WRITE_cached;
|
|
+ update_opts.write_flags |= BCH_WRITE_only_specified_devs;
|
|
+ } else {
|
|
+ update_opts.target = orig->opts.foreground_target;
|
|
|
|
- op->start_time = local_clock();
|
|
- op->pos = pos;
|
|
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
|
+ unsigned ptr_bit = 1;
|
|
+ bkey_for_each_ptr(ptrs, ptr) {
|
|
+ if (bch2_dev_io_failures(failed, ptr->dev) &&
|
|
+ !ptr_being_rewritten(orig, ptr->dev, flags))
|
|
+ update_opts.rewrite_ptrs |= ptr_bit;
|
|
+ ptr_bit <<= 1;
|
|
+ }
|
|
|
|
- /*
|
|
- * We don't use the mempool here because extents that aren't
|
|
- * checksummed or compressed can be too big for the mempool:
|
|
- */
|
|
- *rbio = kzalloc(sizeof(struct bch_read_bio) +
|
|
- sizeof(struct bio_vec) * pages,
|
|
- GFP_KERNEL);
|
|
- if (!*rbio) {
|
|
- ret = -BCH_ERR_nopromote_enomem;
|
|
- goto err;
|
|
+ if (!update_opts.rewrite_ptrs)
|
|
+ return NULL;
|
|
}
|
|
|
|
- rbio_init(&(*rbio)->bio, opts);
|
|
- bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);
|
|
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
|
|
+ return ERR_PTR(-BCH_ERR_nopromote_no_writes);
|
|
|
|
- if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, GFP_KERNEL)) {
|
|
+ struct promote_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
|
|
+ if (!op) {
|
|
ret = -BCH_ERR_nopromote_enomem;
|
|
- goto err;
|
|
+ goto err_put;
|
|
}
|
|
|
|
- (*rbio)->bounce = true;
|
|
- (*rbio)->split = true;
|
|
- (*rbio)->kmalloc = true;
|
|
+ op->start_time = local_clock();
|
|
+ op->pos = pos;
|
|
|
|
if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
|
|
bch_promote_params)) {
|
|
@@ -224,64 +241,43 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
|
|
goto err;
|
|
}
|
|
|
|
- bio = &op->write.op.wbio.bio;
|
|
- bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
|
|
-
|
|
- struct data_update_opts update_opts = {};
|
|
-
|
|
- if (!have_io_error(failed)) {
|
|
- update_opts.target = opts.promote_target;
|
|
- update_opts.extra_replicas = 1;
|
|
- update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED;
|
|
- } else {
|
|
- update_opts.target = opts.foreground_target;
|
|
-
|
|
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
|
- unsigned ptr_bit = 1;
|
|
- bkey_for_each_ptr(ptrs, ptr) {
|
|
- if (bch2_dev_io_failures(failed, ptr->dev))
|
|
- update_opts.rewrite_ptrs |= ptr_bit;
|
|
- ptr_bit <<= 1;
|
|
- }
|
|
- }
|
|
-
|
|
ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
|
|
writepoint_hashed((unsigned long) current),
|
|
- opts,
|
|
+ &orig->opts,
|
|
update_opts,
|
|
btree_id, k);
|
|
/*
|
|
* possible errors: -BCH_ERR_nocow_lock_blocked,
|
|
* -BCH_ERR_ENOSPC_disk_reservation:
|
|
*/
|
|
- if (ret) {
|
|
- BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
|
|
- bch_promote_params));
|
|
- goto err;
|
|
- }
|
|
+ if (ret)
|
|
+ goto err_remove_hash;
|
|
|
|
+ rbio_init_fragment(&op->write.rbio.bio, orig);
|
|
+ op->write.rbio.bounce = true;
|
|
+ op->write.rbio.promote = true;
|
|
op->write.op.end_io = promote_done;
|
|
|
|
- return op;
|
|
+ return &op->write.rbio;
|
|
+err_remove_hash:
|
|
+ BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
|
|
+ bch_promote_params));
|
|
err:
|
|
- if (*rbio)
|
|
- bio_free_pages(&(*rbio)->bio);
|
|
- kfree(*rbio);
|
|
- *rbio = NULL;
|
|
+ bio_free_pages(&op->write.op.wbio.bio);
|
|
/* We may have added to the rhashtable and thus need rcu freeing: */
|
|
kfree_rcu(op, rcu);
|
|
+err_put:
|
|
bch2_write_ref_put(c, BCH_WRITE_REF_promote);
|
|
return ERR_PTR(ret);
|
|
}
|
|
|
|
noinline
|
|
-static struct promote_op *promote_alloc(struct btree_trans *trans,
|
|
+static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
|
|
struct bvec_iter iter,
|
|
struct bkey_s_c k,
|
|
struct extent_ptr_decoded *pick,
|
|
- struct bch_io_opts opts,
|
|
unsigned flags,
|
|
- struct bch_read_bio **rbio,
|
|
+ struct bch_read_bio *orig,
|
|
bool *bounce,
|
|
bool *read_full,
|
|
struct bch_io_failures *failed)
|
|
@@ -301,18 +297,21 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
|
|
struct bpos pos = promote_full
|
|
? bkey_start_pos(k.k)
|
|
: POS(k.k->p.inode, iter.bi_sector);
|
|
- struct promote_op *promote;
|
|
int ret;
|
|
|
|
- ret = should_promote(c, k, pos, opts, flags, failed);
|
|
+ ret = should_promote(c, k, pos, orig->opts, flags, failed);
|
|
if (ret)
|
|
goto nopromote;
|
|
|
|
- promote = __promote_alloc(trans,
|
|
- k.k->type == KEY_TYPE_reflink_v
|
|
- ? BTREE_ID_reflink
|
|
- : BTREE_ID_extents,
|
|
- k, pos, pick, opts, sectors, rbio, failed);
|
|
+ struct bch_read_bio *promote =
|
|
+ __promote_alloc(trans,
|
|
+ k.k->type == KEY_TYPE_reflink_v
|
|
+ ? BTREE_ID_reflink
|
|
+ : BTREE_ID_extents,
|
|
+ k, pos, pick, sectors, flags, orig, failed);
|
|
+ if (!promote)
|
|
+ return NULL;
|
|
+
|
|
ret = PTR_ERR_OR_ZERO(promote);
|
|
if (ret)
|
|
goto nopromote;
|
|
@@ -321,7 +320,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
|
|
*read_full = promote_full;
|
|
return promote;
|
|
nopromote:
|
|
- trace_read_nopromote(c, ret);
|
|
+ trace_io_read_nopromote(c, ret);
|
|
return NULL;
|
|
}
|
|
|
|
@@ -330,9 +329,10 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
|
|
static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
|
|
struct bch_read_bio *rbio, struct bpos read_pos)
|
|
{
|
|
- return bch2_inum_offset_err_msg_trans(trans, out,
|
|
- (subvol_inum) { rbio->subvol, read_pos.inode },
|
|
- read_pos.offset << 9);
|
|
+ return lockrestart_do(trans,
|
|
+ bch2_inum_offset_err_msg_trans(trans, out,
|
|
+ (subvol_inum) { rbio->subvol, read_pos.inode },
|
|
+ read_pos.offset << 9));
|
|
}
|
|
|
|
static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out,
|
|
@@ -375,20 +375,20 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
|
|
{
|
|
BUG_ON(rbio->bounce && !rbio->split);
|
|
|
|
- if (rbio->promote)
|
|
- promote_free(rbio->c, rbio->promote);
|
|
- rbio->promote = NULL;
|
|
-
|
|
- if (rbio->bounce)
|
|
- bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
|
|
-
|
|
if (rbio->split) {
|
|
struct bch_read_bio *parent = rbio->parent;
|
|
|
|
- if (rbio->kmalloc)
|
|
- kfree(rbio);
|
|
- else
|
|
+ if (unlikely(rbio->promote)) {
|
|
+ if (!rbio->bio.bi_status)
|
|
+ promote_start(rbio);
|
|
+ else
|
|
+ promote_free(rbio);
|
|
+ } else {
|
|
+ if (rbio->bounce)
|
|
+ bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
|
|
+
|
|
bio_put(&rbio->bio);
|
|
+ }
|
|
|
|
rbio = parent;
|
|
}
|
|
@@ -408,61 +408,47 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
|
|
bio_endio(&rbio->bio);
|
|
}
|
|
|
|
-static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
|
|
+static noinline void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
|
|
struct bvec_iter bvec_iter,
|
|
struct bch_io_failures *failed,
|
|
unsigned flags)
|
|
{
|
|
+ struct data_update *u = container_of(rbio, struct data_update, rbio);
|
|
struct btree_trans *trans = bch2_trans_get(c);
|
|
- struct btree_iter iter;
|
|
- struct bkey_buf sk;
|
|
- struct bkey_s_c k;
|
|
- int ret;
|
|
-
|
|
- flags &= ~BCH_READ_LAST_FRAGMENT;
|
|
- flags |= BCH_READ_MUST_CLONE;
|
|
-
|
|
- bch2_bkey_buf_init(&sk);
|
|
-
|
|
- bch2_trans_iter_init(trans, &iter, rbio->data_btree,
|
|
- rbio->read_pos, BTREE_ITER_slots);
|
|
retry:
|
|
bch2_trans_begin(trans);
|
|
- rbio->bio.bi_status = 0;
|
|
|
|
- ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
|
|
+ struct btree_iter iter;
|
|
+ struct bkey_s_c k;
|
|
+ int ret = lockrestart_do(trans,
|
|
+ bkey_err(k = bch2_bkey_get_iter(trans, &iter,
|
|
+ u->btree_id, bkey_start_pos(&u->k.k->k),
|
|
+ 0)));
|
|
if (ret)
|
|
goto err;
|
|
|
|
- bch2_bkey_buf_reassemble(&sk, c, k);
|
|
- k = bkey_i_to_s_c(sk.k);
|
|
-
|
|
- if (!bch2_bkey_matches_ptr(c, k,
|
|
- rbio->pick.ptr,
|
|
- rbio->data_pos.offset -
|
|
- rbio->pick.crc.offset)) {
|
|
+ if (!bkey_and_val_eq(k, bkey_i_to_s_c(u->k.k))) {
|
|
/* extent we wanted to read no longer exists: */
|
|
rbio->hole = true;
|
|
- goto out;
|
|
+ goto err;
|
|
}
|
|
|
|
ret = __bch2_read_extent(trans, rbio, bvec_iter,
|
|
- rbio->read_pos,
|
|
- rbio->data_btree,
|
|
- k, 0, failed, flags);
|
|
+ bkey_start_pos(&u->k.k->k),
|
|
+ u->btree_id,
|
|
+ bkey_i_to_s_c(u->k.k),
|
|
+ 0, failed, flags, -1);
|
|
+err:
|
|
+ bch2_trans_iter_exit(trans, &iter);
|
|
+
|
|
if (ret == READ_RETRY)
|
|
goto retry;
|
|
if (ret)
|
|
- goto err;
|
|
-out:
|
|
+ rbio->bio.bi_status = BLK_STS_IOERR;
|
|
+
|
|
+ BUG_ON(atomic_read(&rbio->bio.__bi_remaining) != 1);
|
|
bch2_rbio_done(rbio);
|
|
- bch2_trans_iter_exit(trans, &iter);
|
|
bch2_trans_put(trans);
|
|
- bch2_bkey_buf_exit(&sk, c);
|
|
- return;
|
|
-err:
|
|
- rbio->bio.bi_status = BLK_STS_IOERR;
|
|
- goto out;
|
|
}
|
|
|
|
static void bch2_rbio_retry(struct work_struct *work)
|
|
@@ -478,34 +464,36 @@ static void bch2_rbio_retry(struct work_struct *work)
|
|
};
|
|
struct bch_io_failures failed = { .nr = 0 };
|
|
|
|
- trace_and_count(c, read_retry, &rbio->bio);
|
|
+ trace_io_read_retry(&rbio->bio);
|
|
+ this_cpu_add(c->counters[BCH_COUNTER_io_read_retry],
|
|
+ bvec_iter_sectors(rbio->bvec_iter));
|
|
|
|
if (rbio->retry == READ_RETRY_AVOID)
|
|
bch2_mark_io_failure(&failed, &rbio->pick);
|
|
|
|
- rbio->bio.bi_status = 0;
|
|
+ if (!rbio->split)
|
|
+ rbio->bio.bi_status = 0;
|
|
|
|
rbio = bch2_rbio_free(rbio);
|
|
|
|
- flags |= BCH_READ_IN_RETRY;
|
|
- flags &= ~BCH_READ_MAY_PROMOTE;
|
|
+ flags |= BCH_READ_in_retry;
|
|
+ flags &= ~BCH_READ_may_promote;
|
|
+ flags &= ~BCH_READ_last_fragment;
|
|
+ flags |= BCH_READ_must_clone;
|
|
|
|
- if (flags & BCH_READ_NODECODE) {
|
|
+ if (flags & BCH_READ_data_update)
|
|
bch2_read_retry_nodecode(c, rbio, iter, &failed, flags);
|
|
- } else {
|
|
- flags &= ~BCH_READ_LAST_FRAGMENT;
|
|
- flags |= BCH_READ_MUST_CLONE;
|
|
-
|
|
+ else
|
|
__bch2_read(c, rbio, iter, inum, &failed, flags);
|
|
- }
|
|
}
|
|
|
|
static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
|
|
blk_status_t error)
|
|
{
|
|
rbio->retry = retry;
|
|
+ rbio->saw_error = true;
|
|
|
|
- if (rbio->flags & BCH_READ_IN_RETRY)
|
|
+ if (rbio->flags & BCH_READ_in_retry)
|
|
return;
|
|
|
|
if (retry == READ_ERR) {
|
|
@@ -712,32 +700,40 @@ static void __bch2_read_endio(struct work_struct *work)
|
|
if (unlikely(rbio->narrow_crcs))
|
|
bch2_rbio_narrow_crcs(rbio);
|
|
|
|
- if (rbio->flags & BCH_READ_NODECODE)
|
|
- goto nodecode;
|
|
+ if (likely(!(rbio->flags & BCH_READ_data_update))) {
|
|
+ /* Adjust crc to point to subset of data we want: */
|
|
+ crc.offset += rbio->offset_into_extent;
|
|
+ crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
|
|
|
|
- /* Adjust crc to point to subset of data we want: */
|
|
- crc.offset += rbio->offset_into_extent;
|
|
- crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
|
|
+ if (crc_is_compressed(crc)) {
|
|
+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
|
|
+ if (ret)
|
|
+ goto decrypt_err;
|
|
|
|
- if (crc_is_compressed(crc)) {
|
|
- ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
|
|
- if (ret)
|
|
- goto decrypt_err;
|
|
+ if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
|
|
+ !c->opts.no_data_io)
|
|
+ goto decompression_err;
|
|
+ } else {
|
|
+ /* don't need to decrypt the entire bio: */
|
|
+ nonce = nonce_add(nonce, crc.offset << 9);
|
|
+ bio_advance(src, crc.offset << 9);
|
|
|
|
- if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
|
|
- !c->opts.no_data_io)
|
|
- goto decompression_err;
|
|
- } else {
|
|
- /* don't need to decrypt the entire bio: */
|
|
- nonce = nonce_add(nonce, crc.offset << 9);
|
|
- bio_advance(src, crc.offset << 9);
|
|
+ BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
|
|
+ src->bi_iter.bi_size = dst_iter.bi_size;
|
|
|
|
- BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
|
|
- src->bi_iter.bi_size = dst_iter.bi_size;
|
|
+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
|
|
+ if (ret)
|
|
+ goto decrypt_err;
|
|
|
|
- ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
|
|
- if (ret)
|
|
- goto decrypt_err;
|
|
+ if (rbio->bounce) {
|
|
+ struct bvec_iter src_iter = src->bi_iter;
|
|
+
|
|
+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ if (rbio->split)
|
|
+ rbio->parent->pick = rbio->pick;
|
|
|
|
if (rbio->bounce) {
|
|
struct bvec_iter src_iter = src->bi_iter;
|
|
@@ -754,12 +750,9 @@ static void __bch2_read_endio(struct work_struct *work)
|
|
ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
|
|
if (ret)
|
|
goto decrypt_err;
|
|
-
|
|
- promote_start(rbio->promote, rbio);
|
|
- rbio->promote = NULL;
|
|
}
|
|
-nodecode:
|
|
- if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
|
|
+
|
|
+ if (likely(!(rbio->flags & BCH_READ_in_retry))) {
|
|
rbio = bch2_rbio_free(rbio);
|
|
bch2_rbio_done(rbio);
|
|
}
|
|
@@ -772,8 +765,8 @@ static void __bch2_read_endio(struct work_struct *work)
|
|
* reading into buffers owned by userspace (that userspace can
|
|
* scribble over) - retry the read, bouncing it this time:
|
|
*/
|
|
- if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
|
|
- rbio->flags |= BCH_READ_MUST_BOUNCE;
|
|
+ if (!rbio->bounce && (rbio->flags & BCH_READ_user_mapped)) {
|
|
+ rbio->flags |= BCH_READ_must_bounce;
|
|
bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR);
|
|
goto out;
|
|
}
|
|
@@ -810,11 +803,11 @@ static void bch2_read_endio(struct bio *bio)
|
|
return;
|
|
}
|
|
|
|
- if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
|
|
+ if (((rbio->flags & BCH_READ_retry_if_stale) && race_fault()) ||
|
|
(ca && dev_ptr_stale(ca, &rbio->pick.ptr))) {
|
|
- trace_and_count(c, read_reuse_race, &rbio->bio);
|
|
+ trace_and_count(c, io_read_reuse_race, &rbio->bio);
|
|
|
|
- if (rbio->flags & BCH_READ_RETRY_IF_STALE)
|
|
+ if (rbio->flags & BCH_READ_retry_if_stale)
|
|
bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
|
|
else
|
|
bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
|
|
@@ -883,12 +876,11 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
|
|
struct bvec_iter iter, struct bpos read_pos,
|
|
enum btree_id data_btree, struct bkey_s_c k,
|
|
unsigned offset_into_extent,
|
|
- struct bch_io_failures *failed, unsigned flags)
|
|
+ struct bch_io_failures *failed, unsigned flags, int dev)
|
|
{
|
|
struct bch_fs *c = trans->c;
|
|
struct extent_ptr_decoded pick;
|
|
struct bch_read_bio *rbio = NULL;
|
|
- struct promote_op *promote = NULL;
|
|
bool bounce = false, read_full = false, narrow_crcs = false;
|
|
struct bpos data_pos = bkey_start_pos(k.k);
|
|
int pick_ret;
|
|
@@ -902,10 +894,12 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
|
|
swap(iter.bi_size, bytes);
|
|
bio_advance_iter(&orig->bio, &iter, bytes);
|
|
zero_fill_bio_iter(&orig->bio, iter);
|
|
+ this_cpu_add(c->counters[BCH_COUNTER_io_read_inline],
|
|
+ bvec_iter_sectors(iter));
|
|
goto out_read_done;
|
|
}
|
|
retry_pick:
|
|
- pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
|
|
+ pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev);
|
|
|
|
/* hole or reservation - just zero fill: */
|
|
if (!pick_ret)
|
|
@@ -941,7 +935,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
|
|
* retry path, don't check here, it'll be caught in bch2_read_endio()
|
|
* and we'll end up in the retry path:
|
|
*/
|
|
- if ((flags & BCH_READ_IN_RETRY) &&
|
|
+ if ((flags & BCH_READ_in_retry) &&
|
|
!pick.ptr.cached &&
|
|
ca &&
|
|
unlikely(dev_ptr_stale(ca, &pick.ptr))) {
|
|
@@ -955,48 +949,53 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
|
|
* Unlock the iterator while the btree node's lock is still in
|
|
* cache, before doing the IO:
|
|
*/
|
|
- bch2_trans_unlock(trans);
|
|
+ if (!(flags & BCH_READ_in_retry))
|
|
+ bch2_trans_unlock(trans);
|
|
+ else
|
|
+ bch2_trans_unlock_long(trans);
|
|
+
|
|
+ if (!(flags & BCH_READ_data_update)) {
|
|
+ if (!(flags & BCH_READ_last_fragment) ||
|
|
+ bio_flagged(&orig->bio, BIO_CHAIN))
|
|
+ flags |= BCH_READ_must_clone;
|
|
+
|
|
+ narrow_crcs = !(flags & BCH_READ_in_retry) &&
|
|
+ bch2_can_narrow_extent_crcs(k, pick.crc);
|
|
+
|
|
+ if (narrow_crcs && (flags & BCH_READ_user_mapped))
|
|
+ flags |= BCH_READ_must_bounce;
|
|
|
|
- if (flags & BCH_READ_NODECODE) {
|
|
+ EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
|
|
+
|
|
+ if (crc_is_compressed(pick.crc) ||
|
|
+ (pick.crc.csum_type != BCH_CSUM_none &&
|
|
+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
|
|
+ (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
|
|
+ (flags & BCH_READ_user_mapped)) ||
|
|
+ (flags & BCH_READ_must_bounce)))) {
|
|
+ read_full = true;
|
|
+ bounce = true;
|
|
+ }
|
|
+ } else {
|
|
+ read_full = true;
|
|
/*
|
|
* can happen if we retry, and the extent we were going to read
|
|
* has been merged in the meantime:
|
|
*/
|
|
- if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) {
|
|
+ struct data_update *u = container_of(orig, struct data_update, rbio);
|
|
+ if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) {
|
|
+ BUG();
|
|
if (ca)
|
|
percpu_ref_put(&ca->io_ref);
|
|
goto hole;
|
|
}
|
|
|
|
iter.bi_size = pick.crc.compressed_size << 9;
|
|
- goto get_bio;
|
|
- }
|
|
-
|
|
- if (!(flags & BCH_READ_LAST_FRAGMENT) ||
|
|
- bio_flagged(&orig->bio, BIO_CHAIN))
|
|
- flags |= BCH_READ_MUST_CLONE;
|
|
-
|
|
- narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
|
|
- bch2_can_narrow_extent_crcs(k, pick.crc);
|
|
-
|
|
- if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
|
|
- flags |= BCH_READ_MUST_BOUNCE;
|
|
-
|
|
- EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
|
|
-
|
|
- if (crc_is_compressed(pick.crc) ||
|
|
- (pick.crc.csum_type != BCH_CSUM_none &&
|
|
- (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
|
|
- (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
|
|
- (flags & BCH_READ_USER_MAPPED)) ||
|
|
- (flags & BCH_READ_MUST_BOUNCE)))) {
|
|
- read_full = true;
|
|
- bounce = true;
|
|
}
|
|
|
|
if (orig->opts.promote_target || have_io_error(failed))
|
|
- promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
|
|
- &rbio, &bounce, &read_full, failed);
|
|
+ rbio = promote_alloc(trans, iter, k, &pick, flags, orig,
|
|
+ &bounce, &read_full, failed);
|
|
|
|
if (!read_full) {
|
|
EBUG_ON(crc_is_compressed(pick.crc));
|
|
@@ -1015,7 +1014,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
|
|
pick.crc.offset = 0;
|
|
pick.crc.live_size = bvec_iter_sectors(iter);
|
|
}
|
|
-get_bio:
|
|
+
|
|
if (rbio) {
|
|
/*
|
|
* promote already allocated bounce rbio:
|
|
@@ -1030,17 +1029,16 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
|
|
} else if (bounce) {
|
|
unsigned sectors = pick.crc.compressed_size;
|
|
|
|
- rbio = rbio_init(bio_alloc_bioset(NULL,
|
|
+ rbio = rbio_init_fragment(bio_alloc_bioset(NULL,
|
|
DIV_ROUND_UP(sectors, PAGE_SECTORS),
|
|
0,
|
|
GFP_NOFS,
|
|
&c->bio_read_split),
|
|
- orig->opts);
|
|
+ orig);
|
|
|
|
bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
|
|
rbio->bounce = true;
|
|
- rbio->split = true;
|
|
- } else if (flags & BCH_READ_MUST_CLONE) {
|
|
+ } else if (flags & BCH_READ_must_clone) {
|
|
/*
|
|
* Have to clone if there were any splits, due to error
|
|
* reporting issues (if a split errored, and retrying didn't
|
|
@@ -1049,11 +1047,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
|
|
* from the whole bio, in which case we don't want to retry and
|
|
* lose the error)
|
|
*/
|
|
- rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
|
|
+ rbio = rbio_init_fragment(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
|
|
&c->bio_read_split),
|
|
- orig->opts);
|
|
+ orig);
|
|
rbio->bio.bi_iter = iter;
|
|
- rbio->split = true;
|
|
} else {
|
|
rbio = orig;
|
|
rbio->bio.bi_iter = iter;
|
|
@@ -1062,11 +1059,8 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
|
|
|
|
EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);
|
|
|
|
- rbio->c = c;
|
|
rbio->submit_time = local_clock();
|
|
- if (rbio->split)
|
|
- rbio->parent = orig;
|
|
- else
|
|
+ if (!rbio->split)
|
|
rbio->end_io = orig->bio.bi_end_io;
|
|
rbio->bvec_iter = iter;
|
|
rbio->offset_into_extent= offset_into_extent;
|
|
@@ -1076,41 +1070,38 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
|
|
rbio->hole = 0;
|
|
rbio->retry = 0;
|
|
rbio->context = 0;
|
|
- /* XXX: only initialize this if needed */
|
|
- rbio->devs_have = bch2_bkey_devs(k);
|
|
rbio->pick = pick;
|
|
rbio->subvol = orig->subvol;
|
|
rbio->read_pos = read_pos;
|
|
rbio->data_btree = data_btree;
|
|
rbio->data_pos = data_pos;
|
|
rbio->version = k.k->bversion;
|
|
- rbio->promote = promote;
|
|
INIT_WORK(&rbio->work, NULL);
|
|
|
|
- if (flags & BCH_READ_NODECODE)
|
|
- orig->pick = pick;
|
|
-
|
|
rbio->bio.bi_opf = orig->bio.bi_opf;
|
|
rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
|
|
rbio->bio.bi_end_io = bch2_read_endio;
|
|
|
|
if (rbio->bounce)
|
|
- trace_and_count(c, read_bounce, &rbio->bio);
|
|
+ trace_and_count(c, io_read_bounce, &rbio->bio);
|
|
|
|
- this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
|
|
+ if (!(flags & BCH_READ_data_update))
|
|
+ this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
+ else
+ this_cpu_add(c->counters[BCH_COUNTER_io_move_read], bio_sectors(&rbio->bio));
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);

/*
* If it's being moved internally, we don't want to flag it as a cache
* hit:
*/
- if (ca && pick.ptr.cached && !(flags & BCH_READ_NODECODE))
+ if (ca && pick.ptr.cached && !(flags & BCH_READ_data_update))
bch2_bucket_io_time_reset(trans, pick.ptr.dev,
PTR_BUCKET_NR(ca, &pick.ptr), READ);

- if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
+ if (!(flags & (BCH_READ_in_retry|BCH_READ_last_fragment))) {
bio_inc_remaining(&orig->bio);
- trace_and_count(c, read_split, &orig->bio);
+ trace_and_count(c, io_read_split, &orig->bio);
}

if (!rbio->pick.idx) {
@@ -1132,10 +1123,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
bio_set_dev(&rbio->bio, ca->disk_sb.bdev);

if (unlikely(c->opts.no_data_io)) {
- if (likely(!(flags & BCH_READ_IN_RETRY)))
+ if (likely(!(flags & BCH_READ_in_retry)))
bio_endio(&rbio->bio);
} else {
- if (likely(!(flags & BCH_READ_IN_RETRY)))
+ if (likely(!(flags & BCH_READ_in_retry)))
submit_bio(&rbio->bio);
else
submit_bio_wait(&rbio->bio);
@@ -1153,11 +1144,11 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
goto out;
}

- if (likely(!(flags & BCH_READ_IN_RETRY)))
+ if (likely(!(flags & BCH_READ_in_retry)))
bio_endio(&rbio->bio);
}
out:
- if (likely(!(flags & BCH_READ_IN_RETRY))) {
+ if (likely(!(flags & BCH_READ_in_retry))) {
return 0;
} else {
int ret;
@@ -1180,24 +1171,26 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
}

err:
- if (flags & BCH_READ_IN_RETRY)
+ if (flags & BCH_READ_in_retry)
return READ_ERR;

orig->bio.bi_status = BLK_STS_IOERR;
goto out_read_done;

hole:
+ this_cpu_add(c->counters[BCH_COUNTER_io_read_hole],
+ bvec_iter_sectors(iter));
/*
- * won't normally happen in the BCH_READ_NODECODE
+ * won't normally happen in the BCH_READ_data_update
* (bch2_move_extent()) path, but if we retry and the extent we wanted
* to read no longer exists we have to signal that:
*/
- if (flags & BCH_READ_NODECODE)
+ if (flags & BCH_READ_data_update)
orig->hole = true;

zero_fill_bio_iter(&orig->bio, iter);
out_read_done:
- if (flags & BCH_READ_LAST_FRAGMENT)
+ if (flags & BCH_READ_last_fragment)
bch2_rbio_done(orig);
return 0;
}
@@ -1212,7 +1205,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
struct bkey_s_c k;
int ret;

- BUG_ON(flags & BCH_READ_NODECODE);
+ BUG_ON(flags & BCH_READ_data_update);

bch2_bkey_buf_init(&sk);
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
@@ -1262,15 +1255,15 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
swap(bvec_iter.bi_size, bytes);

if (bvec_iter.bi_size == bytes)
- flags |= BCH_READ_LAST_FRAGMENT;
+ flags |= BCH_READ_last_fragment;

ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
data_btree, k,
- offset_into_extent, failed, flags);
+ offset_into_extent, failed, flags, -1);
if (ret)
goto err;

- if (flags & BCH_READ_LAST_FRAGMENT)
+ if (flags & BCH_READ_last_fragment)
break;

swap(bvec_iter.bi_size, bytes);
@@ -1287,7 +1280,9 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,

if (ret) {
struct printbuf buf = PRINTBUF;
- bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9);
+ lockrestart_do(trans,
+ bch2_inum_offset_err_msg_trans(trans, &buf, inum,
+ bvec_iter.bi_sector << 9));
prt_printf(&buf, "read error %i from btree lookup", ret);
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index a82e8a94ccb6..73275da5d2c4 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -35,20 +35,19 @@ struct bch_read_bio {
u16 flags;
union {
struct {
- u16 bounce:1,
+ u16 promote:1,
+ bounce:1,
split:1,
- kmalloc:1,
have_ioref:1,
narrow_crcs:1,
hole:1,
+ saw_error:1,
retry:2,
context:2;
};
u16 _state;
};

- struct bch_devs_list devs_have;
-
struct extent_ptr_decoded pick;

/*
@@ -65,8 +64,6 @@ struct bch_read_bio {
struct bpos data_pos;
struct bversion version;

- struct promote_op *promote;
-
struct bch_io_opts opts;

struct work_struct work;
@@ -108,23 +105,32 @@ static inline int bch2_read_indirect_extent(struct btree_trans *trans,
return 0;
}

+#define BCH_READ_FLAGS() \
+ x(retry_if_stale) \
+ x(may_promote) \
+ x(user_mapped) \
+ x(data_update) \
+ x(last_fragment) \
+ x(must_bounce) \
+ x(must_clone) \
+ x(in_retry)
+
+enum __bch_read_flags {
+#define x(n) __BCH_READ_##n,
+ BCH_READ_FLAGS()
+#undef x
+};
+
enum bch_read_flags {
- BCH_READ_RETRY_IF_STALE = 1 << 0,
- BCH_READ_MAY_PROMOTE = 1 << 1,
- BCH_READ_USER_MAPPED = 1 << 2,
- BCH_READ_NODECODE = 1 << 3,
- BCH_READ_LAST_FRAGMENT = 1 << 4,
-
- /* internal: */
- BCH_READ_MUST_BOUNCE = 1 << 5,
- BCH_READ_MUST_CLONE = 1 << 6,
- BCH_READ_IN_RETRY = 1 << 7,
+#define x(n) BCH_READ_##n = BIT(__BCH_READ_##n),
+ BCH_READ_FLAGS()
+#undef x
};

int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
struct bvec_iter, struct bpos, enum btree_id,
struct bkey_s_c, unsigned,
- struct bch_io_failures *, unsigned);
+ struct bch_io_failures *, unsigned, int);

static inline void bch2_read_extent(struct btree_trans *trans,
struct bch_read_bio *rbio, struct bpos read_pos,
@@ -132,7 +138,7 @@ static inline void bch2_read_extent(struct btree_trans *trans,
unsigned offset_into_extent, unsigned flags)
{
__bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
- data_btree, k, offset_into_extent, NULL, flags);
+ data_btree, k, offset_into_extent, NULL, flags, -1);
}

void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
@@ -145,24 +151,39 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,

BUG_ON(rbio->_state);

- rbio->c = c;
- rbio->start_time = local_clock();
rbio->subvol = inum.subvol;

__bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed,
- BCH_READ_RETRY_IF_STALE|
- BCH_READ_MAY_PROMOTE|
- BCH_READ_USER_MAPPED);
+ BCH_READ_retry_if_stale|
+ BCH_READ_may_promote|
+ BCH_READ_user_mapped);
}

-static inline struct bch_read_bio *rbio_init(struct bio *bio,
- struct bch_io_opts opts)
+static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio,
+ struct bch_read_bio *orig)
{
struct bch_read_bio *rbio = to_rbio(bio);

+ rbio->c = orig->c;
rbio->_state = 0;
- rbio->promote = NULL;
- rbio->opts = opts;
+ rbio->split = true;
+ rbio->parent = orig;
+ rbio->opts = orig->opts;
+ return rbio;
+}
+
+static inline struct bch_read_bio *rbio_init(struct bio *bio,
+ struct bch_fs *c,
+ struct bch_io_opts opts,
+ bio_end_io_t end_io)
+{
+ struct bch_read_bio *rbio = to_rbio(bio);
+
+ rbio->start_time = local_clock();
+ rbio->c = c;
+ rbio->_state = 0;
+ rbio->opts = opts;
+ rbio->bio.bi_end_io = end_io;
return rbio;
}
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
|
|
index dd508d93e9fc..0177198e90eb 100644
|
|
--- a/fs/bcachefs/io_write.c
|
|
+++ b/fs/bcachefs/io_write.c
|
|
@@ -374,7 +374,7 @@ static int bch2_write_index_default(struct bch_write_op *op)
|
|
bch2_extent_update(trans, inum, &iter, sk.k,
|
|
&op->res,
|
|
op->new_i_size, &op->i_sectors_delta,
|
|
- op->flags & BCH_WRITE_CHECK_ENOSPC);
|
|
+ op->flags & BCH_WRITE_check_enospc);
|
|
bch2_trans_iter_exit(trans, &iter);
|
|
|
|
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
@@ -403,7 +403,7 @@ static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op,
|
|
(subvol_inum) { op->subvol, op->pos.inode, },
|
|
offset << 9);
|
|
prt_printf(out, "write error%s: ",
|
|
- op->flags & BCH_WRITE_MOVE ? "(internal move)" : "");
|
|
+ op->flags & BCH_WRITE_move ? "(internal move)" : "");
|
|
}
|
|
|
|
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
|
|
@@ -483,7 +483,7 @@ static void bch2_write_done(struct closure *cl)
|
|
bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
|
|
bch2_disk_reservation_put(c, &op->res);
|
|
|
|
- if (!(op->flags & BCH_WRITE_MOVE))
|
|
+ if (!(op->flags & BCH_WRITE_move))
|
|
bch2_write_ref_put(c, BCH_WRITE_REF_write);
|
|
bch2_keylist_free(&op->insert_keys, op->inline_keys);
|
|
|
|
@@ -529,7 +529,7 @@ static void __bch2_write_index(struct bch_write_op *op)
|
|
unsigned dev;
|
|
int ret = 0;
|
|
|
|
- if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) {
|
|
+ if (unlikely(op->flags & BCH_WRITE_io_error)) {
|
|
ret = bch2_write_drop_io_error_ptrs(op);
|
|
if (ret)
|
|
goto err;
|
|
@@ -538,7 +538,7 @@ static void __bch2_write_index(struct bch_write_op *op)
|
|
if (!bch2_keylist_empty(keys)) {
|
|
u64 sectors_start = keylist_sectors(keys);
|
|
|
|
- ret = !(op->flags & BCH_WRITE_MOVE)
|
|
+ ret = !(op->flags & BCH_WRITE_move)
|
|
? bch2_write_index_default(op)
|
|
: bch2_data_update_index_update(op);
|
|
|
|
@@ -570,14 +570,22 @@ static void __bch2_write_index(struct bch_write_op *op)
|
|
err:
|
|
keys->top = keys->keys;
|
|
op->error = ret;
|
|
- op->flags |= BCH_WRITE_SUBMITTED;
|
|
+ op->flags |= BCH_WRITE_submitted;
|
|
goto out;
|
|
}
|
|
|
|
static inline void __wp_update_state(struct write_point *wp, enum write_point_state state)
|
|
{
|
|
if (state != wp->state) {
|
|
+ struct task_struct *p = current;
|
|
u64 now = ktime_get_ns();
|
|
+ u64 runtime = p->se.sum_exec_runtime +
|
|
+ (now - p->se.exec_start);
|
|
+
|
|
+ if (state == WRITE_POINT_runnable)
|
|
+ wp->last_runtime = runtime;
|
|
+ else if (wp->state == WRITE_POINT_runnable)
|
|
+ wp->time[WRITE_POINT_running] += runtime - wp->last_runtime;
|
|
|
|
if (wp->last_state_change &&
|
|
time_after64(now, wp->last_state_change))
|
|
@@ -591,7 +599,7 @@ static inline void wp_update_state(struct write_point *wp, bool running)
|
|
{
|
|
enum write_point_state state;
|
|
|
|
- state = running ? WRITE_POINT_running :
|
|
+ state = running ? WRITE_POINT_runnable:
|
|
!list_empty(&wp->writes) ? WRITE_POINT_waiting_io
|
|
: WRITE_POINT_stopped;
|
|
|
|
@@ -605,8 +613,8 @@ static CLOSURE_CALLBACK(bch2_write_index)
|
|
struct workqueue_struct *wq = index_update_wq(op);
|
|
unsigned long flags;
|
|
|
|
- if ((op->flags & BCH_WRITE_SUBMITTED) &&
|
|
- (op->flags & BCH_WRITE_MOVE))
|
|
+ if ((op->flags & BCH_WRITE_submitted) &&
|
|
+ (op->flags & BCH_WRITE_move))
|
|
bch2_bio_free_pages_pool(op->c, &op->wbio.bio);
|
|
|
|
spin_lock_irqsave(&wp->writes_lock, flags);
|
|
@@ -644,11 +652,11 @@ void bch2_write_point_do_index_updates(struct work_struct *work)
|
|
if (!op)
|
|
break;
|
|
|
|
- op->flags |= BCH_WRITE_IN_WORKER;
|
|
+ op->flags |= BCH_WRITE_in_worker;
|
|
|
|
__bch2_write_index(op);
|
|
|
|
- if (!(op->flags & BCH_WRITE_SUBMITTED))
|
|
+ if (!(op->flags & BCH_WRITE_submitted))
|
|
__bch2_write(op);
|
|
else
|
|
bch2_write_done(&op->cl);
|
|
@@ -672,7 +680,7 @@ static void bch2_write_endio(struct bio *bio)
|
|
"data write error: %s",
|
|
bch2_blk_status_to_str(bio->bi_status))) {
|
|
set_bit(wbio->dev, op->failed.d);
|
|
- op->flags |= BCH_WRITE_IO_ERROR;
|
|
+ op->flags |= BCH_WRITE_io_error;
|
|
}
|
|
|
|
if (wbio->nocow) {
|
|
@@ -719,7 +727,7 @@ static void init_append_extent(struct bch_write_op *op,
|
|
bch2_extent_crc_append(&e->k_i, crc);
|
|
|
|
bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size,
|
|
- op->flags & BCH_WRITE_CACHED);
|
|
+ op->flags & BCH_WRITE_cached);
|
|
|
|
bch2_keylist_push(&op->insert_keys);
|
|
}
|
|
@@ -836,7 +844,7 @@ static enum prep_encoded_ret {
|
|
struct bch_fs *c = op->c;
|
|
struct bio *bio = &op->wbio.bio;
|
|
|
|
- if (!(op->flags & BCH_WRITE_DATA_ENCODED))
|
|
+ if (!(op->flags & BCH_WRITE_data_encoded))
|
|
return PREP_ENCODED_OK;
|
|
|
|
BUG_ON(bio_sectors(bio) != op->crc.compressed_size);
|
|
@@ -944,9 +952,9 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
|
|
if (ec_buf ||
|
|
op->compression_opt ||
|
|
(op->csum_type &&
|
|
- !(op->flags & BCH_WRITE_PAGES_STABLE)) ||
|
|
+ !(op->flags & BCH_WRITE_pages_stable)) ||
|
|
(bch2_csum_type_is_encryption(op->csum_type) &&
|
|
- !(op->flags & BCH_WRITE_PAGES_OWNED))) {
|
|
+ !(op->flags & BCH_WRITE_pages_owned))) {
|
|
dst = bch2_write_bio_alloc(c, wp, src,
|
|
&page_alloc_failed,
|
|
ec_buf);
|
|
@@ -966,7 +974,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
|
|
break;
|
|
|
|
BUG_ON(op->compression_opt &&
|
|
- (op->flags & BCH_WRITE_DATA_ENCODED) &&
|
|
+ (op->flags & BCH_WRITE_data_encoded) &&
|
|
bch2_csum_type_is_encryption(op->crc.csum_type));
|
|
BUG_ON(op->compression_opt && !bounce);
|
|
|
|
@@ -1004,7 +1012,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
|
|
}
|
|
}
|
|
|
|
- if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
|
|
+ if ((op->flags & BCH_WRITE_data_encoded) &&
|
|
!crc_is_compressed(crc) &&
|
|
bch2_csum_type_is_encryption(op->crc.csum_type) ==
|
|
bch2_csum_type_is_encryption(op->csum_type)) {
|
|
@@ -1036,7 +1044,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
|
|
crc.compression_type = compression_type;
|
|
crc.nonce = nonce;
|
|
} else {
|
|
- if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
|
|
+ if ((op->flags & BCH_WRITE_data_encoded) &&
|
|
bch2_rechecksum_bio(c, src, version, op->crc,
|
|
NULL, &op->crc,
|
|
src_len >> 9,
|
|
@@ -1210,9 +1218,9 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
|
|
|
|
static void __bch2_nocow_write_done(struct bch_write_op *op)
|
|
{
|
|
- if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) {
|
|
+ if (unlikely(op->flags & BCH_WRITE_io_error)) {
|
|
op->error = -EIO;
|
|
- } else if (unlikely(op->flags & BCH_WRITE_CONVERT_UNWRITTEN))
|
|
+ } else if (unlikely(op->flags & BCH_WRITE_convert_unwritten))
|
|
bch2_nocow_write_convert_unwritten(op);
|
|
}
|
|
|
|
@@ -1241,7 +1249,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
|
|
struct bucket_to_lock *stale_at;
|
|
int stale, ret;
|
|
|
|
- if (op->flags & BCH_WRITE_MOVE)
|
|
+ if (op->flags & BCH_WRITE_move)
|
|
return;
|
|
|
|
darray_init(&buckets);
|
|
@@ -1299,7 +1307,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
|
|
}), GFP_KERNEL|__GFP_NOFAIL);
|
|
|
|
if (ptr->unwritten)
|
|
- op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
|
|
+ op->flags |= BCH_WRITE_convert_unwritten;
|
|
}
|
|
|
|
/* Unlock before taking nocow locks, doing IO: */
|
|
@@ -1307,7 +1315,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
|
|
bch2_trans_unlock(trans);
|
|
|
|
bch2_cut_front(op->pos, op->insert_keys.top);
|
|
- if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN)
|
|
+ if (op->flags & BCH_WRITE_convert_unwritten)
|
|
bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top);
|
|
|
|
darray_for_each(buckets, i) {
|
|
@@ -1332,7 +1340,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
|
|
wbio_init(bio)->put_bio = true;
|
|
bio->bi_opf = op->wbio.bio.bi_opf;
|
|
} else {
|
|
- op->flags |= BCH_WRITE_SUBMITTED;
|
|
+ op->flags |= BCH_WRITE_submitted;
|
|
}
|
|
|
|
op->pos.offset += bio_sectors(bio);
|
|
@@ -1346,7 +1354,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
|
|
op->insert_keys.top, true);
|
|
|
|
bch2_keylist_push(&op->insert_keys);
|
|
- if (op->flags & BCH_WRITE_SUBMITTED)
|
|
+ if (op->flags & BCH_WRITE_submitted)
|
|
break;
|
|
bch2_btree_iter_advance(&iter);
|
|
}
|
|
@@ -1366,15 +1374,15 @@ static void bch2_nocow_write(struct bch_write_op *op)
|
|
bch_err_ratelimited(c, "%s", buf.buf);
|
|
printbuf_exit(&buf);
|
|
op->error = ret;
|
|
- op->flags |= BCH_WRITE_SUBMITTED;
|
|
+ op->flags |= BCH_WRITE_submitted;
|
|
}
|
|
|
|
/* fallback to cow write path? */
|
|
- if (!(op->flags & BCH_WRITE_SUBMITTED)) {
|
|
+ if (!(op->flags & BCH_WRITE_submitted)) {
|
|
closure_sync(&op->cl);
|
|
__bch2_nocow_write_done(op);
|
|
op->insert_keys.top = op->insert_keys.keys;
|
|
- } else if (op->flags & BCH_WRITE_SYNC) {
|
|
+ } else if (op->flags & BCH_WRITE_sync) {
|
|
closure_sync(&op->cl);
|
|
bch2_nocow_write_done(&op->cl.work);
|
|
} else {
|
|
@@ -1426,7 +1434,7 @@ static void __bch2_write(struct bch_write_op *op)
|
|
|
|
if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) {
|
|
bch2_nocow_write(op);
|
|
- if (op->flags & BCH_WRITE_SUBMITTED)
|
|
+ if (op->flags & BCH_WRITE_submitted)
|
|
goto out_nofs_restore;
|
|
}
|
|
again:
|
|
@@ -1456,7 +1464,7 @@ static void __bch2_write(struct bch_write_op *op)
|
|
ret = bch2_trans_run(c, lockrestart_do(trans,
|
|
bch2_alloc_sectors_start_trans(trans,
|
|
op->target,
|
|
- op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED),
|
|
+ op->opts.erasure_code && !(op->flags & BCH_WRITE_cached),
|
|
op->write_point,
|
|
&op->devs_have,
|
|
op->nr_replicas,
|
|
@@ -1479,10 +1487,10 @@ static void __bch2_write(struct bch_write_op *op)
|
|
bch2_alloc_sectors_done_inlined(c, wp);
|
|
err:
|
|
if (ret <= 0) {
|
|
- op->flags |= BCH_WRITE_SUBMITTED;
|
|
+ op->flags |= BCH_WRITE_submitted;
|
|
|
|
if (unlikely(ret < 0)) {
|
|
- if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) {
|
|
+ if (!(op->flags & BCH_WRITE_alloc_nowait)) {
|
|
struct printbuf buf = PRINTBUF;
|
|
bch2_write_op_error(&buf, op);
|
|
prt_printf(&buf, "%s(): %s", __func__, bch2_err_str(ret));
|
|
@@ -1514,14 +1522,14 @@ static void __bch2_write(struct bch_write_op *op)
|
|
* synchronously here if we weren't able to submit all of the IO at
|
|
* once, as that signals backpressure to the caller.
|
|
*/
|
|
- if ((op->flags & BCH_WRITE_SYNC) ||
|
|
- (!(op->flags & BCH_WRITE_SUBMITTED) &&
|
|
- !(op->flags & BCH_WRITE_IN_WORKER))) {
|
|
+ if ((op->flags & BCH_WRITE_sync) ||
|
|
+ (!(op->flags & BCH_WRITE_submitted) &&
|
|
+ !(op->flags & BCH_WRITE_in_worker))) {
|
|
bch2_wait_on_allocator(c, &op->cl);
|
|
|
|
__bch2_write_index(op);
|
|
|
|
- if (!(op->flags & BCH_WRITE_SUBMITTED))
|
|
+ if (!(op->flags & BCH_WRITE_submitted))
|
|
goto again;
|
|
bch2_write_done(&op->cl);
|
|
} else {
|
|
@@ -1542,8 +1550,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
|
|
|
|
memset(&op->failed, 0, sizeof(op->failed));
|
|
|
|
- op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
|
|
- op->flags |= BCH_WRITE_SUBMITTED;
|
|
+ op->flags |= BCH_WRITE_wrote_data_inline;
|
|
+ op->flags |= BCH_WRITE_submitted;
|
|
|
|
bch2_check_set_feature(op->c, BCH_FEATURE_inline_data);
|
|
|
|
@@ -1606,8 +1614,8 @@ CLOSURE_CALLBACK(bch2_write)
|
|
BUG_ON(!op->write_point.v);
|
|
BUG_ON(bkey_eq(op->pos, POS_MAX));
|
|
|
|
- if (op->flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)
|
|
- op->flags |= BCH_WRITE_ALLOC_NOWAIT;
|
|
+ if (op->flags & BCH_WRITE_only_specified_devs)
|
|
+ op->flags |= BCH_WRITE_alloc_nowait;
|
|
|
|
op->nr_replicas_required = min_t(unsigned, op->nr_replicas_required, op->nr_replicas);
|
|
op->start_time = local_clock();
|
|
@@ -1628,13 +1636,14 @@ CLOSURE_CALLBACK(bch2_write)
|
|
goto err;
|
|
}
|
|
|
|
- if (!(op->flags & BCH_WRITE_MOVE) &&
|
|
+ if (!(op->flags & BCH_WRITE_move) &&
|
|
!bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
|
|
op->error = -BCH_ERR_erofs_no_writes;
|
|
goto err;
|
|
}
|
|
|
|
- this_cpu_add(c->counters[BCH_COUNTER_io_write], bio_sectors(bio));
|
|
+ if (!(op->flags & BCH_WRITE_move))
|
|
+ this_cpu_add(c->counters[BCH_COUNTER_io_write], bio_sectors(bio));
|
|
bch2_increment_clock(c, bio_sectors(bio), WRITE);
|
|
|
|
data_len = min_t(u64, bio->bi_iter.bi_size,
|
|
diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h
index b4626013abc8..02cca52be0bd 100644
--- a/fs/bcachefs/io_write.h
+++ b/fs/bcachefs/io_write.h
@@ -23,21 +23,20 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op);

#define BCH_WRITE_FLAGS() \
- x(ALLOC_NOWAIT) \
- x(CACHED) \
- x(DATA_ENCODED) \
- x(PAGES_STABLE) \
- x(PAGES_OWNED) \
- x(ONLY_SPECIFIED_DEVS) \
- x(WROTE_DATA_INLINE) \
- x(FROM_INTERNAL) \
- x(CHECK_ENOSPC) \
- x(SYNC) \
- x(MOVE) \
- x(IN_WORKER) \
- x(SUBMITTED) \
- x(IO_ERROR) \
- x(CONVERT_UNWRITTEN)
+ x(alloc_nowait) \
+ x(cached) \
+ x(data_encoded) \
+ x(pages_stable) \
+ x(pages_owned) \
+ x(only_specified_devs) \
+ x(wrote_data_inline) \
+ x(check_enospc) \
+ x(sync) \
+ x(move) \
+ x(in_worker) \
+ x(submitted) \
+ x(io_error) \
+ x(convert_unwritten)

enum __bch_write_flags {
#define x(f) __BCH_WRITE_##f,
diff --git a/fs/bcachefs/io_write_types.h b/fs/bcachefs/io_write_types.h
index 6e878a6f2f0b..3ef6df9145ef 100644
--- a/fs/bcachefs/io_write_types.h
+++ b/fs/bcachefs/io_write_types.h
@@ -64,7 +64,7 @@ struct bch_write_op {
struct bpos pos;
struct bversion version;

- /* For BCH_WRITE_DATA_ENCODED: */
+ /* For BCH_WRITE_data_encoded: */
struct bch_extent_crc_unpacked crc;

struct write_point_specifier write_point;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
|
|
index 24c294d4634e..ea96605cf162 100644
|
|
--- a/fs/bcachefs/journal.c
|
|
+++ b/fs/bcachefs/journal.c
|
|
@@ -56,11 +56,18 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
|
|
prt_printf(out, "seq:\t%llu\n", seq);
|
|
printbuf_indent_add(out, 2);
|
|
|
|
- prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i));
|
|
+ if (!buf->write_started)
|
|
+ prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i & JOURNAL_STATE_BUF_MASK));
|
|
|
|
- prt_printf(out, "size:\t");
|
|
- prt_human_readable_u64(out, vstruct_bytes(buf->data));
|
|
- prt_newline(out);
|
|
+ struct closure *cl = &buf->io;
|
|
+ int r = atomic_read(&cl->remaining);
|
|
+ prt_printf(out, "io:\t%pS r %i\n", cl->fn, r & CLOSURE_REMAINING_MASK);
|
|
+
|
|
+ if (buf->data) {
|
|
+ prt_printf(out, "size:\t");
|
|
+ prt_human_readable_u64(out, vstruct_bytes(buf->data));
|
|
+ prt_newline(out);
|
|
+ }
|
|
|
|
prt_printf(out, "expires:\t");
|
|
prt_printf(out, "%li jiffies\n", buf->expires - jiffies);
|
|
@@ -87,6 +94,9 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
|
|
|
|
static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
|
|
{
|
|
+ lockdep_assert_held(&j->lock);
|
|
+ out->atomic++;
|
|
+
|
|
if (!out->nr_tabstops)
|
|
printbuf_tabstop_push(out, 24);
|
|
|
|
@@ -95,6 +105,8 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
|
|
seq++)
|
|
bch2_journal_buf_to_text(out, j, seq);
|
|
prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? "open" : "closed");
|
|
+
|
|
+ --out->atomic;
|
|
}
|
|
|
|
static inline struct journal_buf *
|
|
@@ -104,10 +116,8 @@ journal_seq_to_buf(struct journal *j, u64 seq)
|
|
|
|
EBUG_ON(seq > journal_cur_seq(j));
|
|
|
|
- if (journal_seq_unwritten(j, seq)) {
|
|
+ if (journal_seq_unwritten(j, seq))
|
|
buf = j->buf + (seq & JOURNAL_BUF_MASK);
|
|
- EBUG_ON(le64_to_cpu(buf->data->seq) != seq);
|
|
- }
|
|
return buf;
|
|
}
|
|
|
|
@@ -195,7 +205,8 @@ void bch2_journal_do_writes(struct journal *j)
|
|
if (w->write_started)
|
|
continue;
|
|
|
|
- if (!journal_state_count(j->reservations, idx)) {
|
|
+ if (!journal_state_seq_count(j, j->reservations, seq)) {
|
|
+ j->seq_write_started = seq;
|
|
w->write_started = true;
|
|
closure_call(&w->io, bch2_journal_write, j->wq, NULL);
|
|
}
|
|
@@ -306,7 +317,7 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
|
|
|
|
bch2_journal_space_available(j);
|
|
|
|
- __bch2_journal_buf_put(j, old.idx, le64_to_cpu(buf->data->seq));
|
|
+ __bch2_journal_buf_put(j, le64_to_cpu(buf->data->seq));
|
|
}
|
|
|
|
void bch2_journal_halt(struct journal *j)
|
|
@@ -391,6 +402,9 @@ static int journal_entry_open(struct journal *j)
|
|
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
|
|
return JOURNAL_ERR_max_in_flight;
|
|
|
|
+ if (atomic64_read(&j->seq) - j->seq_write_started == JOURNAL_STATE_BUF_NR)
|
|
+ return JOURNAL_ERR_max_open;
|
|
+
|
|
if (journal_cur_seq(j) >= JOURNAL_SEQ_MAX) {
|
|
bch_err(c, "cannot start: journal seq overflow");
|
|
if (bch2_fs_emergency_read_only_locked(c))
|
|
@@ -398,8 +412,16 @@ static int journal_entry_open(struct journal *j)
|
|
return JOURNAL_ERR_insufficient_devices; /* -EROFS */
|
|
}
|
|
|
|
+ if (!j->free_buf && !buf->data)
|
|
+ return JOURNAL_ERR_enomem; /* will retry after write completion frees up a buf */
|
|
+
|
|
BUG_ON(!j->cur_entry_sectors);
|
|
|
|
+ if (!buf->data) {
|
|
+ swap(buf->data, j->free_buf);
|
|
+ swap(buf->buf_size, j->free_buf_size);
|
|
+ }
|
|
+
|
|
buf->expires =
|
|
(journal_cur_seq(j) == j->flushed_seq_ondisk
|
|
? jiffies
|
|
@@ -464,7 +486,7 @@ static int journal_entry_open(struct journal *j)
|
|
|
|
new.idx++;
|
|
BUG_ON(journal_state_count(new, new.idx));
|
|
- BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_BUF_MASK));
|
|
+ BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_STATE_BUF_MASK));
|
|
|
|
journal_state_inc(&new);
|
|
|
|
@@ -514,6 +536,33 @@ static void journal_write_work(struct work_struct *work)
|
|
spin_unlock(&j->lock);
|
|
}
|
|
|
|
+static void journal_buf_prealloc(struct journal *j)
|
|
+{
|
|
+ if (j->free_buf &&
|
|
+ j->free_buf_size >= j->buf_size_want)
|
|
+ return;
|
|
+
|
|
+ unsigned buf_size = j->buf_size_want;
|
|
+
|
|
+ spin_unlock(&j->lock);
|
|
+ void *buf = kvmalloc(buf_size, GFP_NOFS);
|
|
+ spin_lock(&j->lock);
|
|
+
|
|
+ if (buf &&
|
|
+ (!j->free_buf ||
|
|
+ buf_size > j->free_buf_size)) {
|
|
+ swap(buf, j->free_buf);
|
|
+ swap(buf_size, j->free_buf_size);
|
|
+ }
|
|
+
|
|
+ if (unlikely(buf)) {
|
|
+ spin_unlock(&j->lock);
|
|
+ /* kvfree can sleep */
|
|
+ kvfree(buf);
|
|
+ spin_lock(&j->lock);
|
|
+ }
|
|
+}
|
|
+
|
|
static int __journal_res_get(struct journal *j, struct journal_res *res,
|
|
unsigned flags)
|
|
{
|
|
@@ -544,6 +593,8 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
|
|
|
|
spin_lock(&j->lock);
|
|
|
|
+ journal_buf_prealloc(j);
|
|
+
|
|
/*
|
|
* Recheck after taking the lock, so we don't race with another thread
|
|
* that just did journal_entry_open() and call bch2_journal_entry_close()
|
|
@@ -571,20 +622,43 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
|
|
can_discard = j->can_discard;
|
|
spin_unlock(&j->lock);
|
|
out:
|
|
+ if (likely(!ret))
|
|
+ return 0;
|
|
if (ret == JOURNAL_ERR_retry)
|
|
goto retry;
|
|
- if (!ret)
|
|
- return 0;
|
|
|
|
if (journal_error_check_stuck(j, ret, flags))
|
|
ret = -BCH_ERR_journal_res_get_blocked;
|
|
|
|
if (ret == JOURNAL_ERR_max_in_flight &&
|
|
- track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true)) {
|
|
+ track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true) &&
|
|
+ trace_journal_entry_full_enabled()) {
|
|
+ struct printbuf buf = PRINTBUF;
|
|
+
|
|
+ bch2_printbuf_make_room(&buf, 4096);
|
|
|
|
+ spin_lock(&j->lock);
|
|
+ prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
|
|
+ bch2_journal_bufs_to_text(&buf, j);
|
|
+ spin_unlock(&j->lock);
|
|
+
|
|
+ trace_journal_entry_full(c, buf.buf);
|
|
+ printbuf_exit(&buf);
|
|
+ count_event(c, journal_entry_full);
|
|
+ }
|
|
+
|
|
+ if (ret == JOURNAL_ERR_max_open &&
|
|
+ track_event_change(&c->times[BCH_TIME_blocked_journal_max_open], true) &&
|
|
+ trace_journal_entry_full_enabled()) {
|
|
struct printbuf buf = PRINTBUF;
|
|
+
|
|
+ bch2_printbuf_make_room(&buf, 4096);
|
|
+
|
|
+ spin_lock(&j->lock);
|
|
prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
|
|
bch2_journal_bufs_to_text(&buf, j);
|
|
+ spin_unlock(&j->lock);
|
|
+
|
|
trace_journal_entry_full(c, buf.buf);
|
|
printbuf_exit(&buf);
|
|
count_event(c, journal_entry_full);
|
|
@@ -951,7 +1025,8 @@ static void __bch2_journal_block(struct journal *j)
|
|
new.cur_entry_offset = JOURNAL_ENTRY_BLOCKED_VAL;
|
|
} while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v));
|
|
|
|
- journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset);
|
|
+ if (old.cur_entry_offset < JOURNAL_ENTRY_BLOCKED_VAL)
|
|
+ journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset);
|
|
}
|
|
}
|
|
|
|
@@ -992,7 +1067,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
|
|
*blocked = true;
|
|
}
|
|
|
|
- ret = journal_state_count(s, idx) > open
|
|
+ ret = journal_state_count(s, idx & JOURNAL_STATE_BUF_MASK) > open
|
|
? ERR_PTR(-EAGAIN)
|
|
: buf;
|
|
break;
|
|
@@ -1342,6 +1417,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
|
|
j->replay_journal_seq_end = cur_seq;
|
|
j->last_seq_ondisk = last_seq;
|
|
j->flushed_seq_ondisk = cur_seq - 1;
|
|
+ j->seq_write_started = cur_seq - 1;
|
|
j->seq_ondisk = cur_seq - 1;
|
|
j->pin.front = last_seq;
|
|
j->pin.back = cur_seq;
|
|
@@ -1382,8 +1458,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
|
|
set_bit(JOURNAL_running, &j->flags);
|
|
j->last_flush_write = jiffies;
|
|
|
|
- j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
|
|
- j->reservations.unwritten_idx++;
|
|
+ j->reservations.idx = journal_cur_seq(j);
|
|
|
|
c->last_bucket_seq_cleanup = journal_cur_seq(j);
|
|
|
|
@@ -1475,6 +1550,7 @@ void bch2_fs_journal_exit(struct journal *j)
|
|
|
|
for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
|
|
kvfree(j->buf[i].data);
|
|
+ kvfree(j->free_buf);
|
|
free_fifo(&j->pin);
|
|
}
|
|
|
|
@@ -1501,13 +1577,13 @@ int bch2_fs_journal_init(struct journal *j)
|
|
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)))
|
|
return -BCH_ERR_ENOMEM_journal_pin_fifo;
|
|
|
|
- for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) {
|
|
- j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN;
|
|
- j->buf[i].data = kvmalloc(j->buf[i].buf_size, GFP_KERNEL);
|
|
- if (!j->buf[i].data)
|
|
- return -BCH_ERR_ENOMEM_journal_buf;
|
|
+ j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN;
|
|
+ j->free_buf = kvmalloc(j->free_buf_size, GFP_KERNEL);
|
|
+ if (!j->free_buf)
|
|
+ return -BCH_ERR_ENOMEM_journal_buf;
|
|
+
|
|
+ for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
|
|
j->buf[i].idx = i;
|
|
- }
|
|
|
|
j->pin.front = j->pin.back = 1;
|
|
|
|
@@ -1557,6 +1633,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
|
prt_printf(out, "average write size:\t");
|
|
prt_human_readable_u64(out, nr_writes ? div64_u64(j->entry_bytes_written, nr_writes) : 0);
|
|
prt_newline(out);
|
|
+ prt_printf(out, "free buf:\t%u\n", j->free_buf ? j->free_buf_size : 0);
|
|
prt_printf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim);
|
|
prt_printf(out, "nr background reclaim:\t%llu\n", j->nr_background_reclaim);
|
|
prt_printf(out, "reclaim kicked:\t%u\n", j->reclaim_kicked);
|
|
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
|
|
index 107f7f901cd9..1c460ded2a11 100644
|
|
--- a/fs/bcachefs/journal.h
|
|
+++ b/fs/bcachefs/journal.h
|
|
@@ -121,11 +121,6 @@ static inline void journal_wake(struct journal *j)
|
|
closure_wake_up(&j->async_wait);
|
|
}
|
|
|
|
-static inline struct journal_buf *journal_cur_buf(struct journal *j)
|
|
-{
|
|
- return j->buf + j->reservations.idx;
|
|
-}
|
|
-
|
|
/* Sequence number of oldest dirty journal entry */
|
|
|
|
static inline u64 journal_last_seq(struct journal *j)
|
|
@@ -143,6 +138,15 @@ static inline u64 journal_last_unwritten_seq(struct journal *j)
|
|
return j->seq_ondisk + 1;
|
|
}
|
|
|
|
+static inline struct journal_buf *journal_cur_buf(struct journal *j)
|
|
+{
|
|
+ unsigned idx = (journal_cur_seq(j) &
|
|
+ JOURNAL_BUF_MASK &
|
|
+ ~JOURNAL_STATE_BUF_MASK) + j->reservations.idx;
|
|
+
|
|
+ return j->buf + idx;
|
|
+}
|
|
+
|
|
static inline int journal_state_count(union journal_res_state s, int idx)
|
|
{
|
|
switch (idx) {
|
|
@@ -154,6 +158,15 @@ static inline int journal_state_count(union journal_res_state s, int idx)
|
|
BUG();
|
|
}
|
|
|
|
+static inline int journal_state_seq_count(struct journal *j,
|
|
+ union journal_res_state s, u64 seq)
|
|
+{
|
|
+ if (journal_cur_seq(j) - seq <= JOURNAL_STATE_BUF_NR)
|
|
+ return journal_state_count(s, seq & JOURNAL_STATE_BUF_MASK);
|
|
+ else
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static inline void journal_state_inc(union journal_res_state *s)
|
|
{
|
|
s->buf0_count += s->idx == 0;
|
|
@@ -193,7 +206,7 @@ bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
|
|
static inline struct jset_entry *
|
|
journal_res_entry(struct journal *j, struct journal_res *res)
|
|
{
|
|
- return vstruct_idx(j->buf[res->idx].data, res->offset);
|
|
+ return vstruct_idx(j->buf[res->seq & JOURNAL_BUF_MASK].data, res->offset);
|
|
}
|
|
|
|
static inline unsigned journal_entry_init(struct jset_entry *entry, unsigned type,
|
|
@@ -267,8 +280,9 @@ bool bch2_journal_entry_close(struct journal *);
|
|
void bch2_journal_do_writes(struct journal *);
|
|
void bch2_journal_buf_put_final(struct journal *, u64);
|
|
|
|
-static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
|
|
+static inline void __bch2_journal_buf_put(struct journal *j, u64 seq)
|
|
{
|
|
+ unsigned idx = seq & JOURNAL_STATE_BUF_MASK;
|
|
union journal_res_state s;
|
|
|
|
s = journal_state_buf_put(j, idx);
|
|
@@ -276,8 +290,9 @@ static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 s
|
|
bch2_journal_buf_put_final(j, seq);
|
|
}
|
|
|
|
-static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
|
|
+static inline void bch2_journal_buf_put(struct journal *j, u64 seq)
|
|
{
|
|
+ unsigned idx = seq & JOURNAL_STATE_BUF_MASK;
|
|
union journal_res_state s;
|
|
|
|
s = journal_state_buf_put(j, idx);
|
|
@@ -306,7 +321,7 @@ static inline void bch2_journal_res_put(struct journal *j,
|
|
BCH_JSET_ENTRY_btree_keys,
|
|
0, 0, 0);
|
|
|
|
- bch2_journal_buf_put(j, res->idx, res->seq);
|
|
+ bch2_journal_buf_put(j, res->seq);
|
|
|
|
res->ref = 0;
|
|
}
|
|
@@ -361,9 +376,9 @@ static inline int journal_res_get_fast(struct journal *j,
|
|
&old.v, new.v));
|
|
|
|
res->ref = true;
|
|
- res->idx = old.idx;
|
|
res->offset = old.cur_entry_offset;
|
|
- res->seq = le64_to_cpu(j->buf[old.idx].data->seq);
|
|
+ res->seq = journal_cur_seq(j);
|
|
+ res->seq -= (res->seq - old.idx) & JOURNAL_STATE_BUF_MASK;
|
|
return 1;
|
|
}
|
|
|
|
@@ -390,6 +405,7 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
|
|
(flags & JOURNAL_RES_GET_NONBLOCK) != 0,
|
|
NULL, _THIS_IP_);
|
|
EBUG_ON(!res->ref);
|
|
+ BUG_ON(!res->seq);
|
|
}
|
|
return 0;
|
|
}
|
|
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
|
|
index 11c39e0c34f4..61f71e7baff2 100644
|
|
--- a/fs/bcachefs/journal_io.c
|
|
+++ b/fs/bcachefs/journal_io.c
|
|
@@ -1611,7 +1611,6 @@ static CLOSURE_CALLBACK(journal_write_done)
|
|
struct journal *j = container_of(w, struct journal, buf[w->idx]);
|
|
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
|
struct bch_replicas_padded replicas;
|
|
- union journal_res_state old, new;
|
|
u64 seq = le64_to_cpu(w->data->seq);
|
|
int err = 0;
|
|
|
|
@@ -1641,6 +1640,21 @@ static CLOSURE_CALLBACK(journal_write_done)
|
|
j->err_seq = seq;
|
|
w->write_done = true;
|
|
|
|
+ if (!j->free_buf || j->free_buf_size < w->buf_size) {
|
|
+ swap(j->free_buf, w->data);
|
|
+ swap(j->free_buf_size, w->buf_size);
|
|
+ }
|
|
+
|
|
+ if (w->data) {
|
|
+ void *buf = w->data;
|
|
+ w->data = NULL;
|
|
+ w->buf_size = 0;
|
|
+
|
|
+ spin_unlock(&j->lock);
|
|
+ kvfree(buf);
|
|
+ spin_lock(&j->lock);
|
|
+ }
|
|
+
|
|
bool completed = false;
|
|
|
|
for (seq = journal_last_unwritten_seq(j);
|
|
@@ -1650,7 +1664,7 @@ static CLOSURE_CALLBACK(journal_write_done)
|
|
if (!w->write_done)
|
|
break;
|
|
|
|
- if (!j->err_seq && !JSET_NO_FLUSH(w->data)) {
|
|
+ if (!j->err_seq && !w->noflush) {
|
|
j->flushed_seq_ondisk = seq;
|
|
j->last_seq_ondisk = w->last_seq;
|
|
|
|
@@ -1671,16 +1685,6 @@ static CLOSURE_CALLBACK(journal_write_done)
|
|
if (j->watermark != BCH_WATERMARK_stripe)
|
|
journal_reclaim_kick(&c->journal);
|
|
|
|
- old.v = atomic64_read(&j->reservations.counter);
|
|
- do {
|
|
- new.v = old.v;
|
|
- BUG_ON(journal_state_count(new, new.unwritten_idx));
|
|
- BUG_ON(new.unwritten_idx != (seq & JOURNAL_BUF_MASK));
|
|
-
|
|
- new.unwritten_idx++;
|
|
- } while (!atomic64_try_cmpxchg(&j->reservations.counter,
|
|
- &old.v, new.v));
|
|
-
|
|
closure_wake_up(&w->wait);
|
|
completed = true;
|
|
}
|
|
@@ -1695,7 +1699,7 @@ static CLOSURE_CALLBACK(journal_write_done)
|
|
}
|
|
|
|
if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
|
|
- new.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
|
|
+ j->reservations.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
|
|
struct journal_buf *buf = journal_cur_buf(j);
|
|
long delta = buf->expires - jiffies;
|
|
|
|
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
|
|
index 1f25c111c54c..e463d2d95359 100644
|
|
--- a/fs/bcachefs/journal_seq_blacklist.c
|
|
+++ b/fs/bcachefs/journal_seq_blacklist.c
|
|
@@ -231,15 +231,14 @@ bool bch2_blacklist_entries_gc(struct bch_fs *c)
|
|
struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
|
|
BUG_ON(nr != t->nr);
|
|
|
|
- unsigned i;
|
|
- for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr);
|
|
- src < bl->start + nr;
|
|
- src++, i = eytzinger0_next(i, nr)) {
|
|
+ src = bl->start;
|
|
+ eytzinger0_for_each(i, nr) {
|
|
BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
|
|
BUG_ON(t->entries[i].end != le64_to_cpu(src->end));
|
|
|
|
if (t->entries[i].dirty || t->entries[i].end >= c->journal.oldest_seq_found_ondisk)
|
|
*dst++ = *src;
|
|
+ src++;
|
|
}
|
|
|
|
unsigned new_nr = dst - bl->start;
|
|
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
|
|
index a198a81d7478..060ec991dd2b 100644
|
|
--- a/fs/bcachefs/journal_types.h
|
|
+++ b/fs/bcachefs/journal_types.h
|
|
@@ -12,7 +12,11 @@
|
|
/* btree write buffer steals 8 bits for its own purposes: */
|
|
#define JOURNAL_SEQ_MAX ((1ULL << 56) - 1)
|
|
|
|
-#define JOURNAL_BUF_BITS 2
|
|
+#define JOURNAL_STATE_BUF_BITS 2
|
|
+#define JOURNAL_STATE_BUF_NR (1U << JOURNAL_STATE_BUF_BITS)
|
|
+#define JOURNAL_STATE_BUF_MASK (JOURNAL_STATE_BUF_NR - 1)
|
|
+
|
|
+#define JOURNAL_BUF_BITS 4
|
|
#define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS)
|
|
#define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1)
|
|
|
|
@@ -79,7 +83,6 @@ struct journal_entry_pin {
|
|
|
|
struct journal_res {
|
|
bool ref;
|
|
- u8 idx;
|
|
u16 u64s;
|
|
u32 offset;
|
|
u64 seq;
|
|
@@ -95,9 +98,8 @@ union journal_res_state {
|
|
};
|
|
|
|
struct {
|
|
- u64 cur_entry_offset:20,
|
|
+ u64 cur_entry_offset:22,
|
|
idx:2,
|
|
- unwritten_idx:2,
|
|
buf0_count:10,
|
|
buf1_count:10,
|
|
buf2_count:10,
|
|
@@ -107,13 +109,13 @@ union journal_res_state {
|
|
|
|
/* bytes: */
|
|
#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
|
|
-#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
|
|
+#define JOURNAL_ENTRY_SIZE_MAX (4U << 22) /* 16M */
|
|
|
|
/*
|
|
* We stash some journal state as sentinal values in cur_entry_offset:
|
|
* note - cur_entry_offset is in units of u64s
|
|
*/
|
|
-#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1)
|
|
+#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 22) - 1)
|
|
|
|
#define JOURNAL_ENTRY_BLOCKED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 2)
|
|
#define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1)
|
|
@@ -152,9 +154,11 @@ enum journal_flags {
|
|
x(retry) \
|
|
x(blocked) \
|
|
x(max_in_flight) \
|
|
+ x(max_open) \
|
|
x(journal_full) \
|
|
x(journal_pin_full) \
|
|
x(journal_stuck) \
|
|
+ x(enomem) \
|
|
x(insufficient_devices)
|
|
|
|
enum journal_errors {
|
|
@@ -217,6 +221,8 @@ struct journal {
|
|
* other is possibly being written out.
|
|
*/
|
|
struct journal_buf buf[JOURNAL_BUF_NR];
|
|
+ void *free_buf;
|
|
+ unsigned free_buf_size;
|
|
|
|
spinlock_t lock;
|
|
|
|
@@ -234,6 +240,7 @@ struct journal {
|
|
/* Sequence number of most recent journal entry (last entry in @pin) */
|
|
atomic64_t seq;
|
|
|
|
+ u64 seq_write_started;
|
|
/* seq, last_seq from the most recent journal entry successfully written */
|
|
u64 seq_ondisk;
|
|
u64 flushed_seq_ondisk;
|
|
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
|
|
index ddc187fb693d..57ad662871ba 100644
|
|
--- a/fs/bcachefs/migrate.c
|
|
+++ b/fs/bcachefs/migrate.c
|
|
@@ -15,6 +15,7 @@
|
|
#include "keylist.h"
|
|
#include "migrate.h"
|
|
#include "move.h"
|
|
+#include "progress.h"
|
|
#include "replicas.h"
|
|
#include "super-io.h"
|
|
|
|
@@ -76,7 +77,9 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
|
|
return 0;
|
|
}
|
|
|
|
-static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
|
+static int bch2_dev_usrdata_drop(struct bch_fs *c,
|
|
+ struct progress_indicator_state *progress,
|
|
+ unsigned dev_idx, int flags)
|
|
{
|
|
struct btree_trans *trans = bch2_trans_get(c);
|
|
enum btree_id id;
|
|
@@ -88,8 +91,10 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
|
|
|
ret = for_each_btree_key_commit(trans, iter, id, POS_MIN,
|
|
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
|
|
- NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
|
- bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags));
|
|
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
|
|
+ bch2_progress_update_iter(trans, progress, &iter, "dropping user data");
|
|
+ bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags);
|
|
+ }));
|
|
if (ret)
|
|
break;
|
|
}
|
|
@@ -99,7 +104,9 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
|
return ret;
|
|
}
|
|
|
|
-static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
|
+static int bch2_dev_metadata_drop(struct bch_fs *c,
|
|
+ struct progress_indicator_state *progress,
|
|
+ unsigned dev_idx, int flags)
|
|
{
|
|
struct btree_trans *trans;
|
|
struct btree_iter iter;
|
|
@@ -125,6 +132,8 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
|
while (bch2_trans_begin(trans),
|
|
(b = bch2_btree_iter_peek_node(&iter)) &&
|
|
!(ret = PTR_ERR_OR_ZERO(b))) {
|
|
+ bch2_progress_update_iter(trans, progress, &iter, "dropping metadata");
|
|
+
|
|
if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
|
|
goto next;
|
|
|
|
@@ -169,6 +178,11 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
|
|
|
int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
|
{
|
|
- return bch2_dev_usrdata_drop(c, dev_idx, flags) ?:
|
|
- bch2_dev_metadata_drop(c, dev_idx, flags);
|
|
+ struct progress_indicator_state progress;
|
|
+ bch2_progress_init(&progress, c,
|
|
+ BIT_ULL(BTREE_ID_extents)|
|
|
+ BIT_ULL(BTREE_ID_reflink));
|
|
+
|
|
+ return bch2_dev_usrdata_drop(c, &progress, dev_idx, flags) ?:
|
|
+ bch2_dev_metadata_drop(c, &progress, dev_idx, flags);
|
|
}
|
|
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
|
|
index c493ea625553..e0e10deaea73 100644
|
|
--- a/fs/bcachefs/move.c
|
|
+++ b/fs/bcachefs/move.c
|
|
@@ -38,28 +38,28 @@ const char * const bch2_data_ops_strs[] = {
|
|
NULL
|
|
};
|
|
|
|
-static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k,
|
|
+static void trace_io_move2(struct bch_fs *c, struct bkey_s_c k,
|
|
struct bch_io_opts *io_opts,
|
|
struct data_update_opts *data_opts)
|
|
{
|
|
- if (trace_move_extent_enabled()) {
|
|
+ if (trace_io_move_enabled()) {
|
|
struct printbuf buf = PRINTBUF;
|
|
|
|
bch2_bkey_val_to_text(&buf, c, k);
|
|
prt_newline(&buf);
|
|
bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts);
|
|
- trace_move_extent(c, buf.buf);
|
|
+ trace_io_move(c, buf.buf);
|
|
printbuf_exit(&buf);
|
|
}
|
|
}
|
|
|
|
-static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
|
|
+static void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k)
|
|
{
|
|
- if (trace_move_extent_read_enabled()) {
|
|
+ if (trace_io_move_read_enabled()) {
|
|
struct printbuf buf = PRINTBUF;
|
|
|
|
bch2_bkey_val_to_text(&buf, c, k);
|
|
- trace_move_extent_read(c, buf.buf);
|
|
+ trace_io_move_read(c, buf.buf);
|
|
printbuf_exit(&buf);
|
|
}
|
|
}
|
|
@@ -74,11 +74,7 @@ struct moving_io {
|
|
unsigned read_sectors;
|
|
unsigned write_sectors;
|
|
|
|
- struct bch_read_bio rbio;
|
|
-
|
|
struct data_update write;
|
|
- /* Must be last since it is variable size */
|
|
- struct bio_vec bi_inline_vecs[];
|
|
};
|
|
|
|
static void move_free(struct moving_io *io)
|
|
@@ -88,13 +84,17 @@ static void move_free(struct moving_io *io)
|
|
if (io->b)
|
|
atomic_dec(&io->b->count);
|
|
|
|
- bch2_data_update_exit(&io->write);
|
|
-
|
|
mutex_lock(&ctxt->lock);
|
|
list_del(&io->io_list);
|
|
wake_up(&ctxt->wait);
|
|
mutex_unlock(&ctxt->lock);
|
|
|
|
+ if (!io->write.data_opts.scrub) {
|
|
+ bch2_data_update_exit(&io->write);
|
|
+ } else {
|
|
+ bch2_bio_free_pages_pool(io->write.op.c, &io->write.op.wbio.bio);
|
|
+ kfree(io->write.bvecs);
|
|
+ }
|
|
kfree(io);
|
|
}
|
|
|
|
@@ -114,17 +114,30 @@ static void move_write_done(struct bch_write_op *op)
|
|
|
|
static void move_write(struct moving_io *io)
|
|
{
|
|
- if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) {
|
|
+ struct moving_context *ctxt = io->write.ctxt;
|
|
+
|
|
+ if (ctxt->stats) {
|
|
+ if (io->write.rbio.bio.bi_status)
|
|
+ atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
|
|
+ &ctxt->stats->sectors_error_uncorrected);
|
|
+ else if (io->write.rbio.saw_error)
|
|
+ atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
|
|
+ &ctxt->stats->sectors_error_corrected);
|
|
+ }
|
|
+
|
|
+ if (unlikely(io->write.rbio.bio.bi_status ||
|
|
+ io->write.rbio.hole ||
|
|
+ io->write.data_opts.scrub)) {
|
|
move_free(io);
|
|
return;
|
|
}
|
|
|
|
- if (trace_move_extent_write_enabled()) {
|
|
+ if (trace_io_move_write_enabled()) {
|
|
struct bch_fs *c = io->write.op.c;
|
|
struct printbuf buf = PRINTBUF;
|
|
|
|
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
|
|
- trace_move_extent_write(c, buf.buf);
|
|
+ trace_io_move_write(c, buf.buf);
|
|
printbuf_exit(&buf);
|
|
}
|
|
|
|
@@ -132,7 +145,7 @@ static void move_write(struct moving_io *io)
|
|
atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
|
|
atomic_inc(&io->write.ctxt->write_ios);
|
|
|
|
- bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
|
|
+ bch2_data_update_read_done(&io->write);
|
|
}
|
|
|
|
struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
|
|
@@ -145,7 +158,7 @@ struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctx
|
|
|
|
static void move_read_endio(struct bio *bio)
|
|
{
|
|
- struct moving_io *io = container_of(bio, struct moving_io, rbio.bio);
|
|
+ struct moving_io *io = container_of(bio, struct moving_io, write.rbio.bio);
|
|
struct moving_context *ctxt = io->write.ctxt;
|
|
|
|
atomic_sub(io->read_sectors, &ctxt->read_sectors);
|
|
@@ -258,14 +271,10 @@ int bch2_move_extent(struct moving_context *ctxt,
|
|
{
|
|
struct btree_trans *trans = ctxt->trans;
|
|
struct bch_fs *c = trans->c;
|
|
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
|
- struct moving_io *io;
|
|
- const union bch_extent_entry *entry;
|
|
- struct extent_ptr_decoded p;
|
|
- unsigned sectors = k.k->size, pages;
|
|
int ret = -ENOMEM;
|
|
|
|
- trace_move_extent2(c, k, &io_opts, &data_opts);
|
|
+ trace_io_move2(c, k, &io_opts, &data_opts);
|
|
+ this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
|
|
|
|
if (ctxt->stats)
|
|
ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos);
|
|
@@ -273,7 +282,8 @@ int bch2_move_extent(struct moving_context *ctxt,
|
|
bch2_data_update_opts_normalize(k, &data_opts);
|
|
|
|
if (!data_opts.rewrite_ptrs &&
|
|
- !data_opts.extra_replicas) {
|
|
+ !data_opts.extra_replicas &&
|
|
+ !data_opts.scrub) {
|
|
if (data_opts.kill_ptrs)
|
|
return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
|
|
return 0;
|
|
@@ -285,13 +295,7 @@ int bch2_move_extent(struct moving_context *ctxt,
|
|
*/
|
|
bch2_trans_unlock(trans);
|
|
|
|
- /* write path might have to decompress data: */
|
|
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
|
|
- sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
|
|
-
|
|
- pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
|
|
- io = kzalloc(sizeof(struct moving_io) +
|
|
- sizeof(struct bio_vec) * pages, GFP_KERNEL);
|
|
+ struct moving_io *io = kzalloc(sizeof(struct moving_io), GFP_KERNEL);
|
|
if (!io)
|
|
goto err;
|
|
|
|
@@ -300,31 +304,27 @@ int bch2_move_extent(struct moving_context *ctxt,
|
|
io->read_sectors = k.k->size;
|
|
io->write_sectors = k.k->size;
|
|
|
|
- bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0);
|
|
- bio_set_prio(&io->write.op.wbio.bio,
|
|
- IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
|
|
-
|
|
- if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
|
|
- GFP_KERNEL))
|
|
- goto err_free;
|
|
+ if (!data_opts.scrub) {
|
|
+ ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
|
|
+ &io_opts, data_opts, iter->btree_id, k);
|
|
+ if (ret)
|
|
+ goto err_free;
|
|
|
|
- io->rbio.c = c;
|
|
- io->rbio.opts = io_opts;
|
|
- bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0);
|
|
- io->rbio.bio.bi_vcnt = pages;
|
|
- bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
|
|
- io->rbio.bio.bi_iter.bi_size = sectors << 9;
|
|
+ io->write.op.end_io = move_write_done;
|
|
+ } else {
|
|
+ bch2_bkey_buf_init(&io->write.k);
|
|
+ bch2_bkey_buf_reassemble(&io->write.k, c, k);
|
|
|
|
- io->rbio.bio.bi_opf = REQ_OP_READ;
|
|
- io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
|
|
- io->rbio.bio.bi_end_io = move_read_endio;
|
|
+ io->write.op.c = c;
|
|
+ io->write.data_opts = data_opts;
|
|
|
|
- ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
|
|
- io_opts, data_opts, iter->btree_id, k);
|
|
- if (ret)
|
|
- goto err_free_pages;
|
|
+ ret = bch2_data_update_bios_init(&io->write, c, &io_opts);
|
|
+ if (ret)
|
|
+ goto err_free;
|
|
+ }
|
|
|
|
- io->write.op.end_io = move_write_done;
|
|
+ io->write.rbio.bio.bi_end_io = move_read_endio;
|
|
+ io->write.rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
|
|
|
|
if (ctxt->rate)
|
|
bch2_ratelimit_increment(ctxt->rate, k.k->size);
|
|
@@ -339,9 +339,7 @@ int bch2_move_extent(struct moving_context *ctxt,
|
|
atomic_inc(&io->b->count);
|
|
}
|
|
|
|
- this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
|
|
- this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
|
|
- trace_move_extent_read2(c, k);
|
|
+ trace_io_move_read2(c, k);
|
|
|
|
mutex_lock(&ctxt->lock);
|
|
atomic_add(io->read_sectors, &ctxt->read_sectors);
|
|
@@ -356,33 +354,34 @@ int bch2_move_extent(struct moving_context *ctxt,
|
|
* ctxt when doing wakeup
|
|
*/
|
|
closure_get(&ctxt->cl);
|
|
- bch2_read_extent(trans, &io->rbio,
|
|
- bkey_start_pos(k.k),
|
|
- iter->btree_id, k, 0,
|
|
- BCH_READ_NODECODE|
|
|
- BCH_READ_LAST_FRAGMENT);
|
|
+ __bch2_read_extent(trans, &io->write.rbio,
|
|
+ io->write.rbio.bio.bi_iter,
|
|
+ bkey_start_pos(k.k),
|
|
+ iter->btree_id, k, 0,
|
|
+ NULL,
|
|
+ BCH_READ_data_update|
|
|
+ BCH_READ_last_fragment,
|
|
+ data_opts.scrub ? data_opts.read_dev : -1);
|
|
return 0;
|
|
-err_free_pages:
|
|
- bio_free_pages(&io->write.op.wbio.bio);
|
|
err_free:
|
|
kfree(io);
|
|
err:
|
|
- if (ret == -BCH_ERR_data_update_done)
|
|
+ if (bch2_err_matches(ret, BCH_ERR_data_update_done))
|
|
return 0;
|
|
|
|
if (bch2_err_matches(ret, EROFS) ||
|
|
bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
return ret;
|
|
|
|
- count_event(c, move_extent_start_fail);
|
|
+ count_event(c, io_move_start_fail);
|
|
|
|
- if (trace_move_extent_start_fail_enabled()) {
|
|
+ if (trace_io_move_start_fail_enabled()) {
|
|
struct printbuf buf = PRINTBUF;
|
|
|
|
bch2_bkey_val_to_text(&buf, c, k);
|
|
prt_str(&buf, ": ");
|
|
prt_str(&buf, bch2_err_str(ret));
|
|
- trace_move_extent_start_fail(c, buf.buf);
|
|
+ trace_io_move_start_fail(c, buf.buf);
|
|
printbuf_exit(&buf);
|
|
}
|
|
return ret;
|
|
@@ -627,7 +626,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
|
|
if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
|
|
continue;
|
|
|
|
- if (ret2 == -ENOMEM) {
|
|
+ if (bch2_err_matches(ret2, ENOMEM)) {
|
|
/* memory allocation failure, wait for some IO to finish */
|
|
bch2_move_ctxt_wait_for_io(ctxt);
|
|
continue;
|
|
@@ -689,21 +688,22 @@ int bch2_move_data(struct bch_fs *c,
|
|
bool wait_on_copygc,
|
|
move_pred_fn pred, void *arg)
|
|
{
|
|
-
|
|
struct moving_context ctxt;
|
|
- int ret;
|
|
|
|
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
|
|
- ret = __bch2_move_data(&ctxt, start, end, pred, arg);
|
|
+ int ret = __bch2_move_data(&ctxt, start, end, pred, arg);
|
|
bch2_moving_ctxt_exit(&ctxt);
|
|
|
|
return ret;
|
|
}
|
|
|
|
-int bch2_evacuate_bucket(struct moving_context *ctxt,
|
|
- struct move_bucket_in_flight *bucket_in_flight,
|
|
- struct bpos bucket, int gen,
|
|
- struct data_update_opts _data_opts)
|
|
+static int __bch2_move_data_phys(struct moving_context *ctxt,
|
|
+ struct move_bucket_in_flight *bucket_in_flight,
|
|
+ unsigned dev,
|
|
+ u64 bucket_start,
|
|
+ u64 bucket_end,
|
|
+ unsigned data_types,
|
|
+ move_pred_fn pred, void *arg)
|
|
{
|
|
struct btree_trans *trans = ctxt->trans;
|
|
struct bch_fs *c = trans->c;
|
|
@@ -712,16 +712,20 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
|
|
struct btree_iter iter = {}, bp_iter = {};
|
|
struct bkey_buf sk;
|
|
struct bkey_s_c k;
|
|
- struct data_update_opts data_opts;
|
|
unsigned sectors_moved = 0;
|
|
struct bkey_buf last_flushed;
|
|
int ret = 0;
|
|
|
|
- struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode);
|
|
+ struct bch_dev *ca = bch2_dev_tryget(c, dev);
|
|
if (!ca)
|
|
return 0;
|
|
|
|
- trace_bucket_evacuate(c, &bucket);
|
|
+ bucket_end = min(bucket_end, ca->mi.nbuckets);
|
|
+
|
|
+ struct bpos bp_start = bucket_pos_to_bp_start(ca, POS(dev, bucket_start));
|
|
+ struct bpos bp_end = bucket_pos_to_bp_end(ca, POS(dev, bucket_end));
|
|
+ bch2_dev_put(ca);
|
|
+ ca = NULL;
|
|
|
|
bch2_bkey_buf_init(&last_flushed);
|
|
bkey_init(&last_flushed.k->k);
|
|
@@ -732,8 +736,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
|
|
*/
|
|
bch2_trans_begin(trans);
|
|
|
|
- bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
|
|
- bucket_pos_to_bp_start(ca, bucket), 0);
|
|
+ bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_start, 0);
|
|
|
|
bch_err_msg(c, ret, "looking up alloc key");
|
|
if (ret)
|
|
@@ -757,7 +760,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
|
|
if (ret)
|
|
goto err;
|
|
|
|
- if (!k.k || bkey_gt(k.k->p, bucket_pos_to_bp_end(ca, bucket)))
|
|
+ if (!k.k || bkey_gt(k.k->p, bp_end))
|
|
break;
|
|
|
|
if (k.k->type != KEY_TYPE_backpointer)
|
|
@@ -765,107 +768,146 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
|
|
|
|
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
|
|
|
|
- if (!bp.v->level) {
|
|
- k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed);
|
|
- ret = bkey_err(k);
|
|
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
- continue;
|
|
- if (ret)
|
|
- goto err;
|
|
- if (!k.k)
|
|
- goto next;
|
|
+ if (ctxt->stats)
|
|
+ ctxt->stats->offset = bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
|
|
|
|
- bch2_bkey_buf_reassemble(&sk, c, k);
|
|
- k = bkey_i_to_s_c(sk.k);
|
|
+ if (!(data_types & BIT(bp.v->data_type)))
|
|
+ goto next;
|
|
|
|
+ k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed);
|
|
+ ret = bkey_err(k);
|
|
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
+ continue;
|
|
+ if (ret)
|
|
+ goto err;
|
|
+ if (!k.k)
|
|
+ goto next;
|
|
+
|
|
+ if (!bp.v->level) {
|
|
ret = bch2_move_get_io_opts_one(trans, &io_opts, &iter, k);
|
|
if (ret) {
|
|
bch2_trans_iter_exit(trans, &iter);
|
|
continue;
|
|
}
|
|
+ }
|
|
|
|
- data_opts = _data_opts;
|
|
- data_opts.target = io_opts.background_target;
|
|
- data_opts.rewrite_ptrs = 0;
|
|
-
|
|
- unsigned sectors = bp.v->bucket_len; /* move_extent will drop locks */
|
|
- unsigned i = 0;
|
|
- const union bch_extent_entry *entry;
|
|
- struct extent_ptr_decoded p;
|
|
- bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
|
|
- if (p.ptr.dev == bucket.inode) {
|
|
- if (p.ptr.cached) {
|
|
- bch2_trans_iter_exit(trans, &iter);
|
|
- goto next;
|
|
- }
|
|
- data_opts.rewrite_ptrs |= 1U << i;
|
|
- break;
|
|
- }
|
|
- i++;
|
|
- }
|
|
-
|
|
- ret = bch2_move_extent(ctxt, bucket_in_flight,
|
|
- &iter, k, io_opts, data_opts);
|
|
+ struct data_update_opts data_opts = {};
|
|
+ if (!pred(c, arg, k, &io_opts, &data_opts)) {
|
|
bch2_trans_iter_exit(trans, &iter);
|
|
+ goto next;
|
|
+ }
|
|
|
|
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
- continue;
|
|
- if (ret == -ENOMEM) {
|
|
- /* memory allocation failure, wait for some IO to finish */
|
|
- bch2_move_ctxt_wait_for_io(ctxt);
|
|
- continue;
|
|
- }
|
|
- if (ret)
|
|
- goto err;
|
|
+ if (data_opts.scrub &&
|
|
+ !bch2_dev_idx_is_online(c, data_opts.read_dev)) {
|
|
+ bch2_trans_iter_exit(trans, &iter);
|
|
+ ret = -BCH_ERR_device_offline;
|
|
+ break;
|
|
+ }
|
|
|
|
- if (ctxt->stats)
|
|
- atomic64_add(sectors, &ctxt->stats->sectors_seen);
|
|
- sectors_moved += sectors;
|
|
- } else {
|
|
- struct btree *b;
|
|
+ bch2_bkey_buf_reassemble(&sk, c, k);
|
|
+ k = bkey_i_to_s_c(sk.k);
|
|
|
|
- b = bch2_backpointer_get_node(trans, bp, &iter, &last_flushed);
|
|
- ret = PTR_ERR_OR_ZERO(b);
|
|
- if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
|
|
- goto next;
|
|
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
- continue;
|
|
- if (ret)
|
|
- goto err;
|
|
- if (!b)
|
|
- goto next;
|
|
+ /* move_extent will drop locks */
|
|
+ unsigned sectors = bp.v->bucket_len;
|
|
|
|
- unsigned sectors = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
|
|
+ if (!bp.v->level)
|
|
+ ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts);
|
|
+ else if (!data_opts.scrub)
|
|
+ ret = bch2_btree_node_rewrite_key(trans, bp.v->btree_id, bp.v->level, k.k->p, 0);
|
|
+ else
|
|
+ ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev);
|
|
|
|
- ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
|
|
- bch2_trans_iter_exit(trans, &iter);
|
|
-
|
|
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
- continue;
|
|
- if (ret)
|
|
- goto err;
|
|
+ bch2_trans_iter_exit(trans, &iter);
|
|
|
|
- if (ctxt->rate)
|
|
- bch2_ratelimit_increment(ctxt->rate, sectors);
|
|
- if (ctxt->stats) {
|
|
- atomic64_add(sectors, &ctxt->stats->sectors_seen);
|
|
- atomic64_add(sectors, &ctxt->stats->sectors_moved);
|
|
- }
|
|
- sectors_moved += btree_sectors(c);
|
|
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
+ continue;
|
|
+ if (ret == -ENOMEM) {
|
|
+ /* memory allocation failure, wait for some IO to finish */
|
|
+ bch2_move_ctxt_wait_for_io(ctxt);
|
|
+ continue;
|
|
}
|
|
+ if (ret)
|
|
+ goto err;
|
|
+
|
|
+ if (ctxt->stats)
|
|
+ atomic64_add(sectors, &ctxt->stats->sectors_seen);
|
|
+ sectors_moved += sectors;
|
|
next:
|
|
bch2_btree_iter_advance(&bp_iter);
|
|
}
|
|
-
|
|
- trace_evacuate_bucket(c, &bucket, sectors_moved, ca->mi.bucket_size, ret);
|
|
err:
|
|
bch2_trans_iter_exit(trans, &bp_iter);
|
|
- bch2_dev_put(ca);
|
|
bch2_bkey_buf_exit(&sk, c);
|
|
bch2_bkey_buf_exit(&last_flushed, c);
|
|
return ret;
|
|
}
|
|
|
|
+static int bch2_move_data_phys(struct bch_fs *c,
|
|
+ unsigned dev,
|
|
+ u64 start,
|
|
+ u64 end,
|
|
+ unsigned data_types,
|
|
+ struct bch_ratelimit *rate,
|
|
+ struct bch_move_stats *stats,
|
|
+ struct write_point_specifier wp,
|
|
+ bool wait_on_copygc,
|
|
+ move_pred_fn pred, void *arg)
|
|
+{
|
|
+ struct moving_context ctxt;
|
|
+
|
|
+ bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans));
|
|
+
|
|
+ bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
|
|
+ ctxt.stats->phys = true;
|
|
+ ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys;
|
|
+
|
|
+ int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
|
|
+ bch2_moving_ctxt_exit(&ctxt);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+struct evacuate_bucket_arg {
|
|
+ struct bpos bucket;
|
|
+ int gen;
|
|
+ struct data_update_opts data_opts;
|
|
+};
|
|
+
|
|
+static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k,
|
|
+ struct bch_io_opts *io_opts,
|
|
+ struct data_update_opts *data_opts)
|
|
+{
|
|
+ struct evacuate_bucket_arg *arg = _arg;
|
|
+
|
|
+ *data_opts = arg->data_opts;
|
|
+
|
|
+ unsigned i = 0;
|
|
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
|
|
+ if (ptr->dev == arg->bucket.inode &&
|
|
+ (arg->gen < 0 || arg->gen == ptr->gen) &&
|
|
+ !ptr->cached)
|
|
+ data_opts->rewrite_ptrs |= BIT(i);
|
|
+ i++;
|
|
+ }
|
|
+
|
|
+ return data_opts->rewrite_ptrs != 0;
|
|
+}
|
|
+
|
|
+int bch2_evacuate_bucket(struct moving_context *ctxt,
|
|
+ struct move_bucket_in_flight *bucket_in_flight,
|
|
+ struct bpos bucket, int gen,
|
|
+ struct data_update_opts data_opts)
|
|
+{
|
|
+ struct evacuate_bucket_arg arg = { bucket, gen, data_opts, };
|
|
+
|
|
+ return __bch2_move_data_phys(ctxt, bucket_in_flight,
|
|
+ bucket.inode,
|
|
+ bucket.offset,
|
|
+ bucket.offset + 1,
|
|
+ ~0,
|
|
+ evacuate_bucket_pred, &arg);
|
|
+}
|
|
+
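
Bucket evacuation is now just a predicate driven through __bch2_move_data_phys(): evacuate_bucket_pred() marks the pointers living on the target device for rewrite, and the generic physical-move loop does the rest. A minimal sketch of another such predicate (illustrative only, not part of this patch; the function name and its argument are hypothetical) could look like:

static bool example_rewrite_dev_pred(struct bch_fs *c, void *_arg,
				     struct bkey_s_c k,
				     struct bch_io_opts *io_opts,
				     struct data_update_opts *data_opts)
{
	unsigned *dev = _arg;	/* device index to drain, caller-supplied */
	unsigned i = 0;

	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
		/* rewrite every non-cached pointer on that device */
		if (ptr->dev == *dev && !ptr->cached)
			data_opts->rewrite_ptrs |= BIT(i);
		i++;
	}

	return data_opts->rewrite_ptrs != 0;
}

Such a predicate would be passed to __bch2_move_data_phys() the same way evacuate_bucket_pred is above.
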
|
|
typedef bool (*move_btree_pred)(struct bch_fs *, void *,
|
|
struct btree *, struct bch_io_opts *,
|
|
struct data_update_opts *);
|
|
@@ -1007,14 +1049,6 @@ static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
|
|
return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
|
|
}
|
|
|
|
-static bool migrate_btree_pred(struct bch_fs *c, void *arg,
|
|
- struct btree *b,
|
|
- struct bch_io_opts *io_opts,
|
|
- struct data_update_opts *data_opts)
|
|
-{
|
|
- return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
|
|
-}
|
|
-
|
|
/*
|
|
* Ancient versions of bcachefs produced packed formats which could represent
|
|
* keys that the in memory format cannot represent; this checks for those
|
|
@@ -1104,6 +1138,30 @@ static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
|
|
return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
|
|
}
|
|
|
|
+static bool scrub_pred(struct bch_fs *c, void *_arg,
|
|
+ struct bkey_s_c k,
|
|
+ struct bch_io_opts *io_opts,
|
|
+ struct data_update_opts *data_opts)
|
|
+{
|
|
+ struct bch_ioctl_data *arg = _arg;
|
|
+
|
|
+ if (k.k->type != KEY_TYPE_btree_ptr_v2) {
|
|
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
|
+ const union bch_extent_entry *entry;
|
|
+ struct extent_ptr_decoded p;
|
|
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
|
|
+ if (p.ptr.dev == arg->migrate.dev) {
|
|
+ if (!p.crc.csum_type)
|
|
+ return false;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ data_opts->scrub = true;
|
|
+ data_opts->read_dev = arg->migrate.dev;
|
|
+ return true;
|
|
+}
|
|
+
|
|
int bch2_data_job(struct bch_fs *c,
|
|
struct bch_move_stats *stats,
|
|
struct bch_ioctl_data op)
|
|
@@ -1118,6 +1176,22 @@ int bch2_data_job(struct bch_fs *c,
|
|
bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]);
|
|
|
|
switch (op.op) {
|
|
+ case BCH_DATA_OP_scrub:
|
|
+ /*
|
|
+ * prevent tests from spuriously failing, make sure we see all
|
|
+ * btree nodes that need to be repaired
|
|
+ */
|
|
+ bch2_btree_interior_updates_flush(c);
|
|
+
|
|
+ ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX,
|
|
+ op.scrub.data_types,
|
|
+ NULL,
|
|
+ stats,
|
|
+ writepoint_hashed((unsigned long) current),
|
|
+ false,
|
|
+ scrub_pred, &op) ?: ret;
|
|
+ break;
|
|
+
|
|
case BCH_DATA_OP_rereplicate:
|
|
stats->data_type = BCH_DATA_journal;
|
|
ret = bch2_journal_flush_device_pins(&c->journal, -1);
|
|
@@ -1137,14 +1211,14 @@ int bch2_data_job(struct bch_fs *c,
|
|
|
|
stats->data_type = BCH_DATA_journal;
|
|
ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
|
|
- ret = bch2_move_btree(c, start, end,
|
|
- migrate_btree_pred, &op, stats) ?: ret;
|
|
- ret = bch2_move_data(c, start, end,
|
|
- NULL,
|
|
- stats,
|
|
- writepoint_hashed((unsigned long) current),
|
|
- true,
|
|
- migrate_pred, &op) ?: ret;
|
|
+ ret = bch2_move_data_phys(c, op.migrate.dev, 0, U64_MAX,
|
|
+ ~0,
|
|
+ NULL,
|
|
+ stats,
|
|
+ writepoint_hashed((unsigned long) current),
|
|
+ true,
|
|
+ migrate_pred, &op) ?: ret;
|
|
+ bch2_btree_interior_updates_flush(c);
|
|
ret = bch2_replicas_gc2(c) ?: ret;
|
|
break;
|
|
case BCH_DATA_OP_rewrite_old_nodes:
|
|
@@ -1216,7 +1290,7 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str
|
|
|
|
mutex_lock(&ctxt->lock);
|
|
list_for_each_entry(io, &ctxt->ios, io_list)
|
|
- bch2_write_op_to_text(out, &io->write.op);
|
|
+ bch2_data_update_inflight_to_text(out, &io->write);
|
|
mutex_unlock(&ctxt->lock);
|
|
|
|
printbuf_indent_sub(out, 4);
diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h
index e22841ef31e4..82e473ed48d2 100644
--- a/fs/bcachefs/move_types.h
+++ b/fs/bcachefs/move_types.h
@@ -3,17 +3,31 @@
 #define _BCACHEFS_MOVE_TYPES_H
 
 #include "bbpos_types.h"
+#include "bcachefs_ioctl.h"
 
 struct bch_move_stats {
-	enum bch_data_type	data_type;
-	struct bbpos		pos;
 	char			name[32];
+	bool			phys;
+	enum bch_ioctl_data_event_ret	ret;
+
+	union {
+	struct {
+		enum bch_data_type	data_type;
+		struct bbpos		pos;
+	};
+	struct {
+		unsigned		dev;
+		u64			offset;
+	};
+	};
 
 	atomic64_t		keys_moved;
 	atomic64_t		keys_raced;
 	atomic64_t		sectors_seen;
 	atomic64_t		sectors_moved;
 	atomic64_t		sectors_raced;
+	atomic64_t		sectors_error_corrected;
+	atomic64_t		sectors_error_uncorrected;
 };
 
 struct move_bucket_key {
diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c
new file mode 100644
index 000000000000..bafd1c91a802
--- /dev/null
+++ b/fs/bcachefs/progress.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "bbpos.h"
+#include "disk_accounting.h"
+#include "progress.h"
+
+void bch2_progress_init(struct progress_indicator_state *s,
+			struct bch_fs *c,
+			u64 btree_id_mask)
+{
+	memset(s, 0, sizeof(*s));
+
+	s->next_print = jiffies + HZ * 10;
+
+	for (unsigned i = 0; i < BTREE_ID_NR; i++) {
+		if (!(btree_id_mask & BIT_ULL(i)))
+			continue;
+
+		struct disk_accounting_pos acc = {
+			.type		= BCH_DISK_ACCOUNTING_btree,
+			.btree.id	= i,
+		};
+
+		u64 v;
+		bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
+		s->nodes_total += div64_ul(v, btree_sectors(c));
+	}
+}
+
+static inline bool progress_update_p(struct progress_indicator_state *s)
+{
+	bool ret = time_after_eq(jiffies, s->next_print);
+
+	if (ret)
+		s->next_print = jiffies + HZ * 10;
+	return ret;
+}
+
+void bch2_progress_update_iter(struct btree_trans *trans,
+			       struct progress_indicator_state *s,
+			       struct btree_iter *iter,
+			       const char *msg)
+{
+	struct bch_fs *c = trans->c;
+	struct btree *b = path_l(btree_iter_path(trans, iter))->b;
+
+	s->nodes_seen += b != s->last_node;
+	s->last_node = b;
+
+	if (progress_update_p(s)) {
+		struct printbuf buf = PRINTBUF;
+		unsigned percent = s->nodes_total
+			? div64_u64(s->nodes_seen * 100, s->nodes_total)
+			: 0;
+
+		prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ",
+			   msg, percent, s->nodes_seen, s->nodes_total);
+		bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos));
+
+		bch_info(c, "%s", buf.buf);
+		printbuf_exit(&buf);
+	}
+}
diff --git a/fs/bcachefs/progress.h b/fs/bcachefs/progress.h
new file mode 100644
index 000000000000..23fb1811f943
--- /dev/null
+++ b/fs/bcachefs/progress.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_PROGRESS_H
+#define _BCACHEFS_PROGRESS_H
+
+/*
+ * Lame progress indicators
+ *
+ * We don't like to use these because they print to the dmesg console, which is
+ * spammy - we much prefer to be wired up to a userspace programm (e.g. via
+ * thread_with_file) and have it print the progress indicator.
+ *
+ * But some code is old and doesn't support that, or runs in a context where
+ * that's not yet practical (mount).
+ */
+
+struct progress_indicator_state {
+	unsigned long		next_print;
+	u64			nodes_seen;
+	u64			nodes_total;
+	struct btree		*last_node;
+};
+
+void bch2_progress_init(struct progress_indicator_state *, struct bch_fs *, u64);
+void bch2_progress_update_iter(struct btree_trans *,
+			       struct progress_indicator_state *,
+			       struct btree_iter *,
+			       const char *);
+
+#endif /* _BCACHEFS_PROGRESS_H */
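
For context, a minimal usage sketch of this new API (illustrative only, not part of the patch; the wrapper function is hypothetical and assumes the usual bch2_trans_run()/for_each_btree_key() helpers): initialize the indicator with the set of btrees being walked, then call the update hook from inside the iteration loop.

static int example_walk_extents(struct bch_fs *c)
{
	struct progress_indicator_state progress;

	/* only the extents btree counts toward nodes_total */
	bch2_progress_init(&progress, c, BIT_ULL(BTREE_ID_extents));

	return bch2_trans_run(c,
		for_each_btree_key(trans, iter, BTREE_ID_extents, POS_MIN,
				   BTREE_ITER_prefetch, k, ({
			/* logs "example_walk_extents: N%% ..." every ~10s */
			bch2_progress_update_iter(trans, &progress, &iter,
						  "example_walk_extents");
			0;
		})));
}
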
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
|
|
index d0a1f5cd5c2b..58f6d97e506c 100644
|
|
--- a/fs/bcachefs/rebalance.c
|
|
+++ b/fs/bcachefs/rebalance.c
|
|
@@ -341,7 +341,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
|
|
memset(data_opts, 0, sizeof(*data_opts));
|
|
data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k);
|
|
data_opts->target = io_opts->background_target;
|
|
- data_opts->write_flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
|
|
+ data_opts->write_flags |= BCH_WRITE_only_specified_devs;
|
|
|
|
if (!data_opts->rewrite_ptrs) {
|
|
/*
|
|
@@ -449,7 +449,7 @@ static bool rebalance_pred(struct bch_fs *c, void *arg,
|
|
{
|
|
data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k);
|
|
data_opts->target = io_opts->background_target;
|
|
- data_opts->write_flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
|
|
+ data_opts->write_flags |= BCH_WRITE_only_specified_devs;
|
|
return data_opts->rewrite_ptrs != 0;
|
|
}
|
|
|
|
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
|
|
index 98825437381c..71c786cdb192 100644
|
|
--- a/fs/bcachefs/recovery.c
|
|
+++ b/fs/bcachefs/recovery.c
|
|
@@ -32,7 +32,6 @@
|
|
#include <linux/sort.h>
|
|
#include <linux/stat.h>
|
|
|
|
-#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
|
|
|
|
int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
|
|
{
|
|
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
|
|
index 376fd0a6e868..33b656c01942 100644
|
|
--- a/fs/bcachefs/reflink.c
|
|
+++ b/fs/bcachefs/reflink.c
|
|
@@ -185,12 +185,21 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans,
|
|
BUG_ON(missing_start < refd_start);
|
|
BUG_ON(missing_end > refd_end);
|
|
|
|
- if (fsck_err(trans, reflink_p_to_missing_reflink_v,
|
|
- "pointer to missing indirect extent\n"
|
|
- " %s\n"
|
|
- " missing range %llu-%llu",
|
|
- (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf),
|
|
- missing_start, missing_end)) {
|
|
+ struct bpos missing_pos = bkey_start_pos(p.k);
|
|
+ missing_pos.offset += missing_start - live_start;
|
|
+
|
|
+ prt_printf(&buf, "pointer to missing indirect extent in ");
|
|
+ ret = bch2_inum_snap_offset_err_msg_trans(trans, &buf, missing_pos);
|
|
+ if (ret)
|
|
+ goto err;
|
|
+
|
|
+ prt_printf(&buf, "-%llu\n ", (missing_pos.offset + (missing_end - missing_start)) << 9);
|
|
+ bch2_bkey_val_to_text(&buf, c, p.s_c);
|
|
+
|
|
+ prt_printf(&buf, "\n missing reflink btree range %llu-%llu",
|
|
+ missing_start, missing_end);
|
|
+
|
|
+ if (fsck_err(trans, reflink_p_to_missing_reflink_v, "%s", buf.buf)) {
|
|
struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p);
|
|
ret = PTR_ERR_OR_ZERO(new);
|
|
if (ret)
|
|
diff --git a/fs/bcachefs/sb-counters.c b/fs/bcachefs/sb-counters.c
|
|
index 6992e7469112..2b4b8445d418 100644
|
|
--- a/fs/bcachefs/sb-counters.c
|
|
+++ b/fs/bcachefs/sb-counters.c
|
|
@@ -5,7 +5,13 @@
|
|
|
|
/* BCH_SB_FIELD_counters */
|
|
|
|
-static const char * const bch2_counter_names[] = {
|
|
+static const u8 counters_to_stable_map[] = {
|
|
+#define x(n, id, ...) [BCH_COUNTER_##n] = BCH_COUNTER_STABLE_##n,
|
|
+ BCH_PERSISTENT_COUNTERS()
|
|
+#undef x
|
|
+};
|
|
+
|
|
+const char * const bch2_counter_names[] = {
|
|
#define x(t, n, ...) (#t),
|
|
BCH_PERSISTENT_COUNTERS()
|
|
#undef x
|
|
@@ -18,13 +24,13 @@ static size_t bch2_sb_counter_nr_entries(struct bch_sb_field_counters *ctrs)
|
|
return 0;
|
|
|
|
return (__le64 *) vstruct_end(&ctrs->field) - &ctrs->d[0];
|
|
-};
|
|
+}
|
|
|
|
static int bch2_sb_counters_validate(struct bch_sb *sb, struct bch_sb_field *f,
|
|
enum bch_validate_flags flags, struct printbuf *err)
|
|
{
|
|
return 0;
|
|
-};
|
|
+}
|
|
|
|
static void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb,
|
|
struct bch_sb_field *f)
|
|
@@ -32,50 +38,56 @@ static void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb,
|
|
struct bch_sb_field_counters *ctrs = field_to_type(f, counters);
|
|
unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
|
|
|
|
- for (unsigned i = 0; i < nr; i++)
|
|
- prt_printf(out, "%s \t%llu\n",
|
|
- i < BCH_COUNTER_NR ? bch2_counter_names[i] : "(unknown)",
|
|
- le64_to_cpu(ctrs->d[i]));
|
|
-};
|
|
+ for (unsigned i = 0; i < BCH_COUNTER_NR; i++) {
|
|
+ unsigned stable = counters_to_stable_map[i];
|
|
+ if (stable < nr)
|
|
+ prt_printf(out, "%s \t%llu\n",
|
|
+ bch2_counter_names[i],
|
|
+ le64_to_cpu(ctrs->d[stable]));
|
|
+ }
|
|
+}
|
|
|
|
int bch2_sb_counters_to_cpu(struct bch_fs *c)
|
|
{
|
|
struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters);
|
|
- unsigned int i;
|
|
unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
|
|
- u64 val = 0;
|
|
|
|
- for (i = 0; i < BCH_COUNTER_NR; i++)
|
|
+ for (unsigned i = 0; i < BCH_COUNTER_NR; i++)
|
|
c->counters_on_mount[i] = 0;
|
|
|
|
- for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++) {
|
|
- val = le64_to_cpu(ctrs->d[i]);
|
|
- percpu_u64_set(&c->counters[i], val);
|
|
- c->counters_on_mount[i] = val;
|
|
+ for (unsigned i = 0; i < BCH_COUNTER_NR; i++) {
|
|
+ unsigned stable = counters_to_stable_map[i];
|
|
+ if (stable < nr) {
|
|
+ u64 v = le64_to_cpu(ctrs->d[stable]);
|
|
+ percpu_u64_set(&c->counters[i], v);
|
|
+ c->counters_on_mount[i] = v;
|
|
+ }
|
|
}
|
|
+
|
|
return 0;
|
|
-};
|
|
+}
|
|
|
|
int bch2_sb_counters_from_cpu(struct bch_fs *c)
|
|
{
|
|
struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters);
|
|
struct bch_sb_field_counters *ret;
|
|
- unsigned int i;
|
|
unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
|
|
|
|
if (nr < BCH_COUNTER_NR) {
|
|
ret = bch2_sb_field_resize(&c->disk_sb, counters,
|
|
- sizeof(*ctrs) / sizeof(u64) + BCH_COUNTER_NR);
|
|
-
|
|
+ sizeof(*ctrs) / sizeof(u64) + BCH_COUNTER_NR);
|
|
if (ret) {
|
|
ctrs = ret;
|
|
nr = bch2_sb_counter_nr_entries(ctrs);
|
|
}
|
|
}
|
|
|
|
+ for (unsigned i = 0; i < BCH_COUNTER_NR; i++) {
|
|
+ unsigned stable = counters_to_stable_map[i];
|
|
+ if (stable < nr)
|
|
+ ctrs->d[stable] = cpu_to_le64(percpu_u64_get(&c->counters[i]));
|
|
+ }
|
|
|
|
- for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++)
|
|
- ctrs->d[i] = cpu_to_le64(percpu_u64_get(&c->counters[i]));
|
|
return 0;
|
|
}
|
|
|
|
@@ -97,3 +109,39 @@ const struct bch_sb_field_ops bch_sb_field_ops_counters = {
|
|
.validate = bch2_sb_counters_validate,
|
|
.to_text = bch2_sb_counters_to_text,
|
|
};
|
|
+
|
|
+#ifndef NO_BCACHEFS_CHARDEV
|
|
+long bch2_ioctl_query_counters(struct bch_fs *c,
|
|
+ struct bch_ioctl_query_counters __user *user_arg)
|
|
+{
|
|
+ struct bch_ioctl_query_counters arg;
|
|
+ int ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg));
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ if ((arg.flags & ~BCH_IOCTL_QUERY_COUNTERS_MOUNT) ||
|
|
+ arg.pad)
|
|
+ return -EINVAL;
|
|
+
|
|
+ arg.nr = min(arg.nr, BCH_COUNTER_NR);
|
|
+ ret = put_user(arg.nr, &user_arg->nr);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ for (unsigned i = 0; i < BCH_COUNTER_NR; i++) {
|
|
+ unsigned stable = counters_to_stable_map[i];
|
|
+
|
|
+ if (stable < arg.nr) {
|
|
+ u64 v = !(arg.flags & BCH_IOCTL_QUERY_COUNTERS_MOUNT)
|
|
+ ? percpu_u64_get(&c->counters[i])
|
|
+ : c->counters_on_mount[i];
|
|
+
|
|
+ ret = put_user(v, &user_arg->d[stable]);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+#endif
|
|
diff --git a/fs/bcachefs/sb-counters.h b/fs/bcachefs/sb-counters.h
|
|
index 81f8aec9fcb1..a4329ad8dd1b 100644
|
|
--- a/fs/bcachefs/sb-counters.h
|
|
+++ b/fs/bcachefs/sb-counters.h
|
|
@@ -11,6 +11,10 @@ int bch2_sb_counters_from_cpu(struct bch_fs *);
|
|
void bch2_fs_counters_exit(struct bch_fs *);
|
|
int bch2_fs_counters_init(struct bch_fs *);
|
|
|
|
+extern const char * const bch2_counter_names[];
|
|
extern const struct bch_sb_field_ops bch_sb_field_ops_counters;
|
|
|
|
+long bch2_ioctl_query_counters(struct bch_fs *,
|
|
+ struct bch_ioctl_query_counters __user *);
|
|
+
|
|
#endif // _BCACHEFS_SB_COUNTERS_H
|
|
diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h
|
|
index fdcf598f08b1..c82a891026d3 100644
|
|
--- a/fs/bcachefs/sb-counters_format.h
|
|
+++ b/fs/bcachefs/sb-counters_format.h
|
|
@@ -9,10 +9,23 @@ enum counters_flags {
|
|
|
|
#define BCH_PERSISTENT_COUNTERS() \
|
|
x(io_read, 0, TYPE_SECTORS) \
|
|
+ x(io_read_inline, 80, TYPE_SECTORS) \
|
|
+ x(io_read_hole, 81, TYPE_SECTORS) \
|
|
+ x(io_read_promote, 30, TYPE_COUNTER) \
|
|
+ x(io_read_bounce, 31, TYPE_COUNTER) \
|
|
+ x(io_read_split, 33, TYPE_COUNTER) \
|
|
+ x(io_read_reuse_race, 34, TYPE_COUNTER) \
|
|
+ x(io_read_retry, 32, TYPE_COUNTER) \
|
|
x(io_write, 1, TYPE_SECTORS) \
|
|
x(io_move, 2, TYPE_SECTORS) \
|
|
+ x(io_move_read, 35, TYPE_SECTORS) \
|
|
+ x(io_move_write, 36, TYPE_SECTORS) \
|
|
+ x(io_move_finish, 37, TYPE_SECTORS) \
|
|
+ x(io_move_fail, 38, TYPE_COUNTER) \
|
|
+ x(io_move_start_fail, 39, TYPE_COUNTER) \
|
|
x(bucket_invalidate, 3, TYPE_COUNTER) \
|
|
x(bucket_discard, 4, TYPE_COUNTER) \
|
|
+ x(bucket_discard_fast, 79, TYPE_COUNTER) \
|
|
x(bucket_alloc, 5, TYPE_COUNTER) \
|
|
x(bucket_alloc_fail, 6, TYPE_COUNTER) \
|
|
x(btree_cache_scan, 7, TYPE_COUNTER) \
|
|
@@ -38,16 +51,6 @@ enum counters_flags {
|
|
x(journal_reclaim_finish, 27, TYPE_COUNTER) \
|
|
x(journal_reclaim_start, 28, TYPE_COUNTER) \
|
|
x(journal_write, 29, TYPE_COUNTER) \
|
|
- x(read_promote, 30, TYPE_COUNTER) \
|
|
- x(read_bounce, 31, TYPE_COUNTER) \
|
|
- x(read_split, 33, TYPE_COUNTER) \
|
|
- x(read_retry, 32, TYPE_COUNTER) \
|
|
- x(read_reuse_race, 34, TYPE_COUNTER) \
|
|
- x(move_extent_read, 35, TYPE_SECTORS) \
|
|
- x(move_extent_write, 36, TYPE_SECTORS) \
|
|
- x(move_extent_finish, 37, TYPE_SECTORS) \
|
|
- x(move_extent_fail, 38, TYPE_COUNTER) \
|
|
- x(move_extent_start_fail, 39, TYPE_COUNTER) \
|
|
x(copygc, 40, TYPE_COUNTER) \
|
|
x(copygc_wait, 41, TYPE_COUNTER) \
|
|
x(gc_gens_end, 42, TYPE_COUNTER) \
|
|
@@ -95,6 +98,13 @@ enum bch_persistent_counters {
|
|
BCH_COUNTER_NR
|
|
};
|
|
|
|
+enum bch_persistent_counters_stable {
|
|
+#define x(t, n, ...) BCH_COUNTER_STABLE_##t = n,
|
|
+ BCH_PERSISTENT_COUNTERS()
|
|
+#undef x
|
|
+ BCH_COUNTER_STABLE_NR
|
|
+};
|
|
+
|
|
struct bch_sb_field_counters {
|
|
struct bch_sb_field field;
|
|
__le64 d[];
|
|
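
The net effect of the stable numbering above: the enum order (and the counter names) can change freely, while the explicit id in each x() entry fixes the on-disk slot. A small illustration of looking a counter up by its stable index (not part of the patch; the helper is hypothetical and would have to live next to counters_to_stable_map in sb-counters.c):

static u64 example_read_sb_counter(struct bch_sb_field_counters *ctrs,
				   unsigned nr,	/* bch2_sb_counter_nr_entries() */
				   enum bch_persistent_counters i)
{
	/* translate in-memory enum order to the stable on-disk slot */
	unsigned stable = counters_to_stable_map[i];

	return stable < nr ? le64_to_cpu(ctrs->d[stable]) : 0;
}
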
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
|
|
index 762083b564ee..b29b6c6c21dd 100644
|
|
--- a/fs/bcachefs/sb-members.h
|
|
+++ b/fs/bcachefs/sb-members.h
|
|
@@ -23,6 +23,18 @@ static inline bool bch2_dev_is_online(struct bch_dev *ca)
|
|
return !percpu_ref_is_zero(&ca->io_ref);
|
|
}
|
|
|
|
+static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned);
|
|
+
|
|
+static inline bool bch2_dev_idx_is_online(struct bch_fs *c, unsigned dev)
|
|
+{
|
|
+ rcu_read_lock();
|
|
+ struct bch_dev *ca = bch2_dev_rcu(c, dev);
|
|
+ bool ret = ca && bch2_dev_is_online(ca);
|
|
+ rcu_read_unlock();
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
static inline bool bch2_dev_is_readable(struct bch_dev *ca)
|
|
{
|
|
return bch2_dev_is_online(ca) &&
|
|
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
|
|
index c54091a28909..e7f197896db1 100644
|
|
--- a/fs/bcachefs/snapshot.c
|
|
+++ b/fs/bcachefs/snapshot.c
|
|
@@ -146,8 +146,9 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
|
|
goto out;
|
|
}
|
|
|
|
- while (id && id < ancestor - IS_ANCESTOR_BITMAP)
|
|
- id = get_ancestor_below(t, id, ancestor);
|
|
+ if (likely(ancestor >= IS_ANCESTOR_BITMAP))
|
|
+ while (id && id < ancestor - IS_ANCESTOR_BITMAP)
|
|
+ id = get_ancestor_below(t, id, ancestor);
|
|
|
|
ret = id && id < ancestor
|
|
? test_ancestor_bitmap(t, id, ancestor)
|
|
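
The new likely() guard appears to protect against unsigned wraparound: with u32 arithmetic, ancestor - IS_ANCESTOR_BITMAP underflows whenever ancestor < IS_ANCESTOR_BITMAP, so the old loop condition could hold for any nonzero id and the walk could overshoot. Illustration (hypothetical values, not from the patch):

	/* u32 wraparound, illustrative values only: */
	u32 ancestor = 10;				/* < IS_ANCESTOR_BITMAP */
	u32 limit = ancestor - IS_ANCESTOR_BITMAP;	/* wraps to a huge value */
	/* old condition "id < limit" is then true for any nonzero id */
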
@@ -389,7 +390,7 @@ static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
|
|
return 0;
|
|
}
|
|
|
|
-static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
|
|
+u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
|
|
{
|
|
u32 id = snapshot_root;
|
|
u32 subvol = 0, s;
|
|
diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
|
|
index 00373cf32e7b..81180181d7c9 100644
|
|
--- a/fs/bcachefs/snapshot.h
|
|
+++ b/fs/bcachefs/snapshot.h
|
|
@@ -105,6 +105,7 @@ static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
|
|
return id;
|
|
}
|
|
|
|
+u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *, u32);
|
|
u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32);
|
|
|
|
static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
|
|
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
|
|
index a7eb1f511484..b3f2c651c1f8 100644
|
|
--- a/fs/bcachefs/sysfs.c
|
|
+++ b/fs/bcachefs/sysfs.c
|
|
@@ -176,7 +176,6 @@ read_attribute(btree_reserve_cache);
|
|
read_attribute(stripes_heap);
|
|
read_attribute(open_buckets);
|
|
read_attribute(open_buckets_partial);
|
|
-read_attribute(write_points);
|
|
read_attribute(nocow_lock_table);
|
|
|
|
#ifdef BCH_WRITE_REF_DEBUG
|
|
@@ -364,9 +363,6 @@ SHOW(bch2_fs)
|
|
if (attr == &sysfs_open_buckets_partial)
|
|
bch2_open_buckets_partial_to_text(out, c);
|
|
|
|
- if (attr == &sysfs_write_points)
|
|
- bch2_write_points_to_text(out, c);
|
|
-
|
|
if (attr == &sysfs_compression_stats)
|
|
bch2_compression_stats_to_text(out, c);
|
|
|
|
@@ -569,7 +565,6 @@ struct attribute *bch2_fs_internal_files[] = {
|
|
&sysfs_stripes_heap,
|
|
&sysfs_open_buckets,
|
|
&sysfs_open_buckets_partial,
|
|
- &sysfs_write_points,
|
|
#ifdef BCH_WRITE_REF_DEBUG
|
|
&sysfs_write_refs,
|
|
#endif
|
|
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
|
|
index c1b51009edf6..5718988dd7d6 100644
|
|
--- a/fs/bcachefs/trace.h
|
|
+++ b/fs/bcachefs/trace.h
|
|
@@ -295,12 +295,12 @@ TRACE_EVENT(write_super,
|
|
|
|
/* io.c: */
|
|
|
|
-DEFINE_EVENT(bio, read_promote,
|
|
+DEFINE_EVENT(bio, io_read_promote,
|
|
TP_PROTO(struct bio *bio),
|
|
TP_ARGS(bio)
|
|
);
|
|
|
|
-TRACE_EVENT(read_nopromote,
|
|
+TRACE_EVENT(io_read_nopromote,
|
|
TP_PROTO(struct bch_fs *c, int ret),
|
|
TP_ARGS(c, ret),
|
|
|
|
@@ -319,22 +319,22 @@ TRACE_EVENT(read_nopromote,
|
|
__entry->ret)
|
|
);
|
|
|
|
-DEFINE_EVENT(bio, read_bounce,
|
|
+DEFINE_EVENT(bio, io_read_bounce,
|
|
TP_PROTO(struct bio *bio),
|
|
TP_ARGS(bio)
|
|
);
|
|
|
|
-DEFINE_EVENT(bio, read_split,
|
|
+DEFINE_EVENT(bio, io_read_split,
|
|
TP_PROTO(struct bio *bio),
|
|
TP_ARGS(bio)
|
|
);
|
|
|
|
-DEFINE_EVENT(bio, read_retry,
|
|
+DEFINE_EVENT(bio, io_read_retry,
|
|
TP_PROTO(struct bio *bio),
|
|
TP_ARGS(bio)
|
|
);
|
|
|
|
-DEFINE_EVENT(bio, read_reuse_race,
|
|
+DEFINE_EVENT(bio, io_read_reuse_race,
|
|
TP_PROTO(struct bio *bio),
|
|
TP_ARGS(bio)
|
|
);
|
|
@@ -797,53 +797,32 @@ TRACE_EVENT(bucket_invalidate,
|
|
|
|
/* Moving IO */
|
|
|
|
-TRACE_EVENT(bucket_evacuate,
|
|
- TP_PROTO(struct bch_fs *c, struct bpos *bucket),
|
|
- TP_ARGS(c, bucket),
|
|
-
|
|
- TP_STRUCT__entry(
|
|
- __field(dev_t, dev )
|
|
- __field(u32, dev_idx )
|
|
- __field(u64, bucket )
|
|
- ),
|
|
-
|
|
- TP_fast_assign(
|
|
- __entry->dev = c->dev;
|
|
- __entry->dev_idx = bucket->inode;
|
|
- __entry->bucket = bucket->offset;
|
|
- ),
|
|
-
|
|
- TP_printk("%d:%d %u:%llu",
|
|
- MAJOR(__entry->dev), MINOR(__entry->dev),
|
|
- __entry->dev_idx, __entry->bucket)
|
|
-);
|
|
-
|
|
-DEFINE_EVENT(fs_str, move_extent,
|
|
+DEFINE_EVENT(fs_str, io_move,
|
|
TP_PROTO(struct bch_fs *c, const char *str),
|
|
TP_ARGS(c, str)
|
|
);
|
|
|
|
-DEFINE_EVENT(fs_str, move_extent_read,
|
|
+DEFINE_EVENT(fs_str, io_move_read,
|
|
TP_PROTO(struct bch_fs *c, const char *str),
|
|
TP_ARGS(c, str)
|
|
);
|
|
|
|
-DEFINE_EVENT(fs_str, move_extent_write,
|
|
+DEFINE_EVENT(fs_str, io_move_write,
|
|
TP_PROTO(struct bch_fs *c, const char *str),
|
|
TP_ARGS(c, str)
|
|
);
|
|
|
|
-DEFINE_EVENT(fs_str, move_extent_finish,
|
|
+DEFINE_EVENT(fs_str, io_move_finish,
|
|
TP_PROTO(struct bch_fs *c, const char *str),
|
|
TP_ARGS(c, str)
|
|
);
|
|
|
|
-DEFINE_EVENT(fs_str, move_extent_fail,
|
|
+DEFINE_EVENT(fs_str, io_move_fail,
|
|
TP_PROTO(struct bch_fs *c, const char *str),
|
|
TP_ARGS(c, str)
|
|
);
|
|
|
|
-DEFINE_EVENT(fs_str, move_extent_start_fail,
|
|
+DEFINE_EVENT(fs_str, io_move_start_fail,
|
|
TP_PROTO(struct bch_fs *c, const char *str),
|
|
TP_ARGS(c, str)
|
|
);
|
|
@@ -881,37 +860,6 @@ TRACE_EVENT(move_data,
|
|
__entry->sectors_raced)
|
|
);
|
|
|
|
-TRACE_EVENT(evacuate_bucket,
|
|
- TP_PROTO(struct bch_fs *c, struct bpos *bucket,
|
|
- unsigned sectors, unsigned bucket_size,
|
|
- int ret),
|
|
- TP_ARGS(c, bucket, sectors, bucket_size, ret),
|
|
-
|
|
- TP_STRUCT__entry(
|
|
- __field(dev_t, dev )
|
|
- __field(u64, member )
|
|
- __field(u64, bucket )
|
|
- __field(u32, sectors )
|
|
- __field(u32, bucket_size )
|
|
- __field(int, ret )
|
|
- ),
|
|
-
|
|
- TP_fast_assign(
|
|
- __entry->dev = c->dev;
|
|
- __entry->member = bucket->inode;
|
|
- __entry->bucket = bucket->offset;
|
|
- __entry->sectors = sectors;
|
|
- __entry->bucket_size = bucket_size;
|
|
- __entry->ret = ret;
|
|
- ),
|
|
-
|
|
- TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i",
|
|
- MAJOR(__entry->dev), MINOR(__entry->dev),
|
|
- __entry->member, __entry->bucket,
|
|
- __entry->sectors, __entry->bucket_size,
|
|
- __entry->ret)
|
|
-);
|
|
-
|
|
TRACE_EVENT(copygc,
|
|
TP_PROTO(struct bch_fs *c,
|
|
u64 buckets,
|
|
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
|
|
index e0a876cbaa6b..50a90e48f6dd 100644
|
|
--- a/fs/bcachefs/util.c
|
|
+++ b/fs/bcachefs/util.c
|
|
@@ -473,10 +473,10 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
|
|
u64 last_q = 0;
|
|
|
|
prt_printf(out, "quantiles (%s):\t", u->name);
|
|
- eytzinger0_for_each(i, NR_QUANTILES) {
|
|
- bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
|
|
+ eytzinger0_for_each(j, NR_QUANTILES) {
|
|
+ bool is_last = eytzinger0_next(j, NR_QUANTILES) == -1;
|
|
|
|
- u64 q = max(quantiles->entries[i].m, last_q);
|
|
+ u64 q = max(quantiles->entries[j].m, last_q);
|
|
prt_printf(out, "%llu ", div64_u64(q, u->nsecs));
|
|
if (is_last)
|
|
prt_newline(out);
|
|
@@ -701,9 +701,9 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
|
|
#if 0
|
|
void eytzinger1_test(void)
|
|
{
|
|
- unsigned inorder, eytz, size;
|
|
+ unsigned inorder, size;
|
|
|
|
- pr_info("1 based eytzinger test:");
|
|
+ pr_info("1 based eytzinger test:\n");
|
|
|
|
for (size = 2;
|
|
size < 65536;
|
|
@@ -711,13 +711,7 @@ void eytzinger1_test(void)
|
|
unsigned extra = eytzinger1_extra(size);
|
|
|
|
if (!(size % 4096))
|
|
- pr_info("tree size %u", size);
|
|
-
|
|
- BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size));
|
|
- BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size));
|
|
-
|
|
- BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0);
|
|
- BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0);
|
|
+ pr_info("tree size %u\n", size);
|
|
|
|
inorder = 1;
|
|
eytzinger1_for_each(eytz, size) {
|
|
@@ -728,15 +722,16 @@ void eytzinger1_test(void)
|
|
|
|
inorder++;
|
|
}
|
|
+ BUG_ON(inorder - 1 != size);
|
|
}
|
|
}
|
|
|
|
void eytzinger0_test(void)
|
|
{
|
|
|
|
- unsigned inorder, eytz, size;
|
|
+ unsigned inorder, size;
|
|
|
|
- pr_info("0 based eytzinger test:");
|
|
+ pr_info("0 based eytzinger test:\n");
|
|
|
|
for (size = 1;
|
|
size < 65536;
|
|
@@ -744,13 +739,7 @@ void eytzinger0_test(void)
|
|
unsigned extra = eytzinger0_extra(size);
|
|
|
|
if (!(size % 4096))
|
|
- pr_info("tree size %u", size);
|
|
-
|
|
- BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size));
|
|
- BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size));
|
|
-
|
|
- BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1);
|
|
- BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1);
|
|
+ pr_info("tree size %u\n", size);
|
|
|
|
inorder = 0;
|
|
eytzinger0_for_each(eytz, size) {
|
|
@@ -761,54 +750,191 @@ void eytzinger0_test(void)
|
|
|
|
inorder++;
|
|
}
|
|
+ BUG_ON(inorder != size);
|
|
+
|
|
+ inorder = size - 1;
|
|
+ eytzinger0_for_each_prev(eytz, size) {
|
|
+ BUG_ON(eytz != eytzinger0_first(size) &&
|
|
+ eytzinger0_next(eytzinger0_prev(eytz, size), size) != eytz);
|
|
+
|
|
+ inorder--;
|
|
+ }
|
|
+ BUG_ON(inorder != -1);
|
|
}
|
|
}
|
|
|
|
-static inline int cmp_u16(const void *_l, const void *_r, size_t size)
|
|
+static inline int cmp_u16(const void *_l, const void *_r)
|
|
{
|
|
const u16 *l = _l, *r = _r;
|
|
|
|
- return (*l > *r) - (*r - *l);
|
|
+ return (*l > *r) - (*r > *l);
|
|
}
|
|
|
|
-static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search)
|
|
+static void eytzinger0_find_test_le(u16 *test_array, unsigned nr, u16 search)
|
|
{
|
|
- int i, c1 = -1, c2 = -1;
|
|
- ssize_t r;
|
|
+ int r, s;
|
|
+ bool bad;
|
|
|
|
r = eytzinger0_find_le(test_array, nr,
|
|
sizeof(test_array[0]),
|
|
cmp_u16, &search);
|
|
- if (r >= 0)
|
|
- c1 = test_array[r];
|
|
-
|
|
- for (i = 0; i < nr; i++)
|
|
- if (test_array[i] <= search && test_array[i] > c2)
|
|
- c2 = test_array[i];
|
|
-
|
|
- if (c1 != c2) {
|
|
- eytzinger0_for_each(i, nr)
|
|
- pr_info("[%3u] = %12u", i, test_array[i]);
|
|
- pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i",
|
|
- i, r, c1, c2);
|
|
+ if (r >= 0) {
|
|
+ if (test_array[r] > search) {
|
|
+ bad = true;
|
|
+ } else {
|
|
+ s = eytzinger0_next(r, nr);
|
|
+ bad = s >= 0 && test_array[s] <= search;
|
|
+ }
|
|
+ } else {
|
|
+ s = eytzinger0_last(nr);
|
|
+ bad = s >= 0 && test_array[s] <= search;
|
|
+ }
|
|
+
|
|
+ if (bad) {
|
|
+ s = -1;
|
|
+ eytzinger0_for_each_prev(j, nr) {
|
|
+ if (test_array[j] <= search) {
|
|
+ s = j;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ eytzinger0_for_each(j, nr)
|
|
+ pr_info("[%3u] = %12u\n", j, test_array[j]);
|
|
+ pr_info("find_le(%12u) = %3i should be %3i\n",
|
|
+ search, r, s);
|
|
+ BUG();
|
|
+ }
|
|
+}
|
|
+
|
|
+static void eytzinger0_find_test_gt(u16 *test_array, unsigned nr, u16 search)
|
|
+{
|
|
+ int r, s;
|
|
+ bool bad;
|
|
+
|
|
+ r = eytzinger0_find_gt(test_array, nr,
|
|
+ sizeof(test_array[0]),
|
|
+ cmp_u16, &search);
|
|
+ if (r >= 0) {
|
|
+ if (test_array[r] <= search) {
|
|
+ bad = true;
|
|
+ } else {
|
|
+ s = eytzinger0_prev(r, nr);
|
|
+ bad = s >= 0 && test_array[s] > search;
|
|
+ }
|
|
+ } else {
|
|
+ s = eytzinger0_first(nr);
|
|
+ bad = s >= 0 && test_array[s] > search;
|
|
+ }
|
|
+
|
|
+ if (bad) {
|
|
+ s = -1;
|
|
+ eytzinger0_for_each(j, nr) {
|
|
+ if (test_array[j] > search) {
|
|
+ s = j;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ eytzinger0_for_each(j, nr)
|
|
+ pr_info("[%3u] = %12u\n", j, test_array[j]);
|
|
+ pr_info("find_gt(%12u) = %3i should be %3i\n",
|
|
+ search, r, s);
|
|
+ BUG();
|
|
}
|
|
}
|
|
|
|
+static void eytzinger0_find_test_ge(u16 *test_array, unsigned nr, u16 search)
|
|
+{
|
|
+ int r, s;
|
|
+ bool bad;
|
|
+
|
|
+ r = eytzinger0_find_ge(test_array, nr,
|
|
+ sizeof(test_array[0]),
|
|
+ cmp_u16, &search);
|
|
+ if (r >= 0) {
|
|
+ if (test_array[r] < search) {
|
|
+ bad = true;
|
|
+ } else {
|
|
+ s = eytzinger0_prev(r, nr);
|
|
+ bad = s >= 0 && test_array[s] >= search;
|
|
+ }
|
|
+ } else {
|
|
+ s = eytzinger0_first(nr);
|
|
+ bad = s >= 0 && test_array[s] >= search;
|
|
+ }
|
|
+
|
|
+ if (bad) {
|
|
+ s = -1;
|
|
+ eytzinger0_for_each(j, nr) {
|
|
+ if (test_array[j] >= search) {
|
|
+ s = j;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ eytzinger0_for_each(j, nr)
|
|
+ pr_info("[%3u] = %12u\n", j, test_array[j]);
|
|
+ pr_info("find_ge(%12u) = %3i should be %3i\n",
|
|
+ search, r, s);
|
|
+ BUG();
|
|
+ }
|
|
+}
|
|
+
|
|
+static void eytzinger0_find_test_eq(u16 *test_array, unsigned nr, u16 search)
|
|
+{
|
|
+ unsigned r;
|
|
+ int s;
|
|
+ bool bad;
|
|
+
|
|
+ r = eytzinger0_find(test_array, nr,
|
|
+ sizeof(test_array[0]),
|
|
+ cmp_u16, &search);
|
|
+
|
|
+ if (r < nr) {
|
|
+ bad = test_array[r] != search;
|
|
+ } else {
|
|
+ s = eytzinger0_find_le(test_array, nr,
|
|
+ sizeof(test_array[0]),
|
|
+ cmp_u16, &search);
|
|
+ bad = s >= 0 && test_array[s] == search;
|
|
+ }
|
|
+
|
|
+ if (bad) {
|
|
+ eytzinger0_for_each(j, nr)
|
|
+ pr_info("[%3u] = %12u\n", j, test_array[j]);
|
|
+ pr_info("find(%12u) = %3i is incorrect\n",
|
|
+ search, r);
|
|
+ BUG();
|
|
+ }
|
|
+}
|
|
+
|
|
+static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search)
|
|
+{
|
|
+ eytzinger0_find_test_le(test_array, nr, search);
|
|
+ eytzinger0_find_test_gt(test_array, nr, search);
|
|
+ eytzinger0_find_test_ge(test_array, nr, search);
|
|
+ eytzinger0_find_test_eq(test_array, nr, search);
|
|
+}
|
|
+
|
|
void eytzinger0_find_test(void)
|
|
{
|
|
unsigned i, nr, allocated = 1 << 12;
|
|
u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL);
|
|
|
|
for (nr = 1; nr < allocated; nr++) {
|
|
- pr_info("testing %u elems", nr);
|
|
+ u16 prev = 0;
|
|
+
|
|
+ pr_info("testing %u elems\n", nr);
|
|
|
|
get_random_bytes(test_array, nr * sizeof(test_array[0]));
|
|
eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL);
|
|
|
|
/* verify array is sorted correctly: */
|
|
- eytzinger0_for_each(i, nr)
|
|
- BUG_ON(i != eytzinger0_last(nr) &&
|
|
- test_array[i] > test_array[eytzinger0_next(i, nr)]);
|
|
+ eytzinger0_for_each(j, nr) {
|
|
+ BUG_ON(test_array[j] < prev);
|
|
+ prev = test_array[j];
|
|
+ }
|
|
|
|
for (i = 0; i < U16_MAX; i += 1 << 12)
|
|
eytzinger0_find_test_val(test_array, nr, i);
|
|
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
|
|
index 1a1720116071..e7c3541b38f3 100644
|
|
--- a/fs/bcachefs/util.h
|
|
+++ b/fs/bcachefs/util.h
|
|
@@ -670,8 +670,6 @@ static inline int cmp_le32(__le32 l, __le32 r)
|
|
|
|
#include <linux/uuid.h>
|
|
|
|
-#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
|
|
-
|
|
static inline bool qstr_eq(const struct qstr l, const struct qstr r)
|
|
{
|
|
return l.len == r.len && !memcmp(l.name, r.name, l.len);
--
2.45.3