From dded444b20dd3f47393937315b8217535ff3c51d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 18 Aug 2022 12:32:10 -0400 Subject: [PATCH] Update bcachefs sources to dfaf9a6ee2 lib/printbuf: Clean up headers --- .bcachefs_revision | 2 +- Makefile | 6 +- cmd_fs.c | 23 +- include/linux/printbuf.h | 108 +++++++--- include/linux/string_helpers.h | 20 ++ include/trace/events/bcachefs.h | 23 +- libbcachefs/alloc_foreground.c | 190 ++++++++++++----- libbcachefs/alloc_foreground.h | 8 + libbcachefs/backpointers.c | 41 ++-- libbcachefs/bcachefs.h | 15 +- libbcachefs/bkey.c | 80 ++++--- libbcachefs/bkey.h | 11 +- libbcachefs/btree_io.c | 23 +- libbcachefs/btree_iter.c | 167 ++++++++++----- libbcachefs/btree_iter.h | 17 +- libbcachefs/btree_key_cache.c | 15 +- libbcachefs/btree_locking.h | 30 ++- libbcachefs/btree_types.h | 2 +- libbcachefs/btree_update_interior.c | 83 +++++--- libbcachefs/debug.c | 164 +++++++------- libbcachefs/errcode.h | 1 + libbcachefs/fsck.c | 14 +- libbcachefs/journal.c | 3 +- libbcachefs/move.c | 2 + libbcachefs/rebalance.c | 3 +- libbcachefs/subvolume.c | 4 +- libbcachefs/super-io.c | 8 +- libbcachefs/sysfs.c | 3 +- libbcachefs/util.c | 9 +- libbcachefs/util.h | 2 + linux/printbuf.c | 319 ++++++++++++++-------------- linux/string_helpers.c | 131 ++++++++++++ 32 files changed, 1007 insertions(+), 520 deletions(-) create mode 100644 include/linux/string_helpers.h create mode 100644 linux/string_helpers.c diff --git a/.bcachefs_revision b/.bcachefs_revision index 720981ca..9f7af72c 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -49c34dadcad9c33b1e8510b5543d60c40fa0bebd +dfaf9a6ee24f5c415635f9a75f5281f385535ebd diff --git a/Makefile b/Makefile index bed43bda..a5a74fed 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ PREFIX?=/usr/local PKG_CONFIG?=pkg-config INSTALL=install -CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall -fPIC \ +CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall -fPIC \ -Wno-pointer-sign \ -fno-strict-aliasing \ 
-fno-delete-null-pointer-checks \ @@ -195,6 +195,10 @@ update-bcachefs-sources: git add linux/generic-radix-tree.c cp $(LINUX_DIR)/include/linux/kmemleak.h include/linux/ git add include/linux/kmemleak.h + cp $(LINUX_DIR)/include/linux/printbuf.h include/linux/ + git add include/linux/printbuf.h + cp $(LINUX_DIR)/lib/printbuf.c linux/ + git add linux/printbuf.c cp $(LINUX_DIR)/scripts/Makefile.compiler ./ git add Makefile.compiler $(RM) libbcachefs/*.mod.c diff --git a/cmd_fs.c b/cmd_fs.c index 195ad302..007c8d87 100644 --- a/cmd_fs.c +++ b/cmd_fs.c @@ -179,8 +179,9 @@ static void fs_usage_to_text(struct printbuf *out, const char *path) pr_uuid(out, fs.uuid.b); prt_newline(out); - out->tabstops[0] = 20; - out->tabstops[1] = 36; + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 20); + printbuf_tabstop_push(out, 16); prt_str(out, "Size:"); prt_tab(out); @@ -202,10 +203,11 @@ static void fs_usage_to_text(struct printbuf *out, const char *path) prt_newline(out); - out->tabstops[0] = 16; - out->tabstops[1] = 32; - out->tabstops[2] = 50; - out->tabstops[3] = 68; + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 16); + printbuf_tabstop_push(out, 16); + printbuf_tabstop_push(out, 18); + printbuf_tabstop_push(out, 18); prt_str(out, "Data type"); prt_tab(out); @@ -255,10 +257,11 @@ static void fs_usage_to_text(struct printbuf *out, const char *path) sort(dev_names.data, dev_names.nr, sizeof(dev_names.data[0]), dev_by_label_cmp, NULL); - out->tabstops[0] = 16; - out->tabstops[1] = 36; - out->tabstops[2] = 52; - out->tabstops[3] = 68; + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 16); + printbuf_tabstop_push(out, 20); + printbuf_tabstop_push(out, 16); + printbuf_tabstop_push(out, 14); darray_for_each(dev_names, dev) dev_usage_to_text(out, fs, dev); diff --git a/include/linux/printbuf.h b/include/linux/printbuf.h index fa8e73d5..24e62e56 100644 --- a/include/linux/printbuf.h +++ b/include/linux/printbuf.h @@ -32,6 +32,10 @@ * Since no 
equivalent yet exists for GFP_ATOMIC/GFP_NOWAIT, memory allocations * will be done with GFP_NOWAIT if printbuf->atomic is nonzero. * + * It's allowed to grab the output buffer and free it later with kfree() instead + * of using printbuf_exit(), if the user just needs a heap allocated string at + * the end. + * * Memory allocation failures: We don't return errors directly, because on * memory allocation failure we usually don't want to bail out and unwind - we * want to print what we've got, on a best-effort basis. But code that does want @@ -67,6 +71,8 @@ enum printbuf_si { PRINTBUF_UNITS_10, /* use powers of 10^3 (standard SI) */ }; +#define PRINTBUF_INLINE_TABSTOPS 4 + struct printbuf { char *buf; unsigned size; @@ -82,19 +88,34 @@ struct printbuf { bool heap_allocated:1; enum printbuf_si si_units:1; bool human_readable_units:1; - u8 tabstop; - u8 tabstops[4]; + bool has_indent_or_tabstops:1; + bool suppress_indent_tabstop_handling:1; + u8 nr_tabstops; + + /* + * Do not modify directly: use printbuf_tabstop_add(), + * printbuf_tabstop_get() + */ + u8 cur_tabstop; + u8 _tabstops[PRINTBUF_INLINE_TABSTOPS]; }; int printbuf_make_room(struct printbuf *, unsigned); const char *printbuf_str(const struct printbuf *); void printbuf_exit(struct printbuf *); -void prt_newline(struct printbuf *); +void printbuf_tabstops_reset(struct printbuf *); +void printbuf_tabstop_pop(struct printbuf *); +int printbuf_tabstop_push(struct printbuf *, unsigned); + void printbuf_indent_add(struct printbuf *, unsigned); void printbuf_indent_sub(struct printbuf *, unsigned); + +void prt_newline(struct printbuf *); void prt_tab(struct printbuf *); void prt_tab_rjust(struct printbuf *); + +void prt_bytes_indented(struct printbuf *, const char *, unsigned); void prt_human_readable_u64(struct printbuf *, u64); void prt_human_readable_s64(struct printbuf *, s64); void prt_units_u64(struct printbuf *, u64); @@ -129,7 +150,7 @@ static inline unsigned printbuf_remaining(struct printbuf *out) static 
inline unsigned printbuf_written(struct printbuf *out) { - return min(out->pos, out->size); + return out->size ? min(out->pos, out->size - 1) : 0; } /* @@ -150,21 +171,6 @@ static inline void printbuf_nul_terminate(struct printbuf *out) out->buf[out->size - 1] = 0; } -static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n) -{ - memset(out->buf + out->pos, - c, - min(n, printbuf_remaining(out))); - out->pos += n; -} - -static inline void prt_chars(struct printbuf *out, char c, unsigned n) -{ - printbuf_make_room(out, n); - __prt_chars_reserved(out, c, n); - printbuf_nul_terminate(out); -} - /* Doesn't call printbuf_make_room(), doesn't nul terminate: */ static inline void __prt_char_reserved(struct printbuf *out, char c) { @@ -186,14 +192,34 @@ static inline void prt_char(struct printbuf *out, char c) printbuf_nul_terminate(out); } -static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n) +static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n) +{ + unsigned i, can_print = min(n, printbuf_remaining(out)); + + for (i = 0; i < can_print; i++) + out->buf[out->pos++] = c; + out->pos += n - can_print; +} + +static inline void prt_chars(struct printbuf *out, char c, unsigned n) { printbuf_make_room(out, n); + __prt_chars_reserved(out, c, n); + printbuf_nul_terminate(out); +} + +static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n) +{ + unsigned i, can_print; + + printbuf_make_room(out, n); + + can_print = min(n, printbuf_remaining(out)); + + for (i = 0; i < can_print; i++) + out->buf[out->pos++] = ((char *) b)[i]; + out->pos += n - can_print; - memcpy(out->buf + out->pos, - b, - min(n, printbuf_remaining(out))); - out->pos += n; printbuf_nul_terminate(out); } @@ -202,6 +228,11 @@ static inline void prt_str(struct printbuf *out, const char *str) prt_bytes(out, str, strlen(str)); } +static inline void prt_str_indented(struct printbuf *out, const char *str) +{ + 
prt_bytes_indented(out, str, strlen(str)); +} + static inline void prt_hex_byte(struct printbuf *out, u8 byte) { printbuf_make_room(out, 2); @@ -226,7 +257,8 @@ static inline void printbuf_reset(struct printbuf *buf) buf->pos = 0; buf->allocation_failure = 0; buf->indent = 0; - buf->tabstop = 0; + buf->nr_tabstops = 0; + buf->cur_tabstop = 0; } /** @@ -245,4 +277,30 @@ static inline void printbuf_atomic_dec(struct printbuf *buf) buf->atomic--; } +/* + * This is used for the %pf(%p) sprintf format extension, where we pass a pretty + * printer and arguments to the pretty-printer to sprintf + * + * Instead of passing a pretty-printer function to sprintf directly, we pass it + * a pointer to a struct call_pp, so that sprintf can check that the magic + * number is present, which in turn ensures that the CALL_PP() macro has been + * used in order to typecheck the arguments to the pretty printer function + * + * Example usage: + * sprintf("%pf(%p)", CALL_PP(prt_bdev, bdev)); + */ +struct call_pp { + unsigned long magic; + void *fn; +}; + +#define PP_TYPECHECK(fn, ...) \ + ({ while (0) fn((struct printbuf *) NULL, ##__VA_ARGS__); }) + +#define CALL_PP_MAGIC (unsigned long) 0xce0b92d22f6b6be4 + +#define CALL_PP(fn, ...) 
\ + (PP_TYPECHECK(fn, ##__VA_ARGS__), \ + &((struct call_pp) { CALL_PP_MAGIC, fn })), ##__VA_ARGS__ + #endif /* _LINUX_PRINTBUF_H */ diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h new file mode 100644 index 00000000..af587706 --- /dev/null +++ b/include/linux/string_helpers.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_STRING_HELPERS_H_ +#define _LINUX_STRING_HELPERS_H_ + +#include +#include +#include + + +/* Descriptions of the types of units to + * print in */ +enum string_size_units { + STRING_UNITS_10, /* use powers of 10^3 (standard SI) */ + STRING_UNITS_2, /* use binary powers of 2^10 */ +}; + +int string_get_size(u64 size, u64 blk_size, enum string_size_units units, + char *buf, int len); + +#endif diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 2c980742..a18c59a3 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -317,24 +317,27 @@ DEFINE_EVENT(bch_fs, btree_node_cannibalize_unlock, ); TRACE_EVENT(btree_reserve_get_fail, - TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl), - TP_ARGS(c, required, cl), + TP_PROTO(const char *trans_fn, + unsigned long caller_ip, + size_t required), + TP_ARGS(trans_fn, caller_ip, required), TP_STRUCT__entry( - __field(dev_t, dev ) + __array(char, trans_fn, 24 ) + __field(unsigned long, caller_ip ) __field(size_t, required ) - __field(struct closure *, cl ) ), TP_fast_assign( - __entry->dev = c->dev; - __entry->required = required; - __entry->cl = cl; + strlcpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->required = required; ), - TP_printk("%d,%d required %zu by %p", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->required, __entry->cl) + TP_printk("%s %pS required %zu", + __entry->trans_fn, + (void *) __entry->caller_ip, + __entry->required) ); DEFINE_EVENT(btree_node, btree_split, diff --git 
a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 0a9f1313..c57baa1f 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -339,6 +339,8 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc skipped_need_journal_commit, skipped_nouse, cl); + if (!ob) + iter.path->preserve = false; err: set_btree_iter_dontneed(&iter); bch2_trans_iter_exit(trans, &iter); @@ -379,15 +381,15 @@ static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch * journal buckets - journal buckets will be < ca->new_fs_bucket_idx */ static noinline struct open_bucket * -bch2_bucket_alloc_trans_early(struct btree_trans *trans, - struct bch_dev *ca, - enum alloc_reserve reserve, - u64 *cur_bucket, - u64 *buckets_seen, - u64 *skipped_open, - u64 *skipped_need_journal_commit, - u64 *skipped_nouse, - struct closure *cl) +bch2_bucket_alloc_early(struct btree_trans *trans, + struct bch_dev *ca, + enum alloc_reserve reserve, + u64 *cur_bucket, + u64 *buckets_seen, + u64 *skipped_open, + u64 *skipped_need_journal_commit, + u64 *skipped_nouse, + struct closure *cl) { struct btree_iter iter; struct bkey_s_c k; @@ -430,7 +432,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans, return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found); } -static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, +static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, struct bch_dev *ca, enum alloc_reserve reserve, u64 *cur_bucket, @@ -445,15 +447,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, struct open_bucket *ob = NULL; int ret; - if (unlikely(!ca->mi.freespace_initialized)) - return bch2_bucket_alloc_trans_early(trans, ca, reserve, - cur_bucket, - buckets_seen, - skipped_open, - skipped_need_journal_commit, - skipped_nouse, - cl); - BUG_ON(ca->new_fs_bucket_idx); /* @@ -467,7 +460,7 @@ static struct open_bucket 
*bch2_bucket_alloc_trans(struct btree_trans *trans, break; for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k)); - *cur_bucket < k.k->p.offset && !ob; + *cur_bucket < k.k->p.offset; (*cur_bucket)++) { ret = btree_trans_too_many_iters(trans); if (ret) @@ -481,6 +474,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, skipped_need_journal_commit, skipped_nouse, k, cl); + if (ob) + break; } if (ob || ret) @@ -496,11 +491,13 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, * * Returns index of bucket on success, 0 on failure * */ -struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, +static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, + struct bch_dev *ca, enum alloc_reserve reserve, bool may_alloc_partial, struct closure *cl) { + struct bch_fs *c = trans->c; struct open_bucket *ob = NULL; struct bch_dev_usage usage; bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized); @@ -512,7 +509,6 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, u64 skipped_need_journal_commit = 0; u64 skipped_nouse = 0; bool waiting = false; - int ret; again: usage = bch2_dev_usage_read(ca); avail = dev_buckets_free(ca, usage, reserve); @@ -549,19 +545,26 @@ again: return ob; } - ret = bch2_trans_do(c, NULL, NULL, 0, - PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve, - &cur_bucket, - &buckets_seen, - &skipped_open, - &skipped_need_journal_commit, - &skipped_nouse, - cl))); + ob = likely(ca->mi.freespace_initialized) + ? 
bch2_bucket_alloc_freelist(trans, ca, reserve, + &cur_bucket, + &buckets_seen, + &skipped_open, + &skipped_need_journal_commit, + &skipped_nouse, + cl) + : bch2_bucket_alloc_early(trans, ca, reserve, + &cur_bucket, + &buckets_seen, + &skipped_open, + &skipped_need_journal_commit, + &skipped_nouse, + cl); if (skipped_need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); - if (!ob && !ret && !freespace_initialized && start) { + if (!ob && !freespace_initialized && start) { start = cur_bucket = 0; goto again; } @@ -570,7 +573,7 @@ again: ca->bucket_alloc_trans_early_cursor = cur_bucket; err: if (!ob) - ob = ERR_PTR(ret ?: -BCH_ERR_no_buckets_found); + ob = ERR_PTR(-BCH_ERR_no_buckets_found); if (IS_ERR(ob)) { trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve], @@ -590,6 +593,19 @@ err: return ob; } +struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, + enum alloc_reserve reserve, + bool may_alloc_partial, + struct closure *cl) +{ + struct open_bucket *ob; + + bch2_trans_do(c, NULL, NULL, 0, + PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve, + may_alloc_partial, cl))); + return ob; +} + static int __dev_stripe_cmp(struct dev_stripe_state *stripe, unsigned l, unsigned r) { @@ -655,7 +671,7 @@ static void add_new_bucket(struct bch_fs *c, ob_push(c, ptrs, ob); } -int bch2_bucket_alloc_set(struct bch_fs *c, +static int bch2_bucket_alloc_set_trans(struct btree_trans *trans, struct open_buckets *ptrs, struct dev_stripe_state *stripe, struct bch_devs_mask *devs_may_alloc, @@ -666,11 +682,12 @@ int bch2_bucket_alloc_set(struct bch_fs *c, unsigned flags, struct closure *cl) { + struct bch_fs *c = trans->c; struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc); unsigned dev; struct bch_dev *ca; - int ret = -BCH_ERR_insufficient_devices; + int ret = 0; unsigned i; BUG_ON(*nr_effective >= nr_replicas); @@ -694,16 +711,15 @@ int bch2_bucket_alloc_set(struct bch_fs *c, continue; } - 
ob = bch2_bucket_alloc(c, ca, reserve, + ob = bch2_bucket_alloc_trans(trans, ca, reserve, flags & BUCKET_MAY_ALLOC_PARTIAL, cl); if (!IS_ERR(ob)) bch2_dev_stripe_increment(ca, stripe); percpu_ref_put(&ca->ref); - if (IS_ERR(ob)) { - ret = PTR_ERR(ob); - - if (cl) + ret = PTR_ERR_OR_ZERO(ob); + if (ret) { + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl) break; continue; } @@ -711,15 +727,36 @@ int bch2_bucket_alloc_set(struct bch_fs *c, add_new_bucket(c, ptrs, devs_may_alloc, nr_effective, have_cache, flags, ob); - if (*nr_effective >= nr_replicas) { - ret = 0; + if (*nr_effective >= nr_replicas) break; - } } + if (*nr_effective >= nr_replicas) + ret = 0; + else if (!ret) + ret = -BCH_ERR_insufficient_devices; + return ret; } +int bch2_bucket_alloc_set(struct bch_fs *c, + struct open_buckets *ptrs, + struct dev_stripe_state *stripe, + struct bch_devs_mask *devs_may_alloc, + unsigned nr_replicas, + unsigned *nr_effective, + bool *have_cache, + enum alloc_reserve reserve, + unsigned flags, + struct closure *cl) +{ + return bch2_trans_do(c, NULL, NULL, 0, + bch2_bucket_alloc_set_trans(&trans, ptrs, stripe, + devs_may_alloc, nr_replicas, + nr_effective, have_cache, reserve, + flags, cl)); +} + /* Allocate from stripes: */ /* @@ -824,7 +861,7 @@ static void get_buckets_from_writepoint(struct bch_fs *c, wp->ptrs = ptrs_skip; } -static int open_bucket_add_buckets(struct bch_fs *c, +static int open_bucket_add_buckets(struct btree_trans *trans, struct open_buckets *ptrs, struct write_point *wp, struct bch_devs_list *devs_have, @@ -837,6 +874,7 @@ static int open_bucket_add_buckets(struct bch_fs *c, unsigned flags, struct closure *_cl) { + struct bch_fs *c = trans->c; struct bch_devs_mask devs; struct open_bucket *ob; struct closure *cl = NULL; @@ -868,7 +906,8 @@ static int open_bucket_add_buckets(struct bch_fs *c, target, erasure_code, nr_replicas, nr_effective, have_cache, flags, _cl); - if (bch2_err_matches(ret, BCH_ERR_freelist_empty) || + if 
(bch2_err_matches(ret, BCH_ERR_transaction_restart) || + bch2_err_matches(ret, BCH_ERR_freelist_empty) || bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) return ret; if (*nr_effective >= nr_replicas) @@ -887,10 +926,11 @@ retry_blocking: * Try nonblocking first, so that if one device is full we'll try from * other devices: */ - ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs, + ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs, nr_replicas, nr_effective, have_cache, reserve, flags, cl); if (ret && + !bch2_err_matches(ret, BCH_ERR_transaction_restart) && !bch2_err_matches(ret, BCH_ERR_insufficient_devices) && !cl && _cl) { cl = _cl; @@ -1010,15 +1050,25 @@ static bool try_decrease_writepoints(struct bch_fs *c, return true; } -static struct write_point *writepoint_find(struct bch_fs *c, +static void bch2_trans_mutex_lock(struct btree_trans *trans, + struct mutex *lock) +{ + if (!mutex_trylock(lock)) { + bch2_trans_unlock(trans); + mutex_lock(lock); + } +} + +static struct write_point *writepoint_find(struct btree_trans *trans, unsigned long write_point) { + struct bch_fs *c = trans->c; struct write_point *wp, *oldest; struct hlist_head *head; if (!(write_point & 1UL)) { wp = (struct write_point *) write_point; - mutex_lock(&wp->lock); + bch2_trans_mutex_lock(trans, &wp->lock); return wp; } @@ -1027,7 +1077,7 @@ restart_find: wp = __writepoint_find(head, write_point); if (wp) { lock_wp: - mutex_lock(&wp->lock); + bch2_trans_mutex_lock(trans, &wp->lock); if (wp->write_point == write_point) goto out; mutex_unlock(&wp->lock); @@ -1040,8 +1090,8 @@ restart_find_oldest: if (!oldest || time_before64(wp->last_used, oldest->last_used)) oldest = wp; - mutex_lock(&oldest->lock); - mutex_lock(&c->write_points_hash_lock); + bch2_trans_mutex_lock(trans, &oldest->lock); + bch2_trans_mutex_lock(trans, &c->write_points_hash_lock); if (oldest >= c->write_points + c->write_points_nr || try_increase_writepoints(c)) { mutex_unlock(&c->write_points_hash_lock); 
@@ -1069,7 +1119,7 @@ out: /* * Get us an open_bucket we can allocate from, return with it locked: */ -struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, +struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *trans, unsigned target, unsigned erasure_code, struct write_point_specifier write_point, @@ -1080,6 +1130,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, unsigned flags, struct closure *cl) { + struct bch_fs *c = trans->c; struct write_point *wp; struct open_bucket *ob; struct open_buckets ptrs; @@ -1099,7 +1150,7 @@ retry: write_points_nr = c->write_points_nr; have_cache = false; - wp = writepoint_find(c, write_point.v); + wp = writepoint_find(trans, write_point.v); if (wp->data_type == BCH_DATA_user) ob_flags |= BUCKET_MAY_ALLOC_PARTIAL; @@ -1109,21 +1160,22 @@ retry: have_cache = true; if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) { - ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, + ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, target, erasure_code, nr_replicas, &nr_effective, &have_cache, reserve, ob_flags, cl); } else { - ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, + ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, target, erasure_code, nr_replicas, &nr_effective, &have_cache, reserve, ob_flags, NULL); - if (!ret) + if (!ret || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto alloc_done; - ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, + ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, 0, erasure_code, nr_replicas, &nr_effective, &have_cache, reserve, @@ -1180,6 +1232,32 @@ err: return ERR_PTR(ret); } +struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, + unsigned target, + unsigned erasure_code, + struct write_point_specifier write_point, + struct bch_devs_list *devs_have, + unsigned nr_replicas, + unsigned nr_replicas_required, + enum alloc_reserve reserve, + unsigned flags, + struct closure *cl) +{ + struct 
write_point *wp; + + bch2_trans_do(c, NULL, NULL, 0, + PTR_ERR_OR_ZERO(wp = bch2_alloc_sectors_start_trans(&trans, target, + erasure_code, + write_point, + devs_have, + nr_replicas, + nr_replicas_required, + reserve, + flags, cl))); + return wp; + +} + struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob) { struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h index 8bc78877..6de63a35 100644 --- a/libbcachefs/alloc_foreground.h +++ b/libbcachefs/alloc_foreground.h @@ -136,6 +136,14 @@ int bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *, unsigned, unsigned *, bool *, enum alloc_reserve, unsigned, struct closure *); +struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *, + unsigned, unsigned, + struct write_point_specifier, + struct bch_devs_list *, + unsigned, unsigned, + enum alloc_reserve, + unsigned, + struct closure *); struct write_point *bch2_alloc_sectors_start(struct bch_fs *, unsigned, unsigned, struct write_point_specifier, diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 5a46b25b..029b1ec1 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -492,7 +492,7 @@ static void backpointer_not_found(struct btree_trans *trans, prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, k); if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) - bch_err(c, "%s", buf.buf); + bch_err_ratelimited(c, "%s", buf.buf); else bch2_trans_inconsistent(trans, "%s", buf.buf); @@ -526,9 +526,21 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) return k; - backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent"); - bch2_trans_iter_exit(trans, iter); + + if (bp.level) { + /* + * If a backpointer for a btree node wasn't found, it may be + * because it was overwritten by a new btree node that hasn't + * been written 
out yet - backpointer_get_node() checks for + * this: + */ + bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp); + bch2_trans_iter_exit(trans, iter); + return bkey_s_c_null; + } + + backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent"); return bkey_s_c_null; } @@ -540,7 +552,6 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree *b; - struct bkey_s_c k; BUG_ON(!bp.level); @@ -551,22 +562,24 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, bp.level - 1, 0); b = bch2_btree_iter_peek_node(iter); - if (IS_ERR(b)) { - bch2_trans_iter_exit(trans, iter); - return b; - } + if (IS_ERR(b)) + goto err; if (extent_matches_bp(c, bp.btree_id, bp.level, bkey_i_to_s_c(&b->key), bucket, bp)) return b; - if (!btree_node_will_make_reachable(b)) - backpointer_not_found(trans, bucket, bp_offset, - bp, k, "btree node"); - + if (btree_node_will_make_reachable(b)) { + b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); + } else { + backpointer_not_found(trans, bucket, bp_offset, bp, + bkey_i_to_s_c(&b->key), "btree node"); + b = NULL; + } +err: bch2_trans_iter_exit(trans, iter); - return NULL; + return b; } static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, @@ -829,6 +842,8 @@ static int check_one_backpointer(struct btree_trans *trans, k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp); ret = bkey_err(k); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) + return 0; if (ret) return ret; diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 8ffdb4de..a5bf8087 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -319,8 +319,6 @@ BCH_DEBUG_PARAMS_DEBUG() #undef BCH_DEBUG_PARAM #endif -#define BCH_LOCK_TIME_NR 128 - #define BCH_TIME_STATS() \ x(btree_node_mem_alloc) \ x(btree_node_split) \ @@ -531,9 +529,13 @@ struct btree_debug { unsigned id; }; -struct lock_held_stats { - struct 
time_stats times[BCH_LOCK_TIME_NR]; - const char *names[BCH_LOCK_TIME_NR]; +#define BCH_TRANSACTIONS_NR 128 + +struct btree_transaction_stats { + struct mutex lock; + struct time_stats lock_hold_times; + unsigned nr_max_paths; + char *max_paths_text; }; struct bch_fs_pcpu { @@ -930,7 +932,8 @@ struct bch_fs { struct time_stats times[BCH_TIME_STAT_NR]; - struct lock_held_stats lock_held_stats; + const char *btree_transaction_fns[BCH_TRANSACTIONS_NR]; + struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; }; static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c index cc068963..d348175e 100644 --- a/libbcachefs/bkey.c +++ b/libbcachefs/bkey.c @@ -19,33 +19,49 @@ const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT; struct bkey __bch2_bkey_unpack_key(const struct bkey_format *, const struct bkey_packed *); -void bch2_to_binary(char *out, const u64 *p, unsigned nr_bits) +void bch2_bkey_packed_to_binary_text(struct printbuf *out, + const struct bkey_format *f, + const struct bkey_packed *k) { - unsigned bit = high_bit_offset, done = 0; + const u64 *p = high_word(f, k); + unsigned word_bits = 64 - high_bit_offset; + unsigned nr_key_bits = bkey_format_key_bits(f) + high_bit_offset; + u64 v = *p & (~0ULL >> high_bit_offset); + + if (!nr_key_bits) { + prt_str(out, "(empty)"); + return; + } while (1) { - while (bit < 64) { - if (done && !(done % 8)) - *out++ = ' '; - *out++ = *p & (1ULL << (63 - bit)) ? 
'1' : '0'; - bit++; - done++; - if (done == nr_bits) { - *out++ = '\0'; - return; - } + unsigned next_key_bits = nr_key_bits; + + if (nr_key_bits < 64) { + v >>= 64 - nr_key_bits; + next_key_bits = 0; + } else { + next_key_bits -= 64; } + bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits)); + + if (!next_key_bits) + break; + + prt_char(out, ' '); + p = next_word(p); - bit = 0; + v = *p; + word_bits = 64; + nr_key_bits = next_key_bits; } } #ifdef CONFIG_BCACHEFS_DEBUG static void bch2_bkey_pack_verify(const struct bkey_packed *packed, - const struct bkey *unpacked, - const struct bkey_format *format) + const struct bkey *unpacked, + const struct bkey_format *format) { struct bkey tmp; @@ -57,23 +73,35 @@ static void bch2_bkey_pack_verify(const struct bkey_packed *packed, tmp = __bch2_bkey_unpack_key(format, packed); if (memcmp(&tmp, unpacked, sizeof(struct bkey))) { - struct printbuf buf1 = PRINTBUF; - struct printbuf buf2 = PRINTBUF; - char buf3[160], buf4[160]; + struct printbuf buf = PRINTBUF; - bch2_bkey_to_text(&buf1, unpacked); - bch2_bkey_to_text(&buf2, &tmp); - bch2_to_binary(buf3, (void *) unpacked, 80); - bch2_to_binary(buf4, high_word(format, packed), 80); - - panic("keys differ: format u64s %u fields %u %u %u %u %u\n%s\n%s\n%s\n%s\n", + prt_printf(&buf, "keys differ: format u64s %u fields %u %u %u %u %u\n", format->key_u64s, format->bits_per_field[0], format->bits_per_field[1], format->bits_per_field[2], format->bits_per_field[3], - format->bits_per_field[4], - buf1.buf, buf2.buf, buf3, buf4); + format->bits_per_field[4]); + + prt_printf(&buf, "compiled unpack: "); + bch2_bkey_to_text(&buf, unpacked); + prt_newline(&buf); + + prt_printf(&buf, "c unpack: "); + bch2_bkey_to_text(&buf, &tmp); + prt_newline(&buf); + + prt_printf(&buf, "compiled unpack: "); + bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current, + (struct bkey_packed *) unpacked); + prt_newline(&buf); + + prt_printf(&buf, "c unpack: "); + bch2_bkey_packed_to_binary_text(&buf, 
&bch2_bkey_format_current, + (struct bkey_packed *) &tmp); + prt_newline(&buf); + + panic("%s", buf.buf); } } diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h index 7dee3d8e..df9fb859 100644 --- a/libbcachefs/bkey.h +++ b/libbcachefs/bkey.h @@ -12,7 +12,9 @@ #define HAVE_BCACHEFS_COMPILED_UNPACK 1 #endif -void bch2_to_binary(char *, const u64 *, unsigned); +void bch2_bkey_packed_to_binary_text(struct printbuf *, + const struct bkey_format *, + const struct bkey_packed *); /* bkey with split value, const */ struct bkey_s_c { @@ -42,12 +44,15 @@ static inline size_t bkey_val_bytes(const struct bkey *k) static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s) { - k->u64s = BKEY_U64s + val_u64s; + unsigned u64s = BKEY_U64s + val_u64s; + + BUG_ON(u64s > U8_MAX); + k->u64s = u64s; } static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) { - k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64)); + set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64))); } #define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k))) diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index ae731b3a..8aad87ea 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -616,7 +616,6 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) (u64 *) vstruct_end(i) - (u64 *) k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift); set_btree_bset_end(b, t); - bch2_bset_set_no_aux_tree(b, t); } for (k = i->start; k != vstruct_last(i); k = bkey_next(k)) @@ -626,10 +625,14 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) if (k != vstruct_last(i)) { i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start); set_btree_bset_end(b, t); - bch2_bset_set_no_aux_tree(b, t); } } + /* + * Always rebuild search trees: eytzinger search tree nodes directly + * depend on the values of min/max key: + */ + bch2_bset_set_no_aux_tree(b, b->set); bch2_btree_build_aux_trees(b); for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { 
@@ -778,8 +781,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b, } static int validate_bset_keys(struct bch_fs *c, struct btree *b, - struct bset *i, unsigned *whiteout_u64s, - int write, bool have_retry) + struct bset *i, int write, bool have_retry) { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; @@ -915,7 +917,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, } while (b->written < (ptr_written ?: btree_sectors(c))) { - unsigned sectors, whiteout_u64s = 0; + unsigned sectors; struct nonce nonce; struct bch_csum csum; bool first = !b->written; @@ -984,8 +986,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (!b->written) btree_node_set_format(b, b->data->format); - ret = validate_bset_keys(c, b, i, &whiteout_u64s, - READ, have_retry); + ret = validate_bset_keys(c, b, i, READ, have_retry); if (ret) goto fsck_err; @@ -1011,11 +1012,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (blacklisted && !first) continue; - sort_iter_add(iter, i->start, - vstruct_idx(i, whiteout_u64s)); - sort_iter_add(iter, - vstruct_idx(i, whiteout_u64s), + vstruct_idx(i, 0), vstruct_last(i)); nonblacklisted_written = b->written; @@ -1745,7 +1743,6 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - unsigned whiteout_u64s = 0; struct printbuf buf = PRINTBUF; int ret; @@ -1758,7 +1755,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, if (ret) return ret; - ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?: + ret = validate_bset_keys(c, b, i, WRITE, false) ?: validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false); if (ret) { bch2_inconsistent_error(c); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 04a61318..1d4b9fde 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c 
@@ -1418,16 +1418,16 @@ static __always_inline int btree_path_down(struct btree_trans *trans, if (unlikely(ret)) goto err; - mark_btree_node_locked(trans, path, level, lock_type); - btree_path_level_init(trans, path, b); - if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) && unlikely(b != btree_node_mem_ptr(tmp.k))) btree_node_mem_ptr_set(trans, path, level + 1, b); if (btree_node_read_locked(path, level + 1)) btree_node_unlock(trans, path, level + 1); + + mark_btree_node_locked(trans, path, level, lock_type); path->level = level; + btree_path_level_init(trans, path, b); bch2_btree_path_verify_locks(path); err: @@ -1872,42 +1872,69 @@ void bch2_dump_trans_updates(struct btree_trans *trans) printbuf_exit(&buf); } +void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path) +{ + prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ", + path->idx, path->ref, path->intent_ref, + path->preserve ? 'P' : ' ', + path->should_be_locked ? 'S' : ' ', + bch2_btree_ids[path->btree_id], + path->level); + bch2_bpos_to_text(out, path->pos); + + prt_printf(out, " locks %u", path->nodes_locked); +#ifdef CONFIG_BCACHEFS_DEBUG + prt_printf(out, " %pS", (void *) path->ip_allocated); +#endif + prt_newline(out); +} + +void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans) +{ + struct btree_path *path; + unsigned idx; + + trans_for_each_path_inorder(trans, path, idx) + bch2_btree_path_to_text(out, path); +} + noinline __cold void bch2_dump_trans_paths_updates(struct btree_trans *trans) { - struct btree_path *path; struct printbuf buf = PRINTBUF; - unsigned idx; - trans_for_each_path_inorder(trans, path, idx) { - printbuf_reset(&buf); - - bch2_bpos_to_text(&buf, path->pos); - - printk(KERN_ERR "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos %s locks %u %pS\n", - path->idx, path->ref, path->intent_ref, - path->preserve ? 'P' : ' ', - path->should_be_locked ? 
'S' : ' ', - bch2_btree_ids[path->btree_id], - path->level, - buf.buf, - path->nodes_locked, -#ifdef CONFIG_BCACHEFS_DEBUG - (void *) path->ip_allocated -#else - NULL -#endif - ); - } + bch2_trans_paths_to_text(&buf, trans); + printk(KERN_ERR "%s", buf.buf); printbuf_exit(&buf); bch2_dump_trans_updates(trans); } +noinline +static void bch2_trans_update_max_paths(struct btree_trans *trans) +{ + struct btree_transaction_stats *s = btree_trans_stats(trans); + struct printbuf buf = PRINTBUF; + + bch2_trans_paths_to_text(&buf, trans); + + if (!buf.allocation_failure) { + mutex_lock(&s->lock); + if (s->nr_max_paths < hweight64(trans->paths_allocated)) { + s->nr_max_paths = hweight64(trans->paths_allocated); + swap(s->max_paths_text, buf.buf); + } + mutex_unlock(&s->lock); + } + + printbuf_exit(&buf); +} + static struct btree_path *btree_path_alloc(struct btree_trans *trans, struct btree_path *pos) { + struct btree_transaction_stats *s = btree_trans_stats(trans); struct btree_path *path; unsigned idx; @@ -1920,6 +1947,9 @@ static struct btree_path *btree_path_alloc(struct btree_trans *trans, idx = __ffs64(~trans->paths_allocated); trans->paths_allocated |= 1ULL << idx; + if (s && unlikely(hweight64(trans->paths_allocated) > s->nr_max_paths)) + bch2_trans_update_max_paths(trans); + path = &trans->paths[idx]; path->idx = idx; @@ -2013,12 +2043,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct struct bkey_s_c k; + EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); + EBUG_ON(!btree_node_locked(path, path->level)); + if (!path->cached) { struct btree_path_level *l = path_l(path); struct bkey_packed *_k; - EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); - _k = bch2_btree_node_iter_peek_all(&l->iter, l->b); k = _k ? 
bkey_disassemble(l->b, _k, u) : bkey_s_c_null; @@ -2033,7 +2064,6 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct (path->btree_id != ck->key.btree_id || bkey_cmp(path->pos, ck->key.pos))); EBUG_ON(!ck || !ck->valid); - EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); *u = ck->k->k; k = bkey_i_to_s_c(ck->k); @@ -2288,7 +2318,7 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans, * bkey_s_c_null: */ static noinline -struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) +struct bkey_s_c __btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) { struct btree_trans *trans = iter->trans; struct bch_fs *c = trans->c; @@ -2317,6 +2347,15 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos return bch2_btree_path_peek_slot(iter->key_cache_path, &u); } +static noinline +struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) +{ + struct bkey_s_c ret = __btree_trans_peek_key_cache(iter, pos); + int err = bkey_err(ret) ?: bch2_btree_path_relock(iter->trans, iter->path, _THIS_IP_); + + return err ? 
bkey_s_c_err(err) : ret; +} + static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key) { struct btree_trans *trans = iter->trans; @@ -2347,15 +2386,12 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) && k.k && (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { - ret = bkey_err(k2); + k = k2; + ret = bkey_err(k); if (ret) { - k = k2; bch2_btree_iter_set_pos(iter, iter->pos); goto out; } - - k = k2; - iter->k = *k.k; } if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL)) @@ -2803,8 +2839,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) btree_iter_ip_allocated(iter)); ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); - if (unlikely(ret)) - return bkey_s_c_err(ret); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } if ((iter->flags & BTREE_ITER_CACHED) || !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) { @@ -2828,13 +2866,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) && - (k = btree_trans_peek_key_cache(iter, iter->pos)).k) { - if (bkey_err(k)) { - goto out_no_locked; - } else { + (k = __btree_trans_peek_key_cache(iter, iter->pos)).k) { + if (!bkey_err(k)) iter->k = *k.k; - goto out; - } + /* We're not returning a key from iter->path: */ + goto out_no_locked; } k = bch2_btree_path_peek_slot(iter->path, &iter->k); @@ -2862,11 +2898,14 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) struct bpos pos = iter->pos; k = bch2_btree_iter_peek(iter); - iter->pos = pos; + if (unlikely(bkey_err(k))) + bch2_btree_iter_set_pos(iter, pos); + else + iter->pos = pos; } if (unlikely(bkey_err(k))) - return k; + goto out_no_locked; next = k.k ? 
bkey_start_pos(k.k) : POS_MAX; @@ -3195,6 +3234,7 @@ u32 bch2_trans_begin(struct btree_trans *trans) bch2_trans_reset_updates(trans); + trans->restart_count++; trans->mem_top = 0; if (trans->fs_usage_deltas) { @@ -3245,10 +3285,10 @@ u32 bch2_trans_begin(struct btree_trans *trans) void bch2_trans_verify_not_restarted(struct btree_trans *trans, u32 restart_count) { - bch2_trans_inconsistent_on(trans_was_restarted(trans, restart_count), trans, - "trans->restart_count %u, should be %u, last restarted by %ps\n", - trans->restart_count, restart_count, - (void *) trans->last_restarted_ip); + if (trans_was_restarted(trans, restart_count)) + panic("trans->restart_count %u, should be %u, last restarted by %pS\n", + trans->restart_count, restart_count, + (void *) trans->last_restarted_ip); } static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) @@ -3269,6 +3309,22 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) trans->updates = p; p += updates_bytes; } +static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct bch_fs *c, + const char *fn) +{ + unsigned i; + + for (i = 0; i < ARRAY_SIZE(c->btree_transaction_fns); i++) + if (!c->btree_transaction_fns[i] || + c->btree_transaction_fns[i] == fn) { + c->btree_transaction_fns[i] = fn; + return i; + } + + pr_warn_once("BCH_TRANSACTIONS_NR not big enough!"); + return i; +} + void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned expected_nr_iters, size_t expected_mem_bytes, @@ -3284,15 +3340,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, trans->fn = fn; trans->last_begin_time = ktime_get_ns(); trans->task = current; - - while (c->lock_held_stats.names[trans->lock_name_idx] != fn - && c->lock_held_stats.names[trans->lock_name_idx] != 0) - trans->lock_name_idx++; - - if (trans->lock_name_idx >= BCH_LOCK_TIME_NR) - pr_warn_once("lock_times array not big enough!"); - else - 
c->lock_held_stats.names[trans->lock_name_idx] = fn; + trans->fn_idx = bch2_trans_get_fn_idx(trans, c, fn); bch2_trans_alloc_paths(trans, c); @@ -3463,9 +3511,12 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) int bch2_fs_btree_iter_init(struct bch_fs *c) { - unsigned nr = BTREE_ITER_MAX; + unsigned i, nr = BTREE_ITER_MAX; int ret; + for (i = 0; i < ARRAY_SIZE(c->btree_transaction_stats); i++) + mutex_init(&c->btree_transaction_stats[i].lock); + INIT_LIST_HEAD(&c->btree_trans_list); mutex_init(&c->btree_trans_lock); diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index f38fd25b..6ad28ff6 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -182,7 +182,6 @@ static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int er BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart)); trans->restarted = err; - trans->restart_count++; return -err; } @@ -368,7 +367,7 @@ static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter * static inline int btree_trans_too_many_iters(struct btree_trans *trans) { - if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX) { + if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2) { trace_trans_restart_too_many_iters(trans, _THIS_IP_); return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); } @@ -392,13 +391,17 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, #define lockrestart_do(_trans, _do) \ ({ \ + u32 _restart_count; \ int _ret; \ \ do { \ - bch2_trans_begin(_trans); \ + _restart_count = bch2_trans_begin(_trans); \ _ret = (_do); \ } while (bch2_err_matches(_ret, BCH_ERR_transaction_restart)); \ \ + if (!_ret) \ + bch2_trans_verify_not_restarted(_trans, _restart_count);\ + \ _ret; \ }) @@ -439,7 +442,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, (_start), (_flags)); \ \ while (1) { \ - bch2_trans_begin(_trans); \ + u32 _restart_count = bch2_trans_begin(_trans); \ (_k) = 
bch2_btree_iter_peek_type(&(_iter), (_flags)); \ if (!(_k).k) { \ _ret = 0; \ @@ -451,6 +454,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, continue; \ if (_ret) \ break; \ + bch2_trans_verify_not_restarted(_trans, _restart_count);\ if (!bch2_btree_iter_advance(&(_iter))) \ break; \ } \ @@ -468,7 +472,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, (_start), (_flags)); \ \ while (1) { \ - bch2_trans_begin(_trans); \ + u32 _restart_count = bch2_trans_begin(_trans); \ (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\ if (!(_k).k) { \ _ret = 0; \ @@ -480,6 +484,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, continue; \ if (_ret) \ break; \ + bch2_trans_verify_not_restarted(_trans, _restart_count);\ if (!bch2_btree_iter_rewind(&(_iter))) \ break; \ } \ @@ -535,6 +540,8 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, /* new multiple iterator interface: */ void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); +void bch2_btree_path_to_text(struct printbuf *, struct btree_path *); +void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); void bch2_dump_trans_updates(struct btree_trans *); void bch2_dump_trans_paths_updates(struct btree_trans *); void __bch2_trans_init(struct btree_trans *, struct bch_fs *, diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index fa90581f..38b16f95 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -631,11 +631,22 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans, void bch2_btree_key_cache_drop(struct btree_trans *trans, struct btree_path *path) { + struct bch_fs *c = trans->c; struct bkey_cached *ck = (void *) path->l[0].b; - ck->valid = false; + BUG_ON(!ck->valid); - BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); + /* + * We just did an update to the btree, bypassing the key cache: the key + * cache key is now stale and must be dropped, even if dirty: 
+ */ + if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { + clear_bit(BKEY_CACHED_DIRTY, &ck->flags); + atomic_long_dec(&c->btree_key_cache.nr_dirty); + bch2_journal_pin_drop(&c->journal, &ck->journal); + } + + ck->valid = false; } static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index c3f3cb87..205c6b59 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -115,6 +115,26 @@ btree_lock_want(struct btree_path *path, int level) return BTREE_NODE_UNLOCKED; } +static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans) +{ + return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats) + ? &trans->c->btree_transaction_stats[trans->fn_idx] + : NULL; +} + +static void btree_trans_lock_hold_time_update(struct btree_trans *trans, + struct btree_path *path, unsigned level) +{ +#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS + struct btree_transaction_stats *s = btree_trans_stats(trans); + + if (s) + __bch2_time_stats_update(&s->lock_hold_times, + path->l[level].lock_taken_time, + ktime_get_ns()); +#endif +} + static inline void btree_node_unlock(struct btree_trans *trans, struct btree_path *path, unsigned level) { @@ -124,15 +144,7 @@ static inline void btree_node_unlock(struct btree_trans *trans, if (lock_type != BTREE_NODE_UNLOCKED) { six_unlock_type(&path->l[level].b->c.lock, lock_type); -#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS - if (trans->lock_name_idx < BCH_LOCK_TIME_NR) { - struct bch_fs *c = trans->c; - - __bch2_time_stats_update(&c->lock_held_stats.times[trans->lock_name_idx], - path->l[level].lock_taken_time, - ktime_get_ns()); - } -#endif + btree_trans_lock_hold_time_update(trans, path, level); } mark_btree_node_unlocked(path, level); } diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 1ff99917..21d76181 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -392,6 +392,7 @@ struct 
btree_trans { struct task_struct *task; int srcu_idx; + u8 fn_idx; u8 nr_sorted; u8 nr_updates; u8 traverse_all_idx; @@ -432,7 +433,6 @@ struct btree_trans { unsigned journal_u64s; unsigned journal_preres_u64s; struct replicas_delta_list *fs_usage_deltas; - int lock_name_idx; }; #define BTREE_FLAGS() \ diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index e4138614..0409737f 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -178,12 +178,13 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, six_unlock_intent(&b->c.lock); } -static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, +static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct disk_reservation *res, struct closure *cl, bool interior_node, unsigned flags) { + struct bch_fs *c = trans->c; struct write_point *wp; struct btree *b; __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; @@ -213,7 +214,7 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, mutex_unlock(&c->btree_reserve_cache_lock); retry: - wp = bch2_alloc_sectors_start(c, + wp = bch2_alloc_sectors_start_trans(trans, c->opts.metadata_target ?: c->opts.foreground_target, 0, @@ -412,18 +413,16 @@ static void bch2_btree_reserve_put(struct btree_update *as) } } -static int bch2_btree_reserve_get(struct btree_update *as, +static int bch2_btree_reserve_get(struct btree_trans *trans, + struct btree_update *as, unsigned nr_nodes[2], - unsigned flags) + unsigned flags, + struct closure *cl) { struct bch_fs *c = as->c; - struct closure cl; struct btree *b; unsigned interior; - int ret; - - closure_init_stack(&cl); -retry: + int ret = 0; BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX); @@ -434,18 +433,17 @@ retry: * BTREE_INSERT_NOWAIT only applies to btree node allocation, not * blocking on this lock: */ - ret = bch2_btree_cache_cannibalize_lock(c, &cl); + ret = bch2_btree_cache_cannibalize_lock(c, cl); if (ret) - 
goto err; + return ret; for (interior = 0; interior < 2; interior++) { struct prealloc_nodes *p = as->prealloc_nodes + interior; while (p->nr < nr_nodes[interior]) { - b = __bch2_btree_node_alloc(c, &as->disk_res, - flags & BTREE_INSERT_NOWAIT - ? NULL : &cl, - interior, flags); + b = __bch2_btree_node_alloc(trans, &as->disk_res, + flags & BTREE_INSERT_NOWAIT ? NULL : cl, + interior, flags); if (IS_ERR(b)) { ret = PTR_ERR(b); goto err; @@ -454,18 +452,8 @@ retry: p->b[p->nr++] = b; } } - - bch2_btree_cache_cannibalize_unlock(c); - closure_sync(&cl); - return 0; err: bch2_btree_cache_cannibalize_unlock(c); - closure_sync(&cl); - - if (ret == -EAGAIN) - goto retry; - - trace_btree_reserve_get_fail(c, nr_nodes[0] + nr_nodes[1], &cl); return ret; } @@ -980,6 +968,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, unsigned update_level = level; int journal_flags = flags & JOURNAL_WATERMARK_MASK; int ret = 0; + u32 restart_count = trans->restart_count; BUG_ON(!path->should_be_locked); @@ -1053,16 +1042,24 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, if (ret) goto err; - bch2_trans_unlock(trans); - ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, BTREE_UPDATE_JOURNAL_RES, - journal_flags); + journal_flags|JOURNAL_RES_GET_NONBLOCK); if (ret) { - bch2_btree_update_free(as); - trace_trans_restart_journal_preres_get(trans, _RET_IP_); - ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get); - return ERR_PTR(ret); + bch2_trans_unlock(trans); + + ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, + BTREE_UPDATE_JOURNAL_RES, + journal_flags); + if (ret) { + trace_trans_restart_journal_preres_get(trans, _RET_IP_); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get); + goto err; + } + + ret = bch2_trans_relock(trans); + if (ret) + goto err; } ret = bch2_disk_reservation_get(c, &as->disk_res, @@ -1072,14 +1069,32 @@ 
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, if (ret) goto err; - ret = bch2_btree_reserve_get(as, nr_nodes, flags); - if (ret) + ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL); + if (ret == -EAGAIN || + ret == -ENOMEM) { + struct closure cl; + + closure_init_stack(&cl); + + bch2_trans_unlock(trans); + + do { + ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); + closure_sync(&cl); + } while (ret == -EAGAIN); + } + + if (ret) { + trace_btree_reserve_get_fail(trans->fn, _RET_IP_, + nr_nodes[0] + nr_nodes[1]); goto err; + } ret = bch2_trans_relock(trans); if (ret) goto err; + bch2_trans_verify_not_restarted(trans, restart_count); return as; err: bch2_btree_update_free(as); diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index cd37a101..f35e714e 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -199,7 +199,7 @@ struct dump_iter { ssize_t ret; /* bytes read so far */ }; -static int flush_buf(struct dump_iter *i) +static ssize_t flush_buf(struct dump_iter *i) { if (i->buf.pos) { size_t bytes = min_t(size_t, i->buf.pos, i->size); @@ -215,7 +215,7 @@ static int flush_buf(struct dump_iter *i) memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos); } - return 0; + return i->size ? 
0 : i->ret; } static int bch2_dump_open(struct inode *inode, struct file *file) @@ -253,7 +253,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - int err; + ssize_t ret; i->ubuf = buf; i->size = size; @@ -261,14 +261,11 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, bch2_trans_init(&trans, i->c, 0, 0); - err = for_each_btree_key2(&trans, iter, i->id, i->from, + ret = for_each_btree_key2(&trans, iter, i->id, i->from, BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ({ - err = flush_buf(i); - if (err) - break; - - if (!i->size) + ret = flush_buf(i); + if (ret) break; bch2_bkey_val_to_text(&i->buf, i->c, k); @@ -277,12 +274,12 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, })); i->from = iter.pos; - if (!err) - err = flush_buf(i); + if (!ret) + ret = flush_buf(i); bch2_trans_exit(&trans); - return err ?: i->ret; + return ret ?: i->ret; } static const struct file_operations btree_debug_ops = { @@ -299,43 +296,39 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, struct btree_trans trans; struct btree_iter iter; struct btree *b; - int err; + ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - err = flush_buf(i); - if (err) - return err; + ret = flush_buf(i); + if (ret) + return ret; - if (!i->size || !bpos_cmp(SPOS_MAX, i->from)) + if (!bpos_cmp(SPOS_MAX, i->from)) return i->ret; bch2_trans_init(&trans, i->c, 0, 0); - for_each_btree_node(&trans, iter, i->id, i->from, 0, b, err) { - bch2_btree_node_to_text(&i->buf, i->c, b); - err = flush_buf(i); - if (err) + for_each_btree_node(&trans, iter, i->id, i->from, 0, b, ret) { + ret = flush_buf(i); + if (ret) break; - /* - * can't easily correctly restart a btree node traversal across - * all nodes, meh - */ + bch2_btree_node_to_text(&i->buf, i->c, b); i->from = bpos_cmp(SPOS_MAX, b->key.k.p) ? 
bpos_successor(b->key.k.p) : b->key.k.p; - - if (!i->size) - break; } bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); - return err < 0 ? err : i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations btree_format_debug_ops = { @@ -352,33 +345,27 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - int err; + ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - err = flush_buf(i); - if (err) - return err; - - if (!i->size) - return i->ret; + ret = flush_buf(i); + if (ret) + return ret; bch2_trans_init(&trans, i->c, 0, 0); - err = for_each_btree_key2(&trans, iter, i->id, i->from, + ret = for_each_btree_key2(&trans, iter, i->id, i->from, BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ({ struct btree_path_level *l = &iter.path->l[0]; struct bkey_packed *_k = bch2_btree_node_iter_peek(&l->iter, l->b); - err = flush_buf(i); - if (err) - break; - - if (!i->size) + ret = flush_buf(i); + if (ret) break; if (bpos_cmp(l->b->key.k.p, i->prev_node) > 0) { @@ -391,12 +378,12 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, })); i->from = iter.pos; - if (!err) - err = flush_buf(i); - bch2_trans_exit(&trans); - return err ?: i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations bfloat_failed_debug_ops = { @@ -409,7 +396,8 @@ static const struct file_operations bfloat_failed_debug_ops = { static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c, struct btree *b) { - out->tabstops[0] = 32; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 32); prt_printf(out, "%px btree=%s l=%u ", b, @@ -466,7 +454,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, struct dump_iter *i = file->private_data; struct bch_fs *c = i->c; bool done = false; - int err; + ssize_t ret = 0; i->ubuf = 
buf; i->size = size; @@ -477,12 +465,9 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, struct rhash_head *pos; struct btree *b; - err = flush_buf(i); - if (err) - return err; - - if (!i->size) - break; + ret = flush_buf(i); + if (ret) + return ret; rcu_read_lock(); i->buf.atomic++; @@ -500,9 +485,12 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, } while (!done); if (i->buf.allocation_failure) - return -ENOMEM; + ret = -ENOMEM; - return i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations cached_btree_nodes_ops = { @@ -538,7 +526,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, struct dump_iter *i = file->private_data; struct bch_fs *c = i->c; struct btree_trans *trans; - int err; + ssize_t ret = 0; i->ubuf = buf; i->size = size; @@ -549,12 +537,9 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, if (trans->task->pid <= i->iter) continue; - err = flush_buf(i); - if (err) - return err; - - if (!i->size) - break; + ret = flush_buf(i); + if (ret) + return ret; bch2_btree_trans_to_text(&i->buf, trans); @@ -570,9 +555,12 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, mutex_unlock(&c->btree_trans_lock); if (i->buf.allocation_failure) - return -ENOMEM; + ret = -ENOMEM; - return i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations btree_transactions_ops = { @@ -651,14 +639,16 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct lock_held_stats *lhs = &i->c->lock_held_stats; + struct bch_fs *c = i->c; int err; i->ubuf = buf; i->size = size; i->ret = 0; - while (lhs->names[i->iter] != 0 && i->iter < BCH_LOCK_TIME_NR) { + while (1) { + struct btree_transaction_stats *s = 
&c->btree_transaction_stats[i->iter]; + err = flush_buf(i); if (err) return err; @@ -666,11 +656,37 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf, if (!i->size) break; - prt_printf(&i->buf, "%s:", lhs->names[i->iter]); + if (i->iter == ARRAY_SIZE(c->btree_transaction_fns) || + !c->btree_transaction_fns[i->iter]) + break; + + prt_printf(&i->buf, "%s: ", c->btree_transaction_fns[i->iter]); prt_newline(&i->buf); - printbuf_indent_add(&i->buf, 8); - bch2_time_stats_to_text(&i->buf, &lhs->times[i->iter]); - printbuf_indent_sub(&i->buf, 8); + printbuf_indent_add(&i->buf, 2); + + mutex_lock(&s->lock); + + if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { + prt_printf(&i->buf, "Lock hold times:"); + prt_newline(&i->buf); + + printbuf_indent_add(&i->buf, 2); + bch2_time_stats_to_text(&i->buf, &s->lock_hold_times); + printbuf_indent_sub(&i->buf, 2); + } + + if (s->max_paths_text) { + prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths); + prt_newline(&i->buf); + + printbuf_indent_add(&i->buf, 2); + prt_str_indented(&i->buf, s->max_paths_text); + printbuf_indent_sub(&i->buf, 2); + } + + mutex_unlock(&s->lock); + + printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); i->iter++; } @@ -716,10 +732,8 @@ void bch2_fs_debug_init(struct bch_fs *c) debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, c->btree_debug, &journal_pins_ops); - if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { - debugfs_create_file("lock_held_stats", 0400, c->fs_debug_dir, - c, &lock_held_stats_op); - } + debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir, + c, &lock_held_stats_op); c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir); if (IS_ERR_OR_NULL(c->btree_debug_dir)) diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 15a1be2f..232f7c79 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -37,6 +37,7 @@ x(no_btree_node, no_btree_node_down) \ x(no_btree_node, no_btree_node_init) \ 
x(no_btree_node, no_btree_node_cached) \ + x(0, backpointer_to_overwritten_btree_node) \ x(0, lock_fail_node_reused) \ x(0, lock_fail_root_changed) \ x(0, journal_reclaim_would_deadlock) \ diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index c93e177a..1a841146 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -290,7 +290,7 @@ err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - return ret; + return ret ?: -BCH_ERR_transaction_restart_nested; } static int __remove_dirent(struct btree_trans *trans, struct bpos pos) @@ -914,7 +914,7 @@ static int check_inode(struct btree_trans *trans, bch2_fs_lazy_rw(c); ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot); - if (ret) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(c, "error in fsck: error while deleting inode: %s", bch2_err_str(ret)); return ret; @@ -1149,13 +1149,11 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) } } fsck_err: - if (ret) { + if (ret) bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret)); - return ret; - } - if (trans_was_restarted(trans, restart_count)) - return -BCH_ERR_transaction_restart_nested; - return 0; + if (!ret && trans_was_restarted(trans, restart_count)) + ret = -BCH_ERR_transaction_restart_nested; + return ret; } static int check_extent(struct btree_trans *trans, struct btree_iter *iter, diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index d77092aa..3f1cf1ac 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -1255,8 +1255,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) u64 seq; unsigned i; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 24); out->atomic++; - out->tabstops[0] = 24; rcu_read_lock(); s = READ_ONCE(j->reservations); diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 2fc24745..22470067 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -636,6 +636,8 @@ int 
__bch2_evacuate_bucket(struct moving_context *ctxt, b = bch2_backpointer_get_node(&trans, &iter, bucket, bp_offset, bp); ret = PTR_ERR_OR_ZERO(b); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) + continue; if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index ecc64dd9..17b289b0 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -268,7 +268,8 @@ void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) struct bch_fs_rebalance *r = &c->rebalance; struct rebalance_work w = rebalance_work(c); - out->tabstops[0] = 20; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 20); prt_printf(out, "fullest_dev (%i):", w.dev_most_full_idx); prt_tab(out); diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 24244bc3..fb3f8e40 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -278,8 +278,8 @@ int bch2_fs_check_snapshots(struct bch_fs *c) bch2_trans_init(&trans, c, 0, 0); - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots, - POS(BCACHEFS_ROOT_INO, 0), + ret = for_each_btree_key_commit(&trans, iter, + BTREE_ID_snapshots, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, check_snapshot(&trans, &iter, k)); diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 55f8c65a..ade09bdf 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -1427,8 +1427,8 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR ? 
bch2_sb_field_ops[type] : NULL; - if (!out->tabstops[0]) - out->tabstops[0] = 32; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 32); if (ops) prt_printf(out, "%s", bch2_sb_fields[type]); @@ -1476,8 +1476,8 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, u64 fields_have = 0; unsigned nr_devices = 0; - if (!out->tabstops[0]) - out->tabstops[0] = 32; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 32); mi = bch2_sb_get_members(sb); if (mi) { diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 2c650055..2dfed1ff 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -560,7 +560,8 @@ SHOW(bch2_fs_counters) u64 counter = 0; u64 counter_since_mount = 0; - out->tabstops[0] = 32; + printbuf_tabstop_push(out, 32); + #define x(t, ...) \ if (attr == &sysfs_##t) { \ counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\ diff --git a/libbcachefs/util.c b/libbcachefs/util.c index ee2c7d9e..42da6623 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -268,6 +268,12 @@ static void bch2_quantiles_update(struct quantiles *q, u64 v) } } +void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits) +{ + while (nr_bits) + prt_char(out, '0' + ((v >> --nr_bits) & 1)); +} + /* time stats: */ static void bch2_time_stats_update_one(struct time_stats *stats, @@ -526,7 +532,8 @@ void bch2_pd_controller_init(struct bch_pd_controller *pd) void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd) { - out->tabstops[0] = 20; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 20); prt_printf(out, "rate:"); prt_tab(out); diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 1fe66fd9..ab7e43d4 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -353,6 +353,8 @@ bool bch2_is_zero(const void *, size_t); u64 bch2_read_flag_list(char *, const char * const[]); +void bch2_prt_u64_binary(struct printbuf *, u64, unsigned); + #define NR_QUANTILES 15 #define QUANTILE_IDX(i) 
inorder_to_eytzinger0(i, NR_QUANTILES) #define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES) diff --git a/linux/printbuf.c b/linux/printbuf.c index 3fc9ff47..5cf79d43 100644 --- a/linux/printbuf.c +++ b/linux/printbuf.c @@ -2,20 +2,13 @@ /* Copyright (C) 2022 Kent Overstreet */ #include -#include -#include -#include - -#ifdef __KERNEL__ #include #include -#else -#ifndef EXPORT_SYMBOL -#define EXPORT_SYMBOL(x) -#endif -#endif +#include +#include +#include -static inline size_t printbuf_linelen(struct printbuf *buf) +static inline unsigned printbuf_linelen(struct printbuf *buf) { return buf->pos - buf->last_newline; } @@ -35,6 +28,11 @@ int printbuf_make_room(struct printbuf *out, unsigned extra) return 0; new_size = roundup_pow_of_two(out->size + extra); + + /* + * Note: output buffer must be freeable with kfree(), it's not required + * that the user use printbuf_exit(). + */ buf = krealloc(out->buf, new_size, !out->atomic ? GFP_KERNEL : GFP_NOWAIT); if (!buf) { @@ -78,25 +76,43 @@ void printbuf_exit(struct printbuf *buf) } EXPORT_SYMBOL(printbuf_exit); -void prt_newline(struct printbuf *buf) +void printbuf_tabstops_reset(struct printbuf *buf) { - unsigned i; - - printbuf_make_room(buf, 1 + buf->indent); - - __prt_char(buf, '\n'); - - buf->last_newline = buf->pos; - - for (i = 0; i < buf->indent; i++) - __prt_char(buf, ' '); - - printbuf_nul_terminate(buf); - - buf->last_field = buf->pos; - buf->tabstop = 0; + buf->nr_tabstops = 0; } -EXPORT_SYMBOL(prt_newline); +EXPORT_SYMBOL(printbuf_tabstops_reset); + +void printbuf_tabstop_pop(struct printbuf *buf) +{ + if (buf->nr_tabstops) + --buf->nr_tabstops; +} +EXPORT_SYMBOL(printbuf_tabstop_pop); + +/* + * printbuf_tabstop_push - add a tabstop, n spaces from the previous tabstop + * + * @buf: printbuf to control + * @spaces: number of spaces from previous tabstop + * + * In the future this function may allocate memory if setting more than + * PRINTBUF_INLINE_TABSTOPS or setting tabstops more than 255 spaces from start + * 
of line. + */ +int printbuf_tabstop_push(struct printbuf *buf, unsigned spaces) +{ + unsigned prev_tabstop = buf->nr_tabstops + ? buf->_tabstops[buf->nr_tabstops - 1] + : 0; + + if (WARN_ON(buf->nr_tabstops >= ARRAY_SIZE(buf->_tabstops))) + return -EINVAL; + + buf->_tabstops[buf->nr_tabstops++] = prev_tabstop + spaces; + buf->has_indent_or_tabstops = true; + return 0; +} +EXPORT_SYMBOL(printbuf_tabstop_push); /** * printbuf_indent_add - add to the current indent level @@ -113,8 +129,9 @@ void printbuf_indent_add(struct printbuf *buf, unsigned spaces) spaces = 0; buf->indent += spaces; - while (spaces--) - prt_char(buf, ' '); + prt_chars(buf, ' ', spaces); + + buf->has_indent_or_tabstops = true; } EXPORT_SYMBOL(printbuf_indent_add); @@ -137,9 +154,52 @@ void printbuf_indent_sub(struct printbuf *buf, unsigned spaces) printbuf_nul_terminate(buf); } buf->indent -= spaces; + + if (!buf->indent && !buf->nr_tabstops) + buf->has_indent_or_tabstops = false; } EXPORT_SYMBOL(printbuf_indent_sub); +void prt_newline(struct printbuf *buf) +{ + unsigned i; + + printbuf_make_room(buf, 1 + buf->indent); + + __prt_char(buf, '\n'); + + buf->last_newline = buf->pos; + + for (i = 0; i < buf->indent; i++) + __prt_char(buf, ' '); + + printbuf_nul_terminate(buf); + + buf->last_field = buf->pos; + buf->cur_tabstop = 0; +} +EXPORT_SYMBOL(prt_newline); + +/* + * Returns spaces from start of line, if set, or 0 if unset: + */ +static inline unsigned cur_tabstop(struct printbuf *buf) +{ + return buf->cur_tabstop < buf->nr_tabstops + ? 
buf->_tabstops[buf->cur_tabstop] + : 0; +} + +static void __prt_tab(struct printbuf *out) +{ + int spaces = max_t(int, 0, cur_tabstop(out) - printbuf_linelen(out)); + + prt_chars(out, ' ', spaces); + + out->last_field = out->pos; + out->cur_tabstop++; +} + /** * prt_tab - Advance printbuf to the next tabstop * @@ -149,17 +209,38 @@ EXPORT_SYMBOL(printbuf_indent_sub); */ void prt_tab(struct printbuf *out) { - int spaces = max_t(int, 0, out->tabstops[out->tabstop] - printbuf_linelen(out)); + if (WARN_ON(!cur_tabstop(out))) + return; - BUG_ON(out->tabstop > ARRAY_SIZE(out->tabstops)); - - prt_chars(out, ' ', spaces); - - out->last_field = out->pos; - out->tabstop++; + __prt_tab(out); } EXPORT_SYMBOL(prt_tab); +static void __prt_tab_rjust(struct printbuf *buf) +{ + unsigned move = buf->pos - buf->last_field; + int pad = (int) cur_tabstop(buf) - (int) printbuf_linelen(buf); + + if (pad > 0) { + printbuf_make_room(buf, pad); + + if (buf->last_field + pad < buf->size) + memmove(buf->buf + buf->last_field + pad, + buf->buf + buf->last_field, + min(move, buf->size - 1 - buf->last_field - pad)); + + if (buf->last_field < buf->size) + memset(buf->buf + buf->last_field, ' ', + min((unsigned) pad, buf->size - buf->last_field)); + + buf->pos += pad; + printbuf_nul_terminate(buf); + } + + buf->last_field = buf->pos; + buf->cur_tabstop++; +} + /** * prt_tab_rjust - Advance printbuf to the next tabstop, right justifying * previous output @@ -171,134 +252,64 @@ EXPORT_SYMBOL(prt_tab); */ void prt_tab_rjust(struct printbuf *buf) { - BUG_ON(buf->tabstop > ARRAY_SIZE(buf->tabstops)); + if (WARN_ON(!cur_tabstop(buf))) + return; - if (printbuf_linelen(buf) < buf->tabstops[buf->tabstop]) { - unsigned move = buf->pos - buf->last_field; - unsigned shift = buf->tabstops[buf->tabstop] - - printbuf_linelen(buf); - - printbuf_make_room(buf, shift); - - if (buf->last_field + shift < buf->size) - memmove(buf->buf + buf->last_field + shift, - buf->buf + buf->last_field, - min(move, buf->size - 1 - 
buf->last_field - shift)); - - if (buf->last_field < buf->size) - memset(buf->buf + buf->last_field, ' ', - min(shift, buf->size - buf->last_field)); - - buf->pos += shift; - printbuf_nul_terminate(buf); - } - - buf->last_field = buf->pos; - buf->tabstop++; + __prt_tab_rjust(buf); } EXPORT_SYMBOL(prt_tab_rjust); -enum string_size_units { - STRING_UNITS_10, /* use powers of 10^3 (standard SI) */ - STRING_UNITS_2, /* use binary powers of 2^10 */ -}; -static int string_get_size(u64 size, u64 blk_size, - const enum string_size_units units, - char *buf, int len) +/** + * prt_bytes_indented - Print an array of chars, handling embedded control characters + * + * @out: printbuf to output to + * @str: string to print + * @count: number of bytes to print + * + * The following control characters are handled as so: + * \n: prt_newline newline that obeys current indent level + * \t: prt_tab advance to next tabstop + * \r: prt_tab_rjust advance to next tabstop, with right justification + */ +void prt_bytes_indented(struct printbuf *out, const char *str, unsigned count) { - static const char *const units_10[] = { - "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" - }; - static const char *const units_2[] = { - "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" - }; - static const char *const *const units_str[] = { - [STRING_UNITS_10] = units_10, - [STRING_UNITS_2] = units_2, - }; - static const unsigned int divisor[] = { - [STRING_UNITS_10] = 1000, - [STRING_UNITS_2] = 1024, - }; - static const unsigned int rounding[] = { 500, 50, 5 }; - int i = 0, j; - u32 remainder = 0, sf_cap; - char tmp[13]; - const char *unit; + const char *unprinted_start = str; + const char *end = str + count; - tmp[0] = '\0'; - - if (blk_size == 0) - size = 0; - if (size == 0) - goto out; - - /* This is Napier's algorithm. 
Reduce the original block size to - * - * coefficient * divisor[units]^i - * - * we do the reduction so both coefficients are just under 32 bits so - * that multiplying them together won't overflow 64 bits and we keep - * as much precision as possible in the numbers. - * - * Note: it's safe to throw away the remainders here because all the - * precision is in the coefficients. - */ - while (blk_size >> 32) { - do_div(blk_size, divisor[units]); - i++; + if (!out->has_indent_or_tabstops || out->suppress_indent_tabstop_handling) { + prt_bytes(out, str, count); + return; } - while (size >> 32) { - do_div(size, divisor[units]); - i++; + while (str != end) { + switch (*str) { + case '\n': + prt_bytes(out, unprinted_start, str - unprinted_start); + unprinted_start = str + 1; + prt_newline(out); + break; + case '\t': + if (likely(cur_tabstop(out))) { + prt_bytes(out, unprinted_start, str - unprinted_start); + unprinted_start = str + 1; + __prt_tab(out); + } + break; + case '\r': + if (likely(cur_tabstop(out))) { + prt_bytes(out, unprinted_start, str - unprinted_start); + unprinted_start = str + 1; + __prt_tab_rjust(out); + } + break; + } + + str++; } - /* now perform the actual multiplication keeping i as the sum of the - * two logarithms */ - size *= blk_size; - - /* and logarithmically reduce it until it's just under the divisor */ - while (size >= divisor[units]) { - remainder = do_div(size, divisor[units]); - i++; - } - - /* work out in j how many digits of precision we need from the - * remainder */ - sf_cap = size; - for (j = 0; sf_cap*10 < 1000; j++) - sf_cap *= 10; - - if (units == STRING_UNITS_2) { - /* express the remainder as a decimal. 
It's currently the - * numerator of a fraction whose denominator is - * divisor[units], which is 1 << 10 for STRING_UNITS_2 */ - remainder *= 1000; - remainder >>= 10; - } - - /* add a 5 to the digit below what will be printed to ensure - * an arithmetical round up and carry it through to size */ - remainder += rounding[j]; - if (remainder >= 1000) { - remainder -= 1000; - size += 1; - } - - if (j) { - snprintf(tmp, sizeof(tmp), ".%03u", remainder); - tmp[j+1] = '\0'; - } - - out: - if (i >= ARRAY_SIZE(units_2)) - unit = "UNK"; - else - unit = units_str[units][i]; - - return snprintf(buf, len, "%u%s %s", (u32)size, tmp, unit); + prt_bytes(out, unprinted_start, str - unprinted_start); } +EXPORT_SYMBOL(prt_bytes_indented); /** * prt_human_readable_u64 - Print out a u64 in human readable units diff --git a/linux/string_helpers.c b/linux/string_helpers.c new file mode 100644 index 00000000..3d720bc0 --- /dev/null +++ b/linux/string_helpers.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Helpers for formatting and printing strings + * + * Copyright 31 August 2008 James Bottomley + * Copyright (C) 2013, Intel Corporation + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * string_get_size - get the size in the specified units + * @size: The size to be converted in blocks + * @blk_size: Size of the block (use 1 for size in bytes) + * @units: units to use (powers of 1000 or 1024) + * @buf: buffer to format to + * @len: length of buffer + * + * This function returns a string formatted to 3 significant figures + * giving the size in the required units. @buf should have room for + * at least 9 bytes and will always be zero terminated. 
+ * + */ +int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, + char *buf, int len) +{ + static const char *const units_10[] = { + "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" + }; + static const char *const units_2[] = { + "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" + }; + static const char *const *const units_str[] = { + [STRING_UNITS_10] = units_10, + [STRING_UNITS_2] = units_2, + }; + static const unsigned int divisor[] = { + [STRING_UNITS_10] = 1000, + [STRING_UNITS_2] = 1024, + }; + static const unsigned int rounding[] = { 500, 50, 5 }; + int i = 0, j; + u32 remainder = 0, sf_cap; + char tmp[8]; + const char *unit; + + tmp[0] = '\0'; + + if (blk_size == 0) + size = 0; + if (size == 0) + goto out; + + /* This is Napier's algorithm. Reduce the original block size to + * + * coefficient * divisor[units]^i + * + * we do the reduction so both coefficients are just under 32 bits so + * that multiplying them together won't overflow 64 bits and we keep + * as much precision as possible in the numbers. + * + * Note: it's safe to throw away the remainders here because all the + * precision is in the coefficients. + */ + while (blk_size >> 32) { + do_div(blk_size, divisor[units]); + i++; + } + + while (size >> 32) { + do_div(size, divisor[units]); + i++; + } + + /* now perform the actual multiplication keeping i as the sum of the + * two logarithms */ + size *= blk_size; + + /* and logarithmically reduce it until it's just under the divisor */ + while (size >= divisor[units]) { + remainder = do_div(size, divisor[units]); + i++; + } + + /* work out in j how many digits of precision we need from the + * remainder */ + sf_cap = size; + for (j = 0; sf_cap*10 < 1000; j++) + sf_cap *= 10; + + if (units == STRING_UNITS_2) { + /* express the remainder as a decimal. 
It's currently the + * numerator of a fraction whose denominator is + * divisor[units], which is 1 << 10 for STRING_UNITS_2 */ + remainder *= 1000; + remainder >>= 10; + } + + /* add a 5 to the digit below what will be printed to ensure + * an arithmetical round up and carry it through to size */ + remainder += rounding[j]; + if (remainder >= 1000) { + remainder -= 1000; + size += 1; + } + + if (j) { + snprintf(tmp, sizeof(tmp), ".%03u", remainder); + tmp[j+1] = '\0'; + } + + out: + if (i >= ARRAY_SIZE(units_2)) + unit = "UNK"; + else + unit = units_str[units][i]; + + return snprintf(buf, len, "%u%s %s", (u32)size, tmp, unit); +} +EXPORT_SYMBOL(string_get_size);