Update bcachefs sources to dfaf9a6ee2 lib/printbuf: Clean up headers

Kent Overstreet 2022-08-18 12:32:10 -04:00
parent 51ffcc6993
commit dded444b20
32 changed files with 1007 additions and 520 deletions

View File

@ -1 +1 @@
49c34dadcad9c33b1e8510b5543d60c40fa0bebd
dfaf9a6ee24f5c415635f9a75f5281f385535ebd

View File

@ -2,7 +2,7 @@ PREFIX?=/usr/local
PKG_CONFIG?=pkg-config
INSTALL=install
CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall -fPIC \
-Wno-pointer-sign \
-fno-strict-aliasing \
-fno-delete-null-pointer-checks \
@ -195,6 +195,10 @@ update-bcachefs-sources:
git add linux/generic-radix-tree.c
cp $(LINUX_DIR)/include/linux/kmemleak.h include/linux/
git add include/linux/kmemleak.h
cp $(LINUX_DIR)/include/linux/printbuf.h include/linux/
git add include/linux/printbuf.h
cp $(LINUX_DIR)/lib/printbuf.c linux/
git add linux/printbuf.c
cp $(LINUX_DIR)/scripts/Makefile.compiler ./
git add Makefile.compiler
$(RM) libbcachefs/*.mod.c

View File

@ -179,8 +179,9 @@ static void fs_usage_to_text(struct printbuf *out, const char *path)
pr_uuid(out, fs.uuid.b);
prt_newline(out);
out->tabstops[0] = 20;
out->tabstops[1] = 36;
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 20);
printbuf_tabstop_push(out, 16);
prt_str(out, "Size:");
prt_tab(out);
@ -202,10 +203,11 @@ static void fs_usage_to_text(struct printbuf *out, const char *path)
prt_newline(out);
out->tabstops[0] = 16;
out->tabstops[1] = 32;
out->tabstops[2] = 50;
out->tabstops[3] = 68;
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 18);
printbuf_tabstop_push(out, 18);
prt_str(out, "Data type");
prt_tab(out);
@ -255,10 +257,11 @@ static void fs_usage_to_text(struct printbuf *out, const char *path)
sort(dev_names.data, dev_names.nr,
sizeof(dev_names.data[0]), dev_by_label_cmp, NULL);
out->tabstops[0] = 16;
out->tabstops[1] = 36;
out->tabstops[2] = 52;
out->tabstops[3] = 68;
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 20);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 14);
darray_for_each(dev_names, dev)
dev_usage_to_text(out, fs, dev);
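
Worth noting about the hunks above (an observation, not text from the commit): the old code stored absolute column positions in out->tabstops[], while printbuf_tabstop_push() takes each column's width relative to the previous tabstop — which is why the absolute stops 20/36 become pushes of 20 and 16. A minimal sketch of the new API:

/* Minimal sketch of the new relative-width tabstop API: */
static void example_to_text(struct printbuf *out)
{
	printbuf_tabstops_reset(out);
	printbuf_tabstop_push(out, 16);	/* first column is 16 chars wide */
	printbuf_tabstop_push(out, 20);	/* second column ends at 16 + 20 = 36 */

	prt_str(out, "Label:");
	prt_tab(out);			/* advance to the first tabstop */
	prt_str(out, "value");
	prt_newline(out);
}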

View File

@ -32,6 +32,10 @@
* Since no equivalent yet exists for GFP_ATOMIC/GFP_NOWAIT, memory allocations
* will be done with GFP_NOWAIT if printbuf->atomic is nonzero.
*
* It's allowed to grab the output buffer and free it later with kfree() instead
* of using printbuf_exit(), if the user just needs a heap allocated string at
* the end.
*
* Memory allocation failures: We don't return errors directly, because on
* memory allocation failure we usually don't want to bail out and unwind - we
* want to print what we've got, on a best-effort basis. But code that does want
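
A minimal sketch of the kfree() pattern the comment above allows (alloc_msg() and nr_errors are hypothetical names, not from this commit):

static char *alloc_msg(unsigned nr_errors)
{
	struct printbuf buf = PRINTBUF;

	prt_printf(&buf, "found %u errors", nr_errors);
	if (buf.allocation_failure) {
		printbuf_exit(&buf);
		return NULL;
	}
	return buf.buf;	/* caller frees with kfree() instead of printbuf_exit() */
}
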
@ -67,6 +71,8 @@ enum printbuf_si {
PRINTBUF_UNITS_10, /* use powers of 10^3 (standard SI) */
};
#define PRINTBUF_INLINE_TABSTOPS 4
struct printbuf {
char *buf;
unsigned size;
@ -82,19 +88,34 @@ struct printbuf {
bool heap_allocated:1;
enum printbuf_si si_units:1;
bool human_readable_units:1;
u8 tabstop;
u8 tabstops[4];
bool has_indent_or_tabstops:1;
bool suppress_indent_tabstop_handling:1;
u8 nr_tabstops;
/*
* Do not modify directly: use printbuf_tabstop_add(),
* printbuf_tabstop_get()
*/
u8 cur_tabstop;
u8 _tabstops[PRINTBUF_INLINE_TABSTOPS];
};
int printbuf_make_room(struct printbuf *, unsigned);
const char *printbuf_str(const struct printbuf *);
void printbuf_exit(struct printbuf *);
void prt_newline(struct printbuf *);
void printbuf_tabstops_reset(struct printbuf *);
void printbuf_tabstop_pop(struct printbuf *);
int printbuf_tabstop_push(struct printbuf *, unsigned);
void printbuf_indent_add(struct printbuf *, unsigned);
void printbuf_indent_sub(struct printbuf *, unsigned);
void prt_newline(struct printbuf *);
void prt_tab(struct printbuf *);
void prt_tab_rjust(struct printbuf *);
void prt_bytes_indented(struct printbuf *, const char *, unsigned);
void prt_human_readable_u64(struct printbuf *, u64);
void prt_human_readable_s64(struct printbuf *, s64);
void prt_units_u64(struct printbuf *, u64);
@ -129,7 +150,7 @@ static inline unsigned printbuf_remaining(struct printbuf *out)
static inline unsigned printbuf_written(struct printbuf *out)
{
return min(out->pos, out->size);
return out->size ? min(out->pos, out->size - 1) : 0;
}
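
The new expression accounts for the nul terminator: a buffer of out->size bytes holds at most out->size - 1 written characters, and a zero-size buffer (as after allocation failure) holds none. With illustrative numbers:

/* size == 8, pos == 12 (output was truncated):         */
/*   old: min(12, 8)     == 8  -- overcounts by the nul */
/*   new: min(12, 8 - 1) == 7  -- characters written    */
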
/*
@ -150,21 +171,6 @@ static inline void printbuf_nul_terminate(struct printbuf *out)
out->buf[out->size - 1] = 0;
}
static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n)
{
memset(out->buf + out->pos,
c,
min(n, printbuf_remaining(out)));
out->pos += n;
}
static inline void prt_chars(struct printbuf *out, char c, unsigned n)
{
printbuf_make_room(out, n);
__prt_chars_reserved(out, c, n);
printbuf_nul_terminate(out);
}
/* Doesn't call printbuf_make_room(), doesn't nul terminate: */
static inline void __prt_char_reserved(struct printbuf *out, char c)
{
@ -186,14 +192,34 @@ static inline void prt_char(struct printbuf *out, char c)
printbuf_nul_terminate(out);
}
static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n)
static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n)
{
unsigned i, can_print = min(n, printbuf_remaining(out));
for (i = 0; i < can_print; i++)
out->buf[out->pos++] = c;
out->pos += n - can_print;
}
static inline void prt_chars(struct printbuf *out, char c, unsigned n)
{
printbuf_make_room(out, n);
__prt_chars_reserved(out, c, n);
printbuf_nul_terminate(out);
}
static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n)
{
unsigned i, can_print;
printbuf_make_room(out, n);
can_print = min(n, printbuf_remaining(out));
for (i = 0; i < can_print; i++)
out->buf[out->pos++] = ((char *) b)[i];
out->pos += n - can_print;
memcpy(out->buf + out->pos,
b,
min(n, printbuf_remaining(out)));
out->pos += n;
printbuf_nul_terminate(out);
}
@ -202,6 +228,11 @@ static inline void prt_str(struct printbuf *out, const char *str)
prt_bytes(out, str, strlen(str));
}
static inline void prt_str_indented(struct printbuf *out, const char *str)
{
prt_bytes_indented(out, str, strlen(str));
}
static inline void prt_hex_byte(struct printbuf *out, u8 byte)
{
printbuf_make_room(out, 2);
@ -226,7 +257,8 @@ static inline void printbuf_reset(struct printbuf *buf)
buf->pos = 0;
buf->allocation_failure = 0;
buf->indent = 0;
buf->tabstop = 0;
buf->nr_tabstops = 0;
buf->cur_tabstop = 0;
}
/**
@ -245,4 +277,30 @@ static inline void printbuf_atomic_dec(struct printbuf *buf)
buf->atomic--;
}
/*
* This is used for the %pf(%p) sprintf format extension, where we pass a pretty
* printer and arguments to the pretty-printer to sprintf
*
* Instead of passing a pretty-printer function to sprintf directly, we pass it
* a pointer to a struct call_pp, so that sprintf can check that the magic
* number is present, which in turn ensures that the CALL_PP() macro has been
* used in order to typecheck the arguments to the pretty printer function
*
* Example usage:
* sprintf("%pf(%p)", CALL_PP(prt_bdev, bdev));
*/
struct call_pp {
unsigned long magic;
void *fn;
};
#define PP_TYPECHECK(fn, ...) \
({ while (0) fn((struct printbuf *) NULL, ##__VA_ARGS__); })
#define CALL_PP_MAGIC (unsigned long) 0xce0b92d22f6b6be4
#define CALL_PP(fn, ...) \
(PP_TYPECHECK(fn, ##__VA_ARGS__), \
&((struct call_pp) { CALL_PP_MAGIC, fn })), ##__VA_ARGS__
#endif /* _LINUX_PRINTBUF_H */
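
For illustration (buf, prt_bdev and bdev are the hypothetical names from the comment above): the `while (0)` call in PP_TYPECHECK is never executed, but it forces the compiler to check the arguments against the pretty-printer's prototype:

static void prt_bdev(struct printbuf *out, struct block_device *bdev);

sprintf(buf, "%pf(%p)", CALL_PP(prt_bdev, bdev));	/* type-checks */
sprintf(buf, "%pf(%p)", CALL_PP(prt_bdev, 42));		/* warning: int passed
							   where a pointer is
							   expected */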

View File

@ -0,0 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_STRING_HELPERS_H_
#define _LINUX_STRING_HELPERS_H_
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/types.h>
/* Descriptions of the types of units to
* print in */
enum string_size_units {
STRING_UNITS_10, /* use powers of 10^3 (standard SI) */
STRING_UNITS_2, /* use binary powers of 2^10 */
};
int string_get_size(u64 size, u64 blk_size, enum string_size_units units,
char *buf, int len);
#endif
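
A usage sketch of the helper declared above (values illustrative, output approximate):

char buf[32];

/* 4096 blocks of 512 bytes, printed in binary units: */
string_get_size(4096, 512, STRING_UNITS_2, buf, sizeof(buf));
/* buf now holds "2.00 MiB" */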

View File

@ -317,24 +317,27 @@ DEFINE_EVENT(bch_fs, btree_node_cannibalize_unlock,
);
TRACE_EVENT(btree_reserve_get_fail,
TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl),
TP_ARGS(c, required, cl),
TP_PROTO(const char *trans_fn,
unsigned long caller_ip,
size_t required),
TP_ARGS(trans_fn, caller_ip, required),
TP_STRUCT__entry(
__field(dev_t, dev )
__array(char, trans_fn, 24 )
__field(unsigned long, caller_ip )
__field(size_t, required )
__field(struct closure *, cl )
),
TP_fast_assign(
__entry->dev = c->dev;
__entry->required = required;
__entry->cl = cl;
strlcpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
__entry->caller_ip = caller_ip;
__entry->required = required;
),
TP_printk("%d,%d required %zu by %p",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->required, __entry->cl)
TP_printk("%s %pS required %zu",
__entry->trans_fn,
(void *) __entry->caller_ip,
__entry->required)
);
DEFINE_EVENT(btree_node, btree_split,

View File

@ -339,6 +339,8 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
skipped_need_journal_commit,
skipped_nouse,
cl);
if (!ob)
iter.path->preserve = false;
err:
set_btree_iter_dontneed(&iter);
bch2_trans_iter_exit(trans, &iter);
@ -379,15 +381,15 @@ static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch
* journal buckets - journal buckets will be < ca->new_fs_bucket_idx
*/
static noinline struct open_bucket *
bch2_bucket_alloc_trans_early(struct btree_trans *trans,
struct bch_dev *ca,
enum alloc_reserve reserve,
u64 *cur_bucket,
u64 *buckets_seen,
u64 *skipped_open,
u64 *skipped_need_journal_commit,
u64 *skipped_nouse,
struct closure *cl)
bch2_bucket_alloc_early(struct btree_trans *trans,
struct bch_dev *ca,
enum alloc_reserve reserve,
u64 *cur_bucket,
u64 *buckets_seen,
u64 *skipped_open,
u64 *skipped_need_journal_commit,
u64 *skipped_nouse,
struct closure *cl)
{
struct btree_iter iter;
struct bkey_s_c k;
@ -430,7 +432,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans,
return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
}
static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
struct bch_dev *ca,
enum alloc_reserve reserve,
u64 *cur_bucket,
@ -445,15 +447,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
struct open_bucket *ob = NULL;
int ret;
if (unlikely(!ca->mi.freespace_initialized))
return bch2_bucket_alloc_trans_early(trans, ca, reserve,
cur_bucket,
buckets_seen,
skipped_open,
skipped_need_journal_commit,
skipped_nouse,
cl);
BUG_ON(ca->new_fs_bucket_idx);
/*
@ -467,7 +460,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
break;
for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k));
*cur_bucket < k.k->p.offset && !ob;
*cur_bucket < k.k->p.offset;
(*cur_bucket)++) {
ret = btree_trans_too_many_iters(trans);
if (ret)
@ -481,6 +474,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
skipped_need_journal_commit,
skipped_nouse,
k, cl);
if (ob)
break;
}
if (ob || ret)
@ -496,11 +491,13 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
*
* Returns index of bucket on success, 0 on failure
* */
struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
struct bch_dev *ca,
enum alloc_reserve reserve,
bool may_alloc_partial,
struct closure *cl)
{
struct bch_fs *c = trans->c;
struct open_bucket *ob = NULL;
struct bch_dev_usage usage;
bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
@ -512,7 +509,6 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
u64 skipped_need_journal_commit = 0;
u64 skipped_nouse = 0;
bool waiting = false;
int ret;
again:
usage = bch2_dev_usage_read(ca);
avail = dev_buckets_free(ca, usage, reserve);
@ -549,19 +545,26 @@ again:
return ob;
}
ret = bch2_trans_do(c, NULL, NULL, 0,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
&cur_bucket,
&buckets_seen,
&skipped_open,
&skipped_need_journal_commit,
&skipped_nouse,
cl)));
ob = likely(ca->mi.freespace_initialized)
? bch2_bucket_alloc_freelist(trans, ca, reserve,
&cur_bucket,
&buckets_seen,
&skipped_open,
&skipped_need_journal_commit,
&skipped_nouse,
cl)
: bch2_bucket_alloc_early(trans, ca, reserve,
&cur_bucket,
&buckets_seen,
&skipped_open,
&skipped_need_journal_commit,
&skipped_nouse,
cl);
if (skipped_need_journal_commit * 2 > avail)
bch2_journal_flush_async(&c->journal, NULL);
if (!ob && !ret && !freespace_initialized && start) {
if (!ob && !freespace_initialized && start) {
start = cur_bucket = 0;
goto again;
}
@ -570,7 +573,7 @@ again:
ca->bucket_alloc_trans_early_cursor = cur_bucket;
err:
if (!ob)
ob = ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
ob = ERR_PTR(-BCH_ERR_no_buckets_found);
if (IS_ERR(ob)) {
trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve],
@ -590,6 +593,19 @@ err:
return ob;
}
struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
enum alloc_reserve reserve,
bool may_alloc_partial,
struct closure *cl)
{
struct open_bucket *ob;
bch2_trans_do(c, NULL, NULL, 0,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
may_alloc_partial, cl)));
return ob;
}
static int __dev_stripe_cmp(struct dev_stripe_state *stripe,
unsigned l, unsigned r)
{
@ -655,7 +671,7 @@ static void add_new_bucket(struct bch_fs *c,
ob_push(c, ptrs, ob);
}
int bch2_bucket_alloc_set(struct bch_fs *c,
static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
struct open_buckets *ptrs,
struct dev_stripe_state *stripe,
struct bch_devs_mask *devs_may_alloc,
@ -666,11 +682,12 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
unsigned flags,
struct closure *cl)
{
struct bch_fs *c = trans->c;
struct dev_alloc_list devs_sorted =
bch2_dev_alloc_list(c, stripe, devs_may_alloc);
unsigned dev;
struct bch_dev *ca;
int ret = -BCH_ERR_insufficient_devices;
int ret = 0;
unsigned i;
BUG_ON(*nr_effective >= nr_replicas);
@ -694,16 +711,15 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
continue;
}
ob = bch2_bucket_alloc(c, ca, reserve,
ob = bch2_bucket_alloc_trans(trans, ca, reserve,
flags & BUCKET_MAY_ALLOC_PARTIAL, cl);
if (!IS_ERR(ob))
bch2_dev_stripe_increment(ca, stripe);
percpu_ref_put(&ca->ref);
if (IS_ERR(ob)) {
ret = PTR_ERR(ob);
if (cl)
ret = PTR_ERR_OR_ZERO(ob);
if (ret) {
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl)
break;
continue;
}
@ -711,15 +727,36 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
add_new_bucket(c, ptrs, devs_may_alloc,
nr_effective, have_cache, flags, ob);
if (*nr_effective >= nr_replicas) {
ret = 0;
if (*nr_effective >= nr_replicas)
break;
}
}
if (*nr_effective >= nr_replicas)
ret = 0;
else if (!ret)
ret = -BCH_ERR_insufficient_devices;
return ret;
}
int bch2_bucket_alloc_set(struct bch_fs *c,
struct open_buckets *ptrs,
struct dev_stripe_state *stripe,
struct bch_devs_mask *devs_may_alloc,
unsigned nr_replicas,
unsigned *nr_effective,
bool *have_cache,
enum alloc_reserve reserve,
unsigned flags,
struct closure *cl)
{
return bch2_trans_do(c, NULL, NULL, 0,
bch2_bucket_alloc_set_trans(&trans, ptrs, stripe,
devs_may_alloc, nr_replicas,
nr_effective, have_cache, reserve,
flags, cl));
}
/* Allocate from stripes: */
/*
@ -824,7 +861,7 @@ static void get_buckets_from_writepoint(struct bch_fs *c,
wp->ptrs = ptrs_skip;
}
static int open_bucket_add_buckets(struct bch_fs *c,
static int open_bucket_add_buckets(struct btree_trans *trans,
struct open_buckets *ptrs,
struct write_point *wp,
struct bch_devs_list *devs_have,
@ -837,6 +874,7 @@ static int open_bucket_add_buckets(struct bch_fs *c,
unsigned flags,
struct closure *_cl)
{
struct bch_fs *c = trans->c;
struct bch_devs_mask devs;
struct open_bucket *ob;
struct closure *cl = NULL;
@ -868,7 +906,8 @@ static int open_bucket_add_buckets(struct bch_fs *c,
target, erasure_code,
nr_replicas, nr_effective,
have_cache, flags, _cl);
if (bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
return ret;
if (*nr_effective >= nr_replicas)
@ -887,10 +926,11 @@ retry_blocking:
* Try nonblocking first, so that if one device is full we'll try from
* other devices:
*/
ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs,
ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
nr_replicas, nr_effective, have_cache,
reserve, flags, cl);
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
!bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
!cl && _cl) {
cl = _cl;
@ -1010,15 +1050,25 @@ static bool try_decrease_writepoints(struct bch_fs *c,
return true;
}
static struct write_point *writepoint_find(struct bch_fs *c,
static void bch2_trans_mutex_lock(struct btree_trans *trans,
struct mutex *lock)
{
if (!mutex_trylock(lock)) {
bch2_trans_unlock(trans);
mutex_lock(lock);
}
}
static struct write_point *writepoint_find(struct btree_trans *trans,
unsigned long write_point)
{
struct bch_fs *c = trans->c;
struct write_point *wp, *oldest;
struct hlist_head *head;
if (!(write_point & 1UL)) {
wp = (struct write_point *) write_point;
mutex_lock(&wp->lock);
bch2_trans_mutex_lock(trans, &wp->lock);
return wp;
}
@ -1027,7 +1077,7 @@ restart_find:
wp = __writepoint_find(head, write_point);
if (wp) {
lock_wp:
mutex_lock(&wp->lock);
bch2_trans_mutex_lock(trans, &wp->lock);
if (wp->write_point == write_point)
goto out;
mutex_unlock(&wp->lock);
@ -1040,8 +1090,8 @@ restart_find_oldest:
if (!oldest || time_before64(wp->last_used, oldest->last_used))
oldest = wp;
mutex_lock(&oldest->lock);
mutex_lock(&c->write_points_hash_lock);
bch2_trans_mutex_lock(trans, &oldest->lock);
bch2_trans_mutex_lock(trans, &c->write_points_hash_lock);
if (oldest >= c->write_points + c->write_points_nr ||
try_increase_writepoints(c)) {
mutex_unlock(&c->write_points_hash_lock);
@ -1069,7 +1119,7 @@ out:
/*
* Get us an open_bucket we can allocate from, return with it locked:
*/
struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *trans,
unsigned target,
unsigned erasure_code,
struct write_point_specifier write_point,
@ -1080,6 +1130,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
unsigned flags,
struct closure *cl)
{
struct bch_fs *c = trans->c;
struct write_point *wp;
struct open_bucket *ob;
struct open_buckets ptrs;
@ -1099,7 +1150,7 @@ retry:
write_points_nr = c->write_points_nr;
have_cache = false;
wp = writepoint_find(c, write_point.v);
wp = writepoint_find(trans, write_point.v);
if (wp->data_type == BCH_DATA_user)
ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
@ -1109,21 +1160,22 @@ retry:
have_cache = true;
if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
ob_flags, cl);
} else {
ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
ob_flags, NULL);
if (!ret)
if (!ret ||
bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto alloc_done;
ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
0, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
@ -1180,6 +1232,32 @@ err:
return ERR_PTR(ret);
}
struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
unsigned target,
unsigned erasure_code,
struct write_point_specifier write_point,
struct bch_devs_list *devs_have,
unsigned nr_replicas,
unsigned nr_replicas_required,
enum alloc_reserve reserve,
unsigned flags,
struct closure *cl)
{
struct write_point *wp;
bch2_trans_do(c, NULL, NULL, 0,
PTR_ERR_OR_ZERO(wp = bch2_alloc_sectors_start_trans(&trans, target,
erasure_code,
write_point,
devs_have,
nr_replicas,
nr_replicas_required,
reserve,
flags, cl)));
return wp;
}
struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);

View File

@ -136,6 +136,14 @@ int bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *,
unsigned, unsigned *, bool *, enum alloc_reserve,
unsigned, struct closure *);
struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *,
unsigned, unsigned,
struct write_point_specifier,
struct bch_devs_list *,
unsigned, unsigned,
enum alloc_reserve,
unsigned,
struct closure *);
struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
unsigned, unsigned,
struct write_point_specifier,

View File

@ -492,7 +492,7 @@ static void backpointer_not_found(struct btree_trans *trans,
prt_printf(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, k);
if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
bch_err(c, "%s", buf.buf);
bch_err_ratelimited(c, "%s", buf.buf);
else
bch2_trans_inconsistent(trans, "%s", buf.buf);
@ -526,9 +526,21 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
return k;
backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
bch2_trans_iter_exit(trans, iter);
if (bp.level) {
/*
* If a backpointer for a btree node wasn't found, it may be
* because it was overwritten by a new btree node that hasn't
* been written out yet - backpointer_get_node() checks for
* this:
*/
bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
bch2_trans_iter_exit(trans, iter);
return bkey_s_c_null;
}
backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
return bkey_s_c_null;
}
@ -540,7 +552,6 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree *b;
struct bkey_s_c k;
BUG_ON(!bp.level);
@ -551,22 +562,24 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
bp.level - 1,
0);
b = bch2_btree_iter_peek_node(iter);
if (IS_ERR(b)) {
bch2_trans_iter_exit(trans, iter);
return b;
}
if (IS_ERR(b))
goto err;
if (extent_matches_bp(c, bp.btree_id, bp.level,
bkey_i_to_s_c(&b->key),
bucket, bp))
return b;
if (!btree_node_will_make_reachable(b))
backpointer_not_found(trans, bucket, bp_offset,
bp, k, "btree node");
if (btree_node_will_make_reachable(b)) {
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
} else {
backpointer_not_found(trans, bucket, bp_offset, bp,
bkey_i_to_s_c(&b->key), "btree node");
b = NULL;
}
err:
bch2_trans_iter_exit(trans, iter);
return NULL;
return b;
}
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
@ -829,6 +842,8 @@ static int check_one_backpointer(struct btree_trans *trans,
k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
ret = bkey_err(k);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
return 0;
if (ret)
return ret;

View File

@ -319,8 +319,6 @@ BCH_DEBUG_PARAMS_DEBUG()
#undef BCH_DEBUG_PARAM
#endif
#define BCH_LOCK_TIME_NR 128
#define BCH_TIME_STATS() \
x(btree_node_mem_alloc) \
x(btree_node_split) \
@ -531,9 +529,13 @@ struct btree_debug {
unsigned id;
};
struct lock_held_stats {
struct time_stats times[BCH_LOCK_TIME_NR];
const char *names[BCH_LOCK_TIME_NR];
#define BCH_TRANSACTIONS_NR 128
struct btree_transaction_stats {
struct mutex lock;
struct time_stats lock_hold_times;
unsigned nr_max_paths;
char *max_paths_text;
};
struct bch_fs_pcpu {
@ -930,7 +932,8 @@ struct bch_fs {
struct time_stats times[BCH_TIME_STAT_NR];
struct lock_held_stats lock_held_stats;
const char *btree_transaction_fns[BCH_TRANSACTIONS_NR];
struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
};
static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)

View File

@ -19,33 +19,49 @@ const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT;
struct bkey __bch2_bkey_unpack_key(const struct bkey_format *,
const struct bkey_packed *);
void bch2_to_binary(char *out, const u64 *p, unsigned nr_bits)
void bch2_bkey_packed_to_binary_text(struct printbuf *out,
const struct bkey_format *f,
const struct bkey_packed *k)
{
unsigned bit = high_bit_offset, done = 0;
const u64 *p = high_word(f, k);
unsigned word_bits = 64 - high_bit_offset;
unsigned nr_key_bits = bkey_format_key_bits(f) + high_bit_offset;
u64 v = *p & (~0ULL >> high_bit_offset);
if (!nr_key_bits) {
prt_str(out, "(empty)");
return;
}
while (1) {
while (bit < 64) {
if (done && !(done % 8))
*out++ = ' ';
*out++ = *p & (1ULL << (63 - bit)) ? '1' : '0';
bit++;
done++;
if (done == nr_bits) {
*out++ = '\0';
return;
}
unsigned next_key_bits = nr_key_bits;
if (nr_key_bits < 64) {
v >>= 64 - nr_key_bits;
next_key_bits = 0;
} else {
next_key_bits -= 64;
}
bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits));
if (!next_key_bits)
break;
prt_char(out, ' ');
p = next_word(p);
bit = 0;
v = *p;
word_bits = 64;
nr_key_bits = next_key_bits;
}
}
#ifdef CONFIG_BCACHEFS_DEBUG
static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
const struct bkey *unpacked,
const struct bkey_format *format)
const struct bkey *unpacked,
const struct bkey_format *format)
{
struct bkey tmp;
@ -57,23 +73,35 @@ static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
tmp = __bch2_bkey_unpack_key(format, packed);
if (memcmp(&tmp, unpacked, sizeof(struct bkey))) {
struct printbuf buf1 = PRINTBUF;
struct printbuf buf2 = PRINTBUF;
char buf3[160], buf4[160];
struct printbuf buf = PRINTBUF;
bch2_bkey_to_text(&buf1, unpacked);
bch2_bkey_to_text(&buf2, &tmp);
bch2_to_binary(buf3, (void *) unpacked, 80);
bch2_to_binary(buf4, high_word(format, packed), 80);
panic("keys differ: format u64s %u fields %u %u %u %u %u\n%s\n%s\n%s\n%s\n",
prt_printf(&buf, "keys differ: format u64s %u fields %u %u %u %u %u\n",
format->key_u64s,
format->bits_per_field[0],
format->bits_per_field[1],
format->bits_per_field[2],
format->bits_per_field[3],
format->bits_per_field[4],
buf1.buf, buf2.buf, buf3, buf4);
format->bits_per_field[4]);
prt_printf(&buf, "compiled unpack: ");
bch2_bkey_to_text(&buf, unpacked);
prt_newline(&buf);
prt_printf(&buf, "c unpack: ");
bch2_bkey_to_text(&buf, &tmp);
prt_newline(&buf);
prt_printf(&buf, "compiled unpack: ");
bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
(struct bkey_packed *) unpacked);
prt_newline(&buf);
prt_printf(&buf, "c unpack: ");
bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
(struct bkey_packed *) &tmp);
prt_newline(&buf);
panic("%s", buf.buf);
}
}

View File

@ -12,7 +12,9 @@
#define HAVE_BCACHEFS_COMPILED_UNPACK 1
#endif
void bch2_to_binary(char *, const u64 *, unsigned);
void bch2_bkey_packed_to_binary_text(struct printbuf *,
const struct bkey_format *,
const struct bkey_packed *);
/* bkey with split value, const */
struct bkey_s_c {
@ -42,12 +44,15 @@ static inline size_t bkey_val_bytes(const struct bkey *k)
static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s)
{
k->u64s = BKEY_U64s + val_u64s;
unsigned u64s = BKEY_U64s + val_u64s;
BUG_ON(u64s > U8_MAX);
k->u64s = u64s;
}
static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
{
k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64));
set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64)));
}
#define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k)))

View File

@ -616,7 +616,6 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
(u64 *) vstruct_end(i) - (u64 *) k);
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift);
set_btree_bset_end(b, t);
bch2_bset_set_no_aux_tree(b, t);
}
for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
@ -626,10 +625,14 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
if (k != vstruct_last(i)) {
i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start);
set_btree_bset_end(b, t);
bch2_bset_set_no_aux_tree(b, t);
}
}
/*
* Always rebuild search trees: eytzinger search tree nodes directly
* depend on the values of min/max key:
*/
bch2_bset_set_no_aux_tree(b, b->set);
bch2_btree_build_aux_trees(b);
for_each_btree_node_key_unpack(b, k, &iter, &unpacked) {
@ -778,8 +781,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b,
}
static int validate_bset_keys(struct bch_fs *c, struct btree *b,
struct bset *i, unsigned *whiteout_u64s,
int write, bool have_retry)
struct bset *i, int write, bool have_retry)
{
unsigned version = le16_to_cpu(i->version);
struct bkey_packed *k, *prev = NULL;
@ -915,7 +917,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
}
while (b->written < (ptr_written ?: btree_sectors(c))) {
unsigned sectors, whiteout_u64s = 0;
unsigned sectors;
struct nonce nonce;
struct bch_csum csum;
bool first = !b->written;
@ -984,8 +986,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
if (!b->written)
btree_node_set_format(b, b->data->format);
ret = validate_bset_keys(c, b, i, &whiteout_u64s,
READ, have_retry);
ret = validate_bset_keys(c, b, i, READ, have_retry);
if (ret)
goto fsck_err;
@ -1011,11 +1012,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
if (blacklisted && !first)
continue;
sort_iter_add(iter, i->start,
vstruct_idx(i, whiteout_u64s));
sort_iter_add(iter,
vstruct_idx(i, whiteout_u64s),
vstruct_idx(i, 0),
vstruct_last(i));
nonblacklisted_written = b->written;
@ -1745,7 +1743,6 @@ static void btree_node_write_endio(struct bio *bio)
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
struct bset *i, unsigned sectors)
{
unsigned whiteout_u64s = 0;
struct printbuf buf = PRINTBUF;
int ret;
@ -1758,7 +1755,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
if (ret)
return ret;
ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?:
ret = validate_bset_keys(c, b, i, WRITE, false) ?:
validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false);
if (ret) {
bch2_inconsistent_error(c);

View File

@ -1418,16 +1418,16 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
if (unlikely(ret))
goto err;
mark_btree_node_locked(trans, path, level, lock_type);
btree_path_level_init(trans, path, b);
if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
unlikely(b != btree_node_mem_ptr(tmp.k)))
btree_node_mem_ptr_set(trans, path, level + 1, b);
if (btree_node_read_locked(path, level + 1))
btree_node_unlock(trans, path, level + 1);
mark_btree_node_locked(trans, path, level, lock_type);
path->level = level;
btree_path_level_init(trans, path, b);
bch2_btree_path_verify_locks(path);
err:
@ -1872,42 +1872,69 @@ void bch2_dump_trans_updates(struct btree_trans *trans)
printbuf_exit(&buf);
}
void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path)
{
prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ",
path->idx, path->ref, path->intent_ref,
path->preserve ? 'P' : ' ',
path->should_be_locked ? 'S' : ' ',
bch2_btree_ids[path->btree_id],
path->level);
bch2_bpos_to_text(out, path->pos);
prt_printf(out, " locks %u", path->nodes_locked);
#ifdef CONFIG_BCACHEFS_DEBUG
prt_printf(out, " %pS", (void *) path->ip_allocated);
#endif
prt_newline(out);
}
void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
{
struct btree_path *path;
unsigned idx;
trans_for_each_path_inorder(trans, path, idx)
bch2_btree_path_to_text(out, path);
}
noinline __cold
void bch2_dump_trans_paths_updates(struct btree_trans *trans)
{
struct btree_path *path;
struct printbuf buf = PRINTBUF;
unsigned idx;
trans_for_each_path_inorder(trans, path, idx) {
printbuf_reset(&buf);
bch2_bpos_to_text(&buf, path->pos);
printk(KERN_ERR "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos %s locks %u %pS\n",
path->idx, path->ref, path->intent_ref,
path->preserve ? 'P' : ' ',
path->should_be_locked ? 'S' : ' ',
bch2_btree_ids[path->btree_id],
path->level,
buf.buf,
path->nodes_locked,
#ifdef CONFIG_BCACHEFS_DEBUG
(void *) path->ip_allocated
#else
NULL
#endif
);
}
bch2_trans_paths_to_text(&buf, trans);
printk(KERN_ERR "%s", buf.buf);
printbuf_exit(&buf);
bch2_dump_trans_updates(trans);
}
noinline
static void bch2_trans_update_max_paths(struct btree_trans *trans)
{
struct btree_transaction_stats *s = btree_trans_stats(trans);
struct printbuf buf = PRINTBUF;
bch2_trans_paths_to_text(&buf, trans);
if (!buf.allocation_failure) {
mutex_lock(&s->lock);
if (s->nr_max_paths < hweight64(trans->paths_allocated)) {
s->nr_max_paths = hweight64(trans->paths_allocated);
swap(s->max_paths_text, buf.buf);
}
mutex_unlock(&s->lock);
}
printbuf_exit(&buf);
}
static struct btree_path *btree_path_alloc(struct btree_trans *trans,
struct btree_path *pos)
{
struct btree_transaction_stats *s = btree_trans_stats(trans);
struct btree_path *path;
unsigned idx;
@ -1920,6 +1947,9 @@ static struct btree_path *btree_path_alloc(struct btree_trans *trans,
idx = __ffs64(~trans->paths_allocated);
trans->paths_allocated |= 1ULL << idx;
if (s && unlikely(hweight64(trans->paths_allocated) > s->nr_max_paths))
bch2_trans_update_max_paths(trans);
path = &trans->paths[idx];
path->idx = idx;
@ -2013,12 +2043,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
struct bkey_s_c k;
EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
EBUG_ON(!btree_node_locked(path, path->level));
if (!path->cached) {
struct btree_path_level *l = path_l(path);
struct bkey_packed *_k;
EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
_k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
@ -2033,7 +2064,6 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
(path->btree_id != ck->key.btree_id ||
bkey_cmp(path->pos, ck->key.pos)));
EBUG_ON(!ck || !ck->valid);
EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
*u = ck->k->k;
k = bkey_i_to_s_c(ck->k);
@ -2288,7 +2318,7 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
* bkey_s_c_null:
*/
static noinline
struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
struct bkey_s_c __btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
{
struct btree_trans *trans = iter->trans;
struct bch_fs *c = trans->c;
@ -2317,6 +2347,15 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
}
static noinline
struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
{
struct bkey_s_c ret = __btree_trans_peek_key_cache(iter, pos);
int err = bkey_err(ret) ?: bch2_btree_path_relock(iter->trans, iter->path, _THIS_IP_);
return err ? bkey_s_c_err(err) : ret;
}
static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
{
struct btree_trans *trans = iter->trans;
@ -2347,15 +2386,12 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
k.k &&
(k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
ret = bkey_err(k2);
k = k2;
ret = bkey_err(k);
if (ret) {
k = k2;
bch2_btree_iter_set_pos(iter, iter->pos);
goto out;
}
k = k2;
iter->k = *k.k;
}
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
@ -2803,8 +2839,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
btree_iter_ip_allocated(iter));
ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (unlikely(ret))
return bkey_s_c_err(ret);
if (unlikely(ret)) {
k = bkey_s_c_err(ret);
goto out_no_locked;
}
if ((iter->flags & BTREE_ITER_CACHED) ||
!(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
@ -2828,13 +2866,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
}
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
(k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
if (bkey_err(k)) {
goto out_no_locked;
} else {
(k = __btree_trans_peek_key_cache(iter, iter->pos)).k) {
if (!bkey_err(k))
iter->k = *k.k;
goto out;
}
/* We're not returning a key from iter->path: */
goto out_no_locked;
}
k = bch2_btree_path_peek_slot(iter->path, &iter->k);
@ -2862,11 +2898,14 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
struct bpos pos = iter->pos;
k = bch2_btree_iter_peek(iter);
iter->pos = pos;
if (unlikely(bkey_err(k)))
bch2_btree_iter_set_pos(iter, pos);
else
iter->pos = pos;
}
if (unlikely(bkey_err(k)))
return k;
goto out_no_locked;
next = k.k ? bkey_start_pos(k.k) : POS_MAX;
@ -3195,6 +3234,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
bch2_trans_reset_updates(trans);
trans->restart_count++;
trans->mem_top = 0;
if (trans->fs_usage_deltas) {
@ -3245,10 +3285,10 @@ u32 bch2_trans_begin(struct btree_trans *trans)
void bch2_trans_verify_not_restarted(struct btree_trans *trans, u32 restart_count)
{
bch2_trans_inconsistent_on(trans_was_restarted(trans, restart_count), trans,
"trans->restart_count %u, should be %u, last restarted by %ps\n",
trans->restart_count, restart_count,
(void *) trans->last_restarted_ip);
if (trans_was_restarted(trans, restart_count))
panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
trans->restart_count, restart_count,
(void *) trans->last_restarted_ip);
}
static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
@ -3269,6 +3309,22 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
trans->updates = p; p += updates_bytes;
}
static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct bch_fs *c,
const char *fn)
{
unsigned i;
for (i = 0; i < ARRAY_SIZE(c->btree_transaction_fns); i++)
if (!c->btree_transaction_fns[i] ||
c->btree_transaction_fns[i] == fn) {
c->btree_transaction_fns[i] = fn;
return i;
}
pr_warn_once("BCH_TRANSACTIONS_NR not big enough!");
return i;
}
void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
unsigned expected_nr_iters,
size_t expected_mem_bytes,
@ -3284,15 +3340,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
trans->fn = fn;
trans->last_begin_time = ktime_get_ns();
trans->task = current;
while (c->lock_held_stats.names[trans->lock_name_idx] != fn
&& c->lock_held_stats.names[trans->lock_name_idx] != 0)
trans->lock_name_idx++;
if (trans->lock_name_idx >= BCH_LOCK_TIME_NR)
pr_warn_once("lock_times array not big enough!");
else
c->lock_held_stats.names[trans->lock_name_idx] = fn;
trans->fn_idx = bch2_trans_get_fn_idx(trans, c, fn);
bch2_trans_alloc_paths(trans, c);
@ -3463,9 +3511,12 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
int bch2_fs_btree_iter_init(struct bch_fs *c)
{
unsigned nr = BTREE_ITER_MAX;
unsigned i, nr = BTREE_ITER_MAX;
int ret;
for (i = 0; i < ARRAY_SIZE(c->btree_transaction_stats); i++)
mutex_init(&c->btree_transaction_stats[i].lock);
INIT_LIST_HEAD(&c->btree_trans_list);
mutex_init(&c->btree_trans_lock);

View File

@ -182,7 +182,6 @@ static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int er
BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart));
trans->restarted = err;
trans->restart_count++;
return -err;
}
@ -368,7 +367,7 @@ static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *
static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{
if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX) {
if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2) {
trace_trans_restart_too_many_iters(trans, _THIS_IP_);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
}
@ -392,13 +391,17 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
#define lockrestart_do(_trans, _do) \
({ \
u32 _restart_count; \
int _ret; \
\
do { \
bch2_trans_begin(_trans); \
_restart_count = bch2_trans_begin(_trans); \
_ret = (_do); \
} while (bch2_err_matches(_ret, BCH_ERR_transaction_restart)); \
\
if (!_ret) \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
\
_ret; \
})
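
Usage sketch (do_update() and k stand in for any transactional helper and its argument): the macro retries the body on transaction restart, and the added bch2_trans_verify_not_restarted() call asserts that a body returning success did not silently swallow a restart:

int ret = lockrestart_do(&trans,
			 do_update(&trans, &k));
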
@ -439,7 +442,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
(_start), (_flags)); \
\
while (1) { \
bch2_trans_begin(_trans); \
u32 _restart_count = bch2_trans_begin(_trans); \
(_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \
if (!(_k).k) { \
_ret = 0; \
@ -451,6 +454,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
continue; \
if (_ret) \
break; \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
if (!bch2_btree_iter_advance(&(_iter))) \
break; \
} \
@ -468,7 +472,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
(_start), (_flags)); \
\
while (1) { \
bch2_trans_begin(_trans); \
u32 _restart_count = bch2_trans_begin(_trans); \
(_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\
if (!(_k).k) { \
_ret = 0; \
@ -480,6 +484,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
continue; \
if (_ret) \
break; \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
if (!bch2_btree_iter_rewind(&(_iter))) \
break; \
} \
@ -535,6 +540,8 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
/* new multiple iterator interface: */
void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *);
void __bch2_trans_init(struct btree_trans *, struct bch_fs *,

View File

@ -631,11 +631,22 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
void bch2_btree_key_cache_drop(struct btree_trans *trans,
struct btree_path *path)
{
struct bch_fs *c = trans->c;
struct bkey_cached *ck = (void *) path->l[0].b;
ck->valid = false;
BUG_ON(!ck->valid);
BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
/*
* We just did an update to the btree, bypassing the key cache: the key
* cache key is now stale and must be dropped, even if dirty:
*/
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
atomic_long_dec(&c->btree_key_cache.nr_dirty);
bch2_journal_pin_drop(&c->journal, &ck->journal);
}
ck->valid = false;
}
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,

View File

@ -115,6 +115,26 @@ btree_lock_want(struct btree_path *path, int level)
return BTREE_NODE_UNLOCKED;
}
static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans)
{
return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats)
? &trans->c->btree_transaction_stats[trans->fn_idx]
: NULL;
}
static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
struct btree_path *path, unsigned level)
{
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
struct btree_transaction_stats *s = btree_trans_stats(trans);
if (s)
__bch2_time_stats_update(&s->lock_hold_times,
path->l[level].lock_taken_time,
ktime_get_ns());
#endif
}
static inline void btree_node_unlock(struct btree_trans *trans,
struct btree_path *path, unsigned level)
{
@ -124,15 +144,7 @@ static inline void btree_node_unlock(struct btree_trans *trans,
if (lock_type != BTREE_NODE_UNLOCKED) {
six_unlock_type(&path->l[level].b->c.lock, lock_type);
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
if (trans->lock_name_idx < BCH_LOCK_TIME_NR) {
struct bch_fs *c = trans->c;
__bch2_time_stats_update(&c->lock_held_stats.times[trans->lock_name_idx],
path->l[level].lock_taken_time,
ktime_get_ns());
}
#endif
btree_trans_lock_hold_time_update(trans, path, level);
}
mark_btree_node_unlocked(path, level);
}

View File

@ -392,6 +392,7 @@ struct btree_trans {
struct task_struct *task;
int srcu_idx;
u8 fn_idx;
u8 nr_sorted;
u8 nr_updates;
u8 traverse_all_idx;
@ -432,7 +433,6 @@ struct btree_trans {
unsigned journal_u64s;
unsigned journal_preres_u64s;
struct replicas_delta_list *fs_usage_deltas;
int lock_name_idx;
};
#define BTREE_FLAGS() \

View File

@ -178,12 +178,13 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
six_unlock_intent(&b->c.lock);
}
static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct disk_reservation *res,
struct closure *cl,
bool interior_node,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct write_point *wp;
struct btree *b;
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
@ -213,7 +214,7 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
mutex_unlock(&c->btree_reserve_cache_lock);
retry:
wp = bch2_alloc_sectors_start(c,
wp = bch2_alloc_sectors_start_trans(trans,
c->opts.metadata_target ?:
c->opts.foreground_target,
0,
@ -412,18 +413,16 @@ static void bch2_btree_reserve_put(struct btree_update *as)
}
}
static int bch2_btree_reserve_get(struct btree_update *as,
static int bch2_btree_reserve_get(struct btree_trans *trans,
struct btree_update *as,
unsigned nr_nodes[2],
unsigned flags)
unsigned flags,
struct closure *cl)
{
struct bch_fs *c = as->c;
struct closure cl;
struct btree *b;
unsigned interior;
int ret;
closure_init_stack(&cl);
retry:
int ret = 0;
BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX);
@ -434,18 +433,17 @@ retry:
* BTREE_INSERT_NOWAIT only applies to btree node allocation, not
* blocking on this lock:
*/
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
ret = bch2_btree_cache_cannibalize_lock(c, cl);
if (ret)
goto err;
return ret;
for (interior = 0; interior < 2; interior++) {
struct prealloc_nodes *p = as->prealloc_nodes + interior;
while (p->nr < nr_nodes[interior]) {
b = __bch2_btree_node_alloc(c, &as->disk_res,
flags & BTREE_INSERT_NOWAIT
? NULL : &cl,
interior, flags);
b = __bch2_btree_node_alloc(trans, &as->disk_res,
flags & BTREE_INSERT_NOWAIT ? NULL : cl,
interior, flags);
if (IS_ERR(b)) {
ret = PTR_ERR(b);
goto err;
@ -454,18 +452,8 @@ retry:
p->b[p->nr++] = b;
}
}
bch2_btree_cache_cannibalize_unlock(c);
closure_sync(&cl);
return 0;
err:
bch2_btree_cache_cannibalize_unlock(c);
closure_sync(&cl);
if (ret == -EAGAIN)
goto retry;
trace_btree_reserve_get_fail(c, nr_nodes[0] + nr_nodes[1], &cl);
return ret;
}
@ -980,6 +968,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
unsigned update_level = level;
int journal_flags = flags & JOURNAL_WATERMARK_MASK;
int ret = 0;
u32 restart_count = trans->restart_count;
BUG_ON(!path->should_be_locked);
@ -1053,16 +1042,24 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
if (ret)
goto err;
bch2_trans_unlock(trans);
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
BTREE_UPDATE_JOURNAL_RES,
journal_flags);
journal_flags|JOURNAL_RES_GET_NONBLOCK);
if (ret) {
bch2_btree_update_free(as);
trace_trans_restart_journal_preres_get(trans, _RET_IP_);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
return ERR_PTR(ret);
bch2_trans_unlock(trans);
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
BTREE_UPDATE_JOURNAL_RES,
journal_flags);
if (ret) {
trace_trans_restart_journal_preres_get(trans, _RET_IP_);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
goto err;
}
ret = bch2_trans_relock(trans);
if (ret)
goto err;
}
ret = bch2_disk_reservation_get(c, &as->disk_res,
@ -1072,14 +1069,32 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
if (ret)
goto err;
ret = bch2_btree_reserve_get(as, nr_nodes, flags);
if (ret)
ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL);
if (ret == -EAGAIN ||
ret == -ENOMEM) {
struct closure cl;
closure_init_stack(&cl);
bch2_trans_unlock(trans);
do {
ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
closure_sync(&cl);
} while (ret == -EAGAIN);
}
if (ret) {
trace_btree_reserve_get_fail(trans->fn, _RET_IP_,
nr_nodes[0] + nr_nodes[1]);
goto err;
}
ret = bch2_trans_relock(trans);
if (ret)
goto err;
bch2_trans_verify_not_restarted(trans, restart_count);
return as;
err:
bch2_btree_update_free(as);

View File

@ -199,7 +199,7 @@ struct dump_iter {
ssize_t ret; /* bytes read so far */
};
static int flush_buf(struct dump_iter *i)
static ssize_t flush_buf(struct dump_iter *i)
{
if (i->buf.pos) {
size_t bytes = min_t(size_t, i->buf.pos, i->size);
@ -215,7 +215,7 @@ static int flush_buf(struct dump_iter *i)
memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos);
}
return 0;
return i->size ? 0 : i->ret;
}
static int bch2_dump_open(struct inode *inode, struct file *file)
@ -253,7 +253,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
int err;
ssize_t ret;
i->ubuf = buf;
i->size = size;
@ -261,14 +261,11 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
bch2_trans_init(&trans, i->c, 0, 0);
err = for_each_btree_key2(&trans, iter, i->id, i->from,
ret = for_each_btree_key2(&trans, iter, i->id, i->from,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ({
err = flush_buf(i);
if (err)
break;
if (!i->size)
ret = flush_buf(i);
if (ret)
break;
bch2_bkey_val_to_text(&i->buf, i->c, k);
@ -277,12 +274,12 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
}));
i->from = iter.pos;
if (!err)
err = flush_buf(i);
if (!ret)
ret = flush_buf(i);
bch2_trans_exit(&trans);
return err ?: i->ret;
return ret ?: i->ret;
}
static const struct file_operations btree_debug_ops = {
@ -299,43 +296,39 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
struct btree_trans trans;
struct btree_iter iter;
struct btree *b;
int err;
ssize_t ret;
i->ubuf = buf;
i->size = size;
i->ret = 0;
err = flush_buf(i);
if (err)
return err;
ret = flush_buf(i);
if (ret)
return ret;
if (!i->size || !bpos_cmp(SPOS_MAX, i->from))
if (!bpos_cmp(SPOS_MAX, i->from))
return i->ret;
bch2_trans_init(&trans, i->c, 0, 0);
for_each_btree_node(&trans, iter, i->id, i->from, 0, b, err) {
bch2_btree_node_to_text(&i->buf, i->c, b);
err = flush_buf(i);
if (err)
for_each_btree_node(&trans, iter, i->id, i->from, 0, b, ret) {
ret = flush_buf(i);
if (ret)
break;
/*
* can't easily correctly restart a btree node traversal across
* all nodes, meh
*/
bch2_btree_node_to_text(&i->buf, i->c, b);
i->from = bpos_cmp(SPOS_MAX, b->key.k.p)
? bpos_successor(b->key.k.p)
: b->key.k.p;
if (!i->size)
break;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
return err < 0 ? err : i->ret;
if (!ret)
ret = flush_buf(i);
return ret ?: i->ret;
}
static const struct file_operations btree_format_debug_ops = {
@ -352,33 +345,27 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
int err;
ssize_t ret;
i->ubuf = buf;
i->size = size;
i->ret = 0;
err = flush_buf(i);
if (err)
return err;
if (!i->size)
return i->ret;
ret = flush_buf(i);
if (ret)
return ret;
bch2_trans_init(&trans, i->c, 0, 0);
err = for_each_btree_key2(&trans, iter, i->id, i->from,
ret = for_each_btree_key2(&trans, iter, i->id, i->from,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ({
struct btree_path_level *l = &iter.path->l[0];
struct bkey_packed *_k =
bch2_btree_node_iter_peek(&l->iter, l->b);
err = flush_buf(i);
if (err)
break;
if (!i->size)
ret = flush_buf(i);
if (ret)
break;
if (bpos_cmp(l->b->key.k.p, i->prev_node) > 0) {
@ -391,12 +378,12 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
}));
i->from = iter.pos;
if (!err)
err = flush_buf(i);
bch2_trans_exit(&trans);
return err ?: i->ret;
if (!ret)
ret = flush_buf(i);
return ret ?: i->ret;
}
static const struct file_operations bfloat_failed_debug_ops = {
@ -409,7 +396,8 @@ static const struct file_operations bfloat_failed_debug_ops = {
static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
struct btree *b)
{
out->tabstops[0] = 32;
if (!out->nr_tabstops)
printbuf_tabstop_push(out, 32);
prt_printf(out, "%px btree=%s l=%u ",
b,
@ -466,7 +454,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
struct dump_iter *i = file->private_data;
struct bch_fs *c = i->c;
bool done = false;
int err;
ssize_t ret = 0;
i->ubuf = buf;
i->size = size;
@ -477,12 +465,9 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
struct rhash_head *pos;
struct btree *b;
err = flush_buf(i);
if (err)
return err;
if (!i->size)
break;
ret = flush_buf(i);
if (ret)
return ret;
rcu_read_lock();
i->buf.atomic++;
@ -500,9 +485,12 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
} while (!done);
if (i->buf.allocation_failure)
return -ENOMEM;
ret = -ENOMEM;
return i->ret;
if (!ret)
ret = flush_buf(i);
return ret ?: i->ret;
}
static const struct file_operations cached_btree_nodes_ops = {
@ -538,7 +526,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
struct dump_iter *i = file->private_data;
struct bch_fs *c = i->c;
struct btree_trans *trans;
int err;
ssize_t ret = 0;
i->ubuf = buf;
i->size = size;
@ -549,12 +537,9 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
if (trans->task->pid <= i->iter)
continue;
err = flush_buf(i);
if (err)
return err;
if (!i->size)
break;
ret = flush_buf(i);
if (ret)
return ret;
bch2_btree_trans_to_text(&i->buf, trans);
@ -570,9 +555,12 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
mutex_unlock(&c->btree_trans_lock);
if (i->buf.allocation_failure)
return -ENOMEM;
ret = -ENOMEM;
return i->ret;
if (!ret)
ret = flush_buf(i);
return ret ?: i->ret;
}
static const struct file_operations btree_transactions_ops = {
@ -651,14 +639,16 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
struct lock_held_stats *lhs = &i->c->lock_held_stats;
struct bch_fs *c = i->c;
int err;
i->ubuf = buf;
i->size = size;
i->ret = 0;
while (lhs->names[i->iter] != 0 && i->iter < BCH_LOCK_TIME_NR) {
while (1) {
struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];
err = flush_buf(i);
if (err)
return err;
@ -666,11 +656,37 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
if (!i->size)
break;
prt_printf(&i->buf, "%s:", lhs->names[i->iter]);
if (i->iter == ARRAY_SIZE(c->btree_transaction_fns) ||
!c->btree_transaction_fns[i->iter])
break;
prt_printf(&i->buf, "%s: ", c->btree_transaction_fns[i->iter]);
prt_newline(&i->buf);
printbuf_indent_add(&i->buf, 8);
bch2_time_stats_to_text(&i->buf, &lhs->times[i->iter]);
printbuf_indent_sub(&i->buf, 8);
printbuf_indent_add(&i->buf, 2);
mutex_lock(&s->lock);
if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
prt_printf(&i->buf, "Lock hold times:");
prt_newline(&i->buf);
printbuf_indent_add(&i->buf, 2);
bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
printbuf_indent_sub(&i->buf, 2);
}
if (s->max_paths_text) {
prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths);
prt_newline(&i->buf);
printbuf_indent_add(&i->buf, 2);
prt_str_indented(&i->buf, s->max_paths_text);
printbuf_indent_sub(&i->buf, 2);
}
mutex_unlock(&s->lock);
printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
i->iter++;
}
@ -716,10 +732,8 @@ void bch2_fs_debug_init(struct bch_fs *c)
debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
c->btree_debug, &journal_pins_ops);
if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
debugfs_create_file("lock_held_stats", 0400, c->fs_debug_dir,
c, &lock_held_stats_op);
}
debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
c, &lock_held_stats_op);
c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
if (IS_ERR_OR_NULL(c->btree_debug_dir))

View File

@ -37,6 +37,7 @@
x(no_btree_node, no_btree_node_down) \
x(no_btree_node, no_btree_node_init) \
x(no_btree_node, no_btree_node_cached) \
x(0, backpointer_to_overwritten_btree_node) \
x(0, lock_fail_node_reused) \
x(0, lock_fail_root_changed) \
x(0, journal_reclaim_would_deadlock) \

View File

@ -290,7 +290,7 @@ err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
return ret;
return ret ?: -BCH_ERR_transaction_restart_nested;
}
static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
@ -914,7 +914,7 @@ static int check_inode(struct btree_trans *trans,
bch2_fs_lazy_rw(c);
ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot);
if (ret)
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err(c, "error in fsck: error while deleting inode: %s",
bch2_err_str(ret));
return ret;
@ -1149,13 +1149,11 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
}
}
fsck_err:
if (ret) {
if (ret)
bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret));
return ret;
}
if (trans_was_restarted(trans, restart_count))
return -BCH_ERR_transaction_restart_nested;
return 0;
if (!ret && trans_was_restarted(trans, restart_count))
ret = -BCH_ERR_transaction_restart_nested;
return ret;
}
static int check_extent(struct btree_trans *trans, struct btree_iter *iter,

View File

@ -1255,8 +1255,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
u64 seq;
unsigned i;
if (!out->nr_tabstops)
printbuf_tabstop_push(out, 24);
out->atomic++;
out->tabstops[0] = 24;
rcu_read_lock();
s = READ_ONCE(j->reservations);

View File

@ -636,6 +636,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
b = bch2_backpointer_get_node(&trans, &iter,
bucket, bp_offset, bp);
ret = PTR_ERR_OR_ZERO(b);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
continue;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)

View File

@ -268,7 +268,8 @@ void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c)
struct bch_fs_rebalance *r = &c->rebalance;
struct rebalance_work w = rebalance_work(c);
out->tabstops[0] = 20;
if (!out->nr_tabstops)
printbuf_tabstop_push(out, 20);
prt_printf(out, "fullest_dev (%i):", w.dev_most_full_idx);
prt_tab(out);

View File

@ -278,8 +278,8 @@ int bch2_fs_check_snapshots(struct bch_fs *c)
bch2_trans_init(&trans, c, 0, 0);
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots,
POS(BCACHEFS_ROOT_INO, 0),
ret = for_each_btree_key_commit(&trans, iter,
BTREE_ID_snapshots, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_snapshot(&trans, &iter, k));

View File

@ -1427,8 +1427,8 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR
? bch2_sb_field_ops[type] : NULL;
if (!out->tabstops[0])
out->tabstops[0] = 32;
if (!out->nr_tabstops)
printbuf_tabstop_push(out, 32);
if (ops)
prt_printf(out, "%s", bch2_sb_fields[type]);
@ -1476,8 +1476,8 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
u64 fields_have = 0;
unsigned nr_devices = 0;
if (!out->tabstops[0])
out->tabstops[0] = 32;
if (!out->nr_tabstops)
printbuf_tabstop_push(out, 32);
mi = bch2_sb_get_members(sb);
if (mi) {

View File

@ -560,7 +560,8 @@ SHOW(bch2_fs_counters)
u64 counter = 0;
u64 counter_since_mount = 0;
out->tabstops[0] = 32;
printbuf_tabstop_push(out, 32);
#define x(t, ...) \
if (attr == &sysfs_##t) { \
counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\

View File

@ -268,6 +268,12 @@ static void bch2_quantiles_update(struct quantiles *q, u64 v)
}
}
void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits)
{
while (nr_bits)
prt_char(out, '0' + ((v >> --nr_bits) & 1));
}
/* time stats: */
static void bch2_time_stats_update_one(struct time_stats *stats,
@ -526,7 +532,8 @@ void bch2_pd_controller_init(struct bch_pd_controller *pd)
void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd)
{
out->tabstops[0] = 20;
if (!out->nr_tabstops)
printbuf_tabstop_push(out, 20);
prt_printf(out, "rate:");
prt_tab(out);

View File

@ -353,6 +353,8 @@ bool bch2_is_zero(const void *, size_t);
u64 bch2_read_flag_list(char *, const char * const[]);
void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
#define NR_QUANTILES 15
#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES)
#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)

View File

@ -2,20 +2,13 @@
/* Copyright (C) 2022 Kent Overstreet */
#include <linux/err.h>
#include <linux/math64.h>
#include <linux/printbuf.h>
#include <linux/slab.h>
#ifdef __KERNEL__
#include <linux/export.h>
#include <linux/kernel.h>
#else
#ifndef EXPORT_SYMBOL
#define EXPORT_SYMBOL(x)
#endif
#endif
#include <linux/printbuf.h>
#include <linux/slab.h>
#include <linux/string_helpers.h>
static inline size_t printbuf_linelen(struct printbuf *buf)
static inline unsigned printbuf_linelen(struct printbuf *buf)
{
return buf->pos - buf->last_newline;
}
@ -35,6 +28,11 @@ int printbuf_make_room(struct printbuf *out, unsigned extra)
return 0;
new_size = roundup_pow_of_two(out->size + extra);
/*
* Note: the output buffer must always be freeable with kfree(); the user
* isn't required to call printbuf_exit().
*/
buf = krealloc(out->buf, new_size, !out->atomic ? GFP_KERNEL : GFP_NOWAIT);
if (!buf) {
@ -78,25 +76,43 @@ void printbuf_exit(struct printbuf *buf)
}
EXPORT_SYMBOL(printbuf_exit);
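Since printbuf_make_room() always uses krealloc(), a caller that wants to keep the formatted string can steal the buffer and free it with kfree() later instead of calling printbuf_exit(). A minimal sketch:

	struct printbuf buf = PRINTBUF;
	char *str;

	prt_printf(&buf, "%u buckets", 128);
	str = buf.buf;		/* take ownership; don't call printbuf_exit() */

	/* ... use str ... */
	kfree(str);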
void prt_newline(struct printbuf *buf)
void printbuf_tabstops_reset(struct printbuf *buf)
{
unsigned i;
printbuf_make_room(buf, 1 + buf->indent);
__prt_char(buf, '\n');
buf->last_newline = buf->pos;
for (i = 0; i < buf->indent; i++)
__prt_char(buf, ' ');
printbuf_nul_terminate(buf);
buf->last_field = buf->pos;
buf->tabstop = 0;
buf->nr_tabstops = 0;
}
EXPORT_SYMBOL(prt_newline);
EXPORT_SYMBOL(printbuf_tabstops_reset);
void printbuf_tabstop_pop(struct printbuf *buf)
{
if (buf->nr_tabstops)
--buf->nr_tabstops;
}
EXPORT_SYMBOL(printbuf_tabstop_pop);
/*
* printbuf_tabstop_push - add a tabstop, n spaces from the previous tabstop
*
* @buf: printbuf to control
* @spaces: number of spaces from the previous tabstop
*
* In the future this function may allocate memory if more than
* PRINTBUF_INLINE_TABSTOPS tabstops are pushed, or if a tabstop is set more
* than 255 spaces from the start of the line.
*/
int printbuf_tabstop_push(struct printbuf *buf, unsigned spaces)
{
unsigned prev_tabstop = buf->nr_tabstops
? buf->_tabstops[buf->nr_tabstops - 1]
: 0;
if (WARN_ON(buf->nr_tabstops >= ARRAY_SIZE(buf->_tabstops)))
return -EINVAL;
buf->_tabstops[buf->nr_tabstops++] = prev_tabstop + spaces;
buf->has_indent_or_tabstops = true;
return 0;
}
EXPORT_SYMBOL(printbuf_tabstop_push);
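Note that printbuf_tabstop_push() takes the width of the new column, not an absolute position: pushing 20 and then 12 places tabstops at columns 20 and 32. A minimal sketch of the pattern the converted callers above now follow:

	struct printbuf buf = PRINTBUF;

	printbuf_tabstop_push(&buf, 20);	/* first tabstop: column 20 */
	printbuf_tabstop_push(&buf, 12);	/* second tabstop: column 32 */

	prt_str(&buf, "Size:");
	prt_tab(&buf);				/* pad with spaces out to column 20 */
	prt_str(&buf, "1.5 TiB");
	prt_newline(&buf);			/* cur_tabstop resets to 0 for the next row */

	printbuf_exit(&buf);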
/**
* printbuf_indent_add - add to the current indent level
@ -113,8 +129,9 @@ void printbuf_indent_add(struct printbuf *buf, unsigned spaces)
spaces = 0;
buf->indent += spaces;
while (spaces--)
prt_char(buf, ' ');
prt_chars(buf, ' ', spaces);
buf->has_indent_or_tabstops = true;
}
EXPORT_SYMBOL(printbuf_indent_add);
@ -137,9 +154,52 @@ void printbuf_indent_sub(struct printbuf *buf, unsigned spaces)
printbuf_nul_terminate(buf);
}
buf->indent -= spaces;
if (!buf->indent && !buf->nr_tabstops)
buf->has_indent_or_tabstops = false;
}
EXPORT_SYMBOL(printbuf_indent_sub);
void prt_newline(struct printbuf *buf)
{
unsigned i;
printbuf_make_room(buf, 1 + buf->indent);
__prt_char(buf, '\n');
buf->last_newline = buf->pos;
for (i = 0; i < buf->indent; i++)
__prt_char(buf, ' ');
printbuf_nul_terminate(buf);
buf->last_field = buf->pos;
buf->cur_tabstop = 0;
}
EXPORT_SYMBOL(prt_newline);
/*
* Returns the current tabstop, as spaces from the start of the line, or 0 if
* no more tabstops are set:
*/
static inline unsigned cur_tabstop(struct printbuf *buf)
{
return buf->cur_tabstop < buf->nr_tabstops
? buf->_tabstops[buf->cur_tabstop]
: 0;
}
static void __prt_tab(struct printbuf *out)
{
int spaces = max_t(int, 0, cur_tabstop(out) - printbuf_linelen(out));
prt_chars(out, ' ', spaces);
out->last_field = out->pos;
out->cur_tabstop++;
}
/**
* prt_tab - Advance printbuf to the next tabstop
*
@ -149,17 +209,38 @@ EXPORT_SYMBOL(printbuf_indent_sub);
*/
void prt_tab(struct printbuf *out)
{
int spaces = max_t(int, 0, out->tabstops[out->tabstop] - printbuf_linelen(out));
if (WARN_ON(!cur_tabstop(out)))
return;
BUG_ON(out->tabstop > ARRAY_SIZE(out->tabstops));
prt_chars(out, ' ', spaces);
out->last_field = out->pos;
out->tabstop++;
__prt_tab(out);
}
EXPORT_SYMBOL(prt_tab);
static void __prt_tab_rjust(struct printbuf *buf)
{
unsigned move = buf->pos - buf->last_field;
int pad = (int) cur_tabstop(buf) - (int) printbuf_linelen(buf);
if (pad > 0) {
printbuf_make_room(buf, pad);
if (buf->last_field + pad < buf->size)
memmove(buf->buf + buf->last_field + pad,
buf->buf + buf->last_field,
min(move, buf->size - 1 - buf->last_field - pad));
if (buf->last_field < buf->size)
memset(buf->buf + buf->last_field, ' ',
min((unsigned) pad, buf->size - buf->last_field));
buf->pos += pad;
printbuf_nul_terminate(buf);
}
buf->last_field = buf->pos;
buf->cur_tabstop++;
}
/**
* prt_tab_rjust - Advance printbuf to the next tabstop, right justifying
* previous output
@ -171,134 +252,64 @@ EXPORT_SYMBOL(prt_tab);
*/
void prt_tab_rjust(struct printbuf *buf)
{
BUG_ON(buf->tabstop > ARRAY_SIZE(buf->tabstops));
if (WARN_ON(!cur_tabstop(buf)))
return;
if (printbuf_linelen(buf) < buf->tabstops[buf->tabstop]) {
unsigned move = buf->pos - buf->last_field;
unsigned shift = buf->tabstops[buf->tabstop] -
printbuf_linelen(buf);
printbuf_make_room(buf, shift);
if (buf->last_field + shift < buf->size)
memmove(buf->buf + buf->last_field + shift,
buf->buf + buf->last_field,
min(move, buf->size - 1 - buf->last_field - shift));
if (buf->last_field < buf->size)
memset(buf->buf + buf->last_field, ' ',
min(shift, buf->size - buf->last_field));
buf->pos += shift;
printbuf_nul_terminate(buf);
}
buf->last_field = buf->pos;
buf->tabstop++;
__prt_tab_rjust(buf);
}
EXPORT_SYMBOL(prt_tab_rjust);
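Where prt_tab() pads after the current field, prt_tab_rjust() inserts the padding before it, memmove()ing everything printed since the last field boundary right so that it ends exactly at the tabstop. A sketch with a single tabstop at column 8:

	struct printbuf buf = PRINTBUF;

	printbuf_tabstop_push(&buf, 8);
	prt_str(&buf, "42");
	prt_tab_rjust(&buf);	/* buffer now reads "      42" */

	printbuf_exit(&buf);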
enum string_size_units {
STRING_UNITS_10, /* use powers of 10^3 (standard SI) */
STRING_UNITS_2, /* use binary powers of 2^10 */
};
static int string_get_size(u64 size, u64 blk_size,
const enum string_size_units units,
char *buf, int len)
/**
* prt_bytes_indented - Print an array of chars, handling embedded control characters
*
* @out: printbuf to output to
* @str: string to print
* @count: number of bytes to print
*
* The following control characters are handled as follows:
* \n: prt_newline newline that obeys current indent level
* \t: prt_tab advance to next tabstop
* \r: prt_tab_rjust advance to next tabstop, with right justification
*/
void prt_bytes_indented(struct printbuf *out, const char *str, unsigned count)
{
static const char *const units_10[] = {
"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
};
static const char *const units_2[] = {
"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
};
static const char *const *const units_str[] = {
[STRING_UNITS_10] = units_10,
[STRING_UNITS_2] = units_2,
};
static const unsigned int divisor[] = {
[STRING_UNITS_10] = 1000,
[STRING_UNITS_2] = 1024,
};
static const unsigned int rounding[] = { 500, 50, 5 };
int i = 0, j;
u32 remainder = 0, sf_cap;
char tmp[13];
const char *unit;
const char *unprinted_start = str;
const char *end = str + count;
tmp[0] = '\0';
if (blk_size == 0)
size = 0;
if (size == 0)
goto out;
/* This is Napier's algorithm. Reduce the original block size to
*
* coefficient * divisor[units]^i
*
* we do the reduction so both coefficients are just under 32 bits so
* that multiplying them together won't overflow 64 bits and we keep
* as much precision as possible in the numbers.
*
* Note: it's safe to throw away the remainders here because all the
* precision is in the coefficients.
*/
while (blk_size >> 32) {
do_div(blk_size, divisor[units]);
i++;
if (!out->has_indent_or_tabstops || out->suppress_indent_tabstop_handling) {
prt_bytes(out, str, count);
return;
}
while (size >> 32) {
do_div(size, divisor[units]);
i++;
while (str != end) {
switch (*str) {
case '\n':
prt_bytes(out, unprinted_start, str - unprinted_start);
unprinted_start = str + 1;
prt_newline(out);
break;
case '\t':
if (likely(cur_tabstop(out))) {
prt_bytes(out, unprinted_start, str - unprinted_start);
unprinted_start = str + 1;
__prt_tab(out);
}
break;
case '\r':
if (likely(cur_tabstop(out))) {
prt_bytes(out, unprinted_start, str - unprinted_start);
unprinted_start = str + 1;
__prt_tab_rjust(out);
}
break;
}
str++;
}
/* now perform the actual multiplication keeping i as the sum of the
* two logarithms */
size *= blk_size;
/* and logarithmically reduce it until it's just under the divisor */
while (size >= divisor[units]) {
remainder = do_div(size, divisor[units]);
i++;
}
/* work out in j how many digits of precision we need from the
* remainder */
sf_cap = size;
for (j = 0; sf_cap*10 < 1000; j++)
sf_cap *= 10;
if (units == STRING_UNITS_2) {
/* express the remainder as a decimal. It's currently the
* numerator of a fraction whose denominator is
* divisor[units], which is 1 << 10 for STRING_UNITS_2 */
remainder *= 1000;
remainder >>= 10;
}
/* add a 5 to the digit below what will be printed to ensure
* an arithmetical round up and carry it through to size */
remainder += rounding[j];
if (remainder >= 1000) {
remainder -= 1000;
size += 1;
}
if (j) {
snprintf(tmp, sizeof(tmp), ".%03u", remainder);
tmp[j+1] = '\0';
}
out:
if (i >= ARRAY_SIZE(units_2))
unit = "UNK";
else
unit = units_str[units][i];
return snprintf(buf, len, "%u%s %s", (u32)size, tmp, unit);
prt_bytes(out, unprinted_start, str - unprinted_start);
}
EXPORT_SYMBOL(prt_bytes_indented);
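This is what lets stored preformatted text such as s->max_paths_text in the debugfs hunk above obey the caller's indent level and tabstops. A sketch using the prt_str_indented() wrapper seen in that hunk:

	struct printbuf buf = PRINTBUF;

	printbuf_indent_add(&buf, 4);
	printbuf_tabstop_push(&buf, 16);
	prt_str_indented(&buf, "key:\tvalue\nnext line");
	/*
	 * buf.buf now reads, with the indent re-applied after the newline:
	 *
	 *     key:        value
	 *     next line
	 */
	printbuf_exit(&buf);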
/**
* prt_human_readable_u64 - Print out a u64 in human readable units

linux/string_helpers.c Normal file
View File

@ -0,0 +1,131 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Helpers for formatting and printing strings
*
* Copyright 31 August 2008 James Bottomley
* Copyright (C) 2013, Intel Corporation
*/
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/export.h>
#include <linux/ctype.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/limits.h>
#include <linux/printbuf.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/string_helpers.h>
/**
* string_get_size - get the size in the specified units
* @size: The size to be converted in blocks
* @blk_size: Size of the block (use 1 for size in bytes)
* @units: units to use (powers of 1000 or 1024)
* @buf: buffer to format to
* @len: length of buffer
*
* This function returns a string formatted to 3 significant figures
* giving the size in the required units. @buf should have room for
* at least 9 bytes and will always be zero terminated.
*
*/
int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
char *buf, int len)
{
static const char *const units_10[] = {
"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
};
static const char *const units_2[] = {
"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
};
static const char *const *const units_str[] = {
[STRING_UNITS_10] = units_10,
[STRING_UNITS_2] = units_2,
};
static const unsigned int divisor[] = {
[STRING_UNITS_10] = 1000,
[STRING_UNITS_2] = 1024,
};
static const unsigned int rounding[] = { 500, 50, 5 };
int i = 0, j;
u32 remainder = 0, sf_cap;
char tmp[8];
const char *unit;
tmp[0] = '\0';
if (blk_size == 0)
size = 0;
if (size == 0)
goto out;
/* This is Napier's algorithm. Reduce the original block size to
*
* coefficient * divisor[units]^i
*
* we do the reduction so both coefficients are just under 32 bits so
* that multiplying them together won't overflow 64 bits and we keep
* as much precision as possible in the numbers.
*
* Note: it's safe to throw away the remainders here because all the
* precision is in the coefficients.
*/
while (blk_size >> 32) {
do_div(blk_size, divisor[units]);
i++;
}
while (size >> 32) {
do_div(size, divisor[units]);
i++;
}
/* now perform the actual multiplication keeping i as the sum of the
* two logarithms */
size *= blk_size;
/* and logarithmically reduce it until it's just under the divisor */
while (size >= divisor[units]) {
remainder = do_div(size, divisor[units]);
i++;
}
/* work out in j how many digits of precision we need from the
* remainder */
sf_cap = size;
for (j = 0; sf_cap*10 < 1000; j++)
sf_cap *= 10;
if (units == STRING_UNITS_2) {
/* express the remainder as a decimal. It's currently the
* numerator of a fraction whose denominator is
* divisor[units], which is 1 << 10 for STRING_UNITS_2 */
remainder *= 1000;
remainder >>= 10;
}
/* add a 5 to the digit below what will be printed to ensure
* an arithmetical round up and carry it through to size */
remainder += rounding[j];
if (remainder >= 1000) {
remainder -= 1000;
size += 1;
}
if (j) {
snprintf(tmp, sizeof(tmp), ".%03u", remainder);
tmp[j+1] = '\0';
}
out:
if (i >= ARRAY_SIZE(units_2))
unit = "UNK";
else
unit = units_str[units][i];
return snprintf(buf, len, "%u%s %s", (u32)size, tmp, unit);
}
EXPORT_SYMBOL(string_get_size);
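Worked example: 8192 blocks of 512 bytes is 4194304 bytes; two reductions by 1024 leave size = 4 with i = 2 and remainder 0, and three significant figures give "4.00 MiB":

	char buf[16];

	string_get_size(8192, 512, STRING_UNITS_2, buf, sizeof(buf));
	/* buf now contains "4.00 MiB" */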