Update bcachefs sources to 50ac18afbb bcachefs: Fix an uninitialized variable

Kent Overstreet 2022-01-03 23:43:03 -05:00
parent 69529e3136
commit 931ed5a709
36 changed files with 816 additions and 764 deletions

View File

@@ -1 +1 @@
-90d824456e169e50965814b74a75c50045b13976
+50ac18afbb522a3103cecff9aaf9519d4eb5e908

View File

@@ -63,6 +63,10 @@ static inline void *krealloc(void *old, size_t size, gfp_t flags)
 	((size) != 0 && (n) > SIZE_MAX / (size) \
 	 ? NULL : kmalloc((n) * (size), flags))
 
+#define kvmalloc_array(n, size, flags)					\
+	((size) != 0 && (n) > SIZE_MAX / (size) \
+	 ? NULL : kmalloc((n) * (size), flags))
+
 #define kcalloc(n, size, flags)	kmalloc_array(n, size, flags|__GFP_ZERO)
 
 #define kfree(p)		free(p)
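
Note: the guard in kmalloc_array/kvmalloc_array rejects element counts whose product would overflow size_t before the multiplication happens, so a wrapped result can never silently allocate a too-small buffer. A minimal standalone sketch of the same check (hypothetical name, plain malloc underneath):

	#include <stdint.h>
	#include <stdlib.h>

	/* Fail instead of allocating a truncated buffer when n * size
	 * would overflow SIZE_MAX. */
	static void *alloc_array_checked(size_t n, size_t size)
	{
		if (size != 0 && n > SIZE_MAX / size)
			return NULL;	/* n * size would overflow */
		return malloc(n * size);
	}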

View File

@@ -387,7 +387,7 @@ TRACE_EVENT(alloc_scan,
 	),
 
 	TP_fast_assign(
-		__entry->dev		= ca->disk_sb.bdev->bd_dev;
+		__entry->dev		= ca->dev;
 		__entry->found		= found;
 		__entry->inc_gen	= inc_gen;
 		__entry->inc_gen_skipped = inc_gen_skipped;
@@ -409,7 +409,7 @@ TRACE_EVENT(invalidate,
 	),
 
 	TP_fast_assign(
-		__entry->dev		= ca->disk_sb.bdev->bd_dev;
+		__entry->dev		= ca->dev;
 		__entry->offset		= offset,
 		__entry->sectors	= sectors;
 	),
@@ -431,7 +431,7 @@ DECLARE_EVENT_CLASS(bucket_alloc,
 	),
 
 	TP_fast_assign(
-		__entry->dev		= ca->disk_sb.bdev->bd_dev;
+		__entry->dev		= ca->dev;
 		__entry->reserve	= reserve;
 	),
@@ -546,94 +546,81 @@ TRACE_EVENT(copygc_wait,
 		  __entry->wait_amount, __entry->until)
 );
 
-TRACE_EVENT(transaction_restart_ip,
-	TP_PROTO(unsigned long caller, unsigned long ip),
-	TP_ARGS(caller, ip),
-
-	TP_STRUCT__entry(
-		__field(unsigned long,	caller	)
-		__field(unsigned long,	ip	)
-	),
-
-	TP_fast_assign(
-		__entry->caller	= caller;
-		__entry->ip	= ip;
-	),
-
-	TP_printk("%ps %pS", (void *) __entry->caller, (void *) __entry->ip)
-);
-
 DECLARE_EVENT_CLASS(transaction_restart,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip),
-	TP_ARGS(trans_ip, caller_ip),
+	TP_ARGS(trans_fn, caller_ip),
 
 	TP_STRUCT__entry(
-		__field(unsigned long,	trans_ip	)
+		__array(char,		trans_fn, 24	)
 		__field(unsigned long,	caller_ip	)
 	),
 
 	TP_fast_assign(
-		__entry->trans_ip	= trans_ip;
+		strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
 		__entry->caller_ip	= caller_ip;
 	),
 
-	TP_printk("%ps %pS",
-		  (void *) __entry->trans_ip,
-		  (void *) __entry->caller_ip)
+	TP_printk("%s %pS", __entry->trans_fn, (void *) __entry->caller_ip)
+);
+
+DEFINE_EVENT(transaction_restart, transaction_restart_ip,
+	TP_PROTO(const char *trans_fn,
+		 unsigned long caller_ip),
+	TP_ARGS(trans_fn, caller_ip)
 );
 
 DEFINE_EVENT(transaction_restart, trans_blocked_journal_reclaim,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip),
-	TP_ARGS(trans_ip, caller_ip)
+	TP_ARGS(trans_fn, caller_ip)
 );
 
 DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip),
-	TP_ARGS(trans_ip, caller_ip)
+	TP_ARGS(trans_fn, caller_ip)
 );
 
 DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip),
-	TP_ARGS(trans_ip, caller_ip)
+	TP_ARGS(trans_fn, caller_ip)
 );
 
 DEFINE_EVENT(transaction_restart, trans_restart_journal_reclaim,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip),
-	TP_ARGS(trans_ip, caller_ip)
+	TP_ARGS(trans_fn, caller_ip)
 );
 
 DEFINE_EVENT(transaction_restart, trans_restart_fault_inject,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip),
-	TP_ARGS(trans_ip, caller_ip)
+	TP_ARGS(trans_fn, caller_ip)
 );
 
 DEFINE_EVENT(transaction_restart, trans_traverse_all,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip),
-	TP_ARGS(trans_ip, caller_ip)
+	TP_ARGS(trans_fn, caller_ip)
 );
 
 DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip),
-	TP_ARGS(trans_ip, caller_ip)
+	TP_ARGS(trans_fn, caller_ip)
 );
 
 DECLARE_EVENT_CLASS(transaction_restart_iter,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip,
 		 enum btree_id btree_id,
 		 struct bpos *pos),
-	TP_ARGS(trans_ip, caller_ip, btree_id, pos),
+	TP_ARGS(trans_fn, caller_ip, btree_id, pos),
 
 	TP_STRUCT__entry(
-		__field(unsigned long,	trans_ip	)
+		__array(char,		trans_fn, 24	)
 		__field(unsigned long,	caller_ip	)
 		__field(u8,		btree_id	)
 		__field(u64,		pos_inode	)
@@ -642,7 +629,7 @@ DECLARE_EVENT_CLASS(transaction_restart_iter,
 	),
 
 	TP_fast_assign(
-		__entry->trans_ip	= trans_ip;
+		strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
 		__entry->caller_ip	= caller_ip;
 		__entry->btree_id	= btree_id;
 		__entry->pos_inode	= pos->inode;
@@ -650,8 +637,8 @@ DECLARE_EVENT_CLASS(transaction_restart_iter,
 		__entry->pos_snapshot	= pos->snapshot;
 	),
 
-	TP_printk("%ps %pS btree %u pos %llu:%llu:%u",
-		  (void *) __entry->trans_ip,
+	TP_printk("%s %pS btree %u pos %llu:%llu:%u",
+		  __entry->trans_fn,
 		  (void *) __entry->caller_ip,
 		  __entry->btree_id,
 		  __entry->pos_inode,
@@ -660,63 +647,63 @@ DECLARE_EVENT_CLASS(transaction_restart_iter,
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_reused,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip,
 		 enum btree_id btree_id,
 		 struct bpos *pos),
-	TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+	TP_ARGS(trans_fn, caller_ip, btree_id, pos)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip,
 		 enum btree_id btree_id,
 		 struct bpos *pos),
-	TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+	TP_ARGS(trans_fn, caller_ip, btree_id, pos)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_mark,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip,
 		 enum btree_id btree_id,
 		 struct bpos *pos),
-	TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+	TP_ARGS(trans_fn, caller_ip, btree_id, pos)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_upgrade,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip,
 		 enum btree_id btree_id,
 		 struct bpos *pos),
-	TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+	TP_ARGS(trans_fn, caller_ip, btree_id, pos)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_iter_upgrade,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip,
 		 enum btree_id btree_id,
 		 struct bpos *pos),
-	TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+	TP_ARGS(trans_fn, caller_ip, btree_id, pos)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_relock,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip,
 		 enum btree_id btree_id,
 		 struct bpos *pos),
-	TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+	TP_ARGS(trans_fn, caller_ip, btree_id, pos)
 );
 
 DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip,
 		 enum btree_id btree_id,
 		 struct bpos *pos),
-	TP_ARGS(trans_ip, caller_ip, btree_id, pos)
+	TP_ARGS(trans_fn, caller_ip, btree_id, pos)
 );
 
 TRACE_EVENT(trans_restart_would_deadlock,
-	TP_PROTO(unsigned long trans_ip,
+	TP_PROTO(const char *trans_fn,
 		 unsigned long caller_ip,
 		 bool in_traverse_all,
 		 unsigned reason,
@@ -726,12 +713,12 @@ TRACE_EVENT(trans_restart_would_deadlock,
 		 enum btree_id want_btree_id,
 		 unsigned want_iter_type,
 		 struct bpos *want_pos),
-	TP_ARGS(trans_ip, caller_ip, in_traverse_all, reason,
+	TP_ARGS(trans_fn, caller_ip, in_traverse_all, reason,
 		have_btree_id, have_iter_type, have_pos,
 		want_btree_id, want_iter_type, want_pos),
 
 	TP_STRUCT__entry(
-		__field(unsigned long,	trans_ip	)
+		__array(char,		trans_fn, 24	)
 		__field(unsigned long,	caller_ip	)
 		__field(u8,		in_traverse_all	)
 		__field(u8,		reason		)
@@ -749,7 +736,7 @@ TRACE_EVENT(trans_restart_would_deadlock,
 	),
 
 	TP_fast_assign(
-		__entry->trans_ip	= trans_ip;
+		strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
 		__entry->caller_ip	= caller_ip;
 		__entry->in_traverse_all = in_traverse_all;
 		__entry->reason		= reason;
@@ -767,8 +754,8 @@ TRACE_EVENT(trans_restart_would_deadlock,
 		__entry->want_pos_snapshot = want_pos->snapshot;
 	),
 
-	TP_printk("%ps %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u",
-		  (void *) __entry->trans_ip,
+	TP_printk("%s %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u",
+		  __entry->trans_fn,
 		  (void *) __entry->caller_ip,
 		  __entry->in_traverse_all,
 		  __entry->reason,
@@ -785,39 +772,40 @@ TRACE_EVENT(trans_restart_would_deadlock,
 );
 
 TRACE_EVENT(trans_restart_would_deadlock_write,
-	TP_PROTO(unsigned long trans_ip),
-	TP_ARGS(trans_ip),
+	TP_PROTO(const char *trans_fn),
+	TP_ARGS(trans_fn),
 
 	TP_STRUCT__entry(
-		__field(unsigned long,	trans_ip	)
+		__array(char,		trans_fn, 24	)
 	),
 
 	TP_fast_assign(
-		__entry->trans_ip	= trans_ip;
+		strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
 	),
 
-	TP_printk("%ps", (void *) __entry->trans_ip)
+	TP_printk("%s", __entry->trans_fn)
 );
 
 TRACE_EVENT(trans_restart_mem_realloced,
-	TP_PROTO(unsigned long trans_ip, unsigned long caller_ip,
+	TP_PROTO(const char *trans_fn,
+		 unsigned long caller_ip,
 		 unsigned long bytes),
-	TP_ARGS(trans_ip, caller_ip, bytes),
+	TP_ARGS(trans_fn, caller_ip, bytes),
 
 	TP_STRUCT__entry(
-		__field(unsigned long,	trans_ip	)
+		__array(char,		trans_fn, 24	)
 		__field(unsigned long,	caller_ip	)
 		__field(unsigned long,	bytes		)
 	),
 
 	TP_fast_assign(
-		__entry->trans_ip	= trans_ip;
+		strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
 		__entry->caller_ip	= caller_ip;
 		__entry->bytes		= bytes;
 	),
 
-	TP_printk("%ps %pS bytes %lu",
-		  (void *) __entry->trans_ip,
+	TP_printk("%s %pS bytes %lu",
+		  __entry->trans_fn,
 		  (void *) __entry->caller_ip,
 		  __entry->bytes)
 );
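
Note: these events now record the transaction's originating function name in a fixed 24-byte __array instead of an instruction pointer. One detail of the pattern: strncpy() into a fixed-size field does not NUL-terminate when the source fills the buffer, so readers must bound the field when printing. A userspace sketch of the same fixed-width copy (names hypothetical):

	#include <stdio.h>
	#include <string.h>

	struct trace_entry {
		char		trans_fn[24];	/* like __array(char, trans_fn, 24) */
		unsigned long	caller_ip;
	};

	static void record(struct trace_entry *e, const char *fn, unsigned long ip)
	{
		/* strncpy pads with NULs, but leaves no terminator if fn is
		 * >= 24 bytes long, hence the width bound in the printf below */
		strncpy(e->trans_fn, fn, sizeof(e->trans_fn));
		e->caller_ip = ip;
	}

	int main(void)
	{
		struct trace_entry e;

		record(&e, "bch2_btree_insert", 0x1234);
		printf("%.24s 0x%lx\n", e.trans_fn, e.caller_ip);
		return 0;
	}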

View File

@@ -177,7 +177,11 @@
  */
 
 #undef pr_fmt
+#ifdef __KERNEL__
 #define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__
+#else
+#define pr_fmt(fmt) "%s() " fmt "\n", __func__
+#endif
 
 #include <linux/backing-dev-defs.h>
 #include <linux/bug.h>
@@ -219,8 +223,8 @@
 #define bch2_fmt(_c, fmt)		"bcachefs (%s): " fmt "\n", ((_c)->name)
 #define bch2_fmt_inum(_c, _inum, fmt)	"bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum)
 #else
-#define bch2_fmt(_c, fmt)		"%s: " fmt "\n", ((_c)->name)
-#define bch2_fmt_inum(_c, _inum, fmt)	"%s inum %llu: " fmt "\n", ((_c)->name), (_inum)
+#define bch2_fmt(_c, fmt)		fmt "\n"
+#define bch2_fmt_inum(_c, _inum, fmt)	"inum %llu: " fmt "\n", (_inum)
 #endif
 
 #define bch_info(c, fmt, ...) \
@@ -432,6 +436,7 @@ struct bch_dev {
 	struct bch_sb_handle	disk_sb;
 	struct bch_sb		*sb_read_scratch;
 	int			sb_write_error;
+	dev_t			dev;
 
 	struct bch_devs_mask	self;
@@ -749,6 +754,7 @@ struct bch_fs {
 	/* JOURNAL SEQ BLACKLIST */
 	struct journal_seq_blacklist_table *
 				journal_seq_blacklist_table;
+	struct work_struct	journal_seq_blacklist_gc_work;
 
 	/* ALLOCATOR */
 	spinlock_t		freelist_lock;
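
Note: pr_fmt() is the hook the pr_*() logging macros expand at each call site, so redefining it (here: dropping the "bcachefs: " prefix for userspace builds) changes every message in the translation unit. A reduced sketch of how the composition works:

	#include <stdio.h>

	/* pr_fmt() is expanded *inside* pr_info(), so the prefix is
	 * prepended to every message in this file. */
	#define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__
	#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

	static void demo(void)
	{
		pr_info("mounted %s", "/dev/sda1");
		/* prints: bcachefs: demo() mounted /dev/sda1 */
	}

	int main(void) { demo(); return 0; }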

View File

@@ -473,7 +473,7 @@ static inline struct bkey_packed *tree_to_bkey(const struct btree *b,
 					       unsigned j)
 {
 	return cacheline_to_bkey(b, t,
-			__eytzinger1_to_inorder(j, t->size, t->extra),
+			__eytzinger1_to_inorder(j, t->size - 1, t->extra),
 			bkey_float(b, t, j)->key_offset);
 }
 
@@ -607,10 +607,10 @@ static inline unsigned bkey_mantissa(const struct bkey_packed *k,
 }
 
 __always_inline
-static inline void __make_bfloat(struct btree *b, struct bset_tree *t,
-				 unsigned j,
-				 struct bkey_packed *min_key,
-				 struct bkey_packed *max_key)
+static inline void make_bfloat(struct btree *b, struct bset_tree *t,
+			       unsigned j,
+			       struct bkey_packed *min_key,
+			       struct bkey_packed *max_key)
 {
 	struct bkey_float *f = bkey_float(b, t, j);
 	struct bkey_packed *m = tree_to_bkey(b, t, j);
@@ -679,34 +679,6 @@ static inline void __make_bfloat(struct btree *b, struct bset_tree *t,
 	f->mantissa = mantissa;
 }
 
-static void make_bfloat(struct btree *b, struct bset_tree *t,
-			unsigned j,
-			struct bkey_packed *min_key,
-			struct bkey_packed *max_key)
-{
-	struct bkey_i *k;
-
-	if (is_power_of_2(j) &&
-	    !min_key->u64s) {
-		if (!bkey_pack_pos(min_key, b->data->min_key, b)) {
-			k = (void *) min_key;
-			bkey_init(&k->k);
-			k->k.p = b->data->min_key;
-		}
-	}
-
-	if (is_power_of_2(j + 1) &&
-	    !max_key->u64s) {
-		if (!bkey_pack_pos(max_key, b->data->max_key, b)) {
-			k = (void *) max_key;
-			bkey_init(&k->k);
-			k->k.p = b->data->max_key;
-		}
-	}
-
-	__make_bfloat(b, t, j, min_key, max_key);
-}
-
 /* bytes remaining - only valid for last bset: */
 static unsigned __bset_tree_capacity(const struct btree *b, const struct bset_tree *t)
 {
@@ -763,7 +735,7 @@ retry:
 	t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1;
 
 	/* First we figure out where the first key in each cacheline is */
-	eytzinger1_for_each(j, t->size) {
+	eytzinger1_for_each(j, t->size - 1) {
 		while (bkey_to_cacheline(b, t, k) < cacheline)
 			prev = k, k = bkey_next(k);
@@ -795,10 +767,10 @@ retry:
 	}
 
 	/* Then we build the tree */
-	eytzinger1_for_each(j, t->size)
-		__make_bfloat(b, t, j,
-			      bkey_to_packed(&min_key),
-			      bkey_to_packed(&max_key));
+	eytzinger1_for_each(j, t->size - 1)
+		make_bfloat(b, t, j,
+			    bkey_to_packed(&min_key),
+			    bkey_to_packed(&max_key));
 }
 
 static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
@@ -897,7 +869,7 @@ static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t,
 		do {
 			p = j ? tree_to_bkey(b, t,
 					__inorder_to_eytzinger1(j--,
-							t->size, t->extra))
+							t->size - 1, t->extra))
 				: btree_bkey_first(b, t);
 		} while (p >= k);
 		break;
@@ -943,91 +915,6 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
 
 /* Insert */
 
-static void rw_aux_tree_fix_invalidated_key(struct btree *b,
-					    struct bset_tree *t,
-					    struct bkey_packed *k)
-{
-	unsigned offset = __btree_node_key_to_offset(b, k);
-	unsigned j = rw_aux_tree_bsearch(b, t, offset);
-
-	if (j < t->size &&
-	    rw_aux_tree(b, t)[j].offset == offset)
-		rw_aux_tree_set(b, t, j, k);
-
-	bch2_bset_verify_rw_aux_tree(b, t);
-}
-
-static void ro_aux_tree_fix_invalidated_key(struct btree *b,
-					    struct bset_tree *t,
-					    struct bkey_packed *k)
-{
-	struct bkey_packed min_key, max_key;
-	unsigned inorder, j;
-
-	EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE);
-
-	/* signal to make_bfloat() that they're uninitialized: */
-	min_key.u64s = max_key.u64s = 0;
-
-	if (bkey_next(k) == btree_bkey_last(b, t)) {
-		for (j = 1; j < t->size; j = j * 2 + 1)
-			make_bfloat(b, t, j, &min_key, &max_key);
-	}
-
-	inorder = bkey_to_cacheline(b, t, k);
-
-	if (inorder &&
-	    inorder < t->size) {
-		j = __inorder_to_eytzinger1(inorder, t->size, t->extra);
-
-		if (k == tree_to_bkey(b, t, j)) {
-			/* Fix the node this key corresponds to */
-			make_bfloat(b, t, j, &min_key, &max_key);
-
-			/* Children for which this key is the right boundary */
-			for (j = eytzinger1_left_child(j);
-			     j < t->size;
-			     j = eytzinger1_right_child(j))
-				make_bfloat(b, t, j, &min_key, &max_key);
-		}
-	}
-
-	if (inorder + 1 < t->size) {
-		j = __inorder_to_eytzinger1(inorder + 1, t->size, t->extra);
-
-		if (k == tree_to_prev_bkey(b, t, j)) {
-			make_bfloat(b, t, j, &min_key, &max_key);
-
-			/* Children for which this key is the left boundary */
-			for (j = eytzinger1_right_child(j);
-			     j < t->size;
-			     j = eytzinger1_left_child(j))
-				make_bfloat(b, t, j, &min_key, &max_key);
-		}
-	}
-}
-
-/**
- * bch2_bset_fix_invalidated_key() - given an existing key @k that has been
- * modified, fix any auxiliary search tree by remaking all the nodes in the
- * auxiliary search tree that @k corresponds to
- */
-void bch2_bset_fix_invalidated_key(struct btree *b, struct bkey_packed *k)
-{
-	struct bset_tree *t = bch2_bkey_to_bset(b, k);
-
-	switch (bset_aux_tree_type(t)) {
-	case BSET_NO_AUX_TREE:
-		break;
-	case BSET_RO_AUX_TREE:
-		ro_aux_tree_fix_invalidated_key(b, t, k);
-		break;
-	case BSET_RW_AUX_TREE:
-		rw_aux_tree_fix_invalidated_key(b, t, k);
-		break;
-	}
-}
-
 static void bch2_bset_fix_lookup_table(struct btree *b,
 				       struct bset_tree *t,
 				       struct bkey_packed *_where,
@@ -1262,7 +1149,7 @@ slowpath:
 		n = n * 2 + (cmp < 0);
 	} while (n < t->size);
 
-	inorder = __eytzinger1_to_inorder(n >> 1, t->size, t->extra);
+	inorder = __eytzinger1_to_inorder(n >> 1, t->size - 1, t->extra);
 
 	/*
 	 * n would have been the node we recursed to - the low bit tells us if
@@ -1273,7 +1160,7 @@ slowpath:
 		if (unlikely(!inorder))
 			return btree_bkey_first(b, t);
 
-		f = &base->f[eytzinger1_prev(n >> 1, t->size)];
+		f = &base->f[eytzinger1_prev(n >> 1, t->size - 1)];
 	}
 
 	return cacheline_to_bkey(b, t, inorder, f->key_offset);
@@ -1690,7 +1577,7 @@ void bch2_bfloat_to_text(struct printbuf *out, struct btree *b,
 	if (!inorder || inorder >= t->size)
 		return;
 
-	j = __inorder_to_eytzinger1(inorder, t->size, t->extra);
+	j = __inorder_to_eytzinger1(inorder, t->size - 1, t->extra);
 	if (k != tree_to_bkey(b, t, j))
 		return;

View File

@@ -361,7 +361,6 @@ void bch2_bset_init_first(struct btree *, struct bset *);
 void bch2_bset_init_next(struct bch_fs *, struct btree *,
 			 struct btree_node_entry *);
 void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool);
-void bch2_bset_fix_invalidated_key(struct btree *, struct bkey_packed *);
 
 void bch2_bset_insert(struct btree *, struct btree_node_iter *,
 		      struct bkey_packed *, struct bkey_i *, unsigned);

View File

@@ -858,7 +858,7 @@ lock_node:
 			if (bch2_btree_node_relock(trans, path, level + 1))
 				goto retry;
 
-			trace_trans_restart_btree_node_reused(trans->ip,
+			trace_trans_restart_btree_node_reused(trans->fn,
 							      trace_ip,
 							      path->btree_id,
 							      &path->pos);

View File

@@ -156,6 +156,34 @@ static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
 	}
 }
 
+static void bch2_btree_node_update_key_early(struct bch_fs *c,
+					     enum btree_id btree, unsigned level,
+					     struct bkey_s_c old, struct bkey_i *new)
+{
+	struct btree *b;
+	struct bkey_buf tmp;
+	int ret;
+
+	bch2_bkey_buf_init(&tmp);
+	bch2_bkey_buf_reassemble(&tmp, c, old);
+
+	b = bch2_btree_node_get_noiter(c, tmp.k, btree, level, true);
+	if (!IS_ERR_OR_NULL(b)) {
+		mutex_lock(&c->btree_cache.lock);
+
+		bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+		bkey_copy(&b->key, new);
+		ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
+		BUG_ON(ret);
+
+		mutex_unlock(&c->btree_cache.lock);
+		six_unlock_read(&b->c.lock);
+	}
+
+	bch2_bkey_buf_exit(&tmp, c);
+}
+
 static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
 {
 	struct bkey_i_btree_ptr_v2 *new;
@@ -523,18 +551,6 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
 			}
 		}
 
-		if (fsck_err_on(data_type == BCH_DATA_btree &&
-				g->mark.gen != p.ptr.gen, c,
-				"bucket %u:%zu data type %s has metadata but wrong gen: %u != %u\n"
-				"while marking %s",
-				p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-				bch2_data_types[ptr_data_type(k->k, &p.ptr)],
-				p.ptr.gen, g->mark.gen,
-				(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) {
-			g->_mark.data_type	= data_type;
-			g->gen_valid		= true;
-		}
-
 		if (fsck_err_on(gen_cmp(p.ptr.gen, g->mark.gen) > 0, c,
 				"bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
 				"while marking %s",
@@ -573,7 +589,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
 				(bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf)))
 			do_update = true;
 
-		if (p.ptr.gen != g->mark.gen)
+		if (data_type != BCH_DATA_btree && p.ptr.gen != g->mark.gen)
 			continue;
 
 		if (fsck_err_on(g->mark.data_type &&
@@ -687,16 +703,19 @@ found:
 		}
 
 		ret = bch2_journal_key_insert_take(c, btree_id, level, new);
-
-		if (ret)
+		if (ret) {
 			kfree(new);
-		else {
-			bch2_bkey_val_to_text(&PBUF(buf), c, *k);
-			bch_info(c, "updated %s", buf);
-			bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(new));
-			bch_info(c, "new key %s", buf);
-			*k = bkey_i_to_s_c(new);
+			return ret;
 		}
+
+		if (level)
+			bch2_btree_node_update_key_early(c, btree_id, level - 1, *k, new);
+
+		bch2_bkey_val_to_text(&PBUF(buf), c, *k);
+		bch_info(c, "updated %s", buf);
+		bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(new));
+		bch_info(c, "new key %s", buf);
+		*k = bkey_i_to_s_c(new);
 	}
 fsck_err:
 	return ret;

View File

@@ -972,19 +972,23 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 			SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
 
-		b->written += sectors;
-
 		blacklisted = bch2_journal_seq_is_blacklisted(c,
 					le64_to_cpu(i->journal_seq),
 					true);
 
 		btree_err_on(blacklisted && first,
 			     BTREE_ERR_FIXABLE, c, ca, b, i,
-			     "first btree node bset has blacklisted journal seq");
+			     "first btree node bset has blacklisted journal seq (%llu)",
+			     le64_to_cpu(i->journal_seq));
 
 		btree_err_on(blacklisted && ptr_written,
 			     BTREE_ERR_FIXABLE, c, ca, b, i,
-			     "found blacklisted bset in btree node with sectors_written");
+			     "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u",
+			     le64_to_cpu(i->journal_seq),
+			     b->written, b->written + sectors, ptr_written);
+
+		b->written += sectors;
 
 		if (blacklisted && !first)
 			continue;

View File

@@ -363,7 +363,7 @@ bool __bch2_btree_node_lock(struct btree_trans *trans,
 	}
 
 	if (unlikely(deadlock_path)) {
-		trace_trans_restart_would_deadlock(trans->ip, ip,
+		trace_trans_restart_would_deadlock(trans->fn, ip,
 				trans->in_traverse_all, reason,
 				deadlock_path->btree_id,
 				deadlock_path->cached,
@@ -548,7 +548,7 @@ bool bch2_trans_relock(struct btree_trans *trans)
 	trans_for_each_path(trans, path)
 		if (path->should_be_locked &&
 		    !bch2_btree_path_relock(trans, path, _RET_IP_)) {
-			trace_trans_restart_relock(trans->ip, _RET_IP_,
+			trace_trans_restart_relock(trans->fn, _RET_IP_,
 					path->btree_id, &path->pos);
 			BUG_ON(!trans->restarted);
 			return false;
@@ -1519,7 +1519,7 @@ out:
 	trans->in_traverse_all = false;
 
-	trace_trans_traverse_all(trans->ip, trace_ip);
+	trace_trans_traverse_all(trans->fn, trace_ip);
 	return ret;
 }
@@ -2843,7 +2843,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
 		trans->mem_bytes = new_bytes;
 
 		if (old_bytes) {
-			trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes);
+			trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes);
 			btree_trans_restart(trans);
 			return ERR_PTR(-EINTR);
 		}
@@ -2927,14 +2927,15 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
 	trans->updates = p; p += updates_bytes;
 }
 
-void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
-		     unsigned expected_nr_iters,
-		     size_t expected_mem_bytes)
+void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
+		       unsigned expected_nr_iters,
+		       size_t expected_mem_bytes,
+		       const char *fn)
 	__acquires(&c->btree_trans_barrier)
 {
 	memset(trans, 0, sizeof(*trans));
 	trans->c	= c;
-	trans->ip	= _RET_IP_;
+	trans->fn	= fn;
 
 	bch2_trans_alloc_paths(trans, c);
@@ -2967,7 +2968,7 @@ static void check_btree_paths_leaked(struct btree_trans *trans)
 			goto leaked;
 	return;
 leaked:
-	bch_err(c, "btree paths leaked from %pS!", (void *) trans->ip);
+	bch_err(c, "btree paths leaked from %s!", trans->fn);
 	trans_for_each_path(trans, path)
 		if (path->ref)
 			printk(KERN_ERR "  btree %s %pS\n",
@@ -3060,7 +3061,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
 		if (!trans_has_locks(trans))
 			continue;
 
-		pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip);
+		pr_buf(out, "%i %s\n", trans->pid, trans->fn);
 
 		trans_for_each_path(trans, path) {
 			if (!path->nodes_locked)

View File

@@ -354,9 +354,12 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
 /* new multiple iterator interface: */
 
 void bch2_dump_trans_paths_updates(struct btree_trans *);
-void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
+void __bch2_trans_init(struct btree_trans *, struct bch_fs *,
+		       unsigned, size_t, const char *);
 void bch2_trans_exit(struct btree_trans *);
 
+#define bch2_trans_init(...)	__bch2_trans_init(__VA_ARGS__, __func__)
+
 void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *);
 
 void bch2_fs_btree_iter_exit(struct bch_fs *);
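
Note: this wrapper macro is what lets the trans->fn change above happen without touching any caller. __func__ expands at the call site, so the variadic macro silently appends the calling function's name to the existing argument list. A reduced sketch of the pattern (hypothetical names):

	#include <stdio.h>

	struct trans { const char *fn; };

	static void __trans_init(struct trans *t, int flags, const char *fn)
	{
		t->fn = fn;	/* string literal from the *caller's* __func__ */
		(void) flags;
	}

	/* __VA_ARGS__ preserves the original call signature; __func__
	 * expands in the calling function, not inside this macro. */
	#define trans_init(...)	__trans_init(__VA_ARGS__, __func__)

	static void do_something(void)
	{
		struct trans t;

		trans_init(&t, 0);
		printf("trans from %s\n", t.fn);	/* "do_something" */
	}

	int main(void) { do_something(); return 0; }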

View File

@@ -208,7 +208,6 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 				struct btree_path *ck_path,
 				struct bkey_cached *ck)
 {
-	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	unsigned new_u64s = 0;
@@ -223,7 +222,7 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 		goto err;
 
 	if (!bch2_btree_node_relock(trans, ck_path, 0)) {
-		trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+		trace_transaction_restart_ip(trans->fn, _THIS_IP_);
 		ret = btree_trans_restart(trans);
 		goto err;
 	}
@@ -238,7 +237,7 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 		new_u64s = roundup_pow_of_two(new_u64s);
 		new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS);
 		if (!new_k) {
-			bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
+			bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
 				bch2_btree_ids[ck->key.btree_id], new_u64s);
 			ret = -ENOMEM;
 			goto err;
@@ -318,7 +317,7 @@ retry:
 			if (!trans->restarted)
 				goto retry;
 
-			trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+			trace_transaction_restart_ip(trans->fn, _THIS_IP_);
 			ret = -EINTR;
 			goto err;
 		}
@@ -338,7 +337,7 @@ fill:
 	if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) {
 		if (!path->locks_want &&
 		    !__bch2_btree_path_upgrade(trans, path, 1)) {
-			trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+			trace_transaction_restart_ip(trans->fn, _THIS_IP_);
 			ret = btree_trans_restart(trans);
 			goto err;
 		}

View File

@@ -368,6 +368,7 @@ struct btree_trans_commit_hook {
 
 struct btree_trans {
 	struct bch_fs		*c;
+	const char		*fn;
 	struct list_head	list;
 	struct btree		*locking;
 	unsigned		locking_path_idx;
@@ -375,7 +376,6 @@ struct btree_trans {
 	u8			locking_btree_id;
 	u8			locking_level;
 	pid_t			pid;
-	unsigned long		ip;
 	int			srcu_idx;
 
 	u8			nr_sorted;

View File

@@ -955,7 +955,7 @@ retry:
 	 * instead of locking/reserving all the way to the root:
 	 */
 	if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) {
-		trace_trans_restart_iter_upgrade(trans->ip, _RET_IP_,
+		trace_trans_restart_iter_upgrade(trans->fn, _RET_IP_,
 						 path->btree_id, &path->pos);
 		ret = btree_trans_restart(trans);
 		return ERR_PTR(ret);
@@ -1019,7 +1019,7 @@ retry:
 					      BTREE_UPDATE_JOURNAL_RES,
 					      journal_flags);
 	if (ret) {
-		trace_trans_restart_journal_preres_get(trans->ip, _RET_IP_);
+		trace_trans_restart_journal_preres_get(trans->fn, _RET_IP_);
 		goto err;
 	}

View File

@@ -266,7 +266,7 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s,
 		return ret;
 
 	if (!bch2_trans_relock(trans)) {
-		trace_trans_restart_journal_preres_get(trans->ip, trace_ip);
+		trace_trans_restart_journal_preres_get(trans->fn, trace_ip);
 		return -EINTR;
 	}
 
@@ -305,7 +305,8 @@ static noinline void journal_transaction_name(struct btree_trans *trans)
 	l->entry.pad[0] = 0;
 	l->entry.pad[1] = 0;
 	l->entry.pad[2] = 0;
-	b = snprintf(l->d, buflen, "%ps", (void *) trans->ip);
+	b = min_t(unsigned, strlen(trans->fn), buflen);
+	memcpy(l->d, trans->fn, b);
 	while (b < buflen)
 		l->d[b++] = '\0';
 
@@ -425,7 +426,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
 	int ret;
 
 	if (race_fault()) {
-		trace_trans_restart_fault_inject(trans->ip, trace_ip);
+		trace_trans_restart_fault_inject(trans->fn, trace_ip);
 		trans->restarted = true;
 		return -EINTR;
 	}
@@ -618,7 +619,7 @@ fail:
 		bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b);
 	}
 
-	trace_trans_restart_would_deadlock_write(trans->ip);
+	trace_trans_restart_would_deadlock_write(trans->fn);
 	return btree_trans_restart(trans);
 }
 
@@ -649,9 +650,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 			char buf[200];
 
 			bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
-			bch_err(c, "invalid bkey %s on insert from %ps -> %ps: %s\n",
-				buf, (void *) trans->ip,
-				(void *) i->ip_allocated, invalid);
+			bch_err(c, "invalid bkey %s on insert from %s -> %ps: %s\n",
+				buf, trans->fn, (void *) i->ip_allocated, invalid);
 			bch2_fatal_error(c);
 			return -EINVAL;
 		}
@@ -757,7 +757,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
 			return 0;
 
 		if (ret == -EINTR)
-			trace_trans_restart_btree_node_split(trans->ip, trace_ip,
+			trace_trans_restart_btree_node_split(trans->fn, trace_ip,
 							     i->btree_id, &i->path->pos);
 		break;
 	case BTREE_INSERT_NEED_MARK_REPLICAS:
@@ -770,7 +770,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
 		if (bch2_trans_relock(trans))
 			return 0;
 
-		trace_trans_restart_mark_replicas(trans->ip, trace_ip);
+		trace_trans_restart_mark_replicas(trans->fn, trace_ip);
 		ret = -EINTR;
 		break;
 	case BTREE_INSERT_NEED_JOURNAL_RES:
@@ -790,13 +790,13 @@ int bch2_trans_commit_error(struct btree_trans *trans,
 		if (bch2_trans_relock(trans))
 			return 0;
 
-		trace_trans_restart_journal_res_get(trans->ip, trace_ip);
+		trace_trans_restart_journal_res_get(trans->fn, trace_ip);
 		ret = -EINTR;
 		break;
 	case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
 		bch2_trans_unlock(trans);
 
-		trace_trans_blocked_journal_reclaim(trans->ip, trace_ip);
+		trace_trans_blocked_journal_reclaim(trans->fn, trace_ip);
 
 		wait_event_freezable(c->journal.reclaim_wait,
 				     (ret = journal_reclaim_wait_done(c)));
@@ -806,7 +806,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
 		if (bch2_trans_relock(trans))
 			return 0;
 
-		trace_trans_restart_journal_reclaim(trans->ip, trace_ip);
+		trace_trans_restart_journal_reclaim(trans->fn, trace_ip);
 		ret = -EINTR;
 		break;
 	default:
@@ -815,7 +815,9 @@ int bch2_trans_commit_error(struct btree_trans *trans,
 	}
 
 	BUG_ON((ret == EINTR || ret == -EAGAIN) && !trans->restarted);
-	BUG_ON(ret == -ENOSPC && (trans->flags & BTREE_INSERT_NOFAIL));
+	BUG_ON(ret == -ENOSPC &&
+	       !(trans->flags & BTREE_INSERT_NOWAIT) &&
+	       (trans->flags & BTREE_INSERT_NOFAIL));
 
 	return ret;
 }
@@ -899,7 +901,7 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
 		}
 
 		if (ret == -EINTR)
-			trace_trans_restart_mark(trans->ip, _RET_IP_,
+			trace_trans_restart_mark(trans->fn, _RET_IP_,
 						 i->btree_id, &i->path->pos);
 		if (ret)
 			return ret;
@@ -929,7 +931,7 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
 						BTREE_TRIGGER_OVERWRITE|i->flags);
 
 			if (ret == -EINTR)
-				trace_trans_restart_mark(trans->ip, _RET_IP_,
+				trace_trans_restart_mark(trans->fn, _RET_IP_,
 							 i->btree_id, &i->path->pos);
 			if (ret)
 				return ret;
@@ -996,7 +998,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
 		BUG_ON(!i->path->should_be_locked);
 
 		if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) {
-			trace_trans_restart_upgrade(trans->ip, _RET_IP_,
+			trace_trans_restart_upgrade(trans->fn, _RET_IP_,
 						    i->btree_id, &i->path->pos);
 			ret = btree_trans_restart(trans);
 			goto out;

View File

@@ -564,9 +564,10 @@ static int bch2_mark_alloc(struct btree_trans *trans,
 		 * before the bucket became empty again, then the we don't have
 		 * to wait on a journal flush before we can reuse the bucket:
 		 */
-		v->journal_seq = !new_u.data_type &&
+		new_u.journal_seq = !new_u.data_type &&
 			bch2_journal_noflush_seq(&c->journal, journal_seq)
-			? 0 : cpu_to_le64(journal_seq);
+			? 0 : journal_seq;
+		v->journal_seq = cpu_to_le64(new_u.journal_seq);
 	}
 
 	ca = bch_dev_bkey_exists(c, new.k->p.inode);
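
Note: this appears to be the fix the commit title refers to. The old code wrote the computed sequence only into the on-disk field v->journal_seq, leaving the unpacked in-memory copy (new_u.journal_seq) holding whatever it held before; the new code computes into the in-memory struct first and derives the little-endian on-disk value from it, so the two can never disagree. The bug class in miniature (hypothetical names, endianness conversion omitted):

	#include <stdint.h>
	#include <stdio.h>

	struct alloc_unpacked { uint64_t journal_seq; };	/* CPU-native copy */
	struct alloc_ondisk   { uint64_t journal_seq_le; };	/* on-disk layout */

	static void mark_buggy(struct alloc_unpacked *u, struct alloc_ondisk *v,
			       uint64_t seq)
	{
		/* BUG: only the on-disk field is written; u->journal_seq
		 * keeps its old (possibly uninitialized) value. */
		(void) u;
		v->journal_seq_le = seq;
	}

	static void mark_fixed(struct alloc_unpacked *u, struct alloc_ondisk *v,
			       uint64_t seq)
	{
		/* Fix: update the in-memory copy, then encode it. */
		u->journal_seq = seq;
		v->journal_seq_le = u->journal_seq;
	}

	int main(void)
	{
		struct alloc_unpacked u = { 0 };
		struct alloc_ondisk v = { 0 };

		mark_buggy(&u, &v, 41);		/* u is now stale */
		mark_fixed(&u, &v, 42);		/* u and v agree */
		printf("in-memory %llu, on-disk %llu\n",
		       (unsigned long long) u.journal_seq,
		       (unsigned long long) v.journal_seq_le);
		return 0;
	}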

View File

@@ -568,8 +568,11 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	if (!dev)
+		return -EINVAL;
+
 	for_each_online_member(ca, c, i)
-		if (ca->disk_sb.bdev->bd_dev == dev) {
+		if (ca->dev == dev) {
 			percpu_ref_put(&ca->io_ref);
 			return i;
 		}

View File

@@ -407,16 +407,12 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
 }
 
 #ifdef __KERNEL__
-int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
+static int __bch2_request_key(char *key_description, struct bch_key *key)
 {
-	char key_description[60];
 	struct key *keyring_key;
 	const struct user_key_payload *ukp;
 	int ret;
 
-	snprintf(key_description, sizeof(key_description),
-		 "bcachefs:%pUb", &sb->user_uuid);
-
 	keyring_key = request_key(&key_type_logon, key_description, NULL);
 	if (IS_ERR(keyring_key))
 		return PTR_ERR(keyring_key);
@@ -436,16 +432,10 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
 }
 #else
 #include <keyutils.h>
-#include <uuid/uuid.h>
 
-int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
+static int __bch2_request_key(char *key_description, struct bch_key *key)
 {
 	key_serial_t key_id;
-	char key_description[60];
-	char uuid[40];
-
-	uuid_unparse_lower(sb->user_uuid.b, uuid);
-	sprintf(key_description, "bcachefs:%s", uuid);
 
 	key_id = request_key("user", key_description, NULL,
 			     KEY_SPEC_USER_KEYRING);
@@ -459,6 +449,17 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
 }
 #endif
 
+int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
+{
+	char key_description[60];
+	char uuid[40];
+
+	uuid_unparse_lower(sb->user_uuid.b, uuid);
+	sprintf(key_description, "bcachefs:%s", uuid);
+
+	return __bch2_request_key(key_description, key);
+}
+
 int bch2_decrypt_sb_key(struct bch_fs *c,
 			struct bch_sb_field_crypt *crypt,
 			struct bch_key *key)
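
Note: the refactor builds the key description ("bcachefs:" plus the superblock's user UUID, lowercase) in one place and confines the kernel/userspace difference to how the keyring is queried. A sketch of constructing the same description and loading a key into the user keyring with libuuid and keyutils (link with -luuid -lkeyutils; the payload here is a placeholder, and a random UUID stands in for sb->user_uuid):

	#include <stdio.h>
	#include <string.h>
	#include <uuid/uuid.h>
	#include <keyutils.h>

	int main(void)
	{
		char key_description[60];
		char uuid_str[40];
		uuid_t uu;
		key_serial_t id;

		uuid_generate(uu);		/* stand-in for sb->user_uuid */
		uuid_unparse_lower(uu, uuid_str);
		snprintf(key_description, sizeof(key_description),
			 "bcachefs:%s", uuid_str);

		/* Add a key under the name the filesystem will request later */
		id = add_key("user", key_description,
			     "dummy-payload", strlen("dummy-payload"),
			     KEY_SPEC_USER_KEYRING);
		if (id < 0) {
			perror("add_key");
			return 1;
		}

		printf("added key %s id %d\n", key_description, id);
		return 0;
	}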

View File

@@ -17,24 +17,20 @@ static int group_cmp(const void *_l, const void *_r)
 		strncmp(l->label, r->label, sizeof(l->label));
 }
 
-static const char *bch2_sb_disk_groups_validate(struct bch_sb *sb,
-						struct bch_sb_field *f)
+static int bch2_sb_disk_groups_validate(struct bch_sb *sb,
+					struct bch_sb_field *f,
+					struct printbuf *err)
 {
 	struct bch_sb_field_disk_groups *groups =
 		field_to_type(f, disk_groups);
 	struct bch_disk_group *g, *sorted = NULL;
-	struct bch_sb_field_members *mi;
-	struct bch_member *m;
-	unsigned i, nr_groups, len;
-	const char *err = NULL;
+	struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
+	unsigned nr_groups = disk_groups_nr(groups);
+	unsigned i, len;
+	int ret = -EINVAL;
 
-	mi		= bch2_sb_get_members(sb);
-	groups		= bch2_sb_get_disk_groups(sb);
-	nr_groups	= disk_groups_nr(groups);
-
-	for (m = mi->members;
-	     m < mi->members + sb->nr_devices;
-	     m++) {
+	for (i = 0; i < sb->nr_devices; i++) {
+		struct bch_member *m = mi->members + i;
 		unsigned g;
 
 		if (!BCH_MEMBER_GROUP(m))
@@ -42,45 +38,53 @@ static const char *bch2_sb_disk_groups_validate(struct bch_sb *sb,
 
 		g = BCH_MEMBER_GROUP(m) - 1;
 
-		if (g >= nr_groups ||
-		    BCH_GROUP_DELETED(&groups->entries[g]))
-			return "disk has invalid group";
+		if (g >= nr_groups) {
+			pr_buf(err, "disk %u has invalid label %u (have %u)",
+			       i, g, nr_groups);
+			return -EINVAL;
+		}
+
+		if (BCH_GROUP_DELETED(&groups->entries[g])) {
+			pr_buf(err, "disk %u has deleted label %u", i, g);
+			return -EINVAL;
+		}
 	}
 
 	if (!nr_groups)
-		return NULL;
+		return 0;
+
+	for (i = 0; i < nr_groups; i++) {
+		g = groups->entries + i;
 
-	for (g = groups->entries;
-	     g < groups->entries + nr_groups;
-	     g++) {
 		if (BCH_GROUP_DELETED(g))
 			continue;
 
 		len = strnlen(g->label, sizeof(g->label));
 		if (!len) {
-			err = "group with empty label";
-			goto err;
+			pr_buf(err, "label %u empty", i);
+			return -EINVAL;
 		}
 	}
 
 	sorted = kmalloc_array(nr_groups, sizeof(*sorted), GFP_KERNEL);
 	if (!sorted)
-		return "cannot allocate memory";
+		return -ENOMEM;
 
 	memcpy(sorted, groups->entries, nr_groups * sizeof(*sorted));
 	sort(sorted, nr_groups, sizeof(*sorted), group_cmp, NULL);
 
-	for (i = 0; i + 1 < nr_groups; i++)
-		if (!BCH_GROUP_DELETED(sorted + i) &&
-		    !group_cmp(sorted + i, sorted + i + 1)) {
-			err = "duplicate groups";
+	for (g = sorted; g + 1 < sorted + nr_groups; g++)
+		if (!BCH_GROUP_DELETED(g) &&
+		    !group_cmp(&g[0], &g[1])) {
+			pr_buf(err, "duplicate label %llu.", BCH_GROUP_PARENT(g));
+			bch_scnmemcpy(err, g->label, strnlen(g->label, sizeof(g->label)));
 			goto err;
 		}
 
-	err = NULL;
+	ret = 0;
 err:
 	kfree(sorted);
-	return err;
+	return 0;
 }
 
 static void bch2_sb_disk_groups_to_text(struct printbuf *out,

View File

@@ -17,10 +17,6 @@
  *
  * With one based indexing each level of the tree starts at a power of two -
  * good for cacheline alignment:
- *
- * Size parameter is treated as if we were using 0 based indexing, however:
- * valid nodes, and inorder indices, are in the range [1..size) - that is, there
- * are actually size - 1 elements
  */
 
 static inline unsigned eytzinger1_child(unsigned i, unsigned child)
@@ -42,12 +38,12 @@ static inline unsigned eytzinger1_right_child(unsigned i)
 
 static inline unsigned eytzinger1_first(unsigned size)
 {
-	return rounddown_pow_of_two(size - 1);
+	return rounddown_pow_of_two(size);
 }
 
 static inline unsigned eytzinger1_last(unsigned size)
 {
-	return rounddown_pow_of_two(size) - 1;
+	return rounddown_pow_of_two(size + 1) - 1;
 }
 
 /*
@@ -62,13 +58,13 @@ static inline unsigned eytzinger1_last(unsigned size)
 
 static inline unsigned eytzinger1_next(unsigned i, unsigned size)
 {
-	EBUG_ON(i >= size);
+	EBUG_ON(i > size);
 
-	if (eytzinger1_right_child(i) < size) {
+	if (eytzinger1_right_child(i) <= size) {
 		i = eytzinger1_right_child(i);
 
-		i <<= __fls(size) - __fls(i);
-		i >>= i >= size;
+		i <<= __fls(size + 1) - __fls(i);
+		i >>= i > size;
 	} else {
 		i >>= ffz(i) + 1;
 	}
@@ -78,14 +74,14 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)
 
 static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
 {
-	EBUG_ON(i >= size);
+	EBUG_ON(i > size);
 
-	if (eytzinger1_left_child(i) < size) {
+	if (eytzinger1_left_child(i) <= size) {
 		i = eytzinger1_left_child(i) + 1;
 
-		i <<= __fls(size) - __fls(i);
+		i <<= __fls(size + 1) - __fls(i);
 		i -= 1;
-		i >>= i >= size;
+		i >>= i > size;
 	} else {
 		i >>= __ffs(i) + 1;
 	}
@@ -95,17 +91,17 @@ static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
 
 static inline unsigned eytzinger1_extra(unsigned size)
 {
-	return (size - rounddown_pow_of_two(size - 1)) << 1;
+	return (size + 1 - rounddown_pow_of_two(size)) << 1;
 }
 
 static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
 					       unsigned extra)
 {
 	unsigned b = __fls(i);
-	unsigned shift = __fls(size - 1) - b;
+	unsigned shift = __fls(size) - b;
 	int s;
 
-	EBUG_ON(!i || i >= size);
+	EBUG_ON(!i || i > size);
 
 	i ^= 1U << b;
 	i <<= 1;
@@ -130,7 +126,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
 	unsigned shift;
 	int s;
 
-	EBUG_ON(!i || i >= size);
+	EBUG_ON(!i || i > size);
 
 	/*
 	 * sign bit trick:
@@ -144,7 +140,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
 	shift = __ffs(i);
 	i >>= shift + 1;
 
-	i |= 1U << (__fls(size - 1) - shift);
+	i |= 1U << (__fls(size) - shift);
 
 	return i;
 }
@@ -185,39 +181,39 @@ static inline unsigned eytzinger0_right_child(unsigned i)
 
 static inline unsigned eytzinger0_first(unsigned size)
 {
-	return eytzinger1_first(size + 1) - 1;
+	return eytzinger1_first(size) - 1;
 }
 
 static inline unsigned eytzinger0_last(unsigned size)
 {
-	return eytzinger1_last(size + 1) - 1;
+	return eytzinger1_last(size) - 1;
 }
 
 static inline unsigned eytzinger0_next(unsigned i, unsigned size)
 {
-	return eytzinger1_next(i + 1, size + 1) - 1;
+	return eytzinger1_next(i + 1, size) - 1;
 }
 
 static inline unsigned eytzinger0_prev(unsigned i, unsigned size)
 {
-	return eytzinger1_prev(i + 1, size + 1) - 1;
+	return eytzinger1_prev(i + 1, size) - 1;
 }
 
 static inline unsigned eytzinger0_extra(unsigned size)
 {
-	return eytzinger1_extra(size + 1);
+	return eytzinger1_extra(size);
 }
 
 static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size,
 					       unsigned extra)
 {
-	return __eytzinger1_to_inorder(i + 1, size + 1, extra) - 1;
+	return __eytzinger1_to_inorder(i + 1, size, extra) - 1;
 }
 
 static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size,
 					       unsigned extra)
 {
-	return __inorder_to_eytzinger1(i + 1, size + 1, extra) - 1;
+	return __inorder_to_eytzinger1(i + 1, size, extra) - 1;
 }
 
 static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size)
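
Note: the change through this header is a semantics shift, not a behavior fix: size now means the number of elements, with valid one-based indices [1..size], instead of one past the last valid index. That is what removes the scattered size - 1 / size + 1 adjustments in the bset.c hunks above. A standalone sketch of in-order traversal under the new convention, substituting gcc builtins for the kernel's __fls/ffz:

	#include <stdio.h>

	static unsigned fls32(unsigned v) { return 31 - __builtin_clz(v); }
	static unsigned ffz32(unsigned v) { return __builtin_ctz(~v); }

	/* New convention: valid nodes are [1..size]; index 1 is the root,
	 * node i has children 2i and 2i+1. */
	static unsigned eyt1_first(unsigned size)
	{
		return 1U << fls32(size);	/* leftmost node */
	}

	static unsigned eyt1_next(unsigned i, unsigned size)
	{
		if (2 * i + 1 <= size) {	/* right child exists */
			i = 2 * i + 1;
			/* descend left as far as possible */
			i <<= fls32(size + 1) - fls32(i);
			i >>= i > size;		/* back up if we overshot */
		} else {
			i >>= ffz32(i) + 1;	/* pop while we're a right child */
		}
		return i;			/* 0 means traversal done */
	}

	int main(void)
	{
		/* sorted data laid out in eytzinger order, 1-based */
		const char *node[] = { NULL, "d", "b", "e", "a", "c" };
		unsigned size = 5, i;

		/* prints: a b c d e */
		for (i = eyt1_first(size); i; i = eyt1_next(i, size))
			printf("%s ", node[i]);
		printf("\n");
		return 0;
	}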

View File

@@ -1024,7 +1024,7 @@ retry:
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
 			     SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot),
-			     BTREE_ITER_SLOTS|BTREE_ITER_FILTER_SNAPSHOTS);
+			     BTREE_ITER_SLOTS);
 	while (1) {
 		struct bkey_s_c k;
 		unsigned bytes, sectors, offset_into_extent;

View File

@@ -134,7 +134,6 @@ int __must_check bch2_write_inode(struct bch_fs *c,
 	int ret;
 
 	bch2_trans_init(&trans, c, 0, 512);
-	trans.ip = _RET_IP_;
retry:
 	bch2_trans_begin(&trans);

View File

@@ -2241,7 +2241,7 @@ retry:
 	bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
 			     SPOS(inum.inum, bvec_iter.bi_sector, snapshot),
-			     BTREE_ITER_SLOTS|BTREE_ITER_FILTER_SNAPSHOTS);
+			     BTREE_ITER_SLOTS);
 	while (1) {
 		unsigned bytes, sectors, offset_into_extent;
 		enum btree_id data_btree = BTREE_ID_extents;

View File

@@ -893,12 +893,13 @@ static void bch2_journal_read_device(struct closure *cl)
 	struct journal_device *ja =
 		container_of(cl, struct journal_device, read);
 	struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
+	struct bch_fs *c = ca->fs;
 	struct journal_list *jlist =
 		container_of(cl->parent, struct journal_list, cl);
 	struct journal_read_buf buf = { NULL, 0 };
 	u64 min_seq = U64_MAX;
 	unsigned i;
-	int ret;
+	int ret = 0;
 
 	if (!ja->nr)
 		goto out;
@@ -944,6 +945,7 @@ static void bch2_journal_read_device(struct closure *cl)
 	ja->discard_idx = ja->dirty_idx_ondisk =
 		ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
 out:
+	bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret);
 	kvpfree(buf.data, buf.size);
 	percpu_ref_put(&ca->io_ref);
 	closure_return(cl);

View File

@ -66,6 +66,12 @@ blacklist_entry_try_merge(struct bch_fs *c,
return bl; return bl;
} }
static bool bl_entry_contig_or_overlaps(struct journal_seq_blacklist_entry *e,
u64 start, u64 end)
{
return !(end < le64_to_cpu(e->start) || le64_to_cpu(e->end) < start);
}
int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
{ {
struct bch_sb_field_journal_seq_blacklist *bl; struct bch_sb_field_journal_seq_blacklist *bl;
@ -76,28 +82,21 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
nr = blacklist_nr_entries(bl); nr = blacklist_nr_entries(bl);
if (bl) { for (i = 0; i < nr; i++) {
for (i = 0; i < nr; i++) { struct journal_seq_blacklist_entry *e =
struct journal_seq_blacklist_entry *e = bl->start + i;
bl->start + i;
if (start == le64_to_cpu(e->start) && if (bl_entry_contig_or_overlaps(e, start, end)) {
end == le64_to_cpu(e->end)) e->start = cpu_to_le64(min(start, le64_to_cpu(e->start)));
goto out; e->end = cpu_to_le64(max(end, le64_to_cpu(e->end)));
if (start <= le64_to_cpu(e->start) && if (i + 1 < nr)
end >= le64_to_cpu(e->end)) { bl = blacklist_entry_try_merge(c,
e->start = cpu_to_le64(start); bl, i);
e->end = cpu_to_le64(end); if (i)
bl = blacklist_entry_try_merge(c,
if (i + 1 < nr) bl, i - 1);
bl = blacklist_entry_try_merge(c, goto out_write_sb;
bl, i);
if (i)
bl = blacklist_entry_try_merge(c,
bl, i - 1);
goto out_write_sb;
}
} }
} }
@ -189,27 +188,34 @@ int bch2_blacklist_table_initialize(struct bch_fs *c)
return 0; return 0;
} }
static const char * static int bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, struct bch_sb_field *f,
struct bch_sb_field *f) struct printbuf *err)
{ {
struct bch_sb_field_journal_seq_blacklist *bl = struct bch_sb_field_journal_seq_blacklist *bl =
field_to_type(f, journal_seq_blacklist); field_to_type(f, journal_seq_blacklist);
struct journal_seq_blacklist_entry *i; unsigned i, nr = blacklist_nr_entries(bl);
unsigned nr = blacklist_nr_entries(bl);
for (i = bl->start; i < bl->start + nr; i++) { for (i = 0; i < nr; i++) {
if (le64_to_cpu(i->start) >= struct journal_seq_blacklist_entry *e = bl->start + i;
le64_to_cpu(i->end))
return "entry start >= end";
if (i + 1 < bl->start + nr && if (le64_to_cpu(e->start) >=
le64_to_cpu(i[0].end) > le64_to_cpu(e->end)) {
le64_to_cpu(i[1].start)) pr_buf(err, "entry %u start >= end (%llu >= %llu)",
return "entries out of order"; i, le64_to_cpu(e->start), le64_to_cpu(e->end));
return -EINVAL;
}
if (i + 1 < nr &&
le64_to_cpu(e[0].end) >
le64_to_cpu(e[1].start)) {
pr_buf(err, "entry %u out of order with next entry (%llu > %llu)",
i + 1, le64_to_cpu(e[0].end), le64_to_cpu(e[1].start));
return -EINVAL;
}
} }
return NULL; return 0;
} }
static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out, static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
@ -235,3 +241,81 @@ const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
.validate = bch2_sb_journal_seq_blacklist_validate, .validate = bch2_sb_journal_seq_blacklist_validate,
.to_text = bch2_sb_journal_seq_blacklist_to_text .to_text = bch2_sb_journal_seq_blacklist_to_text
}; };
void bch2_blacklist_entries_gc(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs,
journal_seq_blacklist_gc_work);
struct journal_seq_blacklist_table *t;
struct bch_sb_field_journal_seq_blacklist *bl;
struct journal_seq_blacklist_entry *src, *dst;
struct btree_trans trans;
unsigned i, nr, new_nr;
int ret;
bch2_trans_init(&trans, c, 0, 0);
for (i = 0; i < BTREE_ID_NR; i++) {
struct btree_iter iter;
struct btree *b;
bch2_trans_node_iter_init(&trans, &iter, i, POS_MIN,
0, 0, BTREE_ITER_PREFETCH);
retry:
bch2_trans_begin(&trans);
b = bch2_btree_iter_peek_node(&iter);
while (!(ret = PTR_ERR_OR_ZERO(b)) &&
b &&
!test_bit(BCH_FS_STOPPING, &c->flags))
b = bch2_btree_iter_next_node(&iter);
if (ret == -EINTR)
goto retry;
bch2_trans_iter_exit(&trans, &iter);
}
bch2_trans_exit(&trans);
if (ret)
return;
mutex_lock(&c->sb_lock);
bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
if (!bl)
goto out;
nr = blacklist_nr_entries(bl);
dst = bl->start;
t = c->journal_seq_blacklist_table;
BUG_ON(nr != t->nr);
for (src = bl->start, i = eytzinger0_first(t->nr);
src < bl->start + nr;
src++, i = eytzinger0_next(i, nr)) {
BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
BUG_ON(t->entries[i].end != le64_to_cpu(src->end));
if (t->entries[i].dirty)
*dst++ = *src;
}
new_nr = dst - bl->start;
bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
if (new_nr != nr) {
bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
new_nr ? sb_blacklist_u64s(new_nr) : 0);
BUG_ON(new_nr && !bl);
if (!new_nr)
c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3));
bch2_write_super(c);
}
out:
mutex_unlock(&c->sb_lock);
}
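
Note: the GC walks the superblock entries linearly while stepping the in-memory table with eytzinger0_first()/eytzinger0_next(), an in-order traversal of the implicit search-tree layout, so both walks visit entries in the same sorted order, which is what the BUG_ONs assert. Below is a self-contained sketch of in-order traversal over a 0-based eytzinger layout; the real eytzinger0_* helpers differ in details, so treat it as illustrative only:

#include <stdio.h>

/* Children of node i are 2i+1 and 2i+2 (0-based implicit binary tree). */
static size_t eyt_first(size_t n)
{
	size_t i = 0;

	while (2 * i + 1 < n)
		i = 2 * i + 1;		/* descend to the leftmost node */
	return i;
}

static size_t eyt_next(size_t i, size_t n)
{
	if (2 * i + 2 < n) {		/* right subtree exists: its leftmost node */
		i = 2 * i + 2;
		while (2 * i + 1 < n)
			i = 2 * i + 1;
		return i;
	}
	while (i && !(i & 1))		/* ascend while i is a right child */
		i = (i - 1) / 2;
	return i ? (i - 1) / 2 : n;	/* parent, or n == traversal done */
}

int main(void)
{
	int a[] = { 4, 2, 6, 1, 3, 5, 7 };	/* eytzinger layout of 1..7 */
	size_t n = sizeof(a) / sizeof(a[0]), i;

	for (i = eyt_first(n); i < n; i = eyt_next(i, n))
		printf("%d ", a[i]);		/* prints 1 2 3 4 5 6 7 */
	printf("\n");
	return 0;
}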

View File

@ -17,4 +17,6 @@ int bch2_blacklist_table_initialize(struct bch_fs *);
extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist; extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
void bch2_blacklist_entries_gc(struct work_struct *);
#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */ #endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */

View File

@ -1,13 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _K_EYTZINGER_H
#define _K_EYTZINGER_H
/* One based indexing */
/* k = number of children */
static inline unsigned k_eytzinger_child(unsigned k, unsigned i, unsigned child)
{
return (k * i + child) - (k - 1);
}
#endif /* _K_EYTZINGER_H */

View File

@ -332,7 +332,7 @@ enum opt_type {
x(journal_transaction_names, u8, \ x(journal_transaction_names, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \ OPT_BOOL(), \
BCH_SB_JOURNAL_TRANSACTION_NAMES, false, \ BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \
NULL, "Log transaction function names in journal") \ NULL, "Log transaction function names in journal") \
x(noexcl, u8, \ x(noexcl, u8, \
OPT_FS|OPT_MOUNT, \ OPT_FS|OPT_MOUNT, \

View File

@ -6,15 +6,17 @@
#include "subvolume.h" #include "subvolume.h"
#include "super-io.h" #include "super-io.h"
static const char *bch2_sb_validate_quota(struct bch_sb *sb, static int bch2_sb_validate_quota(struct bch_sb *sb, struct bch_sb_field *f,
struct bch_sb_field *f) struct printbuf *err)
{ {
struct bch_sb_field_quota *q = field_to_type(f, quota); struct bch_sb_field_quota *q = field_to_type(f, quota);
if (vstruct_bytes(&q->field) != sizeof(*q)) if (vstruct_bytes(&q->field) < sizeof(*q)) {
return "invalid field quota: wrong size"; pr_buf(err, "wrong size (got %llu should be %zu)",
vstruct_bytes(&q->field), sizeof(*q));
return -EINVAL;
}
return NULL; return 0;
} }
const struct bch_sb_field_ops bch_sb_field_ops_quota = { const struct bch_sb_field_ops bch_sb_field_ops_quota = {

View File

@ -519,7 +519,7 @@ static int bch2_journal_replay(struct bch_fs *c)
size_t i; size_t i;
int ret; int ret;
keys_sorted = kmalloc_array(sizeof(*keys_sorted), keys->nr, GFP_KERNEL); keys_sorted = kvmalloc_array(sizeof(*keys_sorted), keys->nr, GFP_KERNEL);
if (!keys_sorted) if (!keys_sorted)
return -ENOMEM; return -ENOMEM;
@ -530,10 +530,8 @@ static int bch2_journal_replay(struct bch_fs *c)
sizeof(keys_sorted[0]), sizeof(keys_sorted[0]),
journal_sort_seq_cmp, NULL); journal_sort_seq_cmp, NULL);
if (keys->nr) { if (keys->nr)
bch_verbose(c, "starting journal replay, %zu keys", keys->nr);
replay_now_at(j, keys->journal_seq_base); replay_now_at(j, keys->journal_seq_base);
}
for (i = 0; i < keys->nr; i++) { for (i = 0; i < keys->nr; i++) {
k = keys_sorted[i]; k = keys_sorted[i];
@ -563,7 +561,7 @@ static int bch2_journal_replay(struct bch_fs *c)
bch2_journal_flush_all_pins(j); bch2_journal_flush_all_pins(j);
ret = bch2_journal_error(j); ret = bch2_journal_error(j);
err: err:
kfree(keys_sorted); kvfree(keys_sorted);
return ret; return ret;
} }
@ -901,7 +899,6 @@ static int bch2_fs_initialize_subvolumes(struct bch_fs *c)
static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
{ {
struct bch_fs *c = trans->c;
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bch_inode_unpacked inode; struct bch_inode_unpacked inode;
@ -915,7 +912,7 @@ static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
goto err; goto err;
if (!bkey_is_inode(k.k)) { if (!bkey_is_inode(k.k)) {
bch_err(c, "root inode not found"); bch_err(trans->c, "root inode not found");
ret = -ENOENT; ret = -ENOENT;
goto err; goto err;
} }
@ -1008,6 +1005,7 @@ int bch2_fs_recovery(struct bch_fs *c)
if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
struct journal_replay *i; struct journal_replay *i;
bch_verbose(c, "starting journal read");
ret = bch2_journal_read(c, &c->journal_entries, ret = bch2_journal_read(c, &c->journal_entries,
&blacklist_seq, &journal_seq); &blacklist_seq, &journal_seq);
if (ret) if (ret)
@ -1067,6 +1065,16 @@ use_clean:
if (ret) if (ret)
goto err; goto err;
/*
* After an unclean shutdown, skip the next few journal sequence
* numbers as they may have been referenced by btree writes that
* happened before their corresponding journal writes - those btree
* writes need to be ignored, by skipping and blacklisting the next few
* journal sequence numbers:
*/
if (!c->sb.clean)
journal_seq += 8;
if (blacklist_seq != journal_seq) { if (blacklist_seq != journal_seq) {
ret = bch2_journal_seq_blacklist_add(c, ret = bch2_journal_seq_blacklist_add(c,
blacklist_seq, journal_seq); blacklist_seq, journal_seq);
@ -1141,7 +1149,7 @@ use_clean:
if (c->opts.norecovery) if (c->opts.norecovery)
goto out; goto out;
bch_verbose(c, "starting journal replay"); bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr);
err = "journal replay failed"; err = "journal replay failed";
ret = bch2_journal_replay(c); ret = bch2_journal_replay(c);
if (ret) if (ret)
@ -1199,14 +1207,6 @@ use_clean:
} }
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
/*
* With journal replay done, we can clear the journal seq blacklist
* table:
*/
BUG_ON(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
if (le16_to_cpu(c->sb.version_min) >= bcachefs_metadata_version_btree_ptr_sectors_written)
bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, 0);
if (c->opts.version_upgrade) { if (c->opts.version_upgrade) {
c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
@ -1248,6 +1248,10 @@ use_clean:
bch_info(c, "scanning for old btree nodes done"); bch_info(c, "scanning for old btree nodes done");
} }
if (c->journal_seq_blacklist_table &&
c->journal_seq_blacklist_table->nr > 128)
queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
ret = 0; ret = 0;
out: out:
set_bit(BCH_FS_FSCK_DONE, &c->flags); set_bit(BCH_FS_FSCK_DONE, &c->flags);

View File

@ -41,18 +41,19 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
{ {
unsigned i; unsigned i;
pr_buf(out, "%s: %u/%u [", if (e->data_type < BCH_DATA_NR)
bch2_data_types[e->data_type], pr_buf(out, "%s", bch2_data_types[e->data_type]);
e->nr_required, else
e->nr_devs); pr_buf(out, "(invalid data type %u)", e->data_type);
pr_buf(out, ": %u/%u [", e->nr_required, e->nr_devs);
for (i = 0; i < e->nr_devs; i++) for (i = 0; i < e->nr_devs; i++)
pr_buf(out, i ? " %u" : "%u", e->devs[i]); pr_buf(out, i ? " %u" : "%u", e->devs[i]);
pr_buf(out, "]"); pr_buf(out, "]");
} }
void bch2_cpu_replicas_to_text(struct printbuf *out, void bch2_cpu_replicas_to_text(struct printbuf *out,
struct bch_replicas_cpu *r) struct bch_replicas_cpu *r)
{ {
struct bch_replicas_entry *e; struct bch_replicas_entry *e;
bool first = true; bool first = true;
@ -808,67 +809,78 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
return 0; return 0;
} }
static const char *check_dup_replicas_entries(struct bch_replicas_cpu *cpu_r) static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
struct bch_sb *sb,
struct printbuf *err)
{ {
unsigned i; struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
unsigned i, j;
sort_cmp_size(cpu_r->entries, sort_cmp_size(cpu_r->entries,
cpu_r->nr, cpu_r->nr,
cpu_r->entry_size, cpu_r->entry_size,
memcmp, NULL); memcmp, NULL);
for (i = 0; i + 1 < cpu_r->nr; i++) { for (i = 0; i < cpu_r->nr; i++) {
struct bch_replicas_entry *l = struct bch_replicas_entry *e =
cpu_replicas_entry(cpu_r, i); cpu_replicas_entry(cpu_r, i);
struct bch_replicas_entry *r =
cpu_replicas_entry(cpu_r, i + 1);
BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0); if (e->data_type >= BCH_DATA_NR) {
pr_buf(err, "invalid data type in entry ");
bch2_replicas_entry_to_text(err, e);
return -EINVAL;
}
if (!memcmp(l, r, cpu_r->entry_size)) if (!e->nr_devs) {
return "duplicate replicas entry"; pr_buf(err, "no devices in entry ");
bch2_replicas_entry_to_text(err, e);
return -EINVAL;
}
if (e->nr_required > 1 &&
e->nr_required >= e->nr_devs) {
pr_buf(err, "bad nr_required in entry ");
bch2_replicas_entry_to_text(err, e);
return -EINVAL;
}
for (j = 0; j < e->nr_devs; j++)
if (!bch2_dev_exists(sb, mi, e->devs[j])) {
pr_buf(err, "invalid device %u in entry ", e->devs[j]);
bch2_replicas_entry_to_text(err, e);
return -EINVAL;
}
if (i + 1 < cpu_r->nr) {
struct bch_replicas_entry *n =
cpu_replicas_entry(cpu_r, i + 1);
BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);
if (!memcmp(e, n, cpu_r->entry_size)) {
pr_buf(err, "duplicate replicas entry ");
bch2_replicas_entry_to_text(err, e);
return -EINVAL;
}
}
} }
return NULL; return 0;
} }
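
Note: check_dup_replicas_entries() is folded into bch2_cpu_replicas_validate(), which keeps the same duplicate-detection strategy: sort the fixed-size entries with memcmp as the comparator so duplicates become adjacent, then do the per-entry checks plus one neighbour comparison in a single pass. A reduced sketch of the idea with toy entries (any total order works for duplicate detection, so plain memcmp suffices):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry { unsigned char bytes[8]; };	/* stand-in for a fixed entry_size */

static int entry_cmp(const void *l, const void *r)
{
	/* byte-wise order, not numeric order; fine for finding duplicates */
	return memcmp(l, r, sizeof(struct entry));
}

int main(void)
{
	struct entry e[4] = {
		{ "dev-3" }, { "dev-1" }, { "dev-3" }, { "dev-2" },
	};
	size_t i, n = 4;

	qsort(e, n, sizeof(e[0]), entry_cmp);

	for (i = 0; i + 1 < n; i++)
		if (!memcmp(&e[i], &e[i + 1], sizeof(e[0])))
			printf("duplicate replicas entry: %s\n",
			       (const char *) e[i].bytes);
	return 0;
}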
static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f) static int bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f,
struct printbuf *err)
{ {
struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas); struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
struct bch_sb_field_members *mi = bch2_sb_get_members(sb); struct bch_replicas_cpu cpu_r;
struct bch_replicas_cpu cpu_r = { .entries = NULL }; int ret;
struct bch_replicas_entry *e;
const char *err;
unsigned i;
for_each_replicas_entry(sb_r, e) {
err = "invalid replicas entry: invalid data type";
if (e->data_type >= BCH_DATA_NR)
goto err;
err = "invalid replicas entry: no devices";
if (!e->nr_devs)
goto err;
err = "invalid replicas entry: bad nr_required";
if (e->nr_required > 1 &&
e->nr_required >= e->nr_devs)
goto err;
err = "invalid replicas entry: invalid device";
for (i = 0; i < e->nr_devs; i++)
if (!bch2_dev_exists(sb, mi, e->devs[i]))
goto err;
}
err = "cannot allocate memory";
if (__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r)) if (__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r))
goto err; return -ENOMEM;
err = check_dup_replicas_entries(&cpu_r); ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
err:
kfree(cpu_r.entries); kfree(cpu_r.entries);
return err; return ret;
} }
static void bch2_sb_replicas_to_text(struct printbuf *out, static void bch2_sb_replicas_to_text(struct printbuf *out,
@ -893,38 +905,19 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
.to_text = bch2_sb_replicas_to_text, .to_text = bch2_sb_replicas_to_text,
}; };
static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f) static int bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f,
struct printbuf *err)
{ {
struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
struct bch_sb_field_members *mi = bch2_sb_get_members(sb); struct bch_replicas_cpu cpu_r;
struct bch_replicas_cpu cpu_r = { .entries = NULL }; int ret;
struct bch_replicas_entry_v0 *e;
const char *err;
unsigned i;
for_each_replicas_entry_v0(sb_r, e) {
err = "invalid replicas entry: invalid data type";
if (e->data_type >= BCH_DATA_NR)
goto err;
err = "invalid replicas entry: no devices";
if (!e->nr_devs)
goto err;
err = "invalid replicas entry: invalid device";
for (i = 0; i < e->nr_devs; i++)
if (!bch2_dev_exists(sb, mi, e->devs[i]))
goto err;
}
err = "cannot allocate memory";
if (__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r)) if (__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r))
goto err; return -ENOMEM;
err = check_dup_replicas_entries(&cpu_r); ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
err:
kfree(cpu_r.entries); kfree(cpu_r.entries);
return err; return ret;
} }
const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {

View File

@ -27,8 +27,8 @@ const char * const bch2_sb_fields[] = {
NULL NULL
}; };
static const char *bch2_sb_field_validate(struct bch_sb *, static int bch2_sb_field_validate(struct bch_sb *, struct bch_sb_field *,
struct bch_sb_field *); struct printbuf *);
struct bch_sb_field *bch2_sb_field_get(struct bch_sb *sb, struct bch_sb_field *bch2_sb_field_get(struct bch_sb *sb,
enum bch_sb_field_type type) enum bch_sb_field_type type)
@ -202,22 +202,31 @@ static inline void __bch2_sb_layout_size_assert(void)
BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512); BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512);
} }
static const char *validate_sb_layout(struct bch_sb_layout *layout) static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out)
{ {
u64 offset, prev_offset, max_sectors; u64 offset, prev_offset, max_sectors;
unsigned i; unsigned i;
if (uuid_le_cmp(layout->magic, BCACHE_MAGIC)) if (uuid_le_cmp(layout->magic, BCACHE_MAGIC)) {
return "Not a bcachefs superblock layout"; pr_buf(out, "Not a bcachefs superblock layout");
return -EINVAL;
}
if (layout->layout_type != 0) if (layout->layout_type != 0) {
return "Invalid superblock layout type"; pr_buf(out, "Invalid superblock layout type %u",
layout->layout_type);
return -EINVAL;
}
if (!layout->nr_superblocks) if (!layout->nr_superblocks) {
return "Invalid superblock layout: no superblocks"; pr_buf(out, "Invalid superblock layout: no superblocks");
return -EINVAL;
}
if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) {
return "Invalid superblock layout: too many superblocks"; pr_buf(out, "Invalid superblock layout: too many superblocks");
return -EINVAL;
}
max_sectors = 1 << layout->sb_max_size_bits; max_sectors = 1 << layout->sb_max_size_bits;
@ -226,122 +235,134 @@ static const char *validate_sb_layout(struct bch_sb_layout *layout)
for (i = 1; i < layout->nr_superblocks; i++) { for (i = 1; i < layout->nr_superblocks; i++) {
offset = le64_to_cpu(layout->sb_offset[i]); offset = le64_to_cpu(layout->sb_offset[i]);
if (offset < prev_offset + max_sectors) if (offset < prev_offset + max_sectors) {
return "Invalid superblock layout: superblocks overlap"; pr_buf(out, "Invalid superblock layout: superblocks overlap\n"
" (sb %u ends at %llu next starts at %llu",
i - 1, prev_offset + max_sectors, offset);
return -EINVAL;
}
prev_offset = offset; prev_offset = offset;
} }
return NULL; return 0;
} }
const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out)
{ {
struct bch_sb *sb = disk_sb->sb; struct bch_sb *sb = disk_sb->sb;
struct bch_sb_field *f; struct bch_sb_field *f;
struct bch_sb_field_members *mi; struct bch_sb_field_members *mi;
const char *err;
u32 version, version_min; u32 version, version_min;
u16 block_size; u16 block_size;
int ret;
version = le16_to_cpu(sb->version); version = le16_to_cpu(sb->version);
version_min = version >= bcachefs_metadata_version_new_versioning version_min = version >= bcachefs_metadata_version_new_versioning
? le16_to_cpu(sb->version_min) ? le16_to_cpu(sb->version_min)
: version; : version;
if (version >= bcachefs_metadata_version_max || if (version >= bcachefs_metadata_version_max) {
version_min < bcachefs_metadata_version_min) pr_buf(out, "Unsupported superblock version %u (min %u, max %u)",
return "Unsupported superblock version"; version, bcachefs_metadata_version_min, bcachefs_metadata_version_max);
return -EINVAL;
}
if (version_min > version) if (version_min < bcachefs_metadata_version_min) {
return "Bad minimum version"; pr_buf(out, "Unsupported superblock version %u (min %u, max %u)",
version_min, bcachefs_metadata_version_min, bcachefs_metadata_version_max);
return -EINVAL;
}
if (version_min > version) {
pr_buf(out, "Bad minimum version %u, greater than version field %u",
version_min, version);
return -EINVAL;
}
if (sb->features[1] || if (sb->features[1] ||
(le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) {
return "Filesystem has incompatible features"; pr_buf(out, "Filesystem has incompatible features");
return -EINVAL;
}
block_size = le16_to_cpu(sb->block_size); block_size = le16_to_cpu(sb->block_size);
if (block_size > PAGE_SECTORS) if (block_size > PAGE_SECTORS) {
return "Bad block size"; pr_buf(out, "Block size too big (got %u, max %u)",
block_size, PAGE_SECTORS);
return -EINVAL;
}
if (bch2_is_zero(sb->user_uuid.b, sizeof(uuid_le))) if (bch2_is_zero(sb->user_uuid.b, sizeof(uuid_le))) {
return "Bad user UUID"; pr_buf(out, "Bad user UUID (got zeroes)");
return -EINVAL;
}
if (bch2_is_zero(sb->uuid.b, sizeof(uuid_le))) if (bch2_is_zero(sb->uuid.b, sizeof(uuid_le))) {
return "Bad internal UUID"; pr_buf(out, "Bad intenal UUID (got zeroes)");
return -EINVAL;
}
if (!sb->nr_devices || if (!sb->nr_devices ||
sb->nr_devices <= sb->dev_idx || sb->nr_devices > BCH_SB_MEMBERS_MAX) {
sb->nr_devices > BCH_SB_MEMBERS_MAX) pr_buf(out, "Bad number of member devices %u (max %u)",
return "Bad number of member devices"; sb->nr_devices, BCH_SB_MEMBERS_MAX);
return -EINVAL;
}
if (!BCH_SB_META_REPLICAS_WANT(sb) || if (sb->dev_idx >= sb->nr_devices) {
BCH_SB_META_REPLICAS_WANT(sb) > BCH_REPLICAS_MAX) pr_buf(out, "Bad dev_idx (got %u, nr_devices %u)",
return "Invalid number of metadata replicas"; sb->dev_idx, sb->nr_devices);
return -EINVAL;
if (!BCH_SB_META_REPLICAS_REQ(sb) || }
BCH_SB_META_REPLICAS_REQ(sb) > BCH_REPLICAS_MAX)
return "Invalid number of metadata replicas";
if (!BCH_SB_DATA_REPLICAS_WANT(sb) ||
BCH_SB_DATA_REPLICAS_WANT(sb) > BCH_REPLICAS_MAX)
return "Invalid number of data replicas";
if (!BCH_SB_DATA_REPLICAS_REQ(sb) ||
BCH_SB_DATA_REPLICAS_REQ(sb) > BCH_REPLICAS_MAX)
return "Invalid number of data replicas";
if (BCH_SB_META_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR)
return "Invalid metadata checksum type";
if (BCH_SB_DATA_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR)
return "Invalid metadata checksum type";
if (BCH_SB_COMPRESSION_TYPE(sb) >= BCH_COMPRESSION_OPT_NR)
return "Invalid compression type";
if (!BCH_SB_BTREE_NODE_SIZE(sb))
return "Btree node size not set";
if (BCH_SB_GC_RESERVE(sb) < 5)
return "gc reserve percentage too small";
if (!sb->time_precision || if (!sb->time_precision ||
le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) {
return "invalid time precision"; pr_buf(out, "Invalid time precision: %u (min 1, max %lu)",
le32_to_cpu(sb->time_precision), NSEC_PER_SEC);
return -EINVAL;
}
/* validate layout */ /* validate layout */
err = validate_sb_layout(&sb->layout); ret = validate_sb_layout(&sb->layout, out);
if (err) if (ret)
return err; return ret;
vstruct_for_each(sb, f) { vstruct_for_each(sb, f) {
if (!f->u64s) if (!f->u64s) {
return "Invalid superblock: invalid optional field"; pr_buf(out, "Invalid superblock: optional with size 0 (type %u)",
le32_to_cpu(f->type));
return -EINVAL;
}
if (vstruct_next(f) > vstruct_last(sb)) if (vstruct_next(f) > vstruct_last(sb)) {
return "Invalid superblock: invalid optional field"; pr_buf(out, "Invalid superblock: optional field extends past end of superblock (type %u)",
le32_to_cpu(f->type));
return -EINVAL;
}
} }
/* members must be validated first: */ /* members must be validated first: */
mi = bch2_sb_get_members(sb); mi = bch2_sb_get_members(sb);
if (!mi) if (!mi) {
return "Invalid superblock: member info area missing"; pr_buf(out, "Invalid superblock: member info area missing");
return -EINVAL;
}
err = bch2_sb_field_validate(sb, &mi->field); ret = bch2_sb_field_validate(sb, &mi->field, out);
if (err) if (ret)
return err; return ret;
vstruct_for_each(sb, f) { vstruct_for_each(sb, f) {
if (le32_to_cpu(f->type) == BCH_SB_FIELD_members) if (le32_to_cpu(f->type) == BCH_SB_FIELD_members)
continue; continue;
err = bch2_sb_field_validate(sb, f); ret = bch2_sb_field_validate(sb, f, out);
if (err) if (ret)
return err; return ret;
} }
return NULL; return 0;
} }
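
Note: every validate hook now takes a struct printbuf and returns an int, so callers decide where messages end up. The following is a minimal stand-in for the printbuf/_PBUF/pr_buf trio, only to show the calling convention; it is a sketch, not the bcachefs implementation, and PBUF/validate_block_size are illustrative names:

#include <stdarg.h>
#include <stdio.h>

struct printbuf { char *pos, *end; };

#define PBUF(_buf, _len)  ((struct printbuf) { (_buf), (_buf) + (_len) })

static void pr_buf(struct printbuf *out, const char *fmt, ...)
{
	size_t room = out->end - out->pos;
	va_list args;
	int n;

	va_start(args, fmt);
	n = vsnprintf(out->pos, room, fmt, args);	/* always NUL-terminates */
	va_end(args);
	if (n > 0)	/* clamp: never advance past the terminating NUL */
		out->pos += ((size_t) n < room) ? (size_t) n : (room ? room - 1 : 0);
}

/* toy validator following the new int + printbuf convention */
static int validate_block_size(unsigned block_size, unsigned max,
			       struct printbuf *err)
{
	if (block_size > max) {
		pr_buf(err, "Block size too big (got %u, max %u)",
		       block_size, max);
		return -22;	/* -EINVAL */
	}
	return 0;
}

int main(void)
{
	char _err[256];
	struct printbuf err = PBUF(_err, sizeof(_err));

	if (validate_block_size(16, 8, &err))
		fprintf(stderr, "error validating superblock: %s\n", _err);
	return 0;
}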
/* device open: */ /* device open: */
@ -470,10 +491,12 @@ int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
/* read superblock: */ /* read superblock: */
static const char *read_one_super(struct bch_sb_handle *sb, u64 offset) static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
{ {
struct bch_csum csum; struct bch_csum csum;
u32 version, version_min;
size_t bytes; size_t bytes;
int ret;
reread: reread:
bio_reset(sb->bio); bio_reset(sb->bio);
bio_set_dev(sb->bio, sb->bdev); bio_set_dev(sb->bio, sb->bdev);
@ -481,40 +504,65 @@ reread:
bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META); bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
bch2_bio_map(sb->bio, sb->sb, sb->buffer_size); bch2_bio_map(sb->bio, sb->sb, sb->buffer_size);
if (submit_bio_wait(sb->bio)) ret = submit_bio_wait(sb->bio);
return "IO error"; if (ret) {
pr_buf(err, "IO error: %i", ret);
return ret;
}
if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC)) if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC)) {
return "Not a bcachefs superblock"; pr_buf(err, "Not a bcachefs superblock");
return -EINVAL;
}
if (le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_min || version = le16_to_cpu(sb->sb->version);
le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max) version_min = version >= bcachefs_metadata_version_new_versioning
return "Unsupported superblock version"; ? le16_to_cpu(sb->sb->version_min)
: version;
if (version >= bcachefs_metadata_version_max) {
pr_buf(err, "Unsupported superblock version %u (min %u, max %u)",
version, bcachefs_metadata_version_min, bcachefs_metadata_version_max);
return -EINVAL;
}
if (version_min < bcachefs_metadata_version_min) {
pr_buf(err, "Unsupported superblock version %u (min %u, max %u)",
version_min, bcachefs_metadata_version_min, bcachefs_metadata_version_max);
return -EINVAL;
}
bytes = vstruct_bytes(sb->sb); bytes = vstruct_bytes(sb->sb);
if (bytes > 512 << sb->sb->layout.sb_max_size_bits) if (bytes > 512 << sb->sb->layout.sb_max_size_bits) {
return "Bad superblock: too big"; pr_buf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)",
bytes, 512UL << sb->sb->layout.sb_max_size_bits);
return -EINVAL;
}
if (bytes > sb->buffer_size) { if (bytes > sb->buffer_size) {
if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s))) if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s)))
return "cannot allocate memory"; return -ENOMEM;
goto reread; goto reread;
} }
if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) {
return "unknown csum type"; pr_buf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
return -EINVAL;
}
/* XXX: verify MACs */ /* XXX: verify MACs */
csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb), csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
null_nonce(), sb->sb); null_nonce(), sb->sb);
if (bch2_crc_cmp(csum, sb->sb->csum)) if (bch2_crc_cmp(csum, sb->sb->csum)) {
return "bad checksum reading superblock"; pr_buf(err, "bad checksum");
return -EINVAL;
}
sb->seq = le64_to_cpu(sb->sb->seq); sb->seq = le64_to_cpu(sb->sb->seq);
return NULL; return 0;
} }
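
Note: read_one_super() keeps its probe-and-reread shape: read into a default-sized buffer, and once vstruct_bytes() reveals the superblock's real size, grow the buffer and jump back to read it in full. A file-backed sketch of the same pattern follows (the real code uses a bio plus bch2_sb_realloc(); toy_sb and read_sb are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct toy_sb { unsigned long long bytes; };	/* declared total size */

static void *read_sb(FILE *f, size_t bufsize)
{
	struct toy_sb *sb;
	void *buf = malloc(bufsize);

	if (!buf)
		return NULL;
reread:
	rewind(f);
	if (fread(buf, 1, bufsize, f) < sizeof(*sb))
		goto err;			/* not even a full header */

	sb = buf;
	if (sb->bytes > bufsize) {		/* bigger than our buffer */
		void *n = realloc(buf, sb->bytes);

		if (!n)
			goto err;
		buf = n;
		bufsize = sb->bytes;
		goto reread;			/* reread the whole thing */
	}
	return buf;
err:
	free(buf);
	return NULL;
}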
int bch2_read_super(const char *path, struct bch_opts *opts, int bch2_read_super(const char *path, struct bch_opts *opts,
@ -522,10 +570,16 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
{ {
u64 offset = opt_get(*opts, sb); u64 offset = opt_get(*opts, sb);
struct bch_sb_layout layout; struct bch_sb_layout layout;
const char *err; char *_err;
struct printbuf err;
__le64 *i; __le64 *i;
int ret; int ret;
_err = kmalloc(4096, GFP_KERNEL);
if (!_err)
return -ENOMEM;
err = _PBUF(_err, 4096);
pr_verbose_init(*opts, ""); pr_verbose_init(*opts, "");
memset(sb, 0, sizeof(*sb)); memset(sb, 0, sizeof(*sb));
@ -554,25 +608,28 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
goto out; goto out;
} }
err = "cannot allocate memory";
ret = bch2_sb_realloc(sb, 0); ret = bch2_sb_realloc(sb, 0);
if (ret) if (ret) {
pr_buf(&err, "error allocating memory for superblock");
goto err; goto err;
}
ret = -EFAULT; if (bch2_fs_init_fault("read_super")) {
err = "dynamic fault"; pr_buf(&err, "dynamic fault");
if (bch2_fs_init_fault("read_super")) ret = -EFAULT;
goto err; goto err;
}
ret = -EINVAL; ret = read_one_super(sb, offset, &err);
err = read_one_super(sb, offset); if (!ret)
if (!err)
goto got_super; goto got_super;
if (opt_defined(*opts, sb)) if (opt_defined(*opts, sb))
goto err; goto err;
pr_err("error reading default superblock: %s", err); printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s",
path, _err);
err = _PBUF(_err, 4096);
/* /*
* Error reading primary superblock - read location of backup * Error reading primary superblock - read location of backup
@ -588,13 +645,15 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
*/ */
bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout)); bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout));
err = "IO error"; ret = submit_bio_wait(sb->bio);
if (submit_bio_wait(sb->bio)) if (ret) {
pr_buf(&err, "IO error: %i", ret);
goto err; goto err;
}
memcpy(&layout, sb->sb, sizeof(layout)); memcpy(&layout, sb->sb, sizeof(layout));
err = validate_sb_layout(&layout); ret = validate_sb_layout(&layout, &err);
if (err) if (ret)
goto err; goto err;
for (i = layout.sb_offset; for (i = layout.sb_offset;
@ -604,32 +663,39 @@ int bch2_read_super(const char *path, struct bch_opts *opts,
if (offset == opt_get(*opts, sb)) if (offset == opt_get(*opts, sb))
continue; continue;
err = read_one_super(sb, offset); ret = read_one_super(sb, offset, &err);
if (!err) if (!ret)
goto got_super; goto got_super;
} }
ret = -EINVAL;
goto err; goto err;
got_super: got_super:
err = "Superblock block size smaller than device block size";
ret = -EINVAL;
if (le16_to_cpu(sb->sb->block_size) << 9 < if (le16_to_cpu(sb->sb->block_size) << 9 <
bdev_logical_block_size(sb->bdev)) { bdev_logical_block_size(sb->bdev)) {
pr_err("error reading superblock: Superblock block size (%u) smaller than device block size (%u)", pr_buf(&err, "block size (%u) smaller than device block size (%u)",
le16_to_cpu(sb->sb->block_size) << 9, le16_to_cpu(sb->sb->block_size) << 9,
bdev_logical_block_size(sb->bdev)); bdev_logical_block_size(sb->bdev));
goto err_no_print; ret = -EINVAL;
goto err;
} }
ret = 0; ret = 0;
sb->have_layout = true; sb->have_layout = true;
ret = bch2_sb_validate(sb, &err);
if (ret) {
printk(KERN_ERR "bcachefs (%s): error validating superblock: %s",
path, _err);
goto err_no_print;
}
out: out:
pr_verbose_init(*opts, "ret %i", ret); pr_verbose_init(*opts, "ret %i", ret);
kfree(_err);
return ret; return ret;
err: err:
pr_err("error reading superblock: %s", err); printk(KERN_ERR "bcachefs (%s): error reading superblock: %s",
path, _err);
err_no_print: err_no_print:
bch2_free_super(sb); bch2_free_super(sb);
goto out; goto out;
@ -704,7 +770,6 @@ int bch2_write_super(struct bch_fs *c)
struct closure *cl = &c->sb_write; struct closure *cl = &c->sb_write;
struct bch_dev *ca; struct bch_dev *ca;
unsigned i, sb = 0, nr_wrote; unsigned i, sb = 0, nr_wrote;
const char *err;
struct bch_devs_mask sb_written; struct bch_devs_mask sb_written;
bool wrote, can_mount_without_written, can_mount_with_written; bool wrote, can_mount_without_written, can_mount_with_written;
unsigned degraded_flags = BCH_FORCE_IF_DEGRADED; unsigned degraded_flags = BCH_FORCE_IF_DEGRADED;
@ -731,10 +796,19 @@ int bch2_write_super(struct bch_fs *c)
bch2_sb_from_fs(c, ca); bch2_sb_from_fs(c, ca);
for_each_online_member(ca, c, i) { for_each_online_member(ca, c, i) {
err = bch2_sb_validate(&ca->disk_sb); struct printbuf buf = { NULL, NULL };
if (err) {
bch2_fs_inconsistent(c, "sb invalid before write: %s", err); ret = bch2_sb_validate(&ca->disk_sb, &buf);
ret = -1; if (ret) {
char *_buf = kmalloc(4096, GFP_NOFS);
if (_buf) {
buf = _PBUF(_buf, 4096);
bch2_sb_validate(&ca->disk_sb, &buf);
}
bch2_fs_inconsistent(c, "sb invalid before write: %s", _buf);
kfree(_buf);
percpu_ref_put(&ca->io_ref);
goto out; goto out;
} }
} }
@ -847,54 +921,57 @@ static int u64_cmp(const void *_l, const void *_r)
return l < r ? -1 : l > r ? 1 : 0; return l < r ? -1 : l > r ? 1 : 0;
} }
static const char *bch2_sb_validate_journal(struct bch_sb *sb, static int bch2_sb_validate_journal(struct bch_sb *sb,
struct bch_sb_field *f) struct bch_sb_field *f,
struct printbuf *err)
{ {
struct bch_sb_field_journal *journal = field_to_type(f, journal); struct bch_sb_field_journal *journal = field_to_type(f, journal);
struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx; struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx;
const char *err; int ret = -EINVAL;
unsigned nr; unsigned nr;
unsigned i; unsigned i;
u64 *b; u64 *b;
journal = bch2_sb_get_journal(sb);
if (!journal)
return NULL;
nr = bch2_nr_journal_buckets(journal); nr = bch2_nr_journal_buckets(journal);
if (!nr) if (!nr)
return NULL; return 0;
b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL); b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL);
if (!b) if (!b)
return "cannot allocate memory"; return -ENOMEM;
for (i = 0; i < nr; i++) for (i = 0; i < nr; i++)
b[i] = le64_to_cpu(journal->buckets[i]); b[i] = le64_to_cpu(journal->buckets[i]);
sort(b, nr, sizeof(u64), u64_cmp, NULL); sort(b, nr, sizeof(u64), u64_cmp, NULL);
err = "journal bucket at sector 0"; if (!b[0]) {
if (!b[0]) pr_buf(err, "journal bucket at sector 0");
goto err; goto err;
}
err = "journal bucket before first bucket"; if (b[0] < le16_to_cpu(m->first_bucket)) {
if (m && b[0] < le16_to_cpu(m->first_bucket)) pr_buf(err, "journal bucket %llu before first bucket %u",
b[0], le16_to_cpu(m->first_bucket));
goto err; goto err;
}
err = "journal bucket past end of device"; if (b[nr - 1] >= le64_to_cpu(m->nbuckets)) {
if (m && b[nr - 1] >= le64_to_cpu(m->nbuckets)) pr_buf(err, "journal bucket %llu past end of device (nbuckets %llu)",
b[nr - 1], le64_to_cpu(m->nbuckets));
goto err; goto err;
}
err = "duplicate journal buckets";
for (i = 0; i + 1 < nr; i++) for (i = 0; i + 1 < nr; i++)
if (b[i] == b[i + 1]) if (b[i] == b[i + 1]) {
pr_buf(err, "duplicate journal buckets %llu", b[i]);
goto err; goto err;
}
err = NULL; ret = 0;
err: err:
kfree(b); kfree(b);
return err; return ret;
} }
static const struct bch_sb_field_ops bch_sb_field_ops_journal = { static const struct bch_sb_field_ops bch_sb_field_ops_journal = {
@ -903,39 +980,54 @@ static const struct bch_sb_field_ops bch_sb_field_ops_journal = {
/* BCH_SB_FIELD_members: */ /* BCH_SB_FIELD_members: */
static const char *bch2_sb_validate_members(struct bch_sb *sb, static int bch2_sb_validate_members(struct bch_sb *sb,
struct bch_sb_field *f) struct bch_sb_field *f,
struct printbuf *err)
{ {
struct bch_sb_field_members *mi = field_to_type(f, members); struct bch_sb_field_members *mi = field_to_type(f, members);
struct bch_member *m; unsigned i;
if ((void *) (mi->members + sb->nr_devices) > if ((void *) (mi->members + sb->nr_devices) >
vstruct_end(&mi->field)) vstruct_end(&mi->field)) {
return "Invalid superblock: bad member info"; pr_buf(err, "too many devices for section size");
return -EINVAL;
}
for (i = 0; i < sb->nr_devices; i++) {
struct bch_member *m = mi->members + i;
for (m = mi->members;
m < mi->members + sb->nr_devices;
m++) {
if (!bch2_member_exists(m)) if (!bch2_member_exists(m))
continue; continue;
if (le64_to_cpu(m->nbuckets) > LONG_MAX) if (le64_to_cpu(m->nbuckets) > LONG_MAX) {
return "Too many buckets"; pr_buf(err, "device %u: too many buckets (got %llu, max %lu)",
i, le64_to_cpu(m->nbuckets), LONG_MAX);
return -EINVAL;
}
if (le64_to_cpu(m->nbuckets) - if (le64_to_cpu(m->nbuckets) -
le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS) le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS) {
return "Not enough buckets"; pr_buf(err, "device %u: not enough buckets (got %llu, max %u)",
i, le64_to_cpu(m->nbuckets), BCH_MIN_NR_NBUCKETS);
return -EINVAL;
}
if (le16_to_cpu(m->bucket_size) < if (le16_to_cpu(m->bucket_size) <
le16_to_cpu(sb->block_size)) le16_to_cpu(sb->block_size)) {
return "bucket size smaller than block size"; pr_buf(err, "device %u: bucket size %u smaller than block size %u",
i, le16_to_cpu(m->bucket_size), le16_to_cpu(sb->block_size));
return -EINVAL;
}
if (le16_to_cpu(m->bucket_size) < if (le16_to_cpu(m->bucket_size) <
BCH_SB_BTREE_NODE_SIZE(sb)) BCH_SB_BTREE_NODE_SIZE(sb)) {
return "bucket size smaller than btree node size"; pr_buf(err, "device %u: bucket size %u smaller than btree node size %llu",
i, le16_to_cpu(m->bucket_size), BCH_SB_BTREE_NODE_SIZE(sb));
return -EINVAL;
}
} }
return NULL; return 0;
} }
static const struct bch_sb_field_ops bch_sb_field_ops_members = { static const struct bch_sb_field_ops bch_sb_field_ops_members = {
@ -944,18 +1036,24 @@ static const struct bch_sb_field_ops bch_sb_field_ops_members = {
/* BCH_SB_FIELD_crypt: */ /* BCH_SB_FIELD_crypt: */
static const char *bch2_sb_validate_crypt(struct bch_sb *sb, static int bch2_sb_validate_crypt(struct bch_sb *sb,
struct bch_sb_field *f) struct bch_sb_field *f,
struct printbuf *err)
{ {
struct bch_sb_field_crypt *crypt = field_to_type(f, crypt); struct bch_sb_field_crypt *crypt = field_to_type(f, crypt);
if (vstruct_bytes(&crypt->field) != sizeof(*crypt)) if (vstruct_bytes(&crypt->field) < sizeof(*crypt)) {
return "invalid field crypt: wrong size"; pr_buf(err, "wrong size (got %llu should be %zu)",
vstruct_bytes(&crypt->field), sizeof(*crypt));
return -EINVAL;
}
if (BCH_CRYPT_KDF_TYPE(crypt)) if (BCH_CRYPT_KDF_TYPE(crypt)) {
return "invalid field crypt: bad kdf type"; pr_buf(err, "bad kdf type %llu", BCH_CRYPT_KDF_TYPE(crypt));
return -EINVAL;
}
return NULL; return 0;
} }
static const struct bch_sb_field_ops bch_sb_field_ops_crypt = { static const struct bch_sb_field_ops bch_sb_field_ops_crypt = {
@ -1164,15 +1262,19 @@ out:
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
} }
static const char *bch2_sb_validate_clean(struct bch_sb *sb, static int bch2_sb_validate_clean(struct bch_sb *sb,
struct bch_sb_field *f) struct bch_sb_field *f,
struct printbuf *err)
{ {
struct bch_sb_field_clean *clean = field_to_type(f, clean); struct bch_sb_field_clean *clean = field_to_type(f, clean);
if (vstruct_bytes(&clean->field) < sizeof(*clean)) if (vstruct_bytes(&clean->field) < sizeof(*clean)) {
return "invalid field crypt: wrong size"; pr_buf(err, "wrong size (got %llu should be %zu)",
vstruct_bytes(&clean->field), sizeof(*clean));
return -EINVAL;
}
return NULL; return 0;
} }
static const struct bch_sb_field_ops bch_sb_field_ops_clean = { static const struct bch_sb_field_ops bch_sb_field_ops_clean = {
@ -1186,14 +1288,26 @@ static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
#undef x #undef x
}; };
static const char *bch2_sb_field_validate(struct bch_sb *sb, static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
struct bch_sb_field *f) struct printbuf *orig_err)
{ {
unsigned type = le32_to_cpu(f->type); unsigned type = le32_to_cpu(f->type);
struct printbuf err = *orig_err;
int ret;
return type < BCH_SB_FIELD_NR if (type >= BCH_SB_FIELD_NR)
? bch2_sb_field_ops[type]->validate(sb, f) return 0;
: NULL;
pr_buf(&err, "Invalid superblock section %s: ", bch2_sb_fields[type]);
ret = bch2_sb_field_ops[type]->validate(sb, f, &err);
if (ret) {
pr_buf(&err, "\n");
bch2_sb_field_to_text(&err, sb, f);
*orig_err = err;
}
return ret;
} }
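
Note: one subtlety in bch2_sb_field_validate(): struct printbuf is copied by value, so the section-name prefix is written through a local copy, and the advanced position is only published back via *orig_err when validation fails. On success the caller's printbuf is left untouched (the prefix bytes still land in the shared buffer, but the next write overwrites them). A sketch, reusing the toy printbuf and validate_block_size() from the earlier note:

/* assumes the toy struct printbuf / pr_buf() / validate_block_size()
 * sketched above */
static int validate_with_prefix(struct printbuf *orig_err)
{
	struct printbuf err = *orig_err;	/* tentative local copy */
	int ret;

	pr_buf(&err, "Invalid superblock section foo: ");
	ret = validate_block_size(16, 8, &err);
	if (ret)
		*orig_err = err;	/* commit prefix + message on failure */
	return ret;			/* on success the prefix is discarded */
}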
void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,

View File

@ -38,9 +38,8 @@ BCH_SB_FIELDS()
extern const char * const bch2_sb_fields[]; extern const char * const bch2_sb_fields[];
struct bch_sb_field_ops { struct bch_sb_field_ops {
const char * (*validate)(struct bch_sb *, struct bch_sb_field *); int (*validate)(struct bch_sb *, struct bch_sb_field *, struct printbuf *);
void (*to_text)(struct printbuf *, struct bch_sb *, void (*to_text)(struct printbuf *, struct bch_sb *, struct bch_sb_field *);
struct bch_sb_field *);
}; };
static inline __le64 bch2_sb_magic(struct bch_fs *c) static inline __le64 bch2_sb_magic(struct bch_fs *c)
@ -66,8 +65,6 @@ int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *);
void bch2_free_super(struct bch_sb_handle *); void bch2_free_super(struct bch_sb_handle *);
int bch2_sb_realloc(struct bch_sb_handle *, unsigned); int bch2_sb_realloc(struct bch_sb_handle *, unsigned);
const char *bch2_sb_validate(struct bch_sb_handle *);
int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *);
int bch2_write_super(struct bch_fs *); int bch2_write_super(struct bch_fs *);
void __bch2_check_set_feature(struct bch_fs *, unsigned); void __bch2_check_set_feature(struct bch_fs *, unsigned);

View File

@ -528,6 +528,8 @@ void __bch2_fs_stop(struct bch_fs *c)
set_bit(BCH_FS_STOPPING, &c->flags); set_bit(BCH_FS_STOPPING, &c->flags);
cancel_work_sync(&c->journal_seq_blacklist_gc_work);
down_write(&c->state_lock); down_write(&c->state_lock);
bch2_fs_read_only(c); bch2_fs_read_only(c);
up_write(&c->state_lock); up_write(&c->state_lock);
@ -690,6 +692,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
spin_lock_init(&c->btree_write_error_lock); spin_lock_init(&c->btree_write_error_lock);
INIT_WORK(&c->journal_seq_blacklist_gc_work,
bch2_blacklist_entries_gc);
INIT_LIST_HEAD(&c->journal_entries); INIT_LIST_HEAD(&c->journal_entries);
INIT_LIST_HEAD(&c->journal_iters); INIT_LIST_HEAD(&c->journal_iters);
@ -737,7 +742,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
if (ret) if (ret)
goto err; goto err;
scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid); uuid_unparse_lower(c->sb.user_uuid.b, c->name);
/* Compat: */ /* Compat: */
if (sb->version <= bcachefs_metadata_version_inode_v2 && if (sb->version <= bcachefs_metadata_version_inode_v2 &&
@ -1251,6 +1256,8 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
ca->disk_sb.bdev->bd_holder = ca; ca->disk_sb.bdev->bd_holder = ca;
memset(sb, 0, sizeof(*sb)); memset(sb, 0, sizeof(*sb));
ca->dev = ca->disk_sb.bdev->bd_dev;
percpu_ref_reinit(&ca->io_ref); percpu_ref_reinit(&ca->io_ref);
return 0; return 0;
@ -1596,18 +1603,20 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
struct bch_sb_field_members *mi; struct bch_sb_field_members *mi;
struct bch_member dev_mi; struct bch_member dev_mi;
unsigned dev_idx, nr_devices, u64s; unsigned dev_idx, nr_devices, u64s;
char *_errbuf;
struct printbuf errbuf;
int ret; int ret;
_errbuf = kmalloc(4096, GFP_KERNEL);
if (!_errbuf)
return -ENOMEM;
errbuf = _PBUF(_errbuf, 4096);
ret = bch2_read_super(path, &opts, &sb); ret = bch2_read_super(path, &opts, &sb);
if (ret) { if (ret) {
bch_err(c, "device add error: error reading super: %i", ret); bch_err(c, "device add error: error reading super: %i", ret);
return ret; goto err;
}
err = bch2_sb_validate(&sb);
if (err) {
bch_err(c, "device add error: error validating super: %s", err);
return -EINVAL;
} }
dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx]; dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx];
@ -1615,19 +1624,21 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
err = bch2_dev_may_add(sb.sb, c); err = bch2_dev_may_add(sb.sb, c);
if (err) { if (err) {
bch_err(c, "device add error: %s", err); bch_err(c, "device add error: %s", err);
return -EINVAL; ret = -EINVAL;
goto err;
} }
ca = __bch2_dev_alloc(c, &dev_mi); ca = __bch2_dev_alloc(c, &dev_mi);
if (!ca) { if (!ca) {
bch2_free_super(&sb); bch2_free_super(&sb);
return -ENOMEM; ret = -ENOMEM;
goto err;
} }
ret = __bch2_dev_attach_bdev(ca, &sb); ret = __bch2_dev_attach_bdev(ca, &sb);
if (ret) { if (ret) {
bch2_dev_free(ca); bch2_dev_free(ca);
return ret; goto err;
} }
ret = bch2_dev_journal_alloc(ca); ret = bch2_dev_journal_alloc(ca);
@ -1719,10 +1730,12 @@ err:
if (ca) if (ca)
bch2_dev_free(ca); bch2_dev_free(ca);
bch2_free_super(&sb); bch2_free_super(&sb);
kfree(_errbuf);
return ret; return ret;
err_late: err_late:
up_write(&c->state_lock); up_write(&c->state_lock);
return -EINVAL; ca = NULL;
goto err;
} }
/* Hot add existing device to running filesystem: */ /* Hot add existing device to running filesystem: */
@ -1869,7 +1882,7 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path)
rcu_read_lock(); rcu_read_lock();
for_each_member_device_rcu(ca, c, i, NULL) for_each_member_device_rcu(ca, c, i, NULL)
if (ca->disk_sb.bdev->bd_dev == dev) if (ca->dev == dev)
goto found; goto found;
ca = ERR_PTR(-ENOENT); ca = ERR_PTR(-ENOENT);
found: found:
@ -1888,20 +1901,28 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
struct bch_sb_field_members *mi; struct bch_sb_field_members *mi;
unsigned i, best_sb = 0; unsigned i, best_sb = 0;
const char *err; const char *err;
char *_errbuf = NULL;
struct printbuf errbuf;
int ret = 0; int ret = 0;
if (!try_module_get(THIS_MODULE))
return ERR_PTR(-ENODEV);
pr_verbose_init(opts, ""); pr_verbose_init(opts, "");
if (!nr_devices) { if (!nr_devices) {
c = ERR_PTR(-EINVAL); ret = -EINVAL;
goto out2; goto err;
} }
if (!try_module_get(THIS_MODULE)) { _errbuf = kmalloc(4096, GFP_KERNEL);
c = ERR_PTR(-ENODEV); if (!_errbuf) {
goto out2; ret = -ENOMEM;
goto err;
} }
errbuf = _PBUF(_errbuf, 4096);
sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL); sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL);
if (!sb) { if (!sb) {
ret = -ENOMEM; ret = -ENOMEM;
@ -1913,9 +1934,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
if (ret) if (ret)
goto err; goto err;
err = bch2_sb_validate(&sb[i]);
if (err)
goto err_print;
} }
for (i = 1; i < nr_devices; i++) for (i = 1; i < nr_devices; i++)
@ -1970,8 +1988,8 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
} }
out: out:
kfree(sb); kfree(sb);
kfree(_errbuf);
module_put(THIS_MODULE); module_put(THIS_MODULE);
out2:
pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c)); pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c));
return c; return c;
err_print: err_print:
@ -1988,81 +2006,6 @@ err:
goto out; goto out;
} }
static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb,
struct bch_opts opts)
{
const char *err;
struct bch_fs *c;
bool allocated_fs = false;
int ret;
err = bch2_sb_validate(sb);
if (err)
return err;
mutex_lock(&bch_fs_list_lock);
c = __bch2_uuid_to_fs(sb->sb->uuid);
if (c) {
closure_get(&c->cl);
err = bch2_dev_in_fs(c->disk_sb.sb, sb->sb);
if (err)
goto err;
} else {
allocated_fs = true;
c = bch2_fs_alloc(sb->sb, opts);
err = "bch2_fs_alloc() error";
if (IS_ERR(c))
goto err;
}
err = "bch2_dev_online() error";
mutex_lock(&c->sb_lock);
if (bch2_dev_attach_bdev(c, sb)) {
mutex_unlock(&c->sb_lock);
goto err;
}
mutex_unlock(&c->sb_lock);
if (!c->opts.nostart && bch2_fs_may_start(c)) {
err = "error starting filesystem";
ret = bch2_fs_start(c);
if (ret)
goto err;
}
closure_put(&c->cl);
mutex_unlock(&bch_fs_list_lock);
return NULL;
err:
mutex_unlock(&bch_fs_list_lock);
if (allocated_fs && !IS_ERR(c))
bch2_fs_stop(c);
else if (c)
closure_put(&c->cl);
return err;
}
const char *bch2_fs_open_incremental(const char *path)
{
struct bch_sb_handle sb;
struct bch_opts opts = bch2_opts_empty();
const char *err;
if (bch2_read_super(path, &opts, &sb))
return "error reading superblock";
err = __bch2_fs_open_incremental(&sb, opts);
bch2_free_super(&sb);
return err;
}
/* Global interfaces/init */ /* Global interfaces/init */
static void bcachefs_exit(void) static void bcachefs_exit(void)

View File

@ -254,6 +254,5 @@ void bch2_fs_stop(struct bch_fs *);
int bch2_fs_start(struct bch_fs *); int bch2_fs_start(struct bch_fs *);
struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts); struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts);
const char *bch2_fs_open_incremental(const char *path);
#endif /* _BCACHEFS_SUPER_H */ #endif /* _BCACHEFS_SUPER_H */

View File

@ -746,4 +746,13 @@ static inline int u8_cmp(u8 l, u8 r)
return cmp_int(l, r); return cmp_int(l, r);
} }
#ifdef __KERNEL__
static inline void uuid_unparse_lower(u8 *uuid, char *out)
{
sprintf(out, "%pUb", uuid);	/* kernel %pUb: lowercase UUID, stored byte order */
}
#else
#include <uuid/uuid.h>
#endif
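
Note: on the userspace side this shim resolves to libuuid's uuid_unparse_lower(3). A minimal usage sketch, assuming libuuid (link with -luuid):

#include <uuid/uuid.h>
#include <stdio.h>

int main(void)
{
	uuid_t u;
	char name[37];		/* 36 chars + NUL, per uuid_unparse(3) */

	uuid_generate(u);
	uuid_unparse_lower(u, name);
	printf("%s\n", name);	/* e.g. 1b4e28ba-2fa1-11d2-883f-b9a761bde3fb */
	return 0;
}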
#endif /* _BCACHEFS_UTIL_H */ #endif /* _BCACHEFS_UTIL_H */