From 931ed5a709c2afa239cbae2e13bc22f13e99713c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 3 Jan 2022 23:43:03 -0500 Subject: [PATCH] Update bcachefs sources to 50ac18afbb bcachefs: Fix an uninitialized variable --- .bcachefs_revision | 2 +- include/linux/slab.h | 4 + include/trace/events/bcachefs.h | 144 ++++----- libbcachefs/bcachefs.h | 10 +- libbcachefs/bset.c | 141 +-------- libbcachefs/bset.h | 1 - libbcachefs/btree_cache.c | 2 +- libbcachefs/btree_gc.c | 61 ++-- libbcachefs/btree_io.c | 12 +- libbcachefs/btree_iter.c | 21 +- libbcachefs/btree_iter.h | 5 +- libbcachefs/btree_key_cache.c | 9 +- libbcachefs/btree_types.h | 2 +- libbcachefs/btree_update_interior.c | 4 +- libbcachefs/btree_update_leaf.c | 34 +- libbcachefs/buckets.c | 5 +- libbcachefs/chardev.c | 5 +- libbcachefs/checksum.c | 25 +- libbcachefs/disk_groups.c | 62 ++-- libbcachefs/eytzinger.h | 48 ++- libbcachefs/fs-io.c | 2 +- libbcachefs/fs.c | 1 - libbcachefs/io.c | 2 +- libbcachefs/journal_io.c | 4 +- libbcachefs/journal_seq_blacklist.c | 152 +++++++-- libbcachefs/journal_seq_blacklist.h | 2 + libbcachefs/k-eytzinger.h | 13 - libbcachefs/opts.h | 2 +- libbcachefs/quota.c | 12 +- libbcachefs/recovery.c | 36 ++- libbcachefs/replicas.c | 137 ++++---- libbcachefs/super-io.c | 466 +++++++++++++++++----------- libbcachefs/super-io.h | 7 +- libbcachefs/super.c | 137 +++----- libbcachefs/super.h | 1 - libbcachefs/util.h | 9 + 36 files changed, 816 insertions(+), 764 deletions(-) delete mode 100644 libbcachefs/k-eytzinger.h diff --git a/.bcachefs_revision b/.bcachefs_revision index e9908fb6..79a03365 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -90d824456e169e50965814b74a75c50045b13976 +50ac18afbb522a3103cecff9aaf9519d4eb5e908 diff --git a/include/linux/slab.h b/include/linux/slab.h index 67633c98..bc99973f 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -63,6 +63,10 @@ static inline void *krealloc(void *old, size_t size, gfp_t flags) ((size) != 0 && 
(n) > SIZE_MAX / (size) \ ? NULL : kmalloc((n) * (size), flags)) +#define kvmalloc_array(n, size, flags) \ + ((size) != 0 && (n) > SIZE_MAX / (size) \ + ? NULL : kmalloc((n) * (size), flags)) + #define kcalloc(n, size, flags) kmalloc_array(n, size, flags|__GFP_ZERO) #define kfree(p) free(p) diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 5a409ee1..295dcd60 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -387,7 +387,7 @@ TRACE_EVENT(alloc_scan, ), TP_fast_assign( - __entry->dev = ca->disk_sb.bdev->bd_dev; + __entry->dev = ca->dev; __entry->found = found; __entry->inc_gen = inc_gen; __entry->inc_gen_skipped = inc_gen_skipped; @@ -409,7 +409,7 @@ TRACE_EVENT(invalidate, ), TP_fast_assign( - __entry->dev = ca->disk_sb.bdev->bd_dev; + __entry->dev = ca->dev; __entry->offset = offset, __entry->sectors = sectors; ), @@ -431,7 +431,7 @@ DECLARE_EVENT_CLASS(bucket_alloc, ), TP_fast_assign( - __entry->dev = ca->disk_sb.bdev->bd_dev; + __entry->dev = ca->dev; __entry->reserve = reserve; ), @@ -546,94 +546,81 @@ TRACE_EVENT(copygc_wait, __entry->wait_amount, __entry->until) ); -TRACE_EVENT(transaction_restart_ip, - TP_PROTO(unsigned long caller, unsigned long ip), - TP_ARGS(caller, ip), - - TP_STRUCT__entry( - __field(unsigned long, caller ) - __field(unsigned long, ip ) - ), - - TP_fast_assign( - __entry->caller = caller; - __entry->ip = ip; - ), - - TP_printk("%ps %pS", (void *) __entry->caller, (void *) __entry->ip) -); - DECLARE_EVENT_CLASS(transaction_restart, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip), + TP_ARGS(trans_fn, caller_ip), TP_STRUCT__entry( - __field(unsigned long, trans_ip ) + __array(char, trans_fn, 24 ) __field(unsigned long, caller_ip ) ), TP_fast_assign( - __entry->trans_ip = trans_ip; + strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; ), - 
TP_printk("%ps %pS", - (void *) __entry->trans_ip, - (void *) __entry->caller_ip) + TP_printk("%s %pS", __entry->trans_fn, (void *) __entry->caller_ip) +); + +DEFINE_EVENT(transaction_restart, transaction_restart_ip, + TP_PROTO(const char *trans_fn, + unsigned long caller_ip), + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_blocked_journal_reclaim, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_restart_journal_reclaim, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_restart_fault_inject, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_traverse_all, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DECLARE_EVENT_CLASS(transaction_restart_iter, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long 
caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos), + TP_ARGS(trans_fn, caller_ip, btree_id, pos), TP_STRUCT__entry( - __field(unsigned long, trans_ip ) + __array(char, trans_fn, 24 ) __field(unsigned long, caller_ip ) __field(u8, btree_id ) __field(u64, pos_inode ) @@ -642,7 +629,7 @@ DECLARE_EVENT_CLASS(transaction_restart_iter, ), TP_fast_assign( - __entry->trans_ip = trans_ip; + strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; __entry->btree_id = btree_id; __entry->pos_inode = pos->inode; @@ -650,8 +637,8 @@ DECLARE_EVENT_CLASS(transaction_restart_iter, __entry->pos_snapshot = pos->snapshot; ), - TP_printk("%ps %pS btree %u pos %llu:%llu:%u", - (void *) __entry->trans_ip, + TP_printk("%s %pS btree %u pos %llu:%llu:%u", + __entry->trans_fn, (void *) __entry->caller_ip, __entry->btree_id, __entry->pos_inode, @@ -660,63 +647,63 @@ DECLARE_EVENT_CLASS(transaction_restart_iter, ); DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_reused, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_mark, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_upgrade, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const 
char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_iter_upgrade, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); TRACE_EVENT(trans_restart_would_deadlock, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, bool in_traverse_all, unsigned reason, @@ -726,12 +713,12 @@ TRACE_EVENT(trans_restart_would_deadlock, enum btree_id want_btree_id, unsigned want_iter_type, struct bpos *want_pos), - TP_ARGS(trans_ip, caller_ip, in_traverse_all, reason, + TP_ARGS(trans_fn, caller_ip, in_traverse_all, reason, have_btree_id, have_iter_type, have_pos, want_btree_id, want_iter_type, want_pos), TP_STRUCT__entry( - __field(unsigned long, trans_ip ) + __array(char, trans_fn, 24 ) __field(unsigned long, caller_ip ) __field(u8, in_traverse_all ) __field(u8, reason ) @@ -749,7 +736,7 @@ TRACE_EVENT(trans_restart_would_deadlock, ), TP_fast_assign( - __entry->trans_ip = trans_ip; + strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; __entry->in_traverse_all = 
in_traverse_all; __entry->reason = reason; @@ -767,8 +754,8 @@ TRACE_EVENT(trans_restart_would_deadlock, __entry->want_pos_snapshot = want_pos->snapshot; ), - TP_printk("%ps %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u", - (void *) __entry->trans_ip, + TP_printk("%s %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u", + __entry->trans_fn, (void *) __entry->caller_ip, __entry->in_traverse_all, __entry->reason, @@ -785,39 +772,40 @@ TRACE_EVENT(trans_restart_would_deadlock, ); TRACE_EVENT(trans_restart_would_deadlock_write, - TP_PROTO(unsigned long trans_ip), - TP_ARGS(trans_ip), + TP_PROTO(const char *trans_fn), + TP_ARGS(trans_fn), TP_STRUCT__entry( - __field(unsigned long, trans_ip ) + __array(char, trans_fn, 24 ) ), TP_fast_assign( - __entry->trans_ip = trans_ip; + strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); ), - TP_printk("%ps", (void *) __entry->trans_ip) + TP_printk("%s", __entry->trans_fn) ); TRACE_EVENT(trans_restart_mem_realloced, - TP_PROTO(unsigned long trans_ip, unsigned long caller_ip, + TP_PROTO(const char *trans_fn, + unsigned long caller_ip, unsigned long bytes), - TP_ARGS(trans_ip, caller_ip, bytes), + TP_ARGS(trans_fn, caller_ip, bytes), TP_STRUCT__entry( - __field(unsigned long, trans_ip ) + __array(char, trans_fn, 24 ) __field(unsigned long, caller_ip ) __field(unsigned long, bytes ) ), TP_fast_assign( - __entry->trans_ip = trans_ip; + strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; __entry->bytes = bytes; ), - TP_printk("%ps %pS bytes %lu", - (void *) __entry->trans_ip, + TP_printk("%s %pS bytes %lu", + __entry->trans_fn, (void *) __entry->caller_ip, __entry->bytes) ); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index ddd700c3..7b39a419 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -177,7 +177,11 @@ */ #undef pr_fmt +#ifdef __KERNEL__ #define pr_fmt(fmt) "bcachefs: %s() " fmt 
"\n", __func__ +#else +#define pr_fmt(fmt) "%s() " fmt "\n", __func__ +#endif #include #include @@ -219,8 +223,8 @@ #define bch2_fmt(_c, fmt) "bcachefs (%s): " fmt "\n", ((_c)->name) #define bch2_fmt_inum(_c, _inum, fmt) "bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum) #else -#define bch2_fmt(_c, fmt) "%s: " fmt "\n", ((_c)->name) -#define bch2_fmt_inum(_c, _inum, fmt) "%s inum %llu: " fmt "\n", ((_c)->name), (_inum) +#define bch2_fmt(_c, fmt) fmt "\n" +#define bch2_fmt_inum(_c, _inum, fmt) "inum %llu: " fmt "\n", (_inum) #endif #define bch_info(c, fmt, ...) \ @@ -432,6 +436,7 @@ struct bch_dev { struct bch_sb_handle disk_sb; struct bch_sb *sb_read_scratch; int sb_write_error; + dev_t dev; struct bch_devs_mask self; @@ -749,6 +754,7 @@ struct bch_fs { /* JOURNAL SEQ BLACKLIST */ struct journal_seq_blacklist_table * journal_seq_blacklist_table; + struct work_struct journal_seq_blacklist_gc_work; /* ALLOCATOR */ spinlock_t freelist_lock; diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c index a4e0d149..6000a879 100644 --- a/libbcachefs/bset.c +++ b/libbcachefs/bset.c @@ -473,7 +473,7 @@ static inline struct bkey_packed *tree_to_bkey(const struct btree *b, unsigned j) { return cacheline_to_bkey(b, t, - __eytzinger1_to_inorder(j, t->size, t->extra), + __eytzinger1_to_inorder(j, t->size - 1, t->extra), bkey_float(b, t, j)->key_offset); } @@ -607,10 +607,10 @@ static inline unsigned bkey_mantissa(const struct bkey_packed *k, } __always_inline -static inline void __make_bfloat(struct btree *b, struct bset_tree *t, - unsigned j, - struct bkey_packed *min_key, - struct bkey_packed *max_key) +static inline void make_bfloat(struct btree *b, struct bset_tree *t, + unsigned j, + struct bkey_packed *min_key, + struct bkey_packed *max_key) { struct bkey_float *f = bkey_float(b, t, j); struct bkey_packed *m = tree_to_bkey(b, t, j); @@ -679,34 +679,6 @@ static inline void __make_bfloat(struct btree *b, struct bset_tree *t, f->mantissa = mantissa; } -static void 
make_bfloat(struct btree *b, struct bset_tree *t, - unsigned j, - struct bkey_packed *min_key, - struct bkey_packed *max_key) -{ - struct bkey_i *k; - - if (is_power_of_2(j) && - !min_key->u64s) { - if (!bkey_pack_pos(min_key, b->data->min_key, b)) { - k = (void *) min_key; - bkey_init(&k->k); - k->k.p = b->data->min_key; - } - } - - if (is_power_of_2(j + 1) && - !max_key->u64s) { - if (!bkey_pack_pos(max_key, b->data->max_key, b)) { - k = (void *) max_key; - bkey_init(&k->k); - k->k.p = b->data->max_key; - } - } - - __make_bfloat(b, t, j, min_key, max_key); -} - /* bytes remaining - only valid for last bset: */ static unsigned __bset_tree_capacity(const struct btree *b, const struct bset_tree *t) { @@ -763,7 +735,7 @@ retry: t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1; /* First we figure out where the first key in each cacheline is */ - eytzinger1_for_each(j, t->size) { + eytzinger1_for_each(j, t->size - 1) { while (bkey_to_cacheline(b, t, k) < cacheline) prev = k, k = bkey_next(k); @@ -795,10 +767,10 @@ retry: } /* Then we build the tree */ - eytzinger1_for_each(j, t->size) - __make_bfloat(b, t, j, - bkey_to_packed(&min_key), - bkey_to_packed(&max_key)); + eytzinger1_for_each(j, t->size - 1) + make_bfloat(b, t, j, + bkey_to_packed(&min_key), + bkey_to_packed(&max_key)); } static void bset_alloc_tree(struct btree *b, struct bset_tree *t) @@ -897,7 +869,7 @@ static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t, do { p = j ? 
tree_to_bkey(b, t, __inorder_to_eytzinger1(j--, - t->size, t->extra)) + t->size - 1, t->extra)) : btree_bkey_first(b, t); } while (p >= k); break; @@ -943,91 +915,6 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b, /* Insert */ -static void rw_aux_tree_fix_invalidated_key(struct btree *b, - struct bset_tree *t, - struct bkey_packed *k) -{ - unsigned offset = __btree_node_key_to_offset(b, k); - unsigned j = rw_aux_tree_bsearch(b, t, offset); - - if (j < t->size && - rw_aux_tree(b, t)[j].offset == offset) - rw_aux_tree_set(b, t, j, k); - - bch2_bset_verify_rw_aux_tree(b, t); -} - -static void ro_aux_tree_fix_invalidated_key(struct btree *b, - struct bset_tree *t, - struct bkey_packed *k) -{ - struct bkey_packed min_key, max_key; - unsigned inorder, j; - - EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); - - /* signal to make_bfloat() that they're uninitialized: */ - min_key.u64s = max_key.u64s = 0; - - if (bkey_next(k) == btree_bkey_last(b, t)) { - for (j = 1; j < t->size; j = j * 2 + 1) - make_bfloat(b, t, j, &min_key, &max_key); - } - - inorder = bkey_to_cacheline(b, t, k); - - if (inorder && - inorder < t->size) { - j = __inorder_to_eytzinger1(inorder, t->size, t->extra); - - if (k == tree_to_bkey(b, t, j)) { - /* Fix the node this key corresponds to */ - make_bfloat(b, t, j, &min_key, &max_key); - - /* Children for which this key is the right boundary */ - for (j = eytzinger1_left_child(j); - j < t->size; - j = eytzinger1_right_child(j)) - make_bfloat(b, t, j, &min_key, &max_key); - } - } - - if (inorder + 1 < t->size) { - j = __inorder_to_eytzinger1(inorder + 1, t->size, t->extra); - - if (k == tree_to_prev_bkey(b, t, j)) { - make_bfloat(b, t, j, &min_key, &max_key); - - /* Children for which this key is the left boundary */ - for (j = eytzinger1_right_child(j); - j < t->size; - j = eytzinger1_left_child(j)) - make_bfloat(b, t, j, &min_key, &max_key); - } - } -} - -/** - * bch2_bset_fix_invalidated_key() - given an existing key @k that has been - 
* modified, fix any auxiliary search tree by remaking all the nodes in the - * auxiliary search tree that @k corresponds to - */ -void bch2_bset_fix_invalidated_key(struct btree *b, struct bkey_packed *k) -{ - struct bset_tree *t = bch2_bkey_to_bset(b, k); - - switch (bset_aux_tree_type(t)) { - case BSET_NO_AUX_TREE: - break; - case BSET_RO_AUX_TREE: - ro_aux_tree_fix_invalidated_key(b, t, k); - break; - case BSET_RW_AUX_TREE: - rw_aux_tree_fix_invalidated_key(b, t, k); - break; - } -} - static void bch2_bset_fix_lookup_table(struct btree *b, struct bset_tree *t, struct bkey_packed *_where, @@ -1262,7 +1149,7 @@ slowpath: n = n * 2 + (cmp < 0); } while (n < t->size); - inorder = __eytzinger1_to_inorder(n >> 1, t->size, t->extra); + inorder = __eytzinger1_to_inorder(n >> 1, t->size - 1, t->extra); /* * n would have been the node we recursed to - the low bit tells us if @@ -1273,7 +1160,7 @@ slowpath: if (unlikely(!inorder)) return btree_bkey_first(b, t); - f = &base->f[eytzinger1_prev(n >> 1, t->size)]; + f = &base->f[eytzinger1_prev(n >> 1, t->size - 1)]; } return cacheline_to_bkey(b, t, inorder, f->key_offset); @@ -1690,7 +1577,7 @@ void bch2_bfloat_to_text(struct printbuf *out, struct btree *b, if (!inorder || inorder >= t->size) return; - j = __inorder_to_eytzinger1(inorder, t->size, t->extra); + j = __inorder_to_eytzinger1(inorder, t->size - 1, t->extra); if (k != tree_to_bkey(b, t, j)) return; diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h index e42f866c..0d46534c 100644 --- a/libbcachefs/bset.h +++ b/libbcachefs/bset.h @@ -361,7 +361,6 @@ void bch2_bset_init_first(struct btree *, struct bset *); void bch2_bset_init_next(struct bch_fs *, struct btree *, struct btree_node_entry *); void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool); -void bch2_bset_fix_invalidated_key(struct btree *, struct bkey_packed *); void bch2_bset_insert(struct btree *, struct btree_node_iter *, struct bkey_packed *, struct bkey_i *, unsigned); diff --git 
a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 2788ba17..fc6c4d4c 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -858,7 +858,7 @@ lock_node: if (bch2_btree_node_relock(trans, path, level + 1)) goto retry; - trace_trans_restart_btree_node_reused(trans->ip, + trace_trans_restart_btree_node_reused(trans->fn, trace_ip, path->btree_id, &path->pos); diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 0625a65d..268ad74d 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -156,6 +156,34 @@ static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst) } } +static void bch2_btree_node_update_key_early(struct bch_fs *c, + enum btree_id btree, unsigned level, + struct bkey_s_c old, struct bkey_i *new) +{ + struct btree *b; + struct bkey_buf tmp; + int ret; + + bch2_bkey_buf_init(&tmp); + bch2_bkey_buf_reassemble(&tmp, c, old); + + b = bch2_btree_node_get_noiter(c, tmp.k, btree, level, true); + if (!IS_ERR_OR_NULL(b)) { + mutex_lock(&c->btree_cache.lock); + + bch2_btree_node_hash_remove(&c->btree_cache, b); + + bkey_copy(&b->key, new); + ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); + BUG_ON(ret); + + mutex_unlock(&c->btree_cache.lock); + six_unlock_read(&b->c.lock); + } + + bch2_bkey_buf_exit(&tmp, c); +} + static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min) { struct bkey_i_btree_ptr_v2 *new; @@ -523,18 +551,6 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, } } - if (fsck_err_on(data_type == BCH_DATA_btree && - g->mark.gen != p.ptr.gen, c, - "bucket %u:%zu data type %s has metadata but wrong gen: %u != %u\n" - "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), - bch2_data_types[ptr_data_type(k->k, &p.ptr)], - p.ptr.gen, g->mark.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { - g->_mark.data_type = data_type; - g->gen_valid = true; - } - if (fsck_err_on(gen_cmp(p.ptr.gen, g->mark.gen) > 0, c, "bucket 
%u:%zu data type %s ptr gen in the future: %u > %u\n" "while marking %s", @@ -573,7 +589,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) do_update = true; - if (p.ptr.gen != g->mark.gen) + if (data_type != BCH_DATA_btree && p.ptr.gen != g->mark.gen) continue; if (fsck_err_on(g->mark.data_type && @@ -687,16 +703,19 @@ found: } ret = bch2_journal_key_insert_take(c, btree_id, level, new); - - if (ret) + if (ret) { kfree(new); - else { - bch2_bkey_val_to_text(&PBUF(buf), c, *k); - bch_info(c, "updated %s", buf); - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(new)); - bch_info(c, "new key %s", buf); - *k = bkey_i_to_s_c(new); + return ret; } + + if (level) + bch2_btree_node_update_key_early(c, btree_id, level - 1, *k, new); + + bch2_bkey_val_to_text(&PBUF(buf), c, *k); + bch_info(c, "updated %s", buf); + bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(new)); + bch_info(c, "new key %s", buf); + *k = bkey_i_to_s_c(new); } fsck_err: return ret; diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 1455dc78..a3651325 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -972,19 +972,23 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); - b->written += sectors; - blacklisted = bch2_journal_seq_is_blacklisted(c, le64_to_cpu(i->journal_seq), true); btree_err_on(blacklisted && first, BTREE_ERR_FIXABLE, c, ca, b, i, - "first btree node bset has blacklisted journal seq"); + "first btree node bset has blacklisted journal seq (%llu)", + le64_to_cpu(i->journal_seq)); btree_err_on(blacklisted && ptr_written, BTREE_ERR_FIXABLE, c, ca, b, i, - "found blacklisted bset in btree node with sectors_written"); + "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u", + le64_to_cpu(i->journal_seq), + b->written, b->written + sectors, ptr_written); + + b->written += sectors; + if (blacklisted && 
!first) continue; diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index e8e0adac..db179013 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -363,7 +363,7 @@ bool __bch2_btree_node_lock(struct btree_trans *trans, } if (unlikely(deadlock_path)) { - trace_trans_restart_would_deadlock(trans->ip, ip, + trace_trans_restart_would_deadlock(trans->fn, ip, trans->in_traverse_all, reason, deadlock_path->btree_id, deadlock_path->cached, @@ -548,7 +548,7 @@ bool bch2_trans_relock(struct btree_trans *trans) trans_for_each_path(trans, path) if (path->should_be_locked && !bch2_btree_path_relock(trans, path, _RET_IP_)) { - trace_trans_restart_relock(trans->ip, _RET_IP_, + trace_trans_restart_relock(trans->fn, _RET_IP_, path->btree_id, &path->pos); BUG_ON(!trans->restarted); return false; @@ -1519,7 +1519,7 @@ out: trans->in_traverse_all = false; - trace_trans_traverse_all(trans->ip, trace_ip); + trace_trans_traverse_all(trans->fn, trace_ip); return ret; } @@ -2843,7 +2843,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) trans->mem_bytes = new_bytes; if (old_bytes) { - trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes); + trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes); btree_trans_restart(trans); return ERR_PTR(-EINTR); } @@ -2927,14 +2927,15 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) trans->updates = p; p += updates_bytes; } -void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, - unsigned expected_nr_iters, - size_t expected_mem_bytes) +void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, + unsigned expected_nr_iters, + size_t expected_mem_bytes, + const char *fn) __acquires(&c->btree_trans_barrier) { memset(trans, 0, sizeof(*trans)); trans->c = c; - trans->ip = _RET_IP_; + trans->fn = fn; bch2_trans_alloc_paths(trans, c); @@ -2967,7 +2968,7 @@ static void check_btree_paths_leaked(struct btree_trans *trans) goto leaked; 
return; leaked: - bch_err(c, "btree paths leaked from %pS!", (void *) trans->ip); + bch_err(c, "btree paths leaked from %s!", trans->fn); trans_for_each_path(trans, path) if (path->ref) printk(KERN_ERR " btree %s %pS\n", @@ -3060,7 +3061,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) if (!trans_has_locks(trans)) continue; - pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip); + pr_buf(out, "%i %s\n", trans->pid, trans->fn); trans_for_each_path(trans, path) { if (!path->nodes_locked) diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 4c903b9d..eceec5d5 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -354,9 +354,12 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, /* new multiple iterator interface: */ void bch2_dump_trans_paths_updates(struct btree_trans *); -void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); +void __bch2_trans_init(struct btree_trans *, struct bch_fs *, + unsigned, size_t, const char *); void bch2_trans_exit(struct btree_trans *); +#define bch2_trans_init(...) 
__bch2_trans_init(__VA_ARGS__, __func__) + void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_btree_iter_exit(struct bch_fs *); diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index 80ed79b0..1d7b1012 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -208,7 +208,6 @@ static int btree_key_cache_fill(struct btree_trans *trans, struct btree_path *ck_path, struct bkey_cached *ck) { - struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; unsigned new_u64s = 0; @@ -223,7 +222,7 @@ static int btree_key_cache_fill(struct btree_trans *trans, goto err; if (!bch2_btree_node_relock(trans, ck_path, 0)) { - trace_transaction_restart_ip(trans->ip, _THIS_IP_); + trace_transaction_restart_ip(trans->fn, _THIS_IP_); ret = btree_trans_restart(trans); goto err; } @@ -238,7 +237,7 @@ static int btree_key_cache_fill(struct btree_trans *trans, new_u64s = roundup_pow_of_two(new_u64s); new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS); if (!new_k) { - bch_err(c, "error allocating memory for key cache key, btree %s u64s %u", + bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", bch2_btree_ids[ck->key.btree_id], new_u64s); ret = -ENOMEM; goto err; @@ -318,7 +317,7 @@ retry: if (!trans->restarted) goto retry; - trace_transaction_restart_ip(trans->ip, _THIS_IP_); + trace_transaction_restart_ip(trans->fn, _THIS_IP_); ret = -EINTR; goto err; } @@ -338,7 +337,7 @@ fill: if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) { if (!path->locks_want && !__bch2_btree_path_upgrade(trans, path, 1)) { - trace_transaction_restart_ip(trans->ip, _THIS_IP_); + trace_transaction_restart_ip(trans->fn, _THIS_IP_); ret = btree_trans_restart(trans); goto err; } diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 1ace7604..914d536c 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -368,6 +368,7 @@ struct btree_trans_commit_hook { struct 
btree_trans { struct bch_fs *c; + const char *fn; struct list_head list; struct btree *locking; unsigned locking_path_idx; @@ -375,7 +376,6 @@ struct btree_trans { u8 locking_btree_id; u8 locking_level; pid_t pid; - unsigned long ip; int srcu_idx; u8 nr_sorted; diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index e1a5e34e..47568a0b 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -955,7 +955,7 @@ retry: * instead of locking/reserving all the way to the root: */ if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) { - trace_trans_restart_iter_upgrade(trans->ip, _RET_IP_, + trace_trans_restart_iter_upgrade(trans->fn, _RET_IP_, path->btree_id, &path->pos); ret = btree_trans_restart(trans); return ERR_PTR(ret); @@ -1019,7 +1019,7 @@ retry: BTREE_UPDATE_JOURNAL_RES, journal_flags); if (ret) { - trace_trans_restart_journal_preres_get(trans->ip, _RET_IP_); + trace_trans_restart_journal_preres_get(trans->fn, _RET_IP_); goto err; } diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index e2e878b8..ca98e685 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -266,7 +266,7 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s, return ret; if (!bch2_trans_relock(trans)) { - trace_trans_restart_journal_preres_get(trans->ip, trace_ip); + trace_trans_restart_journal_preres_get(trans->fn, trace_ip); return -EINTR; } @@ -305,7 +305,8 @@ static noinline void journal_transaction_name(struct btree_trans *trans) l->entry.pad[0] = 0; l->entry.pad[1] = 0; l->entry.pad[2] = 0; - b = snprintf(l->d, buflen, "%ps", (void *) trans->ip); + b = min_t(unsigned, strlen(trans->fn), buflen); + memcpy(l->d, trans->fn, b); while (b < buflen) l->d[b++] = '\0'; @@ -425,7 +426,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, int ret; if (race_fault()) { - trace_trans_restart_fault_inject(trans->ip, trace_ip); + 
trace_trans_restart_fault_inject(trans->fn, trace_ip); trans->restarted = true; return -EINTR; } @@ -618,7 +619,7 @@ fail: bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b); } - trace_trans_restart_would_deadlock_write(trans->ip); + trace_trans_restart_would_deadlock_write(trans->fn); return btree_trans_restart(trans); } @@ -649,9 +650,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, char buf[200]; bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); - bch_err(c, "invalid bkey %s on insert from %ps -> %ps: %s\n", - buf, (void *) trans->ip, - (void *) i->ip_allocated, invalid); + bch_err(c, "invalid bkey %s on insert from %s -> %ps: %s\n", + buf, trans->fn, (void *) i->ip_allocated, invalid); bch2_fatal_error(c); return -EINVAL; } @@ -757,7 +757,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, return 0; if (ret == -EINTR) - trace_trans_restart_btree_node_split(trans->ip, trace_ip, + trace_trans_restart_btree_node_split(trans->fn, trace_ip, i->btree_id, &i->path->pos); break; case BTREE_INSERT_NEED_MARK_REPLICAS: @@ -770,7 +770,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (bch2_trans_relock(trans)) return 0; - trace_trans_restart_mark_replicas(trans->ip, trace_ip); + trace_trans_restart_mark_replicas(trans->fn, trace_ip); ret = -EINTR; break; case BTREE_INSERT_NEED_JOURNAL_RES: @@ -790,13 +790,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (bch2_trans_relock(trans)) return 0; - trace_trans_restart_journal_res_get(trans->ip, trace_ip); + trace_trans_restart_journal_res_get(trans->fn, trace_ip); ret = -EINTR; break; case BTREE_INSERT_NEED_JOURNAL_RECLAIM: bch2_trans_unlock(trans); - trace_trans_blocked_journal_reclaim(trans->ip, trace_ip); + trace_trans_blocked_journal_reclaim(trans->fn, trace_ip); wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); @@ -806,7 +806,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, if 
(bch2_trans_relock(trans)) return 0; - trace_trans_restart_journal_reclaim(trans->ip, trace_ip); + trace_trans_restart_journal_reclaim(trans->fn, trace_ip); ret = -EINTR; break; default: @@ -815,7 +815,9 @@ int bch2_trans_commit_error(struct btree_trans *trans, } BUG_ON((ret == EINTR || ret == -EAGAIN) && !trans->restarted); - BUG_ON(ret == -ENOSPC && (trans->flags & BTREE_INSERT_NOFAIL)); + BUG_ON(ret == -ENOSPC && + !(trans->flags & BTREE_INSERT_NOWAIT) && + (trans->flags & BTREE_INSERT_NOFAIL)); return ret; } @@ -899,7 +901,7 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) } if (ret == -EINTR) - trace_trans_restart_mark(trans->ip, _RET_IP_, + trace_trans_restart_mark(trans->fn, _RET_IP_, i->btree_id, &i->path->pos); if (ret) return ret; @@ -929,7 +931,7 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) BTREE_TRIGGER_OVERWRITE|i->flags); if (ret == -EINTR) - trace_trans_restart_mark(trans->ip, _RET_IP_, + trace_trans_restart_mark(trans->fn, _RET_IP_, i->btree_id, &i->path->pos); if (ret) return ret; @@ -996,7 +998,7 @@ int __bch2_trans_commit(struct btree_trans *trans) BUG_ON(!i->path->should_be_locked); if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) { - trace_trans_restart_upgrade(trans->ip, _RET_IP_, + trace_trans_restart_upgrade(trans->fn, _RET_IP_, i->btree_id, &i->path->pos); ret = btree_trans_restart(trans); goto out; diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index c72fe777..fb0f64f0 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -564,9 +564,10 @@ static int bch2_mark_alloc(struct btree_trans *trans, * before the bucket became empty again, then the we don't have * to wait on a journal flush before we can reuse the bucket: */ - v->journal_seq = !new_u.data_type && + new_u.journal_seq = !new_u.data_type && bch2_journal_noflush_seq(&c->journal, journal_seq) - ? 0 : cpu_to_le64(journal_seq); + ? 
0 : journal_seq; + v->journal_seq = cpu_to_le64(new_u.journal_seq); } ca = bch_dev_bkey_exists(c, new.k->p.inode); diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c index db68a782..aa26588e 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/chardev.c @@ -568,8 +568,11 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c, if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!dev) + return -EINVAL; + for_each_online_member(ca, c, i) - if (ca->disk_sb.bdev->bd_dev == dev) { + if (ca->dev == dev) { percpu_ref_put(&ca->io_ref); return i; } diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c index fbe8603c..a1d89923 100644 --- a/libbcachefs/checksum.c +++ b/libbcachefs/checksum.c @@ -407,16 +407,12 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, } #ifdef __KERNEL__ -int bch2_request_key(struct bch_sb *sb, struct bch_key *key) +static int __bch2_request_key(char *key_description, struct bch_key *key) { - char key_description[60]; struct key *keyring_key; const struct user_key_payload *ukp; int ret; - snprintf(key_description, sizeof(key_description), - "bcachefs:%pUb", &sb->user_uuid); - keyring_key = request_key(&key_type_logon, key_description, NULL); if (IS_ERR(keyring_key)) return PTR_ERR(keyring_key); @@ -436,16 +432,10 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key) } #else #include -#include -int bch2_request_key(struct bch_sb *sb, struct bch_key *key) +static int __bch2_request_key(char *key_description, struct bch_key *key) { key_serial_t key_id; - char key_description[60]; - char uuid[40]; - - uuid_unparse_lower(sb->user_uuid.b, uuid); - sprintf(key_description, "bcachefs:%s", uuid); key_id = request_key("user", key_description, NULL, KEY_SPEC_USER_KEYRING); @@ -459,6 +449,17 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key) } #endif +int bch2_request_key(struct bch_sb *sb, struct bch_key *key) +{ + char key_description[60]; + char uuid[40]; + + uuid_unparse_lower(sb->user_uuid.b, uuid); + 
sprintf(key_description, "bcachefs:%s", uuid); + + return __bch2_request_key(key_description, key); +} + int bch2_decrypt_sb_key(struct bch_fs *c, struct bch_sb_field_crypt *crypt, struct bch_key *key) diff --git a/libbcachefs/disk_groups.c b/libbcachefs/disk_groups.c index c52b6faa..6c84297e 100644 --- a/libbcachefs/disk_groups.c +++ b/libbcachefs/disk_groups.c @@ -17,24 +17,20 @@ static int group_cmp(const void *_l, const void *_r) strncmp(l->label, r->label, sizeof(l->label)); } -static const char *bch2_sb_disk_groups_validate(struct bch_sb *sb, - struct bch_sb_field *f) +static int bch2_sb_disk_groups_validate(struct bch_sb *sb, + struct bch_sb_field *f, + struct printbuf *err) { struct bch_sb_field_disk_groups *groups = field_to_type(f, disk_groups); struct bch_disk_group *g, *sorted = NULL; - struct bch_sb_field_members *mi; - struct bch_member *m; - unsigned i, nr_groups, len; - const char *err = NULL; + struct bch_sb_field_members *mi = bch2_sb_get_members(sb); + unsigned nr_groups = disk_groups_nr(groups); + unsigned i, len; + int ret = -EINVAL; - mi = bch2_sb_get_members(sb); - groups = bch2_sb_get_disk_groups(sb); - nr_groups = disk_groups_nr(groups); - - for (m = mi->members; - m < mi->members + sb->nr_devices; - m++) { + for (i = 0; i < sb->nr_devices; i++) { + struct bch_member *m = mi->members + i; unsigned g; if (!BCH_MEMBER_GROUP(m)) @@ -42,45 +38,53 @@ static const char *bch2_sb_disk_groups_validate(struct bch_sb *sb, g = BCH_MEMBER_GROUP(m) - 1; - if (g >= nr_groups || - BCH_GROUP_DELETED(&groups->entries[g])) - return "disk has invalid group"; + if (g >= nr_groups) { + pr_buf(err, "disk %u has invalid label %u (have %u)", + i, g, nr_groups); + return -EINVAL; + } + + if (BCH_GROUP_DELETED(&groups->entries[g])) { + pr_buf(err, "disk %u has deleted label %u", i, g); + return -EINVAL; + } } if (!nr_groups) - return NULL; + return 0; + + for (i = 0; i < nr_groups; i++) { + g = groups->entries + i; - for (g = groups->entries; - g < groups->entries + 
nr_groups; - g++) { if (BCH_GROUP_DELETED(g)) continue; len = strnlen(g->label, sizeof(g->label)); if (!len) { - err = "group with empty label"; - goto err; + pr_buf(err, "label %u empty", i); + return -EINVAL; } } sorted = kmalloc_array(nr_groups, sizeof(*sorted), GFP_KERNEL); if (!sorted) - return "cannot allocate memory"; + return -ENOMEM; memcpy(sorted, groups->entries, nr_groups * sizeof(*sorted)); sort(sorted, nr_groups, sizeof(*sorted), group_cmp, NULL); - for (i = 0; i + 1 < nr_groups; i++) - if (!BCH_GROUP_DELETED(sorted + i) && - !group_cmp(sorted + i, sorted + i + 1)) { - err = "duplicate groups"; + for (g = sorted; g + 1 < sorted + nr_groups; g++) + if (!BCH_GROUP_DELETED(g) && + !group_cmp(&g[0], &g[1])) { + pr_buf(err, "duplicate label %llu.", BCH_GROUP_PARENT(g)); + bch_scnmemcpy(err, g->label, strnlen(g->label, sizeof(g->label))); goto err; } - err = NULL; + ret = 0; err: kfree(sorted); - return err; + return 0; } static void bch2_sb_disk_groups_to_text(struct printbuf *out, diff --git a/libbcachefs/eytzinger.h b/libbcachefs/eytzinger.h index 26d5cad7..05429c96 100644 --- a/libbcachefs/eytzinger.h +++ b/libbcachefs/eytzinger.h @@ -17,10 +17,6 @@ * * With one based indexing each level of the tree starts at a power of two - * good for cacheline alignment: - * - * Size parameter is treated as if we were using 0 based indexing, however: - * valid nodes, and inorder indices, are in the range [1..size) - that is, there - * are actually size - 1 elements */ static inline unsigned eytzinger1_child(unsigned i, unsigned child) @@ -42,12 +38,12 @@ static inline unsigned eytzinger1_right_child(unsigned i) static inline unsigned eytzinger1_first(unsigned size) { - return rounddown_pow_of_two(size - 1); + return rounddown_pow_of_two(size); } static inline unsigned eytzinger1_last(unsigned size) { - return rounddown_pow_of_two(size) - 1; + return rounddown_pow_of_two(size + 1) - 1; } /* @@ -62,13 +58,13 @@ static inline unsigned eytzinger1_last(unsigned size) 
static inline unsigned eytzinger1_next(unsigned i, unsigned size) { - EBUG_ON(i >= size); + EBUG_ON(i > size); - if (eytzinger1_right_child(i) < size) { + if (eytzinger1_right_child(i) <= size) { i = eytzinger1_right_child(i); - i <<= __fls(size) - __fls(i); - i >>= i >= size; + i <<= __fls(size + 1) - __fls(i); + i >>= i > size; } else { i >>= ffz(i) + 1; } @@ -78,14 +74,14 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size) static inline unsigned eytzinger1_prev(unsigned i, unsigned size) { - EBUG_ON(i >= size); + EBUG_ON(i > size); - if (eytzinger1_left_child(i) < size) { + if (eytzinger1_left_child(i) <= size) { i = eytzinger1_left_child(i) + 1; - i <<= __fls(size) - __fls(i); + i <<= __fls(size + 1) - __fls(i); i -= 1; - i >>= i >= size; + i >>= i > size; } else { i >>= __ffs(i) + 1; } @@ -95,17 +91,17 @@ static inline unsigned eytzinger1_prev(unsigned i, unsigned size) static inline unsigned eytzinger1_extra(unsigned size) { - return (size - rounddown_pow_of_two(size - 1)) << 1; + return (size + 1 - rounddown_pow_of_two(size)) << 1; } static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, unsigned extra) { unsigned b = __fls(i); - unsigned shift = __fls(size - 1) - b; + unsigned shift = __fls(size) - b; int s; - EBUG_ON(!i || i >= size); + EBUG_ON(!i || i > size); i ^= 1U << b; i <<= 1; @@ -130,7 +126,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, unsigned shift; int s; - EBUG_ON(!i || i >= size); + EBUG_ON(!i || i > size); /* * sign bit trick: @@ -144,7 +140,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, shift = __ffs(i); i >>= shift + 1; - i |= 1U << (__fls(size - 1) - shift); + i |= 1U << (__fls(size) - shift); return i; } @@ -185,39 +181,39 @@ static inline unsigned eytzinger0_right_child(unsigned i) static inline unsigned eytzinger0_first(unsigned size) { - return eytzinger1_first(size + 1) - 1; + return eytzinger1_first(size) - 1; } static inline unsigned 
eytzinger0_last(unsigned size) { - return eytzinger1_last(size + 1) - 1; + return eytzinger1_last(size) - 1; } static inline unsigned eytzinger0_next(unsigned i, unsigned size) { - return eytzinger1_next(i + 1, size + 1) - 1; + return eytzinger1_next(i + 1, size) - 1; } static inline unsigned eytzinger0_prev(unsigned i, unsigned size) { - return eytzinger1_prev(i + 1, size + 1) - 1; + return eytzinger1_prev(i + 1, size) - 1; } static inline unsigned eytzinger0_extra(unsigned size) { - return eytzinger1_extra(size + 1); + return eytzinger1_extra(size); } static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size, unsigned extra) { - return __eytzinger1_to_inorder(i + 1, size + 1, extra) - 1; + return __eytzinger1_to_inorder(i + 1, size, extra) - 1; } static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size, unsigned extra) { - return __inorder_to_eytzinger1(i + 1, size + 1, extra) - 1; + return __inorder_to_eytzinger1(i + 1, size, extra) - 1; } static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size) diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 9cdd03f3..3b9b96e5 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -1024,7 +1024,7 @@ retry: bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot), - BTREE_ITER_SLOTS|BTREE_ITER_FILTER_SNAPSHOTS); + BTREE_ITER_SLOTS); while (1) { struct bkey_s_c k; unsigned bytes, sectors, offset_into_extent; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 2d2ad7f7..472c03d2 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -134,7 +134,6 @@ int __must_check bch2_write_inode(struct bch_fs *c, int ret; bch2_trans_init(&trans, c, 0, 512); - trans.ip = _RET_IP_; retry: bch2_trans_begin(&trans); diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 50b90b72..73558cd0 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -2241,7 +2241,7 @@ retry: bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, 
SPOS(inum.inum, bvec_iter.bi_sector, snapshot), - BTREE_ITER_SLOTS|BTREE_ITER_FILTER_SNAPSHOTS); + BTREE_ITER_SLOTS); while (1) { unsigned bytes, sectors, offset_into_extent; enum btree_id data_btree = BTREE_ID_extents; diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index faf82bb4..df4d1a7a 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -893,12 +893,13 @@ static void bch2_journal_read_device(struct closure *cl) struct journal_device *ja = container_of(cl, struct journal_device, read); struct bch_dev *ca = container_of(ja, struct bch_dev, journal); + struct bch_fs *c = ca->fs; struct journal_list *jlist = container_of(cl->parent, struct journal_list, cl); struct journal_read_buf buf = { NULL, 0 }; u64 min_seq = U64_MAX; unsigned i; - int ret; + int ret = 0; if (!ja->nr) goto out; @@ -944,6 +945,7 @@ static void bch2_journal_read_device(struct closure *cl) ja->discard_idx = ja->dirty_idx_ondisk = ja->dirty_idx = (ja->cur_idx + 1) % ja->nr; out: + bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret); kvpfree(buf.data, buf.size); percpu_ref_put(&ca->io_ref); closure_return(cl); diff --git a/libbcachefs/journal_seq_blacklist.c b/libbcachefs/journal_seq_blacklist.c index 10bd23e9..3cc63fc2 100644 --- a/libbcachefs/journal_seq_blacklist.c +++ b/libbcachefs/journal_seq_blacklist.c @@ -66,6 +66,12 @@ blacklist_entry_try_merge(struct bch_fs *c, return bl; } +static bool bl_entry_contig_or_overlaps(struct journal_seq_blacklist_entry *e, + u64 start, u64 end) +{ + return !(end < le64_to_cpu(e->start) || le64_to_cpu(e->end) < start); +} + int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) { struct bch_sb_field_journal_seq_blacklist *bl; @@ -76,28 +82,21 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); nr = blacklist_nr_entries(bl); - if (bl) { - for (i = 0; i < nr; i++) { - struct journal_seq_blacklist_entry *e = - 
bl->start + i; + for (i = 0; i < nr; i++) { + struct journal_seq_blacklist_entry *e = + bl->start + i; - if (start == le64_to_cpu(e->start) && - end == le64_to_cpu(e->end)) - goto out; + if (bl_entry_contig_or_overlaps(e, start, end)) { + e->start = cpu_to_le64(min(start, le64_to_cpu(e->start))); + e->end = cpu_to_le64(max(end, le64_to_cpu(e->end))); - if (start <= le64_to_cpu(e->start) && - end >= le64_to_cpu(e->end)) { - e->start = cpu_to_le64(start); - e->end = cpu_to_le64(end); - - if (i + 1 < nr) - bl = blacklist_entry_try_merge(c, - bl, i); - if (i) - bl = blacklist_entry_try_merge(c, - bl, i - 1); - goto out_write_sb; - } + if (i + 1 < nr) + bl = blacklist_entry_try_merge(c, + bl, i); + if (i) + bl = blacklist_entry_try_merge(c, + bl, i - 1); + goto out_write_sb; } } @@ -189,27 +188,34 @@ int bch2_blacklist_table_initialize(struct bch_fs *c) return 0; } -static const char * -bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, - struct bch_sb_field *f) +static int bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, + struct bch_sb_field *f, + struct printbuf *err) { struct bch_sb_field_journal_seq_blacklist *bl = field_to_type(f, journal_seq_blacklist); - struct journal_seq_blacklist_entry *i; - unsigned nr = blacklist_nr_entries(bl); + unsigned i, nr = blacklist_nr_entries(bl); - for (i = bl->start; i < bl->start + nr; i++) { - if (le64_to_cpu(i->start) >= - le64_to_cpu(i->end)) - return "entry start >= end"; + for (i = 0; i < nr; i++) { + struct journal_seq_blacklist_entry *e = bl->start + i; - if (i + 1 < bl->start + nr && - le64_to_cpu(i[0].end) > - le64_to_cpu(i[1].start)) - return "entries out of order"; + if (le64_to_cpu(e->start) >= + le64_to_cpu(e->end)) { + pr_buf(err, "entry %u start >= end (%llu >= %llu)", + i, le64_to_cpu(e->start), le64_to_cpu(e->end)); + return -EINVAL; + } + + if (i + 1 < nr && + le64_to_cpu(e[0].end) > + le64_to_cpu(e[1].start)) { + pr_buf(err, "entry %u out of order with next entry (%llu > %llu)", + i + 1, 
le64_to_cpu(e[0].end), le64_to_cpu(e[1].start)); + return -EINVAL; + } } - return NULL; + return 0; } static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out, @@ -235,3 +241,81 @@ const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = { .validate = bch2_sb_journal_seq_blacklist_validate, .to_text = bch2_sb_journal_seq_blacklist_to_text }; + +void bch2_blacklist_entries_gc(struct work_struct *work) +{ + struct bch_fs *c = container_of(work, struct bch_fs, + journal_seq_blacklist_gc_work); + struct journal_seq_blacklist_table *t; + struct bch_sb_field_journal_seq_blacklist *bl; + struct journal_seq_blacklist_entry *src, *dst; + struct btree_trans trans; + unsigned i, nr, new_nr; + int ret; + + bch2_trans_init(&trans, c, 0, 0); + + for (i = 0; i < BTREE_ID_NR; i++) { + struct btree_iter iter; + struct btree *b; + + bch2_trans_node_iter_init(&trans, &iter, i, POS_MIN, + 0, 0, BTREE_ITER_PREFETCH); +retry: + bch2_trans_begin(&trans); + + b = bch2_btree_iter_peek_node(&iter); + + while (!(ret = PTR_ERR_OR_ZERO(b)) && + b && + !test_bit(BCH_FS_STOPPING, &c->flags)) + b = bch2_btree_iter_next_node(&iter); + + if (ret == -EINTR) + goto retry; + + bch2_trans_iter_exit(&trans, &iter); + } + + bch2_trans_exit(&trans); + if (ret) + return; + + mutex_lock(&c->sb_lock); + bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb); + if (!bl) + goto out; + + nr = blacklist_nr_entries(bl); + dst = bl->start; + + t = c->journal_seq_blacklist_table; + BUG_ON(nr != t->nr); + + for (src = bl->start, i = eytzinger0_first(t->nr); + src < bl->start + nr; + src++, i = eytzinger0_next(i, nr)) { + BUG_ON(t->entries[i].start != le64_to_cpu(src->start)); + BUG_ON(t->entries[i].end != le64_to_cpu(src->end)); + + if (t->entries[i].dirty) + *dst++ = *src; + } + + new_nr = dst - bl->start; + + bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr); + + if (new_nr != nr) { + bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, + new_nr ? 
sb_blacklist_u64s(new_nr) : 0); + BUG_ON(new_nr && !bl); + + if (!new_nr) + c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3)); + + bch2_write_super(c); + } +out: + mutex_unlock(&c->sb_lock); +} diff --git a/libbcachefs/journal_seq_blacklist.h b/libbcachefs/journal_seq_blacklist.h index b4f876a0..afb886ec 100644 --- a/libbcachefs/journal_seq_blacklist.h +++ b/libbcachefs/journal_seq_blacklist.h @@ -17,4 +17,6 @@ int bch2_blacklist_table_initialize(struct bch_fs *); extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist; +void bch2_blacklist_entries_gc(struct work_struct *); + #endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */ diff --git a/libbcachefs/k-eytzinger.h b/libbcachefs/k-eytzinger.h deleted file mode 100644 index 819db34e..00000000 --- a/libbcachefs/k-eytzinger.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _K_EYTZINGER_H -#define _K_EYTZINGER_H - -/* One based indexing */ -/* k = number of children */ - -static inline unsigned k_eytzinger_child(unsigned k, unsigned i, unsigned child) -{ - return (k * i + child) * (k - 1); -} - -#endif /* _K_EYTZINGER_H */ diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index c6880654..c325a094 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -332,7 +332,7 @@ enum opt_type { x(journal_transaction_names, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ - BCH_SB_JOURNAL_TRANSACTION_NAMES, false, \ + BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \ NULL, "Log transaction function names in journal") \ x(noexcl, u8, \ OPT_FS|OPT_MOUNT, \ diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c index 54bb2a45..6fb8224f 100644 --- a/libbcachefs/quota.c +++ b/libbcachefs/quota.c @@ -6,15 +6,17 @@ #include "subvolume.h" #include "super-io.h" -static const char *bch2_sb_validate_quota(struct bch_sb *sb, - struct bch_sb_field *f) +static int bch2_sb_validate_quota(struct bch_sb *sb, struct bch_sb_field *f, + struct 
printbuf *err) { struct bch_sb_field_quota *q = field_to_type(f, quota); - if (vstruct_bytes(&q->field) != sizeof(*q)) - return "invalid field quota: wrong size"; + if (vstruct_bytes(&q->field) < sizeof(*q)) { + pr_buf(err, "wrong size (got %llu should be %zu)", + vstruct_bytes(&q->field), sizeof(*q)); + } - return NULL; + return 0; } const struct bch_sb_field_ops bch_sb_field_ops_quota = { diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 5da6b3b4..7e4400cc 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -519,7 +519,7 @@ static int bch2_journal_replay(struct bch_fs *c) size_t i; int ret; - keys_sorted = kmalloc_array(sizeof(*keys_sorted), keys->nr, GFP_KERNEL); + keys_sorted = kvmalloc_array(sizeof(*keys_sorted), keys->nr, GFP_KERNEL); if (!keys_sorted) return -ENOMEM; @@ -530,10 +530,8 @@ static int bch2_journal_replay(struct bch_fs *c) sizeof(keys_sorted[0]), journal_sort_seq_cmp, NULL); - if (keys->nr) { - bch_verbose(c, "starting journal replay, %zu keys", keys->nr); + if (keys->nr) replay_now_at(j, keys->journal_seq_base); - } for (i = 0; i < keys->nr; i++) { k = keys_sorted[i]; @@ -563,7 +561,7 @@ static int bch2_journal_replay(struct bch_fs *c) bch2_journal_flush_all_pins(j); ret = bch2_journal_error(j); err: - kfree(keys_sorted); + kvfree(keys_sorted); return ret; } @@ -901,7 +899,6 @@ static int bch2_fs_initialize_subvolumes(struct bch_fs *c) static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) { - struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; struct bch_inode_unpacked inode; @@ -915,7 +912,7 @@ static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) goto err; if (!bkey_is_inode(k.k)) { - bch_err(c, "root inode not found"); + bch_err(trans->c, "root inode not found"); ret = -ENOENT; goto err; } @@ -1008,6 +1005,7 @@ int bch2_fs_recovery(struct bch_fs *c) if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { struct journal_replay *i; + bch_verbose(c, "starting 
journal read"); ret = bch2_journal_read(c, &c->journal_entries, &blacklist_seq, &journal_seq); if (ret) @@ -1067,6 +1065,16 @@ use_clean: if (ret) goto err; + /* + * After an unclean shutdown, skip then next few journal sequence + * numbers as they may have been referenced by btree writes that + * happened before their corresponding journal writes - those btree + * writes need to be ignored, by skipping and blacklisting the next few + * journal sequence numbers: + */ + if (!c->sb.clean) + journal_seq += 8; + if (blacklist_seq != journal_seq) { ret = bch2_journal_seq_blacklist_add(c, blacklist_seq, journal_seq); @@ -1141,7 +1149,7 @@ use_clean: if (c->opts.norecovery) goto out; - bch_verbose(c, "starting journal replay"); + bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr); err = "journal replay failed"; ret = bch2_journal_replay(c); if (ret) @@ -1199,14 +1207,6 @@ use_clean: } mutex_lock(&c->sb_lock); - /* - * With journal replay done, we can clear the journal seq blacklist - * table: - */ - BUG_ON(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)); - if (le16_to_cpu(c->sb.version_min) >= bcachefs_metadata_version_btree_ptr_sectors_written) - bch2_sb_resize_journal_seq_blacklist(&c->disk_sb, 0); - if (c->opts.version_upgrade) { c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); @@ -1248,6 +1248,10 @@ use_clean: bch_info(c, "scanning for old btree nodes done"); } + if (c->journal_seq_blacklist_table && + c->journal_seq_blacklist_table->nr > 128) + queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work); + ret = 0; out: set_bit(BCH_FS_FSCK_DONE, &c->flags); diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index 6c5ea78d..a08f1e08 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -41,18 +41,19 @@ void bch2_replicas_entry_to_text(struct printbuf *out, { unsigned i; - pr_buf(out, "%s: %u/%u [", - bch2_data_types[e->data_type], - 
e->nr_required, - e->nr_devs); + if (e->data_type < BCH_DATA_NR) + pr_buf(out, "%s", bch2_data_types[e->data_type]); + else + pr_buf(out, "(invalid data type %u)", e->data_type); + pr_buf(out, ": %u/%u [", e->nr_required, e->nr_devs); for (i = 0; i < e->nr_devs; i++) pr_buf(out, i ? " %u" : "%u", e->devs[i]); pr_buf(out, "]"); } void bch2_cpu_replicas_to_text(struct printbuf *out, - struct bch_replicas_cpu *r) + struct bch_replicas_cpu *r) { struct bch_replicas_entry *e; bool first = true; @@ -808,67 +809,78 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, return 0; } -static const char *check_dup_replicas_entries(struct bch_replicas_cpu *cpu_r) +static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, + struct bch_sb *sb, + struct printbuf *err) { - unsigned i; + struct bch_sb_field_members *mi = bch2_sb_get_members(sb); + unsigned i, j; sort_cmp_size(cpu_r->entries, cpu_r->nr, cpu_r->entry_size, memcmp, NULL); - for (i = 0; i + 1 < cpu_r->nr; i++) { - struct bch_replicas_entry *l = + for (i = 0; i < cpu_r->nr; i++) { + struct bch_replicas_entry *e = cpu_replicas_entry(cpu_r, i); - struct bch_replicas_entry *r = - cpu_replicas_entry(cpu_r, i + 1); - BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0); + if (e->data_type >= BCH_DATA_NR) { + pr_buf(err, "invalid data type in entry "); + bch2_replicas_entry_to_text(err, e); + return -EINVAL; + } - if (!memcmp(l, r, cpu_r->entry_size)) - return "duplicate replicas entry"; + if (!e->nr_devs) { + pr_buf(err, "no devices in entry "); + bch2_replicas_entry_to_text(err, e); + return -EINVAL; + } + + if (e->nr_required > 1 && + e->nr_required >= e->nr_devs) { + pr_buf(err, "bad nr_required in entry "); + bch2_replicas_entry_to_text(err, e); + return -EINVAL; + } + + for (j = 0; j < e->nr_devs; j++) + if (!bch2_dev_exists(sb, mi, e->devs[j])) { + pr_buf(err, "invalid device %u in entry ", e->devs[j]); + bch2_replicas_entry_to_text(err, e); + return -EINVAL; + } + + if (i + 1 < cpu_r->nr) { + struct 
bch_replicas_entry *n = + cpu_replicas_entry(cpu_r, i + 1); + + BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0); + + if (!memcmp(e, n, cpu_r->entry_size)) { + pr_buf(err, "duplicate replicas entry "); + bch2_replicas_entry_to_text(err, e); + return -EINVAL; + } + } } - return NULL; + return 0; } -static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f) +static int bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f, + struct printbuf *err) { struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas); - struct bch_sb_field_members *mi = bch2_sb_get_members(sb); - struct bch_replicas_cpu cpu_r = { .entries = NULL }; - struct bch_replicas_entry *e; - const char *err; - unsigned i; + struct bch_replicas_cpu cpu_r; + int ret; - for_each_replicas_entry(sb_r, e) { - err = "invalid replicas entry: invalid data type"; - if (e->data_type >= BCH_DATA_NR) - goto err; - - err = "invalid replicas entry: no devices"; - if (!e->nr_devs) - goto err; - - err = "invalid replicas entry: bad nr_required"; - if (e->nr_required > 1 && - e->nr_required >= e->nr_devs) - goto err; - - err = "invalid replicas entry: invalid device"; - for (i = 0; i < e->nr_devs; i++) - if (!bch2_dev_exists(sb, mi, e->devs[i])) - goto err; - } - - err = "cannot allocate memory"; if (__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r)) - goto err; + return -ENOMEM; - err = check_dup_replicas_entries(&cpu_r); -err: + ret = bch2_cpu_replicas_validate(&cpu_r, sb, err); kfree(cpu_r.entries); - return err; + return ret; } static void bch2_sb_replicas_to_text(struct printbuf *out, @@ -893,38 +905,19 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas = { .to_text = bch2_sb_replicas_to_text, }; -static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f) +static int bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f, + struct printbuf *err) { struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); - 
struct bch_sb_field_members *mi = bch2_sb_get_members(sb); - struct bch_replicas_cpu cpu_r = { .entries = NULL }; - struct bch_replicas_entry_v0 *e; - const char *err; - unsigned i; + struct bch_replicas_cpu cpu_r; + int ret; - for_each_replicas_entry_v0(sb_r, e) { - err = "invalid replicas entry: invalid data type"; - if (e->data_type >= BCH_DATA_NR) - goto err; - - err = "invalid replicas entry: no devices"; - if (!e->nr_devs) - goto err; - - err = "invalid replicas entry: invalid device"; - for (i = 0; i < e->nr_devs; i++) - if (!bch2_dev_exists(sb, mi, e->devs[i])) - goto err; - } - - err = "cannot allocate memory"; if (__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r)) - goto err; + return -ENOMEM; - err = check_dup_replicas_entries(&cpu_r); -err: + ret = bch2_cpu_replicas_validate(&cpu_r, sb, err); kfree(cpu_r.entries); - return err; + return ret; } const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 8e28a13a..49dafdad 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -27,8 +27,8 @@ const char * const bch2_sb_fields[] = { NULL }; -static const char *bch2_sb_field_validate(struct bch_sb *, - struct bch_sb_field *); +static int bch2_sb_field_validate(struct bch_sb *, struct bch_sb_field *, + struct printbuf *); struct bch_sb_field *bch2_sb_field_get(struct bch_sb *sb, enum bch_sb_field_type type) @@ -202,22 +202,31 @@ static inline void __bch2_sb_layout_size_assert(void) BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512); } -static const char *validate_sb_layout(struct bch_sb_layout *layout) +static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out) { u64 offset, prev_offset, max_sectors; unsigned i; - if (uuid_le_cmp(layout->magic, BCACHE_MAGIC)) - return "Not a bcachefs superblock layout"; + if (uuid_le_cmp(layout->magic, BCACHE_MAGIC)) { + pr_buf(out, "Not a bcachefs superblock layout"); + return -EINVAL; + } - if (layout->layout_type != 
0) - return "Invalid superblock layout type"; + if (layout->layout_type != 0) { + pr_buf(out, "Invalid superblock layout type %u", + layout->layout_type); + return -EINVAL; + } - if (!layout->nr_superblocks) - return "Invalid superblock layout: no superblocks"; + if (!layout->nr_superblocks) { + pr_buf(out, "Invalid superblock layout: no superblocks"); + return -EINVAL; + } - if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) - return "Invalid superblock layout: too many superblocks"; + if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) { + pr_buf(out, "Invalid superblock layout: too many superblocks"); + return -EINVAL; + } max_sectors = 1 << layout->sb_max_size_bits; @@ -226,122 +235,134 @@ static const char *validate_sb_layout(struct bch_sb_layout *layout) for (i = 1; i < layout->nr_superblocks; i++) { offset = le64_to_cpu(layout->sb_offset[i]); - if (offset < prev_offset + max_sectors) - return "Invalid superblock layout: superblocks overlap"; + if (offset < prev_offset + max_sectors) { + pr_buf(out, "Invalid superblock layout: superblocks overlap\n" + " (sb %u ends at %llu next starts at %llu", + i - 1, prev_offset + max_sectors, offset); + return -EINVAL; + } prev_offset = offset; } - return NULL; + return 0; } -const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) +static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out) { struct bch_sb *sb = disk_sb->sb; struct bch_sb_field *f; struct bch_sb_field_members *mi; - const char *err; u32 version, version_min; u16 block_size; + int ret; version = le16_to_cpu(sb->version); version_min = version >= bcachefs_metadata_version_new_versioning ? 
le16_to_cpu(sb->version_min) : version; - if (version >= bcachefs_metadata_version_max || - version_min < bcachefs_metadata_version_min) - return "Unsupported superblock version"; + if (version >= bcachefs_metadata_version_max) { + pr_buf(out, "Unsupported superblock version %u (min %u, max %u)", + version, bcachefs_metadata_version_min, bcachefs_metadata_version_max); + return -EINVAL; + } - if (version_min > version) - return "Bad minimum version"; + if (version_min < bcachefs_metadata_version_min) { + pr_buf(out, "Unsupported superblock version %u (min %u, max %u)", + version_min, bcachefs_metadata_version_min, bcachefs_metadata_version_max); + return -EINVAL; + } + + if (version_min > version) { + pr_buf(out, "Bad minimum version %u, greater than version field %u", + version_min, version); + return -EINVAL; + } if (sb->features[1] || - (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) - return "Filesystem has incompatible features"; + (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) { + pr_buf(out, "Filesystem has incompatible features"); + return -EINVAL; + } block_size = le16_to_cpu(sb->block_size); - if (block_size > PAGE_SECTORS) - return "Bad block size"; + if (block_size > PAGE_SECTORS) { + pr_buf(out, "Block size too big (got %u, max %u)", + block_size, PAGE_SECTORS); + return -EINVAL; + } - if (bch2_is_zero(sb->user_uuid.b, sizeof(uuid_le))) - return "Bad user UUID"; + if (bch2_is_zero(sb->user_uuid.b, sizeof(uuid_le))) { + pr_buf(out, "Bad user UUID (got zeroes)"); + return -EINVAL; + } - if (bch2_is_zero(sb->uuid.b, sizeof(uuid_le))) - return "Bad internal UUID"; + if (bch2_is_zero(sb->uuid.b, sizeof(uuid_le))) { + pr_buf(out, "Bad internal UUID (got zeroes)"); + return -EINVAL; + } if (!sb->nr_devices || - sb->nr_devices <= sb->dev_idx || - sb->nr_devices > BCH_SB_MEMBERS_MAX) - return "Bad number of member devices"; + sb->nr_devices > BCH_SB_MEMBERS_MAX) { + pr_buf(out, "Bad number of member devices %u (max %u)", +
sb->nr_devices, BCH_SB_MEMBERS_MAX); + return -EINVAL; + } - if (!BCH_SB_META_REPLICAS_WANT(sb) || - BCH_SB_META_REPLICAS_WANT(sb) > BCH_REPLICAS_MAX) - return "Invalid number of metadata replicas"; - - if (!BCH_SB_META_REPLICAS_REQ(sb) || - BCH_SB_META_REPLICAS_REQ(sb) > BCH_REPLICAS_MAX) - return "Invalid number of metadata replicas"; - - if (!BCH_SB_DATA_REPLICAS_WANT(sb) || - BCH_SB_DATA_REPLICAS_WANT(sb) > BCH_REPLICAS_MAX) - return "Invalid number of data replicas"; - - if (!BCH_SB_DATA_REPLICAS_REQ(sb) || - BCH_SB_DATA_REPLICAS_REQ(sb) > BCH_REPLICAS_MAX) - return "Invalid number of data replicas"; - - if (BCH_SB_META_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR) - return "Invalid metadata checksum type"; - - if (BCH_SB_DATA_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR) - return "Invalid metadata checksum type"; - - if (BCH_SB_COMPRESSION_TYPE(sb) >= BCH_COMPRESSION_OPT_NR) - return "Invalid compression type"; - - if (!BCH_SB_BTREE_NODE_SIZE(sb)) - return "Btree node size not set"; - - if (BCH_SB_GC_RESERVE(sb) < 5) - return "gc reserve percentage too small"; + if (sb->dev_idx >= sb->nr_devices) { + pr_buf(out, "Bad dev_idx (got %u, nr_devices %u)", + sb->dev_idx, sb->nr_devices); + return -EINVAL; + } if (!sb->time_precision || - le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) - return "invalid time precision"; + le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) { + pr_buf(out, "Invalid time precision: %u (min 1, max %lu)", + le32_to_cpu(sb->time_precision), NSEC_PER_SEC); + return -EINVAL; + } /* validate layout */ - err = validate_sb_layout(&sb->layout); - if (err) - return err; + ret = validate_sb_layout(&sb->layout, out); + if (ret) + return ret; vstruct_for_each(sb, f) { - if (!f->u64s) - return "Invalid superblock: invalid optional field"; + if (!f->u64s) { + pr_buf(out, "Invalid superblock: optional with size 0 (type %u)", + le32_to_cpu(f->type)); + return -EINVAL; + } - if (vstruct_next(f) > vstruct_last(sb)) - return "Invalid superblock: invalid optional field"; + if 
(vstruct_next(f) > vstruct_last(sb)) { + pr_buf(out, "Invalid superblock: optional field extends past end of superblock (type %u)", + le32_to_cpu(f->type)); + return -EINVAL; + } } /* members must be validated first: */ mi = bch2_sb_get_members(sb); - if (!mi) - return "Invalid superblock: member info area missing"; + if (!mi) { + pr_buf(out, "Invalid superblock: member info area missing"); + return -EINVAL; + } - err = bch2_sb_field_validate(sb, &mi->field); - if (err) - return err; + ret = bch2_sb_field_validate(sb, &mi->field, out); + if (ret) + return ret; vstruct_for_each(sb, f) { if (le32_to_cpu(f->type) == BCH_SB_FIELD_members) continue; - err = bch2_sb_field_validate(sb, f); - if (err) - return err; + ret = bch2_sb_field_validate(sb, f, out); + if (ret) + return ret; } - return NULL; + return 0; } /* device open: */ @@ -470,10 +491,12 @@ int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) /* read superblock: */ -static const char *read_one_super(struct bch_sb_handle *sb, u64 offset) +static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err) { struct bch_csum csum; + u32 version, version_min; size_t bytes; + int ret; reread: bio_reset(sb->bio); bio_set_dev(sb->bio, sb->bdev); @@ -481,40 +504,65 @@ reread: bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META); bch2_bio_map(sb->bio, sb->sb, sb->buffer_size); - if (submit_bio_wait(sb->bio)) - return "IO error"; + ret = submit_bio_wait(sb->bio); + if (ret) { + pr_buf(err, "IO error: %i", ret); + return ret; + } - if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC)) - return "Not a bcachefs superblock"; + if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC)) { + pr_buf(err, "Not a bcachefs superblock"); + return -EINVAL; + } - if (le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_min || - le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max) - return "Unsupported superblock version"; + version = le16_to_cpu(sb->sb->version); + version_min = version >= 
bcachefs_metadata_version_new_versioning + ? le16_to_cpu(sb->sb->version_min) + : version; + + if (version >= bcachefs_metadata_version_max) { + pr_buf(err, "Unsupported superblock version %u (min %u, max %u)", + version, bcachefs_metadata_version_min, bcachefs_metadata_version_max); + return -EINVAL; + } + + if (version_min < bcachefs_metadata_version_min) { + pr_buf(err, "Unsupported superblock version %u (min %u, max %u)", + version_min, bcachefs_metadata_version_min, bcachefs_metadata_version_max); + return -EINVAL; + } bytes = vstruct_bytes(sb->sb); - if (bytes > 512 << sb->sb->layout.sb_max_size_bits) - return "Bad superblock: too big"; + if (bytes > 512 << sb->sb->layout.sb_max_size_bits) { + pr_buf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)", + bytes, 512UL << sb->sb->layout.sb_max_size_bits); + return -EINVAL; + } if (bytes > sb->buffer_size) { if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s))) - return "cannot allocate memory"; + return -ENOMEM; goto reread; } - if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) - return "unknown csum type"; + if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) { + pr_buf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb)); + return -EINVAL; + } /* XXX: verify MACs */ csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb), null_nonce(), sb->sb); - if (bch2_crc_cmp(csum, sb->sb->csum)) - return "bad checksum reading superblock"; + if (bch2_crc_cmp(csum, sb->sb->csum)) { + pr_buf(err, "bad checksum"); + return -EINVAL; + } sb->seq = le64_to_cpu(sb->sb->seq); - return NULL; + return 0; } int bch2_read_super(const char *path, struct bch_opts *opts, @@ -522,10 +570,16 @@ int bch2_read_super(const char *path, struct bch_opts *opts, { u64 offset = opt_get(*opts, sb); struct bch_sb_layout layout; - const char *err; + char *_err; + struct printbuf err; __le64 *i; int ret; + _err = kmalloc(4096, GFP_KERNEL); + if (!_err) + return -ENOMEM; + err = _PBUF(_err, 4096); + pr_verbose_init(*opts, ""); memset(sb, 0, 
sizeof(*sb)); @@ -554,25 +608,28 @@ int bch2_read_super(const char *path, struct bch_opts *opts, goto out; } - err = "cannot allocate memory"; ret = bch2_sb_realloc(sb, 0); - if (ret) + if (ret) { + pr_buf(&err, "error allocating memory for superblock"); goto err; + } - ret = -EFAULT; - err = "dynamic fault"; - if (bch2_fs_init_fault("read_super")) + if (bch2_fs_init_fault("read_super")) { + pr_buf(&err, "dynamic fault"); + ret = -EFAULT; goto err; + } - ret = -EINVAL; - err = read_one_super(sb, offset); - if (!err) + ret = read_one_super(sb, offset, &err); + if (!ret) goto got_super; if (opt_defined(*opts, sb)) goto err; - pr_err("error reading default superblock: %s", err); + printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s", + path, _err); + err = _PBUF(_err, 4096); /* * Error reading primary superblock - read location of backup @@ -588,13 +645,15 @@ int bch2_read_super(const char *path, struct bch_opts *opts, */ bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout)); - err = "IO error"; - if (submit_bio_wait(sb->bio)) + ret = submit_bio_wait(sb->bio); + if (ret) { + pr_buf(&err, "IO error: %i", ret); goto err; + } memcpy(&layout, sb->sb, sizeof(layout)); - err = validate_sb_layout(&layout); - if (err) + ret = validate_sb_layout(&layout, &err); + if (ret) goto err; for (i = layout.sb_offset; @@ -604,32 +663,39 @@ int bch2_read_super(const char *path, struct bch_opts *opts, if (offset == opt_get(*opts, sb)) continue; - err = read_one_super(sb, offset); - if (!err) + ret = read_one_super(sb, offset, &err); + if (!ret) goto got_super; } - ret = -EINVAL; goto err; got_super: - err = "Superblock block size smaller than device block size"; - ret = -EINVAL; if (le16_to_cpu(sb->sb->block_size) << 9 < bdev_logical_block_size(sb->bdev)) { - pr_err("error reading superblock: Superblock block size (%u) smaller than device block size (%u)", + pr_buf(&err, "block size (%u) smaller than device block size (%u)", le16_to_cpu(sb->sb->block_size) << 9, 
bdev_logical_block_size(sb->bdev)); - goto err_no_print; + ret = -EINVAL; + goto err; } ret = 0; sb->have_layout = true; + + ret = bch2_sb_validate(sb, &err); + if (ret) { + printk(KERN_ERR "bcachefs (%s): error validating superblock: %s", + path, _err); + goto err_no_print; + } out: pr_verbose_init(*opts, "ret %i", ret); + kfree(_err); return ret; err: - pr_err("error reading superblock: %s", err); + printk(KERN_ERR "bcachefs (%s): error reading superblock: %s", + path, _err); err_no_print: bch2_free_super(sb); goto out; @@ -704,7 +770,6 @@ int bch2_write_super(struct bch_fs *c) struct closure *cl = &c->sb_write; struct bch_dev *ca; unsigned i, sb = 0, nr_wrote; - const char *err; struct bch_devs_mask sb_written; bool wrote, can_mount_without_written, can_mount_with_written; unsigned degraded_flags = BCH_FORCE_IF_DEGRADED; @@ -731,10 +796,19 @@ int bch2_write_super(struct bch_fs *c) bch2_sb_from_fs(c, ca); for_each_online_member(ca, c, i) { - err = bch2_sb_validate(&ca->disk_sb); - if (err) { - bch2_fs_inconsistent(c, "sb invalid before write: %s", err); - ret = -1; + struct printbuf buf = { NULL, NULL }; + + ret = bch2_sb_validate(&ca->disk_sb, &buf); + if (ret) { + char *_buf = kmalloc(4096, GFP_NOFS); + if (_buf) { + buf = _PBUF(_buf, 4096); + bch2_sb_validate(&ca->disk_sb, &buf); + } + + bch2_fs_inconsistent(c, "sb invalid before write: %s", _buf); + kfree(_buf); + percpu_ref_put(&ca->io_ref); goto out; } } @@ -847,54 +921,57 @@ static int u64_cmp(const void *_l, const void *_r) return l < r ? -1 : l > r ? 
1 : 0; } -static const char *bch2_sb_validate_journal(struct bch_sb *sb, - struct bch_sb_field *f) +static int bch2_sb_validate_journal(struct bch_sb *sb, + struct bch_sb_field *f, + struct printbuf *err) { struct bch_sb_field_journal *journal = field_to_type(f, journal); struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx; - const char *err; + int ret = -EINVAL; unsigned nr; unsigned i; u64 *b; - journal = bch2_sb_get_journal(sb); - if (!journal) - return NULL; - nr = bch2_nr_journal_buckets(journal); if (!nr) - return NULL; + return 0; b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL); if (!b) - return "cannot allocate memory"; + return -ENOMEM; for (i = 0; i < nr; i++) b[i] = le64_to_cpu(journal->buckets[i]); sort(b, nr, sizeof(u64), u64_cmp, NULL); - err = "journal bucket at sector 0"; - if (!b[0]) + if (!b[0]) { + pr_buf(err, "journal bucket at sector 0"); goto err; + } - err = "journal bucket before first bucket"; - if (m && b[0] < le16_to_cpu(m->first_bucket)) + if (b[0] < le16_to_cpu(m->first_bucket)) { + pr_buf(err, "journal bucket %llu before first bucket %u", + b[0], le16_to_cpu(m->first_bucket)); goto err; + } - err = "journal bucket past end of device"; - if (m && b[nr - 1] >= le64_to_cpu(m->nbuckets)) + if (b[nr - 1] >= le64_to_cpu(m->nbuckets)) { + pr_buf(err, "journal bucket %llu past end of device (nbuckets %llu)", + b[nr - 1], le64_to_cpu(m->nbuckets)); goto err; + } - err = "duplicate journal buckets"; for (i = 0; i + 1 < nr; i++) - if (b[i] == b[i + 1]) + if (b[i] == b[i + 1]) { + pr_buf(err, "duplicate journal buckets %llu", b[i]); goto err; + } - err = NULL; + ret = 0; err: kfree(b); - return err; + return ret; } static const struct bch_sb_field_ops bch_sb_field_ops_journal = { @@ -903,39 +980,54 @@ static const struct bch_sb_field_ops bch_sb_field_ops_journal = { /* BCH_SB_FIELD_members: */ -static const char *bch2_sb_validate_members(struct bch_sb *sb, - struct bch_sb_field *f) +static int bch2_sb_validate_members(struct 
bch_sb *sb, + struct bch_sb_field *f, + struct printbuf *err) { struct bch_sb_field_members *mi = field_to_type(f, members); - struct bch_member *m; + unsigned i; if ((void *) (mi->members + sb->nr_devices) > - vstruct_end(&mi->field)) - return "Invalid superblock: bad member info"; + vstruct_end(&mi->field)) { + pr_buf(err, "too many devices for section size"); + return -EINVAL; + } + + for (i = 0; i < sb->nr_devices; i++) { + struct bch_member *m = mi->members + i; - for (m = mi->members; - m < mi->members + sb->nr_devices; - m++) { if (!bch2_member_exists(m)) continue; - if (le64_to_cpu(m->nbuckets) > LONG_MAX) - return "Too many buckets"; + if (le64_to_cpu(m->nbuckets) > LONG_MAX) { + pr_buf(err, "device %u: too many buckets (got %llu, max %lu)", + i, le64_to_cpu(m->nbuckets), LONG_MAX); + return -EINVAL; + } if (le64_to_cpu(m->nbuckets) - - le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS) - return "Not enough buckets"; + le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS) { + pr_buf(err, "device %u: not enough buckets (got %llu, max %u)", + i, le64_to_cpu(m->nbuckets), BCH_MIN_NR_NBUCKETS); + return -EINVAL; + } if (le16_to_cpu(m->bucket_size) < - le16_to_cpu(sb->block_size)) - return "bucket size smaller than block size"; + le16_to_cpu(sb->block_size)) { + pr_buf(err, "device %u: bucket size %u smaller than block size %u", + i, le16_to_cpu(m->bucket_size), le16_to_cpu(sb->block_size)); + return -EINVAL; + } if (le16_to_cpu(m->bucket_size) < - BCH_SB_BTREE_NODE_SIZE(sb)) - return "bucket size smaller than btree node size"; + BCH_SB_BTREE_NODE_SIZE(sb)) { + pr_buf(err, "device %u: bucket size %u smaller than btree node size %llu", + i, le16_to_cpu(m->bucket_size), BCH_SB_BTREE_NODE_SIZE(sb)); + return -EINVAL; + } } - return NULL; + return 0; } static const struct bch_sb_field_ops bch_sb_field_ops_members = { @@ -944,18 +1036,24 @@ static const struct bch_sb_field_ops bch_sb_field_ops_members = { /* BCH_SB_FIELD_crypt: */ -static const char 
*bch2_sb_validate_crypt(struct bch_sb *sb, - struct bch_sb_field *f) +static int bch2_sb_validate_crypt(struct bch_sb *sb, + struct bch_sb_field *f, + struct printbuf *err) { struct bch_sb_field_crypt *crypt = field_to_type(f, crypt); - if (vstruct_bytes(&crypt->field) != sizeof(*crypt)) - return "invalid field crypt: wrong size"; + if (vstruct_bytes(&crypt->field) < sizeof(*crypt)) { + pr_buf(err, "wrong size (got %llu should be %zu)", + vstruct_bytes(&crypt->field), sizeof(*crypt)); + return -EINVAL; + } - if (BCH_CRYPT_KDF_TYPE(crypt)) - return "invalid field crypt: bad kdf type"; + if (BCH_CRYPT_KDF_TYPE(crypt)) { + pr_buf(err, "bad kdf type %llu", BCH_CRYPT_KDF_TYPE(crypt)); + return -EINVAL; + } - return NULL; + return 0; } static const struct bch_sb_field_ops bch_sb_field_ops_crypt = { @@ -1164,15 +1262,19 @@ out: mutex_unlock(&c->sb_lock); } -static const char *bch2_sb_validate_clean(struct bch_sb *sb, - struct bch_sb_field *f) +static int bch2_sb_validate_clean(struct bch_sb *sb, + struct bch_sb_field *f, + struct printbuf *err) { struct bch_sb_field_clean *clean = field_to_type(f, clean); - if (vstruct_bytes(&clean->field) < sizeof(*clean)) - return "invalid field crypt: wrong size"; + if (vstruct_bytes(&clean->field) < sizeof(*clean)) { + pr_buf(err, "wrong size (got %llu should be %zu)", + vstruct_bytes(&clean->field), sizeof(*clean)); + return -EINVAL; + } - return NULL; + return 0; } static const struct bch_sb_field_ops bch_sb_field_ops_clean = { @@ -1186,14 +1288,26 @@ static const struct bch_sb_field_ops *bch2_sb_field_ops[] = { #undef x }; -static const char *bch2_sb_field_validate(struct bch_sb *sb, - struct bch_sb_field *f) +static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, + struct printbuf *orig_err) { unsigned type = le32_to_cpu(f->type); + struct printbuf err = *orig_err; + int ret; - return type < BCH_SB_FIELD_NR - ? 
bch2_sb_field_ops[type]->validate(sb, f) - : NULL; + if (type >= BCH_SB_FIELD_NR) + return 0; + + pr_buf(&err, "Invalid superblock section %s: ", bch2_sb_fields[type]); + + ret = bch2_sb_field_ops[type]->validate(sb, f, &err); + if (ret) { + pr_buf(&err, "\n"); + bch2_sb_field_to_text(&err, sb, f); + *orig_err = err; + } + + return ret; } void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h index 5c264875..3b425bed 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/super-io.h @@ -38,9 +38,8 @@ BCH_SB_FIELDS() extern const char * const bch2_sb_fields[]; struct bch_sb_field_ops { - const char * (*validate)(struct bch_sb *, struct bch_sb_field *); - void (*to_text)(struct printbuf *, struct bch_sb *, - struct bch_sb_field *); + int (*validate)(struct bch_sb *, struct bch_sb_field *, struct printbuf *); + void (*to_text)(struct printbuf *, struct bch_sb *, struct bch_sb_field *); }; static inline __le64 bch2_sb_magic(struct bch_fs *c) @@ -66,8 +65,6 @@ int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *); void bch2_free_super(struct bch_sb_handle *); int bch2_sb_realloc(struct bch_sb_handle *, unsigned); -const char *bch2_sb_validate(struct bch_sb_handle *); - int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); int bch2_write_super(struct bch_fs *); void __bch2_check_set_feature(struct bch_fs *, unsigned); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 3afa7ebd..577b58e4 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -528,6 +528,8 @@ void __bch2_fs_stop(struct bch_fs *c) set_bit(BCH_FS_STOPPING, &c->flags); + cancel_work_sync(&c->journal_seq_blacklist_gc_work); + down_write(&c->state_lock); bch2_fs_read_only(c); up_write(&c->state_lock); @@ -690,6 +692,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) spin_lock_init(&c->btree_write_error_lock); + INIT_WORK(&c->journal_seq_blacklist_gc_work, + 
bch2_blacklist_entries_gc); + INIT_LIST_HEAD(&c->journal_entries); INIT_LIST_HEAD(&c->journal_iters); @@ -737,7 +742,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; - scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid); + uuid_unparse_lower(c->sb.user_uuid.b, c->name); /* Compat: */ if (sb->version <= bcachefs_metadata_version_inode_v2 && @@ -1251,6 +1256,8 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) ca->disk_sb.bdev->bd_holder = ca; memset(sb, 0, sizeof(*sb)); + ca->dev = ca->disk_sb.bdev->bd_dev; + percpu_ref_reinit(&ca->io_ref); return 0; @@ -1596,18 +1603,20 @@ int bch2_dev_add(struct bch_fs *c, const char *path) struct bch_sb_field_members *mi; struct bch_member dev_mi; unsigned dev_idx, nr_devices, u64s; + char *_errbuf; + struct printbuf errbuf; int ret; + _errbuf = kmalloc(4096, GFP_KERNEL); + if (!_errbuf) + return -ENOMEM; + + errbuf = _PBUF(_errbuf, 4096); + ret = bch2_read_super(path, &opts, &sb); if (ret) { bch_err(c, "device add error: error reading super: %i", ret); - return ret; - } - - err = bch2_sb_validate(&sb); - if (err) { - bch_err(c, "device add error: error validating super: %s", err); - return -EINVAL; + goto err; } dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx]; @@ -1615,19 +1624,21 @@ int bch2_dev_add(struct bch_fs *c, const char *path) err = bch2_dev_may_add(sb.sb, c); if (err) { bch_err(c, "device add error: %s", err); - return -EINVAL; + ret = -EINVAL; + goto err; } ca = __bch2_dev_alloc(c, &dev_mi); if (!ca) { bch2_free_super(&sb); - return -ENOMEM; + ret = -ENOMEM; + goto err; } ret = __bch2_dev_attach_bdev(ca, &sb); if (ret) { bch2_dev_free(ca); - return ret; + goto err; } ret = bch2_dev_journal_alloc(ca); @@ -1719,10 +1730,12 @@ err: if (ca) bch2_dev_free(ca); bch2_free_super(&sb); + kfree(_errbuf); return ret; err_late: up_write(&c->state_lock); - return -EINVAL; + ca = NULL; + goto err; } /* Hot add existing device 
to running filesystem: */ @@ -1869,7 +1882,7 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path) rcu_read_lock(); for_each_member_device_rcu(ca, c, i, NULL) - if (ca->disk_sb.bdev->bd_dev == dev) + if (ca->dev == dev) goto found; ca = ERR_PTR(-ENOENT); found: @@ -1888,20 +1901,28 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, struct bch_sb_field_members *mi; unsigned i, best_sb = 0; const char *err; + char *_errbuf = NULL; + struct printbuf errbuf; int ret = 0; + if (!try_module_get(THIS_MODULE)) + return ERR_PTR(-ENODEV); + pr_verbose_init(opts, ""); if (!nr_devices) { - c = ERR_PTR(-EINVAL); - goto out2; + ret = -EINVAL; + goto err; } - if (!try_module_get(THIS_MODULE)) { - c = ERR_PTR(-ENODEV); - goto out2; + _errbuf = kmalloc(4096, GFP_KERNEL); + if (!_errbuf) { + ret = -ENOMEM; + goto err; } + errbuf = _PBUF(_errbuf, 4096); + sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL); if (!sb) { ret = -ENOMEM; @@ -1913,9 +1934,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, if (ret) goto err; - err = bch2_sb_validate(&sb[i]); - if (err) - goto err_print; } for (i = 1; i < nr_devices; i++) @@ -1970,8 +1988,8 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, } out: kfree(sb); + kfree(_errbuf); module_put(THIS_MODULE); -out2: pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c)); return c; err_print: @@ -1988,81 +2006,6 @@ err: goto out; } -static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb, - struct bch_opts opts) -{ - const char *err; - struct bch_fs *c; - bool allocated_fs = false; - int ret; - - err = bch2_sb_validate(sb); - if (err) - return err; - - mutex_lock(&bch_fs_list_lock); - c = __bch2_uuid_to_fs(sb->sb->uuid); - if (c) { - closure_get(&c->cl); - - err = bch2_dev_in_fs(c->disk_sb.sb, sb->sb); - if (err) - goto err; - } else { - allocated_fs = true; - c = bch2_fs_alloc(sb->sb, opts); - - err = "bch2_fs_alloc() error"; - if (IS_ERR(c)) - goto 
err; - } - - err = "bch2_dev_online() error"; - - mutex_lock(&c->sb_lock); - if (bch2_dev_attach_bdev(c, sb)) { - mutex_unlock(&c->sb_lock); - goto err; - } - mutex_unlock(&c->sb_lock); - - if (!c->opts.nostart && bch2_fs_may_start(c)) { - err = "error starting filesystem"; - ret = bch2_fs_start(c); - if (ret) - goto err; - } - - closure_put(&c->cl); - mutex_unlock(&bch_fs_list_lock); - - return NULL; -err: - mutex_unlock(&bch_fs_list_lock); - - if (allocated_fs && !IS_ERR(c)) - bch2_fs_stop(c); - else if (c) - closure_put(&c->cl); - - return err; -} - -const char *bch2_fs_open_incremental(const char *path) -{ - struct bch_sb_handle sb; - struct bch_opts opts = bch2_opts_empty(); - const char *err; - - if (bch2_read_super(path, &opts, &sb)) - return "error reading superblock"; - - err = __bch2_fs_open_incremental(&sb, opts); - bch2_free_super(&sb); - - return err; -} - /* Global interfaces/init */ static void bcachefs_exit(void) diff --git a/libbcachefs/super.h b/libbcachefs/super.h index c3273e9c..3f24ca5a 100644 --- a/libbcachefs/super.h +++ b/libbcachefs/super.h @@ -254,6 +254,5 @@ void bch2_fs_stop(struct bch_fs *); int bch2_fs_start(struct bch_fs *); struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts); -const char *bch2_fs_open_incremental(const char *path); #endif /* _BCACHEFS_SUPER_H */ diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 80402b39..3196bc30 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -746,4 +746,13 @@ static inline int u8_cmp(u8 l, u8 r) return cmp_int(l, r); } +#ifdef __KERNEL__ +static inline void uuid_unparse_lower(u8 *uuid, char *out) +{ + sprintf(out, "%pUb", uuid); +} +#else +#include <uuid/uuid.h> + +#endif + #endif /* _BCACHEFS_UTIL_H */