Update bcachefs sources to 1d669389f7 bcachefs: use a radix tree for inum bitmap in fsck

Kent Overstreet 2020-11-07 11:26:00 -05:00
parent 742dbbdbb9
commit d2a118d921
48 changed files with 723 additions and 599 deletions
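The fsck change the title refers to lives in libbcachefs/fsck.c and is not among the hunks excerpted below, which mostly track the vendored kernel sources. For orientation, a minimal sketch of the idea, using the genradix API that bcachefs-tools already ships (the helper names here are illustrative, not taken from the commit):

    #include <linux/generic-radix-tree.h>

    /*
     * One bit per inode number, stored in a radix tree of unsigned longs so a
     * sparse 64-bit inode space doesn't need one giant flat allocation.
     * Initialize with genradix_init(), free with genradix_free().
     */
    typedef GENRADIX(unsigned long) inum_bitmap;

    static inline bool inum_bitmap_test(inum_bitmap *b, size_t nr)
    {
            unsigned long *w = genradix_ptr(b, nr / BITS_PER_LONG);

            return w ? test_bit(nr % BITS_PER_LONG, w) : false;
    }

    static inline int inum_bitmap_set(inum_bitmap *b, size_t nr)
    {
            unsigned long *w = genradix_ptr_alloc(b, nr / BITS_PER_LONG,
                                                  GFP_KERNEL);

            if (!w)
                    return -ENOMEM;
            __set_bit(nr % BITS_PER_LONG, w);
            return 0;
    }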

View File

@@ -1 +1 @@
-8436db7aac9ced2118bf19b8f1bf3682f479d17e
+1d669389f79de8571732c13fdf4d23039e2308fd

View File

@@ -122,7 +122,7 @@ static void update_inode(struct bch_fs *c,
 	struct bkey_inode_buf packed;
 	int ret;
 
-	bch2_inode_pack(&packed, inode);
+	bch2_inode_pack(c, &packed, inode);
 	ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
 				NULL, NULL, 0);
 	if (ret)

View File

@@ -10,6 +10,8 @@
 #define cpu_present(cpu)	((cpu) == 0)
 #define cpu_active(cpu)		((cpu) == 0)
 
+#define raw_smp_processor_id()	0U
+
 #define for_each_cpu(cpu, mask)			\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_not(cpu, mask)		\

View File

@@ -21,6 +21,8 @@ struct page;
 #define kmap_atomic(page)		page_address(page)
 #define kunmap_atomic(addr)		do {} while (0)
 
+#define PageHighMem(page)		false
+
 static const char			zero_page[PAGE_SIZE];
 
 #define ZERO_PAGE(o)			((struct page *) &zero_page[0])

View File

@@ -536,9 +536,46 @@ DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused,
 	TP_ARGS(ip)
 );
 
-DEFINE_EVENT(transaction_restart,	trans_restart_would_deadlock,
-	TP_PROTO(unsigned long ip),
-	TP_ARGS(ip)
+TRACE_EVENT(trans_restart_would_deadlock,
+	TP_PROTO(unsigned long	trans_ip,
+		 unsigned long	caller_ip,
+		 unsigned	reason,
+		 enum btree_id	have_btree_id,
+		 unsigned	have_iter_type,
+		 enum btree_id	want_btree_id,
+		 unsigned	want_iter_type),
+	TP_ARGS(trans_ip, caller_ip, reason,
+		have_btree_id, have_iter_type,
+		want_btree_id, want_iter_type),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,		trans_ip	)
+		__field(unsigned long,		caller_ip	)
+		__field(u8,			reason		)
+		__field(u8,			have_btree_id	)
+		__field(u8,			have_iter_type	)
+		__field(u8,			want_btree_id	)
+		__field(u8,			want_iter_type	)
+	),
+
+	TP_fast_assign(
+		__entry->trans_ip		= trans_ip;
+		__entry->caller_ip		= caller_ip;
+		__entry->reason			= reason;
+		__entry->have_btree_id		= have_btree_id;
+		__entry->have_iter_type		= have_iter_type;
+		__entry->want_btree_id		= want_btree_id;
+		__entry->want_iter_type		= want_iter_type;
+	),
+
+	TP_printk("%pF %pF because %u have %u:%u want %u:%u",
+		  (void *) __entry->trans_ip,
+		  (void *) __entry->caller_ip,
+		  __entry->reason,
+		  __entry->have_btree_id,
+		  __entry->have_iter_type,
+		  __entry->want_btree_id,
+		  __entry->want_iter_type)
 );
 
 TRACE_EVENT(trans_restart_iters_realloced,

View File

@@ -76,7 +76,7 @@ static inline void bch2_wake_allocator(struct bch_dev *ca)
 static inline void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca,
 					  size_t bucket)
 {
-	if (expensive_debug_checks(c)) {
+	if (bch2_expensive_debug_checks) {
 		size_t iter;
 		long i;
 		unsigned j;

View File

@@ -265,6 +265,8 @@ do {									\
 	BCH_DEBUG_PARAM(debug_check_bkeys,				\
 		"Run bkey_debugcheck (primarily checking GC/allocation "\
 		"information) when iterating over keys")		\
+	BCH_DEBUG_PARAM(debug_check_btree_accounting,			\
+		"Verify btree accounting for keys within a node")	\
 	BCH_DEBUG_PARAM(verify_btree_ondisk,				\
 		"Reread btree nodes at various points to verify the "	\
 		"mergesort in the read path against modifications "	\
@@ -295,6 +297,16 @@ do {									\
 #define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS()
 #endif
 
+#define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name;
+BCH_DEBUG_PARAMS()
+#undef BCH_DEBUG_PARAM
+
+#ifndef CONFIG_BCACHEFS_DEBUG
+#define BCH_DEBUG_PARAM(name, description) static const bool bch2_##name;
+BCH_DEBUG_PARAMS_DEBUG()
+#undef BCH_DEBUG_PARAM
+#endif
+
 #define BCH_TIME_STATS()			\
 	x(btree_node_mem_alloc)			\
 	x(btree_node_split)			\
@@ -529,6 +541,10 @@ struct journal_keys {
 	u64			journal_seq_base;
 };
 
+struct btree_iter_buf {
+	struct btree_iter	*iter;
+};
+
 struct bch_fs {
 	struct closure		cl;
 
@@ -624,6 +640,7 @@ struct bch_fs {
 	struct mutex		btree_trans_lock;
 	struct list_head	btree_trans_list;
 	mempool_t		btree_iters_pool;
+	struct btree_iter_buf  __percpu	*btree_iters_bufs;
 
 	struct btree_key_cache	btree_key_cache;
 
@@ -801,7 +818,8 @@ struct bch_fs {
 	struct mutex		verify_lock;
 #endif
 
-	u64			unused_inode_hint;
+	u64			*unused_inode_hints;
+	unsigned		inode_shard_bits;
 
 	/*
 	 * A btree node on disk could have too many bsets for an iterator to fit
@@ -826,10 +844,6 @@ struct bch_fs {
 	unsigned		copy_gc_enabled:1;
 	bool			promote_whole_extents;
 
-#define BCH_DEBUG_PARAM(name, description) bool name;
-	BCH_DEBUG_PARAMS_ALL()
-#undef BCH_DEBUG_PARAM
-
 	struct time_stats	times[BCH_TIME_STAT_NR];
 };
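For reference, the shape of the X-macro pattern these hunks move to: every debug parameter becomes a plain global boolean, and in non-debug builds the debug-only entries become constant false so the checks compile away instead of reading per-filesystem fields. A stripped-down sketch, using parameter names that appear in this commit (the real lists carry description strings as a second macro argument):

    /* Simplified stand-in for the BCH_DEBUG_PARAM machinery: */
    #define DEBUG_PARAMS_ALWAYS() \
            X(key_merging_disabled) \
            X(verify_btree_ondisk)

    #define DEBUG_PARAMS_DEBUG() \
            X(expensive_debug_checks) \
            X(debug_check_btree_accounting)

    #ifdef CONFIG_BCACHEFS_DEBUG
    #define DEBUG_PARAMS() DEBUG_PARAMS_ALWAYS() DEBUG_PARAMS_DEBUG()
    #else
    #define DEBUG_PARAMS() DEBUG_PARAMS_ALWAYS()
    #endif

    /* Every built-in parameter is one global flag... */
    #define X(name) extern bool bch2_##name;
    DEBUG_PARAMS()
    #undef X

    /* ...and with debug support compiled out, the debug-only names become
     * constant false, so `if (bch2_expensive_debug_checks)` is dead code: */
    #ifndef CONFIG_BCACHEFS_DEBUG
    #define X(name) static const bool bch2_##name;
    DEBUG_PARAMS_DEBUG()
    #undef X
    #endif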

View File

@@ -669,10 +669,10 @@ struct bch_inode_generation {
 } __attribute__((packed, aligned(8)));
 
 #define BCH_INODE_FIELDS()			\
-	x(bi_atime,			64)	\
-	x(bi_ctime,			64)	\
-	x(bi_mtime,			64)	\
-	x(bi_otime,			64)	\
+	x(bi_atime,			96)	\
+	x(bi_ctime,			96)	\
+	x(bi_mtime,			96)	\
+	x(bi_otime,			96)	\
 	x(bi_size,			64)	\
 	x(bi_sectors,			64)	\
 	x(bi_uid,			32)	\
@@ -739,7 +739,8 @@ enum {
 #define BCH_INODE_UNLINKED	(1 << __BCH_INODE_UNLINKED)
 
 LE32_BITMASK(INODE_STR_HASH,	struct bch_inode, bi_flags, 20, 24);
-LE32_BITMASK(INODE_NR_FIELDS,	struct bch_inode, bi_flags, 24, 32);
+LE32_BITMASK(INODE_NR_FIELDS,	struct bch_inode, bi_flags, 24, 31);
+LE32_BITMASK(INODE_NEW_VARINT,	struct bch_inode, bi_flags, 31, 32);
 
 /* Dirents */
 
@@ -1330,13 +1331,15 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE,	struct bch_sb, flags[3],  0, 16);
 	x(btree_ptr_v2,			11)	\
 	x(extents_above_btree_updates,	12)	\
 	x(btree_updates_journalled,	13)	\
-	x(reflink_inline_data,		14)
+	x(reflink_inline_data,		14)	\
+	x(new_varint,			15)
 
 #define BCH_SB_FEATURES_ALL				\
 	((1ULL << BCH_FEATURE_new_siphash)|		\
 	 (1ULL << BCH_FEATURE_new_extent_overwrite)|	\
 	 (1ULL << BCH_FEATURE_btree_ptr_v2)|		\
-	 (1ULL << BCH_FEATURE_extents_above_btree_updates))
+	 (1ULL << BCH_FEATURE_extents_above_btree_updates)|\
+	 (1ULL << BCH_FEATURE_new_varint))\
 
 enum bch_sb_feature {
 #define x(f, n) BCH_FEATURE_##f,
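These are the on-disk hooks for the new varint inode encoding: bit 31 of bi_flags is carved out of the old 24..32 field-count range to mark an inode as new-style, and a matching superblock feature bit is added. A hedged sketch of how a filesystem would advertise the feature when it starts writing new-style inodes (member names follow the bcachefs superblock layout; the actual call site is not in the hunks shown):

    /*
     * Mark the filesystem as containing varint-encoded inodes; readers that
     * don't know BCH_FEATURE_new_varint should refuse to mount.
     * Assumed helper, for illustration only.
     */
    static void mark_new_varint(struct bch_fs *c)
    {
            mutex_lock(&c->sb_lock);
            c->disk_sb.sb->features[0] |=
                    cpu_to_le64(1ULL << BCH_FEATURE_new_varint);
            bch2_write_super(c);
            mutex_unlock(&c->sb_lock);
    }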

View File

@@ -411,7 +411,7 @@ static bool bkey_packed_successor(struct bkey_packed *out,
 		if ((*p & mask) != mask) {
 			*p += 1ULL << offset;
-			EBUG_ON(bkey_cmp_packed(b, out, &k) <= 0);
+			EBUG_ON(bch2_bkey_cmp_packed(b, out, &k) <= 0);
 			return true;
 		}
 
@@ -1054,9 +1054,9 @@ int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b,
 }
 
 __pure __flatten
-int __bch2_bkey_cmp_packed(const struct bkey_packed *l,
-			   const struct bkey_packed *r,
-			   const struct btree *b)
+int bch2_bkey_cmp_packed(const struct btree *b,
+			 const struct bkey_packed *l,
+			 const struct bkey_packed *r)
 {
 	struct bkey unpacked;
 

View File

@@ -67,13 +67,6 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
 #define bkey_whiteout(_k)				\
 	((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard)
 
-#define bkey_packed_typecheck(_k)					\
-({									\
-	BUILD_BUG_ON(!type_is(_k, struct bkey *) &&			\
-		     !type_is(_k, struct bkey_packed *));		\
-	type_is(_k, struct bkey_packed *);				\
-})
-
 enum bkey_lr_packed {
 	BKEY_PACKED_BOTH,
 	BKEY_PACKED_RIGHT,
@@ -81,9 +74,6 @@ enum bkey_lr_packed {
 	BKEY_PACKED_NONE,
 };
 
-#define bkey_lr_packed_typecheck(_l, _r)				\
-	(!bkey_packed_typecheck(_l) + ((!bkey_packed_typecheck(_r)) << 1))
-
 #define bkey_lr_packed(_l, _r)						\
 	((_l)->format + ((_r)->format << 1))
 
@@ -132,9 +122,9 @@ int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *,
 					       const struct bpos *);
 
 __pure
-int __bch2_bkey_cmp_packed(const struct bkey_packed *,
-			   const struct bkey_packed *,
-			   const struct btree *);
+int bch2_bkey_cmp_packed(const struct btree *,
+			 const struct bkey_packed *,
+			 const struct bkey_packed *);
 
 __pure
 int __bch2_bkey_cmp_left_packed(const struct btree *,
@@ -160,37 +150,6 @@ static inline int bkey_cmp_left_packed_byval(const struct btree *b,
 	return bkey_cmp_left_packed(b, l, &r);
 }
 
-/*
- * If @_l or @_r are struct bkey * (not bkey_packed *), uses type information to
- * skip dispatching on k->format:
- */
-#define bkey_cmp_packed(_b, _l, _r)					\
-({									\
-	int _cmp;							\
-									\
-	switch (bkey_lr_packed_typecheck(_l, _r)) {			\
-	case BKEY_PACKED_NONE:						\
-		_cmp = bkey_cmp(((struct bkey *) (_l))->p,		\
-				((struct bkey *) (_r))->p);		\
-		break;							\
-	case BKEY_PACKED_LEFT:						\
-		_cmp = bkey_cmp_left_packed((_b),			\
-				  (struct bkey_packed *) (_l),		\
-				  &((struct bkey *) (_r))->p);		\
-		break;							\
-	case BKEY_PACKED_RIGHT:						\
-		_cmp = -bkey_cmp_left_packed((_b),			\
-				  (struct bkey_packed *) (_r),		\
-				  &((struct bkey *) (_l))->p);		\
-		break;							\
-	case BKEY_PACKED_BOTH:						\
-		_cmp = __bch2_bkey_cmp_packed((void *) (_l),		\
-					      (void *) (_r), (_b));	\
-		break;							\
-	}								\
-	_cmp;								\
-})
-
 #if 1
 static __always_inline int bkey_cmp(struct bpos l, struct bpos r)
 {
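The deleted bkey_cmp_packed() macro used to dispatch on whether each argument was a struct bkey or a struct bkey_packed; with it gone, callers say what they mean explicitly, which is the mechanical rename running through the rest of this commit. Roughly (a sketch, not lifted from the sources; the wrapper function is invented for illustration):

    /* The explicit forms that replace the old type-dispatching macro: */
    static int cmp_examples(const struct btree *b,
                            const struct bkey_packed *l_packed,
                            const struct bkey_packed *r_packed,
                            const struct bkey_i *r_unpacked)
    {
            /* both keys packed, compared within btree node b: */
            int both = bch2_bkey_cmp_packed(b, l_packed, r_packed);

            /* left packed, right side already an unpacked position: */
            int left = bkey_cmp_left_packed(b, l_packed, &r_unpacked->k.p);

            return both ?: left;
    }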

View File

@@ -236,7 +236,7 @@ enum merge_result bch2_bkey_merge(struct bch_fs *c,
 	const struct bkey_ops *ops = &bch2_bkey_ops[l.k->type];
 	enum merge_result ret;
 
-	if (key_merging_disabled(c) ||
+	if (bch2_key_merging_disabled ||
 	    !ops->key_merge ||
 	    l.k->type != r.k->type ||
 	    bversion_cmp(l.k->version, r.k->version) ||

View File

@@ -86,7 +86,7 @@ static inline int key_sort_fix_overlapping_cmp(struct btree *b,
 					       struct bkey_packed *l,
 					       struct bkey_packed *r)
 {
-	return bkey_cmp_packed(b, l, r) ?:
+	return bch2_bkey_cmp_packed(b, l, r) ?:
 		cmp_int((unsigned long) l, (unsigned long) r);
 }
 
@@ -98,7 +98,7 @@ static inline bool should_drop_next_key(struct sort_iter *iter)
 	 * and should be dropped.
 	 */
 	return iter->used >= 2 &&
-		!bkey_cmp_packed(iter->b,
+		!bch2_bkey_cmp_packed(iter->b,
 				 iter->data[0].k,
 				 iter->data[1].k);
 }
@@ -223,7 +223,7 @@ static inline int sort_keys_cmp(struct btree *b,
 				struct bkey_packed *l,
 				struct bkey_packed *r)
 {
-	return bkey_cmp_packed(b, l, r) ?:
+	return bch2_bkey_cmp_packed(b, l, r) ?:
 		(int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
 		(int) l->needs_whiteout - (int) r->needs_whiteout;
 }
@@ -245,7 +245,7 @@ unsigned bch2_sort_keys(struct bkey_packed *dst,
 			continue;
 
 		while ((next = sort_iter_peek(iter)) &&
-		       !bkey_cmp_packed(iter->b, in, next)) {
+		       !bch2_bkey_cmp_packed(iter->b, in, next)) {
 			BUG_ON(in->needs_whiteout &&
 			       next->needs_whiteout);
 			needs_whiteout |= in->needs_whiteout;
@@ -406,7 +406,7 @@ static inline int sort_extents_cmp(struct btree *b,
 				   struct bkey_packed *l,
 				   struct bkey_packed *r)
 {
-	return bkey_cmp_packed(b, l, r) ?:
+	return bch2_bkey_cmp_packed(b, l, r) ?:
 		(int) bkey_deleted(l) - (int) bkey_deleted(r);
 }

View File

@@ -369,10 +369,10 @@ static struct bkey_float *bkey_float(const struct btree *b,
 	return ro_aux_tree_base(b, t)->f + idx;
 }
 
-static void bset_aux_tree_verify(struct btree *b)
+static void bset_aux_tree_verify(const struct btree *b)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
-	struct bset_tree *t;
+	const struct bset_tree *t;
 
 	for_each_bset(b, t) {
 		if (t->aux_data_offset == U16_MAX)
@@ -388,15 +388,13 @@ static void bset_aux_tree_verify(struct btree *b)
 #endif
 }
 
-void bch2_btree_keys_init(struct btree *b, bool *expensive_debug_checks)
+void bch2_btree_keys_init(struct btree *b)
 {
 	unsigned i;
 
 	b->nsets		= 0;
 	memset(&b->nr, 0, sizeof(b->nr));
-#ifdef CONFIG_BCACHEFS_DEBUG
-	b->expensive_debug_checks = expensive_debug_checks;
-#endif
+
 	for (i = 0; i < MAX_BSETS; i++)
 		b->set[i].data_offset = U16_MAX;
 
@@ -522,7 +520,7 @@ static void bch2_bset_verify_rw_aux_tree(struct btree *b,
 	struct bkey_packed *k = btree_bkey_first(b, t);
 	unsigned j = 0;
 
-	if (!btree_keys_expensive_checks(b))
+	if (!bch2_expensive_debug_checks)
 		return;
 
 	BUG_ON(bset_has_ro_aux_tree(t));
@@ -710,20 +708,20 @@ static void make_bfloat(struct btree *b, struct bset_tree *t,
 }
 
 /* bytes remaining - only valid for last bset: */
-static unsigned __bset_tree_capacity(struct btree *b, struct bset_tree *t)
+static unsigned __bset_tree_capacity(const struct btree *b, const struct bset_tree *t)
 {
 	bset_aux_tree_verify(b);
 
 	return btree_aux_data_bytes(b) - t->aux_data_offset * sizeof(u64);
 }
 
-static unsigned bset_ro_tree_capacity(struct btree *b, struct bset_tree *t)
+static unsigned bset_ro_tree_capacity(const struct btree *b, const struct bset_tree *t)
 {
 	return __bset_tree_capacity(b, t) /
 		(sizeof(struct bkey_float) + sizeof(u8));
 }
 
-static unsigned bset_rw_tree_capacity(struct btree *b, struct bset_tree *t)
+static unsigned bset_rw_tree_capacity(const struct btree *b, const struct bset_tree *t)
 {
 	return __bset_tree_capacity(b, t) / sizeof(struct rw_aux_tree);
 }
@@ -922,7 +920,7 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
 		k = p;
 	}
 
-	if (btree_keys_expensive_checks(b)) {
+	if (bch2_expensive_debug_checks) {
 		BUG_ON(ret >= orig_k);
 
 		for (i = ret
@@ -1227,8 +1225,8 @@ static inline bool bkey_mantissa_bits_dropped(const struct btree *b,
 __flatten
 static struct bkey_packed *bset_search_tree(const struct btree *b,
-					    struct bset_tree *t,
-					    struct bpos *search,
+					    const struct bset_tree *t,
+					    const struct bpos *search,
 					    const struct bkey_packed *packed_search)
 {
 	struct ro_aux_tree *base = ro_aux_tree_base(b, t);
@@ -1345,7 +1343,7 @@ struct bkey_packed *bch2_bset_search_linear(struct btree *b,
 	       bkey_iter_pos_cmp(b, m, search) < 0)
 		m = bkey_next_skip_noops(m, btree_bkey_last(b, t));
 
-	if (btree_keys_expensive_checks(b)) {
+	if (bch2_expensive_debug_checks) {
 		struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
 
 		BUG_ON(prev &&
@@ -1601,7 +1599,7 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
 void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
 				  struct btree *b)
 {
-	if (btree_keys_expensive_checks(b)) {
+	if (bch2_expensive_debug_checks) {
 		bch2_btree_node_iter_verify(iter, b);
 		bch2_btree_node_iter_next_check(iter, b);
 	}
@@ -1620,7 +1618,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter,
 	struct bset_tree *t;
 	unsigned end = 0;
 
-	if (btree_keys_expensive_checks(b))
+	if (bch2_expensive_debug_checks)
 		bch2_btree_node_iter_verify(iter, b);
 
 	for_each_bset(b, t) {
@@ -1656,7 +1654,7 @@ found:
 	iter->data[0].k = __btree_node_key_to_offset(b, prev);
 	iter->data[0].end = end;
 
-	if (btree_keys_expensive_checks(b))
+	if (bch2_expensive_debug_checks)
 		bch2_btree_node_iter_verify(iter, b);
 
 	return prev;
 }

View File

@@ -5,7 +5,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 
-#include "bcachefs_format.h"
+#include "bcachefs.h"
 #include "bkey.h"
 #include "bkey_methods.h"
 #include "btree_types.h"
@@ -147,17 +147,6 @@
  * first key in that range of bytes again.
  */
 
-extern bool bch2_expensive_debug_checks;
-
-static inline bool btree_keys_expensive_checks(const struct btree *b)
-{
-#ifdef CONFIG_BCACHEFS_DEBUG
-	return bch2_expensive_debug_checks || *b->expensive_debug_checks;
-#else
-	return false;
-#endif
-}
-
 enum bset_aux_tree_type {
 	BSET_NO_AUX_TREE,
 	BSET_RO_AUX_TREE,
@@ -201,17 +190,17 @@ static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree
 
 #define BSET_CACHELINE		128
 
-static inline size_t btree_keys_cachelines(struct btree *b)
+static inline size_t btree_keys_cachelines(const struct btree *b)
 {
 	return (1U << b->byte_order) / BSET_CACHELINE;
 }
 
-static inline size_t btree_aux_data_bytes(struct btree *b)
+static inline size_t btree_aux_data_bytes(const struct btree *b)
 {
 	return btree_keys_cachelines(b) * 8;
 }
 
-static inline size_t btree_aux_data_u64s(struct btree *b)
+static inline size_t btree_aux_data_u64s(const struct btree *b)
 {
 	return btree_aux_data_bytes(b) / sizeof(u64);
 }
@@ -228,7 +217,7 @@ __bkey_unpack_key_format_checked(const struct btree *b,
 	compiled_unpack_fn unpack_fn = b->aux_data;
 	unpack_fn(dst, src);
 
-	if (btree_keys_expensive_checks(b)) {
+	if (bch2_expensive_debug_checks) {
 		struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
 
 		BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
@@ -366,7 +355,7 @@ static inline struct bset *bset_next_set(struct btree *b,
 	return ((void *) i) + round_up(vstruct_bytes(i), block_bytes);
 }
 
-void bch2_btree_keys_init(struct btree *, bool *);
+void bch2_btree_keys_init(struct btree *);
 
 void bch2_bset_init_first(struct btree *, struct bset *);
 void bch2_bset_init_next(struct bch_fs *, struct btree *,
@@ -477,7 +466,7 @@ static inline int bkey_iter_cmp(const struct btree *b,
 				const struct bkey_packed *l,
 				const struct bkey_packed *r)
 {
-	return bkey_cmp_packed(b, l, r)
+	return bch2_bkey_cmp_packed(b, l, r)
 	?: (int) bkey_deleted(r) - (int) bkey_deleted(l)
 	?: cmp_int(l, r);
 }
@@ -654,7 +643,7 @@ static inline void bch2_verify_insert_pos(struct btree *b,
 
 static inline void bch2_verify_btree_nr_keys(struct btree *b)
 {
-	if (btree_keys_expensive_checks(b))
+	if (bch2_debug_check_btree_accounting)
 		__bch2_verify_btree_nr_keys(b);
 }

View File

@@ -211,7 +211,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 	 * - unless btree verify mode is enabled, since it runs out of
 	 * the post write cleanup:
 	 */
-	if (verify_btree_ondisk(c))
+	if (bch2_verify_btree_ondisk)
 		bch2_btree_node_write(c, b, SIX_LOCK_intent);
 	else
 		__bch2_btree_node_write(c, b, SIX_LOCK_read);
@@ -254,7 +254,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 	unsigned long freed = 0;
 	unsigned i, flags;
 
-	if (btree_shrinker_disabled(c))
+	if (bch2_btree_shrinker_disabled)
 		return SHRINK_STOP;
 
 	/* Return -1 if we can't do anything right now */
@@ -341,7 +341,7 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
 					btree_cache.shrink);
 	struct btree_cache *bc = &c->btree_cache;
 
-	if (btree_shrinker_disabled(c))
+	if (bch2_btree_shrinker_disabled)
 		return 0;
 
 	return btree_cache_can_free(bc) * btree_pages(c);
@@ -590,7 +590,7 @@ out:
 	b->sib_u64s[0]		= 0;
 	b->sib_u64s[1]		= 0;
 	b->whiteout_u64s	= 0;
-	bch2_btree_keys_init(b, &c->expensive_debug_checks);
+	bch2_btree_keys_init(b);
 
 	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
 			       start_time);
@@ -705,7 +705,8 @@ static int lock_node_check_fn(struct six_lock *lock, void *p)
  */
 struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
 				  const struct bkey_i *k, unsigned level,
-				  enum six_lock_type lock_type)
+				  enum six_lock_type lock_type,
+				  unsigned long trace_ip)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
@@ -767,7 +768,7 @@ lock_node:
 			btree_node_unlock(iter, level + 1);
 
 		if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
-				     lock_node_check_fn, (void *) k)) {
+				     lock_node_check_fn, (void *) k, trace_ip)) {
 			if (b->hash_val != btree_ptr_hash_val(k))
 				goto retry;
 			return ERR_PTR(-EINTR);
@@ -935,7 +936,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
 	bch2_bkey_unpack(parent, &tmp.k, k);
 
 	ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-				  SIX_LOCK_intent);
+				  SIX_LOCK_intent, _THIS_IP_);
 
 	if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
 		struct btree_iter *linked;
@@ -948,14 +949,14 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
 		 * holding other locks that would cause us to deadlock:
 		 */
 		trans_for_each_iter(trans, linked)
-			if (btree_iter_cmp(iter, linked) < 0)
+			if (btree_iter_lock_cmp(iter, linked) < 0)
 				__bch2_btree_iter_unlock(linked);
 
 		if (sib == btree_prev_sib)
 			btree_node_unlock(iter, level);
 
 		ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-					  SIX_LOCK_intent);
+					  SIX_LOCK_intent, _THIS_IP_);
 
 		/*
 		 * before btree_iter_relock() calls btree_iter_verify_locks():

View File

@@ -23,7 +23,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
 
 struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
 				  const struct bkey_i *, unsigned,
-				  enum six_lock_type);
+				  enum six_lock_type, unsigned long);
 
 struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
 					 enum btree_id, unsigned);

View File

@@ -101,7 +101,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
 	int ret = 0;
 
 	if (initial) {
-		BUG_ON(journal_seq_verify(c) &&
+		BUG_ON(bch2_journal_seq_verify &&
 		       k.k->version.lo > journal_cur_seq(&c->journal));
 
 		/* XXX change to fsck check */
@@ -209,7 +209,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 	struct btree_iter *iter;
 	struct btree *b;
 	unsigned depth = metadata_only			? 1
-		: expensive_debug_checks(c)		? 0
+		: bch2_expensive_debug_checks		? 0
 		: !btree_node_type_needs_gc(btree_id)	? 1
 		: 0;
 	u8 max_stale = 0;
@@ -236,8 +236,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 					       BTREE_INSERT_USE_RESERVE|
 					       BTREE_INSERT_NOWAIT|
 					       BTREE_INSERT_GC_LOCK_HELD);
-		else if (!btree_gc_rewrite_disabled(c) &&
-			 (btree_gc_always_rewrite(c) || max_stale > 16))
+		else if (!bch2_btree_gc_rewrite_disabled &&
+			 (bch2_btree_gc_always_rewrite || max_stale > 16))
 			bch2_btree_node_rewrite(c, iter,
 					b->data->keys.seq,
 					BTREE_INSERT_NOWAIT|
@@ -328,7 +328,7 @@ static int bch2_gc_btree_init(struct bch_fs *c,
 {
 	struct btree *b;
 	unsigned target_depth = metadata_only		? 1
-		: expensive_debug_checks(c)		? 0
+		: bch2_expensive_debug_checks		? 0
 		: !btree_node_type_needs_gc(btree_id)	? 1
 		: 0;
 	u8 max_stale = 0;
@@ -835,7 +835,7 @@ again:
 out:
 	if (!ret &&
 	    (test_bit(BCH_FS_FIXED_GENS, &c->flags) ||
-	     (!iter && test_restart_gc(c)))) {
+	     (!iter && bch2_test_restart_gc))) {
 		/*
 		 * XXX: make sure gens we fixed got saved
 		 */

View File

@@ -42,7 +42,7 @@ static void verify_no_dups(struct btree *b,
 		BUG_ON(extents
 		       ? bkey_cmp(l.p, bkey_start_pos(&r)) > 0
 		       : bkey_cmp(l.p, bkey_start_pos(&r)) >= 0);
-		//BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0);
+		//BUG_ON(bch2_bkey_cmp_packed(&b->format, p, k) >= 0);
 	}
 #endif
 }
@@ -102,14 +102,14 @@ static void sort_bkey_ptrs(const struct btree *bt,
 			break;
 
 		for (b = a; c = 2 * b + 1, (d = c + 1) < n;)
-			b = bkey_cmp_packed(bt,
+			b = bch2_bkey_cmp_packed(bt,
 					    ptrs[c],
 					    ptrs[d]) >= 0 ? c : d;
 		if (d == n)
 			b = c;
 
 		while (b != a &&
-		       bkey_cmp_packed(bt,
+		       bch2_bkey_cmp_packed(bt,
 				       ptrs[a],
 				       ptrs[b]) >= 0)
 			b = (b - 1) / 2;
@@ -1044,7 +1044,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 		const char *invalid = bch2_bkey_val_invalid(c, u.s_c);
 
 		if (invalid ||
-		    (inject_invalid_keys(c) &&
+		    (bch2_inject_invalid_keys &&
 		     !bversion_cmp(u.k->version, MAX_VERSION))) {
 			char buf[160];
 

View File

@@ -197,13 +197,13 @@ static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
 bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 			    unsigned level, struct btree_iter *iter,
 			    enum six_lock_type type,
-			    six_lock_should_sleep_fn should_sleep_fn,
-			    void *p)
+			    six_lock_should_sleep_fn should_sleep_fn, void *p,
+			    unsigned long ip)
 {
 	struct btree_trans *trans = iter->trans;
-	struct btree_iter *linked;
+	struct btree_iter *linked, *deadlock_iter = NULL;
 	u64 start_time = local_clock();
-	bool ret = true;
+	unsigned reason = 9;
 
 	/* Check if it's safe to block: */
 	trans_for_each_iter(trans, linked) {
@@ -228,42 +228,64 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 				linked->locks_want = max_t(unsigned,
 						linked->locks_want,
 						__fls(linked->nodes_locked) + 1);
-				if (!btree_iter_get_locks(linked, true, false))
-					ret = false;
+				if (!btree_iter_get_locks(linked, true, false)) {
+					deadlock_iter = linked;
+					reason = 1;
+				}
 			} else {
-				ret = false;
+				deadlock_iter = linked;
+				reason = 2;
 			}
 		}
 
+		if (linked->btree_id != iter->btree_id) {
+			if (linked->btree_id > iter->btree_id) {
+				deadlock_iter = linked;
+				reason = 3;
+			}
+			continue;
+		}
+
+		/*
+		 * Within the same btree, cached iterators come before non
+		 * cached iterators:
+		 */
+		if (btree_iter_is_cached(linked) != btree_iter_is_cached(iter)) {
+			if (btree_iter_is_cached(iter)) {
+				deadlock_iter = linked;
+				reason = 4;
+			}
+			continue;
+		}
+
 		/*
 		 * Interior nodes must be locked before their descendants: if
 		 * another iterator has possible descendants locked of the node
 		 * we're about to lock, it must have the ancestors locked too:
 		 */
-		if (linked->btree_id == iter->btree_id &&
-		    level > __fls(linked->nodes_locked)) {
+		if (level > __fls(linked->nodes_locked)) {
 			if (!(trans->nounlock)) {
 				linked->locks_want =
 					max(level + 1, max_t(unsigned,
 					    linked->locks_want,
 					    iter->locks_want));
-				if (!btree_iter_get_locks(linked, true, false))
-					ret = false;
+				if (!btree_iter_get_locks(linked, true, false)) {
+					deadlock_iter = linked;
+					reason = 5;
+				}
 			} else {
-				ret = false;
+				deadlock_iter = linked;
+				reason = 6;
 			}
 		}
 
 		/* Must lock btree nodes in key order: */
-		if ((cmp_int(iter->btree_id, linked->btree_id) ?:
-		     -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0)
-			ret = false;
-
-		if (iter->btree_id == linked->btree_id &&
-		    btree_node_locked(linked, level) &&
+		if (btree_node_locked(linked, level) &&
 		    bkey_cmp(pos, btree_node_pos((void *) linked->l[level].b,
-						 btree_iter_type(linked))) <= 0)
-			ret = false;
+						 btree_iter_type(linked))) <= 0) {
+			deadlock_iter = linked;
+			reason = 7;
+		}
 
 		/*
 		 * Recheck if this is a node we already have locked - since one
@@ -277,8 +299,13 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
 		}
 	}
 
-	if (unlikely(!ret)) {
-		trace_trans_restart_would_deadlock(iter->trans->ip);
+	if (unlikely(deadlock_iter)) {
+		trace_trans_restart_would_deadlock(iter->trans->ip, ip,
+				reason,
+				deadlock_iter->btree_id,
+				btree_iter_type(deadlock_iter),
+				iter->btree_id,
+				btree_iter_type(iter));
 		return false;
 	}
 
@@ -471,7 +498,7 @@ static void bch2_btree_iter_verify_level(struct btree_iter *iter,
 	char buf1[100], buf2[100];
 	const char *msg;
 
-	if (!debug_check_iterators(iter->trans->c))
+	if (!bch2_debug_check_iterators)
 		return;
 
 	if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
@@ -567,7 +594,7 @@ void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b)
 {
 	struct btree_iter *iter;
 
-	if (!debug_check_iterators(trans->c))
+	if (!bch2_debug_check_iterators)
 		return;
 
 	trans_for_each_iter_with_node(trans, b, iter)
@@ -739,7 +766,7 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
 		__bch2_btree_node_iter_fix(iter, b, node_iter, t,
 					   where, clobber_u64s, new_u64s);
 
-	if (debug_check_iterators(iter->trans->c))
+	if (bch2_debug_check_iterators)
 		bch2_btree_node_iter_verify(node_iter, b);
 }
 
@@ -769,7 +796,7 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
 
 	ret = bkey_disassemble(l->b, k, u);
 
-	if (debug_check_bkeys(iter->trans->c))
+	if (bch2_debug_check_bkeys)
 		bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
 
 	return ret;
@@ -945,7 +972,8 @@ static int lock_root_check_fn(struct six_lock *lock, void *p)
 }
 
 static inline int btree_iter_lock_root(struct btree_iter *iter,
-				       unsigned depth_want)
+				       unsigned depth_want,
+				       unsigned long trace_ip)
 {
 	struct bch_fs *c = iter->trans->c;
 	struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
@@ -974,7 +1002,8 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
 		lock_type = __btree_lock_want(iter, iter->level);
 		if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
 					      iter, lock_type,
-					      lock_root_check_fn, rootp)))
+					      lock_root_check_fn, rootp,
+					      trace_ip)))
 			return -EINTR;
 
 		if (likely(b == READ_ONCE(*rootp) &&
@@ -1046,7 +1075,8 @@ static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
 		btree_node_unlock(iter, plevel);
 }
 
-static __always_inline int btree_iter_down(struct btree_iter *iter)
+static __always_inline int btree_iter_down(struct btree_iter *iter,
+					   unsigned long trace_ip)
 {
 	struct bch_fs *c = iter->trans->c;
 	struct btree_iter_level *l = &iter->l[iter->level];
@@ -1060,7 +1090,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter)
 	bch2_bkey_unpack(l->b, &tmp.k,
 			 bch2_btree_node_iter_peek(&l->iter, l->b));
 
-	b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type);
+	b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, trace_ip);
 	if (unlikely(IS_ERR(b)))
 		return PTR_ERR(b);
 
@@ -1084,7 +1114,7 @@ static void btree_iter_up(struct btree_iter *iter)
 	btree_node_unlock(iter, iter->level++);
 }
 
-static int btree_iter_traverse_one(struct btree_iter *);
+static int btree_iter_traverse_one(struct btree_iter *, unsigned long);
 
 static int __btree_iter_traverse_all(struct btree_trans *trans, int ret)
 {
@@ -1104,11 +1134,12 @@ retry_all:
 		sorted[nr_sorted++] = iter->idx;
 
 #define btree_iter_cmp_by_idx(_l, _r)				\
-		btree_iter_cmp(&trans->iters[_l], &trans->iters[_r])
+		btree_iter_lock_cmp(&trans->iters[_l], &trans->iters[_r])
 
 	bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx);
 #undef btree_iter_cmp_by_idx
 	bch2_trans_unlock(trans);
+	cond_resched();
 
 	if (unlikely(ret == -ENOMEM)) {
 		struct closure cl;
@@ -1139,7 +1170,7 @@ retry_all:
 		if (!(trans->iters_linked & (1ULL << idx)))
 			continue;
 
-		ret = btree_iter_traverse_one(&trans->iters[idx]);
+		ret = btree_iter_traverse_one(&trans->iters[idx], _THIS_IP_);
 		if (ret)
 			goto retry_all;
 	}
@@ -1202,7 +1233,8 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
 * On error, caller (peek_node()/peek_key()) must return NULL; the error is
 * stashed in the iterator and returned from bch2_trans_exit().
 */
-static int btree_iter_traverse_one(struct btree_iter *iter)
+static int btree_iter_traverse_one(struct btree_iter *iter,
+				   unsigned long trace_ip)
 {
 	unsigned depth_want = iter->level;
 
@@ -1249,8 +1281,8 @@ static int btree_iter_traverse_one(struct btree_iter *iter)
 	 */
 	while (iter->level > depth_want) {
 		int ret = btree_iter_node(iter, iter->level)
-			? btree_iter_down(iter)
-			: btree_iter_lock_root(iter, depth_want);
+			? btree_iter_down(iter, trace_ip)
+			: btree_iter_lock_root(iter, depth_want, trace_ip);
 		if (unlikely(ret)) {
 			if (ret == 1)
 				return 0;
@@ -1281,7 +1313,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
 	int ret;
 
 	ret =   bch2_trans_cond_resched(trans) ?:
-		btree_iter_traverse_one(iter);
+		btree_iter_traverse_one(iter, _RET_IP_);
 	if (unlikely(ret))
 		ret = __btree_iter_traverse_all(trans, ret);
 
@@ -1545,13 +1577,13 @@ static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
 		ret.v = bkeyp_val(&l->b->format, _k);
 
-		if (debug_check_iterators(iter->trans->c)) {
+		if (bch2_debug_check_iterators) {
 			struct bkey k = bkey_unpack_key(l->b, _k);
 
 			BUG_ON(memcmp(&k, &iter->k, sizeof(k)));
 		}
 
-		if (debug_check_bkeys(iter->trans->c))
+		if (bch2_debug_check_bkeys)
 			bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
 	}
 
@@ -1970,6 +2002,7 @@ int bch2_trans_iter_free(struct btree_trans *trans,
 	return bch2_trans_iter_put(trans, iter);
 }
 
+#if 0
 static int bch2_trans_realloc_iters(struct btree_trans *trans,
 				    unsigned new_size)
 {
@@ -2018,8 +2051,7 @@ success:
 			sizeof(struct btree_iter) * trans->nr_iters +
 			sizeof(struct btree_insert_entry) * trans->nr_iters);
 
-	if (trans->iters != trans->iters_onstack)
-		kfree(trans->iters);
+	kfree(trans->iters);
 
 	trans->iters		= new_iters;
 	trans->updates		= new_updates;
@@ -2033,6 +2065,7 @@ success:
 
 	return 0;
 }
+#endif
 
 static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
 {
@@ -2042,28 +2075,27 @@ static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
 		goto got_slot;
 
 	if (trans->nr_iters == trans->size) {
-		int ret;
+		struct btree_iter *iter;
 
-		if (trans->nr_iters >= BTREE_ITER_MAX) {
-			struct btree_iter *iter;
+		BUG_ON(trans->size < BTREE_ITER_MAX);
 
-			trans_for_each_iter(trans, iter) {
-				pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
-				       bch2_btree_ids[iter->btree_id],
-				       iter->pos.inode,
-				       iter->pos.offset,
-				       (trans->iters_live & (1ULL << iter->idx)) ? " live" : "",
-				       (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
-				       iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
-				       (void *) iter->ip_allocated);
-			}
-
-			panic("trans iter oveflow\n");
+		trans_for_each_iter(trans, iter) {
+			pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
+			       bch2_btree_ids[iter->btree_id],
+			       iter->pos.inode,
+			       iter->pos.offset,
+			       (trans->iters_live & (1ULL << iter->idx)) ? " live" : "",
+			       (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
+			       iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
+			       (void *) iter->ip_allocated);
 		}
 
+		panic("trans iter oveflow\n");
+#if 0
 		ret = bch2_trans_realloc_iters(trans, trans->size * 2);
 		if (ret)
 			return ERR_PTR(ret);
+#endif
 	}
 
 	idx = trans->nr_iters++;
@@ -2305,28 +2337,37 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
 		bch2_btree_iter_traverse_all(trans);
 }
 
+static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
+{
+	unsigned new_size = BTREE_ITER_MAX;
+	size_t iters_bytes	= sizeof(struct btree_iter) * new_size;
+	size_t updates_bytes	= sizeof(struct btree_insert_entry) * new_size;
+	void *p;
+
+	BUG_ON(trans->used_mempool);
+
+	p = this_cpu_xchg(c->btree_iters_bufs->iter, NULL) ?:
+		mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS);
+
+	trans->iters		= p; p += iters_bytes;
+	trans->updates		= p; p += updates_bytes;
+	trans->updates2		= p; p += updates_bytes;
+	trans->size		= new_size;
+}
+
 void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
 		     unsigned expected_nr_iters,
 		     size_t expected_mem_bytes)
 {
-	memset(trans, 0, offsetof(struct btree_trans, iters_onstack));
+	memset(trans, 0, sizeof(*trans));
+
+	trans->c		= c;
+	trans->ip		= _RET_IP_;
 
 	/*
 	 * reallocating iterators currently completely breaks
-	 * bch2_trans_iter_put():
+	 * bch2_trans_iter_put(), we always allocate the max:
 	 */
-	expected_nr_iters = BTREE_ITER_MAX;
-
-	trans->c		= c;
-	trans->ip		= _RET_IP_;
-	trans->size		= ARRAY_SIZE(trans->iters_onstack);
-	trans->iters		= trans->iters_onstack;
-	trans->updates		= trans->updates_onstack;
-	trans->updates2		= trans->updates2_onstack;
-	trans->fs_usage_deltas	= NULL;
-
-	if (expected_nr_iters > trans->size)
-		bch2_trans_realloc_iters(trans, expected_nr_iters);
+	bch2_trans_alloc_iters(trans, c);
 
 	if (expected_mem_bytes)
 		bch2_trans_preload_mem(trans, expected_mem_bytes);
@@ -2341,6 +2382,8 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
 
 int bch2_trans_exit(struct btree_trans *trans)
 {
+	struct bch_fs *c = trans->c;
+
 	bch2_trans_unlock(trans);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -2353,19 +2396,21 @@ int bch2_trans_exit(struct btree_trans *trans)
 	kfree(trans->fs_usage_deltas);
 	kfree(trans->mem);
 
-	if (trans->used_mempool)
+	trans->iters = this_cpu_xchg(c->btree_iters_bufs->iter, trans->iters);
+	if (trans->iters)
 		mempool_free(trans->iters, &trans->c->btree_iters_pool);
-	else if (trans->iters != trans->iters_onstack)
-		kfree(trans->iters);
+
 	trans->mem = (void *) 0x1;
 	trans->iters = (void *) 0x1;
 
 	return trans->error ? -EIO : 0;
 }
 
-static void bch2_btree_iter_node_to_text(struct printbuf *out,
-					 struct btree_bkey_cached_common *_b,
-					 enum btree_iter_type type)
+static void __maybe_unused
+bch2_btree_iter_node_to_text(struct printbuf *out,
			     struct btree_bkey_cached_common *_b,
			     enum btree_iter_type type)
 {
 	pr_buf(out, " %px l=%u %s:",
 	       _b, _b->level, bch2_btree_ids[_b->btree_id]);
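For readers of the new trans_restart_would_deadlock events, the reason field maps onto the checks in __bch2_btree_node_lock() above. A decoder sketch; the enum and its names are illustrative (the kernel just passes small integers), the values and meanings come from the hunk itself:

    /* Matches the literal values assigned in __bch2_btree_node_lock(): */
    enum would_deadlock_reason {
            WD_UPGRADE_FAILED    = 1, /* couldn't upgrade a linked iter's locks */
            WD_UPGRADE_NOUNLOCK  = 2, /* upgrade needed but trans->nounlock set */
            WD_BTREE_ID_ORDER    = 3, /* linked iter is on a later btree id */
            WD_CACHED_ORDER      = 4, /* cached iters must be taken first */
            WD_ANCESTOR_FAILED   = 5, /* couldn't take ancestor (interior) locks */
            WD_ANCESTOR_NOUNLOCK = 6, /* ancestor locks needed, nounlock set */
            WD_KEY_ORDER         = 7, /* would violate key-order locking rule */
            WD_UNSET             = 9, /* initial value; should never be reported */
    };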

View File

@@ -177,11 +177,12 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
 void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
 void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
 
-static inline int btree_iter_cmp(const struct btree_iter *l,
-				 const struct btree_iter *r)
+/* Sort order for locking btree iterators: */
+static inline int btree_iter_lock_cmp(const struct btree_iter *l,
+				      const struct btree_iter *r)
 {
 	return   cmp_int(l->btree_id, r->btree_id) ?:
-		-cmp_int(btree_iter_type(l), btree_iter_type(r)) ?:
+		-cmp_int(btree_iter_is_cached(l), btree_iter_is_cached(r)) ?:
 		 bkey_cmp(l->pos, r->pos);
 }

View File

@@ -29,8 +29,8 @@ static const struct rhashtable_params bch2_btree_key_cache_params = {
 };
 
 __flatten
-static inline struct bkey_cached *
-btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos)
+inline struct bkey_cached *
+bch2_btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos)
 {
 	struct bkey_cached_key key = {
 		.btree_id	= btree_id,
@@ -204,6 +204,7 @@ static int bkey_cached_check_fn(struct six_lock *lock, void *p)
 		!bkey_cmp(ck->key.pos, iter->pos) ? 0 : -1;
 }
 
+__flatten
 int bch2_btree_iter_traverse_cached(struct btree_iter *iter)
 {
 	struct btree_trans *trans = iter->trans;
@@ -218,7 +219,7 @@ int bch2_btree_iter_traverse_cached(struct btree_iter *iter)
 		goto fill;
 	}
 retry:
-	ck = btree_key_cache_find(c, iter->btree_id, iter->pos);
+	ck = bch2_btree_key_cache_find(c, iter->btree_id, iter->pos);
 	if (!ck) {
 		if (iter->flags & BTREE_ITER_CACHED_NOCREATE) {
 			iter->l[0].b = NULL;
@@ -242,7 +243,7 @@ retry:
 		enum six_lock_type lock_want = __btree_lock_want(iter, 0);
 
 		if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
-				     bkey_cached_check_fn, iter)) {
+				     bkey_cached_check_fn, iter, _THIS_IP_)) {
 			if (ck->key.btree_id != iter->btree_id ||
 			    bkey_cmp(ck->key.pos, iter->pos)) {
 				goto retry;
@@ -415,7 +416,7 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,
 	struct bkey_cached_key key = { id, pos };
 
 	/* Fastpath - assume it won't be found: */
-	if (!btree_key_cache_find(c, id, pos))
+	if (!bch2_btree_key_cache_find(c, id, pos))
 		return 0;
 
 	return btree_key_cache_flush_pos(trans, key, 0, true);
@@ -462,7 +463,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 void bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
 				       enum btree_id id, struct bpos pos)
 {
-	BUG_ON(btree_key_cache_find(trans->c, id, pos));
+	BUG_ON(bch2_btree_key_cache_find(trans->c, id, pos));
 }
 #endif

View File

@@ -1,6 +1,9 @@
 #ifndef _BCACHEFS_BTREE_KEY_CACHE_H
 #define _BCACHEFS_BTREE_KEY_CACHE_H
 
+struct bkey_cached *
+bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);
+
 int bch2_btree_iter_traverse_cached(struct btree_iter *);
 
 bool bch2_btree_insert_key_cached(struct btree_trans *,

View File

@@ -176,13 +176,15 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
 
 bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
 			    struct btree_iter *, enum six_lock_type,
-			    six_lock_should_sleep_fn, void *);
+			    six_lock_should_sleep_fn, void *,
+			    unsigned long);
 
 static inline bool btree_node_lock(struct btree *b,
 			struct bpos pos, unsigned level,
 			struct btree_iter *iter,
 			enum six_lock_type type,
-			six_lock_should_sleep_fn should_sleep_fn, void *p)
+			six_lock_should_sleep_fn should_sleep_fn, void *p,
+			unsigned long ip)
 {
 	struct btree_trans *trans = iter->trans;
 	bool ret;
@@ -200,7 +202,7 @@ static inline bool btree_node_lock(struct btree *b,
 	ret   = likely(six_trylock_type(&b->c.lock, type)) ||
 		btree_node_lock_increment(trans, b, level, type) ||
 		__bch2_btree_node_lock(b, pos, level, iter, type,
-				       should_sleep_fn, p);
+				       should_sleep_fn, p, ip);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 	trans->locking = NULL;

View File

@@ -130,10 +130,6 @@ struct btree {
 
 	struct btree_write	writes[2];
 
-#ifdef CONFIG_BCACHEFS_DEBUG
-	bool			*expensive_debug_checks;
-#endif
-
 	/* Key/pointer for this btree node */
 	__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
 };
@@ -283,6 +279,11 @@ btree_iter_type(const struct btree_iter *iter)
 	return iter->flags & BTREE_ITER_TYPE;
 }
 
+static inline bool btree_iter_is_cached(const struct btree_iter *iter)
+{
+	return btree_iter_type(iter) == BTREE_ITER_CACHED;
+}
+
 static inline struct btree_iter_level *iter_l(struct btree_iter *iter)
 {
 	return iter->l + iter->level;
@@ -380,10 +381,6 @@ struct btree_trans {
 	unsigned		journal_u64s;
 	unsigned		journal_preres_u64s;
 	struct replicas_delta_list *fs_usage_deltas;
-
-	struct btree_iter	iters_onstack[2];
-	struct btree_insert_entry updates_onstack[2];
-	struct btree_insert_entry updates2_onstack[2];
 };
 
 #define BTREE_FLAG(flag)			\
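With the on-stack iterator arrays gone from btree_trans, every transaction now carries a full BTREE_ITER_MAX-sized allocation, and the new per-cpu btree_iters_bufs field (see the bch_fs hunk above and the this_cpu_xchg() calls in btree_iter.c) caches the most recently freed one. The filesystem setup/teardown side is not in the hunks shown; a sketch of what it presumably looks like, using the standard percpu allocator (function names invented for illustration):

    /* Hypothetical setup/teardown for c->btree_iters_bufs; the real code
     * lives in super.c, outside the hunks shown here. */
    static int btree_iters_bufs_init(struct bch_fs *c)
    {
            c->btree_iters_bufs = alloc_percpu(struct btree_iter_buf);
            return c->btree_iters_bufs ? 0 : -ENOMEM;
    }

    static void btree_iters_bufs_exit(struct bch_fs *c)
    {
            int cpu;

            /* Cached buffers came from the iters mempool, so return them: */
            for_each_possible_cpu(cpu) {
                    struct btree_iter *iter =
                            per_cpu_ptr(c->btree_iters_bufs, cpu)->iter;

                    if (iter)
                            mempool_free(iter, &c->btree_iters_pool);
            }
            free_percpu(c->btree_iters_bufs);
    }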

View File

@@ -1313,7 +1313,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
 	 * the node the iterator points to:
 	 */
 	while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) &&
-	       (bkey_cmp_packed(b, k, &insert->k) >= 0))
+	       (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0))
 		;
 
 	for_each_keylist_key(keys, insert)

View File

@@ -72,7 +72,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
 	EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
 
 	k = bch2_btree_node_iter_peek_all(node_iter, b);
-	if (k && bkey_cmp_packed(b, k, &insert->k))
+	if (k && bkey_cmp_left_packed(b, k, &insert->k.p))
 		k = NULL;
 
 	/* @k is the key being overwritten/deleted, if any: */
@@ -220,7 +220,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
 	struct bch_fs *c = trans->c;
 
 	BUG_ON(bkey_cmp(insert->k.p, iter->pos));
-	BUG_ON(debug_check_bkeys(c) &&
+	BUG_ON(bch2_debug_check_bkeys &&
 	       bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
 				 __btree_node_type(iter->level, iter->btree_id)));
 }
@@ -440,10 +440,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
 	 */
 
 	if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
-		if (journal_seq_verify(c))
+		if (bch2_journal_seq_verify)
 			trans_for_each_update2(trans, i)
 				i->k->k.version.lo = trans->journal_res.seq;
-		else if (inject_invalid_keys(c))
+		else if (bch2_inject_invalid_keys)
 			trans_for_each_update2(trans, i)
 				i->k->k.version = MAX_VERSION;
 	}
@@ -680,6 +680,13 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
 	return 0;
 }
 
+static inline int btree_iter_pos_cmp(const struct btree_iter *l,
+				     const struct btree_iter *r)
+{
+	return   cmp_int(l->btree_id, r->btree_id) ?:
+		 bkey_cmp(l->pos, r->pos);
+}
+
 static void bch2_trans_update2(struct btree_trans *trans,
 			       struct btree_iter *iter,
 			       struct bkey_i *insert)
@@ -697,12 +704,12 @@ static void bch2_trans_update2(struct btree_trans *trans,
 	iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
 
 	trans_for_each_update2(trans, i) {
-		if (btree_iter_cmp(n.iter, i->iter) == 0) {
+		if (btree_iter_pos_cmp(n.iter, i->iter) == 0) {
 			*i = n;
 			return;
 		}
 
-		if (btree_iter_cmp(n.iter, i->iter) <= 0)
+		if (btree_iter_pos_cmp(n.iter, i->iter) <= 0)
 			break;
 	}
@@ -986,7 +993,7 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
 	 * Pending updates are kept sorted: first, find position of new update:
 	 */
 	trans_for_each_update(trans, i)
-		if (btree_iter_cmp(iter, i->iter) <= 0)
+		if (btree_iter_pos_cmp(iter, i->iter) <= 0)
 			break;
 
 	/*

View File

@@ -70,7 +70,7 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
 	BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);
 
-	if (!IS_ENABLED(CONFIG_HIGHMEM) &&
+	if (!PageHighMem(bio_iter_page(bio, start)) &&
 	    bio_phys_contig(bio, start))
 		return (struct bbuf) {
 			.b = page_address(bio_iter_page(bio, start)) +

View File

@@ -54,7 +54,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
 	v->written	= 0;
 	v->c.level	= b->c.level;
 	v->c.btree_id	= b->c.btree_id;
-	bch2_btree_keys_init(v, &c->expensive_debug_checks);
+	bch2_btree_keys_init(v);
 
 	if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
 				       NULL, &pick) <= 0)


@ -8,44 +8,15 @@ struct bio;
struct btree; struct btree;
struct bch_fs; struct bch_fs;
#define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name;
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
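/*
 * Illustrative expansion, not part of the diff: for each entry in
 * BCH_DEBUG_PARAMS(), e.g. debug_check_bkeys, the x-macro above now
 * declares a single global toggle,
 *
 *	extern bool bch2_debug_check_bkeys;
 *
 * replacing the old per-filesystem static inline wrappers -- which is why
 * call sites elsewhere in this commit change from debug_check_bkeys(c)
 * to bch2_debug_check_bkeys.
 */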
#define BCH_DEBUG_PARAM(name, description) \
static inline bool name(struct bch_fs *c) \
{ return bch2_##name || c->name; }
BCH_DEBUG_PARAMS_ALWAYS()
#undef BCH_DEBUG_PARAM
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG
#define BCH_DEBUG_PARAM(name, description) \
static inline bool name(struct bch_fs *c) \
{ return bch2_##name || c->name; }
BCH_DEBUG_PARAMS_DEBUG()
#undef BCH_DEBUG_PARAM
void __bch2_btree_verify(struct bch_fs *, struct btree *); void __bch2_btree_verify(struct bch_fs *, struct btree *);
#else
#define bypass_torture_test(d) ((d)->bypass_torture_test)
#else /* DEBUG */
#define BCH_DEBUG_PARAM(name, description) \
static inline bool name(struct bch_fs *c) { return false; }
BCH_DEBUG_PARAMS_DEBUG()
#undef BCH_DEBUG_PARAM
static inline void __bch2_btree_verify(struct bch_fs *c, struct btree *b) {} static inline void __bch2_btree_verify(struct bch_fs *c, struct btree *b) {}
#define bypass_torture_test(d) 0
#endif #endif
static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b) static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b)
{ {
if (verify_btree_ondisk(c)) if (bch2_verify_btree_ondisk)
__bch2_btree_verify(c, b); __bch2_btree_verify(c, b);
} }


@ -1586,7 +1586,7 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
size_t i; size_t i;
spin_lock(&c->ec_stripes_heap_lock); spin_lock(&c->ec_stripes_heap_lock);
for (i = 0; i < min(h->used, 20UL); i++) { for (i = 0; i < min_t(size_t, h->used, 20); i++) {
m = genradix_ptr(&c->stripes[0], h->data[i].idx); m = genradix_ptr(&c->stripes[0], h->data[i].idx);
pr_buf(out, "%zu %u/%u+%u\n", h->data[i].idx, pr_buf(out, "%zu %u/%u+%u\n", h->data[i].idx,


@ -89,7 +89,7 @@ static inline bool ptr_better(struct bch_fs *c,
return bch2_rand_range(l1 + l2) > l1; return bch2_rand_range(l1 + l2) > l1;
} }
if (force_reconstruct_read(c)) if (bch2_force_reconstruct_read)
return p1.idx > p2.idx; return p1.idx > p2.idx;
return p1.idx < p2.idx; return p1.idx < p2.idx;
@ -137,7 +137,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
!bch2_dev_is_readable(ca)) !bch2_dev_is_readable(ca))
p.idx++; p.idx++;
if (force_reconstruct_read(c) && if (bch2_force_reconstruct_read &&
!p.idx && p.has_ec) !p.idx && p.has_ec)
p.idx++; p.idx++;


@ -34,9 +34,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
if (!name) if (!name)
new_inode->bi_flags |= BCH_INODE_UNLINKED; new_inode->bi_flags |= BCH_INODE_UNLINKED;
ret = bch2_inode_create(trans, new_inode, ret = bch2_inode_create(trans, new_inode);
BLOCKDEV_INODE_MAX, 0,
&c->unused_inode_hint);
if (ret) if (ret)
goto err; goto err;


@ -265,28 +265,13 @@ static inline struct bch_page_state *bch2_page_state(struct page *page)
/* for newly allocated pages: */ /* for newly allocated pages: */
static void __bch2_page_state_release(struct page *page) static void __bch2_page_state_release(struct page *page)
{ {
struct bch_page_state *s = __bch2_page_state(page); kfree(detach_page_private(page));
if (!s)
return;
ClearPagePrivate(page);
set_page_private(page, 0);
put_page(page);
kfree(s);
} }
static void bch2_page_state_release(struct page *page) static void bch2_page_state_release(struct page *page)
{ {
struct bch_page_state *s = bch2_page_state(page); EBUG_ON(!PageLocked(page));
__bch2_page_state_release(page);
if (!s)
return;
ClearPagePrivate(page);
set_page_private(page, 0);
put_page(page);
kfree(s);
} }
/* for newly allocated pages: */ /* for newly allocated pages: */
@ -300,13 +285,7 @@ static struct bch_page_state *__bch2_page_state_create(struct page *page,
return NULL; return NULL;
spin_lock_init(&s->lock); spin_lock_init(&s->lock);
/* attach_page_private(page, s);
* migrate_page_move_mapping() assumes that pages with private data
* have their count elevated by 1.
*/
get_page(page);
set_page_private(page, (unsigned long) s);
SetPagePrivate(page);
return s; return s;
} }
@ -608,14 +587,8 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage,
if (ret != MIGRATEPAGE_SUCCESS) if (ret != MIGRATEPAGE_SUCCESS)
return ret; return ret;
if (PagePrivate(page)) { if (PagePrivate(page))
ClearPagePrivate(page); attach_page_private(newpage, detach_page_private(page));
get_page(newpage);
set_page_private(newpage, page_private(page));
set_page_private(page, 0);
put_page(page);
SetPagePrivate(newpage);
}
if (mode != MIGRATE_SYNC_NO_COPY) if (mode != MIGRATE_SYNC_NO_COPY)
migrate_page_copy(newpage, page); migrate_page_copy(newpage, page);
@ -647,41 +620,33 @@ static void bch2_readpages_end_io(struct bio *bio)
bio_put(bio); bio_put(bio);
} }
static inline void page_state_init_for_read(struct page *page)
{
SetPagePrivate(page);
page->private = 0;
}
struct readpages_iter { struct readpages_iter {
struct address_space *mapping; struct address_space *mapping;
struct page **pages; struct page **pages;
unsigned nr_pages; unsigned nr_pages;
unsigned nr_added;
unsigned idx; unsigned idx;
pgoff_t offset; pgoff_t offset;
}; };
static int readpages_iter_init(struct readpages_iter *iter, static int readpages_iter_init(struct readpages_iter *iter,
struct address_space *mapping, struct readahead_control *ractl)
struct list_head *pages, unsigned nr_pages)
{ {
unsigned i, nr_pages = readahead_count(ractl);
memset(iter, 0, sizeof(*iter)); memset(iter, 0, sizeof(*iter));
iter->mapping = mapping; iter->mapping = ractl->mapping;
iter->offset = list_last_entry(pages, struct page, lru)->index; iter->offset = readahead_index(ractl);
iter->nr_pages = nr_pages;
iter->pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS); iter->pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS);
if (!iter->pages) if (!iter->pages)
return -ENOMEM; return -ENOMEM;
while (!list_empty(pages)) { __readahead_batch(ractl, iter->pages, nr_pages);
struct page *page = list_last_entry(pages, struct page, lru); for (i = 0; i < nr_pages; i++) {
__bch2_page_state_create(iter->pages[i], __GFP_NOFAIL);
__bch2_page_state_create(page, __GFP_NOFAIL); put_page(iter->pages[i]);
iter->pages[iter->nr_pages++] = page;
list_del(&page->lru);
} }
return 0; return 0;
@ -689,41 +654,9 @@ static int readpages_iter_init(struct readpages_iter *iter,
static inline struct page *readpage_iter_next(struct readpages_iter *iter) static inline struct page *readpage_iter_next(struct readpages_iter *iter)
{ {
struct page *page; if (iter->idx >= iter->nr_pages)
unsigned i; return NULL;
int ret;
BUG_ON(iter->idx > iter->nr_added);
BUG_ON(iter->nr_added > iter->nr_pages);
if (iter->idx < iter->nr_added)
goto out;
while (1) {
if (iter->idx == iter->nr_pages)
return NULL;
ret = add_to_page_cache_lru_vec(iter->mapping,
iter->pages + iter->nr_added,
iter->nr_pages - iter->nr_added,
iter->offset + iter->nr_added,
GFP_NOFS);
if (ret > 0)
break;
page = iter->pages[iter->nr_added];
iter->idx++;
iter->nr_added++;
__bch2_page_state_release(page);
put_page(page);
}
iter->nr_added += ret;
for (i = iter->idx; i < iter->nr_added; i++)
put_page(iter->pages[i]);
out:
EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx); EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx);
return iter->pages[iter->idx]; return iter->pages[iter->idx];
@ -889,10 +822,9 @@ retry:
bkey_on_stack_exit(&sk, c); bkey_on_stack_exit(&sk, c);
} }
int bch2_readpages(struct file *file, struct address_space *mapping, void bch2_readahead(struct readahead_control *ractl)
struct list_head *pages, unsigned nr_pages)
{ {
struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_io_opts opts = io_opts(c, &inode->ei_inode); struct bch_io_opts opts = io_opts(c, &inode->ei_inode);
struct btree_trans trans; struct btree_trans trans;
@ -901,7 +833,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
struct readpages_iter readpages_iter; struct readpages_iter readpages_iter;
int ret; int ret;
ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages); ret = readpages_iter_init(&readpages_iter, ractl);
BUG_ON(ret); BUG_ON(ret);
bch2_trans_init(&trans, c, 0, 0); bch2_trans_init(&trans, c, 0, 0);
@ -936,8 +868,6 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
kfree(readpages_iter.pages); kfree(readpages_iter.pages);
return 0;
} }
static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,


@ -19,8 +19,7 @@ int bch2_writepage(struct page *, struct writeback_control *);
int bch2_readpage(struct file *, struct page *); int bch2_readpage(struct file *, struct page *);
int bch2_writepages(struct address_space *, struct writeback_control *); int bch2_writepages(struct address_space *, struct writeback_control *);
int bch2_readpages(struct file *, struct address_space *, void bch2_readahead(struct readahead_control *);
struct list_head *, unsigned);
int bch2_write_begin(struct file *, struct address_space *, loff_t, int bch2_write_begin(struct file *, struct address_space *, loff_t,
unsigned, unsigned, struct page **, void **); unsigned, unsigned, struct page **, void **);


@ -42,6 +42,11 @@ static void journal_seq_copy(struct bch_fs *c,
struct bch_inode_info *dst, struct bch_inode_info *dst,
u64 journal_seq) u64 journal_seq)
{ {
/*
* atomic64_cmpxchg has a fallback for archs that don't support it,
* cmpxchg does not:
*/
atomic64_t *dst_seq = (void *) &dst->ei_journal_seq;
u64 old, v = READ_ONCE(dst->ei_journal_seq); u64 old, v = READ_ONCE(dst->ei_journal_seq);
do { do {
@ -49,7 +54,7 @@ static void journal_seq_copy(struct bch_fs *c,
if (old >= journal_seq) if (old >= journal_seq)
break; break;
} while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); } while ((v = atomic64_cmpxchg(dst_seq, old, journal_seq)) != old);
bch2_journal_set_has_inum(&c->journal, dst->v.i_ino, journal_seq); bch2_journal_set_has_inum(&c->journal, dst->v.i_ino, journal_seq);
} }
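/*
 * Illustrative userspace sketch of the same "advance to the maximum seq"
 * pattern with C11 atomics, not part of the diff (demo_ name made up):
 * retry until either the stored sequence is already >= ours or our
 * compare-and-swap wins.
 */
#include <stdatomic.h>
#include <stdint.h>

static void demo_seq_copy(_Atomic uint64_t *dst, uint64_t journal_seq)
{
	uint64_t old = atomic_load(dst);

	while (old < journal_seq &&
	       !atomic_compare_exchange_weak(dst, &old, journal_seq))
		;	/* on failure, old is reloaded and the loop re-checks */
}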
@ -225,6 +230,13 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum)
return &inode->v; return &inode->v;
} }
static int inum_test(struct inode *inode, void *p)
{
unsigned long *ino = p;
return *ino == inode->i_ino;
}
static struct bch_inode_info * static struct bch_inode_info *
__bch2_create(struct bch_inode_info *dir, struct dentry *dentry, __bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
umode_t mode, dev_t rdev, bool tmpfile) umode_t mode, dev_t rdev, bool tmpfile)
@ -304,8 +316,12 @@ err_before_quota:
* thread pulling the inode in and modifying it: * thread pulling the inode in and modifying it:
*/ */
old = to_bch_ei(insert_inode_locked2(&inode->v)); inode->v.i_state |= I_CREATING;
if (unlikely(old)) { old = to_bch_ei(inode_insert5(&inode->v, inode->v.i_ino,
inum_test, NULL, &inode->v.i_ino));
BUG_ON(!old);
if (unlikely(old != inode)) {
/* /*
* We raced, another process pulled the new inode into cache * We raced, another process pulled the new inode into cache
* before us: * before us:
@ -807,7 +823,7 @@ static int bch2_fill_extent(struct bch_fs *c,
struct fiemap_extent_info *info, struct fiemap_extent_info *info,
struct bkey_s_c k, unsigned flags) struct bkey_s_c k, unsigned flags)
{ {
if (bkey_extent_is_data(k.k)) { if (bkey_extent_is_direct_data(k.k)) {
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry; const union bch_extent_entry *entry;
struct extent_ptr_decoded p; struct extent_ptr_decoded p;
@ -838,6 +854,12 @@ static int bch2_fill_extent(struct bch_fs *c,
} }
return 0; return 0;
} else if (bkey_extent_is_inline_data(k.k)) {
return fiemap_fill_next_extent(info,
bkey_start_offset(k.k) << 9,
0, k.k->size << 9,
flags|
FIEMAP_EXTENT_DATA_INLINE);
} else if (k.k->type == KEY_TYPE_reservation) { } else if (k.k->type == KEY_TYPE_reservation) {
return fiemap_fill_next_extent(info, return fiemap_fill_next_extent(info,
bkey_start_offset(k.k) << 9, bkey_start_offset(k.k) << 9,
@ -891,9 +913,7 @@ retry:
bkey_start_offset(k.k); bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent; sectors = k.k->size - offset_into_extent;
bkey_on_stack_realloc(&cur, c, k.k->u64s); bkey_on_stack_reassemble(&cur, c, k);
bkey_on_stack_realloc(&prev, c, k.k->u64s);
bkey_reassemble(cur.k, k);
ret = bch2_read_indirect_extent(&trans, ret = bch2_read_indirect_extent(&trans,
&offset_into_extent, &cur); &offset_into_extent, &cur);
@ -901,14 +921,14 @@ retry:
break; break;
k = bkey_i_to_s_c(cur.k); k = bkey_i_to_s_c(cur.k);
bkey_on_stack_realloc(&prev, c, k.k->u64s);
sectors = min(sectors, k.k->size - offset_into_extent); sectors = min(sectors, k.k->size - offset_into_extent);
if (offset_into_extent) bch2_cut_front(POS(k.k->p.inode,
bch2_cut_front(POS(k.k->p.inode, bkey_start_offset(k.k) +
bkey_start_offset(k.k) + offset_into_extent),
offset_into_extent), cur.k);
cur.k);
bch2_key_resize(&cur.k->k, sectors); bch2_key_resize(&cur.k->k, sectors);
cur.k->k.p = iter->pos; cur.k->k.p = iter->pos;
cur.k->k.p.offset += cur.k->k.size; cur.k->k.p.offset += cur.k->k.size;
@ -923,10 +943,8 @@ retry:
bkey_copy(prev.k, cur.k); bkey_copy(prev.k, cur.k);
have_extent = true; have_extent = true;
if (k.k->type == KEY_TYPE_reflink_v) bch2_btree_iter_set_pos(iter,
bch2_btree_iter_set_pos(iter, k.k->p); POS(iter->pos.inode, iter->pos.offset + sectors));
else
bch2_btree_iter_next(iter);
} }
if (ret == -EINTR) if (ret == -EINTR)
@ -1062,7 +1080,7 @@ static const struct address_space_operations bch_address_space_operations = {
.writepage = bch2_writepage, .writepage = bch2_writepage,
.readpage = bch2_readpage, .readpage = bch2_readpage,
.writepages = bch2_writepages, .writepages = bch2_writepages,
.readpages = bch2_readpages, .readahead = bch2_readahead,
.set_page_dirty = __set_page_dirty_nobuffers, .set_page_dirty = __set_page_dirty_nobuffers,
.write_begin = bch2_write_begin, .write_begin = bch2_write_begin,
.write_end = bch2_write_end, .write_end = bch2_write_end,
@ -1238,6 +1256,11 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
struct bch_fs *c = sb->s_fs_info; struct bch_fs *c = sb->s_fs_info;
struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
unsigned shift = sb->s_blocksize_bits - 9; unsigned shift = sb->s_blocksize_bits - 9;
/*
* this assumes inodes take up 64 bytes, which is a decent average
* number:
*/
u64 avail_inodes = ((usage.capacity - usage.used) << 3);
u64 fsid; u64 fsid;
buf->f_type = BCACHEFS_STATFS_MAGIC; buf->f_type = BCACHEFS_STATFS_MAGIC;
@ -1245,8 +1268,9 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = usage.capacity >> shift; buf->f_blocks = usage.capacity >> shift;
buf->f_bfree = (usage.capacity - usage.used) >> shift; buf->f_bfree = (usage.capacity - usage.used) >> shift;
buf->f_bavail = buf->f_bfree; buf->f_bavail = buf->f_bfree;
buf->f_files = 0;
buf->f_ffree = 0; buf->f_files = usage.nr_inodes + avail_inodes;
buf->f_ffree = avail_inodes;
fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^ fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64)); le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
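/*
 * Worked example of the f_files/f_ffree estimate above, illustrative only
 * (demo_ helper made up): capacity and used are in 512-byte sectors, and
 * an inode is assumed to average 64 bytes on disk, so each free sector can
 * hold 512 / 64 = 8 inodes -- hence the << 3.  With 1000 free sectors,
 * statfs would report roughly 8000 free inodes.
 */
static inline u64 demo_avail_inodes(u64 capacity_sectors, u64 used_sectors)
{
	return (capacity_sectors - used_sectors) << 3;
}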


@ -537,7 +537,7 @@ retry:
bch2_trans_unlock(&trans); bch2_trans_unlock(&trans);
bch2_inode_pack(&p, &w.inode); bch2_inode_pack(c, &p, &w.inode);
ret = bch2_btree_insert(c, BTREE_ID_INODES, ret = bch2_btree_insert(c, BTREE_ID_INODES,
&p.inode.k_i, NULL, NULL, &p.inode.k_i, NULL, NULL,
@ -808,7 +808,7 @@ create_root:
0, NULL); 0, NULL);
root_inode->bi_inum = BCACHEFS_ROOT_INO; root_inode->bi_inum = BCACHEFS_ROOT_INO;
bch2_inode_pack(&packed, root_inode); bch2_inode_pack(c, &packed, root_inode);
return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
NULL, NULL, NULL, NULL,
@ -866,36 +866,22 @@ create_lostfound:
return ret; return ret;
} }
struct inode_bitmap { typedef GENRADIX(unsigned long) inode_bitmap;
unsigned long *bits;
size_t size;
};
static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr) static inline bool inode_bitmap_test(inode_bitmap *b, size_t nr)
{ {
return nr < b->size ? test_bit(nr, b->bits) : false; unsigned long *w = genradix_ptr(b, nr / BITS_PER_LONG);
return w ? test_bit(nr & (BITS_PER_LONG - 1), w) : false;
} }
static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr) static inline int inode_bitmap_set(inode_bitmap *b, size_t nr)
{ {
if (nr >= b->size) { unsigned long *w = genradix_ptr_alloc(b, nr / BITS_PER_LONG, GFP_KERNEL);
size_t new_size = max_t(size_t, max_t(size_t,
PAGE_SIZE * 8,
b->size * 2),
nr + 1);
void *n;
new_size = roundup_pow_of_two(new_size); if (!w)
n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO); return -ENOMEM;
if (!n) {
return -ENOMEM;
}
b->bits = n; *w |= 1UL << (nr & (BITS_PER_LONG - 1));
b->size = new_size;
}
__set_bit(nr, b->bits);
return 0; return 0;
} }
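/*
 * Minimal usage sketch of the helpers above, not part of the diff (demo_
 * name made up): inode number nr maps to bit nr % BITS_PER_LONG of
 * genradix slot nr / BITS_PER_LONG, so only the words that are actually
 * touched get allocated -- no more krealloc()'ing one flat array up to the
 * largest inode number seen.
 */
static int demo_mark_dir_seen(inode_bitmap *seen, u64 inum)
{
	if (inode_bitmap_test(seen, inum))
		return 0;			/* already visited */
	return inode_bitmap_set(seen, inum);	/* -ENOMEM if allocation fails */
}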
@ -934,7 +920,7 @@ noinline_for_stack
static int check_directory_structure(struct bch_fs *c, static int check_directory_structure(struct bch_fs *c,
struct bch_inode_unpacked *lostfound_inode) struct bch_inode_unpacked *lostfound_inode)
{ {
struct inode_bitmap dirs_done = { NULL, 0 }; inode_bitmap dirs_done;
struct pathbuf path = { 0, 0, NULL }; struct pathbuf path = { 0, 0, NULL };
struct pathbuf_entry *e; struct pathbuf_entry *e;
struct btree_trans trans; struct btree_trans trans;
@ -951,6 +937,7 @@ static int check_directory_structure(struct bch_fs *c,
/* DFS: */ /* DFS: */
restart_dfs: restart_dfs:
genradix_init(&dirs_done);
had_unreachable = false; had_unreachable = false;
ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO); ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO);
@ -1057,7 +1044,7 @@ retry:
if (had_unreachable) { if (had_unreachable) {
bch_info(c, "reattached unreachable directories, restarting pass to check for loops"); bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
kfree(dirs_done.bits); genradix_free(&dirs_done);
kfree(path.entries); kfree(path.entries);
memset(&dirs_done, 0, sizeof(dirs_done)); memset(&dirs_done, 0, sizeof(dirs_done));
memset(&path, 0, sizeof(path)); memset(&path, 0, sizeof(path));
@ -1066,7 +1053,7 @@ retry:
err: err:
fsck_err: fsck_err:
ret = bch2_trans_exit(&trans) ?: ret; ret = bch2_trans_exit(&trans) ?: ret;
kfree(dirs_done.bits); genradix_free(&dirs_done);
kfree(path.entries); kfree(path.entries);
return ret; return ret;
} }
@ -1326,7 +1313,7 @@ static int check_inode(struct btree_trans *trans,
if (do_update) { if (do_update) {
struct bkey_inode_buf p; struct bkey_inode_buf p;
bch2_inode_pack(&p, &u); bch2_inode_pack(c, &p, &u);
ret = __bch2_trans_do(trans, NULL, NULL, ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|


@ -1,12 +1,14 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "btree_key_cache.h"
#include "bkey_methods.h" #include "bkey_methods.h"
#include "btree_update.h" #include "btree_update.h"
#include "error.h" #include "error.h"
#include "extents.h" #include "extents.h"
#include "inode.h" #include "inode.h"
#include "str_hash.h" #include "str_hash.h"
#include "varint.h"
#include <linux/random.h> #include <linux/random.h>
@ -88,22 +90,17 @@ static int inode_decode_field(const u8 *in, const u8 *end,
return bytes; return bytes;
} }
void bch2_inode_pack(struct bkey_inode_buf *packed, static noinline void bch2_inode_pack_v1(struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode) const struct bch_inode_unpacked *inode)
{ {
u8 *out = packed->inode.v.fields; struct bkey_i_inode *k = &packed->inode;
u8 *out = k->v.fields;
u8 *end = (void *) &packed[1]; u8 *end = (void *) &packed[1];
u8 *last_nonzero_field = out; u8 *last_nonzero_field = out;
unsigned nr_fields = 0, last_nonzero_fieldnr = 0; unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
unsigned bytes; unsigned bytes;
bkey_inode_init(&packed->inode.k_i); #define x(_name, _bits) \
packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags);
packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode);
#define x(_name, _bits) \
out += inode_encode_field(out, end, 0, inode->_name); \ out += inode_encode_field(out, end, 0, inode->_name); \
nr_fields++; \ nr_fields++; \
\ \
@ -122,7 +119,69 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
set_bkey_val_bytes(&packed->inode.k, bytes); set_bkey_val_bytes(&packed->inode.k, bytes);
memset_u64s_tail(&packed->inode.v, 0, bytes); memset_u64s_tail(&packed->inode.v, 0, bytes);
SET_INODE_NR_FIELDS(&packed->inode.v, nr_fields); SET_INODE_NR_FIELDS(&k->v, nr_fields);
}
static void bch2_inode_pack_v2(struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
{
struct bkey_i_inode *k = &packed->inode;
u8 *out = k->v.fields;
u8 *end = (void *) &packed[1];
u8 *last_nonzero_field = out;
unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
unsigned bytes;
int ret;
#define x(_name, _bits) \
nr_fields++; \
\
if (inode->_name) { \
ret = bch2_varint_encode(out, inode->_name); \
out += ret; \
\
if (_bits > 64) \
*out++ = 0; \
\
last_nonzero_field = out; \
last_nonzero_fieldnr = nr_fields; \
} else { \
*out++ = 0; \
\
if (_bits > 64) \
*out++ = 0; \
}
BCH_INODE_FIELDS()
#undef x
BUG_ON(out > end);
out = last_nonzero_field;
nr_fields = last_nonzero_fieldnr;
bytes = out - (u8 *) &packed->inode.v;
set_bkey_val_bytes(&packed->inode.k, bytes);
memset_u64s_tail(&packed->inode.v, 0, bytes);
SET_INODE_NR_FIELDS(&k->v, nr_fields);
}
void bch2_inode_pack(struct bch_fs *c,
struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
{
bkey_inode_init(&packed->inode.k_i);
packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags);
packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode);
if (c->sb.features & (1ULL << BCH_FEATURE_new_varint)) {
SET_INODE_NEW_VARINT(&packed->inode.v, true);
bch2_inode_pack_v2(packed, inode);
} else {
bch2_inode_pack_v1(packed, inode);
}
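/*
 * Illustrative note, not part of the diff: the encoding is chosen per
 * filesystem.  Once BCH_FEATURE_new_varint is set in the superblock, new
 * inodes are packed with the varint format and tagged via INODE_NEW_VARINT
 * so bch2_inode_unpack() below knows which decoder to use; filesystems
 * without the feature bit keep writing the v1 field encoding.
 */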
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct bch_inode_unpacked unpacked; struct bch_inode_unpacked unpacked;
@ -134,26 +193,23 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed);
BUG_ON(unpacked.bi_mode != inode->bi_mode); BUG_ON(unpacked.bi_mode != inode->bi_mode);
#define x(_name, _bits) BUG_ON(unpacked._name != inode->_name); #define x(_name, _bits) if (unpacked._name != inode->_name) \
panic("unpacked %llu should be %llu", \
(u64) unpacked._name, (u64) inode->_name);
BCH_INODE_FIELDS() BCH_INODE_FIELDS()
#undef x #undef x
} }
} }
int bch2_inode_unpack(struct bkey_s_c_inode inode, static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
struct bch_inode_unpacked *unpacked) struct bch_inode_unpacked *unpacked)
{ {
const u8 *in = inode.v->fields; const u8 *in = inode.v->fields;
const u8 *end = (void *) inode.v + bkey_val_bytes(inode.k); const u8 *end = bkey_val_end(inode);
u64 field[2]; u64 field[2];
unsigned fieldnr = 0, field_bits; unsigned fieldnr = 0, field_bits;
int ret; int ret;
unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
#define x(_name, _bits) \ #define x(_name, _bits) \
if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \ if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \
memset(&unpacked->_name, 0, \ memset(&unpacked->_name, 0, \
@ -176,6 +232,62 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
#undef x #undef x
/* XXX: signal if there were more fields than expected? */ /* XXX: signal if there were more fields than expected? */
return 0;
}
static int bch2_inode_unpack_v2(struct bkey_s_c_inode inode,
struct bch_inode_unpacked *unpacked)
{
const u8 *in = inode.v->fields;
const u8 *end = bkey_val_end(inode);
unsigned fieldnr = 0;
int ret;
u64 v[2];
#define x(_name, _bits) \
if (fieldnr < INODE_NR_FIELDS(inode.v)) { \
ret = bch2_varint_decode(in, end, &v[0]); \
if (ret < 0) \
return ret; \
in += ret; \
\
if (_bits > 64) { \
ret = bch2_varint_decode(in, end, &v[1]); \
if (ret < 0) \
return ret; \
in += ret; \
} else { \
v[1] = 0; \
} \
} else { \
v[0] = v[1] = 0; \
} \
\
unpacked->_name = v[0]; \
if (v[1] || v[0] != unpacked->_name) \
return -1; \
fieldnr++;
BCH_INODE_FIELDS()
#undef x
/* XXX: signal if there were more fields than expected? */
return 0;
}
int bch2_inode_unpack(struct bkey_s_c_inode inode,
struct bch_inode_unpacked *unpacked)
{
unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
if (INODE_NEW_VARINT(inode.v)) {
return bch2_inode_unpack_v2(inode, unpacked);
} else {
return bch2_inode_unpack_v1(inode, unpacked);
}
return 0; return 0;
} }
@ -189,11 +301,11 @@ struct btree_iter *bch2_inode_peek(struct btree_trans *trans,
int ret; int ret;
iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum), iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum),
BTREE_ITER_SLOTS|flags); BTREE_ITER_CACHED|flags);
if (IS_ERR(iter)) if (IS_ERR(iter))
return iter; return iter;
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_cached(iter);
ret = bkey_err(k); ret = bkey_err(k);
if (ret) if (ret)
goto err; goto err;
@ -222,7 +334,7 @@ int bch2_inode_write(struct btree_trans *trans,
if (IS_ERR(inode_p)) if (IS_ERR(inode_p))
return PTR_ERR(inode_p); return PTR_ERR(inode_p);
bch2_inode_pack(inode_p, inode); bch2_inode_pack(trans->c, inode_p, inode);
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
return 0; return 0;
} }
@ -271,6 +383,8 @@ void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
return; return;
} }
pr_buf(out, "mode: %o ", unpacked.bi_mode);
#define x(_name, _bits) \ #define x(_name, _bits) \
pr_buf(out, #_name ": %llu ", (u64) unpacked._name); pr_buf(out, #_name ": %llu ", (u64) unpacked._name);
BCH_INODE_FIELDS() BCH_INODE_FIELDS()
@ -359,20 +473,24 @@ static inline u32 bkey_generation(struct bkey_s_c k)
} }
int bch2_inode_create(struct btree_trans *trans, int bch2_inode_create(struct btree_trans *trans,
struct bch_inode_unpacked *inode_u, struct bch_inode_unpacked *inode_u)
u64 min, u64 max, u64 *hint)
{ {
struct bch_fs *c = trans->c;
struct bkey_inode_buf *inode_p; struct bkey_inode_buf *inode_p;
struct btree_iter *iter = NULL; struct btree_iter *iter = NULL;
struct bkey_s_c k; struct bkey_s_c k;
u64 start; u64 min, max, start, *hint;
int ret; int ret;
if (!max) unsigned cpu = raw_smp_processor_id();
max = ULLONG_MAX; unsigned bits = (c->opts.inodes_32bit
? 31 : 63) - c->inode_shard_bits;
if (trans->c->opts.inodes_32bit) min = (cpu << bits);
max = min_t(u64, max, U32_MAX); max = (cpu << bits) | ~(ULLONG_MAX << bits);
min = max_t(u64, min, BLOCKDEV_INODE_MAX);
hint = c->unused_inode_hints + cpu;
start = READ_ONCE(*hint); start = READ_ONCE(*hint);
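/*
 * Worked example of the per-CPU sharding above, illustrative only: with
 * inodes_32bit and 4 possible CPUs, inode_shard_bits = ilog2(4) = 2 and
 * bits = 31 - 2 = 29, so CPU 1 allocates inode numbers in
 * [1 << 29, (1 << 29) | ((1 << 29) - 1)] = [0x20000000, 0x3fffffff],
 * with its own allocation hint in c->unused_inode_hints[1].
 */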
@ -388,7 +506,17 @@ again:
if (bkey_cmp(iter->pos, POS(0, max)) > 0) if (bkey_cmp(iter->pos, POS(0, max)) > 0)
break; break;
if (k.k->type != KEY_TYPE_inode) /*
* There's a potential cache coherency issue with the btree key
* cache code here - we're iterating over the btree, skipping
* that cache. We should never see an empty slot that isn't
* actually empty due to a pending update in the key cache
* because the update that creates the inode isn't done with a
* cached iterator, but - better safe than sorry, check the
* cache before using a slot:
*/
if (k.k->type != KEY_TYPE_inode &&
!bch2_btree_key_cache_find(c, BTREE_ID_INODES, iter->pos))
goto found_slot; goto found_slot;
} }
@ -409,10 +537,7 @@ found_slot:
inode_u->bi_inum = k.k->p.offset; inode_u->bi_inum = k.k->p.offset;
inode_u->bi_generation = bkey_generation(k); inode_u->bi_generation = bkey_generation(k);
bch2_inode_pack(inode_p, inode_u); return bch2_inode_write(trans, iter, inode_u);
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
bch2_trans_iter_put(trans, iter);
return 0;
} }
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
@ -422,6 +547,8 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
struct bkey_i_inode_generation delete; struct bkey_i_inode_generation delete;
struct bpos start = POS(inode_nr, 0); struct bpos start = POS(inode_nr, 0);
struct bpos end = POS(inode_nr + 1, 0); struct bpos end = POS(inode_nr + 1, 0);
struct bkey_s_c k;
u64 bi_generation;
int ret; int ret;
/* /*
@ -442,51 +569,62 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
return ret; return ret;
bch2_trans_init(&trans, c, 0, 0); bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
bi_generation = 0;
ret = bch2_btree_key_cache_flush(&trans, BTREE_ID_INODES, POS(0, inode_nr));
if (ret) {
if (ret != -EINTR)
bch_err(c, "error flushing btree key cache: %i", ret);
goto err;
}
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr), iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT); BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
do { k = bch2_btree_iter_peek_slot(iter);
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
u32 bi_generation = 0;
ret = bkey_err(k); ret = bkey_err(k);
if (ret) if (ret)
break; goto err;
bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c, bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c,
"inode %llu not found when deleting", "inode %llu not found when deleting",
inode_nr); inode_nr);
switch (k.k->type) { switch (k.k->type) {
case KEY_TYPE_inode: { case KEY_TYPE_inode: {
struct bch_inode_unpacked inode_u; struct bch_inode_unpacked inode_u;
if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u)) if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u))
bi_generation = inode_u.bi_generation + 1; bi_generation = inode_u.bi_generation + 1;
break; break;
} }
case KEY_TYPE_inode_generation: { case KEY_TYPE_inode_generation: {
struct bkey_s_c_inode_generation g = struct bkey_s_c_inode_generation g =
bkey_s_c_to_inode_generation(k); bkey_s_c_to_inode_generation(k);
bi_generation = le32_to_cpu(g.v->bi_generation); bi_generation = le32_to_cpu(g.v->bi_generation);
break; break;
} }
} }
if (!bi_generation) { if (!bi_generation) {
bkey_init(&delete.k); bkey_init(&delete.k);
delete.k.p.offset = inode_nr; delete.k.p.offset = inode_nr;
} else { } else {
bkey_inode_generation_init(&delete.k_i); bkey_inode_generation_init(&delete.k_i);
delete.k.p.offset = inode_nr; delete.k.p.offset = inode_nr;
delete.v.bi_generation = cpu_to_le32(bi_generation); delete.v.bi_generation = cpu_to_le32(bi_generation);
} }
bch2_trans_update(&trans, iter, &delete.k_i, 0); bch2_trans_update(&trans, iter, &delete.k_i, 0);
ret = bch2_trans_commit(&trans, NULL, NULL, ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL); BTREE_INSERT_NOFAIL);
} while (ret == -EINTR); err:
if (ret == -EINTR)
goto retry;
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
return ret; return ret;
@ -500,11 +638,11 @@ int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
int ret; int ret;
iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
POS(0, inode_nr), BTREE_ITER_SLOTS); POS(0, inode_nr), BTREE_ITER_CACHED);
if (IS_ERR(iter)) if (IS_ERR(iter))
return PTR_ERR(iter); return PTR_ERR(iter);
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_cached(iter);
ret = bkey_err(k); ret = bkey_err(k);
if (ret) if (ret)
goto err; goto err;
@ -523,32 +661,3 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
return bch2_trans_do(c, NULL, NULL, 0, return bch2_trans_do(c, NULL, NULL, 0,
bch2_inode_find_by_inum_trans(&trans, inode_nr, inode)); bch2_inode_find_by_inum_trans(&trans, inode_nr, inode));
} }
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_inode_pack_test(void)
{
struct bch_inode_unpacked *u, test_inodes[] = {
{
.bi_atime = U64_MAX,
.bi_ctime = U64_MAX,
.bi_mtime = U64_MAX,
.bi_otime = U64_MAX,
.bi_size = U64_MAX,
.bi_sectors = U64_MAX,
.bi_uid = U32_MAX,
.bi_gid = U32_MAX,
.bi_nlink = U32_MAX,
.bi_generation = U32_MAX,
.bi_dev = U32_MAX,
},
};
for (u = test_inodes;
u < test_inodes + ARRAY_SIZE(test_inodes);
u++) {
struct bkey_inode_buf p;
bch2_inode_pack(&p, u);
}
}
#endif


@ -24,6 +24,14 @@ void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *,
.val_to_text = bch2_inode_generation_to_text, \ .val_to_text = bch2_inode_generation_to_text, \
} }
#if 0
typedef struct {
u64 lo;
u32 hi;
} __packed __aligned(4) u96;
#endif
typedef u64 u96;
struct bch_inode_unpacked { struct bch_inode_unpacked {
u64 bi_inum; u64 bi_inum;
__le64 bi_hash_seed; __le64 bi_hash_seed;
@ -43,7 +51,8 @@ struct bkey_inode_buf {
#undef x #undef x
} __attribute__((packed, aligned(8))); } __attribute__((packed, aligned(8)));
void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *); void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *,
const struct bch_inode_unpacked *);
int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
struct btree_iter *bch2_inode_peek(struct btree_trans *, struct btree_iter *bch2_inode_peek(struct btree_trans *,
@ -60,9 +69,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
uid_t, gid_t, umode_t, dev_t, uid_t, gid_t, umode_t, dev_t,
struct bch_inode_unpacked *); struct bch_inode_unpacked *);
int bch2_inode_create(struct btree_trans *, int bch2_inode_create(struct btree_trans *, struct bch_inode_unpacked *);
struct bch_inode_unpacked *,
u64, u64, u64 *);
int bch2_inode_rm(struct bch_fs *, u64); int bch2_inode_rm(struct bch_fs *, u64);
@ -168,10 +175,4 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
} }
} }
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_inode_pack_test(void);
#else
static inline void bch2_inode_pack_test(void) {}
#endif
#endif /* _BCACHEFS_INODE_H */ #endif /* _BCACHEFS_INODE_H */


@ -171,7 +171,7 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
while (size) { while (size) {
struct page *page = __bio_alloc_page_pool(c, &using_mempool); struct page *page = __bio_alloc_page_pool(c, &using_mempool);
unsigned len = min(PAGE_SIZE, size); unsigned len = min_t(size_t, PAGE_SIZE, size);
BUG_ON(!bio_add_page(bio, page, len, 0)); BUG_ON(!bio_add_page(bio, page, len, 0));
size -= len; size -= len;
@ -301,7 +301,7 @@ int bch2_extent_update(struct btree_trans *trans,
inode_u.bi_sectors += delta; inode_u.bi_sectors += delta;
if (delta || new_i_size) { if (delta || new_i_size) {
bch2_inode_pack(&inode_p, &inode_u); bch2_inode_pack(trans->c, &inode_p, &inode_u);
bch2_trans_update(trans, inode_iter, bch2_trans_update(trans, inode_iter,
&inode_p.inode.k_i, 0); &inode_p.inode.k_i, 0);
} }


@ -980,9 +980,11 @@ void bch2_fs_journal_stop(struct journal *j)
wait_event(j->wait, journal_entry_close(j)); wait_event(j->wait, journal_entry_close(j));
/* do we need to write another journal entry? */ /*
if (test_bit(JOURNAL_NOT_EMPTY, &j->flags)) * Always write a new journal entry, to make sure the clock hands are up
bch2_journal_meta(j); * to date (and match the superblock)
*/
bch2_journal_meta(j);
journal_quiesce(j); journal_quiesce(j);


@ -465,34 +465,12 @@ static bool journal_flush_pins(struct journal *j, u64 seq_to_flush,
return ret; return ret;
} }
/** static u64 journal_seq_to_flush(struct journal *j)
* bch2_journal_reclaim - free up journal buckets
*
* Background journal reclaim writes out btree nodes. It should be run
* early enough so that we never completely run out of journal buckets.
*
* High watermarks for triggering background reclaim:
* - FIFO has fewer than 512 entries left
* - fewer than 25% journal buckets free
*
* Background reclaim runs until low watermarks are reached:
* - FIFO has more than 1024 entries left
* - more than 50% journal buckets free
*
* As long as a reclaim can complete in the time it takes to fill up
* 512 journal entries or 25% of all journal buckets, then
* journal_next_bucket() should not stall.
*/
void bch2_journal_reclaim(struct journal *j)
{ {
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca; struct bch_dev *ca;
unsigned iter, min_nr = 0;
u64 seq_to_flush = 0; u64 seq_to_flush = 0;
unsigned iter;
lockdep_assert_held(&j->reclaim_lock);
bch2_journal_do_discards(j);
spin_lock(&j->lock); spin_lock(&j->lock);
@ -524,20 +502,52 @@ void bch2_journal_reclaim(struct journal *j)
(j->pin.size >> 1)); (j->pin.size >> 1));
spin_unlock(&j->lock); spin_unlock(&j->lock);
/* return seq_to_flush;
* If it's been longer than j->reclaim_delay_ms since we last flushed, }
* make sure to flush at least one journal pin:
*/
if (time_after(jiffies, j->last_flushed +
msecs_to_jiffies(j->reclaim_delay_ms)))
min_nr = 1;
if (j->prereserved.reserved * 2 > j->prereserved.remaining) { /**
seq_to_flush = max(seq_to_flush, journal_last_seq(j)); * bch2_journal_reclaim - free up journal buckets
min_nr = 1; *
} * Background journal reclaim writes out btree nodes. It should be run
* early enough so that we never completely run out of journal buckets.
*
* High watermarks for triggering background reclaim:
* - FIFO has fewer than 512 entries left
* - fewer than 25% journal buckets free
*
* Background reclaim runs until low watermarks are reached:
* - FIFO has more than 1024 entries left
* - more than 50% journal buckets free
*
* As long as a reclaim can complete in the time it takes to fill up
* 512 journal entries or 25% of all journal buckets, then
* journal_next_bucket() should not stall.
*/
void bch2_journal_reclaim(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned min_nr = 0;
u64 seq_to_flush = 0;
journal_flush_pins(j, seq_to_flush, min_nr); lockdep_assert_held(&j->reclaim_lock);
do {
bch2_journal_do_discards(j);
seq_to_flush = journal_seq_to_flush(j);
min_nr = 0;
/*
* If it's been longer than j->reclaim_delay_ms since we last flushed,
* make sure to flush at least one journal pin:
*/
if (time_after(jiffies, j->last_flushed +
msecs_to_jiffies(j->reclaim_delay_ms)))
min_nr = 1;
if (j->prereserved.reserved * 2 > j->prereserved.remaining)
min_nr = 1;
} while (journal_flush_pins(j, seq_to_flush, min_nr));
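/*
 * Illustrative note, not part of the diff: reclaim now loops -- discard,
 * recompute the flush target and min_nr, then call journal_flush_pins()
 * again -- until journal_flush_pins() returns false, instead of doing a
 * single flush pass per invocation.
 */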
if (!bch2_journal_error(j)) if (!bch2_journal_error(j))
queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work,


@ -1320,7 +1320,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_inode_init(c, &root_inode, 0, 0, bch2_inode_init(c, &root_inode, 0, 0,
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
root_inode.bi_inum = BCACHEFS_ROOT_INO; root_inode.bi_inum = BCACHEFS_ROOT_INO;
bch2_inode_pack(&packed_inode, &root_inode); bch2_inode_pack(c, &packed_inode, &root_inode);
err = "error creating root directory"; err = "error creating root directory";
ret = bch2_btree_insert(c, BTREE_ID_INODES, ret = bch2_btree_insert(c, BTREE_ID_INODES,


@ -451,6 +451,7 @@ int bch2_fs_read_write_early(struct bch_fs *c)
static void __bch2_fs_free(struct bch_fs *c) static void __bch2_fs_free(struct bch_fs *c)
{ {
unsigned i; unsigned i;
int cpu;
for (i = 0; i < BCH_TIME_STAT_NR; i++) for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_exit(&c->times[i]); bch2_time_stats_exit(&c->times[i]);
@ -475,6 +476,12 @@ static void __bch2_fs_free(struct bch_fs *c)
free_percpu(c->usage[1]); free_percpu(c->usage[1]);
free_percpu(c->usage[0]); free_percpu(c->usage[0]);
kfree(c->usage_base); kfree(c->usage_base);
if (c->btree_iters_bufs)
for_each_possible_cpu(cpu)
kfree(per_cpu_ptr(c->btree_iters_bufs, cpu)->iter);
free_percpu(c->btree_iters_bufs);
free_percpu(c->pcpu); free_percpu(c->pcpu);
mempool_exit(&c->large_bkey_pool); mempool_exit(&c->large_bkey_pool);
mempool_exit(&c->btree_bounce_pool); mempool_exit(&c->btree_bounce_pool);
@ -485,6 +492,7 @@ static void __bch2_fs_free(struct bch_fs *c)
kfree(c->replicas_gc.entries); kfree(c->replicas_gc.entries);
kfree(rcu_dereference_protected(c->disk_groups, 1)); kfree(rcu_dereference_protected(c->disk_groups, 1));
kfree(c->journal_seq_blacklist_table); kfree(c->journal_seq_blacklist_table);
kfree(c->unused_inode_hints);
free_heap(&c->copygc_heap); free_heap(&c->copygc_heap);
if (c->journal_reclaim_wq) if (c->journal_reclaim_wq)
@ -736,11 +744,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
(btree_blocks(c) + 1) * 2 * (btree_blocks(c) + 1) * 2 *
sizeof(struct sort_iter_set); sizeof(struct sort_iter_set);
c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus()));
if (!(c->wq = alloc_workqueue("bcachefs", if (!(c->wq = alloc_workqueue("bcachefs",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcache_copygc", !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->journal_reclaim_wq = alloc_workqueue("bcache_journal", !(c->journal_reclaim_wq = alloc_workqueue("bcachefs_journal_reclaim",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
percpu_ref_init(&c->writes, bch2_writes_disabled, percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) || PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
@ -750,9 +760,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
offsetof(struct btree_write_bio, wbio.bio)), offsetof(struct btree_write_bio, wbio.bio)),
BIOSET_NEED_BVECS) || BIOSET_NEED_BVECS) ||
!(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) || !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
!(c->btree_iters_bufs = alloc_percpu(struct btree_iter_buf)) ||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
btree_bytes(c)) || btree_bytes(c)) ||
mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
!(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits,
sizeof(u64), GFP_KERNEL)) ||
bch2_io_clock_init(&c->io_clock[READ]) || bch2_io_clock_init(&c->io_clock[READ]) ||
bch2_io_clock_init(&c->io_clock[WRITE]) || bch2_io_clock_init(&c->io_clock[WRITE]) ||
bch2_fs_journal_init(&c->journal) || bch2_fs_journal_init(&c->journal) ||
@ -2012,7 +2025,6 @@ static void bcachefs_exit(void)
static int __init bcachefs_init(void) static int __init bcachefs_init(void)
{ {
bch2_bkey_pack_test(); bch2_bkey_pack_test();
bch2_inode_pack_test();
if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) || if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) ||
bch2_chardev_init() || bch2_chardev_init() ||


@ -208,12 +208,6 @@ read_attribute(io_timers_write);
write_attribute(perf_test); write_attribute(perf_test);
#endif /* CONFIG_BCACHEFS_TESTS */ #endif /* CONFIG_BCACHEFS_TESTS */
#define BCH_DEBUG_PARAM(name, description) \
rw_attribute(name);
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
#define x(_name) \ #define x(_name) \
static struct attribute sysfs_time_stat_##_name = \ static struct attribute sysfs_time_stat_##_name = \
{ .name = #_name, .mode = S_IRUGO }; { .name = #_name, .mode = S_IRUGO };
@ -414,10 +408,6 @@ SHOW(bch2_fs)
return out.pos - buf; return out.pos - buf;
} }
#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
return 0; return 0;
} }
@ -462,10 +452,6 @@ STORE(bch2_fs)
/* Debugging: */ /* Debugging: */
#define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
if (!test_bit(BCH_FS_STARTED, &c->flags)) if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM; return -EPERM;
@ -590,11 +576,6 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_io_timers_write, &sysfs_io_timers_write,
&sysfs_internal_uuid, &sysfs_internal_uuid,
#define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
NULL NULL
}; };


@ -520,7 +520,7 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
{ {
while (size) { while (size) {
struct page *page = alloc_page(gfp_mask); struct page *page = alloc_page(gfp_mask);
unsigned len = min(PAGE_SIZE, size); unsigned len = min_t(size_t, PAGE_SIZE, size);
if (!page) if (!page)
return -ENOMEM; return -ENOMEM;


@ -37,17 +37,6 @@ struct closure;
#define atomic64_sub_bug(i, v) BUG_ON(atomic64_sub_return(i, v) < 0) #define atomic64_sub_bug(i, v) BUG_ON(atomic64_sub_return(i, v) < 0)
#define atomic64_add_bug(i, v) BUG_ON(atomic64_add_return(i, v) < 0) #define atomic64_add_bug(i, v) BUG_ON(atomic64_add_return(i, v) < 0)
#define memcpy(dst, src, len) \
({ \
void *_dst = (dst); \
const void *_src = (src); \
size_t _len = (len); \
\
BUG_ON(!((void *) (_dst) >= (void *) (_src) + (_len) || \
(void *) (_dst) + (_len) <= (void *) (_src))); \
memcpy(_dst, _src, _len); \
})
#else /* DEBUG */ #else /* DEBUG */
#define EBUG_ON(cond) #define EBUG_ON(cond)

libbcachefs/varint.c (new file, 42 lines)

@ -0,0 +1,42 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h>
#include <asm/unaligned.h>
#include "varint.h"
int bch2_varint_encode(u8 *out, u64 v)
{
unsigned bits = fls64(v|1);
unsigned bytes = DIV_ROUND_UP(bits, 7);
if (likely(bytes < 9)) {
v <<= bytes;
v |= ~(~0 << (bytes - 1));
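/*
 * Descriptive comment, not in the diff: the value is shifted up by the
 * encoded length and the low (bytes - 1) bits are set to 1, leaving bit
 * (bytes - 1) clear -- a unary length prefix that bch2_varint_decode()
 * recovers with ffz().
 */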
} else {
*out++ = 255;
bytes = 9;
}
put_unaligned_le64(v, out);
return bytes;
}
int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
{
u64 v = get_unaligned_le64(in);
unsigned bytes = ffz(v & 255) + 1;
if (unlikely(in + bytes > end))
return -1;
if (likely(bytes < 9)) {
v >>= bytes;
v &= ~(~0ULL << (7 * bytes));
} else {
v = get_unaligned_le64(++in);
}
*out = v;
return bytes;
}
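/*
 * Standalone userspace sketch of the same prefix-varint scheme, for
 * illustration only (not part of the diff): the encoded length is stored
 * as a unary prefix in the first byte, and values that need all 64 bits
 * fall back to a 0xff marker byte followed by the raw 8-byte value.
 * Assumes a little-endian host; the demo_* names are made up.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int demo_varint_encode(uint8_t *out, uint64_t v)
{
	unsigned bits  = 64 - __builtin_clzll(v | 1);	/* fls64(v | 1) */
	unsigned bytes = (bits + 6) / 7;		/* DIV_ROUND_UP(bits, 7) */

	if (bytes < 9) {
		v <<= bytes;
		v |= ~(~0ULL << (bytes - 1));
		memcpy(out, &v, 8);	/* like put_unaligned_le64(); out needs 8 bytes of room */
	} else {
		out[0] = 255;
		memcpy(out + 1, &v, 8);
		bytes = 9;
	}
	return bytes;
}

static int demo_varint_decode(const uint8_t *in, uint64_t *out)
{
	uint64_t v;
	unsigned bytes;

	memcpy(&v, in, 8);
	bytes = __builtin_ctzll(~v | 256) + 1;		/* ffz(v & 255) + 1 */

	if (bytes < 9) {
		v >>= bytes;
		v &= ~(~0ULL << (7 * bytes));
	} else {
		memcpy(&v, in + 1, 8);
	}
	*out = v;
	return bytes;
}

int main(void)
{
	uint8_t buf[16] = { 0 };
	uint64_t v = 0;

	/* 300 needs 9 significant bits -> 2 encoded bytes: 0xb1 0x04 */
	int n = demo_varint_encode(buf, 300);

	assert(n == 2);
	assert(demo_varint_decode(buf, &v) == 2 && v == 300);
	printf("300 encodes to %d bytes, decodes back to %llu\n",
	       n, (unsigned long long) v);
	return 0;
}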

libbcachefs/varint.h (new file, 8 lines)

@ -0,0 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_VARINT_H
#define _BCACHEFS_VARINT_H
int bch2_varint_encode(u8 *, u64);
int bch2_varint_decode(const u8 *, const u8 *, u64 *);
#endif /* _BCACHEFS_VARINT_H */