mirror of https://github.com/koverstreet/bcachefs-tools.git

Update bcachefs sources to da037866e6

parent 2615d73a74
commit e783d814e8

@@ -1 +1 @@
-297c81ae4d608707fdabedc60158ff1f4fbec257
+da037866e669b09edc6b049ce09535d3456474cb
@@ -41,7 +41,6 @@ x(0, metadata_replicas, "#", NULL) \
 x(0, encrypted, NULL, "Enable whole filesystem encryption (chacha20/poly1305)")\
 x(0, no_passphrase, NULL, "Don't encrypt master encryption key")\
 x('e', error_action, "(continue|readonly|panic)", NULL) \
-x(0, max_journal_entry_size, "size", NULL) \
 x('L', label, "label", NULL) \
 x('U', uuid, "uuid", NULL) \
 x('f', force, NULL, NULL) \
@@ -80,7 +79,6 @@ static void usage(void)
 " --no_passphrase Don't encrypt master encryption key\n"
 " --error_action=(continue|readonly|panic)\n"
 " Action to take on filesystem error\n"
-" --max_journal_entry_size=size\n"
 " -l, --label=label\n"
 " --uuid=uuid\n"
 " -f, --force\n"
@@ -185,10 +183,6 @@ int cmd_format(int argc, char *argv[])
 read_string_list_or_die(optarg,
 bch2_error_actions, "error action");
 break;
-case O_max_journal_entry_size:
-opts.max_journal_entry_size =
-hatoi_validate(optarg, "journal entry size");
-break;
 case O_label:
 case 'L':
 opts.label = strdup(optarg);
@@ -247,29 +247,4 @@ unsigned long rounddown_pow_of_two(unsigned long n)
 return 1UL << (fls_long(n) - 1);
 }
 
-static inline __attribute_const__
-int __get_order(unsigned long size)
-{
-int order;
-
-size--;
-size >>= PAGE_SHIFT;
-#if BITS_PER_LONG == 32
-order = fls(size);
-#else
-order = fls64(size);
-#endif
-return order;
-}
-
-#define get_order(n) \
-( \
-__builtin_constant_p(n) ? ( \
-((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
-(((n) < (1UL << PAGE_SHIFT)) ? 0 : \
-ilog2((n) - 1) - PAGE_SHIFT + 1) \
-) : \
-__get_order(n) \
-)
-
 #endif
@@ -184,4 +184,29 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
 __rounddown_pow_of_two(n) \
 )
 
+static inline __attribute_const__
+int __get_order(unsigned long size)
+{
+int order;
+
+size--;
+size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+order = fls(size);
+#else
+order = fls64(size);
+#endif
+return order;
+}
+
+#define get_order(n) \
+( \
+__builtin_constant_p(n) ? ( \
+((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
+(((n) < (1UL << PAGE_SHIFT)) ? 0 : \
+ilog2((n) - 1) - PAGE_SHIFT + 1) \
+) : \
+__get_order(n) \
+)
+
 #endif /* _TOOLS_LINUX_LOG2_H */
@@ -149,14 +149,6 @@ struct bch_sb *bch2_format(struct format_opts opts,
 min(opts.btree_node_size, i->bucket_size);
 }
 
-if (!opts.max_journal_entry_size) {
-/* 2 MB default: */
-opts.max_journal_entry_size = 4096;
-}
-
-opts.max_journal_entry_size =
-roundup_pow_of_two(opts.max_journal_entry_size);
-
 if (uuid_is_null(opts.uuid.b))
 uuid_generate(opts.uuid.b);
 
@@ -191,7 +183,6 @@ struct bch_sb *bch2_format(struct format_opts opts,
 SET_BCH_SB_DATA_REPLICAS_REQ(sb, opts.data_replicas_required);
 SET_BCH_SB_ERROR_ACTION(sb, opts.on_error_action);
 SET_BCH_SB_STR_HASH_TYPE(sb, BCH_STR_HASH_SIPHASH);
-SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb, ilog2(opts.max_journal_entry_size));
 
 struct timespec now;
 if (clock_gettime(CLOCK_REALTIME, &now))
@@ -319,7 +310,6 @@ void bch2_super_print(struct bch_sb *sb, int units)
 "Version: %llu\n"
 "Block_size: %s\n"
 "Btree node size: %s\n"
-"Max journal entry size: %s\n"
 "Error action: %s\n"
 "Clean: %llu\n"
 
@@ -342,7 +332,6 @@ void bch2_super_print(struct bch_sb *sb, int units)
 le64_to_cpu(sb->version),
 pr_units(le16_to_cpu(sb->block_size), units),
 pr_units(BCH_SB_BTREE_NODE_SIZE(sb), units),
-pr_units(1U << BCH_SB_JOURNAL_ENTRY_SIZE(sb), units),
 
 BCH_SB_ERROR_ACTION(sb) < BCH_NR_ERROR_ACTIONS
 ? bch2_error_actions[BCH_SB_ERROR_ACTION(sb)]
@@ -13,7 +13,6 @@ struct format_opts {
 uuid_le uuid;
 
 unsigned on_error_action;
-unsigned max_journal_entry_size; /* will be removed */
 
 unsigned block_size;
 unsigned btree_node_size;
@@ -971,7 +971,7 @@ LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9);
 
 LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10);
 LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14);
-LE64_BITMASK(BCH_SB_JOURNAL_ENTRY_SIZE, struct bch_sb, flags[1], 14, 20);
+/* 14-20 unused, was JOURNAL_ENTRY_SIZE */
 
 LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24);
 LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28);
@@ -191,6 +191,12 @@ bkey_unpack_key_format_checked(const struct btree *b,
 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
 struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
 
+/*
+* hack around a harmless race when compacting whiteouts
+* for a write:
+*/
+dst2.needs_whiteout = dst.needs_whiteout;
+
 BUG_ON(memcmp(&dst, &dst2, sizeof(dst)));
 }
 }
@@ -87,6 +87,7 @@ static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
 if (!b)
 return NULL;
 
+bkey_extent_init(&b->key);
 six_lock_init(&b->lock);
 INIT_LIST_HEAD(&b->list);
 INIT_LIST_HEAD(&b->write_blocked);
@@ -141,8 +142,10 @@ static inline struct btree *mca_find(struct bch_fs *c,
 * this version is for btree nodes that have already been freed (we're not
 * reaping a real btree node)
 */
-static int mca_reap_notrace(struct bch_fs *c, struct btree *b, bool flush)
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 {
+int ret = 0;
+
 lockdep_assert_held(&c->btree_cache_lock);
 
 if (!six_trylock_intent(&b->lock))
@@ -155,45 +158,48 @@ static int mca_reap_notrace(struct bch_fs *c, struct btree *b, bool flush)
 btree_node_noevict(b))
 goto out_unlock;
 
-if (!list_empty(&b->write_blocked))
+if (!btree_node_may_write(b))
 goto out_unlock;
 
-if (!flush &&
-(btree_node_dirty(b) ||
-btree_node_write_in_flight(b)))
-goto out_unlock;
+if (btree_node_dirty(b) ||
+btree_node_write_in_flight(b)) {
+if (!flush)
+goto out_unlock;
 
 /*
-* Using the underscore version because we don't want to compact bsets
-* after the write, since this node is about to be evicted - unless
-* btree verify mode is enabled, since it runs out of the post write
-* cleanup:
+* Using the underscore version because we don't want to compact
+* bsets after the write, since this node is about to be evicted
+* - unless btree verify mode is enabled, since it runs out of
+* the post write cleanup:
 */
-if (btree_node_dirty(b)) {
 if (verify_btree_ondisk(c))
-bch2_btree_node_write(c, b, NULL, SIX_LOCK_intent, -1);
+bch2_btree_node_write(c, b, NULL, SIX_LOCK_intent);
 else
-__bch2_btree_node_write(c, b, NULL, SIX_LOCK_read, -1);
+__bch2_btree_node_write(c, b, NULL, SIX_LOCK_read);
+
+/* wait for any in flight btree write */
+btree_node_wait_on_io(b);
 }
-
-/* wait for any in flight btree write */
-wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
-TASK_UNINTERRUPTIBLE);
-
-return 0;
+out:
+if (PTR_HASH(&b->key))
+trace_btree_node_reap(c, b, ret);
+return ret;
 
 out_unlock:
 six_unlock_write(&b->lock);
 out_unlock_intent:
 six_unlock_intent(&b->lock);
-return -ENOMEM;
+ret = -ENOMEM;
+goto out;
 }
 
-static int mca_reap(struct bch_fs *c, struct btree *b, bool flush)
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
 {
-int ret = mca_reap_notrace(c, b, flush);
-
-trace_btree_node_reap(c, b, ret);
-return ret;
+return __btree_node_reclaim(c, b, false);
+}
+
+static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
+{
+return __btree_node_reclaim(c, b, true);
 }
 
 static unsigned long bch2_mca_scan(struct shrinker *shrink,
@@ -239,7 +245,7 @@ static unsigned long bch2_mca_scan(struct shrinker *shrink,
 break;
 
 if (++i > 3 &&
-!mca_reap_notrace(c, b, false)) {
+!btree_node_reclaim(c, b)) {
 mca_data_free(c, b);
 six_unlock_write(&b->lock);
 six_unlock_intent(&b->lock);
@@ -258,7 +264,7 @@ restart:
 }
 
 if (!btree_node_accessed(b) &&
-!mca_reap(c, b, false)) {
+!btree_node_reclaim(c, b)) {
 /* can't call bch2_btree_node_hash_remove under btree_cache_lock */
 freed++;
 if (&t->list != &c->btree_cache)
@@ -445,12 +451,12 @@ static struct btree *mca_cannibalize(struct bch_fs *c)
 struct btree *b;
 
 list_for_each_entry_reverse(b, &c->btree_cache, list)
-if (!mca_reap(c, b, false))
+if (!btree_node_reclaim(c, b))
 return b;
 
 while (1) {
 list_for_each_entry_reverse(b, &c->btree_cache, list)
-if (!mca_reap(c, b, true))
+if (!btree_node_write_and_reclaim(c, b))
 return b;
 
 /*
@@ -474,7 +480,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
 * the list. Check if there's any freed nodes there:
 */
 list_for_each_entry(b, &c->btree_cache_freeable, list)
-if (!mca_reap_notrace(c, b, false))
+if (!btree_node_reclaim(c, b))
 goto out_unlock;
 
 /*
@@ -482,7 +488,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
 * disk node. Check the freed list before allocating a new one:
 */
 list_for_each_entry(b, &c->btree_cache_freed, list)
-if (!mca_reap_notrace(c, b, false)) {
+if (!btree_node_reclaim(c, b)) {
 mca_data_alloc(c, b, __GFP_NOWARN|GFP_NOIO);
 if (b->data)
 goto out_unlock;
@@ -685,7 +685,7 @@ static void bch2_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
 bch2_btree_build_aux_trees(n);
 six_unlock_write(&n->lock);
 
-bch2_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, -1);
+bch2_btree_node_write(c, n, &as->cl, SIX_LOCK_intent);
 }
 
 /*
@@ -1311,8 +1311,7 @@ static void btree_node_write_endio(struct bio *bio)
 
 void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 struct closure *parent,
-enum six_lock_type lock_type_held,
-int idx_to_write)
+enum six_lock_type lock_type_held)
 {
 struct bio *bio;
 struct bch_write_bio *wbio;
@@ -1344,14 +1343,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 if (!(old & (1 << BTREE_NODE_dirty)))
 return;
 
-if (idx_to_write >= 0 &&
-idx_to_write != !!(old & (1 << BTREE_NODE_write_idx)))
-return;
-
 if (old & (1 << BTREE_NODE_write_in_flight)) {
-wait_on_bit_io(&b->flags,
-BTREE_NODE_write_in_flight,
-TASK_UNINTERRUPTIBLE);
+btree_node_wait_on_io(b);
 continue;
 }
 
@@ -1614,37 +1607,29 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
 */
 void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 struct closure *parent,
-enum six_lock_type lock_type_held,
-int idx_to_write)
+enum six_lock_type lock_type_held)
 {
 BUG_ON(lock_type_held == SIX_LOCK_write);
 
 if (lock_type_held == SIX_LOCK_intent ||
 six_trylock_convert(&b->lock, SIX_LOCK_read,
 SIX_LOCK_intent)) {
-__bch2_btree_node_write(c, b, parent, SIX_LOCK_intent, idx_to_write);
+__bch2_btree_node_write(c, b, parent, SIX_LOCK_intent);
 
-six_lock_write(&b->lock);
-bch2_btree_post_write_cleanup(c, b);
-six_unlock_write(&b->lock);
+/* don't cycle lock unnecessarily: */
+if (btree_node_just_written(b)) {
+six_lock_write(&b->lock);
+bch2_btree_post_write_cleanup(c, b);
+six_unlock_write(&b->lock);
+}
 
 if (lock_type_held == SIX_LOCK_read)
 six_lock_downgrade(&b->lock);
 } else {
-__bch2_btree_node_write(c, b, parent, SIX_LOCK_read, idx_to_write);
+__bch2_btree_node_write(c, b, parent, SIX_LOCK_read);
 }
 }
 
-static void bch2_btree_node_write_dirty(struct bch_fs *c, struct btree *b,
-struct closure *parent)
-{
-six_lock_read(&b->lock);
-BUG_ON(b->level);
-
-bch2_btree_node_write(c, b, parent, SIX_LOCK_read, -1);
-six_unlock_read(&b->lock);
-}
-
 /*
 * Write all dirty btree nodes to disk, including roots
 */
@@ -1654,7 +1639,7 @@ void bch2_btree_flush(struct bch_fs *c)
 struct btree *b;
 struct bucket_table *tbl;
 struct rhash_head *pos;
-bool dropped_lock;
+bool saw_dirty;
 unsigned i;
 
 closure_init_stack(&cl);
@@ -1662,26 +1647,27 @@ void bch2_btree_flush(struct bch_fs *c)
 rcu_read_lock();
 
 do {
-dropped_lock = false;
+saw_dirty = false;
 i = 0;
 restart:
 tbl = rht_dereference_rcu(c->btree_cache_table.tbl,
 &c->btree_cache_table);
 
 for (; i < tbl->size; i++)
-rht_for_each_entry_rcu(b, pos, tbl, i, hash)
-/*
-* XXX - locking for b->level, when called from
-* bch2_journal_move()
-*/
-if (!b->level && btree_node_dirty(b)) {
+rht_for_each_entry_rcu(b, pos, tbl, i, hash) {
+saw_dirty |= btree_node_dirty(b);
+
+if (btree_node_dirty(b) &&
+btree_node_may_write(b)) {
 rcu_read_unlock();
-bch2_btree_node_write_dirty(c, b, &cl);
-dropped_lock = true;
+six_lock_read(&b->lock);
+bch2_btree_node_write_dirty(c, b, &cl, 1);
+six_unlock_read(&b->lock);
 rcu_read_lock();
 goto restart;
 }
-} while (dropped_lock);
+}
+} while (saw_dirty);
 
 rcu_read_unlock();
 
@@ -19,6 +19,17 @@ static inline void btree_node_io_lock(struct btree *b)
 TASK_UNINTERRUPTIBLE);
 }
 
+static inline void btree_node_wait_on_io(struct btree *b)
+{
+wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
+TASK_UNINTERRUPTIBLE);
+}
+
+static inline bool btree_node_may_write(struct btree *b)
+{
+return list_empty_careful(&b->write_blocked);
+}
+
 enum compact_mode {
 COMPACT_LAZY,
 COMPACT_WRITTEN,
@@ -60,11 +71,28 @@ void bch2_btree_complete_write(struct bch_fs *, struct btree *,
 struct btree_write *);
 
 void __bch2_btree_node_write(struct bch_fs *, struct btree *,
-struct closure *, enum six_lock_type, int);
+struct closure *, enum six_lock_type);
 bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
 
 void bch2_btree_node_write(struct bch_fs *, struct btree *,
-struct closure *, enum six_lock_type, int);
+struct closure *, enum six_lock_type);
 
+#define bch2_btree_node_write_dirty(_c, _b, _cl, cond) \
+do { \
+while ((_b)->written && btree_node_dirty(_b) && (cond)) { \
+if (!btree_node_may_write(_b)) \
+break; \
+\
+if (!btree_node_write_in_flight(_b)) { \
+bch2_btree_node_write(_c, _b, _cl, SIX_LOCK_read);\
+break; \
+} \
+\
+six_unlock_read(&(_b)->lock); \
+btree_node_wait_on_io(_b); \
+six_lock_read(&(_b)->lock); \
+} \
+} while (0)
+
 void bch2_btree_flush(struct bch_fs *);
 void bch2_btree_node_flush_journal_entries(struct bch_fs *, struct btree *,
@@ -614,7 +614,7 @@ int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
 
 b = __btree_root_alloc(c, 0, id, reserve);
 
-bch2_btree_node_write(c, b, writes, SIX_LOCK_intent, -1);
+bch2_btree_node_write(c, b, writes, SIX_LOCK_intent);
 
 bch2_btree_set_root_initial(c, b, reserve);
 bch2_btree_open_bucket_put(c, b);
@@ -750,39 +750,27 @@ overwrite:
 }
 
 static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
-unsigned i)
+unsigned i, u64 seq)
 {
 struct bch_fs *c = container_of(j, struct bch_fs, journal);
 struct btree_write *w = container_of(pin, struct btree_write, journal);
 struct btree *b = container_of(w, struct btree, writes[i]);
 
 six_lock_read(&b->lock);
-/*
-* Reusing a btree node can race with the journal reclaim code calling
-* the journal pin flush fn, and there's no good fix for this: we don't
-* really want journal_pin_drop() to block until the flush fn is no
-* longer running, because journal_pin_drop() is called from the btree
-* node write endio function, and we can't wait on the flush fn to
-* finish running in mca_reap() - where we make reused btree nodes ready
-* to use again - because there, we're holding the lock this function
-* needs - deadlock.
-*
-* So, the b->level check is a hack so we don't try to write nodes we
-* shouldn't:
-*/
-if (!b->level)
-bch2_btree_node_write(c, b, NULL, SIX_LOCK_read, i);
+bch2_btree_node_write_dirty(c, b, NULL,
+(btree_current_write(b) == w &&
+w->journal.pin_list == journal_seq_pin(j, seq)));
 six_unlock_read(&b->lock);
 }
 
-static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin)
+static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
 {
-return __btree_node_flush(j, pin, 0);
+return __btree_node_flush(j, pin, 0, seq);
 }
 
-static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin)
+static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
 {
-return __btree_node_flush(j, pin, 1);
+return __btree_node_flush(j, pin, 1, seq);
 }
 
 void bch2_btree_journal_key(struct btree_insert *trans,
@@ -799,10 +787,11 @@ void bch2_btree_journal_key(struct btree_insert *trans,
 test_bit(JOURNAL_REPLAY_DONE, &j->flags));
 
 if (!journal_pin_active(&w->journal))
-bch2_journal_pin_add(j, &w->journal,
-btree_node_write_idx(b) == 0
-? btree_node_flush0
-: btree_node_flush1);
+bch2_journal_pin_add(j, &trans->journal_res,
+&w->journal,
+btree_node_write_idx(b) == 0
+? btree_node_flush0
+: btree_node_flush1);
 
 if (trans->journal_res.ref) {
 u64 seq = trans->journal_res.seq;
@@ -972,9 +961,9 @@ retry:
 closure_wait(&btree_current_write(b)->wait, cl);
 
 list_del(&as->write_blocked_list);
+mutex_unlock(&c->btree_interior_update_lock);
 
-if (list_empty(&b->write_blocked))
-bch2_btree_node_write(c, b, NULL, SIX_LOCK_read, -1);
+bch2_btree_node_write_dirty(c, b, NULL, true);
 six_unlock_read(&b->lock);
 break;
 
@@ -991,6 +980,7 @@ retry:
 * and then we have to wait on that btree_interior_update to finish:
 */
 closure_wait(&as->parent_as->wait, cl);
+mutex_unlock(&c->btree_interior_update_lock);
 break;
 
 case BTREE_INTERIOR_UPDATING_ROOT:
@@ -1017,8 +1007,9 @@ retry:
 * can reuse the old nodes it'll have to do a journal commit:
 */
 six_unlock_read(&b->lock);
+mutex_unlock(&c->btree_interior_update_lock);
+break;
 }
-mutex_unlock(&c->btree_interior_update_lock);
 
 continue_at(cl, btree_interior_update_nodes_reachable, system_wq);
 }
@@ -1083,7 +1074,8 @@ static void btree_interior_update_updated_root(struct bch_fs *c,
 system_freezable_wq);
 }
 
-static void interior_update_flush(struct journal *j, struct journal_entry_pin *pin)
+static void interior_update_flush(struct journal *j,
+struct journal_entry_pin *pin, u64 seq)
 {
 struct btree_interior_update *as =
 container_of(pin, struct btree_interior_update, journal);
@@ -1441,7 +1433,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
 six_unlock_write(&n2->lock);
 six_unlock_write(&n1->lock);
 
-bch2_btree_node_write(c, n2, &as->cl, SIX_LOCK_intent, -1);
+bch2_btree_node_write(c, n2, &as->cl, SIX_LOCK_intent);
 
 /*
 * Note that on recursive parent_keys == insert_keys, so we
@@ -1461,7 +1453,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
 
 btree_split_insert_keys(iter, n3, &as->parent_keys,
 reserve);
-bch2_btree_node_write(c, n3, &as->cl, SIX_LOCK_intent, -1);
+bch2_btree_node_write(c, n3, &as->cl, SIX_LOCK_intent);
 }
 } else {
 trace_btree_node_compact(c, b, b->nr.live_u64s);
@@ -1472,7 +1464,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
 bch2_keylist_add(&as->parent_keys, &n1->key);
 }
 
-bch2_btree_node_write(c, n1, &as->cl, SIX_LOCK_intent, -1);
+bch2_btree_node_write(c, n1, &as->cl, SIX_LOCK_intent);
 
 /* New nodes all written, now make them visible: */
 
@@ -1773,7 +1765,7 @@ retry:
 bch2_keylist_add(&as->parent_keys, &delete);
 bch2_keylist_add(&as->parent_keys, &n->key);
 
-bch2_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, -1);
+bch2_btree_node_write(c, n, &as->cl, SIX_LOCK_intent);
 
 bch2_btree_insert_node(parent, iter, &as->parent_keys, reserve, as);
 
@@ -2323,7 +2315,7 @@ int bch2_btree_node_rewrite(struct btree_iter *iter, struct btree *b,
 
 trace_btree_gc_rewrite_node(c, b);
 
-bch2_btree_node_write(c, n, &as->cl, SIX_LOCK_intent, -1);
+bch2_btree_node_write(c, n, &as->cl, SIX_LOCK_intent);
 
 if (parent) {
 bch2_btree_insert_node(parent, iter,
@@ -49,7 +49,7 @@ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
 if (ca->disk_sb.bdev == bdev)
 goto found;
 
-ca = NULL;
+ca = ERR_PTR(-ENOENT);
 found:
 bdput(bdev);
 }
@@ -1,45 +1,30 @@
 #ifndef _BCACHE_FIFO_H
 #define _BCACHE_FIFO_H
 
+#include "util.h"
+
 #define DECLARE_FIFO(type, name) \
 struct { \
 size_t front, back, size, mask; \
 type *data; \
 } name
 
+#define fifo_buf_size(fifo) \
+(roundup_pow_of_two((fifo)->size) * sizeof((fifo)->data[0]))
+
 #define init_fifo(fifo, _size, _gfp) \
 ({ \
-bool _ret = true; \
-gfp_t gfp_flags = (_gfp); \
-\
-if (gfp_flags & GFP_KERNEL) \
-gfp_flags |= __GFP_NOWARN; \
-\
-(fifo)->size = (_size); \
 (fifo)->front = (fifo)->back = 0; \
-(fifo)->data = NULL; \
-\
-if ((fifo)->size) { \
-size_t _allocated_size, _bytes; \
-\
-_allocated_size = roundup_pow_of_two((fifo)->size); \
-_bytes = _allocated_size * sizeof(*(fifo)->data); \
-\
-(fifo)->mask = _allocated_size - 1; \
-\
-if (_bytes < KMALLOC_MAX_SIZE) \
-(fifo)->data = kmalloc(_bytes, gfp_flags); \
-if ((!(fifo)->data) && (gfp_flags & GFP_KERNEL)) \
-(fifo)->data = vmalloc(_bytes); \
-if ((!(fifo)->data)) \
-_ret = false; \
-} \
-_ret; \
+(fifo)->size = (_size); \
+(fifo)->mask = (fifo)->size \
+? roundup_pow_of_two((fifo)->size) - 1 \
+: 0; \
+(fifo)->data = kvpmalloc(fifo_buf_size(fifo), (_gfp)); \
 })
 
 #define free_fifo(fifo) \
 do { \
-kvfree((fifo)->data); \
+kvpfree((fifo)->data, fifo_buf_size(fifo)); \
 (fifo)->data = NULL; \
 } while (0)
 
@@ -138,7 +138,7 @@ static inline void bch2_journal_add_prios(struct journal *j,
 }
 
 static void journal_seq_blacklist_flush(struct journal *j,
-struct journal_entry_pin *pin)
+struct journal_entry_pin *pin, u64 seq)
 {
 struct bch_fs *c =
 container_of(j, struct bch_fs, journal);
@@ -406,7 +406,8 @@ static int journal_entry_add(struct bch_fs *c, struct journal_list *jlist,
 if (le64_to_cpu(i->j.seq) >= le64_to_cpu(j->last_seq))
 break;
 list_del(&i->list);
-kfree(i);
+kvpfree(i, offsetof(struct journal_replay, j) +
+vstruct_bytes(&i->j));
 }
 
 list_for_each_entry_reverse(i, jlist->head, list) {
@@ -429,7 +430,7 @@ static int journal_entry_add(struct bch_fs *c, struct journal_list *jlist,
 
 where = jlist->head;
 add:
-i = kvmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
+i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
 if (!i) {
 ret = -ENOMEM;
 goto out;
@@ -646,12 +647,16 @@ static int journal_read_buf_realloc(struct journal_read_buf *b,
 {
 void *n;
 
+/* the bios are sized for this many pages, max: */
+if (new_size > JOURNAL_ENTRY_SIZE_MAX)
+return -ENOMEM;
+
 new_size = roundup_pow_of_two(new_size);
-n = (void *) __get_free_pages(GFP_KERNEL, get_order(new_size));
+n = kvpmalloc(new_size, GFP_KERNEL);
 if (!n)
 return -ENOMEM;
 
-free_pages((unsigned long) b->data, get_order(b->size));
+kvpfree(b->data, b->size);
 b->data = n;
 b->size = new_size;
 return 0;
@@ -894,7 +899,7 @@ search_done:
 !read_bucket(i))
 break;
 out:
-free_pages((unsigned long) buf.data, get_order(buf.size));
+kvpfree(buf.data, buf.size);
 percpu_ref_put(&ca->io_ref);
 closure_return(cl);
 err:
@@ -912,7 +917,8 @@ void bch2_journal_entries_free(struct list_head *list)
 struct journal_replay *i =
 list_first_entry(list, struct journal_replay, list);
 list_del(&i->list);
-kvfree(i);
+kvpfree(i, offsetof(struct journal_replay, j) +
+vstruct_bytes(&i->j));
 }
 }
 
@@ -958,14 +964,14 @@ static inline bool journal_has_keys(struct list_head *list)
 
 int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 {
+struct journal *j = &c->journal;
 struct jset_entry *prio_ptrs;
 struct journal_list jlist;
 struct journal_replay *i;
-struct jset *j;
 struct journal_entry_pin_list *p;
 struct bch_dev *ca;
 u64 cur_seq, end_seq;
-unsigned iter;
+unsigned iter, keys = 0, entries = 0;
 int ret = 0;
 
 closure_init_stack(&jlist.cl);
@@ -994,63 +1000,59 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 fsck_err_on(c->sb.clean && journal_has_keys(list), c,
 "filesystem marked clean but journal has keys to replay");
 
-j = &list_entry(list->prev, struct journal_replay, list)->j;
+i = list_last_entry(list, struct journal_replay, list);
 
-unfixable_fsck_err_on(le64_to_cpu(j->seq) -
-le64_to_cpu(j->last_seq) + 1 >
-c->journal.pin.size, c,
+unfixable_fsck_err_on(le64_to_cpu(i->j.seq) -
+le64_to_cpu(i->j.last_seq) + 1 > j->pin.size, c,
 "too many journal entries open for refcount fifo");
 
-c->journal.pin.back = le64_to_cpu(j->seq) -
-le64_to_cpu(j->last_seq) + 1;
+atomic64_set(&j->seq, le64_to_cpu(i->j.seq));
+j->last_seq_ondisk = le64_to_cpu(i->j.last_seq);
 
-atomic64_set(&c->journal.seq, le64_to_cpu(j->seq));
-c->journal.last_seq_ondisk = le64_to_cpu(j->last_seq);
+j->pin.front = le64_to_cpu(i->j.last_seq);
+j->pin.back = le64_to_cpu(i->j.seq) + 1;
 
-BUG_ON(last_seq(&c->journal) != le64_to_cpu(j->last_seq));
-
-i = list_first_entry(list, struct journal_replay, list);
-
-mutex_lock(&c->journal.blacklist_lock);
-
-fifo_for_each_entry_ptr(p, &c->journal.pin, iter) {
-u64 seq = journal_pin_seq(&c->journal, p);
+BUG_ON(last_seq(j) != le64_to_cpu(i->j.last_seq));
+BUG_ON(journal_seq_pin(j, atomic64_read(&j->seq)) !=
+&fifo_peek_back(&j->pin));
 
+fifo_for_each_entry_ptr(p, &j->pin, iter) {
 INIT_LIST_HEAD(&p->list);
+atomic_set(&p->count, 0);
+}
 
-if (i && le64_to_cpu(i->j.seq) == seq) {
-atomic_set(&p->count, 1);
+mutex_lock(&j->blacklist_lock);
 
-if (journal_seq_blacklist_read(&c->journal, i, p)) {
-mutex_unlock(&c->journal.blacklist_lock);
-return -ENOMEM;
-}
+list_for_each_entry(i, list, list) {
+p = journal_seq_pin(j, le64_to_cpu(i->j.seq));
 
-i = list_is_last(&i->list, list)
-? NULL
-: list_next_entry(i, list);
-} else {
-atomic_set(&p->count, 0);
+atomic_set(&p->count, 1);
+
+if (journal_seq_blacklist_read(j, i, p)) {
+mutex_unlock(&j->blacklist_lock);
+return -ENOMEM;
 }
 }
 
-mutex_unlock(&c->journal.blacklist_lock);
+mutex_unlock(&j->blacklist_lock);
 
-cur_seq = last_seq(&c->journal);
+cur_seq = last_seq(j);
 end_seq = le64_to_cpu(list_last_entry(list,
 struct journal_replay, list)->j.seq);
 
 list_for_each_entry(i, list, list) {
+struct jset_entry *entry;
+struct bkey_i *k, *_n;
 bool blacklisted;
 
-mutex_lock(&c->journal.blacklist_lock);
+mutex_lock(&j->blacklist_lock);
 while (cur_seq < le64_to_cpu(i->j.seq) &&
-journal_seq_blacklist_find(&c->journal, cur_seq))
+journal_seq_blacklist_find(j, cur_seq))
 cur_seq++;
 
-blacklisted = journal_seq_blacklist_find(&c->journal,
+blacklisted = journal_seq_blacklist_find(j,
 le64_to_cpu(i->j.seq));
-mutex_unlock(&c->journal.blacklist_lock);
+mutex_unlock(&j->blacklist_lock);
 
 fsck_err_on(blacklisted, c,
 "found blacklisted journal entry %llu",
@@ -1059,17 +1061,25 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 fsck_err_on(le64_to_cpu(i->j.seq) != cur_seq, c,
 "journal entries %llu-%llu missing! (replaying %llu-%llu)",
 cur_seq, le64_to_cpu(i->j.seq) - 1,
-last_seq(&c->journal), end_seq);
+last_seq(j), end_seq);
 
 cur_seq = le64_to_cpu(i->j.seq) + 1;
+
+for_each_jset_key(k, _n, entry, &i->j)
+keys++;
+entries++;
 }
 
-prio_ptrs = bch2_journal_find_entry(j, JOURNAL_ENTRY_PRIO_PTRS, 0);
+bch_info(c, "journal read done, %i keys in %i entries, seq %llu",
+keys, entries, (u64) atomic64_read(&j->seq));
+
+i = list_last_entry(list, struct journal_replay, list);
+prio_ptrs = bch2_journal_find_entry(&i->j, JOURNAL_ENTRY_PRIO_PTRS, 0);
 if (prio_ptrs) {
-memcpy_u64s(c->journal.prio_buckets,
+memcpy_u64s(j->prio_buckets,
 prio_ptrs->_data,
 le16_to_cpu(prio_ptrs->u64s));
-c->journal.nr_prio_buckets = le16_to_cpu(prio_ptrs->u64s);
+j->nr_prio_buckets = le16_to_cpu(prio_ptrs->u64s);
 }
 fsck_err:
 return ret;
@@ -1105,6 +1115,9 @@ static bool journal_entry_is_open(struct journal *j)
 void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
 {
 struct bch_fs *c = container_of(j, struct bch_fs, journal);
+struct journal_buf *w = journal_prev_buf(j);
+
+atomic_dec_bug(&journal_seq_pin(j, w->data->seq)->count);
 
 if (!need_write_just_set &&
 test_bit(JOURNAL_NEED_WRITE, &j->flags))
@@ -1120,8 +1133,7 @@ void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
 #endif
 }
 
-static struct journal_entry_pin_list *
-__journal_entry_new(struct journal *j, int count)
+static void __journal_entry_new(struct journal *j, int count)
 {
 struct journal_entry_pin_list *p = fifo_push_ref(&j->pin);
 
@@ -1131,25 +1143,18 @@ __journal_entry_new(struct journal *j, int count)
 */
 atomic64_inc(&j->seq);
 
-BUG_ON(journal_pin_seq(j, p) != atomic64_read(&j->seq));
+BUG_ON(journal_seq_pin(j, atomic64_read(&j->seq)) !=
+&fifo_peek_back(&j->pin));
 
 INIT_LIST_HEAD(&p->list);
 atomic_set(&p->count, count);
-
-return p;
 }
 
 static void __bch2_journal_next_entry(struct journal *j)
 {
-struct journal_entry_pin_list *p;
 struct journal_buf *buf;
 
-p = __journal_entry_new(j, 1);
-
-if (test_bit(JOURNAL_REPLAY_DONE, &j->flags)) {
-smp_wmb();
-j->cur_pin_list = p;
-}
+__journal_entry_new(j, 1);
 
 buf = journal_cur_buf(j);
 memset(buf->has_inode, 0, sizeof(buf->has_inode));
@@ -1181,6 +1186,8 @@ static enum {
 union journal_res_state old, new;
 u64 v = atomic64_read(&j->reservations.counter);
 
+lockdep_assert_held(&j->lock);
+
 do {
 old.v = new.v = v;
 if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL)
@@ -1221,7 +1228,6 @@ static enum {
 
 BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
 
-atomic_dec_bug(&fifo_peek_back(&j->pin).count);
 __bch2_journal_next_entry(j);
 
 cancel_delayed_work(&j->write_work);
@@ -1295,7 +1301,7 @@ static int journal_entry_sectors(struct journal *j)
 struct bch_fs *c = container_of(j, struct bch_fs, journal);
 struct bch_dev *ca;
 struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
-unsigned sectors_available = j->entry_size_max >> 9;
+unsigned sectors_available = UINT_MAX;
 unsigned i, nr_online = 0, nr_devs = 0;
 
 lockdep_assert_held(&j->lock);
@@ -1363,6 +1369,10 @@ static int journal_entry_open(struct journal *j)
 if (sectors <= 0)
 return sectors;
 
+buf->disk_sectors = sectors;
+
+sectors = min_t(unsigned, sectors, buf->size >> 9);
+
 j->cur_buf_sectors = sectors;
 buf->nr_prio_buckets = j->nr_prio_buckets;
 
@@ -1464,18 +1474,15 @@ void bch2_journal_start(struct bch_fs *c)
 
 int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 {
-int ret = 0, keys = 0, entries = 0;
 struct journal *j = &c->journal;
 struct bkey_i *k, *_n;
 struct jset_entry *entry;
 struct journal_replay *i, *n;
+int ret = 0, did_replay = 0;
 
 list_for_each_entry_safe(i, n, list, list) {
-j->cur_pin_list =
-&j->pin.data[((j->pin.back - 1 -
-(atomic64_read(&j->seq) -
-le64_to_cpu(i->j.seq))) &
-j->pin.mask)];
+j->replay_pin_list =
+journal_seq_pin(j, le64_to_cpu(i->j.seq));
 
 for_each_jset_key(k, _n, entry, &i->j) {
 struct disk_reservation disk_res;
@@ -1499,16 +1506,16 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 }
 
 cond_resched();
-keys++;
+did_replay = true;
 }
 
-if (atomic_dec_and_test(&j->cur_pin_list->count))
+if (atomic_dec_and_test(&j->replay_pin_list->count))
 wake_up(&j->wait);
-
-entries++;
 }
 
-if (keys) {
+j->replay_pin_list = NULL;
+
+if (did_replay) {
 bch2_btree_flush(c);
 
 /*
@@ -1517,17 +1524,14 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 * arbitrarily far in the future vs. the most recently written journal
 * entry on disk, if we crash before writing the next journal entry:
 */
-ret = bch2_journal_meta(&c->journal);
+ret = bch2_journal_meta(j);
 if (ret) {
 bch_err(c, "journal replay: error %d flushing journal", ret);
 goto err;
 }
 }
 
-bch_info(c, "journal replay done, %i keys in %i entries, seq %llu",
-keys, entries, (u64) atomic64_read(&j->seq));
-
-bch2_journal_set_replay_done(&c->journal);
+bch2_journal_set_replay_done(j);
 err:
 bch2_journal_entries_free(list);
 return ret;
@@ -1763,11 +1767,16 @@ static void journal_pin_add_entry(struct journal *j,
 }
 
 void bch2_journal_pin_add(struct journal *j,
-struct journal_entry_pin *pin,
-journal_pin_flush_fn flush_fn)
+struct journal_res *res,
+struct journal_entry_pin *pin,
+journal_pin_flush_fn flush_fn)
 {
+struct journal_entry_pin_list *pin_list = res->ref
+? journal_seq_pin(j, res->seq)
+: j->replay_pin_list;
+
 spin_lock_irq(&j->pin_lock);
-__journal_pin_add(j, j->cur_pin_list, pin, flush_fn);
+__journal_pin_add(j, pin_list, pin, flush_fn);
 spin_unlock_irq(&j->pin_lock);
 }
 
@@ -1828,7 +1837,7 @@ void bch2_journal_pin_add_if_older(struct journal *j,
 }
 
 static struct journal_entry_pin *
-journal_get_next_pin(struct journal *j, u64 seq_to_flush)
+journal_get_next_pin(struct journal *j, u64 seq_to_flush, u64 *seq)
 {
 struct journal_entry_pin_list *pin_list;
 struct journal_entry_pin *ret = NULL;
@@ -1851,6 +1860,7 @@ journal_get_next_pin(struct journal *j, u64 seq_to_flush)
 if (ret) {
 /* must be list_del_init(), see bch2_journal_pin_drop() */
 list_del_init(&ret->list);
+*seq = journal_pin_seq(j, pin_list);
 break;
 }
 }
@@ -1875,9 +1885,10 @@ static bool journal_has_pins(struct journal *j)
 void bch2_journal_flush_pins(struct journal *j)
 {
 struct journal_entry_pin *pin;
+u64 seq;
 
-while ((pin = journal_get_next_pin(j, U64_MAX)))
-pin->flush(j, pin);
+while ((pin = journal_get_next_pin(j, U64_MAX, &seq)))
+pin->flush(j, pin, seq);
 
 wait_event(j->wait, !journal_has_pins(j) || bch2_journal_error(j));
 }
@@ -1920,7 +1931,7 @@ static void journal_reclaim_work(struct work_struct *work)
 struct journal *j = &c->journal;
 struct bch_dev *ca;
 struct journal_entry_pin *pin;
-u64 seq_to_flush = 0;
+u64 seq, seq_to_flush = 0;
 unsigned iter, bucket_to_flush;
 unsigned long next_flush;
 bool reclaim_lock_held = false, need_flush;
@@ -1994,9 +2005,9 @@ static void journal_reclaim_work(struct work_struct *work)
 
 while ((pin = journal_get_next_pin(j, need_flush
 ? U64_MAX
-: seq_to_flush))) {
+: seq_to_flush, &seq))) {
 __set_current_state(TASK_RUNNING);
-pin->flush(j, pin);
+pin->flush(j, pin, seq);
 need_flush = false;
 
 j->last_flushed = jiffies;
@@ -2196,17 +2207,39 @@ static void journal_write_done(struct closure *cl)
 mod_delayed_work(system_freezable_wq, &j->reclaim_work, 0);
 }
 
+static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
+{
+/* we aren't holding j->lock: */
+unsigned new_size = READ_ONCE(j->buf_size_want);
+void *new_buf;
+
+if (buf->size >= new_size)
+return;
+
+new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN);
+if (!new_buf)
+return;
+
+memcpy(new_buf, buf->data, buf->size);
+kvpfree(buf->data, buf->size);
+buf->data = new_buf;
+buf->size = new_size;
+}
+
 static void journal_write(struct closure *cl)
 {
 struct journal *j = container_of(cl, struct journal, io);
 struct bch_fs *c = container_of(j, struct bch_fs, journal);
 struct bch_dev *ca;
 struct journal_buf *w = journal_prev_buf(j);
-struct jset *jset = w->data;
+struct jset *jset;
 struct bio *bio;
 struct bch_extent_ptr *ptr;
 unsigned i, sectors, bytes;
 
+journal_buf_realloc(j, w);
+jset = w->data;
+
 j->write_start_time = local_clock();
 
 bch2_journal_add_prios(j, w);
@@ -2346,6 +2379,7 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
 unsigned u64s_min, unsigned u64s_max)
 {
 struct bch_fs *c = container_of(j, struct bch_fs, journal);
+struct journal_buf *buf;
 int ret;
 retry:
 ret = journal_res_get_fast(j, res, u64s_min, u64s_max);
@@ -2365,7 +2399,18 @@ retry:
 }
 
 /*
-* Ok, no more room in the current journal entry - try to start a new
+* If we couldn't get a reservation because the current buf filled up,
+* and we had room for a bigger entry on disk, signal that we want to
+* realloc the journal bufs:
+*/
+buf = journal_cur_buf(j);
+if (journal_entry_is_open(j) &&
+buf->size >> 9 < buf->disk_sectors &&
+buf->size < JOURNAL_ENTRY_SIZE_MAX)
+j->buf_size_want = max(j->buf_size_want, buf->size << 1);
+
+/*
+* Close the current journal entry if necessary, then try to start a new
 * one:
 */
 switch (journal_buf_switch(j, false)) {
@@ -2765,11 +2810,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
 struct journal_device *ja = &ca->journal;
 struct bch_sb_field_journal *journal_buckets =
 bch2_sb_get_journal(sb);
-unsigned i, journal_entry_pages;
-
-journal_entry_pages =
-DIV_ROUND_UP(1U << BCH_SB_JOURNAL_ENTRY_SIZE(sb),
-PAGE_SECTORS);
+unsigned i;
 
 ja->nr = bch2_nr_journal_buckets(journal_buckets);
 
@@ -2777,7 +2818,8 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
 if (!ja->bucket_seq)
 return -ENOMEM;
 
-ca->journal.bio = bio_kmalloc(GFP_KERNEL, journal_entry_pages);
+ca->journal.bio = bio_kmalloc(GFP_KERNEL,
+DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE));
 if (!ca->journal.bio)
 return -ENOMEM;
 
@@ -2793,17 +2835,14 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
 
 void bch2_fs_journal_exit(struct journal *j)
 {
-unsigned order = get_order(j->entry_size_max);
-
-free_pages((unsigned long) j->buf[1].data, order);
-free_pages((unsigned long) j->buf[0].data, order);
+kvpfree(j->buf[1].data, j->buf[1].size);
+kvpfree(j->buf[0].data, j->buf[0].size);
 free_fifo(&j->pin);
 }
 
-int bch2_fs_journal_init(struct journal *j, unsigned entry_size_max)
+int bch2_fs_journal_init(struct journal *j)
 {
 static struct lock_class_key res_key;
-unsigned order = get_order(entry_size_max);
 
 spin_lock_init(&j->lock);
 spin_lock_init(&j->pin_lock);
@@ -2817,7 +2856,8 @@ int bch2_fs_journal_init(struct journal *j, unsigned entry_size_max)
 
 lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
 
-j->entry_size_max = entry_size_max;
+j->buf[0].size = JOURNAL_ENTRY_SIZE_MIN;
+j->buf[1].size = JOURNAL_ENTRY_SIZE_MIN;
 j->write_delay_ms = 100;
 j->reclaim_delay_ms = 100;
 
@@ -2828,9 +2868,11 @@ int bch2_fs_journal_init(struct journal *j, unsigned entry_size_max)
 { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
 
 if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
-!(j->buf[0].data = (void *) __get_free_pages(GFP_KERNEL, order)) ||
-!(j->buf[1].data = (void *) __get_free_pages(GFP_KERNEL, order)))
+!(j->buf[0].data = kvpmalloc(j->buf[0].size, GFP_KERNEL)) ||
+!(j->buf[1].data = kvpmalloc(j->buf[1].size, GFP_KERNEL)))
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
j->pin.front = j->pin.back = 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
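(Aside, not part of the diff: the hunks above replace the fixed, superblock-sized journal buffer with one that starts small and grows on demand. A minimal userspace sketch of that resize policy -- buffers start at JOURNAL_ENTRY_SIZE_MIN, the wanted size doubles whenever an entry fills up while the device could still hold a larger one, and journal_buf_realloc() later copies into the bigger buffer. The disk_sectors value below is made up for illustration.)

#include <stdio.h>

#define JOURNAL_ENTRY_SIZE_MIN	(64U << 10)	/* 64k, as in the new header */
#define JOURNAL_ENTRY_SIZE_MAX	(4U << 20)	/* 4M */

static unsigned max_u(unsigned a, unsigned b) { return a > b ? a : b; }

int main(void)
{
	unsigned disk_sectors  = 2048;	/* hypothetical 1M on-disk entry */
	unsigned buf_size      = JOURNAL_ENTRY_SIZE_MIN;
	unsigned buf_size_want = buf_size;

	/* same test __journal_res_get() now performs when an entry fills up */
	while (buf_size >> 9 < disk_sectors &&
	       buf_size < JOURNAL_ENTRY_SIZE_MAX) {
		buf_size_want = max_u(buf_size_want, buf_size << 1);

		/* journal_buf_realloc() would kvpmalloc() and memcpy() here */
		buf_size = buf_size_want;
		printf("journal buf grown to %u bytes\n", buf_size);
	}
	return 0;
}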
@@ -121,15 +121,21 @@ struct journal_replay {
 	struct jset j;
 };
 
-#define JOURNAL_PIN ((32 * 1024) - 1)
+#define JOURNAL_PIN (32 * 1024)
 
 static inline bool journal_pin_active(struct journal_entry_pin *pin)
 {
 	return pin->pin_list != NULL;
 }
 
-void bch2_journal_pin_add(struct journal *, struct journal_entry_pin *,
-			  journal_pin_flush_fn);
+static inline struct journal_entry_pin_list *
+journal_seq_pin(struct journal *j, u64 seq)
+{
+	return &j->pin.data[(size_t) seq & j->pin.mask];
+}
+
+void bch2_journal_pin_add(struct journal *, struct journal_res *,
+			  struct journal_entry_pin *, journal_pin_flush_fn);
 void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *);
 void bch2_journal_pin_add_if_older(struct journal *,
 				   struct journal_entry_pin *,
@@ -343,12 +349,8 @@ int bch2_journal_replay(struct bch_fs *, struct list_head *);
 
 static inline void bch2_journal_set_replay_done(struct journal *j)
 {
-	spin_lock(&j->lock);
 	BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
-
 	set_bit(JOURNAL_REPLAY_DONE, &j->flags);
-	j->cur_pin_list = &fifo_peek_back(&j->pin);
-	spin_unlock(&j->lock);
 }
 
 ssize_t bch2_journal_print_debug(struct journal *, char *);
@@ -368,6 +370,6 @@ void bch2_fs_journal_stop(struct journal *);
 void bch2_dev_journal_exit(struct bch_dev *);
 int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
 void bch2_fs_journal_exit(struct journal *);
-int bch2_fs_journal_init(struct journal *, unsigned);
+int bch2_fs_journal_init(struct journal *);
 
 #endif /* _BCACHE_JOURNAL_H */
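(Aside, not part of the diff: JOURNAL_PIN changes from ((32 * 1024) - 1) to a power of two because the new journal_seq_pin() helper indexes the pin FIFO with seq & mask, which only maps every sequence number to a distinct, wrapping slot when the FIFO size is a power of two and the mask is size - 1. A tiny userspace check of that indexing; the mask value is an assumption, not taken from the commit.)

#include <stdio.h>
#include <stdint.h>

#define JOURNAL_PIN	(32 * 1024)

int main(void)
{
	size_t mask = JOURNAL_PIN - 1;	/* assumption: fifo mask == size - 1 */
	uint64_t seq[] = { 0, 1, JOURNAL_PIN - 1, JOURNAL_PIN, 100000 };

	for (size_t i = 0; i < sizeof(seq) / sizeof(seq[0]); i++)
		printf("seq %llu -> pin slot %zu\n",
		       (unsigned long long) seq[i],
		       (size_t) seq[i] & mask);
	return 0;
}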
@@ -15,8 +15,12 @@ struct journal_res;
  */
 struct journal_buf {
 	struct jset *data;
+
 	struct closure_waitlist wait;
 
+	unsigned size;
+	unsigned disk_sectors;
+
 	/*
 	 * ugh, prio_buckets are stupid - need to convert them to new
 	 * transaction machinery when it arrives
@@ -39,7 +43,8 @@ struct journal_entry_pin_list {
 
 struct journal;
 struct journal_entry_pin;
-typedef void (*journal_pin_flush_fn)(struct journal *j, struct journal_entry_pin *);
+typedef void (*journal_pin_flush_fn)(struct journal *j,
+				     struct journal_entry_pin *, u64);
 
 struct journal_entry_pin {
 	struct list_head list;
@@ -90,11 +95,13 @@ union journal_res_state {
 	};
 };
 
-/* 4 mb, in bytes: */
-#define JOURNAL_ENTRY_SIZE_MAX	(4U << 20)
+/* bytes: */
+#define JOURNAL_ENTRY_SIZE_MIN	(64U << 10) /* 64k */
+#define JOURNAL_ENTRY_SIZE_MAX	(4U << 20)  /* 4M */
 
 /*
  * We stash some journal state as sentinal values in cur_entry_offset:
+ * note - cur_entry_offset is in units of u64s
  */
 #define JOURNAL_ENTRY_OFFSET_MAX	((1U << 20) - 1)
 
@@ -123,7 +130,7 @@ struct journal {
 	unsigned cur_entry_u64s;
 	unsigned prev_buf_sectors;
 	unsigned cur_buf_sectors;
-	unsigned entry_size_max; /* bytes */
+	unsigned buf_size_want;
 
 	/*
 	 * Two journal entries -- one is currently open for new entries, the
@@ -162,7 +169,7 @@ struct journal {
 	 * longer needed, the bucket can be discarded and reused.
 	 */
 	DECLARE_FIFO(struct journal_entry_pin_list, pin);
-	struct journal_entry_pin_list *cur_pin_list;
+	struct journal_entry_pin_list *replay_pin_list;
 
 	/*
 	 * Protects the pin lists - the fifo itself is still protected by
@@ -377,13 +377,6 @@ const char *bch2_validate_cache_super(struct bcache_superblock *disk_sb)
 	if (BCH_SB_GC_RESERVE(sb) < 5)
 		return "gc reserve percentage too small";
 
-	if (1U << BCH_SB_JOURNAL_ENTRY_SIZE(sb) < block_size)
-		return "max journal entry size too small";
-
-	/* 4 mb max: */
-	if (512U << BCH_SB_JOURNAL_ENTRY_SIZE(sb) > JOURNAL_ENTRY_SIZE_MAX)
-		return "max journal entry size too big";
-
 	if (!sb->time_precision ||
 	    le32_to_cpu(sb->time_precision) > NSEC_PER_SEC)
 		return "invalid time precision";
@@ -460,14 +460,11 @@ void bch2_fs_stop(struct bch_fs *c)
 	bch2_fs_exit(c);
 }
 
-#define alloc_bucket_pages(gfp, ca) \
-	((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca))))
-
 static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 {
 	struct bch_sb_field_members *mi;
 	struct bch_fs *c;
-	unsigned i, iter_size, journal_entry_bytes;
+	unsigned i, iter_size;
 
 	c = kzalloc(sizeof(struct bch_fs), GFP_KERNEL);
 	if (!c)
@@ -555,8 +552,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	iter_size = (btree_blocks(c) + 1) * 2 *
 		sizeof(struct btree_node_iter_set);
 
-	journal_entry_bytes = 512U << BCH_SB_JOURNAL_ENTRY_SIZE(sb);
-
 	if (!(c->wq = alloc_workqueue("bcachefs",
 				WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
 	    !(c->copygc_wq = alloc_workqueue("bcache_copygc",
@@ -583,7 +578,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	    bdi_setup_and_register(&c->bdi, "bcachefs") ||
 	    bch2_io_clock_init(&c->io_clock[READ]) ||
 	    bch2_io_clock_init(&c->io_clock[WRITE]) ||
-	    bch2_fs_journal_init(&c->journal, journal_entry_bytes) ||
+	    bch2_fs_journal_init(&c->journal) ||
 	    bch2_fs_btree_init(c) ||
 	    bch2_fs_encryption_init(c) ||
 	    bch2_fs_compress_init(c) ||
@@ -974,7 +969,7 @@ static void bch2_dev_free(struct bch_dev *ca)
 	free_percpu(ca->sectors_written);
 	bioset_exit(&ca->replica_set);
 	free_percpu(ca->usage_percpu);
-	free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca)));
+	kvpfree(ca->disk_buckets, bucket_bytes(ca));
 	kfree(ca->prio_buckets);
 	kfree(ca->bio_prio);
 	vfree(ca->buckets);
@@ -1144,7 +1139,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
 			    ca->mi.nbuckets)) ||
 	    !(ca->prio_buckets = kzalloc(sizeof(u64) * prio_buckets(ca) *
 					 2, GFP_KERNEL)) ||
-	    !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) ||
+	    !(ca->disk_buckets = kvpmalloc(bucket_bytes(ca), GFP_KERNEL)) ||
 	    !(ca->usage_percpu = alloc_percpu(struct bch_dev_usage)) ||
 	    !(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) ||
 	    bioset_init(&ca->replica_set, 4,
@@ -175,7 +175,6 @@ read_attribute(cache_read_races);
 
 rw_attribute(journal_write_delay_ms);
 rw_attribute(journal_reclaim_delay_ms);
-read_attribute(journal_entry_size_max);
 
 rw_attribute(discard);
 rw_attribute(cache_replacement_policy);
@@ -406,7 +405,6 @@ SHOW(bch2_fs)
 
 	sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms);
 	sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
-	sysfs_hprint(journal_entry_size_max, c->journal.entry_size_max);
 
 	sysfs_hprint(block_size, block_bytes(c));
 	sysfs_print(block_size_bytes, block_bytes(c));
@@ -561,7 +559,6 @@ SYSFS_OPS(bch2_fs);
 struct attribute *bch2_fs_files[] = {
 	&sysfs_journal_write_delay_ms,
 	&sysfs_journal_reclaim_delay_ms,
-	&sysfs_journal_entry_size_max,
 
 	&sysfs_block_size,
 	&sysfs_block_size_bytes,
@@ -9,6 +9,7 @@
 #include <linux/freezer.h>
 #include <linux/kernel.h>
 #include <linux/llist.h>
+#include <linux/log2.h>
 #include <linux/ratelimit.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -78,16 +79,22 @@ do { \
 	(__builtin_types_compatible_p(typeof(_val), _type) || \
 	 __builtin_types_compatible_p(typeof(_val), const _type))
 
-static inline void *kvmalloc(size_t bytes, gfp_t gfp)
+static inline void kvpfree(void *p, size_t size)
 {
-	if (bytes <= PAGE_SIZE ||
-	    !(gfp & GFP_KERNEL))
-		return kmalloc(bytes, gfp);
+	if (size < PAGE_SIZE)
+		kfree(p);
+	else if (is_vmalloc_addr(p))
+		vfree(p);
+	else
+		free_pages((unsigned long) p, get_order(size));
+
+}
 
-	return ((bytes <= KMALLOC_MAX_SIZE)
-		? kmalloc(bytes, gfp|__GFP_NOWARN)
-		: NULL) ?:
-		vmalloc(bytes);
+static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
+{
+	return size < PAGE_SIZE ? kmalloc(size, gfp_mask)
+		: (void *) __get_free_pages(gfp_mask, get_order(size))
+		?: __vmalloc(size, gfp_mask, PAGE_KERNEL);
 }
 
 #define DECLARE_HEAP(type, name) \
@@ -98,17 +105,15 @@ static inline void *kvmalloc(size_t bytes, gfp_t gfp)
 
 #define init_heap(heap, _size, gfp) \
 ({ \
-	size_t _bytes; \
 	(heap)->used = 0; \
 	(heap)->size = (_size); \
-	_bytes = (heap)->size * sizeof(*(heap)->data); \
-	(heap)->data = kvmalloc(_bytes, (gfp)); \
-	(heap)->data; \
+	(heap)->data = kvpmalloc((heap)->size * sizeof((heap)->data[0]),\
+				 (gfp)); \
 })
 
 #define free_heap(heap) \
 do { \
-	kvfree((heap)->data); \
+	kvpfree((heap)->data, (heap)->size * sizeof((heap)->data[0])); \
 	(heap)->data = NULL; \
 } while (0)
 
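(Aside, not part of the diff: the new kvpmalloc()/kvpfree() helpers pick an allocator purely from the size -- slab for sub-page allocations, the page allocator for larger ones, vmalloc as the fallback -- and the caller passes the size back at free time, so no per-allocation bookkeeping is needed. A userspace sketch of that dispatch, with the three kernel allocators stubbed out as strings and PAGE_SIZE assumed to be 4096.)

#include <stdio.h>

#define PAGE_SIZE 4096u		/* assumption: typical page size */

static const char *kvp_strategy(size_t size, int contig_pages_available)
{
	if (size < PAGE_SIZE)
		return "kmalloc";		/* small: slab allocation */
	if (contig_pages_available)
		return "__get_free_pages";	/* physically contiguous pages */
	return "__vmalloc";			/* fallback: virtually contiguous */
}

int main(void)
{
	size_t sizes[] = { 512, 64u << 10, 4u << 20 };

	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%zu bytes -> %s (or %s if pages are fragmented)\n",
		       sizes[i],
		       kvp_strategy(sizes[i], 1),
		       kvp_strategy(sizes[i], 0));
	return 0;
}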