mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-02 00:00:03 +03:00)

commit b61ad35b97
parent 4a2acdaf65

Update bcachefs sources to 3913e0cac3 bcachefs: Journal space calculation fix
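The "journal space calculation fix" named in the title corresponds to the journal_reclaim.c hunk near the end of this diff (the journal_dev_space_available() context): an unwritten journal entry that is larger than a device's bucket size can never be written to that device, so it is now skipped when estimating how much space unwritten entries will consume there. A self-contained toy model of just that skip, not the actual bcachefs implementation:

```c
#include <stdio.h>

/*
 * Toy model of the fix: when estimating how much space unwritten journal
 * entries will consume on one member device, an entry larger than that
 * device's bucket size can never be written there, so it is skipped
 * instead of being counted.  (Illustration only, not the bcachefs code.)
 */
static unsigned account_unwritten(const unsigned *unwritten, unsigned nr,
				  unsigned bucket_size)
{
	unsigned i, sectors = 0;

	for (i = 0; i < nr; i++) {
		/* entry won't fit on this device, skip: */
		if (unwritten[i] > bucket_size)
			continue;

		sectors += unwritten[i];
	}

	return sectors;
}

int main(void)
{
	unsigned entries[] = { 128, 4096, 256 };	/* entry sizes, in sectors */
	unsigned bucket_size = 1024;			/* device bucket size */

	/* the 4096-sector entry is skipped: prints 384 */
	printf("%u\n", account_unwritten(entries, 3, bucket_size));
	return 0;
}
```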
@@ -1 +1 @@
-ac3ab6a511717db1644ded49a6f417304abba048
+3913e0cac34e0993ab6dde67a2dec1ea485a2e28
@@ -49,14 +49,14 @@ DECLARE_EVENT_CLASS(bch_fs,
TP_ARGS(c),

TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
),

TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
),

TP_printk("%pU", __entry->uuid)
TP_printk("%d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
);

DECLARE_EVENT_CLASS(bio,
@@ -131,7 +131,7 @@ TRACE_EVENT(journal_reclaim_start,
btree_key_cache_dirty, btree_key_cache_total),

TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
__field(u64, min_nr )
__field(u64, prereserved )
__field(u64, prereserved_total )
@@ -142,7 +142,7 @@ TRACE_EVENT(journal_reclaim_start,
),

TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
__entry->min_nr = min_nr;
__entry->prereserved = prereserved;
__entry->prereserved_total = prereserved_total;
@@ -152,8 +152,8 @@ TRACE_EVENT(journal_reclaim_start,
__entry->btree_key_cache_total = btree_key_cache_total;
),

TP_printk("%pU min %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
__entry->uuid,
TP_printk("%d,%d min %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->min_nr,
__entry->prereserved,
__entry->prereserved_total,
@@ -168,16 +168,18 @@ TRACE_EVENT(journal_reclaim_finish,
TP_ARGS(c, nr_flushed),

TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(u64, nr_flushed )
__field(dev_t, dev )
__field(u64, nr_flushed )
),

TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->nr_flushed = nr_flushed;
__entry->dev = c->dev;
__entry->nr_flushed = nr_flushed;
),

TP_printk("%pU flushed %llu", __entry->uuid, __entry->nr_flushed)
TP_printk("%d%d flushed %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->nr_flushed)
);

/* bset.c: */
@@ -194,7 +196,7 @@ DECLARE_EVENT_CLASS(btree_node,
TP_ARGS(c, b),

TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
__field(u8, level )
__field(u8, id )
__field(u64, inode )
@@ -202,15 +204,16 @@ DECLARE_EVENT_CLASS(btree_node,
),

TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
__entry->level = b->c.level;
__entry->id = b->c.btree_id;
__entry->inode = b->key.k.p.inode;
__entry->offset = b->key.k.p.offset;
),

TP_printk("%pU %u id %u %llu:%llu",
__entry->uuid, __entry->level, __entry->id,
TP_printk("%d,%d %u id %u %llu:%llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->level, __entry->id,
__entry->inode, __entry->offset)
);
@@ -254,32 +257,17 @@ DEFINE_EVENT(btree_node, btree_node_reap,
TP_ARGS(c, b)
);

DECLARE_EVENT_CLASS(btree_node_cannibalize_lock,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c),

TP_STRUCT__entry(
__array(char, uuid, 16 )
),

TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
),

TP_printk("%pU", __entry->uuid)
);

DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock_fail,
DEFINE_EVENT(bch_fs, btree_node_cannibalize_lock_fail,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);

DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock,
DEFINE_EVENT(bch_fs, btree_node_cannibalize_lock,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);

DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize,
DEFINE_EVENT(bch_fs, btree_node_cannibalize,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
@@ -294,18 +282,19 @@ TRACE_EVENT(btree_reserve_get_fail,
TP_ARGS(c, required, cl),

TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
__field(size_t, required )
__field(struct closure *, cl )
),

TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
__entry->required = required;
__entry->cl = cl;
),

TP_printk("%pU required %zu by %p", __entry->uuid,
TP_printk("%d,%d required %zu by %p",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->required, __entry->cl)
);
@@ -483,19 +472,20 @@ TRACE_EVENT(move_data,
TP_ARGS(c, sectors_moved, keys_moved),

TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
__field(u64, sectors_moved )
__field(u64, keys_moved )
),

TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
__entry->sectors_moved = sectors_moved;
__entry->keys_moved = keys_moved;
),

TP_printk("%pU sectors_moved %llu keys_moved %llu",
__entry->uuid, __entry->sectors_moved, __entry->keys_moved)
TP_printk("%d,%d sectors_moved %llu keys_moved %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->sectors_moved, __entry->keys_moved)
);

TRACE_EVENT(copygc,
@@ -507,7 +497,7 @@ TRACE_EVENT(copygc,
buckets_moved, buckets_not_moved),

TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
__field(u64, sectors_moved )
__field(u64, sectors_not_moved )
__field(u64, buckets_moved )
@@ -515,17 +505,39 @@ TRACE_EVENT(copygc,
),

TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
__entry->sectors_moved = sectors_moved;
__entry->sectors_not_moved = sectors_not_moved;
__entry->buckets_moved = buckets_moved;
__entry->buckets_not_moved = buckets_moved;
),

TP_printk("%pU sectors moved %llu remain %llu buckets moved %llu remain %llu",
__entry->uuid,
__entry->sectors_moved, __entry->sectors_not_moved,
__entry->buckets_moved, __entry->buckets_not_moved)
TP_printk("%d,%d sectors moved %llu remain %llu buckets moved %llu remain %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->sectors_moved, __entry->sectors_not_moved,
__entry->buckets_moved, __entry->buckets_not_moved)
);

TRACE_EVENT(copygc_wait,
TP_PROTO(struct bch_fs *c,
u64 wait_amount, u64 until),
TP_ARGS(c, wait_amount, until),

TP_STRUCT__entry(
__field(dev_t, dev )
__field(u64, wait_amount )
__field(u64, until )
),

TP_fast_assign(
__entry->dev = c->dev;
__entry->wait_amount = wait_amount;
__entry->until = until;
),

TP_printk("%d,%u waiting for %llu sectors until %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->wait_amount, __entry->until)
);

TRACE_EVENT(trans_get_iter,
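The tracepoint hunks above all make the same change: instead of embedding the 16-byte filesystem UUID in every trace event, they record the filesystem's dev_t and print it as a major,minor pair. A small userspace illustration of that "%d,%d" device-number style (using the glibc major()/minor() helpers rather than the kernel's MAJOR()/MINOR() macros; not code from this repository):

```c
#include <stdio.h>
#include <sys/sysmacros.h>	/* major(), minor(), makedev() */
#include <sys/types.h>

/*
 * The updated tracepoints identify a filesystem by its device number,
 * split into major and minor parts (e.g. "254,1"), instead of by UUID.
 */
int main(void)
{
	dev_t dev = makedev(254, 1);

	printf("%u,%u\n", major(dev), minor(dev));
	return 0;
}
```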
@@ -263,7 +263,10 @@ do { \
BCH_DEBUG_PARAM(verify_btree_ondisk, \
"Reread btree nodes at various points to verify the " \
"mergesort in the read path against modifications " \
"done in memory")
"done in memory") \
BCH_DEBUG_PARAM(verify_all_btree_replicas, \
"When reading btree nodes, read all replicas and " \
"compare them")

/* Parameters that should only be compiled in in debug mode: */
#define BCH_DEBUG_PARAMS_DEBUG() \
@@ -387,6 +390,14 @@ struct gc_pos {
unsigned level;
};

struct reflink_gc {
u64 offset;
u32 size;
u32 refcount;
};

typedef GENRADIX(struct reflink_gc) reflink_gc_table;

struct io_count {
u64 sectors[2][BCH_DATA_NR];
};
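The reflink_gc entries added above are populated by the new bch2_gc_reflink_start() pass and then looked up by position in bch2_mark_reflink_p() later in this diff, which binary-searches for the first entry whose end offset is past the position being marked. A minimal standalone sketch of that lookup, with a plain array standing in for the GENRADIX table (illustration only; field meanings taken from the diff):

```c
#include <stdio.h>
#include <stdint.h>

/*
 * Entries are kept in extent order and record the *end* offset and size of
 * each indirect extent, so the extent covering position `idx` is the first
 * entry with end offset greater than `idx`.
 */
struct reflink_gc {
	uint64_t offset;	/* end of the extent */
	uint32_t size;
	uint32_t refcount;
};

static size_t reflink_gc_lookup(const struct reflink_gc *tbl, size_t nr, uint64_t idx)
{
	size_t l = 0, r = nr, m;

	while (l < r) {
		m = l + (r - l) / 2;
		if (tbl[m].offset <= idx)
			l = m + 1;
		else
			r = m;
	}
	return l;	/* index of first entry with offset > idx, or nr if none */
}

int main(void)
{
	struct reflink_gc tbl[] = {
		{ .offset = 16, .size = 16 },	/* covers [0, 16)  */
		{ .offset = 24, .size = 8 },	/* covers [16, 24) */
		{ .offset = 64, .size = 40 },	/* covers [24, 64) */
	};

	/* position 20 falls in the second extent: prints 1 */
	printf("%zu\n", reflink_gc_lookup(tbl, 3, 20));
	return 0;
}
```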
@ -564,6 +575,7 @@ struct bch_fs {
|
||||
int minor;
|
||||
struct device *chardev;
|
||||
struct super_block *vfs_sb;
|
||||
dev_t dev;
|
||||
char name[40];
|
||||
|
||||
/* ro/rw, add/remove/resize devices: */
|
||||
@ -623,6 +635,7 @@ struct bch_fs {
|
||||
|
||||
/* BTREE CACHE */
|
||||
struct bio_set btree_bio;
|
||||
struct workqueue_struct *io_complete_wq;
|
||||
|
||||
struct btree_root btree_roots[BTREE_ID_NR];
|
||||
struct mutex btree_root_lock;
|
||||
@ -660,7 +673,8 @@ struct bch_fs {
|
||||
|
||||
struct btree_key_cache btree_key_cache;
|
||||
|
||||
struct workqueue_struct *wq;
|
||||
struct workqueue_struct *btree_update_wq;
|
||||
struct workqueue_struct *btree_error_wq;
|
||||
/* copygc needs its own workqueue for index updates.. */
|
||||
struct workqueue_struct *copygc_wq;
|
||||
|
||||
@ -799,6 +813,9 @@ struct bch_fs {
|
||||
|
||||
/* REFLINK */
|
||||
u64 reflink_hint;
|
||||
reflink_gc_table reflink_gc_table;
|
||||
size_t reflink_gc_nr;
|
||||
size_t reflink_gc_idx;
|
||||
|
||||
/* VFS IO PATH - fs-io.c */
|
||||
struct bio_set writepage_bioset;
|
||||
|
@ -1344,6 +1344,7 @@ LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64);
|
||||
|
||||
LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
|
||||
LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28);
|
||||
LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29);
|
||||
|
||||
/*
|
||||
* Features:
|
||||
|
@ -1193,13 +1193,11 @@ static struct bkey_packed *bset_search_write_set(const struct btree *b,
|
||||
|
||||
static inline void prefetch_four_cachelines(void *p)
|
||||
{
|
||||
#if (CONFIG_X86_64 && !defined(__clang__))
|
||||
asm(".intel_syntax noprefix;"
|
||||
"prefetcht0 [%0 - 127 + 64 * 0];"
|
||||
"prefetcht0 [%0 - 127 + 64 * 1];"
|
||||
"prefetcht0 [%0 - 127 + 64 * 2];"
|
||||
"prefetcht0 [%0 - 127 + 64 * 3];"
|
||||
".att_syntax prefix;"
|
||||
#if CONFIG_X86_64
|
||||
asm("prefetcht0 (-127 + 64 * 0)(%0);"
|
||||
"prefetcht0 (-127 + 64 * 1)(%0);"
|
||||
"prefetcht0 (-127 + 64 * 2)(%0);"
|
||||
"prefetcht0 (-127 + 64 * 3)(%0);"
|
||||
:
|
||||
: "r" (p + 127));
|
||||
#else
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "keylist.h"
|
||||
#include "move.h"
|
||||
#include "recovery.h"
|
||||
#include "reflink.h"
|
||||
#include "replicas.h"
|
||||
#include "super-io.h"
|
||||
|
||||
@ -1282,6 +1283,201 @@ static int bch2_gc_start(struct bch_fs *c,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_done_initial_fn(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct reflink_gc *r;
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
char buf[200];
|
||||
int ret = 0;
|
||||
|
||||
if (!refcount)
|
||||
return 0;
|
||||
|
||||
r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
|
||||
if (!r)
|
||||
return -ENOMEM;
|
||||
|
||||
if (!r ||
|
||||
r->offset != k.k->p.offset ||
|
||||
r->size != k.k->size) {
|
||||
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
|
||||
"reflink key has wrong refcount:\n"
|
||||
" %s\n"
|
||||
" should be %u",
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
|
||||
r->refcount)) {
|
||||
struct bkey_i *new;
|
||||
|
||||
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
||||
if (!new) {
|
||||
ret = -ENOMEM;
|
||||
goto fsck_err;
|
||||
}
|
||||
|
||||
bkey_reassemble(new, k);
|
||||
|
||||
if (!r->refcount) {
|
||||
new->k.type = KEY_TYPE_deleted;
|
||||
new->k.size = 0;
|
||||
} else {
|
||||
*bkey_refcount(new) = cpu_to_le64(r->refcount);
|
||||
}
|
||||
|
||||
ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
|
||||
if (ret)
|
||||
kfree(new);
|
||||
}
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||
bool metadata_only)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter *iter;
|
||||
struct bkey_s_c k;
|
||||
struct reflink_gc *r;
|
||||
size_t idx = 0;
|
||||
char buf[200];
|
||||
int ret = 0;
|
||||
|
||||
if (metadata_only)
|
||||
return 0;
|
||||
|
||||
if (initial) {
|
||||
c->reflink_gc_idx = 0;
|
||||
|
||||
ret = bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
|
||||
bch2_gc_reflink_done_initial_fn);
|
||||
goto out;
|
||||
}
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
|
||||
if (!refcount)
|
||||
continue;
|
||||
|
||||
r = genradix_ptr(&c->reflink_gc_table, idx);
|
||||
if (!r ||
|
||||
r->offset != k.k->p.offset ||
|
||||
r->size != k.k->size) {
|
||||
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
|
||||
"reflink key has wrong refcount:\n"
|
||||
" %s\n"
|
||||
" should be %u",
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
|
||||
r->refcount)) {
|
||||
struct bkey_i *new;
|
||||
|
||||
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
||||
if (!new) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
bkey_reassemble(new, k);
|
||||
|
||||
if (!r->refcount)
|
||||
new->k.type = KEY_TYPE_deleted;
|
||||
else
|
||||
*bkey_refcount(new) = cpu_to_le64(r->refcount);
|
||||
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||
__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
|
||||
kfree(new);
|
||||
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
}
|
||||
fsck_err:
|
||||
bch2_trans_iter_put(&trans, iter);
|
||||
bch2_trans_exit(&trans);
|
||||
out:
|
||||
genradix_free(&c->reflink_gc_table);
|
||||
c->reflink_gc_nr = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_start_initial_fn(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
|
||||
struct reflink_gc *r;
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
|
||||
if (!refcount)
|
||||
return 0;
|
||||
|
||||
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
|
||||
GFP_KERNEL);
|
||||
if (!r)
|
||||
return -ENOMEM;
|
||||
|
||||
r->offset = k.k->p.offset;
|
||||
r->size = k.k->size;
|
||||
r->refcount = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
|
||||
bool metadata_only)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter *iter;
|
||||
struct bkey_s_c k;
|
||||
struct reflink_gc *r;
|
||||
int ret;
|
||||
|
||||
if (metadata_only)
|
||||
return 0;
|
||||
|
||||
genradix_free(&c->reflink_gc_table);
|
||||
c->reflink_gc_nr = 0;
|
||||
|
||||
if (initial)
|
||||
return bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
|
||||
bch2_gc_reflink_start_initial_fn);
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
|
||||
if (!refcount)
|
||||
continue;
|
||||
|
||||
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
|
||||
GFP_KERNEL);
|
||||
if (!r) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
r->offset = k.k->p.offset;
|
||||
r->size = k.k->size;
|
||||
r->refcount = 0;
|
||||
}
|
||||
bch2_trans_iter_put(&trans, iter);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch2_gc - walk _all_ references to buckets, and recompute them:
|
||||
*
|
||||
@ -1316,7 +1512,8 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
!bch2_btree_interior_updates_nr_pending(c));
|
||||
again:
|
||||
ret = bch2_gc_start(c, metadata_only);
|
||||
ret = bch2_gc_start(c, metadata_only) ?:
|
||||
bch2_gc_reflink_start(c, initial, metadata_only);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -1378,7 +1575,8 @@ out:
|
||||
bch2_journal_block(&c->journal);
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
ret = bch2_gc_done(c, initial, metadata_only);
|
||||
ret = bch2_gc_reflink_done(c, initial, metadata_only) ?:
|
||||
bch2_gc_done(c, initial, metadata_only);
|
||||
|
||||
bch2_journal_unblock(&c->journal);
|
||||
} else {
|
||||
|
@ -521,7 +521,7 @@ enum btree_validate_ret {
|
||||
\
|
||||
switch (write) { \
|
||||
case READ: \
|
||||
bch_err(c, "%s", _buf2); \
|
||||
bch_err(c, "%s", _buf2); \
|
||||
\
|
||||
switch (type) { \
|
||||
case BTREE_ERR_FIXABLE: \
|
||||
@ -815,6 +815,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
|
||||
BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
|
||||
unsigned u64s;
|
||||
unsigned nonblacklisted_written = 0;
|
||||
int ret, retry_read = 0, write = READ;
|
||||
|
||||
b->version_ondisk = U16_MAX;
|
||||
@ -934,15 +935,31 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
sort_iter_add(iter,
|
||||
vstruct_idx(i, whiteout_u64s),
|
||||
vstruct_last(i));
|
||||
|
||||
nonblacklisted_written = b->written;
|
||||
}
|
||||
|
||||
for (bne = write_block(b);
|
||||
bset_byte_offset(b, bne) < btree_bytes(c);
|
||||
bne = (void *) bne + block_bytes(c))
|
||||
btree_err_on(bne->keys.seq == b->data->keys.seq,
|
||||
btree_err_on(bne->keys.seq == b->data->keys.seq &&
|
||||
!bch2_journal_seq_is_blacklisted(c,
|
||||
le64_to_cpu(bne->keys.journal_seq),
|
||||
true),
|
||||
BTREE_ERR_WANT_RETRY, c, ca, b, NULL,
|
||||
"found bset signature after last bset");
|
||||
|
||||
/*
|
||||
* Blacklisted bsets are those that were written after the most recent
|
||||
* (flush) journal write. Since there wasn't a flush, they may not have
|
||||
* made it to all devices - which means we shouldn't write new bsets
|
||||
* after them, as that could leave a gap and then reads from that device
|
||||
* wouldn't find all the bsets in that btree node - which means it's
|
||||
* important that we start writing new bsets after the most recent _non_
|
||||
* blacklisted bset:
|
||||
*/
|
||||
b->written = nonblacklisted_written;
|
||||
|
||||
sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
|
||||
sorted->keys.u64s = 0;
|
||||
|
||||
@ -1027,8 +1044,8 @@ static void btree_node_read_work(struct work_struct *work)
|
||||
struct btree_read_bio *rb =
|
||||
container_of(work, struct btree_read_bio, work);
|
||||
struct bch_fs *c = rb->c;
|
||||
struct btree *b = rb->b;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
|
||||
struct btree *b = rb->bio.bi_private;
|
||||
struct bio *bio = &rb->bio;
|
||||
struct bch_io_failures failed = { .nr = 0 };
|
||||
char buf[200];
|
||||
@ -1101,7 +1118,263 @@ static void btree_node_read_endio(struct bio *bio)
|
||||
bch2_latency_acct(ca, rb->start_time, READ);
|
||||
}
|
||||
|
||||
queue_work(system_unbound_wq, &rb->work);
|
||||
queue_work(c->io_complete_wq, &rb->work);
|
||||
}
|
||||
|
||||
struct btree_node_read_all {
|
||||
struct closure cl;
|
||||
struct bch_fs *c;
|
||||
struct btree *b;
|
||||
unsigned nr;
|
||||
void *buf[BCH_REPLICAS_MAX];
|
||||
struct bio *bio[BCH_REPLICAS_MAX];
|
||||
int err[BCH_REPLICAS_MAX];
|
||||
};
|
||||
|
||||
static unsigned btree_node_sectors_written(struct bch_fs *c, void *data)
|
||||
{
|
||||
struct btree_node *bn = data;
|
||||
struct btree_node_entry *bne;
|
||||
unsigned offset = 0;
|
||||
|
||||
if (le64_to_cpu(bn->magic) != bset_magic(c))
|
||||
return 0;
|
||||
|
||||
while (offset < c->opts.btree_node_size) {
|
||||
if (!offset) {
|
||||
offset += vstruct_sectors(bn, c->block_bits);
|
||||
} else {
|
||||
bne = data + (offset << 9);
|
||||
if (bne->keys.seq != bn->keys.seq)
|
||||
break;
|
||||
offset += vstruct_sectors(bne, c->block_bits);
|
||||
}
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *data)
|
||||
{
|
||||
struct btree_node *bn = data;
|
||||
struct btree_node_entry *bne;
|
||||
|
||||
if (!offset)
|
||||
return false;
|
||||
|
||||
while (offset < c->opts.btree_node_size) {
|
||||
bne = data + (offset << 9);
|
||||
if (bne->keys.seq == bn->keys.seq)
|
||||
return true;
|
||||
offset++;
|
||||
}
|
||||
|
||||
return false;
|
||||
return offset;
|
||||
}
|
||||
|
||||
static void btree_node_read_all_replicas_done(struct closure *cl)
|
||||
{
|
||||
struct btree_node_read_all *ra =
|
||||
container_of(cl, struct btree_node_read_all, cl);
|
||||
struct bch_fs *c = ra->c;
|
||||
struct btree *b = ra->b;
|
||||
bool have_good_copy = false;
|
||||
bool dump_bset_maps = false;
|
||||
bool have_retry = false;
|
||||
int ret = 0, write = READ;
|
||||
unsigned i, written, written2;
|
||||
__le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2
|
||||
? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0;
|
||||
|
||||
for (i = 0; i < ra->nr; i++) {
|
||||
if (ra->err[i])
|
||||
continue;
|
||||
|
||||
if (!have_good_copy) {
|
||||
memcpy(b->data, ra->buf[i], btree_bytes(c));
|
||||
have_good_copy = true;
|
||||
written = btree_node_sectors_written(c, b->data);
|
||||
}
|
||||
|
||||
/* Try to get the right btree node: */
|
||||
if (have_good_copy &&
|
||||
seq &&
|
||||
b->data->keys.seq != seq &&
|
||||
((struct btree_node *) ra->buf[i])->keys.seq == seq) {
|
||||
memcpy(b->data, ra->buf[i], btree_bytes(c));
|
||||
written = btree_node_sectors_written(c, b->data);
|
||||
}
|
||||
|
||||
written2 = btree_node_sectors_written(c, ra->buf[i]);
|
||||
if (btree_err_on(written2 != written, BTREE_ERR_FIXABLE, c, NULL, b, NULL,
|
||||
"btree node sectors written mismatch: %u != %u",
|
||||
written, written2) ||
|
||||
btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]),
|
||||
BTREE_ERR_FIXABLE, c, NULL, b, NULL,
|
||||
"found bset signature after last bset") ||
|
||||
btree_err_on(memcmp(b->data, ra->buf[i], written << 9),
|
||||
BTREE_ERR_FIXABLE, c, NULL, b, NULL,
|
||||
"btree node replicas content mismatch"))
|
||||
dump_bset_maps = true;
|
||||
|
||||
if (written2 > written) {
|
||||
written = written2;
|
||||
memcpy(b->data, ra->buf[i], btree_bytes(c));
|
||||
}
|
||||
}
|
||||
fsck_err:
|
||||
if (dump_bset_maps) {
|
||||
for (i = 0; i < ra->nr; i++) {
|
||||
char buf[200];
|
||||
struct printbuf out = PBUF(buf);
|
||||
struct btree_node *bn = ra->buf[i];
|
||||
struct btree_node_entry *bne = NULL;
|
||||
unsigned offset = 0, sectors;
|
||||
bool gap = false;
|
||||
|
||||
if (ra->err[i])
|
||||
continue;
|
||||
|
||||
while (offset < c->opts.btree_node_size) {
|
||||
if (!offset) {
|
||||
sectors = vstruct_sectors(bn, c->block_bits);
|
||||
} else {
|
||||
bne = ra->buf[i] + (offset << 9);
|
||||
if (bne->keys.seq != bn->keys.seq)
|
||||
break;
|
||||
sectors = vstruct_sectors(bne, c->block_bits);
|
||||
}
|
||||
|
||||
pr_buf(&out, " %u-%u", offset, offset + sectors);
|
||||
if (bne && bch2_journal_seq_is_blacklisted(c,
|
||||
le64_to_cpu(bne->keys.journal_seq), false))
|
||||
pr_buf(&out, "*");
|
||||
offset += sectors;
|
||||
}
|
||||
|
||||
while (offset < c->opts.btree_node_size) {
|
||||
bne = ra->buf[i] + (offset << 9);
|
||||
if (bne->keys.seq == bn->keys.seq) {
|
||||
if (!gap)
|
||||
pr_buf(&out, " GAP");
|
||||
gap = true;
|
||||
|
||||
sectors = vstruct_sectors(bne, c->block_bits);
|
||||
pr_buf(&out, " %u-%u", offset, offset + sectors);
|
||||
if (bch2_journal_seq_is_blacklisted(c,
|
||||
le64_to_cpu(bne->keys.journal_seq), false))
|
||||
pr_buf(&out, "*");
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
|
||||
bch_err(c, "replica %u:%s", i, buf);
|
||||
}
|
||||
}
|
||||
|
||||
if (have_good_copy)
|
||||
bch2_btree_node_read_done(c, NULL, b, false);
|
||||
else
|
||||
set_btree_node_read_error(b);
|
||||
|
||||
for (i = 0; i < ra->nr; i++) {
|
||||
mempool_free(ra->buf[i], &c->btree_bounce_pool);
|
||||
bio_put(ra->bio[i]);
|
||||
}
|
||||
|
||||
closure_debug_destroy(&ra->cl);
|
||||
kfree(ra);
|
||||
|
||||
clear_btree_node_read_in_flight(b);
|
||||
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
|
||||
}
|
||||
|
||||
static void btree_node_read_all_replicas_endio(struct bio *bio)
|
||||
{
|
||||
struct btree_read_bio *rb =
|
||||
container_of(bio, struct btree_read_bio, bio);
|
||||
struct bch_fs *c = rb->c;
|
||||
struct btree_node_read_all *ra = rb->ra;
|
||||
|
||||
if (rb->have_ioref) {
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
|
||||
bch2_latency_acct(ca, rb->start_time, READ);
|
||||
}
|
||||
|
||||
ra->err[rb->idx] = bio->bi_status;
|
||||
closure_put(&ra->cl);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX This allocates multiple times from the same mempools, and can deadlock
|
||||
* under sufficient memory pressure (but is only a debug path)
|
||||
*/
|
||||
static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool sync)
|
||||
{
|
||||
struct bkey_s_c k = bkey_i_to_s_c(&b->key);
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded pick;
|
||||
struct btree_node_read_all *ra;
|
||||
unsigned i;
|
||||
|
||||
ra = kzalloc(sizeof(*ra), GFP_NOFS);
|
||||
if (!ra)
|
||||
return -ENOMEM;
|
||||
|
||||
closure_init(&ra->cl, NULL);
|
||||
ra->c = c;
|
||||
ra->b = b;
|
||||
ra->nr = bch2_bkey_nr_ptrs(k);
|
||||
|
||||
for (i = 0; i < ra->nr; i++) {
|
||||
ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
|
||||
ra->bio[i] = bio_alloc_bioset(GFP_NOFS, buf_pages(ra->buf[i],
|
||||
btree_bytes(c)),
|
||||
&c->btree_bio);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) {
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
|
||||
struct btree_read_bio *rb =
|
||||
container_of(ra->bio[i], struct btree_read_bio, bio);
|
||||
rb->c = c;
|
||||
rb->b = b;
|
||||
rb->ra = ra;
|
||||
rb->start_time = local_clock();
|
||||
rb->have_ioref = bch2_dev_get_ioref(ca, READ);
|
||||
rb->idx = i;
|
||||
rb->pick = pick;
|
||||
rb->bio.bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META;
|
||||
rb->bio.bi_iter.bi_sector = pick.ptr.offset;
|
||||
rb->bio.bi_end_io = btree_node_read_all_replicas_endio;
|
||||
bch2_bio_map(&rb->bio, ra->buf[i], btree_bytes(c));
|
||||
|
||||
if (rb->have_ioref) {
|
||||
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
|
||||
bio_sectors(&rb->bio));
|
||||
bio_set_dev(&rb->bio, ca->disk_sb.bdev);
|
||||
|
||||
closure_get(&ra->cl);
|
||||
submit_bio(&rb->bio);
|
||||
} else {
|
||||
ra->err[i] = BLK_STS_REMOVED;
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
if (sync) {
|
||||
closure_sync(&ra->cl);
|
||||
btree_node_read_all_replicas_done(&ra->cl);
|
||||
} else {
|
||||
continue_at(&ra->cl, btree_node_read_all_replicas_done,
|
||||
c->io_complete_wq);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
|
||||
@ -1117,6 +1390,12 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
|
||||
btree_pos_to_text(&PBUF(buf), c, b);
|
||||
trace_btree_read(c, b);
|
||||
|
||||
set_btree_node_read_in_flight(b);
|
||||
|
||||
if (bch2_verify_all_btree_replicas &&
|
||||
!btree_node_read_all_replicas(c, b, sync))
|
||||
return;
|
||||
|
||||
ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
|
||||
NULL, &pick);
|
||||
if (bch2_fs_fatal_err_on(ret <= 0, c,
|
||||
@ -1133,6 +1412,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
|
||||
&c->btree_bio);
|
||||
rb = container_of(bio, struct btree_read_bio, bio);
|
||||
rb->c = c;
|
||||
rb->b = b;
|
||||
rb->ra = NULL;
|
||||
rb->start_time = local_clock();
|
||||
rb->have_ioref = bch2_dev_get_ioref(ca, READ);
|
||||
rb->pick = pick;
|
||||
@ -1140,11 +1421,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
|
||||
bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META;
|
||||
bio->bi_iter.bi_sector = pick.ptr.offset;
|
||||
bio->bi_end_io = btree_node_read_endio;
|
||||
bio->bi_private = b;
|
||||
bch2_bio_map(bio, b->data, btree_bytes(c));
|
||||
|
||||
set_btree_node_read_in_flight(b);
|
||||
|
||||
if (rb->have_ioref) {
|
||||
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
|
||||
bio_sectors(bio));
|
||||
@ -1153,7 +1431,6 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
|
||||
if (sync) {
|
||||
submit_bio_wait(bio);
|
||||
|
||||
bio->bi_private = b;
|
||||
btree_node_read_work(&rb->work);
|
||||
} else {
|
||||
submit_bio(bio);
|
||||
@ -1164,8 +1441,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
|
||||
if (sync)
|
||||
btree_node_read_work(&rb->work);
|
||||
else
|
||||
queue_work(system_unbound_wq, &rb->work);
|
||||
|
||||
queue_work(c->io_complete_wq, &rb->work);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1332,7 +1608,7 @@ static void btree_node_write_work(struct work_struct *work)
|
||||
bio_list_add(&c->btree_write_error_list, &wbio->wbio.bio);
|
||||
spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
|
||||
|
||||
queue_work(c->wq, &c->btree_write_error_work);
|
||||
queue_work(c->btree_error_wq, &c->btree_write_error_work);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1371,7 +1647,7 @@ static void btree_node_write_endio(struct bio *bio)
|
||||
container_of(orig, struct btree_write_bio, wbio);
|
||||
|
||||
INIT_WORK(&wb->work, btree_node_write_work);
|
||||
queue_work(system_unbound_wq, &wb->work);
|
||||
queue_work(c->io_complete_wq, &wb->work);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1441,6 +1717,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
|
||||
return;
|
||||
|
||||
if (old & (1 << BTREE_NODE_write_in_flight)) {
|
||||
/*
|
||||
* XXX waiting on btree writes with btree locks held -
|
||||
* this can deadlock, and we hit the write error path
|
||||
*/
|
||||
btree_node_wait_on_io(b);
|
||||
continue;
|
||||
}
|
||||
@ -1631,7 +1911,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
|
||||
atomic64_add(sectors_to_write, &c->btree_writes_sectors);
|
||||
|
||||
INIT_WORK(&wbio->work, btree_write_submit);
|
||||
schedule_work(&wbio->work);
|
||||
queue_work(c->io_complete_wq, &wbio->work);
|
||||
return;
|
||||
err:
|
||||
set_btree_node_noevict(b);
|
||||
|
@ -13,6 +13,7 @@ struct bch_fs;
|
||||
struct btree_write;
|
||||
struct btree;
|
||||
struct btree_iter;
|
||||
struct btree_node_read_all;
|
||||
|
||||
static inline bool btree_node_dirty(struct btree *b)
|
||||
{
|
||||
@ -33,8 +34,11 @@ static inline void clear_btree_node_dirty(struct bch_fs *c, struct btree *b)
|
||||
|
||||
struct btree_read_bio {
|
||||
struct bch_fs *c;
|
||||
struct btree *b;
|
||||
struct btree_node_read_all *ra;
|
||||
u64 start_time;
|
||||
unsigned have_ioref:1;
|
||||
unsigned idx:7;
|
||||
struct extent_ptr_decoded pick;
|
||||
struct work_struct work;
|
||||
struct bio bio;
|
||||
|
@ -2260,6 +2260,7 @@ static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
|
||||
void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
|
||||
unsigned expected_nr_iters,
|
||||
size_t expected_mem_bytes)
|
||||
__acquires(&c->btree_trans_barrier)
|
||||
{
|
||||
memset(trans, 0, sizeof(*trans));
|
||||
trans->c = c;
|
||||
@ -2292,6 +2293,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
|
||||
}
|
||||
|
||||
int bch2_trans_exit(struct btree_trans *trans)
|
||||
__releases(&c->btree_trans_barrier)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
|
@ -550,6 +550,22 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
|
||||
BUG_ON(!journal_pin_active(&as->journal));
|
||||
|
||||
/*
|
||||
* Wait for any in flight writes to finish before we free the old nodes
|
||||
* on disk:
|
||||
*/
|
||||
for (i = 0; i < as->nr_old_nodes; i++) {
|
||||
struct btree_node *bn = READ_ONCE(as->old_nodes[i]->data);
|
||||
|
||||
/*
|
||||
* This is technically a use after free, but it's just a read -
|
||||
* but it might cause problems in userspace where freeing the
|
||||
* buffer may unmap it:
|
||||
*/
|
||||
if (bn && bn->keys.seq == as->old_nodes_seq[i])
|
||||
btree_node_wait_on_io(as->old_nodes[i]);
|
||||
}
|
||||
|
||||
/*
|
||||
* We did an update to a parent node where the pointers we added pointed
|
||||
* to child nodes that weren't written yet: now, the child nodes have
|
||||
@ -889,13 +905,9 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
|
||||
|
||||
btree_update_will_delete_key(as, &b->key);
|
||||
|
||||
/*
|
||||
* XXX: Waiting on io with btree node locks held, we don't want to be
|
||||
* doing this. We can't have btree writes happening after the space has
|
||||
* been freed, but we really only need to block before
|
||||
* btree_update_nodes_written_trans() happens.
|
||||
*/
|
||||
btree_node_wait_on_io(b);
|
||||
as->old_nodes[as->nr_old_nodes] = b;
|
||||
as->old_nodes_seq[as->nr_old_nodes] = b->data->keys.seq;
|
||||
as->nr_old_nodes++;
|
||||
}
|
||||
|
||||
void bch2_btree_update_done(struct btree_update *as)
|
||||
@ -908,7 +920,8 @@ void bch2_btree_update_done(struct btree_update *as)
|
||||
|
||||
bch2_btree_reserve_put(as);
|
||||
|
||||
continue_at(&as->cl, btree_update_set_nodes_written, system_freezable_wq);
|
||||
continue_at(&as->cl, btree_update_set_nodes_written,
|
||||
as->c->btree_interior_update_worker);
|
||||
}
|
||||
|
||||
struct btree_update *
|
||||
@ -1826,7 +1839,10 @@ void async_btree_node_rewrite_work(struct work_struct *work)
|
||||
|
||||
void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
struct async_btree_rewrite *a = kmalloc(sizeof(*a), GFP_NOFS);
|
||||
struct async_btree_rewrite *a;
|
||||
|
||||
if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
|
||||
return;
|
||||
|
||||
if (!percpu_ref_tryget(&c->writes))
|
||||
return;
|
||||
@ -1844,7 +1860,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
|
||||
a->seq = b->data->keys.seq;
|
||||
|
||||
INIT_WORK(&a->work, async_btree_node_rewrite_work);
|
||||
queue_work(system_long_wq, &a->work);
|
||||
queue_work(c->btree_interior_update_worker, &a->work);
|
||||
}
|
||||
|
||||
static void __bch2_btree_node_update_key(struct bch_fs *c,
|
||||
|
@ -92,6 +92,10 @@ struct btree_update {
|
||||
struct btree *new_nodes[BTREE_UPDATE_NODES_MAX];
|
||||
unsigned nr_new_nodes;
|
||||
|
||||
struct btree *old_nodes[BTREE_UPDATE_NODES_MAX];
|
||||
__le64 old_nodes_seq[BTREE_UPDATE_NODES_MAX];
|
||||
unsigned nr_old_nodes;
|
||||
|
||||
open_bucket_idx_t open_buckets[BTREE_UPDATE_NODES_MAX *
|
||||
BCH_REPLICAS_MAX];
|
||||
open_bucket_idx_t nr_open_buckets;
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "ec.h"
|
||||
#include "error.h"
|
||||
#include "movinggc.h"
|
||||
#include "reflink.h"
|
||||
#include "replicas.h"
|
||||
|
||||
#include <linux/preempt.h>
|
||||
@ -1072,6 +1073,124 @@ static int bch2_mark_stripe(struct bch_fs *c,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __reflink_p_frag_references(struct bkey_s_c_reflink_p p,
|
||||
u64 p_start, u64 p_end,
|
||||
u64 v_start, u64 v_end)
|
||||
{
|
||||
if (p_start == p_end)
|
||||
return false;
|
||||
|
||||
p_start += le64_to_cpu(p.v->idx);
|
||||
p_end += le64_to_cpu(p.v->idx);
|
||||
|
||||
if (p_end <= v_start)
|
||||
return false;
|
||||
if (p_start >= v_end)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int reflink_p_frag_references(struct bkey_s_c_reflink_p p,
|
||||
u64 start, u64 end,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
return __reflink_p_frag_references(p, start, end,
|
||||
bkey_start_offset(k.k),
|
||||
k.k->p.offset);
|
||||
}
|
||||
|
||||
static int __bch2_mark_reflink_p(struct bch_fs *c,
|
||||
struct bkey_s_c_reflink_p p,
|
||||
u64 idx, unsigned sectors,
|
||||
unsigned front_frag,
|
||||
unsigned back_frag,
|
||||
unsigned flags,
|
||||
size_t *r_idx)
|
||||
{
|
||||
struct reflink_gc *r;
|
||||
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
|
||||
int frags_referenced;
|
||||
|
||||
while (1) {
|
||||
if (*r_idx >= c->reflink_gc_nr)
|
||||
goto not_found;
|
||||
r = genradix_ptr(&c->reflink_gc_table, *r_idx);
|
||||
BUG_ON(!r);
|
||||
|
||||
if (r->offset > idx)
|
||||
break;
|
||||
(*r_idx)++;
|
||||
}
|
||||
|
||||
frags_referenced =
|
||||
__reflink_p_frag_references(p, 0, front_frag,
|
||||
r->offset - r->size, r->offset) +
|
||||
__reflink_p_frag_references(p, back_frag, p.k->size,
|
||||
r->offset - r->size, r->offset);
|
||||
|
||||
if (frags_referenced == 2) {
|
||||
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
|
||||
add = -add;
|
||||
} else if (frags_referenced == 1) {
|
||||
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
|
||||
add = 0;
|
||||
}
|
||||
|
||||
BUG_ON((s64) r->refcount + add < 0);
|
||||
|
||||
r->refcount += add;
|
||||
return min_t(u64, sectors, r->offset - idx);
|
||||
not_found:
|
||||
bch2_fs_inconsistent(c,
|
||||
"%llu:%llu len %u points to nonexistent indirect extent %llu",
|
||||
p.k->p.inode, p.k->p.offset, p.k->size, idx);
|
||||
bch2_inconsistent_error(c);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static int bch2_mark_reflink_p(struct bch_fs *c,
|
||||
struct bkey_s_c_reflink_p p, unsigned offset,
|
||||
s64 sectors, unsigned flags)
|
||||
{
|
||||
u64 idx = le64_to_cpu(p.v->idx) + offset;
|
||||
struct reflink_gc *ref;
|
||||
size_t l, r, m;
|
||||
unsigned front_frag, back_frag;
|
||||
s64 ret = 0;
|
||||
|
||||
if (sectors < 0)
|
||||
sectors = -sectors;
|
||||
|
||||
BUG_ON(offset + sectors > p.k->size);
|
||||
|
||||
front_frag = offset;
|
||||
back_frag = offset + sectors;
|
||||
|
||||
l = 0;
|
||||
r = c->reflink_gc_nr;
|
||||
while (l < r) {
|
||||
m = l + (r - l) / 2;
|
||||
|
||||
ref = genradix_ptr(&c->reflink_gc_table, m);
|
||||
if (ref->offset <= idx)
|
||||
l = m + 1;
|
||||
else
|
||||
r = m;
|
||||
}
|
||||
|
||||
while (sectors) {
|
||||
ret = __bch2_mark_reflink_p(c, p, idx, sectors,
|
||||
front_frag, back_frag, flags, &l);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
idx += ret;
|
||||
sectors -= ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_mark_key_locked(struct bch_fs *c,
|
||||
struct bkey_s_c old,
|
||||
struct bkey_s_c new,
|
||||
@ -1127,6 +1246,10 @@ static int bch2_mark_key_locked(struct bch_fs *c,
|
||||
fs_usage->persistent_reserved[replicas - 1] += sectors;
|
||||
break;
|
||||
}
|
||||
case KEY_TYPE_reflink_p:
|
||||
ret = bch2_mark_reflink_p(c, bkey_s_c_to_reflink_p(k),
|
||||
offset, sectors, flags);
|
||||
break;
|
||||
}
|
||||
|
||||
preempt_enable();
|
||||
@ -1689,35 +1812,6 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __le64 *bkey_refcount(struct bkey_i *k)
|
||||
{
|
||||
switch (k->k.type) {
|
||||
case KEY_TYPE_reflink_v:
|
||||
return &bkey_i_to_reflink_v(k)->v.refcount;
|
||||
case KEY_TYPE_indirect_inline_data:
|
||||
return &bkey_i_to_indirect_inline_data(k)->v.refcount;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static bool reflink_p_frag_references(struct bkey_s_c_reflink_p p,
|
||||
u64 start, u64 end,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
if (start == end)
|
||||
return false;
|
||||
|
||||
start += le64_to_cpu(p.v->idx);
|
||||
end += le64_to_cpu(p.v->idx);
|
||||
|
||||
if (end <= bkey_start_offset(k.k))
|
||||
return false;
|
||||
if (start >= k.k->p.offset)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
|
||||
struct bkey_s_c_reflink_p p,
|
||||
u64 idx, unsigned sectors,
|
||||
@ -1731,6 +1825,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
|
||||
struct bkey_i *n;
|
||||
__le64 *refcount;
|
||||
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
|
||||
int frags_referenced;
|
||||
s64 ret;
|
||||
|
||||
ret = trans_get_key(trans, BTREE_ID_reflink,
|
||||
@ -1738,18 +1833,20 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (reflink_p_frag_references(p, 0, front_frag, k) &&
|
||||
reflink_p_frag_references(p, back_frag, p.k->size, k)) {
|
||||
sectors = min_t(u64, sectors, k.k->p.offset - idx);
|
||||
|
||||
frags_referenced =
|
||||
reflink_p_frag_references(p, 0, front_frag, k) +
|
||||
reflink_p_frag_references(p, back_frag, p.k->size, k);
|
||||
|
||||
if (frags_referenced == 2) {
|
||||
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
|
||||
add = -add;
|
||||
} else if (reflink_p_frag_references(p, 0, front_frag, k) ||
|
||||
reflink_p_frag_references(p, back_frag, p.k->size, k)) {
|
||||
} else if (frags_referenced == 1) {
|
||||
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
|
||||
goto out;
|
||||
}
|
||||
|
||||
sectors = min_t(u64, sectors, k.k->p.offset - idx);
|
||||
|
||||
n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
|
||||
ret = PTR_ERR_OR_ZERO(n);
|
||||
if (ret)
|
||||
@ -1804,14 +1901,13 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
|
||||
ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors,
|
||||
front_frag, back_frag, flags);
|
||||
if (ret < 0)
|
||||
break;
|
||||
return ret;
|
||||
|
||||
idx += ret;
|
||||
sectors = max_t(s64, 0LL, sectors - ret);
|
||||
ret = 0;
|
||||
idx += ret;
|
||||
sectors -= ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_trans_mark_key(struct btree_trans *trans,
|
||||
|
@ -23,6 +23,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
|
||||
struct btree_iter *inode_iter = NULL;
|
||||
struct bch_hash_info hash = bch2_hash_info_init(c, new_inode);
|
||||
u64 now = bch2_current_time(c);
|
||||
u64 cpu = raw_smp_processor_id();
|
||||
u64 dir_offset = 0;
|
||||
int ret;
|
||||
|
||||
@ -36,7 +37,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
|
||||
if (!name)
|
||||
new_inode->bi_flags |= BCH_INODE_UNLINKED;
|
||||
|
||||
inode_iter = bch2_inode_create(trans, new_inode, U32_MAX);
|
||||
inode_iter = bch2_inode_create(trans, new_inode, U32_MAX, cpu);
|
||||
ret = PTR_ERR_OR_ZERO(inode_iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
@ -13,6 +13,9 @@
|
||||
#include <linux/mount.h>
|
||||
|
||||
#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32)
|
||||
#define FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */
|
||||
#define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
|
||||
#define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
|
||||
|
||||
struct flags_set {
|
||||
unsigned mask;
|
||||
@ -247,11 +250,54 @@ err1:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
|
||||
{
|
||||
u32 flags;
|
||||
int ret = 0;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (get_user(flags, arg))
|
||||
return -EFAULT;
|
||||
|
||||
bch_notice(c, "shutdown by ioctl type %u", flags);
|
||||
|
||||
down_write(&c->vfs_sb->s_umount);
|
||||
|
||||
switch (flags) {
|
||||
case FSOP_GOING_FLAGS_DEFAULT:
|
||||
ret = freeze_bdev(c->vfs_sb->s_bdev);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_journal_flush(&c->journal);
|
||||
c->vfs_sb->s_flags |= SB_RDONLY;
|
||||
bch2_fs_emergency_read_only(c);
|
||||
thaw_bdev(c->vfs_sb->s_bdev);
|
||||
break;
|
||||
|
||||
case FSOP_GOING_FLAGS_LOGFLUSH:
|
||||
bch2_journal_flush(&c->journal);
|
||||
fallthrough;
|
||||
|
||||
case FSOP_GOING_FLAGS_NOLOGFLUSH:
|
||||
c->vfs_sb->s_flags |= SB_RDONLY;
|
||||
bch2_fs_emergency_read_only(c);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
err:
|
||||
up_write(&c->vfs_sb->s_umount);
|
||||
return ret;
|
||||
}
|
||||
|
||||
long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
{
|
||||
struct bch_inode_info *inode = file_bch_inode(file);
|
||||
struct super_block *sb = inode->v.i_sb;
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
|
||||
switch (cmd) {
|
||||
case FS_IOC_GETFLAGS:
|
||||
@ -276,15 +322,7 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
return -ENOTTY;
|
||||
|
||||
case FS_IOC_GOINGDOWN:
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
down_write(&sb->s_umount);
|
||||
sb->s_flags |= SB_RDONLY;
|
||||
if (bch2_fs_emergency_read_only(c))
|
||||
bch_err(c, "emergency read only due to ioctl");
|
||||
up_write(&sb->s_umount);
|
||||
return 0;
|
||||
return bch2_ioc_goingdown(c, (u32 __user *) arg);
|
||||
|
||||
default:
|
||||
return bch2_fs_ioctl(c, cmd, (void __user *) arg);
|
||||
|
@ -1578,6 +1578,8 @@ got_sb:
|
||||
break;
|
||||
}
|
||||
|
||||
c->dev = sb->s_dev;
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_POSIX_ACL
|
||||
if (c->opts.acl)
|
||||
sb->s_flags |= SB_POSIXACL;
|
||||
|
@ -472,23 +472,28 @@ static inline u32 bkey_generation(struct bkey_s_c k)
|
||||
|
||||
struct btree_iter *bch2_inode_create(struct btree_trans *trans,
|
||||
struct bch_inode_unpacked *inode_u,
|
||||
u32 snapshot)
|
||||
u32 snapshot, u64 cpu)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter *iter = NULL;
|
||||
struct bkey_s_c k;
|
||||
u64 min, max, start, pos, *hint;
|
||||
int ret = 0;
|
||||
unsigned bits = (c->opts.inodes_32bit ? 31 : 63);
|
||||
|
||||
u64 cpu = raw_smp_processor_id();
|
||||
unsigned bits = (c->opts.inodes_32bit
|
||||
? 31 : 63) - c->inode_shard_bits;
|
||||
if (c->opts.shard_inode_numbers) {
|
||||
bits -= c->inode_shard_bits;
|
||||
|
||||
min = (cpu << bits);
|
||||
max = (cpu << bits) | ~(ULLONG_MAX << bits);
|
||||
min = (cpu << bits);
|
||||
max = (cpu << bits) | ~(ULLONG_MAX << bits);
|
||||
|
||||
min = max_t(u64, min, BLOCKDEV_INODE_MAX);
|
||||
hint = c->unused_inode_hints + cpu;
|
||||
min = max_t(u64, min, BLOCKDEV_INODE_MAX);
|
||||
hint = c->unused_inode_hints + cpu;
|
||||
} else {
|
||||
min = BLOCKDEV_INODE_MAX;
|
||||
max = ~(ULLONG_MAX << bits);
|
||||
hint = c->unused_inode_hints;
|
||||
}
|
||||
|
||||
start = READ_ONCE(*hint);
|
||||
|
||||
|
@ -70,7 +70,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
|
||||
struct bch_inode_unpacked *);
|
||||
|
||||
struct btree_iter *bch2_inode_create(struct btree_trans *,
|
||||
struct bch_inode_unpacked *, u32);
|
||||
struct bch_inode_unpacked *, u32, u64);
|
||||
|
||||
int bch2_inode_rm(struct bch_fs *, u64, bool);
|
||||
|
||||
|
@ -1439,7 +1439,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
|
||||
bch2_migrate_read_done(&op->write, rbio);
|
||||
|
||||
closure_init(cl, NULL);
|
||||
closure_call(&op->write.op.cl, bch2_write, c->wq, cl);
|
||||
closure_call(&op->write.op.cl, bch2_write, c->btree_update_wq, cl);
|
||||
closure_return_with_destructor(cl, promote_done);
|
||||
}
|
||||
|
||||
@ -1822,6 +1822,13 @@ static void __bch2_read_endio(struct work_struct *work)
|
||||
if (bch2_crc_cmp(csum, rbio->pick.crc.csum))
|
||||
goto csum_err;
|
||||
|
||||
/*
|
||||
* XXX
|
||||
* We need to rework the narrow_crcs path to deliver the read completion
|
||||
* first, and then punt to a different workqueue, otherwise we're
|
||||
* holding up reads while doing btree updates which is bad for memory
|
||||
* reclaim.
|
||||
*/
|
||||
if (unlikely(rbio->narrow_crcs))
|
||||
bch2_rbio_narrow_crcs(rbio);
|
||||
|
||||
|
@ -58,7 +58,7 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
|
||||
{
|
||||
return op->alloc_reserve == RESERVE_MOVINGGC
|
||||
? op->c->copygc_wq
|
||||
: op->c->wq;
|
||||
: op->c->btree_update_wq;
|
||||
}
|
||||
|
||||
int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
|
||||
|
@ -118,7 +118,9 @@ void bch2_journal_halt(struct journal *j)
|
||||
|
||||
void __bch2_journal_buf_put(struct journal *j)
|
||||
{
|
||||
closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
|
||||
closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -304,7 +306,7 @@ static int journal_entry_open(struct journal *j)
|
||||
j->res_get_blocked_start);
|
||||
j->res_get_blocked_start = 0;
|
||||
|
||||
mod_delayed_work(system_freezable_wq,
|
||||
mod_delayed_work(c->io_complete_wq,
|
||||
&j->write_work,
|
||||
msecs_to_jiffies(j->write_delay_ms));
|
||||
journal_wake(j);
|
||||
@ -805,10 +807,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
long b;
|
||||
|
||||
if (new_fs) {
|
||||
percpu_down_read(&c->mark_lock);
|
||||
b = bch2_bucket_alloc_new_fs(ca);
|
||||
if (b < 0) {
|
||||
percpu_up_read(&c->mark_lock);
|
||||
ret = -ENOSPC;
|
||||
goto err;
|
||||
}
|
||||
@ -825,7 +825,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
b = sector_to_bucket(ca, ob->ptr.offset);
|
||||
}
|
||||
|
||||
spin_lock(&c->journal.lock);
|
||||
if (c)
|
||||
spin_lock(&c->journal.lock);
|
||||
|
||||
/*
|
||||
* XXX
|
||||
@ -852,14 +853,14 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
if (pos <= ja->cur_idx)
|
||||
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
|
||||
|
||||
spin_unlock(&c->journal.lock);
|
||||
if (c)
|
||||
spin_unlock(&c->journal.lock);
|
||||
|
||||
if (new_fs) {
|
||||
bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal,
|
||||
ca->mi.bucket_size,
|
||||
gc_phase(GC_PHASE_SB),
|
||||
0);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
} else {
|
||||
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
bch2_trans_mark_metadata_bucket(&trans, ca,
|
||||
|
@ -834,7 +834,7 @@ static void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < j->nr_ptrs; i++) {
|
||||
struct bch_dev *ca = c->devs[j->ptrs[i].dev];
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, j->ptrs[i].dev);
|
||||
u64 offset;
|
||||
|
||||
div64_u64_rem(j->ptrs[i].offset, ca->mi.bucket_size, &offset);
|
||||
@ -1233,8 +1233,6 @@ static void journal_write_done(struct closure *cl)
|
||||
struct journal *j = container_of(cl, struct journal, io);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct journal_buf *w = journal_last_unwritten_buf(j);
|
||||
struct bch_devs_list devs =
|
||||
bch2_bkey_devs(bkey_i_to_s_c(&w->key));
|
||||
struct bch_replicas_padded replicas;
|
||||
union journal_res_state old, new;
|
||||
u64 v, seq;
|
||||
@ -1242,11 +1240,12 @@ static void journal_write_done(struct closure *cl)
|
||||
|
||||
bch2_time_stats_update(j->write_time, j->write_start_time);
|
||||
|
||||
if (!devs.nr) {
|
||||
if (!w->devs_written.nr) {
|
||||
bch_err(c, "unable to write journal to sufficient devices");
|
||||
err = -EIO;
|
||||
} else {
|
||||
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, devs);
|
||||
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
|
||||
w->devs_written);
|
||||
if (bch2_mark_replicas(c, &replicas.e))
|
||||
err = -EIO;
|
||||
}
|
||||
@ -1258,7 +1257,7 @@ static void journal_write_done(struct closure *cl)
|
||||
seq = le64_to_cpu(w->data->seq);
|
||||
|
||||
if (seq >= j->pin.front)
|
||||
journal_seq_pin(j, seq)->devs = devs;
|
||||
journal_seq_pin(j, seq)->devs = w->devs_written;
|
||||
|
||||
j->seq_ondisk = seq;
|
||||
if (err && (!j->err_seq || seq < j->err_seq))
|
||||
@ -1296,27 +1295,27 @@ static void journal_write_done(struct closure *cl)
|
||||
journal_wake(j);
|
||||
|
||||
if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
|
||||
mod_delayed_work(system_freezable_wq, &j->write_work, 0);
|
||||
mod_delayed_work(c->io_complete_wq, &j->write_work, 0);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
if (new.unwritten_idx != new.idx &&
|
||||
!journal_state_count(new, new.unwritten_idx))
|
||||
closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
|
||||
closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
|
||||
}
|
||||
|
||||
static void journal_write_endio(struct bio *bio)
|
||||
{
|
||||
struct bch_dev *ca = bio->bi_private;
|
||||
struct journal *j = &ca->fs->journal;
|
||||
struct journal_buf *w = journal_last_unwritten_buf(j);
|
||||
unsigned long flags;
|
||||
|
||||
if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write error: %s",
|
||||
if (bch2_dev_io_err_on(bio->bi_status, ca, "error writing journal entry %llu: %s",
|
||||
le64_to_cpu(w->data->seq),
|
||||
bch2_blk_status_to_str(bio->bi_status)) ||
|
||||
bch2_meta_write_fault("journal")) {
|
||||
struct journal_buf *w = journal_last_unwritten_buf(j);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&j->err_lock, flags);
|
||||
bch2_bkey_drop_device(bkey_i_to_s(&w->key), ca->dev_idx);
|
||||
bch2_dev_list_drop_dev(&w->devs_written, ca->dev_idx);
|
||||
spin_unlock_irqrestore(&j->err_lock, flags);
|
||||
}
|
||||
|
||||
@ -1370,7 +1369,7 @@ static void do_journal_write(struct closure *cl)
|
||||
le64_to_cpu(w->data->seq);
|
||||
}
|
||||
|
||||
continue_at(cl, journal_write_done, system_highpri_wq);
|
||||
continue_at(cl, journal_write_done, c->io_complete_wq);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1402,7 +1401,8 @@ void bch2_journal_write(struct closure *cl)
|
||||
test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)) {
|
||||
w->noflush = true;
|
||||
SET_JSET_NO_FLUSH(jset, true);
|
||||
jset->last_seq = w->last_seq = 0;
|
||||
jset->last_seq = 0;
|
||||
w->last_seq = 0;
|
||||
|
||||
j->nr_noflush_writes++;
|
||||
} else {
|
||||
@ -1509,14 +1509,12 @@ retry_alloc:
|
||||
journal_debug_buf);
|
||||
kfree(journal_debug_buf);
|
||||
bch2_fatal_error(c);
|
||||
continue_at(cl, journal_write_done, system_highpri_wq);
|
||||
continue_at(cl, journal_write_done, c->io_complete_wq);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: we really should just disable the entire journal in nochanges
|
||||
* mode
|
||||
*/
|
||||
w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
|
||||
|
||||
if (c->opts.nochanges)
|
||||
goto no_io;
|
||||
|
||||
@ -1542,14 +1540,14 @@ retry_alloc:
|
||||
|
||||
bch2_bucket_seq_cleanup(c);
|
||||
|
||||
continue_at(cl, do_journal_write, system_highpri_wq);
|
||||
continue_at(cl, do_journal_write, c->io_complete_wq);
|
||||
return;
|
||||
no_io:
|
||||
bch2_bucket_seq_cleanup(c);
|
||||
|
||||
continue_at(cl, journal_write_done, system_highpri_wq);
|
||||
continue_at(cl, journal_write_done, c->io_complete_wq);
|
||||
return;
|
||||
err:
|
||||
bch2_inconsistent_error(c);
|
||||
continue_at(cl, journal_write_done, system_highpri_wq);
|
||||
continue_at(cl, journal_write_done, c->io_complete_wq);
|
||||
}
|
||||
|
@ -93,6 +93,10 @@ journal_dev_space_available(struct journal *j, struct bch_dev *ca,
|
||||
* until we write it out - thus, account for it here:
|
||||
*/
|
||||
while ((unwritten = get_unwritten_sectors(j, &idx))) {
|
||||
/* entry won't fit on this device, skip: */
|
||||
if (unwritten > ca->mi.bucket_size)
|
||||
continue;
|
||||
|
||||
if (unwritten >= sectors) {
|
||||
if (!buckets) {
|
||||
sectors = 0;
|
||||
|
@ -111,8 +111,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
|
||||
bl->start[nr].start = cpu_to_le64(start);
|
||||
bl->start[nr].end = cpu_to_le64(end);
|
||||
out_write_sb:
|
||||
c->disk_sb.sb->features[0] |=
|
||||
1ULL << BCH_FEATURE_journal_seq_blacklist_v3;
|
||||
c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3);
|
||||
|
||||
ret = bch2_write_super(c);
|
||||
out:
|
||||
@ -298,8 +297,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work)
|
||||
BUG_ON(new_nr && !bl);
|
||||
|
||||
if (!new_nr)
|
||||
c->disk_sb.sb->features[0] &=
|
||||
~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3);
|
||||
c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3));
|
||||
|
||||
bch2_write_super(c);
|
||||
}
|
||||
|
@ -21,6 +21,7 @@ struct journal_buf {
|
||||
struct jset *data;
|
||||
|
||||
__BKEY_PADDED(key, BCH_REPLICAS_MAX);
|
||||
struct bch_devs_list devs_written;
|
||||
|
||||
struct closure_waitlist wait;
|
||||
u64 last_seq; /* copy of data->last_seq */
|
||||
|
@ -523,6 +523,11 @@ static int lookup_inode(struct btree_trans *trans, struct bpos pos,
	if (ret)
		goto err;

	if (!k.k || bkey_cmp(k.k->p, pos)) {
		ret = -ENOENT;
		goto err;
	}

	ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO;
	if (ret)
		goto err;
@ -921,8 +926,8 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
			 rewrite_old_nodes_pred, c, stats);
	if (!ret) {
		mutex_lock(&c->sb_lock);
		c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_extents_above_btree_updates_done;
		c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_bformat_overflow_done;
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
		c->disk_sb.sb->version_min = c->disk_sb.sb->version;
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);

@ -317,6 +317,8 @@ static int bch2_copygc_thread(void *arg)
	set_freezable();

	while (!kthread_should_stop()) {
		cond_resched();

		if (kthread_wait_freezable(c->copy_gc_enabled))
			break;

@ -324,6 +326,7 @@ static int bch2_copygc_thread(void *arg)
		wait = bch2_copygc_wait_amount(c);

		if (wait > clock->max_slop) {
			trace_copygc_wait(c, wait, last + wait);
			c->copygc_wait = last + wait;
			bch2_kthread_io_clock_wait(clock, last + wait,
					MAX_SCHEDULE_TIMEOUT);

@ -165,8 +165,13 @@ enum opt_type {
	x(inodes_32bit, u8, \
	  OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
	  OPT_BOOL(), \
	  BCH_SB_INODE_32BIT, false, \
	  BCH_SB_INODE_32BIT, true, \
	  NULL, "Constrain inode numbers to 32 bits") \
	x(shard_inode_numbers, u8, \
	  OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
	  OPT_BOOL(), \
	  BCH_SB_SHARD_INUMS, false, \
	  NULL, "Shard new inode numbers by CPU id") \
	x(gc_reserve_percent, u8, \
	  OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
	  OPT_UINT(5, 21), \

@ -716,7 +716,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
	case BCH_JSET_ENTRY_dev_usage: {
		struct jset_entry_dev_usage *u =
			container_of(entry, struct jset_entry_dev_usage, entry);
		struct bch_dev *ca = bch_dev_bkey_exists(c, u->dev);
		struct bch_dev *ca = bch_dev_bkey_exists(c, le32_to_cpu(u->dev));
		unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
		unsigned nr_types = (bytes - sizeof(struct jset_entry_dev_usage)) /
			sizeof(struct jset_entry_dev_usage_type);
@ -755,7 +755,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
		struct jset_entry_clock *clock =
			container_of(entry, struct jset_entry_clock, entry);

		atomic64_set(&c->io_clock[clock->rw].now, clock->time);
		atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time));
	}
	}

@ -1217,13 +1217,13 @@ use_clean:

	mutex_lock(&c->sb_lock);
	if (c->opts.version_upgrade) {
		c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
		c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL;
		c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
		c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
		write_sb = true;
	}

	if (!test_bit(BCH_FS_ERROR, &c->flags)) {
		c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_info;
		c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
		write_sb = true;
	}

@ -1278,12 +1278,12 @@ int bch2_fs_initialize(struct bch_fs *c)
	bch_notice(c, "initializing new filesystem");

	mutex_lock(&c->sb_lock);
	c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_extents_above_btree_updates_done;
	c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_bformat_overflow_done;
	c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
	c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);

	if (c->opts.version_upgrade) {
		c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
		c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL;
		c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
		c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
		bch2_write_super(c);
	}

@ -151,7 +151,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,

	set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));

	refcount = (void *) &r_v->v;
	refcount = bkey_refcount(r_v);
	*refcount = 0;
	memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));

@ -181,18 +181,19 @@ err:

static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
{
	struct bkey_s_c k = bch2_btree_iter_peek(iter);
	struct bkey_s_c k;
	int ret;

	for_each_btree_key_continue(iter, 0, k, ret) {
		if (bkey_cmp(iter->pos, end) >= 0)
			return bkey_s_c_null;
			break;

		if (bkey_extent_is_data(k.k))
			break;
			return k;
	}

	return k;
	bch2_btree_iter_set_pos(iter, end);
	return bkey_s_c_null;
}

s64 bch2_remap_range(struct bch_fs *c,
@ -205,8 +206,8 @@ s64 bch2_remap_range(struct bch_fs *c,
	struct bkey_s_c src_k;
	struct bkey_buf new_dst, new_src;
	struct bpos dst_end = dst_start, src_end = src_start;
	struct bpos dst_want, src_want;
	u64 src_done, dst_done;
	struct bpos src_want;
	u64 dst_done;
	int ret = 0, ret2 = 0;

	if (!percpu_ref_tryget(&c->writes))
@ -226,7 +227,8 @@ s64 bch2_remap_range(struct bch_fs *c,
	dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, dst_start,
				       BTREE_ITER_INTENT);

	while (ret == 0 || ret == -EINTR) {
	while ((ret == 0 || ret == -EINTR) &&
	       bkey_cmp(dst_iter->pos, dst_end) < 0) {
		struct disk_reservation disk_res = { 0 };

		bch2_trans_begin(&trans);
@ -236,32 +238,29 @@ s64 bch2_remap_range(struct bch_fs *c,
			break;
		}

		dst_done = dst_iter->pos.offset - dst_start.offset;
		src_want = POS(src_start.inode, src_start.offset + dst_done);
		bch2_btree_iter_set_pos(src_iter, src_want);

		src_k = get_next_src(src_iter, src_end);
		ret = bkey_err(src_k);
		if (ret)
			continue;

		src_done = bpos_min(src_iter->pos, src_end).offset -
			src_start.offset;
		dst_want = POS(dst_start.inode, dst_start.offset + src_done);

		if (bkey_cmp(dst_iter->pos, dst_want) < 0) {
			ret = bch2_fpunch_at(&trans, dst_iter, dst_want,
					     journal_seq, i_sectors_delta);
		if (bkey_cmp(src_want, src_iter->pos) < 0) {
			ret = bch2_fpunch_at(&trans, dst_iter,
					bpos_min(dst_end,
					POS(dst_iter->pos.inode, dst_iter->pos.offset +
					    src_iter->pos.offset - src_want.offset)),
					journal_seq, i_sectors_delta);
			continue;
		}

		BUG_ON(bkey_cmp(dst_iter->pos, dst_want));

		if (!bkey_cmp(dst_iter->pos, dst_end))
			break;

		if (src_k.k->type != KEY_TYPE_reflink_p) {
			bch2_bkey_buf_reassemble(&new_src, c, src_k);
			src_k = bkey_i_to_s_c(new_src.k);

			bch2_cut_front(src_iter->pos, new_src.k);
			bch2_cut_back(src_end, new_src.k);
			bch2_btree_iter_set_pos(src_iter, bkey_start_pos(src_k.k));

			ret = bch2_make_extent_indirect(&trans, src_iter,
							new_src.k);
@ -278,7 +277,7 @@ s64 bch2_remap_range(struct bch_fs *c,
			bkey_reflink_p_init(new_dst.k);

			u64 offset = le64_to_cpu(src_p.v->idx) +
				(src_iter->pos.offset -
				(src_want.offset -
				 bkey_start_offset(src_k.k));

			dst_p->v.idx = cpu_to_le64(offset);
@ -288,20 +287,13 @@ s64 bch2_remap_range(struct bch_fs *c,

		new_dst.k->k.p = dst_iter->pos;
		bch2_key_resize(&new_dst.k->k,
				min(src_k.k->p.offset - src_iter->pos.offset,
				min(src_k.k->p.offset - src_want.offset,
				    dst_end.offset - dst_iter->pos.offset));

		ret = bch2_extent_update(&trans, dst_iter, new_dst.k,
					 &disk_res, journal_seq,
					 new_i_size, i_sectors_delta,
					 true);
		bch2_disk_reservation_put(c, &disk_res);
		if (ret)
			continue;

		dst_done = dst_iter->pos.offset - dst_start.offset;
		src_want = POS(src_start.inode, src_start.offset + dst_done);
		bch2_btree_iter_set_pos(src_iter, src_want);
	}
	bch2_trans_iter_put(&trans, dst_iter);
	bch2_trans_iter_put(&trans, src_iter);

@ -34,6 +34,30 @@ void bch2_indirect_inline_data_to_text(struct printbuf *,
	.val_to_text = bch2_indirect_inline_data_to_text, \
}

static inline const __le64 *bkey_refcount_c(struct bkey_s_c k)
{
	switch (k.k->type) {
	case KEY_TYPE_reflink_v:
		return &bkey_s_c_to_reflink_v(k).v->refcount;
	case KEY_TYPE_indirect_inline_data:
		return &bkey_s_c_to_indirect_inline_data(k).v->refcount;
	default:
		return NULL;
	}
}

static inline __le64 *bkey_refcount(struct bkey_i *k)
{
	switch (k->k.type) {
	case KEY_TYPE_reflink_v:
		return &bkey_i_to_reflink_v(k)->v.refcount;
	case KEY_TYPE_indirect_inline_data:
		return &bkey_i_to_indirect_inline_data(k)->v.refcount;
	default:
		return NULL;
	}
}

s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos,
		     u64, u64 *, u64, s64 *);

@ -982,7 +982,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c)

	mutex_lock(&c->sb_lock);
	SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
	c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALWAYS;
	c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
	ret = bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

@ -999,7 +999,7 @@ static struct jset_entry *jset_entry_init(struct jset_entry **end, size_t size)
	 * The u64s field counts from the start of data, ignoring the shared
	 * fields.
	 */
	entry->u64s = u64s - 1;
	entry->u64s = cpu_to_le16(u64s - 1);

	*end = vstruct_next(*end);
	return entry;
@ -1092,7 +1092,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,

		clock->entry.type = BCH_JSET_ENTRY_clock;
		clock->rw = i;
		clock->time = atomic64_read(&c->io_clock[i].now);
		clock->time = cpu_to_le64(atomic64_read(&c->io_clock[i].now));
	}
}

@ -1109,10 +1109,10 @@ void bch2_fs_mark_clean(struct bch_fs *c)

	SET_BCH_SB_CLEAN(c->disk_sb.sb, true);

	c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_info;
	c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_metadata;
	c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_extents_above_btree_updates);
	c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_btree_updates_journalled);
	c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
	c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_metadata);
	c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_extents_above_btree_updates));
	c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_btree_updates_journalled));

	u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;

@ -509,10 +509,14 @@ static void __bch2_fs_free(struct bch_fs *c)
	kfree(c->unused_inode_hints);
	free_heap(&c->copygc_heap);

	if (c->io_complete_wq )
		destroy_workqueue(c->io_complete_wq );
	if (c->copygc_wq)
		destroy_workqueue(c->copygc_wq);
	if (c->wq)
		destroy_workqueue(c->wq);
	if (c->btree_error_wq)
		destroy_workqueue(c->btree_error_wq);
	if (c->btree_update_wq)
		destroy_workqueue(c->btree_update_wq);

	bch2_free_super(&c->disk_sb);
	kvpfree(c, sizeof(*c));
@ -760,10 +764,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)

	c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus()));

	if (!(c->wq = alloc_workqueue("bcachefs",
	if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
				WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
	    !(c->btree_error_wq = alloc_workqueue("bcachefs_error",
				WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
	    !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
				WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
	    !(c->io_complete_wq = alloc_workqueue("bcachefs_io",
				WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) ||
	    percpu_ref_init(&c->writes, bch2_writes_disabled,
			    PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
	    mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
@ -1437,7 +1445,7 @@ int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,

/* Device add/removal: */

int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
{
	struct btree_trans trans;
	size_t i;

@ -312,7 +312,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
	return 0;
}

void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
{
	pr_buf(out, "%s: ", bch2_btree_ids[c->gc_gens_btree]);
	bch2_bpos_to_text(out, c->gc_gens_pos);