Update bcachefs sources to 3913e0cac3 bcachefs: Journal space calculation fix

Kent Overstreet 2021-05-31 15:05:33 -04:00
parent 4a2acdaf65
commit b61ad35b97
33 changed files with 954 additions and 234 deletions

View File

@ -1 +1 @@
ac3ab6a511717db1644ded49a6f417304abba048
3913e0cac34e0993ab6dde67a2dec1ea485a2e28

View File

@ -49,14 +49,14 @@ DECLARE_EVENT_CLASS(bch_fs,
TP_ARGS(c),
TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
),
TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
),
TP_printk("%pU", __entry->uuid)
TP_printk("%d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
);
DECLARE_EVENT_CLASS(bio,
@ -131,7 +131,7 @@ TRACE_EVENT(journal_reclaim_start,
btree_key_cache_dirty, btree_key_cache_total),
TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
__field(u64, min_nr )
__field(u64, prereserved )
__field(u64, prereserved_total )
@ -142,7 +142,7 @@ TRACE_EVENT(journal_reclaim_start,
),
TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
__entry->min_nr = min_nr;
__entry->prereserved = prereserved;
__entry->prereserved_total = prereserved_total;
@ -152,8 +152,8 @@ TRACE_EVENT(journal_reclaim_start,
__entry->btree_key_cache_total = btree_key_cache_total;
),
TP_printk("%pU min %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
__entry->uuid,
TP_printk("%d,%d min %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->min_nr,
__entry->prereserved,
__entry->prereserved_total,
@ -168,16 +168,18 @@ TRACE_EVENT(journal_reclaim_finish,
TP_ARGS(c, nr_flushed),
TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(u64, nr_flushed )
__field(dev_t, dev )
__field(u64, nr_flushed )
),
TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->nr_flushed = nr_flushed;
__entry->dev = c->dev;
__entry->nr_flushed = nr_flushed;
),
TP_printk("%pU flushed %llu", __entry->uuid, __entry->nr_flushed)
TP_printk("%d%d flushed %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->nr_flushed)
);
/* bset.c: */
@ -194,7 +196,7 @@ DECLARE_EVENT_CLASS(btree_node,
TP_ARGS(c, b),
TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
__field(u8, level )
__field(u8, id )
__field(u64, inode )
@ -202,15 +204,16 @@ DECLARE_EVENT_CLASS(btree_node,
),
TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
__entry->level = b->c.level;
__entry->id = b->c.btree_id;
__entry->inode = b->key.k.p.inode;
__entry->offset = b->key.k.p.offset;
),
TP_printk("%pU %u id %u %llu:%llu",
__entry->uuid, __entry->level, __entry->id,
TP_printk("%d,%d %u id %u %llu:%llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->level, __entry->id,
__entry->inode, __entry->offset)
);
@ -254,32 +257,17 @@ DEFINE_EVENT(btree_node, btree_node_reap,
TP_ARGS(c, b)
);
DECLARE_EVENT_CLASS(btree_node_cannibalize_lock,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c),
TP_STRUCT__entry(
__array(char, uuid, 16 )
),
TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
),
TP_printk("%pU", __entry->uuid)
);
DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock_fail,
DEFINE_EVENT(bch_fs, btree_node_cannibalize_lock_fail,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock,
DEFINE_EVENT(bch_fs, btree_node_cannibalize_lock,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize,
DEFINE_EVENT(bch_fs, btree_node_cannibalize,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
@ -294,18 +282,19 @@ TRACE_EVENT(btree_reserve_get_fail,
TP_ARGS(c, required, cl),
TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
__field(size_t, required )
__field(struct closure *, cl )
),
TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
__entry->required = required;
__entry->cl = cl;
),
TP_printk("%pU required %zu by %p", __entry->uuid,
TP_printk("%d,%d required %zu by %p",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->required, __entry->cl)
);
@ -483,19 +472,20 @@ TRACE_EVENT(move_data,
TP_ARGS(c, sectors_moved, keys_moved),
TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
__field(u64, sectors_moved )
__field(u64, keys_moved )
),
TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
__entry->sectors_moved = sectors_moved;
__entry->keys_moved = keys_moved;
),
TP_printk("%pU sectors_moved %llu keys_moved %llu",
__entry->uuid, __entry->sectors_moved, __entry->keys_moved)
TP_printk("%d,%d sectors_moved %llu keys_moved %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->sectors_moved, __entry->keys_moved)
);
TRACE_EVENT(copygc,
@ -507,7 +497,7 @@ TRACE_EVENT(copygc,
buckets_moved, buckets_not_moved),
TP_STRUCT__entry(
__array(char, uuid, 16 )
__field(dev_t, dev )
__field(u64, sectors_moved )
__field(u64, sectors_not_moved )
__field(u64, buckets_moved )
@ -515,17 +505,39 @@ TRACE_EVENT(copygc,
),
TP_fast_assign(
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->dev = c->dev;
__entry->sectors_moved = sectors_moved;
__entry->sectors_not_moved = sectors_not_moved;
__entry->buckets_moved = buckets_moved;
__entry->buckets_not_moved = buckets_not_moved;
),
TP_printk("%pU sectors moved %llu remain %llu buckets moved %llu remain %llu",
__entry->uuid,
__entry->sectors_moved, __entry->sectors_not_moved,
__entry->buckets_moved, __entry->buckets_not_moved)
TP_printk("%d,%d sectors moved %llu remain %llu buckets moved %llu remain %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->sectors_moved, __entry->sectors_not_moved,
__entry->buckets_moved, __entry->buckets_not_moved)
);
TRACE_EVENT(copygc_wait,
TP_PROTO(struct bch_fs *c,
u64 wait_amount, u64 until),
TP_ARGS(c, wait_amount, until),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(u64, wait_amount )
__field(u64, until )
),
TP_fast_assign(
__entry->dev = c->dev;
__entry->wait_amount = wait_amount;
__entry->until = until;
),
TP_printk("%d,%u waiting for %llu sectors until %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->wait_amount, __entry->until)
);
TRACE_EVENT(trans_get_iter,

View File

@ -263,7 +263,10 @@ do { \
BCH_DEBUG_PARAM(verify_btree_ondisk, \
"Reread btree nodes at various points to verify the " \
"mergesort in the read path against modifications " \
"done in memory")
"done in memory") \
BCH_DEBUG_PARAM(verify_all_btree_replicas, \
"When reading btree nodes, read all replicas and " \
"compare them")
/* Parameters that should only be compiled in in debug mode: */
#define BCH_DEBUG_PARAMS_DEBUG() \
@ -387,6 +390,14 @@ struct gc_pos {
unsigned level;
};
struct reflink_gc {
u64 offset;
u32 size;
u32 refcount;
};
typedef GENRADIX(struct reflink_gc) reflink_gc_table;
struct io_count {
u64 sectors[2][BCH_DATA_NR];
};
@ -564,6 +575,7 @@ struct bch_fs {
int minor;
struct device *chardev;
struct super_block *vfs_sb;
dev_t dev;
char name[40];
/* ro/rw, add/remove/resize devices: */
@ -623,6 +635,7 @@ struct bch_fs {
/* BTREE CACHE */
struct bio_set btree_bio;
struct workqueue_struct *io_complete_wq;
struct btree_root btree_roots[BTREE_ID_NR];
struct mutex btree_root_lock;
@ -660,7 +673,8 @@ struct bch_fs {
struct btree_key_cache btree_key_cache;
struct workqueue_struct *wq;
struct workqueue_struct *btree_update_wq;
struct workqueue_struct *btree_error_wq;
/* copygc needs its own workqueue for index updates.. */
struct workqueue_struct *copygc_wq;
@ -799,6 +813,9 @@ struct bch_fs {
/* REFLINK */
u64 reflink_hint;
reflink_gc_table reflink_gc_table;
size_t reflink_gc_nr;
size_t reflink_gc_idx;
/* VFS IO PATH - fs-io.c */
struct bio_set writepage_bioset;

View File

@ -1344,6 +1344,7 @@ LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64);
LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28);
LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29);
/*
* Features:

View File

@ -1193,13 +1193,11 @@ static struct bkey_packed *bset_search_write_set(const struct btree *b,
static inline void prefetch_four_cachelines(void *p)
{
#if (CONFIG_X86_64 && !defined(__clang__))
asm(".intel_syntax noprefix;"
"prefetcht0 [%0 - 127 + 64 * 0];"
"prefetcht0 [%0 - 127 + 64 * 1];"
"prefetcht0 [%0 - 127 + 64 * 2];"
"prefetcht0 [%0 - 127 + 64 * 3];"
".att_syntax prefix;"
#if CONFIG_X86_64
asm("prefetcht0 (-127 + 64 * 0)(%0);"
"prefetcht0 (-127 + 64 * 1)(%0);"
"prefetcht0 (-127 + 64 * 2)(%0);"
"prefetcht0 (-127 + 64 * 3)(%0);"
:
: "r" (p + 127));
#else

View File

@ -23,6 +23,7 @@
#include "keylist.h"
#include "move.h"
#include "recovery.h"
#include "reflink.h"
#include "replicas.h"
#include "super-io.h"
@ -1282,6 +1283,201 @@ static int bch2_gc_start(struct bch_fs *c,
return 0;
}
static int bch2_gc_reflink_done_initial_fn(struct bch_fs *c, struct bkey_s_c k)
{
struct reflink_gc *r;
const __le64 *refcount = bkey_refcount_c(k);
char buf[200];
int ret = 0;
if (!refcount)
return 0;
r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
if (!r ||
r->offset != k.k->p.offset ||
r->size != k.k->size) {
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
return -EINVAL;
}
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
"reflink key has wrong refcount:\n"
" %s\n"
" should be %u",
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
r->refcount)) {
struct bkey_i *new;
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
if (!new) {
ret = -ENOMEM;
goto fsck_err;
}
bkey_reassemble(new, k);
if (!r->refcount) {
new->k.type = KEY_TYPE_deleted;
new->k.size = 0;
} else {
*bkey_refcount(new) = cpu_to_le64(r->refcount);
}
ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
if (ret)
kfree(new);
}
fsck_err:
return ret;
}
static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
bool metadata_only)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct reflink_gc *r;
size_t idx = 0;
char buf[200];
int ret = 0;
if (metadata_only)
return 0;
if (initial) {
c->reflink_gc_idx = 0;
ret = bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
bch2_gc_reflink_done_initial_fn);
goto out;
}
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
const __le64 *refcount = bkey_refcount_c(k);
if (!refcount)
continue;
r = genradix_ptr(&c->reflink_gc_table, idx);
if (!r ||
r->offset != k.k->p.offset ||
r->size != k.k->size) {
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
ret = -EINVAL;
break;
}
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
"reflink key has wrong refcount:\n"
" %s\n"
" should be %u",
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
r->refcount)) {
struct bkey_i *new;
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
if (!new) {
ret = -ENOMEM;
break;
}
bkey_reassemble(new, k);
if (!r->refcount)
new->k.type = KEY_TYPE_deleted;
else
*bkey_refcount(new) = cpu_to_le64(r->refcount);
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
kfree(new);
if (ret)
break;
}
}
fsck_err:
bch2_trans_iter_put(&trans, iter);
bch2_trans_exit(&trans);
out:
genradix_free(&c->reflink_gc_table);
c->reflink_gc_nr = 0;
return ret;
}
static int bch2_gc_reflink_start_initial_fn(struct bch_fs *c, struct bkey_s_c k)
{
struct reflink_gc *r;
const __le64 *refcount = bkey_refcount_c(k);
if (!refcount)
return 0;
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
GFP_KERNEL);
if (!r)
return -ENOMEM;
r->offset = k.k->p.offset;
r->size = k.k->size;
r->refcount = 0;
return 0;
}
static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
bool metadata_only)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct reflink_gc *r;
int ret;
if (metadata_only)
return 0;
genradix_free(&c->reflink_gc_table);
c->reflink_gc_nr = 0;
if (initial)
return bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
bch2_gc_reflink_start_initial_fn);
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
const __le64 *refcount = bkey_refcount_c(k);
if (!refcount)
continue;
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
GFP_KERNEL);
if (!r) {
ret = -ENOMEM;
break;
}
r->offset = k.k->p.offset;
r->size = k.k->size;
r->refcount = 0;
}
bch2_trans_iter_put(&trans, iter);
bch2_trans_exit(&trans);
return ret;
}
/**
* bch2_gc - walk _all_ references to buckets, and recompute them:
*
@ -1316,7 +1512,8 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
again:
ret = bch2_gc_start(c, metadata_only);
ret = bch2_gc_start(c, metadata_only) ?:
bch2_gc_reflink_start(c, initial, metadata_only);
if (ret)
goto out;
@ -1378,7 +1575,8 @@ out:
bch2_journal_block(&c->journal);
percpu_down_write(&c->mark_lock);
ret = bch2_gc_done(c, initial, metadata_only);
ret = bch2_gc_reflink_done(c, initial, metadata_only) ?:
bch2_gc_done(c, initial, metadata_only);
bch2_journal_unblock(&c->journal);
} else {

View File

@ -521,7 +521,7 @@ enum btree_validate_ret {
\
switch (write) { \
case READ: \
bch_err(c, "%s", _buf2); \
bch_err(c, "%s", _buf2); \
\
switch (type) { \
case BTREE_ERR_FIXABLE: \
@ -815,6 +815,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
unsigned u64s;
unsigned nonblacklisted_written = 0;
int ret, retry_read = 0, write = READ;
b->version_ondisk = U16_MAX;
@ -934,15 +935,31 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
sort_iter_add(iter,
vstruct_idx(i, whiteout_u64s),
vstruct_last(i));
nonblacklisted_written = b->written;
}
for (bne = write_block(b);
bset_byte_offset(b, bne) < btree_bytes(c);
bne = (void *) bne + block_bytes(c))
btree_err_on(bne->keys.seq == b->data->keys.seq,
btree_err_on(bne->keys.seq == b->data->keys.seq &&
!bch2_journal_seq_is_blacklisted(c,
le64_to_cpu(bne->keys.journal_seq),
true),
BTREE_ERR_WANT_RETRY, c, ca, b, NULL,
"found bset signature after last bset");
/*
* Blacklisted bsets are those that were written after the most recent
* (flush) journal write. Since there wasn't a flush, they may not have
* made it to all devices - which means we shouldn't write new bsets
* after them, as that could leave a gap and then reads from that device
* wouldn't find all the bsets in that btree node - which means it's
* important that we start writing new bsets after the most recent _non_
* blacklisted bset:
*/
b->written = nonblacklisted_written;
sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
sorted->keys.u64s = 0;
@ -1027,8 +1044,8 @@ static void btree_node_read_work(struct work_struct *work)
struct btree_read_bio *rb =
container_of(work, struct btree_read_bio, work);
struct bch_fs *c = rb->c;
struct btree *b = rb->b;
struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
struct btree *b = rb->bio.bi_private;
struct bio *bio = &rb->bio;
struct bch_io_failures failed = { .nr = 0 };
char buf[200];
@ -1101,7 +1118,263 @@ static void btree_node_read_endio(struct bio *bio)
bch2_latency_acct(ca, rb->start_time, READ);
}
queue_work(system_unbound_wq, &rb->work);
queue_work(c->io_complete_wq, &rb->work);
}
struct btree_node_read_all {
struct closure cl;
struct bch_fs *c;
struct btree *b;
unsigned nr;
void *buf[BCH_REPLICAS_MAX];
struct bio *bio[BCH_REPLICAS_MAX];
int err[BCH_REPLICAS_MAX];
};
static unsigned btree_node_sectors_written(struct bch_fs *c, void *data)
{
struct btree_node *bn = data;
struct btree_node_entry *bne;
unsigned offset = 0;
if (le64_to_cpu(bn->magic) != bset_magic(c))
return 0;
while (offset < c->opts.btree_node_size) {
if (!offset) {
offset += vstruct_sectors(bn, c->block_bits);
} else {
bne = data + (offset << 9);
if (bne->keys.seq != bn->keys.seq)
break;
offset += vstruct_sectors(bne, c->block_bits);
}
}
return offset;
}
static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *data)
{
struct btree_node *bn = data;
struct btree_node_entry *bne;
if (!offset)
return false;
while (offset < c->opts.btree_node_size) {
bne = data + (offset << 9);
if (bne->keys.seq == bn->keys.seq)
return true;
offset++;
}
return false;
}
static void btree_node_read_all_replicas_done(struct closure *cl)
{
struct btree_node_read_all *ra =
container_of(cl, struct btree_node_read_all, cl);
struct bch_fs *c = ra->c;
struct btree *b = ra->b;
bool have_good_copy = false;
bool dump_bset_maps = false;
bool have_retry = false;
int ret = 0, write = READ;
unsigned i, written, written2;
__le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2
? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0;
for (i = 0; i < ra->nr; i++) {
if (ra->err[i])
continue;
if (!have_good_copy) {
memcpy(b->data, ra->buf[i], btree_bytes(c));
have_good_copy = true;
written = btree_node_sectors_written(c, b->data);
}
/* Try to get the right btree node: */
if (have_good_copy &&
seq &&
b->data->keys.seq != seq &&
((struct btree_node *) ra->buf[i])->keys.seq == seq) {
memcpy(b->data, ra->buf[i], btree_bytes(c));
written = btree_node_sectors_written(c, b->data);
}
written2 = btree_node_sectors_written(c, ra->buf[i]);
if (btree_err_on(written2 != written, BTREE_ERR_FIXABLE, c, NULL, b, NULL,
"btree node sectors written mismatch: %u != %u",
written, written2) ||
btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]),
BTREE_ERR_FIXABLE, c, NULL, b, NULL,
"found bset signature after last bset") ||
btree_err_on(memcmp(b->data, ra->buf[i], written << 9),
BTREE_ERR_FIXABLE, c, NULL, b, NULL,
"btree node replicas content mismatch"))
dump_bset_maps = true;
if (written2 > written) {
written = written2;
memcpy(b->data, ra->buf[i], btree_bytes(c));
}
}
fsck_err:
if (dump_bset_maps) {
for (i = 0; i < ra->nr; i++) {
char buf[200];
struct printbuf out = PBUF(buf);
struct btree_node *bn = ra->buf[i];
struct btree_node_entry *bne = NULL;
unsigned offset = 0, sectors;
bool gap = false;
if (ra->err[i])
continue;
while (offset < c->opts.btree_node_size) {
if (!offset) {
sectors = vstruct_sectors(bn, c->block_bits);
} else {
bne = ra->buf[i] + (offset << 9);
if (bne->keys.seq != bn->keys.seq)
break;
sectors = vstruct_sectors(bne, c->block_bits);
}
pr_buf(&out, " %u-%u", offset, offset + sectors);
if (bne && bch2_journal_seq_is_blacklisted(c,
le64_to_cpu(bne->keys.journal_seq), false))
pr_buf(&out, "*");
offset += sectors;
}
while (offset < c->opts.btree_node_size) {
bne = ra->buf[i] + (offset << 9);
if (bne->keys.seq == bn->keys.seq) {
if (!gap)
pr_buf(&out, " GAP");
gap = true;
sectors = vstruct_sectors(bne, c->block_bits);
pr_buf(&out, " %u-%u", offset, offset + sectors);
if (bch2_journal_seq_is_blacklisted(c,
le64_to_cpu(bne->keys.journal_seq), false))
pr_buf(&out, "*");
}
offset++;
}
bch_err(c, "replica %u:%s", i, buf);
}
}
if (have_good_copy)
bch2_btree_node_read_done(c, NULL, b, false);
else
set_btree_node_read_error(b);
for (i = 0; i < ra->nr; i++) {
mempool_free(ra->buf[i], &c->btree_bounce_pool);
bio_put(ra->bio[i]);
}
closure_debug_destroy(&ra->cl);
kfree(ra);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
}
static void btree_node_read_all_replicas_endio(struct bio *bio)
{
struct btree_read_bio *rb =
container_of(bio, struct btree_read_bio, bio);
struct bch_fs *c = rb->c;
struct btree_node_read_all *ra = rb->ra;
if (rb->have_ioref) {
struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
bch2_latency_acct(ca, rb->start_time, READ);
}
ra->err[rb->idx] = bio->bi_status;
closure_put(&ra->cl);
}
/*
* XXX This allocates multiple times from the same mempools, and can deadlock
* under sufficient memory pressure (but is only a debug path)
*/
static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool sync)
{
struct bkey_s_c k = bkey_i_to_s_c(&b->key);
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded pick;
struct btree_node_read_all *ra;
unsigned i;
ra = kzalloc(sizeof(*ra), GFP_NOFS);
if (!ra)
return -ENOMEM;
closure_init(&ra->cl, NULL);
ra->c = c;
ra->b = b;
ra->nr = bch2_bkey_nr_ptrs(k);
for (i = 0; i < ra->nr; i++) {
ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
ra->bio[i] = bio_alloc_bioset(GFP_NOFS, buf_pages(ra->buf[i],
btree_bytes(c)),
&c->btree_bio);
}
i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) {
struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
struct btree_read_bio *rb =
container_of(ra->bio[i], struct btree_read_bio, bio);
rb->c = c;
rb->b = b;
rb->ra = ra;
rb->start_time = local_clock();
rb->have_ioref = bch2_dev_get_ioref(ca, READ);
rb->idx = i;
rb->pick = pick;
rb->bio.bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META;
rb->bio.bi_iter.bi_sector = pick.ptr.offset;
rb->bio.bi_end_io = btree_node_read_all_replicas_endio;
bch2_bio_map(&rb->bio, ra->buf[i], btree_bytes(c));
if (rb->have_ioref) {
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
bio_sectors(&rb->bio));
bio_set_dev(&rb->bio, ca->disk_sb.bdev);
closure_get(&ra->cl);
submit_bio(&rb->bio);
} else {
ra->err[i] = BLK_STS_REMOVED;
}
i++;
}
if (sync) {
closure_sync(&ra->cl);
btree_node_read_all_replicas_done(&ra->cl);
} else {
continue_at(&ra->cl, btree_node_read_all_replicas_done,
c->io_complete_wq);
}
return 0;
}
void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
@ -1117,6 +1390,12 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
btree_pos_to_text(&PBUF(buf), c, b);
trace_btree_read(c, b);
set_btree_node_read_in_flight(b);
if (bch2_verify_all_btree_replicas &&
!btree_node_read_all_replicas(c, b, sync))
return;
ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
NULL, &pick);
if (bch2_fs_fatal_err_on(ret <= 0, c,
@ -1133,6 +1412,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
&c->btree_bio);
rb = container_of(bio, struct btree_read_bio, bio);
rb->c = c;
rb->b = b;
rb->ra = NULL;
rb->start_time = local_clock();
rb->have_ioref = bch2_dev_get_ioref(ca, READ);
rb->pick = pick;
@ -1140,11 +1421,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META;
bio->bi_iter.bi_sector = pick.ptr.offset;
bio->bi_end_io = btree_node_read_endio;
bio->bi_private = b;
bch2_bio_map(bio, b->data, btree_bytes(c));
set_btree_node_read_in_flight(b);
if (rb->have_ioref) {
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
bio_sectors(bio));
@ -1153,7 +1431,6 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
if (sync) {
submit_bio_wait(bio);
bio->bi_private = b;
btree_node_read_work(&rb->work);
} else {
submit_bio(bio);
@ -1164,8 +1441,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
if (sync)
btree_node_read_work(&rb->work);
else
queue_work(system_unbound_wq, &rb->work);
queue_work(c->io_complete_wq, &rb->work);
}
}
@ -1332,7 +1608,7 @@ static void btree_node_write_work(struct work_struct *work)
bio_list_add(&c->btree_write_error_list, &wbio->wbio.bio);
spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
queue_work(c->wq, &c->btree_write_error_work);
queue_work(c->btree_error_wq, &c->btree_write_error_work);
return;
}
@ -1371,7 +1647,7 @@ static void btree_node_write_endio(struct bio *bio)
container_of(orig, struct btree_write_bio, wbio);
INIT_WORK(&wb->work, btree_node_write_work);
queue_work(system_unbound_wq, &wb->work);
queue_work(c->io_complete_wq, &wb->work);
}
}
@ -1441,6 +1717,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
return;
if (old & (1 << BTREE_NODE_write_in_flight)) {
/*
* XXX waiting on btree writes with btree locks held -
* this can deadlock, and we hit the write error path
*/
btree_node_wait_on_io(b);
continue;
}
@ -1631,7 +1911,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
atomic64_add(sectors_to_write, &c->btree_writes_sectors);
INIT_WORK(&wbio->work, btree_write_submit);
schedule_work(&wbio->work);
queue_work(c->io_complete_wq, &wbio->work);
return;
err:
set_btree_node_noevict(b);

View File

@ -13,6 +13,7 @@ struct bch_fs;
struct btree_write;
struct btree;
struct btree_iter;
struct btree_node_read_all;
static inline bool btree_node_dirty(struct btree *b)
{
@ -33,8 +34,11 @@ static inline void clear_btree_node_dirty(struct bch_fs *c, struct btree *b)
struct btree_read_bio {
struct bch_fs *c;
struct btree *b;
struct btree_node_read_all *ra;
u64 start_time;
unsigned have_ioref:1;
unsigned idx:7;
struct extent_ptr_decoded pick;
struct work_struct work;
struct bio bio;

View File

@ -2260,6 +2260,7 @@ static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
unsigned expected_nr_iters,
size_t expected_mem_bytes)
__acquires(&c->btree_trans_barrier)
{
memset(trans, 0, sizeof(*trans));
trans->c = c;
@ -2292,6 +2293,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
}
int bch2_trans_exit(struct btree_trans *trans)
__releases(&c->btree_trans_barrier)
{
struct bch_fs *c = trans->c;

View File

@ -550,6 +550,22 @@ static void btree_update_nodes_written(struct btree_update *as)
BUG_ON(!journal_pin_active(&as->journal));
/*
* Wait for any in flight writes to finish before we free the old nodes
* on disk:
*/
for (i = 0; i < as->nr_old_nodes; i++) {
struct btree_node *bn = READ_ONCE(as->old_nodes[i]->data);
/*
* This is technically a use after free, but it's just a read -
* though it might cause problems in userspace, where freeing the
* buffer may unmap it:
*/
if (bn && bn->keys.seq == as->old_nodes_seq[i])
btree_node_wait_on_io(as->old_nodes[i]);
}
/*
* We did an update to a parent node where the pointers we added pointed
* to child nodes that weren't written yet: now, the child nodes have
@ -889,13 +905,9 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
btree_update_will_delete_key(as, &b->key);
/*
* XXX: Waiting on io with btree node locks held, we don't want to be
* doing this. We can't have btree writes happening after the space has
* been freed, but we really only need to block before
* btree_update_nodes_written_trans() happens.
*/
btree_node_wait_on_io(b);
as->old_nodes[as->nr_old_nodes] = b;
as->old_nodes_seq[as->nr_old_nodes] = b->data->keys.seq;
as->nr_old_nodes++;
}
void bch2_btree_update_done(struct btree_update *as)
@ -908,7 +920,8 @@ void bch2_btree_update_done(struct btree_update *as)
bch2_btree_reserve_put(as);
continue_at(&as->cl, btree_update_set_nodes_written, system_freezable_wq);
continue_at(&as->cl, btree_update_set_nodes_written,
as->c->btree_interior_update_worker);
}
struct btree_update *
@ -1826,7 +1839,10 @@ void async_btree_node_rewrite_work(struct work_struct *work)
void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
{
struct async_btree_rewrite *a = kmalloc(sizeof(*a), GFP_NOFS);
struct async_btree_rewrite *a;
if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
return;
if (!percpu_ref_tryget(&c->writes))
return;
@ -1844,7 +1860,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
a->seq = b->data->keys.seq;
INIT_WORK(&a->work, async_btree_node_rewrite_work);
queue_work(system_long_wq, &a->work);
queue_work(c->btree_interior_update_worker, &a->work);
}
static void __bch2_btree_node_update_key(struct bch_fs *c,

View File

@ -92,6 +92,10 @@ struct btree_update {
struct btree *new_nodes[BTREE_UPDATE_NODES_MAX];
unsigned nr_new_nodes;
struct btree *old_nodes[BTREE_UPDATE_NODES_MAX];
__le64 old_nodes_seq[BTREE_UPDATE_NODES_MAX];
unsigned nr_old_nodes;
open_bucket_idx_t open_buckets[BTREE_UPDATE_NODES_MAX *
BCH_REPLICAS_MAX];
open_bucket_idx_t nr_open_buckets;

View File

@ -14,6 +14,7 @@
#include "ec.h"
#include "error.h"
#include "movinggc.h"
#include "reflink.h"
#include "replicas.h"
#include <linux/preempt.h>
@ -1072,6 +1073,124 @@ static int bch2_mark_stripe(struct bch_fs *c,
return 0;
}
static int __reflink_p_frag_references(struct bkey_s_c_reflink_p p,
u64 p_start, u64 p_end,
u64 v_start, u64 v_end)
{
if (p_start == p_end)
return false;
p_start += le64_to_cpu(p.v->idx);
p_end += le64_to_cpu(p.v->idx);
if (p_end <= v_start)
return false;
if (p_start >= v_end)
return false;
return true;
}
static int reflink_p_frag_references(struct bkey_s_c_reflink_p p,
u64 start, u64 end,
struct bkey_s_c k)
{
return __reflink_p_frag_references(p, start, end,
bkey_start_offset(k.k),
k.k->p.offset);
}
static int __bch2_mark_reflink_p(struct bch_fs *c,
struct bkey_s_c_reflink_p p,
u64 idx, unsigned sectors,
unsigned front_frag,
unsigned back_frag,
unsigned flags,
size_t *r_idx)
{
struct reflink_gc *r;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
int frags_referenced;
while (1) {
if (*r_idx >= c->reflink_gc_nr)
goto not_found;
r = genradix_ptr(&c->reflink_gc_table, *r_idx);
BUG_ON(!r);
if (r->offset > idx)
break;
(*r_idx)++;
}
frags_referenced =
__reflink_p_frag_references(p, 0, front_frag,
r->offset - r->size, r->offset) +
__reflink_p_frag_references(p, back_frag, p.k->size,
r->offset - r->size, r->offset);
if (frags_referenced == 2) {
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
add = -add;
} else if (frags_referenced == 1) {
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
add = 0;
}
BUG_ON((s64) r->refcount + add < 0);
r->refcount += add;
return min_t(u64, sectors, r->offset - idx);
not_found:
bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
bch2_inconsistent_error(c);
return -EIO;
}
static int bch2_mark_reflink_p(struct bch_fs *c,
struct bkey_s_c_reflink_p p, unsigned offset,
s64 sectors, unsigned flags)
{
u64 idx = le64_to_cpu(p.v->idx) + offset;
struct reflink_gc *ref;
size_t l, r, m;
unsigned front_frag, back_frag;
s64 ret = 0;
if (sectors < 0)
sectors = -sectors;
BUG_ON(offset + sectors > p.k->size);
front_frag = offset;
back_frag = offset + sectors;
l = 0;
r = c->reflink_gc_nr;
while (l < r) {
m = l + (r - l) / 2;
ref = genradix_ptr(&c->reflink_gc_table, m);
if (ref->offset <= idx)
l = m + 1;
else
r = m;
}
while (sectors) {
ret = __bch2_mark_reflink_p(c, p, idx, sectors,
front_frag, back_frag, flags, &l);
if (ret < 0)
return ret;
idx += ret;
sectors -= ret;
}
return 0;
}
static int bch2_mark_key_locked(struct bch_fs *c,
struct bkey_s_c old,
struct bkey_s_c new,
@ -1127,6 +1246,10 @@ static int bch2_mark_key_locked(struct bch_fs *c,
fs_usage->persistent_reserved[replicas - 1] += sectors;
break;
}
case KEY_TYPE_reflink_p:
ret = bch2_mark_reflink_p(c, bkey_s_c_to_reflink_p(k),
offset, sectors, flags);
break;
}
preempt_enable();
@ -1689,35 +1812,6 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
return ret;
}
static __le64 *bkey_refcount(struct bkey_i *k)
{
switch (k->k.type) {
case KEY_TYPE_reflink_v:
return &bkey_i_to_reflink_v(k)->v.refcount;
case KEY_TYPE_indirect_inline_data:
return &bkey_i_to_indirect_inline_data(k)->v.refcount;
default:
return NULL;
}
}
static bool reflink_p_frag_references(struct bkey_s_c_reflink_p p,
u64 start, u64 end,
struct bkey_s_c k)
{
if (start == end)
return false;
start += le64_to_cpu(p.v->idx);
end += le64_to_cpu(p.v->idx);
if (end <= bkey_start_offset(k.k))
return false;
if (start >= k.k->p.offset)
return false;
return true;
}
static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c_reflink_p p,
u64 idx, unsigned sectors,
@ -1731,6 +1825,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_i *n;
__le64 *refcount;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
int frags_referenced;
s64 ret;
ret = trans_get_key(trans, BTREE_ID_reflink,
@ -1738,18 +1833,20 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
if (ret < 0)
return ret;
if (reflink_p_frag_references(p, 0, front_frag, k) &&
reflink_p_frag_references(p, back_frag, p.k->size, k)) {
sectors = min_t(u64, sectors, k.k->p.offset - idx);
frags_referenced =
reflink_p_frag_references(p, 0, front_frag, k) +
reflink_p_frag_references(p, back_frag, p.k->size, k);
if (frags_referenced == 2) {
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
add = -add;
} else if (reflink_p_frag_references(p, 0, front_frag, k) ||
reflink_p_frag_references(p, back_frag, p.k->size, k)) {
} else if (frags_referenced == 1) {
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
goto out;
}
sectors = min_t(u64, sectors, k.k->p.offset - idx);
n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
ret = PTR_ERR_OR_ZERO(n);
if (ret)
@ -1804,14 +1901,13 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors,
front_frag, back_frag, flags);
if (ret < 0)
break;
return ret;
idx += ret;
sectors = max_t(s64, 0LL, sectors - ret);
ret = 0;
idx += ret;
sectors -= ret;
}
return ret;
return 0;
}
int bch2_trans_mark_key(struct btree_trans *trans,

View File

@ -23,6 +23,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
struct btree_iter *inode_iter = NULL;
struct bch_hash_info hash = bch2_hash_info_init(c, new_inode);
u64 now = bch2_current_time(c);
u64 cpu = raw_smp_processor_id();
u64 dir_offset = 0;
int ret;
@ -36,7 +37,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
if (!name)
new_inode->bi_flags |= BCH_INODE_UNLINKED;
inode_iter = bch2_inode_create(trans, new_inode, U32_MAX);
inode_iter = bch2_inode_create(trans, new_inode, U32_MAX, cpu);
ret = PTR_ERR_OR_ZERO(inode_iter);
if (ret)
goto err;

View File

@ -13,6 +13,9 @@
#include <linux/mount.h>
#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32)
#define FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */
#define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
#define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
struct flags_set {
unsigned mask;
@ -247,11 +250,54 @@ err1:
return ret;
}
static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
{
u32 flags;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (get_user(flags, arg))
return -EFAULT;
bch_notice(c, "shutdown by ioctl type %u", flags);
down_write(&c->vfs_sb->s_umount);
switch (flags) {
case FSOP_GOING_FLAGS_DEFAULT:
ret = freeze_bdev(c->vfs_sb->s_bdev);
if (ret)
goto err;
bch2_journal_flush(&c->journal);
c->vfs_sb->s_flags |= SB_RDONLY;
bch2_fs_emergency_read_only(c);
thaw_bdev(c->vfs_sb->s_bdev);
break;
case FSOP_GOING_FLAGS_LOGFLUSH:
bch2_journal_flush(&c->journal);
fallthrough;
case FSOP_GOING_FLAGS_NOLOGFLUSH:
c->vfs_sb->s_flags |= SB_RDONLY;
bch2_fs_emergency_read_only(c);
break;
default:
ret = -EINVAL;
break;
}
err:
up_write(&c->vfs_sb->s_umount);
return ret;
}
long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
struct bch_inode_info *inode = file_bch_inode(file);
struct super_block *sb = inode->v.i_sb;
struct bch_fs *c = sb->s_fs_info;
struct bch_fs *c = inode->v.i_sb->s_fs_info;
switch (cmd) {
case FS_IOC_GETFLAGS:
@ -276,15 +322,7 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return -ENOTTY;
case FS_IOC_GOINGDOWN:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
down_write(&sb->s_umount);
sb->s_flags |= SB_RDONLY;
if (bch2_fs_emergency_read_only(c))
bch_err(c, "emergency read only due to ioctl");
up_write(&sb->s_umount);
return 0;
return bch2_ioc_goingdown(c, (u32 __user *) arg);
default:
return bch2_fs_ioctl(c, cmd, (void __user *) arg);

View File

@ -1578,6 +1578,8 @@ got_sb:
break;
}
c->dev = sb->s_dev;
#ifdef CONFIG_BCACHEFS_POSIX_ACL
if (c->opts.acl)
sb->s_flags |= SB_POSIXACL;

View File

@ -472,23 +472,28 @@ static inline u32 bkey_generation(struct bkey_s_c k)
struct btree_iter *bch2_inode_create(struct btree_trans *trans,
struct bch_inode_unpacked *inode_u,
u32 snapshot)
u32 snapshot, u64 cpu)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter = NULL;
struct bkey_s_c k;
u64 min, max, start, pos, *hint;
int ret = 0;
unsigned bits = (c->opts.inodes_32bit ? 31 : 63);
u64 cpu = raw_smp_processor_id();
unsigned bits = (c->opts.inodes_32bit
? 31 : 63) - c->inode_shard_bits;
if (c->opts.shard_inode_numbers) {
bits -= c->inode_shard_bits;
min = (cpu << bits);
max = (cpu << bits) | ~(ULLONG_MAX << bits);
min = (cpu << bits);
max = (cpu << bits) | ~(ULLONG_MAX << bits);
min = max_t(u64, min, BLOCKDEV_INODE_MAX);
hint = c->unused_inode_hints + cpu;
min = max_t(u64, min, BLOCKDEV_INODE_MAX);
hint = c->unused_inode_hints + cpu;
} else {
min = BLOCKDEV_INODE_MAX;
max = ~(ULLONG_MAX << bits);
hint = c->unused_inode_hints;
}
start = READ_ONCE(*hint);

View File

@ -70,7 +70,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
struct bch_inode_unpacked *);
struct btree_iter *bch2_inode_create(struct btree_trans *,
struct bch_inode_unpacked *, u32);
struct bch_inode_unpacked *, u32, u64);
int bch2_inode_rm(struct bch_fs *, u64, bool);

View File

@ -1439,7 +1439,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
bch2_migrate_read_done(&op->write, rbio);
closure_init(cl, NULL);
closure_call(&op->write.op.cl, bch2_write, c->wq, cl);
closure_call(&op->write.op.cl, bch2_write, c->btree_update_wq, cl);
closure_return_with_destructor(cl, promote_done);
}
@ -1822,6 +1822,13 @@ static void __bch2_read_endio(struct work_struct *work)
if (bch2_crc_cmp(csum, rbio->pick.crc.csum))
goto csum_err;
/*
* XXX
* We need to rework the narrow_crcs path to deliver the read completion
* first, and then punt to a different workqueue, otherwise we're
* holding up reads while doing btree updates which is bad for memory
* reclaim.
*/
if (unlikely(rbio->narrow_crcs))
bch2_rbio_narrow_crcs(rbio);

View File

@ -58,7 +58,7 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
{
return op->alloc_reserve == RESERVE_MOVINGGC
? op->c->copygc_wq
: op->c->wq;
: op->c->btree_update_wq;
}
int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,

View File

@ -118,7 +118,9 @@ void bch2_journal_halt(struct journal *j)
void __bch2_journal_buf_put(struct journal *j)
{
closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
}
/*
@ -304,7 +306,7 @@ static int journal_entry_open(struct journal *j)
j->res_get_blocked_start);
j->res_get_blocked_start = 0;
mod_delayed_work(system_freezable_wq,
mod_delayed_work(c->io_complete_wq,
&j->write_work,
msecs_to_jiffies(j->write_delay_ms));
journal_wake(j);
@ -805,10 +807,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
long b;
if (new_fs) {
percpu_down_read(&c->mark_lock);
b = bch2_bucket_alloc_new_fs(ca);
if (b < 0) {
percpu_up_read(&c->mark_lock);
ret = -ENOSPC;
goto err;
}
@ -825,7 +825,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
b = sector_to_bucket(ca, ob->ptr.offset);
}
spin_lock(&c->journal.lock);
if (c)
spin_lock(&c->journal.lock);
/*
* XXX
@ -852,14 +853,14 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
if (pos <= ja->cur_idx)
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
spin_unlock(&c->journal.lock);
if (c)
spin_unlock(&c->journal.lock);
if (new_fs) {
bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal,
ca->mi.bucket_size,
gc_phase(GC_PHASE_SB),
0);
percpu_up_read(&c->mark_lock);
} else {
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_trans_mark_metadata_bucket(&trans, ca,

View File

@ -834,7 +834,7 @@ static void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
unsigned i;
for (i = 0; i < j->nr_ptrs; i++) {
struct bch_dev *ca = c->devs[j->ptrs[i].dev];
struct bch_dev *ca = bch_dev_bkey_exists(c, j->ptrs[i].dev);
u64 offset;
div64_u64_rem(j->ptrs[i].offset, ca->mi.bucket_size, &offset);
@ -1233,8 +1233,6 @@ static void journal_write_done(struct closure *cl)
struct journal *j = container_of(cl, struct journal, io);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *w = journal_last_unwritten_buf(j);
struct bch_devs_list devs =
bch2_bkey_devs(bkey_i_to_s_c(&w->key));
struct bch_replicas_padded replicas;
union journal_res_state old, new;
u64 v, seq;
@ -1242,11 +1240,12 @@ static void journal_write_done(struct closure *cl)
bch2_time_stats_update(j->write_time, j->write_start_time);
if (!devs.nr) {
if (!w->devs_written.nr) {
bch_err(c, "unable to write journal to sufficient devices");
err = -EIO;
} else {
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, devs);
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
w->devs_written);
if (bch2_mark_replicas(c, &replicas.e))
err = -EIO;
}
@ -1258,7 +1257,7 @@ static void journal_write_done(struct closure *cl)
seq = le64_to_cpu(w->data->seq);
if (seq >= j->pin.front)
journal_seq_pin(j, seq)->devs = devs;
journal_seq_pin(j, seq)->devs = w->devs_written;
j->seq_ondisk = seq;
if (err && (!j->err_seq || seq < j->err_seq))
@ -1296,27 +1295,27 @@ static void journal_write_done(struct closure *cl)
journal_wake(j);
if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
mod_delayed_work(system_freezable_wq, &j->write_work, 0);
mod_delayed_work(c->io_complete_wq, &j->write_work, 0);
spin_unlock(&j->lock);
if (new.unwritten_idx != new.idx &&
!journal_state_count(new, new.unwritten_idx))
closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
}
static void journal_write_endio(struct bio *bio)
{
struct bch_dev *ca = bio->bi_private;
struct journal *j = &ca->fs->journal;
struct journal_buf *w = journal_last_unwritten_buf(j);
unsigned long flags;
if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write error: %s",
if (bch2_dev_io_err_on(bio->bi_status, ca, "error writing journal entry %llu: %s",
le64_to_cpu(w->data->seq),
bch2_blk_status_to_str(bio->bi_status)) ||
bch2_meta_write_fault("journal")) {
struct journal_buf *w = journal_last_unwritten_buf(j);
unsigned long flags;
spin_lock_irqsave(&j->err_lock, flags);
bch2_bkey_drop_device(bkey_i_to_s(&w->key), ca->dev_idx);
bch2_dev_list_drop_dev(&w->devs_written, ca->dev_idx);
spin_unlock_irqrestore(&j->err_lock, flags);
}
@ -1370,7 +1369,7 @@ static void do_journal_write(struct closure *cl)
le64_to_cpu(w->data->seq);
}
continue_at(cl, journal_write_done, system_highpri_wq);
continue_at(cl, journal_write_done, c->io_complete_wq);
return;
}
@ -1402,7 +1401,8 @@ void bch2_journal_write(struct closure *cl)
test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)) {
w->noflush = true;
SET_JSET_NO_FLUSH(jset, true);
jset->last_seq = w->last_seq = 0;
jset->last_seq = 0;
w->last_seq = 0;
j->nr_noflush_writes++;
} else {
@ -1509,14 +1509,12 @@ retry_alloc:
journal_debug_buf);
kfree(journal_debug_buf);
bch2_fatal_error(c);
continue_at(cl, journal_write_done, system_highpri_wq);
continue_at(cl, journal_write_done, c->io_complete_wq);
return;
}
/*
* XXX: we really should just disable the entire journal in nochanges
* mode
*/
w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
if (c->opts.nochanges)
goto no_io;
@ -1542,14 +1540,14 @@ retry_alloc:
bch2_bucket_seq_cleanup(c);
continue_at(cl, do_journal_write, system_highpri_wq);
continue_at(cl, do_journal_write, c->io_complete_wq);
return;
no_io:
bch2_bucket_seq_cleanup(c);
continue_at(cl, journal_write_done, system_highpri_wq);
continue_at(cl, journal_write_done, c->io_complete_wq);
return;
err:
bch2_inconsistent_error(c);
continue_at(cl, journal_write_done, system_highpri_wq);
continue_at(cl, journal_write_done, c->io_complete_wq);
}

View File

@ -93,6 +93,10 @@ journal_dev_space_available(struct journal *j, struct bch_dev *ca,
* until we write it out - thus, account for it here:
*/
while ((unwritten = get_unwritten_sectors(j, &idx))) {
/* entry won't fit on this device, skip: */
if (unwritten > ca->mi.bucket_size)
continue;
if (unwritten >= sectors) {
if (!buckets) {
sectors = 0;
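
For context on the fix above: a pending journal entry larger than a device's bucket size can never be written to that device, so it should be skipped rather than charged against that device's remaining journal space. Below is a minimal standalone sketch of that accounting, using simplified hypothetical names (struct dev, space_available) and made-up sample sizes rather than the actual bcachefs structures:

#include <stdio.h>

struct dev {
	unsigned bucket_size;	/* sectors per journal bucket */
	unsigned free_buckets;	/* journal buckets not yet used */
};

/*
 * Sectors still available on @d after accounting for @nr pending
 * (unwritten) journal entries, mirroring the skip in the hunk above:
 * an entry bigger than bucket_size can't land on this device at all.
 */
static unsigned space_available(const struct dev *d,
				const unsigned *unwritten, unsigned nr)
{
	unsigned buckets = d->free_buckets;
	unsigned sectors = 0;	/* room left in the currently open bucket */

	for (unsigned i = 0; i < nr; i++) {
		/* entry won't fit on this device, skip: */
		if (unwritten[i] > d->bucket_size)
			continue;

		if (unwritten[i] >= sectors) {
			/* entry has to start in a fresh bucket */
			if (!buckets)
				return 0;
			buckets--;
			sectors = d->bucket_size;
		}
		sectors -= unwritten[i];
	}

	return buckets * d->bucket_size + sectors;
}

int main(void)
{
	struct dev small = { .bucket_size = 256, .free_buckets = 4 };
	unsigned pending[] = { 128, 512, 64 };	/* 512 is skipped: too big for a 256-sector bucket */

	printf("available: %u sectors\n", space_available(&small, pending, 3));
	return 0;
}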

View File

@ -111,8 +111,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
bl->start[nr].start = cpu_to_le64(start);
bl->start[nr].end = cpu_to_le64(end);
out_write_sb:
c->disk_sb.sb->features[0] |=
1ULL << BCH_FEATURE_journal_seq_blacklist_v3;
c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3);
ret = bch2_write_super(c);
out:
@ -298,8 +297,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work)
BUG_ON(new_nr && !bl);
if (!new_nr)
c->disk_sb.sb->features[0] &=
~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3);
c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3));
bch2_write_super(c);
}

View File

@ -21,6 +21,7 @@ struct journal_buf {
struct jset *data;
__BKEY_PADDED(key, BCH_REPLICAS_MAX);
struct bch_devs_list devs_written;
struct closure_waitlist wait;
u64 last_seq; /* copy of data->last_seq */

View File

@ -523,6 +523,11 @@ static int lookup_inode(struct btree_trans *trans, struct bpos pos,
if (ret)
goto err;
if (!k.k || bkey_cmp(k.k->p, pos)) {
ret = -ENOENT;
goto err;
}
ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO;
if (ret)
goto err;
@ -921,8 +926,8 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
rewrite_old_nodes_pred, c, stats);
if (!ret) {
mutex_lock(&c->sb_lock);
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_extents_above_btree_updates_done;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_bformat_overflow_done;
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
c->disk_sb.sb->version_min = c->disk_sb.sb->version;
bch2_write_super(c);
mutex_unlock(&c->sb_lock);

View File

@ -317,6 +317,8 @@ static int bch2_copygc_thread(void *arg)
set_freezable();
while (!kthread_should_stop()) {
cond_resched();
if (kthread_wait_freezable(c->copy_gc_enabled))
break;
@ -324,6 +326,7 @@ static int bch2_copygc_thread(void *arg)
wait = bch2_copygc_wait_amount(c);
if (wait > clock->max_slop) {
trace_copygc_wait(c, wait, last + wait);
c->copygc_wait = last + wait;
bch2_kthread_io_clock_wait(clock, last + wait,
MAX_SCHEDULE_TIMEOUT);

View File

@ -165,8 +165,13 @@ enum opt_type {
x(inodes_32bit, u8, \
OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH_SB_INODE_32BIT, false, \
BCH_SB_INODE_32BIT, true, \
NULL, "Constrain inode numbers to 32 bits") \
x(shard_inode_numbers, u8, \
OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH_SB_SHARD_INUMS, false, \
NULL, "Shard new inode numbers by CPU id") \
x(gc_reserve_percent, u8, \
OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_UINT(5, 21), \

View File

@ -716,7 +716,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
case BCH_JSET_ENTRY_dev_usage: {
struct jset_entry_dev_usage *u =
container_of(entry, struct jset_entry_dev_usage, entry);
struct bch_dev *ca = bch_dev_bkey_exists(c, u->dev);
struct bch_dev *ca = bch_dev_bkey_exists(c, le32_to_cpu(u->dev));
unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
unsigned nr_types = (bytes - sizeof(struct jset_entry_dev_usage)) /
sizeof(struct jset_entry_dev_usage_type);
@ -755,7 +755,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
struct jset_entry_clock *clock =
container_of(entry, struct jset_entry_clock, entry);
atomic64_set(&c->io_clock[clock->rw].now, clock->time);
atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time));
}
}
@ -1217,13 +1217,13 @@ use_clean:
mutex_lock(&c->sb_lock);
if (c->opts.version_upgrade) {
c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL;
c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
write_sb = true;
}
if (!test_bit(BCH_FS_ERROR, &c->flags)) {
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_info;
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
write_sb = true;
}
@ -1278,12 +1278,12 @@ int bch2_fs_initialize(struct bch_fs *c)
bch_notice(c, "initializing new filesystem");
mutex_lock(&c->sb_lock);
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_extents_above_btree_updates_done;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_bformat_overflow_done;
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
if (c->opts.version_upgrade) {
c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL;
c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
bch2_write_super(c);
}

View File

@ -151,7 +151,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
refcount = (void *) &r_v->v;
refcount = bkey_refcount(r_v);
*refcount = 0;
memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
@ -181,18 +181,19 @@ err:
static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
{
struct bkey_s_c k = bch2_btree_iter_peek(iter);
struct bkey_s_c k;
int ret;
for_each_btree_key_continue(iter, 0, k, ret) {
if (bkey_cmp(iter->pos, end) >= 0)
return bkey_s_c_null;
break;
if (bkey_extent_is_data(k.k))
break;
return k;
}
return k;
bch2_btree_iter_set_pos(iter, end);
return bkey_s_c_null;
}
s64 bch2_remap_range(struct bch_fs *c,
@ -205,8 +206,8 @@ s64 bch2_remap_range(struct bch_fs *c,
struct bkey_s_c src_k;
struct bkey_buf new_dst, new_src;
struct bpos dst_end = dst_start, src_end = src_start;
struct bpos dst_want, src_want;
u64 src_done, dst_done;
struct bpos src_want;
u64 dst_done;
int ret = 0, ret2 = 0;
if (!percpu_ref_tryget(&c->writes))
@ -226,7 +227,8 @@ s64 bch2_remap_range(struct bch_fs *c,
dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, dst_start,
BTREE_ITER_INTENT);
while (ret == 0 || ret == -EINTR) {
while ((ret == 0 || ret == -EINTR) &&
bkey_cmp(dst_iter->pos, dst_end) < 0) {
struct disk_reservation disk_res = { 0 };
bch2_trans_begin(&trans);
@ -236,32 +238,29 @@ s64 bch2_remap_range(struct bch_fs *c,
break;
}
dst_done = dst_iter->pos.offset - dst_start.offset;
src_want = POS(src_start.inode, src_start.offset + dst_done);
bch2_btree_iter_set_pos(src_iter, src_want);
src_k = get_next_src(src_iter, src_end);
ret = bkey_err(src_k);
if (ret)
continue;
src_done = bpos_min(src_iter->pos, src_end).offset -
src_start.offset;
dst_want = POS(dst_start.inode, dst_start.offset + src_done);
if (bkey_cmp(dst_iter->pos, dst_want) < 0) {
ret = bch2_fpunch_at(&trans, dst_iter, dst_want,
journal_seq, i_sectors_delta);
if (bkey_cmp(src_want, src_iter->pos) < 0) {
ret = bch2_fpunch_at(&trans, dst_iter,
bpos_min(dst_end,
POS(dst_iter->pos.inode, dst_iter->pos.offset +
src_iter->pos.offset - src_want.offset)),
journal_seq, i_sectors_delta);
continue;
}
BUG_ON(bkey_cmp(dst_iter->pos, dst_want));
if (!bkey_cmp(dst_iter->pos, dst_end))
break;
if (src_k.k->type != KEY_TYPE_reflink_p) {
bch2_bkey_buf_reassemble(&new_src, c, src_k);
src_k = bkey_i_to_s_c(new_src.k);
bch2_cut_front(src_iter->pos, new_src.k);
bch2_cut_back(src_end, new_src.k);
bch2_btree_iter_set_pos(src_iter, bkey_start_pos(src_k.k));
ret = bch2_make_extent_indirect(&trans, src_iter,
new_src.k);
@ -278,7 +277,7 @@ s64 bch2_remap_range(struct bch_fs *c,
bkey_reflink_p_init(new_dst.k);
u64 offset = le64_to_cpu(src_p.v->idx) +
(src_iter->pos.offset -
(src_want.offset -
bkey_start_offset(src_k.k));
dst_p->v.idx = cpu_to_le64(offset);
@ -288,20 +287,13 @@ s64 bch2_remap_range(struct bch_fs *c,
new_dst.k->k.p = dst_iter->pos;
bch2_key_resize(&new_dst.k->k,
min(src_k.k->p.offset - src_iter->pos.offset,
min(src_k.k->p.offset - src_want.offset,
dst_end.offset - dst_iter->pos.offset));
ret = bch2_extent_update(&trans, dst_iter, new_dst.k,
&disk_res, journal_seq,
new_i_size, i_sectors_delta,
true);
bch2_disk_reservation_put(c, &disk_res);
if (ret)
continue;
dst_done = dst_iter->pos.offset - dst_start.offset;
src_want = POS(src_start.inode, src_start.offset + dst_done);
bch2_btree_iter_set_pos(src_iter, src_want);
}
bch2_trans_iter_put(&trans, dst_iter);
bch2_trans_iter_put(&trans, src_iter);

View File

@ -34,6 +34,30 @@ void bch2_indirect_inline_data_to_text(struct printbuf *,
.val_to_text = bch2_indirect_inline_data_to_text, \
}
static inline const __le64 *bkey_refcount_c(struct bkey_s_c k)
{
switch (k.k->type) {
case KEY_TYPE_reflink_v:
return &bkey_s_c_to_reflink_v(k).v->refcount;
case KEY_TYPE_indirect_inline_data:
return &bkey_s_c_to_indirect_inline_data(k).v->refcount;
default:
return NULL;
}
}
static inline __le64 *bkey_refcount(struct bkey_i *k)
{
switch (k->k.type) {
case KEY_TYPE_reflink_v:
return &bkey_i_to_reflink_v(k)->v.refcount;
case KEY_TYPE_indirect_inline_data:
return &bkey_i_to_indirect_inline_data(k)->v.refcount;
default:
return NULL;
}
}
s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos,
u64, u64 *, u64, s64 *);

View File

@ -982,7 +982,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
mutex_lock(&c->sb_lock);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALWAYS;
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
ret = bch2_write_super(c);
mutex_unlock(&c->sb_lock);
@ -999,7 +999,7 @@ static struct jset_entry *jset_entry_init(struct jset_entry **end, size_t size)
* The u64s field counts from the start of data, ignoring the shared
* fields.
*/
entry->u64s = u64s - 1;
entry->u64s = cpu_to_le16(u64s - 1);
*end = vstruct_next(*end);
return entry;
@ -1092,7 +1092,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,
clock->entry.type = BCH_JSET_ENTRY_clock;
clock->rw = i;
clock->time = atomic64_read(&c->io_clock[i].now);
clock->time = cpu_to_le64(atomic64_read(&c->io_clock[i].now));
}
}
@ -1109,10 +1109,10 @@ void bch2_fs_mark_clean(struct bch_fs *c)
SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_info;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_alloc_metadata;
c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_extents_above_btree_updates);
c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_btree_updates_journalled);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_metadata);
c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_extents_above_btree_updates));
c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_btree_updates_journalled));
u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;

View File

@ -509,10 +509,14 @@ static void __bch2_fs_free(struct bch_fs *c)
kfree(c->unused_inode_hints);
free_heap(&c->copygc_heap);
if (c->io_complete_wq)
destroy_workqueue(c->io_complete_wq);
if (c->copygc_wq)
destroy_workqueue(c->copygc_wq);
if (c->wq)
destroy_workqueue(c->wq);
if (c->btree_error_wq)
destroy_workqueue(c->btree_error_wq);
if (c->btree_update_wq)
destroy_workqueue(c->btree_update_wq);
bch2_free_super(&c->disk_sb);
kvpfree(c, sizeof(*c));
@ -760,10 +764,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus()));
if (!(c->wq = alloc_workqueue("bcachefs",
if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->btree_error_wq = alloc_workqueue("bcachefs_error",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->io_complete_wq = alloc_workqueue("bcachefs_io",
WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) ||
percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
@ -1437,7 +1445,7 @@ int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
/* Device add/removal: */
int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
{
struct btree_trans trans;
size_t i;

View File

@ -312,7 +312,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
return 0;
}
void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
{
pr_buf(out, "%s: ", bch2_btree_ids[c->gc_gens_btree]);
bch2_bpos_to_text(out, c->gc_gens_pos);