Update bcachefs sources to ce8f0c316862 bcachefs: struct bch_fs_snapshots

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Author: Kent Overstreet
Date:   2025-11-30 12:27:54 -05:00
parent 0532e81278
commit 9f8195b54c
69 changed files with 1181 additions and 1095 deletions
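
At a glance, this update moves a block of fields that previously lived directly in struct bch_fs into new per-subsystem structs. The following is an abridged orientation sketch, condensed from the definitions added in alloc/types.h and the struct bch_fs hunks further down in this diff; field lists are trimmed to the highlights and this is not the verbatim layout:

/* condensed sketch of the new grouping -- see the full hunks below */
struct bch_fs_capacity_pcpu {
	u64				sectors_available;
	u64				online_reserved;
};

struct bch_fs_capacity {
	u64				capacity;	/* sectors */
	u64				reserved;	/* sectors */
	u32				capacity_gen;
	atomic64_t			sectors_available;
	struct mutex			sectors_available_lock;
	struct bch_fs_capacity_pcpu __percpu *pcpu;
	struct percpu_rw_semaphore	mark_lock;
	struct bch_fs_usage_base __percpu *usage;
	/* ... */
};

struct bch_fs_allocator {
	struct bch_devs_mask		rw_devs[BCH_DATA_NR];
	spinlock_t			freelist_lock;
	struct closure_waitlist		freelist_wait;
	struct open_bucket		open_buckets[OPEN_BUCKETS_COUNT];
	struct write_point		write_points[WRITE_POINT_MAX];
	/* ... */
};

struct bch_fs {
	/* ... */
	struct bch_fs_capacity		capacity;
	struct bch_fs_allocator		allocator;
	struct bch_fs_snapshots		snapshots;
	/* ... */
};

Call sites are updated to match: c->mark_lock becomes c->capacity.mark_lock, c->usage becomes c->capacity.usage, c->open_buckets becomes c->allocator.open_buckets, and so on throughout the diff.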


@ -1 +1 @@
5df84d32ad84d74ababcd783bf92ed1a1853e74d
ce8f0c316862869e9e2c57270b8d5f15f26be5ca


@ -52,7 +52,7 @@ void strip_fs_alloc(struct bch_fs *c)
swap(u64s, clean->field.u64s);
bch2_sb_field_resize(&c->disk_sb, clean, u64s);
scoped_guard(percpu_write, &c->mark_lock) {
scoped_guard(percpu_write, &c->capacity.mark_lock) {
kfree(c->replicas.entries);
c->replicas.entries = NULL;
c->replicas.nr = 0;


@ -429,11 +429,11 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
!bch2_replicas_marked_locked(c, &r.e))
return bch_err_throw(c, btree_insert_need_mark_replicas);
percpu_up_read(&c->mark_lock);
percpu_up_read(&c->capacity.mark_lock);
int ret;
scoped_guard(percpu_write, &c->mark_lock)
scoped_guard(percpu_write, &c->capacity.mark_lock)
ret = __bch2_accounting_mem_insert(c, a);
percpu_down_read(&c->mark_lock);
percpu_down_read(&c->capacity.mark_lock);
return ret;
}
@ -469,7 +469,7 @@ void __bch2_accounting_maybe_kill(struct bch_fs *c, struct bpos pos)
return;
guard(mutex)(&c->sb_lock);
scoped_guard(percpu_write, &c->mark_lock) {
scoped_guard(percpu_write, &c->capacity.mark_lock) {
struct bch_accounting_mem *acc = &c->accounting;
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
@ -507,7 +507,7 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
{
struct bch_accounting_mem *acc = &c->accounting;
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
darray_for_each(acc->k, i) {
union {
u8 bytes[struct_size_t(struct bch_replicas_usage, r.devs,
@ -539,7 +539,7 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc
darray_init(out_buf);
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
darray_for_each(acc->k, i) {
struct disk_accounting_pos a_p;
bpos_to_disk_accounting_pos(&a_p, i->pos);
@ -577,7 +577,7 @@ int bch2_gc_accounting_start(struct bch_fs *c)
struct bch_accounting_mem *acc = &c->accounting;
int ret = 0;
guard(percpu_write)(&c->mark_lock);
guard(percpu_write)(&c->capacity.mark_lock);
darray_for_each(acc->k, e) {
e->v[1] = __alloc_percpu_gfp(e->nr_counters * sizeof(u64),
sizeof(u64), GFP_KERNEL);
@ -600,7 +600,7 @@ int bch2_gc_accounting_done(struct bch_fs *c)
struct bpos pos = POS_MIN;
int ret = 0;
guard(percpu_write)(&c->mark_lock);
guard(percpu_write)(&c->capacity.mark_lock);
while (1) {
unsigned idx = eytzinger0_find_ge(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &pos);
@ -643,11 +643,11 @@ int bch2_gc_accounting_done(struct bch_fs *c)
bch2_trans_unlock_long(trans);
if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) {
percpu_up_write(&c->mark_lock);
percpu_up_write(&c->capacity.mark_lock);
ret = commit_do(trans, NULL, NULL,
BCH_TRANS_COMMIT_skip_accounting_apply,
bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false));
percpu_down_write(&c->mark_lock);
percpu_down_write(&c->capacity.mark_lock);
if (ret)
goto err;
@ -661,7 +661,7 @@ int bch2_gc_accounting_done(struct bch_fs *c)
BCH_ACCOUNTING_normal, true);
guard(preempt)();
struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
struct bch_fs_usage_base *dst = this_cpu_ptr(c->capacity.usage);
struct bch_fs_usage_base *src = &trans->fs_usage_delta;
acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64));
}
@ -681,7 +681,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
if (k.k->type != KEY_TYPE_accounting)
return 0;
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
return bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k),
BCH_ACCOUNTING_read, false);
}
@ -874,7 +874,7 @@ static int accounting_read_mem_fixups(struct btree_trans *trans)
}
guard(preempt)();
struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
struct bch_fs_usage_base *usage = this_cpu_ptr(c->capacity.usage);
switch (k.type) {
case BCH_DISK_ACCOUNTING_persistent_reserved:
@ -934,12 +934,12 @@ int bch2_accounting_read(struct bch_fs *c)
*
* Instead, zero out any accounting we have:
*/
scoped_guard(percpu_write, &c->mark_lock) {
scoped_guard(percpu_write, &c->capacity.mark_lock) {
darray_for_each(acc->k, e)
percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters);
for_each_member_device(c, ca)
percpu_memset(ca->usage, 0, sizeof(*ca->usage));
percpu_memset(c->usage, 0, sizeof(*c->usage));
percpu_memset(c->capacity.usage, 0, sizeof(*c->capacity.usage));
}
struct journal_keys *keys = &c->journal_keys;
@ -1164,7 +1164,8 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
0;
}));
acc_u64s_percpu(&base_inmem.hidden, &c->usage->hidden, sizeof(base_inmem) / sizeof(u64));
acc_u64s_percpu(&base_inmem.hidden, &c->capacity.usage->hidden,
sizeof(base_inmem) / sizeof(u64));
#define check(x) \
if (base.x != base_inmem.x) { \
@ -1183,7 +1184,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
void bch2_accounting_gc_free(struct bch_fs *c)
{
lockdep_assert_held(&c->mark_lock);
lockdep_assert_held(&c->capacity.mark_lock);
struct bch_accounting_mem *acc = &c->accounting;


@ -230,7 +230,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc)
{
guard(percpu_read)(&trans->c->mark_lock);
guard(percpu_read)(&trans->c->capacity.mark_lock);
return bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false);
}
@ -253,7 +253,7 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *
static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p,
u64 *v, unsigned nr)
{
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
struct bch_accounting_mem *acc = &c->accounting;
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &p);


@ -923,7 +923,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (statechange(a->data_type == BCH_DATA_free) &&
bucket_flushed(new_a))
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);
if (statechange(a->data_type == BCH_DATA_need_discard) &&
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
@ -1587,13 +1587,13 @@ void bch2_recalc_capacity(struct bch_fs *c)
reserved_sectors = min(reserved_sectors, capacity);
c->reserved = reserved_sectors;
c->capacity = capacity - reserved_sectors;
c->capacity.reserved = reserved_sectors;
c->capacity.capacity = capacity - reserved_sectors;
c->bucket_size_max = bucket_size_max;
c->capacity.bucket_size_max = bucket_size_max;
/* Wake up case someone was waiting for buckets */
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);
}
u64 bch2_min_rw_member_capacity(struct bch_fs *c)
@ -1610,8 +1610,8 @@ static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
{
struct open_bucket *ob;
for (ob = c->open_buckets;
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
for (ob = c->allocator.open_buckets;
ob < c->allocator.open_buckets + ARRAY_SIZE(c->allocator.open_buckets);
ob++) {
scoped_guard(spinlock, &ob->lock) {
if (ob->valid && !ob->on_partial_list &&
@ -1627,7 +1627,7 @@ void bch2_dev_allocator_set_rw(struct bch_fs *c, struct bch_dev *ca, bool rw)
{
/* BCH_DATA_free == all rw devs */
for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++) {
for (unsigned i = 0; i < ARRAY_SIZE(c->allocator.rw_devs); i++) {
bool data_type_rw = rw;
if (i != BCH_DATA_free &&
@ -1639,10 +1639,10 @@ void bch2_dev_allocator_set_rw(struct bch_fs *c, struct bch_dev *ca, bool rw)
!ca->mi.durability)
data_type_rw = false;
mod_bit(ca->dev_idx, c->rw_devs[i].d, data_type_rw);
mod_bit(ca->dev_idx, c->allocator.rw_devs[i].d, data_type_rw);
}
c->rw_devs_change_count++;
c->allocator.rw_devs_change_count++;
}
/* device goes ro: */
@ -1664,7 +1664,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
* Wake up threads that were blocked on allocation, so they can notice
* the device can no longer be removed and the capacity has changed:
*/
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);
/*
* journal_res_get() can block waiting for free space in the journal -
@ -1674,7 +1674,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
/* Now wait for any in flight writes: */
closure_wait_event(&c->open_buckets_wait,
closure_wait_event(&c->allocator.open_buckets_wait,
!bch2_dev_has_open_write_point(c, ca));
}
@ -1684,7 +1684,7 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
lockdep_assert_held(&c->state_lock);
bch2_dev_allocator_set_rw(c, ca, true);
c->rw_devs_change_count++;
c->allocator.rw_devs_change_count++;
}
void bch2_dev_allocator_background_exit(struct bch_dev *ca)
@ -1702,5 +1702,31 @@ void bch2_dev_allocator_background_init(struct bch_dev *ca)
void bch2_fs_allocator_background_init(struct bch_fs *c)
{
spin_lock_init(&c->freelist_lock);
spin_lock_init(&c->allocator.freelist_lock);
}
void bch2_fs_capacity_exit(struct bch_fs *c)
{
percpu_free_rwsem(&c->capacity.mark_lock);
if (c->capacity.pcpu) {
u64 v = percpu_u64_get(&c->capacity.pcpu->online_reserved);
WARN(v, "online_reserved not 0 at shutdown: %lli", v);
}
free_percpu(c->capacity.pcpu);
free_percpu(c->capacity.usage);
}
int bch2_fs_capacity_init(struct bch_fs *c)
{
mutex_init(&c->capacity.sectors_available_lock);
seqcount_init(&c->capacity.usage_lock);
try(percpu_init_rwsem(&c->capacity.mark_lock));
if (!(c->capacity.pcpu = alloc_percpu(struct bch_fs_capacity_pcpu)) ||
!(c->capacity.usage = alloc_percpu(struct bch_fs_usage_base)))
return bch_err_throw(c, ENOMEM_fs_other_alloc);
return 0;
}


@ -377,4 +377,7 @@ void bch2_dev_allocator_background_init(struct bch_dev *);
void bch2_fs_allocator_background_init(struct bch_fs *);
void bch2_fs_capacity_exit(struct bch_fs *);
int bch2_fs_capacity_init(struct bch_fs *);
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */


@ -57,13 +57,13 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
struct bch_fs_usage_short ret;
u64 data, reserved;
ret.capacity = c->capacity -
percpu_u64_get(&c->usage->hidden);
ret.capacity = c->capacity.capacity -
percpu_u64_get(&c->capacity.usage->hidden);
data = percpu_u64_get(&c->usage->data) +
percpu_u64_get(&c->usage->btree);
reserved = percpu_u64_get(&c->usage->reserved) +
percpu_u64_get(c->online_reserved);
data = percpu_u64_get(&c->capacity.usage->data) +
percpu_u64_get(&c->capacity.usage->btree);
reserved = percpu_u64_get(&c->capacity.usage->reserved) +
percpu_u64_get(&c->capacity.pcpu->online_reserved);
ret.used = min(ret.capacity, data + reserve_factor(reserved));
ret.free = ret.capacity - ret.used;
@ -74,7 +74,7 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *c)
{
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
return __bch2_fs_usage_read_short(c);
}
@ -249,7 +249,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
}
if (p.has_ec) {
struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx);
struct gc_stripe *m = genradix_ptr(&c->ec.gc_stripes, p.ec.idx);
if (ret_fsck_err_on(!m || !m->alive,
trans, ptr_to_missing_stripe,
@ -343,7 +343,7 @@ again:
ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
bkey_extent_entry_for_each(ptrs, entry) {
if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) {
struct gc_stripe *m = genradix_ptr(&c->gc_stripes,
struct gc_stripe *m = genradix_ptr(&c->ec.gc_stripes,
entry->stripe_ptr.idx);
union bch_extent_entry *next_ptr;
@ -547,7 +547,7 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
static int warned_disk_usage = 0;
bool warn = false;
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
struct bch_fs_usage_base *src = &trans->fs_usage_delta;
s64 added = src->btree + src->data + src->reserved;
@ -560,10 +560,10 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
if (unlikely(should_not_have_added > 0)) {
u64 old, new;
old = atomic64_read(&c->sectors_available);
old = atomic64_read(&c->capacity.sectors_available);
do {
new = max_t(s64, 0, old - should_not_have_added);
} while (!atomic64_try_cmpxchg(&c->sectors_available,
} while (!atomic64_try_cmpxchg(&c->capacity.sectors_available,
&old, new));
added -= should_not_have_added;
@ -572,11 +572,11 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
if (added > 0) {
trans->disk_res->sectors -= added;
this_cpu_sub(*c->online_reserved, added);
this_cpu_sub(c->capacity.pcpu->online_reserved, added);
}
scoped_guard(preempt) {
struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
struct bch_fs_usage_base *dst = this_cpu_ptr(c->capacity.usage);
acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64));
}
@ -707,7 +707,7 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
}
if (flags & BTREE_TRIGGER_gc) {
struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL);
struct gc_stripe *m = genradix_ptr_alloc(&c->ec.gc_stripes, p.ec.idx, GFP_KERNEL);
if (!m) {
bch_err(c, "error allocating memory for gc_stripes, idx %llu",
(u64) p.ec.idx);
@ -1145,9 +1145,9 @@ static int disk_reservation_recalc_sectors_available(struct bch_fs *c,
struct disk_reservation *res,
u64 sectors, enum bch_reservation_flags flags)
{
guard(mutex)(&c->sectors_available_lock);
guard(mutex)(&c->capacity.sectors_available_lock);
percpu_u64_set(&c->pcpu->sectors_available, 0);
percpu_u64_set(&c->capacity.pcpu->sectors_available, 0);
u64 sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);
if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL))
@ -1155,13 +1155,13 @@ static int disk_reservation_recalc_sectors_available(struct bch_fs *c,
if (sectors <= sectors_available ||
(flags & BCH_DISK_RESERVATION_NOFAIL)) {
atomic64_set(&c->sectors_available,
atomic64_set(&c->capacity.sectors_available,
max_t(s64, 0, sectors_available - sectors));
this_cpu_add(*c->online_reserved, sectors);
this_cpu_add(c->capacity.pcpu->online_reserved, sectors);
res->sectors += sectors;
return 0;
} else {
atomic64_set(&c->sectors_available, sectors_available);
atomic64_set(&c->capacity.sectors_available, sectors_available);
return bch_err_throw(c, ENOSPC_disk_reservation);
}
}
@ -1169,15 +1169,15 @@ static int disk_reservation_recalc_sectors_available(struct bch_fs *c,
int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
u64 sectors, enum bch_reservation_flags flags)
{
struct bch_fs_pcpu *pcpu;
struct bch_fs_capacity_pcpu *pcpu;
u64 old, get;
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
preempt_disable();
pcpu = this_cpu_ptr(c->pcpu);
pcpu = this_cpu_ptr(c->capacity.pcpu);
if (unlikely(sectors > pcpu->sectors_available)) {
old = atomic64_read(&c->sectors_available);
old = atomic64_read(&c->capacity.sectors_available);
do {
get = min((u64) sectors + SECTORS_CACHE, old);
@ -1186,14 +1186,14 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
return disk_reservation_recalc_sectors_available(c,
res, sectors, flags);
}
} while (!atomic64_try_cmpxchg(&c->sectors_available,
} while (!atomic64_try_cmpxchg(&c->capacity.sectors_available,
&old, old - get));
pcpu->sectors_available += get;
}
pcpu->sectors_available -= sectors;
this_cpu_add(*c->online_reserved, sectors);
pcpu->online_reserved += sectors;
res->sectors += sectors;
preempt_enable();
return 0;


@ -298,7 +298,7 @@ static inline void bch2_disk_reservation_put(struct bch_fs *c,
struct disk_reservation *res)
{
if (res->sectors) {
this_cpu_sub(*c->online_reserved, res->sectors);
this_cpu_sub(c->capacity.pcpu->online_reserved, res->sectors);
res->sectors = 0;
}
}
@ -317,15 +317,15 @@ static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reserv
#ifdef __KERNEL__
u64 old, new;
old = this_cpu_read(c->pcpu->sectors_available);
old = this_cpu_read(c->capacity.pcpu->sectors_available);
do {
if (sectors > old)
return __bch2_disk_reservation_add(c, res, sectors, flags);
new = old - sectors;
} while (!this_cpu_try_cmpxchg(c->pcpu->sectors_available, &old, new));
} while (!this_cpu_try_cmpxchg(c->capacity.pcpu->sectors_available, &old, new));
this_cpu_add(*c->online_reserved, sectors);
this_cpu_add(c->capacity.pcpu->online_reserved, sectors);
res->sectors += sectors;
return 0;
#else
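
For context on the fast path above: callers pair bch2_disk_reservation_add() with bch2_disk_reservation_put(), and with this change the per-cpu bookkeeping both of them touch (sectors_available, online_reserved) lives in c->capacity.pcpu. A minimal sketch of that pairing follows; it is not taken from this commit, the wrapper function name is hypothetical, and the zero-initialized reservation plus flags == 0 are assumptions made for brevity.

/*
 * Illustrative only (not from this commit): reserve sectors up front,
 * do the work, then drop the reservation. Assumes flags == 0 and a
 * zero-initialized struct disk_reservation are acceptable for a sketch.
 */
static int example_reserve_then_write(struct bch_fs *c, u64 sectors)
{
	struct disk_reservation res = {};

	int ret = bch2_disk_reservation_add(c, &res, sectors, 0);
	if (ret)
		return ret;	/* e.g. ENOSPC_disk_reservation */

	/* ... issue the write that consumes the reserved sectors ... */

	bch2_disk_reservation_put(c, &res);	/* drops capacity.pcpu->online_reserved */
	return 0;
}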


@ -62,7 +62,7 @@ static inline struct bch_devs_mask target_rw_devs(struct bch_fs *c,
enum bch_data_type data_type,
u16 target)
{
struct bch_devs_mask devs = c->rw_devs[data_type];
struct bch_devs_mask devs = c->allocator.rw_devs[data_type];
const struct bch_devs_mask *t = bch2_target_to_mask(c, target);
if (t)


@ -83,7 +83,7 @@ void bch2_reset_alloc_cursors(struct bch_fs *c)
static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob)
{
open_bucket_idx_t idx = ob - c->open_buckets;
open_bucket_idx_t idx = ob - c->allocator.open_buckets;
open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket);
ob->hash = *slot;
@ -92,12 +92,12 @@ static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob)
static void bch2_open_bucket_hash_remove(struct bch_fs *c, struct open_bucket *ob)
{
open_bucket_idx_t idx = ob - c->open_buckets;
open_bucket_idx_t idx = ob - c->allocator.open_buckets;
open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket);
while (*slot != idx) {
BUG_ON(!*slot);
slot = &c->open_buckets[*slot].hash;
slot = &c->allocator.open_buckets[*slot].hash;
}
*slot = ob->hash;
@ -118,17 +118,17 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
ob->data_type = 0;
}
scoped_guard(spinlock, &c->freelist_lock) {
scoped_guard(spinlock, &c->allocator.freelist_lock) {
bch2_open_bucket_hash_remove(c, ob);
ob->freelist = c->open_buckets_freelist;
c->open_buckets_freelist = ob - c->open_buckets;
ob->freelist = c->allocator.open_buckets_freelist;
c->allocator.open_buckets_freelist = ob - c->allocator.open_buckets;
c->open_buckets_nr_free++;
c->allocator.open_buckets_nr_free++;
ca->nr_open_buckets--;
}
closure_wake_up(&c->open_buckets_wait);
closure_wake_up(&c->allocator.open_buckets_wait);
}
void bch2_open_bucket_write_error(struct bch_fs *c,
@ -143,13 +143,11 @@ void bch2_open_bucket_write_error(struct bch_fs *c,
bch2_ec_bucket_cancel(c, ob, err);
}
static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs_allocator *c)
{
struct open_bucket *ob;
BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free);
ob = c->open_buckets + c->open_buckets_freelist;
struct open_bucket *ob = c->open_buckets + c->open_buckets_freelist;
c->open_buckets_freelist = ob->freelist;
atomic_set(&ob->pin, 1);
ob->data_type = 0;
@ -168,20 +166,20 @@ static inline bool is_superblock_bucket(struct bch_fs *c, struct bch_dev *ca, u6
static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
{
BUG_ON(c->open_buckets_partial_nr >=
ARRAY_SIZE(c->open_buckets_partial));
BUG_ON(c->allocator.open_buckets_partial_nr >=
ARRAY_SIZE(c->allocator.open_buckets_partial));
scoped_guard(spinlock, &c->freelist_lock) {
scoped_guard(spinlock, &c->allocator.freelist_lock) {
guard(rcu)();
bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++;
ob->on_partial_list = true;
c->open_buckets_partial[c->open_buckets_partial_nr++] =
ob - c->open_buckets;
c->allocator.open_buckets_partial[c->allocator.open_buckets_partial_nr++] =
ob - c->allocator.open_buckets;
}
closure_wake_up(&c->open_buckets_wait);
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.open_buckets_wait);
closure_wake_up(&c->allocator.freelist_wait);
}
static inline bool may_alloc_bucket(struct bch_fs *c,
@ -226,11 +224,11 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
return NULL;
}
guard(spinlock)(&c->freelist_lock);
guard(spinlock)(&c->allocator.freelist_lock);
if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) {
if (unlikely(c->allocator.open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) {
if (cl)
closure_wait(&c->open_buckets_wait, cl);
closure_wait(&c->allocator.open_buckets_wait, cl);
track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true);
return ERR_PTR(bch_err_throw(c, open_buckets_empty));
@ -242,7 +240,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
return NULL;
}
struct open_bucket *ob = bch2_open_bucket_alloc(c);
struct open_bucket *ob = bch2_open_bucket_alloc(&c->allocator);
scoped_guard(spinlock, &ob->lock) {
ob->valid = true;
@ -459,7 +457,7 @@ static noinline void bucket_alloc_to_text(struct printbuf *out,
prt_printf(out, "avail\t%llu\n", dev_buckets_free(req->ca, req->usage, req->watermark));
prt_printf(out, "copygc_wait\t%llu/%lli\n",
bch2_copygc_wait_amount(c),
c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now));
c->copygc.wait - atomic64_read(&c->io_clock[WRITE].now));
prt_printf(out, "seen\t%llu\n", req->counters.buckets_seen);
prt_printf(out, "open\t%llu\n", req->counters.skipped_open);
prt_printf(out, "need journal commit\t%llu\n", req->counters.skipped_need_journal_commit);
@ -516,7 +514,7 @@ again:
goto alloc;
if (cl && !waiting) {
closure_wait(&c->freelist_wait, cl);
closure_wait(&c->allocator.freelist_wait, cl);
waiting = true;
goto again;
}
@ -528,7 +526,7 @@ again:
}
if (waiting)
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);
alloc:
ob = likely(freespace)
? bch2_bucket_alloc_freelist(trans, req, cl)
@ -770,7 +768,7 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
if (!h->s->blocks[ec_idx])
continue;
struct open_bucket *ob = c->open_buckets + h->s->blocks[ec_idx];
struct open_bucket *ob = c->allocator.open_buckets + h->s->blocks[ec_idx];
if (ob->dev == *i && !test_and_set_bit(ec_idx, h->s->blocks_allocated)) {
ob->ec_idx = ec_idx;
ob->ec = h->s;
@ -832,16 +830,18 @@ static int bucket_alloc_set_writepoint(struct bch_fs *c,
static int bucket_alloc_set_partial(struct bch_fs *c,
struct alloc_request *req)
{
if (!c->open_buckets_partial_nr)
struct bch_fs_allocator *a = &c->allocator;
if (!a->open_buckets_partial_nr)
return 0;
guard(spinlock)(&c->freelist_lock);
guard(spinlock)(&a->freelist_lock);
if (!c->open_buckets_partial_nr)
if (!a->open_buckets_partial_nr)
return 0;
for (int i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
for (int i = a->open_buckets_partial_nr - 1; i >= 0; --i) {
struct open_bucket *ob = a->open_buckets + a->open_buckets_partial[i];
if (want_bucket(c, req, ob)) {
struct bch_dev *ca = ob_dev(c, ob);
@ -852,8 +852,8 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
if (!avail)
continue;
array_remove_item(c->open_buckets_partial,
c->open_buckets_partial_nr,
array_remove_item(a->open_buckets_partial,
a->open_buckets_partial_nr,
i);
ob->on_partial_list = false;
@ -952,6 +952,8 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
struct bch_dev *ca, bool ec)
{
struct bch_fs_allocator *a = &c->allocator;
if (ec) {
return ob->ec != NULL;
} else if (ca) {
@ -965,7 +967,7 @@ static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
if (!ob->ec->blocks[i])
continue;
struct open_bucket *ob2 = c->open_buckets + ob->ec->blocks[i];
struct open_bucket *ob2 = a->open_buckets + ob->ec->blocks[i];
drop |= ob2->dev == ca->dev_idx;
}
}
@ -995,43 +997,44 @@ static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
bool ec)
{
struct bch_fs_allocator *a = &c->allocator;
unsigned i;
/* Next, close write points that point to this device... */
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
bch2_writepoint_stop(c, ca, ec, &c->write_points[i]);
for (i = 0; i < ARRAY_SIZE(a->write_points); i++)
bch2_writepoint_stop(c, ca, ec, &a->write_points[i]);
bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point);
bch2_writepoint_stop(c, ca, ec, &c->reconcile_write_point);
bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);
bch2_writepoint_stop(c, ca, ec, &c->copygc.write_point);
bch2_writepoint_stop(c, ca, ec, &a->reconcile_write_point);
bch2_writepoint_stop(c, ca, ec, &a->btree_write_point);
scoped_guard(mutex, &c->btree_reserve_cache_lock)
while (c->btree_reserve_cache_nr) {
scoped_guard(mutex, &c->btree_reserve_cache.lock)
while (c->btree_reserve_cache.nr) {
struct btree_alloc *a =
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
&c->btree_reserve_cache.data[--c->btree_reserve_cache.nr];
bch2_open_buckets_put(c, &a->ob);
}
i = 0;
scoped_guard(spinlock, &c->freelist_lock)
while (i < c->open_buckets_partial_nr) {
scoped_guard(spinlock, &a->freelist_lock)
while (i < a->open_buckets_partial_nr) {
struct open_bucket *ob =
c->open_buckets + c->open_buckets_partial[i];
a->open_buckets + a->open_buckets_partial[i];
if (should_drop_bucket(ob, c, ca, ec)) {
--c->open_buckets_partial_nr;
swap(c->open_buckets_partial[i],
c->open_buckets_partial[c->open_buckets_partial_nr]);
--a->open_buckets_partial_nr;
swap(a->open_buckets_partial[i],
a->open_buckets_partial[a->open_buckets_partial_nr]);
ob->on_partial_list = false;
scoped_guard(rcu)
bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
spin_unlock(&c->freelist_lock);
spin_unlock(&a->freelist_lock);
bch2_open_bucket_put(c, ob);
spin_lock(&c->freelist_lock);
spin_lock(&a->freelist_lock);
} else {
i++;
}
@ -1040,13 +1043,13 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
bch2_ec_stop_dev(c, ca);
}
static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
static inline struct hlist_head *writepoint_hash(struct bch_fs_allocator *a,
unsigned long write_point)
{
unsigned hash =
hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
hash_long(write_point, ilog2(ARRAY_SIZE(a->write_points_hash)));
return &c->write_points_hash[hash];
return &a->write_points_hash[hash];
}
static struct write_point *__writepoint_find(struct hlist_head *head,
@ -1063,7 +1066,7 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
{
u64 stranded = c->write_points_nr * c->bucket_size_max;
u64 stranded = c->allocator.write_points_nr * c->capacity.bucket_size_max;
u64 free = bch2_fs_usage_read_short(c).free;
return stranded * factor > free;
@ -1071,33 +1074,35 @@ static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
static noinline bool try_increase_writepoints(struct bch_fs *c)
{
struct bch_fs_allocator *a = &c->allocator;
struct write_point *wp;
if (c->write_points_nr == ARRAY_SIZE(c->write_points) ||
if (a->write_points_nr == ARRAY_SIZE(a->write_points) ||
too_many_writepoints(c, 32))
return false;
wp = c->write_points + c->write_points_nr++;
hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
wp = a->write_points + a->write_points_nr++;
hlist_add_head_rcu(&wp->node, writepoint_hash(a, wp->write_point));
return true;
}
static noinline bool try_decrease_writepoints(struct btree_trans *trans, unsigned old_nr)
{
struct bch_fs *c = trans->c;
struct bch_fs_allocator *a = &c->allocator;
struct write_point *wp;
struct open_bucket *ob;
unsigned i;
scoped_guard(mutex, &c->write_points_hash_lock) {
if (c->write_points_nr < old_nr)
scoped_guard(mutex, &a->write_points_hash_lock) {
if (a->write_points_nr < old_nr)
return true;
if (c->write_points_nr == 1 ||
if (a->write_points_nr == 1 ||
!too_many_writepoints(c, 8))
return false;
wp = c->write_points + --c->write_points_nr;
wp = a->write_points + --a->write_points_nr;
hlist_del_rcu(&wp->node);
}
@ -1113,6 +1118,7 @@ static struct write_point *writepoint_find(struct btree_trans *trans,
unsigned long write_point)
{
struct bch_fs *c = trans->c;
struct bch_fs_allocator *a = &c->allocator;
struct write_point *wp, *oldest;
struct hlist_head *head;
@ -1122,7 +1128,7 @@ static struct write_point *writepoint_find(struct btree_trans *trans,
return wp;
}
head = writepoint_hash(c, write_point);
head = writepoint_hash(a, write_point);
restart_find:
wp = __writepoint_find(head, write_point);
if (wp) {
@ -1135,23 +1141,23 @@ lock_wp:
}
restart_find_oldest:
oldest = NULL;
for (wp = c->write_points;
wp < c->write_points + c->write_points_nr; wp++)
for (wp = a->write_points;
wp < a->write_points + a->write_points_nr; wp++)
if (!oldest || time_before64(wp->last_used, oldest->last_used))
oldest = wp;
bch2_trans_mutex_lock_norelock(trans, &oldest->lock);
bch2_trans_mutex_lock_norelock(trans, &c->write_points_hash_lock);
if (oldest >= c->write_points + c->write_points_nr ||
bch2_trans_mutex_lock_norelock(trans, &a->write_points_hash_lock);
if (oldest >= a->write_points + a->write_points_nr ||
try_increase_writepoints(c)) {
mutex_unlock(&c->write_points_hash_lock);
mutex_unlock(&a->write_points_hash_lock);
mutex_unlock(&oldest->lock);
goto restart_find_oldest;
}
wp = __writepoint_find(head, write_point);
if (wp && wp != oldest) {
mutex_unlock(&c->write_points_hash_lock);
mutex_unlock(&a->write_points_hash_lock);
mutex_unlock(&oldest->lock);
goto lock_wp;
}
@ -1160,7 +1166,7 @@ restart_find_oldest:
hlist_del_rcu(&wp->node);
wp->write_point = write_point;
hlist_add_head_rcu(&wp->node, head);
mutex_unlock(&c->write_points_hash_lock);
mutex_unlock(&a->write_points_hash_lock);
out:
wp->last_used = local_clock();
return wp;
@ -1201,6 +1207,7 @@ int bch2_alloc_sectors_req(struct btree_trans *trans,
struct write_point **wp_ret)
{
struct bch_fs *c = trans->c;
struct bch_fs_allocator *a = &c->allocator;
struct open_bucket *ob;
unsigned write_points_nr;
int i;
@ -1210,7 +1217,7 @@ retry:
req->ptrs.nr = 0;
req->nr_effective = 0;
req->have_cache = false;
write_points_nr = c->write_points_nr;
write_points_nr = a->write_points_nr;
*wp_ret = req->wp = writepoint_find(trans, write_point.v);
@ -1357,47 +1364,49 @@ static inline void writepoint_init(struct write_point *wp,
void bch2_fs_allocator_foreground_init(struct bch_fs *c)
{
struct bch_fs_allocator *a = &c->allocator;
struct open_bucket *ob;
struct write_point *wp;
mutex_init(&c->write_points_hash_lock);
c->write_points_nr = ARRAY_SIZE(c->write_points);
mutex_init(&a->write_points_hash_lock);
a->write_points_nr = ARRAY_SIZE(a->write_points);
/* open bucket 0 is a sentinal NULL: */
spin_lock_init(&c->open_buckets[0].lock);
spin_lock_init(&a->open_buckets[0].lock);
for (ob = c->open_buckets + 1;
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
for (ob = a->open_buckets + 1;
ob < a->open_buckets + ARRAY_SIZE(a->open_buckets); ob++) {
spin_lock_init(&ob->lock);
c->open_buckets_nr_free++;
a->open_buckets_nr_free++;
ob->freelist = c->open_buckets_freelist;
c->open_buckets_freelist = ob - c->open_buckets;
ob->freelist = a->open_buckets_freelist;
a->open_buckets_freelist = ob - a->open_buckets;
}
writepoint_init(&c->btree_write_point, BCH_DATA_btree);
writepoint_init(&c->reconcile_write_point, BCH_DATA_user);
writepoint_init(&c->copygc_write_point, BCH_DATA_user);
writepoint_init(&a->btree_write_point, BCH_DATA_btree);
writepoint_init(&a->reconcile_write_point, BCH_DATA_user);
writepoint_init(&c->copygc.write_point, BCH_DATA_user);
for (wp = c->write_points;
wp < c->write_points + c->write_points_nr; wp++) {
for (wp = a->write_points;
wp < a->write_points + a->write_points_nr; wp++) {
writepoint_init(wp, BCH_DATA_user);
wp->last_used = local_clock();
wp->write_point = (unsigned long) wp;
hlist_add_head_rcu(&wp->node,
writepoint_hash(c, wp->write_point));
writepoint_hash(a, wp->write_point));
}
}
void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob)
{
struct bch_fs_allocator *a = &c->allocator;
struct bch_dev *ca = ob_dev(c, ob);
unsigned data_type = ob->data_type;
barrier(); /* READ_ONCE() doesn't work on bitfields */
prt_printf(out, "%zu ref %u ",
ob - c->open_buckets,
ob - a->open_buckets,
atomic_read(&ob->pin));
bch2_prt_data_type(out, data_type);
prt_printf(out, " %u:%llu gen %u allocated %u/%u",
@ -1413,10 +1422,11 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope
void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca)
{
struct bch_fs_allocator *a = &c->allocator;
guard(printbuf_atomic)(out);
for (struct open_bucket *ob = c->open_buckets;
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
for (struct open_bucket *ob = a->open_buckets;
ob < a->open_buckets + ARRAY_SIZE(a->open_buckets);
ob++) {
guard(spinlock)(&ob->lock);
if (ob->valid && (!ca || ob->dev == ca->dev_idx))
@ -1427,11 +1437,11 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
{
guard(printbuf_atomic)(out);
guard(spinlock)(&c->freelist_lock);
guard(spinlock)(&c->allocator.freelist_lock);
for (unsigned i = 0; i < c->open_buckets_partial_nr; i++)
for (unsigned i = 0; i < c->allocator.open_buckets_partial_nr; i++)
bch2_open_bucket_to_text(out, c,
c->open_buckets + c->open_buckets_partial[i]);
c->allocator.open_buckets + c->allocator.open_buckets_partial[i]);
}
static const char * const bch2_write_point_states[] = {
@ -1469,66 +1479,69 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
{
struct bch_fs_allocator *a = &c->allocator;
struct write_point *wp;
prt_str(out, "Foreground write points\n");
for (wp = c->write_points;
wp < c->write_points + ARRAY_SIZE(c->write_points);
for (wp = a->write_points;
wp < a->write_points + ARRAY_SIZE(a->write_points);
wp++)
bch2_write_point_to_text(out, c, wp);
prt_str(out, "Copygc write point\n");
bch2_write_point_to_text(out, c, &c->copygc_write_point);
bch2_write_point_to_text(out, c, &c->copygc.write_point);
prt_str(out, "Rebalance write point\n");
bch2_write_point_to_text(out, c, &c->reconcile_write_point);
bch2_write_point_to_text(out, c, &a->reconcile_write_point);
prt_str(out, "Btree write point\n");
bch2_write_point_to_text(out, c, &c->btree_write_point);
bch2_write_point_to_text(out, c, &a->btree_write_point);
}
void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
{
struct bch_fs_allocator *a = &c->allocator;
unsigned nr[BCH_DATA_NR];
memset(nr, 0, sizeof(nr));
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
for (unsigned i = 0; i < ARRAY_SIZE(a->open_buckets); i++)
nr[a->open_buckets[i].data_type]++;
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 24);
prt_printf(out, "capacity\t%llu\n", c->capacity);
prt_printf(out, "capacity\t%llu\n", c->capacity.capacity);
prt_printf(out, "used\t%llu\n", bch2_fs_usage_read_short(c).used);
prt_printf(out, "reserved\t%llu\n", c->reserved);
prt_printf(out, "hidden\t%llu\n", percpu_u64_get(&c->usage->hidden));
prt_printf(out, "btree\t%llu\n", percpu_u64_get(&c->usage->btree));
prt_printf(out, "data\t%llu\n", percpu_u64_get(&c->usage->data));
prt_printf(out, "cached\t%llu\n", percpu_u64_get(&c->usage->cached));
prt_printf(out, "reserved\t%llu\n", percpu_u64_get(&c->usage->reserved));
prt_printf(out, "online_reserved\t%llu\n", percpu_u64_get(c->online_reserved));
prt_printf(out, "reserved\t%llu\n", c->capacity.reserved);
prt_printf(out, "hidden\t%llu\n", percpu_u64_get(&c->capacity.usage->hidden));
prt_printf(out, "btree\t%llu\n", percpu_u64_get(&c->capacity.usage->btree));
prt_printf(out, "data\t%llu\n", percpu_u64_get(&c->capacity.usage->data));
prt_printf(out, "cached\t%llu\n", percpu_u64_get(&c->capacity.usage->cached));
prt_printf(out, "reserved\t%llu\n", percpu_u64_get(&c->capacity.usage->reserved));
prt_printf(out, "online_reserved\t%llu\n", percpu_u64_get(&c->capacity.pcpu->online_reserved));
prt_newline(out);
prt_printf(out, "freelist_wait\t%s\n", c->freelist_wait.list.first ? "waiting" : "empty");
prt_printf(out, "open buckets allocated\t%i\n", OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
prt_printf(out, "freelist_wait\t%s\n", a->freelist_wait.list.first ? "waiting" : "empty");
prt_printf(out, "open buckets allocated\t%i\n", OPEN_BUCKETS_COUNT - a->open_buckets_nr_free);
prt_printf(out, "open buckets total\t%u\n", OPEN_BUCKETS_COUNT);
prt_printf(out, "open_buckets_wait\t%s\n", c->open_buckets_wait.list.first ? "waiting" : "empty");
prt_printf(out, "open_buckets_wait\t%s\n", a->open_buckets_wait.list.first ? "waiting" : "empty");
prt_printf(out, "open_buckets_btree\t%u\n", nr[BCH_DATA_btree]);
prt_printf(out, "open_buckets_user\t%u\n", nr[BCH_DATA_user]);
prt_printf(out, "btree reserve cache\t%u\n", c->btree_reserve_cache_nr);
prt_printf(out, "btree reserve cache\t%u\n", c->btree_reserve_cache.nr);
}
void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
struct bch_fs_allocator *a = &c->allocator;
struct bch_dev_usage_full stats = bch2_dev_usage_full_read(ca);
unsigned nr[BCH_DATA_NR];
memset(nr, 0, sizeof(nr));
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
for (unsigned i = 0; i < ARRAY_SIZE(a->open_buckets); i++)
nr[a->open_buckets[i].data_type]++;
bch2_dev_usage_to_text(out, ca, &stats);
@ -1587,8 +1600,8 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
static inline unsigned allocator_wait_timeout(struct bch_fs *c)
{
if (c->allocator_last_stuck &&
time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies))
if (c->allocator.last_stuck &&
time_after(c->allocator.last_stuck + HZ * 60 * 2, jiffies))
return 0;
return c->opts.allocator_stuck_timeout * HZ;
@ -1599,7 +1612,7 @@ void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
unsigned t = allocator_wait_timeout(c);
if (t && closure_sync_timeout(cl, t)) {
c->allocator_last_stuck = jiffies;
c->allocator.last_stuck = jiffies;
bch2_print_allocator_stuck(c);
}


@ -110,13 +110,13 @@ static inline void ob_push(struct bch_fs *c, struct open_buckets *obs,
{
BUG_ON(obs->nr >= ARRAY_SIZE(obs->v));
obs->v[obs->nr++] = ob - c->open_buckets;
obs->v[obs->nr++] = ob - c->allocator.open_buckets;
}
#define open_bucket_for_each(_c, _obs, _ob, _i) \
for ((_i) = 0; \
(_i) < (_obs)->nr && \
((_ob) = (_c)->open_buckets + (_obs)->v[_i], true); \
#define open_bucket_for_each(_c, _obs, _ob, _i) \
for ((_i) = 0; \
(_i) < (_obs)->nr && \
((_ob) = (_c)->allocator.open_buckets + (_obs)->v[_i], true); \
(_i)++)
static inline struct open_bucket *ec_open_bucket(struct bch_fs *c,
@ -188,7 +188,7 @@ static inline void bch2_open_bucket_get(struct bch_fs *c,
static inline open_bucket_idx_t *open_bucket_hashslot(struct bch_fs *c,
unsigned dev, u64 bucket)
{
return c->open_buckets_hash +
return c->allocator.open_buckets_hash +
(jhash_3words(dev, bucket, bucket >> 32, 0) &
(OPEN_BUCKETS_COUNT - 1));
}
@ -198,7 +198,7 @@ static inline bool bch2_bucket_is_open(struct bch_fs *c, unsigned dev, u64 bucke
open_bucket_idx_t slot = *open_bucket_hashslot(c, dev, bucket);
while (slot) {
struct open_bucket *ob = &c->open_buckets[slot];
struct open_bucket *ob = &c->allocator.open_buckets[slot];
if (ob->dev == dev && ob->bucket == bucket)
return true;
@ -214,7 +214,7 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
if (bch2_bucket_is_open(c, dev, bucket))
return true;
guard(spinlock)(&c->freelist_lock);
guard(spinlock)(&c->allocator.freelist_lock);
return bch2_bucket_is_open(c, dev, bucket);
}


@ -286,7 +286,7 @@ bool bch2_replicas_marked_locked(struct bch_fs *c,
bool bch2_replicas_marked(struct bch_fs *c,
struct bch_replicas_entry_v1 *search)
{
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
return bch2_replicas_marked_locked(c, search);
}
@ -331,7 +331,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
guard(mutex)(&c->sb_lock);
bool write_sb = false;
scoped_guard(percpu_write, &c->mark_lock) {
scoped_guard(percpu_write, &c->capacity.mark_lock) {
if (!replicas_entry_search(&c->replicas, new_entry)) {
CLASS(bch_replicas_cpu, new_r)();
@ -375,7 +375,7 @@ static void __replicas_entry_kill(struct bch_fs *c, struct bch_replicas_entry_cp
void bch2_replicas_entry_kill(struct bch_fs *c, struct bch_replicas_entry_v1 *kill)
{
lockdep_assert_held(&c->mark_lock);
lockdep_assert_held(&c->capacity.mark_lock);
lockdep_assert_held(&c->sb_lock);
struct bch_replicas_entry_cpu *e = replicas_entry_search(&c->replicas, kill);
@ -408,7 +408,7 @@ void bch2_replicas_entry_put_many(struct bch_fs *c, struct bch_replicas_entry_v1
BUG_ON(r->data_type != BCH_DATA_journal);
verify_replicas_entry(r);
scoped_guard(percpu_read, &c->mark_lock) {
scoped_guard(percpu_read, &c->capacity.mark_lock) {
int ret = __replicas_entry_put(c, r, nr);
if (!ret)
return;
@ -421,7 +421,7 @@ void bch2_replicas_entry_put_many(struct bch_fs *c, struct bch_replicas_entry_v1
}
guard(mutex)(&c->sb_lock);
scoped_guard(percpu_write, &c->mark_lock) {
scoped_guard(percpu_write, &c->capacity.mark_lock) {
struct bch_replicas_entry_cpu *e = replicas_entry_search(&c->replicas, r);
if (e && !atomic_read(&e->ref))
__replicas_entry_kill(c, e);
@ -432,7 +432,7 @@ void bch2_replicas_entry_put_many(struct bch_fs *c, struct bch_replicas_entry_v1
static inline bool bch2_replicas_entry_get_inmem(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
struct bch_replicas_entry_cpu *e = replicas_entry_search(&c->replicas, r);
if (e)
atomic_inc(&e->ref);
@ -458,7 +458,7 @@ int bch2_replicas_gc_reffed(struct bch_fs *c)
guard(mutex)(&c->sb_lock);
scoped_guard(percpu_write, &c->mark_lock) {
scoped_guard(percpu_write, &c->capacity.mark_lock) {
unsigned dst = 0;
for (unsigned i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry_cpu *e =
@ -576,7 +576,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
bch2_cpu_replicas_sort(&new_r);
guard(percpu_write)(&c->mark_lock);
guard(percpu_write)(&c->capacity.mark_lock);
swap(c->replicas, new_r);
return 0;
@ -773,7 +773,7 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
bool bch2_can_read_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, struct printbuf *err)
{
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
for_each_cpu_replicas_entry(&c->replicas, i) {
struct bch_replicas_entry_v1 *e = &i->e;


@ -5,6 +5,8 @@
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include "init/dev_types.h"
#include "util/clock_types.h"
#include "util/fifo.h"
@ -118,4 +120,60 @@ struct write_point_specifier {
unsigned long v;
};
struct bch_fs_usage_base;
struct bch_fs_capacity_pcpu {
u64 sectors_available;
u64 online_reserved;
};
struct bch_fs_capacity {
u64 capacity; /* sectors */
u64 reserved; /* sectors */
/*
* When capacity _decreases_ (due to a disk being removed), we
* increment capacity_gen - this invalidates outstanding reservations
* and forces them to be revalidated
*/
u32 capacity_gen;
unsigned bucket_size_max;
atomic64_t sectors_available;
struct mutex sectors_available_lock;
struct bch_fs_capacity_pcpu __percpu *pcpu;
struct percpu_rw_semaphore mark_lock;
seqcount_t usage_lock;
struct bch_fs_usage_base __percpu *usage;
};
struct bch_fs_allocator {
struct bch_devs_mask rw_devs[BCH_DATA_NR];
unsigned long rw_devs_change_count;
spinlock_t freelist_lock;
struct closure_waitlist freelist_wait;
unsigned long last_stuck;
open_bucket_idx_t open_buckets_freelist;
open_bucket_idx_t open_buckets_nr_free;
struct closure_waitlist open_buckets_wait;
struct open_bucket open_buckets[OPEN_BUCKETS_COUNT];
open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT];
open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
open_bucket_idx_t open_buckets_partial_nr;
struct write_point write_points[WRITE_POINT_MAX];
struct hlist_head write_points_hash[WRITE_POINT_HASH_NR];
struct mutex write_points_hash_lock;
unsigned write_points_nr;
struct write_point btree_write_point;
struct write_point reconcile_write_point;
};
#endif /* _BCACHEFS_ALLOC_TYPES_H */


@ -241,11 +241,13 @@
#include "alloc/types.h"
#include "btree/check_types.h"
#include "btree/interior_types.h"
#include "btree/journal_overlay_types.h"
#include "btree/types.h"
#include "btree/node_scan_types.h"
#include "btree/write_buffer_types.h"
#include "data/copygc_types.h"
#include "data/ec_types.h"
#include "data/keylist_types.h"
#include "data/nocow_locking_types.h"
@ -256,13 +258,14 @@
#include "fs/quota_types.h"
#include "init/error_types.h"
#include "init/passes_types.h"
#include "init/dev_types.h"
#include "journal/types.h"
#include "sb/counters_types.h"
#include "sb/errors_types.h"
#include "sb/io_types.h"
#include "sb/members_types.h"
#include "snapshots/snapshot_types.h"
@ -514,12 +517,6 @@ enum bch_time_stats {
/* Number of nodes btree coalesce will try to coalesce at once */
#define GC_MERGE_NODES 4U
/* Maximum number of nodes we might need to allocate atomically: */
#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))
/* Size of the freelist we allocate btree nodes from: */
#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4)
#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX)
struct btree;
@ -712,23 +709,6 @@ struct btree_debug {
};
#define BCH_LINK_MAX U32_MAX
#define BCH_TRANSACTIONS_NR 128
struct btree_transaction_stats {
struct bch2_time_stats duration;
struct bch2_time_stats lock_hold_times;
struct mutex lock;
unsigned nr_max_paths;
unsigned max_mem;
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif
char *max_paths_text;
};
struct bch_fs_pcpu {
u64 sectors_available;
};
struct journal_seq_blacklist_table {
size_t nr;
@ -739,10 +719,6 @@ struct journal_seq_blacklist_table {
} entries[];
};
struct btree_trans_buf {
struct btree_trans *trans;
};
#define BCH_WRITE_REFS() \
x(journal) \
x(trans) \
@ -793,19 +769,22 @@ struct bch_fs {
struct super_block *vfs_sb;
dev_t dev;
char name[40];
struct stdio_redirect *stdio;
struct task_struct *stdio_filter;
unsigned loglevel;
unsigned prev_loglevel;
/*
* Certain operations are only allowed in single threaded mode, during
* recovery, and we want to assert that this is the case:
*/
struct task_struct *recovery_task;
/* ro/rw, add/remove/resize devices: */
struct rw_semaphore state_lock;
/* Counts outstanding writes, for clean transition to read-only */
struct enumerated_ref writes;
/*
* Certain operations are only allowed in single threaded mode, during
* recovery, and we want to assert that this is the case:
*/
struct task_struct *recovery_task;
/*
* Analagous to c->writes, for asynchronous ops that don't necessarily
@ -813,121 +792,83 @@ struct bch_fs {
*/
refcount_t ro_ref;
wait_queue_head_t ro_ref_wait;
struct work_struct read_only_work;
struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX];
struct bch_devs_mask devs_online;
struct bch_devs_mask devs_removed;
struct bch_devs_mask devs_rotational;
u8 extent_type_u64s[31];
u8 extent_types_known;
struct bch_accounting_mem accounting;
struct bch_replicas_cpu replicas;
struct journal_entry_res btree_root_journal_res;
struct journal_entry_res clock_journal_res;
struct bch_disk_groups_cpu __rcu *disk_groups;
struct bch_opts opts;
atomic_t opt_change_cookie;
unsigned loglevel;
unsigned prev_loglevel;
/* Updated by bch2_sb_update():*/
struct {
__uuid_t uuid;
__uuid_t user_uuid;
u16 version;
u16 version_incompat;
u16 version_incompat_allowed;
u16 version_min;
u16 version_upgrade_complete;
u8 nr_devices;
u8 clean;
bool multi_device; /* true if we've ever had more than one device */
u8 encryption_type;
u64 time_base_lo;
u32 time_base_hi;
unsigned time_units_per_sec;
unsigned nsec_per_time_unit;
u64 features;
u64 compat;
u64 recovery_passes_required;
unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
u64 btrees_lost_data;
} sb;
unsigned long incompat_versions_requested[BITS_TO_LONGS(BCH_VERSION_MINOR(bcachefs_metadata_version_current))];
struct unicode_map *cf_encoding;
struct bch_sb_cpu sb;
struct bch_sb_handle disk_sb;
unsigned short block_bits; /* ilog2(block_size) */
u16 btree_foreground_merge_threshold;
struct closure sb_write;
struct mutex sb_lock;
unsigned long incompat_versions_requested[BITS_TO_LONGS(BCH_VERSION_MINOR(bcachefs_metadata_version_current))];
struct unicode_map *cf_encoding;
unsigned short block_bits; /* ilog2(block_size) */
u16 btree_foreground_merge_threshold;
struct delayed_work maybe_schedule_btree_bitmap_gc;
/* snapshot.c: */
struct snapshot_table __rcu *snapshots;
struct mutex snapshot_table_lock;
struct rw_semaphore snapshot_create_lock;
struct bch_fs_counters counters;
struct bch2_time_stats times[BCH_TIME_STAT_NR];
struct bch_fs_errors errors;
struct snapshot_delete snapshot_delete;
struct work_struct snapshot_wait_for_pagecache_and_delete_work;
snapshot_id_list snapshots_unlinked;
struct mutex snapshots_unlinked_lock;
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
struct async_obj_list async_objs[BCH_ASYNC_OBJ_NR];
#endif
struct journal journal;
u64 journal_replay_seq_start;
u64 journal_replay_seq_end;
GENRADIX(struct journal_replay *) journal_entries;
u64 journal_entries_base_seq;
struct journal_keys journal_keys;
struct list_head journal_iters;
struct journal_seq_blacklist_table *journal_seq_blacklist_table;
struct bch_fs_recovery recovery;
/* BTREE CACHE */
struct bio_set btree_bio;
struct workqueue_struct *btree_read_complete_wq;
struct workqueue_struct *btree_write_submit_wq;
struct btree_root btree_roots_known[BTREE_ID_NR];
DARRAY(struct btree_root) btree_roots_extra;
struct mutex btree_root_lock;
struct btree_cache btree_cache;
/*
* Cache of allocated btree nodes - if we allocate a btree node and
* don't use it, if we free it that space can't be reused until going
* _all_ the way through the allocator (which exposes us to a livelock
* when allocating btree reserves fail halfway through) - instead, we
* can stick them here:
* A btree node on disk could have too many bsets for an iterator to fit
* on the stack - have to dynamically allocate them
*/
struct btree_alloc btree_reserve_cache[BTREE_NODE_RESERVE * 2];
unsigned btree_reserve_cache_nr;
struct mutex btree_reserve_cache_lock;
mempool_t fill_iter;
mempool_t btree_bounce_pool;
struct bio_set btree_bio;
struct workqueue_struct *btree_read_complete_wq;
struct workqueue_struct *btree_write_submit_wq;
struct journal_entry_res btree_root_journal_res;
struct workqueue_struct *btree_write_complete_wq;
mempool_t btree_interior_update_pool;
struct list_head btree_interior_update_list;
struct list_head btree_interior_updates_unwritten;
struct mutex btree_interior_update_lock;
struct mutex btree_interior_update_commit_lock;
struct closure_waitlist btree_interior_update_wait;
struct bch_fs_btree_cache btree_cache;
struct bch_fs_btree_key_cache btree_key_cache;
struct bch_fs_btree_write_buffer btree_write_buffer;
struct bch_fs_btree_trans btree_trans;
struct bch_fs_btree_reserve_cache btree_reserve_cache;
struct bch_fs_btree_interior_updates btree_interior_updates;
struct bch_fs_btree_node_rewrites btree_node_rewrites;
struct find_btree_nodes found_btree_nodes;
struct workqueue_struct *btree_interior_update_worker;
struct work_struct btree_interior_update_work;
struct bch_fs_gc gc;
struct bch_fs_gc_gens gc_gens;
struct workqueue_struct *btree_node_rewrite_worker;
struct list_head btree_node_rewrites;
struct list_head btree_node_rewrites_pending;
spinlock_t btree_node_rewrites_lock;
struct closure_waitlist btree_node_rewrites_wait;
struct bch_accounting_mem accounting;
struct bch_replicas_cpu replicas;
struct bch_disk_groups_cpu __rcu *disk_groups;
struct bch_fs_capacity capacity;
struct bch_fs_allocator allocator;
struct buckets_waiting_for_journal buckets_waiting_for_journal;
struct bch_fs_snapshots snapshots;
/* btree_io.c: */
spinlock_t btree_write_error_lock;
@ -935,120 +876,21 @@ struct bch_fs {
atomic64_t nr;
atomic64_t bytes;
} btree_write_stats[BTREE_WRITE_TYPE_NR];
/* btree_iter.c: */
struct seqmutex btree_trans_lock;
struct list_head btree_trans_list;
mempool_t btree_trans_pool;
mempool_t btree_trans_mem_pool;
struct btree_trans_buf __percpu *btree_trans_bufs;
struct srcu_struct btree_trans_barrier;
bool btree_trans_barrier_initialized;
struct btree_key_cache btree_key_cache;
struct btree_write_buffer btree_write_buffer;
struct workqueue_struct *btree_update_wq;
struct workqueue_struct *btree_write_complete_wq;
/* copygc needs its own workqueue for index updates.. */
struct workqueue_struct *copygc_wq;
/*
* Use a dedicated wq for write ref holder tasks. Required to avoid
* dependency problems with other wq tasks that can block on ref
* draining, such as read-only transition.
*/
struct workqueue_struct *write_ref_wq;
struct workqueue_struct *write_ref_wq;
struct workqueue_struct *promote_wq;
struct semaphore __percpu *promote_limit;
struct workqueue_struct *promote_wq;
struct semaphore __percpu *promote_limit;
/* ALLOCATION */
struct bch_devs_mask online_devs;
struct bch_devs_mask rw_devs[BCH_DATA_NR];
unsigned long rw_devs_change_count;
u64 capacity; /* sectors */
u64 reserved; /* sectors */
/*
* When capacity _decreases_ (due to a disk being removed), we
* increment capacity_gen - this invalidates outstanding reservations
* and forces them to be revalidated
*/
u32 capacity_gen;
unsigned bucket_size_max;
atomic64_t sectors_available;
struct mutex sectors_available_lock;
struct bch_fs_pcpu __percpu *pcpu;
struct percpu_rw_semaphore mark_lock;
seqcount_t usage_lock;
struct bch_fs_usage_base __percpu *usage;
u64 __percpu *online_reserved;
unsigned long allocator_last_stuck;
struct io_clock io_clock[2];
/* JOURNAL SEQ BLACKLIST */
struct journal_seq_blacklist_table *
journal_seq_blacklist_table;
/* ALLOCATOR */
spinlock_t freelist_lock;
struct closure_waitlist freelist_wait;
open_bucket_idx_t open_buckets_freelist;
open_bucket_idx_t open_buckets_nr_free;
struct closure_waitlist open_buckets_wait;
struct open_bucket open_buckets[OPEN_BUCKETS_COUNT];
open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT];
open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
open_bucket_idx_t open_buckets_partial_nr;
struct write_point btree_write_point;
struct write_point reconcile_write_point;
struct write_point write_points[WRITE_POINT_MAX];
struct hlist_head write_points_hash[WRITE_POINT_HASH_NR];
struct mutex write_points_hash_lock;
unsigned write_points_nr;
struct buckets_waiting_for_journal buckets_waiting_for_journal;
/* GARBAGE COLLECTION */
struct work_struct gc_gens_work;
unsigned long gc_count;
enum btree_id gc_gens_btree;
struct bpos gc_gens_pos;
/*
* Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos]
* has been marked by GC.
*
* gc_cur_phase is a superset of btree_ids (BTREE_ID_extents etc.)
*
* Protected by gc_pos_lock. Only written to by GC thread, so GC thread
* can read without a lock.
*/
seqcount_t gc_pos_lock;
struct gc_pos gc_pos;
/*
* The allocation code needs gc_mark in struct bucket to be correct, but
* it's not while a gc is in progress.
*/
struct rw_semaphore gc_lock;
struct mutex gc_gens_lock;
struct io_clock io_clock[2];
struct journal_entry_res clock_journal_res;
/* IO PATH */
struct workqueue_struct *btree_update_wq;
struct bio_set bio_read;
struct bio_set bio_read_split;
struct bio_set bio_write;
@ -1059,10 +901,6 @@ struct bch_fs {
nocow_locks;
struct rhashtable promote_table;
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
struct async_obj_list async_objs[BCH_ASYNC_OBJ_NR];
#endif
mempool_t compression_bounce[2];
mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR];
size_t zstd_workspace_size;
@ -1076,38 +914,9 @@ struct bch_fs {
struct list_head moving_context_list;
struct mutex moving_context_lock;
/* REBALANCE */
struct bch_fs_reconcile reconcile;
/* COPYGC */
struct task_struct *copygc_thread;
struct write_point copygc_write_point;
s64 copygc_wait_at;
s64 copygc_wait;
bool copygc_running;
wait_queue_head_t copygc_running_wq;
/* STRIPES: */
GENRADIX(struct gc_stripe) gc_stripes;
struct hlist_head ec_stripes_new[32];
struct hlist_head ec_stripes_new_buckets[64];
spinlock_t ec_stripes_new_lock;
/* ERASURE CODING */
struct list_head ec_stripe_head_list;
struct mutex ec_stripe_head_lock;
struct list_head ec_stripe_new_list;
struct mutex ec_stripe_new_lock;
wait_queue_head_t ec_stripe_new_wait;
struct work_struct ec_stripe_create_work;
u64 ec_stripe_hint;
struct work_struct ec_stripe_delete_work;
struct bio_set ec_bioset;
struct bch_fs_copygc copygc;
struct bch_fs_ec ec;
/* REFLINK */
reflink_gc_table reflink_gc_table;
@ -1129,11 +938,6 @@ struct bch_fs {
/* QUOTAS */
struct bch_memquota_type quotas[QTYP_NR];
/* RECOVERY */
u64 journal_replay_seq_start;
u64 journal_replay_seq_end;
struct bch_fs_recovery recovery;
/* DEBUG JUNK */
struct dentry *fs_debug_dir;
struct dentry *btree_debug_dir;
@ -1142,38 +946,6 @@ struct bch_fs {
struct btree *verify_data;
struct btree_node *verify_ondisk;
struct mutex verify_lock;
/*
* A btree node on disk could have too many bsets for an iterator to fit
* on the stack - have to dynamically allocate them
*/
mempool_t fill_iter;
mempool_t btree_bounce_pool;
struct journal journal;
GENRADIX(struct journal_replay *) journal_entries;
u64 journal_entries_base_seq;
struct journal_keys journal_keys;
struct list_head journal_iters;
struct find_btree_nodes found_btree_nodes;
u64 last_bucket_seq_cleanup;
struct bch_fs_counters counters;
struct bch2_time_stats times[BCH_TIME_STAT_NR];
struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
/* ERRORS */
struct list_head fsck_error_msgs;
struct mutex fsck_error_msgs_lock;
bool fsck_alloc_msgs_err;
bch_sb_errors_cpu fsck_error_counts;
struct mutex fsck_error_counts_lock;
};
static inline int __bch2_err_throw(struct bch_fs *c, int err)
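The comment on capacity_gen in the struct bch_fs listing above packs a lot in: a shrinking filesystem cannot honor reservations made against the old capacity, so it bumps a generation counter instead of chasing down each outstanding reservation. Below is a minimal, self-contained sketch of that idea; the demo_* names are invented for illustration and are not the bcachefs reservation API.

#include <stdbool.h>
#include <stdint.h>

/* Demo stand-ins only - not the real bcachefs reservation structures. */
struct demo_fs {
	uint64_t capacity;	/* sectors */
	uint32_t capacity_gen;	/* bumped whenever capacity shrinks */
};

struct demo_reservation {
	uint64_t sectors;	/* sectors reserved */
	uint32_t gen;		/* capacity_gen observed when the reservation was taken */
};

/*
 * A reservation taken before a capacity decrease may no longer fit, so the
 * caller compares generations and re-checks against the new capacity when
 * they differ.
 */
static bool demo_reservation_still_valid(const struct demo_fs *c,
					 const struct demo_reservation *res)
{
	return res->gen == c->capacity_gen;
}

int main(void)
{
	struct demo_fs fs = { .capacity = 1 << 20, .capacity_gen = 0 };
	struct demo_reservation res = { .sectors = 128, .gen = fs.capacity_gen };

	fs.capacity /= 2;	/* a device was removed... */
	fs.capacity_gen++;	/* ...so outstanding reservations are invalidated */

	return demo_reservation_still_valid(&fs, &res) ? 0 : 1;
}

When the generations differ, the caller drops the stale reservation and retries against the reduced capacity, which is what the comment means by reservations being "revalidated".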


@ -36,7 +36,7 @@ void bch2_recalc_btree_reserve(struct bch_fs *c)
{
unsigned reserve = 16;
if (!c->btree_roots_known[0].b)
if (!c->btree_cache.roots_known[0].b)
reserve += 8;
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
@ -51,7 +51,8 @@ void bch2_recalc_btree_reserve(struct bch_fs *c)
static inline size_t btree_cache_can_free(struct btree_cache_list *list)
{
struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
struct bch_fs_btree_cache *bc =
container_of(list, struct bch_fs_btree_cache, live[list->idx]);
size_t can_free = list->nr;
if (!list->idx)
@ -59,7 +60,7 @@ static inline size_t btree_cache_can_free(struct btree_cache_list *list)
return can_free;
}
static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
static void btree_node_to_freedlist(struct bch_fs_btree_cache *bc, struct btree *b)
{
BUG_ON(!list_empty(&b->list));
@ -69,7 +70,7 @@ static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
list_add(&b->list, &bc->freed_nonpcpu);
}
static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *b)
static void __bch2_btree_node_to_freelist(struct bch_fs_btree_cache *bc, struct btree *b)
{
BUG_ON(!list_empty(&b->list));
BUG_ON(!b->data);
@ -80,7 +81,7 @@ static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *
void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
{
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
scoped_guard(mutex, &bc->lock)
__bch2_btree_node_to_freelist(bc, b);
@ -118,7 +119,7 @@ void __btree_node_data_free(struct btree *b)
b->aux_data = NULL;
}
static void btree_node_data_free(struct btree_cache *bc, struct btree *b)
static void btree_node_data_free(struct bch_fs_btree_cache *bc, struct btree *b)
{
BUG_ON(list_empty(&b->list));
list_del_init(&b->list);
@ -203,7 +204,7 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
return b;
}
static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
static inline bool __btree_node_pinned(struct bch_fs_btree_cache *bc, struct btree *b)
{
struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
@ -216,7 +217,7 @@ static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
void bch2_node_pin(struct bch_fs *c, struct btree *b)
{
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
guard(mutex)(&bc->lock);
if (!btree_node_is_root(c, b) && !btree_node_pinned(b)) {
@ -229,12 +230,12 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b)
void bch2_btree_cache_unpin(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
struct btree *b, *n;
guard(mutex)(&bc->lock);
c->btree_cache.pinned_nodes_mask[0] = 0;
c->btree_cache.pinned_nodes_mask[1] = 0;
bc->pinned_nodes_mask[0] = 0;
bc->pinned_nodes_mask[1] = 0;
list_for_each_entry_safe(b, n, &bc->live[1].list, list) {
clear_btree_node_pinned(b);
@ -246,7 +247,7 @@ void bch2_btree_cache_unpin(struct bch_fs *c)
/* Btree in memory cache - hash table */
void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
void __bch2_btree_node_hash_remove(struct bch_fs_btree_cache *bc, struct btree *b)
{
lockdep_assert_held(&bc->lock);
@ -264,13 +265,13 @@ void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
list_del_init(&b->list);
}
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
void bch2_btree_node_hash_remove(struct bch_fs_btree_cache *bc, struct btree *b)
{
__bch2_btree_node_hash_remove(bc, b);
__bch2_btree_node_to_freelist(bc, b);
}
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
int __bch2_btree_node_hash_insert(struct bch_fs_btree_cache *bc, struct btree *b)
{
BUG_ON(!list_empty(&b->list));
BUG_ON(b->hash_val);
@ -289,7 +290,7 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
return 0;
}
int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
int bch2_btree_node_hash_insert(struct bch_fs_btree_cache *bc, struct btree *b,
unsigned level, enum btree_id id)
{
b->c.level = level;
@ -326,8 +327,8 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans,
}
__flatten
static inline struct btree *btree_cache_find(struct btree_cache *bc,
const struct bkey_i *k)
static inline struct btree *btree_cache_find(struct bch_fs_btree_cache *bc,
const struct bkey_i *k)
{
u64 v = btree_ptr_hash_val(k);
@ -337,7 +338,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
static int __btree_node_reclaim_checks(struct bch_fs *c, struct btree *b,
bool flush, bool locked)
{
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
lockdep_assert_held(&bc->lock);
@ -403,7 +404,7 @@ static int __btree_node_reclaim_checks(struct bch_fs *c, struct btree *b,
*/
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
{
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
int ret = 0;
lockdep_assert_held(&bc->lock);
@ -455,7 +456,8 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
struct btree_cache_list *list = shrink->private_data;
struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
struct bch_fs_btree_cache *bc =
container_of(list, struct bch_fs_btree_cache, live[list->idx]);
struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
struct btree *b, *t;
unsigned long nr = sc->nr_to_scan;
@ -573,7 +575,7 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
void bch2_fs_btree_cache_exit(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
struct btree *b, *t;
shrinker_free(bc->live[1].shrink);
@ -608,8 +610,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
cond_resched();
}
BUG_ON(!bch2_journal_error(&c->journal) &&
atomic_long_read(&c->btree_cache.nr_dirty));
BUG_ON(!bch2_journal_error(&c->journal) && atomic_long_read(&bc->nr_dirty));
list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
@ -627,11 +628,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
if (bc->table_init_done)
rhashtable_destroy(&bc->table);
darray_exit(&bc->roots_extra);
}
int bch2_fs_btree_cache_init(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
struct shrinker *shrink;
if (rhashtable_init(&bc->table, &bch_btree_cache_params))
@ -675,8 +678,9 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
return 0;
}
void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
void bch2_fs_btree_cache_init_early(struct bch_fs_btree_cache *bc)
{
mutex_init(&bc->root_lock);
mutex_init(&bc->lock);
for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) {
bc->live[i].idx = i;
@ -695,7 +699,7 @@ void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
*/
void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
{
struct btree_cache *bc = &trans->c->btree_cache;
struct bch_fs_btree_cache *bc = &trans->c->btree_cache;
if (bc->alloc_lock == current) {
event_inc_trace(trans->c, btree_cache_cannibalize_unlock, buf,
@ -707,7 +711,7 @@ void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
static int __btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
{
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
struct task_struct *old;
old = NULL;
@ -743,7 +747,7 @@ int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure
static struct btree *btree_node_cannibalize(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
struct btree *b;
for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
@ -769,7 +773,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks)
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
struct list_head *freed = pcpu_read_locks
? &bc->freed_pcpu
: &bc->freed_nonpcpu;
@ -895,7 +899,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
bool sync)
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
struct btree *b;
EBUG_ON(path && level + 1 != path->level);
@ -1037,7 +1041,7 @@ static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btr
unsigned long trace_ip)
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
struct btree *b;
int ret;
@ -1232,7 +1236,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
bool nofill)
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
struct btree *b;
int ret;
@ -1315,7 +1319,7 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
enum btree_id btree_id, unsigned level)
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
BUG_ON(path && !btree_node_locked(path, level + 1));
BUG_ON(level >= BTREE_MAX_DEPTH);
@ -1334,7 +1338,7 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
struct btree *b;
b = btree_cache_find(bc, k);
@ -1471,7 +1475,7 @@ static const char * const bch2_btree_cache_not_freed_reasons_strs[] = {
NULL
};
void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs_btree_cache *bc)
{
struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
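Nearly every function touched in this file follows the same shape: the subsystem struct is embedded in struct bch_fs, helpers take a pointer to the embedded struct, and container_of() recovers the enclosing bch_fs when it is needed. A self-contained sketch of that idiom (demo types only, not the real bcachefs definitions):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_btree_cache {
	unsigned nr_live;
};

struct demo_fs {
	const char *name;
	struct demo_btree_cache btree_cache;	/* embedded, not a pointer */
};

/* The helper takes the embedded struct; the filesystem is recovered on demand. */
static void demo_cache_report(struct demo_btree_cache *bc)
{
	struct demo_fs *c = container_of(bc, struct demo_fs, btree_cache);

	printf("%s: %u live nodes\n", c->name, bc->nr_live);
}

int main(void)
{
	struct demo_fs fs = { .name = "demo", .btree_cache = { .nr_live = 3 } };

	demo_cache_report(&fs.btree_cache);
	return 0;
}

This is why the rename from struct btree_cache to struct bch_fs_btree_cache is almost purely mechanical: the container_of() back-references keep working as long as the member name in bch_fs stays the same.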


@ -14,11 +14,11 @@ void bch2_recalc_btree_reserve(struct bch_fs *);
void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *);
void __bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
void __bch2_btree_node_hash_remove(struct bch_fs_btree_cache *, struct btree *);
void bch2_btree_node_hash_remove(struct bch_fs_btree_cache *, struct btree *);
int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
int __bch2_btree_node_hash_insert(struct bch_fs_btree_cache *, struct btree *);
int bch2_btree_node_hash_insert(struct bch_fs_btree_cache *, struct btree *,
unsigned, enum btree_id);
void bch2_node_pin(struct bch_fs *, struct btree *);
@ -48,7 +48,7 @@ void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *);
void bch2_fs_btree_cache_exit(struct bch_fs *);
int bch2_fs_btree_cache_init(struct bch_fs *);
void bch2_fs_btree_cache_init_early(struct btree_cache *);
void bch2_fs_btree_cache_init_early(struct bch_fs_btree_cache *);
static inline u64 btree_ptr_hash_val(const struct bkey_i *k)
{
@ -119,21 +119,21 @@ static inline unsigned btree_blocks(const struct bch_fs *c)
static inline unsigned btree_id_nr_alive(struct bch_fs *c)
{
return BTREE_ID_NR + c->btree_roots_extra.nr;
return BTREE_ID_NR + c->btree_cache.roots_extra.nr;
}
static inline struct btree_root *bch2_btree_id_root(struct bch_fs *c, unsigned id)
{
if (likely(id < BTREE_ID_NR)) {
return &c->btree_roots_known[id];
return &c->btree_cache.roots_known[id];
} else {
unsigned idx = id - BTREE_ID_NR;
/* This can happen when we're called from btree_node_scan */
if (idx >= c->btree_roots_extra.nr)
if (idx >= c->btree_cache.roots_extra.nr)
return NULL;
return &c->btree_roots_extra.data[idx];
return &c->btree_cache.roots_extra.data[idx];
}
}
@ -160,7 +160,7 @@ void __bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *,
enum btree_id, unsigned, struct bkey_s_c);
void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs_btree_cache *);
#define trace_btree_node(_c, _b, event) \
event_inc_trace(c, event, buf, bch2_btree_pos_to_text(&buf, c, b))


@ -76,14 +76,14 @@ static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
guard(preempt)();
write_seqcount_begin(&c->gc_pos_lock);
c->gc_pos = new_pos;
write_seqcount_end(&c->gc_pos_lock);
write_seqcount_begin(&c->gc.pos_lock);
c->gc.pos = new_pos;
write_seqcount_end(&c->gc.pos_lock);
}
static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) < 0);
BUG_ON(gc_pos_cmp(new_pos, c->gc.pos) < 0);
__gc_pos_set(c, new_pos);
}
@ -798,7 +798,7 @@ static void bch2_gc_free(struct bch_fs *c)
bch2_accounting_gc_free(c);
genradix_free(&c->reflink_gc_table);
genradix_free(&c->gc_stripes);
genradix_free(&c->ec.gc_stripes);
for_each_member_device(c, ca)
genradix_free(&ca->buckets_gc);
@ -953,7 +953,7 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans,
struct bch_fs *c = trans->c;
CLASS(printbuf, buf)();
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
struct gc_stripe *m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
struct gc_stripe *m = genradix_ptr(&c->ec.gc_stripes, k.k->p.offset);
bool bad = false;
for (unsigned i = 0; i < s->nr_blocks; i++) {
@ -1024,7 +1024,7 @@ int bch2_check_allocations(struct bch_fs *c)
int ret;
guard(rwsem_read)(&c->state_lock);
guard(rwsem_write)(&c->gc_lock);
guard(rwsem_write)(&c->gc.lock);
bch2_btree_interior_updates_flush(c);
@ -1046,14 +1046,12 @@ int bch2_check_allocations(struct bch_fs *c)
if (ret)
goto out;
c->gc_count++;
ret = bch2_gc_alloc_done(c) ?:
bch2_gc_accounting_done(c) ?:
bch2_gc_stripes_done(c) ?:
bch2_gc_reflink_done(c);
out:
scoped_guard(percpu_write, &c->mark_lock) {
scoped_guard(percpu_write, &c->capacity.mark_lock) {
/* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_not_running));
bch2_gc_free(c);
@ -1063,7 +1061,7 @@ out:
* At startup, allocations can happen directly instead of via the
* allocator thread - issue wakeup in case they blocked on gc_lock:
*/
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);
if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags))
bch2_sb_members_clean_deleted(c);
@ -1104,7 +1102,7 @@ int bch2_gc_gens(struct bch_fs *c)
u64 b, start_time = local_clock();
int ret;
if (!mutex_trylock(&c->gc_gens_lock))
if (!mutex_trylock(&c->gc_gens.lock))
return 0;
event_inc_trace(c, gc_gens_start, buf);
@ -1115,7 +1113,7 @@ int bch2_gc_gens(struct bch_fs *c)
* state lock at the start of going RO.
*/
if (!down_read_trylock(&c->state_lock)) {
mutex_unlock(&c->gc_gens_lock);
mutex_unlock(&c->gc_gens.lock);
return 0;
}
@ -1137,8 +1135,7 @@ int bch2_gc_gens(struct bch_fs *c)
for (unsigned i = 0; i < BTREE_ID_NR; i++)
if (btree_type_has_data_ptrs(i)) {
c->gc_gens_btree = i;
c->gc_gens_pos = POS_MIN;
c->gc_gens.pos = BBPOS(i, POS_MIN);
ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, i,
@ -1172,10 +1169,7 @@ int bch2_gc_gens(struct bch_fs *c)
if (ret)
goto err;
c->gc_gens_btree = 0;
c->gc_gens_pos = POS_MIN;
c->gc_count++;
c->gc_gens.pos = BBPOS_MIN;
bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
event_inc_trace(c, gc_gens_end, buf);
@ -1192,7 +1186,7 @@ err:
}
up_read(&c->state_lock);
mutex_unlock(&c->gc_gens_lock);
mutex_unlock(&c->gc_gens.lock);
if (!bch2_err_matches(ret, EROFS))
bch_err_fn(c, ret);
return ret;
@ -1200,7 +1194,7 @@ err:
static void bch2_gc_gens_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, gc_gens_work);
struct bch_fs *c = container_of(work, struct bch_fs, gc_gens.work);
bch2_gc_gens(c);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens);
}
@ -1208,7 +1202,7 @@ static void bch2_gc_gens_work(struct work_struct *work)
void bch2_gc_gens_async(struct bch_fs *c)
{
if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_gc_gens) &&
!queue_work(c->write_ref_wq, &c->gc_gens_work))
!queue_work(c->write_ref_wq, &c->gc_gens.work))
enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens);
}
@ -1277,9 +1271,9 @@ int bch2_merge_btree_nodes(struct bch_fs *c)
void bch2_fs_btree_gc_init_early(struct bch_fs *c)
{
seqcount_init(&c->gc_pos_lock);
INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);
seqcount_init(&c->gc.pos_lock);
INIT_WORK(&c->gc_gens.work, bch2_gc_gens_work);
init_rwsem(&c->gc_lock);
mutex_init(&c->gc_gens_lock);
init_rwsem(&c->gc.lock);
mutex_init(&c->gc_gens.lock);
}


@ -71,9 +71,9 @@ static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
bool ret;
do {
seq = read_seqcount_begin(&c->gc_pos_lock);
ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
} while (read_seqcount_retry(&c->gc_pos_lock, seq));
seq = read_seqcount_begin(&c->gc.pos_lock);
ret = gc_pos_cmp(pos, c->gc.pos) <= 0;
} while (read_seqcount_retry(&c->gc.pos_lock, seq));
return ret;
}
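The loop in gc_visited() above is the standard seqcount read protocol: snapshot the counter, copy the data, and retry if a writer ran in between or is still running. Here is a toy, self-contained version of the same protocol; it deliberately ignores the memory-barrier details that the kernel's seqcount_t primitives take care of, so treat it as an illustration of the control flow only.

#include <stdatomic.h>
#include <stdio.h>

/* Toy seqcount: even = stable, odd = writer in progress. Demo only. */
struct demo_seqcount { _Atomic unsigned seq; };
struct demo_gc_pos  { unsigned phase, level; };

static struct demo_seqcount pos_lock;
static struct demo_gc_pos   gc_pos;

static void demo_gc_pos_set(struct demo_gc_pos new_pos)
{
	atomic_fetch_add(&pos_lock.seq, 1);	/* now odd: readers will retry */
	gc_pos = new_pos;
	atomic_fetch_add(&pos_lock.seq, 1);	/* even again: snapshot is stable */
}

static struct demo_gc_pos demo_gc_pos_read(void)
{
	struct demo_gc_pos p;
	unsigned seq;

	do {
		seq = atomic_load(&pos_lock.seq);
		p = gc_pos;
	} while ((seq & 1) || seq != atomic_load(&pos_lock.seq));

	return p;
}

int main(void)
{
	demo_gc_pos_set((struct demo_gc_pos) { .phase = 2, .level = 1 });

	struct demo_gc_pos p = demo_gc_pos_read();
	printf("phase %u level %u\n", p.phase, p.level);
	return 0;
}

The writer side is __gc_pos_set() in btree/gc.c above; as the struct bch_fs comment notes, GC is the only writer, which is why the GC thread itself can read gc.pos without taking anything.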


@ -2,6 +2,8 @@
#ifndef _BCACHEFS_BTREE_GC_TYPES_H
#define _BCACHEFS_BTREE_GC_TYPES_H
#include "btree/bbpos_types.h"
#include <linux/generic-radix-tree.h>
#define GC_PHASES() \
@ -31,4 +33,21 @@ struct reflink_gc {
typedef GENRADIX(struct reflink_gc) reflink_gc_table;
struct bch_fs_gc {
seqcount_t pos_lock;
struct gc_pos pos;
/*
* The allocation code needs gc_mark in struct bucket to be correct, but
* it's not while a gc is in progress.
*/
struct rw_semaphore lock;
};
struct bch_fs_gc_gens {
struct bbpos pos;
struct work_struct work;
struct mutex lock;
};
#endif /* _BCACHEFS_BTREE_GC_TYPES_H */
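The gc_gens state also changes shape: the old gc_gens_btree + gc_gens_pos pair becomes a single bbpos (a btree id paired with a position), which is why gc.c above can now write c->gc_gens.pos = BBPOS(i, POS_MIN) and reset with BBPOS_MIN. A rough self-contained sketch of such a pair follows; the demo_* definitions are illustrative and the real ones in btree/bbpos_types.h may differ in detail.

#include <stdio.h>

enum demo_btree_id { DEMO_BTREE_extents, DEMO_BTREE_inodes, DEMO_BTREE_NR };

struct demo_bpos  { unsigned long long inode, offset; };
struct demo_bbpos { enum demo_btree_id btree; struct demo_bpos pos; };

#define DEMO_POS_MIN			((struct demo_bpos) { 0, 0 })
#define DEMO_BBPOS(_btree, _pos)	((struct demo_bbpos) { (_btree), (_pos) })
#define DEMO_BBPOS_MIN			DEMO_BBPOS(DEMO_BTREE_extents, DEMO_POS_MIN)

int main(void)
{
	/* One assignment records both which btree we're in and where: */
	struct demo_bbpos gc_gens_pos = DEMO_BBPOS(DEMO_BTREE_inodes, DEMO_POS_MIN);

	printf("btree %d pos %llu:%llu\n",
	       (int) gc_gens_pos.btree, gc_gens_pos.pos.inode, gc_gens_pos.pos.offset);

	gc_gens_pos = DEMO_BBPOS_MIN;	/* reset once the gens pass finishes */
	return 0;
}

Collapsing the two fields into one value means the position can no longer be updated half-way (btree id set, position stale), which is the kind of small invariant these struct consolidations buy.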


@ -670,7 +670,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
struct bkey_i *accounting;
scoped_guard(percpu_read, &c->mark_lock)
scoped_guard(percpu_read, &c->capacity.mark_lock)
for (accounting = btree_trans_subbuf_base(trans, &trans->accounting);
accounting != btree_trans_subbuf_top(trans, &trans->accounting);
accounting = bkey_next(accounting)) {
@ -695,7 +695,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
return ret;
}
if (unlikely(c->gc_pos.phase)) {
if (unlikely(c->gc.pos.phase)) {
ret = bch2_trans_commit_run_gc_triggers(trans);
if (bch2_fs_fatal_err_on(ret, c, "fatal error in transaction commit: %s", bch2_err_str(ret)))
return ret;
@ -973,7 +973,7 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans,
struct bkey_i *accounting;
retry:
memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta));
percpu_down_read(&c->mark_lock);
percpu_down_read(&c->capacity.mark_lock);
for (accounting = btree_trans_subbuf_base(trans, &trans->accounting);
accounting != btree_trans_subbuf_top(trans, &trans->accounting);
accounting = bkey_next(accounting)) {
@ -984,7 +984,7 @@ retry:
if (ret)
goto revert_fs_usage;
}
percpu_up_read(&c->mark_lock);
percpu_up_read(&c->capacity.mark_lock);
/* Only fatal errors are possible later, so no need to revert this */
bch2_trans_account_disk_usage_change(trans);
@ -1008,7 +1008,7 @@ retry:
}
if (i->type == BCH_JSET_ENTRY_btree_root) {
guard(mutex)(&c->btree_root_lock);
guard(mutex)(&c->btree_cache.root_lock);
struct btree_root *r = bch2_btree_id_root(c, i->btree_id);
@ -1029,13 +1029,13 @@ retry:
return 0;
fatal_err:
bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret));
percpu_down_read(&c->mark_lock);
percpu_down_read(&c->capacity.mark_lock);
revert_fs_usage:
for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
i != accounting;
i = bkey_next(i))
bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags);
percpu_up_read(&c->mark_lock);
percpu_up_read(&c->capacity.mark_lock);
if (bch2_err_matches(ret, BCH_ERR_btree_insert_need_mark_replicas)) {
ret = drop_locks_do(trans, bch2_accounting_update_sb(trans));


@ -338,7 +338,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
BUG_ON(b->ob.nr);
retry:
ret = bch2_alloc_sectors_req(trans, req,
writepoint_ptr(&c->btree_write_point),
writepoint_ptr(&c->allocator.btree_write_point),
min(res->nr_replicas,
c->opts.metadata_replicas_required),
cl, &wp);
@ -357,9 +357,9 @@ retry:
goto retry;
}
mutex_lock(&c->btree_reserve_cache_lock);
while (c->btree_reserve_cache_nr) {
struct btree_alloc *a = c->btree_reserve_cache + --c->btree_reserve_cache_nr;
mutex_lock(&c->btree_reserve_cache.lock);
while (c->btree_reserve_cache.nr) {
struct btree_alloc *a = c->btree_reserve_cache.data + --c->btree_reserve_cache.nr;
/* check if it has sufficient durability */
@ -368,13 +368,13 @@ retry:
bkey_i_to_s_c(&a->k))) {
bkey_copy(&b->key, &a->k);
b->ob = a->ob;
mutex_unlock(&c->btree_reserve_cache_lock);
mutex_unlock(&c->btree_reserve_cache.lock);
goto out;
}
bch2_open_buckets_put(c, &a->ob);
}
mutex_unlock(&c->btree_reserve_cache_lock);
mutex_unlock(&c->btree_reserve_cache.lock);
bkey_btree_ptr_v2_init(&b->key);
bch2_alloc_sectors_append_ptrs(c, wp, &b->key, btree_sectors(c), false);
@ -511,12 +511,12 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
while (p->nr) {
struct btree *b = p->b[--p->nr];
mutex_lock(&c->btree_reserve_cache_lock);
mutex_lock(&c->btree_reserve_cache.lock);
if (c->btree_reserve_cache_nr <
ARRAY_SIZE(c->btree_reserve_cache)) {
if (c->btree_reserve_cache.nr <
ARRAY_SIZE(c->btree_reserve_cache.data)) {
struct btree_alloc *a =
&c->btree_reserve_cache[c->btree_reserve_cache_nr++];
&c->btree_reserve_cache.data[c->btree_reserve_cache.nr++];
a->ob = b->ob;
b->ob.nr = 0;
@ -525,7 +525,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
bch2_open_buckets_put(c, &b->ob);
}
mutex_unlock(&c->btree_reserve_cache_lock);
mutex_unlock(&c->btree_reserve_cache.lock);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
@ -575,7 +575,7 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
struct bch_fs *c = as->c;
if (as->took_gc_lock)
up_read(&c->gc_lock);
up_read(&c->gc.lock);
as->took_gc_lock = false;
bch2_journal_pin_drop(&c->journal, &as->journal);
@ -586,19 +586,19 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
as->start_time);
guard(mutex)(&c->btree_interior_update_lock);
guard(mutex)(&c->btree_interior_updates.lock);
list_del(&as->unwritten_list);
list_del(&as->list);
closure_debug_destroy(&as->cl);
mempool_free(as, &c->btree_interior_update_pool);
mempool_free(as, &c->btree_interior_updates.pool);
/*
* Have to do the wakeup with btree_interior_update_lock still held,
* since being on btree_interior_update_list is our ref on @c:
*/
closure_wake_up(&c->btree_interior_update_wait);
closure_wake_up(&c->btree_interior_updates.wait);
}
static void bch2_btree_update_add_key(btree_update_nodes *nodes,
@ -835,7 +835,7 @@ static void btree_update_nodes_written(struct btree_update *as)
* btree_node_update_key(): having the lock be at the filesystem level
* sucks, we'll need to watch for contention
*/
scoped_guard(mutex, &c->btree_interior_update_commit_lock) {
scoped_guard(mutex, &c->btree_interior_updates.commit_lock) {
ret = commit_do(trans, &as->disk_res, &journal_seq,
BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_no_enospc|
@ -902,7 +902,7 @@ static void btree_update_nodes_written(struct btree_update *as)
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
mutex_lock(&c->btree_interior_update_lock);
mutex_lock(&c->btree_interior_updates.lock);
list_del(&as->write_blocked_list);
if (list_empty(&b->write_blocked))
@ -910,7 +910,7 @@ static void btree_update_nodes_written(struct btree_update *as)
/*
* Node might have been freed, recheck under
* btree_interior_update_lock:
* btree_interior_updates.lock:
*/
if (as->b == b) {
BUG_ON(!b->c.level);
@ -934,7 +934,7 @@ static void btree_update_nodes_written(struct btree_update *as)
}
}
mutex_unlock(&c->btree_interior_update_lock);
mutex_unlock(&c->btree_interior_updates.lock);
mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
six_unlock_write(&b->c.lock);
@ -954,7 +954,7 @@ static void btree_update_nodes_written(struct btree_update *as)
}
for (unsigned i = 0; i < as->nr_open_buckets; i++)
bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
bch2_open_bucket_put(c, c->allocator.open_buckets + as->open_buckets[i]);
bch2_btree_update_free(as, trans);
}
@ -962,12 +962,12 @@ static void btree_update_nodes_written(struct btree_update *as)
static void btree_interior_update_work(struct work_struct *work)
{
struct bch_fs *c =
container_of(work, struct bch_fs, btree_interior_update_work);
container_of(work, struct bch_fs, btree_interior_updates.work);
struct btree_update *as;
while (1) {
scoped_guard(mutex, &c->btree_interior_update_lock) {
as = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
scoped_guard(mutex, &c->btree_interior_updates.lock) {
as = list_first_entry_or_null(&c->btree_interior_updates.unwritten,
struct btree_update, unwritten_list);
if (as && !as->nodes_written)
as = NULL;
@ -985,10 +985,10 @@ static CLOSURE_CALLBACK(btree_update_set_nodes_written)
closure_type(as, struct btree_update, cl);
struct bch_fs *c = as->c;
scoped_guard(mutex, &c->btree_interior_update_lock)
scoped_guard(mutex, &c->btree_interior_updates.lock)
as->nodes_written = true;
queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
queue_work(c->btree_interior_updates.worker, &c->btree_interior_updates.work);
}
/*
@ -1004,8 +1004,8 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
BUG_ON(!btree_node_dirty(b));
BUG_ON(!b->c.level);
guard(mutex)(&c->btree_interior_update_lock);
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
guard(mutex)(&c->btree_interior_updates.lock);
list_add_tail(&as->unwritten_list, &c->btree_interior_updates.unwritten);
as->mode = BTREE_UPDATE_node;
as->b = b;
@ -1026,7 +1026,7 @@ static void btree_update_reparent(struct btree_update *as,
{
struct bch_fs *c = as->c;
lockdep_assert_held(&c->btree_interior_update_lock);
lockdep_assert_held(&c->btree_interior_updates.lock);
child->b = NULL;
child->mode = BTREE_UPDATE_update;
@ -1042,8 +1042,8 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
BUG_ON(as->mode != BTREE_UPDATE_none);
as->mode = BTREE_UPDATE_root;
scoped_guard(mutex, &c->btree_interior_update_lock)
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
scoped_guard(mutex, &c->btree_interior_updates.lock)
list_add_tail(&as->unwritten_list, &c->btree_interior_updates.unwritten);
}
/*
@ -1064,7 +1064,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
closure_get(&as->cl);
guard(mutex)(&c->btree_interior_update_lock);
guard(mutex)(&c->btree_interior_updates.lock);
BUG_ON(b->will_make_reachable);
@ -1110,7 +1110,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
if (btree_node_fake(b))
return;
mutex_lock(&c->btree_interior_update_lock);
mutex_lock(&c->btree_interior_updates.lock);
/*
* Does this node have any btree_update operations preventing
@ -1128,7 +1128,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
* for flush_held_btree_writes() waiting on updates to flush or
* nodes to be writeable:
*/
closure_wake_up(&c->btree_interior_update_wait);
closure_wake_up(&c->btree_interior_updates.wait);
}
clear_btree_node_dirty_acct(c, b);
@ -1153,7 +1153,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
bch2_btree_update_will_free_node_journal_pin_flush);
bch2_journal_pin_drop(&c->journal, &w->journal);
mutex_unlock(&c->btree_interior_update_lock);
mutex_unlock(&c->btree_interior_updates.lock);
bch2_btree_update_add_node(c, &as->old_nodes, b);
}
@ -1166,13 +1166,13 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
BUG_ON(as->mode == BTREE_UPDATE_none);
if (as->took_gc_lock)
up_read(&as->c->gc_lock);
up_read(&as->c->gc.lock);
as->took_gc_lock = false;
bch2_btree_reserve_put(as, trans);
continue_at(&as->cl, btree_update_set_nodes_written,
as->c->btree_interior_update_worker);
as->c->btree_interior_updates.worker);
bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_foreground],
start_time);
@ -1250,15 +1250,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
split = path->l[level_end].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
}
if (!down_read_trylock(&c->gc_lock)) {
ret = drop_locks_do(trans, (down_read(&c->gc_lock), 0));
if (!down_read_trylock(&c->gc.lock)) {
ret = drop_locks_do(trans, (down_read(&c->gc.lock), 0));
if (ret) {
up_read(&c->gc_lock);
up_read(&c->gc.lock);
return ERR_PTR(ret);
}
}
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS);
as = mempool_alloc(&c->btree_interior_updates.pool, GFP_NOFS);
memset(as, 0, sizeof(*as));
closure_init(&as->cl, NULL);
as->c = c;
@ -1277,8 +1277,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
darray_init(&as->new_nodes);
bch2_keylist_init(&as->parent_keys, as->inline_keys);
scoped_guard(mutex, &c->btree_interior_update_lock)
list_add_tail(&as->list, &c->btree_interior_update_list);
scoped_guard(mutex, &c->btree_interior_updates.lock)
list_add_tail(&as->list, &c->btree_interior_updates.list);
struct btree *b = btree_path_node(path, path->level);
as->node_start = b->data->min_key;
@ -1380,7 +1380,7 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
scoped_guard(mutex, &c->btree_cache.lock)
list_del_init(&b->list);
scoped_guard(mutex, &c->btree_root_lock)
scoped_guard(mutex, &c->btree_cache.root_lock)
bch2_btree_id_root(c, b->c.btree_id)->b = b;
bch2_recalc_btree_reserve(c);
@ -1874,7 +1874,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
int live_u64s_added, u64s_added;
int ret;
lockdep_assert_held(&c->gc_lock);
lockdep_assert_held(&c->gc.lock);
BUG_ON(!b->c.level);
BUG_ON(!as || as->b);
bch2_verify_keylist_sorted(keys);
@ -2419,10 +2419,10 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
!bch2_err_matches(ret, EROFS))
bch_err_fn_ratelimited(c, ret);
scoped_guard(spinlock, &c->btree_node_rewrites_lock)
scoped_guard(spinlock, &c->btree_node_rewrites.lock)
list_del(&a->list);
closure_wake_up(&c->btree_node_rewrites_wait);
closure_wake_up(&c->btree_node_rewrites.wait);
bch2_bkey_buf_exit(&a->key);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_node_rewrite);
@ -2446,19 +2446,19 @@ static void __bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b, b
bool now = false, pending = false;
scoped_guard(spinlock, &c->btree_node_rewrites_lock) {
scoped_guard(spinlock, &c->btree_node_rewrites.lock) {
if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) &&
enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) {
list_add(&a->list, &c->btree_node_rewrites);
list_add(&a->list, &c->btree_node_rewrites.list);
now = true;
} else if (!test_bit(BCH_FS_may_go_rw, &c->flags) && !merge) {
list_add(&a->list, &c->btree_node_rewrites_pending);
list_add(&a->list, &c->btree_node_rewrites.pending);
pending = true;
}
}
if (now) {
queue_work(c->btree_node_rewrite_worker, &a->work);
queue_work(c->btree_node_rewrites.worker, &a->work);
} else if (pending) {
/* bch2_do_pending_node_rewrites will execute */
} else {
@ -2479,8 +2479,8 @@ void bch2_btree_node_merge_async(struct bch_fs *c, struct btree *b)
void bch2_async_btree_node_rewrites_flush(struct bch_fs *c)
{
closure_wait_event(&c->btree_node_rewrites_wait,
list_empty(&c->btree_node_rewrites));
closure_wait_event(&c->btree_node_rewrites.wait,
list_empty(&c->btree_node_rewrites.list));
}
void bch2_do_pending_node_rewrites(struct bch_fs *c)
@ -2488,18 +2488,18 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
while (1) {
struct async_btree_rewrite *a;
scoped_guard(spinlock, &c->btree_node_rewrites_lock) {
a = list_pop_entry(&c->btree_node_rewrites_pending,
scoped_guard(spinlock, &c->btree_node_rewrites.lock) {
a = list_pop_entry(&c->btree_node_rewrites.pending,
struct async_btree_rewrite, list);
if (a)
list_add(&a->list, &c->btree_node_rewrites);
list_add(&a->list, &c->btree_node_rewrites.list);
}
if (!a)
break;
enumerated_ref_get(&c->writes, BCH_WRITE_REF_node_rewrite);
queue_work(c->btree_node_rewrite_worker, &a->work);
queue_work(c->btree_node_rewrites.worker, &a->work);
}
}
@ -2508,8 +2508,8 @@ void bch2_free_pending_node_rewrites(struct bch_fs *c)
while (1) {
struct async_btree_rewrite *a;
scoped_guard(spinlock, &c->btree_node_rewrites_lock)
a = list_pop_entry(&c->btree_node_rewrites_pending,
scoped_guard(spinlock, &c->btree_node_rewrites.lock)
a = list_pop_entry(&c->btree_node_rewrites.pending,
struct async_btree_rewrite, list);
if (!a)
@ -2590,10 +2590,10 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
bkey_copy(&b->key, new_key);
bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b);
} else {
try(bch2_trans_mutex_lock(trans, &c->btree_interior_update_commit_lock));
try(bch2_trans_mutex_lock(trans, &c->btree_interior_updates.commit_lock));
if (!btree_node_will_make_reachable(b)) {
mutex_unlock(&c->btree_interior_update_commit_lock);
mutex_unlock(&c->btree_interior_updates.commit_lock);
return bch_err_throw(c, transaction_restart_nested);
}
@ -2605,7 +2605,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b);
bkey_copy(&n->key, new_key);
mutex_unlock(&c->btree_interior_update_commit_lock);
mutex_unlock(&c->btree_interior_updates.commit_lock);
}
return 0;
}
@ -2730,15 +2730,15 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
{
struct btree_update *as;
guard(mutex)(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list)
guard(mutex)(&c->btree_interior_updates.lock);
list_for_each_entry(as, &c->btree_interior_updates.list, list)
bch2_btree_update_to_text(out, as);
}
static bool bch2_btree_interior_updates_pending(struct bch_fs *c)
{
guard(mutex)(&c->btree_interior_update_lock);
return !list_empty(&c->btree_interior_update_list);
guard(mutex)(&c->btree_interior_updates.lock);
return !list_empty(&c->btree_interior_updates.list);
}
bool bch2_btree_interior_updates_flush(struct bch_fs *c)
@ -2746,7 +2746,7 @@ bool bch2_btree_interior_updates_flush(struct bch_fs *c)
bool ret = bch2_btree_interior_updates_pending(c);
if (ret)
closure_wait_event(&c->btree_interior_update_wait,
closure_wait_event(&c->btree_interior_updates.wait,
!bch2_btree_interior_updates_pending(c));
return ret;
}
@ -2755,7 +2755,7 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry
{
struct btree_root *r = bch2_btree_id_root(c, entry->btree_id);
guard(mutex)(&c->btree_interior_update_lock);
guard(mutex)(&c->btree_interior_updates.lock);
r->level = entry->level;
r->alive = true;
@ -2767,7 +2767,7 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,
struct jset_entry *end,
unsigned long skip)
{
guard(mutex)(&c->btree_interior_update_lock);
guard(mutex)(&c->btree_interior_updates.lock);
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
@ -2798,49 +2798,49 @@ static void bch2_btree_alloc_to_text(struct printbuf *out,
void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c)
{
for (unsigned i = 0; i < c->btree_reserve_cache_nr; i++)
bch2_btree_alloc_to_text(out, c, &c->btree_reserve_cache[i]);
for (unsigned i = 0; i < c->btree_reserve_cache.nr; i++)
bch2_btree_alloc_to_text(out, c, &c->btree_reserve_cache.data[i]);
}
void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
{
WARN_ON(!list_empty(&c->btree_node_rewrites));
WARN_ON(!list_empty(&c->btree_node_rewrites_pending));
WARN_ON(!list_empty(&c->btree_node_rewrites.list));
WARN_ON(!list_empty(&c->btree_node_rewrites.pending));
if (c->btree_node_rewrite_worker)
destroy_workqueue(c->btree_node_rewrite_worker);
if (c->btree_interior_update_worker)
destroy_workqueue(c->btree_interior_update_worker);
mempool_exit(&c->btree_interior_update_pool);
if (c->btree_node_rewrites.worker)
destroy_workqueue(c->btree_node_rewrites.worker);
if (c->btree_interior_updates.worker)
destroy_workqueue(c->btree_interior_updates.worker);
mempool_exit(&c->btree_interior_updates.pool);
}
void bch2_fs_btree_interior_update_init_early(struct bch_fs *c)
{
mutex_init(&c->btree_reserve_cache_lock);
INIT_LIST_HEAD(&c->btree_interior_update_list);
INIT_LIST_HEAD(&c->btree_interior_updates_unwritten);
mutex_init(&c->btree_interior_update_lock);
mutex_init(&c->btree_interior_update_commit_lock);
INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work);
mutex_init(&c->btree_reserve_cache.lock);
INIT_LIST_HEAD(&c->btree_interior_updates.list);
INIT_LIST_HEAD(&c->btree_interior_updates.unwritten);
mutex_init(&c->btree_interior_updates.lock);
mutex_init(&c->btree_interior_updates.commit_lock);
INIT_WORK(&c->btree_interior_updates.work, btree_interior_update_work);
INIT_LIST_HEAD(&c->btree_node_rewrites);
INIT_LIST_HEAD(&c->btree_node_rewrites_pending);
spin_lock_init(&c->btree_node_rewrites_lock);
INIT_LIST_HEAD(&c->btree_node_rewrites.list);
INIT_LIST_HEAD(&c->btree_node_rewrites.pending);
spin_lock_init(&c->btree_node_rewrites.lock);
}
int bch2_fs_btree_interior_update_init(struct bch_fs *c)
{
c->btree_interior_update_worker =
c->btree_interior_updates.worker =
alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 8);
if (!c->btree_interior_update_worker)
if (!c->btree_interior_updates.worker)
return bch_err_throw(c, ENOMEM_btree_interior_update_worker_init);
c->btree_node_rewrite_worker =
c->btree_node_rewrites.worker =
alloc_ordered_workqueue("btree_node_rewrite", WQ_UNBOUND);
if (!c->btree_node_rewrite_worker)
if (!c->btree_node_rewrites.worker)
return bch_err_throw(c, ENOMEM_btree_interior_update_worker_init);
if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
if (mempool_init_kmalloc_pool(&c->btree_interior_updates.pool, 1,
sizeof(struct btree_update)))
return bch_err_throw(c, ENOMEM_btree_interior_update_pool_init);


@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H
#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H
#ifndef _BCACHEFS_BTREE_INTERIOR_H
#define _BCACHEFS_BTREE_INTERIOR_H
#include "btree/cache.h"
#include "btree/locking.h"
@ -349,4 +349,4 @@ void bch2_fs_btree_interior_update_exit(struct bch_fs *);
void bch2_fs_btree_interior_update_init_early(struct bch_fs *);
int bch2_fs_btree_interior_update_init(struct bch_fs *);
#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */
#endif /* _BCACHEFS_BTREE_INTERIOR_H */


@ -0,0 +1,48 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_INTERIOR_TYPES_H
#define _BCACHEFS_BTREE_INTERIOR_TYPES_H
struct btree_alloc {
struct open_buckets ob;
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX);
};
/* Maximum number of nodes we might need to allocate atomically: */
#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))
/* Size of the freelist we allocate btree nodes from: */
#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4)
/*
* Cache of allocated btree nodes - if we allocate a btree node and don't use
* it, freeing it means that space can't be reused until we go _all_ the way
* back through the allocator (which exposes us to a livelock when a btree
* reserve allocation fails halfway through) - so instead we stick unused
* nodes here:
*/
struct bch_fs_btree_reserve_cache {
struct mutex lock;
unsigned nr;
struct btree_alloc data[BTREE_NODE_RESERVE * 2];
};
struct bch_fs_btree_interior_updates {
mempool_t pool;
struct list_head list;
struct list_head unwritten;
struct mutex lock;
struct mutex commit_lock;
struct closure_waitlist wait;
struct workqueue_struct *worker;
struct work_struct work;
};
struct bch_fs_btree_node_rewrites {
struct list_head list;
struct list_head pending;
spinlock_t lock;
struct closure_waitlist wait;
struct workqueue_struct *worker;
};
#endif /* _BCACHEFS_BTREE_INTERIOR_TYPES_H */
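bch_fs_btree_reserve_cache is a bounded stack guarded by a mutex: put pushes an unused allocation back if there is room, and get pops one before the caller falls back to the allocator, which is exactly the shape of the loops in __bch2_btree_node_alloc() and bch2_btree_reserve_put() earlier in this commit. A self-contained sketch of the same discipline with demo types (a pthread mutex stands in for the kernel mutex):

#include <pthread.h>
#include <stdbool.h>

#define DEMO_RESERVE_CACHE_SIZE 16

struct demo_node { int id; };

struct demo_reserve_cache {
	pthread_mutex_t		lock;
	unsigned		nr;
	struct demo_node	data[DEMO_RESERVE_CACHE_SIZE];
};

/* Push an unused allocation back into the cache; false means it was full. */
static bool demo_reserve_put(struct demo_reserve_cache *rc, struct demo_node n)
{
	bool added = false;

	pthread_mutex_lock(&rc->lock);
	if (rc->nr < DEMO_RESERVE_CACHE_SIZE) {
		rc->data[rc->nr++] = n;
		added = true;
	}
	pthread_mutex_unlock(&rc->lock);
	return added;
}

/* Pop a cached allocation, if any, before falling back to the allocator. */
static bool demo_reserve_get(struct demo_reserve_cache *rc, struct demo_node *n)
{
	bool found = false;

	pthread_mutex_lock(&rc->lock);
	if (rc->nr) {
		*n = rc->data[--rc->nr];
		found = true;
	}
	pthread_mutex_unlock(&rc->lock);
	return found;
}

int main(void)
{
	struct demo_reserve_cache rc = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct demo_node n = { .id = 42 }, out;

	demo_reserve_put(&rc, n);
	return demo_reserve_get(&rc, &out) ? 0 : 1;
}

The real get path adds one more condition - the durability check visible in the __bch2_btree_node_alloc() hunk above - but the locking shape is identical.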


@ -3344,7 +3344,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
new_mem = allocate_dropping_locks_norelock(trans, lock_dropped,
kmalloc(new_bytes, _gfp|__GFP_NOWARN));
if (!new_mem) {
new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
new_mem = mempool_alloc(&c->btree_trans.malloc_pool, GFP_KERNEL);
new_bytes = BTREE_TRANS_MEM_MAX;
trans->used_mempool = true;
}
@ -3391,7 +3391,7 @@ void bch2_trans_srcu_unlock(struct btree_trans *trans)
path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
check_srcu_held_too_long(trans);
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
srcu_read_unlock(&c->btree_trans.barrier, trans->srcu_idx);
trans->srcu_held = false;
}
}
@ -3399,7 +3399,7 @@ void bch2_trans_srcu_unlock(struct btree_trans *trans)
static void bch2_trans_srcu_lock(struct btree_trans *trans)
{
if (!trans->srcu_held) {
trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans.barrier);
trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
}
@ -3438,7 +3438,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
(void)lock_dropped;
if (!new_mem) {
new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
new_mem = mempool_alloc(&trans->c->btree_trans.malloc_pool, GFP_KERNEL);
new_bytes = BTREE_TRANS_MEM_MAX;
trans->used_mempool = true;
kfree(trans->mem);
@ -3535,24 +3535,24 @@ unsigned bch2_trans_get_fn_idx(const char *fn)
static inline struct btree_trans *bch2_trans_alloc(struct bch_fs *c)
{
if (IS_ENABLED(__KERNEL__)) {
struct btree_trans *trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL);
struct btree_trans *trans = this_cpu_xchg(c->btree_trans.bufs->trans, NULL);
if (trans) {
memset(trans, 0, offsetof(struct btree_trans, list));
return trans;
}
}
struct btree_trans *trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
struct btree_trans *trans = mempool_alloc(&c->btree_trans.pool, GFP_NOFS);
memset(trans, 0, sizeof(*trans));
seqmutex_lock(&c->btree_trans_lock);
seqmutex_lock(&c->btree_trans.lock);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct btree_trans *pos;
pid_t pid = current->pid;
trans->locking_wait.task = current;
list_for_each_entry(pos, &c->btree_trans_list, list) {
list_for_each_entry(pos, &c->btree_trans.list, list) {
struct task_struct *pos_task = READ_ONCE(pos->locking_wait.task);
/*
* We'd much prefer to be stricter here and completely
@ -3566,14 +3566,14 @@ static inline struct btree_trans *bch2_trans_alloc(struct bch_fs *c)
}
}
list_add(&trans->list, &c->btree_trans_list);
seqmutex_unlock(&c->btree_trans_lock);
list_add(&trans->list, &c->btree_trans.list);
seqmutex_unlock(&c->btree_trans.lock);
return trans;
}
struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
__acquires(&c->btree_trans_barrier)
__acquires(&c->btree_trans.barrier)
{
/*
* No multithreaded btree access until we've gone RW and are no longer
@ -3608,7 +3608,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
if (fn_idx < BCH_TRANSACTIONS_NR) {
trans->fn = bch2_btree_transaction_fns[fn_idx];
struct btree_transaction_stats *s = &c->btree_transaction_stats[fn_idx];
struct btree_transaction_stats *s = &c->btree_trans.stats[fn_idx];
if (s->max_mem) {
unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem);
@ -3621,7 +3621,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans->nr_paths_max = s->nr_max_paths;
}
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_idx = srcu_read_lock(&c->btree_trans.barrier);
trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
trans_set_locked(trans, false);
@ -3669,7 +3669,7 @@ static inline void check_btree_paths_leaked(struct btree_trans *trans) {}
#endif
void bch2_trans_put(struct btree_trans *trans)
__releases(&c->btree_trans_barrier)
__releases(&c->btree_trans.barrier)
{
struct bch_fs *c = trans->c;
@ -3686,7 +3686,7 @@ void bch2_trans_put(struct btree_trans *trans)
if (trans->srcu_held) {
check_srcu_held_too_long(trans);
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
srcu_read_unlock(&c->btree_trans.barrier, trans->srcu_idx);
}
if (unlikely(trans->journal_replay_not_finished))
@ -3714,35 +3714,35 @@ void bch2_trans_put(struct btree_trans *trans)
kvfree_rcu_mightsleep(paths_allocated);
if (trans->used_mempool)
mempool_free(trans->mem, &c->btree_trans_mem_pool);
mempool_free(trans->mem, &c->btree_trans.malloc_pool);
else
kfree(trans->mem);
/* Userspace doesn't have a real percpu implementation: */
if (IS_ENABLED(__KERNEL__))
trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
trans = this_cpu_xchg(c->btree_trans.bufs->trans, trans);
if (trans) {
seqmutex_lock(&c->btree_trans_lock);
seqmutex_lock(&c->btree_trans.lock);
list_del(&trans->list);
seqmutex_unlock(&c->btree_trans_lock);
seqmutex_unlock(&c->btree_trans.lock);
mempool_free(trans, &c->btree_trans_pool);
mempool_free(trans, &c->btree_trans.pool);
}
}
bool bch2_current_has_btree_trans(struct bch_fs *c)
{
seqmutex_lock(&c->btree_trans_lock);
seqmutex_lock(&c->btree_trans.lock);
struct btree_trans *trans;
bool ret = false;
list_for_each_entry(trans, &c->btree_trans_list, list)
list_for_each_entry(trans, &c->btree_trans.list, list)
if (trans->locking_wait.task == current &&
trans->locked) {
ret = true;
break;
}
seqmutex_unlock(&c->btree_trans_lock);
seqmutex_unlock(&c->btree_trans.lock);
return ret;
}
@ -3837,26 +3837,26 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
struct btree_trans *trans;
int cpu;
if (c->btree_trans_bufs)
if (c->btree_trans.bufs)
for_each_possible_cpu(cpu) {
struct btree_trans *trans =
per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;
per_cpu_ptr(c->btree_trans.bufs, cpu)->trans;
if (trans) {
seqmutex_lock(&c->btree_trans_lock);
seqmutex_lock(&c->btree_trans.lock);
list_del(&trans->list);
seqmutex_unlock(&c->btree_trans_lock);
seqmutex_unlock(&c->btree_trans.lock);
}
kfree(trans);
}
free_percpu(c->btree_trans_bufs);
free_percpu(c->btree_trans.bufs);
trans = list_first_entry_or_null(&c->btree_trans_list, struct btree_trans, list);
trans = list_first_entry_or_null(&c->btree_trans.list, struct btree_trans, list);
if (trans)
panic("%s leaked btree_trans\n", trans->fn);
for (s = c->btree_transaction_stats;
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
for (s = c->btree_trans.stats;
s < c->btree_trans.stats + ARRAY_SIZE(c->btree_trans.stats);
s++) {
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_exit(&s->trans_kmalloc_trace);
@ -3865,39 +3865,39 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
bch2_time_stats_exit(&s->lock_hold_times);
}
if (c->btree_trans_barrier_initialized) {
synchronize_srcu_expedited(&c->btree_trans_barrier);
cleanup_srcu_struct(&c->btree_trans_barrier);
if (c->btree_trans.barrier_initialized) {
synchronize_srcu_expedited(&c->btree_trans.barrier);
cleanup_srcu_struct(&c->btree_trans.barrier);
}
mempool_exit(&c->btree_trans_mem_pool);
mempool_exit(&c->btree_trans_pool);
mempool_exit(&c->btree_trans.malloc_pool);
mempool_exit(&c->btree_trans.pool);
}
void bch2_fs_btree_iter_init_early(struct bch_fs *c)
{
struct btree_transaction_stats *s;
for (s = c->btree_transaction_stats;
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
for (s = c->btree_trans.stats;
s < c->btree_trans.stats + ARRAY_SIZE(c->btree_trans.stats);
s++) {
bch2_time_stats_init(&s->duration);
bch2_time_stats_init(&s->lock_hold_times);
mutex_init(&s->lock);
}
INIT_LIST_HEAD(&c->btree_trans_list);
seqmutex_init(&c->btree_trans_lock);
INIT_LIST_HEAD(&c->btree_trans.list);
seqmutex_init(&c->btree_trans.lock);
}
int bch2_fs_btree_iter_init(struct bch_fs *c)
{
c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf);
if (!c->btree_trans_bufs)
c->btree_trans.bufs = alloc_percpu(struct btree_trans_buf);
if (!c->btree_trans.bufs)
return -ENOMEM;
try(mempool_init_kmalloc_pool(&c->btree_trans_pool, 1, sizeof(struct btree_trans)));
try(mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1, BTREE_TRANS_MEM_MAX));
try(init_srcu_struct(&c->btree_trans_barrier));
try(mempool_init_kmalloc_pool(&c->btree_trans.pool, 1, sizeof(struct btree_trans)));
try(mempool_init_kmalloc_pool(&c->btree_trans.malloc_pool, 1, BTREE_TRANS_MEM_MAX));
try(init_srcu_struct(&c->btree_trans.barrier));
/*
* static annotation (hackily done) for lock ordering of reclaim vs.
@ -3911,7 +3911,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
fs_reclaim_release(GFP_KERNEL);
#endif
c->btree_trans_barrier_initialized = true;
c->btree_trans.barrier_initialized = true;
return 0;
}


@ -81,7 +81,7 @@ static bool bkey_cached_lock_for_evict(struct bkey_cached *ck)
return true;
}
static bool bkey_cached_evict(struct btree_key_cache *c,
static bool bkey_cached_evict(struct bch_fs_btree_key_cache *c,
struct bkey_cached *ck)
{
bool ret = !rhashtable_remove_fast(&c->table, &ck->hash,
@ -96,7 +96,7 @@ static bool bkey_cached_evict(struct btree_key_cache *c,
static void __bkey_cached_free(struct rcu_pending *pending, struct rcu_head *rcu)
{
struct bch_fs *c = container_of(pending->srcu, struct bch_fs, btree_trans_barrier);
struct bch_fs *c = container_of(pending->srcu, struct bch_fs, btree_trans.barrier);
struct bkey_cached *ck = container_of(rcu, struct bkey_cached, rcu);
this_cpu_dec(*c->btree_key_cache.nr_pending);
@ -104,7 +104,7 @@ static void __bkey_cached_free(struct rcu_pending *pending, struct rcu_head *rcu
kmem_cache_free(bch2_key_cache, ck);
}
static inline void bkey_cached_free_noassert(struct btree_key_cache *bc,
static inline void bkey_cached_free_noassert(struct bch_fs_btree_key_cache *bc,
struct bkey_cached *ck)
{
kfree(ck->k);
@ -120,7 +120,7 @@ static inline void bkey_cached_free_noassert(struct btree_key_cache *bc,
}
static void bkey_cached_free(struct btree_trans *trans,
struct btree_key_cache *bc,
struct bch_fs_btree_key_cache *bc,
struct bkey_cached *ck)
{
/*
@ -152,7 +152,7 @@ static struct bkey_cached *
bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned key_u64s)
{
struct bch_fs *c = trans->c;
struct btree_key_cache *bc = &c->btree_key_cache;
struct bch_fs_btree_key_cache *bc = &c->btree_key_cache;
bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id);
int ret;
@ -182,7 +182,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
}
static struct bkey_cached *
bkey_cached_reuse(struct btree_key_cache *c)
bkey_cached_reuse(struct bch_fs_btree_key_cache *c)
{
guard(rcu)();
@ -209,7 +209,7 @@ static int btree_key_cache_create(struct btree_trans *trans,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct btree_key_cache *bc = &c->btree_key_cache;
struct bch_fs_btree_key_cache *bc = &c->btree_key_cache;
int ret = 0;
/*
@ -516,7 +516,7 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
struct bkey_cached *ck =
container_of(pin, struct bkey_cached, journal);
struct bkey_cached_key key;
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
int srcu_idx = srcu_read_lock(&c->btree_trans.barrier);
int ret = 0;
CLASS(btree_trans, trans)(c);
@ -545,7 +545,7 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
!bch2_journal_error(j), c,
"flushing key cache: %s", bch2_err_str(ret));
unlock:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
srcu_read_unlock(&c->btree_trans.barrier, srcu_idx);
return ret;
}
@ -600,7 +600,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
struct btree_path *path)
{
struct bch_fs *c = trans->c;
struct btree_key_cache *bc = &c->btree_key_cache;
struct bch_fs_btree_key_cache *bc = &c->btree_key_cache;
struct bkey_cached *ck = (void *) path->l[0].b;
/*
@ -642,14 +642,14 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
struct bch_fs *c = shrink->private_data;
struct btree_key_cache *bc = &c->btree_key_cache;
struct bch_fs_btree_key_cache *bc = &c->btree_key_cache;
struct bucket_table *tbl;
struct bkey_cached *ck;
size_t scanned = 0, freed = 0, nr = sc->nr_to_scan;
unsigned iter, start;
int srcu_idx;
srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
srcu_idx = srcu_read_lock(&c->btree_trans.barrier);
rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
@ -663,7 +663,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
*/
if (unlikely(tbl->nest)) {
rcu_read_unlock();
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
srcu_read_unlock(&c->btree_trans.barrier, srcu_idx);
return SHRINK_STOP;
}
@ -712,7 +712,7 @@ out:
bc->shrink_iter = iter;
rcu_read_unlock();
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
srcu_read_unlock(&c->btree_trans.barrier, srcu_idx);
return freed;
}
@ -721,7 +721,7 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
struct shrink_control *sc)
{
struct bch_fs *c = shrink->private_data;
struct btree_key_cache *bc = &c->btree_key_cache;
struct bch_fs_btree_key_cache *bc = &c->btree_key_cache;
long nr = atomic_long_read(&bc->nr_keys) -
atomic_long_read(&bc->nr_dirty);
@ -736,7 +736,7 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
return max(0L, nr);
}
void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
void bch2_fs_btree_key_cache_exit(struct bch_fs_btree_key_cache *bc)
{
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
struct bucket_table *tbl;
@ -792,11 +792,11 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
free_percpu(bc->nr_pending);
}
void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
void bch2_fs_btree_key_cache_init_early(struct bch_fs_btree_key_cache *c)
{
}
int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
int bch2_fs_btree_key_cache_init(struct bch_fs_btree_key_cache *bc)
{
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
struct shrinker *shrink;
@ -805,8 +805,8 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
if (!bc->nr_pending)
return bch_err_throw(c, ENOMEM_fs_btree_cache_init);
if (rcu_pending_init(&bc->pending[0], &c->btree_trans_barrier, __bkey_cached_free) ||
rcu_pending_init(&bc->pending[1], &c->btree_trans_barrier, __bkey_cached_free))
if (rcu_pending_init(&bc->pending[0], &c->btree_trans.barrier, __bkey_cached_free) ||
rcu_pending_init(&bc->pending[1], &c->btree_trans.barrier, __bkey_cached_free))
return bch_err_throw(c, ENOMEM_fs_btree_cache_init);
if (rhashtable_init(&bc->table, &bch2_btree_key_cache_params))
@ -827,7 +827,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
return 0;
}
void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *bc)
void bch2_btree_key_cache_to_text(struct printbuf *out, struct bch_fs_btree_key_cache *bc)
{
printbuf_tabstop_push(out, 24);
printbuf_tabstop_push(out, 12);


@ -47,11 +47,11 @@ bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned,
void bch2_btree_key_cache_drop(struct btree_trans *,
struct btree_path *);
void bch2_fs_btree_key_cache_exit(struct btree_key_cache *);
void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *);
int bch2_fs_btree_key_cache_init(struct btree_key_cache *);
void bch2_fs_btree_key_cache_exit(struct bch_fs_btree_key_cache *);
void bch2_fs_btree_key_cache_init_early(struct bch_fs_btree_key_cache *);
int bch2_fs_btree_key_cache_init(struct bch_fs_btree_key_cache *);
void bch2_btree_key_cache_to_text(struct printbuf *, struct btree_key_cache *);
void bch2_btree_key_cache_to_text(struct printbuf *, struct bch_fs_btree_key_cache *);
void bch2_btree_key_cache_exit(void);
int __init bch2_btree_key_cache_init(void);

View File

@ -4,7 +4,7 @@
#include "util/rcu_pending.h"
struct btree_key_cache {
struct bch_fs_btree_key_cache {
struct rhashtable table;
bool table_init_done;

View File

@ -25,8 +25,8 @@ static inline bool is_btree_node(struct btree_path *path, unsigned l)
static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans)
{
return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats)
? &trans->c->btree_transaction_stats[trans->fn_idx]
return trans->fn_idx < ARRAY_SIZE(trans->c->btree_trans.stats)
? &trans->c->btree_trans.stats[trans->fn_idx]
: NULL;
}

View File

@ -59,11 +59,6 @@ struct btree_write {
struct journal_entry_pin journal;
};
struct btree_alloc {
struct open_buckets ob;
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX);
};
struct btree_bkey_cached_common {
struct six_lock lock;
u8 level;
@ -166,7 +161,21 @@ struct btree_cache_list {
size_t nr;
};
struct btree_cache {
struct btree_root {
struct btree *b;
/* On disk root - see async splits: */
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
u8 level;
u8 alive;
s16 error;
};
struct bch_fs_btree_cache {
struct btree_root roots_known[BTREE_ID_NR];
DARRAY(struct btree_root) roots_extra;
struct mutex root_lock;
struct rhashtable table;
bool table_init_done;
/*
@ -580,6 +589,37 @@ struct btree_trans {
struct btree_insert_entry _updates[BTREE_ITER_INITIAL];
};
struct btree_trans_buf {
struct btree_trans *trans;
};
struct btree_transaction_stats {
struct bch2_time_stats duration;
struct bch2_time_stats lock_hold_times;
struct mutex lock;
unsigned nr_max_paths;
unsigned max_mem;
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif
char *max_paths_text;
};
#define BCH_TRANSACTIONS_NR 128
struct bch_fs_btree_trans {
struct seqmutex lock;
struct list_head list;
mempool_t pool;
mempool_t malloc_pool;
struct btree_trans_buf __percpu *bufs;
struct srcu_struct barrier;
bool barrier_initialized;
struct btree_transaction_stats stats[BCH_TRANSACTIONS_NR];
};
static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter)
{
return trans->paths + iter->path;
@ -924,16 +964,6 @@ static inline u8 btree_trigger_order(enum btree_id btree)
}
}
struct btree_root {
struct btree *b;
/* On disk root - see async splits: */
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
u8 level;
u8 alive;
s16 error;
};
enum btree_gc_coalesce_fail_reason {
BTREE_GC_COALESCE_FAIL_RESERVE_GET,
BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,

View File

@ -241,7 +241,7 @@ btree_write_buffered_insert(struct btree_trans *trans,
BTREE_UPDATE_internal_snapshot_node);
}
static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb)
static void move_keys_from_inc_to_flushing(struct bch_fs_btree_write_buffer *wb)
{
struct bch_fs *c = container_of(wb, struct bch_fs, btree_write_buffer);
struct journal *j = &c->journal;
@ -307,7 +307,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
struct btree_iter iter = { NULL };
size_t overwritten = 0, fast = 0, noop = 0, slowpath = 0, could_not_insert = 0;
bool write_locked = false;
@ -577,7 +577,7 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 max_seq,
bool *did_work)
{
struct bch_fs *c = trans->c;
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
int ret = 0, fetch_from_journal_err;
do {
@ -639,7 +639,7 @@ bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *c)
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
int ret = 0;
if (mutex_trylock(&wb->flushing.lock)) {
@ -711,7 +711,7 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
int ret;
scoped_guard(mutex, &wb->flushing.lock) {
@ -724,7 +724,7 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
}
static void wb_accounting_sort(struct btree_write_buffer *wb)
static void wb_accounting_sort(struct bch_fs_btree_write_buffer *wb)
{
eytzinger0_sort(wb->accounting.data, wb->accounting.nr,
sizeof(wb->accounting.data[0]),
@ -734,7 +734,7 @@ static void wb_accounting_sort(struct btree_write_buffer *wb)
int bch2_accounting_key_to_wb_slowpath(struct bch_fs *c, enum btree_id btree,
struct bkey_i_accounting *k)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
event_inc_trace(c, accounting_key_to_wb_slowpath, buf, ({
prt_printf(&buf, "have: %zu\n", wb->accounting.nr);
@ -754,7 +754,7 @@ int bch2_journal_key_to_wb_slowpath(struct bch_fs *c,
struct journal_keys_to_wb *dst,
enum btree_id btree, struct bkey_i *k)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
unsigned u64s = wb_key_u64s(k);
int ret;
retry:
@ -786,7 +786,7 @@ retry:
void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_keys_to_wb *dst, u64 seq)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
if (mutex_trylock(&wb->flushing.lock)) {
mutex_lock(&wb->inc.lock);
@ -822,7 +822,7 @@ void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_ke
int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
unsigned live_accounting_keys = 0;
int ret = 0;
@ -875,7 +875,7 @@ static int wb_keys_resize(struct btree_write_buffer_keys *wb, size_t new_size)
int bch2_btree_write_buffer_resize(struct bch_fs *c, size_t new_size)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
return wb_keys_resize(&wb->flushing, new_size) ?:
wb_keys_resize(&wb->inc, new_size);
@ -883,7 +883,7 @@ int bch2_btree_write_buffer_resize(struct bch_fs *c, size_t new_size)
void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
BUG_ON((wb->inc.keys.nr || wb->flushing.keys.nr) &&
!bch2_journal_error(&c->journal));
@ -896,7 +896,7 @@ void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
void bch2_fs_btree_write_buffer_init_early(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
mutex_init(&wb->inc.lock);
mutex_init(&wb->flushing.lock);
@ -905,7 +905,7 @@ void bch2_fs_btree_write_buffer_init_early(struct bch_fs *c)
int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
/* Will be resized by journal as needed: */
unsigned initial_size = 1 << 16;

View File

@ -8,14 +8,14 @@
static inline bool bch2_btree_write_buffer_should_flush(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
return wb->inc.keys.nr + wb->flushing.keys.nr > wb->inc.keys.size / 4;
}
static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
return wb->inc.keys.nr > wb->inc.keys.size * 3 / 4;
}
@ -72,7 +72,7 @@ int bch2_accounting_key_to_wb_slowpath(struct bch_fs *,
static inline int bch2_accounting_key_to_wb(struct bch_fs *c,
enum btree_id btree, struct bkey_i_accounting *k)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
struct btree_write_buffered_key search;
search.btree = btree;
search.k.k.p = k->k.p;

View File

@ -48,7 +48,7 @@ struct btree_write_buffer_keys {
struct mutex lock;
};
struct btree_write_buffer {
struct bch_fs_btree_write_buffer {
DARRAY(struct wb_key_ref) sorted;
struct btree_write_buffer_keys inc;
struct btree_write_buffer_keys flushing;

View File

@ -415,19 +415,19 @@ u64 bch2_copygc_wait_amount(struct bch_fs *c)
void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
{
printbuf_tabstop_push(out, 32);
prt_printf(out, "running:\t%u\n", c->copygc_running);
prt_printf(out, "copygc_wait:\t%llu\n", c->copygc_wait);
prt_printf(out, "copygc_wait_at:\t%llu\n", c->copygc_wait_at);
prt_printf(out, "running:\t%u\n", c->copygc.running);
prt_printf(out, "copygc_wait:\t%llu\n", c->copygc.wait);
prt_printf(out, "copygc_wait_at:\t%llu\n", c->copygc.wait_at);
prt_printf(out, "Currently waiting for:\t");
prt_human_readable_u64(out, max(0LL, c->copygc_wait -
prt_human_readable_u64(out, max(0LL, c->copygc.wait -
atomic64_read(&c->io_clock[WRITE].now)) << 9);
prt_newline(out);
prt_printf(out, "Currently waiting since:\t");
prt_human_readable_u64(out, max(0LL,
atomic64_read(&c->io_clock[WRITE].now) -
c->copygc_wait_at) << 9);
c->copygc.wait_at) << 9);
prt_newline(out);
bch2_printbuf_make_room(out, 4096);
@ -442,7 +442,7 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
prt_newline(out);
}
t = rcu_dereference(c->copygc_thread);
t = rcu_dereference(c->copygc.thread);
if (t)
get_task_struct(t);
}
@ -483,7 +483,7 @@ static int bch2_copygc_thread(void *arg)
bch2_move_stats_init(&move_stats, "copygc");
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
writepoint_ptr(&c->copygc_write_point),
writepoint_ptr(&c->copygc.write_point),
false);
while (!ret && !kthread_should_stop()) {
@ -508,21 +508,21 @@ static int bch2_copygc_thread(void *arg)
wait = bch2_copygc_wait_amount(c);
if (wait > clock->max_slop) {
c->copygc_wait_at = last;
c->copygc_wait = last + wait;
c->copygc.wait_at = last;
c->copygc.wait = last + wait;
move_buckets_wait(&ctxt, &buckets, true);
bch2_kthread_io_clock_wait(clock, last + wait,
MAX_SCHEDULE_TIMEOUT);
continue;
}
c->copygc_wait = 0;
c->copygc.wait = 0;
c->copygc_running = true;
c->copygc.running = true;
ret = bch2_copygc(&ctxt, &buckets, &did_work);
c->copygc_running = false;
c->copygc.running = false;
wake_up(&c->copygc_running_wq);
wake_up(&c->copygc.running_wq);
if (!wait && !did_work) {
u64 min_member_capacity = bch2_min_rw_member_capacity(c);
@ -548,43 +548,51 @@ err:
void bch2_copygc_stop(struct bch_fs *c)
{
if (c->copygc_thread) {
kthread_stop(c->copygc_thread);
put_task_struct(c->copygc_thread);
if (c->copygc.thread) {
kthread_stop(c->copygc.thread);
put_task_struct(c->copygc.thread);
}
c->copygc_thread = NULL;
c->copygc.thread = NULL;
}
int bch2_copygc_start(struct bch_fs *c)
{
struct task_struct *t;
int ret;
if (c->copygc_thread)
return 0;
if (c->opts.nochanges)
return 0;
if (bch2_fs_init_fault("copygc_start"))
return -ENOMEM;
t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
ret = PTR_ERR_OR_ZERO(t);
bch_err_msg(c, ret, "creating copygc thread");
if (ret)
return ret;
if (!c->copygc.wq &&
!(c->copygc.wq = alloc_workqueue("bcachefs_copygc",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)))
return bch_err_throw(c, ENOMEM_fs_other_alloc);
get_task_struct(t);
if (!c->copygc.thread) {
struct task_struct *t =
kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
int ret = PTR_ERR_OR_ZERO(t);
bch_err_msg(c, ret, "creating copygc thread");
if (ret)
return ret;
c->copygc_thread = t;
wake_up_process(c->copygc_thread);
get_task_struct(t);
c->copygc.thread = t;
wake_up_process(c->copygc.thread);
}
return 0;
}
void bch2_fs_copygc_exit(struct bch_fs *c)
{
if (c->copygc.wq)
destroy_workqueue(c->copygc.wq);
}
void bch2_fs_copygc_init(struct bch_fs *c)
{
init_waitqueue_head(&c->copygc_running_wq);
c->copygc_running = false;
init_waitqueue_head(&c->copygc.running_wq);
c->copygc.running = false;
}

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_MOVINGGC_H
#define _BCACHEFS_MOVINGGC_H
#ifndef _BCACHEFS_COPYGC_H
#define _BCACHEFS_COPYGC_H
u64 bch2_copygc_wait_amount(struct bch_fs *);
void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *);
@ -8,13 +8,15 @@ void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *);
static inline void bch2_copygc_wakeup(struct bch_fs *c)
{
guard(rcu)();
struct task_struct *p = rcu_dereference(c->copygc_thread);
struct task_struct *p = rcu_dereference(c->copygc.thread);
if (p)
wake_up_process(p);
}
void bch2_copygc_stop(struct bch_fs *);
int bch2_copygc_start(struct bch_fs *);
void bch2_fs_copygc_exit(struct bch_fs *);
void bch2_fs_copygc_init(struct bch_fs *);
#endif /* _BCACHEFS_MOVINGGC_H */
#endif /* _BCACHEFS_COPYGC_H */

View File

@ -0,0 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_COPYGC_TYPES_H
#define _BCACHEFS_COPYGC_TYPES_H
struct bch_fs_copygc {
struct task_struct *thread;
struct write_point write_point;
s64 wait_at;
s64 wait;
bool running;
wait_queue_head_t running_wq;
/* Dedicated workqueue for btree updates: */
struct workqueue_struct *wq;
};
#endif /* _BCACHEFS_COPYGC_TYPES_H */
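
The new struct bch_fs_copygc gathers what were previously loose c->copygc_* members, so callers now go through the embedded sub-struct (c->copygc.thread, c->copygc.wait, c->copygc.running above). A minimal sketch of the resulting access pattern, assuming the usual bcachefs headers and that the struct is embedded in struct bch_fs as c->copygc; the helper below is hypothetical and not part of this commit:

static inline bool bch2_copygc_idle(struct bch_fs *c)
{
	/* formerly c->copygc_running and c->copygc_wait */
	return !c->copygc.running &&
	       c->copygc.wait <= atomic64_read(&c->io_clock[WRITE].now);
}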

View File

@ -405,7 +405,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
struct gc_stripe *gc = NULL;
if (flags & BTREE_TRIGGER_gc) {
gc = genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL);
gc = genradix_ptr_alloc(&c->ec.gc_stripes, idx, GFP_KERNEL);
if (!gc) {
bch_err(c, "error allocating memory for gc_stripes, idx %llu", idx);
return bch_err_throw(c, ENOMEM_mark_stripe);
@ -754,7 +754,7 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
nr_iovecs,
opf,
GFP_KERNEL,
&c->ec_bioset),
&c->ec.block_bioset),
struct ec_bio, bio);
ec_bio->ca = ca;
@ -852,8 +852,8 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
{
if (c->gc_pos.phase != GC_PHASE_not_running &&
!genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
if (c->gc.pos.phase != GC_PHASE_not_running &&
!genradix_ptr_alloc(&c->ec.gc_stripes, idx, gfp))
return bch_err_throw(c, ENOMEM_ec_stripe_mem_alloc);
return 0;
@ -877,10 +877,10 @@ static int ec_stripe_mem_alloc(struct btree_trans *trans,
static bool __bch2_bucket_has_new_stripe(struct bch_fs *c, u64 dev_bucket)
{
unsigned hash = hash_64(dev_bucket, ilog2(ARRAY_SIZE(c->ec_stripes_new_buckets)));
unsigned hash = hash_64(dev_bucket, ilog2(ARRAY_SIZE(c->ec.stripes_new_buckets)));
struct ec_stripe_new_bucket *s;
hlist_for_each_entry(s, &c->ec_stripes_new_buckets[hash], hash)
hlist_for_each_entry(s, &c->ec.stripes_new_buckets[hash], hash)
if (s->dev_bucket == dev_bucket)
return true;
return false;
@ -888,7 +888,7 @@ static bool __bch2_bucket_has_new_stripe(struct bch_fs *c, u64 dev_bucket)
bool bch2_bucket_has_new_stripe(struct bch_fs *c, u64 dev_bucket)
{
guard(spinlock)(&c->ec_stripes_new_lock);
guard(spinlock)(&c->ec.stripes_new_lock);
return __bch2_bucket_has_new_stripe(c, dev_bucket);
}
@ -896,20 +896,20 @@ static void stripe_new_bucket_add(struct bch_fs *c, struct ec_stripe_new_bucket
{
s->dev_bucket = dev_bucket;
unsigned hash = hash_64(dev_bucket, ilog2(ARRAY_SIZE(c->ec_stripes_new_buckets)));
hlist_add_head(&s->hash, &c->ec_stripes_new_buckets[hash]);
unsigned hash = hash_64(dev_bucket, ilog2(ARRAY_SIZE(c->ec.stripes_new_buckets)));
hlist_add_head(&s->hash, &c->ec.stripes_new_buckets[hash]);
}
static void stripe_new_buckets_add(struct bch_fs *c, struct ec_stripe_new *s)
{
unsigned nr_blocks = s->nr_data + s->nr_parity;
guard(spinlock)(&c->ec_stripes_new_lock);
guard(spinlock)(&c->ec.stripes_new_lock);
for (unsigned i = 0; i < nr_blocks; i++) {
if (!s->blocks[i])
continue;
struct open_bucket *ob = c->open_buckets + s->blocks[i];
struct open_bucket *ob = c->allocator.open_buckets + s->blocks[i];
struct bpos bucket = POS(ob->dev, ob->bucket);
stripe_new_bucket_add(c, &s->buckets[i], bucket_to_u64(bucket));
@ -918,7 +918,7 @@ static void stripe_new_buckets_add(struct bch_fs *c, struct ec_stripe_new *s)
static void stripe_new_buckets_del(struct bch_fs *c, struct ec_stripe_new *s)
{
guard(spinlock)(&c->ec_stripes_new_lock);
guard(spinlock)(&c->ec.stripes_new_lock);
struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
for (unsigned i = 0; i < v->nr_blocks; i++)
@ -927,10 +927,10 @@ static void stripe_new_buckets_del(struct bch_fs *c, struct ec_stripe_new *s)
static struct ec_stripe_handle *bch2_open_stripe_find(struct bch_fs *c, u64 idx)
{
unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec.stripes_new)));
struct ec_stripe_handle *s;
hlist_for_each_entry(s, &c->ec_stripes_new[hash], hash)
hlist_for_each_entry(s, &c->ec.stripes_new[hash], hash)
if (s->idx == idx)
return s;
return NULL;
@ -938,7 +938,7 @@ static struct ec_stripe_handle *bch2_open_stripe_find(struct bch_fs *c, u64 idx)
static bool bch2_stripe_is_open(struct bch_fs *c, u64 idx)
{
guard(spinlock)(&c->ec_stripes_new_lock);
guard(spinlock)(&c->ec.stripes_new_lock);
return bch2_open_stripe_find(c, idx) != NULL;
}
@ -949,13 +949,13 @@ static bool bch2_stripe_handle_tryget(struct bch_fs *c,
BUG_ON(s->idx);
BUG_ON(!idx);
guard(spinlock)(&c->ec_stripes_new_lock);
guard(spinlock)(&c->ec.stripes_new_lock);
bool ret = !bch2_open_stripe_find(c, idx);
if (ret) {
unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec.stripes_new)));
s->idx = idx;
hlist_add_head(&s->hash, &c->ec_stripes_new[hash]);
hlist_add_head(&s->hash, &c->ec.stripes_new[hash]);
}
return ret;
}
@ -965,7 +965,7 @@ static void bch2_stripe_handle_put(struct bch_fs *c, struct ec_stripe_handle *s)
if (!s->idx)
return;
guard(spinlock)(&c->ec_stripes_new_lock);
guard(spinlock)(&c->ec.stripes_new_lock);
BUG_ON(bch2_open_stripe_find(c, s->idx) != s);
hlist_del_init(&s->hash);
@ -998,7 +998,7 @@ static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
static void ec_stripe_delete_work(struct work_struct *work)
{
struct bch_fs *c =
container_of(work, struct bch_fs, ec_stripe_delete_work);
container_of(work, struct bch_fs, ec.stripe_delete_work);
bch2_trans_run(c,
bch2_btree_write_buffer_tryflush(trans) ?:
@ -1016,7 +1016,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
void bch2_do_stripe_deletes(struct bch_fs *c)
{
if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_stripe_delete) &&
!queue_work(c->write_ref_wq, &c->ec_stripe_delete_work))
!queue_work(c->write_ref_wq, &c->ec.stripe_delete_work))
enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete);
}
@ -1323,7 +1323,7 @@ static int __ec_stripe_create(struct ec_stripe_new *s)
for (unsigned i = 0; i < nr_data; i++)
if (s->blocks[i]) {
struct open_bucket *ob = c->open_buckets + s->blocks[i];
struct open_bucket *ob = c->allocator.open_buckets + s->blocks[i];
if (ob->sectors_free)
zero_out_rest_of_ec_bucket(c, s, i, ob);
@ -1403,7 +1403,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
for (unsigned i = 0; i < v->nr_blocks; i++)
if (s->blocks[i]) {
struct open_bucket *ob = c->open_buckets + s->blocks[i];
struct open_bucket *ob = c->allocator.open_buckets + s->blocks[i];
if (i < nr_data) {
ob->ec = NULL;
@ -1413,9 +1413,9 @@ static void ec_stripe_create(struct ec_stripe_new *s)
}
}
scoped_guard(mutex, &c->ec_stripe_new_lock)
scoped_guard(mutex, &c->ec.stripe_new_lock)
list_del(&s->list);
wake_up(&c->ec_stripe_new_wait);
wake_up(&c->ec.stripe_new_wait);
ec_stripe_buf_exit(&s->old_stripe);
ec_stripe_buf_exit(&s->new_stripe);
@ -1428,8 +1428,8 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
{
struct ec_stripe_new *s;
guard(mutex)(&c->ec_stripe_new_lock);
list_for_each_entry(s, &c->ec_stripe_new_list, list)
guard(mutex)(&c->ec.stripe_new_lock);
list_for_each_entry(s, &c->ec.stripe_new_list, list)
if (!atomic_read(&s->ref[STRIPE_REF_io]))
return s;
return NULL;
@ -1438,7 +1438,7 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
static void ec_stripe_create_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work,
struct bch_fs, ec_stripe_create_work);
struct bch_fs, ec.stripe_create_work);
struct ec_stripe_new *s;
while ((s = get_pending_stripe(c)))
@ -1451,7 +1451,7 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c)
{
enumerated_ref_get(&c->writes, BCH_WRITE_REF_stripe_create);
if (!queue_work(system_long_wq, &c->ec_stripe_create_work))
if (!queue_work(system_long_wq, &c->ec.stripe_create_work))
enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create);
}
@ -1466,8 +1466,8 @@ static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h
h->s = NULL;
s->pending = true;
scoped_guard(mutex, &c->ec_stripe_new_lock)
list_add(&s->list, &c->ec_stripe_new_list);
scoped_guard(mutex, &c->ec.stripe_new_lock)
list_add(&s->list, &c->ec.stripe_new_list);
ec_stripe_new_put(c, s, STRIPE_REF_io);
}
@ -1649,7 +1649,7 @@ static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *
if (h->s && !h->s->allocated && dev_mask_nr(&devs_leaving))
ec_stripe_new_cancel(c, h, -EINTR);
h->rw_devs_change_count = c->rw_devs_change_count;
h->rw_devs_change_count = c->allocator.rw_devs_change_count;
}
static struct ec_stripe_head *
@ -1671,7 +1671,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
h->redundancy = redundancy;
h->watermark = watermark;
list_add(&h->list, &c->ec_stripe_head_list);
list_add(&h->list, &c->ec.stripe_head_list);
return h;
}
@ -1699,7 +1699,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
if (!redundancy)
return NULL;
int ret = bch2_trans_mutex_lock(trans, &c->ec_stripe_head_lock);
int ret = bch2_trans_mutex_lock(trans, &c->ec.stripe_head_lock);
if (ret)
return ERR_PTR(ret);
@ -1708,7 +1708,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
goto err;
}
list_for_each_entry(h, &c->ec_stripe_head_list, list)
list_for_each_entry(h, &c->ec.stripe_head_list, list)
if (h->disk_label == disk_label &&
h->algo == algo &&
h->redundancy == redundancy &&
@ -1727,7 +1727,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
goto err;
}
found:
if (h->rw_devs_change_count != c->rw_devs_change_count)
if (h->rw_devs_change_count != c->allocator.rw_devs_change_count)
ec_stripe_head_devs_update(c, h);
if (h->insufficient_devs) {
@ -1735,7 +1735,7 @@ found:
h = NULL;
}
err:
mutex_unlock(&c->ec_stripe_head_lock);
mutex_unlock(&c->ec.stripe_head_lock);
return h;
}
@ -1754,7 +1754,7 @@ static int __new_stripe_alloc_buckets(struct btree_trans *trans,
/* We bypass the sector allocator which normally does this: */
bitmap_and(req->devs_may_alloc.d, req->devs_may_alloc.d,
c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
c->allocator.rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) {
/*
@ -1907,7 +1907,7 @@ static int init_new_stripe_from_old(struct bch_fs *c, struct ec_stripe_new *s)
* blocks from the stripe we're reusing:
*/
for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) {
bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]);
bch2_open_bucket_put(c, c->allocator.open_buckets + s->blocks[i]);
s->blocks[i] = 0;
}
memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten));
@ -1970,12 +1970,12 @@ static int stripe_idx_alloc(struct btree_trans *trans, struct ec_stripe_new *s)
struct bch_fs *c = trans->c;
struct bkey_s_c k;
struct bpos min_pos = POS(0, 1);
struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec.stripe_hint));
int ret;
for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos,
BTREE_ITER_slots|BTREE_ITER_intent, k, ret) {
c->ec_stripe_hint = iter.pos.offset;
c->ec.stripe_hint = iter.pos.offset;
if (bkey_gt(k.k->p, POS(0, U32_MAX))) {
if (start_pos.offset) {
@ -2041,7 +2041,7 @@ static int stripe_alloc_or_reuse(struct btree_trans *trans,
}
/* XXX freelist_wait? */
closure_wait(&c->freelist_wait, cl);
closure_wait(&c->allocator.freelist_wait, cl);
*waiting = true;
}
}
@ -2107,7 +2107,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
ret = stripe_alloc_or_reuse(trans, req, cl, h, s, &waiting);
if (waiting &&
!bch2_err_matches(ret, BCH_ERR_operation_blocked))
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);
if (ret)
goto err;
@ -2233,7 +2233,7 @@ static bool should_cancel_stripe(struct bch_fs *c, struct ec_stripe_new *s, stru
if (!s->blocks[i])
continue;
struct open_bucket *ob = c->open_buckets + s->blocks[i];
struct open_bucket *ob = c->allocator.open_buckets + s->blocks[i];
if (ob->dev == ca->dev_idx)
return true;
}
@ -2245,8 +2245,8 @@ static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
{
struct ec_stripe_head *h;
guard(mutex)(&c->ec_stripe_head_lock);
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
guard(mutex)(&c->ec.stripe_head_lock);
list_for_each_entry(h, &c->ec.stripe_head_list, list) {
guard(mutex)(&h->lock);
if (h->s && should_cancel_stripe(c, h->s, ca))
ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes);
@ -2267,13 +2267,13 @@ static bool bch2_fs_ec_flush_done(struct bch_fs *c)
{
sched_annotate_sleep();
guard(mutex)(&c->ec_stripe_new_lock);
return list_empty(&c->ec_stripe_new_list);
guard(mutex)(&c->ec.stripe_new_lock);
return list_empty(&c->ec.stripe_new_list);
}
void bch2_fs_ec_flush(struct bch_fs *c)
{
wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c));
wait_event(c->ec.stripe_new_wait, bch2_fs_ec_flush_done(c));
}
int bch2_stripes_read(struct bch_fs *c)
@ -2305,8 +2305,8 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
struct ec_stripe_head *h;
struct ec_stripe_new *s;
scoped_guard(mutex, &c->ec_stripe_head_lock)
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
scoped_guard(mutex, &c->ec.stripe_head_lock)
list_for_each_entry(h, &c->ec.stripe_head_list, list) {
prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n",
h->disk_label, h->algo, h->redundancy,
bch2_watermarks[h->watermark],
@ -2318,8 +2318,8 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
prt_printf(out, "in flight:\n");
scoped_guard(mutex, &c->ec_stripe_new_lock)
list_for_each_entry(s, &c->ec_stripe_new_list, list)
scoped_guard(mutex, &c->ec.stripe_new_lock)
list_for_each_entry(s, &c->ec.stripe_new_list, list)
bch2_new_stripe_to_text(out, c, s);
}
@ -2329,8 +2329,8 @@ void bch2_fs_ec_exit(struct bch_fs *c)
while (1) {
struct ec_stripe_head *h;
scoped_guard(mutex, &c->ec_stripe_head_lock)
h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list);
scoped_guard(mutex, &c->ec.stripe_head_lock)
h = list_pop_entry(&c->ec.stripe_head_list, struct ec_stripe_head, list);
if (!h)
break;
@ -2346,29 +2346,29 @@ void bch2_fs_ec_exit(struct bch_fs *c)
kfree(h);
}
BUG_ON(!list_empty(&c->ec_stripe_new_list));
BUG_ON(!list_empty(&c->ec.stripe_new_list));
bioset_exit(&c->ec_bioset);
bioset_exit(&c->ec.block_bioset);
}
void bch2_fs_ec_init_early(struct bch_fs *c)
{
spin_lock_init(&c->ec_stripes_new_lock);
spin_lock_init(&c->ec.stripes_new_lock);
INIT_LIST_HEAD(&c->ec_stripe_head_list);
mutex_init(&c->ec_stripe_head_lock);
INIT_LIST_HEAD(&c->ec.stripe_head_list);
mutex_init(&c->ec.stripe_head_lock);
INIT_LIST_HEAD(&c->ec_stripe_new_list);
mutex_init(&c->ec_stripe_new_lock);
init_waitqueue_head(&c->ec_stripe_new_wait);
INIT_LIST_HEAD(&c->ec.stripe_new_list);
mutex_init(&c->ec.stripe_new_lock);
init_waitqueue_head(&c->ec.stripe_new_wait);
INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);
INIT_WORK(&c->ec.stripe_create_work, ec_stripe_create_work);
INIT_WORK(&c->ec.stripe_delete_work, ec_stripe_delete_work);
}
int bch2_fs_ec_init(struct bch_fs *c)
{
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
return bioset_init(&c->ec.block_bioset, 1, offsetof(struct ec_bio, bio),
BIOSET_NEED_BVECS);
}

View File

@ -26,4 +26,26 @@ struct gc_stripe {
union bch_replicas_padded r;
};
struct bch_fs_ec {
struct hlist_head stripes_new[32];
struct hlist_head stripes_new_buckets[64];
spinlock_t stripes_new_lock;
struct list_head stripe_head_list;
struct mutex stripe_head_lock;
struct list_head stripe_new_list;
struct mutex stripe_new_lock;
wait_queue_head_t stripe_new_wait;
struct work_struct stripe_create_work;
u64 stripe_hint;
struct work_struct stripe_delete_work;
struct bio_set block_bioset;
GENRADIX(struct gc_stripe) gc_stripes;
};
#endif /* _BCACHEFS_EC_TYPES_H */
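
The open-coded lookups in ec.c above all follow the same convention: hash_64() over ilog2() of the fixed table size in struct bch_fs_ec, with the buckets protected by ec.stripes_new_lock. A hedged sketch of that convention, factored into a helper that does not exist in the tree and is shown for illustration only:

static inline struct hlist_head *
ec_new_bucket_hash_head(struct bch_fs *c, u64 dev_bucket)
{
	/* same expression as __bch2_bucket_has_new_stripe()/stripe_new_bucket_add() */
	unsigned h = hash_64(dev_bucket,
			     ilog2(ARRAY_SIZE(c->ec.stripes_new_buckets)));

	return &c->ec.stripes_new_buckets[h];	/* protected by c->ec.stripes_new_lock */
}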

View File

@ -301,7 +301,7 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec
* snapshot while they're in progress, then crashing, will result in the
* resume only proceeding in one of the snapshots
*/
guard(rwsem_read)(&c->snapshot_create_lock);
guard(rwsem_read)(&c->snapshots.create_lock);
CLASS(btree_trans, trans)(c);
try(bch2_logged_op_start(trans, &op.k_i));
int ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta);
@ -509,7 +509,7 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
* snapshot while they're in progress, then crashing, will result in the
* resume only proceeding in one of the snapshots
*/
guard(rwsem_read)(&c->snapshot_create_lock);
guard(rwsem_read)(&c->snapshots.create_lock);
CLASS(btree_trans, trans)(c);
try(bch2_logged_op_start(trans, &op.k_i));
int ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta);

View File

@ -319,9 +319,11 @@ int bch2_move_extent(struct moving_context *ctxt,
else if (data_opts.type != BCH_DATA_UPDATE_scrub) {
struct bch_devs_list devs_have = bch2_data_update_devs_keeping(c, &data_opts, k);
ret = bch2_can_do_write(c, &data_opts, k, &devs_have) ?:
bch2_btree_node_rewrite_pos(trans, iter->btree_id, level, k.k->p,
data_opts.target, 0, data_opts.write_flags);
if (data_opts.type != BCH_DATA_UPDATE_copygc)
try(bch2_can_do_write(c, &data_opts, k, &devs_have));
ret = bch2_btree_node_rewrite_pos(trans, iter->btree_id, level, k.k->p,
data_opts.target, 0, data_opts.write_flags);
} else
ret = bch2_btree_node_scrub(trans, iter->btree_id, level, k, data_opts.read_dev);
@ -345,10 +347,10 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
bool is_kthread = current->flags & PF_KTHREAD;
u64 delay;
if (ctxt->wait_on_copygc && c->copygc_running) {
if (ctxt->wait_on_copygc && c->copygc.running) {
bch2_moving_ctxt_flush_all(ctxt);
wait_event_freezable(c->copygc_running_wq,
!c->copygc_running ||
wait_event_freezable(c->copygc.running_wq,
!c->copygc.running ||
(is_kthread && kthread_should_stop()));
}

View File

@ -78,7 +78,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
guard(rcu)();
devs = bch2_target_to_mask(c, target) ?:
&c->rw_devs[BCH_DATA_user];
&c->allocator.rw_devs[BCH_DATA_user];
for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
struct bch_dev *ca = rcu_dereference(c->devs[d]);

View File

@ -1500,8 +1500,12 @@ static int do_reconcile_phys(struct moving_context *ctxt,
if (!k.k)
return 0;
event_add_trace(c, reconcile_phys, k.k->size, buf,
bch2_bkey_val_to_text(&buf, c, k));
event_add_trace(c, reconcile_phys, k.k->size, buf, ({
prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, bp_k);
prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, k);
}));
return __do_reconcile_extent(ctxt, snapshot_io_opts, &iter, k);
}
@ -1875,6 +1879,24 @@ static int do_reconcile(struct moving_context *ctxt)
continue;
}
if ((r->work_pos.btree == BTREE_ID_reconcile_hipri_phys ||
r->work_pos.btree == BTREE_ID_reconcile_work_phys) &&
k.k->p.inode != r->work_pos.pos.inode) {
/*
* We don't yet do multiple devices in parallel - that
* will require extra synchronization to avoid kicking
* off the same reconciles simultaneously via multiple
* backpointers.
*
* For now, flush when switching devices to avoid
* conflicts:
*/
bch2_moving_ctxt_flush_all(ctxt);
bch2_btree_write_buffer_flush_sync(trans);
work.nr = 0;
continue;
}
r->running = true;
r->work_pos.pos = k.k->p;
@ -1912,7 +1934,9 @@ static int do_reconcile(struct moving_context *ctxt)
if (ret)
break;
r->work_pos.pos = bpos_successor(r->work_pos.pos);
r->work_pos.pos = btree_type_has_snapshots(r->work_pos.btree)
? bpos_successor(r->work_pos.pos)
: bpos_nosnap_successor(r->work_pos.pos);
}
if (!ret && !bkey_deleted(&pending_cookie.k))
@ -1954,7 +1978,7 @@ static int bch2_reconcile_thread(void *arg)
struct moving_context ctxt __cleanup(bch2_moving_ctxt_exit);
bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
writepoint_ptr(&c->reconcile_write_point),
writepoint_ptr(&c->allocator.reconcile_write_point),
true);
while (!kthread_should_stop() && !do_reconcile(&ctxt))

View File

@ -66,21 +66,22 @@ static unsigned bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k)
noinline_for_stack
static void count_data_update_key_fail(struct data_update *u,
struct btree_iter *iter,
struct bkey_s_c new,
struct bkey_s_c wrote,
struct bkey_i *insert,
const char *msg)
{
struct bch_fs *c = u->op.c;
unsigned sectors = new.k->p.offset - iter->pos.offset;
if (u->stats) {
atomic64_inc(&u->stats->keys_raced);
atomic64_add(sectors, &u->stats->sectors_raced);
atomic64_add(insert->k.size, &u->stats->sectors_raced);
}
event_add_trace(c, data_update_key_fail, sectors, buf, ({
event_add_trace(c, data_update_key_fail, insert->k.size, buf, ({
prt_str(&buf, bch2_data_update_type_strs[u->opts.type]);
prt_newline(&buf);
prt_str(&buf, msg);
prt_newline(&buf);
@ -157,8 +158,13 @@ static int data_update_index_update_key(struct btree_trans *trans,
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX));
bkey_reassemble(insert, k);
bch2_cut_front(c, iter->pos, &new->k_i);
bch2_cut_front(c, iter->pos, insert);
bch2_cut_back(new->k.p, insert);
bch2_cut_back(insert->k.p, &new->k_i);
if (!bch2_extents_match(c, k, old)) {
count_data_update_key_fail(u, iter, k, bkey_i_to_s_c(&new->k_i), NULL, "no match:");
count_data_update_key_fail(u, k, bkey_i_to_s_c(&new->k_i), insert, "no match:");
bch2_btree_iter_advance(iter);
return 0;
}
@ -166,12 +172,6 @@ static int data_update_index_update_key(struct btree_trans *trans,
struct bch_inode_opts opts;
try(bch2_bkey_get_io_opts(trans, NULL, k, &opts));
bch2_cut_front(c, iter->pos, &new->k_i);
bch2_cut_front(c, iter->pos, insert);
bch2_cut_back(new->k.p, insert);
bch2_cut_back(insert->k.p, &new->k_i);
bch2_bkey_propagate_incompressible(c, insert, bkey_i_to_s_c(&new->k_i));
/*
@ -204,7 +204,7 @@ static int data_update_index_update_key(struct btree_trans *trans,
if (u->opts.ptrs_rewrite &&
!rewrites_found &&
bch2_bkey_durability(c, k) >= opts.data_replicas) {
count_data_update_key_fail(u, iter, k, bkey_i_to_s_c(&new->k_i), insert,
count_data_update_key_fail(u, k, bkey_i_to_s_c(&new->k_i), insert,
"no rewrites found:");
bch2_btree_iter_advance(iter);
return 0;
@ -220,7 +220,7 @@ static int data_update_index_update_key(struct btree_trans *trans,
!ptr_c->cached));
if (!bkey_val_u64s(&new->k)) {
count_data_update_key_fail(u, iter, k,
count_data_update_key_fail(u, k,
bkey_i_to_s_c(bch2_keylist_front(&u->op.insert_keys)),
insert, "new replicas conflicted:");
bch2_btree_iter_advance(iter);
@ -762,7 +762,7 @@ int bch2_can_do_write(struct bch_fs *c, struct data_update_opts *opts,
enum bch_watermark watermark = opts->commit_flags & BCH_WATERMARK_MASK;
if ((opts->write_flags & BCH_WRITE_alloc_nowait) &&
unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)))
unlikely(c->allocator.open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)))
return bch_err_throw(c, data_update_fail_would_block);
guard(rcu)();
@ -999,9 +999,11 @@ int bch2_data_update_init(struct btree_trans *trans,
* (i.e. trying to move a durability=2 replica to a target with a
* single durability=2 device)
*/
ret = bch2_can_do_write(c, &m->opts, k, &m->op.devs_have);
if (ret)
goto out;
if (data_opts.type != BCH_DATA_UPDATE_copygc) {
ret = bch2_can_do_write(c, &m->opts, k, &m->op.devs_have);
if (ret)
goto out;
}
if (reserve_sectors) {
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,

View File

@ -20,7 +20,7 @@ void bch2_write_op_error(struct bch_write_op *op, u64, const char *, ...);
static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
{
return op->watermark == BCH_WATERMARK_copygc
? op->c->copygc_wq
? op->c->copygc.wq
: op->c->btree_update_wq;
}

View File

@ -590,12 +590,12 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
int srcu_idx = srcu_read_lock(&c->btree_trans.barrier);
restart:
seqmutex_lock(&c->btree_trans_lock);
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
seqmutex_lock(&c->btree_trans.lock);
list_sort(&c->btree_trans.list, list_ptr_order_cmp);
list_for_each_entry(trans, &c->btree_trans_list, list) {
list_for_each_entry(trans, &c->btree_trans.list, list) {
if ((ulong) trans <= i->iter)
continue;
@ -609,7 +609,7 @@ restart:
continue;
}
u32 seq = seqmutex_unlock(&c->btree_trans_lock);
u32 seq = seqmutex_unlock(&c->btree_trans.lock);
bch2_btree_trans_to_text(&i->buf, trans);
@ -624,12 +624,12 @@ restart:
if (ret)
goto unlocked;
if (!seqmutex_relock(&c->btree_trans_lock, seq))
if (!seqmutex_relock(&c->btree_trans.lock, seq))
goto restart;
}
seqmutex_unlock(&c->btree_trans_lock);
seqmutex_unlock(&c->btree_trans.lock);
unlocked:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
srcu_read_unlock(&c->btree_trans.barrier, srcu_idx);
if (i->buf.allocation_failure)
ret = -ENOMEM;
@ -759,7 +759,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
i->ret = 0;
while (1) {
struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];
struct btree_transaction_stats *s = &c->btree_trans.stats[i->iter];
err = bch2_debugfs_flush_buf(i);
if (err)
@ -825,10 +825,10 @@ static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
struct btree_trans *trans;
ulong iter = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
seqmutex_lock(&c->btree_trans.lock);
list_sort(&c->btree_trans.list, list_ptr_order_cmp);
list_for_each_entry(trans, &c->btree_trans_list, list) {
list_for_each_entry(trans, &c->btree_trans.list, list) {
if ((ulong) trans <= iter)
continue;
@ -837,7 +837,7 @@ restart:
if (!closure_get_not_zero(&trans->ref))
continue;
u32 seq = seqmutex_unlock(&c->btree_trans_lock);
u32 seq = seqmutex_unlock(&c->btree_trans.lock);
bool found = bch2_check_for_deadlock(trans, out) != 0;
@ -846,10 +846,10 @@ restart:
if (found)
return;
if (!seqmutex_relock(&c->btree_trans_lock, seq))
if (!seqmutex_relock(&c->btree_trans.lock, seq))
goto restart;
}
seqmutex_unlock(&c->btree_trans_lock);
seqmutex_unlock(&c->btree_trans.lock);
}
typedef void (*fs_to_text_fn)(struct printbuf *, struct bch_fs *);

View File

@ -246,7 +246,7 @@ write_attribute(perf_test);
static size_t bch2_btree_cache_size(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
size_t ret = 0;
struct btree *b;
@ -301,9 +301,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
{
bch2_btree_id_to_text(out, c->gc_gens_btree);
prt_printf(out, ": ");
bch2_bpos_to_text(out, c->gc_gens_pos);
bch2_bbpos_to_text(out, c->gc_gens.pos);
prt_printf(out, "\n");
}
@ -311,7 +309,7 @@ static void bch2_fs_usage_base_to_text(struct printbuf *out, struct bch_fs *c)
{
struct bch_fs_usage_base b = {};
acc_u64s_percpu(&b.hidden, &c->usage->hidden, sizeof(b) / sizeof(u64));
acc_u64s_percpu(&b.hidden, &c->capacity.usage->hidden, sizeof(b) / sizeof(u64));
prt_printf(out, "hidden:\t\t%llu\n", b.hidden);
prt_printf(out, "btree:\t\t%llu\n", b.btree);
@ -427,13 +425,13 @@ STORE(bch2_fs)
/* Debugging: */
if (attr == &sysfs_trigger_btree_updates)
queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
queue_work(c->btree_interior_updates.worker, &c->btree_interior_updates.work);
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_sysfs))
return -EROFS;
if (attr == &sysfs_trigger_btree_cache_shrink) {
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
struct shrink_control sc;
sc.gfp_mask = GFP_KERNEL;
@ -475,7 +473,7 @@ STORE(bch2_fs)
bch2_journal_do_writes(&c->journal);
if (attr == &sysfs_trigger_freelist_wakeup)
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);
if (attr == &sysfs_trigger_recalc_capacity) {
guard(rwsem_read)(&c->state_lock);

View File

@ -425,9 +425,9 @@ static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c,
return ret;
struct bch_fs_usage_short u = bch2_fs_usage_read_short(c);
arg.capacity = c->capacity;
arg.capacity = c->capacity.capacity;
arg.used = u.used;
arg.online_reserved = percpu_u64_get(c->online_reserved);
arg.online_reserved = percpu_u64_get(&c->capacity.pcpu->online_reserved);
arg.replica_entries_bytes = replicas.nr;
for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) {
@ -458,9 +458,9 @@ static long bch2_ioctl_query_accounting(struct bch_fs *c,
if (ret)
return ret;
arg.capacity = c->capacity;
arg.capacity = c->capacity.capacity;
arg.used = bch2_fs_usage_read_short(c).used;
arg.online_reserved = percpu_u64_get(c->online_reserved);
arg.online_reserved = percpu_u64_get(&c->capacity.pcpu->online_reserved);
arg.accounting_u64s = accounting.nr / sizeof(u64);
return copy_to_user_errcode(user_arg, &arg, sizeof(arg));

View File

@ -167,7 +167,7 @@ int bch2_dev_in_fs(struct bch_sb_handle *fs,
void bch2_dev_io_ref_stop(struct bch_dev *ca, int rw)
{
if (rw == READ)
clear_bit(ca->dev_idx, ca->fs->online_devs.d);
clear_bit(ca->dev_idx, ca->fs->devs_online.d);
if (!enumerated_ref_is_zero(&ca->io_ref[rw]))
enumerated_ref_stop(&ca->io_ref[rw],
@ -519,7 +519,7 @@ int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb, struct prin
try(__bch2_dev_attach_bdev(c, ca, sb, err));
set_bit(ca->dev_idx, c->online_devs.d);
set_bit(ca->dev_idx, c->devs_online.d);
bch2_dev_sysfs_online(c, ca);
@ -546,7 +546,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
if (ca->mi.state == BCH_MEMBER_STATE_rw &&
new_state != BCH_MEMBER_STATE_rw) {
struct bch_devs_mask new_rw_devs = c->rw_devs[0];
struct bch_devs_mask new_rw_devs = c->allocator.rw_devs[0];
__clear_bit(ca->dev_idx, new_rw_devs.d);
return bch2_can_write_fs_with_devs(c, new_rw_devs, flags, err);
@ -821,7 +821,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path, struct printbuf *err)
ca->disk_sb.sb->dev_idx = dev_idx;
bch2_dev_attach(c, ca, dev_idx);
set_bit(ca->dev_idx, c->online_devs.d);
set_bit(ca->dev_idx, c->devs_online.d);
if (BCH_MEMBER_GROUP(&dev_mi)) {
ret = __bch2_dev_group_set(c, ca, label.buf);
@ -960,10 +960,10 @@ int bch2_dev_online(struct bch_fs *c, const char *path, struct printbuf *err)
static int bch2_dev_may_offline(struct bch_fs *c, struct bch_dev *ca, int flags, struct printbuf *err)
{
struct bch_devs_mask new_devs = c->online_devs;
struct bch_devs_mask new_devs = c->devs_online;
__clear_bit(ca->dev_idx, new_devs.d);
struct bch_devs_mask new_rw_devs = c->rw_devs[0];
struct bch_devs_mask new_rw_devs = c->allocator.rw_devs[0];
__clear_bit(ca->dev_idx, new_rw_devs.d);
if (!bch2_can_read_fs_with_devs(c, new_devs, flags, err) ||

View File

@ -274,27 +274,27 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c,
{
struct fsck_err_state *s;
list_for_each_entry(s, &c->fsck_error_msgs, list)
list_for_each_entry(s, &c->errors.msgs, list)
if (s->id == id) {
/*
* move it to the head of the list: repeated fsck errors
* are common
*/
list_move(&s->list, &c->fsck_error_msgs);
list_move(&s->list, &c->errors.msgs);
return s;
}
s = kzalloc(sizeof(*s), GFP_NOFS);
if (!s) {
if (!c->fsck_alloc_msgs_err)
if (!c->errors.msgs_alloc_err)
bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
c->fsck_alloc_msgs_err = true;
c->errors.msgs_alloc_err = true;
return NULL;
}
INIT_LIST_HEAD(&s->list);
s->id = id;
list_add(&s->list, &c->fsck_error_msgs);
list_add(&s->list, &c->errors.msgs);
return s;
}
@ -385,7 +385,7 @@ bool __bch2_count_fsck_err(struct bch_fs *c,
bool print = true, repeat = false, suppress = false;
scoped_guard(mutex, &c->fsck_error_msgs_lock)
scoped_guard(mutex, &c->errors.msgs_lock)
count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress);
if (suppress)
@ -506,7 +506,7 @@ int __bch2_fsck_err(struct bch_fs *c,
}
}
mutex_lock(&c->fsck_error_msgs_lock);
mutex_lock(&c->errors.msgs_lock);
bool repeat = false, print = true, suppress = false;
bool inconsistent = false, exiting = false;
struct fsck_err_state *s =
@ -626,7 +626,7 @@ print:
if (s)
s->ret = ret;
err_unlock:
mutex_unlock(&c->fsck_error_msgs_lock);
mutex_unlock(&c->errors.msgs_lock);
err:
if (trans &&
!(flags & FSCK_ERR_NO_LOG) &&
@ -708,9 +708,9 @@ static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print)
{
struct fsck_err_state *s, *n;
guard(mutex)(&c->fsck_error_msgs_lock);
guard(mutex)(&c->errors.msgs_lock);
list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
list_for_each_entry_safe(s, n, &c->errors.msgs, list) {
if (print && s->ratelimited && s->last_msg)
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg);
@ -755,3 +755,22 @@ void bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *
{
lockrestart_do(trans, bch2_inum_offset_err_msg_trans_norestart(trans, out, subvol, pos));
}
void bch2_fs_errors_exit(struct bch_fs *c)
{
darray_exit(&c->errors.counts);
}
void bch2_fs_errors_init_early(struct bch_fs *c)
{
INIT_LIST_HEAD(&c->errors.msgs);
mutex_init(&c->errors.msgs_lock);
mutex_init(&c->errors.counts_lock);
darray_init(&c->errors.counts);
}
int bch2_fs_errors_init(struct bch_fs *c)
{
return bch2_sb_errors_to_cpu(c);
}

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ERROR_H
#define _BCACHEFS_ERROR_H
#ifndef _BCACHEFS_INIT_ERROR_H
#define _BCACHEFS_INIT_ERROR_H
#include <linux/list.h>
#include <linux/printk.h>
@ -280,4 +280,8 @@ static inline void bch2_account_io_completion(struct bch_dev *ca,
int bch2_inum_offset_err_msg_trans_norestart(struct btree_trans *, struct printbuf *, u32, struct bpos);
void bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, u32, struct bpos);
#endif /* _BCACHEFS_ERROR_H */
void bch2_fs_errors_exit(struct bch_fs *);
void bch2_fs_errors_init_early(struct bch_fs *);
int bch2_fs_errors_init(struct bch_fs *);
#endif /* _BCACHEFS_INIT_ERROR_H */

View File

@ -0,0 +1,16 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_INIT_ERROR_TYPES_H
#define _BCACHEFS_INIT_ERROR_TYPES_H
#include "sb/errors_types.h"
struct bch_fs_errors {
struct list_head msgs;
struct mutex msgs_lock;
bool msgs_alloc_err;
bch_sb_errors_cpu counts;
struct mutex counts_lock;
};
#endif /* _BCACHEFS_INIT_ERROR_TYPES_H */
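
struct bch_fs_errors replaces the scattered c->fsck_error_* fields, and the hooks declared alongside it follow the usual init_early/init/exit pattern. A hedged usage sketch, grounded in the definitions earlier in this commit (the real call sites are the fs init/teardown paths changed below; the wrapper function itself is hypothetical):

static int example_errors_lifecycle(struct bch_fs *c)
{
	bch2_fs_errors_init_early(c);		/* lists, mutexes, empty counts darray */

	int ret = bch2_fs_errors_init(c);	/* loads per-error counts from the superblock */
	if (ret)
		return ret;

	/* ... filesystem runs ... */

	bch2_fs_errors_exit(c);			/* frees c->errors.counts */
	return 0;
}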

View File

@ -556,6 +556,9 @@ int bch2_fs_read_write_early(struct bch_fs *c)
static void __bch2_fs_free(struct bch_fs *c)
{
bch2_journal_keys_put_initial(c);
BUG_ON(atomic_read(&c->journal_keys.ref));
for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_exit(&c->times[i]);
@ -570,7 +573,6 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_free_fsck_errs(c);
bch2_fs_vfs_exit(c);
bch2_fs_snapshots_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_replicas_exit(c);
bch2_fs_reconcile_exit(c);
bch2_fs_quota_exit(c);
@ -581,12 +583,15 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_fs_fsio_exit(c);
bch2_fs_io_write_exit(c);
bch2_fs_io_read_exit(c);
bch2_fs_errors_exit(c);
bch2_fs_encryption_exit(c);
bch2_fs_ec_exit(c);
bch2_fs_counters_exit(c);
bch2_fs_copygc_exit(c);
bch2_fs_compress_exit(c);
bch2_io_clock_exit(&c->io_clock[WRITE]);
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_capacity_exit(c);
bch2_fs_buckets_waiting_for_journal_exit(c);
bch2_fs_btree_write_buffer_exit(c);
bch2_fs_btree_key_cache_exit(&c->btree_key_cache);
@ -595,19 +600,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_fs_btree_cache_exit(c);
bch2_fs_accounting_exit(c);
bch2_fs_async_obj_exit(c);
bch2_journal_keys_put_initial(c);
BUG_ON(atomic_read(&c->journal_keys.ref));
percpu_free_rwsem(&c->mark_lock);
if (c->online_reserved) {
u64 v = percpu_u64_get(c->online_reserved);
WARN(v, "online_reserved not 0 at shutdown: %lli", v);
free_percpu(c->online_reserved);
}
darray_exit(&c->btree_roots_extra);
free_percpu(c->pcpu);
free_percpu(c->usage);
mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio);
mempool_exit(&c->fill_iter);
@ -623,8 +616,6 @@ static void __bch2_fs_free(struct bch_fs *c)
destroy_workqueue(c->btree_write_submit_wq);
if (c->btree_read_complete_wq)
destroy_workqueue(c->btree_read_complete_wq);
if (c->copygc_wq)
destroy_workqueue(c->copygc_wq);
if (c->btree_write_complete_wq)
destroy_workqueue(c->btree_write_complete_wq);
if (c->btree_update_wq)
@ -682,7 +673,7 @@ int bch2_fs_stop(struct bch_fs *c)
cancel_work_sync(&c->read_only_work);
flush_work(&c->btree_interior_update_work);
flush_work(&c->btree_interior_updates.work);
}
if (test_bit(BCH_FS_emergency_ro, &c->flags))
@ -769,8 +760,6 @@ int bch2_fs_init_rw(struct bch_fs *c)
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
!(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
@ -1060,7 +1049,6 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
init_rwsem(&c->state_lock);
mutex_init(&c->sb_lock);
mutex_init(&c->btree_root_lock);
INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);
refcount_set(&c->ro_ref, 1);
@ -1079,13 +1067,13 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
bch2_fs_btree_write_buffer_init_early(c);
bch2_fs_copygc_init(c);
bch2_fs_ec_init_early(c);
bch2_fs_errors_init_early(c);
bch2_fs_journal_init_early(&c->journal);
bch2_fs_journal_keys_init(c);
bch2_fs_move_init(c);
bch2_fs_nocow_locking_init_early(c);
bch2_fs_quota_init(c);
bch2_fs_recovery_passes_init(c);
bch2_fs_sb_errors_init_early(c);
bch2_fs_snapshots_init_early(c);
bch2_fs_subvolumes_init_early(c);
bch2_find_btree_nodes_init(&c->found_btree_nodes);
@ -1093,18 +1081,11 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
INIT_LIST_HEAD(&c->list);
mutex_init(&c->bio_bounce_pages_lock);
mutex_init(&c->snapshot_table_lock);
init_rwsem(&c->snapshot_create_lock);
spin_lock_init(&c->btree_write_error_lock);
INIT_LIST_HEAD(&c->journal_iters);
INIT_LIST_HEAD(&c->fsck_error_msgs);
mutex_init(&c->fsck_error_msgs_lock);
seqcount_init(&c->usage_lock);
INIT_LIST_HEAD(&c->vfs_inodes_list);
mutex_init(&c->vfs_inodes_lock);
@ -1112,9 +1093,7 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write];
c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq];
mutex_init(&c->sectors_available_lock);
try(percpu_init_rwsem(&c->mark_lock));
try(bch2_fs_capacity_init(c));
scoped_guard(mutex, &c->sb_lock)
try(bch2_sb_to_fs(c, sb));
@ -1171,9 +1150,6 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
max(offsetof(struct btree_read_bio, bio),
offsetof(struct btree_write_bio, wbio.bio)),
BIOSET_NEED_BVECS) ||
!(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
!(c->usage = alloc_percpu(struct bch_fs_usage_base)) ||
!(c->online_reserved = alloc_percpu(u64)) ||
mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1,
c->opts.btree_node_size))
return bch_err_throw(c, ENOMEM_fs_other_alloc);
@ -1189,12 +1165,12 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
try(bch2_fs_compress_init(c));
try(bch2_fs_counters_init(c));
try(bch2_fs_ec_init(c));
try(bch2_fs_errors_init(c));
try(bch2_fs_encryption_init(c));
try(bch2_fs_fsio_init(c));
try(bch2_fs_fs_io_direct_init(c));
try(bch2_fs_io_read_init(c));
try(bch2_fs_reconcile_init(c));
try(bch2_fs_sb_errors_init(c));
try(bch2_fs_vfs_init(c));
@ -1304,9 +1280,9 @@ static int bch2_fs_may_start(struct bch_fs *c, struct printbuf *err)
}
}
if (!bch2_can_read_fs_with_devs(c, c->online_devs, flags, err) ||
if (!bch2_can_read_fs_with_devs(c, c->devs_online, flags, err) ||
(!c->opts.read_only &&
!bch2_can_write_fs_with_devs(c, c->rw_devs[0], flags, err))) {
!bch2_can_write_fs_with_devs(c, c->allocator.rw_devs[0], flags, err))) {
prt_printf(err, "Missing devices\n");
for_each_member_device(c, ca)
if (!bch2_dev_is_online(ca) && bch2_dev_has_data(c, ca)) {

View File

@ -511,8 +511,8 @@ static int journal_replay_entry_early(struct bch_fs *c,
entry->btree_id, BTREE_ID_NR_MAX))
return 0;
while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR)
try(darray_push(&c->btree_roots_extra, (struct btree_root) { NULL }));
while (entry->btree_id >= c->btree_cache.roots_extra.nr + BTREE_ID_NR)
try(darray_push(&c->btree_cache.roots_extra, (struct btree_root) { NULL }));
struct btree_root *r = bch2_btree_id_root(c, entry->btree_id);

View File

@ -49,7 +49,7 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr,
if (ret == -BCH_ERR_bucket_alloc_blocked)
ret = bch_err_throw(c, freelist_empty);
if (ret == -BCH_ERR_freelist_empty) /* don't if we're actually out of buckets */
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);
if (ret)
break;
@ -475,7 +475,6 @@ int bch2_fs_journal_start(struct journal *j, struct journal_start_info info)
scoped_guard(spinlock, &j->lock) {
j->last_flush_write = jiffies;
j->reservations.idx = journal_cur_seq(j);
c->last_bucket_seq_cleanup = journal_cur_seq(j);
}
try(bch2_replicas_gc_reffed(c));

View File

@ -653,7 +653,7 @@ static unsigned max_dev_latency(struct bch_fs *c)
u64 nsecs = 0;
guard(rcu)();
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
for_each_member_device_rcu(c, ca, &c->allocator.rw_devs[BCH_DATA_journal])
nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
return nsecs_to_jiffies(nsecs);
@ -1137,7 +1137,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
j->space[journal_space_total].total);
}
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
for_each_member_device_rcu(c, ca, &c->allocator.rw_devs[BCH_DATA_journal]) {
struct journal_device *ja = &ca->journal;
if (!ja->nr)
continue;

View File

@ -161,7 +161,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
size_t mem_limit = max_t(ssize_t, 0,
(totalram_pages() * PAGE_SIZE) / 4 - j->dirty_entry_bytes);
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
for_each_member_device_rcu(c, ca, &c->allocator.rw_devs[BCH_DATA_journal]) {
if (!ca->journal.nr)
continue;
@ -209,7 +209,7 @@ void bch2_journal_space_available(struct journal *j)
lockdep_assert_held(&j->lock);
guard(rcu)();
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
for_each_member_device_rcu(c, ca, &c->allocator.rw_devs[BCH_DATA_journal]) {
struct journal_device *ja = &ca->journal;
if (!ja->nr)
@ -238,7 +238,7 @@ void bch2_journal_space_available(struct journal *j)
prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
"rw journal devs:", nr_online, metadata_replicas_required(c));
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
for_each_member_device_rcu(c, ca, &c->allocator.rw_devs[BCH_DATA_journal])
prt_printf(&buf, " %s", ca->name);
bch_err(c, "%s", buf.buf);
@ -709,7 +709,7 @@ static u64 journal_seq_to_flush(struct journal *j)
static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct btree_cache *bc = &c->btree_cache;
struct bch_fs_btree_cache *bc = &c->btree_cache;
bool kthread = (current->flags & PF_KTHREAD) != 0;
u64 seq_to_flush;
size_t min_nr, min_key_cache, nr_flushed;

View File

@ -369,7 +369,7 @@ static CLOSURE_CALLBACK(journal_write_done)
if (last_seq_ondisk_updated) {
bch2_reset_alloc_cursors(c);
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);
bch2_do_discards(c);
}
@ -410,6 +410,11 @@ static CLOSURE_CALLBACK(journal_write_submit)
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned sectors = vstruct_sectors(w->data, c->block_bits);
event_inc_trace(c, journal_write, buf, ({
prt_printf(&buf, "seq %llu\n", le64_to_cpu(w->data->seq));
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&w->key));
}));
extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
@ -442,9 +447,6 @@ static CLOSURE_CALLBACK(journal_write_submit)
bch2_bio_map(bio, w->data, sectors << 9);
event_inc_trace(c, journal_write, buf,
prt_printf(&buf, "seq %llu", le64_to_cpu(w->data->seq)));
closure_bio_submit(bio, cl);
ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
@ -699,7 +701,7 @@ CLOSURE_CALLBACK(bch2_journal_write)
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_free]);
unsigned nr_rw_members = dev_mask_nr(&c->allocator.rw_devs[BCH_DATA_free]);
int ret;
BUG_ON(!w->write_started);

View File

@ -212,10 +212,10 @@ UPGRADE_TABLE_INCOMPAT()
static int have_stripes(struct bch_fs *c)
{
if (IS_ERR_OR_NULL(c->btree_roots_known[BTREE_ID_stripes].b))
if (IS_ERR_OR_NULL(c->btree_cache.roots_known[BTREE_ID_stripes].b))
return 0;
return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b);
return !btree_node_fake(c->btree_cache.roots_known[BTREE_ID_stripes].b);
}
int bch2_sb_set_upgrade_extra(struct bch_fs *c)

View File

@ -107,9 +107,9 @@ void bch2_fs_errors_to_text(struct printbuf *out, struct bch_fs *c)
if (out->nr_tabstops < 3)
printbuf_tabstop_push(out, 16);
guard(mutex)(&c->fsck_error_counts_lock);
guard(mutex)(&c->errors.counts_lock);
bch_sb_errors_cpu *e = &c->fsck_error_counts;
bch_sb_errors_cpu *e = &c->errors.counts;
darray_for_each(*e, i) {
bch2_sb_error_id_to_text(out, i->id);
prt_tab(out);
@ -122,7 +122,7 @@ void bch2_fs_errors_to_text(struct printbuf *out, struct bch_fs *c)
void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err)
{
bch_sb_errors_cpu *e = &c->fsck_error_counts;
bch_sb_errors_cpu *e = &c->errors.counts;
struct bch_sb_error_entry_cpu n = {
.id = err,
.nr = 1,
@ -130,7 +130,7 @@ void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err)
};
unsigned i;
guard(mutex)(&c->fsck_error_counts_lock);
guard(mutex)(&c->errors.counts_lock);
for (i = 0; i < e->nr; i++) {
if (err == e->data[i].id) {
@ -150,9 +150,9 @@ void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err)
void bch2_sb_errors_from_cpu(struct bch_fs *c)
{
guard(mutex)(&c->fsck_error_counts_lock);
guard(mutex)(&c->errors.counts_lock);
bch_sb_errors_cpu *src = &c->fsck_error_counts;
bch_sb_errors_cpu *src = &c->errors.counts;
struct bch_sb_field_errors *dst =
bch2_sb_field_resize(&c->disk_sb, errors,
bch2_sb_field_errors_u64s(src->nr));
@ -166,12 +166,12 @@ void bch2_sb_errors_from_cpu(struct bch_fs *c)
}
}
static int bch2_sb_errors_to_cpu(struct bch_fs *c)
int bch2_sb_errors_to_cpu(struct bch_fs *c)
{
guard(mutex)(&c->fsck_error_counts_lock);
guard(mutex)(&c->errors.counts_lock);
struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors);
bch_sb_errors_cpu *dst = &c->fsck_error_counts;
bch_sb_errors_cpu *dst = &c->errors.counts;
unsigned nr = bch2_sb_field_errors_nr_entries(src);
if (!nr)
@ -191,19 +191,3 @@ static int bch2_sb_errors_to_cpu(struct bch_fs *c)
return 0;
}
void bch2_fs_sb_errors_exit(struct bch_fs *c)
{
darray_exit(&c->fsck_error_counts);
}
void bch2_fs_sb_errors_init_early(struct bch_fs *c)
{
mutex_init(&c->fsck_error_counts_lock);
darray_init(&c->fsck_error_counts);
}
int bch2_fs_sb_errors_init(struct bch_fs *c)
{
return bch2_sb_errors_to_cpu(c);
}

View File

@ -14,9 +14,6 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_errors;
void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id);
void bch2_sb_errors_from_cpu(struct bch_fs *);
void bch2_fs_sb_errors_exit(struct bch_fs *);
void bch2_fs_sb_errors_init_early(struct bch_fs *);
int bch2_fs_sb_errors_init(struct bch_fs *);
int bch2_sb_errors_to_cpu(struct bch_fs *);
#endif /* _BCACHEFS_SB_ERRORS_H */
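
For reference, the counting API exposed by this header is unchanged by the rename; only the backing storage moved to c->errors.counts, and bch2_sb_errors_to_cpu() is now exported in place of the dropped init/exit wrappers. A hedged usage sketch; the error id is chosen for illustration only:

/* Hedged sketch: bump the persistent per-error counter that
 * bch2_sb_errors_from_cpu() later writes back to the superblock.
 * The error id below is illustrative, not tied to this commit. */
static void note_fsck_error(struct bch_fs *c)
{
	bch2_sb_error_count(c, BCH_FSCK_ERR_snapshot_bad_depth);
}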

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_SUPER_IO_H
#define _BCACHEFS_SUPER_IO_H
#ifndef _BCACHEFS_SB_IO_H
#define _BCACHEFS_SB_IO_H
#include "data/extents.h"
#include "init/dev_types.h"
@ -116,4 +116,4 @@ void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
void bch2_sb_layout_to_text(struct printbuf *, struct bch_sb_layout *);
void bch2_sb_to_text(struct printbuf *, struct bch_sb *, bool, unsigned);
#endif /* _BCACHEFS_SUPER_IO_H */
#endif /* _BCACHEFS_SB_IO_H */

libbcachefs/sb/io_types.h (normal file, 33 lines)
View File

@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_SB_IO_TYPES_H
#define _BCACHEFS_SB_IO_TYPES_H
/* Updated by bch2_sb_update():*/
struct bch_sb_cpu {
__uuid_t uuid;
__uuid_t user_uuid;
u16 version;
u16 version_incompat;
u16 version_incompat_allowed;
u16 version_min;
u16 version_upgrade_complete;
u8 nr_devices;
u8 clean;
bool multi_device; /* true if we've ever had more than one device */
u8 encryption_type;
u64 time_base_lo;
u32 time_base_hi;
unsigned time_units_per_sec;
unsigned nsec_per_time_unit;
u64 features;
u64 compat;
u64 recovery_passes_required;
unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
u64 btrees_lost_data;
};
#endif /* _BCACHEFS_SB_IO_TYPES_H */
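
struct bch_sb_cpu is the in-memory mirror of superblock fields kept current by bch2_sb_update(), per the comment above. A hedged sketch of a consumer, assuming the mirror stays embedded in struct bch_fs as the existing sb member; the helper name is illustrative:

/* Hedged sketch: read cached superblock state from the bch_sb_cpu
 * mirror. Assumes struct bch_fs still embeds it as c->sb; the helper
 * name is not part of this commit. */
static inline bool fs_is_multi_device(struct bch_fs *c)
{
	return c->sb.multi_device || c->sb.nr_devices > 1;
}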

View File

@ -128,10 +128,10 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev *
(_ca = __bch2_next_dev((_c), _ca, (_mask)));)
#define for_each_online_member_rcu(_c, _ca) \
for_each_member_device_rcu(_c, _ca, &(_c)->online_devs)
for_each_member_device_rcu(_c, _ca, &(_c)->devs_online)
#define for_each_rw_member_rcu(_c, _ca) \
for_each_member_device_rcu(_c, _ca, &(_c)->rw_devs[BCH_DATA_free])
for_each_member_device_rcu(_c, _ca, &(_c)->allocator.rw_devs[BCH_DATA_free])
static inline void bch2_dev_get(struct bch_dev *ca)
{

View File

@ -13,7 +13,7 @@
static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id)
{
guard(mutex)(&c->snapshot_table_lock);
guard(mutex)(&c->snapshots.table_lock);
return bch2_snapshot_t_mut(c, id)
? 0
: bch_err_throw(c, ENOMEM_mark_snapshot);
@ -38,7 +38,7 @@ u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root,
snapshot_id_list *skip)
{
guard(rcu)();
struct snapshot_table *t = rcu_dereference(c->snapshots);
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
while (true) {
u32 id = snapshot_root, subvol = 0;

View File

@ -91,7 +91,7 @@ static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id,
bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
{
guard(rcu)();
return __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
return __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots.table), id, ancestor);
}
static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
@ -125,7 +125,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
#endif
guard(rcu)();
struct snapshot_table *t = rcu_dereference(c->snapshots);
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
if (unlikely(recovery_pass_will_run(c, BCH_RECOVERY_PASS_check_snapshots)))
return __bch2_snapshot_is_ancestor_early(t, id, ancestor);
@ -159,25 +159,23 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
new->nr = new_size;
old = rcu_dereference_protected(c->snapshots, true);
old = rcu_dereference_protected(c->snapshots.table, true);
if (old)
memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);
rcu_assign_pointer(c->snapshots, new);
rcu_assign_pointer(c->snapshots.table, new);
kvfree_rcu(old, rcu);
return &rcu_dereference_protected(c->snapshots,
lockdep_is_held(&c->snapshot_table_lock))->s[idx];
return &rcu_dereference_protected(c->snapshots.table,
lockdep_is_held(&c->snapshots.table_lock))->s[idx];
}
struct snapshot_t *bch2_snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
struct snapshot_table *table =
rcu_dereference_protected(c->snapshots,
lockdep_is_held(&c->snapshot_table_lock));
lockdep_assert_held(&c->snapshot_table_lock);
rcu_dereference_protected(c->snapshots.table,
lockdep_is_held(&c->snapshots.table_lock));
if (likely(table && idx < table->nr))
return &table->s[idx];
@ -278,7 +276,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
struct snapshot_t *t;
u32 id = new.k->p.offset;
guard(mutex)(&c->snapshot_table_lock);
guard(mutex)(&c->snapshots.table_lock);
t = bch2_snapshot_t_mut(c, id);
if (!t)
@ -687,10 +685,10 @@ static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
static unsigned live_child(struct bch_fs *c, u32 start)
{
struct snapshot_delete *d = &c->snapshot_delete;
struct snapshot_delete *d = &c->snapshots.delete;
guard(rcu)();
struct snapshot_table *t = rcu_dereference(c->snapshots);
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
for (u32 id = bch2_snapshot_tree_next(t, start);
id && id != start;
@ -714,7 +712,7 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
{
struct snapshot_delete *d = &trans->c->snapshot_delete;
struct snapshot_delete *d = &trans->c->snapshots.delete;
if (snapshot_list_has_id(&d->delete_leaves, k.k->p.snapshot))
return bch2_btree_delete_at(trans, iter,
@ -744,7 +742,7 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter, u64 *prev_inum)
{
struct bch_fs *c = trans->c;
struct snapshot_delete *d = &c->snapshot_delete;
struct snapshot_delete *d = &c->snapshots.delete;
u64 inum = iter->btree_id != BTREE_ID_inodes
? iter->pos.inode
@ -771,7 +769,7 @@ static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree
static int delete_dead_snapshot_keys_v1(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct snapshot_delete *d = &c->snapshot_delete;
struct snapshot_delete *d = &c->snapshots.delete;
bch2_progress_init(&d->progress, c, btree_has_snapshots_mask);
d->progress.silent = true;
@ -820,7 +818,7 @@ static int delete_dead_snapshot_keys_range(struct btree_trans *trans,
static int delete_dead_snapshot_keys_v2(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct snapshot_delete *d = &c->snapshot_delete;
struct snapshot_delete *d = &c->snapshots.delete;
CLASS(disk_reservation, res)(c);
u64 prev_inum = 0;
@ -893,7 +891,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
return 0;
struct bch_fs *c = trans->c;
struct snapshot_delete *d = &c->snapshot_delete;
struct snapshot_delete *d = &c->snapshots.delete;
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
unsigned live_children = 0;
@ -937,7 +935,7 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
interior_delete_list *skip)
{
guard(rcu)();
struct snapshot_table *t = rcu_dereference(c->snapshots);
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
while (interior_delete_has_id(skip, id))
id = __bch2_snapshot_parent(t, id);
@ -1045,7 +1043,7 @@ static int delete_dead_snapshots_locked(struct bch_fs *c)
try(for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
check_should_delete_snapshot(trans, k)));
struct snapshot_delete *d = &c->snapshot_delete;
struct snapshot_delete *d = &c->snapshots.delete;
if (!d->delete_leaves.nr && !d->delete_interior.nr)
return 0;
@ -1070,7 +1068,7 @@ static int delete_dead_snapshots_locked(struct bch_fs *c)
int __bch2_delete_dead_snapshots(struct bch_fs *c)
{
struct snapshot_delete *d = &c->snapshot_delete;
struct snapshot_delete *d = &c->snapshots.delete;
if (!mutex_trylock(&d->lock))
return 0;
@ -1108,7 +1106,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete.work);
struct bch_fs *c = container_of(work, struct bch_fs, snapshots.delete.work);
set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
@ -1126,13 +1124,13 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *c)
BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
if (!queue_work(system_long_wq, &c->snapshot_delete.work))
if (!queue_work(system_long_wq, &c->snapshots.delete.work))
enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}
void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c)
{
struct snapshot_delete *d = &c->snapshot_delete;
struct snapshot_delete *d = &c->snapshots.delete;
if (!d->running) {
prt_str(out, "(not running)");
@ -1272,13 +1270,17 @@ int bch2_snapshots_read(struct bch_fs *c)
void bch2_fs_snapshots_exit(struct bch_fs *c)
{
kvfree(rcu_dereference_protected(c->snapshots, true));
kvfree(rcu_dereference_protected(c->snapshots.table, true));
}
void bch2_fs_snapshots_init_early(struct bch_fs *c)
{
INIT_WORK(&c->snapshot_delete.work, bch2_delete_dead_snapshots_work);
mutex_init(&c->snapshot_delete.lock);
mutex_init(&c->snapshot_delete.progress_lock);
mutex_init(&c->snapshots_unlinked_lock);
mutex_init(&c->snapshots.table_lock);
init_rwsem(&c->snapshots.create_lock);
INIT_WORK(&c->snapshots.delete.work, bch2_delete_dead_snapshots_work);
mutex_init(&c->snapshots.delete.lock);
mutex_init(&c->snapshots.delete.progress_lock);
mutex_init(&c->snapshots.unlinked_lock);
}

View File

@ -41,7 +41,7 @@ static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
{
return __snapshot_t(rcu_dereference(c->snapshots), id);
return __snapshot_t(rcu_dereference(c->snapshots.table), id);
}
struct snapshot_t *bch2_snapshot_t_mut(struct bch_fs *, u32);
@ -96,13 +96,13 @@ static inline u32 __bch2_snapshot_parent(struct snapshot_table *t, u32 id)
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
guard(rcu)();
return __bch2_snapshot_parent(rcu_dereference(c->snapshots), id);
return __bch2_snapshot_parent(rcu_dereference(c->snapshots.table), id);
}
static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
{
guard(rcu)();
struct snapshot_table *t = rcu_dereference(c->snapshots);
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
while (n--)
id = __bch2_snapshot_parent(t, id);
@ -115,7 +115,7 @@ u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32);
static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
{
guard(rcu)();
struct snapshot_table *t = rcu_dereference(c->snapshots);
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
u32 parent;
while ((parent = __bch2_snapshot_parent(t, id)))
@ -132,7 +132,7 @@ static inline enum snapshot_id_state __bch2_snapshot_id_state(struct snapshot_ta
static inline enum snapshot_id_state bch2_snapshot_id_state(struct bch_fs *c, u32 id)
{
guard(rcu)();
return __bch2_snapshot_id_state(rcu_dereference(c->snapshots), id);
return __bch2_snapshot_id_state(rcu_dereference(c->snapshots.table), id);
}
static inline bool __bch2_snapshot_exists(struct snapshot_table *t, u32 id)

View File

@ -56,4 +56,14 @@ struct snapshot_delete {
struct progress_indicator progress;
};
struct bch_fs_snapshots {
struct snapshot_table __rcu *table;
struct mutex table_lock;
struct rw_semaphore create_lock;
struct snapshot_delete delete;
struct work_struct wait_for_pagecache_and_delete_work;
snapshot_id_list unlinked;
struct mutex unlinked_lock;
};
#endif /* _BCACHEFS_SNAPSHOT_TYPES_H */
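
The locking rules implied by the new struct bch_fs_snapshots, condensed from the snapshot.c hunks above: lookups dereference snapshots.table under RCU, while mutations take snapshots.table_lock. A minimal sketch mirroring the existing helpers; the function names here are illustrative:

/* Read side: RCU-protected table lookup, as in bch2_snapshot_exists(). */
static bool snapshot_is_live(struct bch_fs *c, u32 id)
{
	guard(rcu)();
	return __bch2_snapshot_exists(rcu_dereference(c->snapshots.table), id);
}

/* Write side: table growth/mutation under table_lock, as in
 * bch2_snapshot_table_make_room() above. */
static int snapshot_make_room(struct bch_fs *c, u32 id)
{
	guard(mutex)(&c->snapshots.table_lock);
	return bch2_snapshot_t_mut(c, id)
		? 0
		: bch_err_throw(c, ENOMEM_mark_snapshot);
}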

View File

@ -460,15 +460,15 @@ static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs,
snapshot_wait_for_pagecache_and_delete_work);
snapshots.wait_for_pagecache_and_delete_work);
int ret = 0;
while (!ret) {
snapshot_id_list s;
scoped_guard(mutex, &c->snapshots_unlinked_lock) {
s = c->snapshots_unlinked;
darray_init(&c->snapshots_unlinked);
scoped_guard(mutex, &c->snapshots.unlinked_lock) {
s = c->snapshots.unlinked;
darray_init(&c->snapshots.unlinked);
}
if (!s.nr)
@ -502,14 +502,14 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans
struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
struct bch_fs *c = trans->c;
scoped_guard(mutex, &c->snapshots_unlinked_lock)
if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
try(snapshot_list_add(c, &c->snapshots_unlinked, h->subvol));
scoped_guard(mutex, &c->snapshots.unlinked_lock)
if (!snapshot_list_has_id(&c->snapshots.unlinked, h->subvol))
try(snapshot_list_add(c, &c->snapshots.unlinked, h->subvol));
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache))
return -EROFS;
if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
if (!queue_work(c->write_ref_wq, &c->snapshots.wait_for_pagecache_and_delete_work))
enumerated_ref_put(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache);
return 0;
}
@ -661,6 +661,6 @@ int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
void bch2_fs_subvolumes_init_early(struct bch_fs *c)
{
INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
INIT_WORK(&c->snapshots.wait_for_pagecache_and_delete_work,
bch2_subvolume_wait_for_pagecache_and_delete);
}
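
The two hunks above form a swap-and-drain handshake: the transaction hook appends the subvolume id to c->snapshots.unlinked under unlinked_lock and queues the worker, which repeatedly swaps the list out under the same lock and processes it until empty. A condensed sketch of that pattern; the per-subvolume step is a hypothetical stand-in for the real pagecache wait and delete:

/* Hypothetical per-subvolume step, standing in for the worker's real
 * pagecache wait + delete: */
static void process_unlinked_subvol(struct bch_fs *c, u32 subvolid);

/* Condensed sketch of the swap-and-drain loop used by
 * bch2_subvolume_wait_for_pagecache_and_delete() above. */
static void drain_unlinked_subvols(struct bch_fs *c)
{
	while (true) {
		snapshot_id_list s;

		scoped_guard(mutex, &c->snapshots.unlinked_lock) {
			s = c->snapshots.unlinked;
			darray_init(&c->snapshots.unlinked);
		}

		if (!s.nr)
			break;

		darray_for_each(s, id)
			process_unlinked_subvol(c, *id);

		darray_exit(&s);
	}
}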

View File

@ -540,8 +540,8 @@ static bool can_write_now(struct bch_fs *c, unsigned replicas_want, struct closu
unsigned reserved = OPEN_BUCKETS_COUNT -
(OPEN_BUCKETS_COUNT - bch2_open_buckets_reserved(BCH_WATERMARK_normal)) / 2;
if (unlikely(c->open_buckets_nr_free <= reserved)) {
closure_wait(&c->open_buckets_wait, cl);
if (unlikely(c->allocator.open_buckets_nr_free <= reserved)) {
closure_wait(&c->allocator.open_buckets_wait, cl);
return false;
}

View File

@ -314,7 +314,7 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
!arg.src_ptr)
snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol;
scoped_guard(rwsem_write, &c->snapshot_create_lock)
scoped_guard(rwsem_write, &c->snapshots.create_lock)
inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
dst_dentry, arg.mode|S_IFDIR,
0, snapshot_src, create_flags);