Update bcachefs sources to ce8f0c316862 bcachefs: struct bch_fs_snapshots
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 0532e81278
commit 9f8195b54c
@@ -1 +1 @@
5df84d32ad84d74ababcd783bf92ed1a1853e74d
ce8f0c316862869e9e2c57270b8d5f15f26be5ca
@@ -52,7 +52,7 @@ void strip_fs_alloc(struct bch_fs *c)
swap(u64s, clean->field.u64s);
bch2_sb_field_resize(&c->disk_sb, clean, u64s);

scoped_guard(percpu_write, &c->mark_lock) {
scoped_guard(percpu_write, &c->capacity.mark_lock) {
kfree(c->replicas.entries);
c->replicas.entries = NULL;
c->replicas.nr = 0;
@@ -429,11 +429,11 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
!bch2_replicas_marked_locked(c, &r.e))
return bch_err_throw(c, btree_insert_need_mark_replicas);

percpu_up_read(&c->mark_lock);
percpu_up_read(&c->capacity.mark_lock);
int ret;
scoped_guard(percpu_write, &c->mark_lock)
scoped_guard(percpu_write, &c->capacity.mark_lock)
ret = __bch2_accounting_mem_insert(c, a);
percpu_down_read(&c->mark_lock);
percpu_down_read(&c->capacity.mark_lock);
return ret;
}
@@ -469,7 +469,7 @@ void __bch2_accounting_maybe_kill(struct bch_fs *c, struct bpos pos)
return;

guard(mutex)(&c->sb_lock);
scoped_guard(percpu_write, &c->mark_lock) {
scoped_guard(percpu_write, &c->capacity.mark_lock) {
struct bch_accounting_mem *acc = &c->accounting;

unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
@@ -507,7 +507,7 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
{
struct bch_accounting_mem *acc = &c->accounting;

guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
darray_for_each(acc->k, i) {
union {
u8 bytes[struct_size_t(struct bch_replicas_usage, r.devs,
@@ -539,7 +539,7 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc

darray_init(out_buf);

guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
darray_for_each(acc->k, i) {
struct disk_accounting_pos a_p;
bpos_to_disk_accounting_pos(&a_p, i->pos);
@@ -577,7 +577,7 @@ int bch2_gc_accounting_start(struct bch_fs *c)
struct bch_accounting_mem *acc = &c->accounting;
int ret = 0;

guard(percpu_write)(&c->mark_lock);
guard(percpu_write)(&c->capacity.mark_lock);
darray_for_each(acc->k, e) {
e->v[1] = __alloc_percpu_gfp(e->nr_counters * sizeof(u64),
sizeof(u64), GFP_KERNEL);
@@ -600,7 +600,7 @@ int bch2_gc_accounting_done(struct bch_fs *c)
struct bpos pos = POS_MIN;
int ret = 0;

guard(percpu_write)(&c->mark_lock);
guard(percpu_write)(&c->capacity.mark_lock);
while (1) {
unsigned idx = eytzinger0_find_ge(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &pos);
@@ -643,11 +643,11 @@ int bch2_gc_accounting_done(struct bch_fs *c)
bch2_trans_unlock_long(trans);

if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) {
percpu_up_write(&c->mark_lock);
percpu_up_write(&c->capacity.mark_lock);
ret = commit_do(trans, NULL, NULL,
BCH_TRANS_COMMIT_skip_accounting_apply,
bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false));
percpu_down_write(&c->mark_lock);
percpu_down_write(&c->capacity.mark_lock);
if (ret)
goto err;
@@ -661,7 +661,7 @@ int bch2_gc_accounting_done(struct bch_fs *c)
BCH_ACCOUNTING_normal, true);

guard(preempt)();
struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
struct bch_fs_usage_base *dst = this_cpu_ptr(c->capacity.usage);
struct bch_fs_usage_base *src = &trans->fs_usage_delta;
acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64));
}
@@ -681,7 +681,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
if (k.k->type != KEY_TYPE_accounting)
return 0;

guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
return bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k),
BCH_ACCOUNTING_read, false);
}
@@ -874,7 +874,7 @@ static int accounting_read_mem_fixups(struct btree_trans *trans)
}

guard(preempt)();
struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
struct bch_fs_usage_base *usage = this_cpu_ptr(c->capacity.usage);

switch (k.type) {
case BCH_DISK_ACCOUNTING_persistent_reserved:
@@ -934,12 +934,12 @@ int bch2_accounting_read(struct bch_fs *c)
*
* Instead, zero out any accounting we have:
*/
scoped_guard(percpu_write, &c->mark_lock) {
scoped_guard(percpu_write, &c->capacity.mark_lock) {
darray_for_each(acc->k, e)
percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters);
for_each_member_device(c, ca)
percpu_memset(ca->usage, 0, sizeof(*ca->usage));
percpu_memset(c->usage, 0, sizeof(*c->usage));
percpu_memset(c->capacity.usage, 0, sizeof(*c->capacity.usage));
}

struct journal_keys *keys = &c->journal_keys;
@@ -1164,7 +1164,8 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
0;
}));

acc_u64s_percpu(&base_inmem.hidden, &c->usage->hidden, sizeof(base_inmem) / sizeof(u64));
acc_u64s_percpu(&base_inmem.hidden, &c->capacity.usage->hidden,
sizeof(base_inmem) / sizeof(u64));

#define check(x) \
if (base.x != base_inmem.x) { \
@@ -1183,7 +1184,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c)

void bch2_accounting_gc_free(struct bch_fs *c)
{
lockdep_assert_held(&c->mark_lock);
lockdep_assert_held(&c->capacity.mark_lock);

struct bch_accounting_mem *acc = &c->accounting;
@@ -230,7 +230,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,

static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc)
{
guard(percpu_read)(&trans->c->mark_lock);
guard(percpu_read)(&trans->c->capacity.mark_lock);
return bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false);
}
@@ -253,7 +253,7 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *
static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p,
u64 *v, unsigned nr)
{
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
struct bch_accounting_mem *acc = &c->accounting;
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &p);
@@ -923,7 +923,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,

if (statechange(a->data_type == BCH_DATA_free) &&
bucket_flushed(new_a))
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);

if (statechange(a->data_type == BCH_DATA_need_discard) &&
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
@@ -1587,13 +1587,13 @@ void bch2_recalc_capacity(struct bch_fs *c)

reserved_sectors = min(reserved_sectors, capacity);

c->reserved = reserved_sectors;
c->capacity = capacity - reserved_sectors;
c->capacity.reserved = reserved_sectors;
c->capacity.capacity = capacity - reserved_sectors;

c->bucket_size_max = bucket_size_max;
c->capacity.bucket_size_max = bucket_size_max;

/* Wake up case someone was waiting for buckets */
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);
}

u64 bch2_min_rw_member_capacity(struct bch_fs *c)
@@ -1610,8 +1610,8 @@ static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
{
struct open_bucket *ob;

for (ob = c->open_buckets;
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
for (ob = c->allocator.open_buckets;
ob < c->allocator.open_buckets + ARRAY_SIZE(c->allocator.open_buckets);
ob++) {
scoped_guard(spinlock, &ob->lock) {
if (ob->valid && !ob->on_partial_list &&
@@ -1627,7 +1627,7 @@ void bch2_dev_allocator_set_rw(struct bch_fs *c, struct bch_dev *ca, bool rw)
{
/* BCH_DATA_free == all rw devs */

for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++) {
for (unsigned i = 0; i < ARRAY_SIZE(c->allocator.rw_devs); i++) {
bool data_type_rw = rw;

if (i != BCH_DATA_free &&
@@ -1639,10 +1639,10 @@ void bch2_dev_allocator_set_rw(struct bch_fs *c, struct bch_dev *ca, bool rw)
!ca->mi.durability)
data_type_rw = false;

mod_bit(ca->dev_idx, c->rw_devs[i].d, data_type_rw);
mod_bit(ca->dev_idx, c->allocator.rw_devs[i].d, data_type_rw);
}

c->rw_devs_change_count++;
c->allocator.rw_devs_change_count++;
}

/* device goes ro: */
@@ -1664,7 +1664,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
* Wake up threads that were blocked on allocation, so they can notice
* the device can no longer be removed and the capacity has changed:
*/
closure_wake_up(&c->freelist_wait);
closure_wake_up(&c->allocator.freelist_wait);

/*
* journal_res_get() can block waiting for free space in the journal -
@@ -1674,7 +1674,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)

/* Now wait for any in flight writes: */

closure_wait_event(&c->open_buckets_wait,
closure_wait_event(&c->allocator.open_buckets_wait,
!bch2_dev_has_open_write_point(c, ca));
}
@@ -1684,7 +1684,7 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
lockdep_assert_held(&c->state_lock);

bch2_dev_allocator_set_rw(c, ca, true);
c->rw_devs_change_count++;
c->allocator.rw_devs_change_count++;
}

void bch2_dev_allocator_background_exit(struct bch_dev *ca)
@@ -1702,5 +1702,31 @@ void bch2_dev_allocator_background_init(struct bch_dev *ca)

void bch2_fs_allocator_background_init(struct bch_fs *c)
{
spin_lock_init(&c->freelist_lock);
spin_lock_init(&c->allocator.freelist_lock);
}

void bch2_fs_capacity_exit(struct bch_fs *c)
{
percpu_free_rwsem(&c->capacity.mark_lock);
if (c->capacity.pcpu) {
u64 v = percpu_u64_get(&c->capacity.pcpu->online_reserved);
WARN(v, "online_reserved not 0 at shutdown: %lli", v);
}

free_percpu(c->capacity.pcpu);
free_percpu(c->capacity.usage);
}

int bch2_fs_capacity_init(struct bch_fs *c)
{
mutex_init(&c->capacity.sectors_available_lock);
seqcount_init(&c->capacity.usage_lock);

try(percpu_init_rwsem(&c->capacity.mark_lock));

if (!(c->capacity.pcpu = alloc_percpu(struct bch_fs_capacity_pcpu)) ||
!(c->capacity.usage = alloc_percpu(struct bch_fs_usage_base)))
return bch_err_throw(c, ENOMEM_fs_other_alloc);

return 0;
}
@@ -377,4 +377,7 @@ void bch2_dev_allocator_background_init(struct bch_dev *);

void bch2_fs_allocator_background_init(struct bch_fs *);

void bch2_fs_capacity_exit(struct bch_fs *);
int bch2_fs_capacity_init(struct bch_fs *);

#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
@@ -57,13 +57,13 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
struct bch_fs_usage_short ret;
u64 data, reserved;

ret.capacity = c->capacity -
percpu_u64_get(&c->usage->hidden);
ret.capacity = c->capacity.capacity -
percpu_u64_get(&c->capacity.usage->hidden);

data = percpu_u64_get(&c->usage->data) +
percpu_u64_get(&c->usage->btree);
reserved = percpu_u64_get(&c->usage->reserved) +
percpu_u64_get(c->online_reserved);
data = percpu_u64_get(&c->capacity.usage->data) +
percpu_u64_get(&c->capacity.usage->btree);
reserved = percpu_u64_get(&c->capacity.usage->reserved) +
percpu_u64_get(&c->capacity.pcpu->online_reserved);

ret.used = min(ret.capacity, data + reserve_factor(reserved));
ret.free = ret.capacity - ret.used;
@@ -74,7 +74,7 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *c)
{
guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
return __bch2_fs_usage_read_short(c);
}
@@ -249,7 +249,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
}

if (p.has_ec) {
struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx);
struct gc_stripe *m = genradix_ptr(&c->ec.gc_stripes, p.ec.idx);

if (ret_fsck_err_on(!m || !m->alive,
trans, ptr_to_missing_stripe,
@@ -343,7 +343,7 @@ again:
ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
bkey_extent_entry_for_each(ptrs, entry) {
if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) {
struct gc_stripe *m = genradix_ptr(&c->gc_stripes,
struct gc_stripe *m = genradix_ptr(&c->ec.gc_stripes,
entry->stripe_ptr.idx);
union bch_extent_entry *next_ptr;
@@ -547,7 +547,7 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
static int warned_disk_usage = 0;
bool warn = false;

guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
struct bch_fs_usage_base *src = &trans->fs_usage_delta;

s64 added = src->btree + src->data + src->reserved;
@@ -560,10 +560,10 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
if (unlikely(should_not_have_added > 0)) {
u64 old, new;

old = atomic64_read(&c->sectors_available);
old = atomic64_read(&c->capacity.sectors_available);
do {
new = max_t(s64, 0, old - should_not_have_added);
} while (!atomic64_try_cmpxchg(&c->sectors_available,
} while (!atomic64_try_cmpxchg(&c->capacity.sectors_available,
&old, new));

added -= should_not_have_added;
@@ -572,11 +572,11 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans)

if (added > 0) {
trans->disk_res->sectors -= added;
this_cpu_sub(*c->online_reserved, added);
this_cpu_sub(c->capacity.pcpu->online_reserved, added);
}

scoped_guard(preempt) {
struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
struct bch_fs_usage_base *dst = this_cpu_ptr(c->capacity.usage);
acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64));
}
@@ -707,7 +707,7 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
}

if (flags & BTREE_TRIGGER_gc) {
struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL);
struct gc_stripe *m = genradix_ptr_alloc(&c->ec.gc_stripes, p.ec.idx, GFP_KERNEL);
if (!m) {
bch_err(c, "error allocating memory for gc_stripes, idx %llu",
(u64) p.ec.idx);
@@ -1145,9 +1145,9 @@ static int disk_reservation_recalc_sectors_available(struct bch_fs *c,
struct disk_reservation *res,
u64 sectors, enum bch_reservation_flags flags)
{
guard(mutex)(&c->sectors_available_lock);
guard(mutex)(&c->capacity.sectors_available_lock);

percpu_u64_set(&c->pcpu->sectors_available, 0);
percpu_u64_set(&c->capacity.pcpu->sectors_available, 0);
u64 sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);

if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL))
@@ -1155,13 +1155,13 @@ static int disk_reservation_recalc_sectors_available(struct bch_fs *c,

if (sectors <= sectors_available ||
(flags & BCH_DISK_RESERVATION_NOFAIL)) {
atomic64_set(&c->sectors_available,
atomic64_set(&c->capacity.sectors_available,
max_t(s64, 0, sectors_available - sectors));
this_cpu_add(*c->online_reserved, sectors);
this_cpu_add(c->capacity.pcpu->online_reserved, sectors);
res->sectors += sectors;
return 0;
} else {
atomic64_set(&c->sectors_available, sectors_available);
atomic64_set(&c->capacity.sectors_available, sectors_available);
return bch_err_throw(c, ENOSPC_disk_reservation);
}
}
@@ -1169,15 +1169,15 @@ static int disk_reservation_recalc_sectors_available(struct bch_fs *c,
int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
u64 sectors, enum bch_reservation_flags flags)
{
struct bch_fs_pcpu *pcpu;
struct bch_fs_capacity_pcpu *pcpu;
u64 old, get;

guard(percpu_read)(&c->mark_lock);
guard(percpu_read)(&c->capacity.mark_lock);
preempt_disable();
pcpu = this_cpu_ptr(c->pcpu);
pcpu = this_cpu_ptr(c->capacity.pcpu);

if (unlikely(sectors > pcpu->sectors_available)) {
old = atomic64_read(&c->sectors_available);
old = atomic64_read(&c->capacity.sectors_available);
do {
get = min((u64) sectors + SECTORS_CACHE, old);
@@ -1186,14 +1186,14 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,

return disk_reservation_recalc_sectors_available(c,
res, sectors, flags);
}
} while (!atomic64_try_cmpxchg(&c->sectors_available,
} while (!atomic64_try_cmpxchg(&c->capacity.sectors_available,
&old, old - get));

pcpu->sectors_available += get;
}

pcpu->sectors_available -= sectors;
this_cpu_add(*c->online_reserved, sectors);
pcpu->online_reserved += sectors;
res->sectors += sectors;
preempt_enable();
return 0;
@@ -298,7 +298,7 @@ static inline void bch2_disk_reservation_put(struct bch_fs *c,
struct disk_reservation *res)
{
if (res->sectors) {
this_cpu_sub(*c->online_reserved, res->sectors);
this_cpu_sub(c->capacity.pcpu->online_reserved, res->sectors);
res->sectors = 0;
}
}
@@ -317,15 +317,15 @@ static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reserv
#ifdef __KERNEL__
u64 old, new;

old = this_cpu_read(c->pcpu->sectors_available);
old = this_cpu_read(c->capacity.pcpu->sectors_available);
do {
if (sectors > old)
return __bch2_disk_reservation_add(c, res, sectors, flags);

new = old - sectors;
} while (!this_cpu_try_cmpxchg(c->pcpu->sectors_available, &old, new));
} while (!this_cpu_try_cmpxchg(c->capacity.pcpu->sectors_available, &old, new));

this_cpu_add(*c->online_reserved, sectors);
this_cpu_add(c->capacity.pcpu->online_reserved, sectors);
res->sectors += sectors;
return 0;
#else
||||
enum bch_data_type data_type,
|
||||
u16 target)
|
||||
{
|
||||
struct bch_devs_mask devs = c->rw_devs[data_type];
|
||||
struct bch_devs_mask devs = c->allocator.rw_devs[data_type];
|
||||
const struct bch_devs_mask *t = bch2_target_to_mask(c, target);
|
||||
|
||||
if (t)
|
||||
|
||||
@ -83,7 +83,7 @@ void bch2_reset_alloc_cursors(struct bch_fs *c)
|
||||
|
||||
static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob)
|
||||
{
|
||||
open_bucket_idx_t idx = ob - c->open_buckets;
|
||||
open_bucket_idx_t idx = ob - c->allocator.open_buckets;
|
||||
open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket);
|
||||
|
||||
ob->hash = *slot;
|
||||
@ -92,12 +92,12 @@ static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob)
|
||||
|
||||
static void bch2_open_bucket_hash_remove(struct bch_fs *c, struct open_bucket *ob)
|
||||
{
|
||||
open_bucket_idx_t idx = ob - c->open_buckets;
|
||||
open_bucket_idx_t idx = ob - c->allocator.open_buckets;
|
||||
open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket);
|
||||
|
||||
while (*slot != idx) {
|
||||
BUG_ON(!*slot);
|
||||
slot = &c->open_buckets[*slot].hash;
|
||||
slot = &c->allocator.open_buckets[*slot].hash;
|
||||
}
|
||||
|
||||
*slot = ob->hash;
|
||||
@ -118,17 +118,17 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
|
||||
ob->data_type = 0;
|
||||
}
|
||||
|
||||
scoped_guard(spinlock, &c->freelist_lock) {
|
||||
scoped_guard(spinlock, &c->allocator.freelist_lock) {
|
||||
bch2_open_bucket_hash_remove(c, ob);
|
||||
|
||||
ob->freelist = c->open_buckets_freelist;
|
||||
c->open_buckets_freelist = ob - c->open_buckets;
|
||||
ob->freelist = c->allocator.open_buckets_freelist;
|
||||
c->allocator.open_buckets_freelist = ob - c->allocator.open_buckets;
|
||||
|
||||
c->open_buckets_nr_free++;
|
||||
c->allocator.open_buckets_nr_free++;
|
||||
ca->nr_open_buckets--;
|
||||
}
|
||||
|
||||
closure_wake_up(&c->open_buckets_wait);
|
||||
closure_wake_up(&c->allocator.open_buckets_wait);
|
||||
}
|
||||
|
||||
void bch2_open_bucket_write_error(struct bch_fs *c,
|
||||
@ -143,13 +143,11 @@ void bch2_open_bucket_write_error(struct bch_fs *c,
|
||||
bch2_ec_bucket_cancel(c, ob, err);
|
||||
}
|
||||
|
||||
static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
|
||||
static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs_allocator *c)
|
||||
{
|
||||
struct open_bucket *ob;
|
||||
|
||||
BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free);
|
||||
|
||||
ob = c->open_buckets + c->open_buckets_freelist;
|
||||
struct open_bucket *ob = c->open_buckets + c->open_buckets_freelist;
|
||||
c->open_buckets_freelist = ob->freelist;
|
||||
atomic_set(&ob->pin, 1);
|
||||
ob->data_type = 0;
|
||||
@ -168,20 +166,20 @@ static inline bool is_superblock_bucket(struct bch_fs *c, struct bch_dev *ca, u6
|
||||
|
||||
static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
|
||||
{
|
||||
BUG_ON(c->open_buckets_partial_nr >=
|
||||
ARRAY_SIZE(c->open_buckets_partial));
|
||||
BUG_ON(c->allocator.open_buckets_partial_nr >=
|
||||
ARRAY_SIZE(c->allocator.open_buckets_partial));
|
||||
|
||||
scoped_guard(spinlock, &c->freelist_lock) {
|
||||
scoped_guard(spinlock, &c->allocator.freelist_lock) {
|
||||
guard(rcu)();
|
||||
bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++;
|
||||
|
||||
ob->on_partial_list = true;
|
||||
c->open_buckets_partial[c->open_buckets_partial_nr++] =
|
||||
ob - c->open_buckets;
|
||||
c->allocator.open_buckets_partial[c->allocator.open_buckets_partial_nr++] =
|
||||
ob - c->allocator.open_buckets;
|
||||
}
|
||||
|
||||
closure_wake_up(&c->open_buckets_wait);
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
closure_wake_up(&c->allocator.open_buckets_wait);
|
||||
closure_wake_up(&c->allocator.freelist_wait);
|
||||
}
|
||||
|
||||
static inline bool may_alloc_bucket(struct bch_fs *c,
|
||||
@ -226,11 +224,11 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
guard(spinlock)(&c->freelist_lock);
|
||||
guard(spinlock)(&c->allocator.freelist_lock);
|
||||
|
||||
if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) {
|
||||
if (unlikely(c->allocator.open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) {
|
||||
if (cl)
|
||||
closure_wait(&c->open_buckets_wait, cl);
|
||||
closure_wait(&c->allocator.open_buckets_wait, cl);
|
||||
|
||||
track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true);
|
||||
return ERR_PTR(bch_err_throw(c, open_buckets_empty));
|
||||
@ -242,7 +240,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct open_bucket *ob = bch2_open_bucket_alloc(c);
|
||||
struct open_bucket *ob = bch2_open_bucket_alloc(&c->allocator);
|
||||
|
||||
scoped_guard(spinlock, &ob->lock) {
|
||||
ob->valid = true;
|
||||
@ -459,7 +457,7 @@ static noinline void bucket_alloc_to_text(struct printbuf *out,
|
||||
prt_printf(out, "avail\t%llu\n", dev_buckets_free(req->ca, req->usage, req->watermark));
|
||||
prt_printf(out, "copygc_wait\t%llu/%lli\n",
|
||||
bch2_copygc_wait_amount(c),
|
||||
c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now));
|
||||
c->copygc.wait - atomic64_read(&c->io_clock[WRITE].now));
|
||||
prt_printf(out, "seen\t%llu\n", req->counters.buckets_seen);
|
||||
prt_printf(out, "open\t%llu\n", req->counters.skipped_open);
|
||||
prt_printf(out, "need journal commit\t%llu\n", req->counters.skipped_need_journal_commit);
|
||||
@ -516,7 +514,7 @@ again:
|
||||
goto alloc;
|
||||
|
||||
if (cl && !waiting) {
|
||||
closure_wait(&c->freelist_wait, cl);
|
||||
closure_wait(&c->allocator.freelist_wait, cl);
|
||||
waiting = true;
|
||||
goto again;
|
||||
}
|
||||
@ -528,7 +526,7 @@ again:
|
||||
}
|
||||
|
||||
if (waiting)
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
closure_wake_up(&c->allocator.freelist_wait);
|
||||
alloc:
|
||||
ob = likely(freespace)
|
||||
? bch2_bucket_alloc_freelist(trans, req, cl)
|
||||
@ -770,7 +768,7 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
|
||||
if (!h->s->blocks[ec_idx])
|
||||
continue;
|
||||
|
||||
struct open_bucket *ob = c->open_buckets + h->s->blocks[ec_idx];
|
||||
struct open_bucket *ob = c->allocator.open_buckets + h->s->blocks[ec_idx];
|
||||
if (ob->dev == *i && !test_and_set_bit(ec_idx, h->s->blocks_allocated)) {
|
||||
ob->ec_idx = ec_idx;
|
||||
ob->ec = h->s;
|
||||
@ -832,16 +830,18 @@ static int bucket_alloc_set_writepoint(struct bch_fs *c,
|
||||
static int bucket_alloc_set_partial(struct bch_fs *c,
|
||||
struct alloc_request *req)
|
||||
{
|
||||
if (!c->open_buckets_partial_nr)
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
|
||||
if (!a->open_buckets_partial_nr)
|
||||
return 0;
|
||||
|
||||
guard(spinlock)(&c->freelist_lock);
|
||||
guard(spinlock)(&a->freelist_lock);
|
||||
|
||||
if (!c->open_buckets_partial_nr)
|
||||
if (!a->open_buckets_partial_nr)
|
||||
return 0;
|
||||
|
||||
for (int i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
|
||||
struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
|
||||
for (int i = a->open_buckets_partial_nr - 1; i >= 0; --i) {
|
||||
struct open_bucket *ob = a->open_buckets + a->open_buckets_partial[i];
|
||||
|
||||
if (want_bucket(c, req, ob)) {
|
||||
struct bch_dev *ca = ob_dev(c, ob);
|
||||
@ -852,8 +852,8 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
|
||||
if (!avail)
|
||||
continue;
|
||||
|
||||
array_remove_item(c->open_buckets_partial,
|
||||
c->open_buckets_partial_nr,
|
||||
array_remove_item(a->open_buckets_partial,
|
||||
a->open_buckets_partial_nr,
|
||||
i);
|
||||
ob->on_partial_list = false;
|
||||
|
||||
@ -952,6 +952,8 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
|
||||
static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
|
||||
struct bch_dev *ca, bool ec)
|
||||
{
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
|
||||
if (ec) {
|
||||
return ob->ec != NULL;
|
||||
} else if (ca) {
|
||||
@ -965,7 +967,7 @@ static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
|
||||
if (!ob->ec->blocks[i])
|
||||
continue;
|
||||
|
||||
struct open_bucket *ob2 = c->open_buckets + ob->ec->blocks[i];
|
||||
struct open_bucket *ob2 = a->open_buckets + ob->ec->blocks[i];
|
||||
drop |= ob2->dev == ca->dev_idx;
|
||||
}
|
||||
}
|
||||
@ -995,43 +997,44 @@ static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
|
||||
void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
|
||||
bool ec)
|
||||
{
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
unsigned i;
|
||||
|
||||
/* Next, close write points that point to this device... */
|
||||
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
|
||||
bch2_writepoint_stop(c, ca, ec, &c->write_points[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(a->write_points); i++)
|
||||
bch2_writepoint_stop(c, ca, ec, &a->write_points[i]);
|
||||
|
||||
bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point);
|
||||
bch2_writepoint_stop(c, ca, ec, &c->reconcile_write_point);
|
||||
bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);
|
||||
bch2_writepoint_stop(c, ca, ec, &c->copygc.write_point);
|
||||
bch2_writepoint_stop(c, ca, ec, &a->reconcile_write_point);
|
||||
bch2_writepoint_stop(c, ca, ec, &a->btree_write_point);
|
||||
|
||||
scoped_guard(mutex, &c->btree_reserve_cache_lock)
|
||||
while (c->btree_reserve_cache_nr) {
|
||||
scoped_guard(mutex, &c->btree_reserve_cache.lock)
|
||||
while (c->btree_reserve_cache.nr) {
|
||||
struct btree_alloc *a =
|
||||
&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
|
||||
&c->btree_reserve_cache.data[--c->btree_reserve_cache.nr];
|
||||
|
||||
bch2_open_buckets_put(c, &a->ob);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
scoped_guard(spinlock, &c->freelist_lock)
|
||||
while (i < c->open_buckets_partial_nr) {
|
||||
scoped_guard(spinlock, &a->freelist_lock)
|
||||
while (i < a->open_buckets_partial_nr) {
|
||||
struct open_bucket *ob =
|
||||
c->open_buckets + c->open_buckets_partial[i];
|
||||
a->open_buckets + a->open_buckets_partial[i];
|
||||
|
||||
if (should_drop_bucket(ob, c, ca, ec)) {
|
||||
--c->open_buckets_partial_nr;
|
||||
swap(c->open_buckets_partial[i],
|
||||
c->open_buckets_partial[c->open_buckets_partial_nr]);
|
||||
--a->open_buckets_partial_nr;
|
||||
swap(a->open_buckets_partial[i],
|
||||
a->open_buckets_partial[a->open_buckets_partial_nr]);
|
||||
|
||||
ob->on_partial_list = false;
|
||||
|
||||
scoped_guard(rcu)
|
||||
bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
|
||||
|
||||
spin_unlock(&c->freelist_lock);
|
||||
spin_unlock(&a->freelist_lock);
|
||||
bch2_open_bucket_put(c, ob);
|
||||
spin_lock(&c->freelist_lock);
|
||||
spin_lock(&a->freelist_lock);
|
||||
} else {
|
||||
i++;
|
||||
}
|
||||
@ -1040,13 +1043,13 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
|
||||
bch2_ec_stop_dev(c, ca);
|
||||
}
|
||||
|
||||
static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
|
||||
static inline struct hlist_head *writepoint_hash(struct bch_fs_allocator *a,
|
||||
unsigned long write_point)
|
||||
{
|
||||
unsigned hash =
|
||||
hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
|
||||
hash_long(write_point, ilog2(ARRAY_SIZE(a->write_points_hash)));
|
||||
|
||||
return &c->write_points_hash[hash];
|
||||
return &a->write_points_hash[hash];
|
||||
}
|
||||
|
||||
static struct write_point *__writepoint_find(struct hlist_head *head,
|
||||
@ -1063,7 +1066,7 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
|
||||
|
||||
static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
|
||||
{
|
||||
u64 stranded = c->write_points_nr * c->bucket_size_max;
|
||||
u64 stranded = c->allocator.write_points_nr * c->capacity.bucket_size_max;
|
||||
u64 free = bch2_fs_usage_read_short(c).free;
|
||||
|
||||
return stranded * factor > free;
|
||||
@ -1071,33 +1074,35 @@ static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
|
||||
|
||||
static noinline bool try_increase_writepoints(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
struct write_point *wp;
|
||||
|
||||
if (c->write_points_nr == ARRAY_SIZE(c->write_points) ||
|
||||
if (a->write_points_nr == ARRAY_SIZE(a->write_points) ||
|
||||
too_many_writepoints(c, 32))
|
||||
return false;
|
||||
|
||||
wp = c->write_points + c->write_points_nr++;
|
||||
hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
|
||||
wp = a->write_points + a->write_points_nr++;
|
||||
hlist_add_head_rcu(&wp->node, writepoint_hash(a, wp->write_point));
|
||||
return true;
|
||||
}
|
||||
|
||||
static noinline bool try_decrease_writepoints(struct btree_trans *trans, unsigned old_nr)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
struct write_point *wp;
|
||||
struct open_bucket *ob;
|
||||
unsigned i;
|
||||
|
||||
scoped_guard(mutex, &c->write_points_hash_lock) {
|
||||
if (c->write_points_nr < old_nr)
|
||||
scoped_guard(mutex, &a->write_points_hash_lock) {
|
||||
if (a->write_points_nr < old_nr)
|
||||
return true;
|
||||
|
||||
if (c->write_points_nr == 1 ||
|
||||
if (a->write_points_nr == 1 ||
|
||||
!too_many_writepoints(c, 8))
|
||||
return false;
|
||||
|
||||
wp = c->write_points + --c->write_points_nr;
|
||||
wp = a->write_points + --a->write_points_nr;
|
||||
hlist_del_rcu(&wp->node);
|
||||
}
|
||||
|
||||
@ -1113,6 +1118,7 @@ static struct write_point *writepoint_find(struct btree_trans *trans,
|
||||
unsigned long write_point)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
struct write_point *wp, *oldest;
|
||||
struct hlist_head *head;
|
||||
|
||||
@ -1122,7 +1128,7 @@ static struct write_point *writepoint_find(struct btree_trans *trans,
|
||||
return wp;
|
||||
}
|
||||
|
||||
head = writepoint_hash(c, write_point);
|
||||
head = writepoint_hash(a, write_point);
|
||||
restart_find:
|
||||
wp = __writepoint_find(head, write_point);
|
||||
if (wp) {
|
||||
@ -1135,23 +1141,23 @@ lock_wp:
|
||||
}
|
||||
restart_find_oldest:
|
||||
oldest = NULL;
|
||||
for (wp = c->write_points;
|
||||
wp < c->write_points + c->write_points_nr; wp++)
|
||||
for (wp = a->write_points;
|
||||
wp < a->write_points + a->write_points_nr; wp++)
|
||||
if (!oldest || time_before64(wp->last_used, oldest->last_used))
|
||||
oldest = wp;
|
||||
|
||||
bch2_trans_mutex_lock_norelock(trans, &oldest->lock);
|
||||
bch2_trans_mutex_lock_norelock(trans, &c->write_points_hash_lock);
|
||||
if (oldest >= c->write_points + c->write_points_nr ||
|
||||
bch2_trans_mutex_lock_norelock(trans, &a->write_points_hash_lock);
|
||||
if (oldest >= a->write_points + a->write_points_nr ||
|
||||
try_increase_writepoints(c)) {
|
||||
mutex_unlock(&c->write_points_hash_lock);
|
||||
mutex_unlock(&a->write_points_hash_lock);
|
||||
mutex_unlock(&oldest->lock);
|
||||
goto restart_find_oldest;
|
||||
}
|
||||
|
||||
wp = __writepoint_find(head, write_point);
|
||||
if (wp && wp != oldest) {
|
||||
mutex_unlock(&c->write_points_hash_lock);
|
||||
mutex_unlock(&a->write_points_hash_lock);
|
||||
mutex_unlock(&oldest->lock);
|
||||
goto lock_wp;
|
||||
}
|
||||
@ -1160,7 +1166,7 @@ restart_find_oldest:
|
||||
hlist_del_rcu(&wp->node);
|
||||
wp->write_point = write_point;
|
||||
hlist_add_head_rcu(&wp->node, head);
|
||||
mutex_unlock(&c->write_points_hash_lock);
|
||||
mutex_unlock(&a->write_points_hash_lock);
|
||||
out:
|
||||
wp->last_used = local_clock();
|
||||
return wp;
|
||||
@ -1201,6 +1207,7 @@ int bch2_alloc_sectors_req(struct btree_trans *trans,
|
||||
struct write_point **wp_ret)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
struct open_bucket *ob;
|
||||
unsigned write_points_nr;
|
||||
int i;
|
||||
@ -1210,7 +1217,7 @@ retry:
|
||||
req->ptrs.nr = 0;
|
||||
req->nr_effective = 0;
|
||||
req->have_cache = false;
|
||||
write_points_nr = c->write_points_nr;
|
||||
write_points_nr = a->write_points_nr;
|
||||
|
||||
*wp_ret = req->wp = writepoint_find(trans, write_point.v);
|
||||
|
||||
@ -1357,47 +1364,49 @@ static inline void writepoint_init(struct write_point *wp,
|
||||
|
||||
void bch2_fs_allocator_foreground_init(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
struct open_bucket *ob;
|
||||
struct write_point *wp;
|
||||
|
||||
mutex_init(&c->write_points_hash_lock);
|
||||
c->write_points_nr = ARRAY_SIZE(c->write_points);
|
||||
mutex_init(&a->write_points_hash_lock);
|
||||
a->write_points_nr = ARRAY_SIZE(a->write_points);
|
||||
|
||||
/* open bucket 0 is a sentinal NULL: */
|
||||
spin_lock_init(&c->open_buckets[0].lock);
|
||||
spin_lock_init(&a->open_buckets[0].lock);
|
||||
|
||||
for (ob = c->open_buckets + 1;
|
||||
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
|
||||
for (ob = a->open_buckets + 1;
|
||||
ob < a->open_buckets + ARRAY_SIZE(a->open_buckets); ob++) {
|
||||
spin_lock_init(&ob->lock);
|
||||
c->open_buckets_nr_free++;
|
||||
a->open_buckets_nr_free++;
|
||||
|
||||
ob->freelist = c->open_buckets_freelist;
|
||||
c->open_buckets_freelist = ob - c->open_buckets;
|
||||
ob->freelist = a->open_buckets_freelist;
|
||||
a->open_buckets_freelist = ob - a->open_buckets;
|
||||
}
|
||||
|
||||
writepoint_init(&c->btree_write_point, BCH_DATA_btree);
|
||||
writepoint_init(&c->reconcile_write_point, BCH_DATA_user);
|
||||
writepoint_init(&c->copygc_write_point, BCH_DATA_user);
|
||||
writepoint_init(&a->btree_write_point, BCH_DATA_btree);
|
||||
writepoint_init(&a->reconcile_write_point, BCH_DATA_user);
|
||||
writepoint_init(&c->copygc.write_point, BCH_DATA_user);
|
||||
|
||||
for (wp = c->write_points;
|
||||
wp < c->write_points + c->write_points_nr; wp++) {
|
||||
for (wp = a->write_points;
|
||||
wp < a->write_points + a->write_points_nr; wp++) {
|
||||
writepoint_init(wp, BCH_DATA_user);
|
||||
|
||||
wp->last_used = local_clock();
|
||||
wp->write_point = (unsigned long) wp;
|
||||
hlist_add_head_rcu(&wp->node,
|
||||
writepoint_hash(c, wp->write_point));
|
||||
writepoint_hash(a, wp->write_point));
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob)
|
||||
{
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
struct bch_dev *ca = ob_dev(c, ob);
|
||||
unsigned data_type = ob->data_type;
|
||||
barrier(); /* READ_ONCE() doesn't work on bitfields */
|
||||
|
||||
prt_printf(out, "%zu ref %u ",
|
||||
ob - c->open_buckets,
|
||||
ob - a->open_buckets,
|
||||
atomic_read(&ob->pin));
|
||||
bch2_prt_data_type(out, data_type);
|
||||
prt_printf(out, " %u:%llu gen %u allocated %u/%u",
|
||||
@ -1413,10 +1422,11 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope
|
||||
void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
struct bch_dev *ca)
|
||||
{
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
guard(printbuf_atomic)(out);
|
||||
|
||||
for (struct open_bucket *ob = c->open_buckets;
|
||||
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
|
||||
for (struct open_bucket *ob = a->open_buckets;
|
||||
ob < a->open_buckets + ARRAY_SIZE(a->open_buckets);
|
||||
ob++) {
|
||||
guard(spinlock)(&ob->lock);
|
||||
if (ob->valid && (!ca || ob->dev == ca->dev_idx))
|
||||
@ -1427,11 +1437,11 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
guard(printbuf_atomic)(out);
|
||||
guard(spinlock)(&c->freelist_lock);
|
||||
guard(spinlock)(&c->allocator.freelist_lock);
|
||||
|
||||
for (unsigned i = 0; i < c->open_buckets_partial_nr; i++)
|
||||
for (unsigned i = 0; i < c->allocator.open_buckets_partial_nr; i++)
|
||||
bch2_open_bucket_to_text(out, c,
|
||||
c->open_buckets + c->open_buckets_partial[i]);
|
||||
c->allocator.open_buckets + c->allocator.open_buckets_partial[i]);
|
||||
}
|
||||
|
||||
static const char * const bch2_write_point_states[] = {
|
||||
@ -1469,66 +1479,69 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
|
||||
void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
struct write_point *wp;
|
||||
|
||||
prt_str(out, "Foreground write points\n");
|
||||
for (wp = c->write_points;
|
||||
wp < c->write_points + ARRAY_SIZE(c->write_points);
|
||||
for (wp = a->write_points;
|
||||
wp < a->write_points + ARRAY_SIZE(a->write_points);
|
||||
wp++)
|
||||
bch2_write_point_to_text(out, c, wp);
|
||||
|
||||
prt_str(out, "Copygc write point\n");
|
||||
bch2_write_point_to_text(out, c, &c->copygc_write_point);
|
||||
bch2_write_point_to_text(out, c, &c->copygc.write_point);
|
||||
|
||||
prt_str(out, "Rebalance write point\n");
|
||||
bch2_write_point_to_text(out, c, &c->reconcile_write_point);
|
||||
bch2_write_point_to_text(out, c, &a->reconcile_write_point);
|
||||
|
||||
prt_str(out, "Btree write point\n");
|
||||
bch2_write_point_to_text(out, c, &c->btree_write_point);
|
||||
bch2_write_point_to_text(out, c, &a->btree_write_point);
|
||||
}
|
||||
|
||||
void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
unsigned nr[BCH_DATA_NR];
|
||||
|
||||
memset(nr, 0, sizeof(nr));
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
|
||||
nr[c->open_buckets[i].data_type]++;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(a->open_buckets); i++)
|
||||
nr[a->open_buckets[i].data_type]++;
|
||||
|
||||
printbuf_tabstops_reset(out);
|
||||
printbuf_tabstop_push(out, 24);
|
||||
|
||||
prt_printf(out, "capacity\t%llu\n", c->capacity);
|
||||
prt_printf(out, "capacity\t%llu\n", c->capacity.capacity);
|
||||
prt_printf(out, "used\t%llu\n", bch2_fs_usage_read_short(c).used);
|
||||
prt_printf(out, "reserved\t%llu\n", c->reserved);
|
||||
prt_printf(out, "hidden\t%llu\n", percpu_u64_get(&c->usage->hidden));
|
||||
prt_printf(out, "btree\t%llu\n", percpu_u64_get(&c->usage->btree));
|
||||
prt_printf(out, "data\t%llu\n", percpu_u64_get(&c->usage->data));
|
||||
prt_printf(out, "cached\t%llu\n", percpu_u64_get(&c->usage->cached));
|
||||
prt_printf(out, "reserved\t%llu\n", percpu_u64_get(&c->usage->reserved));
|
||||
prt_printf(out, "online_reserved\t%llu\n", percpu_u64_get(c->online_reserved));
|
||||
prt_printf(out, "reserved\t%llu\n", c->capacity.reserved);
|
||||
prt_printf(out, "hidden\t%llu\n", percpu_u64_get(&c->capacity.usage->hidden));
|
||||
prt_printf(out, "btree\t%llu\n", percpu_u64_get(&c->capacity.usage->btree));
|
||||
prt_printf(out, "data\t%llu\n", percpu_u64_get(&c->capacity.usage->data));
|
||||
prt_printf(out, "cached\t%llu\n", percpu_u64_get(&c->capacity.usage->cached));
|
||||
prt_printf(out, "reserved\t%llu\n", percpu_u64_get(&c->capacity.usage->reserved));
|
||||
prt_printf(out, "online_reserved\t%llu\n", percpu_u64_get(&c->capacity.pcpu->online_reserved));
|
||||
|
||||
prt_newline(out);
|
||||
prt_printf(out, "freelist_wait\t%s\n", c->freelist_wait.list.first ? "waiting" : "empty");
|
||||
prt_printf(out, "open buckets allocated\t%i\n", OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
|
||||
prt_printf(out, "freelist_wait\t%s\n", a->freelist_wait.list.first ? "waiting" : "empty");
|
||||
prt_printf(out, "open buckets allocated\t%i\n", OPEN_BUCKETS_COUNT - a->open_buckets_nr_free);
|
||||
prt_printf(out, "open buckets total\t%u\n", OPEN_BUCKETS_COUNT);
|
||||
prt_printf(out, "open_buckets_wait\t%s\n", c->open_buckets_wait.list.first ? "waiting" : "empty");
|
||||
prt_printf(out, "open_buckets_wait\t%s\n", a->open_buckets_wait.list.first ? "waiting" : "empty");
|
||||
prt_printf(out, "open_buckets_btree\t%u\n", nr[BCH_DATA_btree]);
|
||||
prt_printf(out, "open_buckets_user\t%u\n", nr[BCH_DATA_user]);
|
||||
prt_printf(out, "btree reserve cache\t%u\n", c->btree_reserve_cache_nr);
|
||||
prt_printf(out, "btree reserve cache\t%u\n", c->btree_reserve_cache.nr);
|
||||
}
|
||||
|
||||
void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
|
||||
{
|
||||
struct bch_fs *c = ca->fs;
|
||||
struct bch_fs_allocator *a = &c->allocator;
|
||||
struct bch_dev_usage_full stats = bch2_dev_usage_full_read(ca);
|
||||
unsigned nr[BCH_DATA_NR];
|
||||
|
||||
memset(nr, 0, sizeof(nr));
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
|
||||
nr[c->open_buckets[i].data_type]++;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(a->open_buckets); i++)
|
||||
nr[a->open_buckets[i].data_type]++;
|
||||
|
||||
bch2_dev_usage_to_text(out, ca, &stats);
|
||||
|
||||
@ -1587,8 +1600,8 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
|
||||
|
||||
static inline unsigned allocator_wait_timeout(struct bch_fs *c)
|
||||
{
|
||||
if (c->allocator_last_stuck &&
|
||||
time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies))
|
||||
if (c->allocator.last_stuck &&
|
||||
time_after(c->allocator.last_stuck + HZ * 60 * 2, jiffies))
|
||||
return 0;
|
||||
|
||||
return c->opts.allocator_stuck_timeout * HZ;
|
||||
@ -1599,7 +1612,7 @@ void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
|
||||
unsigned t = allocator_wait_timeout(c);
|
||||
|
||||
if (t && closure_sync_timeout(cl, t)) {
|
||||
c->allocator_last_stuck = jiffies;
|
||||
c->allocator.last_stuck = jiffies;
|
||||
bch2_print_allocator_stuck(c);
|
||||
}
|
||||
|
||||
|
||||
@ -110,13 +110,13 @@ static inline void ob_push(struct bch_fs *c, struct open_buckets *obs,
|
||||
{
|
||||
BUG_ON(obs->nr >= ARRAY_SIZE(obs->v));
|
||||
|
||||
obs->v[obs->nr++] = ob - c->open_buckets;
|
||||
obs->v[obs->nr++] = ob - c->allocator.open_buckets;
|
||||
}
|
||||
|
||||
#define open_bucket_for_each(_c, _obs, _ob, _i) \
|
||||
for ((_i) = 0; \
|
||||
(_i) < (_obs)->nr && \
|
||||
((_ob) = (_c)->open_buckets + (_obs)->v[_i], true); \
|
||||
#define open_bucket_for_each(_c, _obs, _ob, _i) \
|
||||
for ((_i) = 0; \
|
||||
(_i) < (_obs)->nr && \
|
||||
((_ob) = (_c)->allocator.open_buckets + (_obs)->v[_i], true); \
|
||||
(_i)++)
|
||||
|
||||
static inline struct open_bucket *ec_open_bucket(struct bch_fs *c,
|
||||
@ -188,7 +188,7 @@ static inline void bch2_open_bucket_get(struct bch_fs *c,
|
||||
static inline open_bucket_idx_t *open_bucket_hashslot(struct bch_fs *c,
|
||||
unsigned dev, u64 bucket)
|
||||
{
|
||||
return c->open_buckets_hash +
|
||||
return c->allocator.open_buckets_hash +
|
||||
(jhash_3words(dev, bucket, bucket >> 32, 0) &
|
||||
(OPEN_BUCKETS_COUNT - 1));
|
||||
}
|
||||
@ -198,7 +198,7 @@ static inline bool bch2_bucket_is_open(struct bch_fs *c, unsigned dev, u64 bucke
|
||||
open_bucket_idx_t slot = *open_bucket_hashslot(c, dev, bucket);
|
||||
|
||||
while (slot) {
|
||||
struct open_bucket *ob = &c->open_buckets[slot];
|
||||
struct open_bucket *ob = &c->allocator.open_buckets[slot];
|
||||
|
||||
if (ob->dev == dev && ob->bucket == bucket)
|
||||
return true;
|
||||
@ -214,7 +214,7 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
|
||||
if (bch2_bucket_is_open(c, dev, bucket))
|
||||
return true;
|
||||
|
||||
guard(spinlock)(&c->freelist_lock);
|
||||
guard(spinlock)(&c->allocator.freelist_lock);
|
||||
return bch2_bucket_is_open(c, dev, bucket);
|
||||
}
|
||||
|
||||
|
||||
@ -286,7 +286,7 @@ bool bch2_replicas_marked_locked(struct bch_fs *c,
|
||||
bool bch2_replicas_marked(struct bch_fs *c,
|
||||
struct bch_replicas_entry_v1 *search)
|
||||
{
|
||||
guard(percpu_read)(&c->mark_lock);
|
||||
guard(percpu_read)(&c->capacity.mark_lock);
|
||||
return bch2_replicas_marked_locked(c, search);
|
||||
}
|
||||
|
||||
@ -331,7 +331,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
|
||||
guard(mutex)(&c->sb_lock);
|
||||
bool write_sb = false;
|
||||
|
||||
scoped_guard(percpu_write, &c->mark_lock) {
|
||||
scoped_guard(percpu_write, &c->capacity.mark_lock) {
|
||||
if (!replicas_entry_search(&c->replicas, new_entry)) {
|
||||
CLASS(bch_replicas_cpu, new_r)();
|
||||
|
||||
@ -375,7 +375,7 @@ static void __replicas_entry_kill(struct bch_fs *c, struct bch_replicas_entry_cp
|
||||
|
||||
void bch2_replicas_entry_kill(struct bch_fs *c, struct bch_replicas_entry_v1 *kill)
|
||||
{
|
||||
lockdep_assert_held(&c->mark_lock);
|
||||
lockdep_assert_held(&c->capacity.mark_lock);
|
||||
lockdep_assert_held(&c->sb_lock);
|
||||
|
||||
struct bch_replicas_entry_cpu *e = replicas_entry_search(&c->replicas, kill);
|
||||
@ -408,7 +408,7 @@ void bch2_replicas_entry_put_many(struct bch_fs *c, struct bch_replicas_entry_v1
|
||||
BUG_ON(r->data_type != BCH_DATA_journal);
|
||||
verify_replicas_entry(r);
|
||||
|
||||
scoped_guard(percpu_read, &c->mark_lock) {
|
||||
scoped_guard(percpu_read, &c->capacity.mark_lock) {
|
||||
int ret = __replicas_entry_put(c, r, nr);
|
||||
if (!ret)
|
||||
return;
|
||||
@ -421,7 +421,7 @@ void bch2_replicas_entry_put_many(struct bch_fs *c, struct bch_replicas_entry_v1
|
||||
}
|
||||
|
||||
guard(mutex)(&c->sb_lock);
|
||||
scoped_guard(percpu_write, &c->mark_lock) {
|
||||
scoped_guard(percpu_write, &c->capacity.mark_lock) {
|
||||
struct bch_replicas_entry_cpu *e = replicas_entry_search(&c->replicas, r);
|
||||
if (e && !atomic_read(&e->ref))
|
||||
__replicas_entry_kill(c, e);
|
||||
@ -432,7 +432,7 @@ void bch2_replicas_entry_put_many(struct bch_fs *c, struct bch_replicas_entry_v1
|
||||
|
||||
static inline bool bch2_replicas_entry_get_inmem(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
|
||||
{
|
||||
guard(percpu_read)(&c->mark_lock);
|
||||
guard(percpu_read)(&c->capacity.mark_lock);
|
||||
struct bch_replicas_entry_cpu *e = replicas_entry_search(&c->replicas, r);
|
||||
if (e)
|
||||
atomic_inc(&e->ref);
|
||||
@ -458,7 +458,7 @@ int bch2_replicas_gc_reffed(struct bch_fs *c)
|
||||
|
||||
guard(mutex)(&c->sb_lock);
|
||||
|
||||
scoped_guard(percpu_write, &c->mark_lock) {
|
||||
scoped_guard(percpu_write, &c->capacity.mark_lock) {
|
||||
unsigned dst = 0;
|
||||
for (unsigned i = 0; i < c->replicas.nr; i++) {
|
||||
struct bch_replicas_entry_cpu *e =
|
||||
@ -576,7 +576,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
|
||||
|
||||
bch2_cpu_replicas_sort(&new_r);
|
||||
|
||||
guard(percpu_write)(&c->mark_lock);
|
||||
guard(percpu_write)(&c->capacity.mark_lock);
|
||||
swap(c->replicas, new_r);
|
||||
|
||||
return 0;
|
||||
@ -773,7 +773,7 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
|
||||
bool bch2_can_read_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs,
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
guard(percpu_read)(&c->mark_lock);
|
||||
guard(percpu_read)(&c->capacity.mark_lock);
|
||||
for_each_cpu_replicas_entry(&c->replicas, i) {
|
||||
struct bch_replicas_entry_v1 *e = &i->e;
|
||||
|
||||
|
||||
@ -5,6 +5,8 @@
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
#include "init/dev_types.h"
|
||||
|
||||
#include "util/clock_types.h"
|
||||
#include "util/fifo.h"
|
||||
|
||||
@ -118,4 +120,60 @@ struct write_point_specifier {
|
||||
unsigned long v;
|
||||
};
|
||||
|
||||
struct bch_fs_usage_base;
|
||||
|
||||
struct bch_fs_capacity_pcpu {
|
||||
u64 sectors_available;
|
||||
u64 online_reserved;
|
||||
};
|
||||
|
||||
struct bch_fs_capacity {
|
||||
u64 capacity; /* sectors */
|
||||
u64 reserved; /* sectors */
|
||||
|
||||
/*
|
||||
* When capacity _decreases_ (due to a disk being removed), we
|
||||
* increment capacity_gen - this invalidates outstanding reservations
|
||||
* and forces them to be revalidated
|
||||
*/
|
||||
u32 capacity_gen;
|
||||
unsigned bucket_size_max;
|
||||
|
||||
atomic64_t sectors_available;
|
||||
struct mutex sectors_available_lock;
|
||||
|
||||
struct bch_fs_capacity_pcpu __percpu *pcpu;
|
||||
|
||||
struct percpu_rw_semaphore mark_lock;
|
||||
|
||||
seqcount_t usage_lock;
|
||||
struct bch_fs_usage_base __percpu *usage;
|
||||
};
|
||||
|
||||
struct bch_fs_allocator {
|
||||
struct bch_devs_mask rw_devs[BCH_DATA_NR];
|
||||
unsigned long rw_devs_change_count;
|
||||
|
||||
spinlock_t freelist_lock;
|
||||
struct closure_waitlist freelist_wait;
|
||||
unsigned long last_stuck;
|
||||
|
||||
open_bucket_idx_t open_buckets_freelist;
|
||||
open_bucket_idx_t open_buckets_nr_free;
|
||||
struct closure_waitlist open_buckets_wait;
|
||||
struct open_bucket open_buckets[OPEN_BUCKETS_COUNT];
|
||||
open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT];
|
||||
|
||||
open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
|
||||
open_bucket_idx_t open_buckets_partial_nr;
|
||||
|
||||
struct write_point write_points[WRITE_POINT_MAX];
|
||||
struct hlist_head write_points_hash[WRITE_POINT_HASH_NR];
|
||||
struct mutex write_points_hash_lock;
|
||||
unsigned write_points_nr;
|
||||
|
||||
struct write_point btree_write_point;
|
||||
struct write_point reconcile_write_point;
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_ALLOC_TYPES_H */
|
||||
|
||||
@ -241,11 +241,13 @@
|
||||
#include "alloc/types.h"
|
||||
|
||||
#include "btree/check_types.h"
|
||||
#include "btree/interior_types.h"
|
||||
#include "btree/journal_overlay_types.h"
|
||||
#include "btree/types.h"
|
||||
#include "btree/node_scan_types.h"
|
||||
#include "btree/write_buffer_types.h"
|
||||
|
||||
#include "data/copygc_types.h"
|
||||
#include "data/ec_types.h"
|
||||
#include "data/keylist_types.h"
|
||||
#include "data/nocow_locking_types.h"
|
||||
@ -256,13 +258,14 @@
|
||||
|
||||
#include "fs/quota_types.h"
|
||||
|
||||
#include "init/error_types.h"
|
||||
#include "init/passes_types.h"
|
||||
#include "init/dev_types.h"
|
||||
|
||||
#include "journal/types.h"
|
||||
|
||||
#include "sb/counters_types.h"
|
||||
#include "sb/errors_types.h"
|
||||
#include "sb/io_types.h"
|
||||
#include "sb/members_types.h"
|
||||
|
||||
#include "snapshots/snapshot_types.h"
|
||||
@ -514,12 +517,6 @@ enum bch_time_stats {
|
||||
/* Number of nodes btree coalesce will try to coalesce at once */
|
||||
#define GC_MERGE_NODES 4U
|
||||
|
||||
/* Maximum number of nodes we might need to allocate atomically: */
|
||||
#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))
|
||||
|
||||
/* Size of the freelist we allocate btree nodes from: */
|
||||
#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4)
|
||||
|
||||
#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX)
|
||||
|
||||
struct btree;
|
||||
@ -712,23 +709,6 @@ struct btree_debug {
|
||||
};
|
||||
|
||||
#define BCH_LINK_MAX U32_MAX
|
||||
#define BCH_TRANSACTIONS_NR 128
|
||||
|
||||
struct btree_transaction_stats {
|
||||
struct bch2_time_stats duration;
|
||||
struct bch2_time_stats lock_hold_times;
|
||||
struct mutex lock;
|
||||
unsigned nr_max_paths;
|
||||
unsigned max_mem;
|
||||
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
|
||||
darray_trans_kmalloc_trace trans_kmalloc_trace;
|
||||
#endif
|
||||
char *max_paths_text;
|
||||
};
|
||||
|
||||
struct bch_fs_pcpu {
|
||||
u64 sectors_available;
|
||||
};
|
||||
|
||||
struct journal_seq_blacklist_table {
|
||||
size_t nr;
|
||||
@ -739,10 +719,6 @@ struct journal_seq_blacklist_table {
|
||||
} entries[];
|
||||
};
|
||||
|
||||
struct btree_trans_buf {
|
||||
struct btree_trans *trans;
|
||||
};
|
||||
|
||||
#define BCH_WRITE_REFS() \
|
||||
x(journal) \
|
||||
x(trans) \
|
||||
@ -793,19 +769,22 @@ struct bch_fs {
|
||||
struct super_block *vfs_sb;
|
||||
dev_t dev;
|
||||
char name[40];
|
||||
|
||||
struct stdio_redirect *stdio;
|
||||
struct task_struct *stdio_filter;
|
||||
unsigned loglevel;
|
||||
unsigned prev_loglevel;
|
||||
/*
|
||||
* Certain operations are only allowed in single threaded mode, during
|
||||
* recovery, and we want to assert that this is the case:
|
||||
*/
|
||||
struct task_struct *recovery_task;
|
||||
|
||||
/* ro/rw, add/remove/resize devices: */
|
||||
struct rw_semaphore state_lock;
|
||||
|
||||
/* Counts outstanding writes, for clean transition to read-only */
|
||||
struct enumerated_ref writes;
|
||||
/*
|
||||
* Certain operations are only allowed in single threaded mode, during
|
||||
* recovery, and we want to assert that this is the case:
|
||||
*/
|
||||
struct task_struct *recovery_task;
|
||||
|
||||
/*
|
||||
* Analagous to c->writes, for asynchronous ops that don't necessarily
|
||||
@ -813,121 +792,83 @@ struct bch_fs {
|
||||
*/
|
||||
refcount_t ro_ref;
|
||||
wait_queue_head_t ro_ref_wait;
|
||||
|
||||
struct work_struct read_only_work;
|
||||
|
||||
struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX];
|
||||
struct bch_devs_mask devs_online;
|
||||
struct bch_devs_mask devs_removed;
|
||||
struct bch_devs_mask devs_rotational;
|
||||
|
||||
u8 extent_type_u64s[31];
|
||||
u8 extent_types_known;
|
||||
|
||||
struct bch_accounting_mem accounting;
|
||||
|
||||
struct bch_replicas_cpu replicas;
|
||||
|
||||
struct journal_entry_res btree_root_journal_res;
|
||||
struct journal_entry_res clock_journal_res;
|
||||
|
||||
struct bch_disk_groups_cpu __rcu *disk_groups;
|
||||
|
||||
struct bch_opts opts;
|
||||
atomic_t opt_change_cookie;
|
||||
|
||||
unsigned loglevel;
|
||||
unsigned prev_loglevel;
|
||||
|
||||
/* Updated by bch2_sb_update():*/
|
||||
struct {
|
||||
__uuid_t uuid;
|
||||
__uuid_t user_uuid;
|
||||
|
||||
u16 version;
|
||||
u16 version_incompat;
|
||||
u16 version_incompat_allowed;
|
||||
u16 version_min;
|
||||
u16 version_upgrade_complete;
|
||||
|
||||
u8 nr_devices;
|
||||
u8 clean;
|
||||
bool multi_device; /* true if we've ever had more than one device */
|
||||
|
||||
u8 encryption_type;
|
||||
|
||||
u64 time_base_lo;
|
||||
u32 time_base_hi;
|
||||
unsigned time_units_per_sec;
|
||||
unsigned nsec_per_time_unit;
|
||||
u64 features;
|
||||
u64 compat;
|
||||
u64 recovery_passes_required;
|
||||
unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
|
||||
u64 btrees_lost_data;
|
||||
} sb;
|
||||
|
||||
unsigned long incompat_versions_requested[BITS_TO_LONGS(BCH_VERSION_MINOR(bcachefs_metadata_version_current))];
|
||||
|
||||
struct unicode_map *cf_encoding;
|
||||
|
||||
struct bch_sb_cpu sb;
|
||||
struct bch_sb_handle disk_sb;
|
||||
|
||||
unsigned short block_bits; /* ilog2(block_size) */
|
||||
|
||||
u16 btree_foreground_merge_threshold;
|
||||
|
||||
struct closure sb_write;
|
||||
struct mutex sb_lock;
|
||||
unsigned long incompat_versions_requested[BITS_TO_LONGS(BCH_VERSION_MINOR(bcachefs_metadata_version_current))];
|
||||
struct unicode_map *cf_encoding;
|
||||
|
||||
unsigned short block_bits; /* ilog2(block_size) */
|
||||
u16 btree_foreground_merge_threshold;
|
||||
|
||||
struct delayed_work maybe_schedule_btree_bitmap_gc;
|
||||
|
||||
/* snapshot.c: */
|
||||
struct snapshot_table __rcu *snapshots;
|
||||
struct mutex snapshot_table_lock;
|
||||
struct rw_semaphore snapshot_create_lock;
|
||||
struct bch_fs_counters counters;
|
||||
struct bch2_time_stats times[BCH_TIME_STAT_NR];
|
||||
struct bch_fs_errors errors;
|
||||
|
||||
struct snapshot_delete snapshot_delete;
|
||||
struct work_struct snapshot_wait_for_pagecache_and_delete_work;
|
||||
snapshot_id_list snapshots_unlinked;
|
||||
struct mutex snapshots_unlinked_lock;
|
||||
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
|
||||
struct async_obj_list async_objs[BCH_ASYNC_OBJ_NR];
|
||||
#endif
|
||||
|
||||
struct journal journal;
|
||||
u64 journal_replay_seq_start;
|
||||
u64 journal_replay_seq_end;
|
||||
GENRADIX(struct journal_replay *) journal_entries;
|
||||
u64 journal_entries_base_seq;
|
||||
struct journal_keys journal_keys;
|
||||
struct list_head journal_iters;
|
||||
struct journal_seq_blacklist_table *journal_seq_blacklist_table;
|
||||
|
||||
struct bch_fs_recovery recovery;
|
||||
|
||||
/* BTREE CACHE */
|
||||
struct bio_set btree_bio;
|
||||
struct workqueue_struct *btree_read_complete_wq;
|
||||
struct workqueue_struct *btree_write_submit_wq;
|
||||
|
||||
struct btree_root btree_roots_known[BTREE_ID_NR];
|
||||
DARRAY(struct btree_root) btree_roots_extra;
|
||||
struct mutex btree_root_lock;
|
||||
|
||||
struct btree_cache btree_cache;
|
||||
|
||||
/*
|
||||
* Cache of allocated btree nodes - if we allocate a btree node and
|
||||
* don't use it, if we free it that space can't be reused until going
|
||||
* _all_ the way through the allocator (which exposes us to a livelock
|
||||
* when allocating btree reserves fail halfway through) - instead, we
|
||||
* can stick them here:
|
||||
* A btree node on disk could have too many bsets for an iterator to fit
|
||||
* on the stack - have to dynamically allocate them
|
||||
*/
|
||||
struct btree_alloc btree_reserve_cache[BTREE_NODE_RESERVE * 2];
|
||||
unsigned btree_reserve_cache_nr;
|
||||
struct mutex btree_reserve_cache_lock;
|
||||
mempool_t fill_iter;
|
||||
mempool_t btree_bounce_pool;
|
||||
struct bio_set btree_bio;
|
||||
struct workqueue_struct *btree_read_complete_wq;
|
||||
struct workqueue_struct *btree_write_submit_wq;
|
||||
struct journal_entry_res btree_root_journal_res;
|
||||
struct workqueue_struct *btree_write_complete_wq;
|
||||
|
||||
mempool_t btree_interior_update_pool;
|
||||
struct list_head btree_interior_update_list;
|
||||
struct list_head btree_interior_updates_unwritten;
|
||||
struct mutex btree_interior_update_lock;
|
||||
struct mutex btree_interior_update_commit_lock;
|
||||
struct closure_waitlist btree_interior_update_wait;
|
||||
struct bch_fs_btree_cache btree_cache;
|
||||
struct bch_fs_btree_key_cache btree_key_cache;
|
||||
struct bch_fs_btree_write_buffer btree_write_buffer;
|
||||
struct bch_fs_btree_trans btree_trans;
|
||||
struct bch_fs_btree_reserve_cache btree_reserve_cache;
|
||||
struct bch_fs_btree_interior_updates btree_interior_updates;
|
||||
struct bch_fs_btree_node_rewrites btree_node_rewrites;
|
||||
struct find_btree_nodes found_btree_nodes;
|
||||
|
||||
struct workqueue_struct *btree_interior_update_worker;
|
||||
struct work_struct btree_interior_update_work;
|
||||
struct bch_fs_gc gc;
|
||||
struct bch_fs_gc_gens gc_gens;
|
||||
|
||||
struct workqueue_struct *btree_node_rewrite_worker;
|
||||
struct list_head btree_node_rewrites;
|
||||
struct list_head btree_node_rewrites_pending;
|
||||
spinlock_t btree_node_rewrites_lock;
|
||||
struct closure_waitlist btree_node_rewrites_wait;
|
||||
struct bch_accounting_mem accounting;
|
||||
struct bch_replicas_cpu replicas;
|
||||
struct bch_disk_groups_cpu __rcu *disk_groups;
|
||||
struct bch_fs_capacity capacity;
|
||||
struct bch_fs_allocator allocator;
|
||||
struct buckets_waiting_for_journal buckets_waiting_for_journal;
|
||||
|
||||
struct bch_fs_snapshots snapshots;
|
||||
|
||||
/* btree_io.c: */
|
||||
spinlock_t btree_write_error_lock;
|
||||
@ -935,120 +876,21 @@ struct bch_fs {
|
||||
atomic64_t nr;
|
||||
atomic64_t bytes;
|
||||
} btree_write_stats[BTREE_WRITE_TYPE_NR];
|
||||
|
||||
/* btree_iter.c: */
|
||||
struct seqmutex btree_trans_lock;
|
||||
struct list_head btree_trans_list;
|
||||
mempool_t btree_trans_pool;
|
||||
mempool_t btree_trans_mem_pool;
|
||||
struct btree_trans_buf __percpu *btree_trans_bufs;
|
||||
|
||||
struct srcu_struct btree_trans_barrier;
|
||||
bool btree_trans_barrier_initialized;
|
||||
|
||||
struct btree_key_cache btree_key_cache;
|
||||
|
||||
struct btree_write_buffer btree_write_buffer;
|
||||
|
||||
struct workqueue_struct *btree_update_wq;
|
||||
struct workqueue_struct *btree_write_complete_wq;
|
||||
/* copygc needs its own workqueue for index updates.. */
|
||||
struct workqueue_struct *copygc_wq;
|
||||
/*
|
||||
* Use a dedicated wq for write ref holder tasks. Required to avoid
|
||||
* dependency problems with other wq tasks that can block on ref
|
||||
* draining, such as read-only transition.
|
||||
*/
|
||||
struct workqueue_struct *write_ref_wq;
|
||||
struct workqueue_struct *write_ref_wq;
|
||||
|
||||
struct workqueue_struct *promote_wq;
|
||||
struct semaphore __percpu *promote_limit;
|
||||
struct workqueue_struct *promote_wq;
|
||||
struct semaphore __percpu *promote_limit;
|
||||
|
||||
/* ALLOCATION */
|
||||
struct bch_devs_mask online_devs;
|
||||
struct bch_devs_mask rw_devs[BCH_DATA_NR];
|
||||
unsigned long rw_devs_change_count;
|
||||
|
||||
u64 capacity; /* sectors */
|
||||
u64 reserved; /* sectors */
|
||||
|
||||
/*
|
||||
* When capacity _decreases_ (due to a disk being removed), we
|
||||
* increment capacity_gen - this invalidates outstanding reservations
|
||||
* and forces them to be revalidated
|
||||
*/
|
||||
u32 capacity_gen;
|
||||
unsigned bucket_size_max;
|
||||
|
||||
atomic64_t sectors_available;
|
||||
struct mutex sectors_available_lock;
|
||||
|
||||
struct bch_fs_pcpu __percpu *pcpu;
|
||||
|
||||
struct percpu_rw_semaphore mark_lock;
|
||||
|
||||
seqcount_t usage_lock;
|
||||
struct bch_fs_usage_base __percpu *usage;
|
||||
u64 __percpu *online_reserved;
|
||||
|
||||
unsigned long allocator_last_stuck;
|
||||
|
||||
struct io_clock io_clock[2];
|
||||
|
||||
/* JOURNAL SEQ BLACKLIST */
|
||||
struct journal_seq_blacklist_table *
|
||||
journal_seq_blacklist_table;
|
||||
|
||||
/* ALLOCATOR */
|
||||
spinlock_t freelist_lock;
|
||||
struct closure_waitlist freelist_wait;
|
||||
|
||||
open_bucket_idx_t open_buckets_freelist;
|
||||
open_bucket_idx_t open_buckets_nr_free;
|
||||
struct closure_waitlist open_buckets_wait;
|
||||
struct open_bucket open_buckets[OPEN_BUCKETS_COUNT];
|
||||
open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT];
|
||||
|
||||
open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
|
||||
open_bucket_idx_t open_buckets_partial_nr;
|
||||
|
||||
struct write_point btree_write_point;
|
||||
struct write_point reconcile_write_point;
|
||||
|
||||
struct write_point write_points[WRITE_POINT_MAX];
|
||||
struct hlist_head write_points_hash[WRITE_POINT_HASH_NR];
|
||||
struct mutex write_points_hash_lock;
|
||||
unsigned write_points_nr;
|
||||
|
||||
struct buckets_waiting_for_journal buckets_waiting_for_journal;
|
||||
|
||||
/* GARBAGE COLLECTION */
|
||||
struct work_struct gc_gens_work;
|
||||
unsigned long gc_count;
|
||||
|
||||
enum btree_id gc_gens_btree;
|
||||
struct bpos gc_gens_pos;
|
||||
|
||||
/*
|
||||
* Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos]
|
||||
* has been marked by GC.
|
||||
*
|
||||
* gc_cur_phase is a superset of btree_ids (BTREE_ID_extents etc.)
|
||||
*
|
||||
* Protected by gc_pos_lock. Only written to by GC thread, so GC thread
|
||||
* can read without a lock.
|
||||
*/
|
||||
seqcount_t gc_pos_lock;
|
||||
struct gc_pos gc_pos;
|
||||
|
||||
/*
|
||||
* The allocation code needs gc_mark in struct bucket to be correct, but
|
||||
* it's not while a gc is in progress.
|
||||
*/
|
||||
struct rw_semaphore gc_lock;
|
||||
struct mutex gc_gens_lock;
|
||||
struct io_clock io_clock[2];
|
||||
struct journal_entry_res clock_journal_res;
|
||||
|
||||
/* IO PATH */
|
||||
struct workqueue_struct *btree_update_wq;
|
||||
struct bio_set bio_read;
|
||||
struct bio_set bio_read_split;
|
||||
struct bio_set bio_write;
|
||||
@ -1059,10 +901,6 @@ struct bch_fs {
|
||||
nocow_locks;
|
||||
struct rhashtable promote_table;
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
|
||||
struct async_obj_list async_objs[BCH_ASYNC_OBJ_NR];
|
||||
#endif
|
||||
|
||||
mempool_t compression_bounce[2];
|
||||
mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR];
|
||||
size_t zstd_workspace_size;
|
||||
@ -1076,38 +914,9 @@ struct bch_fs {
|
||||
struct list_head moving_context_list;
|
||||
struct mutex moving_context_lock;
|
||||
|
||||
/* REBALANCE */
|
||||
struct bch_fs_reconcile reconcile;
|
||||
|
||||
/* COPYGC */
|
||||
struct task_struct *copygc_thread;
|
||||
struct write_point copygc_write_point;
|
||||
s64 copygc_wait_at;
|
||||
s64 copygc_wait;
|
||||
bool copygc_running;
|
||||
wait_queue_head_t copygc_running_wq;
|
||||
|
||||
/* STRIPES: */
|
||||
GENRADIX(struct gc_stripe) gc_stripes;
|
||||
|
||||
struct hlist_head ec_stripes_new[32];
|
||||
struct hlist_head ec_stripes_new_buckets[64];
|
||||
spinlock_t ec_stripes_new_lock;
|
||||
|
||||
/* ERASURE CODING */
|
||||
struct list_head ec_stripe_head_list;
|
||||
struct mutex ec_stripe_head_lock;
|
||||
|
||||
struct list_head ec_stripe_new_list;
|
||||
struct mutex ec_stripe_new_lock;
|
||||
wait_queue_head_t ec_stripe_new_wait;
|
||||
|
||||
struct work_struct ec_stripe_create_work;
|
||||
u64 ec_stripe_hint;
|
||||
|
||||
struct work_struct ec_stripe_delete_work;
|
||||
|
||||
struct bio_set ec_bioset;
|
||||
struct bch_fs_copygc copygc;
|
||||
struct bch_fs_ec ec;
|
||||
|
||||
/* REFLINK */
|
||||
reflink_gc_table reflink_gc_table;
|
||||
@ -1129,11 +938,6 @@ struct bch_fs {
|
||||
/* QUOTAS */
|
||||
struct bch_memquota_type quotas[QTYP_NR];
|
||||
|
||||
/* RECOVERY */
|
||||
u64 journal_replay_seq_start;
|
||||
u64 journal_replay_seq_end;
|
||||
struct bch_fs_recovery recovery;
|
||||
|
||||
/* DEBUG JUNK */
|
||||
struct dentry *fs_debug_dir;
|
||||
struct dentry *btree_debug_dir;
|
||||
@ -1142,38 +946,6 @@ struct bch_fs {
|
||||
struct btree *verify_data;
|
||||
struct btree_node *verify_ondisk;
|
||||
struct mutex verify_lock;
|
||||
|
||||
/*
|
||||
* A btree node on disk could have too many bsets for an iterator to fit
|
||||
* on the stack - have to dynamically allocate them
|
||||
*/
|
||||
mempool_t fill_iter;
|
||||
|
||||
mempool_t btree_bounce_pool;
|
||||
|
||||
struct journal journal;
|
||||
GENRADIX(struct journal_replay *) journal_entries;
|
||||
u64 journal_entries_base_seq;
|
||||
struct journal_keys journal_keys;
|
||||
struct list_head journal_iters;
|
||||
|
||||
struct find_btree_nodes found_btree_nodes;
|
||||
|
||||
u64 last_bucket_seq_cleanup;
|
||||
|
||||
struct bch_fs_counters counters;
|
||||
|
||||
struct bch2_time_stats times[BCH_TIME_STAT_NR];
|
||||
|
||||
struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
|
||||
|
||||
/* ERRORS */
|
||||
struct list_head fsck_error_msgs;
|
||||
struct mutex fsck_error_msgs_lock;
|
||||
bool fsck_alloc_msgs_err;
|
||||
|
||||
bch_sb_errors_cpu fsck_error_counts;
|
||||
struct mutex fsck_error_counts_lock;
|
||||
};
|
||||
|
||||
static inline int __bch2_err_throw(struct bch_fs *c, int err)
|
||||
|
||||
@ -36,7 +36,7 @@ void bch2_recalc_btree_reserve(struct bch_fs *c)
|
||||
{
|
||||
unsigned reserve = 16;
|
||||
|
||||
if (!c->btree_roots_known[0].b)
|
||||
if (!c->btree_cache.roots_known[0].b)
|
||||
reserve += 8;
|
||||
|
||||
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
|
||||
@ -51,7 +51,8 @@ void bch2_recalc_btree_reserve(struct bch_fs *c)
|
||||
|
||||
static inline size_t btree_cache_can_free(struct btree_cache_list *list)
|
||||
{
|
||||
struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
|
||||
struct bch_fs_btree_cache *bc =
|
||||
container_of(list, struct bch_fs_btree_cache, live[list->idx]);
|
||||
|
||||
size_t can_free = list->nr;
|
||||
if (!list->idx)
|
||||
@ -59,7 +60,7 @@ static inline size_t btree_cache_can_free(struct btree_cache_list *list)
|
||||
return can_free;
|
||||
}
|
||||
|
||||
static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
|
||||
static void btree_node_to_freedlist(struct bch_fs_btree_cache *bc, struct btree *b)
|
||||
{
|
||||
BUG_ON(!list_empty(&b->list));
|
||||
|
||||
@ -69,7 +70,7 @@ static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
|
||||
list_add(&b->list, &bc->freed_nonpcpu);
|
||||
}
|
||||
|
||||
static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *b)
|
||||
static void __bch2_btree_node_to_freelist(struct bch_fs_btree_cache *bc, struct btree *b)
|
||||
{
|
||||
BUG_ON(!list_empty(&b->list));
|
||||
BUG_ON(!b->data);
|
||||
@ -80,7 +81,7 @@ static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *
|
||||
|
||||
void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
|
||||
scoped_guard(mutex, &bc->lock)
|
||||
__bch2_btree_node_to_freelist(bc, b);
|
||||
@ -118,7 +119,7 @@ void __btree_node_data_free(struct btree *b)
|
||||
b->aux_data = NULL;
|
||||
}
|
||||
|
||||
static void btree_node_data_free(struct btree_cache *bc, struct btree *b)
|
||||
static void btree_node_data_free(struct bch_fs_btree_cache *bc, struct btree *b)
|
||||
{
|
||||
BUG_ON(list_empty(&b->list));
|
||||
list_del_init(&b->list);
|
||||
@ -203,7 +204,7 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
|
||||
return b;
|
||||
}
|
||||
|
||||
static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
|
||||
static inline bool __btree_node_pinned(struct bch_fs_btree_cache *bc, struct btree *b)
|
||||
{
|
||||
struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
|
||||
|
||||
@ -216,7 +217,7 @@ static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
|
||||
|
||||
void bch2_node_pin(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
|
||||
guard(mutex)(&bc->lock);
|
||||
if (!btree_node_is_root(c, b) && !btree_node_pinned(b)) {
|
||||
@ -229,12 +230,12 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b)
|
||||
|
||||
void bch2_btree_cache_unpin(struct bch_fs *c)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b, *n;
|
||||
|
||||
guard(mutex)(&bc->lock);
|
||||
c->btree_cache.pinned_nodes_mask[0] = 0;
|
||||
c->btree_cache.pinned_nodes_mask[1] = 0;
|
||||
bc->pinned_nodes_mask[0] = 0;
|
||||
bc->pinned_nodes_mask[1] = 0;
|
||||
|
||||
list_for_each_entry_safe(b, n, &bc->live[1].list, list) {
|
||||
clear_btree_node_pinned(b);
|
||||
@ -246,7 +247,7 @@ void bch2_btree_cache_unpin(struct bch_fs *c)
|
||||
|
||||
/* Btree in memory cache - hash table */
|
||||
|
||||
void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
||||
void __bch2_btree_node_hash_remove(struct bch_fs_btree_cache *bc, struct btree *b)
|
||||
{
|
||||
lockdep_assert_held(&bc->lock);
|
||||
|
||||
@ -264,13 +265,13 @@ void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
||||
list_del_init(&b->list);
|
||||
}
|
||||
|
||||
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
||||
void bch2_btree_node_hash_remove(struct bch_fs_btree_cache *bc, struct btree *b)
|
||||
{
|
||||
__bch2_btree_node_hash_remove(bc, b);
|
||||
__bch2_btree_node_to_freelist(bc, b);
|
||||
}
|
||||
|
||||
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
|
||||
int __bch2_btree_node_hash_insert(struct bch_fs_btree_cache *bc, struct btree *b)
|
||||
{
|
||||
BUG_ON(!list_empty(&b->list));
|
||||
BUG_ON(b->hash_val);
|
||||
@ -289,7 +290,7 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
|
||||
int bch2_btree_node_hash_insert(struct bch_fs_btree_cache *bc, struct btree *b,
|
||||
unsigned level, enum btree_id id)
|
||||
{
|
||||
b->c.level = level;
|
||||
@ -326,8 +327,8 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
__flatten
|
||||
static inline struct btree *btree_cache_find(struct btree_cache *bc,
|
||||
const struct bkey_i *k)
|
||||
static inline struct btree *btree_cache_find(struct bch_fs_btree_cache *bc,
|
||||
const struct bkey_i *k)
|
||||
{
|
||||
u64 v = btree_ptr_hash_val(k);
|
||||
|
||||
@ -337,7 +338,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
|
||||
static int __btree_node_reclaim_checks(struct bch_fs *c, struct btree *b,
|
||||
bool flush, bool locked)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
|
||||
lockdep_assert_held(&bc->lock);
|
||||
|
||||
@ -403,7 +404,7 @@ static int __btree_node_reclaim_checks(struct bch_fs *c, struct btree *b,
|
||||
*/
|
||||
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&bc->lock);
|
||||
@ -455,7 +456,8 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct btree_cache_list *list = shrink->private_data;
|
||||
struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
|
||||
struct bch_fs_btree_cache *bc =
|
||||
container_of(list, struct bch_fs_btree_cache, live[list->idx]);
|
||||
struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
|
||||
struct btree *b, *t;
|
||||
unsigned long nr = sc->nr_to_scan;
|
||||
@ -573,7 +575,7 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
|
||||
|
||||
void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b, *t;
|
||||
|
||||
shrinker_free(bc->live[1].shrink);
|
||||
@ -608,8 +610,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
BUG_ON(!bch2_journal_error(&c->journal) &&
|
||||
atomic_long_read(&c->btree_cache.nr_dirty));
|
||||
BUG_ON(!bch2_journal_error(&c->journal) && atomic_long_read(&bc->nr_dirty));
|
||||
|
||||
list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
|
||||
|
||||
@ -627,11 +628,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
||||
|
||||
if (bc->table_init_done)
|
||||
rhashtable_destroy(&bc->table);
|
||||
|
||||
darray_exit(&bc->roots_extra);
|
||||
}
|
||||
|
||||
int bch2_fs_btree_cache_init(struct bch_fs *c)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
struct shrinker *shrink;
|
||||
|
||||
if (rhashtable_init(&bc->table, &bch_btree_cache_params))
|
||||
@ -675,8 +678,9 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
|
||||
void bch2_fs_btree_cache_init_early(struct bch_fs_btree_cache *bc)
|
||||
{
|
||||
mutex_init(&bc->root_lock);
|
||||
mutex_init(&bc->lock);
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) {
|
||||
bc->live[i].idx = i;
|
||||
@ -695,7 +699,7 @@ void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
|
||||
*/
|
||||
void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_cache *bc = &trans->c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &trans->c->btree_cache;
|
||||
|
||||
if (bc->alloc_lock == current) {
|
||||
event_inc_trace(trans->c, btree_cache_cannibalize_unlock, buf,
|
||||
@ -707,7 +711,7 @@ void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
|
||||
|
||||
static int __btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
struct task_struct *old;
|
||||
|
||||
old = NULL;
|
||||
@ -743,7 +747,7 @@ int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure
|
||||
|
||||
static struct btree *btree_node_cannibalize(struct bch_fs *c)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
|
||||
@ -769,7 +773,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
|
||||
struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
struct list_head *freed = pcpu_read_locks
|
||||
? &bc->freed_pcpu
|
||||
: &bc->freed_nonpcpu;
|
||||
@ -895,7 +899,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
|
||||
bool sync)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b;
|
||||
|
||||
EBUG_ON(path && level + 1 != path->level);
|
||||
@ -1037,7 +1041,7 @@ static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btr
|
||||
unsigned long trace_ip)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b;
|
||||
int ret;
|
||||
|
||||
@ -1232,7 +1236,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
|
||||
bool nofill)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b;
|
||||
int ret;
|
||||
|
||||
@ -1315,7 +1319,7 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
|
||||
BUG_ON(path && !btree_node_locked(path, level + 1));
|
||||
BUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
@ -1334,7 +1338,7 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
|
||||
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b;
|
||||
|
||||
b = btree_cache_find(bc, k);
|
||||
@ -1471,7 +1475,7 @@ static const char * const bch2_btree_cache_not_freed_reasons_strs[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
|
||||
void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs_btree_cache *bc)
|
||||
{
|
||||
struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
|
||||
|
||||
|
||||
@ -14,11 +14,11 @@ void bch2_recalc_btree_reserve(struct bch_fs *);
|
||||
|
||||
void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *);
|
||||
|
||||
void __bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
|
||||
void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
|
||||
void __bch2_btree_node_hash_remove(struct bch_fs_btree_cache *, struct btree *);
|
||||
void bch2_btree_node_hash_remove(struct bch_fs_btree_cache *, struct btree *);
|
||||
|
||||
int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
|
||||
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
|
||||
int __bch2_btree_node_hash_insert(struct bch_fs_btree_cache *, struct btree *);
|
||||
int bch2_btree_node_hash_insert(struct bch_fs_btree_cache *, struct btree *,
|
||||
unsigned, enum btree_id);
|
||||
|
||||
void bch2_node_pin(struct bch_fs *, struct btree *);
|
||||
@ -48,7 +48,7 @@ void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *);
|
||||
|
||||
void bch2_fs_btree_cache_exit(struct bch_fs *);
|
||||
int bch2_fs_btree_cache_init(struct bch_fs *);
|
||||
void bch2_fs_btree_cache_init_early(struct btree_cache *);
|
||||
void bch2_fs_btree_cache_init_early(struct bch_fs_btree_cache *);
|
||||
|
||||
static inline u64 btree_ptr_hash_val(const struct bkey_i *k)
|
||||
{
|
||||
@ -119,21 +119,21 @@ static inline unsigned btree_blocks(const struct bch_fs *c)
|
||||
|
||||
static inline unsigned btree_id_nr_alive(struct bch_fs *c)
|
||||
{
|
||||
return BTREE_ID_NR + c->btree_roots_extra.nr;
|
||||
return BTREE_ID_NR + c->btree_cache.roots_extra.nr;
|
||||
}
|
||||
|
||||
static inline struct btree_root *bch2_btree_id_root(struct bch_fs *c, unsigned id)
|
||||
{
|
||||
if (likely(id < BTREE_ID_NR)) {
|
||||
return &c->btree_roots_known[id];
|
||||
return &c->btree_cache.roots_known[id];
|
||||
} else {
|
||||
unsigned idx = id - BTREE_ID_NR;
|
||||
|
||||
/* This can happen when we're called from btree_node_scan */
|
||||
if (idx >= c->btree_roots_extra.nr)
|
||||
if (idx >= c->btree_cache.roots_extra.nr)
|
||||
return NULL;
|
||||
|
||||
return &c->btree_roots_extra.data[idx];
|
||||
return &c->btree_cache.roots_extra.data[idx];
|
||||
}
|
||||
}
|
||||
|
||||
@ -160,7 +160,7 @@ void __bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *,
|
||||
enum btree_id, unsigned, struct bkey_s_c);
|
||||
void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
|
||||
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
|
||||
void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
|
||||
void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs_btree_cache *);
|
||||
|
||||
#define trace_btree_node(_c, _b, event) \
|
||||
event_inc_trace(c, event, buf, bch2_btree_pos_to_text(&buf, c, b))
|
||||
|
||||
@ -76,14 +76,14 @@ static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
|
||||
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
|
||||
{
|
||||
guard(preempt)();
|
||||
write_seqcount_begin(&c->gc_pos_lock);
|
||||
c->gc_pos = new_pos;
|
||||
write_seqcount_end(&c->gc_pos_lock);
|
||||
write_seqcount_begin(&c->gc.pos_lock);
|
||||
c->gc.pos = new_pos;
|
||||
write_seqcount_end(&c->gc.pos_lock);
|
||||
}
|
||||
|
||||
static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
|
||||
{
|
||||
BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) < 0);
|
||||
BUG_ON(gc_pos_cmp(new_pos, c->gc.pos) < 0);
|
||||
__gc_pos_set(c, new_pos);
|
||||
}
|
||||
|
||||
@ -798,7 +798,7 @@ static void bch2_gc_free(struct bch_fs *c)
|
||||
bch2_accounting_gc_free(c);
|
||||
|
||||
genradix_free(&c->reflink_gc_table);
|
||||
genradix_free(&c->gc_stripes);
|
||||
genradix_free(&c->ec.gc_stripes);
|
||||
|
||||
for_each_member_device(c, ca)
|
||||
genradix_free(&ca->buckets_gc);
|
||||
@ -953,7 +953,7 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans,
|
||||
struct bch_fs *c = trans->c;
|
||||
CLASS(printbuf, buf)();
|
||||
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
|
||||
struct gc_stripe *m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
|
||||
struct gc_stripe *m = genradix_ptr(&c->ec.gc_stripes, k.k->p.offset);
|
||||
|
||||
bool bad = false;
|
||||
for (unsigned i = 0; i < s->nr_blocks; i++) {
|
||||
@ -1024,7 +1024,7 @@ int bch2_check_allocations(struct bch_fs *c)
|
||||
int ret;
|
||||
|
||||
guard(rwsem_read)(&c->state_lock);
|
||||
guard(rwsem_write)(&c->gc_lock);
|
||||
guard(rwsem_write)(&c->gc.lock);
|
||||
|
||||
bch2_btree_interior_updates_flush(c);
|
||||
|
||||
@ -1046,14 +1046,12 @@ int bch2_check_allocations(struct bch_fs *c)
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
c->gc_count++;
|
||||
|
||||
ret = bch2_gc_alloc_done(c) ?:
|
||||
bch2_gc_accounting_done(c) ?:
|
||||
bch2_gc_stripes_done(c) ?:
|
||||
bch2_gc_reflink_done(c);
|
||||
out:
|
||||
scoped_guard(percpu_write, &c->mark_lock) {
|
||||
scoped_guard(percpu_write, &c->capacity.mark_lock) {
|
||||
/* Indicates that gc is no longer in progress: */
|
||||
__gc_pos_set(c, gc_phase(GC_PHASE_not_running));
|
||||
bch2_gc_free(c);
|
||||
@ -1063,7 +1061,7 @@ out:
|
||||
* At startup, allocations can happen directly instead of via the
|
||||
* allocator thread - issue wakeup in case they blocked on gc_lock:
|
||||
*/
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
closure_wake_up(&c->allocator.freelist_wait);
|
||||
|
||||
if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags))
|
||||
bch2_sb_members_clean_deleted(c);
|
||||
@ -1104,7 +1102,7 @@ int bch2_gc_gens(struct bch_fs *c)
|
||||
u64 b, start_time = local_clock();
|
||||
int ret;
|
||||
|
||||
if (!mutex_trylock(&c->gc_gens_lock))
|
||||
if (!mutex_trylock(&c->gc_gens.lock))
|
||||
return 0;
|
||||
|
||||
event_inc_trace(c, gc_gens_start, buf);
|
||||
@ -1115,7 +1113,7 @@ int bch2_gc_gens(struct bch_fs *c)
|
||||
* state lock at the start of going RO.
|
||||
*/
|
||||
if (!down_read_trylock(&c->state_lock)) {
|
||||
mutex_unlock(&c->gc_gens_lock);
|
||||
mutex_unlock(&c->gc_gens.lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1137,8 +1135,7 @@ int bch2_gc_gens(struct bch_fs *c)
|
||||
|
||||
for (unsigned i = 0; i < BTREE_ID_NR; i++)
|
||||
if (btree_type_has_data_ptrs(i)) {
|
||||
c->gc_gens_btree = i;
|
||||
c->gc_gens_pos = POS_MIN;
|
||||
c->gc_gens.pos = BBPOS(i, POS_MIN);
|
||||
|
||||
ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter, i,
|
||||
@ -1172,10 +1169,7 @@ int bch2_gc_gens(struct bch_fs *c)
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
c->gc_gens_btree = 0;
|
||||
c->gc_gens_pos = POS_MIN;
|
||||
|
||||
c->gc_count++;
|
||||
c->gc_gens.pos = BBPOS_MIN;
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
|
||||
event_inc_trace(c, gc_gens_end, buf);
|
||||
@ -1192,7 +1186,7 @@ err:
|
||||
}
|
||||
|
||||
up_read(&c->state_lock);
|
||||
mutex_unlock(&c->gc_gens_lock);
|
||||
mutex_unlock(&c->gc_gens.lock);
|
||||
if (!bch2_err_matches(ret, EROFS))
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
@ -1200,7 +1194,7 @@ err:
|
||||
|
||||
static void bch2_gc_gens_work(struct work_struct *work)
|
||||
{
|
||||
struct bch_fs *c = container_of(work, struct bch_fs, gc_gens_work);
|
||||
struct bch_fs *c = container_of(work, struct bch_fs, gc_gens.work);
|
||||
bch2_gc_gens(c);
|
||||
enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens);
|
||||
}
|
||||
@ -1208,7 +1202,7 @@ static void bch2_gc_gens_work(struct work_struct *work)
|
||||
void bch2_gc_gens_async(struct bch_fs *c)
|
||||
{
|
||||
if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_gc_gens) &&
|
||||
!queue_work(c->write_ref_wq, &c->gc_gens_work))
|
||||
!queue_work(c->write_ref_wq, &c->gc_gens.work))
|
||||
enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens);
|
||||
}
|
||||
|
||||
@ -1277,9 +1271,9 @@ int bch2_merge_btree_nodes(struct bch_fs *c)
|
||||
|
||||
void bch2_fs_btree_gc_init_early(struct bch_fs *c)
|
||||
{
|
||||
seqcount_init(&c->gc_pos_lock);
|
||||
INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);
|
||||
seqcount_init(&c->gc.pos_lock);
|
||||
INIT_WORK(&c->gc_gens.work, bch2_gc_gens_work);
|
||||
|
||||
init_rwsem(&c->gc_lock);
|
||||
mutex_init(&c->gc_gens_lock);
|
||||
init_rwsem(&c->gc.lock);
|
||||
mutex_init(&c->gc_gens.lock);
|
||||
}
|
||||
|
||||
@ -71,9 +71,9 @@ static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
|
||||
bool ret;
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&c->gc_pos_lock);
|
||||
ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
|
||||
} while (read_seqcount_retry(&c->gc_pos_lock, seq));
|
||||
seq = read_seqcount_begin(&c->gc.pos_lock);
|
||||
ret = gc_pos_cmp(pos, c->gc.pos) <= 0;
|
||||
} while (read_seqcount_retry(&c->gc.pos_lock, seq));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2,6 +2,8 @@
|
||||
#ifndef _BCACHEFS_BTREE_GC_TYPES_H
|
||||
#define _BCACHEFS_BTREE_GC_TYPES_H
|
||||
|
||||
#include "btree/bbpos_types.h"
|
||||
|
||||
#include <linux/generic-radix-tree.h>
|
||||
|
||||
#define GC_PHASES() \
|
||||
@ -31,4 +33,21 @@ struct reflink_gc {
|
||||
|
||||
typedef GENRADIX(struct reflink_gc) reflink_gc_table;
|
||||
|
||||
struct bch_fs_gc {
|
||||
seqcount_t pos_lock;
|
||||
struct gc_pos pos;
|
||||
|
||||
/*
|
||||
* The allocation code needs gc_mark in struct bucket to be correct, but
|
||||
* it's not while a gc is in progress.
|
||||
*/
|
||||
struct rw_semaphore lock;
|
||||
};
|
||||
|
||||
struct bch_fs_gc_gens {
|
||||
struct bbpos pos;
|
||||
struct work_struct work;
|
||||
struct mutex lock;
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_GC_TYPES_H */
|
||||
|
||||
@ -670,7 +670,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
|
||||
|
||||
struct bkey_i *accounting;
|
||||
|
||||
scoped_guard(percpu_read, &c->mark_lock)
|
||||
scoped_guard(percpu_read, &c->capacity.mark_lock)
|
||||
for (accounting = btree_trans_subbuf_base(trans, &trans->accounting);
|
||||
accounting != btree_trans_subbuf_top(trans, &trans->accounting);
|
||||
accounting = bkey_next(accounting)) {
|
||||
@ -695,7 +695,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (unlikely(c->gc_pos.phase)) {
|
||||
if (unlikely(c->gc.pos.phase)) {
|
||||
ret = bch2_trans_commit_run_gc_triggers(trans);
|
||||
if (bch2_fs_fatal_err_on(ret, c, "fatal error in transaction commit: %s", bch2_err_str(ret)))
|
||||
return ret;
|
||||
@ -973,7 +973,7 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans,
|
||||
struct bkey_i *accounting;
|
||||
retry:
|
||||
memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta));
|
||||
percpu_down_read(&c->mark_lock);
|
||||
percpu_down_read(&c->capacity.mark_lock);
|
||||
for (accounting = btree_trans_subbuf_base(trans, &trans->accounting);
|
||||
accounting != btree_trans_subbuf_top(trans, &trans->accounting);
|
||||
accounting = bkey_next(accounting)) {
|
||||
@ -984,7 +984,7 @@ retry:
|
||||
if (ret)
|
||||
goto revert_fs_usage;
|
||||
}
|
||||
percpu_up_read(&c->mark_lock);
|
||||
percpu_up_read(&c->capacity.mark_lock);
|
||||
|
||||
/* Only fatal errors are possible later, so no need to revert this */
|
||||
bch2_trans_account_disk_usage_change(trans);
|
||||
@ -1008,7 +1008,7 @@ retry:
|
||||
}
|
||||
|
||||
if (i->type == BCH_JSET_ENTRY_btree_root) {
|
||||
guard(mutex)(&c->btree_root_lock);
|
||||
guard(mutex)(&c->btree_cache.root_lock);
|
||||
|
||||
struct btree_root *r = bch2_btree_id_root(c, i->btree_id);
|
||||
|
||||
@ -1029,13 +1029,13 @@ retry:
|
||||
return 0;
|
||||
fatal_err:
|
||||
bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret));
|
||||
percpu_down_read(&c->mark_lock);
|
||||
percpu_down_read(&c->capacity.mark_lock);
|
||||
revert_fs_usage:
|
||||
for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
|
||||
i != accounting;
|
||||
i = bkey_next(i))
|
||||
bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
percpu_up_read(&c->capacity.mark_lock);
|
||||
|
||||
if (bch2_err_matches(ret, BCH_ERR_btree_insert_need_mark_replicas)) {
|
||||
ret = drop_locks_do(trans, bch2_accounting_update_sb(trans));
|
||||
|
||||
@ -338,7 +338,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
|
||||
BUG_ON(b->ob.nr);
|
||||
retry:
|
||||
ret = bch2_alloc_sectors_req(trans, req,
|
||||
writepoint_ptr(&c->btree_write_point),
|
||||
writepoint_ptr(&c->allocator.btree_write_point),
|
||||
min(res->nr_replicas,
|
||||
c->opts.metadata_replicas_required),
|
||||
cl, &wp);
|
||||
@ -357,9 +357,9 @@ retry:
|
||||
goto retry;
|
||||
}
|
||||
|
||||
mutex_lock(&c->btree_reserve_cache_lock);
|
||||
while (c->btree_reserve_cache_nr) {
|
||||
struct btree_alloc *a = c->btree_reserve_cache + --c->btree_reserve_cache_nr;
|
||||
mutex_lock(&c->btree_reserve_cache.lock);
|
||||
while (c->btree_reserve_cache.nr) {
|
||||
struct btree_alloc *a = c->btree_reserve_cache.data + --c->btree_reserve_cache.nr;
|
||||
|
||||
/* check if it has sufficient durability */
|
||||
|
||||
@ -368,13 +368,13 @@ retry:
|
||||
bkey_i_to_s_c(&a->k))) {
|
||||
bkey_copy(&b->key, &a->k);
|
||||
b->ob = a->ob;
|
||||
mutex_unlock(&c->btree_reserve_cache_lock);
|
||||
mutex_unlock(&c->btree_reserve_cache.lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
bch2_open_buckets_put(c, &a->ob);
|
||||
}
|
||||
mutex_unlock(&c->btree_reserve_cache_lock);
|
||||
mutex_unlock(&c->btree_reserve_cache.lock);
|
||||
|
||||
bkey_btree_ptr_v2_init(&b->key);
|
||||
bch2_alloc_sectors_append_ptrs(c, wp, &b->key, btree_sectors(c), false);
|
||||
@ -511,12 +511,12 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
|
||||
while (p->nr) {
|
||||
struct btree *b = p->b[--p->nr];
|
||||
|
||||
mutex_lock(&c->btree_reserve_cache_lock);
|
||||
mutex_lock(&c->btree_reserve_cache.lock);
|
||||
|
||||
if (c->btree_reserve_cache_nr <
|
||||
ARRAY_SIZE(c->btree_reserve_cache)) {
|
||||
if (c->btree_reserve_cache.nr <
|
||||
ARRAY_SIZE(c->btree_reserve_cache.data)) {
|
||||
struct btree_alloc *a =
|
||||
&c->btree_reserve_cache[c->btree_reserve_cache_nr++];
|
||||
&c->btree_reserve_cache.data[c->btree_reserve_cache.nr++];
|
||||
|
||||
a->ob = b->ob;
|
||||
b->ob.nr = 0;
|
||||
@ -525,7 +525,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
|
||||
bch2_open_buckets_put(c, &b->ob);
|
||||
}
|
||||
|
||||
mutex_unlock(&c->btree_reserve_cache_lock);
|
||||
mutex_unlock(&c->btree_reserve_cache.lock);
|
||||
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
|
||||
@ -575,7 +575,7 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
|
||||
struct bch_fs *c = as->c;
|
||||
|
||||
if (as->took_gc_lock)
|
||||
up_read(&c->gc_lock);
|
||||
up_read(&c->gc.lock);
|
||||
as->took_gc_lock = false;
|
||||
|
||||
bch2_journal_pin_drop(&c->journal, &as->journal);
|
||||
@ -586,19 +586,19 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
|
||||
as->start_time);
|
||||
|
||||
guard(mutex)(&c->btree_interior_update_lock);
|
||||
guard(mutex)(&c->btree_interior_updates.lock);
|
||||
|
||||
list_del(&as->unwritten_list);
|
||||
list_del(&as->list);
|
||||
|
||||
closure_debug_destroy(&as->cl);
|
||||
mempool_free(as, &c->btree_interior_update_pool);
|
||||
mempool_free(as, &c->btree_interior_updates.pool);
|
||||
|
||||
/*
|
||||
* Have to do the wakeup with btree_interior_update_lock still held,
|
||||
* since being on btree_interior_update_list is our ref on @c:
|
||||
*/
|
||||
closure_wake_up(&c->btree_interior_update_wait);
|
||||
closure_wake_up(&c->btree_interior_updates.wait);
|
||||
}
|
||||
|
||||
static void bch2_btree_update_add_key(btree_update_nodes *nodes,
|
||||
@ -835,7 +835,7 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
* btree_node_update_key(): having the lock be at the filesystem level
|
||||
* sucks, we'll need to watch for contention
|
||||
*/
|
||||
scoped_guard(mutex, &c->btree_interior_update_commit_lock) {
|
||||
scoped_guard(mutex, &c->btree_interior_updates.commit_lock) {
|
||||
ret = commit_do(trans, &as->disk_res, &journal_seq,
|
||||
BCH_WATERMARK_interior_updates|
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
@ -902,7 +902,7 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
|
||||
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
mutex_lock(&c->btree_interior_updates.lock);
|
||||
|
||||
list_del(&as->write_blocked_list);
|
||||
if (list_empty(&b->write_blocked))
|
||||
@ -910,7 +910,7 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
|
||||
/*
|
||||
* Node might have been freed, recheck under
|
||||
* btree_interior_update_lock:
|
||||
* btree_interior_updates.lock:
|
||||
*/
|
||||
if (as->b == b) {
|
||||
BUG_ON(!b->c.level);
|
||||
@ -934,7 +934,7 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
mutex_unlock(&c->btree_interior_updates.lock);
|
||||
|
||||
mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
|
||||
six_unlock_write(&b->c.lock);
|
||||
@ -954,7 +954,7 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < as->nr_open_buckets; i++)
|
||||
bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
|
||||
bch2_open_bucket_put(c, c->allocator.open_buckets + as->open_buckets[i]);
|
||||
|
||||
bch2_btree_update_free(as, trans);
|
||||
}
|
||||
@ -962,12 +962,12 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
static void btree_interior_update_work(struct work_struct *work)
|
||||
{
|
||||
struct bch_fs *c =
|
||||
container_of(work, struct bch_fs, btree_interior_update_work);
|
||||
container_of(work, struct bch_fs, btree_interior_updates.work);
|
||||
struct btree_update *as;
|
||||
|
||||
while (1) {
|
||||
scoped_guard(mutex, &c->btree_interior_update_lock) {
|
||||
as = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
|
||||
scoped_guard(mutex, &c->btree_interior_updates.lock) {
|
||||
as = list_first_entry_or_null(&c->btree_interior_updates.unwritten,
|
||||
struct btree_update, unwritten_list);
|
||||
if (as && !as->nodes_written)
|
||||
as = NULL;
|
||||
@ -985,10 +985,10 @@ static CLOSURE_CALLBACK(btree_update_set_nodes_written)
|
||||
closure_type(as, struct btree_update, cl);
|
||||
struct bch_fs *c = as->c;
|
||||
|
||||
scoped_guard(mutex, &c->btree_interior_update_lock)
|
||||
scoped_guard(mutex, &c->btree_interior_updates.lock)
|
||||
as->nodes_written = true;
|
||||
|
||||
queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
|
||||
queue_work(c->btree_interior_updates.worker, &c->btree_interior_updates.work);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1004,8 +1004,8 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
|
||||
BUG_ON(!btree_node_dirty(b));
|
||||
BUG_ON(!b->c.level);
|
||||
|
||||
guard(mutex)(&c->btree_interior_update_lock);
|
||||
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
|
||||
guard(mutex)(&c->btree_interior_updates.lock);
|
||||
list_add_tail(&as->unwritten_list, &c->btree_interior_updates.unwritten);
|
||||
|
||||
as->mode = BTREE_UPDATE_node;
|
||||
as->b = b;
|
||||
@ -1026,7 +1026,7 @@ static void btree_update_reparent(struct btree_update *as,
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
|
||||
lockdep_assert_held(&c->btree_interior_update_lock);
|
||||
lockdep_assert_held(&c->btree_interior_updates.lock);
|
||||
|
||||
child->b = NULL;
|
||||
child->mode = BTREE_UPDATE_update;
|
||||
@ -1042,8 +1042,8 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
|
||||
BUG_ON(as->mode != BTREE_UPDATE_none);
|
||||
as->mode = BTREE_UPDATE_root;
|
||||
|
||||
scoped_guard(mutex, &c->btree_interior_update_lock)
|
||||
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
|
||||
scoped_guard(mutex, &c->btree_interior_updates.lock)
|
||||
list_add_tail(&as->unwritten_list, &c->btree_interior_updates.unwritten);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1064,7 +1064,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
|
||||
|
||||
closure_get(&as->cl);
|
||||
|
||||
guard(mutex)(&c->btree_interior_update_lock);
|
||||
guard(mutex)(&c->btree_interior_updates.lock);
|
||||
|
||||
BUG_ON(b->will_make_reachable);
|
||||
|
||||
@ -1110,7 +1110,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
|
||||
if (btree_node_fake(b))
|
||||
return;
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
mutex_lock(&c->btree_interior_updates.lock);
|
||||
|
||||
/*
|
||||
* Does this node have any btree_update operations preventing
|
||||
@ -1128,7 +1128,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
|
||||
* for flush_held_btree_writes() waiting on updates to flush or
|
||||
* nodes to be writeable:
|
||||
*/
|
||||
closure_wake_up(&c->btree_interior_update_wait);
|
||||
closure_wake_up(&c->btree_interior_updates.wait);
|
||||
}
|
||||
|
||||
clear_btree_node_dirty_acct(c, b);
|
||||
@ -1153,7 +1153,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
|
||||
bch2_btree_update_will_free_node_journal_pin_flush);
|
||||
bch2_journal_pin_drop(&c->journal, &w->journal);
|
||||
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
mutex_unlock(&c->btree_interior_updates.lock);
|
||||
|
||||
bch2_btree_update_add_node(c, &as->old_nodes, b);
|
||||
}
|
||||
@ -1166,13 +1166,13 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
|
||||
BUG_ON(as->mode == BTREE_UPDATE_none);
|
||||
|
||||
if (as->took_gc_lock)
|
||||
up_read(&as->c->gc_lock);
|
||||
up_read(&as->c->gc.lock);
|
||||
as->took_gc_lock = false;
|
||||
|
||||
bch2_btree_reserve_put(as, trans);
|
||||
|
||||
continue_at(&as->cl, btree_update_set_nodes_written,
|
||||
as->c->btree_interior_update_worker);
|
||||
as->c->btree_interior_updates.worker);
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_foreground],
|
||||
start_time);
|
||||
@ -1250,15 +1250,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
split = path->l[level_end].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
|
||||
}
|
||||
|
||||
if (!down_read_trylock(&c->gc_lock)) {
|
||||
ret = drop_locks_do(trans, (down_read(&c->gc_lock), 0));
|
||||
if (!down_read_trylock(&c->gc.lock)) {
|
||||
ret = drop_locks_do(trans, (down_read(&c->gc.lock), 0));
|
||||
if (ret) {
|
||||
up_read(&c->gc_lock);
|
||||
up_read(&c->gc.lock);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
}
|
||||
|
||||
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS);
|
||||
as = mempool_alloc(&c->btree_interior_updates.pool, GFP_NOFS);
|
||||
memset(as, 0, sizeof(*as));
|
||||
closure_init(&as->cl, NULL);
|
||||
as->c = c;
|
||||
@ -1277,8 +1277,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
darray_init(&as->new_nodes);
|
||||
bch2_keylist_init(&as->parent_keys, as->inline_keys);
|
||||
|
||||
scoped_guard(mutex, &c->btree_interior_update_lock)
|
||||
list_add_tail(&as->list, &c->btree_interior_update_list);
|
||||
scoped_guard(mutex, &c->btree_interior_updates.lock)
|
||||
list_add_tail(&as->list, &c->btree_interior_updates.list);
|
||||
|
||||
struct btree *b = btree_path_node(path, path->level);
|
||||
as->node_start = b->data->min_key;
|
||||
@ -1380,7 +1380,7 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
|
||||
scoped_guard(mutex, &c->btree_cache.lock)
|
||||
list_del_init(&b->list);
|
||||
|
||||
scoped_guard(mutex, &c->btree_root_lock)
|
||||
scoped_guard(mutex, &c->btree_cache.root_lock)
|
||||
bch2_btree_id_root(c, b->c.btree_id)->b = b;
|
||||
|
||||
bch2_recalc_btree_reserve(c);
|
||||
@ -1874,7 +1874,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
|
||||
int live_u64s_added, u64s_added;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&c->gc_lock);
|
||||
lockdep_assert_held(&c->gc.lock);
|
||||
BUG_ON(!b->c.level);
|
||||
BUG_ON(!as || as->b);
|
||||
bch2_verify_keylist_sorted(keys);
|
||||
@ -2419,10 +2419,10 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
|
||||
!bch2_err_matches(ret, EROFS))
|
||||
bch_err_fn_ratelimited(c, ret);
|
||||
|
||||
scoped_guard(spinlock, &c->btree_node_rewrites_lock)
|
||||
scoped_guard(spinlock, &c->btree_node_rewrites.lock)
|
||||
list_del(&a->list);
|
||||
|
||||
closure_wake_up(&c->btree_node_rewrites_wait);
|
||||
closure_wake_up(&c->btree_node_rewrites.wait);
|
||||
|
||||
bch2_bkey_buf_exit(&a->key);
|
||||
enumerated_ref_put(&c->writes, BCH_WRITE_REF_node_rewrite);
|
||||
@ -2446,19 +2446,19 @@ static void __bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b, b
|
||||
|
||||
bool now = false, pending = false;
|
||||
|
||||
scoped_guard(spinlock, &c->btree_node_rewrites_lock) {
|
||||
scoped_guard(spinlock, &c->btree_node_rewrites.lock) {
|
||||
if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) &&
|
||||
enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) {
|
||||
list_add(&a->list, &c->btree_node_rewrites);
|
||||
list_add(&a->list, &c->btree_node_rewrites.list);
|
||||
now = true;
|
||||
} else if (!test_bit(BCH_FS_may_go_rw, &c->flags) && !merge) {
|
||||
list_add(&a->list, &c->btree_node_rewrites_pending);
|
||||
list_add(&a->list, &c->btree_node_rewrites.pending);
|
||||
pending = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (now) {
|
||||
queue_work(c->btree_node_rewrite_worker, &a->work);
|
||||
queue_work(c->btree_node_rewrites.worker, &a->work);
|
||||
} else if (pending) {
|
||||
/* bch2_do_pending_node_rewrites will execute */
|
||||
} else {
|
||||
@ -2479,8 +2479,8 @@ void bch2_btree_node_merge_async(struct bch_fs *c, struct btree *b)
|
||||
|
||||
void bch2_async_btree_node_rewrites_flush(struct bch_fs *c)
|
||||
{
|
||||
closure_wait_event(&c->btree_node_rewrites_wait,
|
||||
list_empty(&c->btree_node_rewrites));
|
||||
closure_wait_event(&c->btree_node_rewrites.wait,
|
||||
list_empty(&c->btree_node_rewrites.list));
|
||||
}
|
||||
|
||||
void bch2_do_pending_node_rewrites(struct bch_fs *c)
|
||||
@ -2488,18 +2488,18 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
|
||||
while (1) {
|
||||
struct async_btree_rewrite *a;
|
||||
|
||||
scoped_guard(spinlock, &c->btree_node_rewrites_lock) {
|
||||
a = list_pop_entry(&c->btree_node_rewrites_pending,
|
||||
scoped_guard(spinlock, &c->btree_node_rewrites.lock) {
|
||||
a = list_pop_entry(&c->btree_node_rewrites.pending,
|
||||
struct async_btree_rewrite, list);
|
||||
if (a)
|
||||
list_add(&a->list, &c->btree_node_rewrites);
|
||||
list_add(&a->list, &c->btree_node_rewrites.list);
|
||||
}
|
||||
|
||||
if (!a)
|
||||
break;
|
||||
|
||||
enumerated_ref_get(&c->writes, BCH_WRITE_REF_node_rewrite);
|
||||
queue_work(c->btree_node_rewrite_worker, &a->work);
|
||||
queue_work(c->btree_node_rewrites.worker, &a->work);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2508,8 +2508,8 @@ void bch2_free_pending_node_rewrites(struct bch_fs *c)
|
||||
while (1) {
|
||||
struct async_btree_rewrite *a;
|
||||
|
||||
scoped_guard(spinlock, &c->btree_node_rewrites_lock)
|
||||
a = list_pop_entry(&c->btree_node_rewrites_pending,
|
||||
scoped_guard(spinlock, &c->btree_node_rewrites.lock)
|
||||
a = list_pop_entry(&c->btree_node_rewrites.pending,
|
||||
struct async_btree_rewrite, list);
|
||||
|
||||
if (!a)
|
||||
@ -2590,10 +2590,10 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
|
||||
bkey_copy(&b->key, new_key);
|
||||
bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b);
|
||||
} else {
|
||||
try(bch2_trans_mutex_lock(trans, &c->btree_interior_update_commit_lock));
|
||||
try(bch2_trans_mutex_lock(trans, &c->btree_interior_updates.commit_lock));
|
||||
|
||||
if (!btree_node_will_make_reachable(b)) {
|
||||
mutex_unlock(&c->btree_interior_update_commit_lock);
|
||||
mutex_unlock(&c->btree_interior_updates.commit_lock);
|
||||
return bch_err_throw(c, transaction_restart_nested);
|
||||
}
|
||||
|
||||
@ -2605,7 +2605,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
|
||||
bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b);
|
||||
|
||||
bkey_copy(&n->key, new_key);
|
||||
mutex_unlock(&c->btree_interior_update_commit_lock);
|
||||
mutex_unlock(&c->btree_interior_updates.commit_lock);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -2730,15 +2730,15 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
{
struct btree_update *as;

guard(mutex)(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list)
guard(mutex)(&c->btree_interior_updates.lock);
list_for_each_entry(as, &c->btree_interior_updates.list, list)
bch2_btree_update_to_text(out, as);
}

static bool bch2_btree_interior_updates_pending(struct bch_fs *c)
{
guard(mutex)(&c->btree_interior_update_lock);
return !list_empty(&c->btree_interior_update_list);
guard(mutex)(&c->btree_interior_updates.lock);
return !list_empty(&c->btree_interior_updates.list);
}

bool bch2_btree_interior_updates_flush(struct bch_fs *c)
@ -2746,7 +2746,7 @@ bool bch2_btree_interior_updates_flush(struct bch_fs *c)
bool ret = bch2_btree_interior_updates_pending(c);

if (ret)
closure_wait_event(&c->btree_interior_update_wait,
closure_wait_event(&c->btree_interior_updates.wait,
!bch2_btree_interior_updates_pending(c));
return ret;
}
@ -2755,7 +2755,7 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry
{
struct btree_root *r = bch2_btree_id_root(c, entry->btree_id);

guard(mutex)(&c->btree_interior_update_lock);
guard(mutex)(&c->btree_interior_updates.lock);

r->level = entry->level;
r->alive = true;
@ -2767,7 +2767,7 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,
struct jset_entry *end,
unsigned long skip)
{
guard(mutex)(&c->btree_interior_update_lock);
guard(mutex)(&c->btree_interior_updates.lock);

for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
@ -2798,49 +2798,49 @@ static void bch2_btree_alloc_to_text(struct printbuf *out,

void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c)
{
for (unsigned i = 0; i < c->btree_reserve_cache_nr; i++)
bch2_btree_alloc_to_text(out, c, &c->btree_reserve_cache[i]);
for (unsigned i = 0; i < c->btree_reserve_cache.nr; i++)
bch2_btree_alloc_to_text(out, c, &c->btree_reserve_cache.data[i]);
}

void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
{
WARN_ON(!list_empty(&c->btree_node_rewrites));
WARN_ON(!list_empty(&c->btree_node_rewrites_pending));
WARN_ON(!list_empty(&c->btree_node_rewrites.list));
WARN_ON(!list_empty(&c->btree_node_rewrites.pending));

if (c->btree_node_rewrite_worker)
destroy_workqueue(c->btree_node_rewrite_worker);
if (c->btree_interior_update_worker)
destroy_workqueue(c->btree_interior_update_worker);
mempool_exit(&c->btree_interior_update_pool);
if (c->btree_node_rewrites.worker)
destroy_workqueue(c->btree_node_rewrites.worker);
if (c->btree_interior_updates.worker)
destroy_workqueue(c->btree_interior_updates.worker);
mempool_exit(&c->btree_interior_updates.pool);
}

void bch2_fs_btree_interior_update_init_early(struct bch_fs *c)
{
mutex_init(&c->btree_reserve_cache_lock);
INIT_LIST_HEAD(&c->btree_interior_update_list);
INIT_LIST_HEAD(&c->btree_interior_updates_unwritten);
mutex_init(&c->btree_interior_update_lock);
mutex_init(&c->btree_interior_update_commit_lock);
INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work);
mutex_init(&c->btree_reserve_cache.lock);
INIT_LIST_HEAD(&c->btree_interior_updates.list);
INIT_LIST_HEAD(&c->btree_interior_updates.unwritten);
mutex_init(&c->btree_interior_updates.lock);
mutex_init(&c->btree_interior_updates.commit_lock);
INIT_WORK(&c->btree_interior_updates.work, btree_interior_update_work);

INIT_LIST_HEAD(&c->btree_node_rewrites);
INIT_LIST_HEAD(&c->btree_node_rewrites_pending);
spin_lock_init(&c->btree_node_rewrites_lock);
INIT_LIST_HEAD(&c->btree_node_rewrites.list);
INIT_LIST_HEAD(&c->btree_node_rewrites.pending);
spin_lock_init(&c->btree_node_rewrites.lock);
}

int bch2_fs_btree_interior_update_init(struct bch_fs *c)
{
c->btree_interior_update_worker =
c->btree_interior_updates.worker =
alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 8);
if (!c->btree_interior_update_worker)
if (!c->btree_interior_updates.worker)
return bch_err_throw(c, ENOMEM_btree_interior_update_worker_init);

c->btree_node_rewrite_worker =
c->btree_node_rewrites.worker =
alloc_ordered_workqueue("btree_node_rewrite", WQ_UNBOUND);
if (!c->btree_node_rewrite_worker)
if (!c->btree_node_rewrites.worker)
return bch_err_throw(c, ENOMEM_btree_interior_update_worker_init);

if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
if (mempool_init_kmalloc_pool(&c->btree_interior_updates.pool, 1,
sizeof(struct btree_update)))
return bch_err_throw(c, ENOMEM_btree_interior_update_pool_init);


@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H
#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H
#ifndef _BCACHEFS_BTREE_INTERIOR_H
#define _BCACHEFS_BTREE_INTERIOR_H

#include "btree/cache.h"
#include "btree/locking.h"
@ -349,4 +349,4 @@ void bch2_fs_btree_interior_update_exit(struct bch_fs *);
void bch2_fs_btree_interior_update_init_early(struct bch_fs *);
int bch2_fs_btree_interior_update_init(struct bch_fs *);

#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */
#endif /* _BCACHEFS_BTREE_INTERIOR_H */

48
libbcachefs/btree/interior_types.h
Normal file
@ -0,0 +1,48 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_INTERIOR_TYPES_H
#define _BCACHEFS_BTREE_INTERIOR_TYPES_H

struct btree_alloc {
struct open_buckets ob;
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX);
};

/* Maximum number of nodes we might need to allocate atomically: */
#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))

/* Size of the freelist we allocate btree nodes from: */
#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4)

/*
* Cache of allocated btree nodes - if we allocate a btree node and don't use
* it, if we free it that space can't be reused until going _all_ the way
* through the allocator (which exposes us to a livelock when allocating btree
* reserves fail halfway through) - instead, we can stick them here:
*/
struct bch_fs_btree_reserve_cache {
struct mutex lock;
unsigned nr;
struct btree_alloc data[BTREE_NODE_RESERVE * 2];
};

struct bch_fs_btree_interior_updates {
mempool_t pool;
struct list_head list;
struct list_head unwritten;
struct mutex lock;
struct mutex commit_lock;
struct closure_waitlist wait;

struct workqueue_struct *worker;
struct work_struct work;
};

struct bch_fs_btree_node_rewrites {
struct list_head list;
struct list_head pending;
spinlock_t lock;
struct closure_waitlist wait;
struct workqueue_struct *worker;
};

#endif /* _BCACHEFS_BTREE_INTERIOR_TYPES_H */
@ -3344,7 +3344,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
|
||||
new_mem = allocate_dropping_locks_norelock(trans, lock_dropped,
|
||||
kmalloc(new_bytes, _gfp|__GFP_NOWARN));
|
||||
if (!new_mem) {
|
||||
new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
|
||||
new_mem = mempool_alloc(&c->btree_trans.malloc_pool, GFP_KERNEL);
|
||||
new_bytes = BTREE_TRANS_MEM_MAX;
|
||||
trans->used_mempool = true;
|
||||
}
|
||||
@ -3391,7 +3391,7 @@ void bch2_trans_srcu_unlock(struct btree_trans *trans)
|
||||
path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
|
||||
|
||||
check_srcu_held_too_long(trans);
|
||||
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
|
||||
srcu_read_unlock(&c->btree_trans.barrier, trans->srcu_idx);
|
||||
trans->srcu_held = false;
|
||||
}
|
||||
}
|
||||
@ -3399,7 +3399,7 @@ void bch2_trans_srcu_unlock(struct btree_trans *trans)
|
||||
static void bch2_trans_srcu_lock(struct btree_trans *trans)
|
||||
{
|
||||
if (!trans->srcu_held) {
|
||||
trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
|
||||
trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans.barrier);
|
||||
trans->srcu_lock_time = jiffies;
|
||||
trans->srcu_held = true;
|
||||
}
|
||||
@ -3438,7 +3438,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
|
||||
(void)lock_dropped;
|
||||
|
||||
if (!new_mem) {
|
||||
new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
|
||||
new_mem = mempool_alloc(&trans->c->btree_trans.malloc_pool, GFP_KERNEL);
|
||||
new_bytes = BTREE_TRANS_MEM_MAX;
|
||||
trans->used_mempool = true;
|
||||
kfree(trans->mem);
|
||||
@ -3535,24 +3535,24 @@ unsigned bch2_trans_get_fn_idx(const char *fn)
|
||||
static inline struct btree_trans *bch2_trans_alloc(struct bch_fs *c)
|
||||
{
|
||||
if (IS_ENABLED(__KERNEL__)) {
|
||||
struct btree_trans *trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL);
|
||||
struct btree_trans *trans = this_cpu_xchg(c->btree_trans.bufs->trans, NULL);
|
||||
if (trans) {
|
||||
memset(trans, 0, offsetof(struct btree_trans, list));
|
||||
return trans;
|
||||
}
|
||||
}
|
||||
|
||||
struct btree_trans *trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
|
||||
struct btree_trans *trans = mempool_alloc(&c->btree_trans.pool, GFP_NOFS);
|
||||
memset(trans, 0, sizeof(*trans));
|
||||
|
||||
seqmutex_lock(&c->btree_trans_lock);
|
||||
seqmutex_lock(&c->btree_trans.lock);
|
||||
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
|
||||
struct btree_trans *pos;
|
||||
pid_t pid = current->pid;
|
||||
|
||||
trans->locking_wait.task = current;
|
||||
|
||||
list_for_each_entry(pos, &c->btree_trans_list, list) {
|
||||
list_for_each_entry(pos, &c->btree_trans.list, list) {
|
||||
struct task_struct *pos_task = READ_ONCE(pos->locking_wait.task);
|
||||
/*
|
||||
* We'd much prefer to be stricter here and completely
|
||||
@ -3566,14 +3566,14 @@ static inline struct btree_trans *bch2_trans_alloc(struct bch_fs *c)
|
||||
}
|
||||
}
|
||||
|
||||
list_add(&trans->list, &c->btree_trans_list);
|
||||
seqmutex_unlock(&c->btree_trans_lock);
|
||||
list_add(&trans->list, &c->btree_trans.list);
|
||||
seqmutex_unlock(&c->btree_trans.lock);
|
||||
|
||||
return trans;
|
||||
}
|
||||
|
||||
struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
|
||||
__acquires(&c->btree_trans_barrier)
|
||||
__acquires(&c->btree_trans.barrier)
|
||||
{
|
||||
/*
|
||||
* No multithreaded btree access until we've gone RW and are no longer
|
||||
@ -3608,7 +3608,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
|
||||
if (fn_idx < BCH_TRANSACTIONS_NR) {
|
||||
trans->fn = bch2_btree_transaction_fns[fn_idx];
|
||||
|
||||
struct btree_transaction_stats *s = &c->btree_transaction_stats[fn_idx];
|
||||
struct btree_transaction_stats *s = &c->btree_trans.stats[fn_idx];
|
||||
|
||||
if (s->max_mem) {
|
||||
unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem);
|
||||
@ -3621,7 +3621,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
|
||||
trans->nr_paths_max = s->nr_max_paths;
|
||||
}
|
||||
|
||||
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
|
||||
trans->srcu_idx = srcu_read_lock(&c->btree_trans.barrier);
|
||||
trans->srcu_lock_time = jiffies;
|
||||
trans->srcu_held = true;
|
||||
trans_set_locked(trans, false);
|
||||
@ -3669,7 +3669,7 @@ static inline void check_btree_paths_leaked(struct btree_trans *trans) {}
|
||||
#endif
|
||||
|
||||
void bch2_trans_put(struct btree_trans *trans)
|
||||
__releases(&c->btree_trans_barrier)
|
||||
__releases(&c->btree_trans.barrier)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
@ -3686,7 +3686,7 @@ void bch2_trans_put(struct btree_trans *trans)
|
||||
|
||||
if (trans->srcu_held) {
|
||||
check_srcu_held_too_long(trans);
|
||||
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
|
||||
srcu_read_unlock(&c->btree_trans.barrier, trans->srcu_idx);
|
||||
}
|
||||
|
||||
if (unlikely(trans->journal_replay_not_finished))
|
||||
@ -3714,35 +3714,35 @@ void bch2_trans_put(struct btree_trans *trans)
|
||||
kvfree_rcu_mightsleep(paths_allocated);
|
||||
|
||||
if (trans->used_mempool)
|
||||
mempool_free(trans->mem, &c->btree_trans_mem_pool);
|
||||
mempool_free(trans->mem, &c->btree_trans.malloc_pool);
|
||||
else
|
||||
kfree(trans->mem);
|
||||
|
||||
/* Userspace doesn't have a real percpu implementation: */
|
||||
if (IS_ENABLED(__KERNEL__))
|
||||
trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
|
||||
trans = this_cpu_xchg(c->btree_trans.bufs->trans, trans);
|
||||
|
||||
if (trans) {
|
||||
seqmutex_lock(&c->btree_trans_lock);
|
||||
seqmutex_lock(&c->btree_trans.lock);
|
||||
list_del(&trans->list);
|
||||
seqmutex_unlock(&c->btree_trans_lock);
|
||||
seqmutex_unlock(&c->btree_trans.lock);
|
||||
|
||||
mempool_free(trans, &c->btree_trans_pool);
|
||||
mempool_free(trans, &c->btree_trans.pool);
|
||||
}
|
||||
}
|
||||
|
||||
bool bch2_current_has_btree_trans(struct bch_fs *c)
|
||||
{
|
||||
seqmutex_lock(&c->btree_trans_lock);
|
||||
seqmutex_lock(&c->btree_trans.lock);
|
||||
struct btree_trans *trans;
|
||||
bool ret = false;
|
||||
list_for_each_entry(trans, &c->btree_trans_list, list)
|
||||
list_for_each_entry(trans, &c->btree_trans.list, list)
|
||||
if (trans->locking_wait.task == current &&
|
||||
trans->locked) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
seqmutex_unlock(&c->btree_trans_lock);
|
||||
seqmutex_unlock(&c->btree_trans.lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3837,26 +3837,26 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
|
||||
struct btree_trans *trans;
|
||||
int cpu;
|
||||
|
||||
if (c->btree_trans_bufs)
|
||||
if (c->btree_trans.bufs)
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct btree_trans *trans =
|
||||
per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;
|
||||
per_cpu_ptr(c->btree_trans.bufs, cpu)->trans;
|
||||
|
||||
if (trans) {
|
||||
seqmutex_lock(&c->btree_trans_lock);
|
||||
seqmutex_lock(&c->btree_trans.lock);
|
||||
list_del(&trans->list);
|
||||
seqmutex_unlock(&c->btree_trans_lock);
|
||||
seqmutex_unlock(&c->btree_trans.lock);
|
||||
}
|
||||
kfree(trans);
|
||||
}
|
||||
free_percpu(c->btree_trans_bufs);
|
||||
free_percpu(c->btree_trans.bufs);
|
||||
|
||||
trans = list_first_entry_or_null(&c->btree_trans_list, struct btree_trans, list);
|
||||
trans = list_first_entry_or_null(&c->btree_trans.list, struct btree_trans, list);
|
||||
if (trans)
|
||||
panic("%s leaked btree_trans\n", trans->fn);
|
||||
|
||||
for (s = c->btree_transaction_stats;
|
||||
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
|
||||
for (s = c->btree_trans.stats;
|
||||
s < c->btree_trans.stats + ARRAY_SIZE(c->btree_trans.stats);
|
||||
s++) {
|
||||
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
|
||||
darray_exit(&s->trans_kmalloc_trace);
|
||||
@ -3865,39 +3865,39 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
|
||||
bch2_time_stats_exit(&s->lock_hold_times);
|
||||
}
|
||||
|
||||
if (c->btree_trans_barrier_initialized) {
|
||||
synchronize_srcu_expedited(&c->btree_trans_barrier);
|
||||
cleanup_srcu_struct(&c->btree_trans_barrier);
|
||||
if (c->btree_trans.barrier_initialized) {
|
||||
synchronize_srcu_expedited(&c->btree_trans.barrier);
|
||||
cleanup_srcu_struct(&c->btree_trans.barrier);
|
||||
}
|
||||
mempool_exit(&c->btree_trans_mem_pool);
|
||||
mempool_exit(&c->btree_trans_pool);
|
||||
mempool_exit(&c->btree_trans.malloc_pool);
|
||||
mempool_exit(&c->btree_trans.pool);
|
||||
}
|
||||
|
||||
void bch2_fs_btree_iter_init_early(struct bch_fs *c)
|
||||
{
|
||||
struct btree_transaction_stats *s;
|
||||
|
||||
for (s = c->btree_transaction_stats;
|
||||
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
|
||||
for (s = c->btree_trans.stats;
|
||||
s < c->btree_trans.stats + ARRAY_SIZE(c->btree_trans.stats);
|
||||
s++) {
|
||||
bch2_time_stats_init(&s->duration);
|
||||
bch2_time_stats_init(&s->lock_hold_times);
|
||||
mutex_init(&s->lock);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&c->btree_trans_list);
|
||||
seqmutex_init(&c->btree_trans_lock);
|
||||
INIT_LIST_HEAD(&c->btree_trans.list);
|
||||
seqmutex_init(&c->btree_trans.lock);
|
||||
}
|
||||
|
||||
int bch2_fs_btree_iter_init(struct bch_fs *c)
|
||||
{
|
||||
c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf);
|
||||
if (!c->btree_trans_bufs)
|
||||
c->btree_trans.bufs = alloc_percpu(struct btree_trans_buf);
|
||||
if (!c->btree_trans.bufs)
|
||||
return -ENOMEM;
|
||||
|
||||
try(mempool_init_kmalloc_pool(&c->btree_trans_pool, 1, sizeof(struct btree_trans)));
|
||||
try(mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1, BTREE_TRANS_MEM_MAX));
|
||||
try(init_srcu_struct(&c->btree_trans_barrier));
|
||||
try(mempool_init_kmalloc_pool(&c->btree_trans.pool, 1, sizeof(struct btree_trans)));
|
||||
try(mempool_init_kmalloc_pool(&c->btree_trans.malloc_pool, 1, BTREE_TRANS_MEM_MAX));
|
||||
try(init_srcu_struct(&c->btree_trans.barrier));
|
||||
|
||||
/*
|
||||
* static annotation (hackily done) for lock ordering of reclaim vs.
|
||||
@ -3911,7 +3911,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
|
||||
fs_reclaim_release(GFP_KERNEL);
|
||||
#endif
|
||||
|
||||
c->btree_trans_barrier_initialized = true;
|
||||
c->btree_trans.barrier_initialized = true;
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ static bool bkey_cached_lock_for_evict(struct bkey_cached *ck)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool bkey_cached_evict(struct btree_key_cache *c,
|
||||
static bool bkey_cached_evict(struct bch_fs_btree_key_cache *c,
|
||||
struct bkey_cached *ck)
|
||||
{
|
||||
bool ret = !rhashtable_remove_fast(&c->table, &ck->hash,
|
||||
@ -96,7 +96,7 @@ static bool bkey_cached_evict(struct btree_key_cache *c,
|
||||
|
||||
static void __bkey_cached_free(struct rcu_pending *pending, struct rcu_head *rcu)
|
||||
{
|
||||
struct bch_fs *c = container_of(pending->srcu, struct bch_fs, btree_trans_barrier);
|
||||
struct bch_fs *c = container_of(pending->srcu, struct bch_fs, btree_trans.barrier);
|
||||
struct bkey_cached *ck = container_of(rcu, struct bkey_cached, rcu);
|
||||
|
||||
this_cpu_dec(*c->btree_key_cache.nr_pending);
|
||||
@ -104,7 +104,7 @@ static void __bkey_cached_free(struct rcu_pending *pending, struct rcu_head *rcu
|
||||
kmem_cache_free(bch2_key_cache, ck);
|
||||
}
|
||||
|
||||
static inline void bkey_cached_free_noassert(struct btree_key_cache *bc,
|
||||
static inline void bkey_cached_free_noassert(struct bch_fs_btree_key_cache *bc,
|
||||
struct bkey_cached *ck)
|
||||
{
|
||||
kfree(ck->k);
|
||||
@ -120,7 +120,7 @@ static inline void bkey_cached_free_noassert(struct btree_key_cache *bc,
|
||||
}
|
||||
|
||||
static void bkey_cached_free(struct btree_trans *trans,
|
||||
struct btree_key_cache *bc,
|
||||
struct bch_fs_btree_key_cache *bc,
|
||||
struct bkey_cached *ck)
|
||||
{
|
||||
/*
|
||||
@ -152,7 +152,7 @@ static struct bkey_cached *
|
||||
bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned key_u64s)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_key_cache *bc = &c->btree_key_cache;
|
||||
struct bch_fs_btree_key_cache *bc = &c->btree_key_cache;
|
||||
bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id);
|
||||
int ret;
|
||||
|
||||
@ -182,7 +182,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
|
||||
}
|
||||
|
||||
static struct bkey_cached *
|
||||
bkey_cached_reuse(struct btree_key_cache *c)
|
||||
bkey_cached_reuse(struct bch_fs_btree_key_cache *c)
|
||||
{
|
||||
|
||||
guard(rcu)();
|
||||
@ -209,7 +209,7 @@ static int btree_key_cache_create(struct btree_trans *trans,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_key_cache *bc = &c->btree_key_cache;
|
||||
struct bch_fs_btree_key_cache *bc = &c->btree_key_cache;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
@ -516,7 +516,7 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
|
||||
struct bkey_cached *ck =
|
||||
container_of(pin, struct bkey_cached, journal);
|
||||
struct bkey_cached_key key;
|
||||
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
|
||||
int srcu_idx = srcu_read_lock(&c->btree_trans.barrier);
|
||||
int ret = 0;
|
||||
|
||||
CLASS(btree_trans, trans)(c);
|
||||
@ -545,7 +545,7 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
|
||||
!bch2_journal_error(j), c,
|
||||
"flushing key cache: %s", bch2_err_str(ret));
|
||||
unlock:
|
||||
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
|
||||
srcu_read_unlock(&c->btree_trans.barrier, srcu_idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -600,7 +600,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
|
||||
struct btree_path *path)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_key_cache *bc = &c->btree_key_cache;
|
||||
struct bch_fs_btree_key_cache *bc = &c->btree_key_cache;
|
||||
struct bkey_cached *ck = (void *) path->l[0].b;
|
||||
|
||||
/*
|
||||
@ -642,14 +642,14 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct bch_fs *c = shrink->private_data;
|
||||
struct btree_key_cache *bc = &c->btree_key_cache;
|
||||
struct bch_fs_btree_key_cache *bc = &c->btree_key_cache;
|
||||
struct bucket_table *tbl;
|
||||
struct bkey_cached *ck;
|
||||
size_t scanned = 0, freed = 0, nr = sc->nr_to_scan;
|
||||
unsigned iter, start;
|
||||
int srcu_idx;
|
||||
|
||||
srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
|
||||
srcu_idx = srcu_read_lock(&c->btree_trans.barrier);
|
||||
rcu_read_lock();
|
||||
|
||||
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
|
||||
@ -663,7 +663,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
*/
|
||||
if (unlikely(tbl->nest)) {
|
||||
rcu_read_unlock();
|
||||
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
|
||||
srcu_read_unlock(&c->btree_trans.barrier, srcu_idx);
|
||||
return SHRINK_STOP;
|
||||
}
|
||||
|
||||
@ -712,7 +712,7 @@ out:
|
||||
bc->shrink_iter = iter;
|
||||
|
||||
rcu_read_unlock();
|
||||
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
|
||||
srcu_read_unlock(&c->btree_trans.barrier, srcu_idx);
|
||||
|
||||
return freed;
|
||||
}
|
||||
@ -721,7 +721,7 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct bch_fs *c = shrink->private_data;
|
||||
struct btree_key_cache *bc = &c->btree_key_cache;
|
||||
struct bch_fs_btree_key_cache *bc = &c->btree_key_cache;
|
||||
long nr = atomic_long_read(&bc->nr_keys) -
|
||||
atomic_long_read(&bc->nr_dirty);
|
||||
|
||||
@ -736,7 +736,7 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
|
||||
return max(0L, nr);
|
||||
}
|
||||
|
||||
void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
|
||||
void bch2_fs_btree_key_cache_exit(struct bch_fs_btree_key_cache *bc)
|
||||
{
|
||||
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
|
||||
struct bucket_table *tbl;
|
||||
@ -792,11 +792,11 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
|
||||
free_percpu(bc->nr_pending);
|
||||
}
|
||||
|
||||
void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
|
||||
void bch2_fs_btree_key_cache_init_early(struct bch_fs_btree_key_cache *c)
|
||||
{
|
||||
}
|
||||
|
||||
int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
|
||||
int bch2_fs_btree_key_cache_init(struct bch_fs_btree_key_cache *bc)
|
||||
{
|
||||
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
|
||||
struct shrinker *shrink;
|
||||
@ -805,8 +805,8 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
|
||||
if (!bc->nr_pending)
|
||||
return bch_err_throw(c, ENOMEM_fs_btree_cache_init);
|
||||
|
||||
if (rcu_pending_init(&bc->pending[0], &c->btree_trans_barrier, __bkey_cached_free) ||
|
||||
rcu_pending_init(&bc->pending[1], &c->btree_trans_barrier, __bkey_cached_free))
|
||||
if (rcu_pending_init(&bc->pending[0], &c->btree_trans.barrier, __bkey_cached_free) ||
|
||||
rcu_pending_init(&bc->pending[1], &c->btree_trans.barrier, __bkey_cached_free))
|
||||
return bch_err_throw(c, ENOMEM_fs_btree_cache_init);
|
||||
|
||||
if (rhashtable_init(&bc->table, &bch2_btree_key_cache_params))
|
||||
@ -827,7 +827,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *bc)
|
||||
void bch2_btree_key_cache_to_text(struct printbuf *out, struct bch_fs_btree_key_cache *bc)
|
||||
{
|
||||
printbuf_tabstop_push(out, 24);
|
||||
printbuf_tabstop_push(out, 12);
|
||||
|
||||
@ -47,11 +47,11 @@ bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned,
|
||||
void bch2_btree_key_cache_drop(struct btree_trans *,
|
||||
struct btree_path *);
|
||||
|
||||
void bch2_fs_btree_key_cache_exit(struct btree_key_cache *);
|
||||
void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *);
|
||||
int bch2_fs_btree_key_cache_init(struct btree_key_cache *);
|
||||
void bch2_fs_btree_key_cache_exit(struct bch_fs_btree_key_cache *);
|
||||
void bch2_fs_btree_key_cache_init_early(struct bch_fs_btree_key_cache *);
|
||||
int bch2_fs_btree_key_cache_init(struct bch_fs_btree_key_cache *);
|
||||
|
||||
void bch2_btree_key_cache_to_text(struct printbuf *, struct btree_key_cache *);
|
||||
void bch2_btree_key_cache_to_text(struct printbuf *, struct bch_fs_btree_key_cache *);
|
||||
|
||||
void bch2_btree_key_cache_exit(void);
|
||||
int __init bch2_btree_key_cache_init(void);
|
||||
|
||||
@ -4,7 +4,7 @@

#include "util/rcu_pending.h"

struct btree_key_cache {
struct bch_fs_btree_key_cache {
struct rhashtable table;
bool table_init_done;


@ -25,8 +25,8 @@ static inline bool is_btree_node(struct btree_path *path, unsigned l)

static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans)
{
return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats)
? &trans->c->btree_transaction_stats[trans->fn_idx]
return trans->fn_idx < ARRAY_SIZE(trans->c->btree_trans.stats)
? &trans->c->btree_trans.stats[trans->fn_idx]
: NULL;
}


@ -59,11 +59,6 @@ struct btree_write {
struct journal_entry_pin journal;
};

struct btree_alloc {
struct open_buckets ob;
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX);
};

struct btree_bkey_cached_common {
struct six_lock lock;
u8 level;
@ -166,7 +161,21 @@ struct btree_cache_list {
size_t nr;
};

struct btree_cache {
struct btree_root {
struct btree *b;

/* On disk root - see async splits: */
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
u8 level;
u8 alive;
s16 error;
};

struct bch_fs_btree_cache {
struct btree_root roots_known[BTREE_ID_NR];
DARRAY(struct btree_root) roots_extra;
struct mutex root_lock;

struct rhashtable table;
bool table_init_done;
/*
@ -580,6 +589,37 @@ struct btree_trans {
struct btree_insert_entry _updates[BTREE_ITER_INITIAL];
};

struct btree_trans_buf {
struct btree_trans *trans;
};

struct btree_transaction_stats {
struct bch2_time_stats duration;
struct bch2_time_stats lock_hold_times;
struct mutex lock;
unsigned nr_max_paths;
unsigned max_mem;
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif
char *max_paths_text;
};

#define BCH_TRANSACTIONS_NR 128

struct bch_fs_btree_trans {
struct seqmutex lock;
struct list_head list;
mempool_t pool;
mempool_t malloc_pool;
struct btree_trans_buf __percpu *bufs;

struct srcu_struct barrier;
bool barrier_initialized;

struct btree_transaction_stats stats[BCH_TRANSACTIONS_NR];
};

static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter)
{
return trans->paths + iter->path;
@ -924,16 +964,6 @@ static inline u8 btree_trigger_order(enum btree_id btree)
}
}

struct btree_root {
struct btree *b;

/* On disk root - see async splits: */
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
u8 level;
u8 alive;
s16 error;
};

enum btree_gc_coalesce_fail_reason {
BTREE_GC_COALESCE_FAIL_RESERVE_GET,
BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,

@ -241,7 +241,7 @@ btree_write_buffered_insert(struct btree_trans *trans,
|
||||
BTREE_UPDATE_internal_snapshot_node);
|
||||
}
|
||||
|
||||
static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb)
|
||||
static void move_keys_from_inc_to_flushing(struct bch_fs_btree_write_buffer *wb)
|
||||
{
|
||||
struct bch_fs *c = container_of(wb, struct bch_fs, btree_write_buffer);
|
||||
struct journal *j = &c->journal;
|
||||
@ -307,7 +307,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct journal *j = &c->journal;
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct btree_iter iter = { NULL };
|
||||
size_t overwritten = 0, fast = 0, noop = 0, slowpath = 0, could_not_insert = 0;
|
||||
bool write_locked = false;
|
||||
@ -577,7 +577,7 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 max_seq,
|
||||
bool *did_work)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
int ret = 0, fetch_from_journal_err;
|
||||
|
||||
do {
|
||||
@ -639,7 +639,7 @@ bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *c)
|
||||
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
int ret = 0;
|
||||
|
||||
if (mutex_trylock(&wb->flushing.lock)) {
|
||||
@ -711,7 +711,7 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
|
||||
static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
|
||||
{
|
||||
struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
int ret;
|
||||
|
||||
scoped_guard(mutex, &wb->flushing.lock) {
|
||||
@ -724,7 +724,7 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
|
||||
enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
|
||||
}
|
||||
|
||||
static void wb_accounting_sort(struct btree_write_buffer *wb)
|
||||
static void wb_accounting_sort(struct bch_fs_btree_write_buffer *wb)
|
||||
{
|
||||
eytzinger0_sort(wb->accounting.data, wb->accounting.nr,
|
||||
sizeof(wb->accounting.data[0]),
|
||||
@ -734,7 +734,7 @@ static void wb_accounting_sort(struct btree_write_buffer *wb)
|
||||
int bch2_accounting_key_to_wb_slowpath(struct bch_fs *c, enum btree_id btree,
|
||||
struct bkey_i_accounting *k)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
|
||||
event_inc_trace(c, accounting_key_to_wb_slowpath, buf, ({
|
||||
prt_printf(&buf, "have: %zu\n", wb->accounting.nr);
|
||||
@ -754,7 +754,7 @@ int bch2_journal_key_to_wb_slowpath(struct bch_fs *c,
|
||||
struct journal_keys_to_wb *dst,
|
||||
enum btree_id btree, struct bkey_i *k)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
unsigned u64s = wb_key_u64s(k);
|
||||
int ret;
|
||||
retry:
|
||||
@ -786,7 +786,7 @@ retry:
|
||||
|
||||
void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_keys_to_wb *dst, u64 seq)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
|
||||
if (mutex_trylock(&wb->flushing.lock)) {
|
||||
mutex_lock(&wb->inc.lock);
|
||||
@ -822,7 +822,7 @@ void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_ke
|
||||
|
||||
int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
unsigned live_accounting_keys = 0;
|
||||
int ret = 0;
|
||||
|
||||
@ -875,7 +875,7 @@ static int wb_keys_resize(struct btree_write_buffer_keys *wb, size_t new_size)
|
||||
|
||||
int bch2_btree_write_buffer_resize(struct bch_fs *c, size_t new_size)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
|
||||
return wb_keys_resize(&wb->flushing, new_size) ?:
|
||||
wb_keys_resize(&wb->inc, new_size);
|
||||
@ -883,7 +883,7 @@ int bch2_btree_write_buffer_resize(struct bch_fs *c, size_t new_size)
|
||||
|
||||
void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
|
||||
BUG_ON((wb->inc.keys.nr || wb->flushing.keys.nr) &&
|
||||
!bch2_journal_error(&c->journal));
|
||||
@ -896,7 +896,7 @@ void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
|
||||
|
||||
void bch2_fs_btree_write_buffer_init_early(struct bch_fs *c)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
|
||||
mutex_init(&wb->inc.lock);
|
||||
mutex_init(&wb->flushing.lock);
|
||||
@ -905,7 +905,7 @@ void bch2_fs_btree_write_buffer_init_early(struct bch_fs *c)
|
||||
|
||||
int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
|
||||
/* Will be resized by journal as needed: */
|
||||
unsigned initial_size = 1 << 16;
|
||||
|
||||
@ -8,14 +8,14 @@
|
||||
|
||||
static inline bool bch2_btree_write_buffer_should_flush(struct bch_fs *c)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
|
||||
return wb->inc.keys.nr + wb->flushing.keys.nr > wb->inc.keys.size / 4;
|
||||
}
|
||||
|
||||
static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
|
||||
return wb->inc.keys.nr > wb->inc.keys.size * 3 / 4;
|
||||
}
|
||||
@ -72,7 +72,7 @@ int bch2_accounting_key_to_wb_slowpath(struct bch_fs *,
|
||||
static inline int bch2_accounting_key_to_wb(struct bch_fs *c,
|
||||
enum btree_id btree, struct bkey_i_accounting *k)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct bch_fs_btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct btree_write_buffered_key search;
|
||||
search.btree = btree;
|
||||
search.k.k.p = k->k.p;
|
||||
|
||||
@ -48,7 +48,7 @@ struct btree_write_buffer_keys {
|
||||
struct mutex lock;
|
||||
};
|
||||
|
||||
struct btree_write_buffer {
|
||||
struct bch_fs_btree_write_buffer {
|
||||
DARRAY(struct wb_key_ref) sorted;
|
||||
struct btree_write_buffer_keys inc;
|
||||
struct btree_write_buffer_keys flushing;
|
||||
|
||||
@ -415,19 +415,19 @@ u64 bch2_copygc_wait_amount(struct bch_fs *c)
|
||||
void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
printbuf_tabstop_push(out, 32);
|
||||
prt_printf(out, "running:\t%u\n", c->copygc_running);
|
||||
prt_printf(out, "copygc_wait:\t%llu\n", c->copygc_wait);
|
||||
prt_printf(out, "copygc_wait_at:\t%llu\n", c->copygc_wait_at);
|
||||
prt_printf(out, "running:\t%u\n", c->copygc.running);
|
||||
prt_printf(out, "copygc_wait:\t%llu\n", c->copygc.wait);
|
||||
prt_printf(out, "copygc_wait_at:\t%llu\n", c->copygc.wait_at);
|
||||
|
||||
prt_printf(out, "Currently waiting for:\t");
|
||||
prt_human_readable_u64(out, max(0LL, c->copygc_wait -
|
||||
prt_human_readable_u64(out, max(0LL, c->copygc.wait -
|
||||
atomic64_read(&c->io_clock[WRITE].now)) << 9);
|
||||
prt_newline(out);
|
||||
|
||||
prt_printf(out, "Currently waiting since:\t");
|
||||
prt_human_readable_u64(out, max(0LL,
|
||||
atomic64_read(&c->io_clock[WRITE].now) -
|
||||
c->copygc_wait_at) << 9);
|
||||
c->copygc.wait_at) << 9);
|
||||
prt_newline(out);
|
||||
|
||||
bch2_printbuf_make_room(out, 4096);
|
||||
@ -442,7 +442,7 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
prt_newline(out);
|
||||
}
|
||||
|
||||
t = rcu_dereference(c->copygc_thread);
|
||||
t = rcu_dereference(c->copygc.thread);
|
||||
if (t)
|
||||
get_task_struct(t);
|
||||
}
|
||||
@ -483,7 +483,7 @@ static int bch2_copygc_thread(void *arg)
|
||||
|
||||
bch2_move_stats_init(&move_stats, "copygc");
|
||||
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
|
||||
writepoint_ptr(&c->copygc_write_point),
|
||||
writepoint_ptr(&c->copygc.write_point),
|
||||
false);
|
||||
|
||||
while (!ret && !kthread_should_stop()) {
|
||||
@ -508,21 +508,21 @@ static int bch2_copygc_thread(void *arg)
|
||||
wait = bch2_copygc_wait_amount(c);
|
||||
|
||||
if (wait > clock->max_slop) {
|
||||
c->copygc_wait_at = last;
|
||||
c->copygc_wait = last + wait;
|
||||
c->copygc.wait_at = last;
|
||||
c->copygc.wait = last + wait;
|
||||
move_buckets_wait(&ctxt, &buckets, true);
|
||||
bch2_kthread_io_clock_wait(clock, last + wait,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
continue;
|
||||
}
|
||||
|
||||
c->copygc_wait = 0;
|
||||
c->copygc.wait = 0;
|
||||
|
||||
c->copygc_running = true;
|
||||
c->copygc.running = true;
|
||||
ret = bch2_copygc(&ctxt, &buckets, &did_work);
|
||||
c->copygc_running = false;
|
||||
c->copygc.running = false;
|
||||
|
||||
wake_up(&c->copygc_running_wq);
|
||||
wake_up(&c->copygc.running_wq);
|
||||
|
||||
if (!wait && !did_work) {
|
||||
u64 min_member_capacity = bch2_min_rw_member_capacity(c);
|
||||
@ -548,43 +548,51 @@ err:
|
||||
|
||||
void bch2_copygc_stop(struct bch_fs *c)
|
||||
{
|
||||
if (c->copygc_thread) {
|
||||
kthread_stop(c->copygc_thread);
|
||||
put_task_struct(c->copygc_thread);
|
||||
if (c->copygc.thread) {
|
||||
kthread_stop(c->copygc.thread);
|
||||
put_task_struct(c->copygc.thread);
|
||||
}
|
||||
c->copygc_thread = NULL;
|
||||
c->copygc.thread = NULL;
|
||||
}
|
||||
|
||||
int bch2_copygc_start(struct bch_fs *c)
|
||||
{
|
||||
struct task_struct *t;
|
||||
int ret;
|
||||
|
||||
if (c->copygc_thread)
|
||||
return 0;
|
||||
|
||||
if (c->opts.nochanges)
|
||||
return 0;
|
||||
|
||||
if (bch2_fs_init_fault("copygc_start"))
|
||||
return -ENOMEM;
|
||||
|
||||
t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
|
||||
ret = PTR_ERR_OR_ZERO(t);
|
||||
bch_err_msg(c, ret, "creating copygc thread");
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!c->copygc.wq &&
|
||||
!(c->copygc.wq = alloc_workqueue("bcachefs_copygc",
|
||||
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)))
|
||||
return bch_err_throw(c, ENOMEM_fs_other_alloc);
|
||||
|
||||
get_task_struct(t);
|
||||
if (!c->copygc.thread) {
|
||||
struct task_struct *t =
|
||||
kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
|
||||
int ret = PTR_ERR_OR_ZERO(t);
|
||||
bch_err_msg(c, ret, "creating copygc thread");
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
c->copygc_thread = t;
|
||||
wake_up_process(c->copygc_thread);
|
||||
get_task_struct(t);
|
||||
|
||||
c->copygc.thread = t;
|
||||
wake_up_process(c->copygc.thread);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_fs_copygc_exit(struct bch_fs *c)
|
||||
{
|
||||
if (c->copygc.wq)
|
||||
destroy_workqueue(c->copygc.wq);
|
||||
}
|
||||
|
||||
void bch2_fs_copygc_init(struct bch_fs *c)
|
||||
{
|
||||
init_waitqueue_head(&c->copygc_running_wq);
|
||||
c->copygc_running = false;
|
||||
init_waitqueue_head(&c->copygc.running_wq);
|
||||
c->copygc.running = false;
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_MOVINGGC_H
|
||||
#define _BCACHEFS_MOVINGGC_H
|
||||
#ifndef _BCACHEFS_COPYGC_H
|
||||
#define _BCACHEFS_COPYGC_H
|
||||
|
||||
u64 bch2_copygc_wait_amount(struct bch_fs *);
|
||||
void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *);
|
||||
@ -8,13 +8,15 @@ void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *);
|
||||
static inline void bch2_copygc_wakeup(struct bch_fs *c)
|
||||
{
|
||||
guard(rcu)();
|
||||
struct task_struct *p = rcu_dereference(c->copygc_thread);
|
||||
struct task_struct *p = rcu_dereference(c->copygc.thread);
|
||||
if (p)
|
||||
wake_up_process(p);
|
||||
}
|
||||
|
||||
void bch2_copygc_stop(struct bch_fs *);
|
||||
int bch2_copygc_start(struct bch_fs *);
|
||||
|
||||
void bch2_fs_copygc_exit(struct bch_fs *);
|
||||
void bch2_fs_copygc_init(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_MOVINGGC_H */
|
||||
#endif /* _BCACHEFS_COPYGC_H */
|
||||
|
||||
18
libbcachefs/data/copygc_types.h
Normal file
@ -0,0 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_COPYGC_TYPES_H
#define _BCACHEFS_COPYGC_TYPES_H

struct bch_fs_copygc {
struct task_struct *thread;
struct write_point write_point;
s64 wait_at;
s64 wait;
bool running;
wait_queue_head_t running_wq;

/* Dedicated workqueue for btree updates: */
struct workqueue_struct *wq;
};

#endif /* _BCACHEFS_COPYGC_TYPES_H */

@ -405,7 +405,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
|
||||
|
||||
struct gc_stripe *gc = NULL;
|
||||
if (flags & BTREE_TRIGGER_gc) {
|
||||
gc = genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL);
|
||||
gc = genradix_ptr_alloc(&c->ec.gc_stripes, idx, GFP_KERNEL);
|
||||
if (!gc) {
|
||||
bch_err(c, "error allocating memory for gc_stripes, idx %llu", idx);
|
||||
return bch_err_throw(c, ENOMEM_mark_stripe);
|
||||
@ -754,7 +754,7 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
|
||||
nr_iovecs,
|
||||
opf,
|
||||
GFP_KERNEL,
|
||||
&c->ec_bioset),
|
||||
&c->ec.block_bioset),
|
||||
struct ec_bio, bio);
|
||||
|
||||
ec_bio->ca = ca;
|
||||
@ -852,8 +852,8 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
|
||||
|
||||
static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
|
||||
{
|
||||
if (c->gc_pos.phase != GC_PHASE_not_running &&
|
||||
!genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
|
||||
if (c->gc.pos.phase != GC_PHASE_not_running &&
|
||||
!genradix_ptr_alloc(&c->ec.gc_stripes, idx, gfp))
|
||||
return bch_err_throw(c, ENOMEM_ec_stripe_mem_alloc);
|
||||
|
||||
return 0;
|
||||
@ -877,10 +877,10 @@ static int ec_stripe_mem_alloc(struct btree_trans *trans,
|
||||
|
||||
static bool __bch2_bucket_has_new_stripe(struct bch_fs *c, u64 dev_bucket)
|
||||
{
|
||||
unsigned hash = hash_64(dev_bucket, ilog2(ARRAY_SIZE(c->ec_stripes_new_buckets)));
|
||||
unsigned hash = hash_64(dev_bucket, ilog2(ARRAY_SIZE(c->ec.stripes_new_buckets)));
|
||||
struct ec_stripe_new_bucket *s;
|
||||
|
||||
hlist_for_each_entry(s, &c->ec_stripes_new_buckets[hash], hash)
|
||||
hlist_for_each_entry(s, &c->ec.stripes_new_buckets[hash], hash)
|
||||
if (s->dev_bucket == dev_bucket)
|
||||
return true;
|
||||
return false;
|
||||
@ -888,7 +888,7 @@ static bool __bch2_bucket_has_new_stripe(struct bch_fs *c, u64 dev_bucket)
|
||||
|
||||
bool bch2_bucket_has_new_stripe(struct bch_fs *c, u64 dev_bucket)
|
||||
{
|
||||
guard(spinlock)(&c->ec_stripes_new_lock);
|
||||
guard(spinlock)(&c->ec.stripes_new_lock);
|
||||
return __bch2_bucket_has_new_stripe(c, dev_bucket);
|
||||
}
|
||||
|
||||
@ -896,20 +896,20 @@ static void stripe_new_bucket_add(struct bch_fs *c, struct ec_stripe_new_bucket
|
||||
{
|
||||
s->dev_bucket = dev_bucket;
|
||||
|
||||
unsigned hash = hash_64(dev_bucket, ilog2(ARRAY_SIZE(c->ec_stripes_new_buckets)));
|
||||
hlist_add_head(&s->hash, &c->ec_stripes_new_buckets[hash]);
|
||||
unsigned hash = hash_64(dev_bucket, ilog2(ARRAY_SIZE(c->ec.stripes_new_buckets)));
|
||||
hlist_add_head(&s->hash, &c->ec.stripes_new_buckets[hash]);
|
||||
}
|
||||
|
||||
static void stripe_new_buckets_add(struct bch_fs *c, struct ec_stripe_new *s)
|
||||
{
|
||||
unsigned nr_blocks = s->nr_data + s->nr_parity;
|
||||
|
||||
guard(spinlock)(&c->ec_stripes_new_lock);
|
||||
guard(spinlock)(&c->ec.stripes_new_lock);
|
||||
for (unsigned i = 0; i < nr_blocks; i++) {
|
||||
if (!s->blocks[i])
|
||||
continue;
|
||||
|
||||
struct open_bucket *ob = c->open_buckets + s->blocks[i];
|
||||
struct open_bucket *ob = c->allocator.open_buckets + s->blocks[i];
|
||||
struct bpos bucket = POS(ob->dev, ob->bucket);
|
||||
|
||||
stripe_new_bucket_add(c, &s->buckets[i], bucket_to_u64(bucket));
|
||||
@ -918,7 +918,7 @@ static void stripe_new_buckets_add(struct bch_fs *c, struct ec_stripe_new *s)
|
||||
|
||||
static void stripe_new_buckets_del(struct bch_fs *c, struct ec_stripe_new *s)
|
||||
{
|
||||
guard(spinlock)(&c->ec_stripes_new_lock);
|
||||
guard(spinlock)(&c->ec.stripes_new_lock);
|
||||
|
||||
struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
|
||||
for (unsigned i = 0; i < v->nr_blocks; i++)
|
||||
@ -927,10 +927,10 @@ static void stripe_new_buckets_del(struct bch_fs *c, struct ec_stripe_new *s)
|
||||
|
||||
static struct ec_stripe_handle *bch2_open_stripe_find(struct bch_fs *c, u64 idx)
|
||||
{
|
||||
unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
|
||||
unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec.stripes_new)));
|
||||
struct ec_stripe_handle *s;
|
||||
|
||||
hlist_for_each_entry(s, &c->ec_stripes_new[hash], hash)
|
||||
hlist_for_each_entry(s, &c->ec.stripes_new[hash], hash)
|
||||
if (s->idx == idx)
|
||||
return s;
|
||||
return NULL;
|
||||
@ -938,7 +938,7 @@ static struct ec_stripe_handle *bch2_open_stripe_find(struct bch_fs *c, u64 idx)
|
||||
|
||||
static bool bch2_stripe_is_open(struct bch_fs *c, u64 idx)
|
||||
{
|
||||
guard(spinlock)(&c->ec_stripes_new_lock);
|
||||
guard(spinlock)(&c->ec.stripes_new_lock);
|
||||
return bch2_open_stripe_find(c, idx) != NULL;
|
||||
}
|
||||
|
||||
@ -949,13 +949,13 @@ static bool bch2_stripe_handle_tryget(struct bch_fs *c,
|
||||
BUG_ON(s->idx);
|
||||
BUG_ON(!idx);
|
||||
|
||||
guard(spinlock)(&c->ec_stripes_new_lock);
|
||||
guard(spinlock)(&c->ec.stripes_new_lock);
|
||||
bool ret = !bch2_open_stripe_find(c, idx);
|
||||
if (ret) {
|
||||
unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
|
||||
unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec.stripes_new)));
|
||||
|
||||
s->idx = idx;
|
||||
hlist_add_head(&s->hash, &c->ec_stripes_new[hash]);
|
||||
hlist_add_head(&s->hash, &c->ec.stripes_new[hash]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -965,7 +965,7 @@ static void bch2_stripe_handle_put(struct bch_fs *c, struct ec_stripe_handle *s)
|
||||
if (!s->idx)
|
||||
return;
|
||||
|
||||
guard(spinlock)(&c->ec_stripes_new_lock);
|
||||
guard(spinlock)(&c->ec.stripes_new_lock);
|
||||
BUG_ON(bch2_open_stripe_find(c, s->idx) != s);
|
||||
hlist_del_init(&s->hash);
|
||||
|
||||
@ -998,7 +998,7 @@ static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
|
||||
static void ec_stripe_delete_work(struct work_struct *work)
|
||||
{
|
||||
struct bch_fs *c =
|
||||
container_of(work, struct bch_fs, ec_stripe_delete_work);
|
||||
container_of(work, struct bch_fs, ec.stripe_delete_work);
|
||||
|
||||
bch2_trans_run(c,
|
||||
bch2_btree_write_buffer_tryflush(trans) ?:
|
||||
@ -1016,7 +1016,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
|
||||
void bch2_do_stripe_deletes(struct bch_fs *c)
|
||||
{
|
||||
if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_stripe_delete) &&
|
||||
!queue_work(c->write_ref_wq, &c->ec_stripe_delete_work))
|
||||
!queue_work(c->write_ref_wq, &c->ec.stripe_delete_work))
|
||||
enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete);
|
||||
}
|
||||
|
||||
@ -1323,7 +1323,7 @@ static int __ec_stripe_create(struct ec_stripe_new *s)
|
||||
|
||||
for (unsigned i = 0; i < nr_data; i++)
|
||||
if (s->blocks[i]) {
|
||||
struct open_bucket *ob = c->open_buckets + s->blocks[i];
|
||||
struct open_bucket *ob = c->allocator.open_buckets + s->blocks[i];
|
||||
|
||||
if (ob->sectors_free)
|
||||
zero_out_rest_of_ec_bucket(c, s, i, ob);
|
||||
@ -1403,7 +1403,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
|
||||
for (unsigned i = 0; i < v->nr_blocks; i++)
|
||||
if (s->blocks[i]) {
|
||||
struct open_bucket *ob = c->open_buckets + s->blocks[i];
|
||||
struct open_bucket *ob = c->allocator.open_buckets + s->blocks[i];
|
||||
|
||||
if (i < nr_data) {
|
||||
ob->ec = NULL;
|
||||
@ -1413,9 +1413,9 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
}
|
||||
}
|
||||
|
||||
scoped_guard(mutex, &c->ec_stripe_new_lock)
|
||||
scoped_guard(mutex, &c->ec.stripe_new_lock)
|
||||
list_del(&s->list);
|
||||
wake_up(&c->ec_stripe_new_wait);
|
||||
wake_up(&c->ec.stripe_new_wait);
|
||||
|
||||
ec_stripe_buf_exit(&s->old_stripe);
|
||||
ec_stripe_buf_exit(&s->new_stripe);
|
||||
@ -1428,8 +1428,8 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
|
||||
{
|
||||
struct ec_stripe_new *s;
|
||||
|
||||
guard(mutex)(&c->ec_stripe_new_lock);
|
||||
list_for_each_entry(s, &c->ec_stripe_new_list, list)
|
||||
guard(mutex)(&c->ec.stripe_new_lock);
|
||||
list_for_each_entry(s, &c->ec.stripe_new_list, list)
|
||||
if (!atomic_read(&s->ref[STRIPE_REF_io]))
|
||||
return s;
|
||||
return NULL;
|
||||
@ -1438,7 +1438,7 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
|
||||
static void ec_stripe_create_work(struct work_struct *work)
|
||||
{
|
||||
struct bch_fs *c = container_of(work,
|
||||
struct bch_fs, ec_stripe_create_work);
|
||||
struct bch_fs, ec.stripe_create_work);
|
||||
struct ec_stripe_new *s;
|
||||
|
||||
while ((s = get_pending_stripe(c)))
|
||||
@ -1451,7 +1451,7 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c)
|
||||
{
|
||||
enumerated_ref_get(&c->writes, BCH_WRITE_REF_stripe_create);
|
||||
|
||||
if (!queue_work(system_long_wq, &c->ec_stripe_create_work))
|
||||
if (!queue_work(system_long_wq, &c->ec.stripe_create_work))
|
||||
enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create);
|
||||
}
|
||||
|
||||
@ -1466,8 +1466,8 @@ static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h
|
||||
h->s = NULL;
|
||||
s->pending = true;
|
||||
|
||||
scoped_guard(mutex, &c->ec_stripe_new_lock)
|
||||
list_add(&s->list, &c->ec_stripe_new_list);
|
||||
scoped_guard(mutex, &c->ec.stripe_new_lock)
|
||||
list_add(&s->list, &c->ec.stripe_new_list);
|
||||
|
||||
ec_stripe_new_put(c, s, STRIPE_REF_io);
|
||||
}
|
||||
@ -1649,7 +1649,7 @@ static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *
|
||||
if (h->s && !h->s->allocated && dev_mask_nr(&devs_leaving))
|
||||
ec_stripe_new_cancel(c, h, -EINTR);
|
||||
|
||||
h->rw_devs_change_count = c->rw_devs_change_count;
|
||||
h->rw_devs_change_count = c->allocator.rw_devs_change_count;
|
||||
}
|
||||
|
||||
static struct ec_stripe_head *
|
||||
@ -1671,7 +1671,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
|
||||
h->redundancy = redundancy;
|
||||
h->watermark = watermark;
|
||||
|
||||
list_add(&h->list, &c->ec_stripe_head_list);
|
||||
list_add(&h->list, &c->ec.stripe_head_list);
|
||||
return h;
|
||||
}
|
||||
|
||||
@ -1699,7 +1699,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
|
||||
if (!redundancy)
|
||||
return NULL;
|
||||
|
||||
int ret = bch2_trans_mutex_lock(trans, &c->ec_stripe_head_lock);
|
||||
int ret = bch2_trans_mutex_lock(trans, &c->ec.stripe_head_lock);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
@ -1708,7 +1708,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
|
||||
goto err;
|
||||
}
|
||||
|
||||
list_for_each_entry(h, &c->ec_stripe_head_list, list)
|
||||
list_for_each_entry(h, &c->ec.stripe_head_list, list)
|
||||
if (h->disk_label == disk_label &&
|
||||
h->algo == algo &&
|
||||
h->redundancy == redundancy &&
|
||||
@ -1727,7 +1727,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
|
||||
goto err;
|
||||
}
|
||||
found:
|
||||
if (h->rw_devs_change_count != c->rw_devs_change_count)
|
||||
if (h->rw_devs_change_count != c->allocator.rw_devs_change_count)
|
||||
ec_stripe_head_devs_update(c, h);
|
||||
|
||||
if (h->insufficient_devs) {
|
||||
@ -1735,7 +1735,7 @@ found:
|
||||
h = NULL;
|
||||
}
|
||||
err:
|
||||
mutex_unlock(&c->ec_stripe_head_lock);
|
||||
mutex_unlock(&c->ec.stripe_head_lock);
|
||||
return h;
|
||||
}
|
||||
|
||||
@ -1754,7 +1754,7 @@ static int __new_stripe_alloc_buckets(struct btree_trans *trans,
|
||||
|
||||
/* * We bypass the sector allocator which normally does this: */
|
||||
bitmap_and(req->devs_may_alloc.d, req->devs_may_alloc.d,
|
||||
c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
|
||||
c->allocator.rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
|
||||
|
||||
for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) {
|
||||
/*
|
||||
@ -1907,7 +1907,7 @@ static int init_new_stripe_from_old(struct bch_fs *c, struct ec_stripe_new *s)
|
||||
* blocks from the stripe we're reusing:
|
||||
*/
|
||||
for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) {
|
||||
bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]);
|
||||
bch2_open_bucket_put(c, c->allocator.open_buckets + s->blocks[i]);
|
||||
s->blocks[i] = 0;
|
||||
}
|
||||
memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten));
|
||||
@ -1970,12 +1970,12 @@ static int stripe_idx_alloc(struct btree_trans *trans, struct ec_stripe_new *s)
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_s_c k;
|
||||
struct bpos min_pos = POS(0, 1);
|
||||
struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
|
||||
struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec.stripe_hint));
|
||||
int ret;
|
||||
|
||||
for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos,
|
||||
BTREE_ITER_slots|BTREE_ITER_intent, k, ret) {
|
||||
c->ec_stripe_hint = iter.pos.offset;
|
||||
c->ec.stripe_hint = iter.pos.offset;
|
||||
|
||||
if (bkey_gt(k.k->p, POS(0, U32_MAX))) {
|
||||
if (start_pos.offset) {
|
||||
@ -2041,7 +2041,7 @@ static int stripe_alloc_or_reuse(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
/* XXX freelist_wait? */
|
||||
closure_wait(&c->freelist_wait, cl);
|
||||
closure_wait(&c->allocator.freelist_wait, cl);
|
||||
*waiting = true;
|
||||
}
|
||||
}
|
||||
@ -2107,7 +2107,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
|
||||
ret = stripe_alloc_or_reuse(trans, req, cl, h, s, &waiting);
|
||||
if (waiting &&
|
||||
!bch2_err_matches(ret, BCH_ERR_operation_blocked))
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
closure_wake_up(&c->allocator.freelist_wait);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -2233,7 +2233,7 @@ static bool should_cancel_stripe(struct bch_fs *c, struct ec_stripe_new *s, stru
|
||||
if (!s->blocks[i])
|
||||
continue;
|
||||
|
||||
struct open_bucket *ob = c->open_buckets + s->blocks[i];
|
||||
struct open_bucket *ob = c->allocator.open_buckets + s->blocks[i];
|
||||
if (ob->dev == ca->dev_idx)
|
||||
return true;
|
||||
}
|
||||
@ -2245,8 +2245,8 @@ static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct ec_stripe_head *h;
|
||||
|
||||
guard(mutex)(&c->ec_stripe_head_lock);
|
||||
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
|
||||
guard(mutex)(&c->ec.stripe_head_lock);
|
||||
list_for_each_entry(h, &c->ec.stripe_head_list, list) {
|
||||
guard(mutex)(&h->lock);
|
||||
if (h->s && should_cancel_stripe(c, h->s, ca))
|
||||
ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes);
|
||||
@ -2267,13 +2267,13 @@ static bool bch2_fs_ec_flush_done(struct bch_fs *c)
|
||||
{
|
||||
sched_annotate_sleep();
|
||||
|
||||
guard(mutex)(&c->ec_stripe_new_lock);
|
||||
return list_empty(&c->ec_stripe_new_list);
|
||||
guard(mutex)(&c->ec.stripe_new_lock);
|
||||
return list_empty(&c->ec.stripe_new_list);
|
||||
}
|
||||
|
||||
void bch2_fs_ec_flush(struct bch_fs *c)
|
||||
{
|
||||
wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c));
|
||||
wait_event(c->ec.stripe_new_wait, bch2_fs_ec_flush_done(c));
|
||||
}
|
||||
|
||||
int bch2_stripes_read(struct bch_fs *c)
|
||||
@ -2305,8 +2305,8 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
struct ec_stripe_head *h;
|
||||
struct ec_stripe_new *s;
|
||||
|
||||
scoped_guard(mutex, &c->ec_stripe_head_lock)
|
||||
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
|
||||
scoped_guard(mutex, &c->ec.stripe_head_lock)
|
||||
list_for_each_entry(h, &c->ec.stripe_head_list, list) {
|
||||
prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n",
|
||||
h->disk_label, h->algo, h->redundancy,
|
||||
bch2_watermarks[h->watermark],
|
||||
@ -2318,8 +2318,8 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
|
||||
prt_printf(out, "in flight:\n");
|
||||
|
||||
scoped_guard(mutex, &c->ec_stripe_new_lock)
|
||||
list_for_each_entry(s, &c->ec_stripe_new_list, list)
|
||||
scoped_guard(mutex, &c->ec.stripe_new_lock)
|
||||
list_for_each_entry(s, &c->ec.stripe_new_list, list)
|
||||
bch2_new_stripe_to_text(out, c, s);
|
||||
}
|
||||
|
||||
@ -2329,8 +2329,8 @@ void bch2_fs_ec_exit(struct bch_fs *c)
|
||||
while (1) {
|
||||
struct ec_stripe_head *h;
|
||||
|
||||
scoped_guard(mutex, &c->ec_stripe_head_lock)
|
||||
h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list);
|
||||
scoped_guard(mutex, &c->ec.stripe_head_lock)
|
||||
h = list_pop_entry(&c->ec.stripe_head_list, struct ec_stripe_head, list);
|
||||
|
||||
if (!h)
|
||||
break;
|
||||
@ -2346,29 +2346,29 @@ void bch2_fs_ec_exit(struct bch_fs *c)
|
||||
kfree(h);
|
||||
}
|
||||
|
||||
BUG_ON(!list_empty(&c->ec_stripe_new_list));
|
||||
BUG_ON(!list_empty(&c->ec.stripe_new_list));
|
||||
|
||||
bioset_exit(&c->ec_bioset);
|
||||
bioset_exit(&c->ec.block_bioset);
|
||||
}
|
||||
|
||||
void bch2_fs_ec_init_early(struct bch_fs *c)
|
||||
{
|
||||
spin_lock_init(&c->ec_stripes_new_lock);
|
||||
spin_lock_init(&c->ec.stripes_new_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->ec_stripe_head_list);
|
||||
mutex_init(&c->ec_stripe_head_lock);
|
||||
INIT_LIST_HEAD(&c->ec.stripe_head_list);
|
||||
mutex_init(&c->ec.stripe_head_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->ec_stripe_new_list);
|
||||
mutex_init(&c->ec_stripe_new_lock);
|
||||
init_waitqueue_head(&c->ec_stripe_new_wait);
|
||||
INIT_LIST_HEAD(&c->ec.stripe_new_list);
|
||||
mutex_init(&c->ec.stripe_new_lock);
|
||||
init_waitqueue_head(&c->ec.stripe_new_wait);
|
||||
|
||||
INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
|
||||
INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);
|
||||
INIT_WORK(&c->ec.stripe_create_work, ec_stripe_create_work);
|
||||
INIT_WORK(&c->ec.stripe_delete_work, ec_stripe_delete_work);
|
||||
}
|
||||
|
||||
int bch2_fs_ec_init(struct bch_fs *c)
|
||||
{
|
||||
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
|
||||
return bioset_init(&c->ec.block_bioset, 1, offsetof(struct ec_bio, bio),
|
||||
BIOSET_NEED_BVECS);
|
||||
}
|
||||
|
||||
|
||||
@@ -26,4 +26,26 @@ struct gc_stripe {
 	union bch_replicas_padded	r;
 };
 
+struct bch_fs_ec {
+	struct hlist_head		stripes_new[32];
+	struct hlist_head		stripes_new_buckets[64];
+	spinlock_t			stripes_new_lock;
+
+	struct list_head		stripe_head_list;
+	struct mutex			stripe_head_lock;
+
+	struct list_head		stripe_new_list;
+	struct mutex			stripe_new_lock;
+	wait_queue_head_t		stripe_new_wait;
+
+	struct work_struct		stripe_create_work;
+	u64				stripe_hint;
+
+	struct work_struct		stripe_delete_work;
+
+	struct bio_set			block_bioset;
+
+	GENRADIX(struct gc_stripe)	gc_stripes;
+};
+
 #endif /* _BCACHEFS_EC_TYPES_H */

@ -301,7 +301,7 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec
|
||||
* snapshot while they're in progress, then crashing, will result in the
|
||||
* resume only proceeding in one of the snapshots
|
||||
*/
|
||||
guard(rwsem_read)(&c->snapshot_create_lock);
|
||||
guard(rwsem_read)(&c->snapshots.create_lock);
|
||||
CLASS(btree_trans, trans)(c);
|
||||
try(bch2_logged_op_start(trans, &op.k_i));
|
||||
int ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta);
|
||||
@ -509,7 +509,7 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
|
||||
* snapshot while they're in progress, then crashing, will result in the
|
||||
* resume only proceeding in one of the snapshots
|
||||
*/
|
||||
guard(rwsem_read)(&c->snapshot_create_lock);
|
||||
guard(rwsem_read)(&c->snapshots.create_lock);
|
||||
CLASS(btree_trans, trans)(c);
|
||||
try(bch2_logged_op_start(trans, &op.k_i));
|
||||
int ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta);
|
||||
|
||||
@@ -319,9 +319,11 @@ int bch2_move_extent(struct moving_context *ctxt,
 	else if (data_opts.type != BCH_DATA_UPDATE_scrub) {
 		struct bch_devs_list devs_have = bch2_data_update_devs_keeping(c, &data_opts, k);
 
-		ret = bch2_can_do_write(c, &data_opts, k, &devs_have) ?:
-			bch2_btree_node_rewrite_pos(trans, iter->btree_id, level, k.k->p,
-						    data_opts.target, 0, data_opts.write_flags);
+		if (data_opts.type != BCH_DATA_UPDATE_copygc)
+			try(bch2_can_do_write(c, &data_opts, k, &devs_have));
+
+		ret = bch2_btree_node_rewrite_pos(trans, iter->btree_id, level, k.k->p,
+						  data_opts.target, 0, data_opts.write_flags);
 	} else
 		ret = bch2_btree_node_scrub(trans, iter->btree_id, level, k, data_opts.read_dev);
 
@ -345,10 +347,10 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
|
||||
bool is_kthread = current->flags & PF_KTHREAD;
|
||||
u64 delay;
|
||||
|
||||
if (ctxt->wait_on_copygc && c->copygc_running) {
|
||||
if (ctxt->wait_on_copygc && c->copygc.running) {
|
||||
bch2_moving_ctxt_flush_all(ctxt);
|
||||
wait_event_freezable(c->copygc_running_wq,
|
||||
!c->copygc_running ||
|
||||
wait_event_freezable(c->copygc.running_wq,
|
||||
!c->copygc.running ||
|
||||
(is_kthread && kthread_should_stop()));
|
||||
}
|
||||
|
||||
|
||||
@ -78,7 +78,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
|
||||
|
||||
guard(rcu)();
|
||||
devs = bch2_target_to_mask(c, target) ?:
|
||||
&c->rw_devs[BCH_DATA_user];
|
||||
&c->allocator.rw_devs[BCH_DATA_user];
|
||||
|
||||
for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
|
||||
struct bch_dev *ca = rcu_dereference(c->devs[d]);
|
||||
|
||||
@ -1500,8 +1500,12 @@ static int do_reconcile_phys(struct moving_context *ctxt,
|
||||
if (!k.k)
|
||||
return 0;
|
||||
|
||||
event_add_trace(c, reconcile_phys, k.k->size, buf,
|
||||
bch2_bkey_val_to_text(&buf, c, k));
|
||||
event_add_trace(c, reconcile_phys, k.k->size, buf, ({
|
||||
prt_newline(&buf);
|
||||
bch2_bkey_val_to_text(&buf, c, bp_k);
|
||||
prt_newline(&buf);
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
}));
|
||||
|
||||
return __do_reconcile_extent(ctxt, snapshot_io_opts, &iter, k);
|
||||
}
|
||||
@@ -1875,6 +1879,24 @@ static int do_reconcile(struct moving_context *ctxt)
 			continue;
 		}
 
+		if ((r->work_pos.btree == BTREE_ID_reconcile_hipri_phys ||
+		     r->work_pos.btree == BTREE_ID_reconcile_work_phys) &&
+		    k.k->p.inode != r->work_pos.pos.inode) {
+			/*
+			 * We don't yet do multiple devices in parallel - that
+			 * will require extra synchronization to avoid kicking
+			 * off the same reconciles simultaneously via multiple
+			 * backpointers.
+			 *
+			 * For now, flush when switching devices to avoid
+			 * conflicts:
+			 */
+			bch2_moving_ctxt_flush_all(ctxt);
+			bch2_btree_write_buffer_flush_sync(trans);
+			work.nr = 0;
+			continue;
+		}
+
 		r->running = true;
 		r->work_pos.pos = k.k->p;
 
@ -1912,7 +1934,9 @@ static int do_reconcile(struct moving_context *ctxt)
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
r->work_pos.pos = bpos_successor(r->work_pos.pos);
|
||||
r->work_pos.pos = btree_type_has_snapshots(r->work_pos.btree)
|
||||
? bpos_successor(r->work_pos.pos)
|
||||
: bpos_nosnap_successor(r->work_pos.pos);
|
||||
}
|
||||
|
||||
if (!ret && !bkey_deleted(&pending_cookie.k))
|
||||
@ -1954,7 +1978,7 @@ static int bch2_reconcile_thread(void *arg)
|
||||
|
||||
struct moving_context ctxt __cleanup(bch2_moving_ctxt_exit);
|
||||
bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
|
||||
writepoint_ptr(&c->reconcile_write_point),
|
||||
writepoint_ptr(&c->allocator.reconcile_write_point),
|
||||
true);
|
||||
|
||||
while (!kthread_should_stop() && !do_reconcile(&ctxt))
|
||||
|
||||
@ -66,21 +66,22 @@ static unsigned bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k)
|
||||
|
||||
noinline_for_stack
|
||||
static void count_data_update_key_fail(struct data_update *u,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c new,
|
||||
struct bkey_s_c wrote,
|
||||
struct bkey_i *insert,
|
||||
const char *msg)
|
||||
{
|
||||
struct bch_fs *c = u->op.c;
|
||||
unsigned sectors = new.k->p.offset - iter->pos.offset;
|
||||
|
||||
if (u->stats) {
|
||||
atomic64_inc(&u->stats->keys_raced);
|
||||
atomic64_add(sectors, &u->stats->sectors_raced);
|
||||
atomic64_add(insert->k.size, &u->stats->sectors_raced);
|
||||
}
|
||||
|
||||
event_add_trace(c, data_update_key_fail, sectors, buf, ({
|
||||
event_add_trace(c, data_update_key_fail, insert->k.size, buf, ({
|
||||
prt_str(&buf, bch2_data_update_type_strs[u->opts.type]);
|
||||
prt_newline(&buf);
|
||||
|
||||
prt_str(&buf, msg);
|
||||
prt_newline(&buf);
|
||||
|
||||
@ -157,8 +158,13 @@ static int data_update_index_update_key(struct btree_trans *trans,
|
||||
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX));
|
||||
bkey_reassemble(insert, k);
|
||||
|
||||
bch2_cut_front(c, iter->pos, &new->k_i);
|
||||
bch2_cut_front(c, iter->pos, insert);
|
||||
bch2_cut_back(new->k.p, insert);
|
||||
bch2_cut_back(insert->k.p, &new->k_i);
|
||||
|
||||
if (!bch2_extents_match(c, k, old)) {
|
||||
count_data_update_key_fail(u, iter, k, bkey_i_to_s_c(&new->k_i), NULL, "no match:");
|
||||
count_data_update_key_fail(u, k, bkey_i_to_s_c(&new->k_i), insert, "no match:");
|
||||
bch2_btree_iter_advance(iter);
|
||||
return 0;
|
||||
}
|
||||
@ -166,12 +172,6 @@ static int data_update_index_update_key(struct btree_trans *trans,
|
||||
struct bch_inode_opts opts;
|
||||
try(bch2_bkey_get_io_opts(trans, NULL, k, &opts));
|
||||
|
||||
bch2_cut_front(c, iter->pos, &new->k_i);
|
||||
|
||||
bch2_cut_front(c, iter->pos, insert);
|
||||
bch2_cut_back(new->k.p, insert);
|
||||
bch2_cut_back(insert->k.p, &new->k_i);
|
||||
|
||||
bch2_bkey_propagate_incompressible(c, insert, bkey_i_to_s_c(&new->k_i));
|
||||
|
||||
/*
|
||||
@ -204,7 +204,7 @@ static int data_update_index_update_key(struct btree_trans *trans,
|
||||
if (u->opts.ptrs_rewrite &&
|
||||
!rewrites_found &&
|
||||
bch2_bkey_durability(c, k) >= opts.data_replicas) {
|
||||
count_data_update_key_fail(u, iter, k, bkey_i_to_s_c(&new->k_i), insert,
|
||||
count_data_update_key_fail(u, k, bkey_i_to_s_c(&new->k_i), insert,
|
||||
"no rewrites found:");
|
||||
bch2_btree_iter_advance(iter);
|
||||
return 0;
|
||||
@ -220,7 +220,7 @@ static int data_update_index_update_key(struct btree_trans *trans,
|
||||
!ptr_c->cached));
|
||||
|
||||
if (!bkey_val_u64s(&new->k)) {
|
||||
count_data_update_key_fail(u, iter, k,
|
||||
count_data_update_key_fail(u, k,
|
||||
bkey_i_to_s_c(bch2_keylist_front(&u->op.insert_keys)),
|
||||
insert, "new replicas conflicted:");
|
||||
bch2_btree_iter_advance(iter);
|
||||
@ -762,7 +762,7 @@ int bch2_can_do_write(struct bch_fs *c, struct data_update_opts *opts,
|
||||
enum bch_watermark watermark = opts->commit_flags & BCH_WATERMARK_MASK;
|
||||
|
||||
if ((opts->write_flags & BCH_WRITE_alloc_nowait) &&
|
||||
unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)))
|
||||
unlikely(c->allocator.open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)))
|
||||
return bch_err_throw(c, data_update_fail_would_block);
|
||||
|
||||
guard(rcu)();
|
||||
@ -999,9 +999,11 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
* (i.e. trying to move a durability=2 replica to a target with a
|
||||
* single durability=2 device)
|
||||
*/
|
||||
ret = bch2_can_do_write(c, &m->opts, k, &m->op.devs_have);
|
||||
if (ret)
|
||||
goto out;
|
||||
if (data_opts.type != BCH_DATA_UPDATE_copygc) {
|
||||
ret = bch2_can_do_write(c, &m->opts, k, &m->op.devs_have);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (reserve_sectors) {
|
||||
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
|
||||
|
||||
@ -20,7 +20,7 @@ void bch2_write_op_error(struct bch_write_op *op, u64, const char *, ...);
|
||||
static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
|
||||
{
|
||||
return op->watermark == BCH_WATERMARK_copygc
|
||||
? op->c->copygc_wq
|
||||
? op->c->copygc.wq
|
||||
: op->c->btree_update_wq;
|
||||
}
|
||||
|
||||
|
||||
@ -590,12 +590,12 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
|
||||
i->size = size;
|
||||
i->ret = 0;
|
||||
|
||||
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
|
||||
int srcu_idx = srcu_read_lock(&c->btree_trans.barrier);
|
||||
restart:
|
||||
seqmutex_lock(&c->btree_trans_lock);
|
||||
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
|
||||
seqmutex_lock(&c->btree_trans.lock);
|
||||
list_sort(&c->btree_trans.list, list_ptr_order_cmp);
|
||||
|
||||
list_for_each_entry(trans, &c->btree_trans_list, list) {
|
||||
list_for_each_entry(trans, &c->btree_trans.list, list) {
|
||||
if ((ulong) trans <= i->iter)
|
||||
continue;
|
||||
|
||||
@ -609,7 +609,7 @@ restart:
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 seq = seqmutex_unlock(&c->btree_trans_lock);
|
||||
u32 seq = seqmutex_unlock(&c->btree_trans.lock);
|
||||
|
||||
bch2_btree_trans_to_text(&i->buf, trans);
|
||||
|
||||
@ -624,12 +624,12 @@ restart:
|
||||
if (ret)
|
||||
goto unlocked;
|
||||
|
||||
if (!seqmutex_relock(&c->btree_trans_lock, seq))
|
||||
if (!seqmutex_relock(&c->btree_trans.lock, seq))
|
||||
goto restart;
|
||||
}
|
||||
seqmutex_unlock(&c->btree_trans_lock);
|
||||
seqmutex_unlock(&c->btree_trans.lock);
|
||||
unlocked:
|
||||
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
|
||||
srcu_read_unlock(&c->btree_trans.barrier, srcu_idx);
|
||||
|
||||
if (i->buf.allocation_failure)
|
||||
ret = -ENOMEM;
|
||||
@ -759,7 +759,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
|
||||
i->ret = 0;
|
||||
|
||||
while (1) {
|
||||
struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];
|
||||
struct btree_transaction_stats *s = &c->btree_trans.stats[i->iter];
|
||||
|
||||
err = bch2_debugfs_flush_buf(i);
|
||||
if (err)
|
||||
@ -825,10 +825,10 @@ static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
struct btree_trans *trans;
|
||||
ulong iter = 0;
|
||||
restart:
|
||||
seqmutex_lock(&c->btree_trans_lock);
|
||||
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
|
||||
seqmutex_lock(&c->btree_trans.lock);
|
||||
list_sort(&c->btree_trans.list, list_ptr_order_cmp);
|
||||
|
||||
list_for_each_entry(trans, &c->btree_trans_list, list) {
|
||||
list_for_each_entry(trans, &c->btree_trans.list, list) {
|
||||
if ((ulong) trans <= iter)
|
||||
continue;
|
||||
|
||||
@ -837,7 +837,7 @@ restart:
|
||||
if (!closure_get_not_zero(&trans->ref))
|
||||
continue;
|
||||
|
||||
u32 seq = seqmutex_unlock(&c->btree_trans_lock);
|
||||
u32 seq = seqmutex_unlock(&c->btree_trans.lock);
|
||||
|
||||
bool found = bch2_check_for_deadlock(trans, out) != 0;
|
||||
|
||||
@ -846,10 +846,10 @@ restart:
|
||||
if (found)
|
||||
return;
|
||||
|
||||
if (!seqmutex_relock(&c->btree_trans_lock, seq))
|
||||
if (!seqmutex_relock(&c->btree_trans.lock, seq))
|
||||
goto restart;
|
||||
}
|
||||
seqmutex_unlock(&c->btree_trans_lock);
|
||||
seqmutex_unlock(&c->btree_trans.lock);
|
||||
}
|
||||
|
||||
typedef void (*fs_to_text_fn)(struct printbuf *, struct bch_fs *);
|
||||
|
||||
@ -246,7 +246,7 @@ write_attribute(perf_test);
|
||||
|
||||
static size_t bch2_btree_cache_size(struct bch_fs *c)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
size_t ret = 0;
|
||||
struct btree *b;
|
||||
|
||||
@ -301,9 +301,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
|
||||
|
||||
static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
bch2_btree_id_to_text(out, c->gc_gens_btree);
|
||||
prt_printf(out, ": ");
|
||||
bch2_bpos_to_text(out, c->gc_gens_pos);
|
||||
bch2_bbpos_to_text(out, c->gc_gens.pos);
|
||||
prt_printf(out, "\n");
|
||||
}
|
||||
|
||||
@ -311,7 +309,7 @@ static void bch2_fs_usage_base_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_usage_base b = {};
|
||||
|
||||
acc_u64s_percpu(&b.hidden, &c->usage->hidden, sizeof(b) / sizeof(u64));
|
||||
acc_u64s_percpu(&b.hidden, &c->capacity.usage->hidden, sizeof(b) / sizeof(u64));
|
||||
|
||||
prt_printf(out, "hidden:\t\t%llu\n", b.hidden);
|
||||
prt_printf(out, "btree:\t\t%llu\n", b.btree);
|
||||
@ -427,13 +425,13 @@ STORE(bch2_fs)
|
||||
/* Debugging: */
|
||||
|
||||
if (attr == &sysfs_trigger_btree_updates)
|
||||
queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
|
||||
queue_work(c->btree_interior_updates.worker, &c->btree_interior_updates.work);
|
||||
|
||||
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_sysfs))
|
||||
return -EROFS;
|
||||
|
||||
if (attr == &sysfs_trigger_btree_cache_shrink) {
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
struct shrink_control sc;
|
||||
|
||||
sc.gfp_mask = GFP_KERNEL;
|
||||
@ -475,7 +473,7 @@ STORE(bch2_fs)
|
||||
bch2_journal_do_writes(&c->journal);
|
||||
|
||||
if (attr == &sysfs_trigger_freelist_wakeup)
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
closure_wake_up(&c->allocator.freelist_wait);
|
||||
|
||||
if (attr == &sysfs_trigger_recalc_capacity) {
|
||||
guard(rwsem_read)(&c->state_lock);
|
||||
|
||||
@ -425,9 +425,9 @@ static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c,
|
||||
return ret;
|
||||
|
||||
struct bch_fs_usage_short u = bch2_fs_usage_read_short(c);
|
||||
arg.capacity = c->capacity;
|
||||
arg.capacity = c->capacity.capacity;
|
||||
arg.used = u.used;
|
||||
arg.online_reserved = percpu_u64_get(c->online_reserved);
|
||||
arg.online_reserved = percpu_u64_get(&c->capacity.pcpu->online_reserved);
|
||||
arg.replica_entries_bytes = replicas.nr;
|
||||
|
||||
for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) {
|
||||
@ -458,9 +458,9 @@ static long bch2_ioctl_query_accounting(struct bch_fs *c,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
arg.capacity = c->capacity;
|
||||
arg.capacity = c->capacity.capacity;
|
||||
arg.used = bch2_fs_usage_read_short(c).used;
|
||||
arg.online_reserved = percpu_u64_get(c->online_reserved);
|
||||
arg.online_reserved = percpu_u64_get(&c->capacity.pcpu->online_reserved);
|
||||
arg.accounting_u64s = accounting.nr / sizeof(u64);
|
||||
|
||||
return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
|
||||
|
||||
@ -167,7 +167,7 @@ int bch2_dev_in_fs(struct bch_sb_handle *fs,
|
||||
void bch2_dev_io_ref_stop(struct bch_dev *ca, int rw)
|
||||
{
|
||||
if (rw == READ)
|
||||
clear_bit(ca->dev_idx, ca->fs->online_devs.d);
|
||||
clear_bit(ca->dev_idx, ca->fs->devs_online.d);
|
||||
|
||||
if (!enumerated_ref_is_zero(&ca->io_ref[rw]))
|
||||
enumerated_ref_stop(&ca->io_ref[rw],
|
||||
@ -519,7 +519,7 @@ int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb, struct prin
|
||||
|
||||
try(__bch2_dev_attach_bdev(c, ca, sb, err));
|
||||
|
||||
set_bit(ca->dev_idx, c->online_devs.d);
|
||||
set_bit(ca->dev_idx, c->devs_online.d);
|
||||
|
||||
bch2_dev_sysfs_online(c, ca);
|
||||
|
||||
@ -546,7 +546,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_rw &&
|
||||
new_state != BCH_MEMBER_STATE_rw) {
|
||||
struct bch_devs_mask new_rw_devs = c->rw_devs[0];
|
||||
struct bch_devs_mask new_rw_devs = c->allocator.rw_devs[0];
|
||||
__clear_bit(ca->dev_idx, new_rw_devs.d);
|
||||
|
||||
return bch2_can_write_fs_with_devs(c, new_rw_devs, flags, err);
|
||||
@ -821,7 +821,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path, struct printbuf *err)
|
||||
ca->disk_sb.sb->dev_idx = dev_idx;
|
||||
bch2_dev_attach(c, ca, dev_idx);
|
||||
|
||||
set_bit(ca->dev_idx, c->online_devs.d);
|
||||
set_bit(ca->dev_idx, c->devs_online.d);
|
||||
|
||||
if (BCH_MEMBER_GROUP(&dev_mi)) {
|
||||
ret = __bch2_dev_group_set(c, ca, label.buf);
|
||||
@ -960,10 +960,10 @@ int bch2_dev_online(struct bch_fs *c, const char *path, struct printbuf *err)
|
||||
|
||||
static int bch2_dev_may_offline(struct bch_fs *c, struct bch_dev *ca, int flags, struct printbuf *err)
|
||||
{
|
||||
struct bch_devs_mask new_devs = c->online_devs;
|
||||
struct bch_devs_mask new_devs = c->devs_online;
|
||||
__clear_bit(ca->dev_idx, new_devs.d);
|
||||
|
||||
struct bch_devs_mask new_rw_devs = c->rw_devs[0];
|
||||
struct bch_devs_mask new_rw_devs = c->allocator.rw_devs[0];
|
||||
__clear_bit(ca->dev_idx, new_devs.d);
|
||||
|
||||
if (!bch2_can_read_fs_with_devs(c, new_devs, flags, err) ||
|
||||
|
||||
@ -274,27 +274,27 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c,
|
||||
{
|
||||
struct fsck_err_state *s;
|
||||
|
||||
list_for_each_entry(s, &c->fsck_error_msgs, list)
|
||||
list_for_each_entry(s, &c->errors.msgs, list)
|
||||
if (s->id == id) {
|
||||
/*
|
||||
* move it to the head of the list: repeated fsck errors
|
||||
* are common
|
||||
*/
|
||||
list_move(&s->list, &c->fsck_error_msgs);
|
||||
list_move(&s->list, &c->errors.msgs);
|
||||
return s;
|
||||
}
|
||||
|
||||
s = kzalloc(sizeof(*s), GFP_NOFS);
|
||||
if (!s) {
|
||||
if (!c->fsck_alloc_msgs_err)
|
||||
if (!c->errors.msgs_alloc_err)
|
||||
bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
|
||||
c->fsck_alloc_msgs_err = true;
|
||||
c->errors.msgs_alloc_err = true;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&s->list);
|
||||
s->id = id;
|
||||
list_add(&s->list, &c->fsck_error_msgs);
|
||||
list_add(&s->list, &c->errors.msgs);
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -385,7 +385,7 @@ bool __bch2_count_fsck_err(struct bch_fs *c,
|
||||
|
||||
bool print = true, repeat = false, suppress = false;
|
||||
|
||||
scoped_guard(mutex, &c->fsck_error_msgs_lock)
|
||||
scoped_guard(mutex, &c->errors.msgs_lock)
|
||||
count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress);
|
||||
|
||||
if (suppress)
|
||||
@ -506,7 +506,7 @@ int __bch2_fsck_err(struct bch_fs *c,
|
||||
}
|
||||
}
|
||||
|
||||
mutex_lock(&c->fsck_error_msgs_lock);
|
||||
mutex_lock(&c->errors.msgs_lock);
|
||||
bool repeat = false, print = true, suppress = false;
|
||||
bool inconsistent = false, exiting = false;
|
||||
struct fsck_err_state *s =
|
||||
@ -626,7 +626,7 @@ print:
|
||||
if (s)
|
||||
s->ret = ret;
|
||||
err_unlock:
|
||||
mutex_unlock(&c->fsck_error_msgs_lock);
|
||||
mutex_unlock(&c->errors.msgs_lock);
|
||||
err:
|
||||
if (trans &&
|
||||
!(flags & FSCK_ERR_NO_LOG) &&
|
||||
@ -708,9 +708,9 @@ static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print)
|
||||
{
|
||||
struct fsck_err_state *s, *n;
|
||||
|
||||
guard(mutex)(&c->fsck_error_msgs_lock);
|
||||
guard(mutex)(&c->errors.msgs_lock);
|
||||
|
||||
list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
|
||||
list_for_each_entry_safe(s, n, &c->errors.msgs, list) {
|
||||
if (print && s->ratelimited && s->last_msg)
|
||||
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg);
|
||||
|
||||
@@ -755,3 +755,22 @@ void bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *
 {
 	lockrestart_do(trans, bch2_inum_offset_err_msg_trans_norestart(trans, out, subvol, pos));
 }
+
+void bch2_fs_errors_exit(struct bch_fs *c)
+{
+	darray_exit(&c->errors.counts);
+}
+
+void bch2_fs_errors_init_early(struct bch_fs *c)
+{
+	INIT_LIST_HEAD(&c->errors.msgs);
+	mutex_init(&c->errors.msgs_lock);
+
+	mutex_init(&c->errors.counts_lock);
+	darray_init(&c->errors.counts);
+}
+
+int bch2_fs_errors_init(struct bch_fs *c)
+{
+	return bch2_sb_errors_to_cpu(c);
+}

@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_ERROR_H
-#define _BCACHEFS_ERROR_H
+#ifndef _BCACHEFS_INIT_ERROR_H
+#define _BCACHEFS_INIT_ERROR_H
 
 #include <linux/list.h>
 #include <linux/printk.h>
@@ -280,4 +280,8 @@ static inline void bch2_account_io_completion(struct bch_dev *ca,
 int bch2_inum_offset_err_msg_trans_norestart(struct btree_trans *, struct printbuf *, u32, struct bpos);
 void bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, u32, struct bpos);
 
-#endif /* _BCACHEFS_ERROR_H */
+void bch2_fs_errors_exit(struct bch_fs *);
+void bch2_fs_errors_init_early(struct bch_fs *);
+int bch2_fs_errors_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_INIT_ERROR_H */

libbcachefs/init/error_types.h (new file, 16 lines)
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_INIT_ERROR_TYPES_H
+#define _BCACHEFS_INIT_ERROR_TYPES_H
+
+#include "sb/errors_types.h"
+
+struct bch_fs_errors {
+	struct list_head	msgs;
+	struct mutex		msgs_lock;
+	bool			msgs_alloc_err;
+
+	bch_sb_errors_cpu	counts;
+	struct mutex		counts_lock;
+};
+
+#endif /* _BCACHEFS_INIT_ERROR_TYPES_H */

@ -556,6 +556,9 @@ int bch2_fs_read_write_early(struct bch_fs *c)
|
||||
|
||||
static void __bch2_fs_free(struct bch_fs *c)
|
||||
{
|
||||
bch2_journal_keys_put_initial(c);
|
||||
BUG_ON(atomic_read(&c->journal_keys.ref));
|
||||
|
||||
for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++)
|
||||
bch2_time_stats_exit(&c->times[i]);
|
||||
|
||||
@ -570,7 +573,6 @@ static void __bch2_fs_free(struct bch_fs *c)
|
||||
bch2_free_fsck_errs(c);
|
||||
bch2_fs_vfs_exit(c);
|
||||
bch2_fs_snapshots_exit(c);
|
||||
bch2_fs_sb_errors_exit(c);
|
||||
bch2_fs_replicas_exit(c);
|
||||
bch2_fs_reconcile_exit(c);
|
||||
bch2_fs_quota_exit(c);
|
||||
@ -581,12 +583,15 @@ static void __bch2_fs_free(struct bch_fs *c)
|
||||
bch2_fs_fsio_exit(c);
|
||||
bch2_fs_io_write_exit(c);
|
||||
bch2_fs_io_read_exit(c);
|
||||
bch2_fs_errors_exit(c);
|
||||
bch2_fs_encryption_exit(c);
|
||||
bch2_fs_ec_exit(c);
|
||||
bch2_fs_counters_exit(c);
|
||||
bch2_fs_copygc_exit(c);
|
||||
bch2_fs_compress_exit(c);
|
||||
bch2_io_clock_exit(&c->io_clock[WRITE]);
|
||||
bch2_io_clock_exit(&c->io_clock[READ]);
|
||||
bch2_fs_capacity_exit(c);
|
||||
bch2_fs_buckets_waiting_for_journal_exit(c);
|
||||
bch2_fs_btree_write_buffer_exit(c);
|
||||
bch2_fs_btree_key_cache_exit(&c->btree_key_cache);
|
||||
@ -595,19 +600,7 @@ static void __bch2_fs_free(struct bch_fs *c)
|
||||
bch2_fs_btree_cache_exit(c);
|
||||
bch2_fs_accounting_exit(c);
|
||||
bch2_fs_async_obj_exit(c);
|
||||
bch2_journal_keys_put_initial(c);
|
||||
|
||||
BUG_ON(atomic_read(&c->journal_keys.ref));
|
||||
percpu_free_rwsem(&c->mark_lock);
|
||||
if (c->online_reserved) {
|
||||
u64 v = percpu_u64_get(c->online_reserved);
|
||||
WARN(v, "online_reserved not 0 at shutdown: %lli", v);
|
||||
free_percpu(c->online_reserved);
|
||||
}
|
||||
|
||||
darray_exit(&c->btree_roots_extra);
|
||||
free_percpu(c->pcpu);
|
||||
free_percpu(c->usage);
|
||||
mempool_exit(&c->btree_bounce_pool);
|
||||
bioset_exit(&c->btree_bio);
|
||||
mempool_exit(&c->fill_iter);
|
||||
@ -623,8 +616,6 @@ static void __bch2_fs_free(struct bch_fs *c)
|
||||
destroy_workqueue(c->btree_write_submit_wq);
|
||||
if (c->btree_read_complete_wq)
|
||||
destroy_workqueue(c->btree_read_complete_wq);
|
||||
if (c->copygc_wq)
|
||||
destroy_workqueue(c->copygc_wq);
|
||||
if (c->btree_write_complete_wq)
|
||||
destroy_workqueue(c->btree_write_complete_wq);
|
||||
if (c->btree_update_wq)
|
||||
@ -682,7 +673,7 @@ int bch2_fs_stop(struct bch_fs *c)
|
||||
|
||||
cancel_work_sync(&c->read_only_work);
|
||||
|
||||
flush_work(&c->btree_interior_update_work);
|
||||
flush_work(&c->btree_interior_updates.work);
|
||||
}
|
||||
|
||||
if (test_bit(BCH_FS_emergency_ro, &c->flags))
|
||||
@ -769,8 +760,6 @@ int bch2_fs_init_rw(struct bch_fs *c)
|
||||
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
|
||||
!(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete",
|
||||
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
|
||||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
|
||||
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
|
||||
!(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit",
|
||||
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
|
||||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
|
||||
@ -1060,7 +1049,6 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
|
||||
|
||||
init_rwsem(&c->state_lock);
|
||||
mutex_init(&c->sb_lock);
|
||||
mutex_init(&c->btree_root_lock);
|
||||
INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);
|
||||
|
||||
refcount_set(&c->ro_ref, 1);
|
||||
@ -1079,13 +1067,13 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
|
||||
bch2_fs_btree_write_buffer_init_early(c);
|
||||
bch2_fs_copygc_init(c);
|
||||
bch2_fs_ec_init_early(c);
|
||||
bch2_fs_errors_init_early(c);
|
||||
bch2_fs_journal_init_early(&c->journal);
|
||||
bch2_fs_journal_keys_init(c);
|
||||
bch2_fs_move_init(c);
|
||||
bch2_fs_nocow_locking_init_early(c);
|
||||
bch2_fs_quota_init(c);
|
||||
bch2_fs_recovery_passes_init(c);
|
||||
bch2_fs_sb_errors_init_early(c);
|
||||
bch2_fs_snapshots_init_early(c);
|
||||
bch2_fs_subvolumes_init_early(c);
|
||||
bch2_find_btree_nodes_init(&c->found_btree_nodes);
|
||||
@ -1093,18 +1081,11 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
|
||||
INIT_LIST_HEAD(&c->list);
|
||||
|
||||
mutex_init(&c->bio_bounce_pages_lock);
|
||||
mutex_init(&c->snapshot_table_lock);
|
||||
init_rwsem(&c->snapshot_create_lock);
|
||||
|
||||
spin_lock_init(&c->btree_write_error_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->journal_iters);
|
||||
|
||||
INIT_LIST_HEAD(&c->fsck_error_msgs);
|
||||
mutex_init(&c->fsck_error_msgs_lock);
|
||||
|
||||
seqcount_init(&c->usage_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->vfs_inodes_list);
|
||||
mutex_init(&c->vfs_inodes_lock);
|
||||
|
||||
@ -1112,9 +1093,7 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
|
||||
c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write];
|
||||
c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq];
|
||||
|
||||
mutex_init(&c->sectors_available_lock);
|
||||
|
||||
try(percpu_init_rwsem(&c->mark_lock));
|
||||
try(bch2_fs_capacity_init(c));
|
||||
|
||||
scoped_guard(mutex, &c->sb_lock)
|
||||
try(bch2_sb_to_fs(c, sb));
|
||||
@ -1171,9 +1150,6 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
|
||||
max(offsetof(struct btree_read_bio, bio),
|
||||
offsetof(struct btree_write_bio, wbio.bio)),
|
||||
BIOSET_NEED_BVECS) ||
|
||||
!(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
|
||||
!(c->usage = alloc_percpu(struct bch_fs_usage_base)) ||
|
||||
!(c->online_reserved = alloc_percpu(u64)) ||
|
||||
mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1,
|
||||
c->opts.btree_node_size))
|
||||
return bch_err_throw(c, ENOMEM_fs_other_alloc);
|
||||
@ -1189,12 +1165,12 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
|
||||
try(bch2_fs_compress_init(c));
|
||||
try(bch2_fs_counters_init(c));
|
||||
try(bch2_fs_ec_init(c));
|
||||
try(bch2_fs_errors_init(c));
|
||||
try(bch2_fs_encryption_init(c));
|
||||
try(bch2_fs_fsio_init(c));
|
||||
try(bch2_fs_fs_io_direct_init(c));
|
||||
try(bch2_fs_io_read_init(c));
|
||||
try(bch2_fs_reconcile_init(c));
|
||||
try(bch2_fs_sb_errors_init(c));
|
||||
try(bch2_fs_vfs_init(c));
|
||||
|
||||
|
||||
@ -1304,9 +1280,9 @@ static int bch2_fs_may_start(struct bch_fs *c, struct printbuf *err)
|
||||
}
|
||||
}
|
||||
|
||||
if (!bch2_can_read_fs_with_devs(c, c->online_devs, flags, err) ||
|
||||
if (!bch2_can_read_fs_with_devs(c, c->devs_online, flags, err) ||
|
||||
(!c->opts.read_only &&
|
||||
!bch2_can_write_fs_with_devs(c, c->rw_devs[0], flags, err))) {
|
||||
!bch2_can_write_fs_with_devs(c, c->allocator.rw_devs[0], flags, err))) {
|
||||
prt_printf(err, "Missing devices\n");
|
||||
for_each_member_device(c, ca)
|
||||
if (!bch2_dev_is_online(ca) && bch2_dev_has_data(c, ca)) {
|
||||
|
||||
@ -511,8 +511,8 @@ static int journal_replay_entry_early(struct bch_fs *c,
|
||||
entry->btree_id, BTREE_ID_NR_MAX))
|
||||
return 0;
|
||||
|
||||
while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR)
|
||||
try(darray_push(&c->btree_roots_extra, (struct btree_root) { NULL }));
|
||||
while (entry->btree_id >= c->btree_cache.roots_extra.nr + BTREE_ID_NR)
|
||||
try(darray_push(&c->btree_cache.roots_extra, (struct btree_root) { NULL }));
|
||||
|
||||
struct btree_root *r = bch2_btree_id_root(c, entry->btree_id);
|
||||
|
||||
|
||||
@ -49,7 +49,7 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr,
|
||||
if (ret == -BCH_ERR_bucket_alloc_blocked)
|
||||
ret = bch_err_throw(c, freelist_empty);
|
||||
if (ret == -BCH_ERR_freelist_empty) /* don't if we're actually out of buckets */
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
closure_wake_up(&c->allocator.freelist_wait);
|
||||
|
||||
if (ret)
|
||||
break;
|
||||
@ -475,7 +475,6 @@ int bch2_fs_journal_start(struct journal *j, struct journal_start_info info)
|
||||
scoped_guard(spinlock, &j->lock) {
|
||||
j->last_flush_write = jiffies;
|
||||
j->reservations.idx = journal_cur_seq(j);
|
||||
c->last_bucket_seq_cleanup = journal_cur_seq(j);
|
||||
}
|
||||
|
||||
try(bch2_replicas_gc_reffed(c));
|
||||
|
||||
@ -653,7 +653,7 @@ static unsigned max_dev_latency(struct bch_fs *c)
|
||||
u64 nsecs = 0;
|
||||
|
||||
guard(rcu)();
|
||||
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
|
||||
for_each_member_device_rcu(c, ca, &c->allocator.rw_devs[BCH_DATA_journal])
|
||||
nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
|
||||
|
||||
return nsecs_to_jiffies(nsecs);
|
||||
@ -1137,7 +1137,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
||||
j->space[journal_space_total].total);
|
||||
}
|
||||
|
||||
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
|
||||
for_each_member_device_rcu(c, ca, &c->allocator.rw_devs[BCH_DATA_journal]) {
|
||||
struct journal_device *ja = &ca->journal;
|
||||
if (!ja->nr)
|
||||
continue;
|
||||
|
||||
@ -161,7 +161,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
|
||||
size_t mem_limit = max_t(ssize_t, 0,
|
||||
(totalram_pages() * PAGE_SIZE) / 4 - j->dirty_entry_bytes);
|
||||
|
||||
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
|
||||
for_each_member_device_rcu(c, ca, &c->allocator.rw_devs[BCH_DATA_journal]) {
|
||||
if (!ca->journal.nr)
|
||||
continue;
|
||||
|
||||
@ -209,7 +209,7 @@ void bch2_journal_space_available(struct journal *j)
|
||||
lockdep_assert_held(&j->lock);
|
||||
guard(rcu)();
|
||||
|
||||
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
|
||||
for_each_member_device_rcu(c, ca, &c->allocator.rw_devs[BCH_DATA_journal]) {
|
||||
struct journal_device *ja = &ca->journal;
|
||||
|
||||
if (!ja->nr)
|
||||
@ -238,7 +238,7 @@ void bch2_journal_space_available(struct journal *j)
|
||||
prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
|
||||
"rw journal devs:", nr_online, metadata_replicas_required(c));
|
||||
|
||||
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal])
|
||||
for_each_member_device_rcu(c, ca, &c->allocator.rw_devs[BCH_DATA_journal])
|
||||
prt_printf(&buf, " %s", ca->name);
|
||||
|
||||
bch_err(c, "%s", buf.buf);
|
||||
@ -709,7 +709,7 @@ static u64 journal_seq_to_flush(struct journal *j)
|
||||
static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct bch_fs_btree_cache *bc = &c->btree_cache;
|
||||
bool kthread = (current->flags & PF_KTHREAD) != 0;
|
||||
u64 seq_to_flush;
|
||||
size_t min_nr, min_key_cache, nr_flushed;
|
||||
|
||||
@ -369,7 +369,7 @@ static CLOSURE_CALLBACK(journal_write_done)
|
||||
|
||||
if (last_seq_ondisk_updated) {
|
||||
bch2_reset_alloc_cursors(c);
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
closure_wake_up(&c->allocator.freelist_wait);
|
||||
bch2_do_discards(c);
|
||||
}
|
||||
|
||||
@ -410,6 +410,11 @@ static CLOSURE_CALLBACK(journal_write_submit)
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
unsigned sectors = vstruct_sectors(w->data, c->block_bits);
|
||||
|
||||
event_inc_trace(c, journal_write, buf, ({
|
||||
prt_printf(&buf, "seq %llu\n", le64_to_cpu(w->data->seq));
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&w->key));
|
||||
}));
|
||||
|
||||
extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
|
||||
struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
|
||||
|
||||
@ -442,9 +447,6 @@ static CLOSURE_CALLBACK(journal_write_submit)
|
||||
|
||||
bch2_bio_map(bio, w->data, sectors << 9);
|
||||
|
||||
event_inc_trace(c, journal_write, buf,
|
||||
prt_printf(&buf, "seq %llu", le64_to_cpu(w->data->seq)));
|
||||
|
||||
closure_bio_submit(bio, cl);
|
||||
|
||||
ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
|
||||
@ -699,7 +701,7 @@ CLOSURE_CALLBACK(bch2_journal_write)
|
||||
closure_type(w, struct journal_buf, io);
|
||||
struct journal *j = container_of(w, struct journal, buf[w->idx]);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_free]);
|
||||
unsigned nr_rw_members = dev_mask_nr(&c->allocator.rw_devs[BCH_DATA_free]);
|
||||
int ret;
|
||||
|
||||
BUG_ON(!w->write_started);
|
||||
|
||||
@ -212,10 +212,10 @@ UPGRADE_TABLE_INCOMPAT()
|
||||
|
||||
static int have_stripes(struct bch_fs *c)
|
||||
{
|
||||
if (IS_ERR_OR_NULL(c->btree_roots_known[BTREE_ID_stripes].b))
|
||||
if (IS_ERR_OR_NULL(c->btree_cache.roots_known[BTREE_ID_stripes].b))
|
||||
return 0;
|
||||
|
||||
return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b);
|
||||
return !btree_node_fake(c->btree_cache.roots_known[BTREE_ID_stripes].b);
|
||||
}
|
||||
|
||||
int bch2_sb_set_upgrade_extra(struct bch_fs *c)
|
||||
|
||||
@ -107,9 +107,9 @@ void bch2_fs_errors_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
if (out->nr_tabstops < 3)
|
||||
printbuf_tabstop_push(out, 16);
|
||||
|
||||
guard(mutex)(&c->fsck_error_counts_lock);
|
||||
guard(mutex)(&c->errors.counts_lock);
|
||||
|
||||
bch_sb_errors_cpu *e = &c->fsck_error_counts;
|
||||
bch_sb_errors_cpu *e = &c->errors.counts;
|
||||
darray_for_each(*e, i) {
|
||||
bch2_sb_error_id_to_text(out, i->id);
|
||||
prt_tab(out);
|
||||
@ -122,7 +122,7 @@ void bch2_fs_errors_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
|
||||
void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err)
|
||||
{
|
||||
bch_sb_errors_cpu *e = &c->fsck_error_counts;
|
||||
bch_sb_errors_cpu *e = &c->errors.counts;
|
||||
struct bch_sb_error_entry_cpu n = {
|
||||
.id = err,
|
||||
.nr = 1,
|
||||
@ -130,7 +130,7 @@ void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err)
|
||||
};
|
||||
unsigned i;
|
||||
|
||||
guard(mutex)(&c->fsck_error_counts_lock);
|
||||
guard(mutex)(&c->errors.counts_lock);
|
||||
|
||||
for (i = 0; i < e->nr; i++) {
|
||||
if (err == e->data[i].id) {
|
||||
@ -150,9 +150,9 @@ void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err)
|
||||
|
||||
void bch2_sb_errors_from_cpu(struct bch_fs *c)
|
||||
{
|
||||
guard(mutex)(&c->fsck_error_counts_lock);
|
||||
guard(mutex)(&c->errors.counts_lock);
|
||||
|
||||
bch_sb_errors_cpu *src = &c->fsck_error_counts;
|
||||
bch_sb_errors_cpu *src = &c->errors.counts;
|
||||
struct bch_sb_field_errors *dst =
|
||||
bch2_sb_field_resize(&c->disk_sb, errors,
|
||||
bch2_sb_field_errors_u64s(src->nr));
|
||||
@ -166,12 +166,12 @@ void bch2_sb_errors_from_cpu(struct bch_fs *c)
|
||||
}
|
||||
}
|
||||
|
||||
static int bch2_sb_errors_to_cpu(struct bch_fs *c)
|
||||
int bch2_sb_errors_to_cpu(struct bch_fs *c)
|
||||
{
|
||||
guard(mutex)(&c->fsck_error_counts_lock);
|
||||
guard(mutex)(&c->errors.counts_lock);
|
||||
|
||||
struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors);
|
||||
bch_sb_errors_cpu *dst = &c->fsck_error_counts;
|
||||
bch_sb_errors_cpu *dst = &c->errors.counts;
|
||||
unsigned nr = bch2_sb_field_errors_nr_entries(src);
|
||||
|
||||
if (!nr)
|
||||
@@ -191,19 +191,3 @@ static int bch2_sb_errors_to_cpu(struct bch_fs *c)
 
 	return 0;
 }
-
-void bch2_fs_sb_errors_exit(struct bch_fs *c)
-{
-	darray_exit(&c->fsck_error_counts);
-}
-
-void bch2_fs_sb_errors_init_early(struct bch_fs *c)
-{
-	mutex_init(&c->fsck_error_counts_lock);
-	darray_init(&c->fsck_error_counts);
-}
-
-int bch2_fs_sb_errors_init(struct bch_fs *c)
-{
-	return bch2_sb_errors_to_cpu(c);
-}

@ -14,9 +14,6 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_errors;
|
||||
void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id);
|
||||
|
||||
void bch2_sb_errors_from_cpu(struct bch_fs *);
|
||||
|
||||
void bch2_fs_sb_errors_exit(struct bch_fs *);
|
||||
void bch2_fs_sb_errors_init_early(struct bch_fs *);
|
||||
int bch2_fs_sb_errors_init(struct bch_fs *);
|
||||
int bch2_sb_errors_to_cpu(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_SB_ERRORS_H */
|
||||
|
||||
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_SUPER_IO_H
-#define _BCACHEFS_SUPER_IO_H
+#ifndef _BCACHEFS_SB_IO_H
+#define _BCACHEFS_SB_IO_H
 
 #include "data/extents.h"
 #include "init/dev_types.h"
@@ -116,4 +116,4 @@ void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
 void bch2_sb_layout_to_text(struct printbuf *, struct bch_sb_layout *);
 void bch2_sb_to_text(struct printbuf *, struct bch_sb *, bool, unsigned);
 
-#endif /* _BCACHEFS_SUPER_IO_H */
+#endif /* _BCACHEFS_SB_IO_H */

libbcachefs/sb/io_types.h (new file, 33 lines)
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_IO_TYPES_H
+#define _BCACHEFS_SB_IO_TYPES_H
+
+/* Updated by bch2_sb_update():*/
+struct bch_sb_cpu {
+	__uuid_t		uuid;
+	__uuid_t		user_uuid;
+
+	u16			version;
+	u16			version_incompat;
+	u16			version_incompat_allowed;
+	u16			version_min;
+	u16			version_upgrade_complete;
+
+	u8			nr_devices;
+	u8			clean;
+	bool			multi_device; /* true if we've ever had more than one device */
+
+	u8			encryption_type;
+
+	u64			time_base_lo;
+	u32			time_base_hi;
+	unsigned		time_units_per_sec;
+	unsigned		nsec_per_time_unit;
+	u64			features;
+	u64			compat;
+	u64			recovery_passes_required;
+	unsigned long		errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
+	u64			btrees_lost_data;
+};
+
+#endif /* _BCACHEFS_SB_IO_TYPES_H */

@ -128,10 +128,10 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev *
|
||||
(_ca = __bch2_next_dev((_c), _ca, (_mask)));)
|
||||
|
||||
#define for_each_online_member_rcu(_c, _ca) \
|
||||
for_each_member_device_rcu(_c, _ca, &(_c)->online_devs)
|
||||
for_each_member_device_rcu(_c, _ca, &(_c)->devs_online)
|
||||
|
||||
#define for_each_rw_member_rcu(_c, _ca) \
|
||||
for_each_member_device_rcu(_c, _ca, &(_c)->rw_devs[BCH_DATA_free])
|
||||
for_each_member_device_rcu(_c, _ca, &(_c)->allocator.rw_devs[BCH_DATA_free])
|
||||
|
||||
static inline void bch2_dev_get(struct bch_dev *ca)
|
||||
{
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
|
||||
static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id)
|
||||
{
|
||||
guard(mutex)(&c->snapshot_table_lock);
|
||||
guard(mutex)(&c->snapshots.table_lock);
|
||||
return bch2_snapshot_t_mut(c, id)
|
||||
? 0
|
||||
: bch_err_throw(c, ENOMEM_mark_snapshot);
|
||||
@ -38,7 +38,7 @@ u32 bch2_snapshot_oldest_subvol(struct bch_fs *c, u32 snapshot_root,
|
||||
snapshot_id_list *skip)
|
||||
{
|
||||
guard(rcu)();
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots);
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
|
||||
|
||||
while (true) {
|
||||
u32 id = snapshot_root, subvol = 0;
|
||||
|
||||
@ -91,7 +91,7 @@ static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id,
|
||||
bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
|
||||
{
|
||||
guard(rcu)();
|
||||
return __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
|
||||
return __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots.table), id, ancestor);
|
||||
}
|
||||
|
||||
static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
|
||||
@ -125,7 +125,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
|
||||
#endif
|
||||
|
||||
guard(rcu)();
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots);
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
|
||||
|
||||
if (unlikely(recovery_pass_will_run(c, BCH_RECOVERY_PASS_check_snapshots)))
|
||||
return __bch2_snapshot_is_ancestor_early(t, id, ancestor);
|
||||
@ -159,25 +159,23 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
|
||||
|
||||
new->nr = new_size;
|
||||
|
||||
old = rcu_dereference_protected(c->snapshots, true);
|
||||
old = rcu_dereference_protected(c->snapshots.table, true);
|
||||
if (old)
|
||||
memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);
|
||||
|
||||
rcu_assign_pointer(c->snapshots, new);
|
||||
rcu_assign_pointer(c->snapshots.table, new);
|
||||
kvfree_rcu(old, rcu);
|
||||
|
||||
return &rcu_dereference_protected(c->snapshots,
|
||||
lockdep_is_held(&c->snapshot_table_lock))->s[idx];
|
||||
return &rcu_dereference_protected(c->snapshots.table,
|
||||
lockdep_is_held(&c->snapshots.table_lock))->s[idx];
|
||||
}
|
||||
|
||||
struct snapshot_t *bch2_snapshot_t_mut(struct bch_fs *c, u32 id)
|
||||
{
|
||||
size_t idx = U32_MAX - id;
|
||||
struct snapshot_table *table =
|
||||
rcu_dereference_protected(c->snapshots,
|
||||
lockdep_is_held(&c->snapshot_table_lock));
|
||||
|
||||
lockdep_assert_held(&c->snapshot_table_lock);
|
||||
rcu_dereference_protected(c->snapshots.table,
|
||||
lockdep_is_held(&c->snapshots.table_lock));
|
||||
|
||||
if (likely(table && idx < table->nr))
|
||||
return &table->s[idx];
|
||||
@ -278,7 +276,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
|
||||
struct snapshot_t *t;
|
||||
u32 id = new.k->p.offset;
|
||||
|
||||
guard(mutex)(&c->snapshot_table_lock);
|
||||
guard(mutex)(&c->snapshots.table_lock);
|
||||
|
||||
t = bch2_snapshot_t_mut(c, id);
|
||||
if (!t)
|
||||
@ -687,10 +685,10 @@ static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
|
||||
|
||||
static unsigned live_child(struct bch_fs *c, u32 start)
|
||||
{
|
||||
struct snapshot_delete *d = &c->snapshot_delete;
|
||||
struct snapshot_delete *d = &c->snapshots.delete;
|
||||
|
||||
guard(rcu)();
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots);
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
|
||||
|
||||
for (u32 id = bch2_snapshot_tree_next(t, start);
|
||||
id && id != start;
|
||||
@ -714,7 +712,7 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct snapshot_delete *d = &trans->c->snapshot_delete;
|
||||
struct snapshot_delete *d = &trans->c->snapshots.delete;
|
||||
|
||||
if (snapshot_list_has_id(&d->delete_leaves, k.k->p.snapshot))
|
||||
return bch2_btree_delete_at(trans, iter,
|
||||
@ -744,7 +742,7 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
|
||||
static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter, u64 *prev_inum)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct snapshot_delete *d = &c->snapshot_delete;
|
||||
struct snapshot_delete *d = &c->snapshots.delete;
|
||||
|
||||
u64 inum = iter->btree_id != BTREE_ID_inodes
|
||||
? iter->pos.inode
|
||||
@ -771,7 +769,7 @@ static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree
|
||||
static int delete_dead_snapshot_keys_v1(struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct snapshot_delete *d = &c->snapshot_delete;
|
||||
struct snapshot_delete *d = &c->snapshots.delete;
|
||||
|
||||
bch2_progress_init(&d->progress, c, btree_has_snapshots_mask);
|
||||
d->progress.silent = true;
|
||||
@ -820,7 +818,7 @@ static int delete_dead_snapshot_keys_range(struct btree_trans *trans,
|
||||
static int delete_dead_snapshot_keys_v2(struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct snapshot_delete *d = &c->snapshot_delete;
|
||||
struct snapshot_delete *d = &c->snapshots.delete;
|
||||
CLASS(disk_reservation, res)(c);
|
||||
u64 prev_inum = 0;
|
||||
|
||||
@ -893,7 +891,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
|
||||
return 0;
|
||||
|
||||
struct bch_fs *c = trans->c;
|
||||
struct snapshot_delete *d = &c->snapshot_delete;
|
||||
struct snapshot_delete *d = &c->snapshots.delete;
|
||||
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
|
||||
unsigned live_children = 0;
|
||||
|
||||
@ -937,7 +935,7 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
|
||||
interior_delete_list *skip)
|
||||
{
|
||||
guard(rcu)();
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots);
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
|
||||
|
||||
while (interior_delete_has_id(skip, id))
|
||||
id = __bch2_snapshot_parent(t, id);
|
||||
@ -1045,7 +1043,7 @@ static int delete_dead_snapshots_locked(struct bch_fs *c)
|
||||
try(for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
|
||||
check_should_delete_snapshot(trans, k)));
|
||||
|
||||
struct snapshot_delete *d = &c->snapshot_delete;
|
||||
struct snapshot_delete *d = &c->snapshots.delete;
|
||||
if (!d->delete_leaves.nr && !d->delete_interior.nr)
|
||||
return 0;
|
||||
|
||||
@ -1070,7 +1068,7 @@ static int delete_dead_snapshots_locked(struct bch_fs *c)
|
||||
|
||||
int __bch2_delete_dead_snapshots(struct bch_fs *c)
|
||||
{
|
||||
struct snapshot_delete *d = &c->snapshot_delete;
|
||||
struct snapshot_delete *d = &c->snapshots.delete;
|
||||
|
||||
if (!mutex_trylock(&d->lock))
|
||||
return 0;
|
||||
@ -1108,7 +1106,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
|
||||
|
||||
void bch2_delete_dead_snapshots_work(struct work_struct *work)
|
||||
{
|
||||
struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete.work);
|
||||
struct bch_fs *c = container_of(work, struct bch_fs, snapshots.delete.work);
|
||||
|
||||
set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
|
||||
|
||||
@ -1126,13 +1124,13 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *c)
|
||||
|
||||
BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
|
||||
|
||||
if (!queue_work(system_long_wq, &c->snapshot_delete.work))
|
||||
if (!queue_work(system_long_wq, &c->snapshots.delete.work))
|
||||
enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
|
||||
}
|
||||
|
||||
void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
struct snapshot_delete *d = &c->snapshot_delete;
|
||||
struct snapshot_delete *d = &c->snapshots.delete;
|
||||
|
||||
if (!d->running) {
|
||||
prt_str(out, "(not running)");
|
||||
@ -1272,13 +1270,17 @@ int bch2_snapshots_read(struct bch_fs *c)
|
||||
|
||||
void bch2_fs_snapshots_exit(struct bch_fs *c)
|
||||
{
|
||||
kvfree(rcu_dereference_protected(c->snapshots, true));
|
||||
kvfree(rcu_dereference_protected(c->snapshots.table, true));
|
||||
}
|
||||
|
||||
void bch2_fs_snapshots_init_early(struct bch_fs *c)
|
||||
{
|
||||
INIT_WORK(&c->snapshot_delete.work, bch2_delete_dead_snapshots_work);
|
||||
mutex_init(&c->snapshot_delete.lock);
|
||||
mutex_init(&c->snapshot_delete.progress_lock);
|
||||
mutex_init(&c->snapshots_unlinked_lock);
|
||||
mutex_init(&c->snapshots.table_lock);
|
||||
init_rwsem(&c->snapshots.create_lock);
|
||||
|
||||
INIT_WORK(&c->snapshots.delete.work, bch2_delete_dead_snapshots_work);
|
||||
mutex_init(&c->snapshots.delete.lock);
|
||||
mutex_init(&c->snapshots.delete.progress_lock);
|
||||
|
||||
mutex_init(&c->snapshots.unlinked_lock);
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
|
||||
|
||||
static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
|
||||
{
|
||||
return __snapshot_t(rcu_dereference(c->snapshots), id);
|
||||
return __snapshot_t(rcu_dereference(c->snapshots.table), id);
|
||||
}
|
||||
|
||||
struct snapshot_t *bch2_snapshot_t_mut(struct bch_fs *, u32);
|
||||
@ -96,13 +96,13 @@ static inline u32 __bch2_snapshot_parent(struct snapshot_table *t, u32 id)
|
||||
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
|
||||
{
|
||||
guard(rcu)();
|
||||
return __bch2_snapshot_parent(rcu_dereference(c->snapshots), id);
|
||||
return __bch2_snapshot_parent(rcu_dereference(c->snapshots.table), id);
|
||||
}
|
||||
|
||||
static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
|
||||
{
|
||||
guard(rcu)();
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots);
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
|
||||
|
||||
while (n--)
|
||||
id = __bch2_snapshot_parent(t, id);
|
||||
@ -115,7 +115,7 @@ u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32);
|
||||
static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
|
||||
{
|
||||
guard(rcu)();
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots);
|
||||
struct snapshot_table *t = rcu_dereference(c->snapshots.table);
|
||||
|
||||
u32 parent;
|
||||
while ((parent = __bch2_snapshot_parent(t, id)))
|
||||
@ -132,7 +132,7 @@ static inline enum snapshot_id_state __bch2_snapshot_id_state(struct snapshot_ta
|
||||
static inline enum snapshot_id_state bch2_snapshot_id_state(struct bch_fs *c, u32 id)
|
||||
{
|
||||
guard(rcu)();
|
||||
return __bch2_snapshot_id_state(rcu_dereference(c->snapshots), id);
|
||||
return __bch2_snapshot_id_state(rcu_dereference(c->snapshots.table), id);
|
||||
}
|
||||
|
||||
static inline bool __bch2_snapshot_exists(struct snapshot_table *t, u32 id)
|
||||
|
||||
@@ -56,4 +56,14 @@ struct snapshot_delete {
 	struct progress_indicator	progress;
 };
 
+struct bch_fs_snapshots {
+	struct snapshot_table __rcu	*table;
+	struct mutex			table_lock;
+	struct rw_semaphore		create_lock;
+	struct snapshot_delete		delete;
+	struct work_struct		wait_for_pagecache_and_delete_work;
+	snapshot_id_list		unlinked;
+	struct mutex			unlinked_lock;
+};
+
 #endif /* _BCACHEFS_SNAPSHOT_TYPES_H */

@ -460,15 +460,15 @@ static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
|
||||
static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
|
||||
{
|
||||
struct bch_fs *c = container_of(work, struct bch_fs,
|
||||
snapshot_wait_for_pagecache_and_delete_work);
|
||||
snapshots.wait_for_pagecache_and_delete_work);
|
||||
int ret = 0;
|
||||
|
||||
while (!ret) {
|
||||
snapshot_id_list s;
|
||||
|
||||
scoped_guard(mutex, &c->snapshots_unlinked_lock) {
|
||||
s = c->snapshots_unlinked;
|
||||
darray_init(&c->snapshots_unlinked);
|
||||
scoped_guard(mutex, &c->snapshots.unlinked_lock) {
|
||||
s = c->snapshots.unlinked;
|
||||
darray_init(&c->snapshots.unlinked);
|
||||
}
|
||||
|
||||
if (!s.nr)
|
||||
@ -502,14 +502,14 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans
|
||||
struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
scoped_guard(mutex, &c->snapshots_unlinked_lock)
|
||||
if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
|
||||
try(snapshot_list_add(c, &c->snapshots_unlinked, h->subvol));
|
||||
scoped_guard(mutex, &c->snapshots.unlinked_lock)
|
||||
if (!snapshot_list_has_id(&c->snapshots.unlinked, h->subvol))
|
||||
try(snapshot_list_add(c, &c->snapshots.unlinked, h->subvol));
|
||||
|
||||
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache))
|
||||
return -EROFS;
|
||||
|
||||
if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
|
||||
if (!queue_work(c->write_ref_wq, &c->snapshots.wait_for_pagecache_and_delete_work))
|
||||
enumerated_ref_put(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache);
|
||||
return 0;
|
||||
}
|
||||
@ -661,6 +661,6 @@ int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
|
||||
|
||||
void bch2_fs_subvolumes_init_early(struct bch_fs *c)
|
||||
{
|
||||
INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
|
||||
INIT_WORK(&c->snapshots.wait_for_pagecache_and_delete_work,
|
||||
bch2_subvolume_wait_for_pagecache_and_delete);
|
||||
}
|
||||
|
||||
@ -540,8 +540,8 @@ static bool can_write_now(struct bch_fs *c, unsigned replicas_want, struct closu
|
||||
unsigned reserved = OPEN_BUCKETS_COUNT -
|
||||
(OPEN_BUCKETS_COUNT - bch2_open_buckets_reserved(BCH_WATERMARK_normal)) / 2;
|
||||
|
||||
if (unlikely(c->open_buckets_nr_free <= reserved)) {
|
||||
closure_wait(&c->open_buckets_wait, cl);
|
||||
if (unlikely(c->allocator.open_buckets_nr_free <= reserved)) {
|
||||
closure_wait(&c->allocator.open_buckets_wait, cl);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -314,7 +314,7 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
|
||||
!arg.src_ptr)
|
||||
snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol;
|
||||
|
||||
scoped_guard(rwsem_write, &c->snapshot_create_lock)
|
||||
scoped_guard(rwsem_write, &c->snapshots.create_lock)
|
||||
inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
|
||||
dst_dentry, arg.mode|S_IFDIR,
|
||||
0, snapshot_src, create_flags);
|
||||
|
||||