Mirror of https://github.com/koverstreet/bcachefs-tools.git

Update bcachefs sources to 9e7ae5219c bcachefs: Make write points more dynamic

parent 74cb922032
commit 22291ae84a
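The gist of the change, condensed from the struct definitions later in this diff (an abridged sketch for orientation, not the complete definitions): each open bucket now tracks free space per pointer via struct open_bucket_ptr, and write points become dynamic entries in a small hash table (c->write_points_hash) keyed by an opaque write_point cookie, with the least recently used entry stolen and re-keyed when no match exists.

struct open_bucket_ptr {
	struct bch_extent_ptr	ptr;		/* device / offset / generation of the bucket */
	unsigned		sectors_free;	/* sectors still unwritten in this bucket */
};

struct write_point {
	struct hlist_node	node;		/* chained in c->write_points_hash */
	struct mutex		lock;
	u64			last_used;	/* LRU victim selection in writepoint_find() */
	unsigned long		write_point;	/* opaque cookie identifying the writer */
	enum bch_data_type	type;
	unsigned		sectors_free;	/* min sectors_free over the ptrs in use */
	struct open_bucket	*ob;
	/* ... (abridged) */
};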
@@ -1 +1 @@
661faf58dbcab87e512e64e7cb164905689e64c8
192d759a491f50d92c89c2e842639d2307c815a5

@@ -265,7 +265,7 @@ static void write_data(struct bch_fs *c,
if (ret)
die("error reserving space in new filesystem: %s", strerror(-ret));
bch2_write_op_init(&op, c, res, c->write_points,
bch2_write_op_init(&op, c, res, NULL, 0,
POS(dst_inode->bi_inum, dst_offset >> 9), NULL, 0);
closure_call(&op.cl, bch2_write, NULL, &cl);
closure_sync(&cl);

@@ -70,7 +70,7 @@ extern int register_refined_jiffies(long clock_tick_rate);
/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
static inline u64 local_clock(void)
static inline u64 sched_clock(void)
{
struct timespec ts;

@@ -79,6 +79,11 @@ static inline u64 local_clock(void)
return ((s64) ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
}
static inline u64 local_clock(void)
{
return sched_clock();
}
extern unsigned long clock_t_to_jiffies(unsigned long x);
extern u64 jiffies_64_to_clock_t(u64 x);
extern u64 nsec_to_clock_t(u64 x);

@@ -87,7 +92,7 @@ extern unsigned long nsecs_to_jiffies(u64 n);
static inline u64 get_jiffies_64(void)
{
return nsecs_to_jiffies64(local_clock());
return nsecs_to_jiffies64(sched_clock());
}
#define jiffies_64 get_jiffies_64()

@@ -1,8 +1,6 @@
#ifndef _LINUX_RCULIST_H
#define _LINUX_RCULIST_H
#ifdef __KERNEL__
/*
* RCU-protected list version
*/

@@ -671,5 +669,4 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \
&(pos)->member)), typeof(*(pos)), member))
#endif /* __KERNEL__ */
#endif

@@ -70,6 +70,7 @@
#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/sched/task.h>
#include <linux/sort.h>

@@ -1118,6 +1119,7 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
{
enum bucket_alloc_ret ret = NO_DEVICES;
struct dev_alloc_list devs_sorted;
u64 buckets_free;
unsigned i;
BUG_ON(nr_replicas > ARRAY_SIZE(ob->ptrs));

@@ -1127,46 +1129,55 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
rcu_read_lock();
devs_sorted = bch2_wp_alloc_list(c, wp, devs);
spin_lock(&ob->lock);
for (i = 0; i < devs_sorted.nr; i++) {
struct bch_dev *ca =
rcu_dereference(c->devs[devs_sorted.devs[i]]);
long bucket;
struct open_bucket_ptr ptr;
if (!ca)
continue;
bucket = bch2_bucket_alloc(c, ca, reserve);
if (bucket < 0) {
ret = FREELIST_EMPTY;
continue;
if (wp->type == BCH_DATA_USER &&
ca->open_buckets_partial_nr) {
ptr = ca->open_buckets_partial[--ca->open_buckets_partial_nr];
} else {
long bucket = bch2_bucket_alloc(c, ca, reserve);
if (bucket < 0) {
ret = FREELIST_EMPTY;
continue;
}
ptr = (struct open_bucket_ptr) {
.ptr.gen = ca->buckets[bucket].mark.gen,
.ptr.offset = bucket_to_sector(ca, bucket),
.ptr.dev = ca->dev_idx,
.sectors_free = ca->mi.bucket_size,
};
}
wp->next_alloc[ca->dev_idx] +=
div64_u64(U64_MAX, dev_buckets_free(ca) *
ca->mi.bucket_size);
bch2_wp_rescale(c, ca, wp);
__clear_bit(ca->dev_idx, devs->d);
/*
* open_bucket_add_buckets expects new pointers at the head of
* the list:
*/
BUG_ON(ob->nr_ptrs >= BCH_REPLICAS_MAX);
BUG_ON(ob->nr_ptrs >= ARRAY_SIZE(ob->ptrs));
memmove(&ob->ptrs[1],
&ob->ptrs[0],
ob->nr_ptrs * sizeof(ob->ptrs[0]));
memmove(&ob->ptr_offset[1],
&ob->ptr_offset[0],
ob->nr_ptrs * sizeof(ob->ptr_offset[0]));
ob->nr_ptrs++;
ob->ptrs[0] = (struct bch_extent_ptr) {
.gen = ca->buckets[bucket].mark.gen,
.offset = bucket_to_sector(ca, bucket),
.dev = ca->dev_idx,
};
ob->ptr_offset[0] = 0;
ob->ptrs[0] = ptr;
buckets_free = U64_MAX, dev_buckets_free(ca);
if (buckets_free)
wp->next_alloc[ca->dev_idx] +=
div64_u64(U64_MAX, buckets_free *
ca->mi.bucket_size);
else
wp->next_alloc[ca->dev_idx] = U64_MAX;
bch2_wp_rescale(c, ca, wp);
__clear_bit(ca->dev_idx, devs->d);
if (ob->nr_ptrs == nr_replicas) {
ret = ALLOC_SUCCESS;

@@ -1175,6 +1186,7 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
}
EBUG_ON(ret != ALLOC_SUCCESS && reserve == RESERVE_MOVINGGC);
spin_unlock(&ob->lock);
rcu_read_unlock();
return ret;
}

@@ -1242,24 +1254,45 @@ static int bch2_bucket_alloc_set(struct bch_fs *c, struct write_point *wp,
void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
{
const struct bch_extent_ptr *ptr;
const struct open_bucket_ptr *ptr;
u8 new_ob;
if (!atomic_dec_and_test(&ob->pin))
return;
spin_lock(&c->open_buckets_lock);
down_read(&c->alloc_gc_lock);
spin_lock(&ob->lock);
open_bucket_for_each_ptr(ob, ptr) {
struct bch_dev *ca = c->devs[ptr->dev];
struct bch_dev *ca = c->devs[ptr->ptr.dev];
bch2_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr), false);
if (ptr->sectors_free) {
/*
* This is a ptr to a bucket that still has free space,
* but we don't want to use it
*/
BUG_ON(ca->open_buckets_partial_nr >=
ARRAY_SIZE(ca->open_buckets_partial));
spin_lock(&ca->freelist_lock);
ca->open_buckets_partial[ca->open_buckets_partial_nr++]
= *ptr;
spin_unlock(&ca->freelist_lock);
} else {
bch2_mark_alloc_bucket(ca, PTR_BUCKET(ca, &ptr->ptr), false);
}
}
ob->nr_ptrs = 0;
spin_unlock(&ob->lock);
up_read(&c->alloc_gc_lock);
new_ob = ob->new_ob;
ob->new_ob = 0;
list_move(&ob->list, &c->open_buckets_free);
spin_lock(&c->open_buckets_lock);
ob->freelist = c->open_buckets_freelist;
c->open_buckets_freelist = ob - c->open_buckets;
c->open_buckets_nr_free++;
spin_unlock(&c->open_buckets_lock);
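The list_head-based free list is replaced above (and in bch2_open_bucket_get() just below) by an index-chained freelist over the fixed open_buckets array, with indices small enough to fit in a u8 and slot 0 acting as a NULL sentinel. A minimal standalone illustration of the same pattern, with made-up toy types rather than the bcachefs ones:

/* Toy index-chained freelist over a fixed array; slot 0 is reserved as
 * a sentinel meaning "end of list" / "empty", so it never enters the chain. */
struct slot {
	unsigned char	freelist;	/* index of next free slot, 0 = end of chain */
};

struct pool {
	struct slot	slots[256];
	unsigned char	freelist;	/* index of first free slot, 0 = pool empty */
};

static void pool_put(struct pool *p, struct slot *s)
{
	s->freelist = p->freelist;
	p->freelist = s - p->slots;	/* store the array index, not a pointer */
}

static struct slot *pool_get(struct pool *p)
{
	struct slot *s;

	if (!p->freelist)
		return NULL;		/* nothing free */
	s = &p->slots[p->freelist];
	p->freelist = s->freelist;	/* unlink from the chain */
	return s;
}

static void pool_init(struct pool *p)
{
	unsigned i;

	p->freelist = 0;
	for (i = 1; i < sizeof(p->slots) / sizeof(p->slots[0]); i++)
		pool_put(p, &p->slots[i]);
}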
@@ -1270,22 +1303,19 @@ void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
}
static struct open_bucket *bch2_open_bucket_get(struct bch_fs *c,
unsigned nr_reserved,
struct closure *cl)
unsigned nr_reserved,
struct closure *cl)
{
struct open_bucket *ret;
spin_lock(&c->open_buckets_lock);
if (c->open_buckets_nr_free > nr_reserved) {
BUG_ON(list_empty(&c->open_buckets_free));
ret = list_first_entry(&c->open_buckets_free,
struct open_bucket, list);
list_move(&ret->list, &c->open_buckets_open);
BUG_ON(ret->nr_ptrs);
BUG_ON(!c->open_buckets_freelist);
ret = c->open_buckets + c->open_buckets_freelist;
c->open_buckets_freelist = ret->freelist;
atomic_set(&ret->pin, 1); /* XXX */
ret->has_full_ptrs = false;
BUG_ON(ret->new_ob);
BUG_ON(ret->nr_ptrs);

@@ -1307,148 +1337,259 @@ static struct open_bucket *bch2_open_bucket_get(struct bch_fs *c,
return ret;
}
static unsigned ob_ptr_sectors_free(struct bch_fs *c,
struct open_bucket *ob,
struct bch_extent_ptr *ptr)
{
struct bch_dev *ca = c->devs[ptr->dev];
unsigned i = ptr - ob->ptrs;
unsigned used = bucket_remainder(ca, ptr->offset) +
ob->ptr_offset[i];
BUG_ON(used > ca->mi.bucket_size);
return ca->mi.bucket_size - used;
}
static unsigned open_bucket_sectors_free(struct bch_fs *c,
struct open_bucket *ob,
unsigned nr_replicas)
{
unsigned i, sectors_free = UINT_MAX;
unsigned sectors_free = UINT_MAX;
struct open_bucket_ptr *ptr;
for (i = 0; i < min(nr_replicas, ob->nr_ptrs); i++)
sectors_free = min(sectors_free,
ob_ptr_sectors_free(c, ob, &ob->ptrs[i]));
open_bucket_for_each_ptr(ob, ptr)
sectors_free = min(sectors_free, ptr->sectors_free);
return sectors_free != UINT_MAX ? sectors_free : 0;
}
static void open_bucket_copy_unused_ptrs(struct bch_fs *c,
struct open_bucket *new,
struct open_bucket *old)
static void open_bucket_move_ptrs(struct bch_fs *c,
struct open_bucket *dst,
struct open_bucket *src,
struct bch_devs_mask *devs,
unsigned nr_ptrs_dislike)
{
bool moved_ptr = false;
int i;
for (i = old->nr_ptrs - 1; i >= 0; --i)
if (ob_ptr_sectors_free(c, old, &old->ptrs[i])) {
BUG_ON(new->nr_ptrs >= BCH_REPLICAS_MAX);
down_read(&c->alloc_gc_lock);
new->ptrs[new->nr_ptrs] = old->ptrs[i];
new->ptr_offset[new->nr_ptrs] = old->ptr_offset[i];
new->nr_ptrs++;
if (dst < src) {
spin_lock(&dst->lock);
spin_lock_nested(&src->lock, 1);
} else {
spin_lock(&src->lock);
spin_lock_nested(&dst->lock, 1);
}
old->nr_ptrs--;
memmove(&old->ptrs[i],
&old->ptrs[i + 1],
(old->nr_ptrs - i) * sizeof(old->ptrs[0]));
memmove(&old->ptr_offset[i],
&old->ptr_offset[i + 1],
(old->nr_ptrs - i) * sizeof(old->ptr_offset[0]));
for (i = src->nr_ptrs - 1; i >= 0; --i) {
if (!src->ptrs[i].sectors_free) {
/*
* Don't do anything: leave the ptr on the old
* open_bucket for gc to find
*/
} else if (nr_ptrs_dislike &&
!test_bit(src->ptrs[i].ptr.dev, devs->d)) {
/*
* We don't want this pointer; bch2_open_bucket_put()
* will stick it on ca->open_buckets_partial to be
* reused
*/
--nr_ptrs_dislike;
} else {
BUG_ON(dst->nr_ptrs >= ARRAY_SIZE(dst->ptrs));
dst->ptrs[dst->nr_ptrs++] = src->ptrs[i];
src->nr_ptrs--;
memmove(&src->ptrs[i],
&src->ptrs[i + 1],
(src->nr_ptrs - i) * sizeof(src->ptrs[0]));
moved_ptr = true;
}
}
if (moved_ptr) {
BUG_ON(old->new_ob);
BUG_ON(src->new_ob);
atomic_inc(&new->pin);
old->new_ob = new - c->open_buckets;
atomic_inc(&dst->pin);
src->new_ob = dst - c->open_buckets;
}
spin_unlock(&dst->lock);
spin_unlock(&src->lock);
up_read(&c->alloc_gc_lock);
}
static void verify_not_stale(struct bch_fs *c, const struct open_bucket *ob)
{
#ifdef CONFIG_BCACHEFS_DEBUG
const struct bch_extent_ptr *ptr;
const struct open_bucket_ptr *ptr;
open_bucket_for_each_ptr(ob, ptr) {
struct bch_dev *ca = c->devs[ptr->dev];
struct bch_dev *ca = c->devs[ptr->ptr.dev];
BUG_ON(ptr_stale(ca, ptr));
BUG_ON(ptr_stale(ca, &ptr->ptr));
}
#endif
}
/* Sector allocator */
static struct open_bucket *lock_writepoint(struct bch_fs *c,
struct write_point *wp)
{
struct open_bucket *ob;
while ((ob = ACCESS_ONCE(wp->b))) {
mutex_lock(&ob->lock);
if (wp->b == ob)
break;
mutex_unlock(&ob->lock);
}
return ob;
}
static int open_bucket_add_buckets(struct bch_fs *c,
struct write_point *wp,
struct bch_devs_mask *_devs,
struct open_bucket *ob,
unsigned nr_replicas,
unsigned nr_replicas_required,
enum alloc_reserve reserve,
struct closure *cl)
{
struct bch_devs_mask devs = c->rw_devs[wp->type];
unsigned i;
int ret;
struct open_bucket_ptr *ptr;
if (ob->nr_ptrs >= nr_replicas)
return 0;
if (_devs)
bitmap_and(devs.d, devs.d, _devs->d, BCH_SB_MEMBERS_MAX);
/* Don't allocate from devices we already have pointers to: */
for (i = 0; i < ob->nr_ptrs; i++)
__clear_bit(ob->ptrs[i].dev, devs.d);
open_bucket_for_each_ptr(ob, ptr)
if (ptr->sectors_free)
__clear_bit(ptr->ptr.dev, devs.d);
if (wp->group)
bitmap_and(devs.d, devs.d, wp->group->d, BCH_SB_MEMBERS_MAX);
return bch2_bucket_alloc_set(c, wp, ob, nr_replicas,
reserve, &devs, cl);
}
ret = bch2_bucket_alloc_set(c, wp, ob, nr_replicas,
reserve, &devs, cl);
static struct write_point *__writepoint_find(struct hlist_head *head,
unsigned long write_point)
{
struct write_point *wp;
if (ret == -EROFS &&
ob->nr_ptrs >= nr_replicas_required)
ret = 0;
hlist_for_each_entry_rcu(wp, head, node) {
if (wp->write_point == write_point)
continue;
return ret;
mutex_lock(&wp->lock);
if (wp->write_point == write_point)
return wp;
mutex_unlock(&wp->lock);
}
return NULL;
}
static struct hlist_head *writepoint_hash(struct bch_fs *c,
unsigned long write_point)
{
unsigned hash =
hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
return &c->write_points_hash[hash];
}
static struct write_point *writepoint_find(struct bch_fs *c,
enum bch_data_type data_type,
unsigned long write_point)
{
struct write_point *wp, *oldest = NULL;
struct hlist_head *head;
switch (data_type) {
case BCH_DATA_BTREE:
wp = &c->btree_write_point;
mutex_lock(&wp->lock);
return wp;
case BCH_DATA_USER:
break;
default:
BUG();
}
head = writepoint_hash(c, write_point);
wp = __writepoint_find(head, write_point);
if (wp)
goto out;
mutex_lock(&c->write_points_hash_lock);
wp = __writepoint_find(head, write_point);
if (wp)
goto out_unlock;
for (wp = c->write_points;
wp < c->write_points + ARRAY_SIZE(c->write_points);
wp++)
if (!oldest || time_before64(wp->last_used, oldest->last_used))
oldest = wp;
wp = oldest;
BUG_ON(!wp);
mutex_lock(&wp->lock);
hlist_del_rcu(&wp->node);
wp->write_point = write_point;
hlist_add_head_rcu(&wp->node, head);
out_unlock:
mutex_unlock(&c->write_points_hash_lock);
out:
wp->last_used = sched_clock();
return wp;
}
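writepoint_find() above is a double-checked lookup: a lockless probe of the hash chain, a re-check under write_points_hash_lock, and only then stealing and re-keying the least recently used write point. Condensed to its skeleton (not standalone code; the BCH_DATA_BTREE special case and per-write-point locking are omitted, and steal_lru_write_point() is a made-up name for the linear scan shown above):

head = writepoint_hash(c, write_point);
wp = __writepoint_find(head, write_point);		/* lockless: RCU hlist walk */
if (!wp) {
	mutex_lock(&c->write_points_hash_lock);
	wp = __writepoint_find(head, write_point);	/* re-check under the lock */
	if (!wp)
		wp = steal_lru_write_point(c, head, write_point);
	mutex_unlock(&c->write_points_hash_lock);
}
wp->last_used = sched_clock();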
/*
* Get us an open_bucket we can allocate from, return with it locked:
*/
struct open_bucket *bch2_alloc_sectors_start(struct bch_fs *c,
struct write_point *wp,
struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
enum bch_data_type data_type,
struct bch_devs_mask *devs,
unsigned long write_point,
unsigned nr_replicas,
unsigned nr_replicas_required,
enum alloc_reserve reserve,
unsigned flags,
struct closure *cl)
{
struct open_bucket *ob;
unsigned open_buckets_reserved = wp == &c->btree_write_point
struct write_point *wp;
struct open_bucket_ptr *ptr;
unsigned open_buckets_reserved = data_type == BCH_DATA_BTREE
? 0 : BTREE_NODE_RESERVE;
unsigned nr_ptrs_empty = 0, nr_ptrs_dislike = 0;
int ret;
BUG_ON(!nr_replicas);
retry:
ob = lock_writepoint(c, wp);
wp = writepoint_find(c, data_type, write_point);
BUG_ON(wp->type != data_type);
wp->last_used = sched_clock();
ob = wp->ob;
/* does ob have ptrs we don't need? */
open_bucket_for_each_ptr(ob, ptr) {
if (!ptr->sectors_free)
nr_ptrs_empty++;
else if (devs && !test_bit(ptr->ptr.dev, devs->d))
nr_ptrs_dislike++;
}
ret = open_bucket_add_buckets(c, wp, devs, ob,
nr_replicas + nr_ptrs_empty + nr_ptrs_dislike,
reserve, cl);
if (ret && ret != -EROFS)
goto err;
if (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)
goto alloc_done;
/*
* XXX:
* Should this allocation be _forced_ to used the specified device (e.g.
* internal migration), or should we fall back to allocating from all
* devices?
*/
ret = open_bucket_add_buckets(c, wp, NULL, ob,
nr_replicas + nr_ptrs_empty,
reserve, cl);
if (ret && ret != -EROFS)
goto err;
alloc_done:
if (ob->nr_ptrs - nr_ptrs_empty -
((flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) ? nr_ptrs_dislike : 0)
< nr_replicas_required) {
ret = -EROFS;
goto err;
}
/*
* If ob->sectors_free == 0, one or more of the buckets ob points to is

@@ -1456,53 +1597,34 @@ retry:
* still needs to find them; instead, we must allocate a new open bucket
* and copy any pointers to non-full buckets into the new open bucket.
*/
if (!ob || ob->has_full_ptrs) {
struct open_bucket *new_ob;
BUG_ON(ob->nr_ptrs - nr_ptrs_empty - nr_replicas > nr_ptrs_dislike);
nr_ptrs_dislike = ob->nr_ptrs - nr_ptrs_empty - nr_replicas;
new_ob = bch2_open_bucket_get(c, open_buckets_reserved, cl);
if (IS_ERR(new_ob))
return new_ob;
mutex_lock(&new_ob->lock);
/*
* We point the write point at the open_bucket before doing the
* allocation to avoid a race with shutdown:
*/
if (race_fault() ||
cmpxchg(&wp->b, ob, new_ob) != ob) {
/* We raced: */
mutex_unlock(&new_ob->lock);
bch2_open_bucket_put(c, new_ob);
if (ob)
mutex_unlock(&ob->lock);
goto retry;
if (nr_ptrs_empty || nr_ptrs_dislike) {
ob = bch2_open_bucket_get(c, open_buckets_reserved, cl);
if (IS_ERR(ob)) {
ret = PTR_ERR(ob);
goto err;
}
if (ob) {
open_bucket_copy_unused_ptrs(c, new_ob, ob);
mutex_unlock(&ob->lock);
bch2_open_bucket_put(c, ob);
}
/* Remove pointers we don't want to use: */
ob = new_ob;
open_bucket_move_ptrs(c, ob, wp->ob, devs, nr_ptrs_dislike);
bch2_open_bucket_put(c, wp->ob);
wp->ob = ob;
}
ret = open_bucket_add_buckets(c, wp, ob, nr_replicas,
nr_replicas_required,
reserve, cl);
if (ret) {
mutex_unlock(&ob->lock);
return ERR_PTR(ret);
}
BUG_ON(ob->nr_ptrs < nr_replicas_required);
ob->sectors_free = open_bucket_sectors_free(c, ob, nr_replicas);
wp->sectors_free = open_bucket_sectors_free(c, ob, nr_replicas);
BUG_ON(!ob->sectors_free);
BUG_ON(!wp->sectors_free);
verify_not_stale(c, ob);
return ob;
return wp;
err:
mutex_unlock(&wp->lock);
return ERR_PTR(ret);
}
/*

@@ -1514,29 +1636,26 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct bkey_i_extent *e,
unsigned sectors)
{
struct bch_extent_ptr tmp;
bool has_data = false;
unsigned i;
struct open_bucket_ptr *ptr;
/*
* We're keeping any existing pointer k has, and appending new pointers:
* __bch2_write() will only write to the pointers we add here:
*/
BUG_ON(sectors > ob->sectors_free);
for (ptr = ob->ptrs;
ptr < ob->ptrs + min_t(u8, ob->nr_ptrs, nr_replicas); ptr++) {
struct bch_dev *ca = c->devs[ptr->ptr.dev];
/* didn't use all the ptrs: */
if (nr_replicas < ob->nr_ptrs)
has_data = true;
EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ptr->ptr.dev));
for (i = 0; i < min(ob->nr_ptrs, nr_replicas); i++) {
EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ob->ptrs[i].dev));
tmp = ob->ptrs[i];
tmp = ptr->ptr;
tmp.cached = bkey_extent_is_cached(&e->k);
tmp.offset += ob->ptr_offset[i];
tmp.offset += ca->mi.bucket_size - ptr->sectors_free;
extent_ptr_append(e, tmp);
ob->ptr_offset[i] += sectors;
BUG_ON(sectors > ptr->sectors_free);
ptr->sectors_free -= sectors;
}
}

@@ -1544,25 +1663,27 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct bkey_i_extent *e,
* Append pointers to the space we just allocated to @k, and mark @sectors space
* as allocated out of @ob
*/
void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp,
struct open_bucket *ob)
void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
{
bool has_data = false;
unsigned i;
struct open_bucket *ob = wp->ob, *new_ob = NULL;
struct open_bucket_ptr *ptr;
bool empty = false;
for (i = 0; i < ob->nr_ptrs; i++) {
if (!ob_ptr_sectors_free(c, ob, &ob->ptrs[i]))
ob->has_full_ptrs = true;
else
has_data = true;
open_bucket_for_each_ptr(ob, ptr)
empty |= !ptr->sectors_free;
if (empty)
new_ob = bch2_open_bucket_get(c, 0, NULL);
if (!IS_ERR_OR_NULL(new_ob)) {
/* writepoint's ref becomes our ref: */
wp->ob = new_ob;
open_bucket_move_ptrs(c, new_ob, ob, 0, 0);
} else {
atomic_inc(&ob->pin);
}
if (likely(has_data))
atomic_inc(&ob->pin);
else
BUG_ON(xchg(&wp->b, NULL) != ob);
mutex_unlock(&ob->lock);
mutex_unlock(&wp->lock);
}
/*

@@ -1583,27 +1704,33 @@ void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp,
* @cl - closure to wait for a bucket
*/
struct open_bucket *bch2_alloc_sectors(struct bch_fs *c,
struct write_point *wp,
enum bch_data_type data_type,
struct bch_devs_mask *devs,
unsigned long write_point,
struct bkey_i_extent *e,
unsigned nr_replicas,
unsigned nr_replicas_required,
enum alloc_reserve reserve,
unsigned flags,
struct closure *cl)
{
struct write_point *wp;
struct open_bucket *ob;
ob = bch2_alloc_sectors_start(c, wp, nr_replicas,
nr_replicas_required,
reserve, cl);
if (IS_ERR_OR_NULL(ob))
return ob;
wp = bch2_alloc_sectors_start(c, data_type, devs, write_point,
nr_replicas, nr_replicas_required,
reserve, flags, cl);
if (IS_ERR_OR_NULL(wp))
return ERR_CAST(wp);
if (e->k.size > ob->sectors_free)
bch2_key_resize(&e->k, ob->sectors_free);
ob = wp->ob;
if (e->k.size > wp->sectors_free)
bch2_key_resize(&e->k, wp->sectors_free);
bch2_alloc_sectors_append_ptrs(c, e, nr_replicas, ob, e->k.size);
bch2_alloc_sectors_done(c, wp, ob);
bch2_alloc_sectors_done(c, wp);
return ob;
}
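Taken together, the bch2_alloc_sectors() wrapper above also serves as a template for callers of the split API after this change: a writer no longer passes a struct write_point *, it passes a data type, an optional device mask, and an opaque cookie. A hypothetical caller would look roughly like this (the devs mask, replica counts, reserve, and flags are placeholders and error handling is abbreviated):

struct write_point *wp;

wp = bch2_alloc_sectors_start(c, BCH_DATA_USER,
			      NULL,			/* any rw device */
			      (unsigned long) current,	/* write point cookie */
			      nr_replicas, nr_replicas_required,
			      reserve, flags, cl);
if (IS_ERR_OR_NULL(wp))
	return;

if (e->k.size > wp->sectors_free)
	bch2_key_resize(&e->k, wp->sectors_free);

bch2_alloc_sectors_append_ptrs(c, e, nr_replicas, wp->ob, e->k.size);
bch2_alloc_sectors_done(c, wp);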
@@ -1640,8 +1767,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
}
c->fastest_tier = fastest_tier != slowest_tier ? fastest_tier : NULL;
c->promote_write_point.group = &fastest_tier->devs;
c->fastest_devs = fastest_tier != slowest_tier ? &fastest_tier->devs : NULL;
if (!fastest_tier)
goto set_capacity;

@@ -1713,49 +1839,61 @@ set_capacity:
closure_wake_up(&c->freelist_wait);
}
static bool open_bucket_has_device(struct open_bucket *ob,
struct bch_dev *ca)
{
struct open_bucket_ptr *ptr;
bool ret = false;
spin_lock(&ob->lock);
open_bucket_for_each_ptr(ob, ptr)
ret |= ptr->ptr.dev == ca->dev_idx;
spin_unlock(&ob->lock);
return ret;
}
static void bch2_stop_write_point(struct bch_fs *c, struct bch_dev *ca,
struct write_point *wp)
{
struct open_bucket *ob;
struct bch_extent_ptr *ptr;
struct closure cl;
ob = lock_writepoint(c, wp);
if (!ob)
closure_init_stack(&cl);
retry:
mutex_lock(&wp->lock);
if (!open_bucket_has_device(wp->ob, ca)) {
mutex_unlock(&wp->lock);
return;
}
for (ptr = ob->ptrs; ptr < ob->ptrs + ob->nr_ptrs; ptr++)
if (ptr->dev == ca->dev_idx)
goto found;
ob = bch2_open_bucket_get(c, 0, &cl);
if (IS_ERR(ob)) {
mutex_unlock(&wp->lock);
closure_sync(&cl);
goto retry;
mutex_unlock(&ob->lock);
return;
found:
BUG_ON(xchg(&wp->b, NULL) != ob);
mutex_unlock(&ob->lock);
}
/* Drop writepoint's ref: */
bch2_open_bucket_put(c, ob);
open_bucket_move_ptrs(c, ob, wp->ob, &ca->self, ob->nr_ptrs);
bch2_open_bucket_put(c, wp->ob);
wp->ob = ob;
mutex_unlock(&wp->lock);
}
static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
{
struct bch_extent_ptr *ptr;
struct open_bucket *ob;
bool ret = false;
for (ob = c->open_buckets;
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
ob++)
if (atomic_read(&ob->pin)) {
mutex_lock(&ob->lock);
for (ptr = ob->ptrs; ptr < ob->ptrs + ob->nr_ptrs; ptr++)
if (ptr->dev == ca->dev_idx) {
mutex_unlock(&ob->lock);
return true;
}
mutex_unlock(&ob->lock);
}
if (atomic_read(&ob->pin))
ret |= open_bucket_has_device(ob, ca);
return false;
return ret;
}
/* device goes ro: */

@@ -1782,11 +1920,6 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
/* Next, close write points that point to this device... */
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
bch2_stop_write_point(c, ca, &c->write_points[i]);
bch2_stop_write_point(c, ca, &ca->copygc_write_point);
bch2_stop_write_point(c, ca, &c->promote_write_point);
bch2_stop_write_point(c, ca, &c->tiers[ca->mi.tier].wp);
bch2_stop_write_point(c, ca, &c->migration_write_point);
bch2_stop_write_point(c, ca, &c->btree_write_point);
mutex_lock(&c->btree_reserve_cache_lock);

@@ -1880,35 +2013,44 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
void bch2_fs_allocator_init(struct bch_fs *c)
{
unsigned i;
struct open_bucket *ob;
struct write_point *wp;
INIT_LIST_HEAD(&c->open_buckets_open);
INIT_LIST_HEAD(&c->open_buckets_free);
mutex_init(&c->write_points_hash_lock);
init_rwsem(&c->alloc_gc_lock);
spin_lock_init(&c->open_buckets_lock);
bch2_prio_timer_init(c, READ);
bch2_prio_timer_init(c, WRITE);
/* open bucket 0 is a sentinal NULL: */
mutex_init(&c->open_buckets[0].lock);
INIT_LIST_HEAD(&c->open_buckets[0].list);
spin_lock_init(&c->open_buckets[0].lock);
for (i = 1; i < ARRAY_SIZE(c->open_buckets); i++) {
mutex_init(&c->open_buckets[i].lock);
for (ob = c->open_buckets + 1;
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
spin_lock_init(&ob->lock);
c->open_buckets_nr_free++;
list_add(&c->open_buckets[i].list, &c->open_buckets_free);
ob->freelist = c->open_buckets_freelist;
c->open_buckets_freelist = ob - c->open_buckets;
}
c->journal.wp.type = BCH_DATA_JOURNAL;
mutex_init(&c->btree_write_point.lock);
c->btree_write_point.type = BCH_DATA_BTREE;
c->btree_write_point.ob = bch2_open_bucket_get(c, 0, NULL);
BUG_ON(IS_ERR(c->btree_write_point.ob));
for (i = 0; i < ARRAY_SIZE(c->tiers); i++)
c->tiers[i].wp.type = BCH_DATA_USER;
for (wp = c->write_points;
wp < c->write_points + ARRAY_SIZE(c->write_points); wp++) {
mutex_init(&wp->lock);
wp->type = BCH_DATA_USER;
wp->ob = bch2_open_bucket_get(c, 0, NULL);
wp->last_used = sched_clock();
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
c->write_points[i].type = BCH_DATA_USER;
wp->write_point = (unsigned long) wp;
hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
c->promote_write_point.type = BCH_DATA_USER;
c->migration_write_point.type = BCH_DATA_USER;
BUG_ON(IS_ERR(wp->ob));
}
c->pd_controllers_update_seconds = 5;
INIT_DELAYED_WORK(&c->pd_controllers_update, pd_controllers_update);

@@ -28,20 +28,28 @@ long bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, enum alloc_reserve);
void bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);
struct open_bucket *bch2_alloc_sectors_start(struct bch_fs *,
struct write_point *,
unsigned, unsigned,
enum alloc_reserve,
struct closure *);
struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
enum bch_data_type,
struct bch_devs_mask *,
unsigned long,
unsigned, unsigned,
enum alloc_reserve,
unsigned,
struct closure *);
void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct bkey_i_extent *,
unsigned, struct open_bucket *, unsigned);
void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *,
struct open_bucket *);
void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
struct open_bucket *bch2_alloc_sectors(struct bch_fs *, struct write_point *,
struct bkey_i_extent *, unsigned, unsigned,
enum alloc_reserve, struct closure *);
struct open_bucket *bch2_alloc_sectors(struct bch_fs *,
enum bch_data_type,
struct bch_devs_mask *,
unsigned long,
struct bkey_i_extent *,
unsigned, unsigned,
enum alloc_reserve,
unsigned,
struct closure *);
static inline void bch2_wake_allocator(struct bch_dev *ca)
{

@@ -2,6 +2,7 @@
#define _BCACHEFS_ALLOC_TYPES_H
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include "clock_types.h"

@@ -44,39 +45,34 @@ enum alloc_reserve {
/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */
#define OPEN_BUCKETS_COUNT 256
#define WRITE_POINT_COUNT 32
#define WRITE_POINT_COUNT 16
struct open_bucket_ptr {
struct bch_extent_ptr ptr;
unsigned sectors_free;
};
struct open_bucket {
struct list_head list;
struct mutex lock;
spinlock_t lock;
atomic_t pin;
bool has_full_ptrs;
u8 freelist;
u8 new_ob;
u8 nr_ptrs;
/*
* recalculated every time we allocate from this open_bucket based on
* how many pointers we're actually going to use:
*/
unsigned sectors_free;
unsigned nr_ptrs;
struct bch_extent_ptr ptrs[BCH_REPLICAS_MAX];
unsigned ptr_offset[BCH_REPLICAS_MAX];
struct open_bucket_ptr ptrs[BCH_REPLICAS_MAX * 2];
};
struct write_point {
struct open_bucket *b;
struct hlist_node node;
struct mutex lock;
u64 last_used;
unsigned long write_point;
enum bch_data_type type;
/*
* If not NULL, cache group for tiering, promotion and moving GC -
* always allocates a single replica
*
* Otherwise do a normal replicated bucket allocation that could come
* from any device in tier 0 (foreground write)
*/
struct bch_devs_mask *group;
/* calculated based on how many pointers we're actually going to use: */
unsigned sectors_free;
struct open_bucket *ob;
u64 next_alloc[BCH_SB_MEMBERS_MAX];
};
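One consequence of the open_bucket_ptr definition above: the old per-pointer ptr_offset[] array disappears, and the write offset within a bucket is derived from sectors_free instead, which is how bch2_alloc_sectors_append_ptrs() earlier in this diff computes tmp.offset. A toy example of that bookkeeping, with invented numbers:

unsigned bucket_size  = 128;				/* sectors in the bucket */
unsigned sectors_free = 96;				/* 32 sectors already written */
unsigned write_offset = bucket_size - sectors_free;	/* == 32 */

/* appending a 16 sector extent: */
sectors_free -= 16;					/* 80 left; next write_offset == 48 */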
@@ -392,6 +392,9 @@ struct bch_dev {
unsigned nr_invalidated;
bool alloc_thread_started;
struct open_bucket_ptr open_buckets_partial[BCH_REPLICAS_MAX * WRITE_POINT_COUNT];
unsigned open_buckets_partial_nr;
size_t fifo_last_bucket;
/* Allocation stuff: */

@@ -426,8 +429,6 @@ struct bch_dev {
struct bch_pd_controller moving_gc_pd;
struct write_point copygc_write_point;
struct journal_device journal;
struct work_struct io_error_work;

@@ -472,7 +473,6 @@ struct bch_tier {
struct bch_pd_controller pd;
struct bch_devs_mask devs;
struct write_point wp;
};
enum bch_fs_state {

@@ -546,40 +546,7 @@ struct bch_fs {
struct btree_root btree_roots[BTREE_ID_NR];
struct mutex btree_root_lock;
bool btree_cache_table_init_done;
struct rhashtable btree_cache_table;
/*
* We never free a struct btree, except on shutdown - we just put it on
* the btree_cache_freed list and reuse it later. This simplifies the
* code, and it doesn't cost us much memory as the memory usage is
* dominated by buffers that hold the actual btree node data and those
* can be freed - and the number of struct btrees allocated is
* effectively bounded.
*
* btree_cache_freeable effectively is a small cache - we use it because
* high order page allocations can be rather expensive, and it's quite
* common to delete and allocate btree nodes in quick succession. It
* should never grow past ~2-3 nodes in practice.
*/
struct mutex btree_cache_lock;
struct list_head btree_cache;
struct list_head btree_cache_freeable;
struct list_head btree_cache_freed;
/* Number of elements in btree_cache + btree_cache_freeable lists */
unsigned btree_cache_used;
unsigned btree_cache_reserve;
struct shrinker btree_cache_shrink;
/*
* If we need to allocate memory for a new btree node and that
* allocation fails, we can cannibalize another node in the btree cache
* to satisfy the allocation - lock to guarantee only one thread does
* this at a time:
*/
struct closure_waitlist mca_wait;
struct task_struct *btree_cache_alloc_lock;
struct btree_cache btree_cache;
mempool_t btree_reserve_pool;

@@ -606,6 +573,7 @@ struct bch_fs {
struct workqueue_struct *copygc_wq;
/* ALLOCATION */
struct rw_semaphore alloc_gc_lock;
struct bch_pd_controller foreground_write_pd;
struct delayed_work pd_controllers_update;
unsigned pd_controllers_update_seconds;

@@ -622,6 +590,7 @@ struct bch_fs {
struct bch_devs_mask rw_devs[BCH_DATA_NR];
struct bch_tier tiers[BCH_TIER_MAX];
/* NULL if we only have devices in one tier: */
struct bch_devs_mask *fastest_devs;
struct bch_tier *fastest_tier;
u64 capacity; /* sectors */

@@ -654,17 +623,17 @@ struct bch_fs {
struct io_clock io_clock[2];
/* SECTOR ALLOCATOR */
struct list_head open_buckets_open;
struct list_head open_buckets_free;
unsigned open_buckets_nr_free;
struct closure_waitlist open_buckets_wait;
spinlock_t open_buckets_lock;
u8 open_buckets_freelist;
u8 open_buckets_nr_free;
struct closure_waitlist open_buckets_wait;
struct open_bucket open_buckets[OPEN_BUCKETS_COUNT];
struct write_point btree_write_point;
struct write_point write_points[WRITE_POINT_COUNT];
struct write_point promote_write_point;
struct hlist_head write_points_hash[WRITE_POINT_COUNT];
struct mutex write_points_hash_lock;
/*
* This write point is used for migrating data off a device

@@ -31,13 +31,15 @@ void bch2_recalc_btree_reserve(struct bch_fs *c)
reserve += min_t(unsigned, 1,
c->btree_roots[i].b->level) * 8;
c->btree_cache_reserve = reserve;
c->btree_cache.reserve = reserve;
}
#define mca_can_free(c) \
max_t(int, 0, c->btree_cache_used - c->btree_cache_reserve)
static inline unsigned btree_cache_can_free(struct btree_cache *bc)
{
return max_t(int, 0, bc->used - bc->reserve);
}
static void __mca_data_free(struct bch_fs *c, struct btree *b)
static void __btree_node_data_free(struct bch_fs *c, struct btree *b)
{
EBUG_ON(btree_node_write_in_flight(b));

@@ -46,11 +48,13 @@ static void __mca_data_free(struct bch_fs *c, struct btree *b)
bch2_btree_keys_free(b);
}
static void mca_data_free(struct bch_fs *c, struct btree *b)
static void btree_node_data_free(struct bch_fs *c, struct btree *b)
{
__mca_data_free(c, b);
c->btree_cache_used--;
list_move(&b->list, &c->btree_cache_freed);
struct btree_cache *bc = &c->btree_cache;
__btree_node_data_free(c, b);
bc->used--;
list_move(&b->list, &bc->freed);
}
static const struct rhashtable_params bch_btree_cache_params = {

@@ -59,8 +63,10 @@ static const struct rhashtable_params bch_btree_cache_params = {
.key_len = sizeof(struct bch_extent_ptr),
};
static void mca_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
static void btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
{
struct btree_cache *bc = &c->btree_cache;
b->data = kvpmalloc(btree_bytes(c), gfp);
if (!b->data)
goto err;

@@ -68,16 +74,16 @@ static void mca_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
if (bch2_btree_keys_alloc(b, btree_page_order(c), gfp))
goto err;
c->btree_cache_used++;
list_move(&b->list, &c->btree_cache_freeable);
bc->used++;
list_move(&b->list, &bc->freeable);
return;
err:
kvpfree(b->data, btree_bytes(c));
b->data = NULL;
list_move(&b->list, &c->btree_cache_freed);
list_move(&b->list, &bc->freed);
}
static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
{
struct btree *b = kzalloc(sizeof(struct btree), gfp);
if (!b)

@@ -88,49 +94,48 @@ static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
INIT_LIST_HEAD(&b->list);
INIT_LIST_HEAD(&b->write_blocked);
mca_data_alloc(c, b, gfp);
btree_node_data_alloc(c, b, gfp);
return b->data ? b : NULL;
}
/* Btree in memory cache - hash table */
void bch2_btree_node_hash_remove(struct bch_fs *c, struct btree *b)
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
rhashtable_remove_fast(&c->btree_cache_table, &b->hash,
bch_btree_cache_params);
rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
/* Cause future lookups for this node to fail: */
bkey_i_to_extent(&b->key)->v._data[0] = 0;
}
int __bch2_btree_node_hash_insert(struct bch_fs *c, struct btree *b)
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
{
return rhashtable_lookup_insert_fast(&c->btree_cache_table, &b->hash,
return rhashtable_lookup_insert_fast(&bc->table, &b->hash,
bch_btree_cache_params);
}
int bch2_btree_node_hash_insert(struct bch_fs *c, struct btree *b,
unsigned level, enum btree_id id)
int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
unsigned level, enum btree_id id)
{
int ret;
b->level = level;
b->btree_id = id;
mutex_lock(&c->btree_cache_lock);
ret = __bch2_btree_node_hash_insert(c, b);
mutex_lock(&bc->lock);
ret = __bch2_btree_node_hash_insert(bc, b);
if (!ret)
list_add(&b->list, &c->btree_cache);
mutex_unlock(&c->btree_cache_lock);
list_add(&b->list, &bc->live);
mutex_unlock(&bc->lock);
return ret;
}
__flatten
static inline struct btree *mca_find(struct bch_fs *c,
static inline struct btree *btree_cache_find(struct btree_cache *bc,
const struct bkey_i *k)
{
return rhashtable_lookup_fast(&c->btree_cache_table, &PTR_HASH(k),
return rhashtable_lookup_fast(&bc->table, &PTR_HASH(k),
bch_btree_cache_params);
}

@@ -140,9 +145,10 @@ static inline struct btree *mca_find(struct bch_fs *c,
*/
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
{
struct btree_cache *bc = &c->btree_cache;
int ret = 0;
lockdep_assert_held(&c->btree_cache_lock);
lockdep_assert_held(&bc->lock);
if (!six_trylock_intent(&b->lock))
return -ENOMEM;

@@ -201,11 +207,12 @@ static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
return __btree_node_reclaim(c, b, true);
}
static unsigned long bch2_mca_scan(struct shrinker *shrink,
struct shrink_control *sc)
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
struct bch_fs *c = container_of(shrink, struct bch_fs,
btree_cache_shrink);
btree_cache.shrink);
struct btree_cache *bc = &c->btree_cache;
struct btree *b, *t;
unsigned long nr = sc->nr_to_scan;
unsigned long can_free;

@@ -218,8 +225,8 @@ static unsigned long bch2_mca_scan(struct shrinker *shrink,
/* Return -1 if we can't do anything right now */
if (sc->gfp_mask & __GFP_IO)
mutex_lock(&c->btree_cache_lock);
else if (!mutex_trylock(&c->btree_cache_lock))
mutex_lock(&bc->lock);
else if (!mutex_trylock(&bc->lock))
return -1;
/*

@@ -230,11 +237,11 @@ static unsigned long bch2_mca_scan(struct shrinker *shrink,
* IO can always make forward progress:
*/
nr /= btree_pages(c);
can_free = mca_can_free(c);
can_free = btree_cache_can_free(bc);
nr = min_t(unsigned long, nr, can_free);
i = 0;
list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
list_for_each_entry_safe(b, t, &bc->freeable, list) {
touched++;
if (freed >= nr)

@@ -242,34 +249,34 @@ static unsigned long bch2_mca_scan(struct shrinker *shrink,
if (++i > 3 &&
!btree_node_reclaim(c, b)) {
mca_data_free(c, b);
btree_node_data_free(c, b);
six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);
freed++;
}
}
restart:
list_for_each_entry_safe(b, t, &c->btree_cache, list) {
list_for_each_entry_safe(b, t, &bc->live, list) {
touched++;
if (freed >= nr) {
/* Save position */
if (&t->list != &c->btree_cache)
list_move_tail(&c->btree_cache, &t->list);
if (&t->list != &bc->live)
list_move_tail(&bc->live, &t->list);
break;
}
if (!btree_node_accessed(b) &&
!btree_node_reclaim(c, b)) {
/* can't call bch2_btree_node_hash_remove under btree_cache_lock */
/* can't call bch2_btree_node_hash_remove under lock */
freed++;
if (&t->list != &c->btree_cache)
list_move_tail(&c->btree_cache, &t->list);
if (&t->list != &bc->live)
list_move_tail(&bc->live, &t->list);
mca_data_free(c, b);
mutex_unlock(&c->btree_cache_lock);
btree_node_data_free(c, b);
mutex_unlock(&bc->lock);
bch2_btree_node_hash_remove(c, b);
bch2_btree_node_hash_remove(bc, b);
six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);

@@ -277,97 +284,97 @@ restart:
goto out;
if (sc->gfp_mask & __GFP_IO)
mutex_lock(&c->btree_cache_lock);
else if (!mutex_trylock(&c->btree_cache_lock))
mutex_lock(&bc->lock);
else if (!mutex_trylock(&bc->lock))
goto out;
goto restart;
} else
clear_btree_node_accessed(b);
}
mutex_unlock(&c->btree_cache_lock);
mutex_unlock(&bc->lock);
out:
return (unsigned long) freed * btree_pages(c);
}
static unsigned long bch2_mca_count(struct shrinker *shrink,
struct shrink_control *sc)
static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
struct shrink_control *sc)
{
struct bch_fs *c = container_of(shrink, struct bch_fs,
btree_cache_shrink);
btree_cache.shrink);
struct btree_cache *bc = &c->btree_cache;
if (btree_shrinker_disabled(c))
return 0;
return mca_can_free(c) * btree_pages(c);
return btree_cache_can_free(bc) * btree_pages(c);
}
void bch2_fs_btree_exit(struct bch_fs *c)
void bch2_fs_btree_cache_exit(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
unsigned i;
if (c->btree_cache_shrink.list.next)
unregister_shrinker(&c->btree_cache_shrink);
if (bc->shrink.list.next)
unregister_shrinker(&bc->shrink);
mutex_lock(&c->btree_cache_lock);
mutex_lock(&bc->lock);
#ifdef CONFIG_BCACHEFS_DEBUG
if (c->verify_data)
list_move(&c->verify_data->list, &c->btree_cache);
list_move(&c->verify_data->list, &bc->live);
kvpfree(c->verify_ondisk, btree_bytes(c));
#endif
for (i = 0; i < BTREE_ID_NR; i++)
if (c->btree_roots[i].b)
list_add(&c->btree_roots[i].b->list, &c->btree_cache);
list_add(&c->btree_roots[i].b->list, &bc->live);
list_splice(&c->btree_cache_freeable,
&c->btree_cache);
list_splice(&bc->freeable, &bc->live);
while (!list_empty(&c->btree_cache)) {
b = list_first_entry(&c->btree_cache, struct btree, list);
while (!list_empty(&bc->live)) {
b = list_first_entry(&bc->live, struct btree, list);
if (btree_node_dirty(b))
bch2_btree_complete_write(c, b, btree_current_write(b));
clear_btree_node_dirty(b);
mca_data_free(c, b);
btree_node_data_free(c, b);
}
while (!list_empty(&c->btree_cache_freed)) {
b = list_first_entry(&c->btree_cache_freed,
struct btree, list);
while (!list_empty(&bc->freed)) {
b = list_first_entry(&bc->freed, struct btree, list);
list_del(&b->list);
kfree(b);
}
mutex_unlock(&c->btree_cache_lock);
mutex_unlock(&bc->lock);
if (c->btree_cache_table_init_done)
rhashtable_destroy(&c->btree_cache_table);
if (bc->table_init_done)
rhashtable_destroy(&bc->table);
}
int bch2_fs_btree_init(struct bch_fs *c)
int bch2_fs_btree_cache_init(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
unsigned i;
int ret;
ret = rhashtable_init(&c->btree_cache_table, &bch_btree_cache_params);
ret = rhashtable_init(&bc->table, &bch_btree_cache_params);
if (ret)
return ret;
c->btree_cache_table_init_done = true;
bc->table_init_done = true;
bch2_recalc_btree_reserve(c);
for (i = 0; i < c->btree_cache_reserve; i++)
if (!mca_bucket_alloc(c, GFP_KERNEL))
for (i = 0; i < bc->reserve; i++)
if (!btree_node_mem_alloc(c, GFP_KERNEL))
return -ENOMEM;
list_splice_init(&c->btree_cache,
&c->btree_cache_freeable);
list_splice_init(&bc->live, &bc->freeable);
#ifdef CONFIG_BCACHEFS_DEBUG
mutex_init(&c->verify_lock);

@@ -376,42 +383,53 @@ int bch2_fs_btree_init(struct bch_fs *c)
if (!c->verify_ondisk)
return -ENOMEM;
c->verify_data = mca_bucket_alloc(c, GFP_KERNEL);
c->verify_data = btree_node_mem_alloc(c, GFP_KERNEL);
if (!c->verify_data)
return -ENOMEM;
list_del_init(&c->verify_data->list);
#endif
c->btree_cache_shrink.count_objects = bch2_mca_count;
c->btree_cache_shrink.scan_objects = bch2_mca_scan;
c->btree_cache_shrink.seeks = 4;
c->btree_cache_shrink.batch = btree_pages(c) * 2;
register_shrinker(&c->btree_cache_shrink);
bc->shrink.count_objects = bch2_btree_cache_count;
bc->shrink.scan_objects = bch2_btree_cache_scan;
bc->shrink.seeks = 4;
bc->shrink.batch = btree_pages(c) * 2;
register_shrinker(&bc->shrink);
return 0;
}
void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
{
mutex_init(&bc->lock);
INIT_LIST_HEAD(&bc->live);
INIT_LIST_HEAD(&bc->freeable);
INIT_LIST_HEAD(&bc->freed);
}
/*
* We can only have one thread cannibalizing other cached btree nodes at a time,
* or we'll deadlock. We use an open coded mutex to ensure that, which a
* cannibalize_bucket() will take. This means every time we unlock the root of
* the btree, we need to release this lock if we have it held.
*/
void bch2_btree_node_cannibalize_unlock(struct bch_fs *c)
void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c)
{
if (c->btree_cache_alloc_lock == current) {
struct btree_cache *bc = &c->btree_cache;
if (bc->alloc_lock == current) {
trace_btree_node_cannibalize_unlock(c);
c->btree_cache_alloc_lock = NULL;
closure_wake_up(&c->mca_wait);
bc->alloc_lock = NULL;
closure_wake_up(&bc->alloc_wait);
}
}
int bch2_btree_node_cannibalize_lock(struct bch_fs *c, struct closure *cl)
int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
{
struct btree_cache *bc = &c->btree_cache;
struct task_struct *old;
old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
old = cmpxchg(&bc->alloc_lock, NULL, current);
if (old == NULL || old == current)
goto success;

@@ -420,13 +438,13 @@ int bch2_btree_node_cannibalize_lock(struct bch_fs *c, struct closure *cl)
return -ENOMEM;
}
closure_wait(&c->mca_wait, cl);
closure_wait(&bc->alloc_wait, cl);
/* Try again, after adding ourselves to waitlist */
old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
old = cmpxchg(&bc->alloc_lock, NULL, current);
if (old == NULL || old == current) {
/* We raced */
closure_wake_up(&c->mca_wait);
closure_wake_up(&bc->alloc_wait);
goto success;
}

@@ -438,16 +456,17 @@ success:
return 0;
}
static struct btree *mca_cannibalize(struct bch_fs *c)
static struct btree *btree_node_cannibalize(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
list_for_each_entry_reverse(b, &c->btree_cache, list)
list_for_each_entry_reverse(b, &bc->live, list)
if (!btree_node_reclaim(c, b))
return b;
while (1) {
list_for_each_entry_reverse(b, &c->btree_cache, list)
list_for_each_entry_reverse(b, &bc->live, list)
if (!btree_node_write_and_reclaim(c, b))
return b;

@@ -462,16 +481,17 @@ static struct btree *mca_cannibalize(struct bch_fs *c)
struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
u64 start_time = local_clock();
mutex_lock(&c->btree_cache_lock);
mutex_lock(&bc->lock);
/*
* btree_free() doesn't free memory; it sticks the node on the end of
* the list. Check if there's any freed nodes there:
*/
list_for_each_entry(b, &c->btree_cache_freeable, list)
list_for_each_entry(b, &bc->freeable, list)
if (!btree_node_reclaim(c, b))
goto out_unlock;

@@ -479,9 +499,9 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
* We never free struct btree itself, just the memory that holds the on
* disk node. Check the freed list before allocating a new one:
*/
list_for_each_entry(b, &c->btree_cache_freed, list)
list_for_each_entry(b, &bc->freed, list)
if (!btree_node_reclaim(c, b)) {
mca_data_alloc(c, b, __GFP_NOWARN|GFP_NOIO);
btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_NOIO);
if (b->data)
goto out_unlock;

@@ -490,7 +510,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
goto err;
}
b = mca_bucket_alloc(c, __GFP_NOWARN|GFP_NOIO);
b = btree_node_mem_alloc(c, __GFP_NOWARN|GFP_NOIO);
if (!b)
goto err;

@@ -501,7 +521,7 @@ out_unlock:
BUG_ON(btree_node_write_in_flight(b));
list_del_init(&b->list);
mutex_unlock(&c->btree_cache_lock);
mutex_unlock(&bc->lock);
out:
b->flags = 0;
b->written = 0;

@@ -517,18 +537,18 @@ out:
return b;
err:
/* Try to cannibalize another cached btree node: */
if (c->btree_cache_alloc_lock == current) {
b = mca_cannibalize(c);
if (bc->alloc_lock == current) {
b = btree_node_cannibalize(c);
list_del_init(&b->list);
mutex_unlock(&c->btree_cache_lock);
mutex_unlock(&bc->lock);
bch2_btree_node_hash_remove(c, b);
bch2_btree_node_hash_remove(bc, b);
trace_btree_node_cannibalize(c);
goto out;
}
mutex_unlock(&c->btree_cache_lock);
mutex_unlock(&bc->lock);
return ERR_PTR(-ENOMEM);
}

@@ -539,6 +559,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
unsigned level,
enum six_lock_type lock_type)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
/*

@@ -552,15 +573,15 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
return b;
bkey_copy(&b->key, k);
if (bch2_btree_node_hash_insert(c, b, level, iter->btree_id)) {
if (bch2_btree_node_hash_insert(bc, b, level, iter->btree_id)) {
/* raced with another fill: */
/* mark as unhashed... */
bkey_i_to_extent(&b->key)->v._data[0] = 0;
mutex_lock(&c->btree_cache_lock);
list_add(&b->list, &c->btree_cache_freeable);
mutex_unlock(&c->btree_cache_lock);
mutex_lock(&bc->lock);
list_add(&b->list, &bc->freeable);
mutex_unlock(&bc->lock);
six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);

@@ -601,13 +622,14 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
const struct bkey_i *k, unsigned level,
enum six_lock_type lock_type)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
struct bset_tree *t;
BUG_ON(level >= BTREE_MAX_DEPTH);
retry:
rcu_read_lock();
b = mca_find(c, k);
b = btree_cache_find(bc, k);
rcu_read_unlock();
if (unlikely(!b)) {

@@ -755,12 +777,13 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
void bch2_btree_node_prefetch(struct bch_fs *c, const struct bkey_i *k,
unsigned level, enum btree_id btree_id)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
BUG_ON(level >= BTREE_MAX_DEPTH);
rcu_read_lock();
b = mca_find(c, k);
b = btree_cache_find(bc, k);
rcu_read_unlock();
if (b)

@@ -771,15 +794,15 @@ void bch2_btree_node_prefetch(struct bch_fs *c, const struct bkey_i *k,
return;
bkey_copy(&b->key, k);
if (bch2_btree_node_hash_insert(c, b, level, btree_id)) {
if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
/* raced with another fill: */
/* mark as unhashed... */
bkey_i_to_extent(&b->key)->v._data[0] = 0;
mutex_lock(&c->btree_cache_lock);
list_add(&b->list, &c->btree_cache_freeable);
mutex_unlock(&c->btree_cache_lock);
mutex_lock(&bc->lock);
list_add(&b->list, &bc->freeable);
mutex_unlock(&bc->lock);
goto out;
}

@@ -11,13 +11,13 @@ extern const char * const bch2_btree_ids[];
void bch2_recalc_btree_reserve(struct bch_fs *);
void bch2_btree_node_hash_remove(struct bch_fs *, struct btree *);
int __bch2_btree_node_hash_insert(struct bch_fs *, struct btree *);
int bch2_btree_node_hash_insert(struct bch_fs *, struct btree *,
void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
unsigned, enum btree_id);
void bch2_btree_node_cannibalize_unlock(struct bch_fs *);
int bch2_btree_node_cannibalize_lock(struct bch_fs *, struct closure *);
void bch2_btree_cache_cannibalize_unlock(struct bch_fs *);
int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);

@@ -32,8 +32,9 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
void bch2_btree_node_prefetch(struct bch_fs *, const struct bkey_i *,
unsigned, enum btree_id);
void bch2_fs_btree_exit(struct bch_fs *);
int bch2_fs_btree_init(struct bch_fs *);
void bch2_fs_btree_cache_exit(struct bch_fs *);
int bch2_fs_btree_cache_init(struct bch_fs *);
void bch2_fs_btree_cache_init_early(struct btree_cache *);
#define PTR_HASH(_k) (bkey_i_to_extent_c(_k)->v._data[0])

@@ -278,9 +278,12 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
{
struct bch_dev *ca;
struct open_bucket *ob;
const struct open_bucket_ptr *ptr;
size_t i, j, iter;
unsigned ci;
down_write(&c->alloc_gc_lock);
||||
for_each_member_device(ca, c, ci) {
|
||||
spin_lock(&ca->freelist_lock);
|
||||
|
||||
@ -291,21 +294,26 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
|
||||
fifo_for_each_entry(i, &ca->free[j], iter)
|
||||
bch2_mark_alloc_bucket(ca, &ca->buckets[i], true);
|
||||
|
||||
for (ptr = ca->open_buckets_partial;
|
||||
ptr < ca->open_buckets_partial + ca->open_buckets_partial_nr;
|
||||
ptr++)
|
||||
bch2_mark_alloc_bucket(ca, PTR_BUCKET(ca, &ptr->ptr), true);
|
||||
|
||||
spin_unlock(&ca->freelist_lock);
|
||||
}
|
||||
|
||||
for (ob = c->open_buckets;
|
||||
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
|
||||
ob++) {
|
||||
const struct bch_extent_ptr *ptr;
|
||||
|
||||
mutex_lock(&ob->lock);
|
||||
spin_lock(&ob->lock);
|
||||
open_bucket_for_each_ptr(ob, ptr) {
|
||||
ca = c->devs[ptr->dev];
|
||||
bch2_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr), true);
|
||||
ca = c->devs[ptr->ptr.dev];
|
||||
bch2_mark_alloc_bucket(ca, PTR_BUCKET(ca, &ptr->ptr), true);
|
||||
}
|
||||
mutex_unlock(&ob->lock);
|
||||
spin_unlock(&ob->lock);
|
||||
}
|
||||
|
||||
up_write(&c->alloc_gc_lock);
|
||||
}
|
||||
|
||||
static void mark_metadata_sectors(struct bch_dev *ca, u64 start, u64 end,
|
||||
|
@ -1364,17 +1364,17 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
|
||||
closure_init_stack(&cl);
|
||||
|
||||
do {
|
||||
ret = bch2_btree_node_cannibalize_lock(c, &cl);
|
||||
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
|
||||
closure_sync(&cl);
|
||||
} while (ret);
|
||||
|
||||
b = bch2_btree_node_mem_alloc(c);
|
||||
bch2_btree_node_cannibalize_unlock(c);
|
||||
bch2_btree_cache_cannibalize_unlock(c);
|
||||
|
||||
BUG_ON(IS_ERR(b));
|
||||
|
||||
bkey_copy(&b->key, k);
|
||||
BUG_ON(bch2_btree_node_hash_insert(c, b, level, id));
|
||||
BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id));
|
||||
|
||||
bch2_btree_node_read(c, b, true);
|
||||
six_unlock_write(&b->lock);
|
||||
@ -1844,8 +1844,8 @@ void bch2_btree_verify_flushed(struct bch_fs *c)
|
||||
unsigned i;
|
||||
|
||||
rcu_read_lock();
|
||||
tbl = rht_dereference_rcu(c->btree_cache_table.tbl,
|
||||
&c->btree_cache_table);
|
||||
tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
|
||||
&c->btree_cache.table);
|
||||
|
||||
for (i = 0; i < tbl->size; i++)
|
||||
rht_for_each_entry_rcu(b, pos, tbl, i, hash)
|
||||
|
@ -769,7 +769,7 @@ retry_all:
|
||||
closure_init_stack(&cl);
|
||||
|
||||
do {
|
||||
ret = bch2_btree_node_cannibalize_lock(c, &cl);
|
||||
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
|
||||
closure_sync(&cl);
|
||||
} while (ret);
|
||||
}
|
||||
@ -817,7 +817,7 @@ retry:
|
||||
|
||||
ret = btree_iter_linked(iter) ? -EINTR : 0;
|
||||
out:
|
||||
bch2_btree_node_cannibalize_unlock(c);
|
||||
bch2_btree_cache_cannibalize_unlock(c);
|
||||
return ret;
|
||||
io_error:
|
||||
BUG_ON(ret != -EIO);
|
||||
|
@ -130,6 +130,42 @@ struct btree {
|
||||
#endif
|
||||
};
|
||||
|
||||
struct btree_cache {
|
||||
struct rhashtable table;
|
||||
bool table_init_done;
|
||||
/*
|
||||
* We never free a struct btree, except on shutdown - we just put it on
|
||||
* the btree_cache_freed list and reuse it later. This simplifies the
|
||||
* code, and it doesn't cost us much memory as the memory usage is
|
||||
* dominated by buffers that hold the actual btree node data and those
|
||||
* can be freed - and the number of struct btrees allocated is
|
||||
* effectively bounded.
|
||||
*
|
||||
* btree_cache_freeable effectively is a small cache - we use it because
|
||||
* high order page allocations can be rather expensive, and it's quite
|
||||
* common to delete and allocate btree nodes in quick succession. It
|
||||
* should never grow past ~2-3 nodes in practice.
|
||||
*/
|
||||
struct mutex lock;
|
||||
struct list_head live;
|
||||
struct list_head freeable;
|
||||
struct list_head freed;
|
||||
|
||||
/* Number of elements in live + freeable lists */
|
||||
unsigned used;
|
||||
unsigned reserve;
|
||||
struct shrinker shrink;
|
||||
|
||||
/*
|
||||
* If we need to allocate memory for a new btree node and that
|
||||
* allocation fails, we can cannibalize another node in the btree cache
|
||||
* to satisfy the allocation - lock to guarantee only one thread does
|
||||
* this at a time:
|
||||
*/
|
||||
struct task_struct *alloc_lock;
|
||||
struct closure_waitlist alloc_wait;
|
||||
};
|
||||
|
||||
#define BTREE_FLAG(flag) \
|
||||
static inline bool btree_node_ ## flag(struct btree *b) \
|
||||
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
|
||||
|
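
The struct btree_cache introduced above gathers the hash table, the live/freeable/freed lists, the shrinker and the cannibalize lock (alloc_lock/alloc_wait) into one object. As a rough sketch only — the helper name below is invented, but the bch2_btree_cache_cannibalize_lock()/_unlock() calls and the retry loop mirror the bch2_btree_root_read() and btree_iter hunks elsewhere in this commit — callers that must not fail an allocation take the lock, allocate (possibly reclaiming a live node), then drop it:

/*
 * Illustrative only: shows the calling pattern visible in this commit,
 * not a function that exists in the tree.
 */
static struct btree *alloc_node_or_cannibalize(struct bch_fs *c)
{
        struct closure cl;
        struct btree *b;
        int ret;

        closure_init_stack(&cl);

        /* Serialize with other threads reclaiming btree cache memory: */
        do {
                ret = bch2_btree_cache_cannibalize_lock(c, &cl);
                closure_sync(&cl);
        } while (ret);

        /*
         * With btree_cache.alloc_lock held, the allocation may fall back
         * to cannibalizing a live cached node instead of failing:
         */
        b = bch2_btree_node_mem_alloc(c);

        bch2_btree_cache_cannibalize_unlock(c);
        return b;
}
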
@ -237,11 +237,11 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b,
six_lock_write(&b->lock);
bch2_btree_node_hash_remove(c, b);
bch2_btree_node_hash_remove(&c->btree_cache, b);
mutex_lock(&c->btree_cache_lock);
list_move(&b->list, &c->btree_cache_freeable);
mutex_unlock(&c->btree_cache_lock);
mutex_lock(&c->btree_cache.lock);
list_move(&b->list, &c->btree_cache.freeable);
mutex_unlock(&c->btree_cache.lock);
/*
* By using six_unlock_write() directly instead of

@ -339,11 +339,11 @@ retry:
bkey_extent_init(&tmp.k);
tmp.k.k.size = c->opts.btree_node_size,
ob = bch2_alloc_sectors(c, &c->btree_write_point,
bkey_i_to_extent(&tmp.k),
res->nr_replicas,
c->opts.metadata_replicas_required,
alloc_reserve, cl);
ob = bch2_alloc_sectors(c, BCH_DATA_BTREE, 0, 0,
bkey_i_to_extent(&tmp.k),
res->nr_replicas,
c->opts.metadata_replicas_required,
alloc_reserve, 0, cl);
if (IS_ERR(ob))
return ERR_CAST(ob);

@ -374,7 +374,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
b = as->reserve->b[--as->reserve->nr];
BUG_ON(bch2_btree_node_hash_insert(c, b, level, as->btree_id));
BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id));
set_btree_node_accessed(b);
set_btree_node_dirty(b);

@ -515,7 +515,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
* Protects reaping from the btree node cache and using the btree node
* open bucket reserve:
*/
ret = bch2_btree_node_cannibalize_lock(c, cl);
ret = bch2_btree_cache_cannibalize_lock(c, cl);
if (ret) {
bch2_disk_reservation_put(c, &disk_res);
return ERR_PTR(ret);

@ -543,11 +543,11 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
reserve->b[reserve->nr++] = b;
}
bch2_btree_node_cannibalize_unlock(c);
bch2_btree_cache_cannibalize_unlock(c);
return reserve;
err_free:
bch2_btree_reserve_put(c, reserve);
bch2_btree_node_cannibalize_unlock(c);
bch2_btree_cache_cannibalize_unlock(c);
trace_btree_reserve_get_fail(c, nr_nodes, cl);
return ERR_PTR(ret);
}

@ -1015,9 +1015,9 @@ bch2_btree_update_start(struct bch_fs *c, enum btree_id id,
static void __bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
{
/* Root nodes cannot be reaped */
mutex_lock(&c->btree_cache_lock);
mutex_lock(&c->btree_cache.lock);
list_del_init(&b->list);
mutex_unlock(&c->btree_cache_lock);
mutex_unlock(&c->btree_cache.lock);
mutex_lock(&c->btree_root_lock);
btree_node_root(c, b) = b;

@ -1802,7 +1802,7 @@ retry:
PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
/* bch2_btree_reserve_get will unlock */
do {
ret = bch2_btree_node_cannibalize_lock(c, &cl);
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
closure_sync(&cl);
} while (ret == -EAGAIN);

@ -1873,23 +1873,24 @@ retry:
if (parent) {
if (new_hash) {
bkey_copy(&new_hash->key, &new_key->k_i);
BUG_ON(bch2_btree_node_hash_insert(c, new_hash,
b->level, b->btree_id));
ret = bch2_btree_node_hash_insert(&c->btree_cache,
new_hash, b->level, b->btree_id);
BUG_ON(ret);
}
bch2_btree_insert_node(as, parent, &iter,
&keylist_single(&new_key->k_i));
if (new_hash) {
mutex_lock(&c->btree_cache_lock);
bch2_btree_node_hash_remove(c, new_hash);
mutex_lock(&c->btree_cache.lock);
bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
bch2_btree_node_hash_remove(c, b);
bch2_btree_node_hash_remove(&c->btree_cache, b);
bkey_copy(&b->key, &new_key->k_i);
ret = __bch2_btree_node_hash_insert(c, b);
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
BUG_ON(ret);
mutex_unlock(&c->btree_cache_lock);
mutex_unlock(&c->btree_cache.lock);
} else {
bkey_copy(&b->key, &new_key->k_i);
}

@ -1918,9 +1919,9 @@ retry:
bch2_btree_update_done(as);
out:
if (new_hash) {
mutex_lock(&c->btree_cache_lock);
list_move(&new_hash->list, &c->btree_cache_freeable);
mutex_unlock(&c->btree_cache_lock);
mutex_lock(&c->btree_cache.lock);
list_move(&new_hash->list, &c->btree_cache.freeable);
mutex_unlock(&c->btree_cache.lock);
six_unlock_write(&new_hash->lock);
six_unlock_intent(&new_hash->lock);

@ -407,8 +407,11 @@ void bch2_mark_metadata_bucket(struct bch_dev *ca, struct bucket *g,
static int __disk_sectors(const union bch_extent_crc *crc, unsigned sectors)
{
return sectors * crc_compressed_size(NULL, crc) /
crc_uncompressed_size(NULL, crc);
if (!sectors)
return 0;
return max(1U, DIV_ROUND_UP(sectors * crc_compressed_size(NULL, crc),
crc_uncompressed_size(NULL, crc)));
}
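
The __disk_sectors() change above replaces truncating division with a round-up that never returns zero for a nonzero live range. A minimal standalone illustration (plain C; the helper names and example sizes are invented for this sketch, and DIV_ROUND_UP is spelled out rather than taken from the kernel headers):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))
#define MAX(a, b)               ((a) > (b) ? (a) : (b))

/* old behaviour: truncating division can return 0 for a nonzero live range */
static unsigned old_disk_sectors(unsigned compressed, unsigned uncompressed,
                                 unsigned sectors)
{
        return sectors * compressed / uncompressed;
}

/* new behaviour: 0 only when sectors == 0, otherwise at least 1, rounded up */
static unsigned new_disk_sectors(unsigned compressed, unsigned uncompressed,
                                 unsigned sectors)
{
        if (!sectors)
                return 0;
        return MAX(1u, DIV_ROUND_UP(sectors * compressed, uncompressed));
}

int main(void)
{
        /* e.g. a 128-sector extent compressed to 32 sectors, 1 live sector */
        printf("old: %u\n", old_disk_sectors(32, 128, 1));     /* prints 0 */
        printf("new: %u\n", new_disk_sectors(32, 128, 1));     /* prints 1 */
        return 0;
}
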
/*

@ -25,7 +25,7 @@ static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
{
void *b;
BUG_ON(size > c->sb.encoded_extent_max);
BUG_ON(size > c->sb.encoded_extent_max << 9);
b = kmalloc(size, GFP_NOIO|__GFP_NOWARN);
if (b)

@ -164,8 +164,8 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
}
break;
case BCH_COMPRESSION_LZ4:
ret = LZ4_decompress_safe(src_data.b, dst_data,
src_len, dst_len);
ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
src_len, dst_len, dst_len);
if (ret != dst_len) {
ret = -EIO;
goto err;

@ -269,7 +269,8 @@ int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
size_t dst_len = crc_uncompressed_size(NULL, &crc) << 9;
int ret = -ENOMEM;
if (crc_uncompressed_size(NULL, &crc) < c->sb.encoded_extent_max)
if (crc_uncompressed_size(NULL, &crc) > c->sb.encoded_extent_max ||
crc_compressed_size(NULL, &crc) > c->sb.encoded_extent_max)
return -EIO;
dst_data = dst_len == dst_iter.bi_size

@ -294,7 +295,7 @@ static int __bio_compress(struct bch_fs *c,
{
struct bbuf src_data = { NULL }, dst_data = { NULL };
unsigned pad;
int ret;
int ret = 0;
dst_data = bio_map_or_bounce(c, dst, WRITE);
src_data = bio_map_or_bounce(c, src, READ);

@ -307,23 +308,28 @@ static int __bio_compress(struct bch_fs *c,
void *workspace;
int len = src->bi_iter.bi_size;
ret = 0;
workspace = mempool_alloc(&c->lz4_workspace_pool, GFP_NOIO);
while (len > block_bytes(c) &&
(!(ret = LZ4_compress_destSize(
while (1) {
if (len <= block_bytes(c)) {
ret = 0;
break;
}
ret = LZ4_compress_destSize(
src_data.b, dst_data.b,
&len, dst->bi_iter.bi_size,
workspace)) ||
(len & (block_bytes(c) - 1)))) {
/*
* On error, the compressed data was bigger than
* dst_len - round down to nearest block and try again:
*/
workspace);
if (ret >= len) {
/* uncompressible: */
ret = 0;
break;
}
if (!(len & (block_bytes(c) - 1)))
break;
len = round_down(len, block_bytes(c));
}
mempool_free(workspace, &c->lz4_workspace_pool);
if (!ret)

@ -331,6 +337,7 @@ static int __bio_compress(struct bch_fs *c,
*src_len = len;
*dst_len = ret;
ret = 0;
break;
}
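
The rewritten LZ4 branch above drops the compound while-condition in favour of an explicit loop: store the data uncompressed when only a single block or less remains or when the output is no smaller than the input, stop when a whole number of blocks was consumed, and otherwise round the consumed length down to a block boundary and retry. The sketch below mirrors that control flow only; compress_dest_size() is a stand-in for LZ4_compress_destSize() and the wrapper name is invented:

/*
 * Stand-in for LZ4_compress_destSize(): consumes up to *src_len bytes,
 * returns the number of compressed bytes produced (0 on failure) and
 * updates *src_len to what was actually consumed. Illustrative only.
 */
int compress_dest_size(const char *src, char *dst, int *src_len, int dst_len);

/* Mirrors the control flow of the rewritten LZ4 branch above. */
int compress_whole_blocks(const char *src, char *dst,
                          int *src_len, int *dst_len,
                          int block_bytes, int dst_capacity)
{
        int len = *src_len, ret;

        while (1) {
                if (len <= block_bytes)
                        return 0;       /* too little left: store uncompressed */

                ret = compress_dest_size(src, dst, &len, dst_capacity);
                if (ret >= len)
                        return 0;       /* incompressible */

                if (!(len % block_bytes))
                        break;          /* consumed a whole number of blocks */

                len -= len % block_bytes;       /* round down and retry */
        }

        *src_len = len;                 /* input bytes covered by this extent */
        *dst_len = ret;                 /* compressed bytes produced */
        return 1;
}
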
case BCH_COMPRESSION_GZIP: {

@ -446,20 +453,22 @@ int bch2_check_set_has_compressed_data(struct bch_fs *c,
unsigned compression_type)
{
switch (compression_type) {
case BCH_COMPRESSION_NONE:
case BCH_COMPRESSION_OPT_NONE:
return 0;
case BCH_COMPRESSION_LZ4:
case BCH_COMPRESSION_OPT_LZ4:
if (bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4))
return 0;
bch2_sb_set_feature(c->disk_sb, BCH_FEATURE_LZ4);
break;
case BCH_COMPRESSION_GZIP:
case BCH_COMPRESSION_OPT_GZIP:
if (bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
return 0;
bch2_sb_set_feature(c->disk_sb, BCH_FEATURE_GZIP);
break;
default:
BUG();
}
return bch2_fs_compress_init(c);

@ -511,19 +511,19 @@ static void extent_pick_read_device(struct bch_fs *c,
struct bch_dev *ca = c->devs[ptr->dev];
if (ptr->cached && ptr_stale(ca, ptr))
return;
continue;
if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
return;
continue;
if (avoid && test_bit(ca->dev_idx, avoid->d))
return;
continue;
if (pick->ca && pick->ca->mi.tier < ca->mi.tier)
return;
continue;
if (!percpu_ref_tryget(&ca->io_ref))
return;
continue;
if (pick->ca)
percpu_ref_put(&pick->ca->io_ref);

@ -974,7 +974,8 @@ alloc_io:
(struct disk_reservation) {
.nr_replicas = c->opts.data_replicas,
},
foreground_write_point(c, inode->ei_last_dirtied),
c->fastest_devs,
inode->ei_last_dirtied,
POS(inum, 0),
&inode->ei_journal_seq,
BCH_WRITE_THROTTLE);

@ -1545,10 +1546,11 @@ static void bch2_do_direct_IO_write(struct dio_write *dio)
dio->iop.is_dio = true;
dio->iop.new_i_size = U64_MAX;
bch2_write_op_init(&dio->iop.op, dio->c, dio->res,
foreground_write_point(dio->c, (unsigned long) current),
POS(inode->v.i_ino, (dio->offset + dio->written) >> 9),
&inode->ei_journal_seq,
flags|BCH_WRITE_THROTTLE);
dio->c->fastest_devs,
(unsigned long) dio->task,
POS(inode->v.i_ino, (dio->offset + dio->written) >> 9),
&inode->ei_journal_seq,
flags|BCH_WRITE_THROTTLE);
dio->iop.op.index_update_fn = bchfs_write_index_update;
dio->res.sectors -= bio_sectors(bio);

@ -1568,13 +1570,13 @@ static void bch2_dio_write_loop_async(struct closure *cl)
bch2_dio_write_done(dio);
if (dio->iter.count && !dio->error) {
use_mm(dio->mm);
use_mm(dio->task->mm);
pagecache_block_get(&mapping->add_lock);
bch2_do_direct_IO_write(dio);
pagecache_block_put(&mapping->add_lock);
unuse_mm(dio->mm);
unuse_mm(dio->task->mm);
continue_at(&dio->cl, bch2_dio_write_loop_async, NULL);
} else {

@ -1617,7 +1619,7 @@ static int bch2_direct_IO_write(struct bch_fs *c,
dio->offset = offset;
dio->iovec = NULL;
dio->iter = *iter;
dio->mm = current->mm;
dio->task = current;
closure_init(&dio->cl, NULL);
if (offset + iter->count > inode->v.i_size)

@ -74,7 +74,7 @@ struct dio_write {
struct iovec inline_vecs[UIO_FASTIOV];
struct iov_iter iter;
struct mm_struct *mm;
struct task_struct *task;
/* must be last: */
struct bchfs_write_op iop;

@ -350,7 +350,7 @@ static void init_append_extent(struct bch_write_op *op,
bch2_keylist_push(&op->insert_keys);
}
static int bch2_write_extent(struct bch_write_op *op, struct open_bucket *ob)
static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp)
{
struct bch_fs *c = op->c;
struct bio *orig = &op->wbio.bio;

@ -371,7 +371,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct open_bucket *ob)
/* Need to decompress data? */
if ((op->flags & BCH_WRITE_DATA_COMPRESSED) &&
(crc_uncompressed_size(NULL, &op->crc) != op->size ||
crc_compressed_size(NULL, &op->crc) > ob->sectors_free)) {
crc_compressed_size(NULL, &op->crc) > wp->sectors_free)) {
int ret;
ret = bch2_bio_uncompress_inplace(c, orig, op->size, op->crc);

@ -389,7 +389,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct open_bucket *ob)
op->crc.nonce,
op->crc.csum,
op->crc.csum_type,
ob);
wp->ob);
bio = orig;
wbio = wbio_init(bio);

@ -398,7 +398,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct open_bucket *ob)
compression_type != BCH_COMPRESSION_NONE) {
/* all units here in bytes */
unsigned total_output = 0, output_available =
min(ob->sectors_free << 9, orig->bi_iter.bi_size);
min(wp->sectors_free << 9, orig->bi_iter.bi_size);
unsigned crc_nonce = bch2_csum_type_is_encryption(csum_type)
? op->nonce : 0;
struct bch_csum csum;

@ -441,7 +441,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct open_bucket *ob)
init_append_extent(op,
dst_len >> 9, src_len >> 9,
fragment_compression_type,
crc_nonce, csum, csum_type, ob);
crc_nonce, csum, csum_type, wp->ob);
total_output += dst_len;
bio_advance(bio, dst_len);

@ -468,14 +468,14 @@ static int bch2_write_extent(struct bch_write_op *op, struct open_bucket *ob)
more = orig->bi_iter.bi_size != 0;
} else {
bio = bio_next_split(orig, ob->sectors_free, GFP_NOIO,
bio = bio_next_split(orig, wp->sectors_free, GFP_NOIO,
&c->bio_write);
wbio = wbio_init(bio);
wbio->put_bio = bio != orig;
init_append_extent(op, bio_sectors(bio), bio_sectors(bio),
compression_type, 0,
(struct bch_csum) { 0 }, csum_type, ob);
(struct bch_csum) { 0 }, csum_type, wp->ob);
more = bio != orig;
}

@ -505,7 +505,8 @@ static void __bch2_write(struct closure *cl)
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct bch_fs *c = op->c;
unsigned open_bucket_nr = 0;
struct open_bucket *b;
struct write_point *wp;
struct open_bucket *ob;
int ret;
do {

@ -519,16 +520,19 @@ static void __bch2_write(struct closure *cl)
BKEY_EXTENT_U64s_MAX))
continue_at(cl, bch2_write_index, index_update_wq(op));
b = bch2_alloc_sectors_start(c, op->wp,
wp = bch2_alloc_sectors_start(c, BCH_DATA_USER,
op->devs,
op->write_point,
op->nr_replicas,
c->opts.data_replicas_required,
op->alloc_reserve,
op->flags,
(op->flags & BCH_WRITE_ALLOC_NOWAIT) ? NULL : cl);
EBUG_ON(!b);
EBUG_ON(!wp);
if (unlikely(IS_ERR(b))) {
if (unlikely(PTR_ERR(b) != -EAGAIN)) {
ret = PTR_ERR(b);
if (unlikely(IS_ERR(wp))) {
if (unlikely(PTR_ERR(wp) != -EAGAIN)) {
ret = PTR_ERR(wp);
goto err;
}

@ -561,13 +565,15 @@ static void __bch2_write(struct closure *cl)
continue;
}
BUG_ON(b - c->open_buckets == 0 ||
b - c->open_buckets > U8_MAX);
op->open_buckets[open_bucket_nr++] = b - c->open_buckets;
ob = wp->ob;
ret = bch2_write_extent(op, b);
BUG_ON(ob - c->open_buckets == 0 ||
ob - c->open_buckets > U8_MAX);
op->open_buckets[open_bucket_nr++] = ob - c->open_buckets;
bch2_alloc_sectors_done(c, op->wp, b);
ret = bch2_write_extent(op, wp);
bch2_alloc_sectors_done(c, wp);
if (ret < 0)
goto err;
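
With write points now looked up dynamically, __bch2_write() above no longer receives an open bucket directly: bch2_alloc_sectors_start() takes a data type, a device mask and an unsigned long write-point identifier and hands back a struct write_point whose open bucket is reached via wp->ob, and bch2_alloc_sectors_done() is called with just the write point. A condensed kernel-context sketch of that sequence (the wrapper name is invented; the call arguments are copied from the hunks above, error handling and the surrounding loop are omitted):

/* Illustrative fragment only, not a function from the tree. */
static int write_one_extent(struct bch_fs *c, struct bch_write_op *op,
                            struct closure *cl)
{
        struct write_point *wp;
        int ret;

        wp = bch2_alloc_sectors_start(c, BCH_DATA_USER,
                                      op->devs,          /* allowed devices */
                                      op->write_point,   /* unsigned long identifier */
                                      op->nr_replicas,
                                      c->opts.data_replicas_required,
                                      op->alloc_reserve,
                                      op->flags,
                                      (op->flags & BCH_WRITE_ALLOC_NOWAIT) ? NULL : cl);
        if (IS_ERR(wp))
                return PTR_ERR(wp);     /* -EAGAIN: caller waits on @cl and retries */

        /*
         * The open bucket now hangs off the write point (wp->ob); callers
         * still index it as wp->ob - c->open_buckets, as in the hunk above.
         */
        ret = bch2_write_extent(op, wp);        /* consumes wp->sectors_free */

        bch2_alloc_sectors_done(c, wp);         /* no open_bucket argument anymore */
        return ret;
}
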
@ -704,7 +710,9 @@ void bch2_write(struct closure *cl)
void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
struct disk_reservation res,
struct write_point *wp, struct bpos pos,
struct bch_devs_mask *devs,
unsigned long write_point,
struct bpos pos,
u64 *journal_seq, unsigned flags)
{
EBUG_ON(res.sectors && !res.nr_replicas);

@ -723,7 +731,8 @@ void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
op->pos = pos;
op->version = ZERO_VERSION;
op->res = res;
op->wp = wp;
op->devs = devs;
op->write_point = write_point;
if (journal_seq) {
op->journal_seq_p = journal_seq;

@ -826,6 +835,7 @@ static struct promote_op *promote_alloc(struct bch_fs *c,
* Adjust bio to correspond to _live_ portion of @k -
* which might be less than what we're actually reading:
*/
bio->bi_iter.bi_size = sectors << 9;
bio_advance(bio, pick->crc.offset << 9);
BUG_ON(bio_sectors(bio) < k.k->size);
bio->bi_iter.bi_size = k.k->size << 9;

@ -836,7 +846,8 @@ static struct promote_op *promote_alloc(struct bch_fs *c,
*/
op->write.op.pos.offset = iter.bi_sector;
}
bch2_migrate_write_init(c, &op->write, &c->promote_write_point,
bch2_migrate_write_init(c, &op->write,
c->fastest_devs,
k, NULL,
BCH_WRITE_ALLOC_NOWAIT|
BCH_WRITE_CACHED);

@ -22,11 +22,12 @@ enum bch_write_flags {
BCH_WRITE_FLUSH = (1 << 2),
BCH_WRITE_DATA_COMPRESSED = (1 << 3),
BCH_WRITE_THROTTLE = (1 << 4),
BCH_WRITE_ONLY_SPECIFIED_DEVS = (1 << 5),
/* Internal: */
BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 5),
BCH_WRITE_DONE = (1 << 6),
BCH_WRITE_LOOPED = (1 << 7),
BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 6),
BCH_WRITE_DONE = (1 << 7),
BCH_WRITE_LOOPED = (1 << 8),
};
static inline u64 *op_journal_seq(struct bch_write_op *op)

@ -35,15 +36,10 @@ static inline u64 *op_journal_seq(struct bch_write_op *op)
? op->journal_seq_p : &op->journal_seq;
}
static inline struct write_point *foreground_write_point(struct bch_fs *c,
unsigned long v)
{
return c->write_points +
hash_long(v, ilog2(ARRAY_SIZE(c->write_points)));
}
void bch2_write_op_init(struct bch_write_op *, struct bch_fs *,
struct disk_reservation, struct write_point *,
struct disk_reservation,
struct bch_devs_mask *,
unsigned long,
struct bpos, u64 *, unsigned);
void bch2_write(struct closure *);

@ -116,9 +116,10 @@ struct bch_write_op {
struct bch_extent_crc128 crc;
unsigned size;
struct disk_reservation res;
struct bch_devs_mask *devs;
unsigned long write_point;
struct write_point *wp;
struct disk_reservation res;
union {
u8 open_buckets[16];

@ -15,6 +15,7 @@
static int issue_migration_move(struct bch_dev *ca,
struct moving_context *ctxt,
struct bch_devs_mask *devs,
struct bkey_s_c k)
{
struct bch_fs *c = ca->fs;

@ -33,7 +34,7 @@ static int issue_migration_move(struct bch_dev *ca,
found:
/* XXX: we need to be doing something with the disk reservation */
ret = bch2_data_move(c, ctxt, &c->migration_write_point, k, ptr);
ret = bch2_data_move(c, ctxt, devs, k, ptr);
if (ret)
bch2_disk_reservation_put(c, &res);
return ret;

@ -110,7 +111,7 @@ int bch2_move_data_off_device(struct bch_dev *ca)
ca->dev_idx))
goto next;
ret = issue_migration_move(ca, &ctxt, k);
ret = issue_migration_move(ca, &ctxt, NULL, k);
if (ret == -ENOMEM) {
bch2_btree_iter_unlock(&iter);

@ -139,7 +139,7 @@ out:
void bch2_migrate_write_init(struct bch_fs *c,
struct migrate_write *m,
struct write_point *wp,
struct bch_devs_mask *devs,
struct bkey_s_c k,
const struct bch_extent_ptr *move_ptr,
unsigned flags)

@ -155,8 +155,10 @@ void bch2_migrate_write_init(struct bch_fs *c,
(move_ptr && move_ptr->cached))
flags |= BCH_WRITE_CACHED;
bch2_write_op_init(&m->op, c, (struct disk_reservation) { 0 }, wp,
bkey_start_pos(k.k), NULL, flags);
bch2_write_op_init(&m->op, c, (struct disk_reservation) { 0 },
devs, (unsigned long) current,
bkey_start_pos(k.k), NULL,
flags|BCH_WRITE_ONLY_SPECIFIED_DEVS);
if (m->move)
m->op.alloc_reserve = RESERVE_MOVINGGC;

@ -249,7 +251,7 @@ static void read_moving_endio(struct bio *bio)
int bch2_data_move(struct bch_fs *c,
struct moving_context *ctxt,
struct write_point *wp,
struct bch_devs_mask *devs,
struct bkey_s_c k,
const struct bch_extent_ptr *move_ptr)
{

@ -280,7 +282,7 @@ int bch2_data_move(struct bch_fs *c,
migrate_bio_init(io, &io->write.op.wbio.bio, k.k->size);
bch2_migrate_write_init(c, &io->write, wp, k, move_ptr, 0);
bch2_migrate_write_init(c, &io->write, devs, k, move_ptr, 0);
trace_move_read(&io->write.key.k);

@ -20,12 +20,9 @@ struct migrate_write {
struct bch_write_op op;
};
void bch2_migrate_write_init(struct bch_fs *,
struct migrate_write *,
struct write_point *,
struct bkey_s_c,
const struct bch_extent_ptr *,
unsigned);
void bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
struct bch_devs_mask *, struct bkey_s_c,
const struct bch_extent_ptr *, unsigned);
#define SECTORS_IN_FLIGHT_PER_DEVICE 2048

@ -69,11 +66,9 @@ struct moving_io {
struct bio_vec bi_inline_vecs[0];
};
int bch2_data_move(struct bch_fs *,
struct moving_context *,
struct write_point *,
struct bkey_s_c,
const struct bch_extent_ptr *);
int bch2_data_move(struct bch_fs *, struct moving_context *,
struct bch_devs_mask *, struct bkey_s_c,
const struct bch_extent_ptr *);
int bch2_move_ctxt_wait(struct moving_context *);
void bch2_move_ctxt_wait_for_io(struct moving_context *);

@ -14,6 +14,7 @@
#include "keylist.h"
#include "move.h"
#include "movinggc.h"
#include "super-io.h"
#include <trace/events/bcachefs.h>
#include <linux/freezer.h>

@ -72,7 +73,7 @@ static int issue_moving_gc_move(struct bch_dev *ca,
if (!ptr) /* We raced - bucket's been reused */
return 0;
ret = bch2_data_move(c, ctxt, &ca->copygc_write_point, k, ptr);
ret = bch2_data_move(c, ctxt, &ca->self, k, ptr);
if (!ret)
trace_gc_copy(k.k);
else

@ -376,7 +376,7 @@ err:
static void bch2_fs_free(struct bch_fs *c)
{
bch2_fs_encryption_exit(c);
bch2_fs_btree_exit(c);
bch2_fs_btree_cache_exit(c);
bch2_fs_journal_exit(&c->journal);
bch2_io_clock_exit(&c->io_clock[WRITE]);
bch2_io_clock_exit(&c->io_clock[READ]);

@ -491,7 +491,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mutex_init(&c->state_lock);
mutex_init(&c->sb_lock);
mutex_init(&c->replicas_gc_lock);
mutex_init(&c->btree_cache_lock);
mutex_init(&c->bucket_lock);
mutex_init(&c->btree_root_lock);
INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);

@ -507,9 +506,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_tiering_init(c);
INIT_LIST_HEAD(&c->list);
INIT_LIST_HEAD(&c->btree_cache);
INIT_LIST_HEAD(&c->btree_cache_freeable);
INIT_LIST_HEAD(&c->btree_cache_freed);
INIT_LIST_HEAD(&c->btree_interior_update_list);
mutex_init(&c->btree_reserve_cache_lock);

@ -546,6 +542,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->journal.blocked_time = &c->journal_blocked_time;
c->journal.flush_seq_time = &c->journal_flush_seq_time;
bch2_fs_btree_cache_init_early(&c->btree_cache);
mutex_lock(&c->sb_lock);
if (bch2_sb_to_fs(c, sb)) {

@ -599,7 +597,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_io_clock_init(&c->io_clock[READ]) ||
bch2_io_clock_init(&c->io_clock[WRITE]) ||
bch2_fs_journal_init(&c->journal) ||
bch2_fs_btree_init(c) ||
bch2_fs_btree_cache_init(c) ||
bch2_fs_encryption_init(c) ||
bch2_fs_compress_init(c) ||
bch2_check_set_has_compressed_data(c, c->opts.compression))

@ -1107,8 +1105,6 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
ca->dev_idx = dev_idx;
__set_bit(ca->dev_idx, ca->self.d);
ca->copygc_write_point.type = BCH_DATA_USER;
spin_lock_init(&ca->freelist_lock);
bch2_dev_moving_gc_init(ca);

@ -1169,8 +1165,6 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
for (i = 0; i < RESERVE_NR; i++)
total_reserve += ca->free[i].size;
ca->copygc_write_point.group = &ca->self;
ca->fs = c;
rcu_assign_pointer(c->devs[ca->dev_idx], ca);

@ -209,11 +209,11 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
size_t ret = 0;
struct btree *b;
mutex_lock(&c->btree_cache_lock);
list_for_each_entry(b, &c->btree_cache, list)
mutex_lock(&c->btree_cache.lock);
list_for_each_entry(b, &c->btree_cache.live, list)
ret += btree_bytes(c);
mutex_unlock(&c->btree_cache_lock);
mutex_unlock(&c->btree_cache.lock);
return ret;
}

@ -436,7 +436,7 @@ STORE(__bch2_fs)
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = strtoul_or_return(buf);
c->btree_cache_shrink.scan_objects(&c->btree_cache_shrink, &sc);
c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
}
return size;

@ -54,7 +54,7 @@ static int issue_tiering_move(struct bch_fs *c,
{
int ret;
ret = bch2_data_move(c, ctxt, &tier->wp, k, NULL);
ret = bch2_data_move(c, ctxt, &tier->devs, k, NULL);
if (!ret)
trace_tiering_copy(k.k);
else

@ -241,6 +241,5 @@ void bch2_fs_tiering_init(struct bch_fs *c)
for (i = 0; i < ARRAY_SIZE(c->tiers); i++) {
c->tiers[i].idx = i;
bch2_pd_controller_init(&c->tiers[i].pd);
c->tiers[i].wp.group = &c->tiers[i].devs;
}
}