#include "bcachefs.h"
#include "alloc.h"
#include "btree_iter.h"
#include "buckets.h"
#include "clock.h"
#include "extents.h"
#include "io.h"
#include "keylist.h"
#include "move.h"
#include "super-io.h"
#include "tier.h"

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <trace/events/bcachefs.h>

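/*
 * Per-pass state for a tiering run: the tier being filled, the device within
 * that tier currently being written to, and how many sectors have been sent
 * to it so far (used to switch devices every stripe_size sectors).
 */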
struct tiering_state {
	struct bch_tier		*tier;
	unsigned		sectors;
	unsigned		stripe_size;
	unsigned		dev_idx;
	struct bch_dev		*ca;
};

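/*
 * Decide whether an extent should be copied to this tier: true if it's a data
 * extent with room for another pointer and fewer than data_replicas of its
 * pointers already live on this tier (or one with a higher index).
 */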
static bool tiering_pred(struct bch_fs *c,
			 struct tiering_state *s,
			 struct bkey_s_c k)
{
	if (bkey_extent_is_data(k.k)) {
		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
		const struct bch_extent_ptr *ptr;
		unsigned replicas = 0;

		/* Make sure we have room to add a new pointer: */
		if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
		    BKEY_EXTENT_VAL_U64s_MAX)
			return false;

		extent_for_each_ptr(e, ptr)
			if (c->devs[ptr->dev]->mi.tier >= s->tier->idx)
				replicas++;

		return replicas < c->opts.data_replicas;
	}

	return false;
}

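/* Drop our ref on the device we were writing to, if any. */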
static void tier_put_device(struct tiering_state *s)
{
	if (s->ca)
		percpu_ref_put(&s->ca->io_ref);
	s->ca = NULL;
}

/**
 * tier_next_device - advance to the next device in the tier, round robin,
 * once a stripe's worth of sectors has been written to the current one
 */
static void tier_next_device(struct bch_fs *c, struct tiering_state *s)
{
	if (!s->ca || s->sectors > s->stripe_size) {
		tier_put_device(s);
		s->sectors = 0;
		s->dev_idx++;

		spin_lock(&s->tier->devs.lock);
		if (s->dev_idx >= s->tier->devs.nr)
			s->dev_idx = 0;

		if (s->tier->devs.nr) {
			s->ca = s->tier->devs.d[s->dev_idx].dev;
			percpu_ref_get(&s->ca->io_ref);
		}
		spin_unlock(&s->tier->devs.lock);
	}
}

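/*
 * Queue a single extent to be copied to the current device's tiering write
 * point; on failure the caller waits for in-flight IO and retries (see
 * read_tiering()).
 */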
static int issue_tiering_move(struct bch_fs *c,
			      struct tiering_state *s,
			      struct moving_context *ctxt,
			      struct bkey_s_c k)
{
	int ret;

	ret = bch2_data_move(c, ctxt, &s->ca->tiering_write_point, k, NULL);
	if (!ret) {
		trace_tiering_copy(k.k);
		s->sectors += k.k->size;
	} else {
		trace_tiering_alloc_fail(c, k.k->size);
	}

	return ret;
}

/**
 * read_tiering - walk the extents btree and move extents that don't yet have
 * enough replicas on this tier, writing to the tier's devices in round robin
 * order
 */
static s64 read_tiering(struct bch_fs *c, struct bch_tier *tier)
{
	struct moving_context ctxt;
	struct tiering_state s;
	struct btree_iter iter;
	struct bkey_s_c k;
	unsigned nr_devices = READ_ONCE(tier->devs.nr);
	int ret;

	if (!nr_devices)
		return 0;

	trace_tiering_start(c);

	memset(&s, 0, sizeof(s));
	s.tier		= tier;
	s.stripe_size	= 2048; /* 1 mb for now */

	bch2_move_ctxt_init(&ctxt, &tier->pd.rate,
			    nr_devices * SECTORS_IN_FLIGHT_PER_DEVICE);
	bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
			     BTREE_ITER_PREFETCH);

	while (!kthread_should_stop() &&
	       !bch2_move_ctxt_wait(&ctxt) &&
	       (k = bch2_btree_iter_peek(&iter)).k &&
	       !btree_iter_err(k)) {
		if (!tiering_pred(c, &s, k))
			goto next;

		tier_next_device(c, &s);
		if (!s.ca)
			break;

		ret = issue_tiering_move(c, &s, &ctxt, k);
		if (ret) {
			bch2_btree_iter_unlock(&iter);

			/* memory allocation failure, wait for some IO to finish */
			bch2_move_ctxt_wait_for_io(&ctxt);
			continue;
		}
next:
		bch2_btree_iter_advance_pos(&iter);
		//bch2_btree_iter_cond_resched(&iter);

		/* unlock before calling moving_context_wait() */
		bch2_btree_iter_unlock(&iter);
		cond_resched();
	}

	bch2_btree_iter_unlock(&iter);
	tier_put_device(&s);
	bch2_move_ctxt_exit(&ctxt);
	trace_tiering_end(c, ctxt.sectors_moved, ctxt.keys_moved);

	return ctxt.sectors_moved;
}

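/*
 * Background thread, one per tier: sleeps on the write io_clock until the
 * faster tiers are more than half full (available_sectors < tier_capacity / 2),
 * then runs read_tiering() to copy data into this tier.
 */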
static int bch2_tiering_thread(void *arg)
{
	struct bch_tier *tier = arg;
	struct bch_fs *c = container_of(tier, struct bch_fs, tiers[tier->idx]);
	struct io_clock *clock = &c->io_clock[WRITE];
	struct bch_dev *ca;
	u64 tier_capacity, available_sectors;
	unsigned long last;
	unsigned i;

	set_freezable();

	while (!kthread_should_stop()) {
		if (kthread_wait_freezable(c->tiering_enabled &&
					   tier->devs.nr))
			break;

		while (1) {
			struct bch_tier *faster_tier;

			last = atomic_long_read(&clock->now);

			tier_capacity = available_sectors = 0;
			for (faster_tier = c->tiers;
			     faster_tier != tier;
			     faster_tier++) {
				spin_lock(&faster_tier->devs.lock);
				group_for_each_dev(ca, &faster_tier->devs, i) {
					tier_capacity +=
						(ca->mi.nbuckets -
						 ca->mi.first_bucket) << ca->bucket_bits;
					available_sectors +=
						dev_buckets_available(ca) << ca->bucket_bits;
				}
				spin_unlock(&faster_tier->devs.lock);
			}

			if (available_sectors < (tier_capacity >> 1))
				break;

			bch2_kthread_io_clock_wait(clock,
						   last +
						   available_sectors -
						   (tier_capacity >> 1));
			if (kthread_should_stop())
				return 0;
		}

		read_tiering(c, tier);
	}

	return 0;
}

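/*
 * Stop a tier's migration thread: crank the rate limiter wide open first
 * (presumably so a thread throttled by it doesn't stay asleep), then stop the
 * kthread if one is running.
 */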
static void __bch2_tiering_stop(struct bch_tier *tier)
{
	tier->pd.rate.rate = UINT_MAX;
	bch2_ratelimit_reset(&tier->pd.rate);

	if (tier->migrate)
		kthread_stop(tier->migrate);

	tier->migrate = NULL;
}

void bch2_tiering_stop(struct bch_fs *c)
{
	struct bch_tier *tier;

	for (tier = c->tiers; tier < c->tiers + ARRAY_SIZE(c->tiers); tier++)
		__bch2_tiering_stop(tier);
}

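/* Create the tier's migration thread if it doesn't already exist, then wake it. */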
static int __bch2_tiering_start(struct bch_tier *tier)
{
	if (!tier->migrate) {
		struct task_struct *p =
			kthread_create(bch2_tiering_thread, tier,
				       "bch_tier[%u]", tier->idx);
		if (IS_ERR(p))
			return PTR_ERR(p);

		tier->migrate = p;
	}

	wake_up_process(tier->migrate);
	return 0;
}

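/*
 * Start migration threads for every populated tier except the fastest one:
 * the fastest tier has no faster tier to copy data from, so its thread is
 * stopped instead.
 */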
int bch2_tiering_start(struct bch_fs *c)
{
	struct bch_tier *tier;
	bool have_faster_tier = false;

	if (c->opts.nochanges)
		return 0;

	for (tier = c->tiers; tier < c->tiers + ARRAY_SIZE(c->tiers); tier++) {
		if (!tier->devs.nr)
			continue;

		if (have_faster_tier) {
			int ret = __bch2_tiering_start(tier);
			if (ret)
				return ret;
		} else {
			__bch2_tiering_stop(tier);
		}

		have_faster_tier = true;
	}

	return 0;
}

void bch2_fs_tiering_init(struct bch_fs *c)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(c->tiers); i++) {
		c->tiers[i].idx = i;
		bch2_pd_controller_init(&c->tiers[i].pd);
	}
}