mirror of https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-23 00:00:02 +03:00

Update bcachefs sources to 90d78c2461 bcachefs: Option parsing for io targets

parent 4aefd5f903
commit 90ef8b9f57
@@ -1 +1 @@
-e99d29e40210f6d9b7ec9e5b7aee1e48ae7655c5
+90d78c246188f4e90bd9ceb29fe95186b7dc680d
include/linux/sched/cputime.h (new file, +6)
@@ -0,0 +1,6 @@
+static inline void task_cputime_adjusted(struct task_struct *p, u64 *utime, u64 *stime)
+{
+	*utime = 0;
+	*stime = 0;
+}
@@ -49,15 +49,13 @@ DECLARE_EVENT_CLASS(bch_dev,
 
 	TP_STRUCT__entry(
 		__array(char,		uuid,	16	)
-		__field(unsigned,	tier		)
 	),
 
 	TP_fast_assign(
 		memcpy(__entry->uuid, ca->uuid.b, 16);
-		__entry->tier = ca->mi.tier;
 	),
 
-	TP_printk("%pU tier %u", __entry->uuid, __entry->tier)
+	TP_printk("%pU", __entry->uuid)
 );
 
 DECLARE_EVENT_CLASS(bch_fs,
@@ -89,69 +89,29 @@ static void pd_controllers_update(struct work_struct *work)
 					   struct bch_fs,
 					   pd_controllers_update);
 	struct bch_dev *ca;
-	unsigned i, iter;
+	unsigned i;
 
-	/* All units are in bytes */
-	u64 faster_tiers_size	= 0;
-	u64 faster_tiers_dirty	= 0;
-	u64 copygc_can_free	= 0;
+	for_each_member_device(ca, c, i) {
+		struct bch_dev_usage stats = bch2_dev_usage_read(c, ca);
 
-	rcu_read_lock();
-	for (i = 0; i < ARRAY_SIZE(c->tiers); i++) {
-		bch2_pd_controller_update(&c->tiers[i].pd,
-				div_u64(faster_tiers_size *
-					c->tiering_percent, 100),
-				faster_tiers_dirty,
-				-1);
+		u64 free = bucket_to_sector(ca,
+				__dev_buckets_free(ca, stats)) << 9;
+		/*
+		 * Bytes of internal fragmentation, which can be
+		 * reclaimed by copy GC
+		 */
+		s64 fragmented = (bucket_to_sector(ca,
+					stats.buckets[BCH_DATA_USER] +
+					stats.buckets[BCH_DATA_CACHED]) -
+				  (stats.sectors[BCH_DATA_USER] +
+				   stats.sectors[BCH_DATA_CACHED])) << 9;
 
-		for_each_member_device_rcu(ca, c, iter, &c->tiers[i].devs) {
-			struct bch_dev_usage stats = bch2_dev_usage_read(c, ca);
-
-			u64 size = bucket_to_sector(ca, ca->mi.nbuckets -
-					ca->mi.first_bucket) << 9;
-			u64 dirty = bucket_to_sector(ca,
-					stats.buckets[BCH_DATA_USER]) << 9;
-			u64 free = bucket_to_sector(ca,
-					__dev_buckets_free(ca, stats)) << 9;
-			/*
-			 * Bytes of internal fragmentation, which can be
-			 * reclaimed by copy GC
-			 */
-			s64 fragmented = (bucket_to_sector(ca,
-						stats.buckets[BCH_DATA_USER] +
-						stats.buckets[BCH_DATA_CACHED]) -
-					  (stats.sectors[BCH_DATA_USER] +
-					   stats.sectors[BCH_DATA_CACHED])) << 9;
-
-			fragmented = max(0LL, fragmented);
-
-			bch2_pd_controller_update(&ca->copygc_pd,
-						  free, fragmented, -1);
-
-			faster_tiers_size	+= size;
-			faster_tiers_dirty	+= dirty;
-
-			copygc_can_free		+= fragmented;
-		}
-	}
+		fragmented = max(0LL, fragmented);
 
-	rcu_read_unlock();
-
-	/*
-	 * Throttle foreground writes if tier 0 is running out of free buckets,
-	 * and either tiering or copygc can free up space.
-	 *
-	 * Target will be small if there isn't any work to do - we don't want to
-	 * throttle foreground writes if we currently have all the free space
-	 * we're ever going to have.
-	 *
-	 * Otherwise, if there's work to do, try to keep 20% of tier0 available
-	 * for foreground writes.
-	 */
-	if (c->fastest_tier)
-		copygc_can_free = U64_MAX;
+		bch2_pd_controller_update(&ca->copygc_pd,
+					  free, fragmented, -1);
+	}
 
 	schedule_delayed_work(&c->pd_controllers_update,
 			      c->pd_controllers_update_seconds * HZ);
 }
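For readers following the hunk above: the figure fed to each device's copygc controller is the bytes in buckets owned by user/cached data minus the bytes actually live in them, clamped at zero. A standalone sketch of that arithmetic; the names here (usage_sketch, reclaimable_bytes) are invented for illustration and are not part of the commit:

    /*
     * Illustrative sketch, not the commit's code: the "fragmented"
     * computation above. Outputs are in bytes; a bucket holds
     * bucket_sectors 512-byte sectors.
     */
    #include <stdint.h>

    struct usage_sketch {
        uint64_t buckets_user, buckets_cached; /* buckets owned by data */
        uint64_t sectors_user, sectors_cached; /* sectors actually live */
    };

    static int64_t reclaimable_bytes(struct usage_sketch u, uint64_t bucket_sectors)
    {
        int64_t owned = (int64_t) ((u.buckets_user + u.buckets_cached) * bucket_sectors);
        int64_t live  = (int64_t) (u.sectors_user + u.sectors_cached);

        /* bytes of internal fragmentation; << 9 converts sectors to bytes */
        int64_t fragmented = (owned - live) << 9;

        /* same clamp as max(0LL, fragmented) in the hunk above */
        return fragmented > 0 ? fragmented : 0;
    }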
@@ -1201,22 +1161,14 @@ out:
 	return ob - c->open_buckets;
 }
 
-static int __dev_alloc_cmp(struct bch_fs *c,
-			   struct write_point *wp,
+static int __dev_alloc_cmp(struct write_point *wp,
 			   unsigned l, unsigned r)
 {
-	struct bch_dev *ca_l = rcu_dereference(c->devs[l]);
-	struct bch_dev *ca_r = rcu_dereference(c->devs[r]);
-
-	if (ca_l && ca_r && ca_l->mi.tier != ca_r->mi.tier)
-		return ((ca_l->mi.tier > ca_r->mi.tier) -
-			(ca_l->mi.tier < ca_r->mi.tier));
-
 	return ((wp->next_alloc[l] > wp->next_alloc[r]) -
 		(wp->next_alloc[l] < wp->next_alloc[r]));
 }
 
-#define dev_alloc_cmp(l, r) __dev_alloc_cmp(c, wp, l, r)
+#define dev_alloc_cmp(l, r) __dev_alloc_cmp(wp, l, r)
 
 struct dev_alloc_list bch2_wp_alloc_list(struct bch_fs *c,
 					 struct write_point *wp,
@@ -1355,7 +1307,7 @@ static int bch2_bucket_alloc_set(struct bch_fs *c, struct write_point *wp,
 
 static void writepoint_drop_ptrs(struct bch_fs *c,
 				 struct write_point *wp,
-				 struct bch_devs_mask *devs,
+				 u16 target, bool in_target,
 				 unsigned nr_ptrs_dislike)
 {
 	int i;
@@ -1367,7 +1319,8 @@ static void writepoint_drop_ptrs(struct bch_fs *c,
 		struct open_bucket *ob = wp->ptrs[i];
 		struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
 
-		if (nr_ptrs_dislike && !test_bit(ob->ptr.dev, devs->d)) {
+		if (nr_ptrs_dislike &&
+		    dev_in_target(ca, target) == in_target) {
 			BUG_ON(ca->open_buckets_partial_nr >=
 			       ARRAY_SIZE(ca->open_buckets_partial));
 
@@ -1401,7 +1354,7 @@ static void verify_not_stale(struct bch_fs *c, const struct write_point *wp)
 }
 
 static int open_bucket_add_buckets(struct bch_fs *c,
-				   struct bch_devs_mask *_devs,
+				   u16 target,
 				   struct write_point *wp,
 				   struct bch_devs_list *devs_have,
 				   unsigned nr_replicas,
@@ -1422,8 +1375,15 @@ static int open_bucket_add_buckets(struct bch_fs *c,
 	writepoint_for_each_ptr(wp, ob, i)
 		__clear_bit(ob->ptr.dev, devs.d);
 
-	if (_devs)
-		bitmap_and(devs.d, devs.d, _devs->d, BCH_SB_MEMBERS_MAX);
+	if (target) {
+		const struct bch_devs_mask *t;
+
+		rcu_read_lock();
+		t = bch2_target_to_mask(c, target);
+		if (t)
+			bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
+		rcu_read_unlock();
+	}
 
 	return bch2_bucket_alloc_set(c, wp, nr_replicas, reserve, &devs, cl);
 }
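The shape of the filter added above, in isolation: a u16 target resolves (under RCU in the real code) to a device bitmask via bch2_target_to_mask(), and the allocation candidate set becomes the intersection. A minimal sketch, assuming a 64-device mask for brevity; the struct name is a stand-in:

    #include <stdint.h>

    struct devs_sketch { uint64_t d; };

    static void restrict_candidates(struct devs_sketch *candidates,
                                    const struct devs_sketch *target_mask)
    {
        /* unresolved/empty target: leave the candidate set unrestricted */
        if (!target_mask)
            return;

        candidates->d &= target_mask->d;
    }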
@@ -1503,7 +1463,7 @@ out:
  * Get us an open_bucket we can allocate from, return with it locked:
  */
 struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
-					     struct bch_devs_mask *devs,
+					     unsigned target,
 					     struct write_point_specifier write_point,
 					     struct bch_devs_list *devs_have,
 					     unsigned nr_replicas,
@@ -1525,17 +1485,27 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
 	writepoint_for_each_ptr(wp, ob, i)
 		if (bch2_dev_list_has_dev(*devs_have, ob->ptr.dev))
 			nr_ptrs_have++;
-		else if (devs && !test_bit(ob->ptr.dev, devs->d))
+		else if (!dev_in_target(c->devs[ob->ptr.dev], target))
 			nr_ptrs_dislike++;
 
-	ret = open_bucket_add_buckets(c, devs, wp, devs_have,
+	ret = open_bucket_add_buckets(c, target, wp, devs_have,
 			nr_replicas + nr_ptrs_have + nr_ptrs_dislike,
 			reserve, cl);
 	if (ret && ret != -EROFS)
 		goto err;
 
-	if (wp->nr_ptrs <
-	    nr_ptrs_have + nr_ptrs_dislike + nr_replicas_required) {
+	if (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)
+		goto alloc_done;
+
+	ret = open_bucket_add_buckets(c, target, wp, devs_have,
+			nr_replicas + nr_ptrs_have,
+			reserve, cl);
+	if (ret && ret != -EROFS)
+		goto err;
+alloc_done:
+	if (wp->nr_ptrs - nr_ptrs_have -
+	    ((flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) ? nr_ptrs_dislike : 0)
+	    < nr_replicas_required) {
 		ret = -EROFS;
 		goto err;
 	}
@@ -1545,7 +1515,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
 			0, nr_ptrs_dislike);
 
 	/* Remove pointers we don't want to use: */
-	writepoint_drop_ptrs(c, wp, devs, nr_ptrs_dislike);
+	writepoint_drop_ptrs(c, wp, target, false, nr_ptrs_dislike);
 
 	/*
 	 * Move pointers to devices we already have to end of open bucket
@@ -1637,7 +1607,6 @@ void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
 
 void bch2_recalc_capacity(struct bch_fs *c)
 {
-	struct bch_tier *fastest_tier = NULL, *slowest_tier = NULL, *tier;
 	struct bch_dev *ca;
 	u64 total_capacity, capacity = 0, reserved_sectors = 0;
 	unsigned long ra_pages = 0;
@@ -1653,28 +1622,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
 
 	bch2_set_ra_pages(c, ra_pages);
 
-	/* Find fastest, slowest tiers with devices: */
-
-	for (tier = c->tiers;
-	     tier < c->tiers + ARRAY_SIZE(c->tiers); tier++) {
-		if (!dev_mask_nr(&tier->devs))
-			continue;
-		if (!fastest_tier)
-			fastest_tier = tier;
-		slowest_tier = tier;
-	}
-
-	c->fastest_tier = fastest_tier != slowest_tier ? fastest_tier : NULL;
-	c->fastest_devs = fastest_tier != slowest_tier ? &fastest_tier->devs : NULL;
-
-	if (!fastest_tier)
-		goto set_capacity;
-
-	/*
-	 * Capacity of the filesystem is the capacity of all the devices in the
-	 * slowest (highest) tier - we don't include lower tier devices.
-	 */
-	for_each_member_device_rcu(ca, c, i, &slowest_tier->devs) {
+	for_each_rw_member(ca, c, i) {
 		size_t reserve = 0;
 
 		/*
@@ -1700,16 +1648,14 @@ void bch2_recalc_capacity(struct bch_fs *c)
 
 		reserve += ARRAY_SIZE(c->write_points);
 
-		if (ca->mi.tier)
-			reserve += 1; /* tiering write point */
-		reserve += 1; /* btree write point */
+		reserve += 1; /* btree write point */
 
 		reserved_sectors += bucket_to_sector(ca, reserve);
 
 		capacity += bucket_to_sector(ca, ca->mi.nbuckets -
 					     ca->mi.first_bucket);
 	}
-set_capacity:
+
 	total_capacity = capacity;
 
 	capacity *= (100 - c->opts.gc_reserve_percent);
@@ -1745,7 +1691,8 @@ static void bch2_stop_write_point(struct bch_fs *c, struct bch_dev *ca,
 	bitmap_complement(not_self.d, ca->self.d, BCH_SB_MEMBERS_MAX);
 
 	mutex_lock(&wp->lock);
-	writepoint_drop_ptrs(c, wp, &not_self, wp->nr_ptrs);
+	writepoint_drop_ptrs(c, wp, dev_to_target(ca->dev_idx),
+			     true, wp->nr_ptrs);
 	mutex_unlock(&wp->lock);
 }
 
@@ -1776,7 +1723,6 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
 
 	/* First, remove device from allocation groups: */
 
-	clear_bit(ca->dev_idx, c->tiers[ca->mi.tier].devs.d);
 	for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
 		clear_bit(ca->dev_idx, c->rw_devs[i].d);
 
@@ -1790,7 +1736,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
 		bch2_stop_write_point(c, ca, &c->write_points[i]);
 
 	bch2_stop_write_point(c, ca, &ca->copygc_write_point);
-	bch2_stop_write_point(c, ca, &c->tiers[ca->mi.tier].wp);
+	bch2_stop_write_point(c, ca, &c->rebalance_write_point);
 	bch2_stop_write_point(c, ca, &c->btree_write_point);
 
 	mutex_lock(&c->btree_reserve_cache_lock);
@@ -1828,7 +1774,6 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
 	for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
 		if (ca->mi.data_allowed & (1 << i))
 			set_bit(ca->dev_idx, c->rw_devs[i].d);
-	set_bit(ca->dev_idx, c->tiers[ca->mi.tier].devs.d);
 }
 
 /* stop allocator thread: */
@@ -2059,7 +2004,6 @@ void bch2_fs_allocator_init(struct bch_fs *c)
 {
 	struct open_bucket *ob;
 	struct write_point *wp;
-	unsigned i;
 
 	mutex_init(&c->write_points_hash_lock);
 	spin_lock_init(&c->freelist_lock);
@@ -2079,9 +2023,7 @@ void bch2_fs_allocator_init(struct bch_fs *c)
 	}
 
 	writepoint_init(&c->btree_write_point, BCH_DATA_BTREE);
-
-	for (i = 0; i < ARRAY_SIZE(c->tiers); i++)
-		writepoint_init(&c->tiers[i].wp, BCH_DATA_USER);
+	writepoint_init(&c->rebalance_write_point, BCH_DATA_USER);
 
 	for (wp = c->write_points;
 	     wp < c->write_points + ARRAY_SIZE(c->write_points); wp++) {
@@ -66,7 +66,7 @@ static inline void bch2_open_bucket_get(struct bch_fs *c,
 }
 
 struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
-					     struct bch_devs_mask *,
+					     unsigned,
 					     struct write_point_specifier,
 					     struct bch_devs_list *,
 					     unsigned, unsigned,
@@ -408,6 +408,8 @@ struct bch_dev {
 	struct bch_pd_controller copygc_pd;
 	struct write_point	copygc_write_point;
 
+	atomic64_t		rebalance_work;
+
 	struct journal_device	journal;
 
 	struct work_struct	io_error_work;
@@ -458,15 +460,6 @@ struct btree_debug {
 	struct dentry		*failed;
 };
 
-struct bch_tier {
-	unsigned		idx;
-	struct task_struct	*migrate;
-	struct bch_pd_controller pd;
-
-	struct bch_devs_mask	devs;
-	struct write_point	wp;
-};
-
 enum bch_fs_state {
 	BCH_FS_STARTING		= 0,
 	BCH_FS_STOPPING,
@@ -522,6 +515,7 @@ struct bch_fs {
 		u64		time_base_lo;
 		u32		time_base_hi;
 		u32		time_precision;
+		u64		features;
 	} sb;
 
 	struct bch_sb		*disk_sb;
@@ -569,16 +563,13 @@ struct bch_fs {
 	struct delayed_work	pd_controllers_update;
 	unsigned		pd_controllers_update_seconds;
 
+	/* REBALANCE */
+	struct task_struct	*rebalance_thread;
+	struct bch_pd_controller rebalance_pd;
+
+	atomic64_t		rebalance_work_unknown_dev;
+
 	/*
 	 * These contain all r/w devices - i.e. devices we can currently
 	 * allocate from:
 	 */
 	struct bch_devs_mask	rw_devs[BCH_DATA_NR];
-	struct bch_tier		tiers[BCH_TIER_MAX];
-	/* NULL if we only have devices in one tier: */
-	struct bch_devs_mask	*fastest_devs;
-	struct bch_tier		*fastest_tier;
 
 	u64			capacity; /* sectors */
 
@@ -615,6 +606,7 @@ struct bch_fs {
 	struct open_bucket	open_buckets[OPEN_BUCKETS_COUNT];
 
 	struct write_point	btree_write_point;
+	struct write_point	rebalance_write_point;
 
 	struct write_point	write_points[WRITE_POINT_COUNT];
 	struct hlist_head	write_points_hash[WRITE_POINT_COUNT];
@@ -717,8 +709,8 @@ struct bch_fs {
 
 	unsigned		btree_gc_periodic:1;
 	unsigned		copy_gc_enabled:1;
-	unsigned		tiering_enabled:1;
-	unsigned		tiering_percent;
+	unsigned		rebalance_enabled:1;
+	unsigned		rebalance_percent;
 
 #define BCH_DEBUG_PARAM(name, description) bool name;
 	BCH_DEBUG_PARAMS_ALL()
@@ -608,12 +608,22 @@ BKEY_VAL_TYPE(inode_generation, BCH_INODE_GENERATION);
 	BCH_INODE_FIELD(bi_dev,			32)	\
 	BCH_INODE_FIELD(bi_data_checksum,	8)	\
 	BCH_INODE_FIELD(bi_compression,		8)	\
-	BCH_INODE_FIELD(bi_project,		32)
+	BCH_INODE_FIELD(bi_project,		32)	\
+	BCH_INODE_FIELD(bi_background_compression, 8)	\
+	BCH_INODE_FIELD(bi_data_replicas,	8)	\
+	BCH_INODE_FIELD(bi_promote_target,	16)	\
+	BCH_INODE_FIELD(bi_foreground_target,	16)	\
+	BCH_INODE_FIELD(bi_background_target,	16)
 
 #define BCH_INODE_FIELDS_INHERIT()		\
 	BCH_INODE_FIELD(bi_data_checksum)	\
 	BCH_INODE_FIELD(bi_compression)		\
-	BCH_INODE_FIELD(bi_project)
+	BCH_INODE_FIELD(bi_project)		\
+	BCH_INODE_FIELD(bi_background_compression) \
+	BCH_INODE_FIELD(bi_data_replicas)	\
+	BCH_INODE_FIELD(bi_promote_target)	\
+	BCH_INODE_FIELD(bi_foreground_target)	\
+	BCH_INODE_FIELD(bi_background_target)
 
 enum {
 	/*
@@ -814,13 +824,14 @@ struct bch_member {
 };
 
 LE64_BITMASK(BCH_MEMBER_STATE,		struct bch_member, flags[0],  0,  4)
-LE64_BITMASK(BCH_MEMBER_TIER,		struct bch_member, flags[0],  4,  8)
-/* 8-10 unused, was HAS_(META)DATA */
+/* 4-10 unused, was TIER, HAS_(META)DATA */
 LE64_BITMASK(BCH_MEMBER_REPLACEMENT,	struct bch_member, flags[0], 10, 14)
 LE64_BITMASK(BCH_MEMBER_DISCARD,	struct bch_member, flags[0], 14, 15)
 LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED,	struct bch_member, flags[0], 15, 20)
+LE64_BITMASK(BCH_MEMBER_GROUP,		struct bch_member, flags[0], 20, 28)
+
+#define BCH_TIER_MAX			4U
 
 #if 0
 LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS,	struct bch_member, flags[1], 0, 20);
 LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
@@ -834,8 +845,6 @@ enum bch_member_state {
 	BCH_MEMBER_STATE_NR		= 4,
 };
 
-#define BCH_TIER_MAX			4U
-
 enum cache_replacement {
 	CACHE_REPLACEMENT_LRU		= 0,
 	CACHE_REPLACEMENT_FIFO		= 1,
@@ -1077,6 +1086,12 @@ LE64_BITMASK(BCH_SB_ENCODED_EXTENT_MAX_BITS,
 
 LE64_BITMASK(BCH_SB_META_REPLICAS_REQ,	struct bch_sb, flags[1], 20, 24);
 LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ,	struct bch_sb, flags[1], 24, 28);
+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE,
+					struct bch_sb, flags[1], 28, 32);
+
+LE64_BITMASK(BCH_SB_PROMOTE_TARGET,	struct bch_sb, flags[1], 28, 40);
+LE64_BITMASK(BCH_SB_FOREGROUND_TARGET,	struct bch_sb, flags[1], 40, 52);
+LE64_BITMASK(BCH_SB_BACKGROUND_TARGET,	struct bch_sb, flags[1], 52, 64);
 
 /* Features: */
 enum bch_sb_features {
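The new superblock fields above pack small integers into the 64-bit flags words. A sketch of what an LE64_BITMASK() accessor pair boils down to, using BCH_SB_FOREGROUND_TARGET's [40, 52) range as the example; the real macro also generates the le64 conversion and a typed setter:

    #include <stdint.h>

    /* read bits [40, 52) of the flags word */
    static uint64_t sb_foreground_target_get(uint64_t flags1)
    {
        return (flags1 >> 40) & ((1ULL << (52 - 40)) - 1);
    }

    /* replace bits [40, 52) of the flags word with v */
    static uint64_t sb_foreground_target_set(uint64_t flags1, uint64_t v)
    {
        uint64_t mask = ((1ULL << (52 - 40)) - 1) << 40;

        return (flags1 & ~mask) | ((v << 40) & mask);
    }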
@@ -348,7 +348,7 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
 	mutex_unlock(&c->btree_reserve_cache_lock);
 
 retry:
-	wp = bch2_alloc_sectors_start(c, NULL,
+	wp = bch2_alloc_sectors_start(c, c->opts.foreground_target,
 				      writepoint_ptr(&c->btree_write_point),
 				      &devs_have,
 				      res->nr_replicas,
@@ -40,27 +40,15 @@ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
 		if (!ca)
 			return ERR_PTR(-EINVAL);
 	} else {
-		struct block_device *bdev;
 		char *path;
-		unsigned i;
 
 		path = strndup_user((const char __user *)
 				    (unsigned long) dev, PATH_MAX);
 		if (IS_ERR(path))
 			return ERR_CAST(path);
 
-		bdev = lookup_bdev(path);
+		ca = bch2_dev_lookup(c, path);
 		kfree(path);
-		if (IS_ERR(bdev))
-			return ERR_CAST(bdev);
-
-		for_each_member_device(ca, c, i)
-			if (ca->disk_sb.bdev == bdev)
-				goto found;
-
-		ca = ERR_PTR(-ENOENT);
-found:
-		bdput(bdev);
 	}
 
 	return ca;
@@ -360,6 +360,9 @@ static unsigned __bio_compress(struct bch_fs *c,
 	unsigned pad;
 	int ret = 0;
 
+	BUG_ON(compression_type >= BCH_COMPRESSION_NR);
+	BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));
+
 	/* If it's only one block, don't bother trying to compress: */
 	if (bio_sectors(src) <= c->opts.block_size)
 		return 0;
@@ -465,6 +468,8 @@ unsigned bch2_bio_compress(struct bch_fs *c,
 	return compression_type;
 }
 
+static int __bch2_fs_compress_init(struct bch_fs *, u64);
+
 #define BCH_FEATURE_NONE	0
 
 static const unsigned bch2_compression_opt_to_feature[] = {
@@ -475,29 +480,42 @@ static const unsigned bch2_compression_opt_to_feature[] = {
 
 #undef BCH_FEATURE_NONE
 
-/* doesn't write superblock: */
-int bch2_check_set_has_compressed_data(struct bch_fs *c,
-				       unsigned compression_type)
+int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
 {
-	unsigned f;
 	int ret = 0;
 
-	pr_verbose_init(c->opts, "");
+	if ((c->sb.features & f) == f)
+		return 0;
 
 	mutex_lock(&c->sb_lock);
 
+	if ((c->sb.features & f) == f) {
+		mutex_unlock(&c->sb_lock);
+		return 0;
+	}
+
+	ret = __bch2_fs_compress_init(c, c->sb.features|f);
+	if (ret) {
+		mutex_unlock(&c->sb_lock);
+		return ret;
+	}
+
+	c->disk_sb->features[0] |= cpu_to_le64(f);
+	bch2_write_super(c);
+	mutex_unlock(&c->sb_lock);
+
+	return 0;
+}
+
+int bch2_check_set_has_compressed_data(struct bch_fs *c,
+				       unsigned compression_type)
+{
 	BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
 
-	if (!compression_type)
-		goto out;
-
-	f = bch2_compression_opt_to_feature[compression_type];
-	if (bch2_sb_test_feature(c->disk_sb, f))
-		goto out;
-
-	bch2_sb_set_feature(c->disk_sb, f);
-	ret = bch2_fs_compress_init(c);
-out:
-	pr_verbose_init(c->opts, "ret %i", ret);
-	return ret;
+	return compression_type
+		? __bch2_check_set_has_compressed_data(c,
+				1ULL << bch2_compression_opt_to_feature[compression_type])
+		: 0;
 }
 
 void bch2_fs_compress_exit(struct bch_fs *c)
@@ -531,7 +549,7 @@ static int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
 		: 0;
 }
 
-int bch2_fs_compress_init(struct bch_fs *c)
+static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
 {
 	size_t max_extent = c->sb.encoded_extent_max << 9;
 	size_t order = get_order(max_extent);
@@ -561,7 +579,7 @@ int bch2_fs_compress_init(struct bch_fs *c)
 	for (i = compression_types;
 	     i < compression_types + ARRAY_SIZE(compression_types);
 	     i++)
-		if (bch2_sb_test_feature(c->disk_sb, i->feature))
+		if (features & (1 << i->feature))
 			goto have_compressed;
 
 	goto out;
@@ -587,7 +605,7 @@ have_compressed:
 		decompress_workspace_size =
 			max(decompress_workspace_size, i->decompress_workspace);
 
-		if (!bch2_sb_test_feature(c->disk_sb, i->feature))
+		if (!(features & (1 << i->feature)))
 			continue;
 
 		if (i->decompress_workspace)
@@ -609,3 +627,17 @@ out:
 	pr_verbose_init(c->opts, "ret %i", ret);
 	return ret;
 }
+
+int bch2_fs_compress_init(struct bch_fs *c)
+{
+	u64 f = c->sb.features;
+
+	if (c->opts.compression)
+		f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression];
+
+	if (c->opts.background_compression)
+		f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression];
+
+	return __bch2_fs_compress_init(c, f);
+
+}
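As the new bch2_fs_compress_init() above shows, the feature mask handed to __bch2_fs_compress_init() now folds in both the foreground and background compression options. A standalone restatement of that assembly step; the function name here is invented for illustration:

    #include <stdint.h>

    static uint64_t compress_features(uint64_t existing,
                                      unsigned compression,
                                      unsigned background_compression,
                                      const unsigned *opt_to_feature)
    {
        uint64_t f = existing;

        if (compression)
            f |= 1ULL << opt_to_feature[compression];
        if (background_compression)
            f |= 1ULL << opt_to_feature[background_compression];

        return f;
    }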
@@ -1766,7 +1766,6 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
 	unsigned seq, stale;
 	char buf[160];
 	bool bad;
-	unsigned ptrs_per_tier[BCH_TIER_MAX];
 	unsigned replicas = 0;
 
 	/*
@@ -1778,12 +1777,9 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
 	 * going to get overwritten during replay)
 	 */
 
-	memset(ptrs_per_tier, 0, sizeof(ptrs_per_tier));
-
 	extent_for_each_ptr(e, ptr) {
 		ca = bch_dev_bkey_exists(c, ptr->dev);
 		replicas++;
-		ptrs_per_tier[ca->mi.tier]++;
 
 		/*
 		 * If journal replay hasn't finished, we might be seeing keys
@@ -1886,12 +1882,6 @@ static void bch2_extent_to_text(struct bch_fs *c, char *buf,
 #undef p
 }
 
-static unsigned PTR_TIER(struct bch_fs *c,
-			 const struct bch_extent_ptr *ptr)
-{
-	return bch_dev_bkey_exists(c, ptr->dev)->mi.tier;
-}
-
 static void bch2_extent_crc_init(union bch_extent_crc *crc,
 				 struct bch_extent_crc_unpacked new)
 {
@@ -2014,45 +2004,31 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
 
 void bch2_extent_mark_replicas_cached(struct bch_fs *c,
 				      struct bkey_s_extent e,
-				      unsigned nr_desired_replicas)
+				      unsigned nr_desired_replicas,
+				      unsigned target)
 {
 	struct bch_extent_ptr *ptr;
-	unsigned tier = 0, nr_cached = 0;
-	unsigned nr_good = bch2_extent_nr_good_ptrs(c, e.c);
-	bool have_higher_tier;
+	unsigned nr_cached = 0, nr_good = bch2_extent_nr_good_ptrs(c, e.c);
 
 	if (nr_good <= nr_desired_replicas)
 		return;
 
 	nr_cached = nr_good - nr_desired_replicas;
 
-	do {
-		have_higher_tier = false;
-
-		extent_for_each_ptr(e, ptr) {
-			if (!ptr->cached &&
-			    PTR_TIER(c, ptr) == tier) {
-				ptr->cached = true;
-				nr_cached--;
-				if (!nr_cached)
-					return;
-			}
-
-			if (PTR_TIER(c, ptr) > tier)
-				have_higher_tier = true;
-		}
-
-		tier++;
-	} while (have_higher_tier);
+	extent_for_each_ptr(e, ptr)
+		if (!ptr->cached &&
+		    !dev_in_target(c->devs[ptr->dev], target)) {
+			ptr->cached = true;
+			nr_cached--;
+			if (!nr_cached)
+				return;
+		}
 }
 
 /*
- * This picks a non-stale pointer, preferabbly from a device other than
- * avoid. Avoid can be NULL, meaning pick any. If there are no non-stale
- * pointers to other devices, it will still pick a pointer from avoid.
- * Note that it prefers lowered-numbered pointers to higher-numbered pointers
- * as the pointers are sorted by tier, hence preferring pointers to tier 0
- * rather than pointers to tier 1.
+ * This picks a non-stale pointer, preferably from a device other than @avoid.
+ * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to
+ * other devices, it will still pick a pointer from avoid.
  */
 void bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
 			  struct bch_devs_mask *avoid,
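The behavior of the reworked bch2_extent_mark_replicas_cached() above, restated: keep nr_desired_replicas pointers dirty and flag surplus pointers that fall outside the background target as cached. A simplified standalone sketch with one boolean per pointer; the names are invented for illustration:

    #include <stdbool.h>

    struct ptr_sketch { bool cached; bool in_target; };

    static void mark_extra_cached(struct ptr_sketch *ptrs, unsigned nr,
                                  unsigned nr_good, unsigned nr_desired)
    {
        unsigned i, nr_cached;

        if (nr_good <= nr_desired)
            return;

        nr_cached = nr_good - nr_desired;

        /* demote dirty pointers outside the target until quota is met */
        for (i = 0; i < nr && nr_cached; i++)
            if (!ptrs[i].cached && !ptrs[i].in_target) {
                ptrs[i].cached = true;
                nr_cached--;
            }
    }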
@@ -39,7 +39,7 @@ bch2_insert_fixup_extent(struct btree_insert *,
 
 bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
 void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
-				      unsigned);
+				      unsigned, unsigned);
 
 const struct bch_extent_ptr *
 bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
@@ -504,10 +504,8 @@ static inline void bch2_fswrite_op_init(struct bchfs_write_op *op,
 	op->unalloc	= false;
 	op->new_i_size	= U64_MAX;
 
-	bch2_write_op_init(&op->op, c);
-	op->op.csum_type = bch2_data_checksum_type(c, opts.data_checksum);
-	op->op.compression_type = bch2_compression_opt_to_type[opts.compression];
-	op->op.devs = c->fastest_devs;
+	bch2_write_op_init(&op->op, c, opts);
+	op->op.target = opts.foreground_target;
 	op->op.index_update_fn = bchfs_write_index_update;
 	op_journal_seq_set(&op->op, &inode->ei_journal_seq);
 }
@@ -615,8 +613,14 @@ static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *in
 			struct page *page, bool check_enospc)
 {
 	struct bch_page_state *s = page_state(page), new, old;
+
+	/* XXX: this should not be open coded */
+	unsigned nr_replicas = inode->ei_inode.bi_data_replicas
+		? inode->ei_inode.bi_data_replicas - 1
+		: c->opts.data_replicas;
+
 	struct disk_reservation disk_res = bch2_disk_reservation_init(c,
-			READ_ONCE(c->opts.data_replicas));
+			nr_replicas);
 	struct quota_res quota_res = { 0 };
 	int ret = 0;
 
@@ -1894,7 +1898,7 @@ static int bch2_direct_IO_write(struct kiocb *req,
 		goto err;
 
 	ret = bch2_disk_reservation_get(c, &dio->iop.op.res, iter->count >> 9,
-					c->opts.data_replicas, 0);
+					dio->iop.op.opts.data_replicas, 0);
 	if (unlikely(ret)) {
 		if (bch2_check_range_allocated(c, POS(inode->v.i_ino,
 						      offset >> 9),
@@ -2351,7 +2355,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
 	loff_t block_start, block_end;
 	loff_t end = offset + len;
 	unsigned sectors;
-	unsigned replicas = READ_ONCE(c->opts.data_replicas);
+	unsigned replicas = io_opts(c, inode).data_replicas;
 	int ret;
 
 	bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
@@ -1266,6 +1266,7 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root)
 {
 	struct bch_fs *c = root->d_sb->s_fs_info;
 	enum bch_opt_id i;
+	char buf[512];
 
 	for (i = 0; i < bch2_opts_nr; i++) {
 		const struct bch_option *opt = &bch2_opt_table[i];
@@ -1277,17 +1278,10 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root)
 		if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
 			continue;
 
-		switch (opt->type) {
-		case BCH_OPT_BOOL:
-			seq_printf(seq, ",%s%s", v ? "" : "no", opt->attr.name);
-			break;
-		case BCH_OPT_UINT:
-			seq_printf(seq, ",%s=%llu", opt->attr.name, v);
-			break;
-		case BCH_OPT_STR:
-			seq_printf(seq, ",%s=%s", opt->attr.name, opt->choices[v]);
-			break;
-		}
+		bch2_opt_to_text(c, buf, sizeof(buf), opt, v,
+				 OPT_SHOW_MOUNT_STYLE);
+		seq_putc(seq, ',');
+		seq_puts(seq, buf);
 	}
 
 	return 0;
@@ -22,6 +22,7 @@
 #include "move.h"
 #include "super.h"
 #include "super-io.h"
+#include "tier.h"
 
 #include <linux/blkdev.h>
 #include <linux/random.h>
@@ -220,9 +221,9 @@ int bch2_write_index_default(struct bch_write_op *op)
 			     BTREE_ITER_INTENT);
 
 	ret = bch2_btree_insert_list_at(&iter, keys, &op->res,
-			NULL, op_journal_seq(op),
-			BTREE_INSERT_NOFAIL|
-			BTREE_INSERT_USE_RESERVE);
+				       NULL, op_journal_seq(op),
+				       BTREE_INSERT_NOFAIL|
+				       BTREE_INSERT_USE_RESERVE);
 	bch2_btree_iter_unlock(&iter);
 
 	return ret;
@@ -238,7 +239,7 @@ static void bch2_write_index(struct closure *cl)
 	struct keylist *keys = &op->insert_keys;
 	struct bkey_s_extent e;
 	struct bch_extent_ptr *ptr;
-	struct bkey_i *src, *dst = keys->keys, *n;
+	struct bkey_i *src, *dst = keys->keys, *n, *k;
 	int ret;
 
 	op->flags |= BCH_WRITE_LOOPED;
@@ -268,6 +269,14 @@ static void bch2_write_index(struct closure *cl)
 
 	keys->top = dst;
 
+	/*
+	 * probably not the ideal place to hook this in, but I don't
+	 * particularly want to plumb io_opts all the way through the btree
+	 * update stack right now
+	 */
+	for_each_keylist_key(keys, k)
+		bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts);
+
 	if (!bch2_keylist_empty(keys)) {
 		u64 sectors_start = keylist_sectors(keys);
 		int ret = op->index_update_fn(op);
@@ -735,7 +744,7 @@ static void __bch2_write(struct closure *cl)
 		continue_at(cl, bch2_write_index, index_update_wq(op));
 
 		wp = bch2_alloc_sectors_start(c,
-			op->devs,
+			op->target,
 			op->write_point,
 			&op->devs_have,
 			op->nr_replicas,
@@ -935,29 +944,32 @@ static struct promote_op *promote_alloc(struct bch_read_bio *rbio,
 	memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
 	       sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
 
-	ret = bch2_migrate_write_init(c, &op->write, c->fastest_devs,
-			writepoint_hashed((unsigned long) current),
-			rbio->opts,
-			DATA_PROMOTE,
-			(struct data_opts) { 0 },
-			k);
+	ret = bch2_migrate_write_init(c, &op->write,
+			writepoint_hashed((unsigned long) current),
+			rbio->opts,
+			DATA_PROMOTE,
+			(struct data_opts) {
+				.target = rbio->opts.promote_target
+			},
+			k);
 	BUG_ON(ret);
 
 	return op;
 }
 
-/* only promote if we're not reading from the fastest tier: */
-static bool should_promote(struct bch_fs *c,
-			   struct extent_pick_ptr *pick, unsigned flags)
+static bool should_promote(struct bch_fs *c, struct bkey_s_c_extent e,
+			   unsigned flags, u16 target)
 {
+	if (!target)
+		return false;
+
 	if (!(flags & BCH_READ_MAY_PROMOTE))
 		return false;
 
 	if (percpu_ref_is_dying(&c->writes))
 		return false;
 
-	return c->fastest_tier &&
-		c->fastest_tier < c->tiers + pick->ca->mi.tier;
+	return bch2_extent_has_target(c, e, target);
 }
 
 /* Read */
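After this hunk, the promote decision is keyed entirely off the per-io promote_target rather than a tier comparison. A sketch of the decision's shape, with the conditions in the order the hunk checks them; the parameter names here are stand-ins, not the commit's API:

    #include <stdbool.h>

    static bool should_promote_sketch(bool may_promote, bool fs_going_ro,
                                      unsigned target, bool extent_on_target)
    {
        if (!target)
            return false;       /* no promote target configured */
        if (!may_promote)
            return false;       /* caller passed no BCH_READ_MAY_PROMOTE */
        if (fs_going_ro)
            return false;       /* c->writes percpu ref is dying */

        /* mirrors the hunk's final bch2_extent_has_target() check */
        return extent_on_target;
    }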
@@ -1323,7 +1335,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
 		bounce = true;
 	}
 
-	promote = should_promote(c, pick, flags);
+	promote = should_promote(c, e, flags, orig->opts.promote_target);
 	/* could also set read_full */
 	if (promote)
 		bounce = true;
@@ -61,24 +61,25 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
 
 int bch2_write_index_default(struct bch_write_op *);
 
-static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c)
+static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
+				      struct bch_io_opts opts)
 {
 	op->c			= c;
 	op->io_wq		= index_update_wq(op);
 	op->flags		= 0;
 	op->written		= 0;
 	op->error		= 0;
-	op->csum_type		= bch2_data_checksum_type(c, c->opts.data_checksum);
-	op->compression_type	=
-		bch2_compression_opt_to_type[c->opts.compression];
+	op->csum_type		= bch2_data_checksum_type(c, opts.data_checksum);
+	op->compression_type	= bch2_compression_opt_to_type[opts.compression];
 	op->nr_replicas		= 0;
 	op->nr_replicas_required = c->opts.data_replicas_required;
 	op->alloc_reserve	= RESERVE_NONE;
 	op->open_buckets_nr	= 0;
 	op->devs_have.nr	= 0;
+	op->target		= 0;
+	op->opts		= opts;
 	op->pos			= POS_MAX;
 	op->version		= ZERO_VERSION;
-	op->devs		= NULL;
 	op->write_point		= (struct write_point_specifier) { 0 };
 	op->res			= (struct disk_reservation) { 0 };
 	op->journal_seq		= 0;
@@ -103,13 +103,14 @@ struct bch_write_op {
 	u16			target;
 	u16			nonce;
 
+	struct bch_io_opts	opts;
+
 	struct bpos		pos;
 	struct bversion		version;
 
 	/* For BCH_WRITE_DATA_ENCODED: */
 	struct bch_extent_crc_unpacked crc;
 
-	struct bch_devs_mask	*devs;
 	struct write_point_specifier write_point;
 
 	struct disk_reservation	res;
@@ -14,11 +14,16 @@
 
 #include <trace/events/bcachefs.h>
 
+#define SECTORS_IN_FLIGHT_PER_DEVICE	2048
+
 struct moving_io {
 	struct list_head	list;
 	struct closure		cl;
 	bool			read_completed;
-	unsigned		sectors;
+
+	unsigned		read_dev;
+	unsigned		read_sectors;
+	unsigned		write_sectors;
 
 	struct bch_read_bio	rbio;
 
@@ -34,7 +39,11 @@ struct moving_context {
 	struct bch_move_stats	*stats;
 
 	struct list_head	reads;
-	atomic_t		sectors_in_flight;
+
+	/* in flight sectors: */
+	atomic_t		read_sectors[BCH_SB_MEMBERS_MAX];
+	atomic_t		write_sectors;
+
 	wait_queue_head_t	wait;
 };
 
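The split counters above replace the single global sectors_in_flight: reads are now throttled per source device and writes globally, each against the same 2048-sector budget. A userspace sketch of that admission check, under the assumption of a fixed 64-device array; names are illustrative only:

    #include <stdatomic.h>
    #include <stdbool.h>

    #define BUDGET_SECTORS 2048     /* SECTORS_IN_FLIGHT_PER_DEVICE */

    struct ctxt_sketch {
        atomic_uint read_sectors[64];   /* per source device */
        atomic_uint write_sectors;      /* global */
    };

    static bool may_start_move(struct ctxt_sketch *c, unsigned dev)
    {
        return atomic_load(&c->read_sectors[dev]) < BUDGET_SECTORS &&
               atomic_load(&c->write_sectors)     < BUDGET_SECTORS;
    }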
@@ -116,7 +125,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 			(struct bch_extent_crc_unpacked) { 0 });
 		bch2_extent_normalize(c, extent_i_to_s(insert).s);
 		bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert),
-						 c->opts.data_replicas);
+						 op->opts.background_target,
+						 op->opts.data_replicas);
 
 		/*
 		 * It's possible we race, and for whatever reason the extent now
@@ -206,7 +216,6 @@ void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio)
 }
 
 int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
-			    struct bch_devs_mask *devs,
 			    struct write_point_specifier wp,
 			    struct bch_io_opts io_opts,
 			    enum data_cmd data_cmd,
@@ -219,11 +228,11 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
 	m->data_opts	= data_opts;
 	m->nr_ptrs_reserved = bch2_extent_nr_dirty_ptrs(k);
 
-	bch2_write_op_init(&m->op, c);
-	m->op.csum_type = bch2_data_checksum_type(c, io_opts.data_checksum);
+	bch2_write_op_init(&m->op, c, io_opts);
 	m->op.compression_type =
-		bch2_compression_opt_to_type[io_opts.compression];
-	m->op.devs	= devs;
+		bch2_compression_opt_to_type[io_opts.background_compression ?:
+					     io_opts.compression];
+	m->op.target	= data_opts.target,
 	m->op.write_point = wp;
 
 	if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE)
@@ -241,8 +250,8 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
 
 	switch (data_cmd) {
 	case DATA_ADD_REPLICAS:
-		if (m->nr_ptrs_reserved < c->opts.data_replicas) {
-			m->op.nr_replicas = c->opts.data_replicas - m->nr_ptrs_reserved;
+		if (m->nr_ptrs_reserved < io_opts.data_replicas) {
+			m->op.nr_replicas = io_opts.data_replicas - m->nr_ptrs_reserved;
 
 			ret = bch2_disk_reservation_get(c, &m->op.res,
 							k.k->size,
@@ -250,7 +259,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
 			if (ret)
 				return ret;
 
-			m->nr_ptrs_reserved = c->opts.data_replicas;
+			m->nr_ptrs_reserved = io_opts.data_replicas;
 		}
 		break;
 	case DATA_REWRITE:
@@ -279,19 +288,29 @@ static void move_free(struct closure *cl)
 		if (bv->bv_page)
 			__free_page(bv->bv_page);
 
-	atomic_sub(io->sectors, &ctxt->sectors_in_flight);
 	wake_up(&ctxt->wait);
 
 	kfree(io);
 }
 
+static void move_write_done(struct closure *cl)
+{
+	struct moving_io *io = container_of(cl, struct moving_io, cl);
+
+	atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors);
+	closure_return_with_destructor(cl, move_free);
+}
+
 static void move_write(struct closure *cl)
 {
 	struct moving_io *io = container_of(cl, struct moving_io, cl);
 
 	if (likely(!io->rbio.bio.bi_status)) {
 		bch2_migrate_read_done(&io->write, &io->rbio);
+
+		atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
 		closure_call(&io->write.op.cl, bch2_write, NULL, cl);
+		continue_at(cl, move_write_done, NULL);
 	}
 
 	closure_return_with_destructor(cl, move_free);
@@ -310,94 +329,15 @@ static void move_read_endio(struct bio *bio)
 	struct moving_io *io = container_of(bio, struct moving_io, rbio.bio);
 	struct moving_context *ctxt = io->write.ctxt;
 
+	atomic_sub(io->read_sectors, &ctxt->read_sectors[io->read_dev]);
 	io->read_completed = true;
+
 	if (next_pending_write(ctxt))
 		wake_up(&ctxt->wait);
 
 	closure_put(&ctxt->cl);
 }
 
-static int bch2_move_extent(struct bch_fs *c,
-			    struct moving_context *ctxt,
-			    struct bch_devs_mask *devs,
-			    struct write_point_specifier wp,
-			    struct bch_io_opts io_opts,
-			    struct bkey_s_c_extent e,
-			    enum data_cmd data_cmd,
-			    struct data_opts data_opts)
-{
-	struct extent_pick_ptr pick;
-	struct moving_io *io;
-	const struct bch_extent_ptr *ptr;
-	struct bch_extent_crc_unpacked crc;
-	unsigned sectors = e.k->size, pages;
-	int ret = -ENOMEM;
-
-	bch2_extent_pick_ptr(c, e.s_c, NULL, &pick);
-	if (IS_ERR_OR_NULL(pick.ca))
-		return pick.ca ? PTR_ERR(pick.ca) : 0;
-
-	/* write path might have to decompress data: */
-	extent_for_each_ptr_crc(e, ptr, crc)
-		sectors = max_t(unsigned, sectors, crc.uncompressed_size);
-
-	pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
-	io = kzalloc(sizeof(struct moving_io) +
-		     sizeof(struct bio_vec) * pages, GFP_KERNEL);
-	if (!io)
-		goto err;
-
-	io->write.ctxt	= ctxt;
-	io->sectors	= e.k->size;
-
-	bio_init(&io->write.op.wbio.bio, io->bi_inline_vecs, pages);
-	bio_set_prio(&io->write.op.wbio.bio,
-		     IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
-	io->write.op.wbio.bio.bi_iter.bi_size = sectors << 9;
-
-	bch2_bio_map(&io->write.op.wbio.bio, NULL);
-	if (bio_alloc_pages(&io->write.op.wbio.bio, GFP_KERNEL))
-		goto err_free;
-
-	io->rbio.opts = io_opts;
-	bio_init(&io->rbio.bio, io->bi_inline_vecs, pages);
-	bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
-	io->rbio.bio.bi_iter.bi_size = sectors << 9;
-
-	bio_set_op_attrs(&io->rbio.bio, REQ_OP_READ, 0);
-	io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(e.k);
-	io->rbio.bio.bi_end_io = move_read_endio;
-
-	ret = bch2_migrate_write_init(c, &io->write, devs, wp,
-				      io_opts, data_cmd, data_opts, e.s_c);
-	if (ret)
-		goto err_free_pages;
-
-	atomic64_inc(&ctxt->stats->keys_moved);
-	atomic64_add(e.k->size, &ctxt->stats->sectors_moved);
-
-	trace_move_extent(e.k);
-
-	atomic_add(io->sectors, &ctxt->sectors_in_flight);
-	list_add_tail(&io->list, &ctxt->reads);
-
-	/*
-	 * dropped by move_read_endio() - guards against use after free of
-	 * ctxt when doing wakeup
-	 */
-	closure_get(&ctxt->cl);
-	bch2_read_extent(c, &io->rbio, e, &pick, BCH_READ_NODECODE);
-	return 0;
-err_free_pages:
-	bio_free_pages(&io->write.op.wbio.bio);
-err_free:
-	kfree(io);
-err:
-	percpu_ref_put(&pick.ca->io_ref);
-	trace_move_alloc_fail(e.k);
-	return ret;
-}
-
 static void do_pending_writes(struct moving_context *ctxt)
 {
 	struct moving_io *io;
@@ -420,17 +360,105 @@ do { \
 
 static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
 {
-	unsigned sectors_pending = atomic_read(&ctxt->sectors_in_flight);
+	unsigned sectors_pending = atomic_read(&ctxt->write_sectors);
 
 	move_ctxt_wait_event(ctxt,
-		!atomic_read(&ctxt->sectors_in_flight) ||
-		atomic_read(&ctxt->sectors_in_flight) != sectors_pending);
+		!atomic_read(&ctxt->write_sectors) ||
+		atomic_read(&ctxt->write_sectors) != sectors_pending);
+}
+
+static int bch2_move_extent(struct bch_fs *c,
+			    struct moving_context *ctxt,
+			    struct write_point_specifier wp,
+			    struct bch_io_opts io_opts,
+			    struct bkey_s_c_extent e,
+			    enum data_cmd data_cmd,
+			    struct data_opts data_opts)
+{
+	struct extent_pick_ptr pick;
+	struct moving_io *io;
+	const struct bch_extent_ptr *ptr;
+	struct bch_extent_crc_unpacked crc;
+	unsigned sectors = e.k->size, pages;
+	int ret = -ENOMEM;
+
+	move_ctxt_wait_event(ctxt,
+		atomic_read(&ctxt->write_sectors) <
+		SECTORS_IN_FLIGHT_PER_DEVICE);
+
+	bch2_extent_pick_ptr(c, e.s_c, NULL, &pick);
+	if (IS_ERR_OR_NULL(pick.ca))
+		return pick.ca ? PTR_ERR(pick.ca) : 0;
+
+	move_ctxt_wait_event(ctxt,
+		atomic_read(&ctxt->read_sectors[pick.ca->dev_idx]) <
+		SECTORS_IN_FLIGHT_PER_DEVICE);
+
+	/* write path might have to decompress data: */
+	extent_for_each_ptr_crc(e, ptr, crc)
+		sectors = max_t(unsigned, sectors, crc.uncompressed_size);
+
+	pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
+	io = kzalloc(sizeof(struct moving_io) +
+		     sizeof(struct bio_vec) * pages, GFP_KERNEL);
+	if (!io)
+		goto err;
+
+	io->write.ctxt		= ctxt;
+	io->read_dev		= pick.ca->dev_idx;
+	io->read_sectors	= pick.crc.uncompressed_size;
+	io->write_sectors	= e.k->size;
+
+	bio_init(&io->write.op.wbio.bio, io->bi_inline_vecs, pages);
+	bio_set_prio(&io->write.op.wbio.bio,
+		     IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+	io->write.op.wbio.bio.bi_iter.bi_size = sectors << 9;
+
+	bch2_bio_map(&io->write.op.wbio.bio, NULL);
+	if (bio_alloc_pages(&io->write.op.wbio.bio, GFP_KERNEL))
+		goto err_free;
+
+	io->rbio.opts = io_opts;
+	bio_init(&io->rbio.bio, io->bi_inline_vecs, pages);
+	bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+	io->rbio.bio.bi_iter.bi_size = sectors << 9;
+
+	bio_set_op_attrs(&io->rbio.bio, REQ_OP_READ, 0);
+	io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(e.k);
+	io->rbio.bio.bi_end_io = move_read_endio;
+
+	ret = bch2_migrate_write_init(c, &io->write, wp, io_opts,
+				      data_cmd, data_opts, e.s_c);
+	if (ret)
+		goto err_free_pages;
+
+	atomic64_inc(&ctxt->stats->keys_moved);
+	atomic64_add(e.k->size, &ctxt->stats->sectors_moved);
+
+	trace_move_extent(e.k);
+
+	atomic_add(io->read_sectors, &ctxt->read_sectors[io->read_dev]);
+	list_add_tail(&io->list, &ctxt->reads);
+
+	/*
+	 * dropped by move_read_endio() - guards against use after free of
+	 * ctxt when doing wakeup
+	 */
+	closure_get(&ctxt->cl);
+	bch2_read_extent(c, &io->rbio, e, &pick, BCH_READ_NODECODE);
+	return 0;
+err_free_pages:
+	bio_free_pages(&io->write.op.wbio.bio);
+err_free:
+	kfree(io);
+err:
+	percpu_ref_put(&pick.ca->io_ref);
+	trace_move_alloc_fail(e.k);
+	return ret;
 }
 
 int bch2_move_data(struct bch_fs *c,
 		   struct bch_ratelimit *rate,
-		   unsigned sectors_in_flight,
-		   struct bch_devs_mask *devs,
 		   struct write_point_specifier wp,
 		   struct bpos start,
 		   struct bpos end,
@@ -460,13 +488,6 @@ int bch2_move_data(struct bch_fs *c,
 	bch2_ratelimit_reset(rate);
 
 	while (!kthread || !(ret = kthread_should_stop())) {
-		if (atomic_read(&ctxt.sectors_in_flight) >= sectors_in_flight) {
-			bch2_btree_iter_unlock(&stats->iter);
-			move_ctxt_wait_event(&ctxt,
-					     atomic_read(&ctxt.sectors_in_flight) <
-					     sectors_in_flight);
-		}
-
 		if (rate &&
 		    bch2_ratelimit_delay(rate) &&
 		    (bch2_btree_iter_unlock(&stats->iter),
@@ -519,7 +540,7 @@ peek:
 		k = bkey_i_to_s_c(&tmp.k);
 		bch2_btree_iter_unlock(&stats->iter);
 
-		ret2 = bch2_move_extent(c, &ctxt, devs, wp, io_opts,
+		ret2 = bch2_move_extent(c, &ctxt, wp, io_opts,
 					bkey_s_c_to_extent(k),
 					data_cmd, data_opts);
 		if (ret2) {
@@ -545,11 +566,10 @@ next_nondata:
 
 	bch2_btree_iter_unlock(&stats->iter);
 
-	move_ctxt_wait_event(&ctxt, !atomic_read(&ctxt.sectors_in_flight));
+	move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
 	closure_sync(&ctxt.cl);
 
 	EBUG_ON(!list_empty(&ctxt.reads));
-	EBUG_ON(atomic_read(&ctxt.sectors_in_flight));
+	EBUG_ON(atomic_read(&ctxt.write_sectors));
 
 	trace_move_data(c,
 			atomic64_read(&stats->sectors_moved),
@@ -671,11 +691,12 @@ static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
 	unsigned nr_good = bch2_extent_nr_good_ptrs(c, e);
 	unsigned replicas = type == BKEY_TYPE_BTREE
 		? c->opts.metadata_replicas
-		: c->opts.data_replicas;
+		: io_opts->data_replicas;
 
 	if (!nr_good || nr_good >= replicas)
 		return DATA_SKIP;
 
+	data_opts->target		= 0;
 	data_opts->btree_insert_flags	= 0;
 	return DATA_ADD_REPLICAS;
 }
@@ -691,6 +712,7 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
 	if (!bch2_extent_has_device(e, op->migrate.dev))
 		return DATA_SKIP;
 
+	data_opts->target		= 0;
 	data_opts->btree_insert_flags	= 0;
 	data_opts->rewrite_dev		= op->migrate.dev;
 	return DATA_REWRITE;
@@ -710,8 +732,7 @@ int bch2_data_job(struct bch_fs *c,
 		ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
 		ret = bch2_gc_btree_replicas(c) ?: ret;
 
-		ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE,
-				     NULL,
+		ret = bch2_move_data(c, NULL,
 				     writepoint_hashed((unsigned long) current),
 				     op.start,
 				     op.end,
@@ -728,8 +749,7 @@ int bch2_data_job(struct bch_fs *c,
 		ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
 		ret = bch2_gc_btree_replicas(c) ?: ret;
 
-		ret = bch2_move_data(c, NULL, SECTORS_IN_FLIGHT_PER_DEVICE,
-				     NULL,
+		ret = bch2_move_data(c, NULL,
 				     writepoint_hashed((unsigned long) current),
 				     op.start,
 				     op.end,
@@ -17,6 +17,7 @@ enum data_cmd {
 };
 
 struct data_opts {
+	u16		target;
 	unsigned	rewrite_dev;
 	int		btree_insert_flags;
 };
@@ -38,14 +39,11 @@ struct migrate_write {
 
 void bch2_migrate_read_done(struct migrate_write *, struct bch_read_bio *);
 int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
-			    struct bch_devs_mask *,
 			    struct write_point_specifier,
 			    struct bch_io_opts,
 			    enum data_cmd, struct data_opts,
 			    struct bkey_s_c);
 
-#define SECTORS_IN_FLIGHT_PER_DEVICE	2048
-
 typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
 				       enum bkey_type, struct bkey_s_c_extent,
 				       struct bch_io_opts *, struct data_opts *);
@@ -61,7 +59,6 @@ struct bch_move_stats {
 };
 
 int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
-		   unsigned, struct bch_devs_mask *,
 		   struct write_point_specifier,
 		   struct bpos, struct bpos,
 		   move_pred_fn, void *,
@@ -21,6 +21,7 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/math64.h>
+#include <linux/sched/task.h>
 #include <linux/sort.h>
 #include <linux/wait.h>
 
@@ -94,7 +95,8 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
 	if (!__copygc_pred(ca, e))
 		return DATA_SKIP;
 
-	data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE,
+	data_opts->target		= dev_to_target(ca->dev_idx);
+	data_opts->btree_insert_flags	= BTREE_INSERT_USE_RESERVE;
 	data_opts->rewrite_dev		= ca->dev_idx;
 	return DATA_REWRITE;
 }
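With the hunk above, copygc expresses "rewrite onto this same device" through the generic u16 target value via dev_to_target(). The encoding of that value is defined elsewhere in the tree and does not appear in this diff; the tagged scheme below is an assumption, shown only to illustrate how a single device can be named by a target:

    #include <stdint.h>

    /* hypothetical tags; the real enum is not part of this diff */
    enum { TGT_NULL_S, TGT_DEV_S, TGT_GROUP_S };

    static inline uint16_t dev_to_target_sketch(unsigned dev_idx)
    {
        /* pack a type tag in the low bits, the device index above it */
        return (uint16_t) ((dev_idx << 2) | TGT_DEV_S);
    }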
@@ -178,8 +180,6 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
 		     bucket_offset_cmp, NULL);
 
 	ret = bch2_move_data(c, &ca->copygc_pd.rate,
-			     SECTORS_IN_FLIGHT_PER_DEVICE,
-			     &ca->self,
 			     writepoint_ptr(&ca->copygc_write_point),
 			     POS_MIN, POS_MAX,
 			     copygc_pred, ca,
|
||||
ca->copygc_pd.rate.rate = UINT_MAX;
|
||||
bch2_ratelimit_reset(&ca->copygc_pd.rate);
|
||||
|
||||
if (ca->copygc_thread)
|
||||
if (ca->copygc_thread) {
|
||||
kthread_stop(ca->copygc_thread);
|
||||
put_task_struct(ca->copygc_thread);
|
||||
}
|
||||
ca->copygc_thread = NULL;
|
||||
}
|
||||
|
||||
@@ -269,6 +271,8 @@ int bch2_copygc_start(struct bch_fs *c, struct bch_dev *ca)
 	if (IS_ERR(t))
 		return PTR_ERR(t);
 
+	get_task_struct(t);
+
 	ca->copygc_thread = t;
 	wake_up_process(ca->copygc_thread);
 
@@ -1,7 +1,9 @@
 
 #include <linux/kernel.h>
 
+#include "bcachefs.h"
 #include "opts.h"
+#include "super-io.h"
 #include "util.h"
 
 const char * const bch2_error_actions[] = {
@@ -139,6 +141,9 @@ const struct bch_option bch2_opt_table[] = {
 #define OPT_BOOL()		.type = BCH_OPT_BOOL
 #define OPT_UINT(_min, _max)	.type = BCH_OPT_UINT, .min = _min, .max = _max
 #define OPT_STR(_choices)	.type = BCH_OPT_STR, .choices = _choices
+#define OPT_FN(_fn)		.type = BCH_OPT_FN,			\
+				.parse = _fn##_parse,			\
+				.print = _fn##_print
 
 #define BCH_OPT(_name, _bits, _mode, _type, _sb_opt, _default)		\
 	[Opt_##_name] = {						\
@@ -189,7 +194,8 @@ static int bch2_mount_opt_lookup(const char *name)
 	return bch2_opt_lookup(name);
 }
 
-int bch2_opt_parse(const struct bch_option *opt, const char *val, u64 *res)
+int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt,
+		   const char *val, u64 *res)
 {
 	ssize_t ret;
 
@@ -217,11 +223,50 @@ int bch2_opt_parse(const struct bch_option *opt, const char *val, u64 *res)
 
 		*res = ret;
 		break;
+	case BCH_OPT_FN:
+		if (!c)
+			return -EINVAL;
+
+		return opt->parse(c, val, res);
 	}
 
 	return 0;
 }
 
+int bch2_opt_to_text(struct bch_fs *c, char *buf, size_t len,
+		     const struct bch_option *opt, u64 v,
+		     unsigned flags)
+{
+	char *out = buf, *end = buf + len;
+
+	if (flags & OPT_SHOW_MOUNT_STYLE) {
+		if (opt->type == BCH_OPT_BOOL)
+			return scnprintf(out, end - out, "%s%s",
+					 v ? "" : "no",
+					 opt->attr.name);
+
+		out += scnprintf(out, end - out, "%s=", opt->attr.name);
+	}
+
+	switch (opt->type) {
+	case BCH_OPT_BOOL:
+	case BCH_OPT_UINT:
+		out += scnprintf(out, end - out, "%lli", v);
+		break;
+	case BCH_OPT_STR:
+		out += (flags & OPT_SHOW_FULL_LIST)
+			? bch2_scnprint_string_list(out, end - out, opt->choices, v)
+			: scnprintf(out, end - out, opt->choices[v]);
+		break;
+	case BCH_OPT_FN:
+		return opt->print(c, out, end - out, v);
+	default:
+		BUG();
+	}
+
+	return out - buf;
+}
+
 int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
 {
 	char *opt, *name, *val;
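The OPT_SHOW_MOUNT_STYLE path of bch2_opt_to_text() above prints booleans the way mount options read ("discard" vs "nodiscard"). A standalone restatement of just that rendering rule; the function name is invented for illustration:

    #include <stdio.h>
    #include <stdbool.h>

    /* mount-style bool rendering, as in the bch2_opt_to_text() hunk above */
    static int render_bool_opt(char *out, size_t len, const char *name, bool v)
    {
        return snprintf(out, len, "%s%s", v ? "" : "no", name);
    }

    /* e.g. render_bool_opt(buf, sizeof(buf), "discard", false) -> "nodiscard" */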
@@ -237,7 +282,7 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
 		if (id < 0)
 			goto bad_opt;
 
-		ret = bch2_opt_parse(&bch2_opt_table[id], val, &v);
+		ret = bch2_opt_parse(NULL, &bch2_opt_table[id], val, &v);
 		if (ret < 0)
 			goto bad_val;
 	} else {
@@ -42,6 +42,7 @@ enum opt_type {
 	BCH_OPT_BOOL,
 	BCH_OPT_UINT,
 	BCH_OPT_STR,
+	BCH_OPT_FN,
 };
 
 /**
@@ -94,9 +95,21 @@ enum opt_type {
 	BCH_OPT(compression,		u8,	OPT_RUNTIME,		\
 		OPT_STR(bch2_compression_types),			\
 		BCH_SB_COMPRESSION_TYPE,	BCH_COMPRESSION_OPT_NONE)\
+	BCH_OPT(background_compression,	u8,	OPT_RUNTIME,		\
+		OPT_STR(bch2_compression_types),			\
+		BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_NONE)\
 	BCH_OPT(str_hash,		u8,	OPT_RUNTIME,		\
 		OPT_STR(bch2_str_hash_types),				\
 		BCH_SB_STR_HASH_TYPE,		BCH_STR_HASH_SIPHASH)	\
+	BCH_OPT(foreground_target,	u16,	OPT_RUNTIME,		\
+		OPT_FN(bch2_opt_target),				\
+		BCH_SB_FOREGROUND_TARGET,	0)			\
+	BCH_OPT(background_target,	u16,	OPT_RUNTIME,		\
+		OPT_FN(bch2_opt_target),				\
+		BCH_SB_BACKGROUND_TARGET,	0)			\
+	BCH_OPT(promote_target,		u16,	OPT_RUNTIME,		\
+		OPT_FN(bch2_opt_target),				\
+		BCH_SB_PROMOTE_TARGET,	0)				\
 	BCH_OPT(inodes_32bit,		u8,	OPT_RUNTIME,		\
 		OPT_BOOL(),						\
 		BCH_SB_INODE_32BIT,		false)			\
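All three target options above share one parse/print pair through OPT_FN()'s token pasting. A sketch of what OPT_FN(bch2_opt_target) contributes to an option-table entry, given the OPT_FN() definition added earlier in this commit; the struct and prototypes below are simplified stand-ins, not the real declarations:

    typedef unsigned long long u64;
    struct bch_fs;

    enum opt_type_sketch { OPT_BOOL_S, OPT_UINT_S, OPT_STR_S, OPT_FN_S };

    struct option_sketch {
        enum opt_type_sketch type;
        int (*parse)(struct bch_fs *, const char *, u64 *);
        int (*print)(struct bch_fs *, char *, unsigned long, u64);
    };

    /* references generated by _fn##_parse / _fn##_print token pasting */
    int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *);
    int bch2_opt_target_print(struct bch_fs *, char *, unsigned long, u64);

    static const struct option_sketch foreground_target_entry = {
        .type  = OPT_FN_S,
        .parse = bch2_opt_target_parse,
        .print = bch2_opt_target_print,
    };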
@@ -205,6 +218,8 @@ enum bch_opt_id {
 	bch2_opts_nr
 };
 
+struct bch_fs;
+
 struct bch_option {
 	struct attribute	attr;
 	void			(*set_sb)(struct bch_sb *, u64);
@@ -218,6 +233,10 @@ struct bch_option {
 	struct {
 		const char * const *choices;
 	};
+	struct {
+		int (*parse)(struct bch_fs *, const char *, u64 *);
+		int (*print)(struct bch_fs *, char *, size_t, u64);
+	};
 	};
 
 };
@ -231,14 +250,26 @@ void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64);
|
||||
struct bch_opts bch2_opts_from_sb(struct bch_sb *);
|
||||
|
||||
int bch2_opt_lookup(const char *);
|
||||
int bch2_opt_parse(const struct bch_option *, const char *, u64 *);
|
||||
int bch2_opt_parse(struct bch_fs *, const struct bch_option *, const char *, u64 *);
|
||||
|
||||
#define OPT_SHOW_FULL_LIST (1 << 0)
|
||||
#define OPT_SHOW_MOUNT_STYLE (1 << 1)
|
||||
|
||||
int bch2_opt_to_text(struct bch_fs *, char *, size_t,
|
||||
const struct bch_option *, u64, unsigned);
|
||||
|
||||
int bch2_parse_mount_opts(struct bch_opts *, char *);
|
||||
|
||||
/* inode opts: */
|
||||
|
||||
#define BCH_INODE_OPTS() \
|
||||
BCH_INODE_OPT(data_checksum, 8) \
|
||||
BCH_INODE_OPT(compression, 8)
|
||||
BCH_INODE_OPT(compression, 8) \
|
||||
BCH_INODE_OPT(background_compression, 8) \
|
||||
BCH_INODE_OPT(data_replicas, 8) \
|
||||
BCH_INODE_OPT(promote_target, 16) \
|
||||
BCH_INODE_OPT(foreground_target, 16) \
|
||||
BCH_INODE_OPT(background_target, 16)
|
||||
|
||||
struct bch_io_opts {
|
||||
#define BCH_INODE_OPT(_name, _bits) unsigned _name##_defined:1;
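The BCH_INODE_OPTS() x-macro is expanded inside struct bch_io_opts with a per-expansion definition of BCH_INODE_OPT(). Only the `_defined` bitfield expansion is visible in this hunk; the matching value-field expansion is an assumption based on the `_bits` argument:

	/* what BCH_INODE_OPT(background_target, 16) presumably expands to: */
	unsigned background_target_defined:1;	/* from the #define shown above */
	u16 background_target;			/* assumed second expansion, u##_bits _name; */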

@ -400,6 +400,7 @@ static void bch2_sb_update(struct bch_fs *c)
	c->sb.time_base_lo = le64_to_cpu(src->time_base_lo);
	c->sb.time_base_hi = le32_to_cpu(src->time_base_hi);
	c->sb.time_precision = le32_to_cpu(src->time_precision);
	c->sb.features = le64_to_cpu(src->features[0]);

	for_each_member_device(ca, c, i)
		ca->mi = bch2_mi_to_cpu(mi->members + i);

@ -1600,24 +1601,22 @@ static const char *bch2_sb_validate_quota(struct bch_sb *sb,

/* Disk groups: */

#if 0
static size_t trim_nulls(const char *str, size_t len)
static int strcmp_void(const void *l, const void *r)
{
	while (len && !str[len - 1])
		--len;
	return len;
	return strcmp(l, r);
}
#endif

static const char *bch2_sb_validate_disk_groups(struct bch_sb *sb,
						struct bch_sb_field *f)
{
	struct bch_sb_field_disk_groups *groups =
		field_to_type(f, disk_groups);
	struct bch_disk_group *g;
	struct bch_sb_field_members *mi;
	struct bch_member *m;
	struct bch_disk_group *g;
	unsigned nr_groups;
	unsigned i, nr_groups, nr_live = 0, len;
	char **labels, *l;
	const char *err = NULL;

	mi = bch2_sb_get_members(sb);
	groups = bch2_sb_get_disk_groups(sb);

@ -1626,32 +1625,57 @@ static const char *bch2_sb_validate_disk_groups(struct bch_sb *sb,
	for (m = mi->members;
	     m < mi->members + sb->nr_devices;
	     m++) {
		unsigned g;

		if (!BCH_MEMBER_GROUP(m))
			continue;

		if (BCH_MEMBER_GROUP(m) >= nr_groups)
			return "disk has invalid group";
		g = BCH_MEMBER_GROUP(m) - 1;

		g = &groups->entries[BCH_MEMBER_GROUP(m)];
		if (BCH_GROUP_DELETED(g))
		if (g >= nr_groups ||
		    BCH_GROUP_DELETED(&groups->entries[g]))
			return "disk has invalid group";
	}
#if 0
	if (!groups)

	if (!nr_groups)
		return NULL;

	char **labels;
	labels = kcalloc(nr_groups, sizeof(char *), GFP_KERNEL);
	if (!labels)
		return "cannot allocate memory";

	for (g = groups->groups;
	     g < groups->groups + nr_groups;
	for (g = groups->entries;
	     g < groups->entries + nr_groups;
	     g++) {
		if (BCH_GROUP_DELETED(g))
			continue;

		len = strnlen(g->label, sizeof(g->label));

		labels[nr_live++] = l = kmalloc(len + 1, GFP_KERNEL);
		if (!l) {
			err = "cannot allocate memory";
			goto err;
		}

		memcpy(l, g->label, len);
		l[len] = '\0';
	}
#endif
	return NULL;

	sort(labels, nr_live, sizeof(labels[0]), strcmp_void, NULL);

	for (i = 0; i + 1 < nr_live; i++)
		if (!strcmp(labels[i], labels[i + 1])) {
			err = "duplicate group labels";
			goto err;
		}

	err = NULL;
err:
	for (i = 0; i < nr_live; i++)
		kfree(labels[i]);
	kfree(labels);
	return err;
}
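One convention worth calling out, since the validation above and the to-cpu/group-set code below all rely on it: BCH_MEMBER_GROUP() is 1-based, 0 meaning "no group", so converting to an index into groups->entries[] always subtracts one. A hypothetical helper (not part of the commit) that spells out the mapping:

	static inline int bch2_member_group_idx(const struct bch_member *m)
	{
		return BCH_MEMBER_GROUP(m)
			? (int) BCH_MEMBER_GROUP(m) - 1	/* entries[] index */
			: -1;				/* no group */
	}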

static int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)

@ -1692,7 +1716,11 @@ static int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)
		if (!bch2_member_exists(m))
			continue;

		__set_bit(i, dst->devs.d);
		dst = BCH_MEMBER_GROUP(m)
			? &cpu_g->entries[BCH_MEMBER_GROUP(m) - 1]
			: NULL;
		if (dst)
			__set_bit(i, dst->devs.d);
	}

	old_g = c->disk_groups;

@ -1708,18 +1736,140 @@ const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned targe
	struct target t = target_decode(target);

	switch (t.type) {
	case TARGET_DEV:
		BUG_ON(t.dev >= c->sb.nr_devices && !c->devs[t.dev]);
		return &c->devs[t.dev]->self;
	case TARGET_DEV: {
		struct bch_dev *ca = t.dev < c->sb.nr_devices
			? rcu_dereference(c->devs[t.dev])
			: NULL;
		return ca ? &ca->self : NULL;
	}
	case TARGET_GROUP: {
		struct bch_disk_groups_cpu *g =
			rcu_dereference(c->disk_groups);
		struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);

		/* XXX: what to do here? */
		BUG_ON(t.group >= g->nr || g->entries[t.group].deleted);
		return &g->entries[t.group].devs;
		return t.group < g->nr && !g->entries[t.group].deleted
			? &g->entries[t.group].devs
			: NULL;
	}
	default:
		BUG();
	}
}
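Note the behavioural change in bch2_target_to_mask(): it used to BUG() on a stale target and now returns NULL, so callers must hold rcu_read_lock() (it dereferences RCU-protected pointers) and tolerate a missing device or deleted group. A minimal caller sketch reflecting that contract, matching how bch2_congested() uses it later in this commit:

	const struct bch_devs_mask *devs;

	rcu_read_lock();
	devs = bch2_target_to_mask(c, target);
	if (devs) {
		/* ... use the mask; only valid under RCU ... */
	}
	rcu_read_unlock();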

int __bch2_disk_group_find(struct bch_sb_field_disk_groups *groups,
			   const char *name)
{
	unsigned i, nr_groups = disk_groups_nr(groups);
	unsigned len = strlen(name);

	for (i = 0; i < nr_groups; i++) {
		struct bch_disk_group *g = groups->entries + i;

		if (BCH_GROUP_DELETED(g))
			continue;

		if (strnlen(g->label, sizeof(g->label)) == len &&
		    !memcmp(name, g->label, len))
			return i;
	}

	return -1;
}

static int bch2_disk_group_find(struct bch_fs *c, const char *name)
{
	int ret;

	mutex_lock(&c->sb_lock);
	ret = __bch2_disk_group_find(bch2_sb_get_disk_groups(c->disk_sb), name);
	mutex_unlock(&c->sb_lock);

	return ret;
}

int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v)
{
	struct bch_dev *ca;
	int g;

	if (!strlen(buf) || !strcmp(buf, "none")) {
		*v = 0;
		return 0;
	}

	/* Is it a device? */
	ca = bch2_dev_lookup(c, buf);
	if (!IS_ERR(ca)) {
		*v = dev_to_target(ca->dev_idx);
		percpu_ref_put(&ca->ref);
		return 0;
	}

	g = bch2_disk_group_find(c, buf);
	if (g >= 0) {
		*v = group_to_target(g);
		return 0;
	}

	return -EINVAL;
}
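Concretely, the three spellings the parser accepts (a summary of the code above; the device path and group label are illustrative):

	u64 v;

	bch2_opt_target_parse(c, "none", &v);	  /* v = 0 (TARGET_NULL) */
	bch2_opt_target_parse(c, "/dev/sdb", &v); /* v = dev_to_target(ca->dev_idx) */
	bch2_opt_target_parse(c, "ssd", &v);	  /* v = group_to_target(g), if a
						     group labelled "ssd" exists */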

int bch2_opt_target_print(struct bch_fs *c, char *buf, size_t len, u64 v)
{
	struct target t = target_decode(v);
	int ret;

	switch (t.type) {
	case TARGET_NULL:
		return scnprintf(buf, len, "none");
	case TARGET_DEV: {
		struct bch_dev *ca;

		rcu_read_lock();
		ca = t.dev < c->sb.nr_devices
			? rcu_dereference(c->devs[t.dev])
			: NULL;

		if (ca && percpu_ref_tryget(&ca->io_ref)) {
			char b[BDEVNAME_SIZE];

			ret = scnprintf(buf, len, "/dev/%s",
					bdevname(ca->disk_sb.bdev, b));
			percpu_ref_put(&ca->io_ref);
		} else if (ca) {
			ret = scnprintf(buf, len, "offline device %u", t.dev);
		} else {
			ret = scnprintf(buf, len, "invalid device %u", t.dev);
		}

		rcu_read_unlock();
		break;
	}
	case TARGET_GROUP: {
		struct bch_sb_field_disk_groups *groups;
		struct bch_disk_group *g;

		mutex_lock(&c->sb_lock);
		groups = bch2_sb_get_disk_groups(c->disk_sb);

		g = t.group < disk_groups_nr(groups)
			? groups->entries + t.group
			: NULL;

		if (g && !BCH_GROUP_DELETED(g)) {
			ret = len ? min(len - 1, strnlen(g->label, sizeof(g->label))) : 0;

			memcpy(buf, g->label, ret);
			if (len)
				buf[ret] = '\0';
		} else {
			ret = scnprintf(buf, len, "invalid group %u", t.group);
		}

		mutex_unlock(&c->sb_lock);
		break;
	}
	default:
		BUG();
	}

	return ret;
}

@ -129,7 +129,6 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
		.bucket_size = le16_to_cpu(mi->bucket_size),
		.group = BCH_MEMBER_GROUP(mi),
		.state = BCH_MEMBER_STATE(mi),
		.tier = BCH_MEMBER_TIER(mi),
		.replacement = BCH_MEMBER_REPLACEMENT(mi),
		.discard = BCH_MEMBER_DISCARD(mi),
		.data_allowed = BCH_MEMBER_DATA_ALLOWED(mi),

@ -204,27 +203,34 @@ struct target {
	};
};

#define TARGET_DEV_START 1
#define TARGET_GROUP_START (256 + TARGET_DEV_START)

static inline u16 dev_to_target(unsigned dev)
{
	return 1 + dev;
	return TARGET_DEV_START + dev;
}

static inline u16 group_to_target(unsigned group)
{
	return 1 + U8_MAX + group;
	return TARGET_GROUP_START + group;
}

static inline struct target target_decode(unsigned target)
{
	if (!target)
		return (struct target) { .type = TARGET_NULL };
	if (target >= TARGET_GROUP_START)
		return (struct target) {
			.type = TARGET_GROUP,
			.group = target - TARGET_GROUP_START
		};

	--target;
	if (target <= U8_MAX)
		return (struct target) { .type = TARGET_DEV, .dev = target };
	if (target >= TARGET_DEV_START)
		return (struct target) {
			.type = TARGET_DEV,
			.group = target - TARGET_DEV_START
		};

	target -= U8_MAX;
	return (struct target) { .type = TARGET_GROUP, .group = target };
	return (struct target) { .type = TARGET_NULL };
}
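With TARGET_DEV_START = 1 and TARGET_GROUP_START = 257, the u16 target space is: 0 = none, 1..256 = devices 0..255, 257 and up = groups from 0. A worked round-trip (the values follow directly from the definitions above; note the TARGET_DEV branch assigns `.group`, which presumably only works because `dev` and `group` share storage in the union inside struct target, whose body is truncated in this hunk):

	BUG_ON(dev_to_target(3) != 4);
	BUG_ON(group_to_target(2) != 259);

	BUG_ON(target_decode(0).type != TARGET_NULL);
	BUG_ON(target_decode(4).type != TARGET_DEV);	 /* .dev == 3 */
	BUG_ON(target_decode(259).type != TARGET_GROUP); /* .group == 2 */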

static inline bool dev_in_target(struct bch_dev *ca, unsigned target)

@ -232,6 +238,8 @@ static inline bool dev_in_target(struct bch_dev *ca, unsigned target)
	struct target t = target_decode(target);

	switch (t.type) {
	case TARGET_NULL:
		return false;
	case TARGET_DEV:
		return ca->dev_idx == t.dev;
	case TARGET_GROUP:

@ -243,4 +251,9 @@ static inline bool dev_in_target(struct bch_dev *ca, unsigned target)

const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *, unsigned);

int __bch2_disk_group_find(struct bch_sb_field_disk_groups *, const char *);

int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *);
int bch2_opt_target_print(struct bch_fs *, char *, size_t, u64);

#endif /* _BCACHEFS_SUPER_IO_H */

@ -149,6 +149,7 @@ int bch2_congested(void *data, int bdi_bits)
	unsigned i;
	int ret = 0;

	rcu_read_lock();
	if (bdi_bits & (1 << WB_sync_congested)) {
		/* Reads - check all devices: */
		for_each_readable_member(ca, c, i) {

@ -160,12 +161,11 @@ int bch2_congested(void *data, int bdi_bits)
			}
		}
	} else {
		/* Writes prefer fastest tier: */
		struct bch_tier *tier = READ_ONCE(c->fastest_tier);
		struct bch_devs_mask *devs =
			tier ? &tier->devs : &c->rw_devs[BCH_DATA_USER];
		unsigned target = READ_ONCE(c->opts.foreground_target);
		const struct bch_devs_mask *devs = target
			? bch2_target_to_mask(c, target)
			: &c->rw_devs[BCH_DATA_USER];

		rcu_read_lock();
		for_each_member_device_rcu(ca, c, i, devs) {
			bdi = ca->disk_sb.bdev->bd_bdi;

@ -174,8 +174,8 @@ int bch2_congested(void *data, int bdi_bits)
				break;
			}
		}
		rcu_read_unlock();
	}
	rcu_read_unlock();

	return ret;
}

@ -185,9 +185,9 @@ int bch2_congested(void *data, int bdi_bits)
/*
 * For startup/shutdown of RW stuff, the dependencies are:
 *
 * - foreground writes depend on copygc and tiering (to free up space)
 * - foreground writes depend on copygc and rebalance (to free up space)
 *
 * - copygc and tiering depend on mark and sweep gc (they actually probably
 * - copygc and rebalance depend on mark and sweep gc (they actually probably
 *   don't because they either reserve ahead of time or don't block if
 *   allocations fail, but allocations can require mark and sweep gc to run
 *   because of generation number wraparound)

@ -225,7 +225,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
	struct bch_dev *ca;
	unsigned i;

	bch2_tiering_stop(c);
	bch2_rebalance_stop(c);

	for_each_member_device(ca, c, i)
		bch2_copygc_stop(ca);

@ -385,8 +385,8 @@ const char *bch2_fs_read_write(struct bch_fs *c)
			goto err;
	}

	err = "error starting tiering thread";
	if (bch2_tiering_start(c))
	err = "error starting rebalance thread";
	if (bch2_rebalance_start(c))
		goto err;

	schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);

@ -531,7 +531,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
#undef BCH_TIME_STAT

	bch2_fs_allocator_init(c);
	bch2_fs_tiering_init(c);
	bch2_fs_rebalance_init(c);
	bch2_fs_quota_init(c);

	INIT_LIST_HEAD(&c->list);

@ -555,8 +555,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
	c->writeback_pages_max = (256 << 10) / PAGE_SIZE;

	c->copy_gc_enabled = 1;
	c->tiering_enabled = 1;
	c->tiering_percent = 10;
	c->rebalance_enabled = 1;
	c->rebalance_percent = 10;

	c->journal.write_time	= &c->journal_write_time;
	c->journal.delay_time	= &c->journal_delay_time;

@ -626,7 +626,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
	    bch2_fs_btree_cache_init(c) ||
	    bch2_fs_encryption_init(c) ||
	    bch2_fs_compress_init(c) ||
	    bch2_check_set_has_compressed_data(c, c->opts.compression) ||
	    bch2_fs_fsio_init(c))
		goto err;

@ -1216,6 +1215,8 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
	if (ca->mi.state == BCH_MEMBER_STATE_RW)
		bch2_dev_allocator_add(c, ca);

	rebalance_wakeup(c);

	percpu_ref_reinit(&ca->io_ref);
	return 0;
}

@ -1340,9 +1341,6 @@ static const char *__bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
	if (bch2_copygc_start(c, ca))
		return "error starting copygc thread";

	if (bch2_tiering_start(c))
		return "error starting tiering thread";

	return NULL;
}

@ -1350,6 +1348,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
			 enum bch_member_state new_state, int flags)
{
	struct bch_sb_field_members *mi;
	int ret = 0;

	if (ca->mi.state == new_state)
		return 0;

@ -1368,10 +1367,13 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	if (new_state == BCH_MEMBER_STATE_RW)
		return __bch2_dev_read_write(c, ca) ? -ENOMEM : 0;
	if (new_state == BCH_MEMBER_STATE_RW &&
	    __bch2_dev_read_write(c, ca))
		ret = -ENOMEM;

	return 0;
	rebalance_wakeup(c);

	return ret;
}

int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,

@ -1700,6 +1702,95 @@ err:
	return ret;
}

/* return with ref on ca->ref: */
struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path)
{
	struct block_device *bdev = lookup_bdev(path);
	struct bch_dev *ca;
	unsigned i;

	if (IS_ERR(bdev))
		return ERR_CAST(bdev);

	for_each_member_device(ca, c, i)
		if (ca->disk_sb.bdev == bdev)
			goto found;

	ca = ERR_PTR(-ENOENT);
found:
	bdput(bdev);
	return ca;
}
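Per the comment above, a successful lookup returns with ca->ref held (for_each_member_device() takes the ref and the early `goto found` skips the put), so callers follow the pattern already used by bch2_opt_target_parse() (path illustrative):

	struct bch_dev *ca = bch2_dev_lookup(c, "/dev/sdb");

	if (!IS_ERR(ca)) {
		/* ... use ca ... */
		percpu_ref_put(&ca->ref);
	}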

int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *label)
{
	struct bch_sb_field_disk_groups *groups;
	struct bch_disk_group *g;
	struct bch_member *mi;
	unsigned i, v, nr_groups;
	int ret;

	if (strlen(label) > BCH_SB_LABEL_SIZE)
		return -EINVAL;

	mutex_lock(&c->sb_lock);
	groups = bch2_sb_get_disk_groups(c->disk_sb);
	nr_groups = disk_groups_nr(groups);

	if (!strcmp(label, "none")) {
		v = 0;
		goto write_sb;
	}

	ret = __bch2_disk_group_find(groups, label);
	if (ret >= 0) {
		v = ret + 1;
		goto write_sb;
	}

	/* not found - create a new disk group: */

	for (i = 0;
	     i < nr_groups && !BCH_GROUP_DELETED(&groups->entries[i]);
	     i++)
		;

	if (i == nr_groups) {
		unsigned u64s =
			(sizeof(struct bch_sb_field_disk_groups) +
			 sizeof(struct bch_disk_group) * (nr_groups + 1)) /
			sizeof(u64);

		groups = bch2_fs_sb_resize_disk_groups(c, u64s);
		if (!groups) {
			mutex_unlock(&c->sb_lock);
			return -ENOSPC;
		}

		nr_groups = disk_groups_nr(groups);
	}

	BUG_ON(i >= nr_groups);

	g = &groups->entries[i];
	v = i + 1;

	memcpy(g->label, label, strlen(label));
	if (strlen(label) < sizeof(g->label))
		g->label[strlen(label)] = '\0';
	SET_BCH_GROUP_DELETED(g, 0);
	SET_BCH_GROUP_DATA_ALLOWED(g, ~0);
write_sb:
	mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
	SET_BCH_MEMBER_GROUP(mi, v);

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return 0;
}

/* Filesystem open: */

struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,

@ -194,6 +194,8 @@ int bch2_dev_add(struct bch_fs *, const char *);
int bch2_dev_online(struct bch_fs *, const char *);
int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int);
int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);

bool bch2_fs_emergency_read_only(struct bch_fs *);
void bch2_fs_read_only(struct bch_fs *);

@ -24,7 +24,6 @@ struct bch_member_cpu {
	u16 bucket_size; /* sectors */
	u16 group;
	u8 state;
	u8 tier;
	u8 replacement;
	u8 discard;
	u8 data_allowed;

@ -168,15 +168,14 @@ rw_attribute(writeback_pages_max);

rw_attribute(discard);
rw_attribute(cache_replacement_policy);
rw_attribute(group);

rw_attribute(copy_gc_enabled);
sysfs_pd_controller_attribute(copy_gc);

rw_attribute(tier);
rw_attribute(tiering_enabled);
rw_attribute(tiering_percent);
sysfs_pd_controller_attribute(tiering);

rw_attribute(rebalance_enabled);
rw_attribute(rebalance_percent);
sysfs_pd_controller_attribute(rebalance);

rw_attribute(pd_controllers_update_seconds);

@ -332,10 +331,10 @@ SHOW(bch2_fs)
	sysfs_print(pd_controllers_update_seconds,
		    c->pd_controllers_update_seconds);

	sysfs_printf(tiering_enabled, "%i", c->tiering_enabled);
	sysfs_print(tiering_percent, c->tiering_percent);
	sysfs_printf(rebalance_enabled, "%i", c->rebalance_enabled);
	sysfs_print(rebalance_percent, c->rebalance_percent);

	sysfs_pd_controller_show(tiering, &c->tiers[1].pd); /* XXX */
	sysfs_pd_controller_show(rebalance, &c->rebalance_pd); /* XXX */

	sysfs_printf(meta_replicas_have, "%u", bch2_replicas_online(c, true));
	sysfs_printf(data_replicas_have, "%u", bch2_replicas_online(c, false));

@ -397,19 +396,19 @@ STORE(__bch2_fs)
		return ret;
	}

	if (attr == &sysfs_tiering_enabled) {
		ssize_t ret = strtoul_safe(buf, c->tiering_enabled)
	if (attr == &sysfs_rebalance_enabled) {
		ssize_t ret = strtoul_safe(buf, c->rebalance_enabled)
			?: (ssize_t) size;

		bch2_tiering_start(c); /* issue wakeups */
		rebalance_wakeup(c);
		return ret;
	}

	sysfs_strtoul(pd_controllers_update_seconds,
		      c->pd_controllers_update_seconds);

	sysfs_strtoul(tiering_percent, c->tiering_percent);
	sysfs_pd_controller_store(tiering, &c->tiers[1].pd); /* XXX */
	sysfs_strtoul(rebalance_percent, c->rebalance_percent);
	sysfs_pd_controller_store(rebalance, &c->rebalance_pd);

	/* Debugging: */

@ -468,7 +467,7 @@ struct attribute *bch2_fs_files[] = {

	&sysfs_writeback_pages_max,

	&sysfs_tiering_percent,
	&sysfs_rebalance_percent,

	&sysfs_compression_stats,
	NULL

@ -506,8 +505,8 @@ struct attribute *bch2_fs_internal_files[] = {
	&sysfs_prune_cache,

	&sysfs_copy_gc_enabled,
	&sysfs_tiering_enabled,
	sysfs_pd_controller_files(tiering),
	&sysfs_rebalance_enabled,
	sysfs_pd_controller_files(rebalance),
	&sysfs_internal_uuid,

#define BCH_DEBUG_PARAM(name, description) &sysfs_##name,

@ -527,9 +526,7 @@ SHOW(bch2_fs_opts_dir)
	int id = opt - bch2_opt_table;
	u64 v = bch2_opt_get_by_id(&c->opts, id);

	out += opt->type == BCH_OPT_STR
		? bch2_scnprint_string_list(out, end - out, opt->choices, v)
		: scnprintf(out, end - out, "%lli", v);
	out += bch2_opt_to_text(c, out, end - out, opt, v, OPT_SHOW_FULL_LIST);
	out += scnprintf(out, end - out, "\n");

	return out - buf;

@ -542,13 +539,12 @@ STORE(bch2_fs_opts_dir)
	int ret, id = opt - bch2_opt_table;
	u64 v;

	ret = bch2_opt_parse(opt, buf, &v);
	ret = bch2_opt_parse(c, opt, buf, &v);
	if (ret < 0)
		return ret;

	mutex_lock(&c->sb_lock);

	if (id == Opt_compression) {
	if (id == Opt_compression ||
	    id == Opt_background_compression) {
		int ret = bch2_check_set_has_compressed_data(c, v);
		if (ret) {
			mutex_unlock(&c->sb_lock);

@ -557,13 +553,19 @@ STORE(bch2_fs_opts_dir)
	}

	if (opt->set_sb != SET_NO_SB_OPT) {
		mutex_lock(&c->sb_lock);
		opt->set_sb(c->disk_sb, v);
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
	}

	bch2_opt_set_by_id(&c->opts, id, v);

	mutex_unlock(&c->sb_lock);
	if ((id == Opt_background_target ||
	     id == Opt_background_compression) && v) {
		bch2_rebalance_add_work(c, S64_MAX);
		rebalance_wakeup(c);
	}

	return size;
}
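A hedged sketch of the resulting store path (the sysfs options directory layout is assumed, not shown in this diff): writing a background option both persists it via opt->set_sb() and queues a full rescan, since S64_MAX work units guarantee the rebalance thread revisits everything already written.

	/*
	 * Assumed usage:
	 *   echo lz4 > /sys/fs/bcachefs/<uuid>/options/background_compression
	 * -> bch2_opt_parse(), set_sb + bch2_write_super(),
	 *    bch2_rebalance_add_work(c, S64_MAX), rebalance_wakeup(c)
	 */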

@ -809,6 +811,26 @@ SHOW(bch2_dev)
	sysfs_print(nbuckets, ca->mi.nbuckets);
	sysfs_print(discard, ca->mi.discard);

	if (attr == &sysfs_group) {
		struct bch_sb_field_disk_groups *groups;
		struct bch_disk_group *g;
		unsigned len;

		if (!ca->mi.group)
			return scnprintf(out, end - out, "none\n");

		mutex_lock(&c->sb_lock);
		groups = bch2_sb_get_disk_groups(c->disk_sb);

		g = &groups->entries[ca->mi.group - 1];
		len = strnlen(g->label, sizeof(g->label));
		memcpy(buf, g->label, len);
		mutex_unlock(&c->sb_lock);

		buf[len++] = '\n';
		return len;
	}

	if (attr == &sysfs_has_data) {
		out += bch2_scnprint_flag_list(out, end - out,
					       bch2_data_types,

@ -827,8 +849,6 @@ SHOW(bch2_dev)
		return out - buf;
	}

	sysfs_print(tier, ca->mi.tier);

	if (attr == &sysfs_state_rw) {
		out += bch2_scnprint_string_list(out, end - out,
						 bch2_dev_state,

@ -892,29 +912,10 @@ STORE(bch2_dev)
		mutex_unlock(&c->sb_lock);
	}

	if (attr == &sysfs_tier) {
		unsigned prev_tier;
		unsigned v = strtoul_restrict_or_return(buf,
					0, BCH_TIER_MAX - 1);

		mutex_lock(&c->sb_lock);
		prev_tier = ca->mi.tier;

		if (v == ca->mi.tier) {
			mutex_unlock(&c->sb_lock);
			return size;
		}

		mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
		SET_BCH_MEMBER_TIER(mi, v);
		bch2_write_super(c);

		clear_bit(ca->dev_idx, c->tiers[prev_tier].devs.d);
		set_bit(ca->dev_idx, c->tiers[ca->mi.tier].devs.d);
		mutex_unlock(&c->sb_lock);

		bch2_recalc_capacity(c);
		bch2_tiering_start(c);
	if (attr == &sysfs_group) {
		int ret = bch2_dev_group_set(c, ca, buf);
		if (ret)
			return ret;
	}

	if (attr == &sysfs_wake_allocator)

@ -934,8 +935,8 @@ struct attribute *bch2_dev_files[] = {
	/* settings: */
	&sysfs_discard,
	&sysfs_cache_replacement_policy,
	&sysfs_tier,
	&sysfs_state_rw,
	&sysfs_group,

	&sysfs_has_data,
	&sysfs_iostats,
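A sketch of the user-visible interface this adds (the per-device sysfs directory name is an assumption; only the `group` attribute itself is introduced here):

	/*
	 * Assumed usage:
	 *   cat /sys/fs/bcachefs/<uuid>/dev-0/group        -> "ssd" or "none"
	 *   echo hdd > /sys/fs/bcachefs/<uuid>/dev-0/group
	 * The store path calls bch2_dev_group_set(), which creates the group
	 * in the superblock if the label doesn't exist yet.
	 */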

@ -12,173 +12,247 @@

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched/cputime.h>
#include <trace/events/bcachefs.h>

static bool __tiering_pred(struct bch_fs *c, struct bch_tier *tier,
			   struct bkey_s_c_extent e)
static inline bool rebalance_ptr_pred(struct bch_fs *c,
				      const struct bch_extent_ptr *ptr,
				      struct bch_extent_crc_unpacked crc,
				      struct bch_io_opts *io_opts)
{
	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);

	if (io_opts->background_target &&
	    !dev_in_target(ca, io_opts->background_target) &&
	    !ptr->cached)
		return true;

	if (io_opts->background_compression &&
	    crc.compression_type !=
	    bch2_compression_opt_to_type[io_opts->background_compression])
		return true;

	return false;
}

void bch2_rebalance_add_key(struct bch_fs *c,
			    struct bkey_s_c k,
			    struct bch_io_opts *io_opts)
{
	const struct bch_extent_ptr *ptr;
	unsigned replicas = 0;
	struct bch_extent_crc_unpacked crc;
	struct bkey_s_c_extent e;

	if (!bkey_extent_is_data(k.k))
		return;

	if (!io_opts->background_target &&
	    !io_opts->background_compression)
		return;

	e = bkey_s_c_to_extent(k);

	extent_for_each_ptr_crc(e, ptr, crc)
		if (rebalance_ptr_pred(c, ptr, crc, io_opts)) {
			struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);

			if (!atomic64_add_return(crc.compressed_size,
						 &ca->rebalance_work))
				rebalance_wakeup(c);
		}
}

void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
{
	if (!atomic64_add_return(sectors, &c->rebalance_work_unknown_dev))
		rebalance_wakeup(c);
}

static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
				    enum bkey_type type,
				    struct bkey_s_c_extent e,
				    struct bch_io_opts *io_opts,
				    struct data_opts *data_opts)
{
	const struct bch_extent_ptr *ptr;
	struct bch_extent_crc_unpacked crc;

	/* Make sure we have room to add a new pointer: */
	if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
	    BKEY_EXTENT_VAL_U64s_MAX)
		return false;

	extent_for_each_ptr(e, ptr)
		if (bch_dev_bkey_exists(c, ptr->dev)->mi.tier >= tier->idx)
			replicas++;

	return replicas < c->opts.data_replicas;
}

static enum data_cmd tiering_pred(struct bch_fs *c, void *arg,
				  enum bkey_type type,
				  struct bkey_s_c_extent e,
				  struct bch_io_opts *io_opts,
				  struct data_opts *data_opts)
{
	struct bch_tier *tier = arg;

	if (!__tiering_pred(c, tier, e))
		return DATA_SKIP;

	data_opts->btree_insert_flags = 0;
	extent_for_each_ptr_crc(e, ptr, crc)
		if (rebalance_ptr_pred(c, ptr, crc, io_opts))
			goto found;

	return DATA_SKIP;
found:
	data_opts->target = io_opts->background_target;
	data_opts->btree_insert_flags = 0;
	return DATA_ADD_REPLICAS;
}

static int bch2_tiering_thread(void *arg)
struct rebalance_work {
	unsigned dev_most_full_percent;
	u64 dev_most_full_work;
	u64 dev_most_full_capacity;
	u64 total_work;
};

static struct rebalance_work rebalance_work(struct bch_fs *c)
{
	struct bch_tier *tier = arg;
	struct bch_fs *c = container_of(tier, struct bch_fs, tiers[tier->idx]);
	struct io_clock *clock = &c->io_clock[WRITE];
	struct bch_dev *ca;
	struct bch_move_stats move_stats;
	u64 tier_capacity, available_sectors;
	unsigned long last;
	unsigned i, nr_devices;
	struct rebalance_work ret = { 0 };
	unsigned i;

	memset(&move_stats, 0, sizeof(move_stats));
	set_freezable();
	for_each_online_member(ca, c, i) {
		u64 capacity = bucket_to_sector(ca, ca->mi.nbuckets -
						ca->mi.first_bucket);
		u64 work = atomic64_read(&ca->rebalance_work) +
			atomic64_read(&c->rebalance_work_unknown_dev);
		unsigned percent_full = div_u64(work * 100, capacity);

	while (!kthread_should_stop()) {
		if (kthread_wait_freezable(c->tiering_enabled &&
					   (nr_devices = dev_mask_nr(&tier->devs))))
			break;

		while (1) {
			struct bch_tier *faster_tier;

			last = atomic_long_read(&clock->now);

			tier_capacity = available_sectors = 0;
			for (faster_tier = c->tiers;
			     faster_tier != tier;
			     faster_tier++) {
				rcu_read_lock();
				for_each_member_device_rcu(ca, c, i,
							   &faster_tier->devs) {
					tier_capacity +=
						bucket_to_sector(ca,
							ca->mi.nbuckets -
							ca->mi.first_bucket);
					available_sectors +=
						bucket_to_sector(ca,
							dev_buckets_available(c, ca));
				}
				rcu_read_unlock();
			}

			if (available_sectors < (tier_capacity >> 1))
				break;

			bch2_kthread_io_clock_wait(clock,
						   last +
						   available_sectors -
						   (tier_capacity >> 1));
			if (kthread_should_stop())
				return 0;
		if (percent_full > ret.dev_most_full_percent) {
			ret.dev_most_full_percent = percent_full;
			ret.dev_most_full_work = work;
			ret.dev_most_full_capacity = capacity;
		}

		bch2_move_data(c, &tier->pd.rate,
			       SECTORS_IN_FLIGHT_PER_DEVICE * nr_devices,
			       &tier->devs,
			       writepoint_ptr(&tier->wp),
		ret.total_work += atomic64_read(&ca->rebalance_work);
	}

	ret.total_work += atomic64_read(&c->rebalance_work_unknown_dev);

	return ret;
}
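To make the math above concrete (illustrative numbers, not from the source): a device whose usable buckets span 209715200 sectors (100 GiB) with 20971520 sectors (10 GiB) of pending work gives percent_full = 20971520 * 100 / 209715200 = 10; every device also inherits the unknown-dev counter, and the device with the largest percentage becomes dev_most_full_* and drives the throttling decisions below.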

static void rebalance_work_reset(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned i;

	for_each_online_member(ca, c, i)
		atomic64_set(&ca->rebalance_work, 0);

	atomic64_set(&c->rebalance_work_unknown_dev, 0);
}

static unsigned long curr_cputime(void)
{
	u64 utime, stime;

	task_cputime_adjusted(current, &utime, &stime);
	return nsecs_to_jiffies(utime + stime);
}

static int bch2_rebalance_thread(void *arg)
{
	struct bch_fs *c = arg;
	struct io_clock *clock = &c->io_clock[WRITE];
	struct rebalance_work w, p;
	unsigned long start, prev_start;
	unsigned long prev_run_time, prev_run_cputime;
	unsigned long cputime, prev_cputime;

	set_freezable();

	p = rebalance_work(c);
	prev_start = jiffies;
	prev_cputime = curr_cputime();

	while (!kthread_wait_freezable(c->rebalance_enabled)) {
		struct bch_move_stats move_stats = { 0 };

		w = rebalance_work(c);
		start = jiffies;
		cputime = curr_cputime();

		prev_run_time = start - prev_start;
		prev_run_cputime = cputime - prev_cputime;

		if (!w.total_work) {
			kthread_wait_freezable(rebalance_work(c).total_work);
			continue;
		}

		if (w.dev_most_full_percent < 20 &&
		    prev_run_cputime * 5 > prev_run_time) {
			if (w.dev_most_full_capacity) {
				bch2_kthread_io_clock_wait(clock,
					atomic_long_read(&clock->now) +
					div_u64(w.dev_most_full_capacity, 5));
			} else {

				set_current_state(TASK_INTERRUPTIBLE);
				if (kthread_should_stop())
					break;

				schedule_timeout(prev_run_cputime * 5 -
						 prev_run_time);
				continue;
			}
		}

		/* minimum 1 mb/sec: */
		c->rebalance_pd.rate.rate =
			max_t(u64, 1 << 11,
			      c->rebalance_pd.rate.rate *
			      max(p.dev_most_full_percent, 1U) /
			      max(w.dev_most_full_percent, 1U));

		rebalance_work_reset(c);

		bch2_move_data(c, &c->rebalance_pd.rate,
			       writepoint_ptr(&c->rebalance_write_point),
			       POS_MIN, POS_MAX,
			       tiering_pred, tier,
			       rebalance_pred, NULL,
			       &move_stats);
	}

	return 0;
}
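Two worked readings of the throttling above (numbers invented): the thread backs off when the fullest device is under 20% pending work *and* the previous pass spent more than a fifth of its wall time on CPU (prev_run_cputime * 5 > prev_run_time); and the PD rate scales by the ratio of previous to current fullness — if the last pass saw dev_most_full_percent = 40 and this one sees 20, the rate doubles — floored at 1 << 11 sectors, i.e. 2048 * 512 bytes = 1 MiB per second, matching the "minimum 1 mb/sec" comment.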

static void __bch2_tiering_stop(struct bch_tier *tier)
void bch2_rebalance_stop(struct bch_fs *c)
{
	tier->pd.rate.rate = UINT_MAX;
	bch2_ratelimit_reset(&tier->pd.rate);
	struct task_struct *p;

	if (tier->migrate)
		kthread_stop(tier->migrate);
	c->rebalance_pd.rate.rate = UINT_MAX;
	bch2_ratelimit_reset(&c->rebalance_pd.rate);

	tier->migrate = NULL;
}
	p = c->rebalance_thread;
	c->rebalance_thread = NULL;

void bch2_tiering_stop(struct bch_fs *c)
{
	struct bch_tier *tier;
	if (p) {
		/* for synchronizing with rebalance_wakeup() */
		synchronize_rcu();

	for (tier = c->tiers; tier < c->tiers + ARRAY_SIZE(c->tiers); tier++)
		__bch2_tiering_stop(tier);
	}

static int __bch2_tiering_start(struct bch_tier *tier)
{
	if (!tier->migrate) {
		struct task_struct *p =
			kthread_create(bch2_tiering_thread, tier,
				       "bch_tier[%u]", tier->idx);
		if (IS_ERR(p))
			return PTR_ERR(p);

		tier->migrate = p;
		kthread_stop(p);
		put_task_struct(p);
	}

	wake_up_process(tier->migrate);
	return 0;
}

int bch2_tiering_start(struct bch_fs *c)
int bch2_rebalance_start(struct bch_fs *c)
{
	struct bch_tier *tier;
	bool have_faster_tier = false;
	struct task_struct *p;

	if (c->opts.nochanges)
		return 0;

	for (tier = c->tiers; tier < c->tiers + ARRAY_SIZE(c->tiers); tier++) {
		if (!dev_mask_nr(&tier->devs))
			continue;
	p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance");
	if (IS_ERR(p))
		return PTR_ERR(p);

		if (have_faster_tier) {
			int ret = __bch2_tiering_start(tier);
			if (ret)
				return ret;
		} else {
			__bch2_tiering_stop(tier);
		}

		have_faster_tier = true;
	}
	get_task_struct(p);

	rcu_assign_pointer(c->rebalance_thread, p);
	wake_up_process(c->rebalance_thread);
	return 0;
}

void bch2_fs_tiering_init(struct bch_fs *c)
void bch2_fs_rebalance_init(struct bch_fs *c)
{
	unsigned i;
	bch2_pd_controller_init(&c->rebalance_pd);

	for (i = 0; i < ARRAY_SIZE(c->tiers); i++) {
		c->tiers[i].idx = i;
		bch2_pd_controller_init(&c->tiers[i].pd);
	}
	atomic64_set(&c->rebalance_work_unknown_dev, S64_MAX);
}

@ -1,8 +1,23 @@
#ifndef _BCACHEFS_TIER_H
#define _BCACHEFS_TIER_H

void bch2_tiering_stop(struct bch_fs *);
int bch2_tiering_start(struct bch_fs *);
void bch2_fs_tiering_init(struct bch_fs *);
static inline void rebalance_wakeup(struct bch_fs *c)
{
	struct task_struct *p;

	rcu_read_lock();
	p = rcu_dereference(c->rebalance_thread);
	if (p)
		wake_up_process(p);
	rcu_read_unlock();
}

void bch2_rebalance_add_key(struct bch_fs *, struct bkey_s_c,
			    struct bch_io_opts *);
void bch2_rebalance_add_work(struct bch_fs *, u64);

void bch2_rebalance_stop(struct bch_fs *);
int bch2_rebalance_start(struct bch_fs *);
void bch2_fs_rebalance_init(struct bch_fs *);

#endif /* _BCACHEFS_TIER_H */

@ -6,6 +6,7 @@
#include "extents.h"
#include "fs.h"
#include "str_hash.h"
#include "tier.h"
#include "xattr.h"

#include <linux/dcache.h>

@ -366,6 +367,7 @@ static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
				   const char *name, void *buffer, size_t size)
{
	struct bch_inode_info *inode = to_bch_ei(vinode);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch_opts opts =
		bch2_inode_opts_to_opts(bch2_inode_opts_get(&inode->ei_inode));
	const struct bch_option *opt;

@ -383,12 +385,9 @@ static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler,

	v = bch2_opt_get_by_id(&opts, id);

	if (opt->type == BCH_OPT_STR)
		ret = snprintf(buffer, size, "%s", opt->choices[v]);
	else
		ret = snprintf(buffer, size, "%llu", v);
	ret = bch2_opt_to_text(c, buffer, size, opt, v, 0);

	return ret <= size || !buffer ? ret : -ERANGE;
	return ret < size || !buffer ? ret : -ERANGE;
}

struct inode_opt_set {

@ -435,17 +434,15 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
	memcpy(buf, value, size);
	buf[size] = '\0';

	ret = bch2_opt_parse(opt, buf, &s.v);
	ret = bch2_opt_parse(c, opt, buf, &s.v);
	kfree(buf);

	if (ret < 0)
		return ret;

	if (s.id == Opt_compression) {
		mutex_lock(&c->sb_lock);
	if (s.id == Opt_compression ||
	    s.id == Opt_background_compression) {
		ret = bch2_check_set_has_compressed_data(c, s.v);
		mutex_unlock(&c->sb_lock);

		if (ret)
			return ret;
	}

@ -459,6 +456,11 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
	ret = __bch2_write_inode(c, inode, inode_opt_set_fn, &s);
	mutex_unlock(&inode->ei_update_lock);

	if (value &&
	    (s.id == Opt_background_compression ||
	     s.id == Opt_background_target))
		bch2_rebalance_add_work(c, inode->v.i_blocks);

	return ret;
}
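A hedged sketch of the user-visible effect (these handlers hang off the "bcachefs." xattr namespace; the tool invocations are illustrative): setting a background option on one file immediately credits that inode's blocks as rebalance work.

	/*
	 * Assumed usage:
	 *   setfattr -n bcachefs.background_compression -v lz4 <file>
	 *   getfattr -n bcachefs.background_target <file>
	 * On a successful set of a background option, the inode's i_blocks
	 * are fed to bch2_rebalance_add_work() so existing data is revisited.
	 */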