bcachefs-tools/libbcache/sysfs.c
2017-01-20 09:07:08 -09:00

1398 lines
34 KiB
C

/*
* bcache sysfs interfaces
*
* Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
* Copyright 2012 Google, Inc.
*/
#include "bcache.h"
#include "alloc.h"
#include "blockdev.h"
#include "sysfs.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_gc.h"
#include "buckets.h"
#include "inode.h"
#include "journal.h"
#include "keylist.h"
#include "move.h"
#include "opts.h"
#include "request.h"
#include "writeback.h"
#include <linux/blkdev.h>
#include <linux/sort.h>
/*
 * NULL-terminated names of the bucket replacement policies.  The
 * cache_replacement_policy attribute prints the current entry (indexed by
 * ca->mi.replacement) and parses writes against this list.
 */
static const char * const cache_replacement_policies[] = {
"lru",
"fifo",
"random",
NULL
};
/*
 * NULL-terminated cache mode names.
 *
 * Default is -1; we skip past it for struct cached_dev's cache mode: the
 * cached device show/store handlers pass bch_cache_modes + 1, so index 0 of
 * the list they see is "writethrough".
 */
static const char * const bch_cache_modes[] = {
"default",
"writethrough",
"writeback",
"writearound",
"none",
NULL
};
/*
 * NULL-terminated names of the per-cache-device states, indexed by
 * ca->mi.state.  The store handler compares the parsed index against
 * CACHE_ACTIVE, CACHE_RO, CACHE_FAILED and CACHE_SPARE.
 */
static const char * const bch_cache_state[] = {
"active",
"readonly",
"failed",
"spare",
NULL
};
/*
 * Attribute declarations.
 *
 * Each invocation below declares the sysfs_<name> attribute object that the
 * show/store handlers in this file compare against `attr`, and that the
 * *_files[] tables list.  The macros themselves are not defined in this file
 * (presumably in sysfs.h); judging by their names write_attribute() is
 * write-only, read_attribute() read-only and rw_attribute() read-write --
 * confirm against the macro definitions.
 */
write_attribute(attach);
write_attribute(detach);
write_attribute(unregister);
write_attribute(stop);
write_attribute(clear_stats);
write_attribute(trigger_btree_coalesce);
write_attribute(trigger_gc);
write_attribute(prune_cache);
write_attribute(blockdev_volume_create);
write_attribute(add_device);
read_attribute(uuid);
read_attribute(minor);
read_attribute(bucket_size);
read_attribute(bucket_size_bytes);
read_attribute(block_size);
read_attribute(block_size_bytes);
read_attribute(btree_node_size);
read_attribute(btree_node_size_bytes);
read_attribute(first_bucket);
read_attribute(nbuckets);
read_attribute(tree_depth);
read_attribute(root_usage_percent);
read_attribute(read_priority_stats);
read_attribute(write_priority_stats);
read_attribute(fragmentation_stats);
read_attribute(oldest_gen_stats);
read_attribute(reserve_stats);
read_attribute(btree_cache_size);
read_attribute(cache_available_percent);
read_attribute(compression_stats);
read_attribute(written);
read_attribute(btree_written);
read_attribute(metadata_written);
read_attribute(journal_debug);
write_attribute(journal_flush);
read_attribute(internal_uuid);
read_attribute(btree_gc_running);
read_attribute(btree_nodes);
read_attribute(btree_used_percent);
read_attribute(average_key_size);
read_attribute(available_buckets);
read_attribute(free_buckets);
read_attribute(dirty_data);
read_attribute(dirty_bytes);
read_attribute(dirty_buckets);
read_attribute(cached_data);
read_attribute(cached_bytes);
read_attribute(cached_buckets);
read_attribute(meta_buckets);
read_attribute(alloc_buckets);
read_attribute(has_data);
read_attribute(has_metadata);
read_attribute(bset_tree_stats);
read_attribute(alloc_debug);
read_attribute(state);
read_attribute(cache_read_races);
read_attribute(writeback_keys_done);
read_attribute(writeback_keys_failed);
read_attribute(io_errors);
rw_attribute(io_error_limit);
rw_attribute(io_error_halflife);
read_attribute(congested);
rw_attribute(congested_read_threshold_us);
rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff);
rw_attribute(cache_mode);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
rw_attribute(writeback_percent);
sysfs_pd_controller_attribute(writeback);
read_attribute(stripe_size);
read_attribute(partial_stripes_expensive);
rw_attribute(journal_write_delay_ms);
rw_attribute(journal_reclaim_delay_ms);
read_attribute(journal_entry_size_max);
rw_attribute(discard);
rw_attribute(running);
rw_attribute(label);
rw_attribute(readahead);
rw_attribute(verify);
rw_attribute(bypass_torture_test);
rw_attribute(cache_replacement_policy);
rw_attribute(foreground_write_ratelimit_enabled);
rw_attribute(copy_gc_enabled);
sysfs_pd_controller_attribute(copy_gc);
rw_attribute(tiering_enabled);
rw_attribute(tiering_percent);
sysfs_pd_controller_attribute(tiering);
sysfs_pd_controller_attribute(foreground_write);
rw_attribute(pd_controllers_update_seconds);
rw_attribute(foreground_target_percent);
rw_attribute(size);
read_attribute(meta_replicas_have);
read_attribute(data_replicas_have);
read_attribute(tier);
/*
 * X-macro expansion: declare one read-write attribute per entry of
 * BCH_DEBUG_PARAMS() (the list itself is defined outside this file).
 */
#define BCH_DEBUG_PARAM(name, description) \
rw_attribute(name);
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
/*
 * X-macro expansion: define a sysfs_opt_<name> attribute for every entry of
 * CACHE_SET_VISIBLE_OPTS().  Every option is world-readable; it is also
 * owner-writable when the entry's _perm argument is non-zero.
 */
#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
static struct attribute sysfs_opt_##_name = { \
.name = #_name, \
.mode = S_IRUGO|(_perm ? S_IWUSR : 0) \
};
CACHE_SET_VISIBLE_OPTS()
#undef CACHE_SET_OPT
/*
 * X-macro expansion: declare the time-stats attributes for every entry of
 * BCH_TIME_STATS() via sysfs_time_stats_attribute().
 */
#define BCH_TIME_STAT(name, frequency_units, duration_units) \
sysfs_time_stats_attribute(name, frequency_units, duration_units);
BCH_TIME_STATS()
#undef BCH_TIME_STAT
/*
 * Writable variant of the "state" attribute, used for cache devices
 * (bch_cache_files[]).  It shares the file name "state" with the read-only
 * sysfs_state attribute declared above, which the cached device uses.
 */
static struct attribute sysfs_state_rw = {
.name = "state",
.mode = S_IRUGO|S_IWUSR
};
/*
 * sysfs show handler for a cached (backing) device.
 *
 * Formats the attribute selected by @attr into @buf and returns the number
 * of bytes written, or 0 if the attribute is not handled here.  The
 * var_*()/sysfs_*() helper macros are defined elsewhere (presumably they
 * return from this function when @attr matches -- confirm in sysfs.h); the
 * var_*() forms look fields up in @dc through the local var() macro.
 */
SHOW(bch_cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
	const char *states[] = { "no cache", "clean", "dirty", "inconsistent" };

#define var(stat)		(dc->stat)

	if (attr == &sysfs_cache_mode)
		/* + 1: skip the "default" entry, see bch_cache_modes[] */
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       bch_cache_modes + 1,
					       BDEV_CACHE_MODE(dc->disk_sb.sb));

	var_printf(verify,		"%i");
	var_printf(bypass_torture_test,	"%i");
	var_printf(writeback_metadata,	"%i");
	var_printf(writeback_running,	"%i");
	var_print(writeback_percent);
	sysfs_pd_controller_show(writeback, &dc->writeback_pd);

	/* dirty sector counts are converted to bytes (<< 9) */
	sysfs_hprint(dirty_data,
		     bcache_dev_sectors_dirty(&dc->disk) << 9);
	sysfs_print(dirty_bytes,
		    bcache_dev_sectors_dirty(&dc->disk) << 9);

	sysfs_hprint(stripe_size,	dc->disk.stripe_size << 9);
	var_printf(partial_stripes_expensive,	"%u");

	var_hprint(sequential_cutoff);
	var_hprint(readahead);

	sysfs_print(running,		atomic_read(&dc->running));
	sysfs_print(state,		states[BDEV_STATE(dc->disk_sb.sb)]);

	if (attr == &sysfs_label) {
		/*
		 * The on-disk label is a fixed SB_LABEL_SIZE byte field that
		 * need not be NUL terminated.  Terminate the copy directly
		 * after the last copied byte so that strcat()/strlen() never
		 * step over an unwritten byte of @buf.
		 */
		memcpy(buf, dc->disk_sb.sb->label, SB_LABEL_SIZE);
		buf[SB_LABEL_SIZE] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

#undef var
	return 0;
}
/*
 * sysfs store handler for a cached (backing) device; the locking wrapper
 * bch_cached_dev_store() calls it with bch_register_lock held.
 *
 * Parses @buf (@size bytes) for the attribute selected by @attr.  Returns
 * @size on success or a negative errno.  The d_strtoul*()/sysfs_*() helper
 * macros are defined elsewhere (presumably they return from this function
 * when @attr matches -- confirm in sysfs.h).
 */
STORE(__cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
	struct cache_set *c;
	struct kobj_uevent_env *env;

#define d_strtoul(var)		sysfs_strtoul(var, dc->var)
#define d_strtoul_nonzero(var)	sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
#define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)

	d_strtoul(verify);
	d_strtoul(bypass_torture_test);
	d_strtoul(writeback_metadata);
	d_strtoul(writeback_running);
	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
	sysfs_pd_controller_store(writeback, &dc->writeback_pd);

	d_strtoi_h(sequential_cutoff);
	d_strtoi_h(readahead);

	if (attr == &sysfs_clear_stats)
		bch_cache_accounting_clear(&dc->accounting);

	if (attr == &sysfs_running &&
	    strtoul_or_return(buf))
		bch_cached_dev_run(dc);

	if (attr == &sysfs_cache_mode) {
		/* + 1: skip the "default" entry, see bch_cache_modes[] */
		ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1);

		if (v < 0)
			return v;

		/* only rewrite the superblock when the mode really changes */
		if ((unsigned) v != BDEV_CACHE_MODE(dc->disk_sb.sb)) {
			SET_BDEV_CACHE_MODE(dc->disk_sb.sb, v);
			bch_write_bdev_super(dc, NULL);
		}
	}

	if (attr == &sysfs_label) {
		u64 journal_seq = 0;
		int ret = 0;

		if (size > SB_LABEL_SIZE)
			return -EINVAL;

		mutex_lock(&dc->disk.inode_lock);

		memcpy(dc->disk_sb.sb->label, buf, size);
		if (size < SB_LABEL_SIZE)
			dc->disk_sb.sb->label[size] = '\0';
		/* drop the trailing newline that e.g. `echo` appends */
		if (size && dc->disk_sb.sb->label[size - 1] == '\n')
			dc->disk_sb.sb->label[size - 1] = '\0';

		/* keep the inode's copy of the label in sync */
		memcpy(dc->disk.inode.v.i_label,
		       dc->disk_sb.sb->label, SB_LABEL_SIZE);

		bch_write_bdev_super(dc, NULL);

		if (dc->disk.c)
			ret = bch_inode_update(dc->disk.c, &dc->disk.inode.k_i,
					       &journal_seq);

		mutex_unlock(&dc->disk.inode_lock);

		if (ret)
			return ret;

		if (dc->disk.c)
			ret = bch_journal_flush_seq(&dc->disk.c->journal,
						    journal_seq);
		if (ret)
			return ret;

		env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
		if (!env)
			return -ENOMEM;

		add_uevent_var(env, "DRIVER=bcache");
		add_uevent_var(env, "CACHED_UUID=%pU", dc->disk_sb.sb->disk_uuid.b);
		add_uevent_var(env, "CACHED_LABEL=%s", buf);
		kobject_uevent_env(
			&disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp);
		kfree(env);
	}

	if (attr == &sysfs_attach) {
		/*
		 * Keep the error in a signed int and return it directly.
		 * Routing it through an unsigned and then size_t would turn a
		 * negative errno into a large positive "bytes written" value
		 * on 64 bit.  -ENOENT covers the case of no cache set being
		 * registered at all.
		 * NOTE(review): assumes bch_cached_dev_attach() returns 0 or
		 * a negative errno -- confirm against its definition.
		 */
		int ret = -ENOENT;

		if (uuid_parse(buf, &dc->disk_sb.sb->user_uuid))
			return -EINVAL;

		list_for_each_entry(c, &bch_cache_sets, list) {
			ret = bch_cached_dev_attach(dc, c);
			if (!ret)
				return size;
		}

		pr_err("Can't attach %s: cache set not found", buf);
		return ret;
	}

	if (attr == &sysfs_detach && dc->disk.c)
		bch_cached_dev_detach(dc);

	if (attr == &sysfs_stop)
		bch_blockdev_stop(&dc->disk);

	return size;
}
STORE(bch_cached_dev)
{
struct cached_dev *dc = container_of(kobj, struct cached_dev,
disk.kobj);
mutex_lock(&bch_register_lock);
size = __cached_dev_store(kobj, attr, buf, size);
if (attr == &sysfs_writeback_running)
bch_writeback_queue(dc);
if (attr == &sysfs_writeback_percent)
schedule_delayed_work(&dc->writeback_pd_update,
dc->writeback_pd_update_seconds * HZ);
mutex_unlock(&bch_register_lock);
return size;
}
/*
 * NULL-terminated list of the attributes exposed for a cached device.  The
 * verify and bypass_torture_test attributes are only listed when
 * CONFIG_BCACHE_DEBUG is defined.
 */
static struct attribute *bch_cached_dev_files[] = {
&sysfs_attach,
&sysfs_detach,
&sysfs_stop,
&sysfs_cache_mode,
&sysfs_writeback_metadata,
&sysfs_writeback_running,
&sysfs_writeback_percent,
sysfs_pd_controller_files(writeback),
&sysfs_dirty_data,
&sysfs_dirty_bytes,
&sysfs_stripe_size,
&sysfs_partial_stripes_expensive,
&sysfs_sequential_cutoff,
&sysfs_clear_stats,
&sysfs_running,
&sysfs_state,
&sysfs_label,
&sysfs_readahead,
#ifdef CONFIG_BCACHE_DEBUG
&sysfs_verify,
&sysfs_bypass_torture_test,
#endif
NULL
};
KTYPE(bch_cached_dev);
/*
 * sysfs show handler for a blockdev volume: reports the volume size (from
 * the inode's i_size) and its label.  Returns the number of bytes written
 * to @buf, or 0 if @attr is not handled here.
 */
SHOW(bch_blockdev_volume)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);

	sysfs_hprint(size, le64_to_cpu(d->inode.v.i_size));

	if (attr == &sysfs_label) {
		/*
		 * i_label is a fixed SB_LABEL_SIZE byte field that need not
		 * be NUL terminated.  Terminate the copy directly after the
		 * last copied byte so that strcat()/strlen() never step over
		 * an unwritten byte of @buf.
		 */
		memcpy(buf, d->inode.v.i_label, SB_LABEL_SIZE);
		buf[SB_LABEL_SIZE] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

	return 0;
}
STORE(__bch_blockdev_volume)
{
struct bcache_device *d = container_of(kobj, struct bcache_device,
kobj);
if (attr == &sysfs_size) {
u64 journal_seq = 0;
u64 v = strtoi_h_or_return(buf);
int ret;
mutex_lock(&d->inode_lock);
if (v < le64_to_cpu(d->inode.v.i_size) ){
ret = bch_inode_truncate(d->c, d->inode.k.p.inode,
v >> 9, NULL, NULL);
if (ret) {
mutex_unlock(&d->inode_lock);
return ret;
}
}
d->inode.v.i_size = cpu_to_le64(v);
ret = bch_inode_update(d->c, &d->inode.k_i, &journal_seq);
mutex_unlock(&d->inode_lock);
if (ret)
return ret;
ret = bch_journal_flush_seq(&d->c->journal, journal_seq);
if (ret)
return ret;
set_capacity(d->disk, v >> 9);
}
if (attr == &sysfs_label) {
u64 journal_seq = 0;
int ret;
mutex_lock(&d->inode_lock);
memcpy(d->inode.v.i_label, buf, SB_LABEL_SIZE);
ret = bch_inode_update(d->c, &d->inode.k_i, &journal_seq);
mutex_unlock(&d->inode_lock);
return ret ?: bch_journal_flush_seq(&d->c->journal, journal_seq);
}
if (attr == &sysfs_unregister) {
set_bit(BCACHE_DEV_DETACHING, &d->flags);
bch_blockdev_stop(d);
}
return size;
}
/*
 * STORE_LOCKED() is defined elsewhere; presumably it generates
 * bch_blockdev_volume_store() as a wrapper that calls
 * __bch_blockdev_volume_store() under a lock -- confirm in sysfs.h.
 */
STORE_LOCKED(bch_blockdev_volume)
/* NULL-terminated list of the attributes exposed for a blockdev volume. */
static struct attribute *bch_blockdev_volume_files[] = {
&sysfs_unregister,
&sysfs_label,
&sysfs_size,
NULL
};
KTYPE(bch_blockdev_volume);
/*
 * Accumulate bset statistics over every cached btree node of @c (walked
 * under rcu_read_lock()) and format them into @buf.
 *
 * Returns the value of snprintf() on a PAGE_SIZE buffer.
 */
static int bch_bset_print_stats(struct cache_set *c, char *buf)
{
	struct bset_stats stats;
	struct bucket_table *tbl;
	struct rhash_head *pos;
	struct btree *b;
	size_t nr_nodes = 0;
	unsigned iter;

	memset(&stats, 0, sizeof(stats));

	rcu_read_lock();
	for_each_cached_btree(b, c, tbl, iter, pos) {
		bch_btree_keys_stats(b, &stats);
		++nr_nodes;
	}
	rcu_read_unlock();

	return snprintf(buf, PAGE_SIZE,
			"btree nodes: %zu\n"
			"written sets: %zu\n"
			"written key bytes: %zu\n"
			"unwritten sets: %zu\n"
			"unwritten key bytes: %zu\n"
			"no table sets: %zu\n"
			"no table key bytes: %zu\n"
			"floats: %zu\n"
			"failed unpacked: %zu\n"
			"failed prev: %zu\n"
			"failed overflow: %zu\n",
			nr_nodes,
			stats.sets[BSET_RO_AUX_TREE].nr,
			stats.sets[BSET_RO_AUX_TREE].bytes,
			stats.sets[BSET_RW_AUX_TREE].nr,
			stats.sets[BSET_RW_AUX_TREE].bytes,
			stats.sets[BSET_NO_AUX_TREE].nr,
			stats.sets[BSET_NO_AUX_TREE].bytes,
			stats.floats,
			stats.failed_unpacked,
			stats.failed_prev,
			stats.failed_overflow);
}
static unsigned bch_root_usage(struct cache_set *c)
{
unsigned bytes = 0;
struct bkey_packed *k;
struct btree *b;
struct btree_node_iter iter;
goto lock_root;
do {
six_unlock_read(&b->lock);
lock_root:
b = c->btree_roots[BTREE_ID_EXTENTS].b;
six_lock_read(&b->lock);
} while (b != c->btree_roots[BTREE_ID_EXTENTS].b);
for_each_btree_node_key(b, k, &iter, btree_node_is_extents(b))
bytes += bkey_bytes(k);
six_unlock_read(&b->lock);
return (bytes * 100) / btree_bytes(c);
}
static size_t bch_cache_size(struct cache_set *c)
{
size_t ret = 0;
struct btree *b;
mutex_lock(&c->btree_cache_lock);
list_for_each_entry(b, &c->btree_cache, list)
ret += btree_bytes(c);
mutex_unlock(&c->btree_cache_lock);
return ret;
}
static unsigned bch_cache_available_percent(struct cache_set *c)
{
return div64_u64((u64) sectors_available(c) * 100,
c->capacity ?: 1);
}
/*
 * Compiled out: these helpers derive the btree_used_percent and
 * average_key_size values from c->gc_stats.  Their only callers, in
 * bch_cache_set_show(), are likewise under "#if 0" with an
 * "XXX: reimplement" note.
 */
#if 0
static unsigned bch_btree_used(struct cache_set *c)
{
return div64_u64(c->gc_stats.key_bytes * 100,
(c->gc_stats.nodes ?: 1) * btree_bytes(c));
}
static unsigned bch_average_key_size(struct cache_set *c)
{
return c->gc_stats.nkeys
? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
: 0;
}
#endif
static ssize_t show_cache_set_alloc_debug(struct cache_set *c, char *buf)
{
struct bucket_stats_cache_set stats = bch_bucket_stats_read_cache_set(c);
return scnprintf(buf, PAGE_SIZE,
"capacity:\t\t%llu\n"
"compressed:\n"
"\tmeta:\t\t%llu\n"
"\tdirty:\t\t%llu\n"
"\tcached:\t\t%llu\n"
"uncompressed:\n"
"\tmeta:\t\t%llu\n"
"\tdirty:\t\t%llu\n"
"\tcached:\t\t%llu\n"
"persistent reserved sectors:\t%llu\n"
"online reserved sectors:\t%llu\n",
c->capacity,
stats.s[S_COMPRESSED][S_META],
stats.s[S_COMPRESSED][S_DIRTY],
stats.s[S_COMPRESSED][S_CACHED],
stats.s[S_UNCOMPRESSED][S_META],
stats.s[S_UNCOMPRESSED][S_DIRTY],
stats.s[S_UNCOMPRESSED][S_CACHED],
stats.persistent_reserved,
stats.online_reserved);
}
static ssize_t bch_compression_stats(struct cache_set *c, char *buf)
{
struct btree_iter iter;
struct bkey_s_c k;
u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
nr_compressed_extents = 0,
compressed_sectors_compressed = 0,
compressed_sectors_uncompressed = 0;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, k)
if (k.k->type == BCH_EXTENT) {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr;
const union bch_extent_crc *crc;
extent_for_each_ptr_crc(e, ptr, crc) {
if (crc_compression_type(crc) == BCH_COMPRESSION_NONE) {
nr_uncompressed_extents++;
uncompressed_sectors += e.k->size;
} else {
nr_compressed_extents++;
compressed_sectors_compressed +=
crc_compressed_size(e.k, crc);
compressed_sectors_uncompressed +=
crc_uncompressed_size(e.k, crc);
}
/* only looking at the first ptr */
break;
}
}
bch_btree_iter_unlock(&iter);
return snprintf(buf, PAGE_SIZE,
"uncompressed data:\n"
" nr extents: %llu\n"
" size (bytes): %llu\n"
"compressed data:\n"
" nr extents: %llu\n"
" compressed size (bytes): %llu\n"
" uncompressed size (bytes): %llu\n",
nr_uncompressed_extents,
uncompressed_sectors << 9,
nr_compressed_extents,
compressed_sectors_compressed << 9,
compressed_sectors_uncompressed << 9);
}
/*
 * sysfs show handler for a cache set (also reached through the "internal"
 * directory wrapper).
 *
 * The sysfs_print()/sysfs_hprint()/sysfs_printf() helper macros are defined
 * elsewhere; presumably each returns from this function when @attr matches
 * its first argument -- confirm in sysfs.h.  Returns 0 when no attribute
 * matched.
 */
SHOW(bch_cache_set)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
sysfs_print(minor, c->minor);
sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms);
sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
sysfs_hprint(journal_entry_size_max, c->journal.entry_size_max);
sysfs_hprint(block_size, block_bytes(c));
sysfs_print(block_size_bytes, block_bytes(c));
/* btree_node_size is shifted by 9 to print it in bytes */
sysfs_hprint(btree_node_size, c->sb.btree_node_size << 9);
sysfs_print(btree_node_size_bytes, c->sb.btree_node_size << 9);
sysfs_hprint(btree_cache_size, bch_cache_size(c));
sysfs_print(cache_available_percent, bch_cache_available_percent(c));
sysfs_print(btree_gc_running, c->gc_pos.phase != GC_PHASE_DONE);
#if 0
/* XXX: reimplement */
sysfs_print(btree_used_percent, bch_btree_used(c));
sysfs_print(btree_nodes, c->gc_stats.nodes);
sysfs_hprint(average_key_size, bch_average_key_size(c));
#endif
sysfs_print(cache_read_races,
atomic_long_read(&c->cache_read_races));
sysfs_print(writeback_keys_done,
atomic_long_read(&c->writeback_keys_done));
sysfs_print(writeback_keys_failed,
atomic_long_read(&c->writeback_keys_failed));
/* See count_io_errors for why 88 */
sysfs_print(io_error_halflife, c->error_decay * 88);
sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT);
sysfs_hprint(congested,
((uint64_t) bch_get_congested(c)) << 9);
sysfs_print(congested_read_threshold_us,
c->congested_read_threshold_us);
sysfs_print(congested_write_threshold_us,
c->congested_write_threshold_us);
sysfs_printf(foreground_write_ratelimit_enabled, "%i",
c->foreground_write_ratelimit_enabled);
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
sysfs_pd_controller_show(foreground_write, &c->foreground_write_pd);
sysfs_print(pd_controllers_update_seconds,
c->pd_controllers_update_seconds);
sysfs_print(foreground_target_percent, c->foreground_target_percent);
sysfs_printf(tiering_enabled, "%i", c->tiering_enabled);
sysfs_print(tiering_percent, c->tiering_percent);
sysfs_pd_controller_show(tiering, &c->tiering_pd);
sysfs_printf(meta_replicas_have, "%llu",
CACHE_SET_META_REPLICAS_HAVE(&c->disk_sb));
sysfs_printf(data_replicas_have, "%llu",
CACHE_SET_DATA_REPLICAS_HAVE(&c->disk_sb));
/* Debugging: */
if (attr == &sysfs_journal_debug)
return bch_journal_print_debug(&c->journal, buf);
/* one sysfs_print() per debug parameter, reading the same-named field of c */
#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
/*
 * Every attribute below this check is refused with -EPERM unless the
 * cache set has CACHE_SET_RUNNING set; several of them dereference the
 * btree roots or walk the btree.
 */
if (!test_bit(CACHE_SET_RUNNING, &c->flags))
return -EPERM;
if (attr == &sysfs_bset_tree_stats)
return bch_bset_print_stats(c, buf);
if (attr == &sysfs_alloc_debug)
return show_cache_set_alloc_debug(c, buf);
sysfs_print(tree_depth, c->btree_roots[BTREE_ID_EXTENTS].b->level);
sysfs_print(root_usage_percent, bch_root_usage(c));
if (attr == &sysfs_compression_stats)
return bch_compression_stats(c, buf);
sysfs_printf(internal_uuid, "%pU", c->disk_sb.set_uuid.b);
return 0;
}
/*
 * sysfs store handler for a cache set; bch_cache_set_store() calls it with
 * bch_register_lock held.
 *
 * Parses @buf for the attribute selected by @attr and returns @size on
 * success or a negative errno.  The sysfs_strtoul()/sysfs_pd_controller_store()
 * helper macros are defined elsewhere (presumably they return from this
 * function when @attr matches -- confirm in sysfs.h).
 */
STORE(__bch_cache_set)
{
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);

	if (attr == &sysfs_unregister) {
		bch_cache_set_unregister(c);
		return size;
	}

	if (attr == &sysfs_stop) {
		bch_cache_set_stop(c);
		return size;
	}

	if (attr == &sysfs_clear_stats) {
		atomic_long_set(&c->writeback_keys_done,	0);
		atomic_long_set(&c->writeback_keys_failed,	0);
		bch_cache_accounting_clear(&c->accounting);

		return size;
	}

	sysfs_strtoul(congested_read_threshold_us,
		      c->congested_read_threshold_us);
	sysfs_strtoul(congested_write_threshold_us,
		      c->congested_write_threshold_us);

	if (attr == &sysfs_io_error_limit) {
		c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
		return size;
	}

	/* See count_io_errors() for why 88 */
	if (attr == &sysfs_io_error_halflife) {
		c->error_decay = strtoul_or_return(buf) / 88;
		return size;
	}

	sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
	sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);

	sysfs_strtoul(foreground_write_ratelimit_enabled,
		      c->foreground_write_ratelimit_enabled);

	if (attr == &sysfs_copy_gc_enabled) {
		struct cache *ca;
		unsigned i;
		ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
			?: (ssize_t) size;

		/* wake each device's copy GC thread so it sees the new setting */
		for_each_cache(ca, c, i)
			if (ca->moving_gc_read)
				wake_up_process(ca->moving_gc_read);
		return ret;
	}

	if (attr == &sysfs_tiering_enabled) {
		ssize_t ret = strtoul_safe(buf, c->tiering_enabled)
			?: (ssize_t) size;

		/* wake the tiering thread so it sees the new setting */
		if (c->tiering_read)
			wake_up_process(c->tiering_read);
		return ret;
	}

	sysfs_pd_controller_store(foreground_write, &c->foreground_write_pd);

	if (attr == &sysfs_journal_flush) {
		bch_journal_meta_async(&c->journal, NULL);

		return size;
	}

	sysfs_strtoul(pd_controllers_update_seconds,
		      c->pd_controllers_update_seconds);
	sysfs_strtoul(foreground_target_percent, c->foreground_target_percent);

	sysfs_strtoul(tiering_percent,		c->tiering_percent);
	sysfs_pd_controller_store(tiering,	&c->tiering_pd);

	/* Debugging: */

#define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
	BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM

	/* everything below operates on a live cache set */
	if (!test_bit(CACHE_SET_RUNNING, &c->flags))
		return -EPERM;

	if (test_bit(CACHE_SET_STOPPING, &c->flags))
		return -EINTR;

	if (attr == &sysfs_blockdev_volume_create) {
		u64 v = strtoi_h_or_return(buf);
		int r = bch_blockdev_volume_create(c, v);

		if (r)
			return r;
	}

	if (attr == &sysfs_trigger_btree_coalesce)
		bch_coalesce(c);

	/* Debugging: */

	if (attr == &sysfs_trigger_gc)
		bch_gc(c);

	if (attr == &sysfs_prune_cache) {
		/*
		 * Zero-initialise the whole structure so the shrinker never
		 * sees indeterminate values in fields we do not set
		 * explicitly.
		 */
		struct shrink_control sc = { .gfp_mask = GFP_KERNEL };

		sc.nr_to_scan = strtoul_or_return(buf);
		c->btree_cache_shrink.scan_objects(&c->btree_cache_shrink, &sc);
	}

	return size;
}
STORE(bch_cache_set)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
mutex_lock(&bch_register_lock);
size = __bch_cache_set_store(kobj, attr, buf, size);
mutex_unlock(&bch_register_lock);
if (attr == &sysfs_add_device) {
char *path = kstrdup(buf, GFP_KERNEL);
int r = bch_cache_set_add_cache(c, strim(path));
kfree(path);
if (r)
return r;
}
return size;
}
/*
 * NULL-terminated list of the attributes exposed in the cache set's main
 * directory.
 * NOTE(review): average_key_size is listed here but its show code in
 * bch_cache_set_show() is under "#if 0", so reading it appears to fall
 * through to that function's final "return 0" -- confirm whether it should
 * remain listed.
 */
static struct attribute *bch_cache_set_files[] = {
&sysfs_unregister,
&sysfs_stop,
&sysfs_journal_write_delay_ms,
&sysfs_journal_reclaim_delay_ms,
&sysfs_journal_entry_size_max,
&sysfs_blockdev_volume_create,
&sysfs_add_device,
&sysfs_block_size,
&sysfs_block_size_bytes,
&sysfs_btree_node_size,
&sysfs_btree_node_size_bytes,
&sysfs_tree_depth,
&sysfs_root_usage_percent,
&sysfs_btree_cache_size,
&sysfs_cache_available_percent,
&sysfs_compression_stats,
&sysfs_average_key_size,
&sysfs_io_error_limit,
&sysfs_io_error_halflife,
&sysfs_congested,
&sysfs_congested_read_threshold_us,
&sysfs_congested_write_threshold_us,
&sysfs_clear_stats,
&sysfs_meta_replicas_have,
&sysfs_data_replicas_have,
&sysfs_foreground_target_percent,
&sysfs_tiering_percent,
&sysfs_journal_flush,
NULL
};
KTYPE(bch_cache_set);
/* internal dir - just a wrapper */
SHOW(bch_cache_set_internal)
{
struct cache_set *c = container_of(kobj, struct cache_set, internal);
return bch_cache_set_show(&c->kobj, attr, buf);
}
STORE(bch_cache_set_internal)
{
struct cache_set *c = container_of(kobj, struct cache_set, internal);
return bch_cache_set_store(&c->kobj, attr, buf, size);
}
/*
 * Release callback for the "internal" kobject.  Intentionally empty: the
 * kobject is a member of struct cache_set (see the container_of() uses
 * above), so presumably its memory is released together with the cache set
 * -- confirm against the cache set teardown code.
 */
static void bch_cache_set_internal_release(struct kobject *k)
{
}
/*
 * NULL-terminated list of the attributes exposed in the cache set's
 * "internal" directory, including one entry per BCH_DEBUG_PARAMS() item.
 * NOTE(review): btree_nodes and btree_used_percent are listed here but
 * their show code in bch_cache_set_show() is under "#if 0".
 */
static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_journal_debug,
&sysfs_alloc_debug,
&sysfs_btree_gc_running,
&sysfs_btree_nodes,
&sysfs_btree_used_percent,
&sysfs_bset_tree_stats,
&sysfs_cache_read_races,
&sysfs_writeback_keys_done,
&sysfs_writeback_keys_failed,
&sysfs_trigger_btree_coalesce,
&sysfs_trigger_gc,
&sysfs_prune_cache,
&sysfs_foreground_write_ratelimit_enabled,
&sysfs_copy_gc_enabled,
&sysfs_tiering_enabled,
sysfs_pd_controller_files(tiering),
sysfs_pd_controller_files(foreground_write),
&sysfs_internal_uuid,
#define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
NULL
};
KTYPE(bch_cache_set_internal);
/* options */
/*
 * Show handler for the cache set's options directory.
 *
 * The CACHE_SET_OPT() X-macro expands to one check per visible option:
 * options whose choice list is bch_bool_opt or bch_uint_opt are printed as
 * an integer, all others through bch_snprint_string_list() with the current
 * value as the selected index.  Returns 0 when @attr matches no option.
 */
SHOW(bch_cache_set_opts_dir)
{
struct cache_set *c = container_of(kobj, struct cache_set, opts_dir);
#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
if (attr == &sysfs_opt_##_name) \
return _choices == bch_bool_opt || _choices == bch_uint_opt\
? snprintf(buf, PAGE_SIZE, "%i\n", c->opts._name)\
: bch_snprint_string_list(buf, PAGE_SIZE, \
_choices, c->opts._name);\
CACHE_SET_VISIBLE_OPTS()
#undef CACHE_SET_OPT
return 0;
}
/*
 * Store handler for the cache set's options directory.
 *
 * The CACHE_SET_OPT() X-macro expands to one check per visible option.
 * Bool/uint options are parsed with strtoul_restrict_or_return() using the
 * bounds _min and _max - 1; all others are looked up in their choice list.
 * A negative parse result is returned as the error.  Otherwise the value is
 * stored in c->opts and, when the option has superblock bits
 * (_sb_opt##_BITS is non-zero) and the value differs from the one in
 * c->disk_sb, written to the superblock as well.
 */
STORE(bch_cache_set_opts_dir)
{
struct cache_set *c = container_of(kobj, struct cache_set, opts_dir);
#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
if (attr == &sysfs_opt_##_name) { \
ssize_t v = (_choices == bch_bool_opt || \
_choices == bch_uint_opt) \
? strtoul_restrict_or_return(buf, _min, _max - 1)\
: bch_read_string_list(buf, _choices); \
\
if (v < 0) \
return v; \
\
c->opts._name = v; \
\
if (_sb_opt##_BITS && v != _sb_opt(&c->disk_sb)) { \
SET_##_sb_opt(&c->disk_sb, v); \
bcache_write_super(c); \
} \
\
return size; \
}
CACHE_SET_VISIBLE_OPTS()
#undef CACHE_SET_OPT
return size;
}
/*
 * Release callback for the options directory kobject.  Intentionally empty:
 * the kobject is a member of struct cache_set, so presumably its memory is
 * released together with the cache set -- confirm against the teardown code.
 */
static void bch_cache_set_opts_dir_release(struct kobject *k)
{
}
/*
 * NULL-terminated list of the option attributes: one sysfs_opt_<name> entry
 * per CACHE_SET_VISIBLE_OPTS() item.
 */
static struct attribute *bch_cache_set_opts_dir_files[] = {
#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
&sysfs_opt_##_name,
CACHE_SET_VISIBLE_OPTS()
#undef CACHE_SET_OPT
NULL
};
KTYPE(bch_cache_set_opts_dir);
/* time stats */
/*
 * Show handler for the time-stats directory: expands to one
 * sysfs_print_time_stats() invocation per BCH_TIME_STATS() entry, each
 * operating on the cache set's <name>_time member.  Returns 0 when execution
 * reaches the end (presumably when @attr matched none of them -- confirm
 * against the macro definition).
 */
SHOW(bch_cache_set_time_stats)
{
struct cache_set *c = container_of(kobj, struct cache_set, time_stats);
#define BCH_TIME_STAT(name, frequency_units, duration_units) \
sysfs_print_time_stats(&c->name##_time, name, \
frequency_units, duration_units);
BCH_TIME_STATS()
#undef BCH_TIME_STAT
return 0;
}
/*
 * Store handler for the time-stats directory: expands to one
 * sysfs_clear_time_stats() invocation per BCH_TIME_STATS() entry, each
 * operating on the cache set's <name>_time member.  Returns @size.
 */
STORE(bch_cache_set_time_stats)
{
struct cache_set *c = container_of(kobj, struct cache_set, time_stats);
#define BCH_TIME_STAT(name, frequency_units, duration_units) \
sysfs_clear_time_stats(&c->name##_time, name);
BCH_TIME_STATS()
#undef BCH_TIME_STAT
return size;
}
/*
 * Release callback for the time-stats kobject.  Intentionally empty: the
 * kobject is a member of struct cache_set, so presumably its memory is
 * released together with the cache set -- confirm against the teardown code.
 */
static void bch_cache_set_time_stats_release(struct kobject *k)
{
}
/*
 * NULL-terminated list of the time-stats attributes, generated from
 * BCH_TIME_STATS() via sysfs_time_stats_attribute_list().
 */
static struct attribute *bch_cache_set_time_stats_files[] = {
#define BCH_TIME_STAT(name, frequency_units, duration_units) \
sysfs_time_stats_attribute_list(name, frequency_units, duration_units)
BCH_TIME_STATS()
#undef BCH_TIME_STAT
NULL
};
KTYPE(bch_cache_set_time_stats);
/*
 * Callback type used by show_quantiles(): maps one bucket of a cache device
 * to an unsigned value; the last argument is an opaque per-call parameter.
 */
typedef unsigned (bucket_map_fn)(struct cache *, struct bucket *, void *);
static unsigned bucket_priority_fn(struct cache *ca, struct bucket *g,
void *private)
{
int rw = (private ? 1 : 0);
return ca->set->prio_clock[rw].hand - g->prio[rw];
}
/* bucket_map_fn: value of bucket_sectors_used() for the bucket. */
static unsigned bucket_sectors_used_fn(struct cache *ca, struct bucket *g,
				       void *private)
{
	unsigned used = bucket_sectors_used(g);

	return used;
}
/* bucket_map_fn: value of bucket_gc_gen() for the bucket. */
static unsigned bucket_oldest_gen_fn(struct cache *ca, struct bucket *g,
				     void *private)
{
	unsigned gen = bucket_gc_gen(ca, g);

	return gen;
}
/*
 * sort() comparator ordering unsigned values from largest to smallest.
 *
 * Uses explicit comparisons: returning the difference of two unsigned
 * values as an int gives the wrong sign once the difference exceeds INT_MAX.
 */
static int bucket_quantile_cmp(const void *l, const void *r)
{
	unsigned lv = *((const unsigned *) l);
	unsigned rv = *((const unsigned *) r);

	return (rv > lv) - (rv < lv);
}

/*
 * Print 31 quantiles of a per-bucket value for cache device @ca.
 *
 * @fn is evaluated (with @private) for every bucket from first_bucket up to
 * nbuckets.  The values are sorted in descending order, trailing zeroes are
 * dropped, and 31 evenly spaced samples are written to @buf separated by
 * spaces and terminated by a newline.
 *
 * Returns the number of bytes written, or -ENOMEM if the scratch array
 * cannot be allocated.
 */
static ssize_t show_quantiles(struct cache *ca, char *buf,
			      bucket_map_fn *fn, void *private)
{
	size_t n = ca->mi.nbuckets, i;
	/* Compute 31 quantiles */
	unsigned q[31], *p;
	ssize_t ret = 0;

	/* zeroed, so the slots below first_bucket sort to the end */
	p = vzalloc(ca->mi.nbuckets * sizeof(unsigned));
	if (!p)
		return -ENOMEM;

	for (i = ca->mi.first_bucket; i < n; i++)
		p[i] = fn(ca, &ca->buckets[i], private);

	sort(p, n, sizeof(unsigned), bucket_quantile_cmp, NULL);

	/* descending order: zero entries form the tail, trim them */
	while (n &&
	       !p[n - 1])
		--n;

	for (i = 0; i < ARRAY_SIZE(q); i++)
		q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];

	vfree(p);

	for (i = 0; i < ARRAY_SIZE(q); i++)
		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
				 "%u ", q[i]);
	/* replace the final separator with the terminating newline */
	buf[ret - 1] = '\n';

	return ret;
}
static ssize_t show_reserve_stats(struct cache *ca, char *buf)
{
enum alloc_reserve i;
ssize_t ret;
spin_lock(&ca->freelist_lock);
ret = scnprintf(buf, PAGE_SIZE,
"free_inc:\t%zu\t%zu\n",
fifo_used(&ca->free_inc),
ca->free_inc.size);
for (i = 0; i < RESERVE_NR; i++)
ret += scnprintf(buf + ret, PAGE_SIZE - ret,
"free[%u]:\t%zu\t%zu\n", i,
fifo_used(&ca->free[i]),
ca->free[i].size);
spin_unlock(&ca->freelist_lock);
return ret;
}
/*
 * Format allocator debugging state for cache device @ca into @buf: fill
 * level/size of the free_inc and free[] fifos, bucket counters from a
 * bucket stats snapshot (each shown against nbuckets - first_bucket), and
 * the cache set's freelist/open-bucket wait state.
 * Returns the number of bytes written.
 */
static ssize_t show_cache_alloc_debug(struct cache *ca, char *buf)
{
struct cache_set *c = ca->set;
struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
return scnprintf(buf, PAGE_SIZE,
"free_inc: %zu/%zu\n"
"free[RESERVE_PRIO]: %zu/%zu\n"
"free[RESERVE_BTREE]: %zu/%zu\n"
"free[RESERVE_MOVINGGC]: %zu/%zu\n"
"free[RESERVE_NONE]: %zu/%zu\n"
"alloc: %llu/%llu\n"
"meta: %llu/%llu\n"
"dirty: %llu/%llu\n"
"available: %llu/%llu\n"
"freelist_wait: %s\n"
"open buckets: %u/%u (reserved %u)\n"
"open_buckets_wait: %s\n",
fifo_used(&ca->free_inc), ca->free_inc.size,
fifo_used(&ca->free[RESERVE_PRIO]), ca->free[RESERVE_PRIO].size,
fifo_used(&ca->free[RESERVE_BTREE]), ca->free[RESERVE_BTREE].size,
fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size,
stats.buckets_alloc, ca->mi.nbuckets - ca->mi.first_bucket,
stats.buckets_meta, ca->mi.nbuckets - ca->mi.first_bucket,
stats.buckets_dirty, ca->mi.nbuckets - ca->mi.first_bucket,
__buckets_available_cache(ca, stats), ca->mi.nbuckets - ca->mi.first_bucket,
/* a non-empty waiter list is reported as "waiting" */
c->freelist_wait.list.first ? "waiting" : "empty",
c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE,
c->open_buckets_wait.list.first ? "waiting" : "empty");
}
static u64 sectors_written(struct cache *ca)
{
u64 ret = 0;
int cpu;
for_each_possible_cpu(cpu)
ret += *per_cpu_ptr(ca->sectors_written, cpu);
return ret;
}
/*
 * sysfs show handler for a single cache device.
 *
 * A snapshot of the device's bucket statistics is taken up front and used
 * by the dirty/cached/meta/alloc attributes.  The sysfs_print()/
 * sysfs_hprint()/sysfs_printf() helper macros are defined elsewhere;
 * presumably each returns from this function when @attr matches its first
 * argument -- confirm in sysfs.h.  Returns 0 when no attribute matched.
 */
SHOW(bch_cache)
{
struct cache *ca = container_of(kobj, struct cache, kobj);
struct cache_set *c = ca->set;
struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
sysfs_printf(uuid, "%pU\n", ca->disk_sb.sb->disk_uuid.b);
sysfs_hprint(bucket_size, bucket_bytes(ca));
sysfs_print(bucket_size_bytes, bucket_bytes(ca));
sysfs_hprint(block_size, block_bytes(c));
sysfs_print(block_size_bytes, block_bytes(c));
sysfs_print(first_bucket, ca->mi.first_bucket);
sysfs_print(nbuckets, ca->mi.nbuckets);
sysfs_print(discard, ca->mi.discard);
/* sector counters are shifted by 9 to print them in bytes */
sysfs_hprint(written, sectors_written(ca) << 9);
sysfs_hprint(btree_written,
atomic64_read(&ca->btree_sectors_written) << 9);
sysfs_hprint(metadata_written,
(atomic64_read(&ca->meta_sectors_written) +
atomic64_read(&ca->btree_sectors_written)) << 9);
sysfs_print(io_errors,
atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT);
sysfs_hprint(dirty_data, stats.sectors_dirty << 9);
sysfs_print(dirty_bytes, stats.sectors_dirty << 9);
sysfs_print(dirty_buckets, stats.buckets_dirty);
sysfs_hprint(cached_data, stats.sectors_cached << 9);
sysfs_print(cached_bytes, stats.sectors_cached << 9);
sysfs_print(cached_buckets, stats.buckets_cached);
sysfs_print(meta_buckets, stats.buckets_meta);
sysfs_print(alloc_buckets, stats.buckets_alloc);
sysfs_print(available_buckets, buckets_available_cache(ca));
sysfs_print(free_buckets, buckets_free_cache(ca));
sysfs_print(has_data, ca->mi.has_data);
sysfs_print(has_metadata, ca->mi.has_metadata);
sysfs_pd_controller_show(copy_gc, &ca->moving_gc_pd);
if (attr == &sysfs_cache_replacement_policy)
return bch_snprint_string_list(buf, PAGE_SIZE,
cache_replacement_policies,
ca->mi.replacement);
sysfs_print(tier, ca->mi.tier);
if (attr == &sysfs_state_rw)
return bch_snprint_string_list(buf, PAGE_SIZE,
bch_cache_state,
ca->mi.state);
/* the private argument selects the priority clock: 0 = read, 1 = write */
if (attr == &sysfs_read_priority_stats)
return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0);
if (attr == &sysfs_write_priority_stats)
return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1);
if (attr == &sysfs_fragmentation_stats)
return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL);
if (attr == &sysfs_oldest_gen_stats)
return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL);
if (attr == &sysfs_reserve_stats)
return show_reserve_stats(ca, buf);
if (attr == &sysfs_alloc_debug)
return show_cache_alloc_debug(ca, buf);
return 0;
}
/*
 * sysfs store handler for a single cache device (wrapped by
 * STORE_LOCKED(bch_cache)).
 *
 * Handles the copy GC controller tunables, discard, cache replacement
 * policy, device state, unregister and clear_stats.  Returns @size on
 * success or a negative errno.
 */
STORE(__bch_cache)
{
struct cache *ca = container_of(kobj, struct cache, kobj);
struct cache_set *c = ca->set;
/* this device's entry in the cache set's member info array */
struct cache_member *mi = &c->disk_mi[ca->sb.nr_this_dev];
sysfs_pd_controller_store(copy_gc, &ca->moving_gc_pd);
if (attr == &sysfs_discard) {
bool v = strtoul_or_return(buf);
/* only rewrite the superblock when the setting really changes */
if (v != CACHE_DISCARD(mi)) {
SET_CACHE_DISCARD(mi, v);
bcache_write_super(c);
}
}
if (attr == &sysfs_cache_replacement_policy) {
ssize_t v = bch_read_string_list(buf, cache_replacement_policies);
if (v < 0)
return v;
/* only rewrite the superblock when the policy really changes */
if ((unsigned) v != CACHE_REPLACEMENT(mi)) {
SET_CACHE_REPLACEMENT(mi, v);
bcache_write_super(c);
}
}
if (attr == &sysfs_state_rw) {
char name[BDEVNAME_SIZE];
const char *err = NULL;
ssize_t v = bch_read_string_list(buf, bch_cache_state);
if (v < 0)
return v;
/* already in the requested state: nothing to do */
if (v == ca->mi.state)
return size;
switch (v) {
case CACHE_ACTIVE:
err = bch_cache_read_write(ca);
break;
case CACHE_RO:
bch_cache_read_only(ca);
break;
case CACHE_FAILED:
case CACHE_SPARE:
/*
* XXX: need to migrate data off and set correct state
*/
pr_err("can't set %s %s: not supported",
bdevname(ca->disk_sb.bdev, name),
bch_cache_state[v]);
return -EINVAL;
}
/* only the CACHE_ACTIVE case above can set err */
if (err) {
pr_err("can't set %s %s: %s",
bdevname(ca->disk_sb.bdev, name),
bch_cache_state[v], err);
return -EINVAL;
}
}
if (attr == &sysfs_unregister) {
bool force = false;
/* "force", optionally followed by a newline, requests forced removal */
if (!strncmp(buf, "force", 5) &&
(buf[5] == '\0' || buf[5] == '\n'))
force = true;
bch_cache_remove(ca, force);
}
if (attr == &sysfs_clear_stats) {
int cpu;
for_each_possible_cpu(cpu)
*per_cpu_ptr(ca->sectors_written, cpu) = 0;
atomic64_set(&ca->btree_sectors_written, 0);
atomic64_set(&ca->meta_sectors_written, 0);
atomic_set(&ca->io_count, 0);
atomic_set(&ca->io_errors, 0);
}
return size;
}
/*
 * STORE_LOCKED() is defined elsewhere; presumably it generates
 * bch_cache_store() as a wrapper that calls __bch_cache_store() under a
 * lock -- confirm in sysfs.h.
 */
STORE_LOCKED(bch_cache)
/*
 * NULL-terminated list of the attributes exposed for a cache device.  The
 * "state" entry is the writable sysfs_state_rw variant.
 */
static struct attribute *bch_cache_files[] = {
&sysfs_uuid,
&sysfs_unregister,
&sysfs_bucket_size,
&sysfs_bucket_size_bytes,
&sysfs_block_size,
&sysfs_block_size_bytes,
&sysfs_first_bucket,
&sysfs_nbuckets,
&sysfs_read_priority_stats,
&sysfs_write_priority_stats,
&sysfs_fragmentation_stats,
&sysfs_oldest_gen_stats,
&sysfs_reserve_stats,
&sysfs_available_buckets,
&sysfs_free_buckets,
&sysfs_dirty_data,
&sysfs_dirty_bytes,
&sysfs_dirty_buckets,
&sysfs_cached_data,
&sysfs_cached_bytes,
&sysfs_cached_buckets,
&sysfs_meta_buckets,
&sysfs_alloc_buckets,
&sysfs_has_data,
&sysfs_has_metadata,
&sysfs_discard,
&sysfs_written,
&sysfs_btree_written,
&sysfs_metadata_written,
&sysfs_io_errors,
&sysfs_clear_stats,
&sysfs_cache_replacement_policy,
&sysfs_tier,
&sysfs_state_rw,
&sysfs_alloc_debug,
sysfs_pd_controller_files(copy_gc),
NULL
};
KTYPE(bch_cache);