mirror of https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-22 00:00:03 +03:00
Update bcachefs sources to edf5f38218 bcachefs: Refactor superblock code
This commit is contained in:
parent ff5e165532
commit c598d91dcb
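The central change in this update: struct bch_fs previously held a raw superblock pointer (struct bch_sb *disk_sb) plus its allocation order, and it now holds a struct bch_sb_handle, with every call site going through the handle's .sb member (c->disk_sb becomes c->disk_sb.sb). A minimal sketch of the pattern, assuming nothing about the handle beyond the .sb member this diff actually shows:

    /* Sketch only: the real bch_sb_handle carries additional buffer/IO
     * bookkeeping fields that are not visible in this diff. */
    struct bch_sb_handle {
            struct bch_sb           *sb;
            /* ... allocation/IO fields elided ... */
    };

    struct bch_fs {
            /* was: struct bch_sb *disk_sb; unsigned disk_sb_order; */
            struct bch_sb_handle    disk_sb;
            /* ... */
    };

    /* Call sites change accordingly, e.g.: */
    struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb.sb);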
@@ -1 +1 @@
-9fc6ccd8659598d4ca885220a795889071b619f4
+edf5f38218f699e53913a549465f35d36c4418f7
@@ -86,7 +86,7 @@ int cmd_set_passphrase(int argc, char *argv[])
 	if (IS_ERR(c))
 		die("Error opening %s: %s", argv[1], strerror(-PTR_ERR(c)));
 
-	struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb);
+	struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb.sb);
 	if (!crypt)
 		die("Filesystem does not have encryption enabled");
 
@@ -100,7 +100,7 @@ int cmd_set_passphrase(int argc, char *argv[])
 	char *new_passphrase = read_passphrase_twice("Enter new passphrase: ");
 	struct bch_key passphrase_key = derive_passphrase(crypt, new_passphrase);
 
-	if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(c->disk_sb),
+	if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(c->disk_sb.sb),
 				    &new_key, sizeof(new_key)))
 		die("error encrypting key");
 	crypt->key = new_key;
@@ -123,7 +123,7 @@ int cmd_remove_passphrase(int argc, char *argv[])
 	if (IS_ERR(c))
 		die("Error opening %s: %s", argv[1], strerror(-PTR_ERR(c)));
 
-	struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb);
+	struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb.sb);
 	if (!crypt)
 		die("Filesystem does not have encryption enabled");
 
@@ -31,6 +31,7 @@
 #include "libbcachefs/fs.h"
 #include "libbcachefs/inode.h"
 #include "libbcachefs/io.h"
+#include "libbcachefs/replicas.h"
 #include "libbcachefs/str_hash.h"
 #include "libbcachefs/super.h"
 #include "libbcachefs/xattr.h"
@@ -15,7 +15,7 @@
 #define BUG_ON(cond) assert(!(cond))
 
 #define WARN_ON_ONCE(cond) ({ bool _r = (cond); if (_r) assert(0); _r; })
-#define WARN_ONCE(cond, msg) ({ bool _r = (cond); if (_r) assert(0); _r; })
+#define WARN_ONCE(cond, ...) ({ bool _r = (cond); if (_r) assert(0); _r; })
 
 #define __WARN() assert(0)
 #define __WARN_printf(arg...) assert(0)
@@ -319,7 +319,7 @@ TRACE_EVENT(btree_gc_coalesce_fail,
 
 	TP_fast_assign(
 		__entry->reason = reason;
-		memcpy(__entry->uuid, c->disk_sb->user_uuid.b, 16);
+		memcpy(__entry->uuid, c->disk_sb.sb->user_uuid.b, 16);
 	),
 
 	TP_printk("%pU: %u", __entry->uuid, __entry->reason)
libbcachefs.c
@@ -14,12 +14,14 @@
 
 #include <uuid/uuid.h>
 
-#include "libbcachefs/bcachefs_format.h"
-#include "libbcachefs/checksum.h"
-#include "crypto.h"
 #include "libbcachefs.h"
+#include "crypto.h"
+#include "libbcachefs/bcachefs_format.h"
+#include "libbcachefs/btree_cache.h"
+#include "libbcachefs/checksum.h"
+#include "libbcachefs/disk_groups.h"
 #include "libbcachefs/opts.h"
+#include "libbcachefs/replicas.h"
 #include "libbcachefs/super-io.h"
 
 #define NSEC_PER_SEC 1000000000L
@@ -124,8 +126,8 @@ void bch2_pick_bucket_size(struct format_opts opts, struct dev_opts *dev)
 
 }
 
-static unsigned parse_target(struct dev_opts *devs, size_t nr_devs,
-			     struct bch_sb_field_disk_groups *gi,
+static unsigned parse_target(struct bch_sb_handle *sb,
+			     struct dev_opts *devs, size_t nr_devs,
 			     const char *s)
 {
 	struct dev_opts *i;
@@ -138,7 +140,7 @@ static unsigned parse_target(struct dev_opts *devs, size_t nr_devs,
 		if (!strcmp(s, i->path))
 			return dev_to_target(i - devs);
 
-	idx = __bch2_disk_group_find(gi, s);
+	idx = bch2_disk_path_find(sb, s);
 	if (idx >= 0)
 		return group_to_target(idx);
 
@@ -149,11 +151,9 @@ static unsigned parse_target(struct dev_opts *devs, size_t nr_devs,
 struct bch_sb *bch2_format(struct format_opts opts,
 			   struct dev_opts *devs, size_t nr_devs)
 {
-	struct bch_sb *sb;
+	struct bch_sb_handle sb = { NULL };
 	struct dev_opts *i;
 	struct bch_sb_field_members *mi;
-	struct bch_sb_field_disk_groups *gi = NULL;
-	unsigned u64s;
 
 	/* calculate block size: */
 	if (!opts.block_size)
@@ -184,58 +184,51 @@ struct bch_sb *bch2_format(struct format_opts opts,
 	if (uuid_is_null(opts.uuid.b))
 		uuid_generate(opts.uuid.b);
 
-	sb = calloc(1, sizeof(*sb) +
-		    sizeof(struct bch_sb_field_members) +
-		    sizeof(struct bch_member) * nr_devs +
-		    sizeof(struct bch_sb_field_disk_groups) +
-		    sizeof(struct bch_disk_group) * nr_devs +
-		    sizeof(struct bch_sb_field_crypt));
+	if (bch2_sb_realloc(&sb, 0))
+		die("insufficient memory");
 
-	sb->version	= cpu_to_le64(BCH_SB_VERSION_MAX);
-	sb->magic	= BCACHE_MAGIC;
-	sb->block_size	= cpu_to_le16(opts.block_size);
-	sb->user_uuid	= opts.uuid;
-	sb->nr_devices	= nr_devs;
+	sb.sb->version		= cpu_to_le64(BCH_SB_VERSION_MAX);
+	sb.sb->magic		= BCACHE_MAGIC;
+	sb.sb->block_size	= cpu_to_le16(opts.block_size);
+	sb.sb->user_uuid	= opts.uuid;
+	sb.sb->nr_devices	= nr_devs;
 
-	uuid_generate(sb->uuid.b);
+	uuid_generate(sb.sb->uuid.b);
 
 	if (opts.label)
-		strncpy((char *) sb->label, opts.label, sizeof(sb->label));
+		strncpy((char *) sb.sb->label, opts.label, sizeof(sb.sb->label));
 
-	SET_BCH_SB_CSUM_TYPE(sb,		opts.meta_csum_type);
-	SET_BCH_SB_META_CSUM_TYPE(sb,		opts.meta_csum_type);
-	SET_BCH_SB_DATA_CSUM_TYPE(sb,		opts.data_csum_type);
-	SET_BCH_SB_COMPRESSION_TYPE(sb,		opts.compression_type);
-	SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(sb, opts.background_compression_type);
+	SET_BCH_SB_CSUM_TYPE(sb.sb,		opts.meta_csum_type);
+	SET_BCH_SB_META_CSUM_TYPE(sb.sb,	opts.meta_csum_type);
+	SET_BCH_SB_DATA_CSUM_TYPE(sb.sb,	opts.data_csum_type);
+	SET_BCH_SB_COMPRESSION_TYPE(sb.sb,	opts.compression_type);
+	SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(sb.sb,
+					opts.background_compression_type);
 
-	SET_BCH_SB_BTREE_NODE_SIZE(sb,		opts.btree_node_size);
-	SET_BCH_SB_GC_RESERVE(sb,		8);
-	SET_BCH_SB_META_REPLICAS_WANT(sb,	opts.meta_replicas);
-	SET_BCH_SB_META_REPLICAS_REQ(sb,	opts.meta_replicas_required);
-	SET_BCH_SB_DATA_REPLICAS_WANT(sb,	opts.data_replicas);
-	SET_BCH_SB_DATA_REPLICAS_REQ(sb,	opts.data_replicas_required);
-	SET_BCH_SB_ERROR_ACTION(sb,		opts.on_error_action);
-	SET_BCH_SB_STR_HASH_TYPE(sb,		BCH_STR_HASH_SIPHASH);
-	SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb, ilog2(opts.encoded_extent_max));
+	SET_BCH_SB_BTREE_NODE_SIZE(sb.sb,	opts.btree_node_size);
+	SET_BCH_SB_GC_RESERVE(sb.sb,		8);
+	SET_BCH_SB_META_REPLICAS_WANT(sb.sb,	opts.meta_replicas);
+	SET_BCH_SB_META_REPLICAS_REQ(sb.sb,	opts.meta_replicas_required);
+	SET_BCH_SB_DATA_REPLICAS_WANT(sb.sb,	opts.data_replicas);
+	SET_BCH_SB_DATA_REPLICAS_REQ(sb.sb,	opts.data_replicas_required);
+	SET_BCH_SB_ERROR_ACTION(sb.sb,		opts.on_error_action);
+	SET_BCH_SB_STR_HASH_TYPE(sb.sb,		BCH_STR_HASH_SIPHASH);
+	SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb.sb, ilog2(opts.encoded_extent_max));
 
-	SET_BCH_SB_POSIX_ACL(sb, 1);
+	SET_BCH_SB_POSIX_ACL(sb.sb, 1);
 
 	struct timespec now;
 	if (clock_gettime(CLOCK_REALTIME, &now))
 		die("error getting current time: %m");
 
-	sb->time_base_lo	= cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
-	sb->time_precision	= cpu_to_le32(1);
-
-	mi = vstruct_end(sb);
-	u64s = (sizeof(struct bch_sb_field_members) +
-		sizeof(struct bch_member) * nr_devs) / sizeof(u64);
-
-	le32_add_cpu(&sb->u64s, u64s);
-	le32_add_cpu(&mi->field.u64s, u64s);
-	mi->field.type = BCH_SB_FIELD_members;
+	sb.sb->time_base_lo	= cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
+	sb.sb->time_precision	= cpu_to_le32(1);
+
+	/* Member info: */
+	mi = bch2_sb_resize_members(&sb,
+			(sizeof(*mi) + sizeof(struct bch_member) *
+			 nr_devs) / sizeof(u64));
 
 	for (i = devs; i < devs + nr_devs; i++) {
 		struct bch_member *m = mi->members + (i - devs);
 
@@ -253,63 +246,38 @@ struct bch_sb *bch2_format(struct format_opts opts,
 	/* Disk groups */
 	for (i = devs; i < devs + nr_devs; i++) {
 		struct bch_member *m = mi->members + (i - devs);
-		struct bch_disk_group *g;
-		size_t len;
 		int idx;
 
 		if (!i->group)
 			continue;
 
-		len = min_t(size_t, strlen(i->group) + 1, BCH_SB_LABEL_SIZE);
-
-		if (!gi) {
-			gi = vstruct_end(sb);
-			u64s = sizeof(*gi) / sizeof(u64);
-			le32_add_cpu(&sb->u64s, u64s);
-			le32_add_cpu(&gi->field.u64s, u64s);
-			gi->field.type = BCH_SB_FIELD_disk_groups;
-		}
-
-		idx = __bch2_disk_group_find(gi, i->group);
-		if (idx >= 0) {
-			g = gi->entries + idx;
-		} else {
-			u64s = sizeof(*g) / sizeof(u64);
-			g = vstruct_end(&gi->field);
-			le32_add_cpu(&sb->u64s, u64s);
-			le32_add_cpu(&gi->field.u64s, u64s);
-			memcpy(g->label, i->group, len);
-			SET_BCH_GROUP_DATA_ALLOWED(g, ~0);
-		}
-
-		SET_BCH_MEMBER_GROUP(m, (g - gi->entries) + 1);
+		idx = bch2_disk_path_find_or_create(&sb, i->group);
+		if (idx < 0)
+			die("error creating disk path: %s", idx);
+
+		SET_BCH_MEMBER_GROUP(m, idx + 1);
 	}
 
-	SET_BCH_SB_FOREGROUND_TARGET(sb,
-		parse_target(devs, nr_devs, gi, opts.foreground_target));
-	SET_BCH_SB_BACKGROUND_TARGET(sb,
-		parse_target(devs, nr_devs, gi, opts.background_target));
-	SET_BCH_SB_PROMOTE_TARGET(sb,
-		parse_target(devs, nr_devs, gi, opts.promote_target));
+	SET_BCH_SB_FOREGROUND_TARGET(sb.sb,
+		parse_target(&sb, devs, nr_devs, opts.foreground_target));
+	SET_BCH_SB_BACKGROUND_TARGET(sb.sb,
+		parse_target(&sb, devs, nr_devs, opts.background_target));
+	SET_BCH_SB_PROMOTE_TARGET(sb.sb,
+		parse_target(&sb, devs, nr_devs, opts.promote_target));
 
 	/* Crypt: */
 	if (opts.encrypted) {
-		struct bch_sb_field_crypt *crypt = vstruct_end(sb);
-
-		u64s = sizeof(struct bch_sb_field_crypt) / sizeof(u64);
-
-		le32_add_cpu(&sb->u64s, u64s);
-		crypt->field.u64s = cpu_to_le32(u64s);
-		crypt->field.type = BCH_SB_FIELD_crypt;
-
-		bch_sb_crypt_init(sb, crypt, opts.passphrase);
-		SET_BCH_SB_ENCRYPTION_TYPE(sb, 1);
+		struct bch_sb_field_crypt *crypt =
+			bch2_sb_resize_crypt(&sb, sizeof(*crypt) / sizeof(u64));
+
+		bch_sb_crypt_init(sb.sb, crypt, opts.passphrase);
+		SET_BCH_SB_ENCRYPTION_TYPE(sb.sb, 1);
 	}
 
 	for (i = devs; i < devs + nr_devs; i++) {
-		sb->dev_idx = i - devs;
+		sb.sb->dev_idx = i - devs;
 
-		init_layout(&sb->layout, opts.block_size,
+		init_layout(&sb.sb->layout, opts.block_size,
 			    i->sb_offset, i->sb_end);
 
 		if (i->sb_offset == BCH_SB_SECTOR) {
@@ -319,11 +287,11 @@ struct bch_sb *bch2_format(struct format_opts opts,
 			xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
 		}
 
-		bch2_super_write(i->fd, sb);
+		bch2_super_write(i->fd, sb.sb);
 		close(i->fd);
 	}
 
-	return sb;
+	return sb.sb;
 }
 
 void bch2_super_write(int fd, struct bch_sb *sb)
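Worth noting in the bch2_format() hunks above: the hand-rolled vstruct_end() plus le32_add_cpu(&sb->u64s, ...) bookkeeping for appending superblock fields is replaced by bch2_sb_realloc(), bch2_sb_resize_members() and bch2_sb_resize_crypt() on the handle. A hedged sketch of the calling pattern as it appears in the new code; exact prototypes live in the superblock headers and may differ from what this diff shows:

    /* Sketch based only on the calls visible in this diff. */
    struct bch_sb_handle sb = { NULL };

    if (bch2_sb_realloc(&sb, 0))        /* allocate an empty superblock */
            die("insufficient memory");

    /* Resize helpers grow the buffer and return the (re)located field: */
    struct bch_sb_field_members *mi =
            bch2_sb_resize_members(&sb,
                    (sizeof(*mi) + sizeof(struct bch_member) * nr_devs) /
                    sizeof(u64));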
@@ -553,11 +521,11 @@ static void bch2_sb_print_disk_groups(struct bch_sb *sb, struct bch_sb_field *f,
 
 typedef void (*sb_field_print_fn)(struct bch_sb *, struct bch_sb_field *, enum units);
 
-struct bch_sb_field_ops {
+struct bch_sb_field_toolops {
 	sb_field_print_fn	print;
 };
 
-static const struct bch_sb_field_ops bch2_sb_field_ops[] = {
+static const struct bch_sb_field_toolops bch2_sb_field_ops[] = {
 #define x(f, nr)					\
 	[BCH_SB_FIELD_##f] = {				\
 		.print	= bch2_sb_print_##f,		\
@@ -58,11 +58,13 @@
 #include "btree_cache.h"
 #include "btree_io.h"
 #include "btree_update.h"
+#include "btree_update_interior.h"
 #include "btree_gc.h"
 #include "buckets.h"
 #include "checksum.h"
 #include "clock.h"
 #include "debug.h"
+#include "disk_groups.h"
 #include "error.h"
 #include "extents.h"
 #include "io.h"
@@ -79,7 +81,7 @@
 #include <linux/sort.h>
 #include <trace/events/bcachefs.h>
 
-static void bch2_recalc_min_prio(struct bch_fs *, struct bch_dev *, int);
+static void bch2_recalc_oldest_io(struct bch_fs *, struct bch_dev *, int);
 
 /* Ratelimiting/PD controllers */
 
@@ -130,8 +132,7 @@ static unsigned bch_alloc_val_u64s(const struct bch_alloc *a)
 	return DIV_ROUND_UP(bytes, sizeof(u64));
 }
 
-static const char *bch2_alloc_invalid(const struct bch_fs *c,
-				      struct bkey_s_c k)
+const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
 	if (k.k->p.inode >= c->sb.nr_devices ||
 	    !c->devs[k.k->p.inode])
@@ -152,8 +153,8 @@ static const char *bch2_alloc_invalid(const struct bch_fs *c,
 	return NULL;
 }
 
-static void bch2_alloc_to_text(struct bch_fs *c, char *buf,
-			       size_t size, struct bkey_s_c k)
+void bch2_alloc_to_text(struct bch_fs *c, char *buf,
+			size_t size, struct bkey_s_c k)
 {
 	buf[0] = '\0';
 
@@ -163,11 +164,6 @@ static void bch2_alloc_to_text(struct bch_fs *c, char *buf,
 	}
 }
 
-const struct bkey_ops bch2_bkey_alloc_ops = {
-	.key_invalid	= bch2_alloc_invalid,
-	.val_to_text	= bch2_alloc_to_text,
-};
-
 static inline unsigned get_alloc_field(const u8 **p, unsigned bytes)
 {
 	unsigned v;
@@ -236,9 +232,9 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
 
 	d = a.v->data;
 	if (a.v->fields & (1 << BCH_ALLOC_FIELD_READ_TIME))
-		g->prio[READ] = get_alloc_field(&d, 2);
+		g->io_time[READ] = get_alloc_field(&d, 2);
 	if (a.v->fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
-		g->prio[WRITE] = get_alloc_field(&d, 2);
+		g->io_time[WRITE] = get_alloc_field(&d, 2);
 
 	lg_local_unlock(&c->usage_lock);
 }
@@ -270,21 +266,21 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
 		bch2_alloc_read_key(c, bkey_i_to_s_c(k));
 	}
 
-	mutex_lock(&c->prio_clock[READ].lock);
+	mutex_lock(&c->bucket_clock[READ].lock);
 	for_each_member_device(ca, c, i) {
 		down_read(&ca->bucket_lock);
-		bch2_recalc_min_prio(c, ca, READ);
+		bch2_recalc_oldest_io(c, ca, READ);
 		up_read(&ca->bucket_lock);
 	}
-	mutex_unlock(&c->prio_clock[READ].lock);
+	mutex_unlock(&c->bucket_clock[READ].lock);
 
-	mutex_lock(&c->prio_clock[WRITE].lock);
+	mutex_lock(&c->bucket_clock[WRITE].lock);
 	for_each_member_device(ca, c, i) {
 		down_read(&ca->bucket_lock);
-		bch2_recalc_min_prio(c, ca, WRITE);
+		bch2_recalc_oldest_io(c, ca, WRITE);
 		up_read(&ca->bucket_lock);
 	}
-	mutex_unlock(&c->prio_clock[WRITE].lock);
+	mutex_unlock(&c->bucket_clock[WRITE].lock);
 
 	return 0;
 }
@@ -320,9 +316,9 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 
 	d = a->v.data;
 	if (a->v.fields & (1 << BCH_ALLOC_FIELD_READ_TIME))
-		put_alloc_field(&d, 2, g->prio[READ]);
+		put_alloc_field(&d, 2, g->io_time[READ]);
 	if (a->v.fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
-		put_alloc_field(&d, 2, g->prio[WRITE]);
+		put_alloc_field(&d, 2, g->io_time[WRITE]);
 	lg_local_unlock(&c->usage_lock);
 
 	ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq,
@@ -395,38 +391,34 @@ int bch2_alloc_write(struct bch_fs *c)
 
 /* Bucket IO clocks: */
 
-static void bch2_recalc_min_prio(struct bch_fs *c, struct bch_dev *ca, int rw)
+static void bch2_recalc_oldest_io(struct bch_fs *c, struct bch_dev *ca, int rw)
 {
-	struct prio_clock *clock = &c->prio_clock[rw];
+	struct bucket_clock *clock = &c->bucket_clock[rw];
 	struct bucket_array *buckets = bucket_array(ca);
 	struct bucket *g;
-	u16 max_delta = 1;
+	u16 max_last_io = 0;
 	unsigned i;
 
-	lockdep_assert_held(&c->prio_clock[rw].lock);
+	lockdep_assert_held(&c->bucket_clock[rw].lock);
 
-	/* Determine min prio for this particular device */
+	/* Recalculate max_last_io for this device: */
 	for_each_bucket(g, buckets)
-		max_delta = max(max_delta, (u16) (clock->hand - g->prio[rw]));
+		max_last_io = max(max_last_io, bucket_last_io(c, g, rw));
 
-	ca->min_prio[rw] = clock->hand - max_delta;
+	ca->max_last_bucket_io[rw] = max_last_io;
 
-	/*
-	 * This may possibly increase the min prio for the whole device, check
-	 * that as well.
-	 */
-	max_delta = 1;
+	/* Recalculate global max_last_io: */
+	max_last_io = 0;
 
 	for_each_member_device(ca, c, i)
-		max_delta = max(max_delta,
-				(u16) (clock->hand - ca->min_prio[rw]));
+		max_last_io = max(max_last_io, ca->max_last_bucket_io[rw]);
 
-	clock->min_prio = clock->hand - max_delta;
+	clock->max_last_io = max_last_io;
 }
 
-static void bch2_rescale_prios(struct bch_fs *c, int rw)
+static void bch2_rescale_bucket_io_times(struct bch_fs *c, int rw)
 {
-	struct prio_clock *clock = &c->prio_clock[rw];
+	struct bucket_clock *clock = &c->bucket_clock[rw];
 	struct bucket_array *buckets;
 	struct bch_dev *ca;
 	struct bucket *g;
@@ -439,10 +431,10 @@ static void bch2_rescale_prios(struct bch_fs *c, int rw)
 		buckets = bucket_array(ca);
 
 		for_each_bucket(g, buckets)
-			g->prio[rw] = clock->hand -
-				(clock->hand - g->prio[rw]) / 2;
+			g->io_time[rw] = clock->hand -
+				bucket_last_io(c, g, rw) / 2;
 
-		bch2_recalc_min_prio(c, ca, rw);
+		bch2_recalc_oldest_io(c, ca, rw);
 
 		up_read(&ca->bucket_lock);
 	}
@@ -450,19 +442,26 @@ static void bch2_rescale_prios(struct bch_fs *c, int rw)
 
 static void bch2_inc_clock_hand(struct io_timer *timer)
 {
-	struct prio_clock *clock = container_of(timer,
-					struct prio_clock, rescale);
+	struct bucket_clock *clock = container_of(timer,
+					struct bucket_clock, rescale);
 	struct bch_fs *c = container_of(clock,
-					struct bch_fs, prio_clock[clock->rw]);
+					struct bch_fs, bucket_clock[clock->rw]);
 	struct bch_dev *ca;
 	u64 capacity;
 	unsigned i;
 
 	mutex_lock(&clock->lock);
 
-	clock->hand++;
-
 	/* if clock cannot be advanced more, rescale prio */
-	if (clock->hand == (u16) (clock->min_prio - 1))
-		bch2_rescale_prios(c, clock->rw);
+	if (clock->max_last_io >= U16_MAX - 2)
+		bch2_rescale_bucket_io_times(c, clock->rw);
+
+	BUG_ON(clock->max_last_io >= U16_MAX - 2);
+
+	for_each_member_device(ca, c, i)
+		ca->max_last_bucket_io[clock->rw]++;
+	clock->max_last_io++;
+	clock->hand++;
 
 	mutex_unlock(&clock->lock);
 
@@ -484,9 +483,9 @@ static void bch2_inc_clock_hand(struct io_timer *timer)
 	bch2_io_timer_add(&c->io_clock[clock->rw], timer);
 }
 
-static void bch2_prio_timer_init(struct bch_fs *c, int rw)
+static void bch2_bucket_clock_init(struct bch_fs *c, int rw)
 {
-	struct prio_clock *clock = &c->prio_clock[rw];
+	struct bucket_clock *clock = &c->bucket_clock[rw];
 
 	clock->hand = 1;
 	clock->rw   = rw;
@@ -536,7 +535,7 @@ static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca)
 	while (1) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (kthread_should_stop()) {
-			ret = -1;
+			ret = 1;
 			break;
 		}
 
@@ -635,13 +634,14 @@ static void bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
 static unsigned long bucket_sort_key(struct bch_fs *c, struct bch_dev *ca,
 				     size_t b, struct bucket_mark m)
 {
+	unsigned last_io = bucket_last_io(c, bucket(ca, b), READ);
+	unsigned max_last_io = ca->max_last_bucket_io[READ];
+
 	/*
 	 * Time since last read, scaled to [0, 8) where larger value indicates
 	 * more recently read data:
 	 */
-	unsigned long hotness =
-		(bucket(ca, b)->prio[READ] - ca->min_prio[READ]) * 7 /
-		(c->prio_clock[READ].hand - ca->min_prio[READ]);
+	unsigned long hotness = (max_last_io - last_io) * 7 / max_last_io;
 
 	/* How much we want to keep the data in this bucket: */
 	unsigned long data_wantness =
@@ -659,23 +659,25 @@ static inline int bucket_alloc_cmp(alloc_heap *h,
 				   struct alloc_heap_entry l,
 				   struct alloc_heap_entry r)
 {
-	return (l.key > r.key) - (l.key < r.key);
+	return (l.key > r.key) - (l.key < r.key) ?:
+		(l.nr < r.nr) - (l.nr > r.nr) ?:
+		(l.bucket > r.bucket) - (l.bucket < r.bucket);
 }
 
 static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct bucket_array *buckets;
-	struct alloc_heap_entry e;
+	struct alloc_heap_entry e = { 0 };
 	size_t b;
 
 	ca->alloc_heap.used = 0;
 
-	mutex_lock(&c->prio_clock[READ].lock);
+	mutex_lock(&c->bucket_clock[READ].lock);
 	down_read(&ca->bucket_lock);
 
 	buckets = bucket_array(ca);
 
-	bch2_recalc_min_prio(c, ca, READ);
+	bch2_recalc_oldest_io(c, ca, READ);
 
 	/*
 	 * Find buckets with lowest read priority, by building a maxheap sorted
@@ -684,30 +686,45 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
 	 */
 	for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++) {
 		struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
+		unsigned long key = bucket_sort_key(c, ca, b, m);
 
 		if (!bch2_can_invalidate_bucket(ca, b, m))
 			continue;
 
-		e = (struct alloc_heap_entry) {
-			.bucket = b,
-			.key	= bucket_sort_key(c, ca, b, m)
-		};
-
-		heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
+		if (e.nr && e.bucket + e.nr == b && e.key == key) {
+			e.nr++;
+		} else {
+			if (e.nr)
+				heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
+
+			e = (struct alloc_heap_entry) {
+				.bucket = b,
+				.nr	= 1,
+				.key	= key,
+			};
+		}
 
 		cond_resched();
 	}
 
+	if (e.nr)
+		heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
+
 	up_read(&ca->bucket_lock);
-	mutex_unlock(&c->prio_clock[READ].lock);
+	mutex_unlock(&c->bucket_clock[READ].lock);
+
+	heap_resort(&ca->alloc_heap, bucket_alloc_cmp);
 
 	/*
 	 * If we run out of buckets to invalidate, bch2_allocator_thread() will
 	 * kick stuff and retry us
 	 */
-	while (!fifo_full(&ca->free_inc) &&
-	       heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp))
-		bch2_invalidate_one_bucket(c, ca, e.bucket);
+	while (heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp)) {
+		for (b = e.bucket;
+		     b < e.bucket + e.nr;
+		     b++) {
+			if (fifo_full(&ca->free_inc))
+				return;
+
+			bch2_invalidate_one_bucket(c, ca, b);
+		}
+	}
 }
 
 static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
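The LRU scan above now coalesces runs of adjacent buckets that share a sort key into a single alloc_heap_entry (the new nr field), and bucket_alloc_cmp() breaks key ties so that longer runs sort first. A small self-contained sketch of the same coalescing idea; the names here are illustrative, not the driver's:

    #include <stdio.h>
    #include <stddef.h>

    struct entry { size_t bucket, nr; unsigned long key; };

    /* Same shape as the diff's loop: extend the current run while the
     * next bucket is adjacent with an equal key, else flush and restart. */
    static void coalesce(const unsigned long *keys, size_t n)
    {
            struct entry e = { 0 };

            for (size_t b = 0; b < n; b++) {
                    if (e.nr && e.bucket + e.nr == b && e.key == keys[b]) {
                            e.nr++;
                    } else {
                            if (e.nr)
                                    printf("run: start %zu len %zu key %lu\n",
                                           e.bucket, e.nr, e.key);
                            e = (struct entry) { .bucket = b, .nr = 1,
                                                 .key = keys[b] };
                    }
            }
            if (e.nr)
                    printf("run: start %zu len %zu key %lu\n",
                           e.bucket, e.nr, e.key);
    }

    int main(void)
    {
            unsigned long keys[] = { 3, 3, 3, 7, 7, 1 };
            coalesce(keys, 6);      /* prints runs (0,3), (3,2), (5,1) */
            return 0;
    }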
@@ -729,6 +746,8 @@ static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
 
 		if (bch2_can_invalidate_bucket(ca, b, m))
 			bch2_invalidate_one_bucket(c, ca, b);
+
+		cond_resched();
 	}
 }
@@ -749,6 +768,8 @@ static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca
 
 		if (bch2_can_invalidate_bucket(ca, b, m))
 			bch2_invalidate_one_bucket(c, ca, b);
+
+		cond_resched();
 	}
 }
@@ -850,7 +871,7 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
 
 		if ((current->flags & PF_KTHREAD) &&
 		    kthread_should_stop()) {
-			ret = -1;
+			ret = 1;
 			break;
 		}
 
@@ -880,7 +901,7 @@ static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca)
 			       ca->mi.bucket_size, GFP_NOIO, 0);
 
 		if (push_invalidated_bucket(c, ca, bucket))
-			return -1;
+			return 1;
 	}
 
 	return 0;
@@ -905,17 +926,32 @@ static int bch2_allocator_thread(void *arg)
 
 	while (1) {
 		while (1) {
 			cond_resched();
 
+			pr_debug("discarding %zu invalidated buckets",
+				 ca->nr_invalidated);
+
 			ret = discard_invalidated_buckets(c, ca);
 			if (ret)
-				return 0;
+				goto stop;
 
 			if (fifo_empty(&ca->free_inc))
 				break;
 
+			pr_debug("invalidating %zu buckets",
+				 fifo_used(&ca->free_inc));
+
 			journal_seq = 0;
 			ret = bch2_invalidate_free_inc(c, ca, &journal_seq, SIZE_MAX);
-			if (ret)
-				return 0;
+			if (ret) {
+				bch_err(ca, "error invalidating buckets: %i", ret);
+				goto stop;
+			}
+
+			if (!ca->nr_invalidated) {
+				bch_err(ca, "allocator thread unable to make forward progress!");
+				goto stop;
+			}
 
 			if (ca->allocator_invalidating_data)
 				ret = bch2_journal_flush_seq(&c->journal, journal_seq);
@@ -927,22 +963,29 @@ static int bch2_allocator_thread(void *arg)
 			 * journal error - buckets haven't actually been
 			 * invalidated, can't discard them:
 			 */
-			if (ret)
-				return 0;
+			if (ret) {
+				bch_err(ca, "journal error: %i", ret);
+				goto stop;
+			}
 		}
 
+		pr_debug("free_inc now empty");
+
 		/* Reset front/back so we can easily sort fifo entries later: */
 		ca->free_inc.front = ca->free_inc.back	= 0;
 		ca->allocator_journal_seq_flush		= 0;
 		ca->allocator_invalidating_data	= false;
 
 		down_read(&c->gc_lock);
-		if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
-			up_read(&c->gc_lock);
-			return 0;
-		}
-
 		while (1) {
+			size_t prev = fifo_used(&ca->free_inc);
+
+			if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
+				up_read(&c->gc_lock);
+				bch_err(ca, "gc failure");
+				goto stop;
+			}
+
 			/*
 			 * Find some buckets that we can invalidate, either
 			 * they're completely unused, or only contain clean data
@@ -950,7 +993,14 @@ static int bch2_allocator_thread(void *arg)
 			 * another cache tier
 			 */
 
+			pr_debug("scanning for reclaimable buckets");
+
 			find_reclaimable_buckets(c, ca);
 
+			pr_debug("found %zu buckets (free_inc %zu/%zu)",
+				 fifo_used(&ca->free_inc) - prev,
+				 fifo_used(&ca->free_inc), ca->free_inc.size);
+
 			trace_alloc_batch(ca, fifo_used(&ca->free_inc),
 					  ca->free_inc.size);
 
@@ -977,15 +1027,20 @@ static int bch2_allocator_thread(void *arg)
 				ca->allocator_blocked = true;
 				closure_wake_up(&c->freelist_wait);
 
-				if (wait_buckets_available(c, ca)) {
+				ret = wait_buckets_available(c, ca);
+				if (ret) {
 					up_read(&c->gc_lock);
-					return 0;
+					goto stop;
 				}
 			}
 
 			ca->allocator_blocked = false;
 			up_read(&c->gc_lock);
 
+			pr_debug("free_inc now %zu/%zu",
+				 fifo_used(&ca->free_inc),
+				 ca->free_inc.size);
+
 			sort_free_inc(c, ca);
 
 			/*
@@ -993,6 +1048,10 @@ static int bch2_allocator_thread(void *arg)
 		 * write out the new bucket gens:
 		 */
 	}
+
+stop:
+	pr_debug("alloc thread stopping (ret %i)", ret);
+	return 0;
 }
 
 /* Allocation */
@@ -1046,8 +1105,8 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
 	return ob;
 }
 
-/* _only_ for allocating the journal and btree roots on a brand new fs: */
-int bch2_bucket_alloc_startup(struct bch_fs *c, struct bch_dev *ca)
+/* _only_ for allocating the journal on a new device: */
+long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
 {
 	struct bucket_array *buckets;
 	ssize_t b;
@@ -1056,14 +1115,8 @@ int bch2_bucket_alloc_startup(struct bch_fs *c, struct bch_dev *ca)
 	buckets = bucket_array(ca);
 
 	for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++)
-		if (is_available_bucket(buckets->b[b].mark)) {
-			bch2_mark_alloc_bucket(c, ca, b, true,
-					       gc_pos_alloc(c, NULL),
-					       BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
-					       BCH_BUCKET_MARK_GC_LOCK_HELD);
-			set_bit(b, ca->buckets_dirty);
+		if (is_available_bucket(buckets->b[b].mark))
 			goto success;
-		}
 	b = -1;
 success:
 	rcu_read_unlock();
@@ -1135,9 +1188,8 @@ int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 			break;
 	}
 
-	if (unlikely(test_bit(BCH_FS_BRAND_NEW_FS, &c->flags)) &&
-	    (bucket = bch2_bucket_alloc_startup(c, ca)) >= 0)
-		goto out;
-
 	if (cl)
 		closure_wait(&c->freelist_wait, cl);
 
 	spin_unlock(&c->freelist_lock);
@@ -1218,7 +1270,7 @@ void bch2_wp_rescale(struct bch_fs *c, struct bch_dev *ca,
 		*v = *v < scale ? 0 : *v - scale;
 }
 
-static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
+static enum bucket_alloc_ret bch2_bucket_alloc_set(struct bch_fs *c,
 					struct write_point *wp,
 					unsigned nr_replicas,
 					enum alloc_reserve reserve,
@@ -1284,52 +1336,22 @@ static enum bucket_alloc_ret __bch2_bucket_alloc_set(struct bch_fs *c,
 			break;
 		}
 	}
-	rcu_read_unlock();
 
 	EBUG_ON(reserve == RESERVE_MOVINGGC &&
 		ret != ALLOC_SUCCESS &&
 		ret != OPEN_BUCKETS_EMPTY);
+	rcu_read_unlock();
 
-	return ret;
-}
-
-static int bch2_bucket_alloc_set(struct bch_fs *c, struct write_point *wp,
-				 unsigned nr_replicas,
-				 enum alloc_reserve reserve,
-				 struct bch_devs_mask *devs,
-				 struct closure *cl)
-{
-	bool waiting = false;
-
-	while (1) {
-		switch (__bch2_bucket_alloc_set(c, wp, nr_replicas,
-						reserve, devs, cl)) {
-		case ALLOC_SUCCESS:
-			if (waiting)
-				closure_wake_up(&c->freelist_wait);
-
-			return 0;
-
-		case NO_DEVICES:
-			if (waiting)
-				closure_wake_up(&c->freelist_wait);
-			return -EROFS;
-
-		case FREELIST_EMPTY:
-			if (!cl)
-				return -ENOSPC;
-
-			if (waiting)
-				return -EAGAIN;
-
-			/* Retry allocation after adding ourself to waitlist: */
-			closure_wait(&c->freelist_wait, cl);
-			waiting = true;
-			break;
-
-		case OPEN_BUCKETS_EMPTY:
-			return cl ? -EAGAIN : -ENOSPC;
-
-		default:
-			BUG();
-		}
+	switch (ret) {
+	case ALLOC_SUCCESS:
+		return 0;
+	case NO_DEVICES:
+		return -EROFS;
+	case FREELIST_EMPTY:
+	case OPEN_BUCKETS_EMPTY:
+		return cl ? -EAGAIN : -ENOSPC;
+	default:
+		BUG();
 	}
 }
@@ -1530,11 +1552,12 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
 	nr_ptrs_have	= wp->first_ptr;
 
 	/* does writepoint have ptrs we don't want to use? */
-	writepoint_for_each_ptr(wp, ob, i)
-		if (!dev_idx_in_target(c, ob->ptr.dev, target)) {
-			swap(wp->ptrs[i], wp->ptrs[wp->first_ptr]);
-			wp->first_ptr++;
-		}
+	if (target)
+		writepoint_for_each_ptr(wp, ob, i)
+			if (!dev_idx_in_target(c, ob->ptr.dev, target)) {
+				swap(wp->ptrs[i], wp->ptrs[wp->first_ptr]);
+				wp->first_ptr++;
+			}
 
 	if (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) {
 		ret = open_bucket_add_buckets(c, target, wp, devs_have,
@@ -1551,7 +1574,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
 					      nr_replicas, reserve, cl);
 	}
 
-	if (ret)
+	if (ret && ret != -EROFS)
 		goto err;
 alloc_done:
 	/* check for more than one cache: */
@@ -1584,6 +1607,13 @@ alloc_done:
 		nr_ptrs_effective += ca->mi.durability;
 	}
 
+	if (ret == -EROFS &&
+	    nr_ptrs_effective >= nr_replicas_required)
+		ret = 0;
+
+	if (ret)
+		goto err;
+
 	if (nr_ptrs_effective > nr_replicas) {
 		writepoint_for_each_ptr(wp, ob, i) {
 			ca = bch_dev_bkey_exists(c, ob->ptr.dev);
@@ -1749,14 +1779,14 @@ void bch2_recalc_capacity(struct bch_fs *c)
 
 	if (c->capacity) {
 		bch2_io_timer_add(&c->io_clock[READ],
-				  &c->prio_clock[READ].rescale);
+				  &c->bucket_clock[READ].rescale);
 		bch2_io_timer_add(&c->io_clock[WRITE],
-				  &c->prio_clock[WRITE].rescale);
+				  &c->bucket_clock[WRITE].rescale);
 	} else {
 		bch2_io_timer_del(&c->io_clock[READ],
-				  &c->prio_clock[READ].rescale);
+				  &c->bucket_clock[READ].rescale);
 		bch2_io_timer_del(&c->io_clock[WRITE],
-				  &c->prio_clock[WRITE].rescale);
+				  &c->bucket_clock[WRITE].rescale);
 	}
 
 	/* Wake up case someone was waiting for buckets */
@@ -1889,7 +1919,8 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
 	if (ca->alloc_thread)
 		return 0;
 
-	p = kthread_create(bch2_allocator_thread, ca, "bcache_allocator");
+	p = kthread_create(bch2_allocator_thread, ca,
+			   "bch_alloc[%s]", ca->name);
 	if (IS_ERR(p))
 		return PTR_ERR(p);
 
@@ -1923,7 +1954,7 @@ static void allocator_start_issue_discards(struct bch_fs *c)
 static int __bch2_fs_allocator_start(struct bch_fs *c)
 {
 	struct bch_dev *ca;
-	size_t bu, i, devs_have_enough = 0;
+	size_t bu, i;
 	unsigned dev_iter;
 	u64 journal_seq = 0;
 	bool invalidating_data = false;
@@ -1964,16 +1995,21 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 
 	/* did we find enough buckets? */
 	for_each_rw_member(ca, c, dev_iter)
-		devs_have_enough += (fifo_used(&ca->free_inc) >=
-				     ca->free[RESERVE_BTREE].size);
+		if (fifo_used(&ca->free_inc) < ca->free[RESERVE_BTREE].size) {
+			percpu_ref_put(&ca->io_ref);
+			goto not_enough;
+		}
 
-	if (devs_have_enough >= c->opts.metadata_replicas)
-		return 0;
+	return 0;
+not_enough:
+	pr_debug("did not find enough empty buckets; issuing discards");
+
+	/* clear out free_inc - find_reclaimable_buckets() assumes it's empty */
+	for_each_rw_member(ca, c, dev_iter)
+		discard_invalidated_buckets(c, ca);
+
+	pr_debug("scanning for reclaimable buckets");
 
 	for_each_rw_member(ca, c, dev_iter) {
 		BUG_ON(!fifo_empty(&ca->free_inc));
 		ca->free_inc.front = ca->free_inc.back	= 0;
@@ -1988,6 +2024,8 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 			break;
 	}
 
+	pr_debug("done scanning for reclaimable buckets");
+
 	/*
 	 * We're moving buckets to freelists _before_ they've been marked as
 	 * invalidated on disk - we have to so that we can allocate new btree
@@ -1997,10 +2035,13 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 	 * have cached data in them, which is live until they're marked as
 	 * invalidated on disk:
 	 */
-	if (invalidating_data)
+	if (invalidating_data) {
+		pr_debug("invalidating existing data");
 		set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
-	else
+	} else {
+		pr_debug("issuing discards");
 		allocator_start_issue_discards(c);
+	}
 
 	/*
 	 * XXX: it's possible for this to deadlock waiting on journal reclaim,
@@ -2017,13 +2058,15 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 	}
 
 	if (invalidating_data) {
+		pr_debug("flushing journal");
+
 		ret = bch2_journal_flush_seq(&c->journal, journal_seq);
 		if (ret)
 			return ret;
-	}
 
-	if (invalidating_data)
+		pr_debug("issuing discards");
 		allocator_start_issue_discards(c);
+	}
 
 	for_each_rw_member(ca, c, dev_iter)
 		while (ca->nr_invalidated) {
@@ -2038,19 +2081,43 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 		struct bucket_table *tbl;
 		struct rhash_head *pos;
 		struct btree *b;
+		bool flush_updates;
+		size_t nr_pending_updates;
 
 		clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
 again:
+		pr_debug("flushing dirty btree nodes");
 		cond_resched();
 
+		flush_updates = false;
+		nr_pending_updates = bch2_btree_interior_updates_nr_pending(c);
+
 		rcu_read_lock();
 		for_each_cached_btree(b, c, tbl, i, pos)
 			if (btree_node_dirty(b) && (!b->written || b->level)) {
-				rcu_read_unlock();
-				six_lock_read(&b->lock);
-				bch2_btree_node_write(c, b, SIX_LOCK_read);
-				six_unlock_read(&b->lock);
-				goto again;
+				if (btree_node_may_write(b)) {
+					rcu_read_unlock();
+					six_lock_read(&b->lock);
+					bch2_btree_node_write(c, b, SIX_LOCK_read);
+					six_unlock_read(&b->lock);
+					goto again;
+				} else {
+					flush_updates = true;
+				}
 			}
 		rcu_read_unlock();
+
+		/*
+		 * This is ugly, but it's needed to flush btree node writes
+		 * without spinning...
+		 */
+		if (flush_updates) {
+			closure_wait_event(&c->btree_interior_update_wait,
+				bch2_btree_interior_updates_nr_pending(c) <
+				nr_pending_updates);
+			goto again;
+		}
 	}
 
 	return 0;
@@ -2087,8 +2154,8 @@ void bch2_fs_allocator_init(struct bch_fs *c)
 
 	mutex_init(&c->write_points_hash_lock);
 	spin_lock_init(&c->freelist_lock);
-	bch2_prio_timer_init(c, READ);
-	bch2_prio_timer_init(c, WRITE);
+	bch2_bucket_clock_init(c, READ);
+	bch2_bucket_clock_init(c, WRITE);
 
 	/* open bucket 0 is a sentinal NULL: */
 	spin_lock_init(&c->open_buckets[0].lock);
@@ -9,6 +9,14 @@ struct bch_dev;
 struct bch_fs;
 struct bch_devs_List;
 
+const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_alloc_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
+
+#define bch2_bkey_alloc_ops (struct bkey_ops) {		\
+	.key_invalid	= bch2_alloc_invalid,		\
+	.val_to_text	= bch2_alloc_to_text,		\
+}
+
 struct dev_alloc_list {
 	unsigned	nr;
 	u8		devs[BCH_SB_MEMBERS_MAX];
@@ -30,6 +38,8 @@ enum bucket_alloc_ret {
 	NO_DEVICES	= -3,	/* -EROFS */
 };
 
+long bch2_bucket_alloc_new_fs(struct bch_dev *);
+
 int bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, enum alloc_reserve, bool,
 		      struct closure *);
 
@@ -127,6 +137,4 @@ int bch2_alloc_write(struct bch_fs *);
 int bch2_fs_allocator_start(struct bch_fs *);
 void bch2_fs_allocator_init(struct bch_fs *);
 
-extern const struct bkey_ops bch2_bkey_alloc_ops;
-
 #endif /* _BCACHEFS_ALLOC_H */
@@ -8,7 +8,7 @@
 #include "fifo.h"
 
 /* There's two of these clocks, one for reads and one for writes: */
-struct prio_clock {
+struct bucket_clock {
 	/*
 	 * "now" in (read/write) IO time - incremented whenever we do X amount
 	 * of reads or writes.
@@ -23,7 +23,7 @@ struct prio_clock {
 	 * consistent.
 	 */
 	u16			hand;
-	u16			min_prio;
+	u16			max_last_io;
 
 	int			rw;
 
@@ -80,6 +80,7 @@ struct write_point_specifier {
 
 struct alloc_heap_entry {
 	size_t			bucket;
+	size_t			nr;
 	unsigned long		key;
 };
 
@@ -384,7 +384,7 @@ struct bch_dev {
 	alloc_fifo		free[RESERVE_NR];
 	alloc_fifo		free_inc;
 	spinlock_t		freelist_lock;
-	unsigned		nr_invalidated;
+	size_t			nr_invalidated;
 
 	u8			open_buckets_partial[OPEN_BUCKETS_COUNT];
 	unsigned		open_buckets_partial_nr;
@@ -392,7 +392,7 @@ struct bch_dev {
 	size_t			fifo_last_bucket;
 
 	/* last calculated minimum prio */
-	u16			min_prio[2];
+	u16			max_last_bucket_io[2];
 
 	atomic_long_t		saturated_count;
 	size_t			inc_gen_needs_gc;
@@ -431,11 +431,11 @@ struct bch_dev {
 	 */
 enum {
 	/* startup: */
-	BCH_FS_BRAND_NEW_FS,
 	BCH_FS_ALLOC_READ_DONE,
+	BCH_FS_ALLOCATOR_STARTED,
 	BCH_FS_INITIAL_GC_DONE,
 	BCH_FS_FSCK_DONE,
 	BCH_FS_STARTED,
 
 	/* shutdown: */
 	BCH_FS_EMERGENCY_RO,
@@ -519,8 +519,7 @@ struct bch_fs {
 		u64		features;
 	} sb;
 
-	struct bch_sb		*disk_sb;
-	unsigned		disk_sb_order;
+	struct bch_sb_handle	disk_sb;
 
 	unsigned short		block_bits;	/* ilog2(block_size) */
 
@@ -595,7 +594,7 @@ struct bch_fs {
 	 * those together consistently we keep track of the smallest nonzero
 	 * priority of any bucket.
 	 */
-	struct prio_clock	prio_clock[2];
+	struct bucket_clock	bucket_clock[2];
 
 	struct io_clock		io_clock[2];
 
@@ -955,8 +955,9 @@ struct bch_disk_group {
 	__le64			flags[2];
 };
 
-LE64_BITMASK(BCH_GROUP_DELETED,		struct bch_disk_group, flags[0], 0, 1)
-LE64_BITMASK(BCH_GROUP_DATA_ALLOWED,	struct bch_disk_group, flags[0], 1, 6)
+LE64_BITMASK(BCH_GROUP_DELETED,		struct bch_disk_group, flags[0], 0,  1)
+LE64_BITMASK(BCH_GROUP_DATA_ALLOWED,	struct bch_disk_group, flags[0], 1,  6)
+LE64_BITMASK(BCH_GROUP_PARENT,		struct bch_disk_group, flags[0], 6, 24)
 
 struct bch_sb_field_disk_groups {
 	struct bch_sb_field	field;
@@ -10,20 +10,20 @@
 #include "quota.h"
 #include "xattr.h"
 
-const struct bkey_ops *bch2_bkey_ops[] = {
-	[BKEY_TYPE_EXTENTS]	= &bch2_bkey_extent_ops,
-	[BKEY_TYPE_INODES]	= &bch2_bkey_inode_ops,
-	[BKEY_TYPE_DIRENTS]	= &bch2_bkey_dirent_ops,
-	[BKEY_TYPE_XATTRS]	= &bch2_bkey_xattr_ops,
-	[BKEY_TYPE_ALLOC]	= &bch2_bkey_alloc_ops,
-	[BKEY_TYPE_QUOTAS]	= &bch2_bkey_quota_ops,
-	[BKEY_TYPE_BTREE]	= &bch2_bkey_btree_ops,
+const struct bkey_ops bch2_bkey_ops[] = {
+	[BKEY_TYPE_EXTENTS]	= bch2_bkey_extent_ops,
+	[BKEY_TYPE_INODES]	= bch2_bkey_inode_ops,
+	[BKEY_TYPE_DIRENTS]	= bch2_bkey_dirent_ops,
+	[BKEY_TYPE_XATTRS]	= bch2_bkey_xattr_ops,
+	[BKEY_TYPE_ALLOC]	= bch2_bkey_alloc_ops,
+	[BKEY_TYPE_QUOTAS]	= bch2_bkey_quota_ops,
+	[BKEY_TYPE_BTREE]	= bch2_bkey_btree_ops,
 };
 
 const char *bch2_bkey_val_invalid(struct bch_fs *c, enum bkey_type type,
 				  struct bkey_s_c k)
 {
-	const struct bkey_ops *ops = bch2_bkey_ops[type];
+	const struct bkey_ops *ops = &bch2_bkey_ops[type];
 
 	switch (k.k->type) {
 	case KEY_TYPE_DELETED:
@@ -51,7 +51,7 @@ const char *bch2_bkey_val_invalid(struct bch_fs *c, enum bkey_type type,
 const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type,
 				struct bkey_s_c k)
 {
-	const struct bkey_ops *ops = bch2_bkey_ops[type];
+	const struct bkey_ops *ops = &bch2_bkey_ops[type];
 
 	if (k.k->u64s < BKEY_U64s)
 		return "u64s too small";
@@ -100,7 +100,7 @@ const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k)
 void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
 {
 	enum bkey_type type = btree_node_type(b);
-	const struct bkey_ops *ops = bch2_bkey_ops[type];
+	const struct bkey_ops *ops = &bch2_bkey_ops[type];
 	const char *invalid;
 
 	BUG_ON(!k.k->u64s);
@@ -141,7 +141,7 @@ int bch2_bkey_to_text(char *buf, size_t size, const struct bkey *k)
 int bch2_val_to_text(struct bch_fs *c, enum bkey_type type,
 		     char *buf, size_t size, struct bkey_s_c k)
 {
-	const struct bkey_ops *ops = bch2_bkey_ops[type];
+	const struct bkey_ops *ops = &bch2_bkey_ops[type];
 	char *out = buf, *end = buf + size;
 
 	switch (k.k->type) {
@@ -182,7 +182,7 @@ void bch2_bkey_swab(enum bkey_type type,
 		    const struct bkey_format *f,
 		    struct bkey_packed *k)
 {
-	const struct bkey_ops *ops = bch2_bkey_ops[type];
+	const struct bkey_ops *ops = &bch2_bkey_ops[type];
 
 	bch2_bkey_swab_key(f, k);
 
@@ -81,6 +81,6 @@ int bch2_bkey_val_to_text(struct bch_fs *, enum bkey_type,
 void bch2_bkey_swab(enum bkey_type, const struct bkey_format *,
 		    struct bkey_packed *);
 
-extern const struct bkey_ops *bch2_bkey_ops[];
+extern const struct bkey_ops bch2_bkey_ops[];
 
 #endif /* _BCACHEFS_BKEY_METHODS_H */
@@ -18,6 +18,7 @@
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
+#include "replicas.h"
 #include "super-io.h"
 
 #include <linux/slab.h>
@@ -317,7 +318,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
 	unsigned i;
 	u64 b;
 
-	lockdep_assert_held(&c->sb_lock);
+	if (c)
+		lockdep_assert_held(&c->sb_lock);
 
 	for (i = 0; i < layout->nr_superblocks; i++) {
 		u64 offset = le64_to_cpu(layout->sb_offset[i]);
@@ -331,7 +333,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
 				  BCH_DATA_SB, flags);
 	}
 
-	spin_lock(&c->journal.lock);
+	if (c)
+		spin_lock(&c->journal.lock);
 
 	for (i = 0; i < ca->journal.nr; i++) {
 		b = ca->journal.buckets[i];
@@ -340,7 +343,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
 			  gc_phase(GC_PHASE_SB), flags);
 	}
 
-	spin_unlock(&c->journal.lock);
+	if (c)
+		spin_unlock(&c->journal.lock);
 }
 
 static void bch2_mark_superblocks(struct bch_fs *c)
@@ -1034,8 +1038,8 @@ static int __bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
 	int ret;
 
 	mutex_lock(&c->sb_lock);
-	if (!bch2_sb_get_replicas(c->disk_sb)) {
-		if (BCH_SB_INITIALIZED(c->disk_sb))
+	if (!bch2_sb_get_replicas(c->disk_sb.sb)) {
+		if (BCH_SB_INITIALIZED(c->disk_sb.sb))
 			bch_info(c, "building replicas info");
 		set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
 	}
@@ -1290,16 +1290,19 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 
 struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
 {
-	if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
-		struct bkey_s_c k;
-
-		k = bch2_btree_iter_peek_slot(iter);
-		if (btree_iter_err(k))
-			return k;
-	}
-
 	iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
 
+	if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
+		/*
+		 * XXX: when we just need to relock we should be able to avoid
+		 * calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
+		 * for that to work
+		 */
+		btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+
+		return bch2_btree_iter_peek_slot(iter);
+	}
+
 	if (!bkey_deleted(&iter->k))
 		__btree_iter_advance(&iter->l[0]);
 
@@ -1318,6 +1321,8 @@ void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c,
 
 	iter->c			= c;
 	iter->pos		= pos;
+	bkey_init(&iter->k);
+	iter->k.p		= pos;
 	iter->flags		= flags;
 	iter->uptodate		= BTREE_ITER_NEED_TRAVERSE;
 	iter->btree_id		= btree_id;
@@ -1330,6 +1335,10 @@ void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c,
 	iter->l[iter->level].b	= BTREE_ITER_NOT_END;
 	iter->next		= iter;
 
+	if (unlikely((flags & BTREE_ITER_IS_EXTENTS) &&
+		     !bkey_cmp(pos, POS_MAX)))
+		iter->uptodate = BTREE_ITER_END;
+
 	prefetch(c->btree_roots[btree_id].b);
 }
 
@@ -231,6 +231,20 @@ static inline int btree_iter_cmp(const struct btree_iter *l,
 	return __btree_iter_cmp(l->btree_id, l->pos, r);
 }
 
+/*
+ * Unlocks before scheduling
+ * Note: does not revalidate iterator
+ */
+static inline void bch2_btree_iter_cond_resched(struct btree_iter *iter)
+{
+	if (need_resched()) {
+		bch2_btree_iter_unlock(iter);
+		schedule();
+	} else if (race_fault()) {
+		bch2_btree_iter_unlock(iter);
+	}
+}
+
 #define __for_each_btree_node(_iter, _c, _btree_id, _start,		\
 			      _locks_want, _depth, _flags, _b)		\
 	for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), _start,	\
@@ -253,6 +267,8 @@ static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
 static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
 						     unsigned flags)
 {
+	bch2_btree_iter_cond_resched(iter);
+
 	return flags & BTREE_ITER_SLOTS
 		? bch2_btree_iter_next_slot(iter)
 		: bch2_btree_iter_next(iter);
@@ -275,18 +291,4 @@ static inline int btree_iter_err(struct bkey_s_c k)
 	return PTR_ERR_OR_ZERO(k.k);
 }
 
-/*
- * Unlocks before scheduling
- * Note: does not revalidate iterator
- */
-static inline void bch2_btree_iter_cond_resched(struct btree_iter *iter)
-{
-	if (need_resched()) {
-		bch2_btree_iter_unlock(iter);
-		schedule();
-	} else if (race_fault()) {
-		bch2_btree_iter_unlock(iter);
-	}
-}
-
 #endif /* _BCACHEFS_BTREE_ITER_H */
@@ -299,7 +299,7 @@ static inline enum bkey_type btree_node_type(struct btree *b)
 
 static inline const struct bkey_ops *btree_node_ops(struct btree *b)
 {
-	return bch2_bkey_ops[btree_node_type(b)];
+	return &bch2_bkey_ops[btree_node_type(b)];
 }
 
 static inline bool btree_node_has_ptrs(struct btree *b)
@@ -13,6 +13,7 @@
 #include "extents.h"
 #include "journal.h"
 #include "keylist.h"
+#include "replicas.h"
 #include "super-io.h"
 
 #include <linux/random.h>
@@ -2116,3 +2117,16 @@ ssize_t bch2_btree_updates_print(struct bch_fs *c, char *buf)
 
 	return out - buf;
 }
+
+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c)
+{
+	size_t ret = 0;
+	struct list_head *i;
+
+	mutex_lock(&c->btree_interior_update_lock);
+	list_for_each(i, &c->btree_interior_update_list)
+		ret++;
+	mutex_unlock(&c->btree_interior_update_lock);
+
+	return ret;
+}
@@ -343,4 +343,6 @@ static inline bool journal_res_insert_fits(struct btree_insert *trans,
 
 ssize_t bch2_btree_updates_print(struct bch_fs *, char *);
 
+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);
+
 #endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */
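For context, the only consumer of the new helper visible in this commit is the allocator-start hunk earlier: it snapshots the pending interior-update count, tries to write out dirty nodes, and waits for the count to drop before rescanning, roughly:

    /* excerpted from the __bch2_fs_allocator_start() hunk above */
    nr_pending_updates = bch2_btree_interior_updates_nr_pending(c);
    /* ... attempt btree node writes ... */
    closure_wait_event(&c->btree_interior_update_wait,
            bch2_btree_interior_updates_nr_pending(c) < nr_pending_updates);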
@@ -443,8 +443,20 @@ split:
 	 * potentially blocks the allocator:
 	 */
 	ret = bch2_btree_split_leaf(c, split, trans->flags);
+
+	/*
+	 * This can happen when we insert part of an extent - with an update
+	 * with multiple keys, we don't want to redo the entire update - that's
+	 * just too confusing:
+	 */
+	if (!ret &&
+	    (trans->flags & BTREE_INSERT_ATOMIC) &&
+	    trans->did_work)
+		ret = -EINTR;
+
 	if (ret)
 		goto err;
 
 	/*
 	 * if the split didn't have to drop locks the insert will still be
 	 * atomic (in the BTREE_INSERT_ATOMIC sense, what the caller peeked()
@@ -309,7 +309,7 @@ static bool bucket_became_unavailable(struct bch_fs *c,
 {
 	return is_available_bucket(old) &&
 	       !is_available_bucket(new) &&
-	       c && c->gc_pos.phase == GC_PHASE_DONE;
+	       (!c || c->gc_pos.phase == GC_PHASE_DONE);
 }
 
 void bch2_fs_usage_apply(struct bch_fs *c,
@@ -351,12 +351,16 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 {
 	struct bch_dev_usage *dev_usage;
 
-	lockdep_assert_held(&c->usage_lock);
+	if (c)
+		lockdep_assert_held(&c->usage_lock);
 
-	bch2_fs_inconsistent_on(old.data_type && new.data_type &&
-				old.data_type != new.data_type, c,
+	if (old.data_type && new.data_type &&
+	    old.data_type != new.data_type) {
+		BUG_ON(!c);
+		bch2_fs_inconsistent(c,
 			"different types of data in same bucket: %u, %u",
 			old.data_type, new.data_type);
+	}
 
 	dev_usage = this_cpu_ptr(ca->usage_percpu);
 
@@ -466,21 +470,29 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 
 	BUG_ON(!type);
 
-	lg_local_lock(&c->usage_lock);
-	g = bucket(ca, b);
+	if (likely(c)) {
+		lg_local_lock(&c->usage_lock);
 
-	if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
-	    gc_will_visit(c, pos)) {
-		lg_local_unlock(&c->usage_lock);
-		return;
+		if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
+		    gc_will_visit(c, pos)) {
+			lg_local_unlock(&c->usage_lock);
+			return;
+		}
 	}
 
+	preempt_disable();
+
+	g = bucket(ca, b);
 	old = bucket_data_cmpxchg(c, ca, g, new, ({
 		saturated_add(ca, new.dirty_sectors, sectors,
 			      GC_MAX_SECTORS_USED);
 		new.data_type	= type;
 	}));
-	lg_local_unlock(&c->usage_lock);
+
+	preempt_enable();
+
+	if (likely(c))
+		lg_local_unlock(&c->usage_lock);
 
 	BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
 	       bucket_became_unavailable(c, old, new));
@@ -859,9 +871,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 
 	bch2_copygc_stop(ca);
 
-	down_write(&c->gc_lock);
-	down_write(&ca->bucket_lock);
-	lg_global_lock(&c->usage_lock);
+	if (resize) {
+		down_write(&c->gc_lock);
+		down_write(&ca->bucket_lock);
+		lg_global_lock(&c->usage_lock);
+	}
 
 	old_buckets = bucket_array(ca);
 
@@ -885,7 +899,8 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	swap(ca->oldest_gens, oldest_gens);
 	swap(ca->buckets_dirty, buckets_dirty);
 
-	lg_global_unlock(&c->usage_lock);
+	if (resize)
+		lg_global_unlock(&c->usage_lock);
 
 	spin_lock(&c->freelist_lock);
 	for (i = 0; i < RESERVE_NR; i++) {
@@ -904,8 +919,10 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 
 	nbuckets = ca->mi.nbuckets;
 
-	up_write(&ca->bucket_lock);
-	up_write(&c->gc_lock);
+	if (resize) {
+		up_write(&ca->bucket_lock);
+		up_write(&c->gc_lock);
+	}
 
 	if (start_copygc &&
 	    bch2_copygc_start(c, ca))
@@ -31,6 +31,7 @@
 static inline struct bucket_array *bucket_array(struct bch_dev *ca)
 {
 	return rcu_dereference_check(ca->buckets,
+				     !ca->fs ||
 				     lockdep_is_held(&ca->fs->usage_lock) ||
 				     lockdep_is_held(&ca->fs->gc_lock) ||
 				     lockdep_is_held(&ca->bucket_lock));
@@ -47,7 +48,12 @@ static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
 static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca,
 					 size_t b, int rw)
 {
-	bucket(ca, b)->prio[rw] = c->prio_clock[rw].hand;
+	bucket(ca, b)->io_time[rw] = c->bucket_clock[rw].hand;
+}
+
+static inline u16 bucket_last_io(struct bch_fs *c, struct bucket *g, int rw)
+{
+	return c->bucket_clock[rw].hand - g->io_time[rw];
 }
 
 /*
@@ -31,12 +31,12 @@ struct bucket_mark {
 };

 struct bucket {
-	u16				prio[2];
-
	union {
		struct bucket_mark	_mark;
		const struct bucket_mark mark;
	};
+
+	u16				io_time[2];
 };

 struct bucket_array {

@@ -85,8 +85,9 @@ struct disk_reservation {
 };

 struct copygc_heap_entry {
+	u8			gen;
+	u32			sectors;
	u64			offset;
-	struct bucket_mark	mark;
 };

 typedef HEAP(struct copygc_heap_entry) copygc_heap;

@@ -372,6 +372,9 @@ static long bch2_ioctl_usage(struct bch_fs *c,
	unsigned i, j;
	int ret;

+	if (!test_bit(BCH_FS_STARTED, &c->flags))
+		return -EINVAL;
+
	if (copy_from_user(&arg, user_arg, sizeof(arg)))
		return -EFAULT;

@@ -460,7 +463,7 @@ static long bch2_ioctl_read_super(struct bch_fs *c,

		sb = ca->disk_sb.sb;
	} else {
-		sb = c->disk_sb;
+		sb = c->disk_sb.sb;
	}

	if (vstruct_bytes(sb) > arg.size) {

@@ -535,13 +538,22 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* ioctls that do require admin cap: */
	switch (cmd) {
	case BCH_IOCTL_START:
		BCH_IOCTL(start, struct bch_ioctl_start);
	case BCH_IOCTL_STOP:
		return bch2_ioctl_stop(c);
+	case BCH_IOCTL_READ_SUPER:
+		BCH_IOCTL(read_super, struct bch_ioctl_read_super);
+	case BCH_IOCTL_DISK_GET_IDX:
+		BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
+	}
+
+	if (!test_bit(BCH_FS_STARTED, &c->flags))
+		return -EINVAL;
+
+	/* ioctls that do require admin cap: */
+	switch (cmd) {
	case BCH_IOCTL_DISK_ADD:
		BCH_IOCTL(disk_add, struct bch_ioctl_disk);
	case BCH_IOCTL_DISK_REMOVE:

@@ -554,10 +566,6 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
		BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state);
	case BCH_IOCTL_DATA:
		BCH_IOCTL(data, struct bch_ioctl_data);
-	case BCH_IOCTL_READ_SUPER:
-		BCH_IOCTL(read_super, struct bch_ioctl_read_super);
-	case BCH_IOCTL_DISK_GET_IDX:
-		BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
	case BCH_IOCTL_DISK_RESIZE:
		BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize);
@@ -569,7 +569,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c,
	if (!bch2_key_is_encrypted(&sb_key))
		goto out;

-	ret = bch2_request_key(c->disk_sb, &user_key);
+	ret = bch2_request_key(c->disk_sb.sb, &user_key);
	if (ret) {
		bch_err(c, "error requesting encryption key: %i", ret);
		goto err;

@@ -623,7 +623,7 @@ int bch2_disable_encryption(struct bch_fs *c)

	mutex_lock(&c->sb_lock);

-	crypt = bch2_sb_get_crypt(c->disk_sb);
+	crypt = bch2_sb_get_crypt(c->disk_sb.sb);
	if (!crypt)
		goto out;

@@ -639,7 +639,7 @@ int bch2_disable_encryption(struct bch_fs *c)
	crypt->key.magic	= BCH_KEY_MAGIC;
	crypt->key.key		= key;

-	SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb, 0);
+	SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0);
	bch2_write_super(c);
out:
	mutex_unlock(&c->sb_lock);

@@ -657,7 +657,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
	mutex_lock(&c->sb_lock);

	/* Do we already have an encryption key? */
-	if (bch2_sb_get_crypt(c->disk_sb))
+	if (bch2_sb_get_crypt(c->disk_sb.sb))
		goto err;

	ret = bch2_alloc_ciphers(c);

@@ -668,7 +668,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
	get_random_bytes(&key.key, sizeof(key.key));

	if (keyed) {
-		ret = bch2_request_key(c->disk_sb, &user_key);
+		ret = bch2_request_key(c->disk_sb.sb, &user_key);
		if (ret) {
			bch_err(c, "error requesting encryption key: %i", ret);
			goto err;

@@ -685,7 +685,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
	if (ret)
		goto err;

-	crypt = bch2_fs_sb_resize_crypt(c, sizeof(*crypt) / sizeof(u64));
+	crypt = bch2_sb_resize_crypt(&c->disk_sb, sizeof(*crypt) / sizeof(u64));
	if (!crypt) {
		ret = -ENOMEM; /* XXX this technically could be -ENOSPC */
		goto err;

@@ -694,7 +694,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
	crypt->key = key;

	/* write superblock */
-	SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb, 1);
+	SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1);
	bch2_write_super(c);
err:
	mutex_unlock(&c->sb_lock);

@@ -728,7 +728,7 @@ int bch2_fs_encryption_init(struct bch_fs *c)
		goto out;
	}

-	crypt = bch2_sb_get_crypt(c->disk_sb);
+	crypt = bch2_sb_get_crypt(c->disk_sb.sb);
	if (!crypt)
		goto out;

@@ -117,6 +117,7 @@ static const unsigned bch_crc_bytes[] = {
	[BCH_CSUM_CHACHA20_POLY1305_128]	= 16,
 };

+/* returns true if not equal */
 static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r)
 {
	/*

@@ -3,7 +3,7 @@

 #include "util.h"

-#define NR_IO_TIMERS		8
+#define NR_IO_TIMERS		(BCH_SB_MEMBERS_MAX * 3)

 /*
  * Clocks/timers in units of sectors of IO:

@@ -500,7 +500,7 @@ int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
		return ret;
	}

-	c->disk_sb->features[0] |= cpu_to_le64(f);
+	c->disk_sb.sb->features[0] |= cpu_to_le64(f);
	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);
||||
|
@ -212,17 +212,20 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
|
||||
if (!i->size)
|
||||
return i->ret;
|
||||
|
||||
for_each_btree_key(&iter, i->c, i->id, i->from,
|
||||
BTREE_ITER_PREFETCH, k) {
|
||||
i->from = iter.pos;
|
||||
bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH);
|
||||
k = bch2_btree_iter_peek(&iter);
|
||||
|
||||
while (k.k && !(err = btree_iter_err(k))) {
|
||||
bch2_bkey_val_to_text(i->c, bkey_type(0, i->id),
|
||||
i->buf, sizeof(i->buf), k);
|
||||
i->buf, sizeof(i->buf), k);
|
||||
i->bytes = strlen(i->buf);
|
||||
BUG_ON(i->bytes >= PAGE_SIZE);
|
||||
i->buf[i->bytes] = '\n';
|
||||
i->bytes++;
|
||||
|
||||
k = bch2_btree_iter_next(&iter);
|
||||
i->from = iter.pos;
|
||||
|
||||
err = flush_buf(i);
|
||||
if (err)
|
||||
break;
|
||||
@ -230,7 +233,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
|
||||
if (!i->size)
|
||||
break;
|
||||
}
|
||||
err = bch2_btree_iter_unlock(&iter) ?: err;
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
return err < 0 ? err : i->ret;
|
||||
}
|
||||
|
@ -79,8 +79,7 @@ const struct bch_hash_desc bch2_dirent_hash_desc = {
|
||||
.cmp_bkey = dirent_cmp_bkey,
|
||||
};
|
||||
|
||||
static const char *bch2_dirent_invalid(const struct bch_fs *c,
|
||||
struct bkey_s_c k)
|
||||
const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_dirent d;
|
||||
unsigned len;
|
||||
@ -116,8 +115,8 @@ static const char *bch2_dirent_invalid(const struct bch_fs *c,
|
||||
}
|
||||
}
|
||||
|
||||
static void bch2_dirent_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
void bch2_dirent_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_dirent d;
|
||||
size_t n = 0;
|
||||
@ -136,11 +135,6 @@ static void bch2_dirent_to_text(struct bch_fs *c, char *buf,
|
||||
}
|
||||
}
|
||||
|
||||
const struct bkey_ops bch2_bkey_dirent_ops = {
|
||||
.key_invalid = bch2_dirent_invalid,
|
||||
.val_to_text = bch2_dirent_to_text,
|
||||
};
|
||||
|
||||
static struct bkey_i_dirent *dirent_create_key(u8 type,
|
||||
const struct qstr *name, u64 dst)
|
||||
{
|
||||
|
@ -4,7 +4,14 @@
|
||||
#include "str_hash.h"
|
||||
|
||||
extern const struct bch_hash_desc bch2_dirent_hash_desc;
|
||||
extern const struct bkey_ops bch2_bkey_dirent_ops;
|
||||
|
||||
const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_dirent_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
|
||||
#define bch2_bkey_dirent_ops (struct bkey_ops) { \
|
||||
.key_invalid = bch2_dirent_invalid, \
|
||||
.val_to_text = bch2_dirent_to_text, \
|
||||
}
|
||||
|
||||
struct qstr;
|
||||
struct file;
|
||||
|
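Note on the pattern in this hunk: the commit replaces an exported const struct of function pointers with a macro expanding to a C99 compound literal, so the ops value lives in the header and no symbol needs to be exported. A minimal standalone sketch of that pattern follows; the names (struct ops, my_ops, my_validate) are illustrative, not from the tree.

#include <stdio.h>

/* A vtable-like ops struct, in the spirit of bkey_ops: */
struct ops {
	const char *(*validate)(int v);
	void        (*print)(int v);
};

static const char *my_validate(int v) { return v < 0 ? "negative" : NULL; }
static void my_print(int v) { printf("%d\n", v); }

/*
 * Instead of one extern const struct defined in a .c file, the header
 * expands to a compound literal with designated initializers:
 */
#define my_ops ((struct ops) {			\
	.validate	= my_validate,		\
	.print		= my_print,		\
})

int main(void)
{
	struct ops o = my_ops;		/* copies the compound literal */
	const char *err = o.validate(-1);

	if (err)
		printf("invalid: %s\n", err);
	o.print(42);
	return 0;
}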
libbcachefs/disk_groups.c (new file, 462 lines)
@@ -0,0 +1,462 @@
#include "bcachefs.h"
#include "disk_groups.h"
#include "super-io.h"

#include <linux/sort.h>

static int group_cmp(const void *_l, const void *_r)
{
	const struct bch_disk_group *l = _l;
	const struct bch_disk_group *r = _r;

	return ((BCH_GROUP_DELETED(l) > BCH_GROUP_DELETED(r)) -
		(BCH_GROUP_DELETED(l) < BCH_GROUP_DELETED(r))) ?:
		((BCH_GROUP_PARENT(l) > BCH_GROUP_PARENT(r)) -
		 (BCH_GROUP_PARENT(l) < BCH_GROUP_PARENT(r))) ?:
		strncmp(l->label, r->label, sizeof(l->label));
}

const char *bch2_sb_disk_groups_validate(struct bch_sb *sb,
					 struct bch_sb_field *f)
{
	struct bch_sb_field_disk_groups *groups =
		field_to_type(f, disk_groups);
	struct bch_disk_group *g, *sorted = NULL;
	struct bch_sb_field_members *mi;
	struct bch_member *m;
	unsigned i, nr_groups, len;
	const char *err = NULL;

	mi		= bch2_sb_get_members(sb);
	groups		= bch2_sb_get_disk_groups(sb);
	nr_groups	= disk_groups_nr(groups);

	for (m = mi->members;
	     m < mi->members + sb->nr_devices;
	     m++) {
		unsigned g;

		if (!BCH_MEMBER_GROUP(m))
			continue;

		g = BCH_MEMBER_GROUP(m) - 1;

		if (g >= nr_groups ||
		    BCH_GROUP_DELETED(&groups->entries[g]))
			return "disk has invalid group";
	}

	if (!nr_groups)
		return NULL;

	for (g = groups->entries;
	     g < groups->entries + nr_groups;
	     g++) {
		if (BCH_GROUP_DELETED(g))
			continue;

		len = strnlen(g->label, sizeof(g->label));
		if (!len) {
			err = "group with empty label";
			goto err;
		}
	}

	sorted = kmalloc_array(nr_groups, sizeof(*sorted), GFP_KERNEL);
	if (!sorted)
		return "cannot allocate memory";

	memcpy(sorted, groups->entries, nr_groups * sizeof(*sorted));
	sort(sorted, nr_groups, sizeof(*sorted), group_cmp, NULL);

	for (i = 0; i + 1 < nr_groups; i++)
		if (!BCH_GROUP_DELETED(sorted + i) &&
		    !group_cmp(sorted + i, sorted + i + 1)) {
			err = "duplicate groups";
			goto err;
		}

	err = NULL;
err:
	kfree(sorted);
	return err;
}
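The validate function above detects duplicate groups by sorting a scratch copy and comparing neighbours, which turns an O(n^2) pairwise check into O(n log n). A self-contained userspace sketch of the same idea, with illustrative types and values:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct group {
	unsigned parent;
	char label[16];
};

/* Chained three-way compare, like group_cmp's use of ?: above */
static int group_cmp(const void *_l, const void *_r)
{
	const struct group *l = _l, *r = _r;
	int c = (l->parent > r->parent) - (l->parent < r->parent);

	return c ? c : strncmp(l->label, r->label, sizeof(l->label));
}

/* Sort a copy, then any duplicates end up adjacent */
static int has_duplicates(const struct group *groups, size_t nr)
{
	struct group *sorted = malloc(nr * sizeof(*sorted));
	size_t i;
	int ret = 0;

	if (!sorted)
		return -1;

	memcpy(sorted, groups, nr * sizeof(*sorted));
	qsort(sorted, nr, sizeof(*sorted), group_cmp);

	for (i = 0; i + 1 < nr; i++)
		if (!group_cmp(&sorted[i], &sorted[i + 1])) {
			ret = 1;
			break;
		}

	free(sorted);
	return ret;
}

int main(void)
{
	struct group g[] = { { 0, "ssd" }, { 1, "hdd" }, { 0, "ssd" } };

	printf("duplicates: %d\n", has_duplicates(g, 3));	/* prints 1 */
	return 0;
}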

static size_t bch2_sb_disk_groups_to_text(char *buf, size_t size,
					  struct bch_sb *sb,
					  struct bch_sb_field *f)
{
	char *out = buf, *end = buf + size;
	struct bch_sb_field_disk_groups *groups =
		field_to_type(f, disk_groups);
	struct bch_disk_group *g;
	unsigned nr_groups = disk_groups_nr(groups);

	for (g = groups->entries;
	     g < groups->entries + nr_groups;
	     g++) {
		if (g != groups->entries)
			out += scnprintf(out, end - out, " ");

		if (BCH_GROUP_DELETED(g))
			out += scnprintf(out, end - out, "[deleted]");
		else
			out += scnprintf(out, end - out,
					 "[parent %llu name %s]",
					 BCH_GROUP_PARENT(g),
					 g->label);
	}

	return out - buf;
}

const struct bch_sb_field_ops bch_sb_field_ops_disk_groups = {
	.validate	= bch2_sb_disk_groups_validate,
	.to_text	= bch2_sb_disk_groups_to_text
};

int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)
{
	struct bch_sb_field_members *mi;
	struct bch_sb_field_disk_groups *groups;
	struct bch_disk_groups_cpu *cpu_g, *old_g;
	unsigned i, g, nr_groups;

	lockdep_assert_held(&c->sb_lock);

	mi		= bch2_sb_get_members(c->disk_sb.sb);
	groups		= bch2_sb_get_disk_groups(c->disk_sb.sb);
	nr_groups	= disk_groups_nr(groups);

	if (!groups)
		return 0;

	cpu_g = kzalloc(sizeof(*cpu_g) +
			sizeof(cpu_g->entries[0]) * nr_groups, GFP_KERNEL);
	if (!cpu_g)
		return -ENOMEM;

	cpu_g->nr = nr_groups;

	for (i = 0; i < nr_groups; i++) {
		struct bch_disk_group *src	= &groups->entries[i];
		struct bch_disk_group_cpu *dst	= &cpu_g->entries[i];

		dst->deleted	= BCH_GROUP_DELETED(src);
		dst->parent	= BCH_GROUP_PARENT(src);
	}

	for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
		struct bch_member *m = mi->members + i;
		struct bch_disk_group_cpu *dst =
			&cpu_g->entries[BCH_MEMBER_GROUP(m)];

		if (!bch2_member_exists(m))
			continue;

		g = BCH_MEMBER_GROUP(m);
		while (g) {
			dst = &cpu_g->entries[g - 1];
			__set_bit(i, dst->devs.d);
			g = dst->parent;
		}
	}

	old_g = c->disk_groups;
	rcu_assign_pointer(c->disk_groups, cpu_g);
	if (old_g)
		kfree_rcu(old_g, rcu);

	return 0;
}

const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned target)
{
	struct target t = target_decode(target);

	switch (t.type) {
	case TARGET_DEV: {
		struct bch_dev *ca = t.dev < c->sb.nr_devices
			? rcu_dereference(c->devs[t.dev])
			: NULL;
		return ca ? &ca->self : NULL;
	}
	case TARGET_GROUP: {
		struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);

		return t.group < g->nr && !g->entries[t.group].deleted
			? &g->entries[t.group].devs
			: NULL;
	}
	default:
		BUG();
	}
}

static int __bch2_disk_group_find(struct bch_sb_field_disk_groups *groups,
				  unsigned parent,
				  const char *name, unsigned namelen)
{
	unsigned i, nr_groups = disk_groups_nr(groups);

	if (!namelen || namelen > BCH_SB_LABEL_SIZE)
		return -EINVAL;

	for (i = 0; i < nr_groups; i++) {
		struct bch_disk_group *g = groups->entries + i;

		if (BCH_GROUP_DELETED(g))
			continue;

		if (!BCH_GROUP_DELETED(g) &&
		    BCH_GROUP_PARENT(g) == parent &&
		    strnlen(g->label, sizeof(g->label)) == namelen &&
		    !memcmp(name, g->label, namelen))
			return i;
	}

	return -1;
}

static int __bch2_disk_group_add(struct bch_sb_handle *sb, unsigned parent,
				 const char *name, unsigned namelen)
{
	struct bch_sb_field_disk_groups *groups =
		bch2_sb_get_disk_groups(sb->sb);
	unsigned i, nr_groups = disk_groups_nr(groups);
	struct bch_disk_group *g;

	if (!namelen || namelen > BCH_SB_LABEL_SIZE)
		return -EINVAL;

	for (i = 0;
	     i < nr_groups && !BCH_GROUP_DELETED(&groups->entries[i]);
	     i++)
		;

	if (i == nr_groups) {
		unsigned u64s =
			(sizeof(struct bch_sb_field_disk_groups) +
			 sizeof(struct bch_disk_group) * (nr_groups + 1)) /
			sizeof(u64);

		groups = bch2_sb_resize_disk_groups(sb, u64s);
		if (!groups)
			return -ENOSPC;

		nr_groups = disk_groups_nr(groups);
	}

	BUG_ON(i >= nr_groups);

	g = &groups->entries[i];

	memcpy(g->label, name, namelen);
	if (namelen < sizeof(g->label))
		g->label[namelen] = '\0';
	SET_BCH_GROUP_DELETED(g, 0);
	SET_BCH_GROUP_PARENT(g, parent);
	SET_BCH_GROUP_DATA_ALLOWED(g, ~0);

	return i;
}

int bch2_disk_path_find(struct bch_sb_handle *sb, const char *name)
{
	struct bch_sb_field_disk_groups *groups =
		bch2_sb_get_disk_groups(sb->sb);
	int v = -1;

	do {
		const char *next = strchrnul(name, '.');
		unsigned len = next - name;

		if (*next == '.')
			next++;

		v = __bch2_disk_group_find(groups, v + 1, name, len);
		name = next;
	} while (*name && v >= 0);

	return v;
}
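bch2_disk_path_find walks a dotted label path ("ssd.fast") one component at a time using strchrnul, resolving each component against the parent found so far. A standalone sketch of the parsing loop; strchrnul is a GNU extension, so this version carries a portable stand-in, and the function names are illustrative:

#include <stdio.h>

/* Portable stand-in for GNU strchrnul(): points at c or at the '\0' */
static const char *xstrchrnul(const char *s, int c)
{
	while (*s && *s != c)
		s++;
	return s;
}

/* Walk "ssd.fast.left" one dot-separated component at a time */
static void walk_path(const char *name)
{
	do {
		const char *next = xstrchrnul(name, '.');
		int len = (int) (next - name);

		if (*next == '.')
			next++;	/* skip the separator */

		printf("component: %.*s\n", len, name);
		name = next;
	} while (*name);
}

int main(void)
{
	walk_path("ssd.fast.left");
	return 0;
}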

int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name)
{
	struct bch_sb_field_disk_groups *groups;
	unsigned parent = 0;
	int v = -1;

	do {
		const char *next = strchrnul(name, '.');
		unsigned len = next - name;

		if (*next == '.')
			next++;

		groups = bch2_sb_get_disk_groups(sb->sb);

		v = __bch2_disk_group_find(groups, parent, name, len);
		if (v < 0)
			v = __bch2_disk_group_add(sb, parent, name, len);
		if (v < 0)
			return v;

		parent = v + 1;
		name = next;
	} while (*name && v >= 0);

	return v;
}

int bch2_disk_path_print(struct bch_sb_handle *sb,
			 char *buf, size_t len, unsigned v)
{
	char *out = buf, *end = out + len;
	struct bch_sb_field_disk_groups *groups =
		bch2_sb_get_disk_groups(sb->sb);
	struct bch_disk_group *g;
	unsigned nr = 0;
	u16 path[32];

	while (1) {
		if (nr == ARRAY_SIZE(path))
			goto inval;

		if (v >= disk_groups_nr(groups))
			goto inval;

		g = groups->entries + v;

		if (BCH_GROUP_DELETED(g))
			goto inval;

		path[nr++] = v;

		if (!BCH_GROUP_PARENT(g))
			break;

		v = BCH_GROUP_PARENT(g) - 1;
	}

	while (nr) {
		unsigned b = 0;

		v = path[--nr];
		g = groups->entries + v;

		if (end != out)
			b = min_t(size_t, end - out,
				  strnlen(g->label, sizeof(g->label)));
		memcpy(out, g->label, b);
		if (b < end - out)
			out[b] = '\0';
		out += b;

		if (nr)
			out += scnprintf(out, end - out, ".");
	}

	return out - buf;
inval:
	return scnprintf(buf, len, "invalid group %u", v);
}

int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
{
	struct bch_member *mi;
	int v = -1;

	mutex_lock(&c->sb_lock);

	if (!strlen(name) || !strcmp(name, "none"))
		goto write_sb;

	v = bch2_disk_path_find_or_create(&c->disk_sb, name);
	if (v < 0) {
		mutex_unlock(&c->sb_lock);
		return v;
	}

write_sb:
	mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
	SET_BCH_MEMBER_GROUP(mi, v + 1);

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return 0;
}

int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v)
{
	struct bch_dev *ca;
	int g;

	if (!strlen(buf) || !strcmp(buf, "none")) {
		*v = 0;
		return 0;
	}

	/* Is it a device? */
	ca = bch2_dev_lookup(c, buf);
	if (!IS_ERR(ca)) {
		*v = dev_to_target(ca->dev_idx);
		percpu_ref_put(&ca->ref);
		return 0;
	}

	mutex_lock(&c->sb_lock);
	g = bch2_disk_path_find(&c->disk_sb, buf);
	mutex_unlock(&c->sb_lock);

	if (g >= 0) {
		*v = group_to_target(g);
		return 0;
	}

	return -EINVAL;
}

int bch2_opt_target_print(struct bch_fs *c, char *buf, size_t len, u64 v)
{
	struct target t = target_decode(v);
	int ret;

	switch (t.type) {
	case TARGET_NULL:
		return scnprintf(buf, len, "none");
	case TARGET_DEV: {
		struct bch_dev *ca;

		rcu_read_lock();
		ca = t.dev < c->sb.nr_devices
			? rcu_dereference(c->devs[t.dev])
			: NULL;

		if (ca && percpu_ref_tryget(&ca->io_ref)) {
			char b[BDEVNAME_SIZE];

			ret = scnprintf(buf, len, "/dev/%s",
					bdevname(ca->disk_sb.bdev, b));
			percpu_ref_put(&ca->io_ref);
		} else if (ca) {
			ret = scnprintf(buf, len, "offline device %u", t.dev);
		} else {
			ret = scnprintf(buf, len, "invalid device %u", t.dev);
		}

		rcu_read_unlock();
		break;
	}
	case TARGET_GROUP:
		mutex_lock(&c->sb_lock);
		ret = bch2_disk_path_print(&c->disk_sb, buf, len, t.group);
		mutex_unlock(&c->sb_lock);
		break;
	default:
		BUG();
	}

	return ret;
}
libbcachefs/disk_groups.h (new file, 99 lines)
@@ -0,0 +1,99 @@
#ifndef _BCACHEFS_DISK_GROUPS_H
#define _BCACHEFS_DISK_GROUPS_H

extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups;

static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups)
{
	return groups
		? (vstruct_end(&groups->field) -
		   (void *) &groups->entries[0]) / sizeof(struct bch_disk_group)
		: 0;
}

struct target {
	enum {
		TARGET_NULL,
		TARGET_DEV,
		TARGET_GROUP,
	}			type;
	union {
		unsigned	dev;
		unsigned	group;
	};
};

#define TARGET_DEV_START	1
#define TARGET_GROUP_START	(256 + TARGET_DEV_START)

static inline u16 dev_to_target(unsigned dev)
{
	return TARGET_DEV_START + dev;
}

static inline u16 group_to_target(unsigned group)
{
	return TARGET_GROUP_START + group;
}

static inline struct target target_decode(unsigned target)
{
	if (target >= TARGET_GROUP_START)
		return (struct target) {
			.type	= TARGET_GROUP,
			.group	= target - TARGET_GROUP_START
		};

	if (target >= TARGET_DEV_START)
		return (struct target) {
			.type	= TARGET_DEV,
			.group	= target - TARGET_DEV_START
		};

	return (struct target) { .type = TARGET_NULL };
}

static inline bool dev_in_target(struct bch_dev *ca, unsigned target)
{
	struct target t = target_decode(target);

	switch (t.type) {
	case TARGET_NULL:
		return false;
	case TARGET_DEV:
		return ca->dev_idx == t.dev;
	case TARGET_GROUP:
		return ca->mi.group && ca->mi.group - 1 == t.group;
	default:
		BUG();
	}
}

static inline bool dev_idx_in_target(struct bch_fs *c, unsigned dev, unsigned target)
{
	bool ret;

	rcu_read_lock();
	ret = dev_in_target(rcu_dereference(c->devs[dev]), target);
	rcu_read_unlock();

	return ret;
}

const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *, unsigned);

int bch2_disk_path_find(struct bch_sb_handle *, const char *);
int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *);
int bch2_disk_path_print(struct bch_sb_handle *, char *, size_t, unsigned);

int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *);
int bch2_opt_target_print(struct bch_fs *, char *, size_t, u64);

int bch2_sb_disk_groups_to_cpu(struct bch_fs *);

int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);

const char *bch2_sb_validate_disk_groups(struct bch_sb *,
					 struct bch_sb_field *);

#endif /* _BCACHEFS_DISK_GROUPS_H */
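The target encoding above packs three cases into one integer: 0 means no target, values in [TARGET_DEV_START, TARGET_GROUP_START) are device indices, and everything at or above TARGET_GROUP_START is a group index. A standalone round-trip check of that encoding, copying the same constants (names outside the header are illustrative):

#include <assert.h>
#include <stdio.h>

/* Same layout as disk_groups.h: 0 = none, 256 device slots, then groups */
#define TARGET_DEV_START	1
#define TARGET_GROUP_START	(256 + TARGET_DEV_START)

enum target_type { TARGET_NULL, TARGET_DEV, TARGET_GROUP };

struct target {
	enum target_type type;
	unsigned idx;
};

static unsigned dev_to_target(unsigned dev)     { return TARGET_DEV_START + dev; }
static unsigned group_to_target(unsigned group) { return TARGET_GROUP_START + group; }

static struct target target_decode(unsigned target)
{
	if (target >= TARGET_GROUP_START)
		return (struct target) { TARGET_GROUP, target - TARGET_GROUP_START };
	if (target >= TARGET_DEV_START)
		return (struct target) { TARGET_DEV, target - TARGET_DEV_START };
	return (struct target) { TARGET_NULL, 0 };
}

int main(void)
{
	struct target t;

	t = target_decode(dev_to_target(3));
	assert(t.type == TARGET_DEV && t.idx == 3);

	t = target_decode(group_to_target(0));
	assert(t.type == TARGET_GROUP && t.idx == 0);

	assert(target_decode(0).type == TARGET_NULL);
	printf("round trips ok\n");
	return 0;
}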

@@ -14,10 +14,12 @@
 #include "checksum.h"
 #include "debug.h"
 #include "dirent.h"
+#include "disk_groups.h"
 #include "error.h"
 #include "extents.h"
 #include "inode.h"
 #include "journal.h"
+#include "replicas.h"
 #include "super.h"
 #include "super-io.h"
 #include "util.h"

@@ -25,9 +27,6 @@

 #include <trace/events/bcachefs.h>

-static enum merge_result bch2_extent_merge(struct bch_fs *, struct btree *,
-					   struct bkey_i *, struct bkey_i *);
-
 static void sort_key_next(struct btree_node_iter_large *iter,
			   struct btree *b,
			   struct btree_node_iter_set *i)

@@ -160,9 +159,13 @@ bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned targ
 {
	const struct bch_extent_ptr *ptr;

-	extent_for_each_ptr(e, ptr)
-		if (dev_in_target(c->devs[ptr->dev], target))
+	extent_for_each_ptr(e, ptr) {
+		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+		if (dev_in_target(ca, target) &&
+		    (!ptr->cached || !ptr_stale(ca, ptr)))
			return ptr;
+	}

	return NULL;
 }

@@ -356,11 +359,25 @@ restart_narrow_pointers:
	return true;
 }

+/* returns true if not equal */
+static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
+					 struct bch_extent_crc_unpacked r)
+{
+	return (l.csum_type		!= r.csum_type ||
+		l.compression_type	!= r.compression_type ||
+		l.compressed_size	!= r.compressed_size ||
+		l.uncompressed_size	!= r.uncompressed_size ||
+		l.offset		!= r.offset ||
+		l.live_size		!= r.live_size ||
+		l.nonce			!= r.nonce ||
+		bch2_crc_cmp(l.csum, r.csum));
+}
+
 void bch2_extent_drop_redundant_crcs(struct bkey_s_extent e)
 {
	union bch_extent_entry *entry = e.v->start;
	union bch_extent_crc *crc, *prev = NULL;
-	struct bch_extent_crc_unpacked u, prev_u;
+	struct bch_extent_crc_unpacked u, prev_u = { 0 };

	while (entry != extent_entry_last(e)) {
		union bch_extent_entry *next = extent_entry_next(entry);

@@ -382,7 +399,7 @@ void bch2_extent_drop_redundant_crcs(struct bkey_s_extent e)
			goto drop;
		}

-		if (prev && !memcmp(&u, &prev_u, sizeof(u))) {
+		if (prev && !bch2_crc_unpacked_cmp(u, prev_u)) {
			/* identical to previous crc entry: */
			goto drop;
		}
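The memcmp-to-fieldwise change above matters because struct padding bytes are not guaranteed to hold equal values in two logically equal structs, so memcmp can report a spurious difference. A standalone demonstration of the pitfall; the struct and values are illustrative, and whether padding exists at all depends on the ABI:

#include <stdio.h>
#include <string.h>

struct crcish {
	unsigned char type;	/* typically followed by 3 padding bytes */
	unsigned int  size;
};

static int fieldwise_neq(struct crcish l, struct crcish r)
{
	return l.type != r.type || l.size != r.size;
}

int main(void)
{
	struct crcish a, b;

	/* Give the two objects different garbage in their padding bytes: */
	memset(&a, 0xaa, sizeof(a));
	memset(&b, 0x55, sizeof(b));

	a.type = b.type = 1;
	a.size = b.size = 4096;

	/* Logically equal, but memcmp may still see the padding differ: */
	printf("memcmp:    %s\n", memcmp(&a, &b, sizeof(a)) ? "differ" : "equal");
	printf("fieldwise: %s\n", fieldwise_neq(a, b) ? "differ" : "equal");
	return 0;
}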
@@ -439,13 +456,12 @@ static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
	bch2_extent_drop_redundant_crcs(e);
 }

-static bool bch2_ptr_normalize(struct bch_fs *c, struct btree *bk,
-			       struct bkey_s k)
+bool bch2_ptr_normalize(struct bch_fs *c, struct btree *b, struct bkey_s k)
 {
	return bch2_extent_normalize(c, k);
 }

-static void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
+void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
 {
	switch (k->type) {
	case BCH_EXTENT:

@@ -628,8 +644,7 @@ use:

 /* Btree ptrs */

-static const char *bch2_btree_ptr_invalid(const struct bch_fs *c,
-					  struct bkey_s_c k)
+const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
	if (bkey_extent_is_cached(k.k))
		return "cached";

@@ -671,8 +686,8 @@ static const char *bch2_btree_ptr_invalid(const struct bch_fs *c,
	}
 }

-static void btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
-				 struct bkey_s_c k)
+void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
+			       struct bkey_s_c k)
 {
	struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
	const struct bch_extent_ptr *ptr;

@@ -727,8 +742,8 @@ err:
		mark.gen, (unsigned) mark.counter);
 }

-static void bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
-				   size_t size, struct bkey_s_c k)
+void bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
+			    size_t size, struct bkey_s_c k)
 {
	char *out = buf, *end = buf + size;
	const char *invalid;

@@ -756,13 +771,6 @@ bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
	return pick;
 }

-const struct bkey_ops bch2_bkey_btree_ops = {
-	.key_invalid	= bch2_btree_ptr_invalid,
-	.key_debugcheck	= btree_ptr_debugcheck,
-	.val_to_text	= bch2_btree_ptr_to_text,
-	.swab		= bch2_ptr_swab,
-};
-
 /* Extents */

 static bool __bch2_cut_front(struct bpos where, struct bkey_s k)
@@ -1436,7 +1444,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
 }

 static enum btree_insert_ret
-bch2_delete_fixup_extent(struct extent_insert_state *s)
+__bch2_delete_fixup_extent(struct extent_insert_state *s)
 {
	struct bch_fs *c = s->trans->c;
	struct btree_iter *iter = s->insert->iter;

@@ -1450,8 +1458,7 @@ bch2_delete_fixup_extent(struct extent_insert_state *s)

	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));

-	s->whiteout = *insert;
-	s->do_journal = false;
+	s->whiteout = *insert;

	while (bkey_cmp(s->committed, insert->k.p) < 0 &&
	       (ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK &&

@@ -1474,12 +1481,12 @@ __bch2_delete_fixup_extent(struct extent_insert_state *s)
		overlap = bch2_extent_overlap(&insert->k, k.k);

		ret = extent_insert_check_split_compressed(s, k.s_c, overlap);
-		if (ret != BTREE_INSERT_OK)
-			goto stop;
+		if (ret)
+			break;

		ret = extent_insert_advance_pos(s, k.s_c);
		if (ret)
-			goto stop;
+			break;

		s->do_journal = true;

@@ -1520,25 +1527,65 @@ next:
		bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
	}

+	if (ret == BTREE_INSERT_OK &&
+	    bkey_cmp(s->committed, insert->k.p) < 0)
+		ret = extent_insert_advance_pos(s, bkey_s_c_null);
-stop:
	extent_insert_committed(s);
+	return ret;
+}

-	bch2_fs_usage_apply(c, &s->stats, s->trans->disk_res,
-			    gc_pos_btree_node(b));
+static enum btree_insert_ret
+__bch2_insert_fixup_extent(struct extent_insert_state *s)
+{
+	struct btree_iter *iter = s->insert->iter;
+	struct btree_iter_level *l = &iter->l[0];
+	struct btree *b = l->b;
+	struct btree_node_iter *node_iter = &l->iter;
+	struct bkey_packed *_k;
+	struct bkey unpacked;
+	struct bkey_i *insert = s->insert->k;
+	enum btree_insert_ret ret = BTREE_INSERT_OK;

-	EBUG_ON(bkey_cmp(iter->pos, s->committed));
-	EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) !=
-		!!(iter->flags & BTREE_ITER_AT_END_OF_LEAF));
+	while (bkey_cmp(s->committed, insert->k.p) < 0 &&
+	       (ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK &&
+	       (_k = bch2_btree_node_iter_peek_all(node_iter, b))) {
+		struct bset_tree *t = bch2_bkey_to_bset(b, _k);
+		struct bkey_s k = __bkey_disassemble(b, _k, &unpacked);
+		enum bch_extent_overlap overlap;

-	bch2_cut_front(iter->pos, insert);
+		EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
+		EBUG_ON(bkey_cmp(iter->pos, k.k->p) >= 0);

-	if (insert->k.size && (iter->flags & BTREE_ITER_AT_END_OF_LEAF))
-		ret = BTREE_INSERT_NEED_TRAVERSE;
+		if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
+			break;

-	EBUG_ON(insert->k.size && ret == BTREE_INSERT_OK);
+		overlap = bch2_extent_overlap(&insert->k, k.k);
+
+		ret = extent_insert_check_split_compressed(s, k.s_c, overlap);
+		if (ret)
+			break;
+
+		if (!k.k->size)
+			goto squash;
+
+		/*
+		 * Only call advance pos & call hook for nonzero size extents:
+		 */
+		ret = extent_insert_advance_pos(s, k.s_c);
+		if (ret)
+			break;
+
+		if (k.k->size &&
+		    (k.k->needs_whiteout || bset_written(b, bset(b, t))))
+			insert->k.needs_whiteout = true;
+
+		if (overlap == BCH_EXTENT_OVERLAP_ALL &&
+		    bkey_whiteout(k.k) &&
+		    k.k->needs_whiteout) {
+			unreserve_whiteout(b, t, _k);
+			_k->needs_whiteout = false;
+		}
+squash:
+		ret = extent_squash(s, insert, t, _k, k, overlap);
+		if (ret != BTREE_INSERT_OK)
+			break;
+	}
+
+	return ret;
+}

@@ -1590,9 +1637,6 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
	struct btree_iter *iter	= insert->iter;
	struct btree_iter_level *l = &iter->l[0];
	struct btree *b		= l->b;
-	struct btree_node_iter *node_iter = &l->iter;
-	struct bkey_packed *_k;
-	struct bkey unpacked;
	enum btree_insert_ret ret = BTREE_INSERT_OK;

	struct extent_insert_state s = {

@@ -1605,9 +1649,6 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
	EBUG_ON(iter->level);
	EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);

-	if (s.deleting)
-		return bch2_delete_fixup_extent(&s);
-
	/*
	 * As we process overlapping extents, we advance @iter->pos both to
	 * signal to our caller (btree_insert_key()) how much of @insert->k has

@@ -1616,67 +1657,32 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
	 */
	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));

-	if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+	if (!s.deleting &&
+	    !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
		bch2_add_sectors(&s, bkey_i_to_s_c(insert->k),
				 bkey_start_offset(&insert->k->k),
				 insert->k->k.size);

-	while (bkey_cmp(s.committed, insert->k->k.p) < 0 &&
-	       (ret = extent_insert_should_stop(&s)) == BTREE_INSERT_OK &&
-	       (_k = bch2_btree_node_iter_peek_all(node_iter, b))) {
-		struct bset_tree *t = bch2_bkey_to_bset(b, _k);
-		struct bkey_s k = __bkey_disassemble(b, _k, &unpacked);
-		enum bch_extent_overlap overlap;
-
-		EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
-		EBUG_ON(bkey_cmp(iter->pos, k.k->p) >= 0);
-
-		if (bkey_cmp(bkey_start_pos(k.k), insert->k->k.p) >= 0)
-			break;
-
-		overlap = bch2_extent_overlap(&insert->k->k, k.k);
-
-		ret = extent_insert_check_split_compressed(&s, k.s_c, overlap);
-		if (ret != BTREE_INSERT_OK)
-			goto stop;
-
-		if (!k.k->size)
-			goto squash;
-
-		/*
-		 * Only call advance pos & call hook for nonzero size extents:
-		 */
-		ret = extent_insert_advance_pos(&s, k.s_c);
-		if (ret != BTREE_INSERT_OK)
-			goto stop;
-
-		if (k.k->size &&
-		    (k.k->needs_whiteout || bset_written(b, bset(b, t))))
-			insert->k->k.needs_whiteout = true;
-
-		if (overlap == BCH_EXTENT_OVERLAP_ALL &&
-		    bkey_whiteout(k.k) &&
-		    k.k->needs_whiteout) {
-			unreserve_whiteout(b, t, _k);
-			_k->needs_whiteout = false;
-		}
-squash:
-		ret = extent_squash(&s, insert->k, t, _k, k, overlap);
-		if (ret != BTREE_INSERT_OK)
-			goto stop;
-	}
+	ret = !s.deleting
+		? __bch2_insert_fixup_extent(&s)
+		: __bch2_delete_fixup_extent(&s);

-	if (ret == BTREE_INSERT_OK &&
-	    bkey_cmp(s.committed, insert->k->k.p) < 0)
-		ret = extent_insert_advance_pos(&s, bkey_s_c_null);
-stop:
-
	extent_insert_committed(&s);

+	if (s.deleting)
+		bch2_cut_front(iter->pos, insert->k);
+
	/*
	 * Subtract any remaining sectors from @insert, if we bailed out early
	 * and didn't fully insert @insert:
	 */
-	if (insert->k->k.size &&
-	    !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+	if (!s.deleting &&
+	    !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY) &&
+	    insert->k->k.size)
		bch2_subtract_sectors(&s, bkey_i_to_s_c(insert->k),
				      bkey_start_offset(&insert->k->k),
				      insert->k->k.size);

@@ -1692,13 +1698,13 @@ stop:
	if (insert->k->k.size && (iter->flags & BTREE_ITER_AT_END_OF_LEAF))
		ret = BTREE_INSERT_NEED_TRAVERSE;

-	EBUG_ON(insert->k->k.size && ret == BTREE_INSERT_OK);
+	WARN_ONCE((ret == BTREE_INSERT_OK) != (insert->k->k.size == 0),
+		  "ret %u insert->k.size %u", ret, insert->k->k.size);

	return ret;
 }

-static const char *bch2_extent_invalid(const struct bch_fs *c,
-				       struct bkey_s_c k)
+const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
	if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
		return "value too big";

@@ -1865,8 +1871,7 @@ bad_ptr:
	return;
 }

-static void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
-				   struct bkey_s_c k)
+void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
 {
	switch (k.k->type) {
	case BCH_EXTENT:

@@ -1880,8 +1885,8 @@ static void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
	}
 }

-static void bch2_extent_to_text(struct bch_fs *c, char *buf,
-				size_t size, struct bkey_s_c k)
+void bch2_extent_to_text(struct bch_fs *c, char *buf,
+			 size_t size, struct bkey_s_c k)
 {
	char *out = buf, *end = buf + size;
	const char *invalid;

@@ -1963,7 +1968,7 @@ void bch2_extent_crc_append(struct bkey_i_extent *e,
	extent_for_each_crc(extent_i_to_s(e), crc, i)
		;

-	if (!memcmp(&crc, &new, sizeof(crc)))
+	if (!bch2_crc_unpacked_cmp(crc, new))
		return;

	bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new);

@@ -2089,9 +2094,8 @@ void bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
	}
 }

-static enum merge_result bch2_extent_merge(struct bch_fs *c,
-					   struct btree *bk,
-					   struct bkey_i *l, struct bkey_i *r)
+enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b,
+				    struct bkey_i *l, struct bkey_i *r)
 {
	struct bkey_s_extent el, er;
	union bch_extent_entry *en_l, *en_r;

@@ -2410,13 +2414,3 @@ int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)

	return ret;
 }

-const struct bkey_ops bch2_bkey_extent_ops = {
-	.key_invalid	= bch2_extent_invalid,
-	.key_debugcheck	= bch2_extent_debugcheck,
-	.val_to_text	= bch2_extent_to_text,
-	.swab		= bch2_ptr_swab,
-	.key_normalize	= bch2_ptr_normalize,
-	.key_merge	= bch2_extent_merge,
-	.is_extents	= true,
-};
@@ -15,6 +15,36 @@ struct extent_insert_hook;
 struct bch_devs_mask;
 union bch_extent_crc;

+const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *,
+			       struct bkey_s_c);
+void bch2_btree_ptr_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
+void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
+
+#define bch2_bkey_btree_ops (struct bkey_ops) {		\
+	.key_invalid	= bch2_btree_ptr_invalid,	\
+	.key_debugcheck	= bch2_btree_ptr_debugcheck,	\
+	.val_to_text	= bch2_btree_ptr_to_text,	\
+	.swab		= bch2_ptr_swab,		\
+}
+
+const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
+void bch2_extent_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
+bool bch2_ptr_normalize(struct bch_fs *, struct btree *, struct bkey_s);
+enum merge_result bch2_extent_merge(struct bch_fs *, struct btree *,
+				    struct bkey_i *, struct bkey_i *);
+
+#define bch2_bkey_extent_ops (struct bkey_ops) {	\
+	.key_invalid	= bch2_extent_invalid,		\
+	.key_debugcheck	= bch2_extent_debugcheck,	\
+	.val_to_text	= bch2_extent_to_text,		\
+	.swab		= bch2_ptr_swab,		\
+	.key_normalize	= bch2_ptr_normalize,		\
+	.key_merge	= bch2_extent_merge,		\
+	.is_extents	= true,				\
+}
+
 struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *,
						    struct btree *,
						    struct btree_node_iter_large *);

@@ -23,9 +53,6 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
						      struct btree *,
						      struct btree_node_iter_large *);

-extern const struct bkey_ops bch2_bkey_btree_ops;
-extern const struct bkey_ops bch2_bkey_extent_ops;
-
 struct extent_pick_ptr
 bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
		     struct bch_devs_mask *avoid);
@@ -468,7 +468,10 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
		}

		BUG_ON(bkey_cmp(extent_iter.pos, bkey_start_pos(&k->k)));
-		BUG_ON(!ret != !k->k.size);
+
+		if (WARN_ONCE(!ret != !k->k.size,
+			      "ret %i k->size %u", ret, k->k.size))
+			ret = k->k.size ? -EINTR : 0;
err:
		if (ret == -EINTR)
			continue;
@@ -175,8 +175,7 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
	return 0;
 }

-static const char *bch2_inode_invalid(const struct bch_fs *c,
-				      struct bkey_s_c k)
+const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
	if (k.k->p.offset)
		return "nonzero offset";

@@ -224,8 +223,8 @@ static const char *bch2_inode_invalid(const struct bch_fs *c,
	}
 }

-static void bch2_inode_to_text(struct bch_fs *c, char *buf,
-			       size_t size, struct bkey_s_c k)
+void bch2_inode_to_text(struct bch_fs *c, char *buf,
+			size_t size, struct bkey_s_c k)
 {
	char *out = buf, *end = out + size;
	struct bkey_s_c_inode inode;

@@ -247,11 +246,6 @@ static void bch2_inode_to_text(struct bch_fs *c, char *buf,
	}
 }

-const struct bkey_ops bch2_bkey_inode_ops = {
-	.key_invalid	= bch2_inode_invalid,
-	.val_to_text	= bch2_inode_to_text,
-};
-
 void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
		      uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
		      struct bch_inode_unpacked *parent)

@@ -5,7 +5,13 @@

 #include <linux/math64.h>

-extern const struct bkey_ops bch2_bkey_inode_ops;
+const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_inode_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
+
+#define bch2_bkey_inode_ops (struct bkey_ops) {	\
+	.key_invalid	= bch2_inode_invalid,	\
+	.val_to_text	= bch2_inode_to_text,	\
+}

 struct bch_inode_unpacked {
	u64			bi_inum;
@@ -20,6 +20,7 @@
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
+#include "replicas.h"
 #include "super.h"
 #include "super-io.h"
 #include "tier.h"

@@ -196,8 +197,6 @@ static void bch2_write_done(struct closure *cl)
 {
	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);

-	BUG_ON(!(op->flags & BCH_WRITE_DONE));
-
	if (!op->error && (op->flags & BCH_WRITE_FLUSH))
		op->error = bch2_journal_error(&op->c->journal);

@@ -205,7 +204,6 @@ static void bch2_write_done(struct closure *cl)
	bch2_disk_reservation_put(op->c, &op->res);
	percpu_ref_put(&op->c->writes);
	bch2_keylist_free(&op->insert_keys, op->inline_keys);
-	op->flags &= ~(BCH_WRITE_DONE|BCH_WRITE_LOOPED);

	closure_return(cl);
 }

@@ -232,9 +230,8 @@ int bch2_write_index_default(struct bch_write_op *op)
 /**
  * bch_write_index - after a write, update index to point to new data
  */
-static void bch2_write_index(struct closure *cl)
+static void __bch2_write_index(struct bch_write_op *op)
 {
-	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
	struct bch_fs *c = op->c;
	struct keylist *keys = &op->insert_keys;
	struct bkey_s_extent e;

@@ -242,8 +239,6 @@ static void bch2_write_index(struct closure *cl)
	struct bkey_i *src, *dst = keys->keys, *n, *k;
	int ret;

-	op->flags |= BCH_WRITE_LOOPED;
-
	for (src = keys->keys; src != keys->top; src = n) {
		n = bkey_next(src);
		bkey_copy(dst, src);

@@ -292,9 +287,19 @@ static void bch2_write_index(struct closure *cl)
	}
out:
	bch2_open_bucket_put_refs(c, &op->open_buckets_nr, op->open_buckets);
+	return;
+err:
+	keys->top = keys->keys;
+	op->error = ret;
+	goto out;
+}

-	if (!(op->flags & BCH_WRITE_DONE))
-		continue_at(cl, __bch2_write, op->io_wq);
+static void bch2_write_index(struct closure *cl)
+{
+	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+	struct bch_fs *c = op->c;
+
+	__bch2_write_index(op);

	if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
		bch2_journal_flush_seq_async(&c->journal,

@@ -304,12 +309,6 @@ out:
	} else {
		continue_at_nobarrier(cl, bch2_write_done, NULL);
	}
-	return;
-err:
-	keys->top = keys->keys;
-	op->error = ret;
-	op->flags |= BCH_WRITE_DONE;
-	goto out;
 }

 static void bch2_write_endio(struct bio *bio)

@@ -730,18 +729,18 @@ static void __bch2_write(struct closure *cl)
	struct bch_fs *c = op->c;
	struct write_point *wp;
	int ret;
+again:
	do {
		/* +1 for possible cache device: */
		if (op->open_buckets_nr + op->nr_replicas + 1 >
		    ARRAY_SIZE(op->open_buckets))
-			continue_at(cl, bch2_write_index, index_update_wq(op));
+			goto flush_io;

		if (bch2_keylist_realloc(&op->insert_keys,
					 op->inline_keys,
					 ARRAY_SIZE(op->inline_keys),
					 BKEY_EXTENT_U64s_MAX))
-			continue_at(cl, bch2_write_index, index_update_wq(op));
+			goto flush_io;

		wp = bch2_alloc_sectors_start(c,
					      op->target,

@@ -760,33 +759,7 @@ static void __bch2_write(struct closure *cl)
				goto err;
			}

-			/*
-			 * If we already have some keys, must insert them first
-			 * before allocating another open bucket. We only hit
-			 * this case if open_bucket_nr > 1.
-			 */
-			if (!bch2_keylist_empty(&op->insert_keys))
-				continue_at(cl, bch2_write_index,
-					    index_update_wq(op));
-
-			/*
-			 * If we've looped, we're running out of a workqueue -
-			 * not the bch2_write() caller's context - and we don't
-			 * want to block the workqueue:
-			 */
-			if (op->flags & BCH_WRITE_LOOPED)
-				continue_at(cl, __bch2_write, op->io_wq);
-
-			/*
-			 * Otherwise, we do want to block the caller on alloc
-			 * failure instead of letting it queue up more and more
-			 * writes:
-			 * XXX: this technically needs a try_to_freeze() -
-			 * except that that's not safe because caller may have
-			 * issued other IO... hmm..
-			 */
-			closure_sync(cl);
-			continue;
+			goto flush_io;
		}

		ret = bch2_write_extent(op, wp);

@@ -802,28 +775,24 @@ static void __bch2_write(struct closure *cl)
			goto err;
	} while (ret);

-	op->flags |= BCH_WRITE_DONE;
	continue_at(cl, bch2_write_index, index_update_wq(op));
err:
	/*
	 * Right now we can only error here if we went RO - the
	 * allocation failed, but we already checked for -ENOSPC when we
	 * got our reservation.
	 *
	 * XXX capacity might have changed, but we don't check for that
	 * yet:
	 */
	op->error = ret;
-	op->flags |= BCH_WRITE_DONE;

	/*
	 * No reason not to insert keys for whatever data was successfully
	 * written (especially for a cmpxchg operation that's moving data
	 * around)
	 */
+	continue_at(cl, !bch2_keylist_empty(&op->insert_keys)
+		    ? bch2_write_index
+		    : bch2_write_done, index_update_wq(op));
+flush_io:
+	closure_sync(cl);
+
+	if (!bch2_keylist_empty(&op->insert_keys)) {
+		__bch2_write_index(op);
+
+		if (op->error)
+			continue_at_nobarrier(cl, bch2_write_done, NULL);
+	}
+
+	goto again;
 }

@@ -969,7 +938,7 @@ static bool should_promote(struct bch_fs *c, struct bkey_s_c_extent e,
	if (percpu_ref_is_dying(&c->writes))
		return false;

-	return bch2_extent_has_target(c, e, target);
+	return bch2_extent_has_target(c, e, target) == NULL;
 }

 /* Read */

@@ -36,8 +36,6 @@ enum bch_write_flags {

	/* Internal: */
	BCH_WRITE_JOURNAL_SEQ_PTR	= (1 << 9),
-	BCH_WRITE_DONE			= (1 << 10),
-	BCH_WRITE_LOOPED		= (1 << 11),
 };

 static inline u64 *op_journal_seq(struct bch_write_op *op)
@@ -19,6 +19,7 @@
 #include "io.h"
 #include "keylist.h"
 #include "journal.h"
+#include "replicas.h"
 #include "super-io.h"
 #include "vstructs.h"

@@ -1582,40 +1583,19 @@ err:
	return ret;
 }

-/*
- * Allocate more journal space at runtime - not currently making use if it, but
- * the code works:
- */
-static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
-				       unsigned nr)
+static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
+					 bool new_fs, struct closure *cl)
 {
-	struct journal *j = &c->journal;
+	struct bch_fs *c = ca->fs;
	struct journal_device *ja = &ca->journal;
	struct bch_sb_field_journal *journal_buckets;
-	struct disk_reservation disk_res = { 0, 0 };
-	struct closure cl;
	u64 *new_bucket_seq = NULL, *new_buckets = NULL;
	int ret = 0;

-	closure_init_stack(&cl);
-
	/* don't handle reducing nr of buckets yet: */
	if (nr <= ja->nr)
		return 0;

-	/*
-	 * note: journal buckets aren't really counted as _sectors_ used yet, so
-	 * we don't need the disk reservation to avoid the BUG_ON() in buckets.c
-	 * when space used goes up without a reservation - but we do need the
-	 * reservation to ensure we'll actually be able to allocate:
-	 */
-
-	if (bch2_disk_reservation_get(c, &disk_res,
-				      bucket_to_sector(ca, nr - ja->nr), 1, 0))
-		return -ENOSPC;
-
-	mutex_lock(&c->sb_lock);
-
+	ret = -ENOMEM;
	new_buckets	= kzalloc(nr * sizeof(u64), GFP_KERNEL);
	new_bucket_seq	= kzalloc(nr * sizeof(u64), GFP_KERNEL);

@@ -1627,29 +1607,41 @@ static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
	if (!journal_buckets)
		goto err;

-	spin_lock(&j->lock);
+	if (c)
+		spin_lock(&c->journal.lock);
+
	memcpy(new_buckets,	ja->buckets,	ja->nr * sizeof(u64));
	memcpy(new_bucket_seq,	ja->bucket_seq,	ja->nr * sizeof(u64));
	swap(new_buckets,	ja->buckets);
	swap(new_bucket_seq,	ja->bucket_seq);
-	spin_unlock(&j->lock);
+
+	if (c)
+		spin_unlock(&c->journal.lock);

	while (ja->nr < nr) {
-		struct open_bucket *ob;
-		size_t bucket;
-		int ob_idx;
+		struct open_bucket *ob = NULL;
+		long bucket;

-		ob_idx = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, false, &cl);
-		if (ob_idx < 0) {
-			if (!closure_wait(&c->freelist_wait, &cl))
-				closure_sync(&cl);
-			continue;
+		if (new_fs) {
+			bucket = bch2_bucket_alloc_new_fs(ca);
+			if (bucket < 0) {
+				ret = -ENOSPC;
+				goto err;
+			}
+		} else {
+			int ob_idx = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, false, cl);
+			if (ob_idx < 0) {
+				ret = cl ? -EAGAIN : -ENOSPC;
+				goto err;
+			}
+
+			ob = c->open_buckets + ob_idx;
+			bucket = sector_to_bucket(ca, ob->ptr.offset);
		}

-		ob = c->open_buckets + ob_idx;
-		bucket = sector_to_bucket(ca, ob->ptr.offset);
+		if (c)
+			spin_lock(&c->journal.lock);

-		spin_lock(&j->lock);
		__array_insert_item(ja->buckets, ja->nr, ja->last_idx);
		__array_insert_item(ja->bucket_seq, ja->nr, ja->last_idx);
		__array_insert_item(journal_buckets->buckets, ja->nr, ja->last_idx);

@@ -1664,34 +1656,77 @@ static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
			ja->last_idx++;
		}
		ja->nr++;
-		spin_unlock(&j->lock);
+
+		if (c)
+			spin_unlock(&c->journal.lock);

		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
					  ca->mi.bucket_size,
-					  gc_phase(GC_PHASE_SB), 0);
+					  gc_phase(GC_PHASE_SB),
+					  new_fs
+					  ? BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE
+					  : 0);

-		bch2_open_bucket_put(c, ob);
+		if (!new_fs)
+			bch2_open_bucket_put(c, ob);
	}

-	bch2_write_super(c);
-
+	ret = 0;
err:
-	mutex_unlock(&c->sb_lock);
-
	kfree(new_bucket_seq);
	kfree(new_buckets);
-	bch2_disk_reservation_put(c, &disk_res);
-
-	if (!ret)
-		bch2_dev_allocator_add(c, ca);
-
-	closure_sync(&cl);

	return ret;
 }

-int bch2_dev_journal_alloc(struct bch_fs *c, struct bch_dev *ca)
+/*
+ * Allocate more journal space at runtime - not currently making use if it, but
+ * the code works:
+ */
+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
+				unsigned nr)
+{
+	struct journal_device *ja = &ca->journal;
+	struct closure cl;
+	unsigned current_nr;
+	int ret;
+
+	closure_init_stack(&cl);
+
+	do {
+		struct disk_reservation disk_res = { 0, 0 };
+
+		closure_sync(&cl);
+
+		mutex_lock(&c->sb_lock);
+		current_nr = ja->nr;
+
+		/*
+		 * note: journal buckets aren't really counted as _sectors_ used yet, so
+		 * we don't need the disk reservation to avoid the BUG_ON() in buckets.c
+		 * when space used goes up without a reservation - but we do need the
+		 * reservation to ensure we'll actually be able to allocate:
+		 */
+
+		if (bch2_disk_reservation_get(c, &disk_res,
+				bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
+			mutex_unlock(&c->sb_lock);
+			return -ENOSPC;
+		}
+
+		ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl);
+
+		bch2_disk_reservation_put(c, &disk_res);
+
+		if (ja->nr != current_nr)
+			bch2_write_super(c);
+		mutex_unlock(&c->sb_lock);
+	} while (ret == -EAGAIN);
+
+	return ret;
+}
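The runtime path above turns allocation failure into -EAGAIN and loops: drop the locks, wait for the armed closure to make progress, retake the locks, retry. A generic standalone sketch of that wait-and-retry shape; try_grow and the attempt counter are hypothetical stand-ins, not bcachefs APIs:

#include <errno.h>
#include <stdio.h>

static int attempts;

/* Stand-in allocator: fails with -EAGAIN until a slot frees up */
static int try_grow(unsigned nr)
{
	(void) nr;
	return ++attempts < 3 ? -EAGAIN : 0;
}

/* Caller loops: wait for progress, then retry the whole operation */
static int grow_with_retry(unsigned nr)
{
	int ret;

	do {
		/* the real code does closure_sync() and retakes sb_lock here */
		ret = try_grow(nr);
	} while (ret == -EAGAIN);

	return ret;
}

int main(void)
{
	printf("ret %d after %d attempts\n", grow_with_retry(8), attempts);
	return 0;
}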
+
+int bch2_dev_journal_alloc(struct bch_dev *ca)
 {
	unsigned nr;

@@ -1707,7 +1742,7 @@ int bch2_dev_journal_alloc(struct bch_fs *c, struct bch_dev *ca)
		    min(1 << 10,
			(1 << 20) / ca->mi.bucket_size));

-	return bch2_set_nr_journal_buckets(c, ca, nr);
+	return __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
 }

 /* Journalling */

@@ -2320,8 +2355,8 @@ static void journal_write(struct closure *cl)

	journal_write_compact(jset);

-	jset->read_clock	= cpu_to_le16(c->prio_clock[READ].hand);
-	jset->write_clock	= cpu_to_le16(c->prio_clock[WRITE].hand);
+	jset->read_clock	= cpu_to_le16(c->bucket_clock[READ].hand);
+	jset->write_clock	= cpu_to_le16(c->bucket_clock[WRITE].hand);
	jset->magic		= cpu_to_le64(jset_magic(c));
	jset->version		= cpu_to_le32(BCACHE_JSET_VERSION);

@@ -400,7 +400,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
 ssize_t bch2_journal_print_debug(struct journal *, char *);
 ssize_t bch2_journal_print_pins(struct journal *, char *);

-int bch2_dev_journal_alloc(struct bch_fs *, struct bch_dev *);
+int bch2_dev_journal_alloc(struct bch_dev *);

 void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
 void bch2_fs_journal_stop(struct journal *);
|
@ -11,6 +11,7 @@
#include "keylist.h"
#include "migrate.h"
#include "move.h"
#include "replicas.h"
#include "super-io.h"

static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,

@ -6,6 +6,7 @@
#include "inode.h"
#include "io.h"
#include "move.h"
#include "replicas.h"
#include "super-io.h"
#include "keylist.h"

@ -9,6 +9,7 @@
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
#include "disk_groups.h"
#include "extents.h"
#include "eytzinger.h"
#include "io.h"
@ -51,7 +52,7 @@ static inline int sectors_used_cmp(copygc_heap *heap,
				   struct copygc_heap_entry l,
				   struct copygc_heap_entry r)
{
	return bucket_sectors_used(l.mark) - bucket_sectors_used(r.mark);
	return (l.sectors > r.sectors) - (l.sectors < r.sectors);
}

static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
@ -78,7 +79,7 @@ static bool __copygc_pred(struct bch_dev *ca,

	return (i >= 0 &&
		ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
		ptr->gen == h->data[i].mark.gen);
		ptr->gen == h->data[i].gen);
}

	return false;
@ -154,8 +155,9 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
			continue;

		e = (struct copygc_heap_entry) {
			.offset = bucket_to_sector(ca, b),
			.mark = m
			.gen = m.gen,
			.sectors = bucket_sectors_used(m),
			.offset = bucket_to_sector(ca, b),
		};
		heap_add_or_replace(h, e, -sectors_used_cmp);
	}
@ -163,11 +165,11 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
	up_read(&c->gc_lock);

	for (i = h->data; i < h->data + h->used; i++)
		sectors_to_move += bucket_sectors_used(i->mark);
		sectors_to_move += i->sectors;

	while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) {
		BUG_ON(!heap_pop(h, e, -sectors_used_cmp));
		sectors_to_move -= bucket_sectors_used(e.mark);
		sectors_to_move -= e.sectors;
	}

	buckets_to_move = h->used;
@ -191,7 +193,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
		size_t b = sector_to_bucket(ca, i->offset);
		struct bucket_mark m = READ_ONCE(buckets->b[b].mark);

		if (i->mark.gen == m.gen && bucket_sectors_used(m)) {
		if (i->gen == m.gen && bucket_sectors_used(m)) {
			sectors_not_moved += bucket_sectors_used(m);
			buckets_not_moved++;
		}
@ -284,7 +286,8 @@ int bch2_copygc_start(struct bch_fs *c, struct bch_dev *ca)
	if (bch2_fs_init_fault("copygc_start"))
		return -ENOMEM;

	t = kthread_create(bch2_copygc_thread, ca, "bch_copygc");
	t = kthread_create(bch2_copygc_thread, ca,
			   "bch_copygc[%s]", ca->name);
	if (IS_ERR(t))
		return PTR_ERR(t);

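A note on the comparator rewrite in the movinggc hunk above: returning the difference `bucket_sectors_used(l.mark) - bucket_sectors_used(r.mark)` can wrap once the counts exceed what the int result holds, while `(l.sectors > r.sectors) - (l.sectors < r.sectors)` always yields exactly -1, 0 or 1. A minimal standalone sketch of the idiom (hypothetical values, not bcachefs code):

	#include <assert.h>

	/* Branchless three-way compare: always -1, 0 or 1, regardless of magnitude. */
	static int cmp_u64(unsigned long long l, unsigned long long r)
	{
		return (l > r) - (l < r);
	}

	int main(void)
	{
		assert(cmp_u64(1, 2) == -1);
		assert(cmp_u64(2, 2) ==  0);
		/* a naive "(int)(l - r)" would truncate the low bits to 0 here: */
		assert(cmp_u64(1ULL << 40, 0) == 1);
		return 0;
	}
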
@ -2,6 +2,7 @@
#include <linux/kernel.h>

#include "bcachefs.h"
#include "disk_groups.h"
#include "opts.h"
#include "super-io.h"
#include "util.h"

@ -4,7 +4,22 @@
#include "quota.h"
#include "super-io.h"

static const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k)
static const char *bch2_sb_validate_quota(struct bch_sb *sb,
					  struct bch_sb_field *f)
{
	struct bch_sb_field_quota *q = field_to_type(f, quota);

	if (vstruct_bytes(&q->field) != sizeof(*q))
		return "invalid field quota: wrong size";

	return NULL;
}

const struct bch_sb_field_ops bch_sb_field_ops_quota = {
	.validate	= bch2_sb_validate_quota,
};

const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_s_c_quota dq;

@ -30,8 +45,8 @@ static const char * const bch2_quota_counters[] = {
	"inodes",
};

static void bch2_quota_to_text(struct bch_fs *c, char *buf,
			       size_t size, struct bkey_s_c k)
void bch2_quota_to_text(struct bch_fs *c, char *buf,
			size_t size, struct bkey_s_c k)
{
	char *out = buf, *end = buf + size;
	struct bkey_s_c_quota dq;
@ -50,11 +65,6 @@ static void bch2_quota_to_text(struct bch_fs *c, char *buf,
	}
}

const struct bkey_ops bch2_bkey_quota_ops = {
	.key_invalid	= bch2_quota_invalid,
	.val_to_text	= bch2_quota_to_text,
};

#ifdef CONFIG_BCACHEFS_QUOTA

#include <linux/cred.h>
@ -399,7 +409,7 @@ static void bch2_sb_quota_read(struct bch_fs *c)
	struct bch_sb_field_quota *sb_quota;
	unsigned i, j;

	sb_quota = bch2_sb_get_quota(c->disk_sb);
	sb_quota = bch2_sb_get_quota(c->disk_sb.sb);
	if (!sb_quota)
		return;

@ -476,13 +486,13 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)

	mutex_lock(&c->sb_lock);
	if (uflags & FS_QUOTA_UDQ_ENFD)
		SET_BCH_SB_USRQUOTA(c->disk_sb, true);
		SET_BCH_SB_USRQUOTA(c->disk_sb.sb, true);

	if (uflags & FS_QUOTA_GDQ_ENFD)
		SET_BCH_SB_GRPQUOTA(c->disk_sb, true);
		SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, true);

	if (uflags & FS_QUOTA_PDQ_ENFD)
		SET_BCH_SB_PRJQUOTA(c->disk_sb, true);
		SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true);

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);
@ -499,13 +509,13 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags)

	mutex_lock(&c->sb_lock);
	if (uflags & FS_QUOTA_UDQ_ENFD)
		SET_BCH_SB_USRQUOTA(c->disk_sb, false);
		SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false);

	if (uflags & FS_QUOTA_GDQ_ENFD)
		SET_BCH_SB_GRPQUOTA(c->disk_sb, false);
		SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, false);

	if (uflags & FS_QUOTA_PDQ_ENFD)
		SET_BCH_SB_PRJQUOTA(c->disk_sb, false);
		SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false);

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);
@ -616,9 +626,10 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
	q = &c->quotas[type];

	mutex_lock(&c->sb_lock);
	sb_quota = bch2_sb_get_quota(c->disk_sb);
	sb_quota = bch2_sb_get_quota(c->disk_sb.sb);
	if (!sb_quota) {
		sb_quota = bch2_fs_sb_resize_quota(c, sizeof(*sb_quota) / sizeof(u64));
		sb_quota = bch2_sb_resize_quota(&c->disk_sb,
					sizeof(*sb_quota) / sizeof(u64));
		if (!sb_quota)
			return -ENOSPC;
	}

@ -1,9 +1,18 @@
#ifndef _BCACHEFS_QUOTA_H
#define _BCACHEFS_QUOTA_H

#include "inode.h"
#include "quota_types.h"

extern const struct bkey_ops bch2_bkey_quota_ops;
extern const struct bch_sb_field_ops bch_sb_field_ops_quota;

const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_quota_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);

#define bch2_bkey_quota_ops (struct bkey_ops) {		\
	.key_invalid	= bch2_quota_invalid,		\
	.val_to_text	= bch2_quota_to_text,		\
}

enum quota_acct_mode {
	BCH_QUOTA_PREALLOC,
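The validate hook added above follows the pattern this commit introduces for every superblock field: downcast the generic struct bch_sb_field with field_to_type(), sanity-check the field's size, and return an error string (or NULL on success) through a per-type ops table. A minimal sketch of that shape, with hypothetical names (sb_field_my, validate_my_field) standing in for the bcachefs types:

	#include <stddef.h>

	/* Generic field header, analogous to struct bch_sb_field. */
	struct sb_field {
		unsigned	bytes;	/* total size of this field, in bytes */
		unsigned	type;
	};

	/* A concrete field type embeds the generic header first... */
	struct sb_field_my {
		struct sb_field		field;
		unsigned long long	data[2];
	};

	/* ...so the downcast is just a pointer cast: */
	#define field_to_type(_f, _name) \
		((struct sb_field_##_name *) (_f))

	struct sb_field_ops {
		const char *(*validate)(struct sb_field *);
	};

	/* Validate hook: NULL on success, an error string otherwise. */
	static const char *validate_my_field(struct sb_field *f)
	{
		struct sb_field_my *m = field_to_type(f, my);

		if (m->field.bytes != sizeof(*m))
			return "invalid field my: wrong size";

		return NULL;
	}

	static const struct sb_field_ops my_field_ops = {
		.validate = validate_my_field,
	};
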
libbcachefs/replicas.c (new file, 698 lines)
@ -0,0 +1,698 @@

#include "bcachefs.h"
#include "replicas.h"
#include "super-io.h"

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
					    struct bch_replicas_cpu *);

/* Replicas tracking - in memory: */

#define for_each_cpu_replicas_entry(_r, _i)				\
	for (_i = (_r)->entries;					\
	     (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
	     _i = (void *) (_i) + (_r)->entry_size)

static inline struct bch_replicas_cpu_entry *
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
{
	return (void *) r->entries + r->entry_size * i;
}

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
	eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
}

static inline bool replicas_test_dev(struct bch_replicas_cpu_entry *e,
				     unsigned dev)
{
	return (e->devs[dev >> 3] & (1 << (dev & 7))) != 0;
}

static inline void replicas_set_dev(struct bch_replicas_cpu_entry *e,
				    unsigned dev)
{
	e->devs[dev >> 3] |= 1 << (dev & 7);
}

static inline unsigned replicas_dev_slots(struct bch_replicas_cpu *r)
{
	return (r->entry_size -
		offsetof(struct bch_replicas_cpu_entry, devs)) * 8;
}

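The `dev >> 3` / `dev & 7` arithmetic in replicas_test_dev() and replicas_set_dev() above is a plain per-device bitmap: the byte index is the device number divided by 8 and the bit index is the remainder. A minimal sketch of the same packing, outside the bcachefs structures:

	#include <assert.h>
	#include <string.h>

	/* One bit per device: byte dev/8, bit dev%8. */
	static int test_dev(const unsigned char *devs, unsigned dev)
	{
		return (devs[dev >> 3] & (1 << (dev & 7))) != 0;
	}

	static void set_dev(unsigned char *devs, unsigned dev)
	{
		devs[dev >> 3] |= 1 << (dev & 7);
	}

	int main(void)
	{
		unsigned char devs[8];		/* room for 64 device slots */

		memset(devs, 0, sizeof(devs));
		set_dev(devs, 0);
		set_dev(devs, 10);		/* byte 1, bit 2 */

		assert(test_dev(devs, 10));
		assert(!test_dev(devs, 11));
		return 0;
	}
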
||||
int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *r,
|
||||
char *buf, size_t size)
|
||||
{
|
||||
char *out = buf, *end = out + size;
|
||||
struct bch_replicas_cpu_entry *e;
|
||||
bool first = true;
|
||||
unsigned i;
|
||||
|
||||
for_each_cpu_replicas_entry(r, e) {
|
||||
bool first_e = true;
|
||||
|
||||
if (!first)
|
||||
out += scnprintf(out, end - out, " ");
|
||||
first = false;
|
||||
|
||||
out += scnprintf(out, end - out, "%u: [", e->data_type);
|
||||
|
||||
for (i = 0; i < replicas_dev_slots(r); i++)
|
||||
if (replicas_test_dev(e, i)) {
|
||||
if (!first_e)
|
||||
out += scnprintf(out, end - out, " ");
|
||||
first_e = false;
|
||||
out += scnprintf(out, end - out, "%u", i);
|
||||
}
|
||||
out += scnprintf(out, end - out, "]");
|
||||
}
|
||||
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
static inline unsigned bkey_to_replicas(struct bkey_s_c_extent e,
|
||||
enum bch_data_type data_type,
|
||||
struct bch_replicas_cpu_entry *r,
|
||||
unsigned *max_dev)
|
||||
{
|
||||
const struct bch_extent_ptr *ptr;
|
||||
unsigned nr = 0;
|
||||
|
||||
BUG_ON(!data_type ||
|
||||
data_type == BCH_DATA_SB ||
|
||||
data_type >= BCH_DATA_NR);
|
||||
|
||||
memset(r, 0, sizeof(*r));
|
||||
r->data_type = data_type;
|
||||
|
||||
*max_dev = 0;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (!ptr->cached) {
|
||||
*max_dev = max_t(unsigned, *max_dev, ptr->dev);
|
||||
replicas_set_dev(r, ptr->dev);
|
||||
nr++;
|
||||
}
|
||||
return nr;
|
||||
}
|
||||
|
||||
static inline void devlist_to_replicas(struct bch_devs_list devs,
|
||||
enum bch_data_type data_type,
|
||||
struct bch_replicas_cpu_entry *r,
|
||||
unsigned *max_dev)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
BUG_ON(!data_type ||
|
||||
data_type == BCH_DATA_SB ||
|
||||
data_type >= BCH_DATA_NR);
|
||||
|
||||
memset(r, 0, sizeof(*r));
|
||||
r->data_type = data_type;
|
||||
|
||||
*max_dev = 0;
|
||||
|
||||
for (i = 0; i < devs.nr; i++) {
|
||||
*max_dev = max_t(unsigned, *max_dev, devs.devs[i]);
|
||||
replicas_set_dev(r, devs.devs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static struct bch_replicas_cpu *
|
||||
cpu_replicas_add_entry(struct bch_replicas_cpu *old,
|
||||
struct bch_replicas_cpu_entry new_entry,
|
||||
unsigned max_dev)
|
||||
{
|
||||
struct bch_replicas_cpu *new;
|
||||
unsigned i, nr, entry_size;
|
||||
|
||||
entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
|
||||
DIV_ROUND_UP(max_dev + 1, 8);
|
||||
entry_size = max(entry_size, old->entry_size);
|
||||
nr = old->nr + 1;
|
||||
|
||||
new = kzalloc(sizeof(struct bch_replicas_cpu) +
|
||||
nr * entry_size, GFP_NOIO);
|
||||
if (!new)
|
||||
return NULL;
|
||||
|
||||
new->nr = nr;
|
||||
new->entry_size = entry_size;
|
||||
|
||||
for (i = 0; i < old->nr; i++)
|
||||
memcpy(cpu_replicas_entry(new, i),
|
||||
cpu_replicas_entry(old, i),
|
||||
min(new->entry_size, old->entry_size));
|
||||
|
||||
memcpy(cpu_replicas_entry(new, old->nr),
|
||||
&new_entry,
|
||||
new->entry_size);
|
||||
|
||||
bch2_cpu_replicas_sort(new);
|
||||
return new;
|
||||
}
|
||||
|
||||
static bool replicas_has_entry(struct bch_replicas_cpu *r,
|
||||
struct bch_replicas_cpu_entry search,
|
||||
unsigned max_dev)
|
||||
{
|
||||
return max_dev < replicas_dev_slots(r) &&
|
||||
eytzinger0_find(r->entries, r->nr,
|
||||
r->entry_size,
|
||||
memcmp, &search) < r->nr;
|
||||
}
|
||||
|
||||
noinline
|
||||
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
|
||||
struct bch_replicas_cpu_entry new_entry,
|
||||
unsigned max_dev)
|
||||
{
|
||||
struct bch_replicas_cpu *old_gc, *new_gc = NULL, *old_r, *new_r = NULL;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
||||
old_gc = rcu_dereference_protected(c->replicas_gc,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
if (old_gc && !replicas_has_entry(old_gc, new_entry, max_dev)) {
|
||||
new_gc = cpu_replicas_add_entry(old_gc, new_entry, max_dev);
|
||||
if (!new_gc)
|
||||
goto err;
|
||||
}
|
||||
|
||||
old_r = rcu_dereference_protected(c->replicas,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
if (!replicas_has_entry(old_r, new_entry, max_dev)) {
|
||||
new_r = cpu_replicas_add_entry(old_r, new_entry, max_dev);
|
||||
if (!new_r)
|
||||
goto err;
|
||||
|
||||
ret = bch2_cpu_replicas_to_sb_replicas(c, new_r);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* allocations done, now commit: */
|
||||
|
||||
if (new_r)
|
||||
bch2_write_super(c);
|
||||
|
||||
/* don't update in memory replicas until changes are persistent */
|
||||
|
||||
if (new_gc) {
|
||||
rcu_assign_pointer(c->replicas_gc, new_gc);
|
||||
kfree_rcu(old_gc, rcu);
|
||||
}
|
||||
|
||||
if (new_r) {
|
||||
rcu_assign_pointer(c->replicas, new_r);
|
||||
kfree_rcu(old_r, rcu);
|
||||
}
|
||||
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return 0;
|
||||
err:
|
||||
mutex_unlock(&c->sb_lock);
|
||||
if (new_gc)
|
||||
kfree(new_gc);
|
||||
if (new_r)
|
||||
kfree(new_r);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_mark_replicas(struct bch_fs *c,
|
||||
enum bch_data_type data_type,
|
||||
struct bch_devs_list devs)
|
||||
{
|
||||
struct bch_replicas_cpu_entry search;
|
||||
struct bch_replicas_cpu *r, *gc_r;
|
||||
unsigned max_dev;
|
||||
bool marked;
|
||||
|
||||
if (!devs.nr)
|
||||
return 0;
|
||||
|
||||
BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
|
||||
|
||||
devlist_to_replicas(devs, data_type, &search, &max_dev);
|
||||
|
||||
rcu_read_lock();
|
||||
r = rcu_dereference(c->replicas);
|
||||
gc_r = rcu_dereference(c->replicas_gc);
|
||||
marked = replicas_has_entry(r, search, max_dev) &&
|
||||
(!likely(gc_r) || replicas_has_entry(gc_r, search, max_dev));
|
||||
rcu_read_unlock();
|
||||
|
||||
return likely(marked) ? 0
|
||||
: bch2_mark_replicas_slowpath(c, search, max_dev);
|
||||
}
|
||||
|
||||
int bch2_mark_bkey_replicas(struct bch_fs *c,
|
||||
enum bch_data_type data_type,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
for (i = 0; i < cached.nr; i++)
|
||||
if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
|
||||
bch2_dev_list_single(cached.devs[i]))))
|
||||
return ret;
|
||||
|
||||
return bch2_mark_replicas(c, data_type, bch2_bkey_dirty_devs(k));
|
||||
}
|
||||
|
||||
int bch2_replicas_gc_end(struct bch_fs *c, int err)
|
||||
{
|
||||
struct bch_replicas_cpu *new_r, *old_r;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&c->replicas_gc_lock);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
||||
new_r = rcu_dereference_protected(c->replicas_gc,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
|
||||
if (err) {
|
||||
rcu_assign_pointer(c->replicas_gc, NULL);
|
||||
kfree_rcu(new_r, rcu);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (bch2_cpu_replicas_to_sb_replicas(c, new_r)) {
|
||||
ret = -ENOSPC;
|
||||
goto err;
|
||||
}
|
||||
|
||||
old_r = rcu_dereference_protected(c->replicas,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
|
||||
rcu_assign_pointer(c->replicas, new_r);
|
||||
rcu_assign_pointer(c->replicas_gc, NULL);
|
||||
kfree_rcu(old_r, rcu);
|
||||
|
||||
bch2_write_super(c);
|
||||
err:
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
|
||||
{
|
||||
struct bch_replicas_cpu *dst, *src;
|
||||
struct bch_replicas_cpu_entry *e;
|
||||
|
||||
lockdep_assert_held(&c->replicas_gc_lock);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
BUG_ON(c->replicas_gc);
|
||||
|
||||
src = rcu_dereference_protected(c->replicas,
|
||||
lockdep_is_held(&c->sb_lock));
|
||||
|
||||
dst = kzalloc(sizeof(struct bch_replicas_cpu) +
|
||||
src->nr * src->entry_size, GFP_NOIO);
|
||||
if (!dst) {
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dst->nr = 0;
|
||||
dst->entry_size = src->entry_size;
|
||||
|
||||
for_each_cpu_replicas_entry(src, e)
|
||||
if (!((1 << e->data_type) & typemask))
|
||||
memcpy(cpu_replicas_entry(dst, dst->nr++),
|
||||
e, dst->entry_size);
|
||||
|
||||
bch2_cpu_replicas_sort(dst);
|
||||
|
||||
rcu_assign_pointer(c->replicas_gc, dst);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Replicas tracking - superblock: */
|
||||
|
||||
static void bch2_sb_replicas_nr_entries(struct bch_sb_field_replicas *r,
|
||||
unsigned *nr,
|
||||
unsigned *bytes,
|
||||
unsigned *max_dev)
|
||||
{
|
||||
struct bch_replicas_entry *i;
|
||||
unsigned j;
|
||||
|
||||
*nr = 0;
|
||||
*bytes = sizeof(*r);
|
||||
*max_dev = 0;
|
||||
|
||||
if (!r)
|
||||
return;
|
||||
|
||||
for_each_replicas_entry(r, i) {
|
||||
for (j = 0; j < i->nr; j++)
|
||||
*max_dev = max_t(unsigned, *max_dev, i->devs[j]);
|
||||
(*nr)++;
|
||||
}
|
||||
|
||||
*bytes = (void *) i - (void *) r;
|
||||
}
|
||||
|
||||
static struct bch_replicas_cpu *
|
||||
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
|
||||
{
|
||||
struct bch_replicas_cpu *cpu_r;
|
||||
unsigned i, nr, bytes, max_dev, entry_size;
|
||||
|
||||
bch2_sb_replicas_nr_entries(sb_r, &nr, &bytes, &max_dev);
|
||||
|
||||
entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
|
||||
DIV_ROUND_UP(max_dev + 1, 8);
|
||||
|
||||
cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
|
||||
nr * entry_size, GFP_NOIO);
|
||||
if (!cpu_r)
|
||||
return NULL;
|
||||
|
||||
cpu_r->nr = nr;
|
||||
cpu_r->entry_size = entry_size;
|
||||
|
||||
if (nr) {
|
||||
struct bch_replicas_cpu_entry *dst =
|
||||
cpu_replicas_entry(cpu_r, 0);
|
||||
struct bch_replicas_entry *src = sb_r->entries;
|
||||
|
||||
while (dst < cpu_replicas_entry(cpu_r, nr)) {
|
||||
dst->data_type = src->data_type;
|
||||
for (i = 0; i < src->nr; i++)
|
||||
replicas_set_dev(dst, src->devs[i]);
|
||||
|
||||
src = replicas_entry_next(src);
|
||||
dst = (void *) dst + entry_size;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_cpu_replicas_sort(cpu_r);
|
||||
return cpu_r;
|
||||
}
|
||||
|
||||
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
|
||||
{
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_replicas_cpu *cpu_r, *old_r;
|
||||
|
||||
sb_r = bch2_sb_get_replicas(c->disk_sb.sb);
|
||||
cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_r);
|
||||
if (!cpu_r)
|
||||
return -ENOMEM;
|
||||
|
||||
old_r = rcu_dereference_check(c->replicas, lockdep_is_held(&c->sb_lock));
|
||||
rcu_assign_pointer(c->replicas, cpu_r);
|
||||
if (old_r)
|
||||
kfree_rcu(old_r, rcu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
|
||||
struct bch_replicas_cpu *r)
|
||||
{
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_replicas_entry *sb_e;
|
||||
struct bch_replicas_cpu_entry *e;
|
||||
size_t i, bytes;
|
||||
|
||||
bytes = sizeof(struct bch_sb_field_replicas);
|
||||
|
||||
for_each_cpu_replicas_entry(r, e) {
|
||||
bytes += sizeof(struct bch_replicas_entry);
|
||||
for (i = 0; i < r->entry_size - 1; i++)
|
||||
bytes += hweight8(e->devs[i]);
|
||||
}
|
||||
|
||||
sb_r = bch2_sb_resize_replicas(&c->disk_sb,
|
||||
DIV_ROUND_UP(sizeof(*sb_r) + bytes, sizeof(u64)));
|
||||
if (!sb_r)
|
||||
return -ENOSPC;
|
||||
|
||||
memset(&sb_r->entries, 0,
|
||||
vstruct_end(&sb_r->field) -
|
||||
(void *) &sb_r->entries);
|
||||
|
||||
sb_e = sb_r->entries;
|
||||
for_each_cpu_replicas_entry(r, e) {
|
||||
sb_e->data_type = e->data_type;
|
||||
|
||||
for (i = 0; i < replicas_dev_slots(r); i++)
|
||||
if (replicas_test_dev(e, i))
|
||||
sb_e->devs[sb_e->nr++] = i;
|
||||
|
||||
sb_e = replicas_entry_next(sb_e);
|
||||
|
||||
BUG_ON((void *) sb_e > vstruct_end(&sb_r->field));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f)
|
||||
{
|
||||
struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
|
||||
struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
|
||||
struct bch_replicas_cpu *cpu_r = NULL;
|
||||
struct bch_replicas_entry *e;
|
||||
const char *err;
|
||||
unsigned i;
|
||||
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
err = "invalid replicas entry: invalid data type";
|
||||
if (e->data_type >= BCH_DATA_NR)
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: no devices";
|
||||
if (!e->nr)
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: too many devices";
|
||||
if (e->nr >= BCH_REPLICAS_MAX)
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: invalid device";
|
||||
for (i = 0; i < e->nr; i++)
|
||||
if (!bch2_dev_exists(sb, mi, e->devs[i]))
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = "cannot allocate memory";
|
||||
cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_r);
|
||||
if (!cpu_r)
|
||||
goto err;
|
||||
|
||||
sort_cmp_size(cpu_r->entries,
|
||||
cpu_r->nr,
|
||||
cpu_r->entry_size,
|
||||
memcmp, NULL);
|
||||
|
||||
for (i = 0; i + 1 < cpu_r->nr; i++) {
|
||||
struct bch_replicas_cpu_entry *l =
|
||||
cpu_replicas_entry(cpu_r, i);
|
||||
struct bch_replicas_cpu_entry *r =
|
||||
cpu_replicas_entry(cpu_r, i + 1);
|
||||
|
||||
BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);
|
||||
|
||||
err = "duplicate replicas entry";
|
||||
if (!memcmp(l, r, cpu_r->entry_size))
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = NULL;
|
||||
err:
|
||||
kfree(cpu_r);
|
||||
return err;
|
||||
}
|
||||
|
||||
const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
|
||||
.validate = bch2_sb_validate_replicas,
|
||||
};
|
||||
|
||||
int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t size)
|
||||
{
|
||||
char *out = buf, *end = out + size;
|
||||
struct bch_replicas_entry *e;
|
||||
bool first = true;
|
||||
unsigned i;
|
||||
|
||||
if (!r) {
|
||||
out += scnprintf(out, end - out, "(no replicas section found)");
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
for_each_replicas_entry(r, e) {
|
||||
if (!first)
|
||||
out += scnprintf(out, end - out, " ");
|
||||
first = false;
|
||||
|
||||
out += scnprintf(out, end - out, "%u: [", e->data_type);
|
||||
|
||||
for (i = 0; i < e->nr; i++)
|
||||
out += scnprintf(out, end - out,
|
||||
i ? " %u" : "%u", e->devs[i]);
|
||||
out += scnprintf(out, end - out, "]");
|
||||
}
|
||||
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
/* Query replicas: */
|
||||
|
||||
bool bch2_replicas_marked(struct bch_fs *c,
|
||||
enum bch_data_type data_type,
|
||||
struct bch_devs_list devs)
|
||||
{
|
||||
struct bch_replicas_cpu_entry search;
|
||||
unsigned max_dev;
|
||||
bool ret;
|
||||
|
||||
if (!devs.nr)
|
||||
return true;
|
||||
|
||||
devlist_to_replicas(devs, data_type, &search, &max_dev);
|
||||
|
||||
rcu_read_lock();
|
||||
ret = replicas_has_entry(rcu_dereference(c->replicas),
|
||||
search, max_dev);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool bch2_bkey_replicas_marked(struct bch_fs *c,
|
||||
enum bch_data_type data_type,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < cached.nr; i++)
|
||||
if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
|
||||
bch2_dev_list_single(cached.devs[i])))
|
||||
return false;
|
||||
|
||||
return bch2_replicas_marked(c, data_type, bch2_bkey_dirty_devs(k));
|
||||
}
|
||||
|
||||
struct replicas_status __bch2_replicas_status(struct bch_fs *c,
|
||||
struct bch_devs_mask online_devs)
|
||||
{
|
||||
struct bch_sb_field_members *mi;
|
||||
struct bch_replicas_cpu_entry *e;
|
||||
struct bch_replicas_cpu *r;
|
||||
unsigned i, dev, dev_slots, nr_online, nr_offline;
|
||||
struct replicas_status ret;
|
||||
|
||||
memset(&ret, 0, sizeof(ret));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
|
||||
ret.replicas[i].nr_online = UINT_MAX;
|
||||
|
||||
mi = bch2_sb_get_members(c->disk_sb.sb);
|
||||
rcu_read_lock();
|
||||
|
||||
r = rcu_dereference(c->replicas);
|
||||
dev_slots = replicas_dev_slots(r);
|
||||
|
||||
for_each_cpu_replicas_entry(r, e) {
|
||||
if (e->data_type >= ARRAY_SIZE(ret.replicas))
|
||||
panic("e %p data_type %u\n", e, e->data_type);
|
||||
|
||||
nr_online = nr_offline = 0;
|
||||
|
||||
for (dev = 0; dev < dev_slots; dev++) {
|
||||
if (!replicas_test_dev(e, dev))
|
||||
continue;
|
||||
|
||||
BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi, dev));
|
||||
|
||||
if (test_bit(dev, online_devs.d))
|
||||
nr_online++;
|
||||
else
|
||||
nr_offline++;
|
||||
}
|
||||
|
||||
ret.replicas[e->data_type].nr_online =
|
||||
min(ret.replicas[e->data_type].nr_online,
|
||||
nr_online);
|
||||
|
||||
ret.replicas[e->data_type].nr_offline =
|
||||
max(ret.replicas[e->data_type].nr_offline,
|
||||
nr_offline);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct replicas_status bch2_replicas_status(struct bch_fs *c)
|
||||
{
|
||||
return __bch2_replicas_status(c, bch2_online_devs(c));
|
||||
}
|
||||
|
||||
static bool have_enough_devs(struct replicas_status s,
|
||||
enum bch_data_type type,
|
||||
bool force_if_degraded,
|
||||
bool force_if_lost)
|
||||
{
|
||||
return (!s.replicas[type].nr_offline || force_if_degraded) &&
|
||||
(s.replicas[type].nr_online || force_if_lost);
|
||||
}
|
||||
|
||||
bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
|
||||
{
|
||||
return (have_enough_devs(s, BCH_DATA_JOURNAL,
|
||||
flags & BCH_FORCE_IF_METADATA_DEGRADED,
|
||||
flags & BCH_FORCE_IF_METADATA_LOST) &&
|
||||
have_enough_devs(s, BCH_DATA_BTREE,
|
||||
flags & BCH_FORCE_IF_METADATA_DEGRADED,
|
||||
flags & BCH_FORCE_IF_METADATA_LOST) &&
|
||||
have_enough_devs(s, BCH_DATA_USER,
|
||||
flags & BCH_FORCE_IF_DATA_DEGRADED,
|
||||
flags & BCH_FORCE_IF_DATA_LOST));
|
||||
}
|
||||
|
||||
unsigned bch2_replicas_online(struct bch_fs *c, bool meta)
|
||||
{
|
||||
struct replicas_status s = bch2_replicas_status(c);
|
||||
|
||||
return meta
|
||||
? min(s.replicas[BCH_DATA_JOURNAL].nr_online,
|
||||
s.replicas[BCH_DATA_BTREE].nr_online)
|
||||
: s.replicas[BCH_DATA_USER].nr_online;
|
||||
}
|
||||
|
||||
unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct bch_replicas_cpu_entry *e;
|
||||
struct bch_replicas_cpu *r;
|
||||
unsigned ret = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
r = rcu_dereference(c->replicas);
|
||||
|
||||
if (ca->dev_idx >= replicas_dev_slots(r))
|
||||
goto out;
|
||||
|
||||
for_each_cpu_replicas_entry(r, e)
|
||||
if (replicas_test_dev(e, ca->dev_idx))
|
||||
ret |= 1 << e->data_type;
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
libbcachefs/replicas.h (new file, 51 lines)
@ -0,0 +1,51 @@
#ifndef _BCACHEFS_REPLICAS_H
#define _BCACHEFS_REPLICAS_H

bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
			  struct bch_devs_list);
bool bch2_bkey_replicas_marked(struct bch_fs *, enum bch_data_type,
			       struct bkey_s_c);
int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
		       struct bch_devs_list);
int bch2_mark_bkey_replicas(struct bch_fs *, enum bch_data_type,
			    struct bkey_s_c);

int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *, char *, size_t);
int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *, char *, size_t);

struct replicas_status {
	struct {
		unsigned	nr_online;
		unsigned	nr_offline;
	} replicas[BCH_DATA_NR];
};

struct replicas_status __bch2_replicas_status(struct bch_fs *,
					      struct bch_devs_mask);
struct replicas_status bch2_replicas_status(struct bch_fs *);
bool bch2_have_enough_devs(struct replicas_status, unsigned);

unsigned bch2_replicas_online(struct bch_fs *, bool);
unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);

int bch2_replicas_gc_end(struct bch_fs *, int);
int bch2_replicas_gc_start(struct bch_fs *, unsigned);

/* iterate over superblock replicas - used by userspace tools: */

static inline struct bch_replicas_entry *
replicas_entry_next(struct bch_replicas_entry *i)
{
	return (void *) i + offsetof(struct bch_replicas_entry, devs) + i->nr;
}

#define for_each_replicas_entry(_r, _i)					\
	for (_i = (_r)->entries;					\
	     (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
	     (_i) = replicas_entry_next(_i))

int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);

extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;

#endif /* _BCACHEFS_REPLICAS_H */
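
replicas_entry_next() above works because struct bch_replicas_entry ends in a flexible array of `nr` one-byte device indexes, so each record's size is `offsetof(..., devs) + nr`. A small self-contained sketch of walking such variable-length records (hypothetical `entry` layout, not the on-disk format):

	#include <stdio.h>
	#include <stddef.h>

	/* A variable-length record: fixed header plus nr one-byte entries. */
	struct entry {
		unsigned char	data_type;
		unsigned char	nr;
		unsigned char	devs[];
	};

	static struct entry *entry_next(struct entry *i)
	{
		return (void *) i + offsetof(struct entry, devs) + i->nr;
	}

	int main(void)
	{
		/* Two packed records: type 1 with devs {0,2}, type 2 with dev {1}. */
		unsigned char buf[] = { 1, 2, 0, 2,   2, 1, 1 };
		struct entry *i = (void *) buf;
		void *end = buf + sizeof(buf);

		while ((void *) i < end) {
			printf("type %u: %u devs\n", i->data_type, i->nr);
			i = entry_next(i);
		}
		return 0;
	}
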
|
File diff suppressed because it is too large
@ -11,8 +11,6 @@
struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *,
					  enum bch_sb_field_type, unsigned);
struct bch_sb_field *bch2_fs_sb_field_resize(struct bch_fs *,
					  enum bch_sb_field_type, unsigned);

#define field_to_type(_f, _name)					\
	container_of_or_null(_f, struct bch_sb_field_##_name, field)
@ -30,13 +28,6 @@ bch2_sb_resize_##_name(struct bch_sb_handle *sb, unsigned u64s)	\
{									\
	return field_to_type(bch2_sb_field_resize(sb,			\
				BCH_SB_FIELD_##_name, u64s), _name);	\
}									\
									\
static inline struct bch_sb_field_##_name *				\
bch2_fs_sb_resize_##_name(struct bch_fs *c, unsigned u64s)		\
{									\
	return field_to_type(bch2_fs_sb_field_resize(c,			\
				BCH_SB_FIELD_##_name, u64s), _name);	\
}

BCH_SB_FIELDS()
@ -44,6 +35,12 @@ BCH_SB_FIELDS()

extern const char * const bch2_sb_fields[];

struct bch_sb_field_ops {
	const char *	(*validate)(struct bch_sb *, struct bch_sb_field *);
	size_t		(*to_text)(char *, size_t, struct bch_sb *,
				   struct bch_sb_field *);
};

static inline bool bch2_sb_test_feature(struct bch_sb *sb,
					enum bch_sb_features f)
{
@ -90,7 +87,7 @@ int bch2_sb_to_fs(struct bch_fs *, struct bch_sb *);
int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *);

void bch2_free_super(struct bch_sb_handle *);
int bch2_super_realloc(struct bch_sb_handle *, unsigned);
int bch2_sb_realloc(struct bch_sb_handle *, unsigned);

const char *bch2_sb_validate(struct bch_sb_handle *);

@ -139,135 +136,4 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
	};
}

/* BCH_SB_FIELD_replicas: */

bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
			  struct bch_devs_list);
bool bch2_bkey_replicas_marked(struct bch_fs *, enum bch_data_type,
			       struct bkey_s_c);
int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
		       struct bch_devs_list);
int bch2_mark_bkey_replicas(struct bch_fs *, enum bch_data_type,
			    struct bkey_s_c);

int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *, char *, size_t);
int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *, char *, size_t);

struct replicas_status {
	struct {
		unsigned	nr_online;
		unsigned	nr_offline;
	} replicas[BCH_DATA_NR];
};

struct replicas_status __bch2_replicas_status(struct bch_fs *,
					      struct bch_devs_mask);
struct replicas_status bch2_replicas_status(struct bch_fs *);
bool bch2_have_enough_devs(struct replicas_status, unsigned);

unsigned bch2_replicas_online(struct bch_fs *, bool);
unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);

int bch2_replicas_gc_end(struct bch_fs *, int);
int bch2_replicas_gc_start(struct bch_fs *, unsigned);

/* iterate over superblock replicas - used by userspace tools: */

static inline struct bch_replicas_entry *
replicas_entry_next(struct bch_replicas_entry *i)
{
	return (void *) i + offsetof(struct bch_replicas_entry, devs) + i->nr;
}

#define for_each_replicas_entry(_r, _i)					\
	for (_i = (_r)->entries;					\
	     (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
	     (_i) = replicas_entry_next(_i))

/* disk groups: */

static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups)
{
	return groups
		? (vstruct_end(&groups->field) -
		   (void *) &groups->entries[0]) / sizeof(struct bch_disk_group)
		: 0;
}

struct target {
	enum {
		TARGET_NULL,
		TARGET_DEV,
		TARGET_GROUP,
	}			type;
	union {
		unsigned	dev;
		unsigned	group;
	};
};

#define TARGET_DEV_START	1
#define TARGET_GROUP_START	(256 + TARGET_DEV_START)

static inline u16 dev_to_target(unsigned dev)
{
	return TARGET_DEV_START + dev;
}

static inline u16 group_to_target(unsigned group)
{
	return TARGET_GROUP_START + group;
}

static inline struct target target_decode(unsigned target)
{
	if (target >= TARGET_GROUP_START)
		return (struct target) {
			.type	= TARGET_GROUP,
			.group	= target - TARGET_GROUP_START
		};

	if (target >= TARGET_DEV_START)
		return (struct target) {
			.type	= TARGET_DEV,
			.group	= target - TARGET_DEV_START
		};

	return (struct target) { .type = TARGET_NULL };
}

static inline bool dev_in_target(struct bch_dev *ca, unsigned target)
{
	struct target t = target_decode(target);

	switch (t.type) {
	case TARGET_NULL:
		return false;
	case TARGET_DEV:
		return ca->dev_idx == t.dev;
	case TARGET_GROUP:
		return ca->mi.group && ca->mi.group - 1 == t.group;
	default:
		BUG();
	}
}

static inline bool dev_idx_in_target(struct bch_fs *c, unsigned dev, unsigned target)
{
	bool ret;

	rcu_read_lock();
	ret = dev_in_target(rcu_dereference(c->devs[dev]), target);
	rcu_read_unlock();

	return ret;
}

const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *, unsigned);

int __bch2_disk_group_find(struct bch_sb_field_disk_groups *, const char *);

int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *);
int bch2_opt_target_print(struct bch_fs *, char *, size_t, u64);

#endif /* _BCACHEFS_SUPER_IO_H */

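The target encoding above packs both cases into one small integer namespace: 0 means "no target", values from TARGET_DEV_START (1) up encode a device index, and values from TARGET_GROUP_START (257) up encode a disk group, which is why target_decode() checks the group range first. A quick round-trip sketch using the constants as defined above:

	#include <assert.h>

	#define TARGET_DEV_START	1
	#define TARGET_GROUP_START	(256 + TARGET_DEV_START)

	int main(void)
	{
		/* device 3 encodes as 4; group 0 encodes as 257 */
		unsigned dev_target   = TARGET_DEV_START + 3;
		unsigned group_target = TARGET_GROUP_START + 0;

		assert(dev_target == 4);
		assert(group_target == 257);

		/* decode: the group range is tested first, then the dev range */
		assert(group_target >= TARGET_GROUP_START);
		assert(dev_target >= TARGET_DEV_START &&
		       dev_target <  TARGET_GROUP_START);
		return 0;
	}
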
@ -18,6 +18,7 @@
#include "clock.h"
#include "compress.h"
#include "debug.h"
#include "disk_groups.h"
#include "error.h"
#include "fs.h"
#include "fs-io.h"
@ -30,6 +31,7 @@
#include "migrate.h"
#include "movinggc.h"
#include "quota.h"
#include "replicas.h"
#include "super.h"
#include "super-io.h"
#include "sysfs.h"
||||
@ -122,7 +124,7 @@ static struct bch_fs *__bch2_uuid_to_fs(uuid_le uuid)
|
||||
lockdep_assert_held(&bch_fs_list_lock);
|
||||
|
||||
list_for_each_entry(c, &bch_fs_list, list)
|
||||
if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le)))
|
||||
if (!memcmp(&c->disk_sb.sb->uuid, &uuid, sizeof(uuid_le)))
|
||||
return c;
|
||||
|
||||
return NULL;
|
||||
@ -203,23 +205,12 @@ static void bch_fs_mark_clean(struct bch_fs *c)
|
||||
!test_bit(BCH_FS_ERROR, &c->flags) &&
|
||||
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) {
|
||||
mutex_lock(&c->sb_lock);
|
||||
SET_BCH_SB_CLEAN(c->disk_sb, true);
|
||||
SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static bool btree_interior_updates_done(struct bch_fs *c)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
ret = list_empty(&c->btree_interior_update_list);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __bch2_fs_read_only(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
@ -251,7 +242,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
|
||||
* fully complete:
|
||||
*/
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
btree_interior_updates_done(c));
|
||||
!bch2_btree_interior_updates_nr_pending(c));
|
||||
|
||||
if (!test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
|
||||
bch2_btree_verify_flushed(c);
|
||||
@ -433,7 +424,8 @@ static void bch2_fs_free(struct bch_fs *c)
|
||||
if (c->wq)
|
||||
destroy_workqueue(c->wq);
|
||||
|
||||
free_pages((unsigned long) c->disk_sb, c->disk_sb_order);
|
||||
free_pages((unsigned long) c->disk_sb.sb,
|
||||
c->disk_sb.page_order);
|
||||
kvpfree(c, sizeof(*c));
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
@ -501,11 +493,54 @@ void bch2_fs_stop(struct bch_fs *c)
|
||||
kobject_put(&c->kobj);
|
||||
}
|
||||
|
||||
static const char *bch2_fs_online(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
const char *err = NULL;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&bch_fs_list_lock);
|
||||
|
||||
if (!list_empty(&c->list))
|
||||
return NULL;
|
||||
|
||||
if (__bch2_uuid_to_fs(c->sb.uuid))
|
||||
return "filesystem UUID already open";
|
||||
|
||||
ret = bch2_fs_chardev_init(c);
|
||||
if (ret)
|
||||
return "error creating character device";
|
||||
|
||||
bch2_fs_debug_init(c);
|
||||
|
||||
if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ||
|
||||
kobject_add(&c->internal, &c->kobj, "internal") ||
|
||||
kobject_add(&c->opts_dir, &c->kobj, "options") ||
|
||||
kobject_add(&c->time_stats, &c->kobj, "time_stats") ||
|
||||
bch2_opts_create_sysfs_files(&c->opts_dir))
|
||||
return "error creating sysfs objects";
|
||||
|
||||
mutex_lock(&c->state_lock);
|
||||
|
||||
err = "error creating sysfs objects";
|
||||
__for_each_member_device(ca, c, i, NULL)
|
||||
if (bch2_dev_sysfs_online(c, ca))
|
||||
goto err;
|
||||
|
||||
list_add(&c->list, &bch_fs_list);
|
||||
err = NULL;
|
||||
err:
|
||||
mutex_unlock(&c->state_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
{
|
||||
struct bch_sb_field_members *mi;
|
||||
struct bch_fs *c;
|
||||
unsigned i, iter_size;
|
||||
const char *err;
|
||||
|
||||
pr_verbose_init(opts, "");
|
||||
|
||||
@ -516,6 +551,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
__module_get(THIS_MODULE);
|
||||
|
||||
c->minor = -1;
|
||||
c->disk_sb.fs_sb = true;
|
||||
|
||||
mutex_init(&c->state_lock);
|
||||
mutex_init(&c->sb_lock);
|
||||
@ -627,9 +663,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
bch2_fs_fsio_init(c))
|
||||
goto err;
|
||||
|
||||
mi = bch2_sb_get_members(c->disk_sb);
|
||||
mi = bch2_sb_get_members(c->disk_sb.sb);
|
||||
for (i = 0; i < c->sb.nr_devices; i++)
|
||||
if (bch2_dev_exists(c->disk_sb, mi, i) &&
|
||||
if (bch2_dev_exists(c->disk_sb.sb, mi, i) &&
|
||||
bch2_dev_alloc(c, i))
|
||||
goto err;
|
||||
|
||||
@ -644,6 +680,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
kobject_init(&c->internal, &bch2_fs_internal_ktype);
|
||||
kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
|
||||
kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
|
||||
|
||||
mutex_lock(&bch_fs_list_lock);
|
||||
err = bch2_fs_online(c);
|
||||
mutex_unlock(&bch_fs_list_lock);
|
||||
if (err) {
|
||||
bch_err(c, "bch2_fs_online() error: %s", err);
|
||||
goto err;
|
||||
}
|
||||
out:
|
||||
pr_verbose_init(opts, "ret %i", c ? 0 : -ENOMEM);
|
||||
return c;
|
||||
@ -653,60 +697,7 @@ err:
|
||||
goto out;
|
||||
}
|
||||
|
||||
static const char *__bch2_fs_online(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
const char *err = NULL;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&bch_fs_list_lock);
|
||||
|
||||
if (!list_empty(&c->list))
|
||||
return NULL;
|
||||
|
||||
if (__bch2_uuid_to_fs(c->sb.uuid))
|
||||
return "filesystem UUID already open";
|
||||
|
||||
ret = bch2_fs_chardev_init(c);
|
||||
if (ret)
|
||||
return "error creating character device";
|
||||
|
||||
bch2_fs_debug_init(c);
|
||||
|
||||
if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ||
|
||||
kobject_add(&c->internal, &c->kobj, "internal") ||
|
||||
kobject_add(&c->opts_dir, &c->kobj, "options") ||
|
||||
kobject_add(&c->time_stats, &c->kobj, "time_stats") ||
|
||||
bch2_opts_create_sysfs_files(&c->opts_dir))
|
||||
return "error creating sysfs objects";
|
||||
|
||||
mutex_lock(&c->state_lock);
|
||||
|
||||
err = "error creating sysfs objects";
|
||||
__for_each_member_device(ca, c, i, NULL)
|
||||
if (bch2_dev_sysfs_online(c, ca))
|
||||
goto err;
|
||||
|
||||
list_add(&c->list, &bch_fs_list);
|
||||
err = NULL;
|
||||
err:
|
||||
mutex_unlock(&c->state_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
static const char *bch2_fs_online(struct bch_fs *c)
|
||||
{
|
||||
const char *err;
|
||||
|
||||
mutex_lock(&bch_fs_list_lock);
|
||||
err = __bch2_fs_online(c);
|
||||
mutex_unlock(&bch_fs_list_lock);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static const char *__bch2_fs_start(struct bch_fs *c)
|
||||
const char *bch2_fs_start(struct bch_fs *c)
|
||||
{
|
||||
const char *err = "cannot allocate memory";
|
||||
struct bch_sb_field_members *mi;
|
||||
@ -730,15 +721,15 @@ static const char *__bch2_fs_start(struct bch_fs *c)
|
||||
bch2_dev_allocator_add(c, ca);
|
||||
bch2_recalc_capacity(c);
|
||||
|
||||
if (BCH_SB_INITIALIZED(c->disk_sb)) {
|
||||
if (BCH_SB_INITIALIZED(c->disk_sb.sb)) {
|
||||
ret = bch2_journal_read(c, &journal);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
j = &list_entry(journal.prev, struct journal_replay, list)->j;
|
||||
|
||||
c->prio_clock[READ].hand = le16_to_cpu(j->read_clock);
|
||||
c->prio_clock[WRITE].hand = le16_to_cpu(j->write_clock);
|
||||
c->bucket_clock[READ].hand = le16_to_cpu(j->read_clock);
|
||||
c->bucket_clock[WRITE].hand = le16_to_cpu(j->write_clock);
|
||||
|
||||
for (i = 0; i < BTREE_ID_NR; i++) {
|
||||
unsigned level;
|
||||
@ -824,21 +815,18 @@ static const char *__bch2_fs_start(struct bch_fs *c)
|
||||
bch_notice(c, "initializing new filesystem");
|
||||
|
||||
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
|
||||
set_bit(BCH_FS_BRAND_NEW_FS, &c->flags);
|
||||
|
||||
ret = bch2_initial_gc(c, &journal);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
err = "unable to allocate journal buckets";
|
||||
for_each_rw_member(ca, c, i)
|
||||
if (bch2_dev_journal_alloc(c, ca)) {
|
||||
for_each_online_member(ca, c, i)
|
||||
if (bch2_dev_journal_alloc(ca)) {
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
goto err;
|
||||
}
|
||||
|
||||
clear_bit(BCH_FS_BRAND_NEW_FS, &c->flags);
|
||||
|
||||
for (i = 0; i < BTREE_ID_NR; i++)
|
||||
bch2_btree_root_alloc(c, i);
|
||||
|
||||
@ -889,18 +877,20 @@ recovery_done:
|
||||
}
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
mi = bch2_sb_get_members(c->disk_sb);
|
||||
mi = bch2_sb_get_members(c->disk_sb.sb);
|
||||
now = ktime_get_seconds();
|
||||
|
||||
for_each_member_device(ca, c, i)
|
||||
mi->members[ca->dev_idx].last_mount = cpu_to_le64(now);
|
||||
|
||||
SET_BCH_SB_INITIALIZED(c->disk_sb, true);
|
||||
SET_BCH_SB_CLEAN(c->disk_sb, false);
|
||||
SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
|
||||
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
|
||||
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
set_bit(BCH_FS_STARTED, &c->flags);
|
||||
|
||||
err = NULL;
|
||||
out:
|
||||
mutex_unlock(&c->state_lock);
|
||||
@ -939,11 +929,6 @@ fsck_err:
|
||||
goto out;
|
||||
}
|
||||
|
||||
const char *bch2_fs_start(struct bch_fs *c)
|
||||
{
|
||||
return __bch2_fs_start(c) ?: bch2_fs_online(c);
|
||||
}
|
||||
|
||||
static const char *bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c)
|
||||
{
|
||||
struct bch_sb_field_members *sb_mi;
|
||||
@ -956,7 +941,7 @@ static const char *bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c)
|
||||
return "mismatched block size";
|
||||
|
||||
if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) <
|
||||
BCH_SB_BTREE_NODE_SIZE(c->disk_sb))
|
||||
BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb))
|
||||
return "new cache bucket size is too small";
|
||||
|
||||
return NULL;
|
||||
@ -1082,28 +1067,19 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
|
||||
static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
|
||||
struct bch_member *member)
|
||||
{
|
||||
struct bch_member *member;
|
||||
struct bch_dev *ca = NULL;
|
||||
int ret = 0;
|
||||
|
||||
pr_verbose_init(c->opts, "");
|
||||
|
||||
if (bch2_fs_init_fault("dev_alloc"))
|
||||
goto err;
|
||||
struct bch_dev *ca;
|
||||
|
||||
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
|
||||
if (!ca)
|
||||
goto err;
|
||||
return NULL;
|
||||
|
||||
kobject_init(&ca->kobj, &bch2_dev_ktype);
|
||||
init_completion(&ca->ref_completion);
|
||||
init_completion(&ca->io_ref_completion);
|
||||
|
||||
ca->dev_idx = dev_idx;
|
||||
__set_bit(ca->dev_idx, ca->self.d);
|
||||
|
||||
init_rwsem(&ca->bucket_lock);
|
||||
|
||||
writepoint_init(&ca->copygc_write_point, BCH_DATA_USER);
|
||||
@ -1113,14 +1089,8 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
|
||||
|
||||
INIT_WORK(&ca->io_error_work, bch2_io_error_work);
|
||||
|
||||
if (bch2_fs_init_fault("dev_alloc"))
|
||||
goto err;
|
||||
|
||||
member = bch2_sb_get_members(c->disk_sb)->members + dev_idx;
|
||||
|
||||
ca->mi = bch2_mi_to_cpu(member);
|
||||
ca->uuid = member->uuid;
|
||||
scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
|
||||
|
||||
if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete,
|
||||
0, GFP_KERNEL) ||
|
||||
@ -1132,11 +1102,43 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
|
||||
!(ca->io_done = alloc_percpu(*ca->io_done)))
|
||||
goto err;
|
||||
|
||||
return ca;
|
||||
err:
|
||||
bch2_dev_free(ca);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca,
|
||||
unsigned dev_idx)
|
||||
{
|
||||
ca->dev_idx = dev_idx;
|
||||
__set_bit(ca->dev_idx, ca->self.d);
|
||||
scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
|
||||
|
||||
ca->fs = c;
|
||||
rcu_assign_pointer(c->devs[ca->dev_idx], ca);
|
||||
|
||||
if (bch2_dev_sysfs_online(c, ca))
|
||||
pr_warn("error creating sysfs objects");
|
||||
}
|
||||
|
||||
static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
|
||||
{
|
||||
struct bch_member *member =
|
||||
bch2_sb_get_members(c->disk_sb.sb)->members + dev_idx;
|
||||
struct bch_dev *ca = NULL;
|
||||
int ret = 0;
|
||||
|
||||
pr_verbose_init(c->opts, "");
|
||||
|
||||
if (bch2_fs_init_fault("dev_alloc"))
|
||||
goto err;
|
||||
|
||||
ca = __bch2_dev_alloc(c, member);
|
||||
if (!ca)
|
||||
goto err;
|
||||
|
||||
bch2_dev_attach(c, ca, dev_idx);
|
||||
out:
|
||||
pr_verbose_init(c->opts, "ret %i", ret);
|
||||
return ret;
|
||||
@ -1147,21 +1149,9 @@ err:
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
|
||||
static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
if (le64_to_cpu(sb->sb->seq) >
|
||||
le64_to_cpu(c->disk_sb->seq))
|
||||
bch2_sb_to_fs(c, sb->sb);
|
||||
|
||||
BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
|
||||
!c->devs[sb->sb->dev_idx]);
|
||||
|
||||
ca = bch_dev_locked(c, sb->sb->dev_idx);
|
||||
unsigned ret;
|
||||
|
||||
if (bch2_dev_is_online(ca)) {
|
||||
bch_err(ca, "already have device online in slot %u",
|
||||
@ -1179,7 +1169,7 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
|
||||
|
||||
if (get_capacity(sb->bdev->bd_disk) <
|
||||
ca->mi.bucket_size * ca->mi.nbuckets) {
|
||||
bch_err(c, "device too small");
|
||||
bch_err(ca, "device too small");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -1187,35 +1177,50 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Increase journal write timeout if flushes to this device are
|
||||
* expensive:
|
||||
*/
|
||||
if (!blk_queue_nonrot(bdev_get_queue(sb->bdev)) &&
|
||||
journal_flushes_device(ca))
|
||||
c->journal.write_delay_ms =
|
||||
max(c->journal.write_delay_ms, 1000U);
|
||||
|
||||
/* Commit: */
|
||||
ca->disk_sb = *sb;
|
||||
if (sb->mode & FMODE_EXCL)
|
||||
ca->disk_sb.bdev->bd_holder = ca;
|
||||
memset(sb, 0, sizeof(*sb));
|
||||
|
||||
if (ca->fs)
|
||||
mutex_lock(&ca->fs->sb_lock);
|
||||
|
||||
bch2_mark_dev_superblock(ca->fs, ca, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
|
||||
|
||||
if (ca->fs)
|
||||
mutex_unlock(&ca->fs->sb_lock);
|
||||
|
||||
percpu_ref_reinit(&ca->io_ref);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
if (le64_to_cpu(sb->sb->seq) >
|
||||
le64_to_cpu(c->disk_sb.sb->seq))
|
||||
bch2_sb_to_fs(c, sb->sb);
|
||||
|
||||
BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
|
||||
!c->devs[sb->sb->dev_idx]);
|
||||
|
||||
ca = bch_dev_locked(c, sb->sb->dev_idx);
|
||||
|
||||
ret = __bch2_dev_attach_bdev(ca, sb);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (c->sb.nr_devices == 1)
|
||||
bdevname(ca->disk_sb.bdev, c->name);
|
||||
bdevname(ca->disk_sb.bdev, ca->name);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
bch2_mark_dev_superblock(c, ca, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_RW)
|
||||
bch2_dev_allocator_add(c, ca);
|
||||
|
||||
rebalance_wakeup(c);
|
||||
|
||||
percpu_ref_reinit(&ca->io_ref);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1289,10 +1294,10 @@ static bool bch2_fs_may_start(struct bch_fs *c)
|
||||
|
||||
if (!c->opts.degraded) {
|
||||
mutex_lock(&c->sb_lock);
|
||||
mi = bch2_sb_get_members(c->disk_sb);
|
||||
mi = bch2_sb_get_members(c->disk_sb.sb);
|
||||
|
||||
for (i = 0; i < c->disk_sb->nr_devices; i++) {
|
||||
if (!bch2_dev_exists(c->disk_sb, mi, i))
|
||||
for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
|
||||
if (!bch2_dev_exists(c->disk_sb.sb, mi, i))
|
||||
continue;
|
||||
|
||||
ca = bch_dev_locked(c, i);
|
||||
@ -1360,7 +1365,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
|
||||
bch_notice(ca, "%s", bch2_dev_state[new_state]);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
mi = bch2_sb_get_members(c->disk_sb);
|
||||
mi = bch2_sb_get_members(c->disk_sb.sb);
|
||||
SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], new_state);
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
@ -1470,7 +1475,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
|
||||
* this device must be gone:
|
||||
*/
|
||||
mutex_lock(&c->sb_lock);
|
||||
mi = bch2_sb_get_members(c->disk_sb);
|
||||
mi = bch2_sb_get_members(c->disk_sb.sb);
|
||||
memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid));
|
||||
|
||||
bch2_write_super(c);
|
||||
@ -1492,8 +1497,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
|
||||
struct bch_sb_handle sb;
|
||||
const char *err;
|
||||
struct bch_dev *ca = NULL;
|
||||
struct bch_sb_field_members *mi, *dev_mi;
|
||||
struct bch_member saved_mi;
|
||||
struct bch_sb_field_members *mi;
|
||||
struct bch_member dev_mi;
|
||||
unsigned dev_idx, nr_devices, u64s;
|
||||
int ret;
|
||||
|
||||
@ -1505,24 +1510,52 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
|
||||
if (err)
|
||||
return -EINVAL;
|
||||
|
||||
dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx];
|
||||
|
||||
err = bch2_dev_may_add(sb.sb, c);
|
||||
if (err)
|
||||
return -EINVAL;
|
||||
|
||||
ca = __bch2_dev_alloc(c, &dev_mi);
|
||||
if (!ca) {
|
||||
bch2_free_super(&sb);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ret = __bch2_dev_attach_bdev(ca, &sb);
|
||||
if (ret) {
|
||||
			bch2_dev_free(ca);
		return ret;
	}

	err = "journal alloc failed";
	ret = bch2_dev_journal_alloc(ca);
	if (ret)
		goto err;

	mutex_lock(&c->state_lock);
	mutex_lock(&c->sb_lock);

	/* Grab member info for new disk: */
	dev_mi = bch2_sb_get_members(sb.sb);
	saved_mi = dev_mi->members[sb.sb->dev_idx];
	saved_mi.last_mount = cpu_to_le64(ktime_get_seconds());
	err = "insufficient space in new superblock";
	ret = bch2_sb_from_fs(c, ca);
	if (ret)
		goto err_unlock;

	mi = bch2_sb_get_members(ca->disk_sb.sb);

	if (!bch2_sb_resize_members(&ca->disk_sb,
			le32_to_cpu(mi->field.u64s) +
			sizeof(dev_mi) / sizeof(u64))) {
		ret = -ENOSPC;
		goto err_unlock;
	}

	if (dynamic_fault("bcachefs:add:no_slot"))
		goto no_slot;

	mi = bch2_sb_get_members(c->disk_sb);
	mi = bch2_sb_get_members(c->disk_sb.sb);
	for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++)
		if (!bch2_dev_exists(c->disk_sb, mi, dev_idx))
		if (!bch2_dev_exists(c->disk_sb.sb, mi, dev_idx))
			goto have_slot;
no_slot:
	err = "no slots available in superblock";
@@ -1533,64 +1566,47 @@ have_slot:
	nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
	u64s = (sizeof(struct bch_sb_field_members) +
		sizeof(struct bch_member) * nr_devices) / sizeof(u64);

	err = "no space in superblock for member info";
	ret = -ENOSPC;

	dev_mi = bch2_sb_resize_members(&sb, u64s);
	if (!dev_mi)
		goto err_unlock;

	mi = bch2_fs_sb_resize_members(c, u64s);
	mi = bch2_sb_resize_members(&c->disk_sb, u64s);
	if (!mi)
		goto err_unlock;

	memcpy(dev_mi, mi, u64s * sizeof(u64));
	dev_mi->members[dev_idx] = saved_mi;
	/* success: */

	sb.sb->uuid = c->disk_sb->uuid;
	sb.sb->dev_idx = dev_idx;
	sb.sb->nr_devices = nr_devices;
	mi->members[dev_idx] = dev_mi;
	mi->members[dev_idx].last_mount = cpu_to_le64(ktime_get_seconds());
	c->disk_sb.sb->nr_devices = nr_devices;

	/* commit new member info */
	memcpy(mi, dev_mi, u64s * sizeof(u64));
	c->disk_sb->nr_devices = nr_devices;
	c->sb.nr_devices = nr_devices;
	ca->disk_sb.sb->dev_idx = dev_idx;
	bch2_dev_attach(c, ca, dev_idx);

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	if (bch2_dev_alloc(c, dev_idx)) {
		err = "cannot allocate memory";
		ret = -ENOMEM;
		goto err;
	}

	if (__bch2_dev_online(c, &sb)) {
		err = "bch2_dev_online() error";
		ret = -ENOMEM;
		goto err;
	}

	ca = bch_dev_locked(c, dev_idx);
	if (ca->mi.state == BCH_MEMBER_STATE_RW) {
		err = __bch2_dev_read_write(c, ca);
		if (err)
			goto err;

		err = "journal alloc failed";
		if (bch2_dev_journal_alloc(c, ca))
			goto err;
			goto err_late;
	}

	mutex_unlock(&c->state_lock);
	return 0;

err_unlock:
	mutex_unlock(&c->sb_lock);
err:
	mutex_unlock(&c->state_lock);
err:
	if (ca)
		bch2_dev_free(ca);
	bch2_free_super(&sb);

	bch_err(c, "Unable to add device: %s", err);
	return ret ?: -EINVAL;
	return ret;
err_late:
	bch_err(c, "Error going rw after adding device: %s", err);
	return -EINVAL;
}
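
Throughout bch2_dev_add(), superblock field sizes are expressed in 64-bit words, which is where the recurring / sizeof(u64) arithmetic comes from. A worked instance of the members-field sizing, with purely illustrative struct sizes (the real sizes live in bcachefs_format.h):

	/* e.g. an 8-byte field header plus 56-byte members, nr_devices == 4:
	 * (8 + 56 * 4) / 8 == 29 u64s requested from the superblock
	 * (sizes illustrative only) */
	unsigned u64s = (sizeof(struct bch_sb_field_members) +
			 sizeof(struct bch_member) * nr_devices) / sizeof(u64);
	mi = bch2_sb_resize_members(&c->disk_sb, u64s);
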
/* Hot add existing device to running filesystem: */
@@ -1613,12 +1629,12 @@ int bch2_dev_online(struct bch_fs *c, const char *path)

	dev_idx = sb.sb->dev_idx;

	err = bch2_dev_in_fs(c->disk_sb, sb.sb);
	err = bch2_dev_in_fs(c->disk_sb.sb, sb.sb);
	if (err)
		goto err;

	if (__bch2_dev_online(c, &sb)) {
		err = "__bch2_dev_online() error";
	if (bch2_dev_attach_bdev(c, &sb)) {
		err = "bch2_dev_attach_bdev() error";
		goto err;
	}

@@ -1688,7 +1704,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
	}

	mutex_lock(&c->sb_lock);
	mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
	mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
	mi->nbuckets = cpu_to_le64(nbuckets);

	bch2_write_super(c);
@@ -1721,74 +1737,6 @@ found:
	return ca;
}

int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *label)
{
	struct bch_sb_field_disk_groups *groups;
	struct bch_disk_group *g;
	struct bch_member *mi;
	unsigned i, v, nr_groups;
	int ret;

	if (strlen(label) > BCH_SB_LABEL_SIZE)
		return -EINVAL;

	mutex_lock(&c->sb_lock);
	groups = bch2_sb_get_disk_groups(c->disk_sb);
	nr_groups = disk_groups_nr(groups);

	if (!strcmp(label, "none")) {
		v = 0;
		goto write_sb;
	}

	ret = __bch2_disk_group_find(groups, label);
	if (ret >= 0) {
		v = ret + 1;
		goto write_sb;
	}

	/* not found - create a new disk group: */

	for (i = 0;
	     i < nr_groups && !BCH_GROUP_DELETED(&groups->entries[i]);
	     i++)
		;

	if (i == nr_groups) {
		unsigned u64s =
			(sizeof(struct bch_sb_field_disk_groups) +
			 sizeof(struct bch_disk_group) * (nr_groups + 1)) /
			sizeof(u64);

		groups = bch2_fs_sb_resize_disk_groups(c, u64s);
		if (!groups) {
			mutex_unlock(&c->sb_lock);
			return -ENOSPC;
		}

		nr_groups = disk_groups_nr(groups);
	}

	BUG_ON(i >= nr_groups);

	g = &groups->entries[i];
	v = i + 1;

	memcpy(g->label, label, strlen(label));
	if (strlen(label) < sizeof(g->label))
		g->label[strlen(label)] = '\0';
	SET_BCH_GROUP_DELETED(g, 0);
	SET_BCH_GROUP_DATA_ALLOWED(g, ~0);
write_sb:
	mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
	SET_BCH_MEMBER_GROUP(mi, v);

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return 0;
}
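
A disk-group reference stored in the member field uses zero for "no group" and index-plus-one otherwise, which is why bch2_dev_group_set() stores v = i + 1 (or v = 0 for "none") and readers subtract one. A minimal sketch of the decode side (member_group_idx() is a hypothetical helper, not in the source):

	/* hypothetical helper illustrating the index+1 encoding */
	static inline int member_group_idx(const struct bch_member *mi)
	{
		unsigned v = BCH_MEMBER_GROUP(mi);

		return v ? (int) (v - 1) : -1;	/* -1: no group assigned */
	}
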
/* Filesystem open: */

struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
@@ -1845,7 +1793,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
	err = "bch2_dev_online() error";
	mutex_lock(&c->state_lock);
	for (i = 0; i < nr_devices; i++)
		if (__bch2_dev_online(c, &sb[i])) {
		if (bch2_dev_attach_bdev(c, &sb[i])) {
			mutex_unlock(&c->state_lock);
			goto err_print;
		}
@@ -1856,15 +1804,10 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
		goto err_print;

	if (!c->opts.nostart) {
		err = __bch2_fs_start(c);
		err = bch2_fs_start(c);
		if (err)
			goto err_print;
	}

	err = bch2_fs_online(c);
	if (err)
		goto err_print;

out:
	kfree(sb);
	module_put(THIS_MODULE);
@@ -1900,7 +1843,7 @@ static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb,
	if (c) {
		closure_get(&c->cl);

		err = bch2_dev_in_fs(c->disk_sb, sb->sb);
		err = bch2_dev_in_fs(c->disk_sb.sb, sb->sb);
		if (err)
			goto err;
	} else {
@@ -1915,22 +1858,18 @@ static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb,
	err = "bch2_dev_online() error";

	mutex_lock(&c->sb_lock);
	if (__bch2_dev_online(c, sb)) {
	if (bch2_dev_attach_bdev(c, sb)) {
		mutex_unlock(&c->sb_lock);
		goto err;
	}
	mutex_unlock(&c->sb_lock);

	if (!c->opts.nostart && bch2_fs_may_start(c)) {
		err = __bch2_fs_start(c);
		err = bch2_fs_start(c);
		if (err)
			goto err;
	}

	err = __bch2_fs_online(c);
	if (err)
		goto err;

	closure_put(&c->cl);
	mutex_unlock(&bch_fs_list_lock);

@@ -195,7 +195,6 @@ int bch2_dev_online(struct bch_fs *, const char *);
int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int);
int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);

bool bch2_fs_emergency_read_only(struct bch_fs *);
void bch2_fs_read_only(struct bch_fs *);

@@ -7,6 +7,9 @@ struct bch_sb_handle {
	struct bio *bio;
	unsigned page_order;
	fmode_t mode;
	unsigned have_layout:1;
	unsigned have_bio:1;
	unsigned fs_sb:1;
};

struct bch_devs_mask {
@@ -44,8 +47,9 @@ struct bch_replicas_cpu {
};

struct bch_disk_group_cpu {
	struct bch_devs_mask devs;
	bool deleted;
	u16 parent;
	struct bch_devs_mask devs;
};

struct bch_disk_groups_cpu {
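
These type hunks are the core of the refactor: struct bch_fs now embeds a struct bch_sb_handle instead of a bare struct bch_sb *, so every former c->disk_sb access becomes c->disk_sb.sb. A sketch of the resulting shape; the .sb member itself is outside this hunk and is inferred from the c->disk_sb.sb accesses, so treat the layout as illustrative:

	struct bch_sb_handle {
		struct bch_sb	*sb;	/* inferred from c->disk_sb.sb uses */
		struct bio	*bio;
		unsigned	page_order;
		fmode_t		mode;
		unsigned	have_layout:1;
		unsigned	have_bio:1;
		unsigned	fs_sb:1;
	};

Carrying the bio, page order, open mode and state flags next to the superblock pointer is what lets helpers such as bch2_sb_resize_members() take a single handle argument instead of a pointer plus separate bookkeeping.
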
@@ -18,11 +18,13 @@
#include "btree_update_interior.h"
#include "btree_gc.h"
#include "buckets.h"
#include "disk_groups.h"
#include "inode.h"
#include "journal.h"
#include "keylist.h"
#include "move.h"
#include "opts.h"
#include "replicas.h"
#include "super-io.h"
#include "tier.h"

@@ -140,10 +142,10 @@ read_attribute(first_bucket);
read_attribute(nbuckets);
read_attribute(durability);
read_attribute(iostats);
read_attribute(read_priority_stats);
read_attribute(write_priority_stats);
read_attribute(fragmentation_stats);
read_attribute(oldest_gen_stats);
read_attribute(last_read_quantiles);
read_attribute(last_write_quantiles);
read_attribute(fragmentation_quantiles);
read_attribute(oldest_gen_quantiles);
read_attribute(reserve_stats);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
@@ -167,7 +169,7 @@ rw_attribute(journal_reclaim_delay_ms);

rw_attribute(discard);
rw_attribute(cache_replacement_policy);
rw_attribute(group);
rw_attribute(label);

rw_attribute(copy_gc_enabled);
sysfs_pd_controller_attribute(copy_gc);
@@ -546,7 +548,7 @@ STORE(bch2_fs_opts_dir)

	if (opt->set_sb != SET_NO_SB_OPT) {
		mutex_lock(&c->sb_lock);
		opt->set_sb(c->disk_sb, v);
		opt->set_sb(c->disk_sb.sb, v);
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
	}
@@ -621,36 +623,41 @@ struct attribute *bch2_fs_time_stats_files[] = {
	NULL
};

typedef unsigned (bucket_map_fn)(struct bch_dev *, size_t, void *);
typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *,
				 size_t, void *);

static unsigned bucket_priority_fn(struct bch_dev *ca, size_t b,
				   void *private)
static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca,
				  size_t b, void *private)
{
	struct bucket *g = bucket(ca, b);
	int rw = (private ? 1 : 0);

	return ca->fs->prio_clock[rw].hand - g->prio[rw];
	return bucket_last_io(c, bucket(ca, b), rw);
}

static unsigned bucket_sectors_used_fn(struct bch_dev *ca, size_t b,
				       void *private)
static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca,
				       size_t b, void *private)
{
	struct bucket *g = bucket(ca, b);
	return bucket_sectors_used(g->mark);
}

static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, size_t b,
				     void *private)
static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca,
				     size_t b, void *private)
{
	return bucket_gc_gen(ca, b);
}

static ssize_t show_quantiles(struct bch_dev *ca, char *buf,
			      bucket_map_fn *fn, void *private)
static int unsigned_cmp(const void *_l, const void *_r)
{
	int cmp(const void *l, const void *r)
	{	return *((unsigned *) r) - *((unsigned *) l); }
	unsigned l = *((unsigned *) _l);
	unsigned r = *((unsigned *) _r);

	return (l > r) - (l < r);
}

static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca,
			      char *buf, bucket_map_fn *fn, void *private)
{
	size_t i, n;
	/* Compute 31 quantiles */
	unsigned q[31], *p;
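
Besides dropping the GCC-specific nested function, unsigned_cmp() fixes the comparison itself: returning *r - *l as an int can produce the wrong sign once the unsigned difference exceeds INT_MAX (it also sorted descending, where the new comparator sorts ascending, which the quantile walk presumably accounts for). A standalone check of the pattern, with hypothetical test values:

	#include <assert.h>

	static int unsigned_cmp(const void *_l, const void *_r)
	{
		unsigned l = *((unsigned *) _l);
		unsigned r = *((unsigned *) _r);

		return (l > r) - (l < r);	/* well-defined -1/0/1 */
	}

	int main(void)
	{
		unsigned a = 0, b = 0x80000001u;	/* difference > INT_MAX */

		assert(unsigned_cmp(&a, &b) < 0);
		assert(unsigned_cmp(&b, &a) > 0);
		assert(unsigned_cmp(&a, &a) == 0);
		return 0;
	}
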
@@ -666,9 +673,9 @@ static ssize_t show_quantiles(struct bch_dev *ca, char *buf,
	}

	for (i = ca->mi.first_bucket; i < n; i++)
		p[i] = fn(ca, i, private);
		p[i] = fn(c, ca, i, private);

	sort(p, n, sizeof(unsigned), cmp, NULL);
	sort(p, n, sizeof(unsigned), unsigned_cmp, NULL);
	up_read(&ca->bucket_lock);

	while (n &&
@@ -804,24 +811,18 @@ SHOW(bch2_dev)
	sysfs_print(durability, ca->mi.durability);
	sysfs_print(discard, ca->mi.discard);

	if (attr == &sysfs_group) {
		struct bch_sb_field_disk_groups *groups;
		struct bch_disk_group *g;
		unsigned len;
	if (attr == &sysfs_label) {
		if (ca->mi.group) {
			mutex_lock(&c->sb_lock);
			out += bch2_disk_path_print(&c->disk_sb, out, end - out,
						    ca->mi.group - 1);
			mutex_unlock(&c->sb_lock);
		} else {
			out += scnprintf(out, end - out, "none");
		}

		if (!ca->mi.group)
			return scnprintf(out, end - out, "none\n");

		mutex_lock(&c->sb_lock);
		groups = bch2_sb_get_disk_groups(c->disk_sb);

		g = &groups->entries[ca->mi.group - 1];
		len = strnlen(g->label, sizeof(g->label));
		memcpy(buf, g->label, len);
		mutex_unlock(&c->sb_lock);

		buf[len++] = '\n';
		return len;
		out += scnprintf(out, end - out, "\n");
		return out - buf;
	}

	if (attr == &sysfs_has_data) {
@@ -852,14 +853,16 @@ SHOW(bch2_dev)

	if (attr == &sysfs_iostats)
		return show_dev_iostats(ca, buf);
	if (attr == &sysfs_read_priority_stats)
		return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0);
	if (attr == &sysfs_write_priority_stats)
		return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1);
	if (attr == &sysfs_fragmentation_stats)
		return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL);
	if (attr == &sysfs_oldest_gen_stats)
		return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL);

	if (attr == &sysfs_last_read_quantiles)
		return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0);
	if (attr == &sysfs_last_write_quantiles)
		return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 1);
	if (attr == &sysfs_fragmentation_quantiles)
		return show_quantiles(c, ca, buf, bucket_sectors_used_fn, NULL);
	if (attr == &sysfs_oldest_gen_quantiles)
		return show_quantiles(c, ca, buf, bucket_oldest_gen_fn, NULL);

	if (attr == &sysfs_reserve_stats)
		return show_reserve_stats(ca, buf);
	if (attr == &sysfs_alloc_debug)
@@ -880,7 +883,7 @@ STORE(bch2_dev)
		bool v = strtoul_or_return(buf);

		mutex_lock(&c->sb_lock);
		mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
		mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];

		if (v != BCH_MEMBER_DISCARD(mi)) {
			SET_BCH_MEMBER_DISCARD(mi, v);
@@ -896,7 +899,7 @@ STORE(bch2_dev)
			return v;

		mutex_lock(&c->sb_lock);
		mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
		mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];

		if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
			SET_BCH_MEMBER_REPLACEMENT(mi, v);
@@ -905,7 +908,7 @@ STORE(bch2_dev)
		mutex_unlock(&c->sb_lock);
	}

	if (attr == &sysfs_group) {
	if (attr == &sysfs_label) {
		char *tmp;
		int ret;

@@ -938,16 +941,16 @@ struct attribute *bch2_dev_files[] = {
	&sysfs_discard,
	&sysfs_cache_replacement_policy,
	&sysfs_state_rw,
	&sysfs_group,
	&sysfs_label,

	&sysfs_has_data,
	&sysfs_iostats,

	/* alloc info - other stats: */
	&sysfs_read_priority_stats,
	&sysfs_write_priority_stats,
	&sysfs_fragmentation_stats,
	&sysfs_oldest_gen_stats,
	&sysfs_last_read_quantiles,
	&sysfs_last_write_quantiles,
	&sysfs_fragmentation_quantiles,
	&sysfs_oldest_gen_quantiles,
	&sysfs_reserve_stats,

	/* debug: */

@@ -4,6 +4,7 @@
#include "btree_iter.h"
#include "buckets.h"
#include "clock.h"
#include "disk_groups.h"
#include "extents.h"
#include "io.h"
#include "move.h"

@@ -86,8 +86,7 @@ const struct bch_hash_desc bch2_xattr_hash_desc = {
	.cmp_bkey	= xattr_cmp_bkey,
};

static const char *bch2_xattr_invalid(const struct bch_fs *c,
				      struct bkey_s_c k)
const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
	const struct xattr_handler *handler;
	struct bkey_s_c_xattr xattr;
@@ -126,8 +125,8 @@ static const char *bch2_xattr_invalid(const struct bch_fs *c,
	}
}

static void bch2_xattr_to_text(struct bch_fs *c, char *buf,
			       size_t size, struct bkey_s_c k)
void bch2_xattr_to_text(struct bch_fs *c, char *buf,
			size_t size, struct bkey_s_c k)
{
	const struct xattr_handler *handler;
	struct bkey_s_c_xattr xattr;
@@ -159,11 +158,6 @@ static void bch2_xattr_to_text(struct bch_fs *c, char *buf,
	}
}

const struct bkey_ops bch2_bkey_xattr_ops = {
	.key_invalid	= bch2_xattr_invalid,
	.val_to_text	= bch2_xattr_to_text,
};

int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
		   const char *name, void *buffer, size_t size, int type)
{

@@ -4,7 +4,14 @@
#include "str_hash.h"

extern const struct bch_hash_desc bch2_xattr_hash_desc;
extern const struct bkey_ops bch2_bkey_xattr_ops;

const char *bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_xattr_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);

#define bch2_bkey_xattr_ops (struct bkey_ops) {		\
	.key_invalid	= bch2_xattr_invalid,		\
	.val_to_text	= bch2_xattr_to_text,		\
}

struct dentry;
struct xattr_handler;
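
The xattr bkey ops also move from a static const object in xattr.c to a compound-literal macro in the header, with the two callbacks exported in its place. Each use of the macro now yields a fresh struct bkey_ops value; roughly what a caller's expansion looks like (illustrative only):

	struct bkey_ops ops = bch2_bkey_xattr_ops;
	/* expands to:
	 * struct bkey_ops ops = (struct bkey_ops) {
	 *	.key_invalid	= bch2_xattr_invalid,
	 *	.val_to_text	= bch2_xattr_to_text,
	 * };
	 */
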