mirror of https://github.com/koverstreet/bcachefs-tools.git
Update bcachefs sources to 10ab39f2fa bcachefs: Improvements to the journal read error paths
parent 21ade39653
commit cd9892e543
@@ -1 +1 @@
f385d13bf8510277eedcc24de4a6a3c3bf8f334b
10ab39f2faede817eebfd04a4990e739d0cedcb8
cmd_fs.c
@@ -42,7 +42,7 @@ static void print_dev_usage(struct bchfs_handle fs,
printf("%-20s%12s%12s%12s\n",
"", "data", "buckets", "fragmented");

for (i = BCH_DATA_SB; i < BCH_DATA_NR; i++) {
for (i = BCH_DATA_sb; i < BCH_DATA_NR; i++) {
print_dev_usage_type(bch2_data_types[i],
u.bucket_size,
u.buckets[i],
@@ -162,21 +162,21 @@ static void print_fs_usage(const char *path, enum units units)
struct bch_replicas_usage *r;

for_each_usage_replica(u, r)
if (r->r.data_type < BCH_DATA_USER)
if (r->r.data_type < BCH_DATA_user)
print_replicas_usage(r, &dev_names, units);

for_each_usage_replica(u, r)
if (r->r.data_type == BCH_DATA_USER &&
if (r->r.data_type == BCH_DATA_user &&
r->r.nr_required <= 1)
print_replicas_usage(r, &dev_names, units);

for_each_usage_replica(u, r)
if (r->r.data_type == BCH_DATA_USER &&
if (r->r.data_type == BCH_DATA_user &&
r->r.nr_required > 1)
print_replicas_usage(r, &dev_names, units);

for_each_usage_replica(u, r)
if (r->r.data_type > BCH_DATA_USER)
if (r->r.data_type > BCH_DATA_user)
print_replicas_usage(r, &dev_names, units);

free(u);
@@ -201,6 +201,7 @@ static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)

int blk_status_to_errno(blk_status_t status);
blk_status_t errno_to_blk_status(int errno);
const char *blk_status_to_str(blk_status_t status);

#endif /* __TOOLS_LINUX_BLKDEV_H */
@@ -12,7 +12,7 @@

#define vfree(p) free(p)

static inline void *__vmalloc(unsigned long size, gfp_t gfp_mask, unsigned prot)
static inline void *__vmalloc(unsigned long size, gfp_t gfp_mask)
{
void *p;

@@ -22,26 +22,36 @@ static inline void *__vmalloc(unsigned long size, gfp_t gfp_mask, unsigned prot)
if (!p)
return NULL;

if (prot == PAGE_KERNEL_EXEC &&
mprotect(p, size, PROT_READ|PROT_WRITE|PROT_EXEC)) {
vfree(p);
return NULL;
}

if (gfp_mask & __GFP_ZERO)
memset(p, 0, size);

return p;
}

static inline void *vmalloc_exec(unsigned long size, gfp_t gfp_mask)
{
void *p;

p = __vmalloc(size, gfp_mask);
if (!p)
return NULL;

if (mprotect(p, size, PROT_READ|PROT_WRITE|PROT_EXEC)) {
vfree(p);
return NULL;
}

return p;
}

static inline void *vmalloc(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
return __vmalloc(size, GFP_KERNEL);
}

static inline void *vzalloc(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL|__GFP_ZERO, PAGE_KERNEL);
return __vmalloc(size, GFP_KERNEL|__GFP_ZERO);
}

#endif /* __TOOLS_LINUX_VMALLOC_H */
@@ -470,10 +470,10 @@ TRACE_EVENT(move_data,
);

TRACE_EVENT(copygc,
TP_PROTO(struct bch_dev *ca,
TP_PROTO(struct bch_fs *c,
u64 sectors_moved, u64 sectors_not_moved,
u64 buckets_moved, u64 buckets_not_moved),
TP_ARGS(ca,
TP_ARGS(c,
sectors_moved, sectors_not_moved,
buckets_moved, buckets_not_moved),

@@ -486,7 +486,7 @@ TRACE_EVENT(copygc,
),

TP_fast_assign(
memcpy(__entry->uuid, ca->uuid.b, 16);
memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->sectors_moved = sectors_moved;
__entry->sectors_not_moved = sectors_not_moved;
__entry->buckets_moved = buckets_moved;
@@ -948,8 +948,8 @@ int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd)
bch2_data_types[e.p.data_type]);

switch (e.p.data_type) {
case BCH_DATA_BTREE:
case BCH_DATA_USER:
case BCH_DATA_btree:
case BCH_DATA_user:
printf(" %s:%llu:%llu",
bch2_btree_ids[e.p.btree_id],
e.p.pos.inode,
@@ -41,29 +41,26 @@ static void pd_controllers_update(struct work_struct *work)
struct bch_fs,
pd_controllers_update);
struct bch_dev *ca;
s64 free = 0, fragmented = 0;
unsigned i;

for_each_member_device(ca, c, i) {
struct bch_dev_usage stats = bch2_dev_usage_read(c, ca);
struct bch_dev_usage stats = bch2_dev_usage_read(ca);

u64 free = bucket_to_sector(ca,
free += bucket_to_sector(ca,
__dev_buckets_free(ca, stats)) << 9;
/*
* Bytes of internal fragmentation, which can be
* reclaimed by copy GC
*/
s64 fragmented = (bucket_to_sector(ca,
stats.buckets[BCH_DATA_USER] +
stats.buckets[BCH_DATA_CACHED]) -
(stats.sectors[BCH_DATA_USER] +
stats.sectors[BCH_DATA_CACHED])) << 9;

fragmented = max(0LL, fragmented);

bch2_pd_controller_update(&ca->copygc_pd,
free, fragmented, -1);
fragmented += max_t(s64, 0, (bucket_to_sector(ca,
stats.buckets[BCH_DATA_user] +
stats.buckets[BCH_DATA_cached]) -
(stats.sectors[BCH_DATA_user] +
stats.sectors[BCH_DATA_cached])) << 9);
}

bch2_pd_controller_update(&c->copygc_pd, free, fragmented, -1);
schedule_delayed_work(&c->pd_controllers_update,
c->pd_controllers_update_seconds * HZ);
}
@@ -353,6 +350,8 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
bch2_btree_iter_set_pos(iter, POS(i, first_bucket));

while (1) {
bch2_trans_cond_resched(&trans);

ret = bch2_alloc_write_key(&trans, iter, flags);
if (ret < 0 || ret == ALLOC_END)
break;
@@ -517,11 +516,13 @@ static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca)
if (gc_count != c->gc_count)
ca->inc_gen_really_needs_gc = 0;

available = max_t(s64, 0, dev_buckets_available(c, ca) -
available = max_t(s64, 0, dev_buckets_available(ca) -
ca->inc_gen_really_needs_gc);

if (available > fifo_free(&ca->free_inc) ||
(available && !fifo_full(&ca->free[RESERVE_BTREE])))
(available &&
(!fifo_full(&ca->free[RESERVE_BTREE]) ||
!fifo_full(&ca->free[RESERVE_MOVINGGC]))))
break;

up_read(&c->gc_lock);
@@ -1191,7 +1192,7 @@ stop:
void bch2_recalc_capacity(struct bch_fs *c)
{
struct bch_dev *ca;
u64 capacity = 0, reserved_sectors = 0, gc_reserve;
u64 capacity = 0, reserved_sectors = 0, gc_reserve, copygc_threshold = 0;
unsigned bucket_size_max = 0;
unsigned long ra_pages = 0;
unsigned i, j;
@@ -1234,7 +1235,7 @@ void bch2_recalc_capacity(struct bch_fs *c)

dev_reserve *= ca->mi.bucket_size;

ca->copygc_threshold = dev_reserve;
copygc_threshold += dev_reserve;

capacity += bucket_to_sector(ca, ca->mi.nbuckets -
ca->mi.first_bucket);
@@ -1253,6 +1254,7 @@ void bch2_recalc_capacity(struct bch_fs *c)

reserved_sectors = min(reserved_sectors, capacity);

c->copygc_threshold = copygc_threshold;
c->capacity = capacity - reserved_sectors;

c->bucket_size_max = bucket_size_max;
@@ -1312,7 +1314,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
bch2_writepoint_stop(c, ca, &c->write_points[i]);

bch2_writepoint_stop(c, ca, &ca->copygc_write_point);
bch2_writepoint_stop(c, ca, &c->copygc_write_point);
bch2_writepoint_stop(c, ca, &c->rebalance_write_point);
bch2_writepoint_stop(c, ca, &c->btree_write_point);
@ -70,12 +70,6 @@
|
||||
#include <linux/rcupdate.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
enum bucket_alloc_ret {
|
||||
ALLOC_SUCCESS,
|
||||
OPEN_BUCKETS_EMPTY,
|
||||
FREELIST_EMPTY, /* Allocator thread not keeping up */
|
||||
};
|
||||
|
||||
/*
|
||||
* Open buckets represent a bucket that's currently being allocated from. They
|
||||
* serve two purposes:
|
||||
@ -150,12 +144,13 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
|
||||
}
|
||||
|
||||
static void open_bucket_free_unused(struct bch_fs *c,
|
||||
struct open_bucket *ob,
|
||||
bool may_realloc)
|
||||
struct write_point *wp,
|
||||
struct open_bucket *ob)
|
||||
{
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
|
||||
bool may_realloc = wp->type == BCH_DATA_user;
|
||||
|
||||
BUG_ON(ca->open_buckets_partial_nr >=
|
||||
BUG_ON(ca->open_buckets_partial_nr >
|
||||
ARRAY_SIZE(ca->open_buckets_partial));
|
||||
|
||||
if (ca->open_buckets_partial_nr <
|
||||
@ -234,13 +229,22 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
spin_lock(&c->freelist_lock);
|
||||
|
||||
if (may_alloc_partial &&
|
||||
ca->open_buckets_partial_nr) {
|
||||
ob = c->open_buckets +
|
||||
ca->open_buckets_partial[--ca->open_buckets_partial_nr];
|
||||
ob->on_partial_list = false;
|
||||
spin_unlock(&c->freelist_lock);
|
||||
return ob;
|
||||
if (may_alloc_partial) {
|
||||
int i;
|
||||
|
||||
for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
|
||||
ob = c->open_buckets + ca->open_buckets_partial[i];
|
||||
|
||||
if (reserve <= ob->alloc_reserve) {
|
||||
array_remove_item(ca->open_buckets_partial,
|
||||
ca->open_buckets_partial_nr,
|
||||
i);
|
||||
ob->on_partial_list = false;
|
||||
ob->alloc_reserve = reserve;
|
||||
spin_unlock(&c->freelist_lock);
|
||||
return ob;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
|
||||
@ -297,6 +301,7 @@ out:
|
||||
|
||||
ob->valid = true;
|
||||
ob->sectors_free = ca->mi.bucket_size;
|
||||
ob->alloc_reserve = reserve;
|
||||
ob->ptr = (struct bch_extent_ptr) {
|
||||
.type = 1 << BCH_EXTENT_ENTRY_ptr,
|
||||
.gen = buckets->b[bucket].mark.gen,
|
||||
@ -344,21 +349,20 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
|
||||
struct bch_devs_mask *devs)
|
||||
{
|
||||
struct dev_alloc_list ret = { .nr = 0 };
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
|
||||
for_each_member_device_rcu(ca, c, i, devs)
|
||||
for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
|
||||
ret.devs[ret.nr++] = i;
|
||||
|
||||
bubble_sort(ret.devs, ret.nr, dev_stripe_cmp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_dev_stripe_increment(struct bch_fs *c, struct bch_dev *ca,
|
||||
void bch2_dev_stripe_increment(struct bch_dev *ca,
|
||||
struct dev_stripe_state *stripe)
|
||||
{
|
||||
u64 *v = stripe->next_alloc + ca->dev_idx;
|
||||
u64 free_space = dev_buckets_free(c, ca);
|
||||
u64 free_space = dev_buckets_free(ca);
|
||||
u64 free_space_inv = free_space
|
||||
? div64_u64(1ULL << 48, free_space)
|
||||
: 1ULL << 48;
|
||||
@ -396,21 +400,22 @@ static void add_new_bucket(struct bch_fs *c,
|
||||
ob_push(c, ptrs, ob);
|
||||
}
|
||||
|
||||
static int bch2_bucket_alloc_set(struct bch_fs *c,
|
||||
struct open_buckets *ptrs,
|
||||
struct dev_stripe_state *stripe,
|
||||
struct bch_devs_mask *devs_may_alloc,
|
||||
unsigned nr_replicas,
|
||||
unsigned *nr_effective,
|
||||
bool *have_cache,
|
||||
enum alloc_reserve reserve,
|
||||
unsigned flags,
|
||||
struct closure *cl)
|
||||
enum bucket_alloc_ret
|
||||
bch2_bucket_alloc_set(struct bch_fs *c,
|
||||
struct open_buckets *ptrs,
|
||||
struct dev_stripe_state *stripe,
|
||||
struct bch_devs_mask *devs_may_alloc,
|
||||
unsigned nr_replicas,
|
||||
unsigned *nr_effective,
|
||||
bool *have_cache,
|
||||
enum alloc_reserve reserve,
|
||||
unsigned flags,
|
||||
struct closure *cl)
|
||||
{
|
||||
struct dev_alloc_list devs_sorted =
|
||||
bch2_dev_alloc_list(c, stripe, devs_may_alloc);
|
||||
struct bch_dev *ca;
|
||||
bool alloc_failure = false;
|
||||
enum bucket_alloc_ret ret = INSUFFICIENT_DEVICES;
|
||||
unsigned i;
|
||||
|
||||
BUG_ON(*nr_effective >= nr_replicas);
|
||||
@ -428,101 +433,27 @@ static int bch2_bucket_alloc_set(struct bch_fs *c,
|
||||
ob = bch2_bucket_alloc(c, ca, reserve,
|
||||
flags & BUCKET_MAY_ALLOC_PARTIAL, cl);
|
||||
if (IS_ERR(ob)) {
|
||||
enum bucket_alloc_ret ret = -PTR_ERR(ob);
|
||||
|
||||
WARN_ON(reserve == RESERVE_MOVINGGC &&
|
||||
ret != OPEN_BUCKETS_EMPTY);
|
||||
ret = -PTR_ERR(ob);
|
||||
|
||||
if (cl)
|
||||
return -EAGAIN;
|
||||
if (ret == OPEN_BUCKETS_EMPTY)
|
||||
return -ENOSPC;
|
||||
alloc_failure = true;
|
||||
return ret;
|
||||
continue;
|
||||
}
|
||||
|
||||
add_new_bucket(c, ptrs, devs_may_alloc,
|
||||
nr_effective, have_cache, flags, ob);
|
||||
|
||||
bch2_dev_stripe_increment(c, ca, stripe);
|
||||
bch2_dev_stripe_increment(ca, stripe);
|
||||
|
||||
if (*nr_effective >= nr_replicas)
|
||||
return 0;
|
||||
return ALLOC_SUCCESS;
|
||||
}
|
||||
|
||||
return alloc_failure ? -ENOSPC : -EROFS;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Allocate from stripes: */
|
||||
|
||||
/*
|
||||
* XXX: use a higher watermark for allocating open buckets here:
|
||||
*/
|
||||
static int ec_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
|
||||
{
|
||||
struct bch_devs_mask devs;
|
||||
struct open_bucket *ob;
|
||||
unsigned i, nr_have = 0, nr_data =
|
||||
min_t(unsigned, h->nr_active_devs,
|
||||
EC_STRIPE_MAX) - h->redundancy;
|
||||
bool have_cache = true;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(h->blocks.nr > nr_data);
|
||||
BUG_ON(h->parity.nr > h->redundancy);
|
||||
|
||||
devs = h->devs;
|
||||
|
||||
open_bucket_for_each(c, &h->parity, ob, i)
|
||||
__clear_bit(ob->ptr.dev, devs.d);
|
||||
open_bucket_for_each(c, &h->blocks, ob, i)
|
||||
__clear_bit(ob->ptr.dev, devs.d);
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
rcu_read_lock();
|
||||
|
||||
if (h->parity.nr < h->redundancy) {
|
||||
nr_have = h->parity.nr;
|
||||
|
||||
ret = bch2_bucket_alloc_set(c, &h->parity,
|
||||
&h->parity_stripe,
|
||||
&devs,
|
||||
h->redundancy,
|
||||
&nr_have,
|
||||
&have_cache,
|
||||
RESERVE_NONE,
|
||||
0,
|
||||
NULL);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (h->blocks.nr < nr_data) {
|
||||
nr_have = h->blocks.nr;
|
||||
|
||||
ret = bch2_bucket_alloc_set(c, &h->blocks,
|
||||
&h->block_stripe,
|
||||
&devs,
|
||||
nr_data,
|
||||
&nr_have,
|
||||
&have_cache,
|
||||
RESERVE_NONE,
|
||||
0,
|
||||
NULL);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
return bch2_ec_stripe_new_alloc(c, h);
|
||||
err:
|
||||
rcu_read_unlock();
|
||||
percpu_up_read(&c->mark_lock);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* if we can't allocate a new stripe because there are already too many
|
||||
* partially filled stripes, force allocating from an existing stripe even when
|
||||
@ -555,34 +486,30 @@ static void bucket_alloc_from_stripe(struct bch_fs *c,
|
||||
if (ec_open_bucket(c, ptrs))
|
||||
return;
|
||||
|
||||
h = bch2_ec_stripe_head_get(c, target, erasure_code, nr_replicas - 1);
|
||||
h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1);
|
||||
if (!h)
|
||||
return;
|
||||
|
||||
if (!h->s && ec_stripe_alloc(c, h))
|
||||
goto out_put_head;
|
||||
|
||||
rcu_read_lock();
|
||||
devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
|
||||
rcu_read_unlock();
|
||||
|
||||
for (i = 0; i < devs_sorted.nr; i++)
|
||||
open_bucket_for_each(c, &h->s->blocks, ob, ec_idx)
|
||||
if (ob->ptr.dev == devs_sorted.devs[i] &&
|
||||
!test_and_set_bit(ec_idx, h->s->blocks_allocated))
|
||||
!test_and_set_bit(h->s->data_block_idx[ec_idx],
|
||||
h->s->blocks_allocated))
|
||||
goto got_bucket;
|
||||
goto out_put_head;
|
||||
got_bucket:
|
||||
ca = bch_dev_bkey_exists(c, ob->ptr.dev);
|
||||
|
||||
ob->ec_idx = ec_idx;
|
||||
ob->ec_idx = h->s->data_block_idx[ec_idx];
|
||||
ob->ec = h->s;
|
||||
|
||||
add_new_bucket(c, ptrs, devs_may_alloc,
|
||||
nr_effective, have_cache, flags, ob);
|
||||
atomic_inc(&h->s->pin);
|
||||
out_put_head:
|
||||
bch2_ec_stripe_head_put(h);
|
||||
bch2_ec_stripe_head_put(c, h);
|
||||
}
|
||||
|
||||
/* Sector allocator */
|
||||
@ -607,7 +534,7 @@ static void get_buckets_from_writepoint(struct bch_fs *c,
|
||||
if (*nr_effective < nr_replicas &&
|
||||
test_bit(ob->ptr.dev, devs_may_alloc->d) &&
|
||||
(ca->mi.durability ||
|
||||
(wp->type == BCH_DATA_USER && !*have_cache)) &&
|
||||
(wp->type == BCH_DATA_user && !*have_cache)) &&
|
||||
(ob->ec || !need_ec)) {
|
||||
add_new_bucket(c, ptrs, devs_may_alloc,
|
||||
nr_effective, have_cache,
|
||||
@ -619,24 +546,25 @@ static void get_buckets_from_writepoint(struct bch_fs *c,
|
||||
wp->ptrs = ptrs_skip;
|
||||
}
|
||||
|
||||
static int open_bucket_add_buckets(struct bch_fs *c,
|
||||
struct open_buckets *ptrs,
|
||||
struct write_point *wp,
|
||||
struct bch_devs_list *devs_have,
|
||||
u16 target,
|
||||
unsigned erasure_code,
|
||||
unsigned nr_replicas,
|
||||
unsigned *nr_effective,
|
||||
bool *have_cache,
|
||||
enum alloc_reserve reserve,
|
||||
unsigned flags,
|
||||
struct closure *_cl)
|
||||
static enum bucket_alloc_ret
|
||||
open_bucket_add_buckets(struct bch_fs *c,
|
||||
struct open_buckets *ptrs,
|
||||
struct write_point *wp,
|
||||
struct bch_devs_list *devs_have,
|
||||
u16 target,
|
||||
unsigned erasure_code,
|
||||
unsigned nr_replicas,
|
||||
unsigned *nr_effective,
|
||||
bool *have_cache,
|
||||
enum alloc_reserve reserve,
|
||||
unsigned flags,
|
||||
struct closure *_cl)
|
||||
{
|
||||
struct bch_devs_mask devs;
|
||||
struct open_bucket *ob;
|
||||
struct closure *cl = NULL;
|
||||
enum bucket_alloc_ret ret;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
devs = target_rw_devs(c, wp->type, target);
|
||||
@ -650,18 +578,22 @@ static int open_bucket_add_buckets(struct bch_fs *c,
|
||||
__clear_bit(ob->ptr.dev, devs.d);
|
||||
|
||||
if (erasure_code) {
|
||||
get_buckets_from_writepoint(c, ptrs, wp, &devs,
|
||||
nr_replicas, nr_effective,
|
||||
have_cache, flags, true);
|
||||
if (*nr_effective >= nr_replicas)
|
||||
return 0;
|
||||
if (!ec_open_bucket(c, ptrs)) {
|
||||
get_buckets_from_writepoint(c, ptrs, wp, &devs,
|
||||
nr_replicas, nr_effective,
|
||||
have_cache, flags, true);
|
||||
if (*nr_effective >= nr_replicas)
|
||||
return 0;
|
||||
}
|
||||
|
||||
bucket_alloc_from_stripe(c, ptrs, wp, &devs,
|
||||
target, erasure_code,
|
||||
nr_replicas, nr_effective,
|
||||
have_cache, flags);
|
||||
if (*nr_effective >= nr_replicas)
|
||||
return 0;
|
||||
if (!ec_open_bucket(c, ptrs)) {
|
||||
bucket_alloc_from_stripe(c, ptrs, wp, &devs,
|
||||
target, erasure_code,
|
||||
nr_replicas, nr_effective,
|
||||
have_cache, flags);
|
||||
if (*nr_effective >= nr_replicas)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
get_buckets_from_writepoint(c, ptrs, wp, &devs,
|
||||
@ -681,7 +613,7 @@ retry_blocking:
|
||||
ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs,
|
||||
nr_replicas, nr_effective, have_cache,
|
||||
reserve, flags, cl);
|
||||
if (ret && ret != -EROFS && !cl && _cl) {
|
||||
if (ret && ret != INSUFFICIENT_DEVICES && !cl && _cl) {
|
||||
cl = _cl;
|
||||
goto retry_blocking;
|
||||
}
|
||||
@ -872,7 +804,8 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
|
||||
unsigned nr_effective, write_points_nr;
|
||||
unsigned ob_flags = 0;
|
||||
bool have_cache;
|
||||
int ret, i;
|
||||
enum bucket_alloc_ret ret;
|
||||
int i;
|
||||
|
||||
if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS))
|
||||
ob_flags |= BUCKET_ALLOC_USE_DURABILITY;
|
||||
@ -886,11 +819,11 @@ retry:
|
||||
|
||||
wp = writepoint_find(c, write_point.v);
|
||||
|
||||
if (wp->type == BCH_DATA_USER)
|
||||
if (wp->type == BCH_DATA_user)
|
||||
ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
|
||||
|
||||
/* metadata may not allocate on cache devices: */
|
||||
if (wp->type != BCH_DATA_USER)
|
||||
if (wp->type != BCH_DATA_user)
|
||||
have_cache = true;
|
||||
|
||||
if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
|
||||
@ -920,7 +853,7 @@ alloc_done:
|
||||
if (erasure_code && !ec_open_bucket(c, &ptrs))
|
||||
pr_debug("failed to get ec bucket: ret %u", ret);
|
||||
|
||||
if (ret == -EROFS &&
|
||||
if (ret == INSUFFICIENT_DEVICES &&
|
||||
nr_effective >= nr_replicas_required)
|
||||
ret = 0;
|
||||
|
||||
@ -929,7 +862,7 @@ alloc_done:
|
||||
|
||||
/* Free buckets we didn't use: */
|
||||
open_bucket_for_each(c, &wp->ptrs, ob, i)
|
||||
open_bucket_free_unused(c, ob, wp->type == BCH_DATA_USER);
|
||||
open_bucket_free_unused(c, wp, ob);
|
||||
|
||||
wp->ptrs = ptrs;
|
||||
|
||||
@ -948,17 +881,24 @@ err:
|
||||
if (ptrs.nr < ARRAY_SIZE(ptrs.v))
|
||||
ob_push(c, &ptrs, ob);
|
||||
else
|
||||
open_bucket_free_unused(c, ob,
|
||||
wp->type == BCH_DATA_USER);
|
||||
open_bucket_free_unused(c, wp, ob);
|
||||
wp->ptrs = ptrs;
|
||||
|
||||
mutex_unlock(&wp->lock);
|
||||
|
||||
if (ret == -ENOSPC &&
|
||||
if (ret == FREELIST_EMPTY &&
|
||||
try_decrease_writepoints(c, write_points_nr))
|
||||
goto retry;
|
||||
|
||||
return ERR_PTR(ret);
|
||||
switch (ret) {
|
||||
case OPEN_BUCKETS_EMPTY:
|
||||
case FREELIST_EMPTY:
|
||||
return cl ? ERR_PTR(-EAGAIN) : ERR_PTR(-ENOSPC);
|
||||
case INSUFFICIENT_DEVICES:
|
||||
return ERR_PTR(-EROFS);
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -980,7 +920,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
|
||||
struct bch_extent_ptr tmp = ob->ptr;
|
||||
|
||||
tmp.cached = !ca->mi.durability &&
|
||||
wp->type == BCH_DATA_USER;
|
||||
wp->type == BCH_DATA_user;
|
||||
|
||||
tmp.offset += ca->mi.bucket_size - ob->sectors_free;
|
||||
bch2_bkey_append_ptr(k, tmp);
|
||||
@ -1009,6 +949,13 @@ void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
|
||||
bch2_open_buckets_put(c, &ptrs);
|
||||
}
|
||||
|
||||
static inline void writepoint_init(struct write_point *wp,
|
||||
enum bch_data_type type)
|
||||
{
|
||||
mutex_init(&wp->lock);
|
||||
wp->type = type;
|
||||
}
|
||||
|
||||
void bch2_fs_allocator_foreground_init(struct bch_fs *c)
|
||||
{
|
||||
struct open_bucket *ob;
|
||||
@ -1029,12 +976,13 @@ void bch2_fs_allocator_foreground_init(struct bch_fs *c)
|
||||
c->open_buckets_freelist = ob - c->open_buckets;
|
||||
}
|
||||
|
||||
writepoint_init(&c->btree_write_point, BCH_DATA_BTREE);
|
||||
writepoint_init(&c->rebalance_write_point, BCH_DATA_USER);
|
||||
writepoint_init(&c->btree_write_point, BCH_DATA_btree);
|
||||
writepoint_init(&c->rebalance_write_point, BCH_DATA_user);
|
||||
writepoint_init(&c->copygc_write_point, BCH_DATA_user);
|
||||
|
||||
for (wp = c->write_points;
|
||||
wp < c->write_points + c->write_points_nr; wp++) {
|
||||
writepoint_init(wp, BCH_DATA_USER);
|
||||
writepoint_init(wp, BCH_DATA_user);
|
||||
|
||||
wp->last_used = sched_clock();
|
||||
wp->write_point = (unsigned long) wp;
|
||||
|
@ -12,6 +12,13 @@ struct bch_dev;
|
||||
struct bch_fs;
|
||||
struct bch_devs_List;
|
||||
|
||||
enum bucket_alloc_ret {
|
||||
ALLOC_SUCCESS,
|
||||
OPEN_BUCKETS_EMPTY,
|
||||
FREELIST_EMPTY, /* Allocator thread not keeping up */
|
||||
INSUFFICIENT_DEVICES,
|
||||
};
|
||||
|
||||
struct dev_alloc_list {
|
||||
unsigned nr;
|
||||
u8 devs[BCH_SB_MEMBERS_MAX];
|
||||
@ -20,8 +27,7 @@ struct dev_alloc_list {
|
||||
struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
|
||||
struct dev_stripe_state *,
|
||||
struct bch_devs_mask *);
|
||||
void bch2_dev_stripe_increment(struct bch_fs *, struct bch_dev *,
|
||||
struct dev_stripe_state *);
|
||||
void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);
|
||||
|
||||
long bch2_bucket_alloc_new_fs(struct bch_dev *);
|
||||
|
||||
@ -92,6 +98,12 @@ static inline void bch2_open_bucket_get(struct bch_fs *c,
|
||||
}
|
||||
}
|
||||
|
||||
enum bucket_alloc_ret
|
||||
bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *,
|
||||
struct dev_stripe_state *, struct bch_devs_mask *,
|
||||
unsigned, unsigned *, bool *, enum alloc_reserve,
|
||||
unsigned, struct closure *);
|
||||
|
||||
struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
|
||||
unsigned, unsigned,
|
||||
struct write_point_specifier,
|
||||
@ -121,13 +133,6 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp
|
||||
return (struct write_point_specifier) { .v = (unsigned long) wp };
|
||||
}
|
||||
|
||||
static inline void writepoint_init(struct write_point *wp,
|
||||
enum bch_data_type type)
|
||||
{
|
||||
mutex_init(&wp->lock);
|
||||
wp->type = type;
|
||||
}
|
||||
|
||||
void bch2_fs_allocator_foreground_init(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
|
||||
|
@ -66,6 +66,7 @@ struct open_bucket {
|
||||
u8 type;
|
||||
unsigned valid:1;
|
||||
unsigned on_partial_list:1;
|
||||
int alloc_reserve:3;
|
||||
unsigned sectors_free;
|
||||
struct bch_extent_ptr ptr;
|
||||
struct ec_stripe_new *ec;
|
||||
|
@ -202,7 +202,8 @@
|
||||
#include "opts.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <linux/dynamic_fault.h>
|
||||
#define dynamic_fault(...) 0
|
||||
#define race_fault(...) 0
|
||||
|
||||
#define bch2_fs_init_fault(name) \
|
||||
dynamic_fault("bcachefs:bch_fs_init:" name)
|
||||
@ -451,13 +452,6 @@ struct bch_dev {
|
||||
|
||||
alloc_heap alloc_heap;
|
||||
|
||||
/* Copying GC: */
|
||||
struct task_struct *copygc_thread;
|
||||
copygc_heap copygc_heap;
|
||||
struct bch_pd_controller copygc_pd;
|
||||
struct write_point copygc_write_point;
|
||||
u64 copygc_threshold;
|
||||
|
||||
atomic64_t rebalance_work;
|
||||
|
||||
struct journal_device journal;
|
||||
@ -751,16 +745,27 @@ struct bch_fs {
|
||||
/* REBALANCE */
|
||||
struct bch_fs_rebalance rebalance;
|
||||
|
||||
/* COPYGC */
|
||||
struct task_struct *copygc_thread;
|
||||
copygc_heap copygc_heap;
|
||||
struct bch_pd_controller copygc_pd;
|
||||
struct write_point copygc_write_point;
|
||||
u64 copygc_threshold;
|
||||
|
||||
/* STRIPES: */
|
||||
GENRADIX(struct stripe) stripes[2];
|
||||
struct mutex ec_stripe_create_lock;
|
||||
|
||||
ec_stripes_heap ec_stripes_heap;
|
||||
spinlock_t ec_stripes_heap_lock;
|
||||
|
||||
/* ERASURE CODING */
|
||||
struct list_head ec_new_stripe_list;
|
||||
struct mutex ec_new_stripe_lock;
|
||||
struct list_head ec_stripe_head_list;
|
||||
struct mutex ec_stripe_head_lock;
|
||||
|
||||
struct list_head ec_stripe_new_list;
|
||||
struct mutex ec_stripe_new_lock;
|
||||
|
||||
struct work_struct ec_stripe_create_work;
|
||||
u64 ec_stripe_hint;
|
||||
|
||||
struct bio_set ec_bioset;
|
||||
|
@ -1026,14 +1026,19 @@ LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
|
||||
|
||||
/* BCH_SB_FIELD_replicas: */
|
||||
|
||||
#define BCH_DATA_TYPES() \
|
||||
x(none, 0) \
|
||||
x(sb, 1) \
|
||||
x(journal, 2) \
|
||||
x(btree, 3) \
|
||||
x(user, 4) \
|
||||
x(cached, 5)
|
||||
|
||||
enum bch_data_type {
|
||||
BCH_DATA_NONE = 0,
|
||||
BCH_DATA_SB = 1,
|
||||
BCH_DATA_JOURNAL = 2,
|
||||
BCH_DATA_BTREE = 3,
|
||||
BCH_DATA_USER = 4,
|
||||
BCH_DATA_CACHED = 5,
|
||||
BCH_DATA_NR = 6,
|
||||
#define x(t, n) BCH_DATA_##t,
|
||||
BCH_DATA_TYPES()
|
||||
#undef x
|
||||
BCH_DATA_NR
|
||||
};
|
||||
|
||||
struct bch_replicas_entry_v0 {
|
||||
|
@ -313,44 +313,6 @@ struct rw_aux_tree {
|
||||
struct bpos k;
|
||||
};
|
||||
|
||||
/*
|
||||
* BSET_CACHELINE was originally intended to match the hardware cacheline size -
|
||||
* it used to be 64, but I realized the lookup code would touch slightly less
|
||||
* memory if it was 128.
|
||||
*
|
||||
* It definites the number of bytes (in struct bset) per struct bkey_float in
|
||||
* the auxiliar search tree - when we're done searching the bset_float tree we
|
||||
* have this many bytes left that we do a linear search over.
|
||||
*
|
||||
* Since (after level 5) every level of the bset_tree is on a new cacheline,
|
||||
* we're touching one fewer cacheline in the bset tree in exchange for one more
|
||||
* cacheline in the linear search - but the linear search might stop before it
|
||||
* gets to the second cacheline.
|
||||
*/
|
||||
|
||||
#define BSET_CACHELINE 128
|
||||
|
||||
/* Space required for the btree node keys */
|
||||
static inline size_t btree_keys_bytes(struct btree *b)
|
||||
{
|
||||
return PAGE_SIZE << b->page_order;
|
||||
}
|
||||
|
||||
static inline size_t btree_keys_cachelines(struct btree *b)
|
||||
{
|
||||
return btree_keys_bytes(b) / BSET_CACHELINE;
|
||||
}
|
||||
|
||||
static inline size_t btree_aux_data_bytes(struct btree *b)
|
||||
{
|
||||
return btree_keys_cachelines(b) * 8;
|
||||
}
|
||||
|
||||
static inline size_t btree_aux_data_u64s(struct btree *b)
|
||||
{
|
||||
return btree_aux_data_bytes(b) / sizeof(u64);
|
||||
}
|
||||
|
||||
static unsigned bset_aux_tree_buf_end(const struct bset_tree *t)
|
||||
{
|
||||
BUG_ON(t->aux_data_offset == U16_MAX);
|
||||
@ -426,29 +388,6 @@ static void bset_aux_tree_verify(struct btree *b)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Memory allocation */
|
||||
|
||||
void bch2_btree_keys_free(struct btree *b)
|
||||
{
|
||||
vfree(b->aux_data);
|
||||
b->aux_data = NULL;
|
||||
}
|
||||
|
||||
#ifndef PAGE_KERNEL_EXEC
|
||||
# define PAGE_KERNEL_EXEC PAGE_KERNEL
|
||||
#endif
|
||||
|
||||
int bch2_btree_keys_alloc(struct btree *b, unsigned page_order, gfp_t gfp)
|
||||
{
|
||||
b->page_order = page_order;
|
||||
b->aux_data = __vmalloc(btree_aux_data_bytes(b), gfp,
|
||||
PAGE_KERNEL_EXEC);
|
||||
if (!b->aux_data)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_btree_keys_init(struct btree *b, bool *expensive_debug_checks)
|
||||
{
|
||||
unsigned i;
|
||||
|
@ -184,6 +184,38 @@ static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* BSET_CACHELINE was originally intended to match the hardware cacheline size -
|
||||
* it used to be 64, but I realized the lookup code would touch slightly less
|
||||
* memory if it was 128.
|
||||
*
|
||||
* It definites the number of bytes (in struct bset) per struct bkey_float in
|
||||
* the auxiliar search tree - when we're done searching the bset_float tree we
|
||||
* have this many bytes left that we do a linear search over.
|
||||
*
|
||||
* Since (after level 5) every level of the bset_tree is on a new cacheline,
|
||||
* we're touching one fewer cacheline in the bset tree in exchange for one more
|
||||
* cacheline in the linear search - but the linear search might stop before it
|
||||
* gets to the second cacheline.
|
||||
*/
|
||||
|
||||
#define BSET_CACHELINE 128
|
||||
|
||||
static inline size_t btree_keys_cachelines(struct btree *b)
|
||||
{
|
||||
return (1U << b->byte_order) / BSET_CACHELINE;
|
||||
}
|
||||
|
||||
static inline size_t btree_aux_data_bytes(struct btree *b)
|
||||
{
|
||||
return btree_keys_cachelines(b) * 8;
|
||||
}
|
||||
|
||||
static inline size_t btree_aux_data_u64s(struct btree *b)
|
||||
{
|
||||
return btree_aux_data_bytes(b) / sizeof(u64);
|
||||
}
|
||||
|
||||
typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
|
||||
|
||||
static inline void
|
||||
@ -334,8 +366,6 @@ static inline struct bset *bset_next_set(struct btree *b,
|
||||
return ((void *) i) + round_up(vstruct_bytes(i), block_bytes);
|
||||
}
|
||||
|
||||
void bch2_btree_keys_free(struct btree *);
|
||||
int bch2_btree_keys_alloc(struct btree *, unsigned, gfp_t);
|
||||
void bch2_btree_keys_init(struct btree *, bool *);
|
||||
|
||||
void bch2_bset_init_first(struct btree *, struct bset *);
|
||||
|
@ -44,7 +44,8 @@ static void __btree_node_data_free(struct bch_fs *c, struct btree *b)
|
||||
|
||||
kvpfree(b->data, btree_bytes(c));
|
||||
b->data = NULL;
|
||||
bch2_btree_keys_free(b);
|
||||
vfree(b->aux_data);
|
||||
b->aux_data = NULL;
|
||||
}
|
||||
|
||||
static void btree_node_data_free(struct bch_fs *c, struct btree *b)
|
||||
@ -72,7 +73,7 @@ static const struct rhashtable_params bch_btree_cache_params = {
|
||||
.obj_cmpfn = bch2_btree_cache_cmp_fn,
|
||||
};
|
||||
|
||||
static int __btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
||||
static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
||||
{
|
||||
BUG_ON(b->data || b->aux_data);
|
||||
|
||||
@ -80,7 +81,8 @@ static int __btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
||||
if (!b->data)
|
||||
return -ENOMEM;
|
||||
|
||||
if (bch2_btree_keys_alloc(b, btree_page_order(c), gfp)) {
|
||||
b->aux_data = vmalloc_exec(btree_aux_data_bytes(b), gfp);
|
||||
if (!b->aux_data) {
|
||||
kvpfree(b->data, btree_bytes(c));
|
||||
b->data = NULL;
|
||||
return -ENOMEM;
|
||||
@ -89,21 +91,9 @@ static int __btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
||||
static struct btree *__btree_node_mem_alloc(struct bch_fs *c)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
|
||||
if (!__btree_node_data_alloc(c, b, gfp)) {
|
||||
bc->used++;
|
||||
list_move(&b->list, &bc->freeable);
|
||||
} else {
|
||||
list_move(&b->list, &bc->freed);
|
||||
}
|
||||
}
|
||||
|
||||
static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
|
||||
{
|
||||
struct btree *b = kzalloc(sizeof(struct btree), gfp);
|
||||
struct btree *b = kzalloc(sizeof(struct btree), GFP_KERNEL);
|
||||
if (!b)
|
||||
return NULL;
|
||||
|
||||
@ -111,9 +101,25 @@ static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
|
||||
six_lock_init(&b->c.lock);
|
||||
INIT_LIST_HEAD(&b->list);
|
||||
INIT_LIST_HEAD(&b->write_blocked);
|
||||
b->byte_order = ilog2(btree_bytes(c));
|
||||
return b;
|
||||
}
|
||||
|
||||
btree_node_data_alloc(c, b, gfp);
|
||||
return b->data ? b : NULL;
|
||||
static struct btree *btree_node_mem_alloc(struct bch_fs *c)
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b = __btree_node_mem_alloc(c);
|
||||
if (!b)
|
||||
return NULL;
|
||||
|
||||
if (btree_node_data_alloc(c, b, GFP_KERNEL)) {
|
||||
kfree(b);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bc->used++;
|
||||
list_add(&b->list, &bc->freeable);
|
||||
return b;
|
||||
}
|
||||
|
||||
/* Btree in memory cache - hash table */
|
||||
@ -124,6 +130,8 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
||||
|
||||
/* Cause future lookups for this node to fail: */
|
||||
b->hash_val = 0;
|
||||
|
||||
six_lock_wakeup_all(&b->c.lock);
|
||||
}
|
||||
|
||||
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
|
||||
@ -402,7 +410,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
|
||||
bch2_recalc_btree_reserve(c);
|
||||
|
||||
for (i = 0; i < bc->reserve; i++)
|
||||
if (!btree_node_mem_alloc(c, GFP_KERNEL)) {
|
||||
if (!btree_node_mem_alloc(c)) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@ -418,7 +426,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
|
||||
goto out;
|
||||
}
|
||||
|
||||
c->verify_data = btree_node_mem_alloc(c, GFP_KERNEL);
|
||||
c->verify_data = btree_node_mem_alloc(c);
|
||||
if (!c->verify_data) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
@ -550,21 +558,16 @@ got_node:
|
||||
mutex_unlock(&bc->lock);
|
||||
|
||||
if (!b) {
|
||||
b = kzalloc(sizeof(struct btree), GFP_KERNEL);
|
||||
b = __btree_node_mem_alloc(c);
|
||||
if (!b)
|
||||
goto err;
|
||||
|
||||
bkey_btree_ptr_init(&b->key);
|
||||
six_lock_init(&b->c.lock);
|
||||
INIT_LIST_HEAD(&b->list);
|
||||
INIT_LIST_HEAD(&b->write_blocked);
|
||||
|
||||
BUG_ON(!six_trylock_intent(&b->c.lock));
|
||||
BUG_ON(!six_trylock_write(&b->c.lock));
|
||||
}
|
||||
|
||||
if (!b->data) {
|
||||
if (__btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL))
|
||||
if (btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL))
|
||||
goto err;
|
||||
|
||||
mutex_lock(&bc->lock);
|
||||
|
@ -79,14 +79,9 @@ static inline size_t btree_max_u64s(struct bch_fs *c)
|
||||
return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64);
|
||||
}
|
||||
|
||||
static inline size_t btree_page_order(struct bch_fs *c)
|
||||
{
|
||||
return get_order(btree_bytes(c));
|
||||
}
|
||||
|
||||
static inline size_t btree_pages(struct bch_fs *c)
|
||||
{
|
||||
return 1 << btree_page_order(c);
|
||||
return btree_bytes(c) / PAGE_SIZE;
|
||||
}
|
||||
|
||||
static inline unsigned btree_blocks(struct bch_fs *c)
|
||||
|
@ -109,7 +109,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
atomic64_set(&c->key_version, k.k->version.lo);
|
||||
|
||||
if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
|
||||
fsck_err_on(!bch2_bkey_replicas_marked(c, k, false), c,
|
||||
fsck_err_on(!bch2_bkey_replicas_marked(c, k), c,
|
||||
"superblock not marked as containing replicas (type %u)",
|
||||
k.k->type)) {
|
||||
ret = bch2_mark_bkey_replicas(c, k);
|
||||
@ -433,16 +433,16 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
if (offset == BCH_SB_SECTOR)
|
||||
mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR,
|
||||
BCH_DATA_SB, flags);
|
||||
BCH_DATA_sb, flags);
|
||||
|
||||
mark_metadata_sectors(c, ca, offset,
|
||||
offset + (1 << layout->sb_max_size_bits),
|
||||
BCH_DATA_SB, flags);
|
||||
BCH_DATA_sb, flags);
|
||||
}
|
||||
|
||||
for (i = 0; i < ca->journal.nr; i++) {
|
||||
b = ca->journal.buckets[i];
|
||||
bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_JOURNAL,
|
||||
bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal,
|
||||
ca->mi.bucket_size,
|
||||
gc_phase(GC_PHASE_SB), flags);
|
||||
}
|
||||
@ -617,8 +617,11 @@ static int bch2_gc_done(struct bch_fs *c,
|
||||
copy_stripe_field(block_sectors[i],
|
||||
"block_sectors[%u]", i);
|
||||
|
||||
if (dst->alive)
|
||||
if (dst->alive) {
|
||||
spin_lock(&c->ec_stripes_heap_lock);
|
||||
bch2_stripes_heap_insert(c, dst, dst_iter.pos);
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
}
|
||||
|
||||
genradix_iter_advance(&dst_iter, &c->stripes[0]);
|
||||
genradix_iter_advance(&src_iter, &c->stripes[1]);
|
||||
@ -673,8 +676,8 @@ static int bch2_gc_done(struct bch_fs *c,
|
||||
char buf[80];
|
||||
|
||||
if (metadata_only &&
|
||||
(e->data_type == BCH_DATA_USER ||
|
||||
e->data_type == BCH_DATA_CACHED))
|
||||
(e->data_type == BCH_DATA_user ||
|
||||
e->data_type == BCH_DATA_cached))
|
||||
continue;
|
||||
|
||||
bch2_replicas_entry_to_text(&PBUF(buf), e);
|
||||
@ -759,8 +762,8 @@ static int bch2_gc_start(struct bch_fs *c,
|
||||
d->gen_valid = s->gen_valid;
|
||||
|
||||
if (metadata_only &&
|
||||
(s->mark.data_type == BCH_DATA_USER ||
|
||||
s->mark.data_type == BCH_DATA_CACHED)) {
|
||||
(s->mark.data_type == BCH_DATA_user ||
|
||||
s->mark.data_type == BCH_DATA_cached)) {
|
||||
d->_mark = s->mark;
|
||||
d->_mark.owned_by_allocator = 0;
|
||||
}
|
||||
@ -949,8 +952,10 @@ int bch2_gc_gens(struct bch_fs *c)
|
||||
for (i = 0; i < BTREE_ID_NR; i++)
|
||||
if (btree_node_type_needs_gc(i)) {
|
||||
ret = bch2_gc_btree_gens(c, i);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
bch_err(c, "error recalculating oldest_gen: %i", ret);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
@ -961,6 +966,8 @@ int bch2_gc_gens(struct bch_fs *c)
|
||||
g->oldest_gen = g->gc_gen;
|
||||
up_read(&ca->bucket_lock);
|
||||
}
|
||||
|
||||
c->gc_count++;
|
||||
err:
|
||||
up_read(&c->gc_lock);
|
||||
return ret;
|
||||
|
@ -57,25 +57,25 @@ static void set_needs_whiteout(struct bset *i, int v)
|
||||
k->needs_whiteout = v;
|
||||
}
|
||||
|
||||
static void btree_bounce_free(struct bch_fs *c, unsigned order,
|
||||
static void btree_bounce_free(struct bch_fs *c, size_t size,
|
||||
bool used_mempool, void *p)
|
||||
{
|
||||
if (used_mempool)
|
||||
mempool_free(p, &c->btree_bounce_pool);
|
||||
else
|
||||
vpfree(p, PAGE_SIZE << order);
|
||||
vpfree(p, size);
|
||||
}
|
||||
|
||||
static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
|
||||
static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
|
||||
bool *used_mempool)
|
||||
{
|
||||
unsigned flags = memalloc_nofs_save();
|
||||
void *p;
|
||||
|
||||
BUG_ON(order > btree_page_order(c));
|
||||
BUG_ON(size > btree_bytes(c));
|
||||
|
||||
*used_mempool = false;
|
||||
p = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order);
|
||||
p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
|
||||
if (!p) {
|
||||
*used_mempool = true;
|
||||
p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
|
||||
@ -125,16 +125,14 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k;
|
||||
bool used_mempool = false;
|
||||
unsigned order;
|
||||
size_t bytes = b->whiteout_u64s * sizeof(u64);
|
||||
|
||||
if (!b->whiteout_u64s)
|
||||
return;
|
||||
|
||||
order = get_order(b->whiteout_u64s * sizeof(u64));
|
||||
new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);
|
||||
|
||||
new_whiteouts = btree_bounce_alloc(c, order, &used_mempool);
|
||||
|
||||
ptrs = ptrs_end = ((void *) new_whiteouts + (PAGE_SIZE << order));
|
||||
ptrs = ptrs_end = ((void *) new_whiteouts + bytes);
|
||||
|
||||
for (k = unwritten_whiteouts_start(c, b);
|
||||
k != unwritten_whiteouts_end(c, b);
|
||||
@ -158,7 +156,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
|
||||
memcpy_u64s(unwritten_whiteouts_start(c, b),
|
||||
new_whiteouts, b->whiteout_u64s);
|
||||
|
||||
btree_bounce_free(c, order, used_mempool, new_whiteouts);
|
||||
btree_bounce_free(c, bytes, used_mempool, new_whiteouts);
|
||||
}
|
||||
|
||||
static bool should_compact_bset(struct btree *b, struct bset_tree *t,
|
||||
@ -187,7 +185,7 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
|
||||
struct bkey_packed *whiteouts = NULL;
|
||||
struct bkey_packed *u_start, *u_pos;
|
||||
struct sort_iter sort_iter;
|
||||
unsigned order, whiteout_u64s = 0, u64s;
|
||||
unsigned bytes, whiteout_u64s = 0, u64s;
|
||||
bool used_mempool, compacting = false;
|
||||
|
||||
BUG_ON(!btree_node_is_extents(b));
|
||||
@ -204,9 +202,9 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
|
||||
sort_iter_init(&sort_iter, b);
|
||||
|
||||
whiteout_u64s += b->whiteout_u64s;
|
||||
order = get_order(whiteout_u64s * sizeof(u64));
|
||||
bytes = whiteout_u64s * sizeof(u64);
|
||||
|
||||
whiteouts = btree_bounce_alloc(c, order, &used_mempool);
|
||||
whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);
|
||||
u_start = u_pos = whiteouts;
|
||||
|
||||
memcpy_u64s(u_pos, unwritten_whiteouts_start(c, b),
|
||||
@ -306,7 +304,7 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
|
||||
unwritten_whiteouts_end(c, b),
|
||||
true);
|
||||
|
||||
btree_bounce_free(c, order, used_mempool, whiteouts);
|
||||
btree_bounce_free(c, bytes, used_mempool, whiteouts);
|
||||
|
||||
bch2_btree_build_aux_trees(b);
|
||||
|
||||
@ -401,7 +399,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
struct bset *start_bset = bset(b, &b->set[start_idx]);
|
||||
bool used_mempool = false;
|
||||
u64 start_time, seq = 0;
|
||||
unsigned i, u64s = 0, order, shift = end_idx - start_idx - 1;
|
||||
unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1;
|
||||
bool sorting_entire_node = start_idx == 0 &&
|
||||
end_idx == b->nsets;
|
||||
|
||||
@ -416,11 +414,11 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
btree_bkey_last(b, t));
|
||||
}
|
||||
|
||||
order = sorting_entire_node
|
||||
? btree_page_order(c)
|
||||
: get_order(__vstruct_bytes(struct btree_node, u64s));
|
||||
bytes = sorting_entire_node
|
||||
? btree_bytes(c)
|
||||
: __vstruct_bytes(struct btree_node, u64s);
|
||||
|
||||
out = btree_bounce_alloc(c, order, &used_mempool);
|
||||
out = btree_bounce_alloc(c, bytes, &used_mempool);
|
||||
|
||||
start_time = local_clock();
|
||||
|
||||
@ -435,7 +433,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
|
||||
out->keys.u64s = cpu_to_le16(u64s);
|
||||
|
||||
BUG_ON(vstruct_end(&out->keys) > (void *) out + (PAGE_SIZE << order));
|
||||
BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes);
|
||||
|
||||
if (sorting_entire_node)
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
|
||||
@ -449,7 +447,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
if (sorting_entire_node) {
|
||||
unsigned u64s = le16_to_cpu(out->keys.u64s);
|
||||
|
||||
BUG_ON(order != btree_page_order(c));
|
||||
BUG_ON(bytes != btree_bytes(c));
|
||||
|
||||
/*
|
||||
* Our temporary buffer is the same size as the btree node's
|
||||
@ -484,7 +482,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
||||
set_btree_bset_end(b, &b->set[start_idx]);
|
||||
bch2_bset_set_no_aux_tree(b, &b->set[start_idx]);
|
||||
|
||||
btree_bounce_free(c, order, used_mempool, out);
|
||||
btree_bounce_free(c, bytes, used_mempool, out);
|
||||
|
||||
bch2_verify_btree_nr_keys(b);
|
||||
}
|
||||
@ -599,34 +597,6 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
|
||||
bch2_btree_iter_reinit_node(iter, b);
|
||||
}
|
||||
|
||||
static struct nonce btree_nonce(struct bset *i, unsigned offset)
|
||||
{
|
||||
return (struct nonce) {{
|
||||
[0] = cpu_to_le32(offset),
|
||||
[1] = ((__le32 *) &i->seq)[0],
|
||||
[2] = ((__le32 *) &i->seq)[1],
|
||||
[3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE,
|
||||
}};
|
||||
}
|
||||
|
||||
static void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
|
||||
{
|
||||
struct nonce nonce = btree_nonce(i, offset);
|
||||
|
||||
if (!offset) {
|
||||
struct btree_node *bn = container_of(i, struct btree_node, keys);
|
||||
unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
|
||||
|
||||
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags,
|
||||
bytes);
|
||||
|
||||
nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
|
||||
}
|
||||
|
||||
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
|
||||
vstruct_end(i) - (void *) i->_data);
|
||||
}
|
||||
|
||||
static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
|
||||
struct btree *b, struct bset *i,
|
||||
unsigned offset, int write)
|
||||
@ -917,6 +887,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
||||
struct sort_iter *iter;
|
||||
struct btree_node *sorted;
|
||||
struct bkey_packed *k;
|
||||
struct bch_extent_ptr *ptr;
|
||||
struct bset *i;
|
||||
bool used_mempool, blacklisted;
|
||||
unsigned u64s;
|
||||
@ -971,8 +942,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
||||
bset_encrypt(c, i, b->written << 9);
|
||||
|
||||
if (btree_node_is_extents(b) &&
|
||||
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data))
|
||||
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) {
|
||||
set_btree_node_old_extent_overwrite(b);
|
||||
set_btree_node_need_rewrite(b);
|
||||
}
|
||||
|
||||
sectors = vstruct_sectors(b->data, c->block_bits);
|
||||
} else {
|
||||
@ -1040,7 +1013,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
||||
BTREE_ERR_WANT_RETRY, c, b, NULL,
|
||||
"found bset signature after last bset");
|
||||
|
||||
sorted = btree_bounce_alloc(c, btree_page_order(c), &used_mempool);
|
||||
sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
|
||||
sorted->keys.u64s = 0;
|
||||
|
||||
set_btree_bset(b, b->set, &b->data->keys);
|
||||
@ -1058,7 +1031,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
||||
|
||||
BUG_ON(b->nr.live_u64s != u64s);
|
||||
|
||||
btree_bounce_free(c, btree_page_order(c), used_mempool, sorted);
|
||||
btree_bounce_free(c, btree_bytes(c), used_mempool, sorted);
|
||||
|
||||
i = &b->data->keys;
|
||||
for (k = i->start; k != vstruct_last(i);) {
|
||||
@ -1098,6 +1071,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
||||
set_needs_whiteout(btree_bset_first(b), true);
|
||||
|
||||
btree_node_reset_sib_u64s(b);
|
||||
|
||||
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
|
||||
if (ca->mi.state != BCH_MEMBER_STATE_RW)
|
||||
set_btree_node_need_rewrite(b);
|
||||
}
|
||||
out:
|
||||
mempool_free(iter, &c->fill_iter);
|
||||
return retry_read;
|
||||
@ -1139,7 +1119,8 @@ static void btree_node_read_work(struct work_struct *work)
|
||||
bio->bi_status = BLK_STS_REMOVED;
|
||||
}
|
||||
start:
|
||||
bch2_dev_io_err_on(bio->bi_status, ca, "btree read");
|
||||
bch2_dev_io_err_on(bio->bi_status, ca, "btree read: %s",
|
||||
bch2_blk_status_to_str(bio->bi_status));
|
||||
if (rb->have_ioref)
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
rb->have_ioref = false;
|
||||
@ -1220,7 +1201,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
|
||||
set_btree_node_read_in_flight(b);
|
||||
|
||||
if (rb->have_ioref) {
|
||||
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_BTREE],
|
||||
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
|
||||
bio_sectors(bio));
|
||||
bio_set_dev(bio, ca->disk_sb.bdev);
|
||||
|
||||
@ -1392,7 +1373,7 @@ static void btree_node_write_work(struct work_struct *work)
|
||||
struct btree *b = wbio->wbio.bio.bi_private;
|
||||
|
||||
btree_bounce_free(c,
|
||||
wbio->wbio.order,
|
||||
wbio->bytes,
|
||||
wbio->wbio.used_mempool,
|
||||
wbio->data);
|
||||
|
||||
@ -1423,8 +1404,8 @@ static void btree_node_write_endio(struct bio *bio)
|
||||
if (wbio->have_ioref)
|
||||
bch2_latency_acct(ca, wbio->submit_time, WRITE);
|
||||
|
||||
if (bio->bi_status == BLK_STS_REMOVED ||
|
||||
bch2_dev_io_err_on(bio->bi_status, ca, "btree write") ||
|
||||
if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write: %s",
|
||||
bch2_blk_status_to_str(bio->bi_status)) ||
|
||||
bch2_meta_write_fault("btree")) {
|
||||
spin_lock_irqsave(&c->btree_write_error_lock, flags);
|
||||
bch2_dev_list_add_dev(&orig->failed, wbio->dev);
|
||||
@ -1475,7 +1456,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
struct bch_extent_ptr *ptr;
|
||||
struct sort_iter sort_iter;
|
||||
struct nonce nonce;
|
||||
unsigned bytes_to_write, sectors_to_write, order, bytes, u64s;
|
||||
unsigned bytes_to_write, sectors_to_write, bytes, u64s;
|
||||
u64 seq = 0;
|
||||
bool used_mempool;
|
||||
unsigned long old, new;
|
||||
@ -1545,8 +1526,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
seq = max(seq, le64_to_cpu(i->journal_seq));
|
||||
}
|
||||
|
||||
order = get_order(bytes);
|
||||
data = btree_bounce_alloc(c, order, &used_mempool);
|
||||
data = btree_bounce_alloc(c, bytes, &used_mempool);
|
||||
|
||||
if (!b->written) {
|
||||
bn = data;
|
||||
@ -1658,7 +1638,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
struct btree_write_bio, wbio.bio);
|
||||
wbio_init(&wbio->wbio.bio);
|
||||
wbio->data = data;
|
||||
wbio->wbio.order = order;
|
||||
wbio->bytes = bytes;
|
||||
wbio->wbio.used_mempool = used_mempool;
|
||||
wbio->wbio.bio.bi_opf = REQ_OP_WRITE|REQ_META;
|
||||
wbio->wbio.bio.bi_end_io = btree_node_write_endio;
|
||||
@ -1689,13 +1669,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
b->written += sectors_to_write;
|
||||
|
||||
/* XXX: submitting IO with btree locks held: */
|
||||
bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_BTREE, &k.key);
|
||||
bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, &k.key);
|
||||
return;
|
||||
err:
|
||||
set_btree_node_noevict(b);
|
||||
b->written += sectors_to_write;
|
||||
nowrite:
|
||||
btree_bounce_free(c, order, used_mempool, data);
|
||||
btree_bounce_free(c, bytes, used_mempool, data);
|
||||
btree_node_write_done(c, b);
|
||||
}
|
||||
|
||||
@ -1826,9 +1806,8 @@ void bch2_btree_verify_flushed(struct bch_fs *c)
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf)
|
||||
void bch2_dirty_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE);
|
||||
struct bucket_table *tbl;
|
||||
struct rhash_head *pos;
|
||||
struct btree *b;
|
||||
@ -1841,7 +1820,7 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf)
|
||||
if (!(flags & (1 << BTREE_NODE_dirty)))
|
||||
continue;
|
||||
|
||||
pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu\n",
|
||||
pr_buf(out, "%p d %u n %u l %u w %u b %u r %u:%lu\n",
|
||||
b,
|
||||
(flags & (1 << BTREE_NODE_dirty)) != 0,
|
||||
(flags & (1 << BTREE_NODE_need_write)) != 0,
|
||||
@ -1852,6 +1831,4 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf)
|
||||
b->will_make_reachable & 1);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "bkey_methods.h"
|
||||
#include "bset.h"
|
||||
#include "btree_locking.h"
|
||||
#include "checksum.h"
|
||||
#include "extents.h"
|
||||
#include "io_types.h"
|
||||
|
||||
@ -23,8 +24,9 @@ struct btree_read_bio {
|
||||
};
|
||||
|
||||
struct btree_write_bio {
|
||||
void *data;
|
||||
struct work_struct work;
|
||||
void *data;
|
||||
unsigned bytes;
|
||||
struct bch_write_bio wbio;
|
||||
};
|
||||
|
||||
@ -81,6 +83,34 @@ static inline bool bch2_maybe_compact_whiteouts(struct bch_fs *c, struct btree *
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline struct nonce btree_nonce(struct bset *i, unsigned offset)
|
||||
{
|
||||
return (struct nonce) {{
|
||||
[0] = cpu_to_le32(offset),
|
||||
[1] = ((__le32 *) &i->seq)[0],
|
||||
[2] = ((__le32 *) &i->seq)[1],
|
||||
[3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE,
|
||||
}};
|
||||
}
|
||||
|
||||
static inline void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
|
||||
{
|
||||
struct nonce nonce = btree_nonce(i, offset);
|
||||
|
||||
if (!offset) {
|
||||
struct btree_node *bn = container_of(i, struct btree_node, keys);
|
||||
unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
|
||||
|
||||
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags,
|
||||
bytes);
|
||||
|
||||
nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
|
||||
}
|
||||
|
||||
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
|
||||
vstruct_end(i) - (void *) i->_data);
|
||||
}
|
||||
|
||||
void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *);
|
||||
|
||||
void bch2_btree_build_aux_trees(struct btree *);
|
||||
@ -139,7 +169,7 @@ do { \
|
||||
void bch2_btree_flush_all_reads(struct bch_fs *);
|
||||
void bch2_btree_flush_all_writes(struct bch_fs *);
|
||||
void bch2_btree_verify_flushed(struct bch_fs *);
|
||||
ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *);
|
||||
void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *);
|
||||
|
||||
static inline void compat_bformat(unsigned level, enum btree_id btree_id,
|
||||
unsigned version, unsigned big_endian,
|
||||
|
@ -391,7 +391,7 @@ static void btree_key_cache_journal_flush(struct journal *j,
|
||||
struct btree_trans trans;
|
||||
|
||||
six_lock_read(&ck->c.lock, NULL, NULL);
|
||||
key = READ_ONCE(ck->key);
|
||||
key = ck->key;
|
||||
|
||||
if (ck->journal.seq != seq ||
|
||||
!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
|
||||
|
@@ -94,7 +94,7 @@ struct btree {
struct btree_nr_keys nr;
u16 sib_u64s[2];
u16 whiteout_u64s;
u8 page_order;
u8 byte_order;
u8 unpack_fn_len;

/*
@@ -409,6 +409,7 @@ enum btree_flags {
BTREE_NODE_dying,
BTREE_NODE_fake,
BTREE_NODE_old_extent_overwrite,
BTREE_NODE_need_rewrite,
};

BTREE_FLAG(read_in_flight);
@@ -423,6 +424,7 @@ BTREE_FLAG(just_written);
BTREE_FLAG(dying);
BTREE_FLAG(fake);
BTREE_FLAG(old_extent_overwrite);
BTREE_FLAG(need_rewrite);

static inline struct btree_write *btree_current_write(struct btree *b)
{
@@ -593,7 +595,6 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter)

enum btree_trigger_flags {
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
__BTREE_TRIGGER_NOOVERWRITES, /* Don't run triggers on overwrites */

__BTREE_TRIGGER_INSERT,
__BTREE_TRIGGER_OVERWRITE,
@@ -606,7 +607,6 @@ enum btree_trigger_flags {
};

#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
#define BTREE_TRIGGER_NOOVERWRITES (1U << __BTREE_TRIGGER_NOOVERWRITES)

#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT)
#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE)

@@ -26,7 +26,7 @@
/*
 * Verify that child nodes correctly span parent node's range:
 */
static void btree_node_interior_verify(struct btree *b)
static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
{
#ifdef CONFIG_BCACHEFS_DEBUG
struct bpos next_node = b->data->min_key;
@@ -37,6 +37,9 @@ static void btree_node_interior_verify(struct btree *b)

BUG_ON(!b->c.level);

if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
return;

bch2_btree_node_iter_init_from_start(&iter, b);

while (1) {
@@ -135,8 +138,6 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b)

bch2_btree_node_hash_remove(&c->btree_cache, b);

six_lock_wakeup_all(&b->c.lock);

mutex_lock(&c->btree_cache.lock);
list_move(&b->list, &c->btree_cache.freeable);
mutex_unlock(&c->btree_cache.lock);
@@ -290,8 +291,10 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);

if (btree_node_is_extents(b) &&
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data))
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) {
set_btree_node_old_extent_overwrite(b);
set_btree_node_need_rewrite(b);
}

bch2_btree_build_aux_trees(b);

@@ -1118,8 +1121,8 @@ static struct btree *__btree_split_node(struct btree_update *as,
bch2_verify_btree_nr_keys(n2);

if (n1->c.level) {
btree_node_interior_verify(n1);
btree_node_interior_verify(n2);
btree_node_interior_verify(as->c, n1);
btree_node_interior_verify(as->c, n2);
}

return n2;
@@ -1178,7 +1181,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
BUG_ON(b->nsets != 1 ||
b->nr.live_u64s != le16_to_cpu(btree_bset_first(b)->u64s));

btree_node_interior_verify(b);
btree_node_interior_verify(as->c, b);
}

static void btree_split(struct btree_update *as, struct btree *b,
@@ -1376,7 +1379,7 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,

bch2_btree_node_unlock_write(b, iter);

btree_node_interior_verify(b);
btree_node_interior_verify(c, b);

/*
 * when called from the btree_split path the new nodes aren't added to
@@ -1864,7 +1867,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,

new_hash = bch2_btree_node_mem_alloc(c);
}

retry:
as = bch2_btree_update_start(iter->trans, iter->btree_id,
parent ? btree_update_reserve_required(c, parent) : 0,
BTREE_INSERT_NOFAIL|
@@ -1877,16 +1880,17 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
if (ret == -EAGAIN)
ret = -EINTR;

if (ret != -EINTR)
goto err;
if (ret == -EINTR) {
bch2_trans_unlock(iter->trans);
up_read(&c->gc_lock);
closure_sync(&cl);
down_read(&c->gc_lock);

bch2_trans_unlock(iter->trans);
up_read(&c->gc_lock);
closure_sync(&cl);
down_read(&c->gc_lock);
if (bch2_trans_relock(iter->trans))
goto retry;
}

if (!bch2_trans_relock(iter->trans))
goto err;
goto err;
}

ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(new_key));
@@ -1943,6 +1947,7 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
bch2_btree_cache_cannibalize_unlock(c);

set_btree_node_fake(b);
set_btree_node_need_rewrite(b);
b->c.level = 0;
b->c.btree_id = id;

@@ -1969,22 +1974,19 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
six_unlock_intent(&b->c.lock);
}

ssize_t bch2_btree_updates_print(struct bch_fs *c, char *buf)
void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
{
struct printbuf out = _PBUF(buf, PAGE_SIZE);
struct btree_update *as;

mutex_lock(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list)
pr_buf(&out, "%p m %u w %u r %u j %llu\n",
pr_buf(out, "%p m %u w %u r %u j %llu\n",
as,
as->mode,
as->nodes_written,
atomic_read(&as->cl.remaining) & CLOSURE_REMAINING_MASK,
as->journal.seq);
mutex_unlock(&c->btree_interior_update_lock);

return out.pos - buf;
}

size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c)

@@ -311,13 +311,13 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b,
static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
struct btree *b, unsigned u64s)
{
if (unlikely(btree_node_fake(b)))
if (unlikely(btree_node_need_rewrite(b)))
return false;

return u64s <= bch_btree_keys_u64s_remaining(c, b);
}

ssize_t bch2_btree_updates_print(struct bch_fs *, char *);
void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);

size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);

@@ -264,23 +264,12 @@ static inline int bch2_trans_journal_res_get(struct btree_trans *trans,
static enum btree_insert_ret
btree_key_can_insert(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert,
unsigned u64s)
{
struct bch_fs *c = trans->c;
struct btree *b = iter_l(iter)->b;

if (unlikely(btree_node_fake(b)))
return BTREE_INSERT_BTREE_NODE_FULL;

/*
 * old bch2_extent_sort_fix_overlapping() algorithm won't work with new
 * style extent updates:
 */
if (unlikely(btree_node_old_extent_overwrite(b)))
return BTREE_INSERT_BTREE_NODE_FULL;

if (unlikely(u64s > bch_btree_keys_u64s_remaining(c, b)))
if (!bch2_btree_node_insert_fits(c, b, u64s))
return BTREE_INSERT_BTREE_NODE_FULL;

return BTREE_INSERT_OK;
@@ -289,7 +278,6 @@ btree_key_can_insert(struct btree_trans *trans,
static enum btree_insert_ret
btree_key_can_insert_cached(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert,
unsigned u64s)
{
struct bkey_cached *ck = (void *) iter->l[0].b;
@@ -407,8 +395,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,

u64s += i->k->k.u64s;
ret = btree_iter_type(i->iter) != BTREE_ITER_CACHED
? btree_key_can_insert(trans, i->iter, i->k, u64s)
: btree_key_can_insert_cached(trans, i->iter, i->k, u64s);
? btree_key_can_insert(trans, i->iter, u64s)
: btree_key_can_insert_cached(trans, i->iter, u64s);
if (ret) {
*stopped_at = i;
return ret;

@ -133,13 +133,13 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
|
||||
cpu_replicas_entry(&c->replicas, i);
|
||||
|
||||
switch (e->data_type) {
|
||||
case BCH_DATA_BTREE:
|
||||
case BCH_DATA_btree:
|
||||
usage->btree += usage->replicas[i];
|
||||
break;
|
||||
case BCH_DATA_USER:
|
||||
case BCH_DATA_user:
|
||||
usage->data += usage->replicas[i];
|
||||
break;
|
||||
case BCH_DATA_CACHED:
|
||||
case BCH_DATA_cached:
|
||||
usage->cached += usage->replicas[i];
|
||||
break;
|
||||
}
|
||||
@ -179,7 +179,7 @@ out_pool:
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
|
||||
struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
|
||||
{
|
||||
struct bch_dev_usage ret;
|
||||
|
||||
@ -367,7 +367,7 @@ static inline int is_fragmented_bucket(struct bucket_mark m,
|
||||
struct bch_dev *ca)
|
||||
{
|
||||
if (!m.owned_by_allocator &&
|
||||
m.data_type == BCH_DATA_USER &&
|
||||
m.data_type == BCH_DATA_user &&
|
||||
bucket_sectors_used(m))
|
||||
return max_t(int, 0, (int) ca->mi.bucket_size -
|
||||
bucket_sectors_used(m));
|
||||
@ -382,7 +382,7 @@ static inline int bucket_stripe_sectors(struct bucket_mark m)
|
||||
static inline enum bch_data_type bucket_type(struct bucket_mark m)
|
||||
{
|
||||
return m.cached_sectors && !m.dirty_sectors
|
||||
? BCH_DATA_CACHED
|
||||
? BCH_DATA_cached
|
||||
: m.data_type;
|
||||
}
|
||||
|
||||
@ -435,7 +435,7 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage,
|
||||
enum bch_data_type type,
|
||||
int nr, s64 size)
|
||||
{
|
||||
if (type == BCH_DATA_SB || type == BCH_DATA_JOURNAL)
|
||||
if (type == BCH_DATA_sb || type == BCH_DATA_journal)
|
||||
fs_usage->hidden += size;
|
||||
|
||||
dev_usage->buckets[type] += nr;
|
||||
@ -472,7 +472,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
u->sectors[old.data_type] -= old.dirty_sectors;
|
||||
u->sectors[new.data_type] += new.dirty_sectors;
|
||||
u->sectors[BCH_DATA_CACHED] +=
|
||||
u->sectors[BCH_DATA_cached] +=
|
||||
(int) new.cached_sectors - (int) old.cached_sectors;
|
||||
u->sectors_fragmented +=
|
||||
is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca);
|
||||
@ -520,13 +520,13 @@ static inline int update_replicas(struct bch_fs *c,
|
||||
return 0;
|
||||
|
||||
switch (r->data_type) {
|
||||
case BCH_DATA_BTREE:
|
||||
case BCH_DATA_btree:
|
||||
fs_usage->btree += sectors;
|
||||
break;
|
||||
case BCH_DATA_USER:
|
||||
case BCH_DATA_user:
|
||||
fs_usage->data += sectors;
|
||||
break;
|
||||
case BCH_DATA_CACHED:
|
||||
case BCH_DATA_cached:
|
||||
fs_usage->cached += sectors;
|
||||
break;
|
||||
}
|
||||
@ -713,7 +713,8 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
|
||||
static int bch2_mark_alloc(struct bch_fs *c,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
u64 journal_seq, unsigned flags)
|
||||
{
|
||||
@ -721,7 +722,11 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
|
||||
struct bkey_alloc_unpacked u;
|
||||
struct bch_dev *ca;
|
||||
struct bucket *g;
|
||||
struct bucket_mark old, m;
|
||||
struct bucket_mark old_m, m;
|
||||
|
||||
/* We don't do anything for deletions - do we?: */
|
||||
if (new.k->type != KEY_TYPE_alloc)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* alloc btree is read in by bch2_alloc_read, not gc:
|
||||
@ -730,15 +735,15 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
|
||||
!(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
|
||||
return 0;
|
||||
|
||||
ca = bch_dev_bkey_exists(c, k.k->p.inode);
|
||||
ca = bch_dev_bkey_exists(c, new.k->p.inode);
|
||||
|
||||
if (k.k->p.offset >= ca->mi.nbuckets)
|
||||
if (new.k->p.offset >= ca->mi.nbuckets)
|
||||
return 0;
|
||||
|
||||
g = __bucket(ca, k.k->p.offset, gc);
|
||||
u = bch2_alloc_unpack(k);
|
||||
g = __bucket(ca, new.k->p.offset, gc);
|
||||
u = bch2_alloc_unpack(new);
|
||||
|
||||
old = bucket_cmpxchg(g, m, ({
|
||||
old_m = bucket_cmpxchg(g, m, ({
|
||||
m.gen = u.gen;
|
||||
m.data_type = u.data_type;
|
||||
m.dirty_sectors = u.dirty_sectors;
|
||||
@ -751,7 +756,7 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
|
||||
}));
|
||||
|
||||
if (!(flags & BTREE_TRIGGER_ALLOC_READ))
|
||||
bch2_dev_usage_update(c, ca, fs_usage, old, m, gc);
|
||||
bch2_dev_usage_update(c, ca, fs_usage, old_m, m, gc);
|
||||
|
||||
g->io_time[READ] = u.read_time;
|
||||
g->io_time[WRITE] = u.write_time;
|
||||
@ -764,11 +769,11 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
|
||||
*/
|
||||
|
||||
if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
|
||||
old.cached_sectors) {
|
||||
old_m.cached_sectors) {
|
||||
update_cached_sectors(c, fs_usage, ca->dev_idx,
|
||||
-old.cached_sectors);
|
||||
trace_invalidate(ca, bucket_to_sector(ca, k.k->p.offset),
|
||||
old.cached_sectors);
|
||||
-old_m.cached_sectors);
|
||||
trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
|
||||
old_m.cached_sectors);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -792,8 +797,8 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct bucket_mark old, new;
|
||||
bool overflow;
|
||||
|
||||
BUG_ON(data_type != BCH_DATA_SB &&
|
||||
data_type != BCH_DATA_JOURNAL);
|
||||
BUG_ON(data_type != BCH_DATA_sb &&
|
||||
data_type != BCH_DATA_journal);
|
||||
|
||||
old = bucket_cmpxchg(g, new, ({
|
||||
new.data_type = data_type;
|
||||
@ -824,8 +829,8 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
unsigned sectors, struct gc_pos pos,
|
||||
unsigned flags)
|
||||
{
|
||||
BUG_ON(type != BCH_DATA_SB &&
|
||||
type != BCH_DATA_JOURNAL);
|
||||
BUG_ON(type != BCH_DATA_sb &&
|
||||
type != BCH_DATA_journal);
|
||||
|
||||
preempt_disable();
|
||||
|
||||
@ -878,51 +883,46 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
|
||||
}
|
||||
|
||||
static void bucket_set_stripe(struct bch_fs *c,
|
||||
const struct bch_stripe *v,
|
||||
const struct bch_extent_ptr *ptr,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
u64 journal_seq,
|
||||
unsigned flags)
|
||||
unsigned flags,
|
||||
bool enabled)
|
||||
{
|
||||
bool enabled = !(flags & BTREE_TRIGGER_OVERWRITE);
|
||||
bool gc = flags & BTREE_TRIGGER_GC;
|
||||
unsigned i;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
struct bucket *g = PTR_BUCKET(ca, ptr, gc);
|
||||
struct bucket_mark new, old;
|
||||
|
||||
for (i = 0; i < v->nr_blocks; i++) {
|
||||
const struct bch_extent_ptr *ptr = v->ptrs + i;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
struct bucket *g = PTR_BUCKET(ca, ptr, gc);
|
||||
struct bucket_mark new, old;
|
||||
old = bucket_cmpxchg(g, new, ({
|
||||
new.stripe = enabled;
|
||||
if (journal_seq) {
|
||||
new.journal_seq_valid = 1;
|
||||
new.journal_seq = journal_seq;
|
||||
}
|
||||
}));
|
||||
|
||||
old = bucket_cmpxchg(g, new, ({
|
||||
new.stripe = enabled;
|
||||
if (journal_seq) {
|
||||
new.journal_seq_valid = 1;
|
||||
new.journal_seq = journal_seq;
|
||||
}
|
||||
}));
|
||||
bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
|
||||
|
||||
bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
|
||||
|
||||
/*
|
||||
* XXX write repair code for these, flag stripe as possibly bad
|
||||
*/
|
||||
if (old.gen != ptr->gen)
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"stripe with stale pointer");
|
||||
/*
|
||||
* XXX write repair code for these, flag stripe as possibly bad
|
||||
*/
|
||||
if (old.gen != ptr->gen)
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"stripe with stale pointer");
|
||||
#if 0
|
||||
/*
|
||||
* We'd like to check for these, but these checks don't work
|
||||
* yet:
|
||||
*/
|
||||
if (old.stripe && enabled)
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"multiple stripes using same bucket");
|
||||
/*
|
||||
* We'd like to check for these, but these checks don't work
|
||||
* yet:
|
||||
*/
|
||||
if (old.stripe && enabled)
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"multiple stripes using same bucket");
|
||||
|
||||
if (!old.stripe && !enabled)
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"deleting stripe but bucket not marked as stripe bucket");
|
||||
if (!old.stripe && !enabled)
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"deleting stripe but bucket not marked as stripe bucket");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
|
||||
@ -1064,8 +1064,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
|
||||
{
|
||||
bool gc = flags & BTREE_TRIGGER_GC;
|
||||
struct stripe *m;
|
||||
unsigned old, new;
|
||||
int blocks_nonempty_delta;
|
||||
unsigned i, blocks_nonempty = 0;
|
||||
|
||||
m = genradix_ptr(&c->stripes[gc], p.idx);
|
||||
|
||||
@ -1084,31 +1083,30 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
|
||||
*nr_parity = m->nr_redundant;
|
||||
*r = m->r;
|
||||
|
||||
old = m->block_sectors[p.block];
|
||||
m->block_sectors[p.block] += sectors;
|
||||
new = m->block_sectors[p.block];
|
||||
|
||||
blocks_nonempty_delta = (int) !!new - (int) !!old;
|
||||
if (blocks_nonempty_delta) {
|
||||
m->blocks_nonempty += blocks_nonempty_delta;
|
||||
for (i = 0; i < m->nr_blocks; i++)
|
||||
blocks_nonempty += m->block_sectors[i] != 0;
|
||||
|
||||
if (m->blocks_nonempty != blocks_nonempty) {
|
||||
m->blocks_nonempty = blocks_nonempty;
|
||||
if (!gc)
|
||||
bch2_stripes_heap_update(c, m, p.idx);
|
||||
}
|
||||
|
||||
m->dirty = true;
|
||||
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
|
||||
static int bch2_mark_extent(struct bch_fs *c,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
unsigned offset, s64 sectors,
|
||||
enum bch_data_type data_type,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
unsigned journal_seq, unsigned flags)
|
||||
{
|
||||
struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old;
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
@ -1124,7 +1122,7 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
|
||||
BUG_ON(!sectors);
|
||||
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
s64 disk_sectors = data_type == BCH_DATA_BTREE
|
||||
s64 disk_sectors = data_type == BCH_DATA_btree
|
||||
? sectors
|
||||
: ptr_disk_sectors_delta(p, offset, sectors, flags);
|
||||
|
||||
@ -1177,72 +1175,98 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
|
||||
static int bch2_mark_stripe(struct bch_fs *c,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
u64 journal_seq, unsigned flags)
|
||||
{
|
||||
bool gc = flags & BTREE_TRIGGER_GC;
|
||||
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
|
||||
size_t idx = s.k->p.offset;
|
||||
size_t idx = new.k->p.offset;
|
||||
const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
|
||||
? bkey_s_c_to_stripe(old).v : NULL;
|
||||
const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
|
||||
? bkey_s_c_to_stripe(new).v : NULL;
|
||||
struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
|
||||
unsigned i;
|
||||
|
||||
spin_lock(&c->ec_stripes_heap_lock);
|
||||
|
||||
if (!m || ((flags & BTREE_TRIGGER_OVERWRITE) && !m->alive)) {
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
if (!m || (old_s && !m->alive)) {
|
||||
bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
|
||||
idx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!(flags & BTREE_TRIGGER_OVERWRITE)) {
|
||||
m->sectors = le16_to_cpu(s.v->sectors);
|
||||
m->algorithm = s.v->algorithm;
|
||||
m->nr_blocks = s.v->nr_blocks;
|
||||
m->nr_redundant = s.v->nr_redundant;
|
||||
if (!new_s) {
|
||||
/* Deleting: */
|
||||
for (i = 0; i < old_s->nr_blocks; i++)
|
||||
bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
|
||||
journal_seq, flags, false);
|
||||
|
||||
bch2_bkey_to_replicas(&m->r.e, k);
|
||||
if (!gc && m->on_heap) {
|
||||
spin_lock(&c->ec_stripes_heap_lock);
|
||||
bch2_stripes_heap_del(c, m, idx);
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: account for stripes somehow here
|
||||
*/
|
||||
#if 0
|
||||
update_replicas(c, fs_usage, &m->r.e, stripe_sectors);
|
||||
#endif
|
||||
memset(m, 0, sizeof(*m));
|
||||
} else {
|
||||
BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks);
|
||||
BUG_ON(old_s && new_s->nr_redundant != old_s->nr_redundant);
|
||||
|
||||
for (i = 0; i < new_s->nr_blocks; i++) {
|
||||
if (!old_s ||
|
||||
memcmp(new_s->ptrs + i,
|
||||
old_s->ptrs + i,
|
||||
sizeof(struct bch_extent_ptr))) {
|
||||
|
||||
if (old_s)
|
||||
bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
|
||||
journal_seq, flags, false);
|
||||
bucket_set_stripe(c, new_s->ptrs + i, fs_usage,
|
||||
journal_seq, flags, true);
|
||||
}
|
||||
}
|
||||
|
||||
m->alive = true;
|
||||
m->sectors = le16_to_cpu(new_s->sectors);
|
||||
m->algorithm = new_s->algorithm;
|
||||
m->nr_blocks = new_s->nr_blocks;
|
||||
m->nr_redundant = new_s->nr_redundant;
|
||||
|
||||
bch2_bkey_to_replicas(&m->r.e, new);
|
||||
|
||||
/* gc recalculates these fields: */
|
||||
if (!(flags & BTREE_TRIGGER_GC)) {
|
||||
for (i = 0; i < s.v->nr_blocks; i++) {
|
||||
m->blocks_nonempty = 0;
|
||||
|
||||
for (i = 0; i < new_s->nr_blocks; i++) {
|
||||
m->block_sectors[i] =
|
||||
stripe_blockcount_get(s.v, i);
|
||||
stripe_blockcount_get(new_s, i);
|
||||
m->blocks_nonempty += !!m->block_sectors[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (!gc)
|
||||
if (!gc) {
|
||||
spin_lock(&c->ec_stripes_heap_lock);
|
||||
bch2_stripes_heap_update(c, m, idx);
|
||||
m->alive = true;
|
||||
} else {
|
||||
if (!gc)
|
||||
bch2_stripes_heap_del(c, m, idx);
|
||||
memset(m, 0, sizeof(*m));
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
|
||||
bucket_set_stripe(c, s.v, fs_usage, 0, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_mark_key_locked(struct bch_fs *c,
|
||||
struct bkey_s_c k,
|
||||
struct bkey_s_c old,
|
||||
struct bkey_s_c new,
|
||||
unsigned offset, s64 sectors,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
u64 journal_seq, unsigned flags)
|
||||
{
|
||||
struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)));
|
||||
|
||||
preempt_disable();
|
||||
|
||||
if (!fs_usage || (flags & BTREE_TRIGGER_GC))
|
||||
@ -1251,7 +1275,7 @@ static int bch2_mark_key_locked(struct bch_fs *c,
|
||||
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_alloc:
|
||||
ret = bch2_mark_alloc(c, k, fs_usage, journal_seq, flags);
|
||||
ret = bch2_mark_alloc(c, old, new, fs_usage, journal_seq, flags);
|
||||
break;
|
||||
case KEY_TYPE_btree_ptr:
|
||||
case KEY_TYPE_btree_ptr_v2:
|
||||
@ -1259,16 +1283,16 @@ static int bch2_mark_key_locked(struct bch_fs *c,
|
||||
? c->opts.btree_node_size
|
||||
: -c->opts.btree_node_size;
|
||||
|
||||
ret = bch2_mark_extent(c, k, offset, sectors, BCH_DATA_BTREE,
|
||||
fs_usage, journal_seq, flags);
|
||||
ret = bch2_mark_extent(c, old, new, offset, sectors,
|
||||
BCH_DATA_btree, fs_usage, journal_seq, flags);
|
||||
break;
|
||||
case KEY_TYPE_extent:
|
||||
case KEY_TYPE_reflink_v:
|
||||
ret = bch2_mark_extent(c, k, offset, sectors, BCH_DATA_USER,
|
||||
fs_usage, journal_seq, flags);
|
||||
ret = bch2_mark_extent(c, old, new, offset, sectors,
|
||||
BCH_DATA_user, fs_usage, journal_seq, flags);
|
||||
break;
|
||||
case KEY_TYPE_stripe:
|
||||
ret = bch2_mark_stripe(c, k, fs_usage, journal_seq, flags);
|
||||
ret = bch2_mark_stripe(c, old, new, fs_usage, journal_seq, flags);
|
||||
break;
|
||||
case KEY_TYPE_inode:
|
||||
if (!(flags & BTREE_TRIGGER_OVERWRITE))
|
||||
@ -1294,82 +1318,38 @@ static int bch2_mark_key_locked(struct bch_fs *c,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
int bch2_mark_key(struct bch_fs *c, struct bkey_s_c new,
|
||||
unsigned offset, s64 sectors,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
u64 journal_seq, unsigned flags)
|
||||
{
|
||||
struct bkey deleted;
|
||||
struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
|
||||
int ret;
|
||||
|
||||
bkey_init(&deleted);
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
ret = bch2_mark_key_locked(c, k, offset, sectors,
|
||||
fs_usage, journal_seq, flags);
|
||||
ret = bch2_mark_key_locked(c, old, new, offset, sectors,
|
||||
fs_usage, journal_seq,
|
||||
BTREE_TRIGGER_INSERT|flags);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline int bch2_mark_overwrite(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c old,
|
||||
struct bkey_i *new,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
unsigned flags,
|
||||
bool is_extents)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
unsigned offset = 0;
|
||||
s64 sectors = -((s64) old.k->size);
|
||||
|
||||
flags |= BTREE_TRIGGER_OVERWRITE;
|
||||
|
||||
if (is_extents
|
||||
? bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0
|
||||
: bkey_cmp(new->k.p, old.k->p))
|
||||
return 0;
|
||||
|
||||
if (is_extents) {
|
||||
switch (bch2_extent_overlap(&new->k, old.k)) {
|
||||
case BCH_EXTENT_OVERLAP_ALL:
|
||||
offset = 0;
|
||||
sectors = -((s64) old.k->size);
|
||||
break;
|
||||
case BCH_EXTENT_OVERLAP_BACK:
|
||||
offset = bkey_start_offset(&new->k) -
|
||||
bkey_start_offset(old.k);
|
||||
sectors = bkey_start_offset(&new->k) -
|
||||
old.k->p.offset;
|
||||
break;
|
||||
case BCH_EXTENT_OVERLAP_FRONT:
|
||||
offset = 0;
|
||||
sectors = bkey_start_offset(old.k) -
|
||||
new->k.p.offset;
|
||||
break;
|
||||
case BCH_EXTENT_OVERLAP_MIDDLE:
|
||||
offset = bkey_start_offset(&new->k) -
|
||||
bkey_start_offset(old.k);
|
||||
sectors = -((s64) new->k.size);
|
||||
flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
|
||||
break;
|
||||
}
|
||||
|
||||
BUG_ON(sectors >= 0);
|
||||
}
|
||||
|
||||
return bch2_mark_key_locked(c, old, offset, sectors, fs_usage,
|
||||
trans->journal_res.seq, flags) ?: 1;
|
||||
}
|
||||
|
||||
int bch2_mark_update(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_i *insert,
|
||||
struct bkey_i *new,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree *b = iter_l(iter)->b;
|
||||
struct btree_node_iter node_iter = iter_l(iter)->iter;
|
||||
struct bkey_packed *_k;
|
||||
struct bkey_packed *_old;
|
||||
struct bkey_s_c old;
|
||||
struct bkey unpacked;
|
||||
int ret = 0;
|
||||
|
||||
if (unlikely(flags & BTREE_TRIGGER_NORUN))
|
||||
@ -1378,34 +1358,87 @@ int bch2_mark_update(struct btree_trans *trans,
|
||||
if (!btree_node_type_needs_gc(iter->btree_id))
|
||||
return 0;
|
||||
|
||||
bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
|
||||
0, insert->k.size,
|
||||
fs_usage, trans->journal_res.seq,
|
||||
BTREE_TRIGGER_INSERT|flags);
|
||||
bkey_init(&unpacked);
|
||||
old = (struct bkey_s_c) { &unpacked, NULL };
|
||||
|
||||
if (unlikely(flags & BTREE_TRIGGER_NOOVERWRITES))
|
||||
return 0;
|
||||
if (!btree_node_type_is_extents(iter->btree_id)) {
|
||||
if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
|
||||
_old = bch2_btree_node_iter_peek(&node_iter, b);
|
||||
if (_old)
|
||||
old = bkey_disassemble(b, _old, &unpacked);
|
||||
} else {
|
||||
struct bkey_cached *ck = (void *) iter->l[0].b;
|
||||
|
||||
/*
|
||||
* For non extents, we only mark the new key, not the key being
|
||||
* overwritten - unless we're actually deleting:
|
||||
*/
|
||||
if ((iter->btree_id == BTREE_ID_ALLOC ||
|
||||
iter->btree_id == BTREE_ID_EC) &&
|
||||
!bkey_deleted(&insert->k))
|
||||
return 0;
|
||||
if (ck->valid)
|
||||
old = bkey_i_to_s_c(ck->k);
|
||||
}
|
||||
|
||||
while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
|
||||
struct bkey unpacked;
|
||||
struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
|
||||
if (old.k->type == new->k.type) {
|
||||
bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0,
|
||||
fs_usage, trans->journal_res.seq,
|
||||
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
|
||||
|
||||
ret = bch2_mark_overwrite(trans, iter, k, insert,
|
||||
fs_usage, flags,
|
||||
btree_node_type_is_extents(iter->btree_id));
|
||||
if (ret <= 0)
|
||||
break;
|
||||
} else {
|
||||
bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0,
|
||||
fs_usage, trans->journal_res.seq,
|
||||
BTREE_TRIGGER_INSERT|flags);
|
||||
bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0,
|
||||
fs_usage, trans->journal_res.seq,
|
||||
BTREE_TRIGGER_OVERWRITE|flags);
|
||||
}
|
||||
} else {
|
||||
BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
|
||||
bch2_mark_key_locked(c, old, bkey_i_to_s_c(new),
|
||||
0, new->k.size,
|
||||
fs_usage, trans->journal_res.seq,
|
||||
BTREE_TRIGGER_INSERT|flags);
|
||||
|
||||
bch2_btree_node_iter_advance(&node_iter, b);
|
||||
while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) {
|
||||
unsigned offset = 0;
|
||||
s64 sectors;
|
||||
|
||||
old = bkey_disassemble(b, _old, &unpacked);
|
||||
sectors = -((s64) old.k->size);
|
||||
|
||||
flags |= BTREE_TRIGGER_OVERWRITE;
|
||||
|
||||
if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
|
||||
return 0;
|
||||
|
||||
switch (bch2_extent_overlap(&new->k, old.k)) {
|
||||
case BCH_EXTENT_OVERLAP_ALL:
|
||||
offset = 0;
|
||||
sectors = -((s64) old.k->size);
|
||||
break;
|
||||
case BCH_EXTENT_OVERLAP_BACK:
|
||||
offset = bkey_start_offset(&new->k) -
|
||||
bkey_start_offset(old.k);
|
||||
sectors = bkey_start_offset(&new->k) -
|
||||
old.k->p.offset;
|
||||
break;
|
||||
case BCH_EXTENT_OVERLAP_FRONT:
|
||||
offset = 0;
|
||||
sectors = bkey_start_offset(old.k) -
|
||||
new->k.p.offset;
|
||||
break;
|
||||
case BCH_EXTENT_OVERLAP_MIDDLE:
|
||||
offset = bkey_start_offset(&new->k) -
|
||||
bkey_start_offset(old.k);
|
||||
sectors = -((s64) new->k.size);
|
||||
flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
|
||||
break;
|
||||
}
|
||||
|
||||
BUG_ON(sectors >= 0);
|
||||
|
||||
ret = bch2_mark_key_locked(c, old, bkey_i_to_s_c(new),
|
||||
offset, sectors, fs_usage,
|
||||
trans->journal_res.seq, flags) ?: 1;
|
||||
if (ret <= 0)
|
||||
break;
|
||||
|
||||
bch2_btree_node_iter_advance(&node_iter, b);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -1460,8 +1493,10 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
|
||||
} else {
|
||||
struct bkey_cached *ck = (void *) i->iter->l[0].b;
|
||||
|
||||
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(ck->k));
|
||||
pr_err("%s", buf);
|
||||
if (ck->valid) {
|
||||
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(ck->k));
|
||||
pr_err("%s", buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1632,7 +1667,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
|
||||
BUG_ON(!sectors);
|
||||
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
s64 disk_sectors = data_type == BCH_DATA_BTREE
|
||||
s64 disk_sectors = data_type == BCH_DATA_btree
|
||||
? sectors
|
||||
: ptr_disk_sectors_delta(p, offset, sectors, flags);
|
||||
|
||||
@ -1774,11 +1809,11 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
|
||||
: -c->opts.btree_node_size;
|
||||
|
||||
return bch2_trans_mark_extent(trans, k, offset, sectors,
|
||||
flags, BCH_DATA_BTREE);
|
||||
flags, BCH_DATA_btree);
|
||||
case KEY_TYPE_extent:
|
||||
case KEY_TYPE_reflink_v:
|
||||
return bch2_trans_mark_extent(trans, k, offset, sectors,
|
||||
flags, BCH_DATA_USER);
|
||||
flags, BCH_DATA_user);
|
||||
case KEY_TYPE_inode:
|
||||
d = replicas_deltas_realloc(trans, 0);
|
||||
|
||||
@ -1829,9 +1864,6 @@ int bch2_trans_mark_update(struct btree_trans *trans,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (unlikely(flags & BTREE_TRIGGER_NOOVERWRITES))
|
||||
return 0;
|
||||
|
||||
if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
|
||||
struct bkey_cached *ck = (void *) iter->l[0].b;
|
||||
|
||||
@ -1992,7 +2024,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
alloc_fifo free[RESERVE_NR];
|
||||
alloc_fifo free_inc;
|
||||
alloc_heap alloc_heap;
|
||||
copygc_heap copygc_heap;
|
||||
|
||||
size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
|
||||
ca->mi.bucket_size / c->opts.btree_node_size);
|
||||
@ -2001,15 +2032,13 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7);
|
||||
size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12),
|
||||
btree_reserve * 2);
|
||||
bool resize = ca->buckets[0] != NULL,
|
||||
start_copygc = ca->copygc_thread != NULL;
|
||||
bool resize = ca->buckets[0] != NULL;
|
||||
int ret = -ENOMEM;
|
||||
unsigned i;
|
||||
|
||||
memset(&free, 0, sizeof(free));
|
||||
memset(&free_inc, 0, sizeof(free_inc));
|
||||
memset(&alloc_heap, 0, sizeof(alloc_heap));
|
||||
memset(&copygc_heap, 0, sizeof(copygc_heap));
|
||||
|
||||
if (!(buckets = kvpmalloc(sizeof(struct bucket_array) +
|
||||
nbuckets * sizeof(struct bucket),
|
||||
@ -2022,14 +2051,13 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
copygc_reserve, GFP_KERNEL) ||
|
||||
!init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
|
||||
!init_fifo(&free_inc, free_inc_nr, GFP_KERNEL) ||
|
||||
!init_heap(&alloc_heap, ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL) ||
|
||||
!init_heap(&copygc_heap, copygc_reserve, GFP_KERNEL))
|
||||
!init_heap(&alloc_heap, ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL))
|
||||
goto err;
|
||||
|
||||
buckets->first_bucket = ca->mi.first_bucket;
|
||||
buckets->nbuckets = nbuckets;
|
||||
|
||||
bch2_copygc_stop(ca);
|
||||
bch2_copygc_stop(c);
|
||||
|
||||
if (resize) {
|
||||
down_write(&c->gc_lock);
|
||||
@ -2072,21 +2100,13 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
/* with gc lock held, alloc_heap can't be in use: */
|
||||
swap(ca->alloc_heap, alloc_heap);
|
||||
|
||||
/* and we shut down copygc: */
|
||||
swap(ca->copygc_heap, copygc_heap);
|
||||
|
||||
nbuckets = ca->mi.nbuckets;
|
||||
|
||||
if (resize)
|
||||
up_write(&ca->bucket_lock);
|
||||
|
||||
if (start_copygc &&
|
||||
bch2_copygc_start(c, ca))
|
||||
bch_err(ca, "error restarting copygc thread");
|
||||
|
||||
ret = 0;
|
||||
err:
|
||||
free_heap(&copygc_heap);
|
||||
free_heap(&alloc_heap);
|
||||
free_fifo(&free_inc);
|
||||
for (i = 0; i < RESERVE_NR; i++)
|
||||
@ -2103,7 +2123,6 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
free_heap(&ca->copygc_heap);
|
||||
free_heap(&ca->alloc_heap);
|
||||
free_fifo(&ca->free_inc);
|
||||
for (i = 0; i < RESERVE_NR; i++)
|
||||
|
@ -99,9 +99,9 @@ static inline enum bch_data_type ptr_data_type(const struct bkey *k,
|
||||
{
|
||||
if (k->type == KEY_TYPE_btree_ptr ||
|
||||
k->type == KEY_TYPE_btree_ptr_v2)
|
||||
return BCH_DATA_BTREE;
|
||||
return BCH_DATA_btree;
|
||||
|
||||
return ptr->cached ? BCH_DATA_CACHED : BCH_DATA_USER;
|
||||
return ptr->cached ? BCH_DATA_cached : BCH_DATA_user;
|
||||
}
|
||||
|
||||
static inline struct bucket_mark ptr_bucket_mark(struct bch_dev *ca,
|
||||
@ -182,7 +182,7 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
|
||||
|
||||
/* Device usage: */
|
||||
|
||||
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
|
||||
struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *);
|
||||
|
||||
void bch2_dev_usage_from_buckets(struct bch_fs *);
|
||||
|
||||
@ -202,9 +202,9 @@ static inline u64 __dev_buckets_available(struct bch_dev *ca,
|
||||
/*
|
||||
* Number of reclaimable buckets - only for use by the allocator thread:
|
||||
*/
|
||||
static inline u64 dev_buckets_available(struct bch_fs *c, struct bch_dev *ca)
|
||||
static inline u64 dev_buckets_available(struct bch_dev *ca)
|
||||
{
|
||||
return __dev_buckets_available(ca, bch2_dev_usage_read(c, ca));
|
||||
return __dev_buckets_available(ca, bch2_dev_usage_read(ca));
|
||||
}
|
||||
|
||||
static inline u64 __dev_buckets_free(struct bch_dev *ca,
|
||||
@ -215,9 +215,9 @@ static inline u64 __dev_buckets_free(struct bch_dev *ca,
|
||||
fifo_used(&ca->free_inc);
|
||||
}
|
||||
|
||||
static inline u64 dev_buckets_free(struct bch_fs *c, struct bch_dev *ca)
|
||||
static inline u64 dev_buckets_free(struct bch_dev *ca)
|
||||
{
|
||||
return __dev_buckets_free(ca, bch2_dev_usage_read(c, ca));
|
||||
return __dev_buckets_free(ca, bch2_dev_usage_read(ca));
|
||||
}
|
||||
|
||||
/* Filesystem usage: */
|
||||
@ -259,14 +259,11 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
|
||||
size_t, enum bch_data_type, unsigned,
|
||||
struct gc_pos, unsigned);
|
||||
|
||||
int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned, s64,
|
||||
struct bch_fs_usage *, u64, unsigned);
|
||||
int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned,
|
||||
s64, struct bch_fs_usage *, u64, unsigned);
|
||||
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
|
||||
struct disk_reservation *, unsigned);
|
||||
|
||||
int bch2_mark_overwrite(struct btree_trans *, struct btree_iter *,
|
||||
struct bkey_s_c, struct bkey_i *,
|
||||
struct bch_fs_usage *, unsigned, bool);
|
||||
int bch2_mark_update(struct btree_trans *, struct btree_iter *,
|
||||
struct bkey_i *, struct bch_fs_usage *, unsigned);
|
||||
|
||||
|
@ -123,7 +123,9 @@ struct disk_reservation {
|
||||
};
|
||||
|
||||
struct copygc_heap_entry {
|
||||
u8 dev;
|
||||
u8 gen;
|
||||
u16 fragmentation;
|
||||
u32 sectors;
|
||||
u64 offset;
|
||||
};
|
||||
|
@ -468,7 +468,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
|
||||
if (IS_ERR(ca))
|
||||
return PTR_ERR(ca);
|
||||
|
||||
src = bch2_dev_usage_read(c, ca);
|
||||
src = bch2_dev_usage_read(ca);
|
||||
|
||||
arg.state = ca->mi.state;
|
||||
arg.bucket_size = ca->mi.bucket_size;
|
||||
|
@ -152,9 +152,8 @@ void __bch2_increment_clock(struct io_clock *clock, unsigned sectors)
|
||||
timer->fn(timer);
|
||||
}
|
||||
|
||||
ssize_t bch2_io_timers_show(struct io_clock *clock, char *buf)
|
||||
void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
|
||||
{
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE);
|
||||
unsigned long now;
|
||||
unsigned i;
|
||||
|
||||
@ -162,12 +161,10 @@ ssize_t bch2_io_timers_show(struct io_clock *clock, char *buf)
|
||||
now = atomic_long_read(&clock->now);
|
||||
|
||||
for (i = 0; i < clock->timers.used; i++)
|
||||
pr_buf(&out, "%ps:\t%li\n",
|
||||
pr_buf(out, "%ps:\t%li\n",
|
||||
clock->timers.data[i]->fn,
|
||||
clock->timers.data[i]->expire - now);
|
||||
spin_unlock(&clock->timer_lock);
|
||||
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
void bch2_io_clock_exit(struct io_clock *clock)
|
||||
|
@ -30,7 +30,7 @@ void bch2_io_clock_schedule_timeout(struct io_clock *, unsigned long);
|
||||
__ret; \
|
||||
})
|
||||
|
||||
ssize_t bch2_io_timers_show(struct io_clock *, char *);
|
||||
void bch2_io_timers_to_text(struct printbuf *, struct io_clock *);
|
||||
|
||||
void bch2_io_clock_exit(struct io_clock *);
|
||||
int bch2_io_clock_init(struct io_clock *);
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include "super-io.h"
|
||||
|
||||
#include <linux/lz4.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/zlib.h>
|
||||
#include <linux/zstd.h>
|
||||
|
||||
@ -64,7 +63,7 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
|
||||
struct bbuf ret;
|
||||
struct bio_vec bv;
|
||||
struct bvec_iter iter;
|
||||
unsigned nr_pages = 0, flags;
|
||||
unsigned nr_pages = 0;
|
||||
struct page *stack_pages[16];
|
||||
struct page **pages = NULL;
|
||||
void *data;
|
||||
@ -104,10 +103,7 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
|
||||
__bio_for_each_segment(bv, bio, iter, start)
|
||||
pages[nr_pages++] = bv.bv_page;
|
||||
|
||||
flags = memalloc_nofs_save();
|
||||
data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
|
||||
memalloc_nofs_restore(flags);
|
||||
|
||||
if (pages != stack_pages)
|
||||
kfree(pages);
|
||||
|
||||
|
@ -183,7 +183,7 @@ const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned targe
|
||||
case TARGET_GROUP: {
|
||||
struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
|
||||
|
||||
return t.group < g->nr && !g->entries[t.group].deleted
|
||||
return g && t.group < g->nr && !g->entries[t.group].deleted
|
||||
? &g->entries[t.group].devs
|
||||
: NULL;
|
||||
}
|
||||
@ -208,7 +208,7 @@ bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target)
|
||||
|
||||
rcu_read_lock();
|
||||
g = rcu_dereference(c->disk_groups);
|
||||
m = t.group < g->nr && !g->entries[t.group].deleted
|
||||
m = g && t.group < g->nr && !g->entries[t.group].deleted
|
||||
? &g->entries[t.group].devs
|
||||
: NULL;
|
||||
|
||||
@ -387,6 +387,7 @@ int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
|
||||
{
|
||||
struct bch_member *mi;
|
||||
int v = -1;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
||||
@ -399,14 +400,18 @@ int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
|
||||
return v;
|
||||
}
|
||||
|
||||
ret = bch2_sb_disk_groups_to_cpu(c);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
write_sb:
|
||||
mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
|
||||
SET_BCH_MEMBER_GROUP(mi, v + 1);
|
||||
|
||||
bch2_write_super(c);
|
||||
unlock:
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v)
|
||||
|
@ -71,7 +71,10 @@ static inline struct bch_devs_mask target_rw_devs(struct bch_fs *c,
|
||||
bool bch2_dev_in_target(struct bch_fs *, unsigned, unsigned);
|
||||
|
||||
int bch2_disk_path_find(struct bch_sb_handle *, const char *);
|
||||
|
||||
/* Exported for userspace bcachefs-tools: */
|
||||
int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *);
|
||||
|
||||
void bch2_disk_path_to_text(struct printbuf *, struct bch_sb_handle *,
|
||||
unsigned);
|
||||
|
||||
|
563
libbcachefs/ec.c
@ -200,40 +200,6 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
|
||||
return false;
|
||||
}
|
||||
|
||||
static void ec_stripe_key_init(struct bch_fs *c,
|
||||
struct bkey_i_stripe *s,
|
||||
struct open_buckets *blocks,
|
||||
struct open_buckets *parity,
|
||||
unsigned stripe_size)
|
||||
{
|
||||
struct open_bucket *ob;
|
||||
unsigned i, u64s;
|
||||
|
||||
bkey_stripe_init(&s->k_i);
|
||||
s->v.sectors = cpu_to_le16(stripe_size);
|
||||
s->v.algorithm = 0;
|
||||
s->v.nr_blocks = parity->nr + blocks->nr;
|
||||
s->v.nr_redundant = parity->nr;
|
||||
s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max);
|
||||
s->v.csum_type = BCH_CSUM_CRC32C;
|
||||
s->v.pad = 0;
|
||||
|
||||
open_bucket_for_each(c, blocks, ob, i)
|
||||
s->v.ptrs[i] = ob->ptr;
|
||||
|
||||
open_bucket_for_each(c, parity, ob, i)
|
||||
s->v.ptrs[blocks->nr + i] = ob->ptr;
|
||||
|
||||
while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
|
||||
BUG_ON(1 << s->v.csum_granularity_bits >=
|
||||
le16_to_cpu(s->v.sectors) ||
|
||||
s->v.csum_granularity_bits == U8_MAX);
|
||||
s->v.csum_granularity_bits++;
|
||||
}
|
||||
|
||||
set_bkey_val_u64s(&s->k, u64s);
|
||||
}
|
||||
|
||||
/* Checksumming: */
|
||||
|
||||
static void ec_generate_checksums(struct ec_stripe_buf *buf)
|
||||
@ -360,7 +326,9 @@ static void ec_block_endio(struct bio *bio)
|
||||
struct bch_dev *ca = ec_bio->ca;
|
||||
struct closure *cl = bio->bi_private;
|
||||
|
||||
if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding"))
|
||||
if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s: %s",
|
||||
bio_data_dir(bio) ? "write" : "read",
|
||||
bch2_blk_status_to_str(bio->bi_status)))
|
||||
clear_bit(ec_bio->idx, ec_bio->buf->valid);
|
||||
|
||||
bio_put(&ec_bio->bio);
|
||||
@ -605,39 +573,16 @@ static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
|
||||
BUG_ON(h->data[m->heap_idx].idx != idx);
|
||||
}
|
||||
|
||||
void bch2_stripes_heap_update(struct bch_fs *c,
|
||||
struct stripe *m, size_t idx)
|
||||
{
|
||||
ec_stripes_heap *h = &c->ec_stripes_heap;
|
||||
size_t i;
|
||||
|
||||
if (m->alive) {
|
||||
heap_verify_backpointer(c, idx);
|
||||
|
||||
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
|
||||
|
||||
i = m->heap_idx;
|
||||
heap_sift_up(h, i, ec_stripes_heap_cmp,
|
||||
ec_stripes_heap_set_backpointer);
|
||||
heap_sift_down(h, i, ec_stripes_heap_cmp,
|
||||
ec_stripes_heap_set_backpointer);
|
||||
|
||||
heap_verify_backpointer(c, idx);
|
||||
} else {
|
||||
bch2_stripes_heap_insert(c, m, idx);
|
||||
}
|
||||
|
||||
if (stripe_idx_to_delete(c) >= 0 &&
|
||||
!percpu_ref_is_dying(&c->writes))
|
||||
schedule_work(&c->ec_stripe_delete_work);
|
||||
}
|
||||
|
||||
void bch2_stripes_heap_del(struct bch_fs *c,
|
||||
struct stripe *m, size_t idx)
|
||||
{
|
||||
if (!m->on_heap)
|
||||
return;
|
||||
|
||||
m->on_heap = false;
|
||||
|
||||
heap_verify_backpointer(c, idx);
|
||||
|
||||
m->alive = false;
|
||||
heap_del(&c->ec_stripes_heap, m->heap_idx,
|
||||
ec_stripes_heap_cmp,
|
||||
ec_stripes_heap_set_backpointer);
|
||||
@ -646,23 +591,54 @@ void bch2_stripes_heap_del(struct bch_fs *c,
|
||||
void bch2_stripes_heap_insert(struct bch_fs *c,
|
||||
struct stripe *m, size_t idx)
|
||||
{
|
||||
if (m->on_heap)
|
||||
return;
|
||||
|
||||
BUG_ON(heap_full(&c->ec_stripes_heap));
|
||||
|
||||
m->on_heap = true;
|
||||
|
||||
heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
|
||||
.idx = idx,
|
||||
.blocks_nonempty = m->blocks_nonempty,
|
||||
}),
|
||||
ec_stripes_heap_cmp,
|
||||
ec_stripes_heap_set_backpointer);
|
||||
m->alive = true;
|
||||
|
||||
heap_verify_backpointer(c, idx);
|
||||
}
|
||||
|
||||
void bch2_stripes_heap_update(struct bch_fs *c,
|
||||
struct stripe *m, size_t idx)
|
||||
{
|
||||
ec_stripes_heap *h = &c->ec_stripes_heap;
|
||||
size_t i;
|
||||
|
||||
if (!m->on_heap)
|
||||
return;
|
||||
|
||||
heap_verify_backpointer(c, idx);
|
||||
|
||||
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
|
||||
|
||||
i = m->heap_idx;
|
||||
heap_sift_up(h, i, ec_stripes_heap_cmp,
|
||||
ec_stripes_heap_set_backpointer);
|
||||
heap_sift_down(h, i, ec_stripes_heap_cmp,
|
||||
ec_stripes_heap_set_backpointer);
|
||||
|
||||
heap_verify_backpointer(c, idx);
|
||||
|
||||
if (stripe_idx_to_delete(c) >= 0 &&
|
||||
!percpu_ref_is_dying(&c->writes))
|
||||
schedule_work(&c->ec_stripe_delete_work);
|
||||
}
|
||||
|
||||
/* stripe deletion */
|
||||
|
||||
static int ec_stripe_delete(struct bch_fs *c, size_t idx)
|
||||
{
|
||||
//pr_info("deleting stripe %zu", idx);
|
||||
return bch2_btree_delete_range(c, BTREE_ID_EC,
|
||||
POS(0, idx),
|
||||
POS(0, idx + 1),
|
||||
@ -675,23 +651,20 @@ static void ec_stripe_delete_work(struct work_struct *work)
|
||||
container_of(work, struct bch_fs, ec_stripe_delete_work);
|
||||
ssize_t idx;
|
||||
|
||||
down_read(&c->gc_lock);
|
||||
mutex_lock(&c->ec_stripe_create_lock);
|
||||
|
||||
while (1) {
|
||||
spin_lock(&c->ec_stripes_heap_lock);
|
||||
idx = stripe_idx_to_delete(c);
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
|
||||
if (idx < 0)
|
||||
if (idx < 0) {
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
break;
|
||||
}
|
||||
|
||||
bch2_stripes_heap_del(c, genradix_ptr(&c->stripes[0], idx), idx);
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
|
||||
if (ec_stripe_delete(c, idx))
|
||||
break;
|
||||
}
|
||||
|
||||
mutex_unlock(&c->ec_stripe_create_lock);
|
||||
up_read(&c->gc_lock);
|
||||
}
|
||||
|
||||
/* stripe creation: */
|
||||
@ -784,6 +757,8 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
|
||||
bkey_on_stack_init(&sk);
|
||||
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
|
||||
|
||||
/* XXX this doesn't support the reflink btree */
|
||||
|
||||
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
|
||||
bkey_start_pos(pos),
|
||||
BTREE_ITER_INTENT);
|
||||
@ -809,12 +784,9 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
|
||||
bkey_on_stack_reassemble(&sk, c, k);
|
||||
e = bkey_i_to_s_extent(sk.k);
|
||||
|
||||
extent_for_each_ptr(e, ptr) {
|
||||
if (ptr->dev == dev)
|
||||
ec_ptr = ptr;
|
||||
else
|
||||
ptr->cached = true;
|
||||
}
|
||||
bch2_bkey_drop_ptrs(e.s, ptr, ptr->dev != dev);
|
||||
ec_ptr = (void *) bch2_bkey_has_device(e.s_c, dev);
|
||||
BUG_ON(!ec_ptr);
|
||||
|
||||
extent_stripe_ptr_add(e, s, ec_ptr, idx);
|
||||
|
||||
@ -844,6 +816,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
struct bch_fs *c = s->c;
|
||||
struct open_bucket *ob;
|
||||
struct bkey_i *k;
|
||||
struct stripe *m;
|
||||
struct bch_stripe *v = &s->stripe.key.v;
|
||||
unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
|
||||
struct closure cl;
|
||||
@ -854,10 +827,13 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
closure_init_stack(&cl);
|
||||
|
||||
if (s->err) {
|
||||
bch_err(c, "error creating stripe: error writing data buckets");
|
||||
if (s->err != -EROFS)
|
||||
bch_err(c, "error creating stripe: error writing data buckets");
|
||||
goto err;
|
||||
}
|
||||
|
||||
BUG_ON(!s->allocated);
|
||||
|
||||
if (!percpu_ref_tryget(&c->writes))
|
||||
goto err;
|
||||
|
||||
@ -880,22 +856,33 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
goto err_put_writes;
|
||||
}
|
||||
|
||||
mutex_lock(&c->ec_stripe_create_lock);
|
||||
|
||||
ret = ec_stripe_bkey_insert(c, &s->stripe.key);
|
||||
ret = s->existing_stripe
|
||||
? bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL)
|
||||
: ec_stripe_bkey_insert(c, &s->stripe.key);
|
||||
if (ret) {
|
||||
bch_err(c, "error creating stripe: error creating stripe key");
|
||||
goto err_unlock;
|
||||
goto err_put_writes;
|
||||
}
|
||||
|
||||
for_each_keylist_key(&s->keys, k) {
|
||||
ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
bch_err(c, "error creating stripe: error updating pointers");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
err_unlock:
|
||||
mutex_unlock(&c->ec_stripe_create_lock);
|
||||
spin_lock(&c->ec_stripes_heap_lock);
|
||||
m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset);
|
||||
#if 0
|
||||
pr_info("created a %s stripe %llu",
|
||||
s->existing_stripe ? "existing" : "new",
|
||||
s->stripe.key.k.p.offset);
|
||||
#endif
|
||||
BUG_ON(m->on_heap);
|
||||
bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset);
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
err_put_writes:
|
||||
percpu_ref_put(&c->writes);
|
||||
err:
|
||||
@ -908,30 +895,52 @@ err:
|
||||
|
||||
bch2_keylist_free(&s->keys, s->inline_keys);
|
||||
|
||||
mutex_lock(&s->h->lock);
|
||||
list_del(&s->list);
|
||||
mutex_unlock(&s->h->lock);
|
||||
|
||||
for (i = 0; i < s->stripe.key.v.nr_blocks; i++)
|
||||
kvpfree(s->stripe.data[i], s->stripe.size << 9);
|
||||
kfree(s);
|
||||
}
|
||||
|
||||
static struct ec_stripe_new *ec_stripe_set_pending(struct ec_stripe_head *h)
|
||||
static void ec_stripe_create_work(struct work_struct *work)
|
||||
{
|
||||
struct bch_fs *c = container_of(work,
|
||||
struct bch_fs, ec_stripe_create_work);
|
||||
struct ec_stripe_new *s, *n;
|
||||
restart:
|
||||
mutex_lock(&c->ec_stripe_new_lock);
|
||||
list_for_each_entry_safe(s, n, &c->ec_stripe_new_list, list)
|
||||
if (!atomic_read(&s->pin)) {
|
||||
list_del(&s->list);
|
||||
mutex_unlock(&c->ec_stripe_new_lock);
|
||||
ec_stripe_create(s);
|
||||
goto restart;
|
||||
}
|
||||
mutex_unlock(&c->ec_stripe_new_lock);
|
||||
}
|
||||
|
||||
static void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s)
|
||||
{
|
||||
BUG_ON(atomic_read(&s->pin) <= 0);
|
||||
|
||||
if (atomic_dec_and_test(&s->pin)) {
|
||||
BUG_ON(!s->pending);
|
||||
queue_work(system_long_wq, &c->ec_stripe_create_work);
|
||||
}
|
||||
}
|
||||
|
||||
static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
|
||||
{
|
||||
struct ec_stripe_new *s = h->s;
|
||||
|
||||
list_add(&s->list, &h->stripes);
|
||||
h->s = NULL;
|
||||
BUG_ON(!s->allocated && !s->err);
|
||||
|
||||
return s;
|
||||
}
|
||||
h->s = NULL;
|
||||
s->pending = true;
|
||||
|
||||
static void ec_stripe_new_put(struct ec_stripe_new *s)
|
||||
{
|
||||
BUG_ON(atomic_read(&s->pin) <= 0);
|
||||
if (atomic_dec_and_test(&s->pin))
|
||||
ec_stripe_create(s);
|
||||
mutex_lock(&c->ec_stripe_new_lock);
|
||||
list_add(&s->list, &c->ec_stripe_new_list);
|
||||
mutex_unlock(&c->ec_stripe_new_lock);
|
||||
|
||||
ec_stripe_new_put(c, s);
|
||||
}
|
||||
|
||||
/* have a full bucket - hand it off to be erasure coded: */
|
||||
@ -942,7 +951,7 @@ void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob)
|
||||
if (ob->sectors_free)
|
||||
s->err = -1;
|
||||
|
||||
ec_stripe_new_put(s);
|
||||
ec_stripe_new_put(c, s);
|
||||
}
|
||||
|
||||
void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
|
||||
@ -976,6 +985,8 @@ void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp,
if (!ob)
return;

//pr_info("adding backpointer at %llu:%llu", pos.inode, pos.offset);

ec = ob->ec;
mutex_lock(&ec->lock);

@ -1034,14 +1045,43 @@ static unsigned pick_blocksize(struct bch_fs *c,
return best.size;
}

int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h)
static bool may_create_new_stripe(struct bch_fs *c)
{
return false;
}

static void ec_stripe_key_init(struct bch_fs *c,
struct bkey_i_stripe *s,
unsigned nr_data,
unsigned nr_parity,
unsigned stripe_size)
{
unsigned u64s;

bkey_stripe_init(&s->k_i);
s->v.sectors = cpu_to_le16(stripe_size);
s->v.algorithm = 0;
s->v.nr_blocks = nr_data + nr_parity;
s->v.nr_redundant = nr_parity;
s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max);
s->v.csum_type = BCH_CSUM_CRC32C;
s->v.pad = 0;

while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
BUG_ON(1 << s->v.csum_granularity_bits >=
le16_to_cpu(s->v.sectors) ||
s->v.csum_granularity_bits == U8_MAX);
s->v.csum_granularity_bits++;
}

set_bkey_val_u64s(&s->k, u64s);
}

static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s;
unsigned i;

BUG_ON(h->parity.nr != h->redundancy);
BUG_ON(!h->blocks.nr);
BUG_ON(h->parity.nr + h->blocks.nr > EC_STRIPE_MAX);
lockdep_assert_held(&h->lock);

s = kzalloc(sizeof(*s), GFP_KERNEL);
@ -1052,11 +1092,9 @@ int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h)
atomic_set(&s->pin, 1);
s->c = c;
s->h = h;
s->blocks = h->blocks;
s->parity = h->parity;

memset(&h->blocks, 0, sizeof(h->blocks));
memset(&h->parity, 0, sizeof(h->parity));
s->nr_data = min_t(unsigned, h->nr_active_devs,
EC_STRIPE_MAX) - h->redundancy;
s->nr_parity = h->redundancy;

bch2_keylist_init(&s->keys, s->inline_keys);

@ -1064,9 +1102,8 @@ int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h)
s->stripe.size = h->blocksize;
memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid));

ec_stripe_key_init(c, &s->stripe.key,
&s->blocks, &s->parity,
h->blocksize);
ec_stripe_key_init(c, &s->stripe.key, s->nr_data,
s->nr_parity, h->blocksize);

for (i = 0; i < s->stripe.key.v.nr_blocks; i++) {
s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL);
@ -1098,14 +1135,13 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,

mutex_init(&h->lock);
mutex_lock(&h->lock);
INIT_LIST_HEAD(&h->stripes);

h->target = target;
h->algo = algo;
h->redundancy = redundancy;

rcu_read_lock();
h->devs = target_rw_devs(c, BCH_DATA_USER, target);
h->devs = target_rw_devs(c, BCH_DATA_user, target);

for_each_member_device_rcu(ca, c, i, &h->devs)
if (!ca->mi.durability)
@ -1118,26 +1154,22 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
h->nr_active_devs++;

rcu_read_unlock();
list_add(&h->list, &c->ec_new_stripe_list);
list_add(&h->list, &c->ec_stripe_head_list);
return h;
}

void bch2_ec_stripe_head_put(struct ec_stripe_head *h)
void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s = NULL;

if (h->s &&
h->s->allocated &&
bitmap_weight(h->s->blocks_allocated,
h->s->blocks.nr) == h->s->blocks.nr)
s = ec_stripe_set_pending(h);
ec_stripe_set_pending(c, h);

mutex_unlock(&h->lock);

if (s)
ec_stripe_new_put(s);
}

struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target,
unsigned algo,
unsigned redundancy)
@ -1147,8 +1179,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
if (!redundancy)
return NULL;

mutex_lock(&c->ec_new_stripe_lock);
list_for_each_entry(h, &c->ec_new_stripe_list, list)
mutex_lock(&c->ec_stripe_head_lock);
list_for_each_entry(h, &c->ec_stripe_head_list, list)
if (h->target == target &&
h->algo == algo &&
h->redundancy == redundancy) {
@ -1158,7 +1190,196 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,

h = ec_new_stripe_head_alloc(c, target, algo, redundancy);
found:
mutex_unlock(&c->ec_new_stripe_lock);
mutex_unlock(&c->ec_stripe_head_lock);
return h;
}

/*
* XXX: use a higher watermark for allocating open buckets here:
*/
static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
{
struct bch_devs_mask devs;
struct open_bucket *ob;
unsigned i, nr_have, nr_data =
min_t(unsigned, h->nr_active_devs,
EC_STRIPE_MAX) - h->redundancy;
bool have_cache = true;
int ret = 0;

devs = h->devs;

for_each_set_bit(i, h->s->blocks_allocated, EC_STRIPE_MAX) {
__clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d);
--nr_data;
}

BUG_ON(h->s->blocks.nr > nr_data);
BUG_ON(h->s->parity.nr > h->redundancy);

open_bucket_for_each(c, &h->s->parity, ob, i)
__clear_bit(ob->ptr.dev, devs.d);
open_bucket_for_each(c, &h->s->blocks, ob, i)
__clear_bit(ob->ptr.dev, devs.d);

percpu_down_read(&c->mark_lock);
rcu_read_lock();

if (h->s->parity.nr < h->redundancy) {
nr_have = h->s->parity.nr;

ret = bch2_bucket_alloc_set(c, &h->s->parity,
&h->parity_stripe,
&devs,
h->redundancy,
&nr_have,
&have_cache,
RESERVE_NONE,
0,
NULL);
if (ret)
goto err;
}

if (h->s->blocks.nr < nr_data) {
nr_have = h->s->blocks.nr;

ret = bch2_bucket_alloc_set(c, &h->s->blocks,
&h->block_stripe,
&devs,
nr_data,
&nr_have,
&have_cache,
RESERVE_NONE,
0,
NULL);
if (ret)
goto err;
}
err:
rcu_read_unlock();
percpu_up_read(&c->mark_lock);
return ret;
}

/* XXX: doesn't obey target: */
static s64 get_existing_stripe(struct bch_fs *c,
unsigned target,
unsigned algo,
unsigned redundancy)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
struct stripe *m;
size_t heap_idx;
u64 stripe_idx;

if (may_create_new_stripe(c))
return -1;

spin_lock(&c->ec_stripes_heap_lock);
for (heap_idx = 0; heap_idx < h->used; heap_idx++) {
if (!h->data[heap_idx].blocks_nonempty)
continue;

stripe_idx = h->data[heap_idx].idx;
m = genradix_ptr(&c->stripes[0], stripe_idx);

if (m->algorithm == algo &&
m->nr_redundant == redundancy &&
m->blocks_nonempty < m->nr_blocks - m->nr_redundant) {
bch2_stripes_heap_del(c, m, stripe_idx);
spin_unlock(&c->ec_stripes_heap_lock);
return stripe_idx;
}
}

spin_unlock(&c->ec_stripes_heap_lock);
return -1;
}

static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
int ret;

bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, idx), BTREE_ITER_SLOTS);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (!ret)
bkey_reassemble(&stripe->key.k_i, k);
bch2_trans_exit(&trans);

return ret;
}

struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target,
unsigned algo,
unsigned redundancy)
{
struct closure cl;
struct ec_stripe_head *h;
struct open_bucket *ob;
unsigned i, data_idx = 0;
s64 idx;

closure_init_stack(&cl);

h = __bch2_ec_stripe_head_get(c, target, algo, redundancy);
if (!h)
return NULL;

if (!h->s && ec_new_stripe_alloc(c, h)) {
bch2_ec_stripe_head_put(c, h);
return NULL;
}

if (!h->s->allocated) {
if (!h->s->existing_stripe &&
(idx = get_existing_stripe(c, target, algo, redundancy)) >= 0) {
//pr_info("got existing stripe %llu", idx);

h->s->existing_stripe = true;
h->s->existing_stripe_idx = idx;
if (get_stripe_key(c, idx, &h->s->stripe)) {
/* btree error */
BUG();
}

for (i = 0; i < h->s->stripe.key.v.nr_blocks; i++)
if (stripe_blockcount_get(&h->s->stripe.key.v, i)) {
__set_bit(i, h->s->blocks_allocated);
ec_block_io(c, &h->s->stripe, READ, i, &cl);
}
}

if (new_stripe_alloc_buckets(c, h)) {
bch2_ec_stripe_head_put(c, h);
h = NULL;
goto out;
}

open_bucket_for_each(c, &h->s->blocks, ob, i) {
data_idx = find_next_zero_bit(h->s->blocks_allocated,
h->s->nr_data, data_idx);
BUG_ON(data_idx >= h->s->nr_data);

h->s->stripe.key.v.ptrs[data_idx] = ob->ptr;
h->s->data_block_idx[i] = data_idx;
data_idx++;
}

open_bucket_for_each(c, &h->s->parity, ob, i)
h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;

//pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]);
h->s->allocated = true;
}
out:
closure_sync(&cl);
return h;
}

@ -1168,14 +1389,10 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
struct open_bucket *ob;
unsigned i;

mutex_lock(&c->ec_new_stripe_lock);
list_for_each_entry(h, &c->ec_new_stripe_list, list) {
struct ec_stripe_new *s = NULL;
mutex_lock(&c->ec_stripe_head_lock);
list_for_each_entry(h, &c->ec_stripe_head_list, list) {

mutex_lock(&h->lock);
bch2_open_buckets_stop_dev(c, ca, &h->blocks);
bch2_open_buckets_stop_dev(c, ca, &h->parity);

if (!h->s)
goto unlock;

@ -1187,15 +1404,12 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
goto found;
goto unlock;
found:
h->s->err = -1;
s = ec_stripe_set_pending(h);
h->s->err = -EROFS;
ec_stripe_set_pending(c, h);
unlock:
mutex_unlock(&h->lock);

if (s)
ec_stripe_new_put(s);
}
mutex_unlock(&c->ec_new_stripe_lock);
mutex_unlock(&c->ec_stripe_head_lock);
}

static int __bch2_stripe_write_key(struct btree_trans *trans,
@ -1278,11 +1492,21 @@ static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
{
int ret = 0;

if (k.k->type == KEY_TYPE_stripe)
if (k.k->type == KEY_TYPE_stripe) {
struct stripe *m;

ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
bch2_mark_key(c, k, 0, 0, NULL, 0,
BTREE_TRIGGER_ALLOC_READ|
BTREE_TRIGGER_NOATOMIC);
if (ret)
return ret;

spin_lock(&c->ec_stripes_heap_lock);
m = genradix_ptr(&c->stripes[0], k.k->p.offset);
bch2_stripes_heap_insert(c, m, k.k->p.offset);
spin_unlock(&c->ec_stripes_heap_lock);
}

return ret;
}
@ -1333,25 +1557,73 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
return 0;
}

void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
struct stripe *m;
size_t i;

spin_lock(&c->ec_stripes_heap_lock);
for (i = 0; i < min(h->used, 20UL); i++) {
m = genradix_ptr(&c->stripes[0], h->data[i].idx);

pr_buf(out, "%zu %u/%u+%u\n", h->data[i].idx,
h->data[i].blocks_nonempty,
m->nr_blocks - m->nr_redundant,
m->nr_redundant);
}
spin_unlock(&c->ec_stripes_heap_lock);
}

void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
{
struct ec_stripe_head *h;
struct ec_stripe_new *s;

mutex_lock(&c->ec_stripe_head_lock);
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
pr_buf(out, "target %u algo %u redundancy %u:\n",
h->target, h->algo, h->redundancy);

if (h->s)
pr_buf(out, "\tpending: blocks %u allocated %u\n",
h->s->blocks.nr,
bitmap_weight(h->s->blocks_allocated,
h->s->blocks.nr));
}
mutex_unlock(&c->ec_stripe_head_lock);

mutex_lock(&c->ec_stripe_new_lock);
list_for_each_entry(s, &c->ec_stripe_new_list, list) {
pr_buf(out, "\tin flight: blocks %u allocated %u pin %u\n",
s->blocks.nr,
bitmap_weight(s->blocks_allocated,
s->blocks.nr),
atomic_read(&s->pin));
}
mutex_unlock(&c->ec_stripe_new_lock);
}

void bch2_fs_ec_exit(struct bch_fs *c)
{
struct ec_stripe_head *h;

while (1) {
mutex_lock(&c->ec_new_stripe_lock);
h = list_first_entry_or_null(&c->ec_new_stripe_list,
mutex_lock(&c->ec_stripe_head_lock);
h = list_first_entry_or_null(&c->ec_stripe_head_list,
struct ec_stripe_head, list);
if (h)
list_del(&h->list);
mutex_unlock(&c->ec_new_stripe_lock);
mutex_unlock(&c->ec_stripe_head_lock);
if (!h)
break;

BUG_ON(h->s);
BUG_ON(!list_empty(&h->stripes));
kfree(h);
}

BUG_ON(!list_empty(&c->ec_stripe_new_list));

free_heap(&c->ec_stripes_heap);
genradix_free(&c->stripes[0]);
bioset_exit(&c->ec_bioset);
@ -1359,6 +1631,7 @@ void bch2_fs_ec_exit(struct bch_fs *c)

int bch2_fs_ec_init(struct bch_fs *c)
{
INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);

return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),

@ -93,9 +93,17 @@ struct ec_stripe_new {

int err;

u8 nr_data;
u8 nr_parity;
bool allocated;
bool pending;
bool existing_stripe;
u64 existing_stripe_idx;

unsigned long blocks_allocated[BITS_TO_LONGS(EC_STRIPE_MAX)];

struct open_buckets blocks;
u8 data_block_idx[EC_STRIPE_MAX];
struct open_buckets parity;

struct keylist keys;
@ -108,8 +116,6 @@ struct ec_stripe_head {
struct list_head list;
struct mutex lock;

struct list_head stripes;

unsigned target;
unsigned algo;
unsigned redundancy;
@ -122,9 +128,6 @@ struct ec_stripe_head {
struct dev_stripe_state block_stripe;
struct dev_stripe_state parity_stripe;

struct open_buckets blocks;
struct open_buckets parity;

struct ec_stripe_new *s;
};

@ -139,7 +142,7 @@ void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *);

int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);

void bch2_ec_stripe_head_put(struct ec_stripe_head *);
void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *);
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned,
unsigned, unsigned);

@ -157,6 +160,9 @@ int bch2_stripes_write(struct bch_fs *, unsigned, bool *);

int bch2_ec_mem_alloc(struct bch_fs *, bool);

void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *);
void bch2_new_stripes_to_text(struct printbuf *, struct bch_fs *);

void bch2_fs_ec_exit(struct bch_fs *);
int bch2_fs_ec_init(struct bch_fs *);

@ -22,6 +22,7 @@ struct stripe {

unsigned alive:1;
unsigned dirty:1;
unsigned on_heap:1;
u8 blocks_nonempty;
u16 block_sectors[EC_STRIPE_MAX];

@ -179,11 +179,6 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k)
if (!percpu_down_read_trylock(&c->mark_lock))
return;

bch2_fs_inconsistent_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
!bch2_bkey_replicas_marked_locked(c, k, false), c,
"btree key bad (replicas not marked in superblock):\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));

bkey_for_each_ptr(ptrs, ptr) {
ca = bch_dev_bkey_exists(c, ptr->dev);

@ -194,7 +189,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k)
goto err;

err = "inconsistent";
if (mark.data_type != BCH_DATA_BTREE ||
if (mark.data_type != BCH_DATA_btree ||
mark.dirty_sectors < c->opts.btree_node_size)
goto err;
}
@ -267,11 +262,6 @@ void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k)
if (!percpu_down_read_trylock(&c->mark_lock))
return;

bch2_fs_inconsistent_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
!bch2_bkey_replicas_marked_locked(c, e.s_c, false), c,
"extent key bad (replicas not marked in superblock):\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf));

extent_for_each_ptr_decode(e, p, entry) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket_mark mark = ptr_bucket_mark(ca, &p.ptr);
@ -289,7 +279,7 @@ void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k)
"key too stale: %i", stale);

bch2_fs_inconsistent_on(!stale &&
(mark.data_type != BCH_DATA_USER ||
(mark.data_type != BCH_DATA_user ||
mark_sectors < disk_sectors), c,
"extent pointer not marked: %s:\n"
"type %u sectors %u < %u",
@ -724,7 +714,7 @@ static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
if (WARN_ON(!s))
goto out;

durability = max_t(unsigned, durability, s->nr_redundant);
durability += s->nr_redundant;
}
out:
return durability;

@ -1516,24 +1516,24 @@ retry_reservation:
if (!pg_copied)
break;

if (!PageUptodate(page) &&
pg_copied != PAGE_SIZE &&
pos + copied + pg_copied < inode->v.i_size) {
zero_user(page, 0, PAGE_SIZE);
break;
}

flush_dcache_page(page);
iov_iter_advance(iter, pg_copied);
copied += pg_copied;

if (pg_copied != pg_len)
break;
}

if (!copied)
goto out;

if (copied < len &&
((offset + copied) & (PAGE_SIZE - 1))) {
struct page *page = pages[(offset + copied) >> PAGE_SHIFT];

if (!PageUptodate(page)) {
zero_user(page, 0, PAGE_SIZE);
copied -= (offset + copied) & (PAGE_SIZE - 1);
}
}

spin_lock(&inode->v.i_lock);
if (pos + copied > inode->v.i_size)
i_size_write(&inode->v, pos + copied);
@ -1630,6 +1630,7 @@ again:
}
pos += ret;
written += ret;
ret = 0;

balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(iter));
@ -1818,7 +1819,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)

while (1) {
if (kthread)
use_mm(dio->mm);
kthread_use_mm(dio->mm);
BUG_ON(current->faults_disabled_mapping);
current->faults_disabled_mapping = mapping;

@ -1826,7 +1827,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)

current->faults_disabled_mapping = NULL;
if (kthread)
unuse_mm(dio->mm);
kthread_unuse_mm(dio->mm);

if (unlikely(ret < 0))
goto err;

@ -138,6 +138,10 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
if (fa.fsx_projid >= U32_MAX)
return -EINVAL;

/*
* inode fields accessible via the xattr interface are stored with a +1
* bias, so that 0 means unset:
*/
s.projid = fa.fsx_projid + 1;

ret = mnt_want_write_file(file);
@ -151,7 +155,7 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
}

mutex_lock(&inode->ei_update_lock);
ret = bch2_set_projid(c, inode, s.projid);
ret = bch2_set_projid(c, inode, fa.fsx_projid);
if (ret)
goto err_unlock;

@ -25,6 +25,7 @@
#include <linux/aio.h>
#include <linux/backing-dev.h>
#include <linux/exportfs.h>
#include <linux/fiemap.h>
#include <linux/module.h>
#include <linux/posix_acl.h>
#include <linux/random.h>
@ -860,6 +861,10 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
bool have_extent = false;
int ret = 0;

ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
if (ret)
return ret;

if (start + len < start)
return -EINVAL;

@ -1236,8 +1241,8 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = usage.capacity >> shift;
buf->f_bfree = (usage.capacity - usage.used) >> shift;
buf->f_bavail = buf->f_bfree;
buf->f_files = usage.nr_inodes;
buf->f_ffree = U64_MAX;
buf->f_files = 0;
buf->f_ffree = 0;

fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));

@ -1265,6 +1265,8 @@ static int check_inode(struct btree_trans *trans,
u.bi_inum))) {
bch_verbose(c, "deleting inode %llu", u.bi_inum);

bch2_fs_lazy_rw(c);

ret = bch2_inode_rm(c, u.bi_inum);
if (ret)
bch_err(c, "error in fsck: error %i while deleting inode", ret);
@ -1277,6 +1279,8 @@ static int check_inode(struct btree_trans *trans,
u.bi_inum))) {
bch_verbose(c, "truncating inode %llu", u.bi_inum);

bch2_fs_lazy_rw(c);

/*
* XXX: need to truncate partial blocks too here - or ideally
* just switch units to bytes and that issue goes away

@ -31,9 +31,17 @@

#include <linux/blkdev.h>
#include <linux/random.h>
#include <linux/sched/mm.h>

#include <trace/events/bcachefs.h>

const char *bch2_blk_status_to_str(blk_status_t status)
{
if (status == BLK_STS_REMOVED)
return "device removed";
return blk_status_to_str(status);
}

static bool bch2_target_congested(struct bch_fs *c, u16 target)
{
const struct bch_devs_mask *devs;
@ -46,7 +54,9 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
return false;

rcu_read_lock();
devs = bch2_target_to_mask(c, target);
devs = bch2_target_to_mask(c, target) ?:
&c->rw_devs[BCH_DATA_user];

for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
ca = rcu_dereference(c->devs[d]);
if (!ca)
@ -463,7 +473,8 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,

n->c = c;
n->dev = ptr->dev;
n->have_ioref = bch2_dev_get_ioref(ca, WRITE);
n->have_ioref = bch2_dev_get_ioref(ca,
type == BCH_DATA_btree ? READ : WRITE);
n->submit_time = local_clock();
n->bio.bi_iter.bi_sector = ptr->offset;

@ -611,7 +622,8 @@ static void bch2_write_endio(struct bio *bio)
struct bch_fs *c = wbio->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev);

if (bch2_dev_io_err_on(bio->bi_status, ca, "data write"))
if (bch2_dev_io_err_on(bio->bi_status, ca, "data write: %s",
bch2_blk_status_to_str(bio->bi_status)))
set_bit(wbio->dev, op->failed.d);

if (wbio->have_ioref) {
@ -1053,7 +1065,10 @@ static void __bch2_write(struct closure *cl)
struct write_point *wp;
struct bio *bio;
bool skip_put = true;
unsigned nofs_flags;
int ret;

nofs_flags = memalloc_nofs_save();
again:
memset(&op->failed, 0, sizeof(op->failed));

@ -1079,6 +1094,11 @@ again:
goto err;
}

/*
* The copygc thread is now global, which means it's no longer
* freeing up space on specific disks, which means that
* allocations for specific disks may hang arbitrarily long:
*/
wp = bch2_alloc_sectors_start(c,
op->target,
op->opts.erasure_code,
@ -1088,7 +1108,8 @@ again:
op->nr_replicas_required,
op->alloc_reserve,
op->flags,
(op->flags & BCH_WRITE_ALLOC_NOWAIT) ? NULL : cl);
(op->flags & (BCH_WRITE_ALLOC_NOWAIT|
BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : cl);
EBUG_ON(!wp);

if (unlikely(IS_ERR(wp))) {
@ -1100,6 +1121,16 @@ again:
goto flush_io;
}

/*
* It's possible for the allocator to fail, put us on the
* freelist waitlist, and then succeed in one of various retry
* paths: if that happens, we need to disable the skip_put
* optimization because otherwise there won't necessarily be a
* barrier before we free the bch_write_op:
*/
if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
skip_put = false;

bch2_open_bucket_get(c, wp, &op->open_buckets);
ret = bch2_write_extent(op, wp, &bio);
bch2_alloc_sectors_done(c, wp);
@ -1129,19 +1160,21 @@ again:
key_to_write = (void *) (op->insert_keys.keys_p +
key_to_write_offset);

bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_USER,
bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
key_to_write);
} while (ret);

if (!skip_put)
continue_at(cl, bch2_write_index, index_update_wq(op));
out:
memalloc_nofs_restore(nofs_flags);
return;
err:
op->error = ret;
op->flags |= BCH_WRITE_DONE;

continue_at(cl, bch2_write_index, index_update_wq(op));
return;
goto out;
flush_io:
/*
* If the write can't all be submitted at once, we generally want to
@ -1152,7 +1185,7 @@ flush_io:
*/
if (current->flags & PF_WQ_WORKER) {
continue_at(cl, bch2_write_index, index_update_wq(op));
return;
goto out;
}

closure_sync(cl);
@ -1163,7 +1196,7 @@ flush_io:
if (op->error) {
op->flags |= BCH_WRITE_DONE;
continue_at_nobarrier(cl, bch2_write_done, NULL);
return;
goto out;
}
}

@ -1921,7 +1954,8 @@ static void bch2_read_endio(struct bio *bio)
if (!rbio->split)
rbio->bio.bi_end_io = rbio->end_io;

if (bch2_dev_io_err_on(bio->bi_status, ca, "data read")) {
if (bch2_dev_io_err_on(bio->bi_status, ca, "data read; %s",
bch2_blk_status_to_str(bio->bi_status))) {
bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
return;
}
@ -2174,7 +2208,7 @@ get_bio:
goto out;
}

this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_USER],
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user],
bio_sectors(&rbio->bio));
bio_set_dev(&rbio->bio, ca->disk_sb.bdev);

@ -22,6 +22,8 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,

#define BLK_STS_REMOVED ((__force blk_status_t)128)

const char *bch2_blk_status_to_str(blk_status_t);

enum bch_write_flags {
BCH_WRITE_ALLOC_NOWAIT = (1 << 0),
BCH_WRITE_CACHED = (1 << 1),

@ -78,7 +78,6 @@ struct bch_write_bio {
u64 submit_time;

struct bch_devs_list failed;
u8 order;
u8 dev;

unsigned split:1,

@ -847,7 +847,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
if (pos <= ja->cur_idx)
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;

bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_journal,
ca->mi.bucket_size,
gc_phase(GC_PHASE_SB),
0);
@ -1135,9 +1135,8 @@ out:

/* debug: */

ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
{
struct printbuf out = _PBUF(buf, PAGE_SIZE);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
union journal_res_state s;
struct bch_dev *ca;
@ -1147,7 +1146,7 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
spin_lock(&j->lock);
s = READ_ONCE(j->reservations);

pr_buf(&out,
pr_buf(out,
"active journal entries:\t%llu\n"
"seq:\t\t\t%llu\n"
"last_seq:\t\t%llu\n"
@ -1165,44 +1164,44 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)

switch (s.cur_entry_offset) {
case JOURNAL_ENTRY_ERROR_VAL:
pr_buf(&out, "error\n");
pr_buf(out, "error\n");
break;
case JOURNAL_ENTRY_CLOSED_VAL:
pr_buf(&out, "closed\n");
pr_buf(out, "closed\n");
break;
default:
pr_buf(&out, "%u/%u\n",
pr_buf(out, "%u/%u\n",
s.cur_entry_offset,
j->cur_entry_u64s);
break;
}

pr_buf(&out,
pr_buf(out,
"current entry refs:\t%u\n"
"prev entry unwritten:\t",
journal_state_count(s, s.idx));

if (s.prev_buf_unwritten)
pr_buf(&out, "yes, ref %u sectors %u\n",
pr_buf(out, "yes, ref %u sectors %u\n",
journal_state_count(s, !s.idx),
journal_prev_buf(j)->sectors);
else
pr_buf(&out, "no\n");
pr_buf(out, "no\n");

pr_buf(&out,
pr_buf(out,
"need write:\t\t%i\n"
"replay done:\t\t%i\n",
test_bit(JOURNAL_NEED_WRITE, &j->flags),
test_bit(JOURNAL_REPLAY_DONE, &j->flags));

for_each_member_device_rcu(ca, c, iter,
&c->rw_devs[BCH_DATA_JOURNAL]) {
&c->rw_devs[BCH_DATA_journal]) {
struct journal_device *ja = &ca->journal;

if (!ja->nr)
continue;

pr_buf(&out,
pr_buf(out,
"dev %u:\n"
"\tnr\t\t%u\n"
"\tavailable\t%u:%u\n"
@ -1221,34 +1220,29 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)

spin_unlock(&j->lock);
rcu_read_unlock();

return out.pos - buf;
}

ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
{
struct printbuf out = _PBUF(buf, PAGE_SIZE);
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *pin;
u64 i;

spin_lock(&j->lock);
fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
pr_buf(&out, "%llu: count %u\n",
pr_buf(out, "%llu: count %u\n",
i, atomic_read(&pin_list->count));

list_for_each_entry(pin, &pin_list->list, list)
pr_buf(&out, "\t%px %ps\n",
pr_buf(out, "\t%px %ps\n",
pin, pin->flush);

if (!list_empty(&pin_list->flushed))
pr_buf(&out, "flushed:\n");
pr_buf(out, "flushed:\n");

list_for_each_entry(pin, &pin_list->flushed, list)
pr_buf(&out, "\t%px %ps\n",
pr_buf(out, "\t%px %ps\n",
pin, pin->flush);
}
spin_unlock(&j->lock);

return out.pos - buf;
}

@ -499,8 +499,8 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
void bch2_journal_unblock(struct journal *);
void bch2_journal_block(struct journal *);

ssize_t bch2_journal_print_debug(struct journal *, char *);
ssize_t bch2_journal_print_pins(struct journal *, char *);
void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
void bch2_journal_pins_to_text(struct printbuf *, struct journal *);

int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
unsigned nr);

@ -6,6 +6,7 @@
#include "buckets.h"
#include "checksum.h"
#include "error.h"
#include "io.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
@ -28,9 +29,11 @@ struct journal_list {
* be replayed:
*/
static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
struct journal_list *jlist, struct jset *j)
struct journal_list *jlist, struct jset *j,
bool bad)
{
struct journal_replay *i, *pos;
struct bch_devs_list devs = { .nr = 0 };
struct list_head *where;
size_t bytes = vstruct_bytes(j);
__le64 last_seq;
@ -59,15 +62,6 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
}

list_for_each_entry_reverse(i, jlist->head, list) {
/* Duplicate? */
if (le64_to_cpu(j->seq) == le64_to_cpu(i->j.seq)) {
fsck_err_on(bytes != vstruct_bytes(&i->j) ||
memcmp(j, &i->j, bytes), c,
"found duplicate but non identical journal entries (seq %llu)",
le64_to_cpu(j->seq));
goto found;
}

if (le64_to_cpu(j->seq) > le64_to_cpu(i->j.seq)) {
where = &i->list;
goto add;
@ -76,6 +70,32 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,

where = jlist->head;
add:
i = where->next != jlist->head
? container_of(where->next, struct journal_replay, list)
: NULL;

/*
* Duplicate journal entries? If so we want the one that didn't have a
* checksum error:
*/
if (i && le64_to_cpu(j->seq) == le64_to_cpu(i->j.seq)) {
if (i->bad) {
devs = i->devs;
list_del(&i->list);
kvpfree(i, offsetof(struct journal_replay, j) +
vstruct_bytes(&i->j));
} else if (bad) {
goto found;
} else {
fsck_err_on(bytes != vstruct_bytes(&i->j) ||
memcmp(j, &i->j, bytes), c,
"found duplicate but non identical journal entries (seq %llu)",
le64_to_cpu(j->seq));
goto found;
}

}

i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
if (!i) {
ret = -ENOMEM;
@ -83,7 +103,8 @@ add:
}

list_add(&i->list, where);
i->devs.nr = 0;
i->devs = devs;
i->bad = bad;
memcpy(&i->j, j, bytes);
found:
if (!bch2_dev_list_has_dev(i->devs, ca->dev_idx))
@ -390,6 +411,7 @@ fsck_err:
}

static int jset_validate(struct bch_fs *c,
struct bch_dev *ca,
struct jset *jset, u64 sector,
unsigned bucket_sectors_left,
unsigned sectors_read,
@ -404,16 +426,19 @@ static int jset_validate(struct bch_fs *c,
return JOURNAL_ENTRY_NONE;

version = le32_to_cpu(jset->version);
if ((version != BCH_JSET_VERSION_OLD &&
version < bcachefs_metadata_version_min) ||
version >= bcachefs_metadata_version_max) {
bch_err(c, "unknown journal entry version %u", jset->version);
return BCH_FSCK_UNKNOWN_VERSION;
if (journal_entry_err_on((version != BCH_JSET_VERSION_OLD &&
version < bcachefs_metadata_version_min) ||
version >= bcachefs_metadata_version_max, c,
"%s sector %llu seq %llu: unknown journal entry version %u",
ca->name, sector, le64_to_cpu(jset->seq),
version)) {
/* XXX: note we might have missing journal entries */
return JOURNAL_ENTRY_BAD;
}

if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c,
"journal entry too big (%zu bytes), sector %lluu",
bytes, sector)) {
"%s sector %llu seq %llu: journal entry too big (%zu bytes)",
ca->name, sector, le64_to_cpu(jset->seq), bytes)) {
/* XXX: note we might have missing journal entries */
return JOURNAL_ENTRY_BAD;
}
@ -422,13 +447,15 @@ static int jset_validate(struct bch_fs *c,
return JOURNAL_ENTRY_REREAD;

if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c,
"journal entry with unknown csum type %llu sector %lluu",
JSET_CSUM_TYPE(jset), sector))
"%s sector %llu seq %llu: journal entry with unknown csum type %llu",
ca->name, sector, le64_to_cpu(jset->seq),
JSET_CSUM_TYPE(jset)))
return JOURNAL_ENTRY_BAD;

csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset);
if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c,
"journal checksum bad, sector %llu", sector)) {
"%s sector %llu seq %llu: journal checksum bad",
ca->name, sector, le64_to_cpu(jset->seq))) {
/* XXX: retry IO, when we start retrying checksum errors */
/* XXX: note we might have missing journal entries */
return JOURNAL_ENTRY_BAD;
@ -439,8 +466,10 @@ static int jset_validate(struct bch_fs *c,
vstruct_end(jset) - (void *) jset->encrypted_start);

if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c,
"invalid journal entry: last_seq > seq"))
"invalid journal entry: last_seq > seq")) {
jset->last_seq = jset->seq;
return JOURNAL_ENTRY_BAD;
}

return 0;
fsck_err:
@ -515,11 +544,12 @@ reread:
j = buf->data;
}

ret = jset_validate(c, j, offset,
ret = jset_validate(c, ca, j, offset,
end - offset, sectors_read,
READ);
switch (ret) {
case BCH_FSCK_OK:
sectors = vstruct_sectors(j, c->block_bits);
break;
case JOURNAL_ENTRY_REREAD:
if (vstruct_bytes(j) > buf->size) {
@ -536,8 +566,13 @@ reread:
goto next_block;
case JOURNAL_ENTRY_BAD:
saw_bad = true;
/*
* On checksum error we don't really trust the size
* field of the journal entry we read, so try reading
* again at next block boundary:
*/
sectors = c->opts.block_size;
goto next_block;
break;
default:
return ret;
}
@ -554,7 +589,7 @@ reread:
ja->bucket_seq[bucket] = le64_to_cpu(j->seq);

mutex_lock(&jlist->lock);
ret = journal_entry_add(c, ca, jlist, j);
ret = journal_entry_add(c, ca, jlist, j, ret != 0);
mutex_unlock(&jlist->lock);

switch (ret) {
@ -565,8 +600,6 @@ reread:
default:
return ret;
}

sectors = vstruct_sectors(j, c->block_bits);
next_block:
pr_debug("next");
offset += sectors;
@ -661,7 +694,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)

for_each_member_device(ca, c, iter) {
if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
!(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_JOURNAL)))
!(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_journal)))
continue;

if ((ca->mi.state == BCH_MEMBER_STATE_RW ||
@ -695,11 +728,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
* the devices - this is wrong:
*/

bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, i->devs);

if (!degraded &&
(test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
fsck_err_on(!bch2_replicas_marked(c, &replicas.e, false), c,
fsck_err_on(!bch2_replicas_marked(c, &replicas.e), c,
"superblock not marked as containing replicas %s",
(bch2_replicas_entry_to_text(&PBUF(buf),
&replicas.e), buf)))) {
@ -759,7 +792,7 @@ static void __journal_write_alloc(struct journal *j,
sectors > ja->sectors_free)
continue;

bch2_dev_stripe_increment(c, ca, &j->wp.stripe);
bch2_dev_stripe_increment(ca, &j->wp.stripe);

bch2_bkey_append_ptr(&w->key,
(struct bch_extent_ptr) {
@ -796,7 +829,7 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
rcu_read_lock();

devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe,
&c->rw_devs[BCH_DATA_JOURNAL]);
&c->rw_devs[BCH_DATA_journal]);

__journal_write_alloc(j, w, &devs_sorted,
sectors, &replicas, replicas_want);
@ -914,7 +947,7 @@ static void journal_write_done(struct closure *cl)
goto err;
}

bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, devs);
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, devs);

if (bch2_mark_replicas(c, &replicas.e))
goto err;
@ -961,7 +994,8 @@ static void journal_write_endio(struct bio *bio)
struct bch_dev *ca = bio->bi_private;
struct journal *j = &ca->fs->journal;

if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write") ||
if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write: %s",
bch2_blk_status_to_str(bio->bi_status)) ||
bch2_meta_write_fault("journal")) {
struct journal_buf *w = journal_prev_buf(j);
unsigned long flags;
@ -1105,7 +1139,7 @@ retry_alloc:
continue;
}

this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_JOURNAL],
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
sectors);

bio = ca->journal.bio;

@ -9,6 +9,8 @@
struct journal_replay {
struct list_head list;
struct bch_devs_list devs;
/* checksum error, but we may want to try using it anyways: */
bool bad;
/* must be last: */
struct jset j;
};

@ -70,7 +70,7 @@ static struct journal_space {

rcu_read_lock();
for_each_member_device_rcu(ca, c, i,
&c->rw_devs[BCH_DATA_JOURNAL]) {
&c->rw_devs[BCH_DATA_journal]) {
struct journal_device *ja = &ca->journal;
unsigned buckets_this_device, sectors_this_device;

@ -139,7 +139,7 @@ void bch2_journal_space_available(struct journal *j)

rcu_read_lock();
for_each_member_device_rcu(ca, c, i,
&c->rw_devs[BCH_DATA_JOURNAL]) {
&c->rw_devs[BCH_DATA_journal]) {
struct journal_device *ja = &ca->journal;

if (!ja->nr)
@ -618,7 +618,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
return ret;

mutex_lock(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, 1 << BCH_DATA_JOURNAL);
bch2_replicas_gc_start(c, 1 << BCH_DATA_journal);

seq = 0;

@ -627,7 +627,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
struct bch_replicas_padded replicas;

seq = max(seq, journal_last_seq(j));
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL,
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
journal_seq_pin(j, seq)->devs);
seq++;

@ -36,15 +36,6 @@
* that bset, until that btree node is rewritten.
*/

static unsigned
blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
{
return bl
? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
sizeof(struct journal_seq_blacklist_entry))
: 0;
}

static unsigned sb_blacklist_u64s(unsigned nr)
{
struct bch_sb_field_journal_seq_blacklist *bl;

@ -2,6 +2,15 @@
#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H

static inline unsigned
blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
{
return bl
? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
sizeof(struct journal_seq_blacklist_entry))
: 0;
}

bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
int bch2_blacklist_table_initialize(struct bch_fs *);

@ -247,11 +247,15 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
m->op.target = data_opts.target,
m->op.write_point = wp;

if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE)
if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
m->op.alloc_reserve = RESERVE_MOVINGGC;
m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
} else {
/* XXX: this should probably be passed in */
m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
}

m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS|
BCH_WRITE_PAGES_STABLE|
m->op.flags |= BCH_WRITE_PAGES_STABLE|
BCH_WRITE_PAGES_OWNED|
BCH_WRITE_DATA_ENCODED|
BCH_WRITE_FROM_INTERNAL;
@ -517,7 +521,7 @@ static int __bch2_move_data(struct bch_fs *c,
bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, 0, 0);

stats->data_type = BCH_DATA_USER;
stats->data_type = BCH_DATA_user;
stats->btree_id = btree_id;
stats->pos = POS_MIN;

@ -642,7 +646,7 @@ int bch2_move_data(struct bch_fs *c,
INIT_LIST_HEAD(&ctxt.reads);
init_waitqueue_head(&ctxt.wait);

stats->data_type = BCH_DATA_USER;
stats->data_type = BCH_DATA_user;

ret = __bch2_move_data(c, &ctxt, rate, wp, start, end,
pred, arg, stats, BTREE_ID_EXTENTS) ?:
@ -677,7 +681,7 @@ static int bch2_move_btree(struct bch_fs *c,

bch2_trans_init(&trans, c, 0, 0);

stats->data_type = BCH_DATA_BTREE;
stats->data_type = BCH_DATA_btree;

for (id = 0; id < BTREE_ID_NR; id++) {
stats->btree_id = id;
@ -773,7 +777,7 @@ int bch2_data_job(struct bch_fs *c,

switch (op.op) {
case BCH_DATA_OP_REREPLICATE:
stats->data_type = BCH_DATA_JOURNAL;
stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, -1);

ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
@ -794,7 +798,7 @@ int bch2_data_job(struct bch_fs *c,
if (op.migrate.dev >= c->sb.nr_devices)
return -EINVAL;

stats->data_type = BCH_DATA_JOURNAL;
stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);

ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;

@ -12,6 +12,7 @@
#include "buckets.h"
#include "clock.h"
#include "disk_groups.h"
#include "error.h"
#include "extents.h"
#include "eytzinger.h"
#include "io.h"
@ -43,37 +44,27 @@
#define COPYGC_BUCKETS_PER_ITER(ca) \
((ca)->free[RESERVE_MOVINGGC].size / 2)

/*
* Max sectors to move per iteration: Have to take into account internal
* fragmentation from the multiple write points for each generation:
*/
#define COPYGC_SECTORS_PER_ITER(ca) \
((ca)->mi.bucket_size * COPYGC_BUCKETS_PER_ITER(ca))

static inline int sectors_used_cmp(copygc_heap *heap,
struct copygc_heap_entry l,
struct copygc_heap_entry r)
{
return cmp_int(l.sectors, r.sectors);
}

static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
{
const struct copygc_heap_entry *l = _l;
const struct copygc_heap_entry *r = _r;

return cmp_int(l->offset, r->offset);
return cmp_int(l->dev, r->dev) ?:
cmp_int(l->offset, r->offset);
}

static bool __copygc_pred(struct bch_dev *ca,
struct bkey_s_c k)
static int __copygc_pred(struct bch_fs *c, struct bkey_s_c k)
{
copygc_heap *h = &ca->copygc_heap;
const struct bch_extent_ptr *ptr =
bch2_bkey_has_device(k, ca->dev_idx);
copygc_heap *h = &c->copygc_heap;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;

if (ptr) {
struct copygc_heap_entry search = { .offset = ptr->offset };
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct copygc_heap_entry search = {
.dev = ptr->dev,
.offset = ptr->offset
};

ssize_t i = eytzinger0_find_le(h->data, h->used,
sizeof(h->data[0]),
@ -89,12 +80,13 @@ static bool __copygc_pred(struct bch_dev *ca,

BUG_ON(i != j);
#endif
return (i >= 0 &&
ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
ptr->gen == h->data[i].gen);
if (i >= 0 &&
ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
ptr->gen == h->data[i].gen)
return ptr->dev;
}

return false;
return -1;
}

static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
@ -102,14 +94,13 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
struct bch_dev *ca = arg;

if (!__copygc_pred(ca, k))
int dev_idx = __copygc_pred(c, k);
if (dev_idx < 0)
return DATA_SKIP;

data_opts->target = dev_to_target(ca->dev_idx);
data_opts->target = io_opts->background_target;
data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE;
data_opts->rewrite_dev = ca->dev_idx;
data_opts->rewrite_dev = dev_idx;
return DATA_REWRITE;
}

@ -125,20 +116,28 @@ static bool have_copygc_reserve(struct bch_dev *ca)
return ret;
}

static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
static inline int fragmentation_cmp(copygc_heap *heap,
struct copygc_heap_entry l,
struct copygc_heap_entry r)
{
copygc_heap *h = &ca->copygc_heap;
return cmp_int(l.fragmentation, r.fragmentation);
}

static int bch2_copygc(struct bch_fs *c)
{
copygc_heap *h = &c->copygc_heap;
struct copygc_heap_entry e, *i;
struct bucket_array *buckets;
struct bch_move_stats move_stats;
u64 sectors_to_move = 0, sectors_not_moved = 0;
u64 sectors_reserved = 0;
u64 buckets_to_move, buckets_not_moved = 0;
size_t b;
struct bch_dev *ca;
unsigned dev_idx;
size_t b, heap_size = 0;
int ret;

memset(&move_stats, 0, sizeof(move_stats));
closure_wait_event(&c->freelist_wait, have_copygc_reserve(ca));

/*
* Find buckets with lowest sector counts, skipping completely
* empty buckets, by building a maxheap sorted by sector count,
@ -147,69 +146,99 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
*/
h->used = 0;

/*
* We need bucket marks to be up to date - gc can't be recalculating
* them:
*/
down_read(&c->gc_lock);
down_read(&ca->bucket_lock);
buckets = bucket_array(ca);
for_each_rw_member(ca, c, dev_idx)
heap_size += ca->mi.nbuckets >> 7;

for (b = buckets->first_bucket; b < buckets->nbuckets; b++) {
struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
struct copygc_heap_entry e;

if (m.owned_by_allocator ||
m.data_type != BCH_DATA_USER ||
!bucket_sectors_used(m) ||
bucket_sectors_used(m) >= ca->mi.bucket_size)
continue;

e = (struct copygc_heap_entry) {
.gen = m.gen,
.sectors = bucket_sectors_used(m),
.offset = bucket_to_sector(ca, b),
};
heap_add_or_replace(h, e, -sectors_used_cmp, NULL);
if (h->size < heap_size) {
free_heap(&c->copygc_heap);
if (!init_heap(&c->copygc_heap, heap_size, GFP_KERNEL)) {
bch_err(c, "error allocating copygc heap");
return 0;
}
}

for_each_rw_member(ca, c, dev_idx) {
closure_wait_event(&c->freelist_wait, have_copygc_reserve(ca));

spin_lock(&ca->fs->freelist_lock);
sectors_reserved += fifo_used(&ca->free[RESERVE_MOVINGGC]) * ca->mi.bucket_size;
spin_unlock(&ca->fs->freelist_lock);

down_read(&ca->bucket_lock);
buckets = bucket_array(ca);

for (b = buckets->first_bucket; b < buckets->nbuckets; b++) {
struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
struct copygc_heap_entry e;

if (m.owned_by_allocator ||
m.data_type != BCH_DATA_user ||
!bucket_sectors_used(m) ||
bucket_sectors_used(m) >= ca->mi.bucket_size)
continue;

e = (struct copygc_heap_entry) {
.dev = dev_idx,
.gen = m.gen,
.fragmentation = bucket_sectors_used(m) * (1U << 15)
/ ca->mi.bucket_size,
.sectors = bucket_sectors_used(m),
.offset = bucket_to_sector(ca, b),
};
heap_add_or_replace(h, e, -fragmentation_cmp, NULL);
}
up_read(&ca->bucket_lock);
}

if (!sectors_reserved) {
bch2_fs_fatal_error(c, "stuck, ran out of copygc reserve!");
return -1;
}
up_read(&ca->bucket_lock);
up_read(&c->gc_lock);

for (i = h->data; i < h->data + h->used; i++)
sectors_to_move += i->sectors;

while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) {
BUG_ON(!heap_pop(h, e, -sectors_used_cmp, NULL));
while (sectors_to_move > sectors_reserved) {
BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL));
sectors_to_move -= e.sectors;
}

buckets_to_move = h->used;

if (!buckets_to_move)
return;
return 0;

eytzinger0_sort(h->data, h->used,
sizeof(h->data[0]),
bucket_offset_cmp, NULL);

ret = bch2_move_data(c, &ca->copygc_pd.rate,
writepoint_ptr(&ca->copygc_write_point),
ret = bch2_move_data(c, &c->copygc_pd.rate,
writepoint_ptr(&c->copygc_write_point),
POS_MIN, POS_MAX,
copygc_pred, ca,
copygc_pred, NULL,
&move_stats);

down_read(&ca->bucket_lock);
buckets = bucket_array(ca);
for (i = h->data; i < h->data + h->used; i++) {
size_t b = sector_to_bucket(ca, i->offset);
struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
for_each_rw_member(ca, c, dev_idx) {
down_read(&ca->bucket_lock);
buckets = bucket_array(ca);
for (i = h->data; i < h->data + h->used; i++) {
struct bucket_mark m;
size_t b;

if (i->gen == m.gen && bucket_sectors_used(m)) {
sectors_not_moved += bucket_sectors_used(m);
buckets_not_moved++;
if (i->dev != dev_idx)
continue;

b = sector_to_bucket(ca, i->offset);
m = READ_ONCE(buckets->b[b].mark);

if (i->gen == m.gen &&
bucket_sectors_used(m)) {
sectors_not_moved += bucket_sectors_used(m);
buckets_not_moved++;
}
}
up_read(&ca->bucket_lock);
}
up_read(&ca->bucket_lock);

if (sectors_not_moved && !ret)
bch_warn_ratelimited(c,
@ -220,9 +249,10 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
atomic64_read(&move_stats.keys_raced),
atomic64_read(&move_stats.sectors_raced));

trace_copygc(ca,
trace_copygc(c,
atomic64_read(&move_stats.sectors_moved), sectors_not_moved,
buckets_to_move, buckets_not_moved);
return 0;
}

/*
@ -239,20 +269,27 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
* often and continually reduce the amount of fragmented space as the device
* fills up. So, we increase the threshold by half the current free space.
*/
unsigned long bch2_copygc_wait_amount(struct bch_dev *ca)
unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
{
struct bch_fs *c = ca->fs;
struct bch_dev_usage usage = bch2_dev_usage_read(c, ca);
u64 fragmented_allowed = ca->copygc_threshold +
((__dev_buckets_available(ca, usage) * ca->mi.bucket_size) >> 1);
struct bch_dev *ca;
unsigned dev_idx;
u64 fragmented_allowed = c->copygc_threshold;
u64 fragmented = 0;

return max_t(s64, 0, fragmented_allowed - usage.sectors_fragmented);
for_each_rw_member(ca, c, dev_idx) {
struct bch_dev_usage usage = bch2_dev_usage_read(ca);

fragmented_allowed += ((__dev_buckets_available(ca, usage) *
ca->mi.bucket_size) >> 1);
fragmented += usage.sectors_fragmented;
}

return max_t(s64, 0, fragmented_allowed - fragmented);
}

static int bch2_copygc_thread(void *arg)
{
struct bch_dev *ca = arg;
struct bch_fs *c = ca->fs;
struct bch_fs *c = arg;
struct io_clock *clock = &c->io_clock[WRITE];
unsigned long last, wait;

@ -263,7 +300,7 @@ static int bch2_copygc_thread(void *arg)
break;

last = atomic_long_read(&clock->now);
wait = bch2_copygc_wait_amount(ca);
wait = bch2_copygc_wait_amount(c);

if (wait > clock->max_slop) {
bch2_kthread_io_clock_wait(clock, last + wait,
@ -271,29 +308,30 @@ static int bch2_copygc_thread(void *arg)
continue;
}

bch2_copygc(c, ca);
if (bch2_copygc(c))
break;
}

return 0;
}

void bch2_copygc_stop(struct bch_dev *ca)
void bch2_copygc_stop(struct bch_fs *c)
{
ca->copygc_pd.rate.rate = UINT_MAX;
bch2_ratelimit_reset(&ca->copygc_pd.rate);
c->copygc_pd.rate.rate = UINT_MAX;
bch2_ratelimit_reset(&c->copygc_pd.rate);

if (ca->copygc_thread) {
kthread_stop(ca->copygc_thread);
put_task_struct(ca->copygc_thread);
if (c->copygc_thread) {
kthread_stop(c->copygc_thread);
put_task_struct(c->copygc_thread);
}
ca->copygc_thread = NULL;
c->copygc_thread = NULL;
}

int bch2_copygc_start(struct bch_fs *c, struct bch_dev *ca)
int bch2_copygc_start(struct bch_fs *c)
{
struct task_struct *t;

if (ca->copygc_thread)
if (c->copygc_thread)
return 0;

if (c->opts.nochanges)
@ -302,21 +340,20 @@ int bch2_copygc_start(struct bch_fs *c, struct bch_dev *ca)
if (bch2_fs_init_fault("copygc_start"))
return -ENOMEM;

t = kthread_create(bch2_copygc_thread, ca,
"bch_copygc[%s]", ca->name);
t = kthread_create(bch2_copygc_thread, c, "bch_copygc");
if (IS_ERR(t))
return PTR_ERR(t);

get_task_struct(t);

ca->copygc_thread = t;
wake_up_process(ca->copygc_thread);
c->copygc_thread = t;
wake_up_process(c->copygc_thread);

return 0;
}

void bch2_dev_copygc_init(struct bch_dev *ca)
void bch2_fs_copygc_init(struct bch_fs *c)
{
bch2_pd_controller_init(&ca->copygc_pd);
ca->copygc_pd.d_term = 0;
bch2_pd_controller_init(&c->copygc_pd);
c->copygc_pd.d_term = 0;
}

@ -2,8 +2,8 @@
|
||||
#ifndef _BCACHEFS_MOVINGGC_H
|
||||
#define _BCACHEFS_MOVINGGC_H
|
||||
|
||||
void bch2_copygc_stop(struct bch_dev *);
|
||||
int bch2_copygc_start(struct bch_fs *, struct bch_dev *);
|
||||
void bch2_dev_copygc_init(struct bch_dev *);
|
||||
void bch2_copygc_stop(struct bch_fs *);
|
||||
int bch2_copygc_start(struct bch_fs *);
|
||||
void bch2_fs_copygc_init(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_MOVINGGC_H */
|
||||
|
@ -45,12 +45,9 @@ const char * const bch2_str_hash_types[] = {
|
||||
};
|
||||
|
||||
const char * const bch2_data_types[] = {
|
||||
"none",
|
||||
"sb",
|
||||
"journal",
|
||||
"btree",
|
||||
"data",
|
||||
"cached",
|
||||
#define x(t, n) #t,
|
||||
BCH_DATA_TYPES()
|
||||
#undef x
|
||||
NULL
|
||||
};
|
||||
|
||||
|
@ -83,7 +83,7 @@ enum opt_type {
|
||||
"size", NULL) \
|
||||
x(btree_node_size, u16, \
|
||||
OPT_FORMAT, \
|
||||
OPT_SECTORS(1, 128), \
|
||||
OPT_SECTORS(1, 512), \
|
||||
BCH_SB_BTREE_NODE_SIZE, 512, \
|
||||
"size", "Btree node size, default 256k") \
|
||||
x(errors, u8, \
|
||||
@ -260,6 +260,11 @@ enum opt_type {
|
||||
OPT_BOOL(), \
|
||||
NO_SB_OPT, false, \
|
||||
NULL, "Don't replay the journal") \
|
||||
x(rebuild_replicas, u8, \
|
||||
OPT_MOUNT, \
|
||||
OPT_BOOL(), \
|
||||
NO_SB_OPT, false, \
|
||||
NULL, "Rebuild the superblock replicas section") \
|
||||
x(keep_journal, u8, \
|
||||
OPT_MOUNT, \
|
||||
OPT_BOOL(), \
|
||||
|
@ -249,45 +249,42 @@ static int bch2_rebalance_thread(void *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf)
|
||||
void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE);
|
||||
struct bch_fs_rebalance *r = &c->rebalance;
|
||||
struct rebalance_work w = rebalance_work(c);
|
||||
char h1[21], h2[21];
|
||||
|
||||
bch2_hprint(&PBUF(h1), w.dev_most_full_work << 9);
|
||||
bch2_hprint(&PBUF(h2), w.dev_most_full_capacity << 9);
|
||||
pr_buf(&out, "fullest_dev (%i):\t%s/%s\n",
|
||||
pr_buf(out, "fullest_dev (%i):\t%s/%s\n",
|
||||
w.dev_most_full_idx, h1, h2);
|
||||
|
||||
bch2_hprint(&PBUF(h1), w.total_work << 9);
|
||||
bch2_hprint(&PBUF(h2), c->capacity << 9);
|
||||
pr_buf(&out, "total work:\t\t%s/%s\n", h1, h2);
|
||||
pr_buf(out, "total work:\t\t%s/%s\n", h1, h2);
|
||||
|
||||
pr_buf(&out, "rate:\t\t\t%u\n", r->pd.rate.rate);
|
||||
pr_buf(out, "rate:\t\t\t%u\n", r->pd.rate.rate);
|
||||
|
||||
switch (r->state) {
|
||||
case REBALANCE_WAITING:
|
||||
pr_buf(&out, "waiting\n");
|
||||
pr_buf(out, "waiting\n");
|
||||
break;
|
||||
case REBALANCE_THROTTLED:
|
||||
bch2_hprint(&PBUF(h1),
|
||||
(r->throttled_until_iotime -
|
||||
atomic_long_read(&c->io_clock[WRITE].now)) << 9);
|
||||
pr_buf(&out, "throttled for %lu sec or %s io\n",
|
||||
pr_buf(out, "throttled for %lu sec or %s io\n",
|
||||
(r->throttled_until_cputime - jiffies) / HZ,
|
||||
h1);
|
||||
break;
|
||||
case REBALANCE_RUNNING:
|
||||
pr_buf(&out, "running\n");
|
||||
pr_buf(&out, "pos %llu:%llu\n",
|
||||
pr_buf(out, "running\n");
|
||||
pr_buf(out, "pos %llu:%llu\n",
|
||||
r->move_stats.pos.inode,
|
||||
r->move_stats.pos.offset);
|
||||
break;
|
||||
}
|
||||
|
||||
return out.pos - buf;
|
||||
}
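
/*
 * Editor's illustrative sketch (not part of the commit): the shape of the
 * printbuf conversion seen throughout this patch — helpers that used to
 * format into a raw char buffer and return a length now append to a
 * printbuf, and the single sysfs "show" caller computes the length once.
 * The printbuf below is a minimal userspace stand-in, not the bcachefs one.
 */
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>

struct printbuf {
	char	*pos;	/* next byte to write */
	char	*end;	/* one past the end of the buffer */
};

static void pr_buf(struct printbuf *out, const char *fmt, ...)
{
	ptrdiff_t room = out->end - out->pos;
	va_list args;
	int n;

	if (room <= 0)
		return;

	va_start(args, fmt);
	n = vsnprintf(out->pos, room, fmt, args);
	va_end(args);

	if (n > 0)
		out->pos += n < room ? n : room - 1;
}

/* formatting helper: knows nothing about sysfs buffers or their size */
static void demo_work_to_text(struct printbuf *out, unsigned rate)
{
	pr_buf(out, "rate:\t%u\n", rate);
	pr_buf(out, "state:\twaiting\n");
}

/* sysfs-style wrapper: owns the page buffer and returns the length once */
static long demo_show(char *buf, size_t len)
{
	struct printbuf out = { .pos = buf, .end = buf + len };

	demo_work_to_text(&out, 42);
	return out.pos - buf;
}

int main(void)
{
	char page[4096];
	long n = demo_show(page, sizeof(page));

	fwrite(page, 1, (size_t) n, stdout);
	return 0;
}
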
|
||||
|
||||
void bch2_rebalance_stop(struct bch_fs *c)
|
||||
|
@ -19,7 +19,7 @@ void bch2_rebalance_add_key(struct bch_fs *, struct bkey_s_c,
|
||||
struct bch_io_opts *);
|
||||
void bch2_rebalance_add_work(struct bch_fs *, u64);
|
||||
|
||||
ssize_t bch2_rebalance_work_show(struct bch_fs *, char *);
|
||||
void bch2_rebalance_work_to_text(struct printbuf *, struct bch_fs *);
|
||||
|
||||
void bch2_rebalance_stop(struct bch_fs *);
|
||||
int bch2_rebalance_start(struct bch_fs *);
|
||||
|
@ -442,11 +442,18 @@ retry:
|
||||
* regular keys
|
||||
*/
|
||||
__bch2_btree_iter_set_pos(split_iter, split->k.p, false);
|
||||
bch2_trans_update(&trans, split_iter, split, !remark
|
||||
? BTREE_TRIGGER_NORUN
|
||||
: BTREE_TRIGGER_NOOVERWRITES);
|
||||
bch2_trans_update(&trans, split_iter, split,
|
||||
BTREE_TRIGGER_NORUN);
|
||||
|
||||
bch2_btree_iter_set_pos(iter, split->k.p);
|
||||
|
||||
if (remark) {
|
||||
ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(split),
|
||||
0, split->k.size,
|
||||
BTREE_TRIGGER_INSERT);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
} while (bkey_cmp(iter->pos, k->k.p) < 0);
|
||||
|
||||
if (remark) {
|
||||
@ -967,7 +974,8 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
bch_info(c, "recovering from clean shutdown, journal seq %llu",
|
||||
le64_to_cpu(clean->journal_seq));
|
||||
|
||||
if (!c->replicas.entries) {
|
||||
if (!c->replicas.entries ||
|
||||
c->opts.rebuild_replicas) {
|
||||
bch_info(c, "building replicas info");
|
||||
set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
|
||||
}
|
||||
@ -1031,6 +1039,11 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
}
|
||||
|
||||
journal_seq += 4;
|
||||
|
||||
/*
|
||||
* The superblock needs to be written before we do any btree
|
||||
* node writes: it will be in the read_write() path
|
||||
*/
|
||||
}
|
||||
|
||||
ret = bch2_blacklist_table_initialize(c);
|
||||
|
@ -113,16 +113,16 @@ void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_btree_ptr:
|
||||
case KEY_TYPE_btree_ptr_v2:
|
||||
e->data_type = BCH_DATA_BTREE;
|
||||
e->data_type = BCH_DATA_btree;
|
||||
extent_to_replicas(k, e);
|
||||
break;
|
||||
case KEY_TYPE_extent:
|
||||
case KEY_TYPE_reflink_v:
|
||||
e->data_type = BCH_DATA_USER;
|
||||
e->data_type = BCH_DATA_user;
|
||||
extent_to_replicas(k, e);
|
||||
break;
|
||||
case KEY_TYPE_stripe:
|
||||
e->data_type = BCH_DATA_USER;
|
||||
e->data_type = BCH_DATA_user;
|
||||
stripe_to_replicas(k, e);
|
||||
break;
|
||||
}
|
||||
@ -137,7 +137,7 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
|
||||
unsigned i;
|
||||
|
||||
BUG_ON(!data_type ||
|
||||
data_type == BCH_DATA_SB ||
|
||||
data_type == BCH_DATA_sb ||
|
||||
data_type >= BCH_DATA_NR);
|
||||
|
||||
e->data_type = data_type;
|
||||
@ -213,29 +213,20 @@ static bool __replicas_has_entry(struct bch_replicas_cpu *r,
|
||||
return __replicas_entry_idx(r, search) >= 0;
|
||||
}
|
||||
|
||||
static bool bch2_replicas_marked_locked(struct bch_fs *c,
|
||||
struct bch_replicas_entry *search,
|
||||
bool check_gc_replicas)
|
||||
bool bch2_replicas_marked(struct bch_fs *c,
|
||||
struct bch_replicas_entry *search)
|
||||
{
|
||||
bool marked;
|
||||
|
||||
if (!search->nr_devs)
|
||||
return true;
|
||||
|
||||
verify_replicas_entry(search);
|
||||
|
||||
return __replicas_has_entry(&c->replicas, search) &&
|
||||
(!check_gc_replicas ||
|
||||
likely((!c->replicas_gc.entries)) ||
|
||||
__replicas_has_entry(&c->replicas_gc, search));
|
||||
}
|
||||
|
||||
bool bch2_replicas_marked(struct bch_fs *c,
|
||||
struct bch_replicas_entry *search,
|
||||
bool check_gc_replicas)
|
||||
{
|
||||
bool marked;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
marked = bch2_replicas_marked_locked(c, search, check_gc_replicas);
|
||||
marked = __replicas_has_entry(&c->replicas, search) &&
|
||||
(likely((!c->replicas_gc.entries)) ||
|
||||
__replicas_has_entry(&c->replicas_gc, search));
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
return marked;
|
||||
@ -423,49 +414,22 @@ err:
|
||||
goto out;
|
||||
}
|
||||
|
||||
int bch2_mark_replicas(struct bch_fs *c,
|
||||
struct bch_replicas_entry *r)
|
||||
static int __bch2_mark_replicas(struct bch_fs *c,
|
||||
struct bch_replicas_entry *r,
|
||||
bool check)
|
||||
{
|
||||
return likely(bch2_replicas_marked(c, r, true))
|
||||
? 0
|
||||
return likely(bch2_replicas_marked(c, r)) ? 0
|
||||
: check ? -1
|
||||
: bch2_mark_replicas_slowpath(c, r);
|
||||
}
|
||||
|
||||
bool bch2_bkey_replicas_marked_locked(struct bch_fs *c,
|
||||
struct bkey_s_c k,
|
||||
bool check_gc_replicas)
|
||||
int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry *r)
|
||||
{
|
||||
struct bch_replicas_padded search;
|
||||
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < cached.nr; i++) {
|
||||
bch2_replicas_entry_cached(&search.e, cached.devs[i]);
|
||||
|
||||
if (!bch2_replicas_marked_locked(c, &search.e,
|
||||
check_gc_replicas))
|
||||
return false;
|
||||
}
|
||||
|
||||
bch2_bkey_to_replicas(&search.e, k);
|
||||
|
||||
return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas);
|
||||
return __bch2_mark_replicas(c, r, false);
|
||||
}
|
||||
|
||||
bool bch2_bkey_replicas_marked(struct bch_fs *c,
|
||||
struct bkey_s_c k,
|
||||
bool check_gc_replicas)
|
||||
{
|
||||
bool marked;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
marked = bch2_bkey_replicas_marked_locked(c, k, check_gc_replicas);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
return marked;
|
||||
}
|
||||
|
||||
int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
|
||||
static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k,
|
||||
bool check)
|
||||
{
|
||||
struct bch_replicas_padded search;
|
||||
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
|
||||
@ -475,14 +439,25 @@ int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
|
||||
for (i = 0; i < cached.nr; i++) {
|
||||
bch2_replicas_entry_cached(&search.e, cached.devs[i]);
|
||||
|
||||
ret = bch2_mark_replicas(c, &search.e);
|
||||
ret = __bch2_mark_replicas(c, &search.e, check);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
bch2_bkey_to_replicas(&search.e, k);
|
||||
|
||||
return bch2_mark_replicas(c, &search.e);
|
||||
return __bch2_mark_replicas(c, &search.e, check);
|
||||
}
|
||||
|
||||
bool bch2_bkey_replicas_marked(struct bch_fs *c,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
return __bch2_mark_bkey_replicas(c, k, true) == 0;
|
||||
}
|
||||
|
||||
int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
return __bch2_mark_bkey_replicas(c, k, false);
|
||||
}
|
||||
|
||||
int bch2_replicas_gc_end(struct bch_fs *c, int ret)
|
||||
@ -611,7 +586,7 @@ retry:
|
||||
struct bch_replicas_entry *e =
|
||||
cpu_replicas_entry(&c->replicas, i);
|
||||
|
||||
if (e->data_type == BCH_DATA_JOURNAL ||
|
||||
if (e->data_type == BCH_DATA_journal ||
|
||||
c->usage_base->replicas[i] ||
|
||||
percpu_u64_get(&c->usage[0]->replicas[i]) ||
|
||||
percpu_u64_get(&c->usage[1]->replicas[i]))
|
||||
@ -1037,13 +1012,13 @@ static bool have_enough_devs(struct replicas_status s,
|
||||
|
||||
bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
|
||||
{
|
||||
return (have_enough_devs(s, BCH_DATA_JOURNAL,
|
||||
return (have_enough_devs(s, BCH_DATA_journal,
|
||||
flags & BCH_FORCE_IF_METADATA_DEGRADED,
|
||||
flags & BCH_FORCE_IF_METADATA_LOST) &&
|
||||
have_enough_devs(s, BCH_DATA_BTREE,
|
||||
have_enough_devs(s, BCH_DATA_btree,
|
||||
flags & BCH_FORCE_IF_METADATA_DEGRADED,
|
||||
flags & BCH_FORCE_IF_METADATA_LOST) &&
|
||||
have_enough_devs(s, BCH_DATA_USER,
|
||||
have_enough_devs(s, BCH_DATA_user,
|
||||
flags & BCH_FORCE_IF_DATA_DEGRADED,
|
||||
flags & BCH_FORCE_IF_DATA_LOST));
|
||||
}
|
||||
@ -1053,9 +1028,9 @@ int bch2_replicas_online(struct bch_fs *c, bool meta)
|
||||
struct replicas_status s = bch2_replicas_status(c);
|
||||
|
||||
return (meta
|
||||
? min(s.replicas[BCH_DATA_JOURNAL].redundancy,
|
||||
s.replicas[BCH_DATA_BTREE].redundancy)
|
||||
: s.replicas[BCH_DATA_USER].redundancy) + 1;
|
||||
? min(s.replicas[BCH_DATA_journal].redundancy,
|
||||
s.replicas[BCH_DATA_btree].redundancy)
|
||||
: s.replicas[BCH_DATA_user].redundancy) + 1;
|
||||
}
|
||||
|
||||
unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
|
||||
|
@ -21,22 +21,18 @@ int bch2_replicas_entry_idx(struct bch_fs *,
|
||||
void bch2_devlist_to_replicas(struct bch_replicas_entry *,
|
||||
enum bch_data_type,
|
||||
struct bch_devs_list);
|
||||
bool bch2_replicas_marked(struct bch_fs *,
|
||||
struct bch_replicas_entry *, bool);
|
||||
bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry *);
|
||||
int bch2_mark_replicas(struct bch_fs *,
|
||||
struct bch_replicas_entry *);
|
||||
|
||||
bool bch2_bkey_replicas_marked_locked(struct bch_fs *,
|
||||
struct bkey_s_c, bool);
|
||||
void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c);
|
||||
bool bch2_bkey_replicas_marked(struct bch_fs *,
|
||||
struct bkey_s_c, bool);
|
||||
bool bch2_bkey_replicas_marked(struct bch_fs *, struct bkey_s_c);
|
||||
int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e,
|
||||
unsigned dev)
|
||||
{
|
||||
e->data_type = BCH_DATA_CACHED;
|
||||
e->data_type = BCH_DATA_cached;
|
||||
e->nr_devs = 1;
|
||||
e->nr_required = 1;
|
||||
e->devs[0] = dev;
|
||||
|
@ -636,7 +636,8 @@ static void write_super_endio(struct bio *bio)
|
||||
|
||||
/* XXX: return errors directly */
|
||||
|
||||
if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write"))
|
||||
if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write: %s",
|
||||
bch2_blk_status_to_str(bio->bi_status)))
|
||||
ca->sb_write_error = 1;
|
||||
|
||||
closure_put(&ca->fs->sb_write);
|
||||
@ -656,7 +657,7 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
|
||||
bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC|REQ_META);
|
||||
bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE);
|
||||
|
||||
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_SB],
|
||||
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb],
|
||||
bio_sectors(bio));
|
||||
|
||||
percpu_ref_get(&ca->io_ref);
|
||||
@ -684,7 +685,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
|
||||
roundup((size_t) vstruct_bytes(sb),
|
||||
bdev_logical_block_size(ca->disk_sb.bdev)));
|
||||
|
||||
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_SB],
|
||||
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
|
||||
bio_sectors(bio));
|
||||
|
||||
percpu_ref_get(&ca->io_ref);
|
||||
|
@ -169,10 +169,9 @@ int bch2_congested(void *data, int bdi_bits)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
unsigned target = READ_ONCE(c->opts.foreground_target);
|
||||
const struct bch_devs_mask *devs = target
|
||||
? bch2_target_to_mask(c, target)
|
||||
: &c->rw_devs[BCH_DATA_USER];
|
||||
const struct bch_devs_mask *devs =
|
||||
bch2_target_to_mask(c, c->opts.foreground_target) ?:
|
||||
&c->rw_devs[BCH_DATA_user];
|
||||
|
||||
for_each_member_device_rcu(ca, c, i, devs) {
|
||||
bdi = ca->disk_sb.bdev->bd_bdi;
|
||||
@ -213,10 +212,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
|
||||
int ret;
|
||||
|
||||
bch2_rebalance_stop(c);
|
||||
|
||||
for_each_member_device(ca, c, i)
|
||||
bch2_copygc_stop(ca);
|
||||
|
||||
bch2_copygc_stop(c);
|
||||
bch2_gc_thread_stop(c);
|
||||
|
||||
/*
|
||||
@ -387,8 +383,8 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c)
|
||||
{
|
||||
bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags);
|
||||
|
||||
bch2_fs_read_only_async(c);
|
||||
bch2_journal_halt(&c->journal);
|
||||
bch2_fs_read_only_async(c);
|
||||
|
||||
wake_up(&bch_read_only_wait);
|
||||
return ret;
|
||||
@ -396,8 +392,6 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c)
|
||||
|
||||
static int bch2_fs_read_write_late(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
ret = bch2_gc_thread_start(c);
|
||||
@ -406,13 +400,10 @@ static int bch2_fs_read_write_late(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
for_each_rw_member(ca, c, i) {
|
||||
ret = bch2_copygc_start(c, ca);
|
||||
if (ret) {
|
||||
bch_err(c, "error starting copygc threads");
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
return ret;
|
||||
}
|
||||
ret = bch2_copygc_start(c);
|
||||
if (ret) {
|
||||
bch_err(c, "error starting copygc thread");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = bch2_rebalance_start(c);
|
||||
@ -450,6 +441,13 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* We need to write out a journal entry before we start doing btree
|
||||
* updates, to ensure that on unclean shutdown new journal blacklist
|
||||
* entries are created:
|
||||
*/
|
||||
bch2_journal_meta(&c->journal);
|
||||
|
||||
clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
|
||||
|
||||
for_each_rw_member(ca, c, i)
|
||||
@ -535,6 +533,7 @@ static void bch2_fs_free(struct bch_fs *c)
|
||||
kfree(c->replicas_gc.entries);
|
||||
kfree(rcu_dereference_protected(c->disk_groups, 1));
|
||||
kfree(c->journal_seq_blacklist_table);
|
||||
free_heap(&c->copygc_heap);
|
||||
|
||||
if (c->journal_reclaim_wq)
|
||||
destroy_workqueue(c->journal_reclaim_wq);
|
||||
@ -684,6 +683,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
for (i = 0; i < BCH_TIME_STAT_NR; i++)
|
||||
bch2_time_stats_init(&c->times[i]);
|
||||
|
||||
bch2_fs_copygc_init(c);
|
||||
bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
|
||||
bch2_fs_allocator_background_init(c);
|
||||
bch2_fs_allocator_foreground_init(c);
|
||||
@ -708,9 +708,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
INIT_LIST_HEAD(&c->fsck_errors);
|
||||
mutex_init(&c->fsck_error_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->ec_new_stripe_list);
|
||||
mutex_init(&c->ec_new_stripe_lock);
|
||||
mutex_init(&c->ec_stripe_create_lock);
|
||||
INIT_LIST_HEAD(&c->ec_stripe_head_list);
|
||||
mutex_init(&c->ec_stripe_head_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->ec_stripe_new_list);
|
||||
mutex_init(&c->ec_stripe_new_lock);
|
||||
|
||||
spin_lock_init(&c->ec_stripes_heap_lock);
|
||||
|
||||
seqcount_init(&c->gc_pos_lock);
|
||||
@ -1108,10 +1111,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
|
||||
|
||||
init_rwsem(&ca->bucket_lock);
|
||||
|
||||
writepoint_init(&ca->copygc_write_point, BCH_DATA_USER);
|
||||
|
||||
bch2_dev_copygc_init(ca);
|
||||
|
||||
INIT_WORK(&ca->io_error_work, bch2_io_error_work);
|
||||
|
||||
bch2_time_stats_init(&ca->io_latency[READ]);
|
||||
@ -1241,7 +1240,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
|
||||
return ret;
|
||||
|
||||
if (test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags) &&
|
||||
!percpu_u64_get(&ca->usage[0]->buckets[BCH_DATA_SB])) {
|
||||
!percpu_u64_get(&ca->usage[0]->buckets[BCH_DATA_sb])) {
|
||||
mutex_lock(&c->sb_lock);
|
||||
bch2_mark_dev_superblock(ca->fs, ca, 0);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
@ -1352,7 +1351,11 @@ static bool bch2_fs_may_start(struct bch_fs *c)
|
||||
|
||||
static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
bch2_copygc_stop(ca);
|
||||
/*
|
||||
* Device going read only means the copygc reserve get smaller, so we
|
||||
* don't want that happening while copygc is in progress:
|
||||
*/
|
||||
bch2_copygc_stop(c);
|
||||
|
||||
/*
|
||||
* The allocator thread itself allocates btree nodes, so stop it first:
|
||||
@ -1360,6 +1363,8 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
|
||||
bch2_dev_allocator_stop(ca);
|
||||
bch2_dev_allocator_remove(c, ca);
|
||||
bch2_dev_journal_stop(&c->journal, ca);
|
||||
|
||||
bch2_copygc_start(c);
|
||||
}
|
||||
|
||||
static const char *__bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
|
||||
@ -1374,9 +1379,6 @@ static const char *__bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
|
||||
if (bch2_dev_allocator_start(ca))
|
||||
return "error starting allocator thread";
|
||||
|
||||
if (bch2_copygc_start(c, ca))
|
||||
return "error starting copygc thread";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -222,6 +222,15 @@ void bch2_fs_read_only(struct bch_fs *);
|
||||
int bch2_fs_read_write(struct bch_fs *);
|
||||
int bch2_fs_read_write_early(struct bch_fs *);
|
||||
|
||||
/*
|
||||
* Only for use in the recovery/fsck path:
|
||||
*/
|
||||
static inline void bch2_fs_lazy_rw(struct bch_fs *c)
|
||||
{
|
||||
if (percpu_ref_is_zero(&c->writes))
|
||||
bch2_fs_read_write_early(c);
|
||||
}
|
||||
|
||||
void bch2_fs_stop(struct bch_fs *);
|
||||
|
||||
int bch2_fs_start(struct bch_fs *);
|
||||
|
@ -75,7 +75,6 @@ do { \
|
||||
#define sysfs_hprint(file, val) \
|
||||
do { \
|
||||
if (attr == &sysfs_ ## file) { \
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE); \
|
||||
bch2_hprint(&out, val); \
|
||||
pr_buf(&out, "\n"); \
|
||||
return out.pos - buf; \
|
||||
@ -168,6 +167,7 @@ read_attribute(btree_updates);
|
||||
read_attribute(dirty_btree_nodes);
|
||||
read_attribute(btree_key_cache);
|
||||
read_attribute(btree_transactions);
|
||||
read_attribute(stripes_heap);
|
||||
|
||||
read_attribute(internal_uuid);
|
||||
|
||||
@ -238,24 +238,22 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
|
||||
static int fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE);
|
||||
struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c);
|
||||
|
||||
if (!fs_usage)
|
||||
return -ENOMEM;
|
||||
|
||||
bch2_fs_usage_to_text(&out, c, fs_usage);
|
||||
bch2_fs_usage_to_text(out, c, fs_usage);
|
||||
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
kfree(fs_usage);
|
||||
|
||||
return out.pos - buf;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
|
||||
static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter *iter;
|
||||
@ -298,59 +296,26 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE,
|
||||
"uncompressed data:\n"
|
||||
" nr extents: %llu\n"
|
||||
" size (bytes): %llu\n"
|
||||
"compressed data:\n"
|
||||
" nr extents: %llu\n"
|
||||
" compressed size (bytes): %llu\n"
|
||||
" uncompressed size (bytes): %llu\n",
|
||||
nr_uncompressed_extents,
|
||||
uncompressed_sectors << 9,
|
||||
nr_compressed_extents,
|
||||
compressed_sectors_compressed << 9,
|
||||
compressed_sectors_uncompressed << 9);
|
||||
}
|
||||
|
||||
static ssize_t bch2_new_stripes(struct bch_fs *c, char *buf)
|
||||
{
|
||||
char *out = buf, *end = buf + PAGE_SIZE;
|
||||
struct ec_stripe_head *h;
|
||||
struct ec_stripe_new *s;
|
||||
|
||||
mutex_lock(&c->ec_new_stripe_lock);
|
||||
list_for_each_entry(h, &c->ec_new_stripe_list, list) {
|
||||
out += scnprintf(out, end - out,
|
||||
"target %u algo %u redundancy %u:\n",
|
||||
h->target, h->algo, h->redundancy);
|
||||
|
||||
if (h->s)
|
||||
out += scnprintf(out, end - out,
|
||||
"\tpending: blocks %u allocated %u\n",
|
||||
h->s->blocks.nr,
|
||||
bitmap_weight(h->s->blocks_allocated,
|
||||
h->s->blocks.nr));
|
||||
|
||||
mutex_lock(&h->lock);
|
||||
list_for_each_entry(s, &h->stripes, list)
|
||||
out += scnprintf(out, end - out,
|
||||
"\tin flight: blocks %u allocated %u pin %u\n",
|
||||
s->blocks.nr,
|
||||
bitmap_weight(s->blocks_allocated,
|
||||
s->blocks.nr),
|
||||
atomic_read(&s->pin));
|
||||
mutex_unlock(&h->lock);
|
||||
|
||||
}
|
||||
mutex_unlock(&c->ec_new_stripe_lock);
|
||||
|
||||
return out - buf;
|
||||
pr_buf(out,
|
||||
"uncompressed data:\n"
|
||||
" nr extents: %llu\n"
|
||||
" size (bytes): %llu\n"
|
||||
"compressed data:\n"
|
||||
" nr extents: %llu\n"
|
||||
" compressed size (bytes): %llu\n"
|
||||
" uncompressed size (bytes): %llu\n",
|
||||
nr_uncompressed_extents,
|
||||
uncompressed_sectors << 9,
|
||||
nr_compressed_extents,
|
||||
compressed_sectors_compressed << 9,
|
||||
compressed_sectors_uncompressed << 9);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SHOW(bch2_fs)
|
||||
{
|
||||
struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE);
|
||||
|
||||
sysfs_print(minor, c->minor);
|
||||
sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
|
||||
@ -378,9 +343,12 @@ SHOW(bch2_fs)
|
||||
|
||||
sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled);
|
||||
sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */
|
||||
sysfs_pd_controller_show(copy_gc, &c->copygc_pd);
|
||||
|
||||
if (attr == &sysfs_rebalance_work)
|
||||
return bch2_rebalance_work_show(c, buf);
|
||||
if (attr == &sysfs_rebalance_work) {
|
||||
bch2_rebalance_work_to_text(&out, c);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
sysfs_print(promote_whole_extents, c->promote_whole_extents);
|
||||
|
||||
@ -390,44 +358,61 @@ SHOW(bch2_fs)
|
||||
/* Debugging: */
|
||||
|
||||
if (attr == &sysfs_alloc_debug)
|
||||
return show_fs_alloc_debug(c, buf);
|
||||
return fs_alloc_debug_to_text(&out, c) ?: out.pos - buf;
|
||||
|
||||
if (attr == &sysfs_journal_debug)
|
||||
return bch2_journal_print_debug(&c->journal, buf);
|
||||
if (attr == &sysfs_journal_debug) {
|
||||
bch2_journal_debug_to_text(&out, &c->journal);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_journal_pins)
|
||||
return bch2_journal_print_pins(&c->journal, buf);
|
||||
if (attr == &sysfs_journal_pins) {
|
||||
bch2_journal_pins_to_text(&out, &c->journal);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_btree_updates)
|
||||
return bch2_btree_updates_print(c, buf);
|
||||
if (attr == &sysfs_btree_updates) {
|
||||
bch2_btree_updates_to_text(&out, c);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_dirty_btree_nodes)
|
||||
return bch2_dirty_btree_nodes_print(c, buf);
|
||||
if (attr == &sysfs_dirty_btree_nodes) {
|
||||
bch2_dirty_btree_nodes_to_text(&out, c);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_btree_key_cache) {
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE);
|
||||
|
||||
bch2_btree_key_cache_to_text(&out, &c->btree_key_cache);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_btree_transactions) {
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE);
|
||||
|
||||
bch2_btree_trans_to_text(&out, c);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_compression_stats)
|
||||
return bch2_compression_stats(c, buf);
|
||||
if (attr == &sysfs_stripes_heap) {
|
||||
bch2_stripes_heap_to_text(&out, c);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_new_stripes)
|
||||
return bch2_new_stripes(c, buf);
|
||||
if (attr == &sysfs_compression_stats) {
|
||||
bch2_compression_stats_to_text(&out, c);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_io_timers_read)
|
||||
return bch2_io_timers_show(&c->io_clock[READ], buf);
|
||||
if (attr == &sysfs_io_timers_write)
|
||||
return bch2_io_timers_show(&c->io_clock[WRITE], buf);
|
||||
if (attr == &sysfs_new_stripes) {
|
||||
bch2_new_stripes_to_text(&out, c);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_io_timers_read) {
|
||||
bch2_io_timers_to_text(&out, &c->io_clock[READ]);
|
||||
return out.pos - buf;
|
||||
}
|
||||
if (attr == &sysfs_io_timers_write) {
|
||||
bch2_io_timers_to_text(&out, &c->io_clock[WRITE]);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
|
||||
BCH_DEBUG_PARAMS()
|
||||
@ -452,14 +437,11 @@ STORE(bch2_fs)
|
||||
}
|
||||
|
||||
if (attr == &sysfs_copy_gc_enabled) {
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
|
||||
?: (ssize_t) size;
|
||||
|
||||
for_each_member_device(ca, c, i)
|
||||
if (ca->copygc_thread)
|
||||
wake_up_process(ca->copygc_thread);
|
||||
if (c->copygc_thread)
|
||||
wake_up_process(c->copygc_thread);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -474,6 +456,7 @@ STORE(bch2_fs)
|
||||
sysfs_strtoul(pd_controllers_update_seconds,
|
||||
c->pd_controllers_update_seconds);
|
||||
sysfs_pd_controller_store(rebalance, &c->rebalance.pd);
|
||||
sysfs_pd_controller_store(copy_gc, &c->copygc_pd);
|
||||
|
||||
sysfs_strtoul(promote_whole_extents, c->promote_whole_extents);
|
||||
|
||||
@ -583,6 +566,7 @@ struct attribute *bch2_fs_internal_files[] = {
|
||||
&sysfs_dirty_btree_nodes,
|
||||
&sysfs_btree_key_cache,
|
||||
&sysfs_btree_transactions,
|
||||
&sysfs_stripes_heap,
|
||||
|
||||
&sysfs_read_realloc_races,
|
||||
&sysfs_extent_migrate_done,
|
||||
@ -598,6 +582,7 @@ struct attribute *bch2_fs_internal_files[] = {
|
||||
&sysfs_rebalance_enabled,
|
||||
&sysfs_rebalance_work,
|
||||
sysfs_pd_controller_files(rebalance),
|
||||
sysfs_pd_controller_files(copy_gc),
|
||||
|
||||
&sysfs_new_stripes,
|
||||
|
||||
@ -696,11 +681,13 @@ int bch2_opts_create_sysfs_files(struct kobject *kobj)
|
||||
SHOW(bch2_fs_time_stats)
|
||||
{
|
||||
struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE);
|
||||
|
||||
#define x(name) \
|
||||
if (attr == &sysfs_time_stat_##name) \
|
||||
return bch2_time_stats_print(&c->times[BCH_TIME_##name],\
|
||||
buf, PAGE_SIZE);
|
||||
#define x(name) \
|
||||
if (attr == &sysfs_time_stat_##name) { \
|
||||
bch2_time_stats_to_text(&out, &c->times[BCH_TIME_##name]);\
|
||||
return out.pos - buf; \
|
||||
}
|
||||
BCH_TIME_STATS()
|
||||
#undef x
|
||||
|
||||
@ -753,13 +740,13 @@ static int unsigned_cmp(const void *_l, const void *_r)
|
||||
return cmp_int(*l, *r);
|
||||
}
|
||||
|
||||
static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca,
|
||||
char *buf, bucket_map_fn *fn, void *private)
|
||||
static int quantiles_to_text(struct printbuf *out,
|
||||
struct bch_fs *c, struct bch_dev *ca,
|
||||
bucket_map_fn *fn, void *private)
|
||||
{
|
||||
size_t i, n;
|
||||
/* Compute 31 quantiles */
|
||||
unsigned q[31], *p;
|
||||
ssize_t ret = 0;
|
||||
|
||||
down_read(&ca->bucket_lock);
|
||||
n = ca->mi.nbuckets;
|
||||
@ -786,38 +773,33 @@ static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca,
|
||||
vfree(p);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(q); i++)
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret,
|
||||
"%u ", q[i]);
|
||||
buf[ret - 1] = '\n';
|
||||
|
||||
return ret;
|
||||
pr_buf(out, "%u ", q[i]);
|
||||
pr_buf(out, "\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf)
|
||||
static void reserve_stats_to_text(struct printbuf *out, struct bch_dev *ca)
|
||||
{
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE);
|
||||
enum alloc_reserve i;
|
||||
|
||||
spin_lock(&ca->fs->freelist_lock);
|
||||
|
||||
pr_buf(&out, "free_inc:\t%zu\t%zu\n",
|
||||
pr_buf(out, "free_inc:\t%zu\t%zu\n",
|
||||
fifo_used(&ca->free_inc),
|
||||
ca->free_inc.size);
|
||||
|
||||
for (i = 0; i < RESERVE_NR; i++)
|
||||
pr_buf(&out, "free[%u]:\t%zu\t%zu\n", i,
|
||||
pr_buf(out, "free[%u]:\t%zu\t%zu\n", i,
|
||||
fifo_used(&ca->free[i]),
|
||||
ca->free[i].size);
|
||||
|
||||
spin_unlock(&ca->fs->freelist_lock);
|
||||
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
|
||||
static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
|
||||
{
|
||||
struct bch_fs *c = ca->fs;
|
||||
struct bch_dev_usage stats = bch2_dev_usage_read(c, ca);
|
||||
struct bch_dev_usage stats = bch2_dev_usage_read(ca);
|
||||
unsigned i, nr[BCH_DATA_NR];
|
||||
|
||||
memset(nr, 0, sizeof(nr));
|
||||
@ -825,7 +807,7 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
|
||||
for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
|
||||
nr[c->open_buckets[i].type]++;
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE,
|
||||
pr_buf(out,
|
||||
"free_inc: %zu/%zu\n"
|
||||
"free[RESERVE_BTREE]: %zu/%zu\n"
|
||||
"free[RESERVE_MOVINGGC]: %zu/%zu\n"
|
||||
@ -861,27 +843,27 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
|
||||
fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size,
|
||||
ca->mi.nbuckets - ca->mi.first_bucket,
|
||||
stats.buckets_alloc,
|
||||
stats.buckets[BCH_DATA_SB],
|
||||
stats.buckets[BCH_DATA_JOURNAL],
|
||||
stats.buckets[BCH_DATA_BTREE],
|
||||
stats.buckets[BCH_DATA_USER],
|
||||
stats.buckets[BCH_DATA_CACHED],
|
||||
stats.buckets[BCH_DATA_sb],
|
||||
stats.buckets[BCH_DATA_journal],
|
||||
stats.buckets[BCH_DATA_btree],
|
||||
stats.buckets[BCH_DATA_user],
|
||||
stats.buckets[BCH_DATA_cached],
|
||||
stats.buckets_ec,
|
||||
ca->mi.nbuckets - ca->mi.first_bucket - stats.buckets_unavailable,
|
||||
stats.sectors[BCH_DATA_SB],
|
||||
stats.sectors[BCH_DATA_JOURNAL],
|
||||
stats.sectors[BCH_DATA_BTREE],
|
||||
stats.sectors[BCH_DATA_USER],
|
||||
stats.sectors[BCH_DATA_CACHED],
|
||||
__dev_buckets_available(ca, stats),
|
||||
stats.sectors[BCH_DATA_sb],
|
||||
stats.sectors[BCH_DATA_journal],
|
||||
stats.sectors[BCH_DATA_btree],
|
||||
stats.sectors[BCH_DATA_user],
|
||||
stats.sectors[BCH_DATA_cached],
|
||||
stats.sectors_ec,
|
||||
stats.sectors_fragmented,
|
||||
ca->copygc_threshold,
|
||||
c->copygc_threshold,
|
||||
c->freelist_wait.list.first ? "waiting" : "empty",
|
||||
c->open_buckets_nr_free, OPEN_BUCKETS_COUNT,
|
||||
BTREE_NODE_OPEN_BUCKET_RESERVE,
|
||||
c->open_buckets_wait.list.first ? "waiting" : "empty",
|
||||
nr[BCH_DATA_BTREE],
|
||||
nr[BCH_DATA_USER],
|
||||
nr[BCH_DATA_btree],
|
||||
nr[BCH_DATA_user],
|
||||
c->btree_reserve_cache_nr);
|
||||
}
|
||||
|
||||
@ -891,21 +873,18 @@ static const char * const bch2_rw[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf)
|
||||
static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca)
|
||||
{
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE);
|
||||
int rw, i;
|
||||
|
||||
for (rw = 0; rw < 2; rw++) {
|
||||
pr_buf(&out, "%s:\n", bch2_rw[rw]);
|
||||
pr_buf(out, "%s:\n", bch2_rw[rw]);
|
||||
|
||||
for (i = 1; i < BCH_DATA_NR; i++)
|
||||
pr_buf(&out, "%-12s:%12llu\n",
|
||||
pr_buf(out, "%-12s:%12llu\n",
|
||||
bch2_data_types[i],
|
||||
percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
|
||||
}
|
||||
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
SHOW(bch2_dev)
|
||||
@ -942,8 +921,6 @@ SHOW(bch2_dev)
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
sysfs_pd_controller_show(copy_gc, &ca->copygc_pd);
|
||||
|
||||
if (attr == &sysfs_cache_replacement_policy) {
|
||||
bch2_string_opt_to_text(&out,
|
||||
bch2_cache_replacement_policies,
|
||||
@ -959,34 +936,44 @@ SHOW(bch2_dev)
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_iodone)
|
||||
return show_dev_iodone(ca, buf);
|
||||
if (attr == &sysfs_iodone) {
|
||||
dev_iodone_to_text(&out, ca);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ]));
|
||||
sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE]));
|
||||
|
||||
if (attr == &sysfs_io_latency_stats_read)
|
||||
return bch2_time_stats_print(&ca->io_latency[READ], buf, PAGE_SIZE);
|
||||
if (attr == &sysfs_io_latency_stats_write)
|
||||
return bch2_time_stats_print(&ca->io_latency[WRITE], buf, PAGE_SIZE);
|
||||
if (attr == &sysfs_io_latency_stats_read) {
|
||||
bch2_time_stats_to_text(&out, &ca->io_latency[READ]);
|
||||
return out.pos - buf;
|
||||
}
|
||||
if (attr == &sysfs_io_latency_stats_write) {
|
||||
bch2_time_stats_to_text(&out, &ca->io_latency[WRITE]);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
sysfs_printf(congested, "%u%%",
|
||||
clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
|
||||
* 100 / CONGESTED_MAX);
|
||||
|
||||
if (attr == &sysfs_bucket_quantiles_last_read)
|
||||
return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0);
|
||||
return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 0) ?: out.pos - buf;
|
||||
if (attr == &sysfs_bucket_quantiles_last_write)
|
||||
return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 1);
|
||||
return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 1) ?: out.pos - buf;
|
||||
if (attr == &sysfs_bucket_quantiles_fragmentation)
|
||||
return show_quantiles(c, ca, buf, bucket_sectors_used_fn, NULL);
|
||||
return quantiles_to_text(&out, c, ca, bucket_sectors_used_fn, NULL) ?: out.pos - buf;
|
||||
if (attr == &sysfs_bucket_quantiles_oldest_gen)
|
||||
return show_quantiles(c, ca, buf, bucket_oldest_gen_fn, NULL);
|
||||
return quantiles_to_text(&out, c, ca, bucket_oldest_gen_fn, NULL) ?: out.pos - buf;
|
||||
|
||||
if (attr == &sysfs_reserve_stats)
|
||||
return show_reserve_stats(ca, buf);
|
||||
if (attr == &sysfs_alloc_debug)
|
||||
return show_dev_alloc_debug(ca, buf);
|
||||
if (attr == &sysfs_reserve_stats) {
|
||||
reserve_stats_to_text(&out, ca);
|
||||
return out.pos - buf;
|
||||
}
|
||||
if (attr == &sysfs_alloc_debug) {
|
||||
dev_alloc_debug_to_text(&out, ca);
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -997,8 +984,6 @@ STORE(bch2_dev)
|
||||
struct bch_fs *c = ca->fs;
|
||||
struct bch_member *mi;
|
||||
|
||||
sysfs_pd_controller_store(copy_gc, &ca->copygc_pd);
|
||||
|
||||
if (attr == &sysfs_discard) {
|
||||
bool v = strtoul_or_return(buf);
|
||||
|
||||
@ -1083,8 +1068,6 @@ struct attribute *bch2_dev_files[] = {
|
||||
/* debug: */
|
||||
&sysfs_alloc_debug,
|
||||
&sysfs_wake_allocator,
|
||||
|
||||
sysfs_pd_controller_files(copy_gc),
|
||||
NULL
|
||||
};
|
||||
|
||||
|
@ -318,43 +318,40 @@ static void pr_time_units(struct printbuf *out, u64 ns)
|
||||
pr_buf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
|
||||
}
|
||||
|
||||
size_t bch2_time_stats_print(struct time_stats *stats, char *buf, size_t len)
|
||||
void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats)
|
||||
{
|
||||
struct printbuf out = _PBUF(buf, len);
|
||||
const struct time_unit *u;
|
||||
u64 freq = READ_ONCE(stats->average_frequency);
|
||||
u64 q, last_q = 0;
|
||||
int i;
|
||||
|
||||
pr_buf(&out, "count:\t\t%llu\n",
|
||||
pr_buf(out, "count:\t\t%llu\n",
|
||||
stats->count);
|
||||
pr_buf(&out, "rate:\t\t%llu/sec\n",
|
||||
pr_buf(out, "rate:\t\t%llu/sec\n",
|
||||
freq ? div64_u64(NSEC_PER_SEC, freq) : 0);
|
||||
|
||||
pr_buf(&out, "frequency:\t");
|
||||
pr_time_units(&out, freq);
|
||||
pr_buf(out, "frequency:\t");
|
||||
pr_time_units(out, freq);
|
||||
|
||||
pr_buf(&out, "\navg duration:\t");
|
||||
pr_time_units(&out, stats->average_duration);
|
||||
pr_buf(out, "\navg duration:\t");
|
||||
pr_time_units(out, stats->average_duration);
|
||||
|
||||
pr_buf(&out, "\nmax duration:\t");
|
||||
pr_time_units(&out, stats->max_duration);
|
||||
pr_buf(out, "\nmax duration:\t");
|
||||
pr_time_units(out, stats->max_duration);
|
||||
|
||||
i = eytzinger0_first(NR_QUANTILES);
|
||||
u = pick_time_units(stats->quantiles.entries[i].m);
|
||||
|
||||
pr_buf(&out, "\nquantiles (%s):\t", u->name);
|
||||
pr_buf(out, "\nquantiles (%s):\t", u->name);
|
||||
eytzinger0_for_each(i, NR_QUANTILES) {
|
||||
bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
|
||||
|
||||
q = max(stats->quantiles.entries[i].m, last_q);
|
||||
pr_buf(&out, "%llu%s",
|
||||
pr_buf(out, "%llu%s",
|
||||
div_u64(q, u->nsecs),
|
||||
is_last ? "\n" : " ");
|
||||
last_q = q;
|
||||
}
|
||||
|
||||
return out.pos - buf;
|
||||
}
|
||||
|
||||
void bch2_time_stats_exit(struct time_stats *stats)
|
||||
|
@ -99,7 +99,7 @@ static inline void *vpmalloc(size_t size, gfp_t gfp_mask)
|
||||
{
|
||||
return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
|
||||
get_order(size)) ?:
|
||||
__vmalloc(size, gfp_mask, PAGE_KERNEL);
|
||||
__vmalloc(size, gfp_mask);
|
||||
}
|
||||
|
||||
static inline void kvpfree(void *p, size_t size)
|
||||
@ -398,7 +398,7 @@ static inline void bch2_time_stats_update(struct time_stats *stats, u64 start)
|
||||
__bch2_time_stats_update(stats, start, local_clock());
|
||||
}
|
||||
|
||||
size_t bch2_time_stats_print(struct time_stats *, char *, size_t);
|
||||
void bch2_time_stats_to_text(struct printbuf *, struct time_stats *);
|
||||
|
||||
void bch2_time_stats_exit(struct time_stats *);
|
||||
void bch2_time_stats_init(struct time_stats *);
|
||||
|
@ -511,7 +511,11 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
|
||||
|
||||
mutex_lock(&inode->ei_update_lock);
|
||||
if (inode_opt_id == Inode_opt_project) {
|
||||
ret = bch2_set_projid(c, inode, s.v);
|
||||
/*
|
||||
* inode fields accessible via the xattr interface are stored
|
||||
* with a +1 bias, so that 0 means unset:
|
||||
*/
|
||||
ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
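
/*
 * Editor's illustrative sketch (not part of the commit): the "+1 bias, so
 * that 0 means unset" convention mentioned in the comment above, shown as a
 * standalone encode/decode pair.  Helper names are hypothetical and the
 * xattr/inode plumbing is deliberately left out.
 */
#include <assert.h>
#include <stdbool.h>

typedef unsigned long long u64;

static u64 bias_encode(u64 val)		/* real value -> stored/exported field */
{
	return val + 1;			/* 0 stays representable; a raw 0 now means "unset" */
}

static bool bias_is_set(u64 biased)
{
	return biased != 0;
}

static u64 bias_decode(u64 biased)	/* stored/exported field -> real value */
{
	assert(bias_is_set(biased));
	return biased - 1;
}

int main(void)
{
	u64 biased = bias_encode(0);	/* setting the field to 0 ... */

	assert(bias_is_set(biased));	/* ... is still distinguishable from "never set" */
	assert(bias_decode(biased) == 0);
	return 0;
}
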
|
||||
|
@ -52,6 +52,15 @@ int blk_status_to_errno(blk_status_t status)
|
||||
return blk_errors[idx].err;
|
||||
}
|
||||
|
||||
const char *blk_status_to_str(blk_status_t status)
|
||||
{
|
||||
int idx = (__force int)status;
|
||||
|
||||
if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
|
||||
return "(invalid error)";
|
||||
return blk_errors[idx].name;
|
||||
}
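
/*
 * Editor's illustrative sketch (not part of the commit): the point of the new
 * helper is that I/O error paths can log a symbolic status name instead of a
 * bare number (see the "superblock write: %s" hunk earlier in this patch).
 * The table contents and messages below are made up; only the bounds-checked
 * lookup mirrors blk_status_to_str() above.
 */
#include <stdio.h>

static const char *const demo_errors[] = {
	"success",
	"I/O error",
	"timeout",
};

static const char *demo_status_to_str(unsigned status)
{
	if (status >= sizeof(demo_errors) / sizeof(demo_errors[0]))
		return "(invalid error)";
	return demo_errors[status];
}

int main(void)
{
	/* e.g. a failed superblock or journal read would now log: */
	fprintf(stderr, "journal read error: %s\n", demo_status_to_str(1));
	fprintf(stderr, "unknown status: %s\n", demo_status_to_str(42));
	return 0;
}
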
|
||||
|
||||
void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
|
||||
struct bio *src, struct bvec_iter *src_iter)
|
||||
{
|
||||
|