Mirror of https://github.com/koverstreet/bcachefs-tools.git
commit cd35891eb9 (parent f9ec00d5ca)

    Update bcachefs sources to ec2ddb95112b bcachefs: bch2_opts_to_text()

    Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

@ -1 +1 @@
|
|||||||
22fa8fc32e6aafb8bd76c6b746868dbdbc6a934d
|
ec2ddb95112b8967753591b16e2e439eee76c5b1
|
||||||
|
@ -65,6 +65,8 @@
|
|||||||
#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
|
#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
|
||||||
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
|
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
|
||||||
#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
|
#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
|
||||||
|
#define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */
|
||||||
|
#define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */
|
||||||
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
|
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
|
||||||
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
|
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
|
||||||
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
|
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
|
||||||
|
7
include/linux/swap.h
Normal file
7
include/linux/swap.h
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
#ifndef _LINUX_SWAP_H
|
||||||
|
#define _LINUX_SWAP_H
|
||||||
|
|
||||||
|
static inline void mm_account_reclaimed_pages(unsigned long pages) {}
|
||||||
|
|
||||||
|
#endif /* _LINUX_SWAP_H */
|
@ -44,6 +44,20 @@ static inline struct timespec timespec_trunc(struct timespec t, unsigned gran)
|
|||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec)
|
||||||
|
{
|
||||||
|
while (nsec >= NSEC_PER_SEC) {
|
||||||
|
nsec -= NSEC_PER_SEC;
|
||||||
|
++sec;
|
||||||
|
}
|
||||||
|
while (nsec < 0) {
|
||||||
|
nsec += NSEC_PER_SEC;
|
||||||
|
--sec;
|
||||||
|
}
|
||||||
|
ts->tv_sec = sec;
|
||||||
|
ts->tv_nsec = nsec;
|
||||||
|
}
|
||||||
|
|
||||||
#define ns_to_timespec64 ns_to_timespec
|
#define ns_to_timespec64 ns_to_timespec
|
||||||
#define timespec64_to_ns timespec_to_ns
|
#define timespec64_to_ns timespec_to_ns
|
||||||
#define timespec64_trunc timespec_trunc
|
#define timespec64_trunc timespec_trunc
|
||||||
|
@ -37,6 +37,8 @@ typedef unsigned gfp_t;
|
|||||||
#define __GFP_NOWARN 0
|
#define __GFP_NOWARN 0
|
||||||
#define __GFP_NORETRY 0
|
#define __GFP_NORETRY 0
|
||||||
#define __GFP_NOFAIL 0
|
#define __GFP_NOFAIL 0
|
||||||
|
#define __GFP_ACCOUNT 0
|
||||||
|
#define __GFP_RECLAIMABLE 0
|
||||||
#define __GFP_ZERO 1
|
#define __GFP_ZERO 1
|
||||||
#define GFP_KERNEL 2
|
#define GFP_KERNEL 2
|
||||||
|
|
||||||
|
@ -137,7 +137,7 @@ static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans,
|
|||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
acl = allocate_dropping_locks(trans, ret,
|
acl = allocate_dropping_locks(trans, ret,
|
||||||
posix_acl_alloc(count, _gfp));
|
posix_acl_alloc(count, GFP_KERNEL));
|
||||||
if (!acl)
|
if (!acl)
|
||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
@ -427,7 +427,8 @@ int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
ret = allocate_dropping_locks_errcode(trans, __posix_acl_chmod(&acl, _gfp, mode));
|
ret = allocate_dropping_locks_errcode(trans,
|
||||||
|
__posix_acl_chmod(&acl, GFP_KERNEL, mode));
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
|
@ -1969,8 +1969,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
|
|
||||||
percpu_ref_put(&ca->io_ref);
|
percpu_ref_put(&ca->io_ref);
|
||||||
|
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
|
static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
|
||||||
@ -1980,18 +1980,18 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
|
|||||||
if (discard_in_flight_add(ca, bucket, false))
|
if (discard_in_flight_add(ca, bucket, false))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
|
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
|
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
|
||||||
goto put_ioref;
|
goto put_ref;
|
||||||
|
|
||||||
if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
|
if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
|
|
||||||
put_ioref:
|
|
||||||
percpu_ref_put(&ca->io_ref);
|
percpu_ref_put(&ca->io_ref);
|
||||||
|
put_ref:
|
||||||
|
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int invalidate_one_bucket(struct btree_trans *trans,
|
static int invalidate_one_bucket(struct btree_trans *trans,
|
||||||
@ -2133,26 +2133,26 @@ static void bch2_do_invalidates_work(struct work_struct *work)
|
|||||||
bch2_trans_iter_exit(trans, &iter);
|
bch2_trans_iter_exit(trans, &iter);
|
||||||
err:
|
err:
|
||||||
bch2_trans_put(trans);
|
bch2_trans_put(trans);
|
||||||
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
|
|
||||||
percpu_ref_put(&ca->io_ref);
|
percpu_ref_put(&ca->io_ref);
|
||||||
|
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_dev_do_invalidates(struct bch_dev *ca)
|
void bch2_dev_do_invalidates(struct bch_dev *ca)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = ca->fs;
|
struct bch_fs *c = ca->fs;
|
||||||
|
|
||||||
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
|
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
|
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
|
||||||
goto put_ioref;
|
goto put_ref;
|
||||||
|
|
||||||
if (queue_work(c->write_ref_wq, &ca->invalidate_work))
|
if (queue_work(c->write_ref_wq, &ca->invalidate_work))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
|
|
||||||
put_ioref:
|
|
||||||
percpu_ref_put(&ca->io_ref);
|
percpu_ref_put(&ca->io_ref);
|
||||||
|
put_ref:
|
||||||
|
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_do_invalidates(struct bch_fs *c)
|
void bch2_do_invalidates(struct bch_fs *c)
|
||||||
@ -2298,6 +2298,36 @@ int bch2_fs_freespace_init(struct bch_fs *c)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* device removal */
|
||||||
|
|
||||||
|
int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
|
||||||
|
{
|
||||||
|
struct bpos start = POS(ca->dev_idx, 0);
|
||||||
|
struct bpos end = POS(ca->dev_idx, U64_MAX);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We clear the LRU and need_discard btrees first so that we don't race
|
||||||
|
* with bch2_do_invalidates() and bch2_do_discards()
|
||||||
|
*/
|
||||||
|
ret = bch2_dev_remove_stripes(c, ca) ?:
|
||||||
|
bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
|
||||||
|
BTREE_TRIGGER_norun, NULL) ?:
|
||||||
|
bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
|
||||||
|
BTREE_TRIGGER_norun, NULL) ?:
|
||||||
|
bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
|
||||||
|
BTREE_TRIGGER_norun, NULL) ?:
|
||||||
|
bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end,
|
||||||
|
BTREE_TRIGGER_norun, NULL) ?:
|
||||||
|
bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end,
|
||||||
|
BTREE_TRIGGER_norun, NULL) ?:
|
||||||
|
bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
|
||||||
|
BTREE_TRIGGER_norun, NULL) ?:
|
||||||
|
bch2_dev_usage_remove(c, ca->dev_idx);
|
||||||
|
bch_err_msg(c, ret, "removing dev alloc info");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/* Bucket IO clocks: */
|
/* Bucket IO clocks: */
|
||||||
|
|
||||||
int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
|
int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
|
||||||
@ -2433,13 +2463,15 @@ static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
|
|||||||
/* device goes ro: */
|
/* device goes ro: */
|
||||||
void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
|
void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
|
||||||
{
|
{
|
||||||
unsigned i;
|
lockdep_assert_held(&c->state_lock);
|
||||||
|
|
||||||
/* First, remove device from allocation groups: */
|
/* First, remove device from allocation groups: */
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
|
||||||
clear_bit(ca->dev_idx, c->rw_devs[i].d);
|
clear_bit(ca->dev_idx, c->rw_devs[i].d);
|
||||||
|
|
||||||
|
c->rw_devs_change_count++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Capacity is calculated based off of devices in allocation groups:
|
* Capacity is calculated based off of devices in allocation groups:
|
||||||
*/
|
*/
|
||||||
@ -2468,11 +2500,13 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
|
|||||||
/* device goes rw: */
|
/* device goes rw: */
|
||||||
void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
|
void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
|
||||||
{
|
{
|
||||||
unsigned i;
|
lockdep_assert_held(&c->state_lock);
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
|
||||||
if (ca->mi.data_allowed & (1 << i))
|
if (ca->mi.data_allowed & (1 << i))
|
||||||
set_bit(ca->dev_idx, c->rw_devs[i].d);
|
set_bit(ca->dev_idx, c->rw_devs[i].d);
|
||||||
|
|
||||||
|
c->rw_devs_change_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_dev_allocator_background_exit(struct bch_dev *ca)
|
void bch2_dev_allocator_background_exit(struct bch_dev *ca)
|
||||||
|
@ -338,6 +338,7 @@ static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct
|
|||||||
|
|
||||||
int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64);
|
int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64);
|
||||||
int bch2_fs_freespace_init(struct bch_fs *);
|
int bch2_fs_freespace_init(struct bch_fs *);
|
||||||
|
int bch2_dev_remove_alloc(struct bch_fs *, struct bch_dev *);
|
||||||
|
|
||||||
void bch2_recalc_capacity(struct bch_fs *);
|
void bch2_recalc_capacity(struct bch_fs *);
|
||||||
u64 bch2_min_rw_member_capacity(struct bch_fs *);
|
u64 bch2_min_rw_member_capacity(struct bch_fs *);
|
||||||
|
@ -600,6 +600,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
|
|||||||
enum bch_watermark watermark,
|
enum bch_watermark watermark,
|
||||||
enum bch_data_type data_type,
|
enum bch_data_type data_type,
|
||||||
struct closure *cl,
|
struct closure *cl,
|
||||||
|
bool nowait,
|
||||||
struct bch_dev_usage *usage)
|
struct bch_dev_usage *usage)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
@ -609,7 +610,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
|
|||||||
struct bucket_alloc_state s = {
|
struct bucket_alloc_state s = {
|
||||||
.btree_bitmap = data_type == BCH_DATA_btree,
|
.btree_bitmap = data_type == BCH_DATA_btree,
|
||||||
};
|
};
|
||||||
bool waiting = false;
|
bool waiting = nowait;
|
||||||
again:
|
again:
|
||||||
bch2_dev_usage_read_fast(ca, usage);
|
bch2_dev_usage_read_fast(ca, usage);
|
||||||
avail = dev_buckets_free(ca, *usage, watermark);
|
avail = dev_buckets_free(ca, *usage, watermark);
|
||||||
@ -685,7 +686,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
|
|||||||
|
|
||||||
bch2_trans_do(c, NULL, NULL, 0,
|
bch2_trans_do(c, NULL, NULL, 0,
|
||||||
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark,
|
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark,
|
||||||
data_type, cl, &usage)));
|
data_type, cl, false, &usage)));
|
||||||
return ob;
|
return ob;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -748,7 +749,6 @@ static int add_new_bucket(struct bch_fs *c,
|
|||||||
unsigned nr_replicas,
|
unsigned nr_replicas,
|
||||||
unsigned *nr_effective,
|
unsigned *nr_effective,
|
||||||
bool *have_cache,
|
bool *have_cache,
|
||||||
unsigned flags,
|
|
||||||
struct open_bucket *ob)
|
struct open_bucket *ob)
|
||||||
{
|
{
|
||||||
unsigned durability = ob_dev(c, ob)->mi.durability;
|
unsigned durability = ob_dev(c, ob)->mi.durability;
|
||||||
@ -775,7 +775,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
|
|||||||
unsigned nr_replicas,
|
unsigned nr_replicas,
|
||||||
unsigned *nr_effective,
|
unsigned *nr_effective,
|
||||||
bool *have_cache,
|
bool *have_cache,
|
||||||
unsigned flags,
|
enum bch_write_flags flags,
|
||||||
enum bch_data_type data_type,
|
enum bch_data_type data_type,
|
||||||
enum bch_watermark watermark,
|
enum bch_watermark watermark,
|
||||||
struct closure *cl)
|
struct closure *cl)
|
||||||
@ -801,7 +801,8 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type, cl, &usage);
|
ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type,
|
||||||
|
cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage);
|
||||||
if (!IS_ERR(ob))
|
if (!IS_ERR(ob))
|
||||||
bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
|
bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
|
||||||
bch2_dev_put(ca);
|
bch2_dev_put(ca);
|
||||||
@ -815,7 +816,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
|
|||||||
|
|
||||||
if (add_new_bucket(c, ptrs, devs_may_alloc,
|
if (add_new_bucket(c, ptrs, devs_may_alloc,
|
||||||
nr_replicas, nr_effective,
|
nr_replicas, nr_effective,
|
||||||
have_cache, flags, ob)) {
|
have_cache, ob)) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -841,7 +842,7 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
|
|||||||
unsigned *nr_effective,
|
unsigned *nr_effective,
|
||||||
bool *have_cache,
|
bool *have_cache,
|
||||||
enum bch_watermark watermark,
|
enum bch_watermark watermark,
|
||||||
unsigned flags,
|
enum bch_write_flags flags,
|
||||||
struct closure *cl)
|
struct closure *cl)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
@ -883,7 +884,7 @@ got_bucket:
|
|||||||
|
|
||||||
ret = add_new_bucket(c, ptrs, devs_may_alloc,
|
ret = add_new_bucket(c, ptrs, devs_may_alloc,
|
||||||
nr_replicas, nr_effective,
|
nr_replicas, nr_effective,
|
||||||
have_cache, flags, ob);
|
have_cache, ob);
|
||||||
out_put_head:
|
out_put_head:
|
||||||
bch2_ec_stripe_head_put(c, h);
|
bch2_ec_stripe_head_put(c, h);
|
||||||
return ret;
|
return ret;
|
||||||
@ -922,7 +923,7 @@ static int bucket_alloc_set_writepoint(struct bch_fs *c,
|
|||||||
unsigned nr_replicas,
|
unsigned nr_replicas,
|
||||||
unsigned *nr_effective,
|
unsigned *nr_effective,
|
||||||
bool *have_cache,
|
bool *have_cache,
|
||||||
bool ec, unsigned flags)
|
bool ec)
|
||||||
{
|
{
|
||||||
struct open_buckets ptrs_skip = { .nr = 0 };
|
struct open_buckets ptrs_skip = { .nr = 0 };
|
||||||
struct open_bucket *ob;
|
struct open_bucket *ob;
|
||||||
@ -934,7 +935,7 @@ static int bucket_alloc_set_writepoint(struct bch_fs *c,
|
|||||||
have_cache, ec, ob))
|
have_cache, ec, ob))
|
||||||
ret = add_new_bucket(c, ptrs, devs_may_alloc,
|
ret = add_new_bucket(c, ptrs, devs_may_alloc,
|
||||||
nr_replicas, nr_effective,
|
nr_replicas, nr_effective,
|
||||||
have_cache, flags, ob);
|
have_cache, ob);
|
||||||
else
|
else
|
||||||
ob_push(c, &ptrs_skip, ob);
|
ob_push(c, &ptrs_skip, ob);
|
||||||
}
|
}
|
||||||
@ -950,8 +951,7 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
|
|||||||
unsigned nr_replicas,
|
unsigned nr_replicas,
|
||||||
unsigned *nr_effective,
|
unsigned *nr_effective,
|
||||||
bool *have_cache, bool ec,
|
bool *have_cache, bool ec,
|
||||||
enum bch_watermark watermark,
|
enum bch_watermark watermark)
|
||||||
unsigned flags)
|
|
||||||
{
|
{
|
||||||
int i, ret = 0;
|
int i, ret = 0;
|
||||||
|
|
||||||
@ -983,7 +983,7 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
|
|||||||
|
|
||||||
ret = add_new_bucket(c, ptrs, devs_may_alloc,
|
ret = add_new_bucket(c, ptrs, devs_may_alloc,
|
||||||
nr_replicas, nr_effective,
|
nr_replicas, nr_effective,
|
||||||
have_cache, flags, ob);
|
have_cache, ob);
|
||||||
if (ret)
|
if (ret)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1003,7 +1003,7 @@ static int __open_bucket_add_buckets(struct btree_trans *trans,
|
|||||||
unsigned *nr_effective,
|
unsigned *nr_effective,
|
||||||
bool *have_cache,
|
bool *have_cache,
|
||||||
enum bch_watermark watermark,
|
enum bch_watermark watermark,
|
||||||
unsigned flags,
|
enum bch_write_flags flags,
|
||||||
struct closure *_cl)
|
struct closure *_cl)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
@ -1024,13 +1024,13 @@ static int __open_bucket_add_buckets(struct btree_trans *trans,
|
|||||||
|
|
||||||
ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs,
|
ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs,
|
||||||
nr_replicas, nr_effective,
|
nr_replicas, nr_effective,
|
||||||
have_cache, erasure_code, flags);
|
have_cache, erasure_code);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
ret = bucket_alloc_set_partial(c, ptrs, wp, &devs,
|
ret = bucket_alloc_set_partial(c, ptrs, wp, &devs,
|
||||||
nr_replicas, nr_effective,
|
nr_replicas, nr_effective,
|
||||||
have_cache, erasure_code, watermark, flags);
|
have_cache, erasure_code, watermark);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
@ -1071,7 +1071,7 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
|
|||||||
unsigned *nr_effective,
|
unsigned *nr_effective,
|
||||||
bool *have_cache,
|
bool *have_cache,
|
||||||
enum bch_watermark watermark,
|
enum bch_watermark watermark,
|
||||||
unsigned flags,
|
enum bch_write_flags flags,
|
||||||
struct closure *cl)
|
struct closure *cl)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
@ -1373,7 +1373,7 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
|
|||||||
unsigned nr_replicas,
|
unsigned nr_replicas,
|
||||||
unsigned nr_replicas_required,
|
unsigned nr_replicas_required,
|
||||||
enum bch_watermark watermark,
|
enum bch_watermark watermark,
|
||||||
unsigned flags,
|
enum bch_write_flags flags,
|
||||||
struct closure *cl,
|
struct closure *cl,
|
||||||
struct write_point **wp_ret)
|
struct write_point **wp_ret)
|
||||||
{
|
{
|
||||||
@ -1389,8 +1389,6 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
|
|||||||
if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
|
if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
|
||||||
erasure_code = false;
|
erasure_code = false;
|
||||||
|
|
||||||
BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
|
|
||||||
|
|
||||||
BUG_ON(!nr_replicas || !nr_replicas_required);
|
BUG_ON(!nr_replicas || !nr_replicas_required);
|
||||||
retry:
|
retry:
|
||||||
ptrs.nr = 0;
|
ptrs.nr = 0;
|
||||||
@ -1495,11 +1493,12 @@ err:
|
|||||||
try_decrease_writepoints(trans, write_points_nr))
|
try_decrease_writepoints(trans, write_points_nr))
|
||||||
goto retry;
|
goto retry;
|
||||||
|
|
||||||
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) ||
|
if (cl && bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
|
||||||
|
ret = -BCH_ERR_bucket_alloc_blocked;
|
||||||
|
|
||||||
|
if (cl && !(flags & BCH_WRITE_ALLOC_NOWAIT) &&
|
||||||
bch2_err_matches(ret, BCH_ERR_freelist_empty))
|
bch2_err_matches(ret, BCH_ERR_freelist_empty))
|
||||||
return cl
|
ret = -BCH_ERR_bucket_alloc_blocked;
|
||||||
? -BCH_ERR_bucket_alloc_blocked
|
|
||||||
: -BCH_ERR_ENOSPC_bucket_alloc;
|
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -1730,13 +1729,6 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
|
|||||||
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
|
||||||
nr[c->open_buckets[i].data_type]++;
|
nr[c->open_buckets[i].data_type]++;
|
||||||
|
|
||||||
printbuf_tabstops_reset(out);
|
|
||||||
printbuf_tabstop_push(out, 12);
|
|
||||||
printbuf_tabstop_push(out, 16);
|
|
||||||
printbuf_tabstop_push(out, 16);
|
|
||||||
printbuf_tabstop_push(out, 16);
|
|
||||||
printbuf_tabstop_push(out, 16);
|
|
||||||
|
|
||||||
bch2_dev_usage_to_text(out, ca, &stats);
|
bch2_dev_usage_to_text(out, ca, &stats);
|
||||||
|
|
||||||
prt_newline(out);
|
prt_newline(out);
|
||||||
|
@ -155,9 +155,10 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum bch_write_flags;
|
||||||
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
|
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
|
||||||
struct dev_stripe_state *, struct bch_devs_mask *,
|
struct dev_stripe_state *, struct bch_devs_mask *,
|
||||||
unsigned, unsigned *, bool *, unsigned,
|
unsigned, unsigned *, bool *, enum bch_write_flags,
|
||||||
enum bch_data_type, enum bch_watermark,
|
enum bch_data_type, enum bch_watermark,
|
||||||
struct closure *);
|
struct closure *);
|
||||||
|
|
||||||
@ -167,7 +168,7 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *,
|
|||||||
struct bch_devs_list *,
|
struct bch_devs_list *,
|
||||||
unsigned, unsigned,
|
unsigned, unsigned,
|
||||||
enum bch_watermark,
|
enum bch_watermark,
|
||||||
unsigned,
|
enum bch_write_flags,
|
||||||
struct closure *,
|
struct closure *,
|
||||||
struct write_point **);
|
struct write_point **);
|
||||||
|
|
||||||
|
@ -3,12 +3,14 @@
|
|||||||
#include "bbpos.h"
|
#include "bbpos.h"
|
||||||
#include "alloc_background.h"
|
#include "alloc_background.h"
|
||||||
#include "backpointers.h"
|
#include "backpointers.h"
|
||||||
|
#include "bbpos.h"
|
||||||
#include "bkey_buf.h"
|
#include "bkey_buf.h"
|
||||||
#include "btree_cache.h"
|
#include "btree_cache.h"
|
||||||
#include "btree_update.h"
|
#include "btree_update.h"
|
||||||
#include "btree_update_interior.h"
|
#include "btree_update_interior.h"
|
||||||
#include "btree_write_buffer.h"
|
#include "btree_write_buffer.h"
|
||||||
#include "checksum.h"
|
#include "checksum.h"
|
||||||
|
#include "disk_accounting.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
|
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
@ -750,10 +752,12 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
|
|||||||
s64 mem_may_pin = mem_may_pin_bytes(c);
|
s64 mem_may_pin = mem_may_pin_bytes(c);
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
|
bch2_btree_cache_unpin(c);
|
||||||
|
|
||||||
btree_interior_mask |= btree_leaf_mask;
|
btree_interior_mask |= btree_leaf_mask;
|
||||||
|
|
||||||
c->btree_cache.pinned_nodes_leaf_mask = btree_leaf_mask;
|
c->btree_cache.pinned_nodes_mask[0] = btree_leaf_mask;
|
||||||
c->btree_cache.pinned_nodes_interior_mask = btree_interior_mask;
|
c->btree_cache.pinned_nodes_mask[1] = btree_interior_mask;
|
||||||
c->btree_cache.pinned_nodes_start = start;
|
c->btree_cache.pinned_nodes_start = start;
|
||||||
c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;
|
c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;
|
||||||
|
|
||||||
@ -775,6 +779,7 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
|
|||||||
BBPOS(btree, b->key.k.p);
|
BBPOS(btree, b->key.k.p);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
bch2_node_pin(c, b);
|
||||||
0;
|
0;
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
@ -782,12 +787,80 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct progress_indicator_state {
|
||||||
|
unsigned long next_print;
|
||||||
|
u64 nodes_seen;
|
||||||
|
u64 nodes_total;
|
||||||
|
struct btree *last_node;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline void progress_init(struct progress_indicator_state *s,
|
||||||
|
struct bch_fs *c,
|
||||||
|
u64 btree_id_mask)
|
||||||
|
{
|
||||||
|
memset(s, 0, sizeof(*s));
|
||||||
|
|
||||||
|
s->next_print = jiffies + HZ * 10;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < BTREE_ID_NR; i++) {
|
||||||
|
if (!(btree_id_mask & BIT_ULL(i)))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
struct disk_accounting_pos acc = {
|
||||||
|
.type = BCH_DISK_ACCOUNTING_btree,
|
||||||
|
.btree.id = i,
|
||||||
|
};
|
||||||
|
|
||||||
|
u64 v;
|
||||||
|
bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
|
||||||
|
s->nodes_total += div64_ul(v, btree_sectors(c));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool progress_update_p(struct progress_indicator_state *s)
|
||||||
|
{
|
||||||
|
bool ret = time_after_eq(jiffies, s->next_print);
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
s->next_print = jiffies + HZ * 10;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void progress_update_iter(struct btree_trans *trans,
|
||||||
|
struct progress_indicator_state *s,
|
||||||
|
struct btree_iter *iter,
|
||||||
|
const char *msg)
|
||||||
|
{
|
||||||
|
struct bch_fs *c = trans->c;
|
||||||
|
struct btree *b = path_l(btree_iter_path(trans, iter))->b;
|
||||||
|
|
||||||
|
s->nodes_seen += b != s->last_node;
|
||||||
|
s->last_node = b;
|
||||||
|
|
||||||
|
if (progress_update_p(s)) {
|
||||||
|
struct printbuf buf = PRINTBUF;
|
||||||
|
unsigned percent = s->nodes_total
|
||||||
|
? div64_u64(s->nodes_seen * 100, s->nodes_total)
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ",
|
||||||
|
msg, percent, s->nodes_seen, s->nodes_total);
|
||||||
|
bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos));
|
||||||
|
|
||||||
|
bch_info(c, "%s", buf.buf);
|
||||||
|
printbuf_exit(&buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
||||||
struct extents_to_bp_state *s)
|
struct extents_to_bp_state *s)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
|
struct progress_indicator_state progress;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
|
progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));
|
||||||
|
|
||||||
for (enum btree_id btree_id = 0;
|
for (enum btree_id btree_id = 0;
|
||||||
btree_id < btree_id_nr_alive(c);
|
btree_id < btree_id_nr_alive(c);
|
||||||
btree_id++) {
|
btree_id++) {
|
||||||
@ -805,6 +878,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
|||||||
BTREE_ITER_prefetch);
|
BTREE_ITER_prefetch);
|
||||||
|
|
||||||
ret = for_each_btree_key_continue(trans, iter, 0, k, ({
|
ret = for_each_btree_key_continue(trans, iter, 0, k, ({
|
||||||
|
progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
|
||||||
check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
|
check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
|
||||||
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||||
}));
|
}));
|
||||||
@ -865,8 +939,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
|
|||||||
bch2_trans_put(trans);
|
bch2_trans_put(trans);
|
||||||
bch2_bkey_buf_exit(&s.last_flushed, c);
|
bch2_bkey_buf_exit(&s.last_flushed, c);
|
||||||
|
|
||||||
c->btree_cache.pinned_nodes_leaf_mask = 0;
|
bch2_btree_cache_unpin(c);
|
||||||
c->btree_cache.pinned_nodes_interior_mask = 0;
|
|
||||||
|
|
||||||
bch_err_fn(c, ret);
|
bch_err_fn(c, ret);
|
||||||
return ret;
|
return ret;
|
||||||
@ -920,19 +993,24 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
|
|||||||
struct bbpos start,
|
struct bbpos start,
|
||||||
struct bbpos end)
|
struct bbpos end)
|
||||||
{
|
{
|
||||||
|
struct bch_fs *c = trans->c;
|
||||||
struct bkey_buf last_flushed;
|
struct bkey_buf last_flushed;
|
||||||
|
struct progress_indicator_state progress;
|
||||||
|
|
||||||
bch2_bkey_buf_init(&last_flushed);
|
bch2_bkey_buf_init(&last_flushed);
|
||||||
bkey_init(&last_flushed.k->k);
|
bkey_init(&last_flushed.k->k);
|
||||||
|
progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));
|
||||||
|
|
||||||
int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
|
int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
|
||||||
POS_MIN, BTREE_ITER_prefetch, k,
|
POS_MIN, BTREE_ITER_prefetch, k,
|
||||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
|
||||||
|
progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
|
||||||
check_one_backpointer(trans, start, end,
|
check_one_backpointer(trans, start, end,
|
||||||
bkey_s_c_to_backpointer(k),
|
bkey_s_c_to_backpointer(k),
|
||||||
&last_flushed));
|
&last_flushed);
|
||||||
|
}));
|
||||||
|
|
||||||
bch2_bkey_buf_exit(&last_flushed, trans->c);
|
bch2_bkey_buf_exit(&last_flushed, c);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -977,8 +1055,7 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
|
|||||||
}
|
}
|
||||||
bch2_trans_put(trans);
|
bch2_trans_put(trans);
|
||||||
|
|
||||||
c->btree_cache.pinned_nodes_leaf_mask = 0;
|
bch2_btree_cache_unpin(c);
|
||||||
c->btree_cache.pinned_nodes_interior_mask = 0;
|
|
||||||
|
|
||||||
bch_err_fn(c, ret);
|
bch_err_fn(c, ret);
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -542,7 +542,7 @@ struct bch_dev {
|
|||||||
* gc_gens_lock, for device resize - holding any is sufficient for
|
* gc_gens_lock, for device resize - holding any is sufficient for
|
||||||
* access: Or rcu_read_lock(), but only for dev_ptr_stale():
|
* access: Or rcu_read_lock(), but only for dev_ptr_stale():
|
||||||
*/
|
*/
|
||||||
struct bucket_array __rcu *buckets_gc;
|
GENRADIX(struct bucket) buckets_gc;
|
||||||
struct bucket_gens __rcu *bucket_gens;
|
struct bucket_gens __rcu *bucket_gens;
|
||||||
u8 *oldest_gen;
|
u8 *oldest_gen;
|
||||||
unsigned long *buckets_nouse;
|
unsigned long *buckets_nouse;
|
||||||
@ -871,6 +871,7 @@ struct bch_fs {
|
|||||||
|
|
||||||
/* ALLOCATION */
|
/* ALLOCATION */
|
||||||
struct bch_devs_mask rw_devs[BCH_DATA_NR];
|
struct bch_devs_mask rw_devs[BCH_DATA_NR];
|
||||||
|
unsigned long rw_devs_change_count;
|
||||||
|
|
||||||
u64 capacity; /* sectors */
|
u64 capacity; /* sectors */
|
||||||
u64 reserved; /* sectors */
|
u64 reserved; /* sectors */
|
||||||
@ -1045,8 +1046,6 @@ struct bch_fs {
|
|||||||
* for signaling to the toplevel code which pass we want to run now.
|
* for signaling to the toplevel code which pass we want to run now.
|
||||||
*/
|
*/
|
||||||
enum bch_recovery_pass curr_recovery_pass;
|
enum bch_recovery_pass curr_recovery_pass;
|
||||||
/* bitmap of explicitly enabled recovery passes: */
|
|
||||||
u64 recovery_passes_explicit;
|
|
||||||
/* bitmask of recovery passes that we actually ran */
|
/* bitmask of recovery passes that we actually ran */
|
||||||
u64 recovery_passes_complete;
|
u64 recovery_passes_complete;
|
||||||
/* never rewinds version of curr_recovery_pass */
|
/* never rewinds version of curr_recovery_pass */
|
||||||
@ -1195,12 +1194,15 @@ static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree)
|
|||||||
static inline struct timespec64 bch2_time_to_timespec(const struct bch_fs *c, s64 time)
|
static inline struct timespec64 bch2_time_to_timespec(const struct bch_fs *c, s64 time)
|
||||||
{
|
{
|
||||||
struct timespec64 t;
|
struct timespec64 t;
|
||||||
|
s64 sec;
|
||||||
s32 rem;
|
s32 rem;
|
||||||
|
|
||||||
time += c->sb.time_base_lo;
|
time += c->sb.time_base_lo;
|
||||||
|
|
||||||
t.tv_sec = div_s64_rem(time, c->sb.time_units_per_sec, &rem);
|
sec = div_s64_rem(time, c->sb.time_units_per_sec, &rem);
|
||||||
t.tv_nsec = rem * c->sb.nsec_per_time_unit;
|
|
||||||
|
set_normalized_timespec64(&t, sec, rem * (s64)c->sb.nsec_per_time_unit);
|
||||||
|
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,11 +15,12 @@
|
|||||||
|
|
||||||
#include <linux/prefetch.h>
|
#include <linux/prefetch.h>
|
||||||
#include <linux/sched/mm.h>
|
#include <linux/sched/mm.h>
|
||||||
|
#include <linux/swap.h>
|
||||||
|
|
||||||
#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
|
#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
|
||||||
do { \
|
do { \
|
||||||
if (shrinker_counter) \
|
if (shrinker_counter) \
|
||||||
bc->not_freed_##counter++; \
|
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_##counter]++; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
const char * const bch2_btree_node_flags[] = {
|
const char * const bch2_btree_node_flags[] = {
|
||||||
@ -31,24 +32,29 @@ const char * const bch2_btree_node_flags[] = {
|
|||||||
|
|
||||||
void bch2_recalc_btree_reserve(struct bch_fs *c)
|
void bch2_recalc_btree_reserve(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
unsigned i, reserve = 16;
|
unsigned reserve = 16;
|
||||||
|
|
||||||
if (!c->btree_roots_known[0].b)
|
if (!c->btree_roots_known[0].b)
|
||||||
reserve += 8;
|
reserve += 8;
|
||||||
|
|
||||||
for (i = 0; i < btree_id_nr_alive(c); i++) {
|
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
|
||||||
struct btree_root *r = bch2_btree_id_root(c, i);
|
struct btree_root *r = bch2_btree_id_root(c, i);
|
||||||
|
|
||||||
if (r->b)
|
if (r->b)
|
||||||
reserve += min_t(unsigned, 1, r->b->c.level) * 8;
|
reserve += min_t(unsigned, 1, r->b->c.level) * 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
c->btree_cache.reserve = reserve;
|
c->btree_cache.nr_reserve = reserve;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned btree_cache_can_free(struct btree_cache *bc)
|
static inline size_t btree_cache_can_free(struct btree_cache_list *list)
|
||||||
{
|
{
|
||||||
return max_t(int, 0, bc->used - bc->reserve);
|
struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
|
||||||
|
|
||||||
|
size_t can_free = list->nr;
|
||||||
|
if (!list->idx)
|
||||||
|
can_free = max_t(ssize_t, 0, can_free - bc->nr_reserve);
|
||||||
|
return can_free;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
|
static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
|
||||||
@ -63,6 +69,18 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
|
|||||||
{
|
{
|
||||||
struct btree_cache *bc = &c->btree_cache;
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
|
|
||||||
|
BUG_ON(btree_node_hashed(b));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This should really be done in slub/vmalloc, but we're using the
|
||||||
|
* kmalloc_large() path, so we're working around a slub bug by doing
|
||||||
|
* this here:
|
||||||
|
*/
|
||||||
|
if (b->data)
|
||||||
|
mm_account_reclaimed_pages(btree_buf_bytes(b) / PAGE_SIZE);
|
||||||
|
if (b->aux_data)
|
||||||
|
mm_account_reclaimed_pages(btree_aux_data_bytes(b) / PAGE_SIZE);
|
||||||
|
|
||||||
EBUG_ON(btree_node_write_in_flight(b));
|
EBUG_ON(btree_node_write_in_flight(b));
|
||||||
|
|
||||||
clear_btree_node_just_written(b);
|
clear_btree_node_just_written(b);
|
||||||
@ -76,7 +94,7 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
|
|||||||
#endif
|
#endif
|
||||||
b->aux_data = NULL;
|
b->aux_data = NULL;
|
||||||
|
|
||||||
bc->used--;
|
bc->nr_freeable--;
|
||||||
|
|
||||||
btree_node_to_freedlist(bc, b);
|
btree_node_to_freedlist(bc, b);
|
||||||
}
|
}
|
||||||
@ -102,6 +120,8 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
|
|||||||
{
|
{
|
||||||
BUG_ON(b->data || b->aux_data);
|
BUG_ON(b->data || b->aux_data);
|
||||||
|
|
||||||
|
gfp |= __GFP_ACCOUNT|__GFP_RECLAIMABLE;
|
||||||
|
|
||||||
b->data = kvmalloc(btree_buf_bytes(b), gfp);
|
b->data = kvmalloc(btree_buf_bytes(b), gfp);
|
||||||
if (!b->data)
|
if (!b->data)
|
||||||
return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
|
return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
|
||||||
@ -154,7 +174,7 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
|
|||||||
|
|
||||||
bch2_btree_lock_init(&b->c, 0);
|
bch2_btree_lock_init(&b->c, 0);
|
||||||
|
|
||||||
bc->used++;
|
bc->nr_freeable++;
|
||||||
list_add(&b->list, &bc->freeable);
|
list_add(&b->list, &bc->freeable);
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
@ -169,10 +189,56 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
|
|||||||
six_unlock_intent(&b->c.lock);
|
six_unlock_intent(&b->c.lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
|
||||||
|
{
|
||||||
|
struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
|
||||||
|
|
||||||
|
u64 mask = bc->pinned_nodes_mask[!!b->c.level];
|
||||||
|
|
||||||
|
return ((mask & BIT_ULL(b->c.btree_id)) &&
|
||||||
|
bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
|
||||||
|
bbpos_cmp(bc->pinned_nodes_end, pos) >= 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void bch2_node_pin(struct bch_fs *c, struct btree *b)
|
||||||
|
{
|
||||||
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
|
|
||||||
|
mutex_lock(&bc->lock);
|
||||||
|
BUG_ON(!__btree_node_pinned(bc, b));
|
||||||
|
if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
|
||||||
|
set_btree_node_pinned(b);
|
||||||
|
list_move(&b->list, &bc->live[1].list);
|
||||||
|
bc->live[0].nr--;
|
||||||
|
bc->live[1].nr++;
|
||||||
|
}
|
||||||
|
mutex_unlock(&bc->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void bch2_btree_cache_unpin(struct bch_fs *c)
|
||||||
|
{
|
||||||
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
|
struct btree *b, *n;
|
||||||
|
|
||||||
|
mutex_lock(&bc->lock);
|
||||||
|
c->btree_cache.pinned_nodes_mask[0] = 0;
|
||||||
|
c->btree_cache.pinned_nodes_mask[1] = 0;
|
||||||
|
|
||||||
|
list_for_each_entry_safe(b, n, &bc->live[1].list, list) {
|
||||||
|
clear_btree_node_pinned(b);
|
||||||
|
list_move(&b->list, &bc->live[0].list);
|
||||||
|
bc->live[0].nr++;
|
||||||
|
bc->live[1].nr--;
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_unlock(&bc->lock);
|
||||||
|
}
|
||||||
|
|
||||||
/* Btree in memory cache - hash table */
|
/* Btree in memory cache - hash table */
|
||||||
|
|
||||||
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
||||||
{
|
{
|
||||||
|
lockdep_assert_held(&bc->lock);
|
||||||
int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
|
int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
|
||||||
|
|
||||||
BUG_ON(ret);
|
BUG_ON(ret);
|
||||||
@ -181,7 +247,11 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
|
|||||||
b->hash_val = 0;
|
b->hash_val = 0;
|
||||||
|
|
||||||
if (b->c.btree_id < BTREE_ID_NR)
|
if (b->c.btree_id < BTREE_ID_NR)
|
||||||
--bc->used_by_btree[b->c.btree_id];
|
--bc->nr_by_btree[b->c.btree_id];
|
||||||
|
|
||||||
|
bc->live[btree_node_pinned(b)].nr--;
|
||||||
|
bc->nr_freeable++;
|
||||||
|
list_move(&b->list, &bc->freeable);
|
||||||
}
|
}
|
||||||
|
|
||||||
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
|
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
|
||||||
@ -191,23 +261,30 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
|
|||||||
|
|
||||||
int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash,
|
int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash,
|
||||||
bch_btree_cache_params);
|
bch_btree_cache_params);
|
||||||
if (!ret && b->c.btree_id < BTREE_ID_NR)
|
if (ret)
|
||||||
bc->used_by_btree[b->c.btree_id]++;
|
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
if (b->c.btree_id < BTREE_ID_NR)
|
||||||
|
bc->nr_by_btree[b->c.btree_id]++;
|
||||||
|
|
||||||
|
bool p = __btree_node_pinned(bc, b);
|
||||||
|
mod_bit(BTREE_NODE_pinned, &b->flags, p);
|
||||||
|
|
||||||
|
list_move_tail(&b->list, &bc->live[p].list);
|
||||||
|
bc->live[p].nr++;
|
||||||
|
|
||||||
|
bc->nr_freeable--;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
|
int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
|
||||||
unsigned level, enum btree_id id)
|
unsigned level, enum btree_id id)
|
||||||
{
|
{
|
||||||
int ret;
|
|
||||||
|
|
||||||
b->c.level = level;
|
b->c.level = level;
|
||||||
b->c.btree_id = id;
|
b->c.btree_id = id;
|
||||||
|
|
||||||
mutex_lock(&bc->lock);
|
mutex_lock(&bc->lock);
|
||||||
ret = __bch2_btree_node_hash_insert(bc, b);
|
int ret = __bch2_btree_node_hash_insert(bc, b);
|
||||||
if (!ret)
|
|
||||||
list_add_tail(&b->list, &bc->live);
|
|
||||||
mutex_unlock(&bc->lock);
|
mutex_unlock(&bc->lock);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -261,18 +338,6 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, b
|
|||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
lockdep_assert_held(&bc->lock);
|
lockdep_assert_held(&bc->lock);
|
||||||
|
|
||||||
struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
|
|
||||||
|
|
||||||
u64 mask = b->c.level
|
|
||||||
? bc->pinned_nodes_interior_mask
|
|
||||||
: bc->pinned_nodes_leaf_mask;
|
|
||||||
|
|
||||||
if ((mask & BIT_ULL(b->c.btree_id)) &&
|
|
||||||
bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
|
|
||||||
bbpos_cmp(bc->pinned_nodes_end, pos) >= 0)
|
|
||||||
return -BCH_ERR_ENOMEM_btree_node_reclaim;
|
|
||||||
|
|
||||||
wait_on_io:
|
wait_on_io:
|
||||||
if (b->flags & ((1U << BTREE_NODE_dirty)|
|
if (b->flags & ((1U << BTREE_NODE_dirty)|
|
||||||
(1U << BTREE_NODE_read_in_flight)|
|
(1U << BTREE_NODE_read_in_flight)|
|
||||||
@ -377,8 +442,9 @@ static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
|
|||||||
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
||||||
struct shrink_control *sc)
|
struct shrink_control *sc)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = shrink->private_data;
|
struct btree_cache_list *list = shrink->private_data;
|
||||||
struct btree_cache *bc = &c->btree_cache;
|
struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
|
||||||
|
struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
|
||||||
struct btree *b, *t;
|
struct btree *b, *t;
|
||||||
unsigned long nr = sc->nr_to_scan;
|
unsigned long nr = sc->nr_to_scan;
|
||||||
unsigned long can_free = 0;
|
unsigned long can_free = 0;
|
||||||
@ -386,8 +452,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
|||||||
unsigned long touched = 0;
|
unsigned long touched = 0;
|
||||||
unsigned i, flags;
|
unsigned i, flags;
|
||||||
unsigned long ret = SHRINK_STOP;
|
unsigned long ret = SHRINK_STOP;
|
||||||
bool trigger_writes = atomic_read(&bc->dirty) + nr >=
|
bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >= list->nr * 3 / 4;
|
||||||
bc->used * 3 / 4;
|
|
||||||
|
|
||||||
if (bch2_btree_shrinker_disabled)
|
if (bch2_btree_shrinker_disabled)
|
||||||
return SHRINK_STOP;
|
return SHRINK_STOP;
|
||||||
@ -402,7 +467,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
|||||||
* succeed, so that inserting keys into the btree can always succeed and
|
* succeed, so that inserting keys into the btree can always succeed and
|
||||||
* IO can always make forward progress:
|
* IO can always make forward progress:
|
||||||
*/
|
*/
|
||||||
can_free = btree_cache_can_free(bc);
|
can_free = btree_cache_can_free(list);
|
||||||
nr = min_t(unsigned long, nr, can_free);
|
nr = min_t(unsigned long, nr, can_free);
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
@ -424,22 +489,24 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
|
|||||||
six_unlock_write(&b->c.lock);
|
six_unlock_write(&b->c.lock);
|
||||||
six_unlock_intent(&b->c.lock);
|
six_unlock_intent(&b->c.lock);
|
||||||
freed++;
|
freed++;
|
||||||
bc->freed++;
|
bc->nr_freed++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
restart:
|
restart:
|
||||||
list_for_each_entry_safe(b, t, &bc->live, list) {
|
list_for_each_entry_safe(b, t, &list->list, list) {
|
||||||
touched++;
|
touched++;
|
||||||
|
|
||||||
if (btree_node_accessed(b)) {
|
if (btree_node_accessed(b)) {
|
||||||
clear_btree_node_accessed(b);
|
clear_btree_node_accessed(b);
|
||||||
bc->not_freed_access_bit++;
|
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++;
|
||||||
|
--touched;;
|
||||||
} else if (!btree_node_reclaim(c, b, true)) {
|
} else if (!btree_node_reclaim(c, b, true)) {
|
||||||
|
bch2_btree_node_hash_remove(bc, b);
|
||||||
|
|
||||||
freed++;
|
freed++;
|
||||||
btree_node_data_free(c, b);
|
btree_node_data_free(c, b);
|
||||||
bc->freed++;
|
bc->nr_freed++;
|
||||||
|
|
||||||
bch2_btree_node_hash_remove(bc, b);
|
|
||||||
six_unlock_write(&b->c.lock);
|
six_unlock_write(&b->c.lock);
|
||||||
six_unlock_intent(&b->c.lock);
|
six_unlock_intent(&b->c.lock);
|
||||||
|
|
||||||
@ -450,7 +517,7 @@ restart:
|
|||||||
!btree_node_will_make_reachable(b) &&
|
!btree_node_will_make_reachable(b) &&
|
||||||
!btree_node_write_blocked(b) &&
|
!btree_node_write_blocked(b) &&
|
||||||
six_trylock_read(&b->c.lock)) {
|
six_trylock_read(&b->c.lock)) {
|
||||||
list_move(&bc->live, &b->list);
|
list_move(&list->list, &b->list);
|
||||||
mutex_unlock(&bc->lock);
|
mutex_unlock(&bc->lock);
|
||||||
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
|
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
|
||||||
six_unlock_read(&b->c.lock);
|
six_unlock_read(&b->c.lock);
|
||||||
@ -464,8 +531,8 @@ restart:
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
out_rotate:
|
out_rotate:
|
||||||
if (&t->list != &bc->live)
|
if (&t->list != &list->list)
|
||||||
list_move_tail(&bc->live, &t->list);
|
list_move_tail(&list->list, &t->list);
|
||||||
out:
|
out:
|
||||||
mutex_unlock(&bc->lock);
|
mutex_unlock(&bc->lock);
|
||||||
out_nounlock:
|
out_nounlock:
|
||||||
@ -478,44 +545,45 @@ out_nounlock:
|
|||||||
static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
|
static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
|
||||||
struct shrink_control *sc)
|
struct shrink_control *sc)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = shrink->private_data;
|
struct btree_cache_list *list = shrink->private_data;
|
||||||
struct btree_cache *bc = &c->btree_cache;
|
|
||||||
|
|
||||||
if (bch2_btree_shrinker_disabled)
|
if (bch2_btree_shrinker_disabled)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return btree_cache_can_free(bc);
|
return btree_cache_can_free(list);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
struct btree_cache *bc = &c->btree_cache;
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
struct btree *b;
|
struct btree *b, *t;
|
||||||
unsigned i, flags;
|
unsigned long flags;
|
||||||
|
|
||||||
shrinker_free(bc->shrink);
|
shrinker_free(bc->live[1].shrink);
|
||||||
|
shrinker_free(bc->live[0].shrink);
|
||||||
|
|
||||||
/* vfree() can allocate memory: */
|
/* vfree() can allocate memory: */
|
||||||
flags = memalloc_nofs_save();
|
flags = memalloc_nofs_save();
|
||||||
mutex_lock(&bc->lock);
|
mutex_lock(&bc->lock);
|
||||||
|
|
||||||
if (c->verify_data)
|
if (c->verify_data)
|
||||||
list_move(&c->verify_data->list, &bc->live);
|
list_move(&c->verify_data->list, &bc->live[0].list);
|
||||||
|
|
||||||
kvfree(c->verify_ondisk);
|
kvfree(c->verify_ondisk);
|
||||||
|
|
||||||
for (i = 0; i < btree_id_nr_alive(c); i++) {
|
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
|
||||||
struct btree_root *r = bch2_btree_id_root(c, i);
|
struct btree_root *r = bch2_btree_id_root(c, i);
|
||||||
|
|
||||||
if (r->b)
|
if (r->b)
|
||||||
list_add(&r->b->list, &bc->live);
|
list_add(&r->b->list, &bc->live[0].list);
|
||||||
}
|
}
|
||||||
|
|
||||||
list_splice(&bc->freeable, &bc->live);
|
list_for_each_entry_safe(b, t, &bc->live[1].list, list)
|
||||||
|
bch2_btree_node_hash_remove(bc, b);
|
||||||
while (!list_empty(&bc->live)) {
|
list_for_each_entry_safe(b, t, &bc->live[0].list, list)
|
||||||
b = list_first_entry(&bc->live, struct btree, list);
|
bch2_btree_node_hash_remove(bc, b);
|
||||||
|
|
||||||
|
list_for_each_entry_safe(b, t, &bc->freeable, list) {
|
||||||
BUG_ON(btree_node_read_in_flight(b) ||
|
BUG_ON(btree_node_read_in_flight(b) ||
|
||||||
btree_node_write_in_flight(b));
|
btree_node_write_in_flight(b));
|
||||||
|
|
||||||
@ -523,12 +591,11 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
|||||||
}
|
}
|
||||||
|
|
||||||
BUG_ON(!bch2_journal_error(&c->journal) &&
|
BUG_ON(!bch2_journal_error(&c->journal) &&
|
||||||
atomic_read(&c->btree_cache.dirty));
|
atomic_long_read(&c->btree_cache.nr_dirty));
|
||||||
|
|
||||||
list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
|
list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
|
||||||
|
|
||||||
while (!list_empty(&bc->freed_nonpcpu)) {
|
list_for_each_entry_safe(b, t, &bc->freed_nonpcpu, list) {
|
||||||
b = list_first_entry(&bc->freed_nonpcpu, struct btree, list);
|
|
||||||
list_del(&b->list);
|
list_del(&b->list);
|
||||||
six_lock_exit(&b->c.lock);
|
six_lock_exit(&b->c.lock);
|
||||||
kfree(b);
|
kfree(b);
|
||||||
@ -537,6 +604,12 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
|||||||
mutex_unlock(&bc->lock);
|
mutex_unlock(&bc->lock);
|
||||||
memalloc_nofs_restore(flags);
|
memalloc_nofs_restore(flags);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++)
|
||||||
|
BUG_ON(bc->nr_by_btree[i]);
|
||||||
|
BUG_ON(bc->live[0].nr);
|
||||||
|
BUG_ON(bc->live[1].nr);
|
||||||
|
BUG_ON(bc->nr_freeable);
|
||||||
|
|
||||||
if (bc->table_init_done)
|
if (bc->table_init_done)
|
||||||
rhashtable_destroy(&bc->table);
|
rhashtable_destroy(&bc->table);
|
||||||
}
|
}
|
||||||
@ -556,22 +629,32 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
|
|||||||
|
|
||||||
bch2_recalc_btree_reserve(c);
|
bch2_recalc_btree_reserve(c);
|
||||||
|
|
||||||
for (i = 0; i < bc->reserve; i++)
|
for (i = 0; i < bc->nr_reserve; i++)
|
||||||
if (!__bch2_btree_node_mem_alloc(c))
|
if (!__bch2_btree_node_mem_alloc(c))
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
list_splice_init(&bc->live, &bc->freeable);
|
list_splice_init(&bc->live[0].list, &bc->freeable);
|
||||||
|
|
||||||
mutex_init(&c->verify_lock);
|
mutex_init(&c->verify_lock);
|
||||||
|
|
||||||
shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
|
shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
|
||||||
if (!shrink)
|
if (!shrink)
|
||||||
goto err;
|
goto err;
|
||||||
bc->shrink = shrink;
|
bc->live[0].shrink = shrink;
|
||||||
shrink->count_objects = bch2_btree_cache_count;
|
shrink->count_objects = bch2_btree_cache_count;
|
||||||
shrink->scan_objects = bch2_btree_cache_scan;
|
shrink->scan_objects = bch2_btree_cache_scan;
|
||||||
shrink->seeks = 4;
|
shrink->seeks = 2;
|
||||||
shrink->private_data = c;
|
shrink->private_data = &bc->live[0];
|
||||||
|
shrinker_register(shrink);
|
||||||
|
|
||||||
|
shrink = shrinker_alloc(0, "%s-btree_cache-pinned", c->name);
|
||||||
|
if (!shrink)
|
||||||
|
goto err;
|
||||||
|
bc->live[1].shrink = shrink;
|
||||||
|
shrink->count_objects = bch2_btree_cache_count;
|
||||||
|
shrink->scan_objects = bch2_btree_cache_scan;
|
||||||
|
shrink->seeks = 8;
|
||||||
|
shrink->private_data = &bc->live[1];
|
||||||
shrinker_register(shrink);
|
shrinker_register(shrink);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -582,7 +665,10 @@ err:
|
|||||||
void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
|
void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
|
||||||
{
|
{
|
||||||
mutex_init(&bc->lock);
|
mutex_init(&bc->lock);
|
||||||
INIT_LIST_HEAD(&bc->live);
|
for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) {
|
||||||
|
bc->live[i].idx = i;
|
||||||
|
INIT_LIST_HEAD(&bc->live[i].list);
|
||||||
|
}
|
||||||
INIT_LIST_HEAD(&bc->freeable);
|
INIT_LIST_HEAD(&bc->freeable);
|
||||||
INIT_LIST_HEAD(&bc->freed_pcpu);
|
INIT_LIST_HEAD(&bc->freed_pcpu);
|
||||||
INIT_LIST_HEAD(&bc->freed_nonpcpu);
|
INIT_LIST_HEAD(&bc->freed_nonpcpu);
|
||||||
@ -644,12 +730,14 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
|
|||||||
struct btree_cache *bc = &c->btree_cache;
|
struct btree_cache *bc = &c->btree_cache;
|
||||||
struct btree *b;
|
struct btree *b;
|
||||||
|
|
||||||
list_for_each_entry_reverse(b, &bc->live, list)
|
for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
|
||||||
|
list_for_each_entry_reverse(b, &bc->live[i].list, list)
|
||||||
if (!btree_node_reclaim(c, b, false))
|
if (!btree_node_reclaim(c, b, false))
|
||||||
return b;
|
return b;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
list_for_each_entry_reverse(b, &bc->live, list)
|
for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
|
||||||
|
list_for_each_entry_reverse(b, &bc->live[i].list, list)
|
||||||
if (!btree_node_write_and_reclaim(c, b))
|
if (!btree_node_write_and_reclaim(c, b))
|
||||||
return b;
|
return b;
|
||||||
|
|
||||||
@ -716,14 +804,15 @@ got_node:
|
|||||||
|
|
||||||
mutex_unlock(&bc->lock);
|
mutex_unlock(&bc->lock);
|
||||||
|
|
||||||
if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) {
|
if (memalloc_flags_do(PF_MEMALLOC_NORECLAIM,
|
||||||
|
btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))) {
|
||||||
bch2_trans_unlock(trans);
|
bch2_trans_unlock(trans);
|
||||||
if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))
|
if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_lock(&bc->lock);
|
mutex_lock(&bc->lock);
|
||||||
bc->used++;
|
bc->nr_freeable++;
|
||||||
got_mem:
|
got_mem:
|
||||||
mutex_unlock(&bc->lock);
|
mutex_unlock(&bc->lock);
|
||||||
|
|
||||||
@ -1264,8 +1353,8 @@ wait_on_io:
|
|||||||
BUG_ON(btree_node_dirty(b));
|
BUG_ON(btree_node_dirty(b));
|
||||||
|
|
||||||
mutex_lock(&bc->lock);
|
mutex_lock(&bc->lock);
|
||||||
btree_node_data_free(c, b);
|
|
||||||
bch2_btree_node_hash_remove(bc, b);
|
bch2_btree_node_hash_remove(bc, b);
|
||||||
|
btree_node_data_free(c, b);
|
||||||
mutex_unlock(&bc->lock);
|
mutex_unlock(&bc->lock);
|
||||||
out:
|
out:
|
||||||
six_unlock_write(&b->c.lock);
|
six_unlock_write(&b->c.lock);
|
||||||
@ -1337,13 +1426,20 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struc
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void prt_btree_cache_line(struct printbuf *out, const struct bch_fs *c,
|
static void prt_btree_cache_line(struct printbuf *out, const struct bch_fs *c,
|
||||||
const char *label, unsigned nr)
|
const char *label, size_t nr)
|
||||||
{
|
{
|
||||||
prt_printf(out, "%s\t", label);
|
prt_printf(out, "%s\t", label);
|
||||||
prt_human_readable_u64(out, nr * c->opts.btree_node_size);
|
prt_human_readable_u64(out, nr * c->opts.btree_node_size);
|
||||||
prt_printf(out, " (%u)\n", nr);
|
prt_printf(out, " (%zu)\n", nr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char * const bch2_btree_cache_not_freed_reasons_strs[] = {
|
||||||
|
#define x(n) #n,
|
||||||
|
BCH_BTREE_CACHE_NOT_FREED_REASONS()
|
||||||
|
#undef x
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
|
void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
|
struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
|
||||||
@ -1351,24 +1447,21 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc
|
|||||||
if (!out->nr_tabstops)
|
if (!out->nr_tabstops)
|
||||||
printbuf_tabstop_push(out, 32);
|
printbuf_tabstop_push(out, 32);
|
||||||
|
|
||||||
prt_btree_cache_line(out, c, "total:", bc->used);
|
prt_btree_cache_line(out, c, "live:", bc->live[0].nr);
|
||||||
prt_btree_cache_line(out, c, "nr dirty:", atomic_read(&bc->dirty));
|
prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr);
|
||||||
|
prt_btree_cache_line(out, c, "freeable:", bc->nr_freeable);
|
||||||
|
prt_btree_cache_line(out, c, "dirty:", atomic_long_read(&bc->nr_dirty));
|
||||||
prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
|
prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
|
||||||
prt_newline(out);
|
prt_newline(out);
|
||||||
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(bc->used_by_btree); i++)
|
for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++)
|
||||||
prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->used_by_btree[i]);
|
prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->nr_by_btree[i]);
|
||||||
|
|
||||||
prt_newline(out);
|
prt_newline(out);
|
||||||
prt_printf(out, "freed:\t%u\n", bc->freed);
|
prt_printf(out, "freed:\t%zu\n", bc->nr_freed);
|
||||||
prt_printf(out, "not freed:\n");
|
prt_printf(out, "not freed:\n");
|
||||||
prt_printf(out, " dirty\t%u\n", bc->not_freed_dirty);
|
|
||||||
prt_printf(out, " write in flight\t%u\n", bc->not_freed_write_in_flight);
|
for (unsigned i = 0; i < ARRAY_SIZE(bc->not_freed); i++)
|
||||||
prt_printf(out, " read in flight\t%u\n", bc->not_freed_read_in_flight);
|
prt_printf(out, " %s\t%llu\n",
|
||||||
prt_printf(out, " lock intent failed\t%u\n", bc->not_freed_lock_intent);
|
bch2_btree_cache_not_freed_reasons_strs[i], bc->not_freed[i]);
|
||||||
prt_printf(out, " lock write failed\t%u\n", bc->not_freed_lock_write);
|
|
||||||
prt_printf(out, " access bit\t%u\n", bc->not_freed_access_bit);
|
|
||||||
prt_printf(out, " no evict failed\t%u\n", bc->not_freed_noevict);
|
|
||||||
prt_printf(out, " write blocked\t%u\n", bc->not_freed_write_blocked);
|
|
||||||
prt_printf(out, " will make reachable\t%u\n", bc->not_freed_will_make_reachable);
|
|
||||||
}
|
}
|
||||||
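The not-freed counters above are now driven by one x-macro list, BCH_BTREE_CACHE_NOT_FREED_REASONS(), which expands both into the enum indexing bc->not_freed[] and into the string table printed by bch2_btree_cache_to_text(). A minimal standalone sketch of that same pattern, with invented names (DEMO_*), assuming nothing beyond standard C:

#include <stdio.h>

/* one list, expanded twice: once into an enum, once into a string table */
#define DEMO_REASONS() \
	x(lock_intent) \
	x(dirty) \
	x(access_bit)

enum demo_reason {
#define x(n) DEMO_##n,
	DEMO_REASONS()
#undef x
	DEMO_NR,
};

static const char * const demo_reason_strs[] = {
#define x(n) #n,
	DEMO_REASONS()
#undef x
	NULL
};

int main(void)
{
	/* the enum and the names can never fall out of sync */
	for (int i = 0; i < DEMO_NR; i++)
		printf("%d: %s\n", i, demo_reason_strs[i]);
	return 0;
}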
|
@ -19,6 +19,9 @@ int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
|
|||||||
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
|
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
|
||||||
unsigned, enum btree_id);
|
unsigned, enum btree_id);
|
||||||
|
|
||||||
|
void bch2_node_pin(struct bch_fs *, struct btree *);
|
||||||
|
void bch2_btree_cache_unpin(struct bch_fs *);
|
||||||
|
|
||||||
void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsigned,
|
void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsigned,
|
||||||
struct bkey_s_c, struct bkey_i *);
|
struct bkey_s_c, struct bkey_i *);
|
||||||
|
|
||||||
|
@ -549,9 +549,8 @@ reconstruct_root:
|
|||||||
six_unlock_read(&b->c.lock);
|
six_unlock_read(&b->c.lock);
|
||||||
|
|
||||||
if (ret == DROP_THIS_NODE) {
|
if (ret == DROP_THIS_NODE) {
|
||||||
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
|
||||||
mutex_lock(&c->btree_cache.lock);
|
mutex_lock(&c->btree_cache.lock);
|
||||||
list_move(&b->list, &c->btree_cache.freeable);
|
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||||
mutex_unlock(&c->btree_cache.lock);
|
mutex_unlock(&c->btree_cache.lock);
|
||||||
|
|
||||||
r->b = NULL;
|
r->b = NULL;
|
||||||
@ -753,10 +752,8 @@ static void bch2_gc_free(struct bch_fs *c)
|
|||||||
genradix_free(&c->reflink_gc_table);
|
genradix_free(&c->reflink_gc_table);
|
||||||
genradix_free(&c->gc_stripes);
|
genradix_free(&c->gc_stripes);
|
||||||
|
|
||||||
for_each_member_device(c, ca) {
|
for_each_member_device(c, ca)
|
||||||
kvfree(rcu_dereference_protected(ca->buckets_gc, 1));
|
genradix_free(&ca->buckets_gc);
|
||||||
ca->buckets_gc = NULL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_gc_start(struct bch_fs *c)
|
static int bch2_gc_start(struct bch_fs *c)
|
||||||
@ -910,20 +907,12 @@ static int bch2_gc_alloc_start(struct bch_fs *c)
|
|||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
for_each_member_device(c, ca) {
|
for_each_member_device(c, ca) {
|
||||||
struct bucket_array *buckets = kvmalloc(sizeof(struct bucket_array) +
|
ret = genradix_prealloc(&ca->buckets_gc, ca->mi.nbuckets, GFP_KERNEL);
|
||||||
ca->mi.nbuckets * sizeof(struct bucket),
|
if (ret) {
|
||||||
GFP_KERNEL|__GFP_ZERO);
|
|
||||||
if (!buckets) {
|
|
||||||
bch2_dev_put(ca);
|
bch2_dev_put(ca);
|
||||||
ret = -BCH_ERR_ENOMEM_gc_alloc_start;
|
ret = -BCH_ERR_ENOMEM_gc_alloc_start;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
buckets->first_bucket = ca->mi.first_bucket;
|
|
||||||
buckets->nbuckets = ca->mi.nbuckets;
|
|
||||||
buckets->nbuckets_minus_first =
|
|
||||||
buckets->nbuckets - buckets->first_bucket;
|
|
||||||
rcu_assign_pointer(ca->buckets_gc, buckets);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bch_err_fn(c, ret);
|
bch_err_fn(c, ret);
|
||||||
|
@ -1666,7 +1666,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
|
|||||||
bch2_btree_pos_to_text(&buf, c, b);
|
bch2_btree_pos_to_text(&buf, c, b);
|
||||||
bch_err_ratelimited(c, "%s", buf.buf);
|
bch_err_ratelimited(c, "%s", buf.buf);
|
||||||
|
|
||||||
if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
|
if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
|
||||||
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
|
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
|
||||||
bch2_fatal_error(c);
|
bch2_fatal_error(c);
|
||||||
|
|
||||||
@ -1749,10 +1749,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
|
|||||||
bch2_btree_node_read(trans, b, true);
|
bch2_btree_node_read(trans, b, true);
|
||||||
|
|
||||||
if (btree_node_read_error(b)) {
|
if (btree_node_read_error(b)) {
|
||||||
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
|
||||||
|
|
||||||
mutex_lock(&c->btree_cache.lock);
|
mutex_lock(&c->btree_cache.lock);
|
||||||
list_move(&b->list, &c->btree_cache.freeable);
|
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||||
mutex_unlock(&c->btree_cache.lock);
|
mutex_unlock(&c->btree_cache.lock);
|
||||||
|
|
||||||
ret = -BCH_ERR_btree_node_read_error;
|
ret = -BCH_ERR_btree_node_read_error;
|
||||||
@ -2031,7 +2029,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
|
|||||||
do_write:
|
do_write:
|
||||||
BUG_ON((type == BTREE_WRITE_initial) != (b->written == 0));
|
BUG_ON((type == BTREE_WRITE_initial) != (b->written == 0));
|
||||||
|
|
||||||
atomic_dec(&c->btree_cache.dirty);
|
atomic_long_dec(&c->btree_cache.nr_dirty);
|
||||||
|
|
||||||
BUG_ON(btree_node_fake(b));
|
BUG_ON(btree_node_fake(b));
|
||||||
BUG_ON((b->will_make_reachable != 0) != !b->written);
|
BUG_ON((b->will_make_reachable != 0) != !b->written);
|
||||||
|
@ -18,13 +18,13 @@ struct btree_node_read_all;
|
|||||||
static inline void set_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
|
static inline void set_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
|
||||||
{
|
{
|
||||||
if (!test_and_set_bit(BTREE_NODE_dirty, &b->flags))
|
if (!test_and_set_bit(BTREE_NODE_dirty, &b->flags))
|
||||||
atomic_inc(&c->btree_cache.dirty);
|
atomic_long_inc(&c->btree_cache.nr_dirty);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void clear_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
|
static inline void clear_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
|
||||||
{
|
{
|
||||||
if (test_and_clear_bit(BTREE_NODE_dirty, &b->flags))
|
if (test_and_clear_bit(BTREE_NODE_dirty, &b->flags))
|
||||||
atomic_dec(&c->btree_cache.dirty);
|
atomic_long_dec(&c->btree_cache.nr_dirty);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned btree_ptr_sectors_written(struct bkey_s_c k)
|
static inline unsigned btree_ptr_sectors_written(struct bkey_s_c k)
|
||||||
|
@ -6,6 +6,8 @@
|
|||||||
#include "btree_types.h"
|
#include "btree_types.h"
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
|
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
|
|
||||||
void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
|
void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
|
||||||
void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t);
|
void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t);
|
||||||
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
|
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
|
||||||
@ -529,6 +531,12 @@ void bch2_set_btree_iter_dontneed(struct btree_iter *);
|
|||||||
|
|
||||||
void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
|
void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* bch2_trans_kmalloc - allocate memory for use by the current transaction
|
||||||
|
*
|
||||||
|
* Must be called after bch2_trans_begin, which on second and further calls
|
||||||
|
* frees all memory allocated in this transaction
|
||||||
|
*/
|
||||||
static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
|
static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
|
||||||
{
|
{
|
||||||
size = roundup(size, 8);
|
size = roundup(size, 8);
|
||||||
@ -865,13 +873,19 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
|
|||||||
(_do) ?: bch2_trans_relock(_trans); \
|
(_do) ?: bch2_trans_relock(_trans); \
|
||||||
})
|
})
|
||||||
|
|
||||||
|
#define memalloc_flags_do(_flags, _do) \
|
||||||
|
({ \
|
||||||
|
unsigned _saved_flags = memalloc_flags_save(_flags); \
|
||||||
|
typeof(_do) _ret = _do; \
|
||||||
|
memalloc_noreclaim_restore(_saved_flags); \
|
||||||
|
_ret; \
|
||||||
|
})
|
||||||
|
|
||||||
#define allocate_dropping_locks_errcode(_trans, _do) \
|
#define allocate_dropping_locks_errcode(_trans, _do) \
|
||||||
({ \
|
({ \
|
||||||
gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \
|
int _ret = memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN, _do);\
|
||||||
int _ret = _do; \
|
|
||||||
\
|
\
|
||||||
if (bch2_err_matches(_ret, ENOMEM)) { \
|
if (bch2_err_matches(_ret, ENOMEM)) { \
|
||||||
_gfp = GFP_KERNEL; \
|
|
||||||
_ret = drop_locks_do(_trans, _do); \
|
_ret = drop_locks_do(_trans, _do); \
|
||||||
} \
|
} \
|
||||||
_ret; \
|
_ret; \
|
||||||
@ -879,12 +893,10 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
|
|||||||
|
|
||||||
#define allocate_dropping_locks(_trans, _ret, _do) \
|
#define allocate_dropping_locks(_trans, _ret, _do) \
|
||||||
({ \
|
({ \
|
||||||
gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \
|
typeof(_do) _p = memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN, _do);\
|
||||||
typeof(_do) _p = _do; \
|
|
||||||
\
|
\
|
||||||
_ret = 0; \
|
_ret = 0; \
|
||||||
if (unlikely(!_p)) { \
|
if (unlikely(!_p)) { \
|
||||||
_gfp = GFP_KERNEL; \
|
|
||||||
_ret = drop_locks_do(_trans, ((_p = _do), 0)); \
|
_ret = drop_locks_do(_trans, ((_p = _do), 0)); \
|
||||||
} \
|
} \
|
||||||
_p; \
|
_p; \
|
||||||
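The memalloc_flags_do() macro above wraps a single allocation expression in a save/modify/restore of per-task allocation flags, so callees no longer need a gfp_t threaded through them; allocate_dropping_locks() then retries with locks dropped only if the flagged attempt fails. A rough userspace model of that shape, assuming GCC/Clang statement expressions and using invented names (task_alloc_flags, with_flags) as stand-ins for current->flags and the PF_MEMALLOC_* bits:

#include <stdio.h>

static __thread unsigned task_alloc_flags;

#define DEMO_NORECLAIM	(1U << 0)

#define flags_save(_f)		({ unsigned _old = task_alloc_flags; task_alloc_flags |= (_f); _old; })
#define flags_restore(_old)	do { task_alloc_flags = (_old); } while (0)

/* evaluate _do with _flags set, then restore the previous flags */
#define with_flags(_flags, _do) \
({ \
	unsigned _saved = flags_save(_flags); \
	typeof(_do) _ret = (_do); \
	flags_restore(_saved); \
	_ret; \
})

static int try_alloc(void)
{
	/* the callee sees the caller's flags implicitly, no gfp argument */
	return (task_alloc_flags & DEMO_NORECLAIM) ? -1 : 0;
}

int main(void)
{
	printf("default: %d\n", try_alloc());
	printf("noreclaim: %d\n", with_flags(DEMO_NORECLAIM, try_alloc()));
	printf("restored: %d\n", try_alloc());
	return 0;
}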
|
@ -530,6 +530,8 @@ static void __journal_keys_sort(struct journal_keys *keys)
|
|||||||
{
|
{
|
||||||
sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL);
|
sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL);
|
||||||
|
|
||||||
|
cond_resched();
|
||||||
|
|
||||||
struct journal_key *dst = keys->data;
|
struct journal_key *dst = keys->data;
|
||||||
|
|
||||||
darray_for_each(*keys, src) {
|
darray_for_each(*keys, src) {
|
||||||
|
@ -116,8 +116,10 @@ static void bkey_cached_free(struct btree_key_cache *bc,
|
|||||||
this_cpu_inc(*bc->nr_pending);
|
this_cpu_inc(*bc->nr_pending);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s, gfp_t gfp)
|
static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s)
|
||||||
{
|
{
|
||||||
|
gfp_t gfp = GFP_KERNEL|__GFP_ACCOUNT|__GFP_RECLAIMABLE;
|
||||||
|
|
||||||
struct bkey_cached *ck = kmem_cache_zalloc(bch2_key_cache, gfp);
|
struct bkey_cached *ck = kmem_cache_zalloc(bch2_key_cache, gfp);
|
||||||
if (unlikely(!ck))
|
if (unlikely(!ck))
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -145,7 +147,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
|
|||||||
goto lock;
|
goto lock;
|
||||||
|
|
||||||
ck = allocate_dropping_locks(trans, ret,
|
ck = allocate_dropping_locks(trans, ret,
|
||||||
__bkey_cached_alloc(key_u64s, _gfp));
|
__bkey_cached_alloc(key_u64s));
|
||||||
if (ret) {
|
if (ret) {
|
||||||
if (ck)
|
if (ck)
|
||||||
kfree(ck->k);
|
kfree(ck->k);
|
||||||
@ -239,7 +241,7 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
|
|||||||
mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
|
mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
|
||||||
|
|
||||||
struct bkey_i *new_k = allocate_dropping_locks(trans, ret,
|
struct bkey_i *new_k = allocate_dropping_locks(trans, ret,
|
||||||
kmalloc(key_u64s * sizeof(u64), _gfp));
|
kmalloc(key_u64s * sizeof(u64), GFP_KERNEL));
|
||||||
if (unlikely(!new_k)) {
|
if (unlikely(!new_k)) {
|
||||||
bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
|
bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
|
||||||
bch2_btree_id_str(ck->key.btree_id), key_u64s);
|
bch2_btree_id_str(ck->key.btree_id), key_u64s);
|
||||||
|
@ -138,6 +138,31 @@ struct btree {
|
|||||||
struct list_head list;
|
struct list_head list;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define BCH_BTREE_CACHE_NOT_FREED_REASONS() \
|
||||||
|
x(lock_intent) \
|
||||||
|
x(lock_write) \
|
||||||
|
x(dirty) \
|
||||||
|
x(read_in_flight) \
|
||||||
|
x(write_in_flight) \
|
||||||
|
x(noevict) \
|
||||||
|
x(write_blocked) \
|
||||||
|
x(will_make_reachable) \
|
||||||
|
x(access_bit)
|
||||||
|
|
||||||
|
enum bch_btree_cache_not_freed_reasons {
|
||||||
|
#define x(n) BCH_BTREE_CACHE_NOT_FREED_##n,
|
||||||
|
BCH_BTREE_CACHE_NOT_FREED_REASONS()
|
||||||
|
#undef x
|
||||||
|
BCH_BTREE_CACHE_NOT_FREED_REASONS_NR,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct btree_cache_list {
|
||||||
|
unsigned idx;
|
||||||
|
struct shrinker *shrink;
|
||||||
|
struct list_head list;
|
||||||
|
size_t nr;
|
||||||
|
};
|
||||||
|
|
||||||
struct btree_cache {
|
struct btree_cache {
|
||||||
struct rhashtable table;
|
struct rhashtable table;
|
||||||
bool table_init_done;
|
bool table_init_done;
|
||||||
@ -155,28 +180,19 @@ struct btree_cache {
|
|||||||
* should never grow past ~2-3 nodes in practice.
|
* should never grow past ~2-3 nodes in practice.
|
||||||
*/
|
*/
|
||||||
struct mutex lock;
|
struct mutex lock;
|
||||||
struct list_head live;
|
|
||||||
struct list_head freeable;
|
struct list_head freeable;
|
||||||
struct list_head freed_pcpu;
|
struct list_head freed_pcpu;
|
||||||
struct list_head freed_nonpcpu;
|
struct list_head freed_nonpcpu;
|
||||||
|
struct btree_cache_list live[2];
|
||||||
|
|
||||||
/* Number of elements in live + freeable lists */
|
size_t nr_freeable;
|
||||||
unsigned used;
|
size_t nr_reserve;
|
||||||
unsigned reserve;
|
size_t nr_by_btree[BTREE_ID_NR];
|
||||||
unsigned freed;
|
atomic_long_t nr_dirty;
|
||||||
unsigned not_freed_lock_intent;
|
|
||||||
unsigned not_freed_lock_write;
|
|
||||||
unsigned not_freed_dirty;
|
|
||||||
unsigned not_freed_read_in_flight;
|
|
||||||
unsigned not_freed_write_in_flight;
|
|
||||||
unsigned not_freed_noevict;
|
|
||||||
unsigned not_freed_write_blocked;
|
|
||||||
unsigned not_freed_will_make_reachable;
|
|
||||||
unsigned not_freed_access_bit;
|
|
||||||
atomic_t dirty;
|
|
||||||
struct shrinker *shrink;
|
|
||||||
|
|
||||||
unsigned used_by_btree[BTREE_ID_NR];
|
/* shrinker stats */
|
||||||
|
size_t nr_freed;
|
||||||
|
u64 not_freed[BCH_BTREE_CACHE_NOT_FREED_REASONS_NR];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we need to allocate memory for a new btree node and that
|
* If we need to allocate memory for a new btree node and that
|
||||||
@ -189,8 +205,8 @@ struct btree_cache {
|
|||||||
|
|
||||||
struct bbpos pinned_nodes_start;
|
struct bbpos pinned_nodes_start;
|
||||||
struct bbpos pinned_nodes_end;
|
struct bbpos pinned_nodes_end;
|
||||||
u64 pinned_nodes_leaf_mask;
|
/* btree id mask: 0 for leaves, 1 for interior */
|
||||||
u64 pinned_nodes_interior_mask;
|
u64 pinned_nodes_mask[2];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct btree_node_iter {
|
struct btree_node_iter {
|
||||||
@ -582,7 +598,8 @@ enum btree_write_type {
|
|||||||
x(dying) \
|
x(dying) \
|
||||||
x(fake) \
|
x(fake) \
|
||||||
x(need_rewrite) \
|
x(need_rewrite) \
|
||||||
x(never_write)
|
x(never_write) \
|
||||||
|
x(pinned)
|
||||||
|
|
||||||
enum btree_flags {
|
enum btree_flags {
|
||||||
/* First bits for btree node write type */
|
/* First bits for btree node write type */
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include "clock.h"
|
#include "clock.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "extents.h"
|
#include "extents.h"
|
||||||
|
#include "io_write.h"
|
||||||
#include "journal.h"
|
#include "journal.h"
|
||||||
#include "journal_reclaim.h"
|
#include "journal_reclaim.h"
|
||||||
#include "keylist.h"
|
#include "keylist.h"
|
||||||
@ -145,7 +146,7 @@ fsck_err:
|
|||||||
printbuf_exit(&buf);
|
printbuf_exit(&buf);
|
||||||
return ret;
|
return ret;
|
||||||
topology_repair:
|
topology_repair:
|
||||||
if ((c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) &&
|
if ((c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) &&
|
||||||
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) {
|
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) {
|
||||||
bch2_inconsistent_error(c);
|
bch2_inconsistent_error(c);
|
||||||
ret = -BCH_ERR_btree_need_topology_repair;
|
ret = -BCH_ERR_btree_need_topology_repair;
|
||||||
@ -250,8 +251,13 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
|
|||||||
unsigned i, level = b->c.level;
|
unsigned i, level = b->c.level;
|
||||||
|
|
||||||
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
|
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
|
||||||
|
|
||||||
|
mutex_lock(&c->btree_cache.lock);
|
||||||
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||||
|
mutex_unlock(&c->btree_cache.lock);
|
||||||
|
|
||||||
__btree_node_free(trans, b);
|
__btree_node_free(trans, b);
|
||||||
|
|
||||||
six_unlock_write(&b->c.lock);
|
six_unlock_write(&b->c.lock);
|
||||||
mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
|
mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
|
||||||
|
|
||||||
@ -283,7 +289,6 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
|
|||||||
clear_btree_node_need_write(b);
|
clear_btree_node_need_write(b);
|
||||||
|
|
||||||
mutex_lock(&c->btree_cache.lock);
|
mutex_lock(&c->btree_cache.lock);
|
||||||
list_del_init(&b->list);
|
|
||||||
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
bch2_btree_node_hash_remove(&c->btree_cache, b);
|
||||||
mutex_unlock(&c->btree_cache.lock);
|
mutex_unlock(&c->btree_cache.lock);
|
||||||
|
|
||||||
@ -1899,7 +1904,7 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans *
|
|||||||
six_unlock_intent(&n->c.lock);
|
six_unlock_intent(&n->c.lock);
|
||||||
|
|
||||||
mutex_lock(&c->btree_cache.lock);
|
mutex_lock(&c->btree_cache.lock);
|
||||||
list_add_tail(&b->list, &c->btree_cache.live);
|
list_add_tail(&b->list, &c->btree_cache.live[btree_node_pinned(b)].list);
|
||||||
mutex_unlock(&c->btree_cache.lock);
|
mutex_unlock(&c->btree_cache.lock);
|
||||||
|
|
||||||
bch2_trans_verify_locks(trans);
|
bch2_trans_verify_locks(trans);
|
||||||
|
@ -75,6 +75,15 @@ void bch2_dev_usage_to_text(struct printbuf *out,
|
|||||||
struct bch_dev *ca,
|
struct bch_dev *ca,
|
||||||
struct bch_dev_usage *usage)
|
struct bch_dev_usage *usage)
|
||||||
{
|
{
|
||||||
|
if (out->nr_tabstops < 5) {
|
||||||
|
printbuf_tabstops_reset(out);
|
||||||
|
printbuf_tabstop_push(out, 12);
|
||||||
|
printbuf_tabstop_push(out, 16);
|
||||||
|
printbuf_tabstop_push(out, 16);
|
||||||
|
printbuf_tabstop_push(out, 16);
|
||||||
|
printbuf_tabstop_push(out, 16);
|
||||||
|
}
|
||||||
|
|
||||||
prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n");
|
prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n");
|
||||||
|
|
||||||
for (unsigned i = 0; i < BCH_DATA_NR; i++) {
|
for (unsigned i = 0; i < BCH_DATA_NR; i++) {
|
||||||
@ -100,7 +109,8 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
|
|||||||
|
|
||||||
struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
|
struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
|
||||||
if (!ca) {
|
if (!ca) {
|
||||||
if (fsck_err(trans, ptr_to_invalid_device,
|
if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID,
|
||||||
|
trans, ptr_to_invalid_device,
|
||||||
"pointer to missing device %u\n"
|
"pointer to missing device %u\n"
|
||||||
"while marking %s",
|
"while marking %s",
|
||||||
p.ptr.dev,
|
p.ptr.dev,
|
||||||
@ -476,7 +486,7 @@ out:
|
|||||||
return ret;
|
return ret;
|
||||||
err:
|
err:
|
||||||
bch2_dump_trans_updates(trans);
|
bch2_dump_trans_updates(trans);
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_bucket_ref_update;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -562,8 +572,8 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
|
|||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
|
struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
|
||||||
if (unlikely(!ca)) {
|
if (unlikely(!ca)) {
|
||||||
if (insert)
|
if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID)
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_trigger_pointer;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -592,7 +602,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
|
|||||||
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
|
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
|
||||||
p.ptr.dev,
|
p.ptr.dev,
|
||||||
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
|
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_trigger_pointer;
|
||||||
goto err_unlock;
|
goto err_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -637,7 +647,7 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
|
|||||||
bch2_trans_inconsistent(trans,
|
bch2_trans_inconsistent(trans,
|
||||||
"stripe pointer doesn't match stripe %llu",
|
"stripe pointer doesn't match stripe %llu",
|
||||||
(u64) p.ec.idx);
|
(u64) p.ec.idx);
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_trigger_stripe_pointer;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -676,7 +686,7 @@ err:
|
|||||||
(u64) p.ec.idx, buf.buf);
|
(u64) p.ec.idx, buf.buf);
|
||||||
printbuf_exit(&buf);
|
printbuf_exit(&buf);
|
||||||
bch2_inconsistent_error(c);
|
bch2_inconsistent_error(c);
|
||||||
return -EIO;
|
return -BCH_ERR_trigger_stripe_pointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
m->block_sectors[p.ec.block] += sectors;
|
m->block_sectors[p.ec.block] += sectors;
|
||||||
@ -740,7 +750,7 @@ static int __trigger_extent(struct btree_trans *trans,
|
|||||||
return ret;
|
return ret;
|
||||||
} else if (!p.has_ec) {
|
} else if (!p.has_ec) {
|
||||||
*replicas_sectors += disk_sectors;
|
*replicas_sectors += disk_sectors;
|
||||||
acc_replicas_key.replicas.devs[acc_replicas_key.replicas.nr_devs++] = p.ptr.dev;
|
replicas_entry_add_dev(&acc_replicas_key.replicas, p.ptr.dev);
|
||||||
} else {
|
} else {
|
||||||
ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
|
ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
|
||||||
if (ret)
|
if (ret)
|
||||||
@ -876,7 +886,7 @@ int bch2_trigger_extent(struct btree_trans *trans,
|
|||||||
need_rebalance_delta -= s != 0;
|
need_rebalance_delta -= s != 0;
|
||||||
need_rebalance_sectors_delta -= s;
|
need_rebalance_sectors_delta -= s;
|
||||||
|
|
||||||
s = bch2_bkey_sectors_need_rebalance(c, old);
|
s = bch2_bkey_sectors_need_rebalance(c, new.s_c);
|
||||||
need_rebalance_delta += s != 0;
|
need_rebalance_delta += s != 0;
|
||||||
need_rebalance_sectors_delta += s;
|
need_rebalance_sectors_delta += s;
|
||||||
|
|
||||||
@ -956,7 +966,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
|
|||||||
bch2_data_type_str(a->v.data_type),
|
bch2_data_type_str(a->v.data_type),
|
||||||
bch2_data_type_str(type),
|
bch2_data_type_str(type),
|
||||||
bch2_data_type_str(type));
|
bch2_data_type_str(type));
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_metadata_bucket_inconsistency;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1012,7 +1022,7 @@ err:
|
|||||||
bucket_unlock(g);
|
bucket_unlock(g);
|
||||||
err_unlock:
|
err_unlock:
|
||||||
percpu_up_read(&c->mark_lock);
|
percpu_up_read(&c->mark_lock);
|
||||||
return -EIO;
|
return -BCH_ERR_metadata_bucket_inconsistency;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
|
int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
|
||||||
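Several hunks above replace bare -EIO returns with named codes such as -BCH_ERR_trigger_pointer. The general idea, sketched here in plain C with invented names rather than bcachefs's actual error machinery: private codes live outside the errno range so each failure site has a distinct, greppable identity, and they collapse back to a generic errno at the boundary.

#include <stdio.h>
#include <errno.h>
#include <string.h>

enum {
	ERR_private_base = 4096,	/* well above any real errno */
	ERR_trigger_pointer,
	ERR_mark_stripe,
	ERR_stripe_reconstruct,
};

static const char *err_name(int err)
{
	switch (-err) {
	case ERR_trigger_pointer:	return "trigger_pointer";
	case ERR_mark_stripe:		return "mark_stripe";
	case ERR_stripe_reconstruct:	return "stripe_reconstruct";
	default:			return strerror(-err);
	}
}

static int err_to_errno(int err)
{
	/* private codes degrade to a generic errno for userspace */
	return -err > ERR_private_base ? -EIO : err;
}

int main(void)
{
	int ret = -ERR_mark_stripe;

	printf("internal: %s, to userspace: %d\n", err_name(ret), err_to_errno(ret));
	return 0;
}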
|
@ -80,22 +80,9 @@ static inline void bucket_lock(struct bucket *b)
|
|||||||
TASK_UNINTERRUPTIBLE);
|
TASK_UNINTERRUPTIBLE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
|
|
||||||
{
|
|
||||||
return rcu_dereference_check(ca->buckets_gc,
|
|
||||||
!ca->fs ||
|
|
||||||
percpu_rwsem_is_held(&ca->fs->mark_lock) ||
|
|
||||||
lockdep_is_held(&ca->fs->state_lock) ||
|
|
||||||
lockdep_is_held(&ca->bucket_lock));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
|
static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
|
||||||
{
|
{
|
||||||
struct bucket_array *buckets = gc_bucket_array(ca);
|
return genradix_ptr(&ca->buckets_gc, b);
|
||||||
|
|
||||||
if (b - buckets->first_bucket >= buckets->nbuckets_minus_first)
|
|
||||||
return NULL;
|
|
||||||
return buckets->b + b;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
|
static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
|
||||||
|
@ -19,14 +19,6 @@ struct bucket {
|
|||||||
u32 stripe_sectors;
|
u32 stripe_sectors;
|
||||||
} __aligned(sizeof(long));
|
} __aligned(sizeof(long));
|
||||||
|
|
||||||
struct bucket_array {
|
|
||||||
struct rcu_head rcu;
|
|
||||||
u16 first_bucket;
|
|
||||||
size_t nbuckets;
|
|
||||||
size_t nbuckets_minus_first;
|
|
||||||
struct bucket b[] __counted_by(nbuckets);
|
|
||||||
};
|
|
||||||
|
|
||||||
struct bucket_gens {
|
struct bucket_gens {
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
u16 first_bucket;
|
u16 first_bucket;
|
||||||
|
@ -100,13 +100,12 @@ static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm,
|
|||||||
struct scatterlist *sg, size_t len)
|
struct scatterlist *sg, size_t len)
|
||||||
{
|
{
|
||||||
SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
|
SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
|
||||||
int ret;
|
|
||||||
|
|
||||||
skcipher_request_set_sync_tfm(req, tfm);
|
skcipher_request_set_sync_tfm(req, tfm);
|
||||||
skcipher_request_set_callback(req, 0, NULL, NULL);
|
skcipher_request_set_callback(req, 0, NULL, NULL);
|
||||||
skcipher_request_set_crypt(req, sg, sg, len, nonce.d);
|
skcipher_request_set_crypt(req, sg, sg, len, nonce.d);
|
||||||
|
|
||||||
ret = crypto_skcipher_encrypt(req);
|
int ret = crypto_skcipher_encrypt(req);
|
||||||
if (ret)
|
if (ret)
|
||||||
pr_err("got error %i from crypto_skcipher_encrypt()", ret);
|
pr_err("got error %i from crypto_skcipher_encrypt()", ret);
|
||||||
|
|
||||||
@ -118,38 +117,47 @@ static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
|
|||||||
void *buf, size_t len)
|
void *buf, size_t len)
|
||||||
{
|
{
|
||||||
if (!is_vmalloc_addr(buf)) {
|
if (!is_vmalloc_addr(buf)) {
|
||||||
struct scatterlist sg;
|
struct scatterlist sg = {};
|
||||||
|
|
||||||
sg_init_table(&sg, 1);
|
sg_mark_end(&sg);
|
||||||
sg_set_page(&sg,
|
sg_set_page(&sg, virt_to_page(buf), len, offset_in_page(buf));
|
||||||
is_vmalloc_addr(buf)
|
|
||||||
? vmalloc_to_page(buf)
|
|
||||||
: virt_to_page(buf),
|
|
||||||
len, offset_in_page(buf));
|
|
||||||
return do_encrypt_sg(tfm, nonce, &sg, len);
|
return do_encrypt_sg(tfm, nonce, &sg, len);
|
||||||
} else {
|
} else {
|
||||||
unsigned pages = buf_pages(buf, len);
|
DARRAY_PREALLOCATED(struct scatterlist, 4) sgl;
|
||||||
struct scatterlist *sg;
|
size_t sgl_len = 0;
|
||||||
size_t orig_len = len;
|
int ret;
|
||||||
int ret, i;
|
|
||||||
|
|
||||||
sg = kmalloc_array(pages, sizeof(*sg), GFP_KERNEL);
|
darray_init(&sgl);
|
||||||
if (!sg)
|
|
||||||
return -BCH_ERR_ENOMEM_do_encrypt;
|
|
||||||
|
|
||||||
sg_init_table(sg, pages);
|
while (len) {
|
||||||
|
|
||||||
for (i = 0; i < pages; i++) {
|
|
||||||
unsigned offset = offset_in_page(buf);
|
unsigned offset = offset_in_page(buf);
|
||||||
unsigned pg_len = min_t(size_t, len, PAGE_SIZE - offset);
|
struct scatterlist sg = {
|
||||||
|
.page_link = (unsigned long) vmalloc_to_page(buf),
|
||||||
|
.offset = offset,
|
||||||
|
.length = min(len, PAGE_SIZE - offset),
|
||||||
|
};
|
||||||
|
|
||||||
sg_set_page(sg + i, vmalloc_to_page(buf), pg_len, offset);
|
if (darray_push(&sgl, sg)) {
|
||||||
buf += pg_len;
|
sg_mark_end(&darray_last(sgl));
|
||||||
len -= pg_len;
|
ret = do_encrypt_sg(tfm, nonce, sgl.data, sgl_len);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
nonce = nonce_add(nonce, sgl_len);
|
||||||
|
sgl_len = 0;
|
||||||
|
sgl.nr = 0;
|
||||||
|
BUG_ON(darray_push(&sgl, sg));
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = do_encrypt_sg(tfm, nonce, sg, orig_len);
|
buf += sg.length;
|
||||||
kfree(sg);
|
len -= sg.length;
|
||||||
|
sgl_len += sg.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
sg_mark_end(&darray_last(sgl));
|
||||||
|
ret = do_encrypt_sg(tfm, nonce, sgl.data, sgl_len);
|
||||||
|
err:
|
||||||
|
darray_exit(&sgl);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -325,39 +333,42 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type,
|
|||||||
{
|
{
|
||||||
struct bio_vec bv;
|
struct bio_vec bv;
|
||||||
struct bvec_iter iter;
|
struct bvec_iter iter;
|
||||||
struct scatterlist sgl[16], *sg = sgl;
|
DARRAY_PREALLOCATED(struct scatterlist, 4) sgl;
|
||||||
size_t bytes = 0;
|
size_t sgl_len = 0;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
if (!bch2_csum_type_is_encryption(type))
|
if (!bch2_csum_type_is_encryption(type))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
sg_init_table(sgl, ARRAY_SIZE(sgl));
|
darray_init(&sgl);
|
||||||
|
|
||||||
bio_for_each_segment(bv, bio, iter) {
|
bio_for_each_segment(bv, bio, iter) {
|
||||||
if (sg == sgl + ARRAY_SIZE(sgl)) {
|
struct scatterlist sg = {
|
||||||
sg_mark_end(sg - 1);
|
.page_link = (unsigned long) bv.bv_page,
|
||||||
|
.offset = bv.bv_offset,
|
||||||
|
.length = bv.bv_len,
|
||||||
|
};
|
||||||
|
|
||||||
ret = do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
|
if (darray_push(&sgl, sg)) {
|
||||||
|
sg_mark_end(&darray_last(sgl));
|
||||||
|
ret = do_encrypt_sg(c->chacha20, nonce, sgl.data, sgl_len);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
goto err;
|
||||||
|
|
||||||
nonce = nonce_add(nonce, bytes);
|
nonce = nonce_add(nonce, sgl_len);
|
||||||
bytes = 0;
|
sgl_len = 0;
|
||||||
|
sgl.nr = 0;
|
||||||
|
|
||||||
sg_init_table(sgl, ARRAY_SIZE(sgl));
|
BUG_ON(darray_push(&sgl, sg));
|
||||||
sg = sgl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sg_set_page(sg++, bv.bv_page, bv.bv_len, bv.bv_offset);
|
sgl_len += sg.length;
|
||||||
bytes += bv.bv_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sg != sgl) {
|
|
||||||
sg_mark_end(sg - 1);
|
|
||||||
return do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sg_mark_end(&darray_last(sgl));
|
||||||
|
ret = do_encrypt_sg(c->chacha20, nonce, sgl.data, sgl_len);
|
||||||
|
err:
|
||||||
|
darray_exit(&sgl);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
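The do_encrypt()/__bch2_encrypt_bio() rework above gathers scatterlist entries into a small preallocated darray and, whenever a push does not fit, encrypts what has been gathered, advances the nonce by the flushed length, resets the batch and retries the entry. A userspace model of that flush-when-full pattern, with invented names and a plain counter standing in for do_encrypt_sg():

#include <stdio.h>
#include <stddef.h>

#define BATCH_MAX 4

struct batch {
	size_t	nr;
	int	v[BATCH_MAX];
};

static int batch_push(struct batch *b, int v)
{
	if (b->nr == BATCH_MAX)
		return -1;		/* full: caller must flush first */
	b->v[b->nr++] = v;
	return 0;
}

static void flush(struct batch *b, size_t *processed)
{
	for (size_t i = 0; i < b->nr; i++)
		*processed += 1;	/* stand-in for do_encrypt_sg() */
	printf("flushed %zu entries (total %zu)\n", b->nr, *processed);
	b->nr = 0;
}

int main(void)
{
	struct batch b = { 0 };
	size_t processed = 0;

	for (int i = 0; i < 10; i++)
		if (batch_push(&b, i)) {
			flush(&b, &processed);
			batch_push(&b, i);	/* retry the element that didn't fit */
		}

	flush(&b, &processed);			/* final partial batch */
	return 0;
}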
||||||
|
@ -337,6 +337,7 @@ restart_drop_extra_replicas:
|
|||||||
printbuf_exit(&buf);
|
printbuf_exit(&buf);
|
||||||
|
|
||||||
bch2_fatal_error(c);
|
bch2_fatal_error(c);
|
||||||
|
ret = -EIO;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -570,7 +571,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
|
|||||||
while (data_opts.kill_ptrs) {
|
while (data_opts.kill_ptrs) {
|
||||||
unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
|
unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
|
||||||
|
|
||||||
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
|
bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(n), ptr, i++ == drop);
|
||||||
data_opts.kill_ptrs ^= 1U << drop;
|
data_opts.kill_ptrs ^= 1U << drop;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
456
libbcachefs/ec.c
456
libbcachefs/ec.c
@ -18,6 +18,7 @@
|
|||||||
#include "ec.h"
|
#include "ec.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "io_read.h"
|
#include "io_read.h"
|
||||||
|
#include "io_write.h"
|
||||||
#include "keylist.h"
|
#include "keylist.h"
|
||||||
#include "recovery.h"
|
#include "recovery.h"
|
||||||
#include "replicas.h"
|
#include "replicas.h"
|
||||||
@ -146,12 +147,18 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
|
|||||||
bch2_prt_csum_type(out, s.csum_type);
|
bch2_prt_csum_type(out, s.csum_type);
|
||||||
prt_printf(out, " gran %u", 1U << s.csum_granularity_bits);
|
prt_printf(out, " gran %u", 1U << s.csum_granularity_bits);
|
||||||
|
|
||||||
|
if (s.disk_label) {
|
||||||
|
prt_str(out, " label");
|
||||||
|
bch2_disk_path_to_text(out, c, s.disk_label - 1);
|
||||||
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < s.nr_blocks; i++) {
|
for (unsigned i = 0; i < s.nr_blocks; i++) {
|
||||||
const struct bch_extent_ptr *ptr = sp->ptrs + i;
|
const struct bch_extent_ptr *ptr = sp->ptrs + i;
|
||||||
|
|
||||||
if ((void *) ptr >= bkey_val_end(k))
|
if ((void *) ptr >= bkey_val_end(k))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
prt_char(out, ' ');
|
||||||
bch2_extent_ptr_to_text(out, c, ptr);
|
bch2_extent_ptr_to_text(out, c, ptr);
|
||||||
|
|
||||||
if (s.csum_type < BCH_CSUM_NR &&
|
if (s.csum_type < BCH_CSUM_NR &&
|
||||||
@ -192,7 +199,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
|
|||||||
a->dirty_sectors,
|
a->dirty_sectors,
|
||||||
a->stripe, s.k->p.offset,
|
a->stripe, s.k->p.offset,
|
||||||
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_mark_stripe;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -203,7 +210,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
|
|||||||
a->dirty_sectors,
|
a->dirty_sectors,
|
||||||
a->cached_sectors,
|
a->cached_sectors,
|
||||||
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_mark_stripe;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -213,7 +220,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
|
|||||||
bucket.inode, bucket.offset, a->gen,
|
bucket.inode, bucket.offset, a->gen,
|
||||||
a->stripe,
|
a->stripe,
|
||||||
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_mark_stripe;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -223,7 +230,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
|
|||||||
bch2_data_type_str(a->data_type),
|
bch2_data_type_str(a->data_type),
|
||||||
bch2_data_type_str(data_type),
|
bch2_data_type_str(data_type),
|
||||||
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_mark_stripe;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -235,7 +242,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
|
|||||||
a->dirty_sectors,
|
a->dirty_sectors,
|
||||||
a->cached_sectors,
|
a->cached_sectors,
|
||||||
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_mark_stripe;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -273,8 +280,8 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
|||||||
|
|
||||||
struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev);
|
struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev);
|
||||||
if (unlikely(!ca)) {
|
if (unlikely(!ca)) {
|
||||||
if (!(flags & BTREE_TRIGGER_overwrite))
|
if (ptr->dev != BCH_SB_MEMBER_INVALID && !(flags & BTREE_TRIGGER_overwrite))
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_mark_stripe;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -293,7 +300,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
|||||||
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
|
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
|
||||||
ptr->dev,
|
ptr->dev,
|
||||||
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
||||||
ret = -EIO;
|
ret = -BCH_ERR_mark_stripe;
|
||||||
goto err_unlock;
|
goto err_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -351,6 +358,19 @@ static int mark_stripe_buckets(struct btree_trans *trans,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void stripe_to_mem(struct stripe *m, const struct bch_stripe *s)
|
||||||
|
{
|
||||||
|
m->sectors = le16_to_cpu(s->sectors);
|
||||||
|
m->algorithm = s->algorithm;
|
||||||
|
m->nr_blocks = s->nr_blocks;
|
||||||
|
m->nr_redundant = s->nr_redundant;
|
||||||
|
m->disk_label = s->disk_label;
|
||||||
|
m->blocks_nonempty = 0;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < s->nr_blocks; i++)
|
||||||
|
m->blocks_nonempty += !!stripe_blockcount_get(s, i);
|
||||||
|
}
|
||||||
|
|
||||||
int bch2_trigger_stripe(struct btree_trans *trans,
|
int bch2_trigger_stripe(struct btree_trans *trans,
|
||||||
enum btree_id btree, unsigned level,
|
enum btree_id btree, unsigned level,
|
||||||
struct bkey_s_c old, struct bkey_s _new,
|
struct bkey_s_c old, struct bkey_s _new,
|
||||||
@ -467,14 +487,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
|
|||||||
|
|
||||||
memset(m, 0, sizeof(*m));
|
memset(m, 0, sizeof(*m));
|
||||||
} else {
|
} else {
|
||||||
m->sectors = le16_to_cpu(new_s->sectors);
|
stripe_to_mem(m, new_s);
|
||||||
m->algorithm = new_s->algorithm;
|
|
||||||
m->nr_blocks = new_s->nr_blocks;
|
|
||||||
m->nr_redundant = new_s->nr_redundant;
|
|
||||||
m->blocks_nonempty = 0;
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < new_s->nr_blocks; i++)
|
|
||||||
m->blocks_nonempty += !!stripe_blockcount_get(new_s, i);
|
|
||||||
|
|
||||||
if (!old_s)
|
if (!old_s)
|
||||||
bch2_stripes_heap_insert(c, m, idx);
|
bch2_stripes_heap_insert(c, m, idx);
|
||||||
@ -816,13 +829,15 @@ err:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* recovery read path: */
|
/* recovery read path: */
|
||||||
int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio)
|
int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
|
||||||
|
struct bkey_s_c orig_k)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
struct ec_stripe_buf *buf;
|
struct ec_stripe_buf *buf = NULL;
|
||||||
struct closure cl;
|
struct closure cl;
|
||||||
struct bch_stripe *v;
|
struct bch_stripe *v;
|
||||||
unsigned i, offset;
|
unsigned i, offset;
|
||||||
|
const char *msg = NULL;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
closure_init_stack(&cl);
|
closure_init_stack(&cl);
|
||||||
@ -835,32 +850,28 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio)
|
|||||||
|
|
||||||
ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf));
|
ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf));
|
||||||
if (ret) {
|
if (ret) {
|
||||||
bch_err_ratelimited(c,
|
msg = "stripe not found";
|
||||||
"error doing reconstruct read: error %i looking up stripe", ret);
|
goto err;
|
||||||
kfree(buf);
|
|
||||||
return -EIO;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
v = &bkey_i_to_stripe(&buf->key)->v;
|
v = &bkey_i_to_stripe(&buf->key)->v;
|
||||||
|
|
||||||
if (!bch2_ptr_matches_stripe(v, rbio->pick)) {
|
if (!bch2_ptr_matches_stripe(v, rbio->pick)) {
|
||||||
bch_err_ratelimited(c,
|
msg = "pointer doesn't match stripe";
|
||||||
"error doing reconstruct read: pointer doesn't match stripe");
|
|
||||||
ret = -EIO;
|
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
offset = rbio->bio.bi_iter.bi_sector - v->ptrs[rbio->pick.ec.block].offset;
|
offset = rbio->bio.bi_iter.bi_sector - v->ptrs[rbio->pick.ec.block].offset;
|
||||||
if (offset + bio_sectors(&rbio->bio) > le16_to_cpu(v->sectors)) {
|
if (offset + bio_sectors(&rbio->bio) > le16_to_cpu(v->sectors)) {
|
||||||
bch_err_ratelimited(c,
|
msg = "read is bigger than stripe";
|
||||||
"error doing reconstruct read: read is bigger than stripe");
|
|
||||||
ret = -EIO;
|
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = ec_stripe_buf_init(buf, offset, bio_sectors(&rbio->bio));
|
ret = ec_stripe_buf_init(buf, offset, bio_sectors(&rbio->bio));
|
||||||
if (ret)
|
if (ret) {
|
||||||
|
msg = "-ENOMEM";
|
||||||
goto err;
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < v->nr_blocks; i++)
|
for (i = 0; i < v->nr_blocks; i++)
|
||||||
ec_block_io(c, buf, REQ_OP_READ, i, &cl);
|
ec_block_io(c, buf, REQ_OP_READ, i, &cl);
|
||||||
@ -868,9 +879,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio)
|
|||||||
closure_sync(&cl);
|
closure_sync(&cl);
|
||||||
|
|
||||||
if (ec_nr_failed(buf) > v->nr_redundant) {
|
if (ec_nr_failed(buf) > v->nr_redundant) {
|
||||||
bch_err_ratelimited(c,
|
msg = "unable to read enough blocks";
|
||||||
"error doing reconstruct read: unable to read enough blocks");
|
|
||||||
ret = -EIO;
|
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -882,20 +891,28 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio)
|
|||||||
|
|
||||||
memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter,
|
memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter,
|
||||||
buf->data[rbio->pick.ec.block] + ((offset - buf->offset) << 9));
|
buf->data[rbio->pick.ec.block] + ((offset - buf->offset) << 9));
|
||||||
err:
|
out:
|
||||||
ec_stripe_buf_exit(buf);
|
ec_stripe_buf_exit(buf);
|
||||||
kfree(buf);
|
kfree(buf);
|
||||||
return ret;
|
return ret;
|
||||||
|
err:
|
||||||
|
struct printbuf msgbuf = PRINTBUF;
|
||||||
|
bch2_bkey_val_to_text(&msgbuf, c, orig_k);
|
||||||
|
bch_err_ratelimited(c,
|
||||||
|
"error doing reconstruct read: %s\n %s", msg, msgbuf.buf);
|
||||||
|
printbuf_exit(&msgbuf);;
|
||||||
|
ret = -BCH_ERR_stripe_reconstruct;
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* stripe bucket accounting: */
|
/* stripe bucket accounting: */
|
||||||
|
|
||||||
static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
|
static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx)
|
||||||
{
|
{
|
||||||
ec_stripes_heap n, *h = &c->ec_stripes_heap;
|
ec_stripes_heap n, *h = &c->ec_stripes_heap;
|
||||||
|
|
||||||
if (idx >= h->size) {
|
if (idx >= h->size) {
|
||||||
if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp))
|
if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), GFP_KERNEL))
|
||||||
return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
|
return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
|
||||||
|
|
||||||
mutex_lock(&c->ec_stripes_heap_lock);
|
mutex_lock(&c->ec_stripes_heap_lock);
|
||||||
@ -909,11 +926,11 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
|
|||||||
free_heap(&n);
|
free_heap(&n);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!genradix_ptr_alloc(&c->stripes, idx, gfp))
|
if (!genradix_ptr_alloc(&c->stripes, idx, GFP_KERNEL))
|
||||||
return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
|
return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
|
||||||
|
|
||||||
if (c->gc_pos.phase != GC_PHASE_not_running &&
|
if (c->gc_pos.phase != GC_PHASE_not_running &&
|
||||||
!genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
|
!genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL))
|
||||||
return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
|
return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -923,7 +940,7 @@ static int ec_stripe_mem_alloc(struct btree_trans *trans,
|
|||||||
struct btree_iter *iter)
|
struct btree_iter *iter)
|
||||||
{
|
{
|
||||||
return allocate_dropping_locks_errcode(trans,
|
return allocate_dropping_locks_errcode(trans,
|
||||||
__ec_stripe_mem_alloc(trans->c, iter->pos.offset, _gfp));
|
__ec_stripe_mem_alloc(trans->c, iter->pos.offset));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1305,7 +1322,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
|
|||||||
|
|
||||||
bkey_reassemble(n, k);
|
bkey_reassemble(n, k);
|
||||||
|
|
||||||
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev);
|
bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(n), ptr, ptr->dev != dev);
|
||||||
ec_ptr = bch2_bkey_has_device(bkey_i_to_s(n), dev);
|
ec_ptr = bch2_bkey_has_device(bkey_i_to_s(n), dev);
|
||||||
BUG_ON(!ec_ptr);
|
BUG_ON(!ec_ptr);
|
||||||
|
|
||||||
@ -1555,10 +1572,12 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c)
|
|||||||
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
|
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
|
static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
|
||||||
{
|
{
|
||||||
struct ec_stripe_new *s = h->s;
|
struct ec_stripe_new *s = h->s;
|
||||||
|
|
||||||
|
lockdep_assert_held(&h->lock);
|
||||||
|
|
||||||
BUG_ON(!s->allocated && !s->err);
|
BUG_ON(!s->allocated && !s->err);
|
||||||
|
|
||||||
h->s = NULL;
|
h->s = NULL;
|
||||||
@ -1571,6 +1590,12 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
|
|||||||
ec_stripe_new_put(c, s, STRIPE_REF_io);
|
ec_stripe_new_put(c, s, STRIPE_REF_io);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int err)
|
||||||
|
{
|
||||||
|
h->s->err = err;
|
||||||
|
ec_stripe_new_set_pending(c, h);
|
||||||
|
}
|
||||||
|
|
||||||
void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
|
void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
|
||||||
{
|
{
|
||||||
struct ec_stripe_new *s = ob->ec;
|
struct ec_stripe_new *s = ob->ec;
|
||||||
@ -1641,7 +1666,8 @@ static void ec_stripe_key_init(struct bch_fs *c,
|
|||||||
struct bkey_i *k,
|
struct bkey_i *k,
|
||||||
unsigned nr_data,
|
unsigned nr_data,
|
||||||
unsigned nr_parity,
|
unsigned nr_parity,
|
||||||
unsigned stripe_size)
|
unsigned stripe_size,
|
||||||
|
unsigned disk_label)
|
||||||
{
|
{
|
||||||
struct bkey_i_stripe *s = bkey_stripe_init(k);
|
struct bkey_i_stripe *s = bkey_stripe_init(k);
|
||||||
unsigned u64s;
|
unsigned u64s;
|
||||||
@ -1652,7 +1678,7 @@ static void ec_stripe_key_init(struct bch_fs *c,
|
|||||||
s->v.nr_redundant = nr_parity;
|
s->v.nr_redundant = nr_parity;
|
||||||
s->v.csum_granularity_bits = ilog2(c->opts.encoded_extent_max >> 9);
|
s->v.csum_granularity_bits = ilog2(c->opts.encoded_extent_max >> 9);
|
||||||
s->v.csum_type = BCH_CSUM_crc32c;
|
s->v.csum_type = BCH_CSUM_crc32c;
|
||||||
s->v.pad = 0;
|
s->v.disk_label = disk_label;
|
||||||
|
|
||||||
while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
|
while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
|
||||||
BUG_ON(1 << s->v.csum_granularity_bits >=
|
BUG_ON(1 << s->v.csum_granularity_bits >=
|
||||||
@ -1685,14 +1711,65 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
|
|||||||
s->nr_parity = h->redundancy;
|
s->nr_parity = h->redundancy;
|
||||||
|
|
||||||
ec_stripe_key_init(c, &s->new_stripe.key,
|
ec_stripe_key_init(c, &s->new_stripe.key,
|
||||||
s->nr_data, s->nr_parity, h->blocksize);
|
s->nr_data, s->nr_parity,
|
||||||
|
h->blocksize, h->disk_label);
|
||||||
|
|
||||||
h->s = s;
|
h->s = s;
|
||||||
|
h->nr_created++;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
|
||||||
|
{
|
||||||
|
rcu_read_lock();
|
||||||
|
h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
|
||||||
|
? group_to_target(h->disk_label - 1)
|
||||||
|
: 0);
|
||||||
|
unsigned nr_devs = dev_mask_nr(&h->devs);
|
||||||
|
|
||||||
|
for_each_member_device_rcu(c, ca, &h->devs)
|
||||||
|
if (!ca->mi.durability)
|
||||||
|
__clear_bit(ca->dev_idx, h->devs.d);
|
||||||
|
unsigned nr_devs_with_durability = dev_mask_nr(&h->devs);
|
||||||
|
|
||||||
|
h->blocksize = pick_blocksize(c, &h->devs);
|
||||||
|
|
||||||
|
h->nr_active_devs = 0;
|
||||||
|
for_each_member_device_rcu(c, ca, &h->devs)
|
||||||
|
if (ca->mi.bucket_size == h->blocksize)
|
||||||
|
h->nr_active_devs++;
|
||||||
|
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we only have redundancy + 1 devices, we're better off with just
|
||||||
|
* replication:
|
||||||
|
*/
|
||||||
|
h->insufficient_devs = h->nr_active_devs < h->redundancy + 2;
|
||||||
|
|
||||||
|
if (h->insufficient_devs) {
|
||||||
|
const char *err;
|
||||||
|
|
||||||
|
if (nr_devs < h->redundancy + 2)
|
||||||
|
err = NULL;
|
||||||
|
else if (nr_devs_with_durability < h->redundancy + 2)
|
||||||
|
err = "cannot use durability=0 devices";
|
||||||
|
else
|
||||||
|
err = "mismatched bucket sizes";
|
||||||
|
|
||||||
|
if (err)
|
||||||
|
bch_err(c, "insufficient devices available to create stripe (have %u, need %u): %s",
|
||||||
|
h->nr_active_devs, h->redundancy + 2, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (h->s && !h->s->allocated)
|
||||||
|
ec_stripe_new_cancel(c, h, -EINTR);
|
||||||
|
|
||||||
|
h->rw_devs_change_count = c->rw_devs_change_count;
|
||||||
|
}
|
||||||
|
|
||||||
static struct ec_stripe_head *
|
static struct ec_stripe_head *
|
||||||
ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
|
ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
|
||||||
unsigned algo, unsigned redundancy,
|
unsigned algo, unsigned redundancy,
|
||||||
enum bch_watermark watermark)
|
enum bch_watermark watermark)
|
||||||
{
|
{
|
||||||
@ -1705,34 +1782,11 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
|
|||||||
mutex_init(&h->lock);
|
mutex_init(&h->lock);
|
||||||
BUG_ON(!mutex_trylock(&h->lock));
|
BUG_ON(!mutex_trylock(&h->lock));
|
||||||
|
|
||||||
h->target = target;
|
h->disk_label = disk_label;
|
||||||
h->algo = algo;
|
h->algo = algo;
|
||||||
h->redundancy = redundancy;
|
h->redundancy = redundancy;
|
||||||
h->watermark = watermark;
|
h->watermark = watermark;
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
h->devs = target_rw_devs(c, BCH_DATA_user, target);
|
|
||||||
|
|
||||||
for_each_member_device_rcu(c, ca, &h->devs)
|
|
||||||
if (!ca->mi.durability)
|
|
||||||
__clear_bit(ca->dev_idx, h->devs.d);
|
|
||||||
|
|
||||||
h->blocksize = pick_blocksize(c, &h->devs);
|
|
||||||
|
|
||||||
for_each_member_device_rcu(c, ca, &h->devs)
|
|
||||||
if (ca->mi.bucket_size == h->blocksize)
|
|
||||||
h->nr_active_devs++;
|
|
||||||
|
|
||||||
rcu_read_unlock();
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If we only have redundancy + 1 devices, we're better off with just
|
|
||||||
* replication:
|
|
||||||
*/
|
|
||||||
if (h->nr_active_devs < h->redundancy + 2)
|
|
||||||
bch_err(c, "insufficient devices available to create stripe (have %u, need %u) - mismatched bucket sizes?",
|
|
||||||
h->nr_active_devs, h->redundancy + 2);
|
|
||||||
|
|
||||||
list_add(&h->list, &c->ec_stripe_head_list);
|
list_add(&h->list, &c->ec_stripe_head_list);
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
@ -1743,14 +1797,14 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
|
|||||||
h->s->allocated &&
|
h->s->allocated &&
|
||||||
bitmap_weight(h->s->blocks_allocated,
|
bitmap_weight(h->s->blocks_allocated,
|
||||||
h->s->nr_data) == h->s->nr_data)
|
h->s->nr_data) == h->s->nr_data)
|
||||||
ec_stripe_set_pending(c, h);
|
ec_stripe_new_set_pending(c, h);
|
||||||
|
|
||||||
mutex_unlock(&h->lock);
|
mutex_unlock(&h->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct ec_stripe_head *
|
static struct ec_stripe_head *
|
||||||
__bch2_ec_stripe_head_get(struct btree_trans *trans,
|
__bch2_ec_stripe_head_get(struct btree_trans *trans,
|
||||||
unsigned target,
|
unsigned disk_label,
|
||||||
unsigned algo,
|
unsigned algo,
|
||||||
unsigned redundancy,
|
unsigned redundancy,
|
||||||
enum bch_watermark watermark)
|
enum bch_watermark watermark)
|
||||||
@ -1768,27 +1822,32 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
|
|||||||
|
|
||||||
if (test_bit(BCH_FS_going_ro, &c->flags)) {
|
if (test_bit(BCH_FS_going_ro, &c->flags)) {
|
||||||
h = ERR_PTR(-BCH_ERR_erofs_no_writes);
|
h = ERR_PTR(-BCH_ERR_erofs_no_writes);
|
||||||
goto found;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
list_for_each_entry(h, &c->ec_stripe_head_list, list)
|
list_for_each_entry(h, &c->ec_stripe_head_list, list)
|
||||||
if (h->target == target &&
|
if (h->disk_label == disk_label &&
|
||||||
h->algo == algo &&
|
h->algo == algo &&
|
||||||
h->redundancy == redundancy &&
|
h->redundancy == redundancy &&
|
||||||
h->watermark == watermark) {
|
h->watermark == watermark) {
|
||||||
ret = bch2_trans_mutex_lock(trans, &h->lock);
|
ret = bch2_trans_mutex_lock(trans, &h->lock);
|
||||||
if (ret)
|
if (ret) {
|
||||||
h = ERR_PTR(ret);
|
h = ERR_PTR(ret);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
goto found;
|
goto found;
|
||||||
}
|
}
|
||||||
|
|
||||||
h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark);
|
h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark);
|
||||||
found:
|
found:
|
||||||
if (!IS_ERR_OR_NULL(h) &&
|
if (h->rw_devs_change_count != c->rw_devs_change_count)
|
||||||
h->nr_active_devs < h->redundancy + 2) {
|
ec_stripe_head_devs_update(c, h);
|
||||||
|
|
||||||
|
if (h->insufficient_devs) {
|
||||||
mutex_unlock(&h->lock);
|
mutex_unlock(&h->lock);
|
||||||
h = NULL;
|
h = NULL;
|
||||||
}
|
}
|
||||||
|
err:
|
||||||
mutex_unlock(&c->ec_stripe_head_lock);
|
mutex_unlock(&c->ec_stripe_head_lock);
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
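The stripe-head changes above stop computing the device set once at head allocation; instead each head caches the rw_devs_change_count it was built against, and __bch2_ec_stripe_head_get() calls ec_stripe_head_devs_update() only when that counter no longer matches the filesystem-wide one. A small userspace model of that change-counter invalidation idea, with invented names and a sum standing in for the derived device state:

#include <stdio.h>

static unsigned global_change_count;
static int devs[4] = { 10, 20, 30, 40 };

struct head {
	unsigned	change_count;	/* counter value the cache was built at */
	int		cached_sum;
};

static void devs_update(struct head *h)
{
	h->cached_sum = 0;
	for (int i = 0; i < 4; i++)
		h->cached_sum += devs[i];
	h->change_count = global_change_count;
	printf("recomputed: %d\n", h->cached_sum);
}

static int head_get(struct head *h)
{
	if (h->change_count != global_change_count)
		devs_update(h);
	return h->cached_sum;
}

int main(void)
{
	struct head h = { .change_count = (unsigned) -1 };

	head_get(&h);				/* first use: recomputes */
	head_get(&h);				/* unchanged: served from cache */
	devs[0] = 0;
	global_change_count++;			/* a device changed state */
	printf("sum: %d\n", head_get(&h));	/* recomputes lazily */
	return 0;
}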
@@ -1796,38 +1855,39 @@ found:
static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h,
enum bch_watermark watermark, struct closure *cl)
{
+struct ec_stripe_new *s = h->s;
struct bch_fs *c = trans->c;
struct bch_devs_mask devs = h->devs;
struct open_bucket *ob;
struct open_buckets buckets;
-struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
+struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
bool have_cache = true;
int ret = 0;

-BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity);
-BUG_ON(v->nr_redundant != h->s->nr_parity);
+BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity);
+BUG_ON(v->nr_redundant != s->nr_parity);

/* We bypass the sector allocator which normally does this: */
bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);

-for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) {
+for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) {
__clear_bit(v->ptrs[i].dev, devs.d);
-if (i < h->s->nr_data)
+if (i < s->nr_data)
nr_have_data++;
else
nr_have_parity++;
}

-BUG_ON(nr_have_data > h->s->nr_data);
-BUG_ON(nr_have_parity > h->s->nr_parity);
+BUG_ON(nr_have_data > s->nr_data);
+BUG_ON(nr_have_parity > s->nr_parity);

buckets.nr = 0;
-if (nr_have_parity < h->s->nr_parity) {
+if (nr_have_parity < s->nr_parity) {
ret = bch2_bucket_alloc_set_trans(trans, &buckets,
&h->parity_stripe,
&devs,
-h->s->nr_parity,
+s->nr_parity,
&nr_have_parity,
&have_cache, 0,
BCH_DATA_parity,
@@ -1835,14 +1895,14 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
cl);

open_bucket_for_each(c, &buckets, ob, i) {
-j = find_next_zero_bit(h->s->blocks_gotten,
-h->s->nr_data + h->s->nr_parity,
-h->s->nr_data);
-BUG_ON(j >= h->s->nr_data + h->s->nr_parity);
+j = find_next_zero_bit(s->blocks_gotten,
+s->nr_data + s->nr_parity,
+s->nr_data);
+BUG_ON(j >= s->nr_data + s->nr_parity);

-h->s->blocks[j] = buckets.v[i];
+s->blocks[j] = buckets.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
-__set_bit(j, h->s->blocks_gotten);
+__set_bit(j, s->blocks_gotten);
}

if (ret)
@@ -1850,11 +1910,11 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
}

buckets.nr = 0;
-if (nr_have_data < h->s->nr_data) {
+if (nr_have_data < s->nr_data) {
ret = bch2_bucket_alloc_set_trans(trans, &buckets,
&h->block_stripe,
&devs,
-h->s->nr_data,
+s->nr_data,
&nr_have_data,
&have_cache, 0,
BCH_DATA_user,
@@ -1862,13 +1922,13 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
cl);

open_bucket_for_each(c, &buckets, ob, i) {
-j = find_next_zero_bit(h->s->blocks_gotten,
-h->s->nr_data, 0);
-BUG_ON(j >= h->s->nr_data);
+j = find_next_zero_bit(s->blocks_gotten,
+s->nr_data, 0);
+BUG_ON(j >= s->nr_data);

-h->s->blocks[j] = buckets.v[i];
+s->blocks[j] = buckets.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
-__set_bit(j, h->s->blocks_gotten);
+__set_bit(j, s->blocks_gotten);
}

if (ret)
@@ -1878,7 +1938,6 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
return 0;
}

-/* XXX: doesn't obey target: */
static s64 get_existing_stripe(struct bch_fs *c,
struct ec_stripe_head *head)
{
@@ -1901,7 +1960,8 @@ static s64 get_existing_stripe(struct bch_fs *c,

m = genradix_ptr(&c->stripes, stripe_idx);

-if (m->algorithm == head->algo &&
+if (m->disk_label == head->disk_label &&
+m->algorithm == head->algo &&
m->nr_redundant == head->redundancy &&
m->sectors == head->blocksize &&
m->blocks_nonempty < m->nr_blocks - m->nr_redundant &&
@@ -1914,12 +1974,53 @@ static s64 get_existing_stripe(struct bch_fs *c,
return ret;
}

+static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new *s)
+{
+struct bch_stripe *new_v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
+struct bch_stripe *existing_v = &bkey_i_to_stripe(&s->existing_stripe.key)->v;
+unsigned i;
+
+BUG_ON(existing_v->nr_redundant != s->nr_parity);
+s->nr_data = existing_v->nr_blocks -
+existing_v->nr_redundant;
+
+int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors));
+if (ret) {
+bch2_stripe_close(c, s);
+return ret;
+}
+
+BUG_ON(s->existing_stripe.size != le16_to_cpu(existing_v->sectors));
+
+/*
+* Free buckets we initially allocated - they might conflict with
+* blocks from the stripe we're reusing:
+*/
+for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) {
+bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]);
+s->blocks[i] = 0;
+}
+memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten));
+memset(s->blocks_allocated, 0, sizeof(s->blocks_allocated));
+
+for (i = 0; i < existing_v->nr_blocks; i++) {
+if (stripe_blockcount_get(existing_v, i)) {
+__set_bit(i, s->blocks_gotten);
+__set_bit(i, s->blocks_allocated);
+}
+
+ec_block_io(c, &s->existing_stripe, READ, i, &s->iodone);
+}
+
+bkey_copy(&s->new_stripe.key, &s->existing_stripe.key);
+s->have_existing_stripe = true;
+
+return 0;
+}
+
static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h)
{
struct bch_fs *c = trans->c;
-struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
-struct bch_stripe *existing_v;
-unsigned i;
s64 idx;
int ret;

@@ -1939,45 +2040,7 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
return ret;
}

-existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v;
-
-BUG_ON(existing_v->nr_redundant != h->s->nr_parity);
-h->s->nr_data = existing_v->nr_blocks -
-existing_v->nr_redundant;
-
-ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize);
-if (ret) {
-bch2_stripe_close(c, h->s);
-return ret;
-}
-
-BUG_ON(h->s->existing_stripe.size != h->blocksize);
-BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors));
-
-/*
-* Free buckets we initially allocated - they might conflict with
-* blocks from the stripe we're reusing:
-*/
-for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) {
-bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]);
-h->s->blocks[i] = 0;
-}
-memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten));
-memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated));
-
-for (i = 0; i < existing_v->nr_blocks; i++) {
-if (stripe_blockcount_get(existing_v, i)) {
-__set_bit(i, h->s->blocks_gotten);
-__set_bit(i, h->s->blocks_allocated);
-}
-
-ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
-}
-
-bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key);
-h->s->have_existing_stripe = true;
-
-return 0;
+return init_new_stripe_from_existing(c, h->s);
}

static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h)
@@ -2046,9 +2109,19 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct ec_stripe_head *h;
bool waiting = false;
+unsigned disk_label = 0;
+struct target t = target_decode(target);
int ret;

-h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark);
+if (t.type == TARGET_GROUP) {
+if (t.group > U8_MAX) {
+bch_err(c, "cannot create a stripe when disk_label > U8_MAX");
+return NULL;
+}
+disk_label = t.group + 1; /* 0 == no label */
+}
+
+h = __bch2_ec_stripe_head_get(trans, disk_label, algo, redundancy, watermark);
if (IS_ERR_OR_NULL(h))
return h;

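
Note on the hunk above: a foreground target is translated to the stripe's one-byte disk_label, with group indices shifted by one so that zero can mean "no label". The following standalone C sketch (not bcachefs code; the helper names and the exact rejection bound are my own) illustrates that encode/decode convention:

#include <stdint.h>
#include <stdio.h>

/* Encode a target group index into a one-byte disk label; 0 means "none". */
static int group_to_disk_label(unsigned group, uint8_t *label)
{
        if (group > UINT8_MAX - 1)
                return -1;              /* cannot be represented in a __u8 */
        *label = (uint8_t) (group + 1); /* 0 == no label */
        return 0;
}

/* Decode back to a group index; returns -1 when no label is set. */
static int disk_label_to_group(uint8_t label)
{
        return label ? (int) label - 1 : -1;
}

int main(void)
{
        uint8_t label;

        if (!group_to_disk_label(3, &label))
                printf("group 3 -> disk_label %u -> group %d\n",
                       label, disk_label_to_group(label));
        return 0;
}
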
@@ -2126,6 +2199,79 @@ err:
return ERR_PTR(ret);
}

+/* device removal */
+
+static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_s_c k_a)
+{
+struct bch_alloc_v4 a_convert;
+const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k_a, &a_convert);
+
+if (!a->stripe)
+return 0;
+
+if (a->stripe_sectors) {
+bch_err(trans->c, "trying to invalidate device in stripe when bucket has stripe data");
+return -BCH_ERR_invalidate_stripe_to_dev;
+}
+
+struct btree_iter iter;
+struct bkey_i_stripe *s =
+bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe),
+BTREE_ITER_slots, stripe);
+int ret = PTR_ERR_OR_ZERO(s);
+if (ret)
+return ret;
+
+struct disk_accounting_pos acc = {
+.type = BCH_DISK_ACCOUNTING_replicas,
+};
+
+s64 sectors = 0;
+for (unsigned i = 0; i < s->v.nr_blocks; i++)
+sectors -= stripe_blockcount_get(&s->v, i);
+
+bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
+acc.replicas.data_type = BCH_DATA_user;
+ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
+if (ret)
+goto err;
+
+struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(&s->k_i));
+bkey_for_each_ptr(ptrs, ptr)
+if (ptr->dev == k_a.k->p.inode) {
+if (stripe_blockcount_get(&s->v, ptr - &ptrs.start->ptr)) {
+bch_err(trans->c, "trying to invalidate device in stripe when stripe block not empty");
+ret = -BCH_ERR_invalidate_stripe_to_dev;
+goto err;
+}
+ptr->dev = BCH_SB_MEMBER_INVALID;
+}
+
+sectors = -sectors;
+
+bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
+acc.replicas.data_type = BCH_DATA_user;
+ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
+if (ret)
+goto err;
+err:
+bch2_trans_iter_exit(trans, &iter);
+return ret;
+}
+
+int bch2_dev_remove_stripes(struct bch_fs *c, struct bch_dev *ca)
+{
+return bch2_trans_run(c,
+for_each_btree_key_upto_commit(trans, iter,
+BTREE_ID_alloc, POS(ca->dev_idx, 0), POS(ca->dev_idx, U64_MAX),
+BTREE_ITER_intent, k,
+NULL, NULL, 0, ({
+bch2_invalidate_stripe_to_dev(trans, k);
+})));
+}
+
+/* startup/shutdown */
+
static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
{
struct ec_stripe_head *h;
@@ -2151,8 +2297,7 @@ static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
}
goto unlock;
found:
-h->s->err = -BCH_ERR_erofs_no_writes;
-ec_stripe_set_pending(c, h);
+ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes);
unlock:
mutex_unlock(&h->lock);
}
@@ -2193,21 +2338,13 @@ int bch2_stripes_read(struct bch_fs *c)
if (k.k->type != KEY_TYPE_stripe)
continue;

-ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
+ret = __ec_stripe_mem_alloc(c, k.k->p.offset);
if (ret)
break;

-const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-
struct stripe *m = genradix_ptr(&c->stripes, k.k->p.offset);
-m->sectors = le16_to_cpu(s->sectors);
-m->algorithm = s->algorithm;
-m->nr_blocks = s->nr_blocks;
-m->nr_redundant = s->nr_redundant;
-m->blocks_nonempty = 0;
-
-for (unsigned i = 0; i < s->nr_blocks; i++)
-m->blocks_nonempty += !!stripe_blockcount_get(s, i);
+stripe_to_mem(m, bkey_s_c_to_stripe(k).v);

bch2_stripes_heap_insert(c, m, k.k->p.offset);
0;
@@ -2252,6 +2389,8 @@ static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c,
for_each_set_bit(i, s->blocks_gotten, v->nr_blocks)
prt_printf(out, " %u", s->blocks[i]);
prt_newline(out);
+bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&s->new_stripe.key));
+prt_newline(out);
}

void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
@@ -2261,9 +2400,10 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)

mutex_lock(&c->ec_stripe_head_lock);
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
-prt_printf(out, "target %u algo %u redundancy %u %s:\n",
-h->target, h->algo, h->redundancy,
-bch2_watermarks[h->watermark]);
+prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n",
+h->disk_label, h->algo, h->redundancy,
+bch2_watermarks[h->watermark],
+h->nr_created);

if (h->s)
bch2_new_stripe_to_text(out, c, h->s);
@@ -97,7 +97,9 @@ static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe
const struct bch_extent_ptr *data_ptr,
unsigned sectors)
{
-return data_ptr->dev == stripe_ptr->dev &&
+return (data_ptr->dev == stripe_ptr->dev ||
+data_ptr->dev == BCH_SB_MEMBER_INVALID ||
+stripe_ptr->dev == BCH_SB_MEMBER_INVALID) &&
data_ptr->gen == stripe_ptr->gen &&
data_ptr->offset >= stripe_ptr->offset &&
data_ptr->offset < stripe_ptr->offset + sectors;
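
A short illustration of the widened match rule above: the two device fields must agree, or either side may have been invalidated (as happens when a device is removed), and the data pointer's offset must fall inside the [offset, offset + sectors) range of that stripe block. A standalone sketch with assumed stand-in types, not the bcachefs implementation:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define MEMBER_INVALID 255      /* stand-in for BCH_SB_MEMBER_INVALID */

struct ptr {
        unsigned dev;
        uint8_t gen;
        uint64_t offset;
};

static bool ptr_matches_stripe_block(const struct ptr *stripe_ptr,
                                     const struct ptr *data_ptr,
                                     unsigned sectors)
{
        return (data_ptr->dev == stripe_ptr->dev ||
                data_ptr->dev == MEMBER_INVALID ||
                stripe_ptr->dev == MEMBER_INVALID) &&
               data_ptr->gen == stripe_ptr->gen &&
               data_ptr->offset >= stripe_ptr->offset &&
               data_ptr->offset < stripe_ptr->offset + sectors;
}

int main(void)
{
        struct ptr s = { .dev = 3, .gen = 7, .offset = 1024 };
        struct ptr d = { .dev = MEMBER_INVALID, .gen = 7, .offset = 1030 };

        /* an invalidated data device still matches the stripe block */
        assert(ptr_matches_stripe_block(&s, &d, 128));
        return 0;
}
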
@@ -186,10 +188,15 @@ struct ec_stripe_head {
struct list_head list;
struct mutex lock;

-unsigned target;
+unsigned disk_label;
unsigned algo;
unsigned redundancy;
enum bch_watermark watermark;
+bool insufficient_devs;
+
+unsigned long rw_devs_change_count;
+
+u64 nr_created;

struct bch_devs_mask devs;
unsigned nr_active_devs;
@@ -202,7 +209,7 @@ struct ec_stripe_head {
struct ec_stripe_new *s;
};

-int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *);
+int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *, struct bkey_s_c);

void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *);

@@ -247,6 +254,8 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
}
}

+int bch2_dev_remove_stripes(struct bch_fs *, struct bch_dev *);
+
void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
void bch2_fs_ec_stop(struct bch_fs *);
void bch2_fs_ec_flush(struct bch_fs *);
@@ -11,7 +11,14 @@ struct bch_stripe {

__u8 csum_granularity_bits;
__u8 csum_type;
-__u8 pad;
+
+/*
+* XXX: targets should be 16 bits - fix this if we ever do a stripe_v2
+*
+* we can manage with this because this only needs to point to a
+* disk label, not a target:
+*/
+__u8 disk_label;

struct bch_extent_ptr ptrs[];
} __packed __aligned(8);
@@ -16,6 +16,7 @@ struct stripe {
u8 nr_blocks;
u8 nr_redundant;
u8 blocks_nonempty;
+u8 disk_label;
};

struct gc_stripe {
@@ -119,8 +119,8 @@
x(EEXIST, EEXIST_str_hash_set) \
x(EEXIST, EEXIST_discard_in_flight_add) \
x(EEXIST, EEXIST_subvolume_create) \
-x(0, open_buckets_empty) \
-x(0, freelist_empty) \
+x(ENOSPC, open_buckets_empty) \
+x(ENOSPC, freelist_empty) \
x(BCH_ERR_freelist_empty, no_buckets_found) \
x(0, transaction_restart) \
x(BCH_ERR_transaction_restart, transaction_restart_fault_inject) \
@@ -244,6 +244,16 @@
x(EIO, btree_node_read_error) \
x(EIO, btree_node_read_validate_error) \
x(EIO, btree_need_topology_repair) \
+x(EIO, bucket_ref_update) \
+x(EIO, trigger_pointer) \
+x(EIO, trigger_stripe_pointer) \
+x(EIO, metadata_bucket_inconsistency) \
+x(EIO, mark_stripe) \
+x(EIO, stripe_reconstruct) \
+x(EIO, key_type_error) \
+x(EIO, no_device_to_read_from) \
+x(EIO, missing_indirect_extent) \
+x(EIO, invalidate_stripe_to_dev) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
@@ -257,7 +267,6 @@
x(BCH_ERR_nopromote, nopromote_in_flight) \
x(BCH_ERR_nopromote, nopromote_no_writes) \
x(BCH_ERR_nopromote, nopromote_enomem) \
-x(0, need_inode_lock) \
x(0, invalid_snapshot_node) \
x(0, option_needs_open_fs)

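The error table extended above is an x-macro list: each x(class, name) entry declares a private error code together with the class it maps to. A standalone sketch (all macro and symbol names below are illustrative, not the real bcachefs definitions) of how one such list can expand into both an enum and a string table, so the two never drift apart:

#include <stdio.h>

#define MY_ERRS()                       \
        x(EIO,    bucket_ref_update)    \
        x(EIO,    trigger_pointer)      \
        x(ENOSPC, open_buckets_empty)

enum my_err {
        MY_ERR_START = 512,
#define x(class, name)  MY_ERR_##name,
        MY_ERRS()
#undef x
        MY_ERR_MAX
};

static const char * const my_err_strs[] = {
#define x(class, name)  [MY_ERR_##name - MY_ERR_START - 1] = #name,
        MY_ERRS()
#undef x
};

int main(void)
{
        /* prints "trigger_pointer" */
        printf("%s\n", my_err_strs[MY_ERR_trigger_pointer - MY_ERR_START - 1]);
        return 0;
}
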
@@ -115,7 +115,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
int ret = 0;

if (k.k->type == KEY_TYPE_error)
-return -EIO;
+return -BCH_ERR_key_type_error;

rcu_read_lock();
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
@@ -133,7 +133,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
* read:
*/
if (!ret && !p.ptr.cached)
-ret = -EIO;
+ret = -BCH_ERR_no_device_to_read_from;

struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);

@@ -146,16 +146,13 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
? f->idx
: f->idx + 1;

-if (!p.idx && !ca)
+if (!p.idx && (!ca || !bch2_dev_is_readable(ca)))
p.idx++;

if (!p.idx && p.has_ec && bch2_force_reconstruct_read)
p.idx++;

-if (!p.idx && !bch2_dev_is_readable(ca))
-p.idx++;
-
-if (p.idx >= (unsigned) p.has_ec + 1)
+if (p.idx > (unsigned) p.has_ec)
continue;

if (ret > 0 && !ptr_better(c, p, *pick))
@@ -781,14 +778,17 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
/*
* Returns pointer to the next entry after the one being dropped:
*/
-union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k,
-struct bch_extent_ptr *ptr)
+void bch2_bkey_drop_ptr_noerror(struct bkey_s k, struct bch_extent_ptr *ptr)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry = to_entry(ptr), *next;
-union bch_extent_entry *ret = entry;
bool drop_crc = true;

+if (k.k->type == KEY_TYPE_stripe) {
+ptr->dev = BCH_SB_MEMBER_INVALID;
+return;
+}
+
EBUG_ON(ptr < &ptrs.start->ptr ||
ptr >= &ptrs.end->ptr);
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
@@ -811,20 +811,27 @@ union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k,
break;

if ((extent_entry_is_crc(entry) && drop_crc) ||
-extent_entry_is_stripe_ptr(entry)) {
-ret = (void *) ret - extent_entry_bytes(entry);
+extent_entry_is_stripe_ptr(entry))
extent_entry_drop(k, entry);
}
-}
-
-return ret;
}

-union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
-struct bch_extent_ptr *ptr)
+void bch2_bkey_drop_ptr(struct bkey_s k, struct bch_extent_ptr *ptr)
{
+if (k.k->type != KEY_TYPE_stripe) {
+struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k.s_c);
+const union bch_extent_entry *entry;
+struct extent_ptr_decoded p;
+
+bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+if (p.ptr.dev == ptr->dev && p.has_ec) {
+ptr->dev = BCH_SB_MEMBER_INVALID;
+return;
+}
+}
+
bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr;
-union bch_extent_entry *ret =
bch2_bkey_drop_ptr_noerror(k, ptr);

/*
@@ -837,14 +844,10 @@ union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
!bch2_bkey_dirty_devs(k.s_c).nr) {
k.k->type = KEY_TYPE_error;
set_bkey_val_u64s(k.k, 0);
-ret = NULL;
} else if (!bch2_bkey_nr_ptrs(k.s_c)) {
k.k->type = KEY_TYPE_deleted;
set_bkey_val_u64s(k.k, 0);
-ret = NULL;
}

-return ret;
}

void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
@@ -854,10 +857,7 @@ void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)

void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
{
-struct bch_extent_ptr *ptr = bch2_bkey_has_device(k, dev);
-
-if (ptr)
-bch2_bkey_drop_ptr_noerror(k, ptr);
+bch2_bkey_drop_ptrs_noerror(k, ptr, ptr->dev == dev);
}

const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev)
@@ -929,8 +929,29 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
if (p1.ptr.dev == p2.ptr.dev &&
p1.ptr.gen == p2.ptr.gen &&
+
+/*
+* This checks that the two pointers point
+* to the same region on disk - adjusting
+* for the difference in where the extents
+* start, since one may have been trimmed:
+*/
(s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
-(s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+(s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k) &&
+
+/*
+* This additionally checks that the
+* extents overlap on disk, since the
+* previous check may trigger spuriously
+* when one extent is immediately partially
+* overwritten with another extent (so that
+* on disk they are adjacent) and
+* compression is in use:
+*/
+((p1.ptr.offset >= p2.ptr.offset &&
+p1.ptr.offset < p2.ptr.offset + p2.crc.compressed_size) ||
+(p2.ptr.offset >= p1.ptr.offset &&
+p2.ptr.offset < p1.ptr.offset + p1.crc.compressed_size)))
return true;

return false;
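
The second added condition above is a plain interval-overlap test on the physical ranges covered by the two pointers. A standalone sketch of that test, with made-up parameter names:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* [o1, o1 + s1) and [o2, o2 + s2) overlap iff each starts before the other ends. */
static bool ranges_overlap(uint64_t o1, uint32_t s1, uint64_t o2, uint32_t s2)
{
        return (o1 >= o2 && o1 < o2 + s2) ||
               (o2 >= o1 && o2 < o1 + s1);
}

int main(void)
{
        assert(ranges_overlap(100, 8, 104, 8));         /* partial overlap */
        assert(!ranges_overlap(100, 8, 108, 8));        /* adjacent, no overlap */
        return 0;
}
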
@@ -611,9 +611,6 @@ unsigned bch2_extent_ptr_desired_durability(struct bch_fs *, struct extent_ptr_d
unsigned bch2_extent_ptr_durability(struct bch_fs *, struct extent_ptr_decoded *);
unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);

-void bch2_bkey_drop_device(struct bkey_s, unsigned);
-void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned);
-
const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c, unsigned);

static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsigned dev)
@@ -649,25 +646,37 @@ static inline void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr

void bch2_extent_ptr_decoded_append(struct bkey_i *,
struct extent_ptr_decoded *);
-union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s,
-struct bch_extent_ptr *);
-union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
-struct bch_extent_ptr *);
+void bch2_bkey_drop_ptr_noerror(struct bkey_s, struct bch_extent_ptr *);
+void bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *);
+
+void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned);
+void bch2_bkey_drop_device(struct bkey_s, unsigned);
+
+#define bch2_bkey_drop_ptrs_noerror(_k, _ptr, _cond) \
+do { \
+__label__ _again; \
+struct bkey_ptrs _ptrs; \
+_again: \
+_ptrs = bch2_bkey_ptrs(_k); \
+\
+bkey_for_each_ptr(_ptrs, _ptr) \
+if (_cond) { \
+bch2_bkey_drop_ptr_noerror(_k, _ptr); \
+goto _again; \
+} \
+} while (0)

#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \
do { \
-struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \
-\
-struct bch_extent_ptr *_ptr = &_ptrs.start->ptr; \
-\
-while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \
-if (_cond) { \
-_ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \
-_ptrs = bch2_bkey_ptrs(_k); \
-continue; \
-} \
-\
-(_ptr)++; \
-} \
+__label__ _again; \
+struct bkey_ptrs _ptrs; \
+_again: \
+_ptrs = bch2_bkey_ptrs(_k); \
+\
+bkey_for_each_ptr(_ptrs, _ptr) \
+if (_cond) { \
+bch2_bkey_drop_ptr(_k, _ptr); \
+goto _again; \
+} \
} while (0)
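
Both drop-ptrs macros above now restart the walk from the top after every drop instead of patching up the iteration cursor, using a block-local label (a GCC/Clang extension, so the label stays private to each macro expansion). A standalone sketch of the same drop-and-restart pattern applied to a plain array, under the assumption that dropping an element invalidates the current iteration state:

#include <stdio.h>

struct vec {
        int nr;
        int d[8];
};

static void vec_drop(struct vec *v, int idx)
{
        for (int i = idx; i + 1 < v->nr; i++)
                v->d[i] = v->d[i + 1];
        v->nr--;
}

/* Drop every element matching _cond, restarting the scan after each drop. */
#define vec_drop_if(_v, _i, _cond)                              \
do {                                                            \
        __label__ _again;                                       \
_again:                                                         \
        for (int _i = 0; _i < (_v)->nr; _i++)                   \
                if (_cond) {                                    \
                        vec_drop(_v, _i);                       \
                        goto _again;                            \
                }                                               \
} while (0)

int main(void)
{
        struct vec v = { 5, { 1, 2, 3, 4, 5 } };

        vec_drop_if(&v, i, v.d[i] % 2 == 0);    /* drop the even entries */
        for (int i = 0; i < v.nr; i++)
                printf("%d ", v.d[i]);
        printf("\n");                           /* prints: 1 3 5 */
        return 0;
}
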
@@ -42,7 +42,8 @@ int bch2_create_trans(struct btree_trans *trans,
if (ret)
goto err;

-ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_intent);
+ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir,
+BTREE_ITER_intent|BTREE_ITER_with_updates);
if (ret)
goto err;

@@ -163,7 +164,7 @@ int bch2_create_trans(struct btree_trans *trans,
name,
dir_target,
&dir_offset,
-STR_HASH_must_create);
+STR_HASH_must_create|BTREE_ITER_with_updates);
if (ret)
goto err;

@@ -791,8 +791,7 @@ static noinline void folios_trunc(folios *fs, struct folio **fi)
static int __bch2_buffered_write(struct bch_inode_info *inode,
struct address_space *mapping,
struct iov_iter *iter,
-loff_t pos, unsigned len,
-bool inode_locked)
+loff_t pos, unsigned len)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation res;
@@ -816,15 +815,6 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,

BUG_ON(!fs.nr);

-/*
-* If we're not using the inode lock, we need to lock all the folios for
-* atomiticity of writes vs. other writes:
-*/
-if (!inode_locked && folio_end_pos(darray_last(fs)) < end) {
-ret = -BCH_ERR_need_inode_lock;
-goto out;
-}
-
f = darray_first(fs);
if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
ret = bch2_read_single_folio(f, mapping);
@@ -921,10 +911,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
end = pos + copied;

spin_lock(&inode->v.i_lock);
-if (end > inode->v.i_size) {
-BUG_ON(!inode_locked);
+if (end > inode->v.i_size)
i_size_write(&inode->v, end);
-}
spin_unlock(&inode->v.i_lock);

f_pos = pos;
@@ -968,68 +956,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct bch_inode_info *inode = file_bch_inode(file);
-loff_t pos;
-bool inode_locked = false;
-ssize_t written = 0, written2 = 0, ret = 0;
+loff_t pos = iocb->ki_pos;
+ssize_t written = 0;
+int ret = 0;

-/*
-* We don't take the inode lock unless i_size will be changing. Folio
-* locks provide exclusion with other writes, and the pagecache add lock
-* provides exclusion with truncate and hole punching.
-*
-* There is one nasty corner case where atomicity would be broken
-* without great care: when copying data from userspace to the page
-* cache, we do that with faults disable - a page fault would recurse
-* back into the filesystem, taking filesystem locks again, and
-* deadlock; so it's done with faults disabled, and we fault in the user
-* buffer when we aren't holding locks.
-*
-* If we do part of the write, but we then race and in the userspace
-* buffer have been evicted and are no longer resident, then we have to
-* drop our folio locks to re-fault them in, breaking write atomicity.
-*
-* To fix this, we restart the write from the start, if we weren't
-* holding the inode lock.
-*
-* There is another wrinkle after that; if we restart the write from the
-* start, and then get an unrecoverable error, we _cannot_ claim to
-* userspace that we did not write data we actually did - so we must
-* track (written2) the most we ever wrote.
-*/
-
-if ((iocb->ki_flags & IOCB_APPEND) ||
-(iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) {
-inode_lock(&inode->v);
-inode_locked = true;
-}
-
-ret = generic_write_checks(iocb, iter);
-if (ret <= 0)
-goto unlock;
-
-ret = file_remove_privs_flags(file, !inode_locked ? IOCB_NOWAIT : 0);
-if (ret) {
-if (!inode_locked) {
-inode_lock(&inode->v);
-inode_locked = true;
-ret = file_remove_privs_flags(file, 0);
-}
-if (ret)
-goto unlock;
-}
-
-ret = file_update_time(file);
-if (ret)
-goto unlock;
-
-pos = iocb->ki_pos;
-
bch2_pagecache_add_get(inode);

-if (!inode_locked &&
-(iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v)))
-goto get_inode_lock;
-
do {
unsigned offset = pos & (PAGE_SIZE - 1);
unsigned bytes = iov_iter_count(iter);
@@ -1054,17 +986,12 @@ again:
}
}

-if (unlikely(bytes != iov_iter_count(iter) && !inode_locked))
-goto get_inode_lock;
-
if (unlikely(fatal_signal_pending(current))) {
ret = -EINTR;
break;
}

-ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes, inode_locked);
-if (ret == -BCH_ERR_need_inode_lock)
-goto get_inode_lock;
+ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes);
if (unlikely(ret < 0))
break;

@@ -1085,46 +1012,50 @@ again:
}
pos += ret;
written += ret;
-written2 = max(written, written2);
-
-if (ret != bytes && !inode_locked)
-goto get_inode_lock;
ret = 0;

balance_dirty_pages_ratelimited(mapping);

-if (0) {
-get_inode_lock:
-bch2_pagecache_add_put(inode);
-inode_lock(&inode->v);
-inode_locked = true;
-bch2_pagecache_add_get(inode);
-
-iov_iter_revert(iter, written);
-pos -= written;
-written = 0;
-ret = 0;
-}
} while (iov_iter_count(iter));

bch2_pagecache_add_put(inode);
-unlock:
-if (inode_locked)
-inode_unlock(&inode->v);

-iocb->ki_pos += written;
+return written ? written : ret;

-ret = max(written, written2) ?: ret;
-if (ret > 0)
-ret = generic_write_sync(iocb, ret);
-return ret;
}

-ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
-ssize_t ret = iocb->ki_flags & IOCB_DIRECT
-? bch2_direct_write(iocb, iter)
-: bch2_buffered_write(iocb, iter);
+struct file *file = iocb->ki_filp;
+struct bch_inode_info *inode = file_bch_inode(file);
+ssize_t ret;
+
+if (iocb->ki_flags & IOCB_DIRECT) {
+ret = bch2_direct_write(iocb, from);
+goto out;
+}
+
+inode_lock(&inode->v);
+
+ret = generic_write_checks(iocb, from);
+if (ret <= 0)
+goto unlock;
+
+ret = file_remove_privs(file);
+if (ret)
+goto unlock;
+
+ret = file_update_time(file);
+if (ret)
+goto unlock;
+
+ret = bch2_buffered_write(iocb, from);
+if (likely(ret > 0))
+iocb->ki_pos += ret;
+unlock:
+inode_unlock(&inode->v);
+
+if (ret > 0)
+ret = generic_write_sync(iocb, ret);
+out:
return bch2_err_class(ret);
}

libbcachefs/fs.c (122 changed lines)
@@ -273,14 +273,6 @@ retry:
}
}

-#define memalloc_flags_do(_flags, _do) \
-({ \
-unsigned _saved_flags = memalloc_flags_save(_flags); \
-typeof(_do) _ret = _do; \
-memalloc_noreclaim_restore(_saved_flags); \
-_ret; \
-})
-
static struct inode *bch2_alloc_inode(struct super_block *sb)
{
BUG();
@@ -380,6 +372,8 @@ __bch2_create(struct mnt_idmap *idmap,
subvol_inum inum;
struct bch_subvolume subvol;
u64 journal_seq = 0;
+kuid_t kuid;
+kgid_t kgid;
int ret;

/*
@@ -406,13 +400,15 @@ __bch2_create(struct mnt_idmap *idmap,
retry:
bch2_trans_begin(trans);

+kuid = mapped_fsuid(idmap, i_user_ns(&dir->v));
+kgid = mapped_fsgid(idmap, i_user_ns(&dir->v));
ret = bch2_subvol_is_ro_trans(trans, dir->ei_inum.subvol) ?:
bch2_create_trans(trans,
inode_inum(dir), &dir_u, &inode_u,
!(flags & BCH_CREATE_TMPFILE)
? &dentry->d_name : NULL,
-from_kuid(i_user_ns(&dir->v), current_fsuid()),
-from_kgid(i_user_ns(&dir->v), current_fsgid()),
+from_kuid(i_user_ns(&dir->v), kuid),
+from_kgid(i_user_ns(&dir->v), kgid),
mode, rdev,
default_acl, acl, snapshot_src, flags) ?:
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
@@ -727,15 +723,16 @@ static int bch2_rename2(struct mnt_idmap *idmap,
struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
struct bch_inode_unpacked dst_dir_u, src_dir_u;
-struct bch_inode_unpacked src_inode_u, dst_inode_u;
+struct bch_inode_unpacked src_inode_u, dst_inode_u, *whiteout_inode_u;
struct btree_trans *trans;
enum bch_rename_mode mode = flags & RENAME_EXCHANGE
? BCH_RENAME_EXCHANGE
: dst_dentry->d_inode
? BCH_RENAME_OVERWRITE : BCH_RENAME;
+bool whiteout = !!(flags & RENAME_WHITEOUT);
int ret;

-if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
+if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE|RENAME_WHITEOUT))
return -EINVAL;

if (mode == BCH_RENAME_OVERWRITE) {
@@ -776,18 +773,48 @@ static int bch2_rename2(struct mnt_idmap *idmap,
if (ret)
goto err;
}
+retry:
+bch2_trans_begin(trans);

-ret = commit_do(trans, NULL, NULL, 0,
-bch2_rename_trans(trans,
+ret = bch2_rename_trans(trans,
inode_inum(src_dir), &src_dir_u,
inode_inum(dst_dir), &dst_dir_u,
&src_inode_u,
&dst_inode_u,
&src_dentry->d_name,
&dst_dentry->d_name,
-mode));
+mode);
if (unlikely(ret))
+goto err_tx_restart;
+
+if (whiteout) {
+whiteout_inode_u = bch2_trans_kmalloc_nomemzero(trans, sizeof(*whiteout_inode_u));
+ret = PTR_ERR_OR_ZERO(whiteout_inode_u);
+if (unlikely(ret))
+goto err_tx_restart;
+bch2_inode_init_early(c, whiteout_inode_u);
+
+ret = bch2_create_trans(trans,
+inode_inum(src_dir), &src_dir_u,
+whiteout_inode_u,
+&src_dentry->d_name,
+from_kuid(i_user_ns(&src_dir->v), current_fsuid()),
+from_kgid(i_user_ns(&src_dir->v), current_fsgid()),
+S_IFCHR|WHITEOUT_MODE, 0,
+NULL, NULL, (subvol_inum) { 0 }, 0) ?:
+bch2_quota_acct(c, bch_qid(whiteout_inode_u), Q_INO, 1,
+KEY_TYPE_QUOTA_PREALLOC);
+if (unlikely(ret))
+goto err_tx_restart;
+}
+
+ret = bch2_trans_commit(trans, NULL, NULL, 0);
+if (unlikely(ret)) {
+err_tx_restart:
+if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+goto retry;
goto err;
+}

BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
BUG_ON(dst_inode &&
@@ -835,11 +862,17 @@ static void bch2_setattr_copy(struct mnt_idmap *idmap,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
unsigned int ia_valid = attr->ia_valid;
+kuid_t kuid;
+kgid_t kgid;

-if (ia_valid & ATTR_UID)
-bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
-if (ia_valid & ATTR_GID)
-bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
+if (ia_valid & ATTR_UID) {
+kuid = from_vfsuid(idmap, i_user_ns(&inode->v), attr->ia_vfsuid);
+bi->bi_uid = from_kuid(i_user_ns(&inode->v), kuid);
+}
+if (ia_valid & ATTR_GID) {
+kgid = from_vfsgid(idmap, i_user_ns(&inode->v), attr->ia_vfsgid);
+bi->bi_gid = from_kgid(i_user_ns(&inode->v), kgid);
+}

if (ia_valid & ATTR_SIZE)
bi->bi_size = attr->ia_size;
@@ -854,11 +887,11 @@ static void bch2_setattr_copy(struct mnt_idmap *idmap,
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
kgid_t gid = ia_valid & ATTR_GID
-? attr->ia_gid
+? kgid
: inode->v.i_gid;

-if (!in_group_p(gid) &&
-!capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
+if (!in_group_or_capable(idmap, &inode->v,
+make_vfsgid(idmap, i_user_ns(&inode->v), gid)))
mode &= ~S_ISGID;
bi->bi_mode = mode;
}
@@ -874,17 +907,23 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
struct btree_iter inode_iter = { NULL };
struct bch_inode_unpacked inode_u;
struct posix_acl *acl = NULL;
+kuid_t kuid;
+kgid_t kgid;
int ret;

mutex_lock(&inode->ei_update_lock);

qid = inode->ei_qid;

-if (attr->ia_valid & ATTR_UID)
-qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
+if (attr->ia_valid & ATTR_UID) {
+kuid = from_vfsuid(idmap, i_user_ns(&inode->v), attr->ia_vfsuid);
+qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), kuid);
+}

-if (attr->ia_valid & ATTR_GID)
-qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
+if (attr->ia_valid & ATTR_GID) {
+kgid = from_vfsgid(idmap, i_user_ns(&inode->v), attr->ia_vfsgid);
+qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), kgid);
+}

ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
KEY_TYPE_QUOTA_PREALLOC);
@@ -940,13 +979,15 @@ static int bch2_getattr(struct mnt_idmap *idmap,
{
struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
struct bch_fs *c = inode->v.i_sb->s_fs_info;
+vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, &inode->v);
+vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, &inode->v);

stat->dev = inode->v.i_sb->s_dev;
stat->ino = inode->v.i_ino;
stat->mode = inode->v.i_mode;
stat->nlink = inode->v.i_nlink;
-stat->uid = inode->v.i_uid;
-stat->gid = inode->v.i_gid;
+stat->uid = vfsuid_into_kuid(vfsuid);
+stat->gid = vfsgid_into_kgid(vfsgid);
stat->rdev = inode->v.i_rdev;
stat->size = i_size_read(&inode->v);
stat->atime = inode_get_atime(&inode->v);
@@ -1865,30 +1906,13 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
static int bch2_show_options(struct seq_file *seq, struct dentry *root)
{
struct bch_fs *c = root->d_sb->s_fs_info;
-enum bch_opt_id i;
struct printbuf buf = PRINTBUF;
-int ret = 0;

-for (i = 0; i < bch2_opts_nr; i++) {
-const struct bch_option *opt = &bch2_opt_table[i];
-u64 v = bch2_opt_get_by_id(&c->opts, i);
-
-if ((opt->flags & OPT_HIDDEN) ||
-!(opt->flags & OPT_MOUNT))
-continue;
-
-if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
-continue;
-
-printbuf_reset(&buf);
-bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v,
-OPT_SHOW_MOUNT_STYLE);
-seq_putc(seq, ',');
+bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb,
+OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE);
seq_puts(seq, buf.buf);
-}

-if (buf.allocation_failure)
-ret = -ENOMEM;
+int ret = buf.allocation_failure ? -ENOMEM : 0;
printbuf_exit(&buf);
return ret;
}
@@ -2209,7 +2233,7 @@ static struct file_system_type bcache_fs_type = {
.name = "bcachefs",
.init_fs_context = bch2_init_fs_context,
.kill_sb = bch2_kill_sb,
-.fs_flags = FS_REQUIRES_DEV,
+.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};

MODULE_ALIAS_FS("bcachefs");
@@ -777,7 +777,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
orig_k->k->k.size,
reflink_offset);
bch2_inconsistent_error(trans->c);
-ret = -EIO;
+ret = -BCH_ERR_missing_indirect_extent;
goto err;
}

@@ -869,9 +869,15 @@ retry_pick:
goto hole;

if (pick_ret < 0) {
+struct printbuf buf = PRINTBUF;
+bch2_bkey_val_to_text(&buf, c, k);
+
bch_err_inum_offset_ratelimited(c,
read_pos.inode, read_pos.offset << 9,
-"no device to read from");
+"no device to read from: %s\n %s",
+bch2_err_str(pick_ret),
+buf.buf);
+printbuf_exit(&buf);
goto err;
}

@@ -1086,7 +1092,7 @@ get_bio:
trans->notrace_relock_fail = true;
} else {
/* Attempting reconstruct read: */
-if (bch2_ec_read_extent(trans, rbio)) {
+if (bch2_ec_read_extent(trans, rbio, k)) {
bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
goto out;
}
@@ -1447,9 +1447,7 @@ again:
op->nr_replicas_required,
op->watermark,
op->flags,
-(op->flags & (BCH_WRITE_ALLOC_NOWAIT|
-BCH_WRITE_ONLY_SPECIFIED_DEVS))
-? NULL : &op->cl, &wp));
+&op->cl, &wp));
if (unlikely(ret)) {
if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
break;
@@ -1592,6 +1590,9 @@ CLOSURE_CALLBACK(bch2_write)
BUG_ON(!op->write_point.v);
BUG_ON(bkey_eq(op->pos, POS_MAX));

+if (op->flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)
+op->flags |= BCH_WRITE_ALLOC_NOWAIT;
+
op->nr_replicas_required = min_t(unsigned, op->nr_replicas_required, op->nr_replicas);
op->start_time = local_clock();
bch2_keylist_init(&op->insert_keys, op->inline_keys);
@@ -1353,6 +1353,7 @@ int bch2_journal_read(struct bch_fs *c,
genradix_for_each(&c->journal_entries, radix_iter, _i) {
struct bch_replicas_padded replicas = {
.e.data_type = BCH_DATA_journal,
+.e.nr_devs = 0,
.e.nr_required = 1,
};

@@ -1379,7 +1380,7 @@ int bch2_journal_read(struct bch_fs *c,
goto err;

darray_for_each(i->ptrs, ptr)
-replicas.e.devs[replicas.e.nr_devs++] = ptr->dev;
+replicas_entry_add_dev(&replicas.e, ptr->dev);

bch2_replicas_entry_sort(&replicas.e);

@@ -641,6 +641,7 @@ static u64 journal_seq_to_flush(struct journal *j)
static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
+struct btree_cache *bc = &c->btree_cache;
bool kthread = (current->flags & PF_KTHREAD) != 0;
u64 seq_to_flush;
size_t min_nr, min_key_cache, nr_flushed;
@@ -681,7 +682,8 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
if (j->watermark != BCH_WATERMARK_stripe)
min_nr = 1;

-if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
+size_t btree_cache_live = bc->live[0].nr + bc->live[1].nr;
+if (atomic_long_read(&bc->nr_dirty) * 2 > btree_cache_live)
min_nr = 1;

min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128);
@@ -689,8 +691,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
trace_and_count(c, journal_reclaim_start, c,
direct, kicked,
min_nr, min_key_cache,
-atomic_read(&c->btree_cache.dirty),
-c->btree_cache.used,
+atomic_long_read(&bc->nr_dirty), btree_cache_live,
atomic_long_read(&c->btree_key_cache.nr_dirty),
atomic_long_read(&c->btree_key_cache.nr_keys));

@ -432,6 +432,9 @@ void bch2_opt_to_text(struct printbuf *out,
|
|||||||
else
|
else
|
||||||
prt_str(out, opt->choices[v]);
|
prt_str(out, opt->choices[v]);
|
||||||
break;
|
break;
|
||||||
|
case BCH_OPT_BITFIELD:
|
||||||
|
prt_bitflags(out, opt->choices, v);
|
||||||
|
break;
|
||||||
case BCH_OPT_FN:
|
case BCH_OPT_FN:
|
||||||
opt->fn.to_text(out, c, sb, v);
|
opt->fn.to_text(out, c, sb, v);
|
||||||
break;
|
break;
|
||||||
@ -440,6 +443,32 @@ void bch2_opt_to_text(struct printbuf *out,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void bch2_opts_to_text(struct printbuf *out,
|
||||||
|
struct bch_opts opts,
|
||||||
|
struct bch_fs *c, struct bch_sb *sb,
|
||||||
|
unsigned show_mask, unsigned hide_mask,
|
||||||
|
unsigned flags)
|
||||||
|
{
|
||||||
|
bool first = true;
|
||||||
|
|
||||||
|
for (enum bch_opt_id i = 0; i < bch2_opts_nr; i++) {
|
||||||
|
const struct bch_option *opt = &bch2_opt_table[i];
|
||||||
|
|
||||||
|
if ((opt->flags & hide_mask) || !(opt->flags & show_mask))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
u64 v = bch2_opt_get_by_id(&opts, i);
|
||||||
|
if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!first)
|
||||||
|
prt_char(out, ',');
|
||||||
|
first = false;
|
||||||
|
|
||||||
|
bch2_opt_to_text(out, c, sb, opt, v, flags);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
|
int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
@ -373,6 +373,16 @@ enum fsck_err_opts {
|
|||||||
OPT_BOOL(), \
|
OPT_BOOL(), \
|
||||||
BCH2_NO_SB_OPT, false, \
|
BCH2_NO_SB_OPT, false, \
|
||||||
NULL, "Exit recovery immediately prior to journal replay")\
|
NULL, "Exit recovery immediately prior to journal replay")\
|
||||||
|
x(recovery_passes, u64, \
|
||||||
|
OPT_FS|OPT_MOUNT, \
|
||||||
|
OPT_BITFIELD(bch2_recovery_passes), \
|
||||||
|
BCH2_NO_SB_OPT, 0, \
|
||||||
|
NULL, "Recovery passes to run explicitly") \
|
||||||
|
x(recovery_passes_exclude, u64, \
|
||||||
|
OPT_FS|OPT_MOUNT, \
|
||||||
|
OPT_BITFIELD(bch2_recovery_passes), \
|
||||||
|
BCH2_NO_SB_OPT, 0, \
|
||||||
|
NULL, "Recovery passes to exclude") \
|
||||||
x(recovery_pass_last, u8, \
|
x(recovery_pass_last, u8, \
|
||||||
OPT_FS|OPT_MOUNT, \
|
OPT_FS|OPT_MOUNT, \
|
||||||
OPT_STR_NOLIMIT(bch2_recovery_passes), \
|
OPT_STR_NOLIMIT(bch2_recovery_passes), \
|
||||||
@ -595,6 +605,10 @@ int bch2_opt_parse(struct bch_fs *, const struct bch_option *,
|
|||||||
|
|
||||||
void bch2_opt_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *,
|
void bch2_opt_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *,
|
||||||
const struct bch_option *, u64, unsigned);
|
const struct bch_option *, u64, unsigned);
|
||||||
|
void bch2_opts_to_text(struct printbuf *,
|
||||||
|
struct bch_opts,
|
||||||
|
struct bch_fs *, struct bch_sb *,
|
||||||
|
unsigned, unsigned, unsigned);
|
||||||
|
|
||||||
int bch2_opt_check_may_set(struct bch_fs *, int, u64);
|
int bch2_opt_check_may_set(struct bch_fs *, int, u64);
|
||||||
int bch2_opts_check_may_set(struct bch_fs *);
|
int bch2_opts_check_may_set(struct bch_fs *);
|
||||||
|
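A minimal sketch of how the new bch2_opts_to_text() might be called — not part of this commit; OPT_MOUNT, OPT_HIDDEN and OPT_SHOW_MOUNT_STYLE are assumed mask/flag names from the existing option namespace:

	/* Hypothetical caller: print the non-default mount-style options. */
	struct printbuf buf = PRINTBUF;

	bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb,
			  OPT_MOUNT, OPT_HIDDEN,	/* assumed show/hide masks */
			  OPT_SHOW_MOUNT_STYLE);	/* assumed to_text flag */
	pr_info("mount opts: %s\n", buf.buf);
	printbuf_exit(&buf);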
@@ -219,9 +219,9 @@ static noinline void __process_finished_items(struct rcu_pending *pending,
 	BUILD_BUG_ON(ARCH_SLAB_MINALIGN == 0);
 
 	void *ptr = (void *)(((unsigned long) obj->func) & ~1UL);
-	kvfree(ptr);
-
 	bool free_head = ((unsigned long) obj->func) & 1UL;
+
+	kvfree(ptr);
 	if (free_head)
 		kfree(obj);
 }
@@ -13,6 +13,7 @@
 #include "errcode.h"
 #include "error.h"
 #include "inode.h"
+#include "io_write.h"
 #include "move.h"
 #include "rebalance.h"
 #include "subvolume.h"
@@ -156,6 +157,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
 	data_opts->rewrite_ptrs =
 		bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression);
 	data_opts->target = r->target;
+	data_opts->write_flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
 
 	if (!data_opts->rewrite_ptrs) {
 		/*
@@ -263,6 +265,7 @@ static bool rebalance_pred(struct bch_fs *c, void *arg,
 
 	data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression);
 	data_opts->target = target;
+	data_opts->write_flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
 	return data_opts->rewrite_ptrs != 0;
 }
 
@@ -97,7 +97,7 @@ static void bch2_reconstruct_alloc(struct bch_fs *c)
 	bch2_write_super(c);
 	mutex_unlock(&c->sb_lock);
 
-	c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+	c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
 
 
 	bch2_shoot_down_journal_keys(c, BTREE_ID_alloc,
@@ -525,17 +525,17 @@ static int read_btree_roots(struct bch_fs *c)
 				"error reading btree root %s l=%u: %s",
 				bch2_btree_id_str(i), r->level, bch2_err_str(ret))) {
 			if (btree_id_is_alloc(i)) {
-				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations);
-				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info);
-				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus);
-				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
-				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs);
+				c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations);
+				c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info);
+				c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus);
+				c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
+				c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs);
 				c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
 				r->error = 0;
-			} else if (!(c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) {
+			} else if (!(c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) {
 				bch_info(c, "will run btree node scan");
-				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes);
-				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
+				c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes);
+				c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
 			}
 
 			ret = 0;
@@ -706,14 +706,14 @@ int bch2_fs_recovery(struct bch_fs *c)
 		if (check_version_upgrade(c))
 			write_sb = true;
 
-		c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+		c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
 
 		if (write_sb)
 			bch2_write_super(c);
 		mutex_unlock(&c->sb_lock);
 
 	if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
-		c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
+		c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
 
 	if (c->opts.fsck)
 		set_bit(BCH_FS_fsck_running, &c->flags);
@@ -40,7 +40,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
 
 	set_bit(BCH_FS_may_go_rw, &c->flags);
 
-	if (keys->nr || c->opts.fsck || !c->sb.clean || c->recovery_passes_explicit)
+	if (keys->nr || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes)
 		return bch2_fs_read_write_early(c);
 	return 0;
 }
@@ -97,14 +97,14 @@ u64 bch2_recovery_passes_from_stable(u64 v)
 int bch2_run_explicit_recovery_pass(struct bch_fs *c,
 				    enum bch_recovery_pass pass)
 {
-	if (c->recovery_passes_explicit & BIT_ULL(pass))
+	if (c->opts.recovery_passes & BIT_ULL(pass))
 		return 0;
 
 	bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
 		 bch2_recovery_passes[pass], pass,
 		 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
 
-	c->recovery_passes_explicit |= BIT_ULL(pass);
+	c->opts.recovery_passes |= BIT_ULL(pass);
 
 	if (c->curr_recovery_pass >= pass) {
 		c->curr_recovery_pass = pass;
@@ -161,7 +161,9 @@ static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pa
 {
 	struct recovery_pass_fn *p = recovery_pass_fns + pass;
 
-	if (c->recovery_passes_explicit & BIT_ULL(pass))
+	if (c->opts.recovery_passes_exclude & BIT_ULL(pass))
+		return false;
+	if (c->opts.recovery_passes & BIT_ULL(pass))
 		return true;
 	if ((p->when & PASS_FSCK) && c->opts.fsck)
 		return true;
@@ -82,7 +82,8 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
 	}
 
 	for (unsigned i = 0; i < r->nr_devs; i++)
-		if (!bch2_member_exists(sb, r->devs[i])) {
+		if (r->devs[i] != BCH_SB_MEMBER_INVALID &&
+		    !bch2_member_exists(sb, r->devs[i])) {
 			prt_printf(err, "invalid device %u in entry ", r->devs[i]);
 			goto bad;
 		}
@@ -122,7 +123,7 @@ static void extent_to_replicas(struct bkey_s_c k,
 			continue;
 
 		if (!p.has_ec)
-			r->devs[r->nr_devs++] = p.ptr.dev;
+			replicas_entry_add_dev(r, p.ptr.dev);
 		else
 			r->nr_required = 0;
 	}
@@ -139,7 +140,7 @@ static void stripe_to_replicas(struct bkey_s_c k,
 	for (ptr = s.v->ptrs;
 	     ptr < s.v->ptrs + s.v->nr_blocks;
 	     ptr++)
-		r->devs[r->nr_devs++] = ptr->dev;
+		replicas_entry_add_dev(r, ptr->dev);
 }
 
 void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e,
@@ -180,7 +181,7 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
 	e->nr_required = 1;
 
 	darray_for_each(devs, i)
-		e->devs[e->nr_devs++] = *i;
+		replicas_entry_add_dev(e, *i);
 
 	bch2_replicas_entry_sort(e);
 }
@@ -795,11 +796,11 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
 			nr_online += test_bit(e->devs[i], devs.d);
 
 			struct bch_dev *ca = bch2_dev_rcu(c, e->devs[i]);
-			nr_failed += ca && ca->mi.state == BCH_MEMBER_STATE_failed;
+			nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
 		}
 		rcu_read_unlock();
 
-		if (nr_failed == e->nr_devs)
+		if (nr_online + nr_failed == e->nr_devs)
 			continue;
 
 		if (nr_online < e->nr_required)
@@ -5,7 +5,7 @@
 struct bch_replicas_entry_v0 {
 	__u8 data_type;
 	__u8 nr_devs;
-	__u8 devs[];
+	__u8 devs[] __counted_by(nr_devs);
 } __packed;
 
 struct bch_sb_field_replicas_v0 {
@@ -17,7 +17,7 @@ struct bch_replicas_entry_v1 {
 	__u8 data_type;
 	__u8 nr_devs;
 	__u8 nr_required;
-	__u8 devs[];
+	__u8 devs[] __counted_by(nr_devs);
 } __packed;
 
 struct bch_sb_field_replicas {
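__counted_by() is the upstream kernel annotation tying a flexible array to the member that holds its element count, so FORTIFY_SOURCE/UBSAN bounds checks know how long devs[] really is; a generic sketch, not taken from this commit:

	struct demo_entry {			/* hypothetical struct, illustration only */
		__u8	nr;
		__u8	devs[] __counted_by(nr);	/* accesses checked against ->nr */
	};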
@@ -28,4 +28,9 @@ struct bch_sb_field_replicas {
 #define replicas_entry_bytes(_i) \
 	(offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
 
+#define replicas_entry_add_dev(e, d) ({ \
+	(e)->nr_devs++; \
+	(e)->devs[(e)->nr_devs - 1] = (d); \
+})
+
 #endif /* _BCACHEFS_REPLICAS_FORMAT_H */
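The helper centralizes the append pattern the callers above used to open-code; a sketch of the journal_io.c usage after this change, assembled from the hunks in this commit:

	struct bch_replicas_padded replicas = {
		.e.data_type	= BCH_DATA_journal,
		.e.nr_devs	= 0,
		.e.nr_required	= 1,
	};

	darray_for_each(i->ptrs, ptr)
		replicas_entry_add_dev(&replicas.e, ptr->dev);

	bch2_replicas_entry_sort(&replicas.e);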
@@ -288,10 +288,10 @@ enum bch_fsck_flags {
 	x(invalid_btree_id, 274, 0) \
 	x(alloc_key_io_time_bad, 275, 0) \
 	x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
-	x(accounting_key_junk_at_end, 277, 0) \
-	x(accounting_key_replicas_nr_devs_0, 278, 0) \
-	x(accounting_key_replicas_nr_required_bad, 279, 0) \
-	x(accounting_key_replicas_devs_unsorted, 280, 0) \
+	x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \
+	x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \
+	x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \
+	x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
 
 enum bch_sb_error_id {
 #define x(t, n, ...) BCH_FSCK_ERR_##t = n,
@@ -11,6 +11,7 @@
 
 void bch2_dev_missing(struct bch_fs *c, unsigned dev)
 {
-	bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
+	if (dev != BCH_SB_MEMBER_INVALID)
+		bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
 }
 
@@ -473,3 +474,51 @@ unsigned bch2_sb_nr_devices(const struct bch_sb *sb)
 		nr += bch2_member_exists((struct bch_sb *) sb, i);
 	return nr;
 }
+
+int bch2_sb_member_alloc(struct bch_fs *c)
+{
+	unsigned dev_idx = c->sb.nr_devices;
+	struct bch_sb_field_members_v2 *mi;
+	unsigned nr_devices;
+	unsigned u64s;
+	int best = -1;
+	u64 best_last_mount = 0;
+
+	if (dev_idx < BCH_SB_MEMBERS_MAX)
+		goto have_slot;
+
+	for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) {
+		/* eventually BCH_SB_MEMBERS_MAX will be raised */
+		if (dev_idx == BCH_SB_MEMBER_INVALID)
+			continue;
+
+		struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
+		if (bch2_member_alive(&m))
+			continue;
+
+		u64 last_mount = le64_to_cpu(m.last_mount);
+		if (best < 0 || last_mount < best_last_mount) {
+			best = dev_idx;
+			best_last_mount = last_mount;
+		}
+	}
+	if (best >= 0) {
+		dev_idx = best;
+		goto have_slot;
+	}
+
+	return -BCH_ERR_ENOSPC_sb_members;
+have_slot:
+	nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
+
+	mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+	u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) +
+			    le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64));
+
+	mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
+	if (!mi)
+		return -BCH_ERR_ENOSPC_sb_members;
+
+	c->disk_sb.sb->nr_devices = nr_devices;
+	return dev_idx;
+}
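The caller side of the new helper appears in the bch2_dev_add() hunk further down in this commit; in short:

	int ret = bch2_sb_member_alloc(c);
	if (ret < 0) {
		bch_err_msg(c, ret, "setting up new superblock");
		goto err_unlock;
	}
	unsigned dev_idx = ret;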
@@ -198,29 +198,37 @@ static inline struct bch_dev *bch2_dev_locked(struct bch_fs *c, unsigned dev)
 					lockdep_is_held(&c->state_lock));
 }
 
-static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *c, unsigned dev)
+static inline struct bch_dev *bch2_dev_rcu_noerror(struct bch_fs *c, unsigned dev)
 {
 	return c && dev < c->sb.nr_devices
 		? rcu_dereference(c->devs[dev])
 		: NULL;
 }
 
+void bch2_dev_missing(struct bch_fs *, unsigned);
+
+static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *c, unsigned dev)
+{
+	struct bch_dev *ca = bch2_dev_rcu_noerror(c, dev);
+	if (unlikely(!ca))
+		bch2_dev_missing(c, dev);
+	return ca;
+}
+
 static inline struct bch_dev *bch2_dev_tryget_noerror(struct bch_fs *c, unsigned dev)
 {
 	rcu_read_lock();
-	struct bch_dev *ca = bch2_dev_rcu(c, dev);
+	struct bch_dev *ca = bch2_dev_rcu_noerror(c, dev);
 	if (ca)
 		bch2_dev_get(ca);
 	rcu_read_unlock();
 	return ca;
 }
 
-void bch2_dev_missing(struct bch_fs *, unsigned);
-
 static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev)
 {
 	struct bch_dev *ca = bch2_dev_tryget_noerror(c, dev);
-	if (!ca)
+	if (unlikely(!ca))
 		bch2_dev_missing(c, dev);
 	return ca;
 }
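A hedged sketch of the intended split, using only the names from this hunk: callers that treat an empty slot as a normal case keep the quiet lookup, everything else gets the logging wrapper. The fragment below is hypothetical, not from the commit:

	/* hypothetical caller that tolerates a missing device slot: */
	rcu_read_lock();
	struct bch_dev *ca = bch2_dev_rcu_noerror(c, dev_idx);
	if (ca)
		nr_online++;	/* bch2_dev_rcu() would log via bch2_dev_missing() instead */
	rcu_read_unlock();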
@@ -354,4 +362,6 @@ static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64
 bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
 void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
 
+int bch2_sb_member_alloc(struct bch_fs *);
+
 #endif /* _BCACHEFS_SB_MEMBERS_H */
@@ -8,6 +8,11 @@
  */
 #define BCH_SB_MEMBERS_MAX 64
 
+/*
+ * Sentinal value - indicates a device that does not exist
+ */
+#define BCH_SB_MEMBER_INVALID 255
+
 #define BCH_MIN_NR_NBUCKETS (1 << 6)
 
 #define BCH_IOPS_MEASUREMENTS() \
@@ -270,7 +270,7 @@ int bch2_hash_set_in_snapshot(struct btree_trans *trans,
 			desc.hash_bkey(info, bkey_i_to_s_c(insert)),
 			snapshot),
 		POS(insert->k.p.inode, U64_MAX),
-		BTREE_ITER_slots|BTREE_ITER_intent, k, ret) {
+		BTREE_ITER_slots|BTREE_ITER_intent|flags, k, ret) {
 		if (is_visible_key(desc, inum, k)) {
 			if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert)))
 				goto found;
@@ -524,7 +524,7 @@ static void bch2_sb_update(struct bch_fs *c)
 	c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit;
 
 	/* XXX this is wrong, we need a 96 or 128 bit integer type */
-	c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo),
+	c->sb.time_base_lo = div64_u64(le64_to_cpu(src->time_base_lo),
 				     c->sb.nsec_per_time_unit);
 	c->sb.time_base_hi = le32_to_cpu(src->time_base_hi);
 
@@ -370,7 +370,7 @@ void bch2_fs_read_only(struct bch_fs *c)
 	    test_bit(BCH_FS_clean_shutdown, &c->flags) &&
 	    c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) {
 		BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
-		BUG_ON(atomic_read(&c->btree_cache.dirty));
+		BUG_ON(atomic_long_read(&c->btree_cache.nr_dirty));
 		BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
 		BUG_ON(c->btree_write_buffer.inc.keys.nr);
 		BUG_ON(c->btree_write_buffer.flushing.keys.nr);
@@ -1592,33 +1592,6 @@ int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
 
 /* Device add/removal: */
 
-static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
-{
-	struct bpos start = POS(ca->dev_idx, 0);
-	struct bpos end = POS(ca->dev_idx, U64_MAX);
-	int ret;
-
-	/*
-	 * We clear the LRU and need_discard btrees first so that we don't race
-	 * with bch2_do_invalidates() and bch2_do_discards()
-	 */
-	ret =   bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
-					BTREE_TRIGGER_norun, NULL) ?:
-		bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
-					BTREE_TRIGGER_norun, NULL) ?:
-		bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
-					BTREE_TRIGGER_norun, NULL) ?:
-		bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end,
-					BTREE_TRIGGER_norun, NULL) ?:
-		bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
-					BTREE_TRIGGER_norun, NULL) ?:
-		bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end,
-					BTREE_TRIGGER_norun, NULL) ?:
-		bch2_dev_usage_remove(c, ca->dev_idx);
-	bch_err_msg(c, ret, "removing dev alloc info");
-	return ret;
-}
-
 int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 {
 	struct bch_member *m;
@@ -1730,9 +1703,6 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
 	struct bch_opts opts = bch2_opts_empty();
 	struct bch_sb_handle sb;
 	struct bch_dev *ca = NULL;
-	struct bch_sb_field_members_v2 *mi;
-	struct bch_member dev_mi;
-	unsigned dev_idx, nr_devices, u64s;
 	struct printbuf errbuf = PRINTBUF;
 	struct printbuf label = PRINTBUF;
 	int ret;
@@ -1742,7 +1712,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
 	if (ret)
 		goto err;
 
-	dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx);
+	struct bch_member dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx);
 
 	if (BCH_MEMBER_GROUP(&dev_mi)) {
 		bch2_disk_path_to_text_sb(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1);
@@ -1780,55 +1750,19 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
 		goto err_unlock;
 
 	if (dynamic_fault("bcachefs:add:no_slot"))
-		goto no_slot;
-
-	if (c->sb.nr_devices < BCH_SB_MEMBERS_MAX) {
-		dev_idx = c->sb.nr_devices;
-		goto have_slot;
-	}
-
-	int best = -1;
-	u64 best_last_mount = 0;
-	for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) {
-		struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
-		if (bch2_member_alive(&m))
-			continue;
-
-		u64 last_mount = le64_to_cpu(m.last_mount);
-		if (best < 0 || last_mount < best_last_mount) {
-			best = dev_idx;
-			best_last_mount = last_mount;
-		}
-	}
-	if (best >= 0) {
-		dev_idx = best;
-		goto have_slot;
-	}
-no_slot:
-	ret = -BCH_ERR_ENOSPC_sb_members;
-	bch_err_msg(c, ret, "setting up new superblock");
 		goto err_unlock;
 
-have_slot:
-	nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
-
-	mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
-	u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) +
-			    le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64));
-
-	mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
-	if (!mi) {
-		ret = -BCH_ERR_ENOSPC_sb_members;
+	ret = bch2_sb_member_alloc(c);
+	if (ret < 0) {
 		bch_err_msg(c, ret, "setting up new superblock");
 		goto err_unlock;
 	}
-	struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
+	unsigned dev_idx = ret;
 
 	/* success: */
 
-	*m = dev_mi;
-	m->last_mount = cpu_to_le64(ktime_get_real_seconds());
-	c->disk_sb.sb->nr_devices = nr_devices;
+	dev_mi.last_mount = cpu_to_le64(ktime_get_real_seconds());
+	*bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx) = dev_mi;
 
 	ca->disk_sb.sb->dev_idx = dev_idx;
 	bch2_dev_attach(c, ca, dev_idx);
@@ -244,14 +244,18 @@ static struct attribute sysfs_state_rw = {
 
 static size_t bch2_btree_cache_size(struct bch_fs *c)
 {
+	struct btree_cache *bc = &c->btree_cache;
 	size_t ret = 0;
 	struct btree *b;
 
-	mutex_lock(&c->btree_cache.lock);
-	list_for_each_entry(b, &c->btree_cache.live, list)
+	mutex_lock(&bc->lock);
+	list_for_each_entry(b, &bc->live[0].list, list)
 		ret += btree_buf_bytes(b);
-
-	mutex_unlock(&c->btree_cache.lock);
+	list_for_each_entry(b, &bc->live[1].list, list)
+		ret += btree_buf_bytes(b);
+	list_for_each_entry(b, &bc->freeable, list)
+		ret += btree_buf_bytes(b);
+	mutex_unlock(&bc->lock);
 	return ret;
 }
 
@@ -287,7 +291,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
 		prt_tab_rjust(out);
 
 		prt_human_readable_u64(out, nr_extents
-				       ? div_u64(sectors_uncompressed << 9, nr_extents)
+				       ? div64_u64(sectors_uncompressed << 9, nr_extents)
 				       : 0);
 		prt_tab_rjust(out);
 		prt_newline(out);
@@ -444,11 +448,12 @@ STORE(bch2_fs)
 		return -EROFS;
 
 	if (attr == &sysfs_trigger_btree_cache_shrink) {
+		struct btree_cache *bc = &c->btree_cache;
 		struct shrink_control sc;
 
 		sc.gfp_mask = GFP_KERNEL;
 		sc.nr_to_scan = strtoul_or_return(buf);
-		c->btree_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
+		bc->live[0].shrink->scan_objects(bc->live[0].shrink, &sc);
 	}
 
 	if (attr == &sysfs_trigger_btree_key_cache_shrink) {
@@ -456,7 +461,7 @@ STORE(bch2_fs)
 
 		sc.gfp_mask = GFP_KERNEL;
 		sc.nr_to_scan = strtoul_or_return(buf);
-		c->btree_key_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
+		c->btree_key_cache.shrink->scan_objects(c->btree_key_cache.shrink, &sc);
 	}
 
 	if (attr == &sysfs_trigger_gc)
@@ -64,7 +64,7 @@ static int bch2_pow(u64 n, u64 p, u64 *res)
 	*res = 1;
 
 	while (p--) {
-		if (*res > div_u64(U64_MAX, n))
+		if (*res > div64_u64(U64_MAX, n))
 			return -ERANGE;
 		*res *= n;
 	}
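Background on the div_u64() to div64_u64() conversions in this and the following hunks, stated as context rather than as part of the commit: in the upstream kernel's math64.h, div_u64() takes a 32-bit divisor while div64_u64() takes a 64-bit one, so any divisor that can exceed U32_MAX needs the 64-bit variant. A minimal illustration:

	#include <linux/math64.h>

	u64 n = 10ULL << 32;			/* divisor wider than 32 bits */
	u64 limit = div64_u64(U64_MAX, n);	/* full 64-bit divide */
	/* div_u64(U64_MAX, n) would truncate n to its low 32 bits (here 0) */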
@@ -140,14 +140,14 @@ static int __bch2_strtou64_h(const char *cp, u64 *res)
 
 	parse_or_ret(cp, parse_unit_suffix(cp, &b));
 
-	if (v > div_u64(U64_MAX, b))
+	if (v > div64_u64(U64_MAX, b))
 		return -ERANGE;
 	v *= b;
 
-	if (f_n > div_u64(U64_MAX, b))
+	if (f_n > div64_u64(U64_MAX, b))
 		return -ERANGE;
 
-	f_n = div_u64(f_n * b, f_d);
+	f_n = div64_u64(f_n * b, f_d);
 	if (v + f_n < v)
 		return -ERANGE;
 	v += f_n;
@@ -214,7 +214,7 @@ u64 bch2_read_flag_list(const char *opt, const char * const list[])
 
 	s = strim(d);
 
-	while ((p = strsep(&s, ","))) {
+	while ((p = strsep(&s, ",;"))) {
 		int flag = match_string(list, -1, p);
 
 		if (flag < 0) {
|
|||||||
{
|
{
|
||||||
const struct time_unit *u = bch2_pick_time_units(ns);
|
const struct time_unit *u = bch2_pick_time_units(ns);
|
||||||
|
|
||||||
prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
|
prt_printf(out, "%llu %s", div64_u64(ns, u->nsecs), u->name);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
|
static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
|
||||||
@ -477,7 +477,7 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
|
|||||||
bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
|
bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
|
||||||
|
|
||||||
u64 q = max(quantiles->entries[i].m, last_q);
|
u64 q = max(quantiles->entries[i].m, last_q);
|
||||||
prt_printf(out, "%llu ", div_u64(q, u->nsecs));
|
prt_printf(out, "%llu ", div64_u64(q, u->nsecs));
|
||||||
if (is_last)
|
if (is_last)
|
||||||
prt_newline(out);
|
prt_newline(out);
|
||||||
last_q = q;
|
last_q = q;
|
||||||
|
@ -13,7 +13,7 @@ struct bch_xattr {
|
|||||||
__u8 x_type;
|
__u8 x_type;
|
||||||
__u8 x_name_len;
|
__u8 x_name_len;
|
||||||
__le16 x_val_len;
|
__le16 x_val_len;
|
||||||
__u8 x_name[];
|
__u8 x_name[] __counted_by(x_name_len);
|
||||||
} __packed __aligned(8);
|
} __packed __aligned(8);
|
||||||
|
|
||||||
#endif /* _BCACHEFS_XATTR_FORMAT_H */
|
#endif /* _BCACHEFS_XATTR_FORMAT_H */
|
||||||
|