Update bcachefs sources to 0d63ed13ea3d closures: Fix race in closure_sync()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

parent bd9e015334
commit 9799b119c3
@@ -1 +1 @@
-f70a3402188ea797a38fa9f5b729fb6fbe5f5b83
+0d63ed13ea3d867055ae5752e2e0514a227d1dcb
@@ -47,6 +47,7 @@ typedef struct {
#define smp_rmb() cmm_smp_rmb()
#define smp_mb() cmm_smp_mb()
#define smp_read_barrier_depends() cmm_smp_read_barrier_depends()
#define smp_acquire__after_ctrl_dep() cmm_smp_mb()

#else /* C11_ATOMICS */
@@ -205,6 +206,11 @@ static inline i_type a_type##_dec_return(a_type##_t *v) \
	return __ATOMIC_DEC_RETURN(&v->counter); \
} \
\
static inline i_type a_type##_dec_return_release(a_type##_t *v) \
{ \
	return __ATOMIC_SUB_RETURN_RELEASE(1, &v->counter); \
} \
\
static inline void a_type##_inc(a_type##_t *v) \
{ \
	__ATOMIC_INC(&v->counter); \
@@ -154,6 +154,7 @@ struct closure {
	struct closure *parent;

	atomic_t remaining;
	bool closure_get_happened;

#ifdef CONFIG_DEBUG_CLOSURES
#define CLOSURE_MAGIC_DEAD 0xc054dead
@@ -185,7 +186,11 @@ static inline unsigned closure_nr_remaining(struct closure *cl)
 */
static inline void closure_sync(struct closure *cl)
{
	if (closure_nr_remaining(cl) != 1)
#ifdef CONFIG_DEBUG_CLOSURES
	BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened);
#endif

	if (cl->closure_get_happened)
		__closure_sync(cl);
}

@@ -233,8 +238,6 @@ static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
	closure_set_ip(cl);
	cl->fn = fn;
	cl->wq = wq;
	/* between atomic_dec() in closure_put() */
	smp_mb__before_atomic();
}

static inline void closure_queue(struct closure *cl)
@@ -259,6 +262,8 @@ static inline void closure_queue(struct closure *cl)
 */
static inline void closure_get(struct closure *cl)
{
	cl->closure_get_happened = true;

#ifdef CONFIG_DEBUG_CLOSURES
	BUG_ON((atomic_inc_return(&cl->remaining) &
		CLOSURE_REMAINING_MASK) <= 1);
@@ -281,6 +286,7 @@ static inline void closure_init(struct closure *cl, struct closure *parent)
		closure_get(parent);

	atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
	cl->closure_get_happened = false;

	closure_debug_create(cl);
	closure_set_ip(cl);
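The closure.h change above is the core of this update: closure_sync() used to decide whether to wait purely from the remaining count, which can race with an in-flight closure_get()/closure_put() pair; the new closure_get_happened flag records whether a reference was ever taken, so closure_sync() only skips waiting when no get could be outstanding. A rough, self-contained userspace model of that idea follows — the names and the busy-wait are my simplifications, not the kernel code:

/* Minimal single-threaded model of the closure_get_happened idea.
 * Illustration only -- the real code lives in include/linux/closure.h
 * and uses kernel atomics and waitqueues. */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct closure {
	atomic_int remaining;          /* references still outstanding */
	bool closure_get_happened;     /* did anyone ever take a reference? */
};

static void closure_init(struct closure *cl)
{
	atomic_init(&cl->remaining, 1);
	cl->closure_get_happened = false;
}

static void closure_get(struct closure *cl)
{
	cl->closure_get_happened = true;   /* set before the count moves */
	atomic_fetch_add(&cl->remaining, 1);
}

static void closure_put(struct closure *cl)
{
	atomic_fetch_sub(&cl->remaining, 1);
}

static void closure_sync(struct closure *cl)
{
	/* Only skip waiting when no reference was ever taken; the count
	 * alone can look "done" while a get/put pair is still in flight. */
	if (cl->closure_get_happened)
		while (atomic_load(&cl->remaining) != 1)
			;  /* the real code sleeps on a waitqueue here */
}

int main(void)
{
	struct closure cl;

	closure_init(&cl);
	closure_get(&cl);     /* e.g. handed to an in-flight IO */
	closure_put(&cl);     /* IO completion */
	closure_sync(&cl);    /* waits because a get happened */
	assert(atomic_load(&cl.remaining) == 1);
	printf("synced\n");
	return 0;
}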
@@ -151,6 +151,14 @@ static inline u64 ktime_get_seconds(void)
	return ts.tv_sec;
}

static inline u64 ktime_get_real_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);
	return timespec_to_ns(&ts);
}

static inline u64 ktime_get_real_seconds(void)
{
	struct timespec ts;
@@ -2,22 +2,10 @@
#ifndef _BCACHEFS_BBPOS_H
#define _BCACHEFS_BBPOS_H

#include "bbpos_types.h"
#include "bkey_methods.h"
#include "btree_cache.h"

struct bbpos {
	enum btree_id btree;
	struct bpos pos;
};

static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos)
{
	return (struct bbpos) { btree, pos };
}

#define BBPOS_MIN BBPOS(0, POS_MIN)
#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX)

static inline int bbpos_cmp(struct bbpos l, struct bbpos r)
{
	return cmp_int(l.btree, r.btree) ?: bpos_cmp(l.pos, r.pos);

libbcachefs/bbpos_types.h (new file, 18 lines)
@@ -0,0 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BBPOS_TYPES_H
#define _BCACHEFS_BBPOS_TYPES_H

struct bbpos {
	enum btree_id btree;
	struct bpos pos;
};

static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos)
{
	return (struct bbpos) { btree, pos };
}

#define BBPOS_MIN BBPOS(0, POS_MIN)
#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX)

#endif /* _BCACHEFS_BBPOS_TYPES_H */
@@ -418,6 +418,7 @@ enum bch_time_stats {
#include "buckets_types.h"
#include "buckets_waiting_for_journal_types.h"
#include "clock_types.h"
#include "disk_groups_types.h"
#include "ec_types.h"
#include "journal_types.h"
#include "keylist_types.h"
@@ -463,6 +464,7 @@ enum gc_phase {
	GC_PHASE_BTREE_snapshot_trees,
	GC_PHASE_BTREE_deleted_inodes,
	GC_PHASE_BTREE_logged_ops,
	GC_PHASE_BTREE_rebalance_work,

	GC_PHASE_PENDING_DELETE,
};
@@ -938,9 +940,6 @@ struct bch_fs {
	struct list_head moving_context_list;
	struct mutex moving_context_lock;

	struct list_head data_progress_list;
	struct mutex data_progress_lock;

	/* REBALANCE */
	struct bch_fs_rebalance rebalance;
@@ -613,31 +613,17 @@ struct bch_extent_stripe_ptr {
#endif
};

struct bch_extent_reservation {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64 type:6,
	      unused:22,
	      replicas:4,
	      generation:32;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64 generation:32,
	      replicas:4,
	      unused:22,
	      type:6;
#endif
};

struct bch_extent_rebalance {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64 type:7,
	      unused:33,
	      compression:8,
	__u64 type:6,
	      unused:34,
	      compression:8, /* enum bch_compression_opt */
	      target:16;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64 target:16,
	      compression:8,
	      unused:33,
	      type:7;
	      unused:34,
	      type:6;
#endif
};

@@ -1682,7 +1668,9 @@ struct bch_sb_field_journal_seq_blacklist {
	x(snapshot_skiplists, BCH_VERSION(1, 1), \
	  BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \
	x(deleted_inodes, BCH_VERSION(1, 2), \
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes))
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \
	x(rebalance_work, BCH_VERSION(1, 3), \
	  BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))

enum bcachefs_metadata_version {
	bcachefs_metadata_version_min = 9,
@@ -1693,7 +1681,7 @@ enum bcachefs_metadata_version {
};

static const __maybe_unused
unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_major_minor;
unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work;

#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)

@@ -2306,7 +2294,9 @@ enum btree_id_flags {
	  BIT_ULL(KEY_TYPE_set)) \
	x(logged_ops, 17, 0, \
	  BIT_ULL(KEY_TYPE_logged_op_truncate)| \
	  BIT_ULL(KEY_TYPE_logged_op_finsert))
	  BIT_ULL(KEY_TYPE_logged_op_finsert)) \
	x(rebalance_work, 18, BTREE_ID_SNAPSHOTS, \
	  BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))

enum btree_id {
#define x(name, nr, ...) BTREE_ID_##name = nr,
@@ -119,16 +119,6 @@ enum btree_update_flags {
#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)

#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \
	((1U << KEY_TYPE_alloc)| \
	 (1U << KEY_TYPE_alloc_v2)| \
	 (1U << KEY_TYPE_alloc_v3)| \
	 (1U << KEY_TYPE_alloc_v4)| \
	 (1U << KEY_TYPE_stripe)| \
	 (1U << KEY_TYPE_inode)| \
	 (1U << KEY_TYPE_inode_v2)| \
	 (1U << KEY_TYPE_snapshot))

static inline int bch2_trans_mark_key(struct btree_trans *trans,
				      enum btree_id btree_id, unsigned level,
				      struct bkey_s_c old, struct bkey_i *new,
@@ -382,8 +382,7 @@ static int run_one_mem_trigger(struct btree_trans *trans,
	if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)))
		return 0;

	if (old_ops->atomic_trigger == new_ops->atomic_trigger &&
	    ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
	if (old_ops->atomic_trigger == new_ops->atomic_trigger) {
		ret = bch2_mark_key(trans, i->btree_id, i->level,
				    old, bkey_i_to_s_c(new),
				    BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
@@ -425,8 +424,7 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_

	if (!i->insert_trigger_run &&
	    !i->overwrite_trigger_run &&
	    old_ops->trans_trigger == new_ops->trans_trigger &&
	    ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
	    old_ops->trans_trigger == new_ops->trans_trigger) {
		i->overwrite_trigger_run = true;
		i->insert_trigger_run = true;
		return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k,
@ -935,14 +935,12 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_mark_extent(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
unsigned flags)
|
||||
static int __mark_extent(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, unsigned flags)
|
||||
{
|
||||
u64 journal_seq = trans->journal_res.seq;
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
@ -1018,6 +1016,14 @@ int bch2_mark_extent(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_mark_extent(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
unsigned flags)
|
||||
{
|
||||
return mem_trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags);
|
||||
}
|
||||
|
||||
int bch2_mark_stripe(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
@ -1124,13 +1130,11 @@ int bch2_mark_stripe(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_mark_reservation(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
unsigned flags)
|
||||
static int __mark_reservation(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
|
||||
struct bch_fs_usage *fs_usage;
|
||||
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
|
||||
s64 sectors = (s64) k.k->size;
|
||||
@ -1157,6 +1161,14 @@ int bch2_mark_reservation(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_mark_reservation(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
unsigned flags)
|
||||
{
|
||||
return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags);
|
||||
}
|
||||
|
||||
static s64 __bch2_mark_reflink_p(struct btree_trans *trans,
|
||||
struct bkey_s_c_reflink_p p,
|
||||
u64 start, u64 end,
|
||||
@ -1211,13 +1223,11 @@ fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_mark_reflink_p(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
unsigned flags)
|
||||
static int __mark_reflink_p(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
|
||||
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
|
||||
struct reflink_gc *ref;
|
||||
size_t l, r, m;
|
||||
@ -1251,6 +1261,14 @@ int bch2_mark_reflink_p(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_mark_reflink_p(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
unsigned flags)
|
||||
{
|
||||
return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags);
|
||||
}
|
||||
|
||||
void bch2_trans_fs_usage_revert(struct btree_trans *trans,
|
||||
struct replicas_delta_list *deltas)
|
||||
{
|
||||
@ -1452,15 +1470,11 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_trans_mark_extent(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_i *new,
|
||||
unsigned flags)
|
||||
static int __trans_mark_extent(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
|
||||
? old
|
||||
: bkey_i_to_s_c(new);
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
@ -1517,6 +1531,24 @@ int bch2_trans_mark_extent(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_trans_mark_extent(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_i *new,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
int mod = (int) bch2_bkey_needs_rebalance(c, bkey_i_to_s_c(new)) -
|
||||
(int) bch2_bkey_needs_rebalance(c, old);
|
||||
|
||||
if (mod) {
|
||||
int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new->k.p, mod > 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags);
|
||||
}
|
||||
|
||||
static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
|
||||
struct bkey_s_c_stripe s,
|
||||
unsigned idx, bool deleting)
|
||||
@ -1670,15 +1702,10 @@ int bch2_trans_mark_stripe(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_trans_mark_reservation(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old,
|
||||
struct bkey_i *new,
|
||||
unsigned flags)
|
||||
static int __trans_mark_reservation(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, unsigned flags)
|
||||
{
|
||||
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
|
||||
? old
|
||||
: bkey_i_to_s_c(new);
|
||||
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
|
||||
s64 sectors = (s64) k.k->size;
|
||||
struct replicas_delta_list *d;
|
||||
@ -1700,7 +1727,16 @@ int bch2_trans_mark_reservation(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
|
||||
int bch2_trans_mark_reservation(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old,
|
||||
struct bkey_i *new,
|
||||
unsigned flags)
|
||||
{
|
||||
return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags);
|
||||
}
|
||||
|
||||
static int trans_mark_reflink_p_segment(struct btree_trans *trans,
|
||||
struct bkey_s_c_reflink_p p,
|
||||
u64 *idx, unsigned flags)
|
||||
{
|
||||
@ -1767,35 +1803,38 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_trans_mark_reflink_p(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old,
|
||||
struct bkey_i *new,
|
||||
unsigned flags)
|
||||
static int __trans_mark_reflink_p(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, unsigned flags)
|
||||
{
|
||||
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
|
||||
? old
|
||||
: bkey_i_to_s_c(new);
|
||||
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
|
||||
u64 idx, end_idx;
|
||||
int ret = 0;
|
||||
|
||||
if (flags & BTREE_TRIGGER_INSERT) {
|
||||
struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
|
||||
|
||||
v->front_pad = v->back_pad = 0;
|
||||
}
|
||||
|
||||
idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
|
||||
end_idx = le64_to_cpu(p.v->idx) + p.k->size +
|
||||
le32_to_cpu(p.v->back_pad);
|
||||
|
||||
while (idx < end_idx && !ret)
|
||||
ret = __bch2_trans_mark_reflink_p(trans, p, &idx, flags);
|
||||
|
||||
ret = trans_mark_reflink_p_segment(trans, p, &idx, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_trans_mark_reflink_p(struct btree_trans *trans,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c old,
|
||||
struct bkey_i *new,
|
||||
unsigned flags)
|
||||
{
|
||||
if (flags & BTREE_TRIGGER_INSERT) {
|
||||
struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v;
|
||||
|
||||
v->front_pad = v->back_pad = 0;
|
||||
}
|
||||
|
||||
return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags);
|
||||
}
|
||||
|
||||
static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
|
||||
struct bch_dev *ca, size_t b,
|
||||
enum bch_data_type type,
|
||||
@ -1825,16 +1864,16 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
|
||||
bch2_data_types[type],
|
||||
bch2_data_types[type]);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
goto err;
|
||||
}
|
||||
|
||||
a->v.data_type = type;
|
||||
a->v.dirty_sectors = sectors;
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
out:
|
||||
if (a->v.data_type != type ||
|
||||
a->v.dirty_sectors != sectors) {
|
||||
a->v.data_type = type;
|
||||
a->v.dirty_sectors = sectors;
|
||||
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
|
||||
}
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
@ -1929,6 +1968,22 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_trans_mark_dev_sbs(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
|
||||
for_each_online_member(ca, c, i) {
|
||||
int ret = bch2_trans_mark_dev_sb(c, ca);
|
||||
if (ret) {
|
||||
percpu_ref_put(&ca->ref);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Disk reservations: */
|
||||
|
||||
#define SECTORS_CACHE 1024
|
||||
|
@@ -339,12 +339,27 @@ int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct
int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);

#define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\
({ \
	int ret = 0; \
\
	if (_old.k->type) \
		ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \
	if (!ret && _new.k->type) \
		ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE); \
	ret; \
})

#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \
	mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags)

void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *);
int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);

int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,
				    size_t, enum bch_data_type, unsigned);
int bch2_trans_mark_dev_sb(struct bch_fs *, struct bch_dev *);
int bch2_trans_mark_dev_sbs(struct bch_fs *);

static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b)
{
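The two macros above replace the old per-key-type old-and-new trigger handling: a single-key trigger is run once for the overwritten key and once for the inserted key. A stand-alone sketch of the same pattern, with made-up flag names and key struct rather than the bcachefs types:

/* Simplified model of the overwrite-then-insert helper: run one
 * single-key trigger for the old key, then for the new key.
 * Flag names and the key struct are stand-ins for illustration. */
#include <stdio.h>

#define TRIGGER_INSERT    (1u << 0)
#define TRIGGER_OVERWRITE (1u << 1)

struct key { int type; const char *name; };

#define run_overwrite_then_insert(_fn, _old, _new, _flags)		\
({									\
	int _ret = 0;							\
	if ((_old)->type)						\
		_ret = _fn(_old, (_flags) & ~TRIGGER_INSERT);		\
	if (!_ret && (_new)->type)					\
		_ret = _fn(_new, (_flags) & ~TRIGGER_OVERWRITE);	\
	_ret;								\
})

static int mark_key(const struct key *k, unsigned flags)
{
	/* a real trigger would update accounting for this key */
	printf("mark %s (flags %x)\n", k->name, flags);
	return 0;
}

int main(void)
{
	struct key old = { 1, "old" }, new = { 1, "new" };

	return run_overwrite_then_insert(mark_key, &old, &new,
					 TRIGGER_INSERT|TRIGGER_OVERWRITE);
}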
@@ -332,8 +332,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
	struct bch_ioctl_data_event e = {
		.type			= BCH_DATA_EVENT_PROGRESS,
		.p.data_type		= ctx->stats.data_type,
		.p.btree_id		= ctx->stats.btree_id,
		.p.pos			= ctx->stats.pos,
		.p.btree_id		= ctx->stats.pos.btree,
		.p.pos			= ctx->stats.pos.pos,
		.p.sectors_done		= atomic64_read(&ctx->stats.sectors_seen),
		.p.sectors_total	= bch2_fs_usage_read_short(c).used,
	};
@@ -697,14 +697,32 @@ err:
	return ret;
}

void bch2_compression_opt_to_text(struct printbuf *out, u64 v)
{
	struct bch_compression_opt opt = bch2_compression_decode(v);

	if (opt.type < BCH_COMPRESSION_OPT_NR)
		prt_str(out, bch2_compression_opts[opt.type]);
	else
		prt_printf(out, "(unknown compression opt %u)", opt.type);
	if (opt.level)
		prt_printf(out, ":%u", opt.level);
}

void bch2_opt_compression_to_text(struct printbuf *out,
				  struct bch_fs *c,
				  struct bch_sb *sb,
				  u64 v)
{
	struct bch_compression_opt opt = bch2_compression_decode(v);

	prt_str(out, bch2_compression_opts[opt.type]);
	if (opt.level)
		prt_printf(out, ":%u", opt.level);
	return bch2_compression_opt_to_text(out, v);
}

int bch2_opt_compression_validate(u64 v, struct printbuf *err)
{
	if (!bch2_compression_opt_valid(v)) {
		prt_printf(err, "invalid compression opt %llu", v);
		return -BCH_ERR_invalid_sb_opt_compression;
	}

	return 0;
}
@@ -4,12 +4,18 @@

#include "extents_types.h"

static const unsigned __bch2_compression_opt_to_type[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
	BCH_COMPRESSION_OPTS()
#undef x
};

struct bch_compression_opt {
	u8 type:4,
	   level:4;
};

static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
static inline struct bch_compression_opt __bch2_compression_decode(unsigned v)
{
	return (struct bch_compression_opt) {
		.type = v & 15,
@@ -17,17 +23,25 @@ static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
	};
}

static inline bool bch2_compression_opt_valid(unsigned v)
{
	struct bch_compression_opt opt = __bch2_compression_decode(v);

	return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level);
}

static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
{
	return bch2_compression_opt_valid(v)
		? __bch2_compression_decode(v)
		: (struct bch_compression_opt) { 0 };
}

static inline unsigned bch2_compression_encode(struct bch_compression_opt opt)
{
	return opt.type|(opt.level << 4);
}

static const unsigned __bch2_compression_opt_to_type[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
	BCH_COMPRESSION_OPTS()
#undef x
};

static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
{
	return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
@@ -44,12 +58,16 @@ int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned);
void bch2_fs_compress_exit(struct bch_fs *);
int bch2_fs_compress_init(struct bch_fs *);

void bch2_compression_opt_to_text(struct printbuf *, u64);

int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
int bch2_opt_compression_validate(u64, struct printbuf *);

#define bch2_opt_compression (struct bch_opt_fn) { \
	.parse		= bch2_opt_compression_parse, \
	.to_text	= bch2_opt_compression_to_text, \
	.parse		= bch2_opt_compression_parse, \
	.to_text	= bch2_opt_compression_to_text, \
	.validate	= bch2_opt_compression_validate, \
}

#endif /* _BCACHEFS_COMPRESS_H */
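As the header above shows, the compression option packs into one byte: low nibble is the type index, high nibble the level, and a value is only considered valid when the type is known and a level never appears without a type. A tiny stand-alone illustration of that packing; the numeric type value below is made up for the example and is not a BCH_COMPRESSION_OPT_* constant:

/* Illustration of the 4-bit type / 4-bit level packing used by the
 * compression option; the type value here is invented for the example. */
#include <assert.h>
#include <stdio.h>

struct compression_opt { unsigned type:4, level:4; };

static struct compression_opt decode(unsigned v)
{
	return (struct compression_opt) { .type = v & 15, .level = v >> 4 };
}

static unsigned encode(struct compression_opt opt)
{
	return opt.type | (opt.level << 4);
}

int main(void)
{
	/* a made-up option: type 3, level 7 -> encodes to 0x73 */
	struct compression_opt opt = { .type = 3, .level = 7 };
	unsigned v = encode(opt);

	assert(decode(v).type == 3 && decode(v).level == 7);
	printf("encoded: 0x%02x\n", v);
	return 0;
}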
@ -13,6 +13,7 @@
|
||||
#include "keylist.h"
|
||||
#include "move.h"
|
||||
#include "nocow_locking.h"
|
||||
#include "rebalance.h"
|
||||
#include "subvolume.h"
|
||||
#include "trace.h"
|
||||
|
||||
@ -251,11 +252,11 @@ restart_drop_extra_replicas:
|
||||
ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
|
||||
k.k->p, bkey_start_pos(&insert->k)) ?:
|
||||
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
|
||||
k.k->p, insert->k.p);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, insert,
|
||||
k.k->p, insert->k.p) ?:
|
||||
bch2_bkey_set_needs_rebalance(c, insert,
|
||||
op->opts.background_target,
|
||||
op->opts.background_compression) ?:
|
||||
bch2_trans_update(trans, &iter, insert,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, &op->res,
|
||||
NULL,
|
||||
@ -281,11 +282,11 @@ next:
|
||||
}
|
||||
continue;
|
||||
nowork:
|
||||
if (m->ctxt && m->ctxt->stats) {
|
||||
if (m->stats && m->stats) {
|
||||
BUG_ON(k.k->p.offset <= iter.pos.offset);
|
||||
atomic64_inc(&m->ctxt->stats->keys_raced);
|
||||
atomic64_inc(&m->stats->keys_raced);
|
||||
atomic64_add(k.k->p.offset - iter.pos.offset,
|
||||
&m->ctxt->stats->sectors_raced);
|
||||
&m->stats->sectors_raced);
|
||||
}
|
||||
|
||||
this_cpu_inc(c->counters[BCH_COUNTER_move_extent_fail]);
|
||||
@ -439,6 +440,8 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
bch2_bkey_buf_reassemble(&m->k, c, k);
|
||||
m->btree_id = btree_id;
|
||||
m->data_opts = data_opts;
|
||||
m->ctxt = ctxt;
|
||||
m->stats = ctxt ? ctxt->stats : NULL;
|
||||
|
||||
bch2_write_op_init(&m->op, c, io_opts);
|
||||
m->op.pos = bkey_start_pos(k.k);
|
||||
@ -487,7 +490,7 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
|
||||
if (c->opts.nocow_enabled) {
|
||||
if (ctxt) {
|
||||
move_ctxt_wait_event(ctxt, trans,
|
||||
move_ctxt_wait_event(ctxt,
|
||||
(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
|
||||
PTR_BUCKET_POS(c, &p.ptr), 0)) ||
|
||||
!atomic_read(&ctxt->read_sectors));
|
||||
|
@@ -23,6 +23,7 @@ struct data_update {
	struct bkey_buf k;
	struct data_update_opts data_opts;
	struct moving_context *ctxt;
	struct bch_move_stats *stats;
	struct bch_write_op op;
};

@ -175,6 +175,7 @@ int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)
|
||||
|
||||
dst->deleted = BCH_GROUP_DELETED(src);
|
||||
dst->parent = BCH_GROUP_PARENT(src);
|
||||
memcpy(dst->label, src->label, sizeof(dst->label));
|
||||
}
|
||||
|
||||
for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
|
||||
@ -382,7 +383,57 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name)
|
||||
return v;
|
||||
}
|
||||
|
||||
void bch2_disk_path_to_text(struct printbuf *out, struct bch_sb *sb, unsigned v)
|
||||
void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
|
||||
{
|
||||
struct bch_disk_groups_cpu *groups;
|
||||
struct bch_disk_group_cpu *g;
|
||||
unsigned nr = 0;
|
||||
u16 path[32];
|
||||
|
||||
out->atomic++;
|
||||
rcu_read_lock();
|
||||
groups = rcu_dereference(c->disk_groups);
|
||||
if (!groups)
|
||||
goto invalid;
|
||||
|
||||
while (1) {
|
||||
if (nr == ARRAY_SIZE(path))
|
||||
goto invalid;
|
||||
|
||||
if (v >= groups->nr)
|
||||
goto invalid;
|
||||
|
||||
g = groups->entries + v;
|
||||
|
||||
if (g->deleted)
|
||||
goto invalid;
|
||||
|
||||
path[nr++] = v;
|
||||
|
||||
if (!g->parent)
|
||||
break;
|
||||
|
||||
v = g->parent - 1;
|
||||
}
|
||||
|
||||
while (nr) {
|
||||
v = path[--nr];
|
||||
g = groups->entries + v;
|
||||
|
||||
prt_printf(out, "%.*s", (int) sizeof(g->label), g->label);
|
||||
if (nr)
|
||||
prt_printf(out, ".");
|
||||
}
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
out->atomic--;
|
||||
return;
|
||||
invalid:
|
||||
prt_printf(out, "invalid label %u", v);
|
||||
goto out;
|
||||
}
|
||||
|
||||
void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
|
||||
{
|
||||
struct bch_sb_field_disk_groups *groups =
|
||||
bch2_sb_field_get(sb, disk_groups);
|
||||
@ -493,10 +544,7 @@ int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
void bch2_opt_target_to_text(struct printbuf *out,
|
||||
struct bch_fs *c,
|
||||
struct bch_sb *sb,
|
||||
u64 v)
|
||||
void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
|
||||
{
|
||||
struct target t = target_decode(v);
|
||||
|
||||
@ -504,47 +552,69 @@ void bch2_opt_target_to_text(struct printbuf *out,
|
||||
case TARGET_NULL:
|
||||
prt_printf(out, "none");
|
||||
break;
|
||||
case TARGET_DEV:
|
||||
if (c) {
|
||||
struct bch_dev *ca;
|
||||
case TARGET_DEV: {
|
||||
struct bch_dev *ca;
|
||||
|
||||
rcu_read_lock();
|
||||
ca = t.dev < c->sb.nr_devices
|
||||
? rcu_dereference(c->devs[t.dev])
|
||||
: NULL;
|
||||
rcu_read_lock();
|
||||
ca = t.dev < c->sb.nr_devices
|
||||
? rcu_dereference(c->devs[t.dev])
|
||||
: NULL;
|
||||
|
||||
if (ca && percpu_ref_tryget(&ca->io_ref)) {
|
||||
prt_printf(out, "/dev/%pg", ca->disk_sb.bdev);
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
} else if (ca) {
|
||||
prt_printf(out, "offline device %u", t.dev);
|
||||
} else {
|
||||
prt_printf(out, "invalid device %u", t.dev);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
if (ca && percpu_ref_tryget(&ca->io_ref)) {
|
||||
prt_printf(out, "/dev/%pg", ca->disk_sb.bdev);
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
} else if (ca) {
|
||||
prt_printf(out, "offline device %u", t.dev);
|
||||
} else {
|
||||
struct bch_member m = bch2_sb_member_get(sb, t.dev);
|
||||
|
||||
if (bch2_dev_exists(sb, t.dev)) {
|
||||
prt_printf(out, "Device ");
|
||||
pr_uuid(out, m.uuid.b);
|
||||
prt_printf(out, " (%u)", t.dev);
|
||||
} else {
|
||||
prt_printf(out, "Bad device %u", t.dev);
|
||||
}
|
||||
prt_printf(out, "invalid device %u", t.dev);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
break;
|
||||
}
|
||||
case TARGET_GROUP:
|
||||
if (c) {
|
||||
mutex_lock(&c->sb_lock);
|
||||
bch2_disk_path_to_text(out, c->disk_sb.sb, t.group);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
} else {
|
||||
bch2_disk_path_to_text(out, sb, t.group);
|
||||
}
|
||||
bch2_disk_path_to_text(out, c, t.group);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
|
||||
{
|
||||
struct target t = target_decode(v);
|
||||
|
||||
switch (t.type) {
|
||||
case TARGET_NULL:
|
||||
prt_printf(out, "none");
|
||||
break;
|
||||
case TARGET_DEV: {
|
||||
struct bch_member m = bch2_sb_member_get(sb, t.dev);
|
||||
|
||||
if (bch2_dev_exists(sb, t.dev)) {
|
||||
prt_printf(out, "Device ");
|
||||
pr_uuid(out, m.uuid.b);
|
||||
prt_printf(out, " (%u)", t.dev);
|
||||
} else {
|
||||
prt_printf(out, "Bad device %u", t.dev);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TARGET_GROUP:
|
||||
bch2_disk_path_to_text_sb(out, sb, t.group);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_opt_target_to_text(struct printbuf *out,
|
||||
struct bch_fs *c,
|
||||
struct bch_sb *sb,
|
||||
u64 v)
|
||||
{
|
||||
if (c)
|
||||
bch2_target_to_text(out, c, v);
|
||||
else
|
||||
bch2_target_to_text_sb(out, sb, v);
|
||||
}
|
||||
|
@ -2,6 +2,8 @@
|
||||
#ifndef _BCACHEFS_DISK_GROUPS_H
|
||||
#define _BCACHEFS_DISK_GROUPS_H
|
||||
|
||||
#include "disk_groups_types.h"
|
||||
|
||||
extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups;
|
||||
|
||||
static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups)
|
||||
@ -83,7 +85,10 @@ int bch2_disk_path_find(struct bch_sb_handle *, const char *);
|
||||
/* Exported for userspace bcachefs-tools: */
|
||||
int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *);
|
||||
|
||||
void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned);
|
||||
void bch2_disk_path_to_text(struct printbuf *, struct bch_fs *, unsigned);
|
||||
void bch2_disk_path_to_text_sb(struct printbuf *, struct bch_sb *, unsigned);
|
||||
|
||||
void bch2_target_to_text(struct printbuf *out, struct bch_fs *, unsigned);
|
||||
|
||||
int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
|
||||
void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
|
||||
|
libbcachefs/disk_groups_types.h (new file, 18 lines)
@@ -0,0 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_DISK_GROUPS_TYPES_H
#define _BCACHEFS_DISK_GROUPS_TYPES_H

struct bch_disk_group_cpu {
	bool deleted;
	u16 parent;
	u8 label[BCH_SB_LABEL_SIZE];
	struct bch_devs_mask devs;
};

struct bch_disk_groups_cpu {
	struct rcu_head rcu;
	unsigned nr;
	struct bch_disk_group_cpu entries[] __counted_by(nr);
};

#endif /* _BCACHEFS_DISK_GROUPS_TYPES_H */
@@ -213,6 +213,7 @@
	x(BCH_ERR_invalid_sb, invalid_sb_crypt) \
	x(BCH_ERR_invalid_sb, invalid_sb_clean) \
	x(BCH_ERR_invalid_sb, invalid_sb_quota) \
	x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \
	x(BCH_ERR_invalid, invalid_bkey) \
	x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
	x(EIO, btree_node_read_err) \
@ -13,6 +13,7 @@
|
||||
#include "btree_iter.h"
|
||||
#include "buckets.h"
|
||||
#include "checksum.h"
|
||||
#include "compress.h"
|
||||
#include "debug.h"
|
||||
#include "disk_groups.h"
|
||||
#include "error.h"
|
||||
@ -757,18 +758,6 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
|
||||
return i;
|
||||
}
|
||||
|
||||
static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
|
||||
{
|
||||
union bch_extent_entry *next = extent_entry_next(entry);
|
||||
|
||||
/* stripes have ptrs, but their layout doesn't work with this code */
|
||||
BUG_ON(k.k->type == KEY_TYPE_stripe);
|
||||
|
||||
memmove_u64s_down(entry, next,
|
||||
(u64 *) bkey_val_end(k) - (u64 *) next);
|
||||
k.k->u64s -= (u64 *) next - (u64 *) entry;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns pointer to the next entry after the one being dropped:
|
||||
*/
|
||||
@ -992,10 +981,6 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct bch_extent_crc_unpacked crc;
|
||||
const struct bch_extent_ptr *ptr;
|
||||
const struct bch_extent_stripe_ptr *ec;
|
||||
struct bch_dev *ca;
|
||||
bool first = true;
|
||||
|
||||
if (c)
|
||||
@ -1006,9 +991,9 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
prt_printf(out, " ");
|
||||
|
||||
switch (__extent_entry_type(entry)) {
|
||||
case BCH_EXTENT_ENTRY_ptr:
|
||||
ptr = entry_to_ptr(entry);
|
||||
ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
|
||||
case BCH_EXTENT_ENTRY_ptr: {
|
||||
const struct bch_extent_ptr *ptr = entry_to_ptr(entry);
|
||||
struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
|
||||
? bch_dev_bkey_exists(c, ptr->dev)
|
||||
: NULL;
|
||||
|
||||
@ -1030,10 +1015,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
prt_printf(out, " stale");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case BCH_EXTENT_ENTRY_crc32:
|
||||
case BCH_EXTENT_ENTRY_crc64:
|
||||
case BCH_EXTENT_ENTRY_crc128:
|
||||
crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
|
||||
case BCH_EXTENT_ENTRY_crc128: {
|
||||
struct bch_extent_crc_unpacked crc =
|
||||
bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
|
||||
|
||||
prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s",
|
||||
crc.compressed_size,
|
||||
@ -1042,12 +1029,26 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
bch2_csum_types[crc.csum_type],
|
||||
bch2_compression_types[crc.compression_type]);
|
||||
break;
|
||||
case BCH_EXTENT_ENTRY_stripe_ptr:
|
||||
ec = &entry->stripe_ptr;
|
||||
}
|
||||
case BCH_EXTENT_ENTRY_stripe_ptr: {
|
||||
const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr;
|
||||
|
||||
prt_printf(out, "ec: idx %llu block %u",
|
||||
(u64) ec->idx, ec->block);
|
||||
break;
|
||||
}
|
||||
case BCH_EXTENT_ENTRY_rebalance: {
|
||||
const struct bch_extent_rebalance *r = &entry->rebalance;
|
||||
|
||||
prt_str(out, "rebalance: target ");
|
||||
if (c)
|
||||
bch2_target_to_text(out, c, r->target);
|
||||
else
|
||||
prt_printf(out, "%u", r->target);
|
||||
prt_str(out, " compression ");
|
||||
bch2_compression_opt_to_text(out, r->compression);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
|
||||
return;
|
||||
@ -1207,6 +1208,14 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
crc_since_last_ptr = true;
|
||||
|
||||
if (crc_is_encoded(crc) &&
|
||||
(crc.uncompressed_size > c->opts.encoded_extent_max >> 9) &&
|
||||
(flags & (BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT))) {
|
||||
prt_printf(err, "too large encoded extent");
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
|
||||
break;
|
||||
case BCH_EXTENT_ENTRY_stripe_ptr:
|
||||
if (have_ec) {
|
||||
@ -1215,9 +1224,18 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
}
|
||||
have_ec = true;
|
||||
break;
|
||||
case BCH_EXTENT_ENTRY_rebalance:
|
||||
case BCH_EXTENT_ENTRY_rebalance: {
|
||||
const struct bch_extent_rebalance *r = &entry->rebalance;
|
||||
|
||||
if (!bch2_compression_opt_valid(r->compression)) {
|
||||
struct bch_compression_opt opt = __bch2_compression_decode(r->compression);
|
||||
prt_printf(err, "invalid compression opt %u:%u",
|
||||
opt.type, opt.level);
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!nr_ptrs) {
|
||||
@ -1281,6 +1299,125 @@ void bch2_ptr_swab(struct bkey_s k)
|
||||
}
|
||||
}
|
||||
|
||||
const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
|
||||
bkey_extent_entry_for_each(ptrs, entry)
|
||||
if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance)
|
||||
return &entry->rebalance;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
|
||||
unsigned target, unsigned compression)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
unsigned rewrite_ptrs = 0;
|
||||
|
||||
if (compression) {
|
||||
unsigned compression_type = bch2_compression_opt_to_type(compression);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
unsigned i = 0;
|
||||
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) {
|
||||
rewrite_ptrs = 0;
|
||||
goto incompressible;
|
||||
}
|
||||
|
||||
if (!p.ptr.cached && p.crc.compression_type != compression_type)
|
||||
rewrite_ptrs |= 1U << i;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
incompressible:
|
||||
if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) {
|
||||
const struct bch_extent_ptr *ptr;
|
||||
unsigned i = 0;
|
||||
|
||||
bkey_for_each_ptr(ptrs, ptr) {
|
||||
if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target))
|
||||
rewrite_ptrs |= 1U << i;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
return rewrite_ptrs;
|
||||
}
|
||||
|
||||
bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
|
||||
|
||||
/*
|
||||
* If it's an indirect extent, we don't delete the rebalance entry when
|
||||
* done so that we know what options were applied - check if it still
|
||||
* needs work done:
|
||||
*/
|
||||
if (r &&
|
||||
k.k->type == KEY_TYPE_reflink_v &&
|
||||
!bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression))
|
||||
r = NULL;
|
||||
|
||||
return r != NULL;
|
||||
}
|
||||
|
||||
int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
|
||||
unsigned target, unsigned compression)
|
||||
{
|
||||
struct bkey_s k = bkey_i_to_s(_k);
|
||||
struct bch_extent_rebalance *r;
|
||||
bool needs_rebalance;
|
||||
|
||||
if (!bkey_extent_is_direct_data(k.k))
|
||||
return 0;
|
||||
|
||||
/* get existing rebalance entry: */
|
||||
r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c);
|
||||
if (r) {
|
||||
if (k.k->type == KEY_TYPE_reflink_v) {
|
||||
/*
|
||||
* indirect extents: existing options take precedence,
|
||||
* so that we don't move extents back and forth if
|
||||
* they're referenced by different inodes with different
|
||||
* options:
|
||||
*/
|
||||
if (r->target)
|
||||
target = r->target;
|
||||
if (r->compression)
|
||||
compression = r->compression;
|
||||
}
|
||||
|
||||
r->target = target;
|
||||
r->compression = compression;
|
||||
}
|
||||
|
||||
needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression);
|
||||
|
||||
if (needs_rebalance && !r) {
|
||||
union bch_extent_entry *new = bkey_val_end(k);
|
||||
|
||||
new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance;
|
||||
new->rebalance.compression = compression;
|
||||
new->rebalance.target = target;
|
||||
new->rebalance.unused = 0;
|
||||
k.k->u64s += extent_entry_u64s(new);
|
||||
} else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) {
|
||||
/*
|
||||
* For indirect extents, don't delete the rebalance entry when
|
||||
* we're finished so that we know we specifically moved it or
|
||||
* compressed it to its current location/compression type
|
||||
*/
|
||||
extent_entry_drop(k, (union bch_extent_entry *) r);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Generic extent code: */
|
||||
|
||||
int bch2_cut_front_s(struct bpos where, struct bkey_s k)
|
||||
|
@ -89,6 +89,18 @@ static inline void __extent_entry_insert(struct bkey_i *k,
|
||||
memcpy_u64s_small(dst, new, extent_entry_u64s(new));
|
||||
}
|
||||
|
||||
static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
|
||||
{
|
||||
union bch_extent_entry *next = extent_entry_next(entry);
|
||||
|
||||
/* stripes have ptrs, but their layout doesn't work with this code */
|
||||
BUG_ON(k.k->type == KEY_TYPE_stripe);
|
||||
|
||||
memmove_u64s_down(entry, next,
|
||||
(u64 *) bkey_val_end(k) - (u64 *) next);
|
||||
k.k->u64s -= (u64 *) next - (u64 *) entry;
|
||||
}
|
||||
|
||||
static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
|
||||
{
|
||||
return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
|
||||
@ -190,6 +202,11 @@ static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc)
|
||||
crc.compression_type != BCH_COMPRESSION_TYPE_incompressible);
|
||||
}
|
||||
|
||||
static inline bool crc_is_encoded(struct bch_extent_crc_unpacked crc)
|
||||
{
|
||||
return crc.csum_type != BCH_CSUM_none || crc_is_compressed(crc);
|
||||
}
|
||||
|
||||
/* bkey_ptrs: generically over any key type that has ptrs */
|
||||
|
||||
struct bkey_ptrs_c {
|
||||
@ -693,6 +710,14 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c,
|
||||
|
||||
void bch2_ptr_swab(struct bkey_s);
|
||||
|
||||
const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c);
|
||||
unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c,
|
||||
unsigned, unsigned);
|
||||
bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *,
|
||||
unsigned, unsigned);
|
||||
|
||||
/* Generic extent code: */
|
||||
|
||||
enum bch_extent_overlap {
|
||||
@ -737,22 +762,4 @@ static inline void bch2_key_resize(struct bkey *k, unsigned new_size)
|
||||
k->size = new_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* In extent_sort_fix_overlapping(), insert_fixup_extent(),
|
||||
* extent_merge_inline() - we're modifying keys in place that are packed. To do
|
||||
* that we have to unpack the key, modify the unpacked key - then this
|
||||
* copies/repacks the unpacked to the original as necessary.
|
||||
*/
|
||||
static inline void extent_save(struct btree *b, struct bkey_packed *dst,
|
||||
struct bkey *src)
|
||||
{
|
||||
struct bkey_format *f = &b->format;
|
||||
struct bkey_i *dst_unpacked;
|
||||
|
||||
if ((dst_unpacked = packed_to_bkey(dst)))
|
||||
dst_unpacked->k = *src;
|
||||
else
|
||||
BUG_ON(!bch2_bkey_pack_key(dst, src, f));
|
||||
}
|
||||
|
||||
#endif /* _BCACHEFS_EXTENTS_H */
|
||||
|
@@ -113,6 +113,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
	} else {
		atomic_set(&dio->cl.remaining,
			   CLOSURE_REMAINING_INITIALIZER + 1);
		dio->cl.closure_get_happened = true;
	}

	dio->req = req;
@ -1299,6 +1299,28 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
struct bch_extent_crc_unpacked crc;
|
||||
const union bch_extent_entry *i;
|
||||
unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9;
|
||||
|
||||
bkey_for_each_crc(k.k, ptrs, crc, i)
|
||||
if (crc_is_encoded(crc) &&
|
||||
crc.uncompressed_size > encoded_extent_max_sectors) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct bkey_s_c k,
|
||||
struct inode_walker *inode,
|
||||
@ -1434,7 +1456,8 @@ int bch2_check_extents(struct bch_fs *c)
|
||||
&res, NULL,
|
||||
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({
|
||||
bch2_disk_reservation_put(c, &res);
|
||||
check_extent(trans, &iter, k, &w, &s, &extent_ends);
|
||||
check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
|
||||
check_extent_overbig(trans, &iter, k);
|
||||
})) ?:
|
||||
check_i_sectors(trans, &w);
|
||||
|
||||
@ -1448,6 +1471,30 @@ int bch2_check_extents(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_check_indirect_extents(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct disk_reservation res = { 0 };
|
||||
int ret = 0;
|
||||
|
||||
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink,
|
||||
POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k,
|
||||
&res, NULL,
|
||||
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({
|
||||
bch2_disk_reservation_put(c, &res);
|
||||
check_extent_overbig(trans, &iter, k);
|
||||
}));
|
||||
|
||||
bch2_disk_reservation_put(c, &res);
|
||||
bch2_trans_put(trans);
|
||||
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
int bch2_check_inodes(struct bch_fs *);
|
||||
int bch2_check_extents(struct bch_fs *);
|
||||
int bch2_check_indirect_extents(struct bch_fs *);
|
||||
int bch2_check_dirents(struct bch_fs *);
|
||||
int bch2_check_xattrs(struct bch_fs *);
|
||||
int bch2_check_root(struct bch_fs *);
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "bkey_methods.h"
|
||||
#include "btree_update.h"
|
||||
#include "buckets.h"
|
||||
#include "compress.h"
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "extent_update.h"
|
||||
@ -422,9 +423,10 @@ static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err)
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
|
||||
if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) {
|
||||
prt_printf(err, "invalid data checksum type (%u >= %u)",
|
||||
unpacked.bi_compression, BCH_COMPRESSION_OPT_NR + 1);
|
||||
if (unpacked.bi_compression &&
|
||||
!bch2_compression_opt_valid(unpacked.bi_compression - 1)) {
|
||||
prt_printf(err, "invalid compression opt %u",
|
||||
unpacked.bi_compression - 1);
|
||||
return -BCH_ERR_invalid_bkey;
|
||||
}
|
||||
|
||||
@ -979,6 +981,18 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c,
|
||||
opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0;
|
||||
}
|
||||
|
||||
int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts)
|
||||
{
|
||||
struct bch_inode_unpacked inode;
|
||||
int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode));
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_inode_opts_get(opts, trans->c, &inode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
|
@ -200,6 +200,7 @@ void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *);
|
||||
struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *);
|
||||
void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
|
||||
struct bch_inode_unpacked *);
|
||||
int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *);
|
||||
|
||||
int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32);
|
||||
int bch2_delete_dead_inodes(struct bch_fs *);
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "io_misc.h"
|
||||
#include "io_write.h"
|
||||
#include "logged_ops.h"
|
||||
#include "rebalance.h"
|
||||
#include "subvolume.h"
|
||||
|
||||
/* Overwrites whatever was present with zeroes: */
|
||||
@ -355,6 +356,7 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
|
||||
struct btree_iter iter;
|
||||
struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k);
|
||||
subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
|
||||
struct bch_io_opts opts;
|
||||
u64 dst_offset = le64_to_cpu(op->v.dst_offset);
|
||||
u64 src_offset = le64_to_cpu(op->v.src_offset);
|
||||
s64 shift = dst_offset - src_offset;
|
||||
@ -363,6 +365,10 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
|
||||
bool insert = shift > 0;
|
||||
int ret = 0;
|
||||
|
||||
ret = bch2_inum_opts_get(trans, inum, &opts);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
|
||||
POS(inum.inum, 0),
|
||||
BTREE_ITER_INTENT);
|
||||
@ -443,7 +449,10 @@ case LOGGED_OP_FINSERT_shift_extents:
|
||||
|
||||
op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset);
|
||||
|
||||
ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
|
||||
ret = bch2_bkey_set_needs_rebalance(c, copy,
|
||||
opts.background_target,
|
||||
opts.background_compression) ?:
|
||||
bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
|
||||
bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
|
||||
bch2_logged_op_update(trans, &op->k_i) ?:
|
||||
bch2_trans_commit(trans, &disk_res, NULL, BTREE_INSERT_NOFAIL);
|
||||
|
@ -351,10 +351,13 @@ static int bch2_write_index_default(struct bch_write_op *op)
|
||||
bkey_start_pos(&sk.k->k),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
||||
|
||||
ret = bch2_extent_update(trans, inum, &iter, sk.k,
|
||||
&op->res,
|
||||
op->new_i_size, &op->i_sectors_delta,
|
||||
op->flags & BCH_WRITE_CHECK_ENOSPC);
|
||||
ret = bch2_bkey_set_needs_rebalance(c, sk.k,
|
||||
op->opts.background_target,
|
||||
op->opts.background_compression) ?:
|
||||
bch2_extent_update(trans, inum, &iter, sk.k,
|
||||
&op->res,
|
||||
op->new_i_size, &op->i_sectors_delta,
|
||||
op->flags & BCH_WRITE_CHECK_ENOSPC);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
@ -495,7 +498,6 @@ static void __bch2_write_index(struct bch_write_op *op)
|
||||
{
|
||||
struct bch_fs *c = op->c;
|
||||
struct keylist *keys = &op->insert_keys;
|
||||
struct bkey_i *k;
|
||||
unsigned dev;
|
||||
int ret = 0;
|
||||
|
||||
@ -505,14 +507,6 @@ static void __bch2_write_index(struct bch_write_op *op)
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* probably not the ideal place to hook this in, but I don't
|
||||
* particularly want to plumb io_opts all the way through the btree
|
||||
* update stack right now
|
||||
*/
|
||||
for_each_keylist_key(keys, k)
|
||||
bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts);
|
||||
|
||||
if (!bch2_keylist_empty(keys)) {
|
||||
u64 sectors_start = keylist_sectors(keys);
|
||||
|
||||
@ -816,6 +810,7 @@ static enum prep_encoded_ret {
|
||||
|
||||
/* Can we just write the entire extent as is? */
|
||||
if (op->crc.uncompressed_size == op->crc.live_size &&
|
||||
op->crc.uncompressed_size <= c->opts.encoded_extent_max >> 9 &&
|
||||
op->crc.compressed_size <= wp->sectors_free &&
|
||||
(op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) ||
|
||||
op->incompressible)) {
|
||||
@ -1091,9 +1086,7 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op,
|
||||
|
||||
e = bkey_s_c_to_extent(k);
|
||||
extent_for_each_ptr_decode(e, p, entry) {
|
||||
if (p.crc.csum_type ||
|
||||
crc_is_compressed(p.crc) ||
|
||||
p.has_ec)
|
||||
if (crc_is_encoded(p.crc) || p.has_ec)
|
||||
return false;
|
||||
|
||||
replicas += bch2_extent_ptr_durability(c, &p);
|
||||
|
@ -1019,6 +1019,25 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_fs_journal_alloc(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
|
||||
for_each_online_member(ca, c, i) {
|
||||
if (ca->journal.nr)
|
||||
continue;
|
||||
|
||||
int ret = bch2_dev_journal_alloc(ca);
|
||||
if (ret) {
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* startup/shutdown: */
|
||||
|
||||
static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
|
||||
|
@ -534,6 +534,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
|
||||
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
|
||||
unsigned nr);
|
||||
int bch2_dev_journal_alloc(struct bch_dev *);
|
||||
int bch2_fs_journal_alloc(struct bch_fs *);
|
||||
|
||||
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
|
||||
|
||||
|
@@ -20,6 +20,7 @@
#include "keylist.h"
#include "move.h"
#include "replicas.h"
#include "snapshot.h"
#include "super-io.h"
#include "trace.h"

@@ -59,20 +60,6 @@ static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c
}
}

static void progress_list_add(struct bch_fs *c, struct bch_move_stats *stats)
{
mutex_lock(&c->data_progress_lock);
list_add(&stats->list, &c->data_progress_list);
mutex_unlock(&c->data_progress_lock);
}

static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats)
{
mutex_lock(&c->data_progress_lock);
list_del(&stats->list);
mutex_unlock(&c->data_progress_lock);
}

struct moving_io {
struct list_head read_list;
struct list_head io_list;

@@ -156,13 +143,11 @@ static void move_read_endio(struct bio *bio)
closure_put(&ctxt->cl);
}

void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt,
struct btree_trans *trans)
void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt)
{
struct moving_io *io;

if (trans)
bch2_trans_unlock(trans);
bch2_trans_unlock(ctxt->trans);

while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
list_del(&io->read_list);

@@ -170,21 +155,20 @@ void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt,
}
}

static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt,
struct btree_trans *trans)
void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
{
unsigned sectors_pending = atomic_read(&ctxt->write_sectors);

move_ctxt_wait_event(ctxt, trans,
move_ctxt_wait_event(ctxt,
!atomic_read(&ctxt->write_sectors) ||
atomic_read(&ctxt->write_sectors) != sectors_pending);
}

void bch2_moving_ctxt_exit(struct moving_context *ctxt)
{
struct bch_fs *c = ctxt->c;
struct bch_fs *c = ctxt->trans->c;

move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
closure_sync(&ctxt->cl);

EBUG_ON(atomic_read(&ctxt->write_sectors));

@@ -192,16 +176,12 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt)
EBUG_ON(atomic_read(&ctxt->read_sectors));
EBUG_ON(atomic_read(&ctxt->read_ios));

if (ctxt->stats) {
progress_list_del(c, ctxt->stats);
trace_move_data(c,
atomic64_read(&ctxt->stats->sectors_moved),
atomic64_read(&ctxt->stats->keys_moved));
}

mutex_lock(&c->moving_context_lock);
list_del(&ctxt->list);
mutex_unlock(&c->moving_context_lock);

bch2_trans_put(ctxt->trans);
memset(ctxt, 0, sizeof(*ctxt));
}

void bch2_moving_ctxt_init(struct moving_context *ctxt,

@@ -213,7 +193,7 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt,
{
memset(ctxt, 0, sizeof(*ctxt));

ctxt->c = c;
ctxt->trans = bch2_trans_get(c);
ctxt->fn = (void *) _RET_IP_;
ctxt->rate = rate;
ctxt->stats = stats;

@@ -230,16 +210,17 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt,
mutex_lock(&c->moving_context_lock);
list_add(&ctxt->list, &c->moving_context_list);
mutex_unlock(&c->moving_context_lock);
}

if (stats) {
progress_list_add(c, stats);
stats->data_type = BCH_DATA_user;
}
void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c)
{
trace_move_data(c, stats);
}

void bch2_move_stats_init(struct bch_move_stats *stats, char *name)
{
memset(stats, 0, sizeof(*stats));
stats->data_type = BCH_DATA_user;
scnprintf(stats->name, sizeof(stats->name), "%s", name);
}

@@ -286,15 +267,14 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans,
bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
}

static int bch2_move_extent(struct btree_trans *trans,
struct btree_iter *iter,
struct moving_context *ctxt,
struct move_bucket_in_flight *bucket_in_flight,
struct bch_io_opts io_opts,
enum btree_id btree_id,
struct bkey_s_c k,
struct data_update_opts data_opts)
int bch2_move_extent(struct moving_context *ctxt,
struct move_bucket_in_flight *bucket_in_flight,
struct btree_iter *iter,
struct bkey_s_c k,
struct bch_io_opts io_opts,
struct data_update_opts data_opts)
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
struct moving_io *io;

@@ -303,6 +283,8 @@ static int bch2_move_extent(struct btree_trans *trans,
unsigned sectors = k.k->size, pages;
int ret = -ENOMEM;

if (ctxt->stats)
ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos);
trace_move_extent2(c, k);

bch2_data_update_opts_normalize(k, &data_opts);

@@ -355,7 +337,7 @@ static int bch2_move_extent(struct btree_trans *trans,
io->rbio.bio.bi_end_io = move_read_endio;

ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
io_opts, data_opts, btree_id, k);
io_opts, data_opts, iter->btree_id, k);
if (ret && ret != -BCH_ERR_unwritten_extent_update)
goto err_free_pages;

@@ -367,9 +349,11 @@ static int bch2_move_extent(struct btree_trans *trans,

BUG_ON(ret);

io->write.ctxt = ctxt;
io->write.op.end_io = move_write_done;

if (ctxt->rate)
bch2_ratelimit_increment(ctxt->rate, k.k->size);

if (ctxt->stats) {
atomic64_inc(&ctxt->stats->keys_moved);
atomic64_add(k.k->size, &ctxt->stats->sectors_moved);

@@ -399,7 +383,7 @@ static int bch2_move_extent(struct btree_trans *trans,
closure_get(&ctxt->cl);
bch2_read_extent(trans, &io->rbio,
bkey_start_pos(k.k),
btree_id, k, 0,
iter->btree_id, k, 0,
BCH_READ_NODECODE|
BCH_READ_LAST_FRAGMENT);
return 0;

@@ -413,45 +397,96 @@ err:
return ret;
}
static int lookup_inode(struct btree_trans *trans, struct bpos pos,
struct bch_inode_unpacked *inode)
struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
struct per_snapshot_io_opts *io_opts,
struct bkey_s_c extent_k)
{
struct bch_fs *c = trans->c;
u32 restart_count = trans->restart_count;
int ret = 0;

if (io_opts->cur_inum != extent_k.k->p.inode) {
struct btree_iter iter;
struct bkey_s_c k;

io_opts->d.nr = 0;

for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
if (k.k->p.offset != extent_k.k->p.inode)
break;

if (!bkey_is_inode(k.k))
continue;

struct bch_inode_unpacked inode;
BUG_ON(bch2_inode_unpack(k, &inode));

struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
bch2_inode_opts_get(&e.io_opts, trans->c, &inode);

ret = darray_push(&io_opts->d, e);
if (ret)
break;
}
bch2_trans_iter_exit(trans, &iter);
io_opts->cur_inum = extent_k.k->p.inode;
}

ret = ret ?: trans_was_restarted(trans, restart_count);
if (ret)
return ERR_PTR(ret);

if (extent_k.k->p.snapshot) {
struct snapshot_io_opts_entry *i;
darray_for_each(io_opts->d, i)
if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot))
return &i->io_opts;
}

return &io_opts->fs_io_opts;
}

int bch2_move_get_io_opts_one(struct btree_trans *trans,
struct bch_io_opts *io_opts,
struct bkey_s_c extent_k)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret;

bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos,
BTREE_ITER_ALL_SNAPSHOTS);
k = bch2_btree_iter_peek(&iter);
ret = bkey_err(k);
if (ret)
goto err;

if (!k.k || !bkey_eq(k.k->p, pos)) {
ret = -BCH_ERR_ENOENT_inode;
goto err;
/* reflink btree? */
if (!extent_k.k->p.inode) {
*io_opts = bch2_opts_to_inode_opts(trans->c->opts);
return 0;
}

ret = bkey_is_inode(k.k) ? 0 : -EIO;
if (ret)
goto err;
k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
BTREE_ITER_CACHED);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ret;

if (!ret && bkey_is_inode(k.k)) {
struct bch_inode_unpacked inode;
bch2_inode_unpack(k, &inode);
bch2_inode_opts_get(io_opts, trans->c, &inode);
} else {
*io_opts = bch2_opts_to_inode_opts(trans->c->opts);
}

ret = bch2_inode_unpack(k, inode);
if (ret)
goto err;
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
return 0;
}
static int move_ratelimit(struct btree_trans *trans,
struct moving_context *ctxt)
int bch2_move_ratelimit(struct moving_context *ctxt)
{
struct bch_fs *c = trans->c;
struct bch_fs *c = ctxt->trans->c;
u64 delay;

if (ctxt->wait_on_copygc) {
bch2_trans_unlock(trans);
bch2_trans_unlock(ctxt->trans);
wait_event_killable(c->copygc_running_wq,
!c->copygc_running ||
kthread_should_stop());

@@ -461,7 +496,7 @@ static int move_ratelimit(struct btree_trans *trans,
delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;

if (delay) {
bch2_trans_unlock(trans);
bch2_trans_unlock(ctxt->trans);
set_current_state(TASK_INTERRUPTIBLE);
}

@@ -474,7 +509,7 @@ static int move_ratelimit(struct btree_trans *trans,
schedule_timeout(delay);

if (unlikely(freezing(current))) {
move_ctxt_wait_event(ctxt, trans, list_empty(&ctxt->reads));
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
try_to_freeze();
}
} while (delay);

@@ -483,7 +518,7 @@ static int move_ratelimit(struct btree_trans *trans,
* XXX: these limits really ought to be per device, SSDs and hard drives
* will want different limits
*/
move_ctxt_wait_event(ctxt, trans,
move_ctxt_wait_event(ctxt,
atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 &&
atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 &&
atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight &&

@@ -492,52 +527,28 @@ static int move_ratelimit(struct btree_trans *trans,
return 0;
}

static int move_get_io_opts(struct btree_trans *trans,
struct bch_io_opts *io_opts,
struct bkey_s_c k, u64 *cur_inum)
static int bch2_move_data_btree(struct moving_context *ctxt,
struct bpos start,
struct bpos end,
move_pred_fn pred, void *arg,
enum btree_id btree_id)
{
struct bch_inode_unpacked inode;
int ret;

if (*cur_inum == k.k->p.inode)
return 0;

ret = lookup_inode(trans,
SPOS(0, k.k->p.inode, k.k->p.snapshot),
&inode);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ret;

if (!ret)
bch2_inode_opts_get(io_opts, trans->c, &inode);
else
*io_opts = bch2_opts_to_inode_opts(trans->c->opts);
*cur_inum = k.k->p.inode;
return 0;
}

static int __bch2_move_data(struct moving_context *ctxt,
struct bpos start,
struct bpos end,
move_pred_fn pred, void *arg,
enum btree_id btree_id)
{
struct bch_fs *c = ctxt->c;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
struct per_snapshot_io_opts snapshot_io_opts;
struct bch_io_opts *io_opts;
struct bkey_buf sk;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct data_update_opts data_opts;
u64 cur_inum = U64_MAX;
int ret = 0, ret2;

per_snapshot_io_opts_init(&snapshot_io_opts, c);
bch2_bkey_buf_init(&sk);

if (ctxt->stats) {
ctxt->stats->data_type = BCH_DATA_user;
ctxt->stats->btree_id = btree_id;
ctxt->stats->pos = start;
ctxt->stats->pos = BBPOS(btree_id, start);
}

bch2_trans_iter_init(trans, &iter, btree_id, start,

@@ -547,7 +558,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
if (ctxt->rate)
bch2_ratelimit_reset(ctxt->rate);

while (!move_ratelimit(trans, ctxt)) {
while (!bch2_move_ratelimit(ctxt)) {
bch2_trans_begin(trans);

k = bch2_btree_iter_peek(&iter);

@@ -564,17 +575,18 @@ static int __bch2_move_data(struct moving_context *ctxt,
break;

if (ctxt->stats)
ctxt->stats->pos = iter.pos;
ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);

if (!bkey_extent_is_direct_data(k.k))
goto next_nondata;

ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k);
ret = PTR_ERR_OR_ZERO(io_opts);
if (ret)
continue;

memset(&data_opts, 0, sizeof(data_opts));
if (!pred(c, arg, k, &io_opts, &data_opts))
if (!pred(c, arg, k, io_opts, &data_opts))
goto next;

/*

@@ -584,24 +596,20 @@ static int __bch2_move_data(struct moving_context *ctxt,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);

ret2 = bch2_move_extent(trans, &iter, ctxt, NULL,
io_opts, btree_id, k, data_opts);
ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts);
if (ret2) {
if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
continue;

if (ret2 == -ENOMEM) {
/* memory allocation failure, wait for some IO to finish */
bch2_move_ctxt_wait_for_io(ctxt, trans);
bch2_move_ctxt_wait_for_io(ctxt);
continue;
}

/* XXX signal failure */
goto next;
}

if (ctxt->rate)
bch2_ratelimit_increment(ctxt->rate, k.k->size);
next:
if (ctxt->stats)
atomic64_add(k.k->size, &ctxt->stats->sectors_seen);

@@ -610,59 +618,68 @@ next_nondata:
}

bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
per_snapshot_io_opts_exit(&snapshot_io_opts);

return ret;
}
int __bch2_move_data(struct moving_context *ctxt,
struct bbpos start,
struct bbpos end,
move_pred_fn pred, void *arg)
{
struct bch_fs *c = ctxt->trans->c;
enum btree_id id;
int ret = 0;

for (id = start.btree;
id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
id++) {
ctxt->stats->pos = BBPOS(id, POS_MIN);

if (!btree_type_has_ptrs(id) ||
!bch2_btree_id_root(c, id)->b)
continue;

ret = bch2_move_data_btree(ctxt,
id == start.btree ? start.pos : POS_MIN,
id == end.btree ? end.pos : POS_MAX,
pred, arg, id);
if (ret)
break;
}

return ret;
}

int bch2_move_data(struct bch_fs *c,
enum btree_id start_btree_id, struct bpos start_pos,
enum btree_id end_btree_id, struct bpos end_pos,
struct bbpos start,
struct bbpos end,
struct bch_ratelimit *rate,
struct bch_move_stats *stats,
struct write_point_specifier wp,
bool wait_on_copygc,
move_pred_fn pred, void *arg)
{

struct moving_context ctxt;
enum btree_id id;
int ret = 0;
int ret;

bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);

for (id = start_btree_id;
id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1);
id++) {
stats->btree_id = id;

if (id != BTREE_ID_extents &&
id != BTREE_ID_reflink)
continue;

if (!bch2_btree_id_root(c, id)->b)
continue;

ret = __bch2_move_data(&ctxt,
id == start_btree_id ? start_pos : POS_MIN,
id == end_btree_id ? end_pos : POS_MAX,
pred, arg, id);
if (ret)
break;
}

ret = __bch2_move_data(&ctxt, start, end, pred, arg);
bch2_moving_ctxt_exit(&ctxt);

return ret;
}

int __bch2_evacuate_bucket(struct btree_trans *trans,
struct moving_context *ctxt,
int __bch2_evacuate_bucket(struct moving_context *ctxt,
struct move_bucket_in_flight *bucket_in_flight,
struct bpos bucket, int gen,
struct data_update_opts _data_opts)
{
struct bch_fs *c = ctxt->c;
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_iter iter;
struct bkey_buf sk;

@@ -673,7 +690,6 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
struct data_update_opts data_opts;
unsigned dirty_sectors, bucket_size;
u64 fragmentation;
u64 cur_inum = U64_MAX;
struct bpos bp_pos = POS_MIN;
int ret = 0;

@@ -708,7 +724,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
goto err;
}

while (!(ret = move_ratelimit(trans, ctxt))) {
while (!(ret = bch2_move_ratelimit(ctxt))) {
bch2_trans_begin(trans);

ret = bch2_get_next_backpointer(trans, bucket, gen,

@@ -737,7 +753,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);

ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
ret = bch2_move_get_io_opts_one(trans, &io_opts, k);
if (ret) {
bch2_trans_iter_exit(trans, &iter);
continue;

@@ -758,23 +774,20 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
i++;
}

ret = bch2_move_extent(trans, &iter, ctxt,
bucket_in_flight,
io_opts, bp.btree_id, k, data_opts);
ret = bch2_move_extent(ctxt, bucket_in_flight,
&iter, k, io_opts, data_opts);
bch2_trans_iter_exit(trans, &iter);

if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret == -ENOMEM) {
/* memory allocation failure, wait for some IO to finish */
bch2_move_ctxt_wait_for_io(ctxt, trans);
bch2_move_ctxt_wait_for_io(ctxt);
continue;
}
if (ret)
goto err;

if (ctxt->rate)
bch2_ratelimit_increment(ctxt->rate, k.k->size);
if (ctxt->stats)
atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
} else {

@@ -825,14 +838,12 @@ int bch2_evacuate_bucket(struct bch_fs *c,
struct write_point_specifier wp,
bool wait_on_copygc)
{
struct btree_trans *trans = bch2_trans_get(c);
struct moving_context ctxt;
int ret;

bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
ret = __bch2_evacuate_bucket(trans, &ctxt, NULL, bucket, gen, data_opts);
ret = __bch2_evacuate_bucket(&ctxt, NULL, bucket, gen, data_opts);
bch2_moving_ctxt_exit(&ctxt);
bch2_trans_put(trans);

return ret;
}
@@ -849,21 +860,25 @@ static int bch2_move_btree(struct bch_fs *c,
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_trans *trans = bch2_trans_get(c);
struct moving_context ctxt;
struct btree_trans *trans;
struct btree_iter iter;
struct btree *b;
enum btree_id id;
struct data_update_opts data_opts;
int ret = 0;

progress_list_add(c, stats);
bch2_moving_ctxt_init(&ctxt, c, NULL, stats,
writepoint_ptr(&c->btree_write_point),
true);
trans = ctxt.trans;

stats->data_type = BCH_DATA_btree;

for (id = start_btree_id;
id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1);
id++) {
stats->btree_id = id;
stats->pos = BBPOS(id, POS_MIN);

if (!bch2_btree_id_root(c, id)->b)
continue;

@@ -882,7 +897,7 @@ retry:
bpos_cmp(b->key.k.p, end_pos)) > 0)
break;

stats->pos = iter.pos;
stats->pos = BBPOS(iter.btree_id, iter.pos);

if (!pred(c, arg, b, &io_opts, &data_opts))
goto next;

@@ -904,14 +919,10 @@ next:
break;
}

bch2_trans_put(trans);

if (ret)
bch_err_fn(c, ret);

bch_err_fn(c, ret);
bch2_moving_ctxt_exit(&ctxt);
bch2_btree_interior_updates_flush(c);

progress_list_del(c, stats);
return ret;
}

@@ -1032,8 +1043,7 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
mutex_unlock(&c->sb_lock);
}

if (ret)
bch_err_fn(c, ret);
bch_err_fn(c, ret);
return ret;
}

@@ -1056,14 +1066,16 @@ int bch2_data_job(struct bch_fs *c,
ret = bch2_replicas_gc2(c) ?: ret;

ret = bch2_move_data(c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
(struct bbpos) { op.start_btree, op.start_pos },
(struct bbpos) { op.end_btree, op.end_pos },
NULL,
stats,
writepoint_hashed((unsigned long) current),
true,
rereplicate_pred, c) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;

bch2_move_stats_exit(stats, c);
break;
case BCH_DATA_OP_MIGRATE:
if (op.migrate.dev >= c->sb.nr_devices)

@@ -1080,18 +1092,21 @@ int bch2_data_job(struct bch_fs *c,
ret = bch2_replicas_gc2(c) ?: ret;

ret = bch2_move_data(c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
(struct bbpos) { op.start_btree, op.start_pos },
(struct bbpos) { op.end_btree, op.end_pos },
NULL,
stats,
writepoint_hashed((unsigned long) current),
true,
migrate_pred, &op) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;

bch2_move_stats_exit(stats, c);
break;
case BCH_DATA_OP_REWRITE_OLD_NODES:
bch2_move_stats_init(stats, "rewrite_old_nodes");
ret = bch2_scan_old_btree_nodes(c, stats);
bch2_move_stats_exit(stats, c);
break;
default:
ret = -EINVAL;

@@ -1100,19 +1115,43 @@ int bch2_data_job(struct bch_fs *c,
return ret;
}

void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
{
prt_printf(out, "%s: data type=%s pos=",
stats->name,
bch2_data_types[stats->data_type]);
bch2_bbpos_to_text(out, stats->pos);
prt_newline(out);
printbuf_indent_add(out, 2);

prt_str(out, "keys moved: ");
prt_u64(out, atomic64_read(&stats->keys_moved));
prt_newline(out);

prt_str(out, "keys raced: ");
prt_u64(out, atomic64_read(&stats->keys_raced));
prt_newline(out);

prt_str(out, "bytes seen: ");
prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9);
prt_newline(out);

prt_str(out, "bytes moved: ");
prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9);
prt_newline(out);

prt_str(out, "bytes raced: ");
prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9);
prt_newline(out);

printbuf_indent_sub(out, 2);
}

static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt)
{
struct bch_move_stats *stats = ctxt->stats;
struct moving_io *io;

prt_printf(out, "%s (%ps):", stats->name, ctxt->fn);
prt_newline(out);

prt_printf(out, " data type %s btree_id %s position: ",
bch2_data_types[stats->data_type],
bch2_btree_id_str(stats->btree_id));
bch2_bpos_to_text(out, stats->pos);
prt_newline(out);
bch2_move_stats_to_text(out, ctxt->stats);
printbuf_indent_add(out, 2);

prt_printf(out, "reads: ios %u/%u sectors %u/%u",

@@ -1153,7 +1192,4 @@ void bch2_fs_move_init(struct bch_fs *c)
{
INIT_LIST_HEAD(&c->moving_context_list);
mutex_init(&c->moving_context_lock);

INIT_LIST_HEAD(&c->data_progress_list);
mutex_init(&c->data_progress_lock);
}
@@ -2,6 +2,7 @@
#ifndef _BCACHEFS_MOVE_H
#define _BCACHEFS_MOVE_H

#include "bbpos.h"
#include "bcachefs_ioctl.h"
#include "btree_iter.h"
#include "buckets.h"

@@ -11,7 +12,7 @@
struct bch_read_bio;

struct moving_context {
struct bch_fs *c;
struct btree_trans *trans;
struct list_head list;
void *fn;

@@ -37,10 +38,10 @@ struct moving_context {
wait_queue_head_t wait;
};

#define move_ctxt_wait_event(_ctxt, _trans, _cond) \
#define move_ctxt_wait_event(_ctxt, _cond) \
do { \
bool cond_finished = false; \
bch2_moving_ctxt_do_pending_writes(_ctxt, _trans); \
bch2_moving_ctxt_do_pending_writes(_ctxt); \
\
if (_cond) \
break; \

@@ -59,22 +60,60 @@ void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *,
struct bch_ratelimit *, struct bch_move_stats *,
struct write_point_specifier, bool);
struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *);
void bch2_moving_ctxt_do_pending_writes(struct moving_context *,
struct btree_trans *);
void bch2_moving_ctxt_do_pending_writes(struct moving_context *);
void bch2_move_ctxt_wait_for_io(struct moving_context *);
int bch2_move_ratelimit(struct moving_context *);

/* Inodes in different snapshots may have different IO options: */
struct snapshot_io_opts_entry {
u32 snapshot;
struct bch_io_opts io_opts;
};

struct per_snapshot_io_opts {
u64 cur_inum;
struct bch_io_opts fs_io_opts;
DARRAY(struct snapshot_io_opts_entry) d;
};

static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c)
{
memset(io_opts, 0, sizeof(*io_opts));
io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts);
}

static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
{
darray_exit(&io_opts->d);
}

struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *,
struct per_snapshot_io_opts *, struct bkey_s_c);
int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, struct bkey_s_c);

int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);

int bch2_move_extent(struct moving_context *,
struct move_bucket_in_flight *,
struct btree_iter *,
struct bkey_s_c,
struct bch_io_opts,
struct data_update_opts);

int __bch2_move_data(struct moving_context *,
struct bbpos,
struct bbpos,
move_pred_fn, void *);
int bch2_move_data(struct bch_fs *,
enum btree_id, struct bpos,
enum btree_id, struct bpos,
struct bbpos start,
struct bbpos end,
struct bch_ratelimit *,
struct bch_move_stats *,
struct write_point_specifier,
bool,
move_pred_fn, void *);

int __bch2_evacuate_bucket(struct btree_trans *,
struct moving_context *,
int __bch2_evacuate_bucket(struct moving_context *,
struct move_bucket_in_flight *,
struct bpos, int,
struct data_update_opts);

@@ -88,7 +127,10 @@ int bch2_data_job(struct bch_fs *,
struct bch_move_stats *,
struct bch_ioctl_data);

void bch2_move_stats_init(struct bch_move_stats *stats, char *name);
void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *);
void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *);
void bch2_move_stats_init(struct bch_move_stats *, char *);

void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *);

void bch2_fs_move_init(struct bch_fs *);
@@ -2,17 +2,17 @@
#ifndef _BCACHEFS_MOVE_TYPES_H
#define _BCACHEFS_MOVE_TYPES_H

#include "bbpos_types.h"

struct bch_move_stats {
enum bch_data_type data_type;
enum btree_id btree_id;
struct bpos pos;
struct list_head list;
struct bbpos pos;
char name[32];

atomic64_t keys_moved;
atomic64_t keys_raced;
atomic64_t sectors_moved;
atomic64_t sectors_seen;
atomic64_t sectors_moved;
atomic64_t sectors_raced;
};
@@ -101,8 +101,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
return ret;
}

static void move_buckets_wait(struct btree_trans *trans,
struct moving_context *ctxt,
static void move_buckets_wait(struct moving_context *ctxt,
struct buckets_in_flight *list,
bool flush)
{

@@ -111,7 +110,7 @@ static void move_buckets_wait(struct btree_trans *trans,

while ((i = list->first)) {
if (flush)
move_ctxt_wait_event(ctxt, trans, !atomic_read(&i->count));
move_ctxt_wait_event(ctxt, !atomic_read(&i->count));

if (atomic_read(&i->count))
break;

@@ -129,7 +128,7 @@ static void move_buckets_wait(struct btree_trans *trans,
kfree(i);
}

bch2_trans_unlock(trans);
bch2_trans_unlock(ctxt->trans);
}

static bool bucket_in_flight(struct buckets_in_flight *list,

@@ -140,11 +139,11 @@ static bool bucket_in_flight(struct buckets_in_flight *list,

typedef DARRAY(struct move_bucket) move_buckets;

static int bch2_copygc_get_buckets(struct btree_trans *trans,
struct moving_context *ctxt,
static int bch2_copygc_get_buckets(struct moving_context *ctxt,
struct buckets_in_flight *buckets_in_flight,
move_buckets *buckets)
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;

@@ -152,7 +151,7 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
int ret;

move_buckets_wait(trans, ctxt, buckets_in_flight, false);
move_buckets_wait(ctxt, buckets_in_flight, false);

ret = bch2_btree_write_buffer_flush(trans);
if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()",

@@ -188,10 +187,10 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
}

noinline
static int bch2_copygc(struct btree_trans *trans,
struct moving_context *ctxt,
static int bch2_copygc(struct moving_context *ctxt,
struct buckets_in_flight *buckets_in_flight)
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
struct data_update_opts data_opts = {
.btree_insert_flags = BCH_WATERMARK_copygc,

@@ -202,7 +201,7 @@ static int bch2_copygc(struct btree_trans *trans,
u64 moved = atomic64_read(&ctxt->stats->sectors_moved);
int ret = 0;

ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets);
ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets);
if (ret)
goto err;

@@ -221,7 +220,7 @@ static int bch2_copygc(struct btree_trans *trans,
break;
}

ret = __bch2_evacuate_bucket(trans, ctxt, f, f->bucket.k.bucket,
ret = __bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket,
f->bucket.k.gen, data_opts);
if (ret)
goto err;

@@ -300,7 +299,6 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
static int bch2_copygc_thread(void *arg)
{
struct bch_fs *c = arg;
struct btree_trans *trans;
struct moving_context ctxt;
struct bch_move_stats move_stats;
struct io_clock *clock = &c->io_clock[WRITE];

@@ -317,7 +315,6 @@ static int bch2_copygc_thread(void *arg)
}

set_freezable();
trans = bch2_trans_get(c);

bch2_move_stats_init(&move_stats, "copygc");
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,

@@ -325,16 +322,16 @@ static int bch2_copygc_thread(void *arg)
false);

while (!ret && !kthread_should_stop()) {
bch2_trans_unlock(trans);
bch2_trans_unlock(ctxt.trans);
cond_resched();

if (!c->copy_gc_enabled) {
move_buckets_wait(trans, &ctxt, &buckets, true);
move_buckets_wait(&ctxt, &buckets, true);
kthread_wait_freezable(c->copy_gc_enabled);
}

if (unlikely(freezing(current))) {
move_buckets_wait(trans, &ctxt, &buckets, true);
move_buckets_wait(&ctxt, &buckets, true);
__refrigerator(false);
continue;
}

@@ -345,7 +342,7 @@ static int bch2_copygc_thread(void *arg)
if (wait > clock->max_slop) {
c->copygc_wait_at = last;
c->copygc_wait = last + wait;
move_buckets_wait(trans, &ctxt, &buckets, true);
move_buckets_wait(&ctxt, &buckets, true);
trace_and_count(c, copygc_wait, c, wait, last + wait);
bch2_kthread_io_clock_wait(clock, last + wait,
MAX_SCHEDULE_TIMEOUT);

@@ -355,16 +352,16 @@ static int bch2_copygc_thread(void *arg)
c->copygc_wait = 0;

c->copygc_running = true;
ret = bch2_copygc(trans, &ctxt, &buckets);
ret = bch2_copygc(&ctxt, &buckets);
c->copygc_running = false;

wake_up(&c->copygc_running_wq);
}

move_buckets_wait(trans, &ctxt, &buckets, true);
move_buckets_wait(&ctxt, &buckets, true);
rhashtable_destroy(&buckets.table);
bch2_trans_put(trans);
bch2_moving_ctxt_exit(&ctxt);
bch2_move_stats_exit(&move_stats, c);

return 0;
}
@@ -294,6 +294,9 @@ int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err)
return -EINVAL;
}

if (opt->fn.validate)
return opt->fn.validate(v, err);

return 0;
}

@@ -74,6 +74,7 @@ enum opt_type {
struct bch_opt_fn {
int (*parse)(struct bch_fs *, const char *, u64 *, struct printbuf *);
void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
int (*validate)(u64, struct printbuf *);
};

/**
@@ -415,11 +415,11 @@ void bch2_prt_bitflags(struct printbuf *out,
while (list[nr])
nr++;

while (flags && (bit = __ffs(flags)) < nr) {
while (flags && (bit = __ffs64(flags)) < nr) {
if (!first)
bch2_prt_printf(out, ",");
first = false;
bch2_prt_printf(out, "%s", list[bit]);
flags ^= 1 << bit;
flags ^= BIT_ULL(bit);
}
}
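Worth noting on the bch2_prt_bitflags() hunk above: the flags word is 64-bit, and the old "flags ^= 1 << bit" builds only a 32-bit int mask, so flag bits at position 32 and above could never be cleared (and the shift itself is undefined there); switching to __ffs64()/BIT_ULL() keeps the whole computation in 64 bits. A minimal userspace sketch of the fixed pattern, using the GCC/Clang builtin __builtin_ctzll() as a stand-in for the kernel's __ffs64() and a made-up names[] table:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(nr)	(1ULL << (nr))

/* Print the names of the set bits in a 64-bit flags word. */
static void print_bitflags(uint64_t flags, const char * const list[], unsigned nr)
{
	bool first = true;
	unsigned bit;

	while (flags && (bit = (unsigned) __builtin_ctzll(flags)) < nr) {
		printf("%s%s", first ? "" : ",", list[bit]);
		first = false;
		flags ^= BIT_ULL(bit);	/* a 32-bit "1 << bit" mask can't clear bits above 31 */
	}
	printf("\n");
}

int main(void)
{
	const char * const names[40] = { [0] = "low", [33] = "high" };

	print_bitflags(BIT_ULL(33) | BIT_ULL(0), names, 40);	/* prints "low,high" */
	return 0;
}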
@@ -3,13 +3,18 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
#include "btree_iter.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
#include "clock.h"
#include "compress.h"
#include "disk_groups.h"
#include "errcode.h"
#include "error.h"
#include "inode.h"
#include "move.h"
#include "rebalance.h"
#include "subvolume.h"
#include "super-io.h"
#include "trace.h"

@@ -17,302 +22,398 @@
#include <linux/kthread.h>
#include <linux/sched/cputime.h>

/*
* Check if an extent should be moved:
* returns -1 if it should not be moved, or
* device of pointer that should be moved, if known, or INT_MAX if unknown
*/
#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1)

static const char * const bch2_rebalance_state_strs[] = {
#define x(t) #t,
BCH_REBALANCE_STATES()
NULL
#undef x
};

static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 inum)
{
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_i_cookie *cookie;
u64 v;
int ret;

bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)
goto err;

v = k.k->type == KEY_TYPE_cookie
? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
: 0;

cookie = bch2_trans_kmalloc(trans, sizeof(*cookie));
ret = PTR_ERR_OR_ZERO(cookie);
if (ret)
goto err;

bkey_cookie_init(&cookie->k_i);
cookie->k.p = iter.pos;
cookie->v.cookie = cpu_to_le64(v + 1);

ret = bch2_trans_update(trans, &iter, &cookie->k_i, 0);
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}

int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
{
int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
__bch2_set_rebalance_needs_scan(trans, inum));
rebalance_wakeup(c);
return ret;
}

int bch2_set_fs_needs_rebalance(struct bch_fs *c)
{
return bch2_set_rebalance_needs_scan(c, 0);
}

static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie)
{
struct btree_iter iter;
struct bkey_s_c k;
u64 v;
int ret;

bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)
goto err;

v = k.k->type == KEY_TYPE_cookie
? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
: 0;

if (v == cookie)
ret = bch2_btree_delete_at(trans, &iter, 0);
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}

static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans,
struct btree_iter *work_iter)
{
return !kthread_should_stop()
? bch2_btree_iter_peek(work_iter)
: bkey_s_c_null;
}

static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
{
struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0);
int ret = PTR_ERR_OR_ZERO(n);
if (ret)
return ret;

extent_entry_drop(bkey_i_to_s(n),
(void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n)));
return bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
}

static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
struct bpos work_pos,
struct btree_iter *extent_iter,
struct data_update_opts *data_opts)
{
struct bch_fs *c = trans->c;
struct bkey_s_c k;

bch2_trans_iter_exit(trans, extent_iter);
bch2_trans_iter_init(trans, extent_iter,
work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink,
work_pos,
BTREE_ITER_ALL_SNAPSHOTS);
k = bch2_btree_iter_peek_slot(extent_iter);
if (bkey_err(k))
return k;

const struct bch_extent_rebalance *r = k.k ? bch2_bkey_rebalance_opts(k) : NULL;
if (!r) {
/* raced due to btree write buffer, nothing to do */
return bkey_s_c_null;
}

memset(data_opts, 0, sizeof(*data_opts));

data_opts->rewrite_ptrs =
bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression);
data_opts->target = r->target;

if (!data_opts->rewrite_ptrs) {
/*
* device we would want to write to offline? devices in target
* changed?
*
* We'll now need a full scan before this extent is picked up
* again:
*/
int ret = bch2_bkey_clear_needs_rebalance(trans, extent_iter, k);
if (ret)
return bkey_s_c_err(ret);
return bkey_s_c_null;
}

return k;
}

noinline_for_stack
static int do_rebalance_extent(struct moving_context *ctxt,
struct bpos work_pos,
struct btree_iter *extent_iter)
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
struct bch_fs_rebalance *r = &trans->c->rebalance;
struct data_update_opts data_opts;
struct bch_io_opts io_opts;
struct bkey_s_c k;
struct bkey_buf sk;
int ret;

ctxt->stats = &r->work_stats;
r->state = BCH_REBALANCE_working;

bch2_bkey_buf_init(&sk);

ret = bkey_err(k = next_rebalance_extent(trans, work_pos,
extent_iter, &data_opts));
if (ret || !k.k)
goto out;

ret = bch2_move_get_io_opts_one(trans, &io_opts, k);
if (ret)
goto out;

atomic64_add(k.k->size, &ctxt->stats->sectors_seen);

/*
* The iterator gets unlocked by __bch2_read_extent - need to
* save a copy of @k elsewhere:
*/
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);

ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts);
if (ret) {
if (bch2_err_matches(ret, ENOMEM)) {
/* memory allocation failure, wait for some IO to finish */
bch2_move_ctxt_wait_for_io(ctxt);
ret = -BCH_ERR_transaction_restart_nested;
}

if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto out;

/* skip it and continue, XXX signal failure */
ret = 0;
}
out:
bch2_bkey_buf_exit(&sk, c);
return ret;
}

static bool rebalance_pred(struct bch_fs *c, void *arg,
struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
unsigned i;
unsigned target, compression;

data_opts->rewrite_ptrs = 0;
data_opts->target = io_opts->background_target;
data_opts->extra_replicas = 0;
data_opts->btree_insert_flags = 0;
if (k.k->p.inode) {
target = io_opts->background_target;
compression = io_opts->background_compression ?: io_opts->compression;
} else {
const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);

if (io_opts->background_compression &&
!bch2_bkey_is_incompressible(k)) {
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;

i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
if (!p.ptr.cached &&
p.crc.compression_type !=
bch2_compression_opt_to_type(io_opts->background_compression))
data_opts->rewrite_ptrs |= 1U << i;
i++;
}
}

if (io_opts->background_target) {
const struct bch_extent_ptr *ptr;

i = 0;
bkey_for_each_ptr(ptrs, ptr) {
if (!ptr->cached &&
!bch2_dev_in_target(c, ptr->dev, io_opts->background_target) &&
bch2_target_accepts_data(c, BCH_DATA_user, io_opts->background_target))
data_opts->rewrite_ptrs |= 1U << i;
i++;
}
target = r ? r->target : io_opts->background_target;
compression = r ? r->compression :
(io_opts->background_compression ?: io_opts->compression);
}

data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression);
data_opts->target = target;
return data_opts->rewrite_ptrs != 0;
}
void bch2_rebalance_add_key(struct bch_fs *c,
struct bkey_s_c k,
struct bch_io_opts *io_opts)
static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
{
struct data_update_opts update_opts = { 0 };
struct bkey_ptrs_c ptrs;
const struct bch_extent_ptr *ptr;
unsigned i;
struct btree_trans *trans = ctxt->trans;
struct bch_fs_rebalance *r = &trans->c->rebalance;
int ret;

if (!rebalance_pred(c, NULL, k, io_opts, &update_opts))
return;
bch2_move_stats_init(&r->scan_stats, "rebalance_scan");
ctxt->stats = &r->scan_stats;

i = 0;
ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr(ptrs, ptr) {
if ((1U << i) && update_opts.rewrite_ptrs)
if (atomic64_add_return(k.k->size,
&bch_dev_bkey_exists(c, ptr->dev)->rebalance_work) ==
k.k->size)
rebalance_wakeup(c);
i++;
}
}

void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
{
if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) ==
sectors)
rebalance_wakeup(c);
}

struct rebalance_work {
int dev_most_full_idx;
unsigned dev_most_full_percent;
u64 dev_most_full_work;
u64 dev_most_full_capacity;
u64 total_work;
};

static void rebalance_work_accumulate(struct rebalance_work *w,
u64 dev_work, u64 unknown_dev, u64 capacity, int idx)
{
unsigned percent_full;
u64 work = dev_work + unknown_dev;

/* avoid divide by 0 */
if (!capacity)
return;

if (work < dev_work || work < unknown_dev)
work = U64_MAX;
work = min(work, capacity);

percent_full = div64_u64(work * 100, capacity);

if (percent_full >= w->dev_most_full_percent) {
w->dev_most_full_idx = idx;
w->dev_most_full_percent = percent_full;
w->dev_most_full_work = work;
w->dev_most_full_capacity = capacity;
if (!inum) {
r->scan_start = BBPOS_MIN;
r->scan_end = BBPOS_MAX;
} else {
r->scan_start = BBPOS(BTREE_ID_extents, POS(inum, 0));
r->scan_end = BBPOS(BTREE_ID_extents, POS(inum, U64_MAX));
}

if (w->total_work + dev_work >= w->total_work &&
w->total_work + dev_work >= dev_work)
w->total_work += dev_work;
}
r->state = BCH_REBALANCE_scanning;

static struct rebalance_work rebalance_work(struct bch_fs *c)
{
struct bch_dev *ca;
struct rebalance_work ret = { .dev_most_full_idx = -1 };
u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev);
unsigned i;

for_each_online_member(ca, c, i)
rebalance_work_accumulate(&ret,
atomic64_read(&ca->rebalance_work),
unknown_dev,
bucket_to_sector(ca, ca->mi.nbuckets -
ca->mi.first_bucket),
i);

rebalance_work_accumulate(&ret,
unknown_dev, 0, c->capacity, -1);
ret = __bch2_move_data(ctxt, r->scan_start, r->scan_end, rebalance_pred, NULL) ?:
commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_clear_rebalance_needs_scan(trans, inum, cookie));

bch2_move_stats_exit(&r->scan_stats, trans->c);
return ret;
}

static void rebalance_work_reset(struct bch_fs *c)
static void rebalance_wait(struct bch_fs *c)
{
struct bch_fs_rebalance *r = &c->rebalance;
struct bch_dev *ca;
struct io_clock *clock = &c->io_clock[WRITE];
u64 now = atomic64_read(&clock->now);
u64 min_member_capacity = 128 * 2048;
unsigned i;

for_each_online_member(ca, c, i)
atomic64_set(&ca->rebalance_work, 0);
for_each_rw_member(ca, c, i)
min_member_capacity = min(min_member_capacity,
ca->mi.nbuckets * ca->mi.bucket_size);

atomic64_set(&c->rebalance.work_unknown_dev, 0);
r->wait_iotime_end = now + (min_member_capacity >> 6);

if (r->state != BCH_REBALANCE_waiting) {
r->wait_iotime_start = now;
r->wait_wallclock_start = ktime_get_real_ns();
r->state = BCH_REBALANCE_waiting;
}

bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT);
}

static unsigned long curr_cputime(void)
static int do_rebalance(struct moving_context *ctxt)
{
u64 utime, stime;
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
struct bch_fs_rebalance *r = &c->rebalance;
struct btree_iter rebalance_work_iter, extent_iter = { NULL };
struct bkey_s_c k;
int ret = 0;

task_cputime_adjusted(current, &utime, &stime);
return nsecs_to_jiffies(utime + stime);
bch2_move_stats_init(&r->work_stats, "rebalance_work");
bch2_move_stats_init(&r->scan_stats, "rebalance_scan");

bch2_trans_iter_init(trans, &rebalance_work_iter,
BTREE_ID_rebalance_work, POS_MIN,
BTREE_ITER_ALL_SNAPSHOTS);

while (!bch2_move_ratelimit(ctxt) &&
!kthread_wait_freezable(r->enabled)) {
bch2_trans_begin(trans);

ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter));
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret || !k.k)
break;

ret = k.k->type == KEY_TYPE_cookie
? do_rebalance_scan(ctxt, k.k->p.inode,
le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie))
: do_rebalance_extent(ctxt, k.k->p, &extent_iter);

if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;

bch2_btree_iter_advance(&rebalance_work_iter);
}

bch2_trans_iter_exit(trans, &extent_iter);
bch2_trans_iter_exit(trans, &rebalance_work_iter);
bch2_move_stats_exit(&r->scan_stats, c);

if (!ret &&
!kthread_should_stop() &&
!atomic64_read(&r->work_stats.sectors_seen) &&
!atomic64_read(&r->scan_stats.sectors_seen)) {
bch2_trans_unlock(trans);
rebalance_wait(c);
}

bch_err_fn(c, ret);
return ret;
}
static int bch2_rebalance_thread(void *arg)
{
struct bch_fs *c = arg;
struct bch_fs_rebalance *r = &c->rebalance;
struct io_clock *clock = &c->io_clock[WRITE];
struct rebalance_work w, p;
struct bch_move_stats move_stats;
unsigned long start, prev_start;
unsigned long prev_run_time, prev_run_cputime;
unsigned long cputime, prev_cputime;
u64 io_start;
long throttle;
struct moving_context ctxt;
int ret;

set_freezable();

io_start = atomic64_read(&clock->now);
p = rebalance_work(c);
prev_start = jiffies;
prev_cputime = curr_cputime();
bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
writepoint_ptr(&c->rebalance_write_point),
true);

bch2_move_stats_init(&move_stats, "rebalance");
while (!kthread_wait_freezable(r->enabled)) {
cond_resched();
while (!kthread_should_stop() &&
!(ret = do_rebalance(&ctxt)))
;

start = jiffies;
cputime = curr_cputime();

prev_run_time = start - prev_start;
prev_run_cputime = cputime - prev_cputime;

w = rebalance_work(c);
BUG_ON(!w.dev_most_full_capacity);

if (!w.total_work) {
r->state = REBALANCE_WAITING;
kthread_wait_freezable(rebalance_work(c).total_work);
continue;
}

/*
* If there isn't much work to do, throttle cpu usage:
*/
throttle = prev_run_cputime * 100 /
max(1U, w.dev_most_full_percent) -
prev_run_time;

if (w.dev_most_full_percent < 20 && throttle > 0) {
r->throttled_until_iotime = io_start +
div_u64(w.dev_most_full_capacity *
(20 - w.dev_most_full_percent),
50);

if (atomic64_read(&clock->now) + clock->max_slop <
r->throttled_until_iotime) {
r->throttled_until_cputime = start + throttle;
r->state = REBALANCE_THROTTLED;

bch2_kthread_io_clock_wait(clock,
r->throttled_until_iotime,
throttle);
continue;
}
}

/* minimum 1 mb/sec: */
r->pd.rate.rate =
max_t(u64, 1 << 11,
r->pd.rate.rate *
max(p.dev_most_full_percent, 1U) /
max(w.dev_most_full_percent, 1U));

io_start = atomic64_read(&clock->now);
p = w;
prev_start = start;
prev_cputime = cputime;

r->state = REBALANCE_RUNNING;
memset(&move_stats, 0, sizeof(move_stats));
rebalance_work_reset(c);

bch2_move_data(c,
0, POS_MIN,
BTREE_ID_NR, POS_MAX,
/* ratelimiting disabled for now */
NULL, /* &r->pd.rate, */
&move_stats,
writepoint_ptr(&c->rebalance_write_point),
true,
rebalance_pred, NULL);
}
bch2_moving_ctxt_exit(&ctxt);

return 0;
}

void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c)
void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
{
struct bch_fs_rebalance *r = &c->rebalance;
struct rebalance_work w = rebalance_work(c);

if (!out->nr_tabstops)
printbuf_tabstop_push(out, 20);

prt_printf(out, "fullest_dev (%i):", w.dev_most_full_idx);
prt_tab(out);

prt_human_readable_u64(out, w.dev_most_full_work << 9);
prt_printf(out, "/");
prt_human_readable_u64(out, w.dev_most_full_capacity << 9);
prt_newline(out);

prt_printf(out, "total work:");
prt_tab(out);

prt_human_readable_u64(out, w.total_work << 9);
prt_printf(out, "/");
prt_human_readable_u64(out, c->capacity << 9);
prt_newline(out);

prt_printf(out, "rate:");
prt_tab(out);
prt_printf(out, "%u", r->pd.rate.rate);
prt_str(out, bch2_rebalance_state_strs[r->state]);
prt_newline(out);
printbuf_indent_add(out, 2);

switch (r->state) {
case REBALANCE_WAITING:
prt_printf(out, "waiting");
case BCH_REBALANCE_waiting: {
u64 now = atomic64_read(&c->io_clock[WRITE].now);

prt_str(out, "io wait duration: ");
bch2_prt_human_readable_s64(out, r->wait_iotime_end - r->wait_iotime_start);
prt_newline(out);

prt_str(out, "io wait remaining: ");
bch2_prt_human_readable_s64(out, r->wait_iotime_end - now);
prt_newline(out);

prt_str(out, "duration waited: ");
bch2_pr_time_units(out, ktime_get_real_ns() - r->wait_wallclock_start);
prt_newline(out);
break;
case REBALANCE_THROTTLED:
prt_printf(out, "throttled for %lu sec or ",
(r->throttled_until_cputime - jiffies) / HZ);
prt_human_readable_u64(out,
(r->throttled_until_iotime -
atomic64_read(&c->io_clock[WRITE].now)) << 9);
prt_printf(out, " io");
}
case BCH_REBALANCE_working:
bch2_move_stats_to_text(out, &r->work_stats);
break;
case REBALANCE_RUNNING:
prt_printf(out, "running");
case BCH_REBALANCE_scanning:
bch2_move_stats_to_text(out, &r->scan_stats);
break;
}
prt_newline(out);
printbuf_indent_sub(out, 2);
}

void bch2_rebalance_stop(struct bch_fs *c)

@@ -361,6 +462,4 @@ int bch2_rebalance_start(struct bch_fs *c)
void bch2_fs_rebalance_init(struct bch_fs *c)
{
bch2_pd_controller_init(&c->rebalance.pd);

atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX);
}
@@ -4,6 +4,9 @@

#include "rebalance_types.h"

int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum);
int bch2_set_fs_needs_rebalance(struct bch_fs *);

static inline void rebalance_wakeup(struct bch_fs *c)
{
struct task_struct *p;

@@ -15,11 +18,7 @@ static inline void rebalance_wakeup(struct bch_fs *c)
rcu_read_unlock();
}

void bch2_rebalance_add_key(struct bch_fs *, struct bkey_s_c,
struct bch_io_opts *);
void bch2_rebalance_add_work(struct bch_fs *, u64);

void bch2_rebalance_work_to_text(struct printbuf *, struct bch_fs *);
void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *);

void bch2_rebalance_stop(struct bch_fs *);
int bch2_rebalance_start(struct bch_fs *);
@ -2,25 +2,36 @@
#ifndef _BCACHEFS_REBALANCE_TYPES_H
#define _BCACHEFS_REBALANCE_TYPES_H

#include "bbpos_types.h"
#include "move_types.h"

enum rebalance_state {
	REBALANCE_WAITING,
	REBALANCE_THROTTLED,
	REBALANCE_RUNNING,
#define BCH_REBALANCE_STATES()		\
	x(waiting)			\
	x(working)			\
	x(scanning)

enum bch_rebalance_states {
#define x(t)	BCH_REBALANCE_##t,
	BCH_REBALANCE_STATES()
#undef x
};

struct bch_fs_rebalance {
	struct task_struct __rcu *thread;
	struct task_struct __rcu	*thread;
	struct bch_pd_controller pd;

	atomic64_t		work_unknown_dev;
	enum bch_rebalance_states	state;
	u64				wait_iotime_start;
	u64				wait_iotime_end;
	u64				wait_wallclock_start;

	enum rebalance_state	state;
	u64			throttled_until_iotime;
	unsigned long		throttled_until_cputime;
	struct bch_move_stats	work_stats;

	unsigned		enabled:1;
	struct bbpos		scan_start;
	struct bbpos		scan_end;
	struct bch_move_stats	scan_stats;

	unsigned		enabled:1;
};

#endif /* _BCACHEFS_REBALANCE_TYPES_H */

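The BCH_REBALANCE_STATES() x-macro above is expanded once to generate the enum, and the same list can be expanded a second time to generate the bch2_rebalance_state_strs[] table that bch2_rebalance_status_to_text() prints. A minimal sketch of that second expansion (illustrative of the pattern; the actual table lives in rebalance.c):

static const char * const bch2_rebalance_state_strs[] = {
#define x(t)	#t,
	BCH_REBALANCE_STATES()
#undef x
};

Keeping both expansions driven by the one list means the state names shown in sysfs can never drift out of sync with the enum.
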
@ -23,6 +23,7 @@
#include "logged_ops.h"
#include "move.h"
#include "quota.h"
#include "rebalance.h"
#include "recovery.h"
#include "replicas.h"
#include "sb-clean.h"
@ -946,16 +947,12 @@ int bch2_fs_initialize(struct bch_fs *c)
	for (i = 0; i < BTREE_ID_NR; i++)
		bch2_btree_root_alloc(c, i);

	for_each_online_member(ca, c, i)
	for_each_member_device(ca, c, i)
		bch2_dev_usage_init(ca);

	for_each_online_member(ca, c, i) {
		ret = bch2_dev_journal_alloc(ca);
		if (ret) {
			percpu_ref_put(&ca->io_ref);
			goto err;
		}
	}
	ret = bch2_fs_journal_alloc(c);
	if (ret)
		goto err;

	/*
	 * journal_res_get() will crash if called before this has
@ -973,15 +970,13 @@ int bch2_fs_initialize(struct bch_fs *c)
	 * btree updates
	 */
	bch_verbose(c, "marking superblocks");
	for_each_member_device(ca, c, i) {
		ret = bch2_trans_mark_dev_sb(c, ca);
		if (ret) {
			percpu_ref_put(&ca->ref);
			goto err;
		}
	ret = bch2_trans_mark_dev_sbs(c);
	bch_err_msg(c, ret, "marking superblocks");
	if (ret)
		goto err;

		for_each_online_member(ca, c, i)
			ca->new_fs_bucket_idx = 0;
	}

	ret = bch2_fs_freespace_init(c);
	if (ret)

@ -14,6 +14,8 @@
	x(snapshots_read,		PASS_ALWAYS)			\
	x(check_topology,		0)				\
	x(check_allocations,		PASS_FSCK)			\
	x(trans_mark_dev_sbs,		PASS_ALWAYS|PASS_SILENT)	\
	x(fs_journal_alloc,		PASS_ALWAYS|PASS_SILENT)	\
	x(set_may_go_rw,		PASS_ALWAYS|PASS_SILENT)	\
	x(journal_replay,		PASS_ALWAYS)			\
	x(check_alloc_info,		PASS_FSCK)			\
@ -32,6 +34,7 @@
	x(resume_logged_ops,		PASS_ALWAYS)			\
	x(check_inodes,			PASS_FSCK)			\
	x(check_extents,		PASS_FSCK)			\
	x(check_indirect_extents,	PASS_FSCK)			\
	x(check_dirents,		PASS_FSCK)			\
	x(check_xattrs,			PASS_FSCK)			\
	x(check_root,			PASS_FSCK)			\
@ -39,6 +42,7 @@
	x(check_nlinks,			PASS_FSCK)			\
	x(delete_dead_inodes,		PASS_FSCK|PASS_UNCLEAN)		\
	x(fix_reflink_p,		0)				\
	x(set_fs_needs_rebalance,	0)				\

enum bch_recovery_pass {
#define x(n, when)	BCH_RECOVERY_PASS_##n,

@ -7,6 +7,7 @@
#include "inode.h"
#include "io_misc.h"
#include "io_write.h"
#include "rebalance.h"
#include "reflink.h"
#include "subvolume.h"
#include "super-io.h"
@ -103,21 +104,22 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
}
#endif

static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned *flags)
{
	if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) {
		new->k.type = KEY_TYPE_deleted;
		new->k.size = 0;
		set_bkey_val_u64s(&new->k, 0);
		*flags &= ~BTREE_TRIGGER_INSERT;
	}
}

int bch2_trans_mark_reflink_v(struct btree_trans *trans,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c old, struct bkey_i *new,
			      unsigned flags)
{
	if (!(flags & BTREE_TRIGGER_OVERWRITE)) {
		struct bkey_i_reflink_v *r = bkey_i_to_reflink_v(new);

		if (!r->v.refcount) {
			r->k.type = KEY_TYPE_deleted;
			r->k.size = 0;
			set_bkey_val_u64s(&r->k, 0);
			return 0;
		}
	}
	check_indirect_extent_deleting(new, &flags);

	return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags);
}
@ -132,7 +134,7 @@ int bch2_indirect_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k,
}

void bch2_indirect_inline_data_to_text(struct printbuf *out,
			struct bch_fs *c, struct bkey_s_c k)
				       struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k);
	unsigned datalen = bkey_inline_data_bytes(k.k);
@ -147,16 +149,7 @@ int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans,
			      struct bkey_s_c old, struct bkey_i *new,
			      unsigned flags)
{
	if (!(flags & BTREE_TRIGGER_OVERWRITE)) {
		struct bkey_i_indirect_inline_data *r =
			bkey_i_to_indirect_inline_data(new);

		if (!r->v.refcount) {
			r->k.type = KEY_TYPE_deleted;
			r->k.size = 0;
			set_bkey_val_u64s(&r->k, 0);
		}
	}
	check_indirect_extent_deleting(new, &flags);

	return 0;
}
@ -260,6 +253,7 @@ s64 bch2_remap_range(struct bch_fs *c,
	struct bpos dst_start = POS(dst_inum.inum, dst_offset);
	struct bpos src_start = POS(src_inum.inum, src_offset);
	struct bpos dst_end = dst_start, src_end = src_start;
	struct bch_io_opts opts;
	struct bpos src_want;
	u64 dst_done;
	u32 dst_snapshot, src_snapshot;
@ -277,6 +271,10 @@ s64 bch2_remap_range(struct bch_fs *c,
	bch2_bkey_buf_init(&new_src);
	trans = bch2_trans_get(c);

	ret = bch2_inum_opts_get(trans, src_inum, &opts);
	if (ret)
		goto err;

	bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start,
			     BTREE_ITER_INTENT);
	bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start,
@ -360,10 +358,13 @@ s64 bch2_remap_range(struct bch_fs *c,
				min(src_k.k->p.offset - src_want.offset,
				    dst_end.offset - dst_iter.pos.offset));

		ret = bch2_extent_update(trans, dst_inum, &dst_iter,
					 new_dst.k, &disk_res,
					 new_i_size, i_sectors_delta,
					 true);
		ret = bch2_bkey_set_needs_rebalance(c, new_dst.k,
						    opts.background_target,
						    opts.background_compression) ?:
			bch2_extent_update(trans, dst_inum, &dst_iter,
					   new_dst.k, &disk_res,
					   new_i_size, i_sectors_delta,
					   true);
		bch2_disk_reservation_put(c, &disk_res);
	}
	bch2_trans_iter_exit(trans, &dst_iter);
@ -394,7 +395,7 @@ s64 bch2_remap_range(struct bch_fs *c,

		bch2_trans_iter_exit(trans, &inode_iter);
	} while (bch2_err_matches(ret2, BCH_ERR_transaction_restart));

err:
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&new_src, c);
	bch2_bkey_buf_exit(&new_dst, c);

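The new error path above chains the two calls with GNU C's binary conditional: a ?: b evaluates to a unless a is zero, so bch2_extent_update() only runs when bch2_bkey_set_needs_rebalance() returned 0, and the first non-zero error code is what gets returned. A tiny standalone illustration of the idiom (step_one/step_two are hypothetical helpers, not bcachefs functions):

#include <stdio.h>

static int step_one(void) { return 0; }		/* 0 == success */
static int step_two(void) { return -22; }	/* pretend -EINVAL */

int main(void)
{
	/* step_two() is evaluated only because step_one() returned 0 */
	int ret = step_one() ?: step_two();

	printf("ret = %d\n", ret);		/* prints ret = -22 */
	return 0;
}
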
@ -948,9 +948,6 @@ int bch2_fs_start(struct bch_fs *c)
		goto err;
	}

	for_each_online_member(ca, c, i)
		bch2_sb_from_fs(c, ca);

	for_each_online_member(ca, c, i)
		bch2_members_v2_get_mut(c->disk_sb.sb, i)->last_mount = cpu_to_le64(now);

@ -960,12 +957,6 @@ int bch2_fs_start(struct bch_fs *c)
			bch2_dev_allocator_add(c, ca);
	bch2_recalc_capacity(c);

	for (i = 0; i < BCH_TRANSACTIONS_NR; i++) {
		mutex_lock(&c->btree_transaction_stats[i].lock);
		bch2_time_stats_init(&c->btree_transaction_stats[i].lock_hold_times);
		mutex_unlock(&c->btree_transaction_stats[i].lock);
	}

	ret = BCH_SB_INITIALIZED(c->disk_sb.sb)
		? bch2_fs_recovery(c)
		: bch2_fs_initialize(c);
@ -1591,7 +1582,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
	dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx);

	if (BCH_MEMBER_GROUP(&dev_mi)) {
		bch2_disk_path_to_text(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1);
		bch2_disk_path_to_text_sb(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1);
		if (label.allocation_failure) {
			ret = -ENOMEM;
			goto err;
@ -1689,13 +1680,13 @@ have_slot:

	ret = bch2_trans_mark_dev_sb(c, ca);
	if (ret) {
		bch_err_msg(c, ret, "marking new superblock");
		bch_err_msg(ca, ret, "marking new superblock");
		goto err_late;
	}

	ret = bch2_fs_freespace_init(c);
	if (ret) {
		bch_err_msg(c, ret, "initializing free space");
		bch_err_msg(ca, ret, "initializing free space");
		goto err_late;
	}

@ -1763,19 +1754,26 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
	if (ca->mi.state == BCH_MEMBER_STATE_rw)
		__bch2_dev_read_write(c, ca);

	if (!ca->mi.freespace_initialized) {
		ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
		bch_err_msg(ca, ret, "initializing free space");
		if (ret)
			goto err;
	}

	if (!ca->journal.nr) {
		ret = bch2_dev_journal_alloc(ca);
		bch_err_msg(ca, ret, "allocating journal");
		if (ret)
			goto err;
	}

	mutex_lock(&c->sb_lock);
	struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);

	m->last_mount =
	bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
		cpu_to_le64(ktime_get_real_seconds());

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	ret = bch2_fs_freespace_init(c);
	if (ret)
		bch_err_msg(c, ret, "initializing free space");

	up_write(&c->state_lock);
	return 0;
err:

@ -37,16 +37,4 @@ struct bch_member_cpu {
	u8			valid;
};

struct bch_disk_group_cpu {
	bool				deleted;
	u16				parent;
	struct bch_devs_mask		devs;
};

struct bch_disk_groups_cpu {
	struct rcu_head			rcu;
	unsigned			nr;
	struct bch_disk_group_cpu	entries[] __counted_by(nr);
};

#endif /* _BCACHEFS_SUPER_TYPES_H */

@ -212,7 +212,7 @@ read_attribute(copy_gc_wait);

rw_attribute(rebalance_enabled);
sysfs_pd_controller_attribute(rebalance);
read_attribute(rebalance_work);
read_attribute(rebalance_status);
rw_attribute(promote_whole_extents);

read_attribute(new_stripes);
@ -386,8 +386,8 @@ SHOW(bch2_fs)
	if (attr == &sysfs_copy_gc_wait)
		bch2_copygc_wait_to_text(out, c);

	if (attr == &sysfs_rebalance_work)
		bch2_rebalance_work_to_text(out, c);
	if (attr == &sysfs_rebalance_status)
		bch2_rebalance_status_to_text(out, c);

	sysfs_print(promote_whole_extents, c->promote_whole_extents);

@ -646,7 +646,7 @@ struct attribute *bch2_fs_internal_files[] = {
	&sysfs_copy_gc_wait,

	&sysfs_rebalance_enabled,
	&sysfs_rebalance_work,
	&sysfs_rebalance_status,
	sysfs_pd_controller_files(rebalance),

	&sysfs_moving_ctxts,
@ -707,10 +707,8 @@ STORE(bch2_fs_opts_dir)
	bch2_opt_set_by_id(&c->opts, id, v);

	if ((id == Opt_background_target ||
	     id == Opt_background_compression) && v) {
		bch2_rebalance_add_work(c, S64_MAX);
		rebalance_wakeup(c);
	}
	     id == Opt_background_compression) && v)
		bch2_set_rebalance_needs_scan(c, 0);

	ret = size;
err:
@ -910,13 +908,8 @@ SHOW(bch2_dev)
	sysfs_print(discard, ca->mi.discard);

	if (attr == &sysfs_label) {
		if (ca->mi.group) {
			mutex_lock(&c->sb_lock);
			bch2_disk_path_to_text(out, c->disk_sb.sb,
					       ca->mi.group - 1);
			mutex_unlock(&c->sb_lock);
		}

		if (ca->mi.group)
			bch2_disk_path_to_text(out, c, ca->mi.group - 1);
		prt_char(out, '\n');
	}

@ -7,6 +7,7 @@
#include "btree_locking.h"
#include "btree_update_interior.h"
#include "keylist.h"
#include "move_types.h"
#include "opts.h"
#include "six.h"

@ -767,25 +767,36 @@ DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
);

TRACE_EVENT(move_data,
	TP_PROTO(struct bch_fs *c, u64 sectors_moved,
		 u64 keys_moved),
	TP_ARGS(c, sectors_moved, keys_moved),
	TP_PROTO(struct bch_fs *c,
		 struct bch_move_stats *stats),
	TP_ARGS(c, stats),

	TP_STRUCT__entry(
		__field(dev_t,		dev		)
		__field(u64,		sectors_moved	)
		__field(dev_t,		dev		)
		__field(u64,		keys_moved	)
		__field(u64,		keys_raced	)
		__field(u64,		sectors_seen	)
		__field(u64,		sectors_moved	)
		__field(u64,		sectors_raced	)
	),

	TP_fast_assign(
		__entry->dev		= c->dev;
		__entry->sectors_moved = sectors_moved;
		__entry->keys_moved = keys_moved;
		__entry->dev		= c->dev;
		__entry->keys_moved	= atomic64_read(&stats->keys_moved);
		__entry->keys_raced	= atomic64_read(&stats->keys_raced);
		__entry->sectors_seen	= atomic64_read(&stats->sectors_seen);
		__entry->sectors_moved	= atomic64_read(&stats->sectors_moved);
		__entry->sectors_raced	= atomic64_read(&stats->sectors_raced);
	),

	TP_printk("%d,%d sectors_moved %llu keys_moved %llu",
	TP_printk("%d,%d keys moved %llu raced %llu"
		  "sectors seen %llu moved %llu raced %llu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->sectors_moved, __entry->keys_moved)
		  __entry->keys_moved,
		  __entry->keys_raced,
		  __entry->sectors_seen,
		  __entry->sectors_moved,
		  __entry->sectors_raced)
);

TRACE_EVENT(evacuate_bucket,

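With the widened prototype, a call site hands the whole bch_move_stats struct to the tracepoint instead of two scalar counters. An illustrative caller (hypothetical, not a verbatim line from move.c):

	struct bch_move_stats stats;
	/* ... a data move runs and the atomic counters in stats accumulate ... */
	trace_move_data(c, &stats);
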
@ -590,7 +590,7 @@ err:
	if (value &&
	    (opt_id == Opt_background_compression ||
	     opt_id == Opt_background_target))
		bch2_rebalance_add_work(c, inode->v.i_blocks);
		bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum);

	return bch2_err_class(ret);
}

@ -22,6 +22,10 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
		panic("closure_put_after_sub: bogus flags %x remaining %i", flags, r);

	if (!r) {
		smp_acquire__after_ctrl_dep();

		cl->closure_get_happened = false;

		if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
			atomic_set(&cl->remaining,
				   CLOSURE_REMAINING_INITIALIZER);
@ -44,7 +48,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
/* For clearing flags with the same atomic op as a put */
void closure_sub(struct closure *cl, int v)
{
	closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
	closure_put_after_sub(cl, atomic_sub_return_release(v, &cl->remaining));
}
EXPORT_SYMBOL(closure_sub);

@ -53,7 +57,7 @@ EXPORT_SYMBOL(closure_sub);
 */
void closure_put(struct closure *cl)
{
	closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
	closure_put_after_sub(cl, atomic_dec_return_release(&cl->remaining));
}
EXPORT_SYMBOL(closure_put);

@ -91,6 +95,7 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
	if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
		return false;

	cl->closure_get_happened = true;
	closure_set_waiting(cl, _RET_IP_);
	atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
	llist_add(&cl->list, &waitlist->list);
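The closure_put()/closure_sub() change above switches the reference-count decrement to release semantics and takes an acquire barrier only on the path where the count reached zero, instead of issuing a full barrier on every put. In portable C11 atomics the same put-side pattern looks roughly like this (a sketch of the idiom under that reading, not the kernel code):

#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int	ref;
	/* payload ... */
};

static void obj_put(struct obj *o)
{
	/* Release: our earlier writes to the payload are ordered before other
	 * threads can observe the counter reaching zero. */
	if (atomic_fetch_sub_explicit(&o->ref, 1, memory_order_release) == 1) {
		/* Acquire: the thread that dropped the last reference now also
		 * sees every other putter's writes before tearing the object down. */
		atomic_thread_fence(memory_order_acquire);
		free(o);
	}
}

This is the standard release/acquire refcount pairing: only the final putter pays for the acquire ordering, which is cheaper than a full barrier on every decrement while still making the teardown safe.
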