Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-02 00:00:03 +03:00)
Update bcachefs sources to 8d3fc97ca3 bcachefs: Fixes for building in userspace
commit c1e4d447f6
parent 980f7437e2
@@ -1 +1 @@
61ebcb532a1266e5e36f354858b552e2a4fb9925
8d3fc97ca3f24d8f7ab1e9ed04d8ca354c44dd8c
Makefile | 2
@@ -2,7 +2,7 @@ PREFIX?=/usr/local
PKG_CONFIG?=pkg-config
INSTALL=install

CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall -fPIC \
CFLAGS+=-std=gnu11 -O2 -g -MMD -Wall -fPIC \
-Wno-pointer-sign \
-fno-strict-aliasing \
-fno-delete-null-pointer-checks \
@@ -54,6 +54,8 @@ typedef struct {
#define __ATOMIC_ADD_RETURN_RELEASE(v, p) \
__atomic_add_fetch(p, v, __ATOMIC_RELEASE)
#define __ATOMIC_SUB_RETURN(v, p) __atomic_sub_fetch(p, v, __ATOMIC_RELAXED)
#define __ATOMIC_SUB_RETURN_RELEASE(v, p) \
__atomic_sub_fetch(p, v, __ATOMIC_RELEASE)

#define xchg(p, v) __atomic_exchange_n(p, v, __ATOMIC_SEQ_CST)
#define xchg_acquire(p, v) __atomic_exchange_n(p, v, __ATOMIC_ACQUIRE)

@@ -123,6 +125,11 @@ do { \
({ smp_mb__before_atomic(); __ATOMIC_ADD_RETURN(i, v); })
#endif

#ifndef __ATOMIC_SUB_RETURN_RELEASE
#define __ATOMIC_SUB_RETURN_RELEASE(i, v) \
({ smp_mb__before_atomic(); __ATOMIC_SUB_RETURN(i, v); })
#endif

#ifndef __ATOMIC_SUB
#define __ATOMIC_SUB(i, v) __ATOMIC_SUB_RETURN(i, v)
#endif

@@ -164,6 +171,11 @@ static inline i_type a_type##_add_return_release(i_type i, a_type##_t *v)\
return __ATOMIC_ADD_RETURN_RELEASE(i, &v->counter); \
} \
\
static inline i_type a_type##_sub_return_release(i_type i, a_type##_t *v)\
{ \
return __ATOMIC_SUB_RETURN_RELEASE(i, &v->counter); \
} \
\
static inline i_type a_type##_sub_return(i_type i, a_type##_t *v) \
{ \
return __ATOMIC_SUB_RETURN(i, &v->counter); \
@@ -229,6 +229,8 @@ static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *
}

struct printbuf;
extern void prt_u64(struct printbuf *out, u64 num);

extern __printf(2, 0) void prt_vprintf(struct printbuf *out, const char *fmt, va_list args);
extern __printf(2, 3) void prt_printf(struct printbuf *out, const char *fmt, ...);
@@ -155,7 +155,7 @@ struct mean_and_variance_weighted {
u64 variance;
};

inline s64 fast_divpow2(s64 n, u8 d);
s64 fast_divpow2(s64 n, u8 d);

struct mean_and_variance mean_and_variance_update(struct mean_and_variance s1, s64 v1);
s64 mean_and_variance_get_mean(struct mean_and_variance s);
@@ -24,6 +24,7 @@ typedef struct {
} wait_queue_head_t;

void wake_up(wait_queue_head_t *);
void wake_up_all(wait_queue_head_t *);
void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
@@ -344,25 +344,29 @@ DEFINE_EVENT(btree_node, btree_node_free,
TRACE_EVENT(btree_reserve_get_fail,
TP_PROTO(const char *trans_fn,
unsigned long caller_ip,
size_t required),
TP_ARGS(trans_fn, caller_ip, required),
size_t required,
int ret),
TP_ARGS(trans_fn, caller_ip, required, ret),

TP_STRUCT__entry(
__array(char, trans_fn, 32 )
__field(unsigned long, caller_ip )
__field(size_t, required )
__array(char, ret, 32 )
),

TP_fast_assign(
strscpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
__entry->caller_ip = caller_ip;
__entry->required = required;
strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret));
),

TP_printk("%s %pS required %zu",
TP_printk("%s %pS required %zu ret %s",
__entry->trans_fn,
(void *) __entry->caller_ip,
__entry->required)
__entry->required,
__entry->ret)
);

DEFINE_EVENT(btree_node, btree_node_compact,

@@ -542,14 +546,11 @@ TRACE_EVENT(bucket_alloc_fail,
u64 avail,
u64 copygc_wait_amount,
s64 copygc_waiting_for,
u64 seen,
u64 open,
u64 need_journal_commit,
u64 nouse,
struct bucket_alloc_state *s,
bool nonblocking,
const char *err),
TP_ARGS(ca, alloc_reserve, free, avail, copygc_wait_amount, copygc_waiting_for,
seen, open, need_journal_commit, nouse, nonblocking, err),
s, nonblocking, err),

TP_STRUCT__entry(
__field(dev_t, dev )

@@ -573,10 +574,10 @@ TRACE_EVENT(bucket_alloc_fail,
__entry->avail = avail;
__entry->copygc_wait_amount = copygc_wait_amount;
__entry->copygc_waiting_for = copygc_waiting_for;
__entry->seen = seen;
__entry->open = open;
__entry->need_journal_commit = need_journal_commit;
__entry->nouse = nouse;
__entry->seen = s->buckets_seen;
__entry->open = s->skipped_open;
__entry->need_journal_commit = s->skipped_need_journal_commit;
__entry->nouse = s->skipped_nouse;
__entry->nonblocking = nonblocking;
strscpy(__entry->err, err, sizeof(__entry->err));
),
@@ -279,6 +279,22 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
return -EINVAL;
}

/*
* XXX this is wrong, we'll be checking updates that happened from
* before BCH_FS_CHECK_BACKPOINTERS_DONE
*/
if (rw == WRITE && test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
unsigned i, bp_len = 0;

for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); i++)
bp_len += alloc_v4_backpointers_c(a.v)[i].bucket_len;

if (bp_len > a.v->dirty_sectors) {
prt_printf(err, "too many backpointers");
return -EINVAL;
}
}

if (rw == WRITE) {
if (alloc_data_type(*a.v, a.v->data_type) != a.v->data_type) {
prt_printf(err, "invalid data type (got %u should be %u)",
@@ -195,26 +195,24 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
u64 bucket,
enum alloc_reserve reserve,
struct bch_alloc_v4 *a,
u64 *skipped_open,
u64 *skipped_need_journal_commit,
u64 *skipped_nouse,
struct bucket_alloc_state *s,
struct closure *cl)
{
struct open_bucket *ob;

if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) {
(*skipped_nouse)++;
s->skipped_nouse++;
return NULL;
}

if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
(*skipped_open)++;
s->skipped_open++;
return NULL;
}

if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
c->journal.flushed_seq_ondisk, ca->dev_idx, bucket)) {
(*skipped_need_journal_commit)++;
s->skipped_need_journal_commit++;
return NULL;
}

@@ -234,7 +232,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
/* Recheck under lock: */
if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
spin_unlock(&c->freelist_lock);
(*skipped_open)++;
s->skipped_open++;
return NULL;
}
@@ -274,9 +272,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *

static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca,
enum alloc_reserve reserve, u64 free_entry,
u64 *skipped_open,
u64 *skipped_need_journal_commit,
u64 *skipped_nouse,
struct bucket_alloc_state *s,
struct bkey_s_c freespace_k,
struct closure *cl)
{

@@ -339,7 +335,8 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
u64 bp_offset = 0;

ret = bch2_get_next_backpointer(trans, POS(ca->dev_idx, b), -1,
&bp_offset, &bp, 0);
&bp_offset, &bp,
BTREE_ITER_NOPRESERVE);
if (ret) {
ob = ERR_PTR(ret);
goto err;

@@ -356,11 +353,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
}
}

ob = __try_alloc_bucket(c, ca, b, reserve, &a,
skipped_open,
skipped_need_journal_commit,
skipped_nouse,
cl);
ob = __try_alloc_bucket(c, ca, b, reserve, &a, s, cl);
if (!ob)
iter.path->preserve = false;
err:
@@ -406,11 +399,7 @@ static noinline struct open_bucket *
bch2_bucket_alloc_early(struct btree_trans *trans,
struct bch_dev *ca,
enum alloc_reserve reserve,
u64 *cur_bucket,
u64 *buckets_seen,
u64 *skipped_open,
u64 *skipped_need_journal_commit,
u64 *skipped_nouse,
struct bucket_alloc_state *s,
struct closure *cl)
{
struct btree_iter iter;

@@ -418,10 +407,10 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
struct open_bucket *ob = NULL;
int ret;

*cur_bucket = max_t(u64, *cur_bucket, ca->mi.first_bucket);
*cur_bucket = max_t(u64, *cur_bucket, ca->new_fs_bucket_idx);
s->cur_bucket = max_t(u64, s->cur_bucket, ca->mi.first_bucket);
s->cur_bucket = max_t(u64, s->cur_bucket, ca->new_fs_bucket_idx);

for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, *cur_bucket),
for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, s->cur_bucket),
BTREE_ITER_SLOTS, k, ret) {
struct bch_alloc_v4 a;

@@ -437,19 +426,15 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
if (a.data_type != BCH_DATA_free)
continue;

(*buckets_seen)++;
s->buckets_seen++;

ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, reserve, &a,
skipped_open,
skipped_need_journal_commit,
skipped_nouse,
cl);
ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, reserve, &a, s, cl);
if (ob)
break;
}
bch2_trans_iter_exit(trans, &iter);

*cur_bucket = iter.pos.offset;
s->cur_bucket = iter.pos.offset;

return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
}
@@ -457,11 +442,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
struct bch_dev *ca,
enum alloc_reserve reserve,
u64 *cur_bucket,
u64 *buckets_seen,
u64 *skipped_open,
u64 *skipped_need_journal_commit,
u64 *skipped_nouse,
struct bucket_alloc_state *s,
struct closure *cl)
{
struct btree_iter iter;

@@ -477,25 +458,21 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
* at previously
*/
for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
POS(ca->dev_idx, *cur_bucket), 0, k, ret) {
POS(ca->dev_idx, s->cur_bucket), 0, k, ret) {
if (k.k->p.inode != ca->dev_idx)
break;

for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k));
*cur_bucket < k.k->p.offset;
(*cur_bucket)++) {
for (s->cur_bucket = max(s->cur_bucket, bkey_start_offset(k.k));
s->cur_bucket < k.k->p.offset;
s->cur_bucket++) {
ret = btree_trans_too_many_iters(trans);
if (ret)
break;

(*buckets_seen)++;
s->buckets_seen++;

ob = try_alloc_bucket(trans, ca, reserve,
*cur_bucket,
skipped_open,
skipped_need_journal_commit,
skipped_nouse,
k, cl);
s->cur_bucket, s, k, cl);
if (ob)
break;
}
@@ -525,11 +502,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor;
u64 avail;
u64 cur_bucket = start;
u64 buckets_seen = 0;
u64 skipped_open = 0;
u64 skipped_need_journal_commit = 0;
u64 skipped_nouse = 0;
struct bucket_alloc_state s = { .cur_bucket = start };
bool waiting = false;
again:
bch2_dev_usage_read_fast(ca, usage);

@@ -568,31 +541,19 @@ again:
}

ob = likely(ca->mi.freespace_initialized)
? bch2_bucket_alloc_freelist(trans, ca, reserve,
&cur_bucket,
&buckets_seen,
&skipped_open,
&skipped_need_journal_commit,
&skipped_nouse,
cl)
: bch2_bucket_alloc_early(trans, ca, reserve,
&cur_bucket,
&buckets_seen,
&skipped_open,
&skipped_need_journal_commit,
&skipped_nouse,
cl);
? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl)
: bch2_bucket_alloc_early(trans, ca, reserve, &s, cl);

if (skipped_need_journal_commit * 2 > avail)
if (s.skipped_need_journal_commit * 2 > avail)
bch2_journal_flush_async(&c->journal, NULL);

if (!ob && !freespace_initialized && start) {
start = cur_bucket = 0;
start = s.cur_bucket = 0;
goto again;
}

if (!freespace_initialized)
ca->bucket_alloc_trans_early_cursor = cur_bucket;
ca->bucket_alloc_trans_early_cursor = s.cur_bucket;
err:
if (!ob)
ob = ERR_PTR(-BCH_ERR_no_buckets_found);

@@ -607,10 +568,7 @@ err:
avail,
bch2_copygc_wait_amount(c),
c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
buckets_seen,
skipped_open,
skipped_need_journal_commit,
skipped_nouse,
&s,
cl == NULL,
bch2_err_str(PTR_ERR(ob)));
@@ -1152,16 +1110,17 @@ out:
/*
* Get us an open_bucket we can allocate from, return with it locked:
*/
struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *trans,
unsigned target,
unsigned erasure_code,
struct write_point_specifier write_point,
struct bch_devs_list *devs_have,
unsigned nr_replicas,
unsigned nr_replicas_required,
enum alloc_reserve reserve,
unsigned flags,
struct closure *cl)
int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
unsigned target,
unsigned erasure_code,
struct write_point_specifier write_point,
struct bch_devs_list *devs_have,
unsigned nr_replicas,
unsigned nr_replicas_required,
enum alloc_reserve reserve,
unsigned flags,
struct closure *cl,
struct write_point **wp_ret)
{
struct bch_fs *c = trans->c;
struct write_point *wp;

@@ -1183,7 +1142,7 @@ retry:
write_points_nr = c->write_points_nr;
have_cache = false;

wp = writepoint_find(trans, write_point.v);
*wp_ret = wp = writepoint_find(trans, write_point.v);

if (wp->data_type == BCH_DATA_user)
ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;

@@ -1240,7 +1199,7 @@ alloc_done:

BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);

return wp;
return 0;
err:
open_bucket_for_each(c, &wp->ptrs, ob, i)
if (ptrs.nr < ARRAY_SIZE(ptrs.v))

@@ -1258,39 +1217,13 @@ err:
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) ||
bch2_err_matches(ret, BCH_ERR_freelist_empty))
return cl
? ERR_PTR(-EAGAIN)
: ERR_PTR(-BCH_ERR_ENOSPC_bucket_alloc);
? -EAGAIN
: -BCH_ERR_ENOSPC_bucket_alloc;

if (bch2_err_matches(ret, BCH_ERR_insufficient_devices))
return ERR_PTR(-EROFS);

return ERR_PTR(ret);
}

struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
unsigned target,
unsigned erasure_code,
struct write_point_specifier write_point,
struct bch_devs_list *devs_have,
unsigned nr_replicas,
unsigned nr_replicas_required,
enum alloc_reserve reserve,
unsigned flags,
struct closure *cl)
{
struct write_point *wp;

bch2_trans_do(c, NULL, NULL, 0,
PTR_ERR_OR_ZERO(wp = bch2_alloc_sectors_start_trans(&trans, target,
erasure_code,
write_point,
devs_have,
nr_replicas,
nr_replicas_required,
reserve,
flags, cl)));
return wp;
return -EROFS;

return ret;
}

struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)

@@ -1361,6 +1294,10 @@ static inline void writepoint_init(struct write_point *wp,
{
mutex_init(&wp->lock);
wp->data_type = type;

INIT_WORK(&wp->index_update_work, bch2_write_point_do_index_updates);
INIT_LIST_HEAD(&wp->writes);
spin_lock_init(&wp->writes_lock);
}

void bch2_fs_allocator_foreground_init(struct bch_fs *c)
@@ -136,22 +136,15 @@ int bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *,
unsigned, unsigned *, bool *, enum alloc_reserve,
unsigned, struct closure *);

struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *,
unsigned, unsigned,
struct write_point_specifier,
struct bch_devs_list *,
unsigned, unsigned,
enum alloc_reserve,
unsigned,
struct closure *);
struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
unsigned, unsigned,
struct write_point_specifier,
struct bch_devs_list *,
unsigned, unsigned,
enum alloc_reserve,
unsigned,
struct closure *);
int bch2_alloc_sectors_start_trans(struct btree_trans *,
unsigned, unsigned,
struct write_point_specifier,
struct bch_devs_list *,
unsigned, unsigned,
enum alloc_reserve,
unsigned,
struct closure *,
struct write_point **);

struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *, struct open_bucket *);
void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
@@ -8,6 +8,14 @@
#include "clock_types.h"
#include "fifo.h"

struct bucket_alloc_state {
u64 cur_bucket;
u64 buckets_seen;
u64 skipped_open;
u64 skipped_need_journal_commit;
u64 skipped_nouse;
};

struct ec_bucket_buf;

#define BCH_ALLOC_RESERVES() \

@@ -78,6 +86,11 @@ struct write_point {

struct open_buckets ptrs;
struct dev_stripe_state stripe;

struct work_struct index_update_work;

struct list_head writes;
spinlock_t writes_lock;
};

struct write_point_specifier {
@ -9,8 +9,6 @@
|
||||
|
||||
#include <linux/mm.h>
|
||||
|
||||
#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10
|
||||
|
||||
/*
|
||||
* Convert from pos in backpointer btree to pos of corresponding bucket in alloc
|
||||
* btree:
|
||||
@ -43,27 +41,6 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_extent_ptr_to_bp(struct bch_fs *c,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, struct extent_ptr_decoded p,
|
||||
struct bpos *bucket_pos, struct bch_backpointer *bp)
|
||||
{
|
||||
enum bch_data_type data_type = level ? BCH_DATA_btree : BCH_DATA_user;
|
||||
s64 sectors = level ? btree_sectors(c) : k.k->size;
|
||||
u32 bucket_offset;
|
||||
|
||||
*bucket_pos = PTR_BUCKET_POS_OFFSET(c, &p.ptr, &bucket_offset);
|
||||
*bp = (struct bch_backpointer) {
|
||||
.btree_id = btree_id,
|
||||
.level = level,
|
||||
.data_type = data_type,
|
||||
.bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
|
||||
p.crc.offset,
|
||||
.bucket_len = ptr_disk_sectors(sectors, p),
|
||||
.pos = k.k->p,
|
||||
};
|
||||
}
|
||||
|
||||
static bool extent_matches_bp(struct bch_fs *c,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k,
|
||||
|
@ -2,6 +2,7 @@
|
||||
#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
|
||||
#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
|
||||
|
||||
#include "buckets.h"
|
||||
#include "super.h"
|
||||
|
||||
int bch2_backpointer_invalid(const struct bch_fs *, struct bkey_s_c k,
|
||||
@ -16,9 +17,28 @@ void bch2_backpointer_swab(struct bkey_s);
|
||||
.swab = bch2_backpointer_swab, \
|
||||
})
|
||||
|
||||
void bch2_extent_ptr_to_bp(struct bch_fs *, enum btree_id, unsigned,
|
||||
struct bkey_s_c, struct extent_ptr_decoded,
|
||||
struct bpos *, struct bch_backpointer *);
|
||||
#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10
|
||||
|
||||
static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, struct extent_ptr_decoded p,
|
||||
struct bpos *bucket_pos, struct bch_backpointer *bp)
|
||||
{
|
||||
enum bch_data_type data_type = level ? BCH_DATA_btree : BCH_DATA_user;
|
||||
s64 sectors = level ? btree_sectors(c) : k.k->size;
|
||||
u32 bucket_offset;
|
||||
|
||||
*bucket_pos = PTR_BUCKET_POS_OFFSET(c, &p.ptr, &bucket_offset);
|
||||
*bp = (struct bch_backpointer) {
|
||||
.btree_id = btree_id,
|
||||
.level = level,
|
||||
.data_type = data_type,
|
||||
.bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
|
||||
p.crc.offset,
|
||||
.bucket_len = ptr_disk_sectors(sectors, p),
|
||||
.pos = k.k->p,
|
||||
};
|
||||
}
|
||||
|
||||
int bch2_bucket_backpointer_del(struct btree_trans *, struct bkey_i_alloc_v4 *,
|
||||
struct bch_backpointer, struct bkey_s_c);
|
||||
|
@ -226,6 +226,10 @@ do { \
|
||||
dynamic_fault("bcachefs:meta:write:" name)
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#define BCACHEFS_LOG_PREFIX
|
||||
#endif
|
||||
|
||||
#ifdef BCACHEFS_LOG_PREFIX
|
||||
#define bch2_log_msg(_c, fmt) "bcachefs (%s): " fmt, ((_c)->name)
|
||||
#define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n")
|
||||
#define bch2_fmt_inum(_c, _inum, fmt) "bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum)
|
||||
@ -598,6 +602,23 @@ typedef struct {
|
||||
#define BCACHEFS_ROOT_SUBVOL_INUM \
|
||||
((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
|
||||
|
||||
#define BCH_BTREE_WRITE_TYPES() \
|
||||
x(initial, 0) \
|
||||
x(init_next_bset, 1) \
|
||||
x(cache_reclaim, 2) \
|
||||
x(journal_reclaim, 3) \
|
||||
x(interior, 4)
|
||||
|
||||
enum btree_write_type {
|
||||
#define x(t, n) BTREE_WRITE_##t,
|
||||
BCH_BTREE_WRITE_TYPES()
|
||||
#undef x
|
||||
BTREE_WRITE_TYPE_NR,
|
||||
};
|
||||
|
||||
#define BTREE_WRITE_TYPE_MASK (roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1)
|
||||
#define BTREE_WRITE_TYPE_BITS ilog2(BTREE_WRITE_TYPE_MASK)
|
||||
|
||||
struct bch_fs {
|
||||
struct closure cl;
|
||||
|
||||
@ -707,6 +728,13 @@ struct bch_fs {
|
||||
struct workqueue_struct *btree_interior_update_worker;
|
||||
struct work_struct btree_interior_update_work;
|
||||
|
||||
/* btree_io.c: */
|
||||
spinlock_t btree_write_error_lock;
|
||||
struct btree_write_stats {
|
||||
atomic64_t nr;
|
||||
atomic64_t bytes;
|
||||
} btree_write_stats[BTREE_WRITE_TYPE_NR];
|
||||
|
||||
/* btree_iter.c: */
|
||||
struct mutex btree_trans_lock;
|
||||
struct list_head btree_trans_list;
|
||||
@ -881,11 +909,6 @@ struct bch_fs {
|
||||
struct bio_set dio_write_bioset;
|
||||
struct bio_set dio_read_bioset;
|
||||
|
||||
|
||||
atomic64_t btree_writes_nr;
|
||||
atomic64_t btree_writes_sectors;
|
||||
spinlock_t btree_write_error_lock;
|
||||
|
||||
/* ERRORS */
|
||||
struct list_head fsck_errors;
|
||||
struct mutex fsck_error_lock;
|
||||
|
@ -178,7 +178,7 @@ unsigned bch2_sort_keys(struct bkey_packed *dst,
|
||||
continue;
|
||||
|
||||
while ((next = sort_iter_peek(iter)) &&
|
||||
!bch2_bkey_cmp_packed(iter->b, in, next)) {
|
||||
!bch2_bkey_cmp_packed_inlined(iter->b, in, next)) {
|
||||
BUG_ON(in->needs_whiteout &&
|
||||
next->needs_whiteout);
|
||||
needs_whiteout |= in->needs_whiteout;
|
||||
|
@ -280,9 +280,11 @@ wait_on_io:
|
||||
* the post write cleanup:
|
||||
*/
|
||||
if (bch2_verify_btree_ondisk)
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_intent, 0);
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_intent,
|
||||
BTREE_WRITE_cache_reclaim);
|
||||
else
|
||||
__bch2_btree_node_write(c, b, 0);
|
||||
__bch2_btree_node_write(c, b,
|
||||
BTREE_WRITE_cache_reclaim);
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
@ -389,7 +391,7 @@ restart:
|
||||
six_trylock_read(&b->c.lock)) {
|
||||
list_move(&bc->live, &b->list);
|
||||
mutex_unlock(&bc->lock);
|
||||
__bch2_btree_node_write(c, b, 0);
|
||||
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
|
||||
six_unlock_read(&b->c.lock);
|
||||
if (touched >= nr)
|
||||
goto out_nounlock;
|
||||
@ -675,6 +677,7 @@ out:
|
||||
b->flags = 0;
|
||||
b->written = 0;
|
||||
b->nsets = 0;
|
||||
b->write_type = 0;
|
||||
b->sib_u64s[0] = 0;
|
||||
b->sib_u64s[1] = 0;
|
||||
b->whiteout_u64s = 0;
|
||||
@ -1118,7 +1121,7 @@ wait_on_io:
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
|
||||
|
||||
if (btree_node_dirty(b)) {
|
||||
__bch2_btree_node_write(c, b, 0);
|
||||
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
goto wait_on_io;
|
||||
|
@ -450,6 +450,24 @@ void bch2_btree_build_aux_trees(struct btree *b)
|
||||
t == bset_tree_last(b));
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have MAX_BSETS (3) bsets, should we sort them all down to just one?
|
||||
*
|
||||
* The first bset is going to be of similar order to the size of the node, the
|
||||
* last bset is bounded by btree_write_set_buffer(), which is set to keep the
|
||||
* memmove on insert from being too expensive: the middle bset should, ideally,
|
||||
* be the geometric mean of the first and the last.
|
||||
*
|
||||
* Returns true if the middle bset is greater than that geometric mean:
|
||||
*/
|
||||
static inline bool should_compact_all(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
unsigned mid_u64s_bits =
|
||||
(ilog2(btree_max_u64s(c)) + BTREE_WRITE_SET_U64s_BITS) / 2;
|
||||
|
||||
return bset_u64s(&b->set[1]) > 1U << mid_u64s_bits;
|
||||
}
|
||||
|
||||
/*
|
||||
* @bch_btree_init_next - initialize a new (unwritten) bset that can then be
|
||||
* inserted into
|
||||
@ -467,19 +485,14 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
|
||||
|
||||
EBUG_ON(!(b->c.lock.state.seq & 1));
|
||||
BUG_ON(bset_written(b, bset(b, &b->set[1])));
|
||||
BUG_ON(btree_node_just_written(b));
|
||||
|
||||
if (b->nsets == MAX_BSETS &&
|
||||
!btree_node_write_in_flight(b)) {
|
||||
unsigned log_u64s[] = {
|
||||
ilog2(bset_u64s(&b->set[0])),
|
||||
ilog2(bset_u64s(&b->set[1])),
|
||||
ilog2(bset_u64s(&b->set[2])),
|
||||
};
|
||||
|
||||
if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2) {
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_write, 0);
|
||||
reinit_iter = true;
|
||||
}
|
||||
!btree_node_write_in_flight(b) &&
|
||||
should_compact_all(c, b)) {
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_write,
|
||||
BTREE_WRITE_init_next_bset);
|
||||
reinit_iter = true;
|
||||
}
|
||||
|
||||
if (b->nsets == MAX_BSETS &&
|
||||
@ -1653,7 +1666,7 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
} while ((v = cmpxchg(&b->flags, old, new)) != old);
|
||||
|
||||
if (new & (1U << BTREE_NODE_write_in_flight))
|
||||
__bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED);
|
||||
__bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|b->write_type);
|
||||
else
|
||||
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
|
||||
}
|
||||
@ -1802,6 +1815,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
|
||||
bool used_mempool;
|
||||
unsigned long old, new;
|
||||
bool validate_before_checksum = false;
|
||||
enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
|
||||
void *data;
|
||||
int ret;
|
||||
|
||||
@ -1848,6 +1862,12 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
|
||||
if (new & (1U << BTREE_NODE_need_write))
|
||||
return;
|
||||
do_write:
|
||||
if ((flags & BTREE_WRITE_ONLY_IF_NEED))
|
||||
type = b->write_type;
|
||||
b->write_type = 0;
|
||||
|
||||
BUG_ON((type == BTREE_WRITE_initial) != (b->written == 0));
|
||||
|
||||
atomic_dec(&c->btree_cache.dirty);
|
||||
|
||||
BUG_ON(btree_node_fake(b));
|
||||
@ -2022,8 +2042,8 @@ do_write:
|
||||
bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
|
||||
cpu_to_le16(b->written);
|
||||
|
||||
atomic64_inc(&c->btree_writes_nr);
|
||||
atomic64_add(sectors_to_write, &c->btree_writes_sectors);
|
||||
atomic64_inc(&c->btree_write_stats[type].nr);
|
||||
atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);
|
||||
|
||||
INIT_WORK(&wbio->work, btree_write_submit);
|
||||
queue_work(c->io_complete_wq, &wbio->work);
|
||||
@ -2151,3 +2171,33 @@ bool bch2_btree_flush_all_writes(struct bch_fs *c)
|
||||
{
|
||||
return __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
|
||||
}
|
||||
|
||||
const char * const bch2_btree_write_types[] = {
|
||||
#define x(t, n) [n] = #t,
|
||||
BCH_BTREE_WRITE_TYPES()
|
||||
NULL
|
||||
};
|
||||
|
||||
void bch2_btree_write_stats_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
printbuf_tabstop_push(out, 20);
|
||||
printbuf_tabstop_push(out, 10);
|
||||
|
||||
prt_tab(out);
|
||||
prt_str(out, "nr");
|
||||
prt_tab(out);
|
||||
prt_str(out, "size");
|
||||
prt_newline(out);
|
||||
|
||||
for (unsigned i = 0; i < BTREE_WRITE_TYPE_NR; i++) {
|
||||
u64 nr = atomic64_read(&c->btree_write_stats[i].nr);
|
||||
u64 bytes = atomic64_read(&c->btree_write_stats[i].bytes);
|
||||
|
||||
prt_printf(out, "%s:", bch2_btree_write_types[i]);
|
||||
prt_tab(out);
|
||||
prt_u64(out, nr);
|
||||
prt_tab(out);
|
||||
prt_human_readable_u64(out, nr ? div64_u64(bytes, nr) : 0);
|
||||
prt_newline(out);
|
||||
}
|
||||
}
|
||||
|
@ -139,8 +139,12 @@ void bch2_btree_complete_write(struct bch_fs *, struct btree *,
|
||||
|
||||
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
|
||||
|
||||
#define BTREE_WRITE_ONLY_IF_NEED (1U << 0)
|
||||
#define BTREE_WRITE_ALREADY_STARTED (1U << 1)
|
||||
enum btree_write_flags {
|
||||
__BTREE_WRITE_ONLY_IF_NEED = BTREE_WRITE_TYPE_BITS,
|
||||
__BTREE_WRITE_ALREADY_STARTED,
|
||||
};
|
||||
#define BTREE_WRITE_ONLY_IF_NEED (1U << __BTREE_WRITE_ONLY_IF_NEED )
|
||||
#define BTREE_WRITE_ALREADY_STARTED (1U << __BTREE_WRITE_ALREADY_STARTED)
|
||||
|
||||
void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
|
||||
void bch2_btree_node_write(struct bch_fs *, struct btree *,
|
||||
@ -219,4 +223,6 @@ static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
|
||||
bn->min_key = bpos_nosnap_successor(bn->min_key);
|
||||
}
|
||||
|
||||
void bch2_btree_write_stats_to_text(struct printbuf *, struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_IO_H */
|
||||
|
@ -646,9 +646,9 @@ static inline void __btree_path_level_init(struct btree_path *path,
|
||||
bch2_btree_node_iter_peek(&l->iter, l->b);
|
||||
}
|
||||
|
||||
inline void bch2_btree_path_level_init(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree *b)
|
||||
void bch2_btree_path_level_init(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree *b)
|
||||
{
|
||||
BUG_ON(path->cached);
|
||||
|
||||
@ -1172,11 +1172,10 @@ int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
|
||||
btree_path_traverse_one(trans, path, flags, _RET_IP_);
|
||||
}
|
||||
|
||||
static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
|
||||
static inline void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
|
||||
struct btree_path *src)
|
||||
{
|
||||
unsigned i, offset = offsetof(struct btree_path, pos);
|
||||
int cmp = btree_path_cmp(dst, src);
|
||||
|
||||
memcpy((void *) dst + offset,
|
||||
(void *) src + offset,
|
||||
@ -1188,9 +1187,6 @@ static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
|
||||
if (t != BTREE_NODE_UNLOCKED)
|
||||
six_lock_increment(&dst->l[i].b->c.lock, t);
|
||||
}
|
||||
|
||||
if (cmp)
|
||||
bch2_btree_path_check_sort_fast(trans, dst, cmp);
|
||||
}
|
||||
|
||||
static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
|
||||
@ -1203,21 +1199,18 @@ static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btr
|
||||
return new;
|
||||
}
|
||||
|
||||
__flatten
|
||||
struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans,
|
||||
struct btree_path *path, bool intent,
|
||||
unsigned long ip)
|
||||
{
|
||||
if (path->ref > 1 || path->preserve) {
|
||||
__btree_path_put(path, intent);
|
||||
path = btree_path_clone(trans, path, intent);
|
||||
path->preserve = false;
|
||||
__btree_path_put(path, intent);
|
||||
path = btree_path_clone(trans, path, intent);
|
||||
path->preserve = false;
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
path->ip_allocated = ip;
|
||||
path->ip_allocated = ip;
|
||||
#endif
|
||||
btree_trans_verify_sorted(trans);
|
||||
}
|
||||
|
||||
path->should_be_locked = false;
|
||||
btree_trans_verify_sorted(trans);
|
||||
return path;
|
||||
}
|
||||
|
||||
@ -1554,7 +1547,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
|
||||
return path;
|
||||
}
|
||||
|
||||
inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
|
||||
struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
|
||||
{
|
||||
|
||||
struct btree_path_level *l = path_l(path);
|
||||
@ -2536,6 +2529,18 @@ static inline void btree_path_swap(struct btree_trans *trans,
|
||||
btree_path_verify_sorted_ref(trans, r);
|
||||
}
|
||||
|
||||
static inline struct btree_path *sib_btree_path(struct btree_trans *trans,
|
||||
struct btree_path *path, int sib)
|
||||
{
|
||||
unsigned idx = (unsigned) path->sorted_idx + sib;
|
||||
|
||||
EBUG_ON(sib != -1 && sib != 1);
|
||||
|
||||
return idx < trans->nr_sorted
|
||||
? trans->paths + trans->sorted[idx]
|
||||
: NULL;
|
||||
}
|
||||
|
||||
static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
int cmp)
|
||||
@ -2545,9 +2550,7 @@ static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *
|
||||
|
||||
EBUG_ON(!cmp);
|
||||
|
||||
while ((n = cmp < 0
|
||||
? prev_btree_path(trans, path)
|
||||
: next_btree_path(trans, path)) &&
|
||||
while ((n = sib_btree_path(trans, path, cmp)) &&
|
||||
(cmp2 = btree_path_cmp(n, path)) &&
|
||||
cmp2 != cmp)
|
||||
btree_path_swap(trans, n, path);
|
||||
|
@ -165,13 +165,12 @@ int __must_check bch2_btree_path_traverse(struct btree_trans *,
|
||||
struct btree_path *, unsigned);
|
||||
struct btree_path *bch2_path_get(struct btree_trans *, enum btree_id, struct bpos,
|
||||
unsigned, unsigned, unsigned, unsigned long);
|
||||
inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *);
|
||||
struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *);
|
||||
|
||||
struct bkey_i *bch2_btree_journal_peek_slot(struct btree_trans *,
|
||||
struct btree_iter *, struct bpos);
|
||||
|
||||
inline void bch2_btree_path_level_init(struct btree_trans *,
|
||||
struct btree_path *, struct btree *);
|
||||
void bch2_btree_path_level_init(struct btree_trans *, struct btree_path *, struct btree *);
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
void bch2_trans_verify_paths(struct btree_trans *);
|
||||
|
@ -173,10 +173,9 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
|
||||
}
|
||||
|
||||
if (unlikely(!best)) {
|
||||
struct bch_fs *c = g->g->trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch_err(c, "cycle of nofail locks");
|
||||
prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks"));
|
||||
|
||||
for (i = g->g; i < g->g + g->nr; i++) {
|
||||
struct btree_trans *trans = i->trans;
|
||||
|
@ -77,6 +77,7 @@ struct btree {
|
||||
u8 nsets;
|
||||
u8 nr_key_bits;
|
||||
u16 version_ondisk;
|
||||
u8 write_type;
|
||||
|
||||
struct bkey_format format;
|
||||
|
||||
|
@ -246,6 +246,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
|
||||
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
|
||||
unsigned nr_reserve;
|
||||
enum alloc_reserve alloc_reserve;
|
||||
int ret;
|
||||
|
||||
if (flags & BTREE_INSERT_USE_RESERVE) {
|
||||
nr_reserve = 0;
|
||||
@ -268,7 +269,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
|
||||
mutex_unlock(&c->btree_reserve_cache_lock);
|
||||
|
||||
retry:
|
||||
wp = bch2_alloc_sectors_start_trans(trans,
|
||||
ret = bch2_alloc_sectors_start_trans(trans,
|
||||
c->opts.metadata_target ?:
|
||||
c->opts.foreground_target,
|
||||
0,
|
||||
@ -276,9 +277,9 @@ retry:
|
||||
&devs_have,
|
||||
res->nr_replicas,
|
||||
c->opts.metadata_replicas_required,
|
||||
alloc_reserve, 0, cl);
|
||||
if (IS_ERR(wp))
|
||||
return ERR_CAST(wp);
|
||||
alloc_reserve, 0, cl, &wp);
|
||||
if (unlikely(ret))
|
||||
return ERR_PTR(ret);
|
||||
|
||||
if (wp->sectors_free < btree_sectors(c)) {
|
||||
struct open_bucket *ob;
|
||||
@ -1178,7 +1179,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
trace_and_count(c, btree_reserve_get_fail, trans->fn, _RET_IP_, nr_nodes[0] + nr_nodes[1]);
|
||||
trace_and_count(c, btree_reserve_get_fail, trans->fn,
|
||||
_RET_IP_, nr_nodes[0] + nr_nodes[1], ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -1307,6 +1309,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
|
||||
bch2_btree_bset_insert_key(trans, path, b, node_iter, insert);
|
||||
set_btree_node_dirty_acct(c, b);
|
||||
set_btree_node_need_write(b);
|
||||
b->write_type = BTREE_WRITE_interior;
|
||||
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
@ -282,6 +282,7 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b,
|
||||
struct bkey_packed k;
|
||||
|
||||
BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s);
|
||||
EBUG_ON(btree_node_just_written(b));
|
||||
|
||||
if (!bkey_pack_pos(&k, pos, b)) {
|
||||
struct bkey *u = (void *) &k;
|
||||
|
@ -181,6 +181,8 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
||||
new |= 1 << BTREE_NODE_need_write;
|
||||
} while ((v = cmpxchg(&b->flags, old, new)) != old);
|
||||
|
||||
b->write_type = BTREE_WRITE_journal_reclaim;
|
||||
|
||||
btree_node_write_if_need(c, b, SIX_LOCK_read);
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
@ -289,7 +291,7 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int bch2_trans_journal_res_get(struct btree_trans *trans,
|
||||
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
@ -721,33 +723,34 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
|
||||
{
|
||||
while (--i >= trans->updates) {
|
||||
if (same_leaf_as_prev(trans, i))
|
||||
continue;
|
||||
|
||||
bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
|
||||
}
|
||||
|
||||
trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
|
||||
}
|
||||
|
||||
static inline int trans_lock_write(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
int ret;
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
if (same_leaf_as_prev(trans, i))
|
||||
continue;
|
||||
|
||||
ret = bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c);
|
||||
if (ret)
|
||||
goto fail;
|
||||
if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
|
||||
return trans_lock_write_fail(trans, i);
|
||||
|
||||
bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
|
||||
}
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
while (--i >= trans->updates) {
|
||||
if (same_leaf_as_prev(trans, i))
|
||||
continue;
|
||||
|
||||
bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b);
|
||||
}
|
||||
|
||||
trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
|
||||
}
|
||||
|
||||
static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
|
||||
@ -758,6 +761,33 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans
|
||||
bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p);
|
||||
}
|
||||
|
||||
static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans,
|
||||
struct btree_insert_entry *i,
|
||||
struct printbuf *err)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
int rw = (trans->flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE;
|
||||
|
||||
printbuf_reset(err);
|
||||
prt_printf(err, "invalid bkey on insert from %s -> %ps",
|
||||
trans->fn, (void *) i->ip_allocated);
|
||||
prt_newline(err);
|
||||
printbuf_indent_add(err, 2);
|
||||
|
||||
bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k));
|
||||
prt_newline(err);
|
||||
|
||||
bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
|
||||
i->bkey_type, rw, err);
|
||||
bch2_print_string_as_lines(KERN_ERR, err->buf);
|
||||
|
||||
bch2_inconsistent_error(c);
|
||||
bch2_dump_trans_updates(trans);
|
||||
printbuf_exit(err);
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get journal reservation, take write locks, and attempt to do btree update(s):
|
||||
*/
|
||||
@ -772,24 +802,9 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
|
||||
int rw = (trans->flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE;
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
if (bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
|
||||
i->bkey_type, rw, &buf)) {
|
||||
printbuf_reset(&buf);
|
||||
prt_printf(&buf, "invalid bkey on insert from %s -> %ps",
|
||||
trans->fn, (void *) i->ip_allocated);
|
||||
prt_newline(&buf);
|
||||
printbuf_indent_add(&buf, 2);
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k));
|
||||
prt_newline(&buf);
|
||||
|
||||
bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
|
||||
i->bkey_type, rw, &buf);
|
||||
|
||||
bch2_trans_inconsistent(trans, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
|
||||
i->bkey_type, rw, &buf)))
|
||||
return bch2_trans_commit_bkey_invalid(trans, i, &buf);
|
||||
btree_insert_entry_checks(trans, i);
|
||||
}
|
||||
|
||||
|
@ -1263,23 +1263,24 @@ void fs_usage_apply_warn(struct btree_trans *trans,
|
||||
struct btree_insert_entry *i;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch_err(c, "disk usage increased %lli more than %u sectors reserved",
|
||||
should_not_have_added, disk_res_sectors);
|
||||
prt_printf(&buf,
|
||||
bch2_fmt(c, "disk usage increased %lli more than %u sectors reserved)"),
|
||||
should_not_have_added, disk_res_sectors);
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
struct bkey_s_c old = { &i->old_k, i->old_v };
|
||||
|
||||
pr_err("while inserting");
|
||||
printbuf_reset(&buf);
|
||||
prt_str(&buf, "new ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k));
|
||||
pr_err(" %s", buf.buf);
|
||||
pr_err("overlapping with");
|
||||
printbuf_reset(&buf);
|
||||
prt_newline(&buf);
|
||||
|
||||
prt_str(&buf, "old ");
|
||||
bch2_bkey_val_to_text(&buf, c, old);
|
||||
pr_err(" %s", buf.buf);
|
||||
prt_newline(&buf);
|
||||
}
|
||||
|
||||
__WARN();
|
||||
bch2_print_string_as_lines(KERN_ERR, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
@ -1949,7 +1950,7 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
|
||||
|
||||
#define SECTORS_CACHE 1024
|
||||
|
||||
int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
|
||||
int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
|
||||
u64 sectors, int flags)
|
||||
{
|
||||
struct bch_fs_pcpu *pcpu;
|
||||
|
@ -259,15 +259,39 @@ int bch2_trans_mark_dev_sb(struct bch_fs *, struct bch_dev *);
|
||||
static inline void bch2_disk_reservation_put(struct bch_fs *c,
|
||||
struct disk_reservation *res)
|
||||
{
|
||||
this_cpu_sub(*c->online_reserved, res->sectors);
|
||||
res->sectors = 0;
|
||||
if (res->sectors) {
|
||||
this_cpu_sub(*c->online_reserved, res->sectors);
|
||||
res->sectors = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#define BCH_DISK_RESERVATION_NOFAIL (1 << 0)
|
||||
|
||||
int bch2_disk_reservation_add(struct bch_fs *,
|
||||
struct disk_reservation *,
|
||||
u64, int);
|
||||
int __bch2_disk_reservation_add(struct bch_fs *,
|
||||
struct disk_reservation *,
|
||||
u64, int);
|
||||
|
||||
static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
|
||||
u64 sectors, int flags)
|
||||
{
|
||||
#ifdef __KERNEL__
|
||||
u64 old, new;
|
||||
|
||||
do {
|
||||
old = this_cpu_read(c->pcpu->sectors_available);
|
||||
if (sectors > old)
|
||||
return __bch2_disk_reservation_add(c, res, sectors, flags);
|
||||
|
||||
new = old - sectors;
|
||||
} while (this_cpu_cmpxchg(c->pcpu->sectors_available, old, new) != old);
|
||||
|
||||
this_cpu_add(*c->online_reserved, sectors);
|
||||
res->sectors += sectors;
|
||||
return 0;
|
||||
#else
|
||||
return __bch2_disk_reservation_add(c, res, sectors, flags);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline struct disk_reservation
|
||||
bch2_disk_reservation_init(struct bch_fs *c, unsigned nr_replicas)
|
||||
|
@ -316,7 +316,7 @@ struct bch_csum bch2_checksum_bio(struct bch_fs *c, unsigned type,
|
||||
return __bch2_checksum_bio(c, type, nonce, bio, &iter);
|
||||
}
|
||||
|
||||
int bch2_encrypt_bio(struct bch_fs *c, unsigned type,
|
||||
int __bch2_encrypt_bio(struct bch_fs *c, unsigned type,
|
||||
struct nonce nonce, struct bio *bio)
|
||||
{
|
||||
struct bio_vec bv;
|
||||
|
@ -61,8 +61,16 @@ int bch2_rechecksum_bio(struct bch_fs *, struct bio *, struct bversion,
|
||||
struct bch_extent_crc_unpacked *,
|
||||
unsigned, unsigned, unsigned);
|
||||
|
||||
int bch2_encrypt_bio(struct bch_fs *, unsigned,
|
||||
struct nonce, struct bio *);
|
||||
int __bch2_encrypt_bio(struct bch_fs *, unsigned,
|
||||
struct nonce, struct bio *);
|
||||
|
||||
static inline int bch2_encrypt_bio(struct bch_fs *c, unsigned type,
|
||||
struct nonce nonce, struct bio *bio)
|
||||
{
|
||||
return bch2_csum_type_is_encryption(type)
|
||||
? __bch2_encrypt_bio(c, type, nonce, bio)
|
||||
: 0;
|
||||
}
|
||||
|
||||
int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *,
|
||||
struct bch_key *);
|
||||
|
@ -97,7 +97,7 @@ static void bch2_bkey_mark_dev_cached(struct bkey_s k, unsigned dev)
|
||||
ptr->cached = true;
|
||||
}
|
||||
|
||||
static int bch2_data_update_index_update(struct bch_write_op *op)
|
||||
int bch2_data_update_index_update(struct bch_write_op *op)
|
||||
{
|
||||
struct bch_fs *c = op->c;
|
||||
struct btree_trans trans;
|
||||
@ -225,7 +225,7 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
|
||||
bch2_trans_update(&trans, &iter, insert,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(&trans, &op->res,
|
||||
op_journal_seq(op),
|
||||
NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
m->data_opts.btree_insert_flags);
|
||||
if (!ret) {
|
||||
@ -270,8 +270,7 @@ out:
|
||||
}
|
||||
|
||||
void bch2_data_update_read_done(struct data_update *m,
|
||||
struct bch_extent_crc_unpacked crc,
|
||||
struct closure *cl)
|
||||
struct bch_extent_crc_unpacked crc)
|
||||
{
|
||||
/* write bio must own pages: */
|
||||
BUG_ON(!m->op.wbio.bio.bi_vcnt);
|
||||
@ -279,7 +278,7 @@ void bch2_data_update_read_done(struct data_update *m,
|
||||
m->op.crc = crc;
|
||||
m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9;
|
||||
|
||||
closure_call(&m->op.cl, bch2_write, NULL, cl);
|
||||
closure_call(&m->op.cl, bch2_write, NULL, NULL);
|
||||
}
|
||||
|
||||
void bch2_data_update_exit(struct data_update *update)
|
||||
@ -317,14 +316,13 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
|
||||
m->op.flags |= BCH_WRITE_PAGES_STABLE|
|
||||
BCH_WRITE_PAGES_OWNED|
|
||||
BCH_WRITE_DATA_ENCODED|
|
||||
BCH_WRITE_FROM_INTERNAL|
|
||||
BCH_WRITE_MOVE|
|
||||
m->data_opts.write_flags;
|
||||
m->op.compression_type =
|
||||
bch2_compression_opt_to_type[io_opts.background_compression ?:
|
||||
io_opts.compression];
|
||||
if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE)
|
||||
m->op.alloc_reserve = RESERVE_movinggc;
|
||||
m->op.index_update_fn = bch2_data_update_index_update;
|
||||
|
||||
i = 0;
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
|
@ -26,9 +26,10 @@ struct data_update {
|
||||
struct bch_write_op op;
|
||||
};
|
||||
|
||||
int bch2_data_update_index_update(struct bch_write_op *);
|
||||
|
||||
void bch2_data_update_read_done(struct data_update *,
|
||||
struct bch_extent_crc_unpacked,
|
||||
struct closure *);
|
||||
struct bch_extent_crc_unpacked);
|
||||
|
||||
void bch2_data_update_exit(struct data_update *);
|
||||
int bch2_data_update_init(struct bch_fs *, struct data_update *,
|
||||
|
@ -125,8 +125,10 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
|
||||
s->nr++;
|
||||
}
|
||||
|
||||
#ifdef BCACHEFS_LOG_PREFIX
|
||||
if (!strncmp(fmt, "bcachefs:", 9))
|
||||
prt_printf(out, bch2_log_msg(c, ""));
|
||||
#endif
|
||||
|
||||
va_start(args, fmt);
|
||||
prt_vprintf(out, fmt, args);
|
||||
|
@ -65,7 +65,6 @@ struct quota_res {
|
||||
};
|
||||
|
||||
struct bch_writepage_io {
|
||||
struct closure cl;
|
||||
struct bch_inode_info *inode;
|
||||
|
||||
/* must be last: */
|
||||
@ -73,11 +72,13 @@ struct bch_writepage_io {
|
||||
};
|
||||
|
||||
struct dio_write {
|
||||
struct completion done;
|
||||
struct kiocb *req;
|
||||
struct address_space *mapping;
|
||||
struct bch_inode_info *inode;
|
||||
struct mm_struct *mm;
|
||||
unsigned loop:1,
|
||||
sync:1,
|
||||
flush:1,
|
||||
free_iov:1;
|
||||
struct quota_res quota_res;
|
||||
u64 written;
|
||||
@ -98,7 +99,7 @@ struct dio_read {
|
||||
};
|
||||
|
||||
/* pagecache_block must be held */
|
||||
static int write_invalidate_inode_pages_range(struct address_space *mapping,
|
||||
static noinline int write_invalidate_inode_pages_range(struct address_space *mapping,
|
||||
loff_t start, loff_t end)
|
||||
{
|
||||
int ret;
|
||||
@ -750,25 +751,25 @@ vm_fault_t bch2_page_fault(struct vm_fault *vmf)
|
||||
if (fdm > mapping) {
|
||||
struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
|
||||
|
||||
if (bch2_pagecache_add_tryget(&inode->ei_pagecache_lock))
|
||||
if (bch2_pagecache_add_tryget(inode))
|
||||
goto got_lock;
|
||||
|
||||
bch2_pagecache_block_put(&fdm_host->ei_pagecache_lock);
|
||||
bch2_pagecache_block_put(fdm_host);
|
||||
|
||||
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_get(inode);
|
||||
bch2_pagecache_add_put(inode);
|
||||
|
||||
bch2_pagecache_block_get(&fdm_host->ei_pagecache_lock);
|
||||
bch2_pagecache_block_get(fdm_host);
|
||||
|
||||
/* Signal that lock has been dropped: */
|
||||
set_fdm_dropped_locks();
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_get(inode);
|
||||
got_lock:
|
||||
ret = filemap_fault(vmf);
|
||||
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_put(inode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -796,7 +797,7 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
|
||||
* a write_invalidate_inode_pages_range() that works without dropping
|
||||
* page lock before invalidating page
|
||||
*/
|
||||
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_get(inode);
|
||||
|
||||
lock_page(page);
|
||||
isize = i_size_read(&inode->v);
|
||||
@ -829,7 +830,7 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
|
||||
wait_for_stable_page(page);
|
||||
ret = VM_FAULT_LOCKED;
|
||||
out:
|
||||
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_put(inode);
|
||||
sb_end_pagefault(inode->v.i_sb);
|
||||
|
||||
return ret;
|
||||
@ -1097,7 +1098,7 @@ void bch2_readahead(struct readahead_control *ractl)
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_get(inode);
|
||||
|
||||
while ((page = readpage_iter_next(&readpages_iter))) {
|
||||
pgoff_t index = readpages_iter.offset + readpages_iter.idx;
|
||||
@ -1120,7 +1121,7 @@ void bch2_readahead(struct readahead_control *ractl)
|
||||
&readpages_iter);
|
||||
}
|
||||
|
||||
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_put(inode);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
kfree(readpages_iter.pages);
|
||||
@ -1200,18 +1201,10 @@ static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs
|
||||
};
|
||||
}
|
||||
|
||||
static void bch2_writepage_io_free(struct closure *cl)
|
||||
static void bch2_writepage_io_done(struct bch_write_op *op)
|
||||
{
|
||||
struct bch_writepage_io *io = container_of(cl,
|
||||
struct bch_writepage_io, cl);
|
||||
|
||||
bio_put(&io->op.wbio.bio);
|
||||
}
|
||||
|
||||
static void bch2_writepage_io_done(struct closure *cl)
|
||||
{
|
||||
struct bch_writepage_io *io = container_of(cl,
|
||||
struct bch_writepage_io, cl);
|
||||
struct bch_writepage_io *io =
|
||||
container_of(op, struct bch_writepage_io, op);
|
||||
struct bch_fs *c = io->op.c;
|
||||
struct bio *bio = &io->op.wbio.bio;
|
||||
struct bvec_iter_all iter;
|
||||
@ -1273,7 +1266,7 @@ static void bch2_writepage_io_done(struct closure *cl)
|
||||
end_page_writeback(bvec->bv_page);
|
||||
}
|
||||
|
||||
closure_return_with_destructor(&io->cl, bch2_writepage_io_free);
|
||||
bio_put(&io->op.wbio.bio);
|
||||
}
|
||||
|
||||
static void bch2_writepage_do_io(struct bch_writepage_state *w)
|
||||
@ -1281,8 +1274,7 @@ static void bch2_writepage_do_io(struct bch_writepage_state *w)
|
||||
struct bch_writepage_io *io = w->io;
|
||||
|
||||
w->io = NULL;
|
||||
closure_call(&io->op.cl, bch2_write, NULL, &io->cl);
|
||||
continue_at(&io->cl, bch2_writepage_io_done, NULL);
|
||||
closure_call(&io->op.cl, bch2_write, NULL, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1304,9 +1296,7 @@ static void bch2_writepage_io_alloc(struct bch_fs *c,
|
||||
&c->writepage_bioset),
|
||||
struct bch_writepage_io, op.wbio.bio);
|
||||
|
||||
closure_init(&w->io->cl, NULL);
|
||||
w->io->inode = inode;
|
||||
|
||||
op = &w->io->op;
|
||||
bch2_write_op_init(op, c, w->opts);
|
||||
op->target = w->opts.foreground_target;
|
||||
@ -1315,6 +1305,7 @@ static void bch2_writepage_io_alloc(struct bch_fs *c,
|
||||
op->write_point = writepoint_hashed(inode->ei_last_dirtied);
|
||||
op->subvol = inode->ei_subvol;
|
||||
op->pos = POS(inode->v.i_ino, sector);
|
||||
op->end_io = bch2_writepage_io_done;
|
||||
op->wbio.bio.bi_iter.bi_sector = sector;
|
||||
op->wbio.bio.bi_opf = wbc_to_write_flags(wbc);
|
||||
}
|
||||
@ -1438,7 +1429,8 @@ do_io:
|
||||
|
||||
/* Check for writing past i_size: */
|
||||
WARN_ON_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) >
|
||||
round_up(i_size, block_bytes(c)));
|
||||
round_up(i_size, block_bytes(c)) &&
|
||||
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags));
|
||||
|
||||
w->io->op.res.sectors += reserved_sectors;
|
||||
w->io->op.i_sectors_delta -= dirty_sectors;
|
||||
@ -1490,7 +1482,7 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
|
||||
bch2_page_reservation_init(c, inode, res);
|
||||
*fsdata = res;
|
||||
|
||||
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_get(inode);
|
||||
|
||||
page = grab_cache_page_write_begin(mapping, index);
|
||||
if (!page)
|
||||
@ -1547,7 +1539,7 @@ err:
|
||||
put_page(page);
|
||||
*pagep = NULL;
|
||||
err_unlock:
|
||||
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_put(inode);
|
||||
kfree(res);
|
||||
*fsdata = NULL;
|
||||
return bch2_err_class(ret);
|
||||
@ -1591,7 +1583,7 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
|
||||
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_put(inode);
|
||||
|
||||
bch2_page_reservation_put(c, inode, res);
|
||||
kfree(res);
|
||||
@ -1760,7 +1752,7 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
|
||||
ssize_t written = 0;
|
||||
int ret = 0;
|
||||
|
||||
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_get(inode);
|
||||
|
||||
do {
|
||||
unsigned offset = pos & (PAGE_SIZE - 1);
|
||||
@ -1818,7 +1810,7 @@ again:
|
||||
balance_dirty_pages_ratelimited(mapping);
|
||||
} while (iov_iter_count(iter));
|
||||
|
||||
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_put(inode);
|
||||
|
||||
return written ? written : ret;
|
||||
}
|
||||
@ -1981,11 +1973,13 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
if (iocb->ki_flags & IOCB_DIRECT) {
|
||||
struct blk_plug plug;
|
||||
|
||||
ret = filemap_write_and_wait_range(mapping,
|
||||
iocb->ki_pos,
|
||||
iocb->ki_pos + count - 1);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (unlikely(mapping->nrpages)) {
|
||||
ret = filemap_write_and_wait_range(mapping,
|
||||
iocb->ki_pos,
|
||||
iocb->ki_pos + count - 1);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
file_accessed(file);
|
||||
|
||||
@ -1996,9 +1990,9 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
if (ret >= 0)
|
||||
iocb->ki_pos += ret;
|
||||
} else {
|
||||
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_get(inode);
|
||||
ret = generic_file_read_iter(iocb, iter);
|
||||
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_add_put(inode);
|
||||
}
|
||||
out:
|
||||
return bch2_err_class(ret);
|
||||
@ -2050,31 +2044,154 @@ err:
|
||||
return err ? false : ret;
|
||||
}
|
||||
|
||||
static void bch2_dio_write_loop_async(struct bch_write_op *);
|
||||
|
||||
static long bch2_dio_write_loop(struct dio_write *dio)
|
||||
static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio)
|
||||
{
|
||||
bool kthread = (current->flags & PF_KTHREAD) != 0;
|
||||
struct bch_fs *c = dio->op.c;
|
||||
struct bch_inode_info *inode = dio->inode;
|
||||
struct bio *bio = &dio->op.wbio.bio;
|
||||
|
||||
return bch2_check_range_allocated(c, inode_inum(inode),
|
||||
dio->op.pos.offset, bio_sectors(bio),
|
||||
dio->op.opts.data_replicas,
|
||||
dio->op.opts.compression != 0);
|
||||
}
|
||||
|
||||
static void bch2_dio_write_loop_async(struct bch_write_op *);
|
||||
static __always_inline long bch2_dio_write_done(struct dio_write *dio);
|
||||
|
||||
static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
|
||||
{
|
||||
struct iovec *iov = dio->inline_vecs;
|
||||
|
||||
if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
|
||||
iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
|
||||
GFP_KERNEL);
|
||||
if (unlikely(!iov))
|
||||
return -ENOMEM;
|
||||
|
||||
dio->free_iov = true;
|
||||
}
|
||||
|
||||
memcpy(iov, dio->iter.iov, dio->iter.nr_segs * sizeof(*iov));
|
||||
dio->iter.iov = iov;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bch2_dio_write_flush_done(struct closure *cl)
|
||||
{
|
||||
struct dio_write *dio = container_of(cl, struct dio_write, op.cl);
|
||||
struct bch_fs *c = dio->op.c;
|
||||
|
||||
closure_debug_destroy(cl);
|
||||
|
||||
dio->op.error = bch2_journal_error(&c->journal);
|
||||
|
||||
bch2_dio_write_done(dio);
|
||||
}
|
||||
|
||||
static noinline void bch2_dio_write_flush(struct dio_write *dio)
|
||||
{
|
||||
struct bch_fs *c = dio->op.c;
|
||||
struct bch_inode_unpacked inode;
|
||||
int ret;
|
||||
|
||||
dio->flush = 0;
|
||||
|
||||
closure_init(&dio->op.cl, NULL);
|
||||
|
||||
if (!dio->op.error) {
|
||||
ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode);
|
||||
if (ret)
|
||||
dio->op.error = ret;
|
||||
else
|
||||
bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq, &dio->op.cl);
|
||||
}
|
||||
|
||||
if (dio->sync) {
|
||||
closure_sync(&dio->op.cl);
|
||||
closure_debug_destroy(&dio->op.cl);
|
||||
} else {
|
||||
continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline long bch2_dio_write_done(struct dio_write *dio)
|
||||
{
|
||||
struct bch_fs *c = dio->op.c;
|
||||
struct kiocb *req = dio->req;
|
||||
struct address_space *mapping = req->ki_filp->f_mapping;
|
||||
struct bch_inode_info *inode = file_bch_inode(req->ki_filp);
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct bch_inode_info *inode = dio->inode;
|
||||
bool sync = dio->sync;
|
||||
long ret;
|
||||
|
||||
if (unlikely(dio->flush)) {
|
||||
bch2_dio_write_flush(dio);
|
||||
if (!sync)
|
||||
return -EIOCBQUEUED;
|
||||
}
|
||||
|
||||
bch2_pagecache_block_put(inode);
|
||||
bch2_quota_reservation_put(c, inode, &dio->quota_res);
|
||||
|
||||
if (dio->free_iov)
|
||||
kfree(dio->iter.iov);
|
||||
|
||||
ret = dio->op.error ?: ((long) dio->written << 9);
|
||||
bio_put(&dio->op.wbio.bio);
|
||||
|
||||
/* inode->i_dio_count is our ref on inode and thus bch_fs */
|
||||
inode_dio_end(&inode->v);
|
||||
|
||||
if (ret < 0)
|
||||
ret = bch2_err_class(ret);
|
||||
|
||||
if (!sync) {
|
||||
req->ki_complete(req, ret);
|
||||
ret = -EIOCBQUEUED;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __always_inline void bch2_dio_write_end(struct dio_write *dio)
|
||||
{
|
||||
struct bch_fs *c = dio->op.c;
|
||||
struct kiocb *req = dio->req;
|
||||
struct bch_inode_info *inode = dio->inode;
|
||||
struct bio *bio = &dio->op.wbio.bio;
|
||||
struct bvec_iter_all iter;
|
||||
struct bio_vec *bv;
|
||||
|
||||
i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta);
|
||||
req->ki_pos += (u64) dio->op.written << 9;
|
||||
dio->written += dio->op.written;
|
||||
|
||||
spin_lock(&inode->v.i_lock);
|
||||
if (req->ki_pos > inode->v.i_size)
|
||||
i_size_write(&inode->v, req->ki_pos);
|
||||
spin_unlock(&inode->v.i_lock);
|
||||
|
||||
if (likely(!bio_flagged(bio, BIO_NO_PAGE_REF)))
|
||||
bio_for_each_segment_all(bv, bio, iter)
|
||||
put_page(bv->bv_page);
|
||||
|
||||
if (unlikely(dio->op.error))
|
||||
set_bit(EI_INODE_ERROR, &inode->ei_flags);
|
||||
}
|
||||
|
||||
static long bch2_dio_write_loop(struct dio_write *dio)
|
||||
{
|
||||
struct bch_fs *c = dio->op.c;
|
||||
struct kiocb *req = dio->req;
|
||||
struct address_space *mapping = dio->mapping;
|
||||
struct bch_inode_info *inode = dio->inode;
|
||||
struct bio *bio = &dio->op.wbio.bio;
|
||||
unsigned unaligned, iter_count;
|
||||
bool sync = dio->sync, dropped_locks;
|
||||
long ret;
|
||||
|
||||
if (dio->loop)
|
||||
goto loop;
|
||||
|
||||
while (1) {
|
||||
iter_count = dio->iter.count;
|
||||
|
||||
if (kthread && dio->mm)
|
||||
kthread_use_mm(dio->mm);
|
||||
BUG_ON(current->faults_disabled_mapping);
|
||||
EBUG_ON(current->faults_disabled_mapping);
|
||||
current->faults_disabled_mapping = mapping;
|
||||
|
||||
ret = bio_iov_iter_get_pages(bio, &dio->iter);
|
||||
@ -2082,8 +2199,6 @@ static long bch2_dio_write_loop(struct dio_write *dio)
|
||||
dropped_locks = fdm_dropped_locks();
|
||||
|
||||
current->faults_disabled_mapping = NULL;
|
||||
if (kthread && dio->mm)
|
||||
kthread_unuse_mm(dio->mm);
|
||||
|
||||
/*
|
||||
* If the fault handler returned an error but also signalled
|
||||
@ -2121,116 +2236,80 @@ static long bch2_dio_write_loop(struct dio_write *dio)
|
||||
}
|
||||
|
||||
bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode));
|
||||
dio->op.end_io = bch2_dio_write_loop_async;
|
||||
dio->op.end_io = sync
|
||||
? NULL
|
||||
: bch2_dio_write_loop_async;
|
||||
dio->op.target = dio->op.opts.foreground_target;
|
||||
dio->op.write_point = writepoint_hashed((unsigned long) current);
|
||||
dio->op.nr_replicas = dio->op.opts.data_replicas;
|
||||
dio->op.subvol = inode->ei_subvol;
|
||||
dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
|
||||
|
||||
if ((req->ki_flags & IOCB_DSYNC) &&
|
||||
!c->opts.journal_flush_disabled)
|
||||
dio->op.flags |= BCH_WRITE_FLUSH;
|
||||
if (sync)
|
||||
dio->op.flags |= BCH_WRITE_SYNC;
|
||||
dio->op.flags |= BCH_WRITE_CHECK_ENOSPC;
|
||||
|
||||
ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio),
|
||||
dio->op.opts.data_replicas, 0);
|
||||
if (unlikely(ret) &&
|
||||
!bch2_check_range_allocated(c, inode_inum(inode),
|
||||
dio->op.pos.offset, bio_sectors(bio),
|
||||
dio->op.opts.data_replicas,
|
||||
dio->op.opts.compression != 0))
|
||||
!bch2_dio_write_check_allocated(dio))
|
||||
goto err;
|
||||
|
||||
task_io_account_write(bio->bi_iter.bi_size);
|
||||
|
||||
if (!dio->sync && !dio->loop && dio->iter.count) {
|
||||
struct iovec *iov = dio->inline_vecs;
|
||||
if (unlikely(dio->iter.count) &&
|
||||
!dio->sync &&
|
||||
!dio->loop &&
|
||||
bch2_dio_write_copy_iov(dio))
|
||||
dio->sync = sync = true;
|
||||
|
||||
if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
|
||||
iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
|
||||
GFP_KERNEL);
|
||||
if (unlikely(!iov)) {
|
||||
dio->sync = sync = true;
|
||||
goto do_io;
|
||||
}
|
||||
|
||||
dio->free_iov = true;
|
||||
}
|
||||
|
||||
memcpy(iov, dio->iter.iov, dio->iter.nr_segs * sizeof(*iov));
|
||||
dio->iter.iov = iov;
|
||||
}
|
||||
do_io:
|
||||
dio->loop = true;
|
||||
closure_call(&dio->op.cl, bch2_write, NULL, NULL);
|
||||
|
||||
if (sync)
|
||||
wait_for_completion(&dio->done);
|
||||
else
|
||||
if (!sync)
|
||||
return -EIOCBQUEUED;
|
||||
loop:
|
||||
i_sectors_acct(c, inode, &dio->quota_res,
|
||||
dio->op.i_sectors_delta);
|
||||
req->ki_pos += (u64) dio->op.written << 9;
|
||||
dio->written += dio->op.written;
|
||||
|
||||
spin_lock(&inode->v.i_lock);
|
||||
if (req->ki_pos > inode->v.i_size)
|
||||
i_size_write(&inode->v, req->ki_pos);
|
||||
spin_unlock(&inode->v.i_lock);
|
||||
bch2_dio_write_end(dio);
|
||||
|
||||
if (likely(!bio_flagged(bio, BIO_NO_PAGE_REF)))
|
||||
bio_for_each_segment_all(bv, bio, iter)
|
||||
put_page(bv->bv_page);
|
||||
bio->bi_vcnt = 0;
|
||||
|
||||
if (dio->op.error) {
|
||||
set_bit(EI_INODE_ERROR, &inode->ei_flags);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!dio->iter.count)
|
||||
if (likely(!dio->iter.count) || dio->op.error)
|
||||
break;
|
||||
|
||||
bio_reset(bio, NULL, REQ_OP_WRITE);
|
||||
reinit_completion(&dio->done);
|
||||
}
|
||||
|
||||
ret = dio->op.error ?: ((long) dio->written << 9);
|
||||
out:
|
||||
return bch2_dio_write_done(dio);
|
||||
err:
|
||||
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
|
||||
bch2_quota_reservation_put(c, inode, &dio->quota_res);
|
||||
dio->op.error = ret;
|
||||
|
||||
if (dio->free_iov)
|
||||
kfree(dio->iter.iov);
|
||||
if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
|
||||
struct bvec_iter_all iter;
|
||||
struct bio_vec *bv;
|
||||
|
||||
if (likely(!bio_flagged(bio, BIO_NO_PAGE_REF)))
|
||||
bio_for_each_segment_all(bv, bio, iter)
|
||||
put_page(bv->bv_page);
|
||||
bio_put(bio);
|
||||
|
||||
/* inode->i_dio_count is our ref on inode and thus bch_fs */
|
||||
inode_dio_end(&inode->v);
|
||||
|
||||
if (ret < 0)
|
||||
ret = bch2_err_class(ret);
|
||||
|
||||
if (!sync) {
|
||||
req->ki_complete(req, ret);
|
||||
ret = -EIOCBQUEUED;
|
||||
}
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
static void bch2_dio_write_loop_async(struct bch_write_op *op)
|
||||
{
|
||||
struct dio_write *dio = container_of(op, struct dio_write, op);
|
||||
struct mm_struct *mm = dio->mm;
|
||||
|
||||
if (dio->sync)
|
||||
complete(&dio->done);
|
||||
else
|
||||
bch2_dio_write_loop(dio);
|
||||
bch2_dio_write_end(dio);
|
||||
|
||||
if (likely(!dio->iter.count) || dio->op.error) {
|
||||
bch2_dio_write_done(dio);
|
||||
return;
|
||||
}
|
||||
|
||||
bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE);
|
||||
|
||||
if (mm)
|
||||
kthread_use_mm(mm);
|
||||
bch2_dio_write_loop(dio);
|
||||
if (mm)
|
||||
kthread_unuse_mm(mm);
|
||||
}
|
||||
|
||||
static noinline
|
||||
@ -2268,7 +2347,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
|
||||
goto err;
|
||||
|
||||
inode_dio_begin(&inode->v);
|
||||
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_block_get(inode);
|
||||
|
||||
extending = req->ki_pos + iter->count > inode->v.i_size;
|
||||
if (!extending) {
|
||||
@ -2282,26 +2361,31 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
|
||||
GFP_KERNEL,
|
||||
&c->dio_write_bioset);
|
||||
dio = container_of(bio, struct dio_write, op.wbio.bio);
|
||||
init_completion(&dio->done);
|
||||
dio->req = req;
|
||||
dio->mapping = mapping;
|
||||
dio->inode = inode;
|
||||
dio->mm = current->mm;
|
||||
dio->loop = false;
|
||||
dio->sync = is_sync_kiocb(req) || extending;
|
||||
dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
|
||||
dio->free_iov = false;
|
||||
dio->quota_res.sectors = 0;
|
||||
dio->written = 0;
|
||||
dio->iter = *iter;
|
||||
dio->op.c = c;
|
||||
|
||||
ret = bch2_quota_reservation_add(c, inode, &dio->quota_res,
|
||||
iter->count >> 9, true);
|
||||
if (unlikely(ret))
|
||||
goto err_put_bio;
|
||||
|
||||
ret = write_invalidate_inode_pages_range(mapping,
|
||||
req->ki_pos,
|
||||
req->ki_pos + iter->count - 1);
|
||||
if (unlikely(ret))
|
||||
goto err_put_bio;
|
||||
if (unlikely(mapping->nrpages)) {
|
||||
ret = write_invalidate_inode_pages_range(mapping,
|
||||
req->ki_pos,
|
||||
req->ki_pos + iter->count - 1);
|
||||
if (unlikely(ret))
|
||||
goto err_put_bio;
|
||||
}
|
||||
|
||||
ret = bch2_dio_write_loop(dio);
|
||||
err:
|
||||
@ -2309,7 +2393,7 @@ err:
|
||||
inode_unlock(&inode->v);
|
||||
return ret;
|
||||
err_put_bio:
|
||||
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_block_put(inode);
|
||||
bch2_quota_reservation_put(c, inode, &dio->quota_res);
|
||||
bio_put(bio);
|
||||
inode_dio_end(&inode->v);
|
||||
@ -2613,7 +2697,7 @@ int bch2_truncate(struct user_namespace *mnt_userns,
|
||||
}
|
||||
|
||||
inode_dio_wait(&inode->v);
|
||||
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_block_get(inode);
|
||||
|
||||
ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u);
|
||||
if (ret)
|
||||
@ -2692,7 +2776,7 @@ int bch2_truncate(struct user_namespace *mnt_userns,
|
||||
|
||||
ret = bch2_setattr_nonsize(mnt_userns, inode, iattr);
|
||||
err:
|
||||
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_block_put(inode);
|
||||
return bch2_err_class(ret);
|
||||
}
|
||||
|
||||
@ -3005,8 +3089,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
|
||||
}
|
||||
|
||||
ret = bch2_extent_update(&trans, inode_inum(inode), &iter,
|
||||
&reservation.k_i,
|
||||
&disk_res, NULL,
|
||||
&reservation.k_i, &disk_res,
|
||||
0, &i_sectors_delta, true);
|
||||
if (ret)
|
||||
goto bkey_err;
|
||||
@ -3105,7 +3188,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
|
||||
|
||||
inode_lock(&inode->v);
|
||||
inode_dio_wait(&inode->v);
|
||||
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_block_get(inode);
|
||||
|
||||
ret = file_modified(file);
|
||||
if (ret)
|
||||
@ -3122,7 +3205,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
|
||||
else
|
||||
ret = -EOPNOTSUPP;
|
||||
err:
|
||||
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
|
||||
bch2_pagecache_block_put(inode);
|
||||
inode_unlock(&inode->v);
|
||||
percpu_ref_put(&c->writes);
|
||||
|
||||
|
@ -43,58 +43,6 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
|
||||
struct bch_inode_unpacked *,
|
||||
struct bch_subvolume *);
|
||||
|
||||
static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
|
||||
{
|
||||
BUG_ON(atomic_long_read(&lock->v) == 0);
|
||||
|
||||
if (atomic_long_sub_return_release(i, &lock->v) == 0)
|
||||
wake_up_all(&lock->wait);
|
||||
}
|
||||
|
||||
static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i)
|
||||
{
|
||||
long v = atomic_long_read(&lock->v), old;
|
||||
|
||||
do {
|
||||
old = v;
|
||||
|
||||
if (i > 0 ? v < 0 : v > 0)
|
||||
return false;
|
||||
} while ((v = atomic_long_cmpxchg_acquire(&lock->v,
|
||||
old, old + i)) != old);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void __pagecache_lock_get(struct pagecache_lock *lock, long i)
|
||||
{
|
||||
wait_event(lock->wait, __pagecache_lock_tryget(lock, i));
|
||||
}
|
||||
|
||||
void bch2_pagecache_add_put(struct pagecache_lock *lock)
|
||||
{
|
||||
__pagecache_lock_put(lock, 1);
|
||||
}
|
||||
|
||||
bool bch2_pagecache_add_tryget(struct pagecache_lock *lock)
|
||||
{
|
||||
return __pagecache_lock_tryget(lock, 1);
|
||||
}
|
||||
|
||||
void bch2_pagecache_add_get(struct pagecache_lock *lock)
|
||||
{
|
||||
__pagecache_lock_get(lock, 1);
|
||||
}
|
||||
|
||||
void bch2_pagecache_block_put(struct pagecache_lock *lock)
|
||||
{
|
||||
__pagecache_lock_put(lock, -1);
|
||||
}
|
||||
|
||||
void bch2_pagecache_block_get(struct pagecache_lock *lock)
|
||||
{
|
||||
__pagecache_lock_get(lock, -1);
|
||||
}
|
||||
|
||||
void bch2_inode_update_after_write(struct btree_trans *trans,
|
||||
struct bch_inode_info *inode,
|
||||
struct bch_inode_unpacked *bi,
|
||||
@ -1409,7 +1357,7 @@ static struct inode *bch2_alloc_inode(struct super_block *sb)
|
||||
|
||||
inode_init_once(&inode->v);
|
||||
mutex_init(&inode->ei_update_lock);
|
||||
pagecache_lock_init(&inode->ei_pagecache_lock);
|
||||
two_state_lock_init(&inode->ei_pagecache_lock);
|
||||
mutex_init(&inode->ei_quota_lock);
|
||||
|
||||
return &inode->v;
|
||||
|
@ -6,31 +6,11 @@
|
||||
#include "opts.h"
|
||||
#include "str_hash.h"
|
||||
#include "quota_types.h"
|
||||
#include "two_state_shared_lock.h"
|
||||
|
||||
#include <linux/seqlock.h>
|
||||
#include <linux/stat.h>
|
||||
|
||||
/*
|
||||
* Two-state lock - can be taken for add or block - both states are shared,
|
||||
* like read side of rwsem, but conflict with other state:
|
||||
*/
|
||||
struct pagecache_lock {
|
||||
atomic_long_t v;
|
||||
wait_queue_head_t wait;
|
||||
};
|
||||
|
||||
static inline void pagecache_lock_init(struct pagecache_lock *lock)
|
||||
{
|
||||
atomic_long_set(&lock->v, 0);
|
||||
init_waitqueue_head(&lock->wait);
|
||||
}
|
||||
|
||||
void bch2_pagecache_add_put(struct pagecache_lock *);
|
||||
bool bch2_pagecache_add_tryget(struct pagecache_lock *);
|
||||
void bch2_pagecache_add_get(struct pagecache_lock *);
|
||||
void bch2_pagecache_block_put(struct pagecache_lock *);
|
||||
void bch2_pagecache_block_get(struct pagecache_lock *);
|
||||
|
||||
struct bch_inode_info {
|
||||
struct inode v;
|
||||
unsigned long ei_flags;
|
||||
@ -39,7 +19,7 @@ struct bch_inode_info {
|
||||
u64 ei_quota_reserved;
|
||||
unsigned long ei_last_dirtied;
|
||||
|
||||
struct pagecache_lock ei_pagecache_lock;
|
||||
two_state_lock_t ei_pagecache_lock;
|
||||
|
||||
struct mutex ei_quota_lock;
|
||||
struct bch_qid ei_qid;
|
||||
@ -50,6 +30,13 @@ struct bch_inode_info {
|
||||
struct bch_inode_unpacked ei_inode;
|
||||
};
|
||||
|
||||
#define bch2_pagecache_add_put(i) bch2_two_state_unlock(&i->ei_pagecache_lock, 0)
|
||||
#define bch2_pagecache_add_tryget(i) bch2_two_state_trylock(&i->ei_pagecache_lock, 0)
|
||||
#define bch2_pagecache_add_get(i) bch2_two_state_lock(&i->ei_pagecache_lock, 0)
|
||||
|
||||
#define bch2_pagecache_block_put(i) bch2_two_state_unlock(&i->ei_pagecache_lock, 1)
|
||||
#define bch2_pagecache_block_get(i) bch2_two_state_lock(&i->ei_pagecache_lock, 1)
|
||||
|
||||
static inline subvol_inum inode_inum(struct bch_inode_info *inode)
|
||||
{
|
||||
return (subvol_inum) {
|
||||
@ -96,7 +83,7 @@ do { \
|
||||
if ((_locks) & INODE_LOCK) \
|
||||
down_write_nested(&a[i]->v.i_rwsem, i); \
|
||||
if ((_locks) & INODE_PAGECACHE_BLOCK) \
|
||||
bch2_pagecache_block_get(&a[i]->ei_pagecache_lock);\
|
||||
bch2_pagecache_block_get(a[i]);\
|
||||
if ((_locks) & INODE_UPDATE_LOCK) \
|
||||
mutex_lock_nested(&a[i]->ei_update_lock, i);\
|
||||
} \
|
||||
@ -114,7 +101,7 @@ do { \
|
||||
if ((_locks) & INODE_LOCK) \
|
||||
up_write(&a[i]->v.i_rwsem); \
|
||||
if ((_locks) & INODE_PAGECACHE_BLOCK) \
|
||||
bch2_pagecache_block_put(&a[i]->ei_pagecache_lock);\
|
||||
bch2_pagecache_block_put(a[i]);\
|
||||
if ((_locks) & INODE_UPDATE_LOCK) \
|
||||
mutex_unlock(&a[i]->ei_update_lock); \
|
||||
} \
|
||||
|
326	libbcachefs/io.c
@ -16,6 +16,7 @@
|
||||
#include "checksum.h"
|
||||
#include "compress.h"
|
||||
#include "clock.h"
|
||||
#include "data_update.h"
|
||||
#include "debug.h"
|
||||
#include "disk_groups.h"
|
||||
#include "ec.h"
|
||||
@ -237,12 +238,14 @@ int bch2_extent_update(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_i *k,
|
||||
struct disk_reservation *disk_res,
|
||||
u64 *journal_seq,
|
||||
u64 new_i_size,
|
||||
s64 *i_sectors_delta_total,
|
||||
bool check_enospc)
|
||||
{
|
||||
struct btree_iter inode_iter = { NULL };
|
||||
struct bkey_s_c inode_k;
|
||||
struct bkey_s_c_inode_v3 inode;
|
||||
struct bkey_i_inode_v3 *new_inode;
|
||||
struct bpos next_pos;
|
||||
bool usage_increasing;
|
||||
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
|
||||
@ -282,59 +285,51 @@ int bch2_extent_update(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (new_i_size || i_sectors_delta) {
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_c_inode_v3 inode;
|
||||
struct bkey_i_inode_v3 *new_inode;
|
||||
bool i_size_update;
|
||||
bch2_trans_iter_init(trans, &inode_iter, BTREE_ID_inodes,
|
||||
SPOS(0, inum.inum, iter->snapshot),
|
||||
BTREE_ITER_INTENT|BTREE_ITER_CACHED);
|
||||
inode_k = bch2_btree_iter_peek_slot(&inode_iter);
|
||||
ret = bkey_err(inode_k);
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
|
||||
bch2_trans_iter_init(trans, &inode_iter, BTREE_ID_inodes,
|
||||
SPOS(0, inum.inum, iter->snapshot),
|
||||
BTREE_ITER_INTENT|BTREE_ITER_CACHED);
|
||||
k = bch2_btree_iter_peek_slot(&inode_iter);
|
||||
ret = bkey_err(k);
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
ret = bkey_is_inode(inode_k.k) ? 0 : -ENOENT;
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
|
||||
ret = bkey_is_inode(k.k) ? 0 : -ENOENT;
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
|
||||
if (unlikely(k.k->type != KEY_TYPE_inode_v3)) {
|
||||
k = bch2_inode_to_v3(trans, k);
|
||||
ret = bkey_err(k);
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
}
|
||||
|
||||
inode = bkey_s_c_to_inode_v3(k);
|
||||
i_size_update = !(le64_to_cpu(inode.v->bi_flags) & BCH_INODE_I_SIZE_DIRTY) &&
|
||||
new_i_size > le64_to_cpu(inode.v->bi_size);
|
||||
|
||||
if (!i_sectors_delta && !i_size_update)
|
||||
goto no_inode_update;
|
||||
|
||||
new_inode = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
|
||||
ret = PTR_ERR_OR_ZERO(new_inode);
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
|
||||
bkey_reassemble(&new_inode->k_i, k);
|
||||
|
||||
if (i_size_update)
|
||||
new_inode->v.bi_size = cpu_to_le64(new_i_size);
|
||||
|
||||
le64_add_cpu(&new_inode->v.bi_sectors, i_sectors_delta);
|
||||
|
||||
new_inode->k.p.snapshot = iter->snapshot;
|
||||
|
||||
ret = bch2_trans_update(trans, &inode_iter, &new_inode->k_i, 0);
|
||||
if (unlikely(inode_k.k->type != KEY_TYPE_inode_v3)) {
|
||||
inode_k = bch2_inode_to_v3(trans, inode_k);
|
||||
ret = bkey_err(inode_k);
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
}
|
||||
no_inode_update:
|
||||
ret = bch2_trans_update(trans, iter, k, 0) ?:
|
||||
bch2_trans_commit(trans, disk_res, journal_seq,
|
||||
|
||||
inode = bkey_s_c_to_inode_v3(inode_k);
|
||||
|
||||
new_inode = bch2_trans_kmalloc(trans, bkey_bytes(inode_k.k));
|
||||
ret = PTR_ERR_OR_ZERO(new_inode);
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
|
||||
bkey_reassemble(&new_inode->k_i, inode.s_c);
|
||||
|
||||
if (!(le64_to_cpu(inode.v->bi_flags) & BCH_INODE_I_SIZE_DIRTY) &&
|
||||
new_i_size > le64_to_cpu(inode.v->bi_size))
|
||||
new_inode->v.bi_size = cpu_to_le64(new_i_size);
|
||||
|
||||
le64_add_cpu(&new_inode->v.bi_sectors, i_sectors_delta);
|
||||
|
||||
new_inode->k.p.snapshot = iter->snapshot;
|
||||
|
||||
/*
|
||||
* Note:
|
||||
* We always have to do an inode update - even when i_size/i_sectors
|
||||
* aren't changing - for fsync to work properly; fsync relies on
|
||||
* inode->bi_journal_seq which is updated by the trigger code:
|
||||
*/
|
||||
ret = bch2_trans_update(trans, &inode_iter, &new_inode->k_i, 0) ?:
|
||||
bch2_trans_update(trans, iter, k, 0) ?:
|
||||
bch2_trans_commit(trans, disk_res, NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL);
|
||||
if (unlikely(ret))
|
||||
@ -397,8 +392,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
|
||||
bch2_cut_back(end_pos, &delete);
|
||||
|
||||
ret = bch2_extent_update(trans, inum, iter, &delete,
|
||||
&disk_res, NULL,
|
||||
0, i_sectors_delta, false);
|
||||
&disk_res, 0, i_sectors_delta, false);
|
||||
bch2_disk_reservation_put(c, &disk_res);
|
||||
}
|
||||
|
||||
@ -428,7 +422,7 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_write_index_default(struct bch_write_op *op)
|
||||
static int bch2_write_index_default(struct bch_write_op *op)
|
||||
{
|
||||
struct bch_fs *c = op->c;
|
||||
struct bkey_buf sk;
|
||||
@ -465,7 +459,7 @@ int bch2_write_index_default(struct bch_write_op *op)
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
||||
|
||||
ret = bch2_extent_update(&trans, inum, &iter, sk.k,
|
||||
&op->res, op_journal_seq(op),
|
||||
&op->res,
|
||||
op->new_i_size, &op->i_sectors_delta,
|
||||
op->flags & BCH_WRITE_CHECK_ENOSPC);
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
@ -543,29 +537,22 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
|
||||
}
|
||||
}
|
||||
|
||||
static void __bch2_write(struct closure *);
|
||||
static void __bch2_write(struct bch_write_op *);
|
||||
|
||||
static void bch2_write_done(struct closure *cl)
|
||||
{
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
struct bch_fs *c = op->c;
|
||||
|
||||
if (!op->error && (op->flags & BCH_WRITE_FLUSH))
|
||||
op->error = bch2_journal_error(&c->journal);
|
||||
|
||||
bch2_disk_reservation_put(c, &op->res);
|
||||
percpu_ref_put(&c->writes);
|
||||
bch2_keylist_free(&op->insert_keys, op->inline_keys);
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
|
||||
|
||||
if (op->end_io) {
|
||||
EBUG_ON(cl->parent);
|
||||
closure_debug_destroy(cl);
|
||||
closure_debug_destroy(cl);
|
||||
if (op->end_io)
|
||||
op->end_io(op);
|
||||
} else {
|
||||
closure_return(cl);
|
||||
}
|
||||
}
|
||||
|
||||
static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
|
||||
@ -603,7 +590,7 @@ static void __bch2_write_index(struct bch_write_op *op)
|
||||
struct keylist *keys = &op->insert_keys;
|
||||
struct bkey_i *k;
|
||||
unsigned dev;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) {
|
||||
ret = bch2_write_drop_io_error_ptrs(op);
|
||||
@ -626,7 +613,10 @@ static void __bch2_write_index(struct bch_write_op *op)
|
||||
|
||||
if (!bch2_keylist_empty(keys)) {
|
||||
u64 sectors_start = keylist_sectors(keys);
|
||||
int ret = op->index_update_fn(op);
|
||||
|
||||
ret = !(op->flags & BCH_WRITE_MOVE)
|
||||
? bch2_write_index_default(op)
|
||||
: bch2_data_update_index_update(op);
|
||||
|
||||
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
|
||||
BUG_ON(keylist_sectors(keys) && !ret);
|
||||
@ -636,7 +626,7 @@ static void __bch2_write_index(struct bch_write_op *op)
|
||||
if (ret) {
|
||||
bch_err_inum_ratelimited(c, op->pos.inode,
|
||||
"write error while doing btree update: %s", bch2_err_str(ret));
|
||||
op->error = ret;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
out:
|
||||
@ -649,25 +639,45 @@ out:
|
||||
err:
|
||||
keys->top = keys->keys;
|
||||
op->error = ret;
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
static void bch2_write_index(struct closure *cl)
|
||||
{
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
struct bch_fs *c = op->c;
|
||||
struct write_point *wp = op->wp;
|
||||
struct workqueue_struct *wq = index_update_wq(op);
|
||||
|
||||
__bch2_write_index(op);
|
||||
barrier();
|
||||
op->btree_update_ready = true;
|
||||
queue_work(wq, &wp->index_update_work);
|
||||
}
|
||||
|
||||
if (!(op->flags & BCH_WRITE_DONE)) {
|
||||
continue_at(cl, __bch2_write, index_update_wq(op));
|
||||
} else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
|
||||
bch2_journal_flush_seq_async(&c->journal,
|
||||
*op_journal_seq(op),
|
||||
cl);
|
||||
continue_at(cl, bch2_write_done, index_update_wq(op));
|
||||
} else {
|
||||
continue_at_nobarrier(cl, bch2_write_done, NULL);
|
||||
void bch2_write_point_do_index_updates(struct work_struct *work)
|
||||
{
|
||||
struct write_point *wp =
|
||||
container_of(work, struct write_point, index_update_work);
|
||||
struct bch_write_op *op;
|
||||
|
||||
while (1) {
|
||||
spin_lock(&wp->writes_lock);
|
||||
op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list);
|
||||
if (op && !op->btree_update_ready)
|
||||
op = NULL;
|
||||
if (op)
|
||||
list_del(&op->wp_list);
|
||||
spin_unlock(&wp->writes_lock);
|
||||
|
||||
if (!op)
|
||||
break;
|
||||
|
||||
__bch2_write_index(op);
|
||||
|
||||
if (!(op->flags & BCH_WRITE_DONE))
|
||||
__bch2_write(op);
|
||||
else
|
||||
bch2_write_done(&op->cl);
|
||||
}
|
||||
}
|
||||
|
||||
@ -700,12 +710,12 @@ static void bch2_write_endio(struct bio *bio)
|
||||
if (wbio->put_bio)
|
||||
bio_put(bio);
|
||||
|
||||
if (parent)
|
||||
if (parent) {
|
||||
bio_endio(&parent->bio);
|
||||
else if (!(op->flags & BCH_WRITE_SKIP_CLOSURE_PUT))
|
||||
closure_put(cl);
|
||||
else
|
||||
continue_at_nobarrier(cl, bch2_write_index, index_update_wq(op));
|
||||
return;
|
||||
}
|
||||
|
||||
closure_put(cl);
|
||||
}
|
||||
|
||||
static void init_append_extent(struct bch_write_op *op,
|
||||
@ -1112,19 +1122,18 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __bch2_write(struct closure *cl)
|
||||
static void __bch2_write(struct bch_write_op *op)
|
||||
{
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
struct bch_fs *c = op->c;
|
||||
struct write_point *wp;
|
||||
struct write_point *wp = NULL;
|
||||
struct bio *bio = NULL;
|
||||
bool skip_put = true;
|
||||
unsigned nofs_flags;
|
||||
int ret;
|
||||
|
||||
nofs_flags = memalloc_nofs_save();
|
||||
again:
|
||||
memset(&op->failed, 0, sizeof(op->failed));
|
||||
op->btree_update_ready = false;
|
||||
|
||||
do {
|
||||
struct bkey_i *key_to_write;
|
||||
@ -1134,76 +1143,60 @@ again:
|
||||
/* +1 for possible cache device: */
|
||||
if (op->open_buckets.nr + op->nr_replicas + 1 >
|
||||
ARRAY_SIZE(op->open_buckets.v))
|
||||
goto flush_io;
|
||||
break;
|
||||
|
||||
if (bch2_keylist_realloc(&op->insert_keys,
|
||||
op->inline_keys,
|
||||
ARRAY_SIZE(op->inline_keys),
|
||||
BKEY_EXTENT_U64s_MAX))
|
||||
goto flush_io;
|
||||
break;
|
||||
|
||||
/*
|
||||
* The copygc thread is now global, which means it's no longer
|
||||
* freeing up space on specific disks, which means that
|
||||
* allocations for specific disks may hang arbitrarily long:
|
||||
*/
|
||||
wp = bch2_alloc_sectors_start(c,
|
||||
op->target,
|
||||
op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED),
|
||||
op->write_point,
|
||||
&op->devs_have,
|
||||
op->nr_replicas,
|
||||
op->nr_replicas_required,
|
||||
op->alloc_reserve,
|
||||
op->flags,
|
||||
(op->flags & (BCH_WRITE_ALLOC_NOWAIT|
|
||||
BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : cl);
|
||||
EBUG_ON(!wp);
|
||||
|
||||
if (IS_ERR(wp)) {
|
||||
if (unlikely(wp != ERR_PTR(-EAGAIN))) {
|
||||
ret = PTR_ERR(wp);
|
||||
goto err;
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
bch2_alloc_sectors_start_trans(&trans,
|
||||
op->target,
|
||||
op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED),
|
||||
op->write_point,
|
||||
&op->devs_have,
|
||||
op->nr_replicas,
|
||||
op->nr_replicas_required,
|
||||
op->alloc_reserve,
|
||||
op->flags,
|
||||
(op->flags & (BCH_WRITE_ALLOC_NOWAIT|
|
||||
BCH_WRITE_ONLY_SPECIFIED_DEVS))
|
||||
? NULL : &op->cl, &wp));
|
||||
if (unlikely(ret)) {
|
||||
if (unlikely(ret != -EAGAIN)) {
|
||||
op->error = ret;
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
}
|
||||
|
||||
goto flush_io;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* It's possible for the allocator to fail, put us on the
|
||||
* freelist waitlist, and then succeed in one of various retry
|
||||
* paths: if that happens, we need to disable the skip_put
|
||||
* optimization because otherwise there won't necessarily be a
|
||||
* barrier before we free the bch_write_op:
|
||||
*/
|
||||
if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
|
||||
skip_put = false;
|
||||
|
||||
bch2_open_bucket_get(c, wp, &op->open_buckets);
|
||||
ret = bch2_write_extent(op, wp, &bio);
|
||||
|
||||
bch2_alloc_sectors_done(c, wp);
|
||||
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
if (ret) {
|
||||
skip_put = false;
|
||||
} else {
|
||||
/*
|
||||
* for the skip_put optimization this has to be set
|
||||
* before we submit the bio:
|
||||
*/
|
||||
if (ret < 0) {
|
||||
op->error = ret;
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
|
||||
bio->bi_end_io = bch2_write_endio;
|
||||
bio->bi_private = &op->cl;
|
||||
bio->bi_opf |= REQ_OP_WRITE;
|
||||
|
||||
if (!skip_put)
|
||||
closure_get(bio->bi_private);
|
||||
else
|
||||
op->flags |= BCH_WRITE_SKIP_CLOSURE_PUT;
|
||||
closure_get(bio->bi_private);
|
||||
|
||||
key_to_write = (void *) (op->insert_keys.keys_p +
|
||||
key_to_write_offset);
|
||||
@ -1212,48 +1205,34 @@ again:
|
||||
key_to_write);
|
||||
} while (ret);
|
||||
|
||||
if (!skip_put)
|
||||
continue_at(cl, bch2_write_index, index_update_wq(op));
|
||||
out:
|
||||
memalloc_nofs_restore(nofs_flags);
|
||||
return;
|
||||
err:
|
||||
op->error = ret;
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
|
||||
continue_at(cl, bch2_write_index, index_update_wq(op));
|
||||
goto out;
|
||||
flush_io:
|
||||
/*
|
||||
* If the write can't all be submitted at once, we generally want to
|
||||
* block synchronously as that signals backpressure to the caller.
|
||||
* Sync or no?
|
||||
*
|
||||
* However, if we're running out of a workqueue, we can't block here
|
||||
* because we'll be blocking other work items from completing:
|
||||
* If we're running asynchronously, we may still want to block
|
||||
* synchronously here if we weren't able to submit all of the IO at
|
||||
* once, as that signals backpressure to the caller.
|
||||
*/
|
||||
if (current->flags & PF_WQ_WORKER) {
|
||||
continue_at(cl, bch2_write_index, index_update_wq(op));
|
||||
goto out;
|
||||
}
|
||||
|
||||
closure_sync(cl);
|
||||
|
||||
if (!bch2_keylist_empty(&op->insert_keys)) {
|
||||
if ((op->flags & BCH_WRITE_SYNC) || !(op->flags & BCH_WRITE_DONE)) {
|
||||
closure_sync(&op->cl);
|
||||
__bch2_write_index(op);
|
||||
|
||||
if (op->error) {
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
continue_at_nobarrier(cl, bch2_write_done, NULL);
|
||||
goto out;
|
||||
}
|
||||
if (!(op->flags & BCH_WRITE_DONE))
|
||||
goto again;
|
||||
bch2_write_done(&op->cl);
|
||||
} else {
|
||||
spin_lock(&wp->writes_lock);
|
||||
op->wp = wp;
|
||||
list_add_tail(&op->wp_list, &wp->writes);
|
||||
spin_unlock(&wp->writes_lock);
|
||||
|
||||
continue_at(&op->cl, bch2_write_index, NULL);
|
||||
}
|
||||
|
||||
goto again;
|
||||
memalloc_nofs_restore(nofs_flags);
|
||||
}
|
||||
|
||||
static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
|
||||
{
|
||||
struct closure *cl = &op->cl;
|
||||
struct bio *bio = &op->wbio.bio;
|
||||
struct bvec_iter iter;
|
||||
struct bkey_i_inline_data *id;
|
||||
@ -1290,8 +1269,7 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
|
||||
op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
|
||||
continue_at_nobarrier(cl, bch2_write_index, NULL);
|
||||
return;
|
||||
__bch2_write_index(op);
|
||||
err:
|
||||
bch2_write_done(&op->cl);
|
||||
}
|
||||
@ -1319,6 +1297,7 @@ void bch2_write(struct closure *cl)
|
||||
struct bch_fs *c = op->c;
|
||||
unsigned data_len;
|
||||
|
||||
EBUG_ON(op->cl.parent);
|
||||
BUG_ON(!op->nr_replicas);
|
||||
BUG_ON(!op->write_point.v);
|
||||
BUG_ON(!bkey_cmp(op->pos, POS_MAX));
|
||||
@ -1352,24 +1331,19 @@ void bch2_write(struct closure *cl)
|
||||
return;
|
||||
}
|
||||
|
||||
continue_at_nobarrier(cl, __bch2_write, NULL);
|
||||
__bch2_write(op);
|
||||
return;
|
||||
err:
|
||||
bch2_disk_reservation_put(c, &op->res);
|
||||
|
||||
if (op->end_io) {
|
||||
EBUG_ON(cl->parent);
|
||||
closure_debug_destroy(cl);
|
||||
closure_debug_destroy(&op->cl);
|
||||
if (op->end_io)
|
||||
op->end_io(op);
|
||||
} else {
|
||||
closure_return(cl);
|
||||
}
|
||||
}
|
||||
|
||||
/* Cache promotion on read */
|
||||
|
||||
struct promote_op {
|
||||
struct closure cl;
|
||||
struct rcu_head rcu;
|
||||
u64 start_time;
|
||||
|
||||
@ -1423,10 +1397,10 @@ static void promote_free(struct bch_fs *c, struct promote_op *op)
|
||||
kfree_rcu(op, rcu);
|
||||
}
|
||||
|
||||
static void promote_done(struct closure *cl)
|
||||
static void promote_done(struct bch_write_op *wop)
|
||||
{
|
||||
struct promote_op *op =
|
||||
container_of(cl, struct promote_op, cl);
|
||||
container_of(wop, struct promote_op, write.op);
|
||||
struct bch_fs *c = op->write.op.c;
|
||||
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
|
||||
@ -1438,7 +1412,6 @@ static void promote_done(struct closure *cl)
|
||||
|
||||
static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
|
||||
{
|
||||
struct closure *cl = &op->cl;
|
||||
struct bio *bio = &op->write.op.wbio.bio;
|
||||
|
||||
trace_and_count(op->write.op.c, read_promote, &rbio->bio);
|
||||
@ -1451,9 +1424,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
|
||||
sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
|
||||
swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
|
||||
|
||||
closure_init(cl, NULL);
|
||||
bch2_data_update_read_done(&op->write, rbio->pick.crc, cl);
|
||||
closure_return_with_destructor(cl, promote_done);
|
||||
bch2_data_update_read_done(&op->write, rbio->pick.crc);
|
||||
}
|
||||
|
||||
static struct promote_op *__promote_alloc(struct bch_fs *c,
|
||||
@ -1518,6 +1489,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
|
||||
},
|
||||
btree_id, k);
|
||||
BUG_ON(ret);
|
||||
op->write.op.end_io = promote_done;
|
||||
|
||||
return op;
|
||||
err:
|
||||
|
@ -27,28 +27,20 @@ const char *bch2_blk_status_to_str(blk_status_t);
|
||||
enum bch_write_flags {
|
||||
BCH_WRITE_ALLOC_NOWAIT = (1 << 0),
|
||||
BCH_WRITE_CACHED = (1 << 1),
|
||||
BCH_WRITE_FLUSH = (1 << 2),
|
||||
BCH_WRITE_DATA_ENCODED = (1 << 3),
|
||||
BCH_WRITE_PAGES_STABLE = (1 << 4),
|
||||
BCH_WRITE_PAGES_OWNED = (1 << 5),
|
||||
BCH_WRITE_ONLY_SPECIFIED_DEVS = (1 << 6),
|
||||
BCH_WRITE_WROTE_DATA_INLINE = (1 << 7),
|
||||
BCH_WRITE_FROM_INTERNAL = (1 << 8),
|
||||
BCH_WRITE_CHECK_ENOSPC = (1 << 9),
|
||||
BCH_WRITE_DATA_ENCODED = (1 << 2),
|
||||
BCH_WRITE_PAGES_STABLE = (1 << 3),
|
||||
BCH_WRITE_PAGES_OWNED = (1 << 4),
|
||||
BCH_WRITE_ONLY_SPECIFIED_DEVS = (1 << 5),
|
||||
BCH_WRITE_WROTE_DATA_INLINE = (1 << 6),
|
||||
BCH_WRITE_CHECK_ENOSPC = (1 << 7),
|
||||
BCH_WRITE_SYNC = (1 << 8),
|
||||
BCH_WRITE_MOVE = (1 << 9),
|
||||
|
||||
/* Internal: */
|
||||
BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 10),
|
||||
BCH_WRITE_SKIP_CLOSURE_PUT = (1 << 11),
|
||||
BCH_WRITE_DONE = (1 << 12),
|
||||
BCH_WRITE_IO_ERROR = (1 << 13),
|
||||
BCH_WRITE_DONE = (1 << 10),
|
||||
BCH_WRITE_IO_ERROR = (1 << 11),
|
||||
};
|
||||
|
||||
static inline u64 *op_journal_seq(struct bch_write_op *op)
|
||||
{
|
||||
return (op->flags & BCH_WRITE_JOURNAL_SEQ_PTR)
|
||||
? op->journal_seq_p : &op->journal_seq;
|
||||
}
|
||||
|
||||
static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
|
||||
{
|
||||
return op->alloc_reserve == RESERVE_movinggc
|
||||
@ -60,14 +52,12 @@ int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
|
||||
struct bkey_i *, bool *, s64 *, s64 *);
|
||||
int bch2_extent_update(struct btree_trans *, subvol_inum,
|
||||
struct btree_iter *, struct bkey_i *,
|
||||
struct disk_reservation *, u64 *, u64, s64 *, bool);
|
||||
struct disk_reservation *, u64, s64 *, bool);
|
||||
|
||||
int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
|
||||
subvol_inum, u64, s64 *);
|
||||
int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
|
||||
|
||||
int bch2_write_index_default(struct bch_write_op *);
|
||||
|
||||
static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
|
||||
struct bch_io_opts opts)
|
||||
{
|
||||
@ -91,14 +81,14 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
|
||||
op->version = ZERO_VERSION;
|
||||
op->write_point = (struct write_point_specifier) { 0 };
|
||||
op->res = (struct disk_reservation) { 0 };
|
||||
op->journal_seq = 0;
|
||||
op->new_i_size = U64_MAX;
|
||||
op->i_sectors_delta = 0;
|
||||
op->index_update_fn = bch2_write_index_default;
|
||||
}
|
||||
|
||||
void bch2_write(struct closure *);
|
||||
|
||||
void bch2_write_point_do_index_updates(struct work_struct *);
|
||||
|
||||
static inline struct bch_write_bio *wbio_init(struct bio *bio)
|
||||
{
|
||||
struct bch_write_bio *wbio = to_wbio(bio);
|
||||
|
@ -117,6 +117,7 @@ struct bch_write_op {
|
||||
unsigned nr_replicas_required:4;
|
||||
unsigned alloc_reserve:3;
|
||||
unsigned incompressible:1;
|
||||
unsigned btree_update_ready:1;
|
||||
|
||||
struct bch_devs_list devs_have;
|
||||
u16 target;
|
||||
@ -132,23 +133,16 @@ struct bch_write_op {
|
||||
|
||||
struct write_point_specifier write_point;
|
||||
|
||||
struct write_point *wp;
|
||||
struct list_head wp_list;
|
||||
|
||||
struct disk_reservation res;
|
||||
|
||||
struct open_buckets open_buckets;
|
||||
|
||||
/*
|
||||
* If caller wants to flush but hasn't passed us a journal_seq ptr, we
|
||||
* still need to stash the journal_seq somewhere:
|
||||
*/
|
||||
union {
|
||||
u64 *journal_seq_p;
|
||||
u64 journal_seq;
|
||||
};
|
||||
u64 new_i_size;
|
||||
s64 i_sectors_delta;
|
||||
|
||||
int (*index_update_fn)(struct bch_write_op *);
|
||||
|
||||
struct bch_devs_mask failed;
|
||||
|
||||
struct keylist insert_keys;
|
||||
|
@ -17,7 +17,6 @@ static inline void bch2_keylist_free(struct keylist *l, u64 *inline_keys)
|
||||
{
|
||||
if (l->keys_p != inline_keys)
|
||||
kfree(l->keys_p);
|
||||
bch2_keylist_init(l, inline_keys);
|
||||
}
|
||||
|
||||
static inline void bch2_keylist_push(struct keylist *l)
|
||||
|
@ -53,9 +53,8 @@ struct moving_io {
|
||||
struct bio_vec bi_inline_vecs[0];
|
||||
};
|
||||
|
||||
static void move_free(struct closure *cl)
|
||||
static void move_free(struct moving_io *io)
|
||||
{
|
||||
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
||||
struct moving_context *ctxt = io->write.ctxt;
|
||||
struct bch_fs *c = ctxt->c;
|
||||
|
||||
@ -65,31 +64,30 @@ static void move_free(struct closure *cl)
|
||||
kfree(io);
|
||||
}
|
||||
|
||||
static void move_write_done(struct closure *cl)
|
||||
static void move_write_done(struct bch_write_op *op)
|
||||
{
|
||||
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
||||
struct moving_io *io = container_of(op, struct moving_io, write.op);
|
||||
struct moving_context *ctxt = io->write.ctxt;
|
||||
|
||||
if (io->write.op.error)
|
||||
ctxt->write_error = true;
|
||||
|
||||
atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors);
|
||||
closure_return_with_destructor(cl, move_free);
|
||||
move_free(io);
|
||||
closure_put(&ctxt->cl);
|
||||
}
|
||||
|
||||
static void move_write(struct closure *cl)
|
||||
static void move_write(struct moving_io *io)
|
||||
{
|
||||
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
||||
|
||||
if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) {
|
||||
closure_return_with_destructor(cl, move_free);
|
||||
move_free(io);
|
||||
return;
|
||||
}
|
||||
|
||||
closure_get(&io->write.ctxt->cl);
|
||||
atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
|
||||
|
||||
bch2_data_update_read_done(&io->write, io->rbio.pick.crc, cl);
|
||||
continue_at(cl, move_write_done, NULL);
|
||||
bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
|
||||
}
|
||||
|
||||
static inline struct moving_io *next_pending_write(struct moving_context *ctxt)
|
||||
@ -121,7 +119,7 @@ static void do_pending_writes(struct moving_context *ctxt, struct btree_trans *t
|
||||
|
||||
while ((io = next_pending_write(ctxt))) {
|
||||
list_del(&io->list);
|
||||
closure_call(&io->cl, move_write, NULL, &ctxt->cl);
|
||||
move_write(io);
|
||||
}
|
||||
}
|
||||
|
||||
@ -185,7 +183,7 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt,
|
||||
}
|
||||
}
|
||||
|
||||
void bch_move_stats_init(struct bch_move_stats *stats, char *name)
|
||||
void bch2_move_stats_init(struct bch_move_stats *stats, char *name)
|
||||
{
|
||||
memset(stats, 0, sizeof(*stats));
|
||||
scnprintf(stats->name, sizeof(stats->name), "%s", name);
|
||||
@ -302,6 +300,7 @@ static int bch2_move_extent(struct btree_trans *trans,
|
||||
goto err_free_pages;
|
||||
|
||||
io->write.ctxt = ctxt;
|
||||
io->write.op.end_io = move_write_done;
|
||||
|
||||
atomic64_inc(&ctxt->stats->keys_moved);
|
||||
atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
|
||||
@ -956,7 +955,7 @@ int bch2_data_job(struct bch_fs *c,
|
||||
|
||||
switch (op.op) {
|
||||
case BCH_DATA_OP_REREPLICATE:
|
||||
bch_move_stats_init(stats, "rereplicate");
|
||||
bch2_move_stats_init(stats, "rereplicate");
|
||||
stats->data_type = BCH_DATA_journal;
|
||||
ret = bch2_journal_flush_device_pins(&c->journal, -1);
|
||||
|
||||
@ -980,7 +979,7 @@ int bch2_data_job(struct bch_fs *c,
|
||||
if (op.migrate.dev >= c->sb.nr_devices)
|
||||
return -EINVAL;
|
||||
|
||||
bch_move_stats_init(stats, "migrate");
|
||||
bch2_move_stats_init(stats, "migrate");
|
||||
stats->data_type = BCH_DATA_journal;
|
||||
ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
|
||||
|
||||
@ -1001,7 +1000,7 @@ int bch2_data_job(struct bch_fs *c,
|
||||
ret = bch2_replicas_gc2(c) ?: ret;
|
||||
break;
|
||||
case BCH_DATA_OP_REWRITE_OLD_NODES:
|
||||
bch_move_stats_init(stats, "rewrite_old_nodes");
|
||||
bch2_move_stats_init(stats, "rewrite_old_nodes");
|
||||
ret = bch2_scan_old_btree_nodes(c, stats);
|
||||
break;
|
||||
default:
|
||||
|
@ -60,8 +60,7 @@ int bch2_data_job(struct bch_fs *,
|
||||
struct bch_move_stats *,
|
||||
struct bch_ioctl_data);
|
||||
|
||||
inline void bch_move_stats_init(struct bch_move_stats *stats,
|
||||
char *name);
|
||||
void bch2_move_stats_init(struct bch_move_stats *stats, char *name);
|
||||
|
||||
|
||||
#endif /* _BCACHEFS_MOVE_H */
|
||||
|
@ -102,7 +102,7 @@ static int bch2_copygc(struct bch_fs *c)
|
||||
};
|
||||
int ret = 0;
|
||||
|
||||
bch_move_stats_init(&move_stats, "copygc");
|
||||
bch2_move_stats_init(&move_stats, "copygc");
|
||||
|
||||
for_each_rw_member(ca, c, dev_idx)
|
||||
heap_size += ca->mi.nbuckets >> 7;
|
||||
|
@ -189,7 +189,7 @@ static int bch2_rebalance_thread(void *arg)
|
||||
prev_start = jiffies;
|
||||
prev_cputime = curr_cputime();
|
||||
|
||||
bch_move_stats_init(&move_stats, "rebalance");
|
||||
bch2_move_stats_init(&move_stats, "rebalance");
|
||||
while (!kthread_wait_freezable(r->enabled)) {
|
||||
cond_resched();
|
||||
|
||||
|
@ -1414,7 +1414,7 @@ use_clean:
|
||||
le16_to_cpu(c->sb.version_min) < bcachefs_metadata_version_btree_ptr_sectors_written) {
|
||||
struct bch_move_stats stats;
|
||||
|
||||
bch_move_stats_init(&stats, "recovery");
|
||||
bch2_move_stats_init(&stats, "recovery");
|
||||
|
||||
bch_info(c, "scanning for old btree nodes");
|
||||
ret = bch2_fs_read_write(c);
|
||||
@ -1486,6 +1486,9 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
|
||||
set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
|
||||
set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags);
|
||||
set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
|
||||
set_bit(BCH_FS_MAY_GO_RW, &c->flags);
|
||||
set_bit(BCH_FS_FSCK_DONE, &c->flags);
|
||||
|
||||
|
@ -378,7 +378,7 @@ s64 bch2_remap_range(struct bch_fs *c,
|
||||
dst_end.offset - dst_iter.pos.offset));
|
||||
|
||||
ret = bch2_extent_update(&trans, dst_inum, &dst_iter,
|
||||
new_dst.k, &disk_res, NULL,
|
||||
new_dst.k, &disk_res,
|
||||
new_i_size, i_sectors_delta,
|
||||
true);
|
||||
bch2_disk_reservation_put(c, &disk_res);
|
||||
|
@ -184,7 +184,7 @@ read_attribute(io_latency_stats_read);
|
||||
read_attribute(io_latency_stats_write);
|
||||
read_attribute(congested);
|
||||
|
||||
read_attribute(btree_avg_write_size);
|
||||
read_attribute(btree_write_stats);
|
||||
|
||||
read_attribute(btree_cache_size);
|
||||
read_attribute(compression_stats);
|
||||
@ -250,14 +250,6 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static size_t bch2_btree_avg_write_size(struct bch_fs *c)
|
||||
{
|
||||
u64 nr = atomic64_read(&c->btree_writes_nr);
|
||||
u64 sectors = atomic64_read(&c->btree_writes_sectors);
|
||||
|
||||
return nr ? div64_u64(sectors, nr) : 0;
|
||||
}
|
||||
|
||||
static long data_progress_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
long ret = 0;
|
||||
@ -396,7 +388,9 @@ SHOW(bch2_fs)
|
||||
sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
|
||||
|
||||
sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
|
||||
sysfs_hprint(btree_avg_write_size, bch2_btree_avg_write_size(c));
|
||||
|
||||
if (attr == &sysfs_btree_write_stats)
|
||||
bch2_btree_write_stats_to_text(out, c);
|
||||
|
||||
sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic);
|
||||
|
||||
@ -554,7 +548,7 @@ SYSFS_OPS(bch2_fs);
|
||||
struct attribute *bch2_fs_files[] = {
|
||||
&sysfs_minor,
|
||||
&sysfs_btree_cache_size,
|
||||
&sysfs_btree_avg_write_size,
|
||||
&sysfs_btree_write_stats,
|
||||
|
||||
&sysfs_promote_whole_extents,
|
||||
|
||||
|
33	libbcachefs/two_state_shared_lock.c (new file)
@ -0,0 +1,33 @@
// SPDX-License-Identifier: GPL-2.0

#include "two_state_shared_lock.h"

void bch2_two_state_unlock(two_state_lock_t *lock, int s)
{
	long i = s ? 1 : -1;

	BUG_ON(atomic_long_read(&lock->v) == 0);

	if (atomic_long_sub_return_release(i, &lock->v) == 0)
		wake_up_all(&lock->wait);
}

bool bch2_two_state_trylock(two_state_lock_t *lock, int s)
{
	long i = s ? 1 : -1;
	long v = atomic_long_read(&lock->v), old;

	do {
		old = v;

		if (i > 0 ? v < 0 : v > 0)
			return false;
	} while ((v = atomic_long_cmpxchg_acquire(&lock->v,
						  old, old + i)) != old);
	return true;
}

void bch2_two_state_lock(two_state_lock_t *lock, int s)
{
	wait_event(lock->wait, bch2_two_state_trylock(lock, s));
}
28	libbcachefs/two_state_shared_lock.h (new file)
@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_TWO_STATE_LOCK_H
#define _BCACHEFS_TWO_STATE_LOCK_H

#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/wait.h>

/*
 * Two-state lock - can be taken for add or block - both states are shared,
 * like read side of rwsem, but conflict with other state:
 */
typedef struct {
	atomic_long_t		v;
	wait_queue_head_t	wait;
} two_state_lock_t;

static inline void two_state_lock_init(two_state_lock_t *lock)
{
	atomic_long_set(&lock->v, 0);
	init_waitqueue_head(&lock->wait);
}

void bch2_two_state_unlock(two_state_lock_t *, int);
bool bch2_two_state_trylock(two_state_lock_t *, int);
void bch2_two_state_lock(two_state_lock_t *, int);

#endif /* _BCACHEFS_TWO_STATE_LOCK_H */
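A minimal usage sketch of the new lock, for orientation only: it assumes the definitions above and the bch2_pagecache_* wrappers added to fs.h earlier in this commit; the example function itself is hypothetical. State 0 ("add") holders share with each other, state 1 ("block") holders share with each other, and the two states exclude one another, like two read-side classes of an rwsem that conflict.

static void pagecache_lock_example(two_state_lock_t *lock)
{
	bch2_two_state_lock(lock, 0);		/* bch2_pagecache_add_get() */
	/* safe to add pages to the page cache here (buffered IO paths) */
	bch2_two_state_unlock(lock, 0);		/* bch2_pagecache_add_put() */

	bch2_two_state_lock(lock, 1);		/* bch2_pagecache_block_get() */
	/* page cache additions are excluded here (dio, truncate, fallocate) */
	bch2_two_state_unlock(lock, 1);		/* bch2_pagecache_block_put() */
}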
@ -52,7 +52,7 @@
 *
 * note: this rounds towards 0.
 */
inline s64 fast_divpow2(s64 n, u8 d)
s64 fast_divpow2(s64 n, u8 d)
{
	return (n + ((n < 0) ? ((1 << d) - 1) : 0)) >> d;
}
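To make the "rounds towards 0" note concrete, a small illustrative check, assuming fast_divpow2() as defined above (the wrapper function is hypothetical, not part of the change):

static void fast_divpow2_example(void)
{
	BUG_ON(fast_divpow2(-7, 1) != -3);	/* (-7 + 1) >> 1: rounds toward 0 */
	BUG_ON(((s64) -7 >> 1) != -4);		/* a plain arithmetic shift rounds down */
	BUG_ON(fast_divpow2(7, 1) != 3);	/* non-negative n is just a shift */
}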
@ -27,3 +27,8 @@ void prt_printf(struct printbuf *out, const char *fmt, ...)
	prt_vprintf(out, fmt, args);
	va_end(args);
}

void prt_u64(struct printbuf *out, u64 v)
{
	prt_printf(out, "%llu", v);
}
@ -342,7 +342,11 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
	return true;
}

#ifdef CONFIG_LOCK_SPIN_ON_OWNER
/*
 * We don't see stable performance with SIX_LOCK_SPIN_ON_OWNER enabled, so it's
 * off for now:
 */
#ifdef SIX_LOCK_SPIN_ON_OWNER

static inline bool six_optimistic_spin(struct six_lock *lock,
				       struct six_lock_waiter *wait)
@ -66,6 +66,11 @@ void wake_up(wait_queue_head_t *q)
	__wake_up(q, TASK_NORMAL, 1, NULL);
}

void wake_up_all(wait_queue_head_t *q)
{
	__wake_up(q, TASK_NORMAL, 0, NULL);
}

static void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
{
	__wake_up_common(q, mode, nr, 0, NULL);
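The distinction matters for the lock above: wake_up() passes nr_exclusive = 1 and wakes a single waiter, while the new wake_up_all() passes 0, i.e. no limit on how many waiters are woken. A hedged sketch of the intended pairing, reusing only declarations already in this commit (the function names are hypothetical):

static void waiter(wait_queue_head_t *q, atomic_long_t *v)
{
	wait_event(*q, atomic_long_read(v) == 0);	/* possibly many waiters */
}

static void last_put(wait_queue_head_t *q, atomic_long_t *v)
{
	if (atomic_long_sub_return_release(1, v) == 0)
		wake_up_all(q);		/* must wake every waiter, not just one */
}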