Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-22 00:00:03 +03:00)

Update bcachefs sources to 783085c3cc44 kbuild: Allow gcov to be enabled on the command line

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

This commit is contained in:
parent e6b578917f
commit 06611a71a3
@@ -1 +1 @@
-938f680845d1be28979e23aee972dba010c464ba
+783085c3cc440183ba5e987b1aa7791cc1ca42ba
Makefile | 2

@@ -18,7 +18,7 @@ else
   Q = @
 endif

 CFLAGS+=-std=gnu11 -O2 -g -MMD -Wall -fPIC			\
	-Wno-pointer-sign					\
	-Wno-deprecated-declarations				\
	-fno-strict-aliasing					\
cmd_data.c | 18

@@ -5,6 +5,7 @@

 #include "libbcachefs/bcachefs_ioctl.h"
 #include "libbcachefs/btree_cache.h"
+#include "libbcachefs/move.h"

 #include "cmds.h"
 #include "libbcachefs.h"
@@ -55,7 +56,7 @@ int cmd_data_rereplicate(int argc, char *argv[])
		die("too many arguments");

	return bchu_data(bcache_fs_open(fs_path), (struct bch_ioctl_data) {
-		.op		= BCH_DATA_OP_REREPLICATE,
+		.op		= BCH_DATA_OP_rereplicate,
		.start_btree	= 0,
		.start_pos	= POS_MIN,
		.end_btree	= BTREE_ID_NR,
@@ -70,7 +71,7 @@ static void data_job_usage(void)
	     "\n"
	     "Kick off a data job and report progress\n"
	     "\n"
-	     "job: one of scrub, rereplicate, migrate, or rewrite_old_nodes\n"
+	     "job: one of scrub, rereplicate, migrate, rewrite_old_nodes, or drop_extra_replicas\n"
	     "\n"
	     "Options:\n"
	     " -b btree    btree to operate on\n"
@@ -81,14 +82,6 @@ static void data_job_usage(void)
	exit(EXIT_SUCCESS);
 }

-const char * const data_jobs[] = {
-	"scrub",
-	"rereplicate",
-	"migrate",
-	"rewrite_old_nodes",
-	NULL
-};
-
 int cmd_data_job(int argc, char *argv[])
 {
	struct bch_ioctl_data op = {
@@ -121,10 +114,7 @@ int cmd_data_job(int argc, char *argv[])
	if (!job)
		die("please specify which type of job");

-	op.op = read_string_list_or_die(job, data_jobs, "bad job type");
+	op.op = read_string_list_or_die(job, bch2_data_ops_strs, "bad job type");

-	if (op.op == BCH_DATA_OP_SCRUB)
-		die("scrub not implemented yet");
-
	char *fs_path = arg_pop();
	if (!fs_path)
@@ -332,7 +332,7 @@ int cmd_device_evacuate(int argc, char *argv[])
	}

	return bchu_data(fs, (struct bch_ioctl_data) {
-		.op		= BCH_DATA_OP_MIGRATE,
+		.op		= BCH_DATA_OP_migrate,
		.start_btree	= 0,
		.start_pos	= POS_MIN,
		.end_btree	= BTREE_ID_NR,
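Aside (not part of the diff): with the enum rename above, a caller filling struct bch_ioctl_data for the new drop_extra_replicas job would look much like the rereplicate/migrate cases shown in these hunks. This is a hedged sketch mirroring only the fields visible above; anything beyond .end_btree is omitted because it does not appear in this commit.

	/* Hypothetical example, assembled from the hunks above. */
	struct bch_ioctl_data op = {
		.op		= BCH_DATA_OP_drop_extra_replicas,
		.start_btree	= 0,
		.start_pos	= POS_MIN,
		.end_btree	= BTREE_ID_NR,
	};

	return bchu_data(bcache_fs_open(fs_path), op);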
@@ -161,6 +161,13 @@ static inline i_type a_type##_read(const a_type##_t *v)	\
	return __ATOMIC_READ(&v->counter);				\
 }									\
									\
+static inline i_type a_type##_read_acquire(const a_type##_t *v)	\
+{									\
+	i_type ret = __ATOMIC_READ(&v->counter);			\
+	smp_mb__after_atomic();						\
+	return ret;							\
+}									\
+									\
 static inline void a_type##_set(a_type##_t *v, i_type i)		\
 {									\
	return __ATOMIC_SET(&v->counter, i);				\
@@ -104,7 +104,7 @@

 struct closure;
 struct closure_syncer;
-typedef void (closure_fn) (struct closure *);
+typedef void (closure_fn) (struct work_struct *);
 extern struct dentry *bcache_debug;

 struct closure_waitlist {
@@ -254,7 +254,7 @@ static inline void closure_queue(struct closure *cl)
		INIT_WORK(&cl->work, cl->work.func);
		BUG_ON(!queue_work(wq, &cl->work));
	} else
-		cl->fn(cl);
+		cl->fn(&cl->work);
 }

 /**
@@ -309,6 +309,11 @@ static inline void closure_wake_up(struct closure_waitlist *list)
	__closure_wake_up(list);
 }

+#define CLOSURE_CALLBACK(name)	void name(struct work_struct *ws)
+#define closure_type(name, type, member)				\
+	struct closure *cl = container_of(ws, struct closure, work);	\
+	type *name = container_of(cl, type, member)
+
 /**
  * continue_at - jump to another function with barrier
  *
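A brief usage sketch (not part of the diff, derived only from the macro definitions above): a callback that used to take a struct closure * is now declared with CLOSURE_CALLBACK() and recovers its containing object with closure_type(), exactly as the btree_io.c and btree_update_interior.c hunks later in this commit do. The example type is hypothetical.

	struct my_op {
		struct closure	cl;	/* embedded closure */
		int		result;
	};

	static CLOSURE_CALLBACK(my_op_done)
	{
		closure_type(op, struct my_op, cl);	/* declares struct my_op *op */

		op->result = 0;
	}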
@@ -22,10 +22,18 @@ struct shrinker {
	int seeks;	/* seeks to recreate an obj */
	long batch;	/* reclaim batch size, 0 = default */
	struct list_head list;
+	void *private_data;
 };

-int register_shrinker(struct shrinker *, const char *, ...);
-void unregister_shrinker(struct shrinker *);
+static inline void shrinker_free(struct shrinker *s)
+{
+	free(s);
+}
+
+struct shrinker *shrinker_alloc(unsigned int, const char *, ...);
+
+int shrinker_register(struct shrinker *);
+void shrinker_unregister(struct shrinker *);

 void run_shrinkers(gfp_t gfp_mask, bool);
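For context (not part of the diff): with the shim above, a shrinker is heap-allocated, configured, and then registered, instead of being embedded in its parent structure and passed to register_shrinker(). A minimal sketch, assuming only the declarations above and the field usage shown later in this commit; the callbacks are placeholders.

	static unsigned long demo_count(struct shrinker *s, struct shrink_control *sc)
	{
		return 0;	/* placeholder */
	}

	static unsigned long demo_scan(struct shrinker *s, struct shrink_control *sc)
	{
		return 0;	/* placeholder */
	}

	static int demo_shrinker_init(struct bch_fs *c)
	{
		struct shrinker *shrink = shrinker_alloc(0, "%s-demo", c->name);

		if (!shrink)
			return -ENOMEM;

		shrink->count_objects	= demo_count;
		shrink->scan_objects	= demo_scan;
		shrink->private_data	= c;
		shrinker_register(shrink);
		return 0;
	}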
@@ -847,6 +847,19 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
		return ret;
	}

+	/*
+	 * need to know if we're getting called from the invalidate path or
+	 * not:
+	 */
+
+	if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
+	    old_a->cached_sectors) {
+		ret = bch2_update_cached_sectors_list(trans, new->k.p.inode,
+						      -((s64) old_a->cached_sectors));
+		if (ret)
+			return ret;
+	}
+
	return 0;
 }

@@ -1212,7 +1225,7 @@ fsck_err:
	return ret;
 }

-static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_trans *trans,
+static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_trans *trans,
						 struct btree_iter *iter)
 {
	struct bch_fs *c = trans->c;
@@ -1271,24 +1284,6 @@ delete:
	goto out;
 }

-static int bch2_check_discard_freespace_key(struct btree_trans *trans,
-					    struct btree_iter *iter,
-					    struct bpos end)
-{
-	if (!btree_id_is_extents(iter->btree_id)) {
-		return __bch2_check_discard_freespace_key(trans, iter);
-	} else {
-		int ret = 0;
-
-		while (!bkey_eq(iter->pos, end) &&
-		       !(ret = btree_trans_too_many_iters(trans) ?:
-				__bch2_check_discard_freespace_key(trans, iter)))
-			bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
-
-		return ret;
-	}
-}
-
 /*
  * We've already checked that generation numbers in the bucket_gens btree are
  * valid for buckets that exist; this just checks for keys for nonexistent
@@ -1445,12 +1440,40 @@ bkey_err:
	ret = for_each_btree_key2(trans, iter,
			BTREE_ID_need_discard, POS_MIN,
			BTREE_ITER_PREFETCH, k,
-		bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?:
-	      for_each_btree_key2(trans, iter,
-			BTREE_ID_freespace, POS_MIN,
-			BTREE_ITER_PREFETCH, k,
-		bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?:
-	      for_each_btree_key_commit(trans, iter,
+		bch2_check_discard_freespace_key(trans, &iter));
+	if (ret)
+		goto err;
+
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN,
+			     BTREE_ITER_PREFETCH);
+	while (1) {
+		bch2_trans_begin(trans);
+		k = bch2_btree_iter_peek(&iter);
+		if (!k.k)
+			break;
+
+		ret = bkey_err(k) ?:
+			bch2_check_discard_freespace_key(trans, &iter);
+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+			ret = 0;
+			continue;
+		}
+		if (ret) {
+			struct printbuf buf = PRINTBUF;
+			bch2_bkey_val_to_text(&buf, c, k);
+
+			bch_err(c, "while checking %s", buf.buf);
+			printbuf_exit(&buf);
+			break;
+		}
+
+		bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos));
+	}
+	bch2_trans_iter_exit(trans, &iter);
+	if (ret)
+		goto err;
+
+	ret = for_each_btree_key_commit(trans, iter,
			BTREE_ID_bucket_gens, POS_MIN,
			BTREE_ITER_PREFETCH, k,
		NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
@@ -1802,7 +1825,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
	unsigned i;
	int ret = 0;

-	ret = bch2_btree_write_buffer_flush(trans);
+	ret = bch2_btree_write_buffer_tryflush(trans);
	if (ret)
		goto err;

@@ -1297,6 +1297,30 @@ out:
	return wp;
 }

+static noinline void
+deallocate_extra_replicas(struct bch_fs *c,
+			  struct open_buckets *ptrs,
+			  struct open_buckets *ptrs_no_use,
+			  unsigned extra_replicas)
+{
+	struct open_buckets ptrs2 = { 0 };
+	struct open_bucket *ob;
+	unsigned i;
+
+	open_bucket_for_each(c, ptrs, ob, i) {
+		unsigned d = bch_dev_bkey_exists(c, ob->dev)->mi.durability;
+
+		if (d && d <= extra_replicas) {
+			extra_replicas -= d;
+			ob_push(c, ptrs_no_use, ob);
+		} else {
+			ob_push(c, &ptrs2, ob);
+		}
+	}
+
+	*ptrs = ptrs2;
+}
+
 /*
  * Get us an open_bucket we can allocate from, return with it locked:
  */
@@ -1382,6 +1406,9 @@ alloc_done:
	if (ret)
		goto err;

+	if (nr_effective > nr_replicas)
+		deallocate_extra_replicas(c, &ptrs, &wp->ptrs, nr_effective - nr_replicas);
+
	/* Free buckets we didn't use: */
	open_bucket_for_each(c, &wp->ptrs, ob, i)
		open_bucket_free_unused(c, ob);
|
|||||||
x(blocked_journal_max_in_flight) \
|
x(blocked_journal_max_in_flight) \
|
||||||
x(blocked_allocate) \
|
x(blocked_allocate) \
|
||||||
x(blocked_allocate_open_bucket) \
|
x(blocked_allocate_open_bucket) \
|
||||||
|
x(blocked_write_buffer_full) \
|
||||||
x(nocow_lock_contended)
|
x(nocow_lock_contended)
|
||||||
|
|
||||||
enum bch_time_stats {
|
enum bch_time_stats {
|
||||||
@ -640,6 +641,8 @@ struct journal_keys {
|
|||||||
size_t gap;
|
size_t gap;
|
||||||
size_t nr;
|
size_t nr;
|
||||||
size_t size;
|
size_t size;
|
||||||
|
atomic_t ref;
|
||||||
|
bool initial_ref_held;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct btree_trans_buf {
|
struct btree_trans_buf {
|
||||||
@ -664,7 +667,8 @@ struct btree_trans_buf {
|
|||||||
x(invalidate) \
|
x(invalidate) \
|
||||||
x(delete_dead_snapshots) \
|
x(delete_dead_snapshots) \
|
||||||
x(snapshot_delete_pagecache) \
|
x(snapshot_delete_pagecache) \
|
||||||
x(sysfs)
|
x(sysfs) \
|
||||||
|
x(btree_write_buffer)
|
||||||
|
|
||||||
enum bch_write_ref {
|
enum bch_write_ref {
|
||||||
#define x(n) BCH_WRITE_REF_##n,
|
#define x(n) BCH_WRITE_REF_##n,
|
||||||
@ -1064,6 +1068,16 @@ static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool __bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
|
||||||
|
{
|
||||||
|
#ifdef BCH_WRITE_REF_DEBUG
|
||||||
|
return !test_bit(BCH_FS_GOING_RO, &c->flags) &&
|
||||||
|
atomic_long_inc_not_zero(&c->writes[ref]);
|
||||||
|
#else
|
||||||
|
return percpu_ref_tryget(&c->writes);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
|
static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
|
||||||
{
|
{
|
||||||
#ifdef BCH_WRITE_REF_DEBUG
|
#ifdef BCH_WRITE_REF_DEBUG
|
||||||
|
@@ -303,6 +303,13 @@ struct bkey_i {
	struct bch_val	v;
 };

+#define POS_KEY(_pos)							\
+((struct bkey) {							\
+	.u64s		= BKEY_U64s,					\
+	.format		= KEY_FORMAT_CURRENT,				\
+	.p		= _pos,						\
+})
+
 #define KEY(_inode, _offset, _size)					\
 ((struct bkey) {							\
	.u64s		= BKEY_U64s,					\
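For illustration (not part of the diff): POS_KEY() builds a zero-size key at a position; later in this commit it is used to initialize iter->k in bch2_trans_iter_init_common(). Expanded for a concrete position:

	/* POS_KEY(POS_MIN) is equivalent to: */
	struct bkey k = {
		.u64s	= BKEY_U64s,
		.format	= KEY_FORMAT_CURRENT,
		.p	= POS_MIN,
	};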
@@ -1436,7 +1443,7 @@ struct bch_sb_field_replicas_v0 {
	struct bch_replicas_entry_v0 entries[];
 } __packed __aligned(8);

-struct bch_replicas_entry {
+struct bch_replicas_entry_v1 {
	__u8			data_type;
	__u8			nr_devs;
	__u8			nr_required;
@@ -1448,7 +1455,7 @@ struct bch_replicas_entry {

 struct bch_sb_field_replicas {
	struct bch_sb_field	field;
-	struct bch_replicas_entry entries[];
+	struct bch_replicas_entry_v1 entries[];
 } __packed __aligned(8);

 /* BCH_SB_FIELD_quota: */
@@ -2124,7 +2131,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
	x(clock,		7)	\
	x(dev_usage,		8)	\
	x(log,			9)	\
-	x(overwrite,		10)
+	x(overwrite,		10)	\
+	x(write_buffer_keys,	11)

 enum {
 #define x(f, nr)	BCH_JSET_ENTRY_##f	= nr,
@@ -2174,7 +2182,7 @@ struct jset_entry_usage {
 struct jset_entry_data_usage {
	struct jset_entry	entry;
	__le64			v;
-	struct bch_replicas_entry r;
+	struct bch_replicas_entry_v1 r;
 } __packed;

 struct jset_entry_clock {
@@ -173,12 +173,18 @@ struct bch_ioctl_disk_set_state {
	__u64			dev;
 };

+#define BCH_DATA_OPS()			\
+	x(scrub,		0)	\
+	x(rereplicate,		1)	\
+	x(migrate,		2)	\
+	x(rewrite_old_nodes,	3)	\
+	x(drop_extra_replicas,	4)
+
 enum bch_data_ops {
-	BCH_DATA_OP_SCRUB		= 0,
-	BCH_DATA_OP_REREPLICATE		= 1,
-	BCH_DATA_OP_MIGRATE		= 2,
-	BCH_DATA_OP_REWRITE_OLD_NODES	= 3,
-	BCH_DATA_OP_NR			= 4,
+#define x(t, n) BCH_DATA_OP_##t = n,
+	BCH_DATA_OPS()
+#undef x
+	BCH_DATA_OP_NR
 };

 /*
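For clarity (not part of the diff): expanding the x-macro above by hand yields the same enumerators as the old hand-written list, but in lower case, which is what the renames elsewhere in this commit (BCH_DATA_OP_rereplicate, BCH_DATA_OP_migrate) rely on.

	enum bch_data_ops {
		BCH_DATA_OP_scrub		= 0,
		BCH_DATA_OP_rereplicate		= 1,
		BCH_DATA_OP_migrate		= 2,
		BCH_DATA_OP_rewrite_old_nodes	= 3,
		BCH_DATA_OP_drop_extra_replicas	= 4,
		BCH_DATA_OP_NR
	};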
@@ -237,7 +243,7 @@ struct bch_ioctl_data_event {

 struct bch_replicas_usage {
	__u64			sectors;
-	struct bch_replicas_entry r;
+	struct bch_replicas_entry_v1 r;
 } __packed;

 static inline struct bch_replicas_usage *
@@ -318,8 +318,7 @@ static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
 static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
					   struct shrink_control *sc)
 {
-	struct bch_fs *c = container_of(shrink, struct bch_fs,
-					btree_cache.shrink);
+	struct bch_fs *c = shrink->private_data;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b, *t;
	unsigned long nr = sc->nr_to_scan;
@@ -420,8 +419,7 @@ out_nounlock:
 static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
					    struct shrink_control *sc)
 {
-	struct bch_fs *c = container_of(shrink, struct bch_fs,
-					btree_cache.shrink);
+	struct bch_fs *c = shrink->private_data;
	struct btree_cache *bc = &c->btree_cache;

	if (bch2_btree_shrinker_disabled)
@@ -432,8 +430,7 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,

 static void bch2_btree_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
 {
-	struct bch_fs *c = container_of(shrink, struct bch_fs,
-					btree_cache.shrink);
+	struct bch_fs *c = shrink->private_data;
	char *cbuf;
	size_t buflen = seq_buf_get_buf(s, &cbuf);
	struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);
@@ -448,7 +445,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
	struct btree *b;
	unsigned i, flags;

-	unregister_shrinker(&bc->shrink);
+	shrinker_free(bc->shrink);

	/* vfree() can allocate memory: */
	flags = memalloc_nofs_save();
@@ -502,6 +499,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 int bch2_fs_btree_cache_init(struct bch_fs *c)
 {
	struct btree_cache *bc = &c->btree_cache;
+	struct shrinker *shrink;
	unsigned i;
	int ret = 0;

@@ -521,13 +519,16 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)

	mutex_init(&c->verify_lock);

-	bc->shrink.count_objects	= bch2_btree_cache_count;
-	bc->shrink.scan_objects		= bch2_btree_cache_scan;
-	bc->shrink.to_text		= bch2_btree_cache_shrinker_to_text;
-	bc->shrink.seeks		= 4;
-	ret = register_shrinker(&bc->shrink, "%s-btree_cache", c->name);
-	if (ret)
+	shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
+	if (!shrink)
		goto err;
+	bc->shrink = shrink;
+	shrink->count_objects	= bch2_btree_cache_count;
+	shrink->scan_objects	= bch2_btree_cache_scan;
+	shrink->to_text		= bch2_btree_cache_shrinker_to_text;
+	shrink->seeks		= 4;
+	shrink->private_data	= c;
+	shrinker_register(shrink);

	return 0;
 err:
@@ -1287,7 +1287,7 @@ static int bch2_gc_done(struct bch_fs *c,
	}

	for (i = 0; i < c->replicas.nr; i++) {
-		struct bch_replicas_entry *e =
+		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(&c->replicas, i);

		if (metadata_only &&
@@ -1358,10 +1358,9 @@ static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *
	return offset;
 }

-static void btree_node_read_all_replicas_done(struct closure *cl)
+static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
 {
-	struct btree_node_read_all *ra =
-		container_of(cl, struct btree_node_read_all, cl);
+	closure_type(ra, struct btree_node_read_all, cl);
	struct bch_fs *c = ra->c;
	struct btree *b = ra->b;
	struct printbuf buf = PRINTBUF;
@@ -1567,7 +1566,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool

	if (sync) {
		closure_sync(&ra->cl);
-		btree_node_read_all_replicas_done(&ra->cl);
+		btree_node_read_all_replicas_done(&ra->cl.work);
	} else {
		continue_at(&ra->cl, btree_node_read_all_replicas_done,
			    c->io_complete_wq);
@@ -1854,19 +1854,11 @@ static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
					      struct btree_iter *iter,
					      struct bpos end_pos)
 {
-	struct bkey_i *k;
-
-	if (bpos_lt(iter->path->pos, iter->journal_pos))
-		iter->journal_idx = 0;
-
-	k = bch2_journal_keys_peek_upto(trans->c, iter->btree_id,
-					iter->path->level,
-					iter->path->pos,
-					end_pos,
-					&iter->journal_idx);
-
-	iter->journal_pos = k ? k->k.p : end_pos;
-	return k;
+	return bch2_journal_keys_peek_upto(trans->c, iter->btree_id,
+					   iter->path->level,
+					   iter->path->pos,
+					   end_pos,
+					   &iter->journal_idx);
 }

 static noinline
@@ -2874,7 +2866,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
	trans->fn_idx = fn_idx;
	trans->locking_wait.task = current;
	trans->journal_replay_not_finished =
-		!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
+		unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) &&
+		atomic_inc_not_zero(&c->journal_keys.ref);
	closure_init_stack(&trans->ref);

	s = btree_trans_stats(trans);
@@ -2991,6 +2984,9 @@ void bch2_trans_put(struct btree_trans *trans)
		kfree(trans->fs_usage_deltas);
	}

+	if (unlikely(trans->journal_replay_not_finished))
+		bch2_journal_keys_put(c);
+
	if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
		mempool_free(trans->mem, &c->btree_trans_mem_pool);
	else
@@ -445,14 +445,16 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
					       unsigned flags,
					       unsigned long ip)
 {
-	memset(iter, 0, sizeof(*iter));
-	iter->trans	= trans;
-	iter->btree_id	= btree_id;
-	iter->flags	= flags;
-	iter->snapshot	= pos.snapshot;
-	iter->pos	= pos;
-	iter->k.p	= pos;
+	iter->trans		= trans;
+	iter->update_path	= NULL;
+	iter->key_cache_path	= NULL;
+	iter->btree_id		= btree_id;
+	iter->min_depth		= 0;
+	iter->flags		= flags;
+	iter->snapshot		= pos.snapshot;
+	iter->pos		= pos;
+	iter->k			= POS_KEY(pos);
+	iter->journal_idx	= 0;
 #ifdef CONFIG_BCACHEFS_DEBUG
	iter->ip_allocated = ip;
 #endif
@@ -73,6 +73,7 @@ static size_t bch2_journal_key_search(struct journal_keys *keys,
	return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos));
 }

+/* Returns first non-overwritten key >= search key: */
 struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id,
					   unsigned level, struct bpos pos,
					   struct bpos end_pos, size_t *idx)
@@ -80,16 +81,32 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree
	struct journal_keys *keys = &c->journal_keys;
	unsigned iters = 0;
	struct journal_key *k;

+	BUG_ON(*idx > keys->nr);
 search:
	if (!*idx)
		*idx = __bch2_journal_key_search(keys, btree_id, level, pos);

+	while (*idx &&
+	       __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
+		--(*idx);
+		iters++;
+		if (iters == 10) {
+			*idx = 0;
+			goto search;
+		}
+	}
+
	while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
		if (__journal_key_cmp(btree_id, level, end_pos, k) < 0)
			return NULL;

-		if (__journal_key_cmp(btree_id, level, pos, k) <= 0 &&
-		    !k->overwritten)
+		if (k->overwritten) {
+			(*idx)++;
+			continue;
+		}
+
+		if (__journal_key_cmp(btree_id, level, pos, k) <= 0)
			return k->k;

		(*idx)++;
@@ -189,10 +206,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
		/* Since @keys was full, there was no gap: */
		memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
		kvfree(keys->d);
-		*keys = new_keys;
+		keys->d		= new_keys.d;
+		keys->nr	= new_keys.nr;
+		keys->size	= new_keys.size;

		/* And now the gap is at the end: */
		keys->gap = keys->nr;
	}

	journal_iters_move_gap(c, keys->gap, idx);
@@ -415,10 +434,16 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
		cmp_int(l->journal_offset, r->journal_offset);
 }

-void bch2_journal_keys_free(struct journal_keys *keys)
+void bch2_journal_keys_put(struct bch_fs *c)
 {
+	struct journal_keys *keys = &c->journal_keys;
	struct journal_key *i;

+	BUG_ON(atomic_read(&keys->ref) <= 0);
+
+	if (!atomic_dec_and_test(&keys->ref))
+		return;
+
	move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
	keys->gap = keys->nr;

@@ -429,6 +454,8 @@ void bch2_journal_keys_free(struct journal_keys *keys)
	kvfree(keys->d);
	keys->d = NULL;
	keys->nr = keys->gap = keys->size = 0;
+
+	bch2_journal_entries_free(c);
 }

 static void __journal_keys_sort(struct journal_keys *keys)
@@ -49,7 +49,15 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
						struct bch_fs *,
						struct btree *);

-void bch2_journal_keys_free(struct journal_keys *);
+void bch2_journal_keys_put(struct bch_fs *);
+
+static inline void bch2_journal_keys_put_initial(struct bch_fs *c)
+{
+	if (c->journal_keys.initial_ref_held)
+		bch2_journal_keys_put(c);
+	c->journal_keys.initial_ref_held = false;
+}

 void bch2_journal_entries_free(struct bch_fs *);

 int bch2_journal_keys_sort(struct bch_fs *);
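A usage sketch (an assumption, not taken from the diff): the journal keys table now lives until its reference count drops to zero; the initial reference is dropped exactly once via the helper above, while transactions that raced with replay drop the references they took in __bch2_trans_get().

	/* Hypothetical caller, illustrating only the intended pairing. */
	static void finish_replay(struct bch_fs *c)
	{
		/* ... replay journal keys into the btree ... */
		bch2_journal_keys_put_initial(c);	/* drop the initial reference */
	}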
@@ -646,11 +646,19 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
	if (journal_seq && ck->journal.seq != journal_seq)
		goto out;

+	trans->journal_res.seq = ck->journal.seq;
+
	/*
-	 * Since journal reclaim depends on us making progress here, and the
-	 * allocator/copygc depend on journal reclaim making progress, we need
-	 * to be using alloc reserves:
+	 * If we're at the end of the journal, we really want to free up space
+	 * in the journal right away - we don't want to pin that old journal
+	 * sequence number with a new btree node write, we want to re-journal
+	 * the update
	 */
+	if (ck->journal.seq == journal_last_seq(j))
+		commit_flags |= BCH_WATERMARK_reclaim;
+	else
+		commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
+
	ret   = bch2_btree_iter_traverse(&b_iter) ?:
		bch2_trans_update(trans, &b_iter, ck->k,
				  BTREE_UPDATE_KEY_CACHE_RECLAIM|
@@ -659,9 +667,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
		bch2_trans_commit(trans, NULL, NULL,
				  BCH_TRANS_COMMIT_no_check_rw|
				  BCH_TRANS_COMMIT_no_enospc|
-				  (ck->journal.seq == journal_last_seq(j)
-				   ? BCH_WATERMARK_reclaim
-				   : 0)|
				  commit_flags);

	bch2_fs_fatal_err_on(ret &&
@@ -830,8 +835,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
 static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
					       struct shrink_control *sc)
 {
-	struct bch_fs *c = container_of(shrink, struct bch_fs,
-					btree_key_cache.shrink);
+	struct bch_fs *c = shrink->private_data;
	struct btree_key_cache *bc = &c->btree_key_cache;
	struct bucket_table *tbl;
	struct bkey_cached *ck, *t;
@@ -932,8 +936,7 @@ out:
 static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
						struct shrink_control *sc)
 {
-	struct bch_fs *c = container_of(shrink, struct bch_fs,
-					btree_key_cache.shrink);
+	struct bch_fs *c = shrink->private_data;
	struct btree_key_cache *bc = &c->btree_key_cache;
	long nr = atomic_long_read(&bc->nr_keys) -
		  atomic_long_read(&bc->nr_dirty);
@@ -953,7 +956,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
	int cpu;
 #endif

-	unregister_shrinker(&bc->shrink);
+	shrinker_free(bc->shrink);

	mutex_lock(&bc->lock);

@@ -1028,8 +1031,8 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)

 static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
 {
-	struct btree_key_cache *bc =
-		container_of(shrink, struct btree_key_cache, shrink);
+	struct bch_fs *c = shrink->private_data;
+	struct btree_key_cache *bc = &c->btree_key_cache;
	char *cbuf;
	size_t buflen = seq_buf_get_buf(s, &cbuf);
	struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);
@@ -1041,6 +1044,7 @@ static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shri
 int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 {
	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
+	struct shrinker *shrink;

 #ifdef __KERNEL__
	bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist);
@@ -1053,12 +1057,16 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)

	bc->table_init_done = true;

-	bc->shrink.seeks		= 0;
-	bc->shrink.count_objects	= bch2_btree_key_cache_count;
-	bc->shrink.scan_objects		= bch2_btree_key_cache_scan;
-	bc->shrink.to_text		= bch2_btree_key_cache_shrinker_to_text;
-	if (register_shrinker(&bc->shrink, "%s-btree_key_cache", c->name))
+	shrink = shrinker_alloc(0, "%s-btree_key_cache", c->name);
+	if (!shrink)
		return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+	bc->shrink = shrink;
+	shrink->seeks		= 0;
+	shrink->count_objects	= bch2_btree_key_cache_count;
+	shrink->scan_objects	= bch2_btree_key_cache_scan;
+	shrink->to_text		= bch2_btree_key_cache_shrinker_to_text;
+	shrink->private_data	= c;
+	shrinker_register(shrink);
	return 0;
 }
@@ -17,7 +17,7 @@ struct btree_key_cache {
	struct list_head	freed_nonpcpu;
	size_t			nr_freed_nonpcpu;

-	struct shrinker		shrink;
+	struct shrinker		*shrink;
	unsigned		shrink_iter;
	struct btree_key_cache_freelist __percpu *pcpu_freed;

@@ -660,10 +660,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
			i->k->k.needs_whiteout = false;
	}

-	if (trans->nr_wb_updates &&
-	    trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size)
-		return -BCH_ERR_btree_insert_need_flush_buffer;
-
	/*
	 * Don't get journal reservation until after we know insert will
	 * succeed:
@@ -698,14 +694,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
	    bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
		return -BCH_ERR_btree_insert_need_mark_replicas;

-	if (trans->nr_wb_updates) {
-		EBUG_ON(flags & BCH_TRANS_COMMIT_no_journal_res);
-
-		ret = bch2_btree_insert_keys_write_buffer(trans);
-		if (ret)
-			goto revert_fs_usage;
-	}
-
	h = trans->hooks;
	while (h) {
		ret = h->fn(trans, h);
@@ -767,7 +755,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,

		trans_for_each_wb_update(trans, wb) {
			entry = bch2_journal_add_entry(j, &trans->journal_res,
-					       BCH_JSET_ENTRY_btree_keys,
+					       BCH_JSET_ENTRY_write_buffer_keys,
					       wb->btree, 0,
					       wb->k.k.u64s);
			bkey_copy((struct bkey_i *) entry->start, &wb->k);
@@ -951,30 +939,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,

		ret = bch2_trans_relock(trans);
		break;
-	case -BCH_ERR_btree_insert_need_flush_buffer: {
-		struct btree_write_buffer *wb = &c->btree_write_buffer;
-
-		ret = 0;
-
-		if (wb->state.nr > wb->size * 3 / 4) {
-			bch2_trans_unlock(trans);
-			mutex_lock(&wb->flush_lock);
-
-			if (wb->state.nr > wb->size * 3 / 4) {
-				bch2_trans_begin(trans);
-				ret = __bch2_btree_write_buffer_flush(trans,
-						flags|BCH_TRANS_COMMIT_no_check_rw, true);
-				if (!ret) {
-					trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
-					ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
-				}
-			} else {
-				mutex_unlock(&wb->flush_lock);
-				ret = bch2_trans_relock(trans);
-			}
-		}
-		break;
-	}
	default:
		BUG_ON(ret >= 0);
		break;
@@ -1073,20 +1037,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
		goto out_reset;
	}

-	if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 &&
-	    mutex_trylock(&c->btree_write_buffer.flush_lock)) {
-		bch2_trans_begin(trans);
-		bch2_trans_unlock(trans);
-
-		ret = __bch2_btree_write_buffer_flush(trans,
-					flags|BCH_TRANS_COMMIT_no_check_rw, true);
-		if (!ret) {
-			trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
-			ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
-		}
-		goto out;
-	}
-
	EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));

	trans->journal_u64s = trans->extra_journal_entries.nr;
@@ -173,7 +173,7 @@ struct btree_cache {
	unsigned		not_freed_will_make_reachable;
	unsigned		not_freed_access_bit;
	atomic_t		dirty;
-	struct shrinker		shrink;
+	struct shrinker		*shrink;

	/*
	 * If we need to allocate memory for a new btree node and that
@@ -297,8 +297,7 @@ struct btree_iter {
	struct btree_path	*key_cache_path;

	enum btree_id		btree_id:8;
-	unsigned		min_depth:3;
-	unsigned		advanced:1;
+	u8			min_depth;

	/* btree_iter_copy starts here: */
	u16			flags;
@@ -315,7 +314,6 @@ struct btree_iter {

	/* BTREE_ITER_WITH_JOURNAL: */
	size_t			journal_idx;
-	struct bpos		journal_pos;
 #ifdef TRACK_PATH_ALLOCATED
	unsigned long		ip_allocated;
 #endif
@@ -774,9 +774,9 @@ static void btree_interior_update_work(struct work_struct *work)
	}
 }

-static void btree_update_set_nodes_written(struct closure *cl)
+static CLOSURE_CALLBACK(btree_update_set_nodes_written)
 {
-	struct btree_update *as = container_of(cl, struct btree_update, cl);
+	closure_type(as, struct btree_update, cl);
	struct bch_fs *c = as->c;

	mutex_lock(&c->btree_interior_update_lock);
@ -7,43 +7,132 @@
|
|||||||
#include "btree_write_buffer.h"
|
#include "btree_write_buffer.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "journal.h"
|
#include "journal.h"
|
||||||
|
#include "journal_io.h"
|
||||||
#include "journal_reclaim.h"
|
#include "journal_reclaim.h"
|
||||||
|
|
||||||
#include <linux/sort.h>
|
#include <linux/prefetch.h>
|
||||||
|
|
||||||
static int bch2_btree_write_buffer_journal_flush(struct journal *,
|
static int bch2_btree_write_buffer_journal_flush(struct journal *,
|
||||||
struct journal_entry_pin *, u64);
|
struct journal_entry_pin *, u64);
|
||||||
|
|
||||||
static int btree_write_buffered_key_cmp(const void *_l, const void *_r)
|
static int bch2_journal_keys_to_write_buffer(struct bch_fs *, struct journal_buf *);
|
||||||
{
|
|
||||||
const struct btree_write_buffered_key *l = _l;
|
|
||||||
const struct btree_write_buffered_key *r = _r;
|
|
||||||
|
|
||||||
return cmp_int(l->btree, r->btree) ?:
|
static inline bool __wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
|
||||||
bpos_cmp(l->k.k.p, r->k.k.p) ?:
|
{
|
||||||
cmp_int(l->journal_seq, r->journal_seq) ?:
|
return (cmp_int(l->hi, r->hi) ?:
|
||||||
cmp_int(l->journal_offset, r->journal_offset);
|
cmp_int(l->mi, r->mi) ?:
|
||||||
|
cmp_int(l->lo, r->lo)) >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int btree_write_buffered_journal_cmp(const void *_l, const void *_r)
|
static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
|
||||||
{
|
{
|
||||||
const struct btree_write_buffered_key *l = _l;
|
#ifdef CONFIG_X86_64
|
||||||
const struct btree_write_buffered_key *r = _r;
|
int cmp;
|
||||||
|
|
||||||
return cmp_int(l->journal_seq, r->journal_seq);
|
asm(".intel_syntax noprefix;"
|
||||||
|
"mov rax, [%[l]];"
|
||||||
|
"sub rax, [%[r]];"
|
||||||
|
"mov rax, [%[l] + 8];"
|
||||||
|
"sbb rax, [%[r] + 8];"
|
||||||
|
"mov rax, [%[l] + 16];"
|
||||||
|
"sbb rax, [%[r] + 16];"
|
||||||
|
".att_syntax prefix;"
|
||||||
|
: "=@ccae" (cmp)
|
||||||
|
: [l] "r" (l), [r] "r" (r)
|
||||||
|
: "rax", "cc");
|
||||||
|
|
||||||
|
EBUG_ON(cmp != __wb_key_cmp(l, r));
|
||||||
|
return cmp;
|
||||||
|
#else
|
||||||
|
return __wb_key_cmp(l, r);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
|
/* Compare excluding idx, the low 24 bits: */
|
||||||
struct btree_iter *iter,
|
static inline bool wb_key_eq(const void *_l, const void *_r)
|
||||||
struct btree_write_buffered_key *wb,
|
{
|
||||||
unsigned commit_flags,
|
const struct wb_key_ref *l = _l;
|
||||||
bool *write_locked,
|
const struct wb_key_ref *r = _r;
|
||||||
size_t *fast)
|
|
||||||
|
return !((l->hi ^ r->hi)|
|
||||||
|
(l->mi ^ r->mi)|
|
||||||
|
((l->lo >> 24) ^ (r->lo >> 24)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static noinline void wb_sort(struct wb_key_ref *base, size_t num)
|
||||||
|
{
|
||||||
|
size_t n = num, a = num / 2;
|
||||||
|
|
||||||
|
if (!a) /* num < 2 || size == 0 */
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
size_t b, c, d;
|
||||||
|
|
||||||
|
if (a) /* Building heap: sift down --a */
|
||||||
|
--a;
|
||||||
|
else if (--n) /* Sorting: Extract root to --n */
|
||||||
|
swap(base[0], base[n]);
|
||||||
|
else /* Sort complete */
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sift element at "a" down into heap. This is the
|
||||||
|
* "bottom-up" variant, which significantly reduces
|
||||||
|
* calls to cmp_func(): we find the sift-down path all
|
||||||
|
* the way to the leaves (one compare per level), then
|
||||||
|
* backtrack to find where to insert the target element.
|
||||||
|
*
|
||||||
|
* Because elements tend to sift down close to the leaves,
|
||||||
|
* this uses fewer compares than doing two per level
|
||||||
|
* on the way down. (A bit more than half as many on
|
||||||
|
* average, 3/4 worst-case.)
|
||||||
|
*/
|
||||||
|
for (b = a; c = 2*b + 1, (d = c + 1) < n;)
|
||||||
|
b = wb_key_cmp(base + c, base + d) ? c : d;
|
||||||
|
if (d == n) /* Special case last leaf with no sibling */
|
||||||
|
b = c;
|
||||||
|
|
||||||
|
/* Now backtrack from "b" to the correct location for "a" */
|
||||||
|
while (b != a && wb_key_cmp(base + a, base + b))
|
||||||
|
b = (b - 1) / 2;
|
||||||
|
c = b; /* Where "a" belongs */
|
||||||
|
while (b != a) { /* Shift it into place */
|
||||||
|
b = (b - 1) / 2;
|
||||||
|
swap(base[b], base[c]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static noinline int wb_flush_one_slowpath(struct btree_trans *trans,
|
||||||
|
struct btree_iter *iter,
|
||||||
|
struct btree_write_buffered_key *wb)
|
||||||
|
{
|
||||||
|
bch2_btree_node_unlock_write(trans, iter->path, iter->path->l[0].b);
|
||||||
|
|
||||||
|
trans->journal_res.seq = wb->journal_seq;
|
||||||
|
|
||||||
|
return bch2_trans_update(trans, iter, &wb->k,
|
||||||
|
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||||
|
bch2_trans_commit(trans, NULL, NULL,
|
||||||
|
BCH_TRANS_COMMIT_no_enospc|
|
||||||
|
BCH_TRANS_COMMIT_no_check_rw|
|
||||||
|
BCH_TRANS_COMMIT_no_journal_res|
|
||||||
|
BCH_TRANS_COMMIT_journal_reclaim);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *iter,
|
||||||
|
struct btree_write_buffered_key *wb,
|
||||||
|
bool *write_locked, size_t *fast)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
struct btree_path *path;
|
struct btree_path *path;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
EBUG_ON(!wb->journal_seq);
|
||||||
|
EBUG_ON(!c->btree_write_buffer.flushing.pin.seq);
|
||||||
|
EBUG_ON(c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);
|
||||||
|
|
||||||
ret = bch2_btree_iter_traverse(iter);
|
ret = bch2_btree_iter_traverse(iter);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
@ -66,46 +155,14 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
|
|||||||
*write_locked = true;
|
*write_locked = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s)) {
|
if (unlikely(!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s))) {
|
||||||
bch2_btree_node_unlock_write(trans, path, path->l[0].b);
|
|
||||||
*write_locked = false;
|
*write_locked = false;
|
||||||
goto trans_commit;
|
return wb_flush_one_slowpath(trans, iter, wb);
|
||||||
}
|
}
|
||||||
|
|
||||||
bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
|
bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
|
||||||
(*fast)++;
|
(*fast)++;
|
||||||
return 0;
|
return 0;
|
||||||
trans_commit:
|
|
||||||
trans->journal_res.seq = wb->journal_seq;
|
|
||||||
|
|
||||||
return bch2_trans_update(trans, iter, &wb->k,
|
|
||||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
|
||||||
bch2_trans_commit(trans, NULL, NULL,
|
|
||||||
commit_flags|
|
|
||||||
BCH_TRANS_COMMIT_no_check_rw|
|
|
||||||
BCH_TRANS_COMMIT_no_enospc|
|
|
||||||
BCH_TRANS_COMMIT_no_journal_res|
|
|
||||||
BCH_TRANS_COMMIT_journal_reclaim);
|
|
||||||
}
|
|
||||||
|
|
||||||
static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
|
|
||||||
{
|
|
||||||
union btree_write_buffer_state old, new;
|
|
||||||
u64 v = READ_ONCE(wb->state.v);
|
|
||||||
|
|
||||||
do {
|
|
||||||
old.v = new.v = v;
|
|
||||||
|
|
||||||
new.nr = 0;
|
|
||||||
new.idx++;
|
|
||||||
} while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
|
|
||||||
|
|
||||||
while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1)
|
|
||||||
cpu_relax();
|
|
||||||
|
|
||||||
smp_mb();
|
|
||||||
|
|
||||||
return old;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -137,35 +194,79 @@ btree_write_buffered_insert(struct btree_trans *trans,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
|
static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb)
|
||||||
bool locked)
|
{
|
||||||
|
struct bch_fs *c = container_of(wb, struct bch_fs, btree_write_buffer);
|
||||||
|
struct journal *j = &c->journal;
|
||||||
|
|
||||||
|
if (!wb->inc.keys.nr)
|
||||||
|
return;
|
||||||
|
|
||||||
|
bch2_journal_pin_add(j, wb->inc.keys.data[0].journal_seq, &wb->flushing.pin,
|
||||||
|
bch2_btree_write_buffer_journal_flush);
|
||||||
|
|
||||||
|
darray_resize(&wb->flushing.keys, min_t(size_t, 1U << 20, wb->flushing.keys.nr + wb->inc.keys.nr));
|
||||||
|
darray_resize(&wb->sorted, wb->flushing.keys.size);
|
||||||
|
|
||||||
|
if (!wb->flushing.keys.nr && wb->sorted.size >= wb->inc.keys.nr) {
|
||||||
|
swap(wb->flushing.keys, wb->inc.keys);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t nr = min(darray_room(wb->flushing.keys),
|
||||||
|
wb->sorted.size - wb->flushing.keys.nr);
|
||||||
|
nr = min(nr, wb->inc.keys.nr);
|
||||||
|
|
||||||
|
memcpy(&darray_top(wb->flushing.keys),
|
||||||
|
wb->inc.keys.data,
|
||||||
|
sizeof(wb->inc.keys.data[0]) * nr);
|
||||||
|
|
||||||
|
memmove(wb->inc.keys.data,
|
||||||
|
wb->inc.keys.data + nr,
|
||||||
|
sizeof(wb->inc.keys.data[0]) * (wb->inc.keys.nr - nr));
|
||||||
|
|
||||||
|
wb->flushing.keys.nr += nr;
|
||||||
|
wb->inc.keys.nr -= nr;
|
||||||
|
out:
|
||||||
|
if (!wb->inc.keys.nr)
|
||||||
|
bch2_journal_pin_drop(j, &wb->inc.pin);
|
||||||
|
else
|
||||||
|
bch2_journal_pin_update(j, wb->inc.keys.data[0].journal_seq, &wb->inc.pin,
|
||||||
|
bch2_btree_write_buffer_journal_flush);
|
||||||
|
|
||||||
|
if (j->watermark) {
|
||||||
|
spin_lock(&j->lock);
|
||||||
|
bch2_journal_set_watermark(j);
|
||||||
|
spin_unlock(&j->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
struct journal *j = &c->journal;
|
struct journal *j = &c->journal;
|
||||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||||
struct journal_entry_pin pin;
|
struct wb_key_ref *i;
|
||||||
struct btree_write_buffered_key *i, *keys;
|
|
||||||
struct btree_iter iter = { NULL };
|
struct btree_iter iter = { NULL };
|
||||||
size_t nr = 0, skipped = 0, fast = 0, slowpath = 0;
|
size_t skipped = 0, fast = 0, slowpath = 0;
|
||||||
bool write_locked = false;
|
bool write_locked = false;
|
||||||
union btree_write_buffer_state s;
|
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
memset(&pin, 0, sizeof(pin));
|
bch2_trans_unlock(trans);
|
||||||
|
bch2_trans_begin(trans);
|
||||||
|
|
||||||
if (!locked && !mutex_trylock(&wb->flush_lock))
|
mutex_lock(&wb->inc.lock);
|
||||||
return 0;
|
move_keys_from_inc_to_flushing(wb);
|
||||||
|
mutex_unlock(&wb->inc.lock);
|
||||||
|
|
||||||
bch2_journal_pin_copy(j, &pin, &wb->journal_pin,
|
for (size_t i = 0; i < wb->flushing.keys.nr; i++) {
|
||||||
bch2_btree_write_buffer_journal_flush);
|
wb->sorted.data[i].idx = i;
|
||||||
bch2_journal_pin_drop(j, &wb->journal_pin);
|
wb->sorted.data[i].btree = wb->flushing.keys.data[i].btree;
|
||||||
|
memcpy(&wb->sorted.data[i].pos, &wb->flushing.keys.data[i].k.k.p, sizeof(struct bpos));
|
||||||
s = btree_write_buffer_switch(wb);
|
}
|
||||||
keys = wb->keys[s.idx];
|
wb->sorted.nr = wb->flushing.keys.nr;
|
||||||
nr = s.nr;
|
|
||||||
|
|
||||||
if (race_fault())
|
|
||||||
goto slowpath;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We first sort so that we can detect and skip redundant updates, and
|
* We first sort so that we can detect and skip redundant updates, and
|
||||||
@@ -181,110 +282,178 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
 	 * If that happens, simply skip the key so we can optimistically insert
 	 * as many keys as possible in the fast path.
 	 */
-	sort(keys, nr, sizeof(keys[0]),
-	     btree_write_buffered_key_cmp, NULL);
+	wb_sort(wb->sorted.data, wb->sorted.nr);
+
+	darray_for_each(wb->sorted, i) {
+		struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
+
+		for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
+			prefetch(&wb->flushing.keys.data[n->idx]);
+
+		BUG_ON(!k->journal_seq);
+
+		if (i + 1 < &darray_top(wb->sorted) &&
+		    wb_key_eq(i, i + 1)) {
+			struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
 
-	for (i = keys; i < keys + nr; i++) {
-		if (i + 1 < keys + nr &&
-		    i[0].btree == i[1].btree &&
-		    bpos_eq(i[0].k.k.p, i[1].k.k.p)) {
 			skipped++;
-			i->journal_seq = 0;
+			n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
+			k->journal_seq = 0;
 			continue;
 		}
 
 		if (write_locked &&
-		    (iter.path->btree_id != i->btree ||
-		     bpos_gt(i->k.k.p, iter.path->l[0].b->key.k.p))) {
+		    (iter.path->btree_id != k->btree ||
+		     bpos_gt(k->k.k.p, iter.path->l[0].b->key.k.p))) {
 			bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
 			write_locked = false;
 		}
 
-		if (!iter.path || iter.path->btree_id != i->btree) {
+		if (!iter.path || iter.path->btree_id != k->btree) {
 			bch2_trans_iter_exit(trans, &iter);
-			bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p,
+			bch2_trans_iter_init(trans, &iter, k->btree, k->k.k.p,
 					     BTREE_ITER_INTENT|BTREE_ITER_ALL_SNAPSHOTS);
 		}
 
-		bch2_btree_iter_set_pos(&iter, i->k.k.p);
+		bch2_btree_iter_set_pos(&iter, k->k.k.p);
 		iter.path->preserve = false;
 
 		do {
-			ret = bch2_btree_write_buffer_flush_one(trans, &iter, i,
-							commit_flags, &write_locked, &fast);
+			if (race_fault()) {
+				ret = -BCH_ERR_journal_reclaim_would_deadlock;
+				break;
+			}
+
+			ret = wb_flush_one(trans, &iter, k, &write_locked, &fast);
 			if (!write_locked)
 				bch2_trans_begin(trans);
 		} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
 
-		if (ret == -BCH_ERR_journal_reclaim_would_deadlock) {
+		if (!ret) {
+			k->journal_seq = 0;
+		} else if (ret == -BCH_ERR_journal_reclaim_would_deadlock) {
 			slowpath++;
-			continue;
-		}
-		if (ret)
+			ret = 0;
+		} else
 			break;
-
-		i->journal_seq = 0;
 	}
 
 	if (write_locked)
 		bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
 	bch2_trans_iter_exit(trans, &iter);
 
-	trace_write_buffer_flush(trans, nr, skipped, fast, wb->size);
-
-	if (slowpath)
-		goto slowpath;
+	if (ret)
+		goto err;
 
-	bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
-out:
-	bch2_journal_pin_drop(j, &pin);
-	mutex_unlock(&wb->flush_lock);
-	return ret;
-slowpath:
-	trace_write_buffer_flush_slowpath(trans, i - keys, nr);
-
-	/*
-	 * Now sort the rest by journal seq and bump the journal pin as we go.
-	 * The slowpath zapped the seq of keys that were successfully flushed so
-	 * we can skip those here.
-	 */
-	sort(keys, nr, sizeof(keys[0]),
-	     btree_write_buffered_journal_cmp,
-	     NULL);
-
-	commit_flags &= ~BCH_WATERMARK_MASK;
-	commit_flags |= BCH_WATERMARK_reclaim;
-
-	for (i = keys; i < keys + nr; i++) {
-		if (!i->journal_seq)
-			continue;
-
-		bch2_journal_pin_update(j, i->journal_seq, &pin,
-			      bch2_btree_write_buffer_journal_flush);
-
-		ret = commit_do(trans, NULL, NULL,
-				commit_flags|
-				BCH_TRANS_COMMIT_no_enospc|
-				BCH_TRANS_COMMIT_no_journal_res|
-				BCH_TRANS_COMMIT_journal_reclaim,
-				btree_write_buffered_insert(trans, i));
-		if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)))
-			break;
+	if (slowpath) {
+		/*
+		 * Flush in the order they were present in the journal, so that
+		 * we can release journal pins:
+		 * The fastpath zapped the seq of keys that were successfully flushed so
+		 * we can skip those here.
+		 */
+		trace_write_buffer_flush_slowpath(trans, slowpath, wb->flushing.keys.nr);
+
+		struct btree_write_buffered_key *i;
+		darray_for_each(wb->flushing.keys, i) {
+			if (!i->journal_seq)
+				continue;
+
+			bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
+						bch2_btree_write_buffer_journal_flush);
+
+			bch2_trans_begin(trans);
+
+			ret = commit_do(trans, NULL, NULL,
+					BCH_WATERMARK_reclaim|
+					BCH_TRANS_COMMIT_no_check_rw|
+					BCH_TRANS_COMMIT_no_enospc|
+					BCH_TRANS_COMMIT_no_journal_res|
+					BCH_TRANS_COMMIT_journal_reclaim,
+					btree_write_buffered_insert(trans, i));
+			if (ret)
+				goto err;
+		}
 	}
-
-	goto out;
+err:
+	bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
+	trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
+	bch2_journal_pin_drop(j, &wb->flushing.pin);
+	wb->flushing.keys.nr = 0;
+	return ret;
+}
+
+static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq)
+{
+	struct journal *j = &c->journal;
+	struct journal_buf *buf;
+	int ret = 0;
+
+	mutex_lock(&j->buf_lock);
+	while ((buf = bch2_next_write_buffer_flush_journal_buf(j, seq)))
+		if (bch2_journal_keys_to_write_buffer(c, buf)) {
+			ret = -ENOMEM;
+			break;
+		}
+	mutex_unlock(&j->buf_lock);
+
+	return ret;
+}
 
 int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
 {
+	struct bch_fs *c = trans->c;
+	struct btree_write_buffer *wb = &c->btree_write_buffer;
+	int ret = 0, fetch_from_journal_err;
+
+	trace_write_buffer_flush_sync(trans, _RET_IP_);
+retry:
 	bch2_trans_unlock(trans);
-	mutex_lock(&trans->c->btree_write_buffer.flush_lock);
-	return __bch2_btree_write_buffer_flush(trans, 0, true);
+
+	bch2_journal_block_reservations(&c->journal);
+	fetch_from_journal_err = fetch_wb_keys_from_journal(c, U64_MAX);
+	bch2_journal_unblock(&c->journal);
+
+	/*
+	 * On memory allocation failure, bch2_btree_write_buffer_flush_locked()
+	 * is not guaranteed to empty wb->inc:
+	 */
+	mutex_lock(&wb->flushing.lock);
+	while (!ret &&
+	       (wb->flushing.keys.nr || wb->inc.keys.nr))
+		ret = bch2_btree_write_buffer_flush_locked(trans);
+	mutex_unlock(&wb->flushing.lock);
+
+	if (!ret && fetch_from_journal_err)
+		goto retry;
+
+	return ret;
 }
 
-int bch2_btree_write_buffer_flush(struct btree_trans *trans)
+int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans)
 {
-	return __bch2_btree_write_buffer_flush(trans, 0, false);
+	struct bch_fs *c = trans->c;
+	struct btree_write_buffer *wb = &c->btree_write_buffer;
+	int ret = 0;
+
+	if (mutex_trylock(&wb->flushing.lock)) {
+		ret = bch2_btree_write_buffer_flush_locked(trans);
+		mutex_unlock(&wb->flushing.lock);
+	}
+
+	return ret;
+}
+
+int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
+{
+	struct bch_fs *c = trans->c;
+
+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer))
+		return -BCH_ERR_erofs_no_writes;
+
+	int ret = bch2_btree_write_buffer_flush_nocheck_rw(trans);
+	bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+	return ret;
 }
 
 static int bch2_btree_write_buffer_journal_flush(struct journal *j,
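The comment in the hunk above describes the fast path: buffered updates are sorted by btree and position so that all but the newest update to a given position can be skipped before insertion. A minimal userspace sketch of that idea, with invented types that are not bcachefs code:

/* Illustrative only: sort updates by (btree, pos), keep just the newest one
 * per position. All names here are made up for the example. */
#include <stdio.h>
#include <stdlib.h>

struct upd { int btree; int pos; unsigned long long journal_seq; };

static int cmp_upd(const void *l, const void *r)
{
	const struct upd *a = l, *b = r;
	if (a->btree != b->btree) return a->btree < b->btree ? -1 : 1;
	if (a->pos   != b->pos)   return a->pos   < b->pos   ? -1 : 1;
	return (a->journal_seq > b->journal_seq) - (a->journal_seq < b->journal_seq);
}

int main(void)
{
	struct upd u[] = { { 0, 10, 5 }, { 0, 10, 9 }, { 1, 3, 2 } };
	size_t nr = sizeof(u) / sizeof(u[0]);

	qsort(u, nr, sizeof(u[0]), cmp_upd);

	for (size_t i = 0; i < nr; i++) {
		/* skip all but the last (newest) update to the same position */
		if (i + 1 < nr &&
		    u[i].btree == u[i + 1].btree &&
		    u[i].pos   == u[i + 1].pos)
			continue;
		printf("apply btree %d pos %d seq %llu\n",
		       u[i].btree, u[i].pos, u[i].journal_seq);
	}
	return 0;
}

Sorting groups duplicates together, so the skip test only ever has to look at the next element.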
@@ -292,84 +461,195 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct btree_write_buffer *wb = &c->btree_write_buffer;
+	int ret, fetch_from_journal_err;
 
-	mutex_lock(&wb->flush_lock);
+	do {
+		fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq);
 
-	return bch2_trans_run(c,
-			__bch2_btree_write_buffer_flush(trans, BCH_TRANS_COMMIT_no_check_rw, true));
+		mutex_lock(&wb->flushing.lock);
+		ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans));
+		mutex_unlock(&wb->flushing.lock);
+	} while (!ret &&
+		 (fetch_from_journal_err ||
+		  (wb->flushing.pin.seq && wb->flushing.pin.seq <= seq) ||
+		  (wb->inc.pin.seq && wb->inc.pin.seq <= seq)));
+
+	return ret;
 }
 
-static inline u64 btree_write_buffer_ref(int idx)
+static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
 {
-	return ((union btree_write_buffer_state) {
-		.ref0 = idx == 0,
-		.ref1 = idx == 1,
-	}).v;
-}
-
-int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans)
-{
-	struct bch_fs *c = trans->c;
+	struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);
 	struct btree_write_buffer *wb = &c->btree_write_buffer;
-	struct btree_write_buffered_key *i;
-	union btree_write_buffer_state old, new;
-	int ret = 0;
-	u64 v;
+	int ret;
 
-	trans_for_each_wb_update(trans, i) {
-		EBUG_ON(i->k.k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
-
-		i->journal_seq		= trans->journal_res.seq;
-		i->journal_offset	= trans->journal_res.offset;
-	}
+	mutex_lock(&wb->flushing.lock);
+	do {
+		ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans));
+	} while (!ret && bch2_btree_write_buffer_should_flush(c));
+	mutex_unlock(&wb->flushing.lock);
+
+	bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+}
+
+int __bch2_journal_key_to_wb(struct bch_fs *c,
+			     struct journal_keys_to_wb *dst,
+			     enum btree_id btree, struct bkey_i *k)
+{
+	struct btree_write_buffer *wb = &c->btree_write_buffer;
+	int ret;
+retry:
+	ret = darray_make_room_gfp(&dst->wb->keys, 1, GFP_KERNEL);
+	if (!ret && dst->wb == &wb->flushing)
+		ret = darray_resize(&wb->sorted, wb->flushing.keys.size);
+
+	if (unlikely(ret)) {
+		if (dst->wb == &c->btree_write_buffer.flushing) {
+			mutex_unlock(&dst->wb->lock);
+			dst->wb = &c->btree_write_buffer.inc;
+			bch2_journal_pin_add(&c->journal, dst->seq, &dst->wb->pin,
+					     bch2_btree_write_buffer_journal_flush);
+			goto retry;
+		}
+
+		return ret;
+	}
 
-	preempt_disable();
-	v = READ_ONCE(wb->state.v);
-	do {
-		old.v = new.v = v;
+	dst->room = darray_room(dst->wb->keys);
+	if (dst->wb == &wb->flushing)
+		dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr);
+	BUG_ON(!dst->room);
+	BUG_ON(!dst->seq);
 
-		new.v += btree_write_buffer_ref(new.idx);
-		new.nr += trans->nr_wb_updates;
-		if (new.nr > wb->size) {
-			ret = -BCH_ERR_btree_insert_need_flush_buffer;
-			goto out;
-		}
-	} while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
+	struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
+	wb_k->journal_seq	= dst->seq;
+	wb_k->btree		= btree;
+	bkey_copy(&wb_k->k, k);
+	dst->wb->keys.nr++;
+	dst->room--;
+	return 0;
+}
+
+void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_keys_to_wb *dst, u64 seq)
+{
+	struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+	if (mutex_trylock(&wb->flushing.lock)) {
+		mutex_lock(&wb->inc.lock);
+		move_keys_from_inc_to_flushing(wb);
+
+		/*
+		 * Attempt to skip wb->inc, and add keys directly to
+		 * wb->flushing, saving us a copy later:
+		 */
+
+		if (!wb->inc.keys.nr) {
+			dst->wb = &wb->flushing;
+		} else {
+			mutex_unlock(&wb->flushing.lock);
+			dst->wb = &wb->inc;
+		}
+	} else {
+		mutex_lock(&wb->inc.lock);
+		dst->wb = &wb->inc;
+	}
 
-	memcpy(wb->keys[new.idx] + old.nr,
-	       trans->wb_updates,
-	       sizeof(trans->wb_updates[0]) * trans->nr_wb_updates);
+	dst->room = darray_room(dst->wb->keys);
+	if (dst->wb == &wb->flushing)
+		dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr);
+	dst->seq = seq;
 
-	bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &wb->journal_pin,
+	bch2_journal_pin_add(&c->journal, seq, &dst->wb->pin,
 			     bch2_btree_write_buffer_journal_flush);
+}
 
-	atomic64_sub_return_release(btree_write_buffer_ref(new.idx), &wb->state.counter);
+void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
+{
+	struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+	if (!dst->wb->keys.nr)
+		bch2_journal_pin_drop(&c->journal, &dst->wb->pin);
+
+	if (bch2_btree_write_buffer_should_flush(c) &&
+	    __bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer) &&
+	    !queue_work(system_unbound_wq, &c->btree_write_buffer.flush_work))
+		bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+
+	if (dst->wb == &wb->flushing)
+		mutex_unlock(&wb->flushing.lock);
+	mutex_unlock(&wb->inc.lock);
+}
+
+static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf)
+{
+	struct journal_keys_to_wb dst;
+	struct jset_entry *entry;
+	struct bkey_i *k;
+	int ret = 0;
+
+	bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq));
+
+	for_each_jset_entry_type(entry, buf->data, BCH_JSET_ENTRY_write_buffer_keys) {
+		jset_entry_for_each_key(entry, k) {
+			ret = bch2_journal_key_to_wb(c, &dst, entry->btree_id, k);
+			if (ret)
+				goto out;
+		}
+
+		entry->type = BCH_JSET_ENTRY_btree_keys;
+	}
+
+	buf->need_flush_to_write_buffer = false;
 out:
-	preempt_enable();
+	bch2_journal_keys_to_write_buffer_end(c, &dst);
 	return ret;
 }
 
+static int wb_keys_resize(struct btree_write_buffer_keys *wb, size_t new_size)
+{
+	if (wb->keys.size >= new_size)
+		return 0;
+
+	if (!mutex_trylock(&wb->lock))
+		return -EINTR;
+
+	int ret = darray_resize(&wb->keys, new_size);
+	mutex_unlock(&wb->lock);
+	return ret;
+}
+
+int bch2_btree_write_buffer_resize(struct bch_fs *c, size_t new_size)
+{
+	struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+	return wb_keys_resize(&wb->flushing, new_size) ?:
+		wb_keys_resize(&wb->inc, new_size);
+}
+
 void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
 {
 	struct btree_write_buffer *wb = &c->btree_write_buffer;
 
-	BUG_ON(wb->state.nr && !bch2_journal_error(&c->journal));
+	BUG_ON((wb->inc.keys.nr || wb->flushing.keys.nr) &&
+	       !bch2_journal_error(&c->journal));
 
-	kvfree(wb->keys[1]);
-	kvfree(wb->keys[0]);
+	darray_exit(&wb->sorted);
+	darray_exit(&wb->flushing.keys);
+	darray_exit(&wb->inc.keys);
 }
 
 int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
 {
 	struct btree_write_buffer *wb = &c->btree_write_buffer;
 
-	mutex_init(&wb->flush_lock);
-	wb->size = c->opts.btree_write_buffer_size;
+	mutex_init(&wb->inc.lock);
+	mutex_init(&wb->flushing.lock);
+	INIT_WORK(&wb->flush_work, bch2_btree_write_buffer_flush_work);
 
-	wb->keys[0] = kvmalloc_array(wb->size, sizeof(*wb->keys[0]), GFP_KERNEL);
-	wb->keys[1] = kvmalloc_array(wb->size, sizeof(*wb->keys[1]), GFP_KERNEL);
-	if (!wb->keys[0] || !wb->keys[1])
-		return -BCH_ERR_ENOMEM_fs_btree_write_buffer_init;
-
-	return 0;
+	/* Will be resized by journal as needed: */
+	unsigned initial_size = 1 << 16;
+
+	return  darray_make_room(&wb->inc.keys, initial_size) ?:
+		darray_make_room(&wb->flushing.keys, initial_size) ?:
+		darray_make_room(&wb->sorted, initial_size);
 }
@@ -2,12 +2,59 @@
 #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H
 #define _BCACHEFS_BTREE_WRITE_BUFFER_H
 
-int __bch2_btree_write_buffer_flush(struct btree_trans *, unsigned, bool);
+#include "bkey.h"
+
+static inline bool bch2_btree_write_buffer_should_flush(struct bch_fs *c)
+{
+	struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+	return wb->inc.keys.nr + wb->flushing.keys.nr > wb->inc.keys.size / 4;
+}
+
+static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c)
+{
+	struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+	return wb->inc.keys.nr > wb->inc.keys.size * 3 / 4;
+}
+
+struct btree_trans;
 int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
-int bch2_btree_write_buffer_flush(struct btree_trans *);
+int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
+int bch2_btree_write_buffer_tryflush(struct btree_trans *);
 
-int bch2_btree_insert_keys_write_buffer(struct btree_trans *);
+struct journal_keys_to_wb {
+	struct btree_write_buffer_keys	*wb;
+	size_t				room;
+	u64				seq;
+};
+
+int __bch2_journal_key_to_wb(struct bch_fs *,
+			     struct journal_keys_to_wb *,
+			     enum btree_id, struct bkey_i *);
+
+static inline int bch2_journal_key_to_wb(struct bch_fs *c,
+				struct journal_keys_to_wb *dst,
+				enum btree_id btree, struct bkey_i *k)
+{
+	EBUG_ON(!dst->seq);
+
+	if (unlikely(!dst->room))
+		return __bch2_journal_key_to_wb(c, dst, btree, k);
+
+	struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
+	wb_k->journal_seq	= dst->seq;
+	wb_k->btree		= btree;
+	bkey_copy(&wb_k->k, k);
+	dst->wb->keys.nr++;
+	dst->room--;
+	return 0;
+}
+
+void bch2_journal_keys_to_write_buffer_start(struct bch_fs *, struct journal_keys_to_wb *, u64);
+void bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_to_wb *);
+
+int bch2_btree_write_buffer_resize(struct bch_fs *, size_t);
 void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
 int bch2_fs_btree_write_buffer_init(struct bch_fs *);
 
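The inline bch2_journal_key_to_wb() above relies on a pre-computed room count so the common case can append without touching the allocator; only when room runs out does it fall back to the out-of-line helper that resizes. A rough userspace sketch of the same reserve-then-fill pattern on a plain growable array, with names invented for the example:

/* Illustrative only: fast-path append when room is known, slow path grows
 * the array and recomputes room. Not bcachefs code. */
#include <stdlib.h>

struct growable { int *data; size_t nr, size; };

static int slow_push(struct growable *d, size_t *room, int v)
{
	size_t new_size = d->size ? d->size * 2 : 8;
	int *p = realloc(d->data, new_size * sizeof(*p));
	if (!p)
		return -1;
	d->data = p;
	d->size = new_size;
	d->data[d->nr++] = v;
	*room = d->size - d->nr;	/* refresh the cached room count */
	return 0;
}

static inline int push(struct growable *d, size_t *room, int v)
{
	if (!*room)
		return slow_push(d, room, v);
	d->data[d->nr++] = v;	/* no allocation check on the fast path */
	(*room)--;
	return 0;
}

int main(void)
{
	struct growable d = { 0 };
	size_t room = 0;

	for (int i = 0; i < 100; i++)
		if (push(&d, &room, i))
			return 1;
	free(d.data);
	return 0;
}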
@@ -2,43 +2,56 @@
 #ifndef _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
 #define _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
 
+#include "darray.h"
 #include "journal_types.h"
 
 #define BTREE_WRITE_BUFERED_VAL_U64s_MAX	4
 #define BTREE_WRITE_BUFERED_U64s_MAX	(BKEY_U64s + BTREE_WRITE_BUFERED_VAL_U64s_MAX)
 
+struct wb_key_ref {
+	union {
+	struct {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+		unsigned			idx:24;
+		u8				pos[sizeof(struct bpos)];
+		enum btree_id			btree:8;
+#else
+		enum btree_id			btree:8;
+		u8				pos[sizeof(struct bpos)];
+		unsigned			idx:24;
+#endif
+	} __packed;
+	struct {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+		u64 lo;
+		u64 mi;
+		u64 hi;
+#else
+		u64 hi;
+		u64 mi;
+		u64 lo;
+#endif
+	};
+	};
+};
+
 struct btree_write_buffered_key {
-	u64			journal_seq;
-	unsigned		journal_offset;
-	enum btree_id		btree;
+	enum btree_id		btree:8;
+	u64			journal_seq:56;
 	__BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX);
 };
 
-union btree_write_buffer_state {
-	struct {
-		atomic64_t	counter;
-	};
-
-	struct {
-		u64		v;
-	};
-
-	struct {
-		u64		nr:23;
-		u64		idx:1;
-		u64		ref0:20;
-		u64		ref1:20;
-	};
+struct btree_write_buffer_keys {
+	DARRAY(struct btree_write_buffered_key) keys;
+	struct journal_entry_pin	pin;
+	struct mutex			lock;
 };
 
 struct btree_write_buffer {
-	struct mutex		flush_lock;
-	struct journal_entry_pin journal_pin;
-
-	union btree_write_buffer_state state;
-	size_t			size;
-
-	struct btree_write_buffered_key *keys[2];
+	DARRAY(struct wb_key_ref)	sorted;
+	struct btree_write_buffer_keys	inc;
+	struct btree_write_buffer_keys	flushing;
+	struct work_struct		flush_work;
 };
 
 #endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */
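The new types keep two key buffers: wb->inc, which accepts incoming keys under its own lock, and wb->flushing, which is drained under a separate lock, with move_keys_from_inc_to_flushing() transferring keys between them. A small userspace model of that two-stage scheme, with made-up names and sizes:

/* Illustrative only: a minimal model of the inc -> flushing two-buffer idea.
 * The sketch assumes each flush fully drains the flushing buffer. */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define BUF_CAP 1024

struct key { unsigned long long journal_seq; int btree; int val; };
struct buf { struct key keys[BUF_CAP]; size_t nr; pthread_mutex_t lock; };

static struct buf inc      = { .lock = PTHREAD_MUTEX_INITIALIZER };
static struct buf flushing = { .lock = PTHREAD_MUTEX_INITIALIZER };

/* Producers append to 'inc' under its lock only: */
static int add_key(struct key k)
{
	pthread_mutex_lock(&inc.lock);
	if (inc.nr == BUF_CAP) {
		pthread_mutex_unlock(&inc.lock);
		return -1;		/* caller must flush first */
	}
	inc.keys[inc.nr++] = k;
	pthread_mutex_unlock(&inc.lock);
	return 0;
}

/* The flusher holds flushing.lock for the whole drain, but only takes
 * inc.lock briefly to steal the accumulated keys: */
static void flush(void)
{
	pthread_mutex_lock(&flushing.lock);

	pthread_mutex_lock(&inc.lock);
	memcpy(flushing.keys + flushing.nr, inc.keys, inc.nr * sizeof(struct key));
	flushing.nr += inc.nr;
	inc.nr = 0;
	pthread_mutex_unlock(&inc.lock);

	for (size_t i = 0; i < flushing.nr; i++)
		printf("flush seq %llu\n", flushing.keys[i].journal_seq);
	flushing.nr = 0;

	pthread_mutex_unlock(&flushing.lock);
}

int main(void)
{
	add_key((struct key) { .journal_seq = 1, .btree = 0, .val = 42 });
	add_key((struct key) { .journal_seq = 2, .btree = 0, .val = 43 });
	flush();
	return 0;
}

The point of the split is that new insertions only contend on the inc lock for the brief copy, never for the duration of the btree updates done while draining.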
@@ -61,7 +61,7 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
 		usage->reserved += usage->persistent_reserved[i];
 
 	for (i = 0; i < c->replicas.nr; i++) {
-		struct bch_replicas_entry *e =
+		struct bch_replicas_entry_v1 *e =
 			cpu_replicas_entry(&c->replicas, i);
 
 		fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]);
@@ -214,7 +214,7 @@ void bch2_fs_usage_to_text(struct printbuf *out,
 	}
 
 	for (i = 0; i < c->replicas.nr; i++) {
-		struct bch_replicas_entry *e =
+		struct bch_replicas_entry_v1 *e =
 			cpu_replicas_entry(&c->replicas, i);
 
 		prt_printf(out, "\t");
@@ -345,7 +345,7 @@ static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
 
 static inline int __update_replicas(struct bch_fs *c,
 				    struct bch_fs_usage *fs_usage,
-				    struct bch_replicas_entry *r,
+				    struct bch_replicas_entry_v1 *r,
 				    s64 sectors)
 {
 	int idx = bch2_replicas_entry_idx(c, r);
@@ -359,7 +359,7 @@ static inline int __update_replicas(struct bch_fs *c,
 }
 
 static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k,
-			struct bch_replicas_entry *r, s64 sectors,
+			struct bch_replicas_entry_v1 *r, s64 sectors,
 			unsigned journal_seq, bool gc)
 {
 	struct bch_fs_usage *fs_usage;
@@ -453,9 +453,9 @@ int bch2_replicas_deltas_realloc(struct btree_trans *trans, unsigned more)
 			__replicas_deltas_realloc(trans, more, _gfp));
 }
 
-static inline int update_replicas_list(struct btree_trans *trans,
-			struct bch_replicas_entry *r,
-			s64 sectors)
+int bch2_update_replicas_list(struct btree_trans *trans,
+			 struct bch_replicas_entry_v1 *r,
+			 s64 sectors)
 {
 	struct replicas_delta_list *d;
 	struct replicas_delta *n;
@@ -481,14 +481,13 @@ static inline int update_replicas_list(struct btree_trans *trans,
 	return 0;
 }
 
-static inline int update_cached_sectors_list(struct btree_trans *trans,
-					unsigned dev, s64 sectors)
+int bch2_update_cached_sectors_list(struct btree_trans *trans, unsigned dev, s64 sectors)
 {
 	struct bch_replicas_padded r;
 
 	bch2_replicas_entry_cached(&r.e, dev);
 
-	return update_replicas_list(trans, &r.e, sectors);
+	return bch2_update_replicas_list(trans, &r.e, sectors);
 }
 
 int bch2_mark_alloc(struct btree_trans *trans,
@@ -580,23 +579,6 @@ int bch2_mark_alloc(struct btree_trans *trans,
 	}
 	percpu_up_read(&c->mark_lock);
 
-	/*
-	 * need to know if we're getting called from the invalidate path or
-	 * not:
-	 */
-
-	if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
-	    old_a->cached_sectors) {
-		ret = update_cached_sectors(c, new, ca->dev_idx,
-					    -((s64) old_a->cached_sectors),
-					    journal_seq, gc);
-		if (ret) {
-			bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors",
-					    __func__);
-			return ret;
-		}
-	}
-
 	if (new_a->data_type == BCH_DATA_free &&
 	    (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk))
 		closure_wake_up(&c->freelist_wait);
@@ -1470,7 +1452,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
 
 	bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
 	r.e.data_type = data_type;
-	ret = update_replicas_list(trans, &r.e, sectors);
+	ret = bch2_update_replicas_list(trans, &r.e, sectors);
 err:
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
@@ -1513,8 +1495,8 @@ static int __trans_mark_extent(struct btree_trans *trans,
 
 		if (p.ptr.cached) {
 			if (!stale) {
-				ret = update_cached_sectors_list(trans, p.ptr.dev,
+				ret = bch2_update_cached_sectors_list(trans, p.ptr.dev,
 								 disk_sectors);
 				if (ret)
 					return ret;
 			}
@@ -1532,7 +1514,7 @@ static int __trans_mark_extent(struct btree_trans *trans,
 	}
 
 	if (r.e.nr_devs)
-		ret = update_replicas_list(trans, &r.e, dirty_sectors);
+		ret = bch2_update_replicas_list(trans, &r.e, dirty_sectors);
 
 	return ret;
 }
@@ -1669,7 +1651,7 @@ int bch2_trans_mark_stripe(struct btree_trans *trans,
 		s64 sectors = le16_to_cpu(new_s->sectors);
 
 		bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(new));
-		ret = update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
+		ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
 		if (ret)
 			return ret;
 	}
@@ -1678,7 +1660,7 @@ int bch2_trans_mark_stripe(struct btree_trans *trans,
 		s64 sectors = -((s64) le16_to_cpu(old_s->sectors));
 
 		bch2_bkey_to_replicas(&r.e, old);
-		ret = update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
+		ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
 		if (ret)
 			return ret;
 	}
@@ -315,6 +315,9 @@ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
 		: c->usage[journal_seq & JOURNAL_BUF_MASK]);
 }
 
+int bch2_update_replicas_list(struct btree_trans *,
+			 struct bch_replicas_entry_v1 *, s64);
+int bch2_update_cached_sectors_list(struct btree_trans *, unsigned, s64);
 int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned);
 
 void bch2_fs_usage_initialize(struct bch_fs *);
@@ -444,7 +444,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
 	dst_end = (void *) arg->replicas + replica_entries_bytes;
 
 	for (i = 0; i < c->replicas.nr; i++) {
-		struct bch_replicas_entry *src_e =
+		struct bch_replicas_entry_v1 *src_e =
 			cpu_replicas_entry(&c->replicas, i);
 
 		/* check that we have enough space for one replicas entry */
@@ -95,7 +95,6 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
 				unsigned long io_until,
 				unsigned long cpu_timeout)
 {
-	bool kthread = (current->flags & PF_KTHREAD) != 0;
 	struct io_clock_wait wait;
 
 	wait.io_timer.expire	= io_until;
@@ -111,7 +110,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
 
 	while (1) {
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (kthread && kthread_should_stop())
+		if (kthread_should_stop())
 			break;
 
 		if (wait.expired)
@@ -9,10 +9,12 @@ int __bch2_darray_resize(darray_void *d, size_t element_size, size_t new_size, g
 	if (new_size > d->size) {
 		new_size = roundup_pow_of_two(new_size);
 
-		void *data = krealloc_array(d->data, new_size, element_size, gfp);
+		void *data = kvmalloc_array(new_size, element_size, gfp);
 		if (!data)
 			return -ENOMEM;
 
+		memcpy(data, d->data, d->size * element_size);
+		kvfree(d->data);
 		d->data	= data;
 		d->size = new_size;
 	}
@@ -92,7 +92,7 @@ do {									\
 
 #define darray_exit(_d)							\
 do {									\
-	kfree((_d)->data);						\
+	kvfree((_d)->data);						\
 	darray_init(_d);						\
 } while (0)
 
@@ -1005,7 +1005,7 @@ static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
 	unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
 	int ret = 0;
 
-	ret = bch2_btree_write_buffer_flush(trans);
+	ret = bch2_btree_write_buffer_flush_sync(trans);
 	if (ret)
 		goto err;
 
@@ -5,7 +5,7 @@
 #include "bcachefs_format.h"
 
 struct bch_replicas_padded {
-	struct bch_replicas_entry	e;
+	struct bch_replicas_entry_v1	e;
 	u8				pad[BCH_BKEY_PTRS_MAX];
 };
 
@@ -150,7 +150,6 @@
 	x(BCH_ERR_btree_insert_fail,	btree_insert_need_mark_replicas)	\
 	x(BCH_ERR_btree_insert_fail,	btree_insert_need_journal_res)		\
 	x(BCH_ERR_btree_insert_fail,	btree_insert_need_journal_reclaim)	\
-	x(BCH_ERR_btree_insert_fail,	btree_insert_need_flush_buffer)		\
 	x(0,				backpointer_to_overwritten_btree_node)	\
 	x(0,				lock_fail_root_changed)			\
 	x(0,				journal_reclaim_would_deadlock)		\
@@ -35,9 +35,9 @@ static void bio_check_or_release(struct bio *bio, bool check_dirty)
 	}
 }
 
-static void bch2_dio_read_complete(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_dio_read_complete)
 {
-	struct dio_read *dio = container_of(cl, struct dio_read, cl);
+	closure_type(dio, struct dio_read, cl);
 
 	dio->req->ki_complete(dio->req, dio->ret);
 	bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
@@ -325,9 +325,9 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
 	return 0;
 }
 
-static void bch2_dio_write_flush_done(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_dio_write_flush_done)
 {
-	struct dio_write *dio = container_of(cl, struct dio_write, op.cl);
+	closure_type(dio, struct dio_write, op.cl);
 	struct bch_fs *c = dio->op.c;
 
 	closure_debug_destroy(cl);
@@ -861,7 +861,8 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
 	    abs(pos_src - pos_dst) < len)
 		return -EINVAL;
 
-	bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
+	lock_two_nondirectories(&src->v, &dst->v);
+	bch2_lock_inodes(INODE_PAGECACHE_BLOCK, src, dst);
 
 	inode_dio_wait(&src->v);
 	inode_dio_wait(&dst->v);
@@ -914,7 +915,8 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
 	ret = bch2_flush_inode(c, dst);
 err:
 	bch2_quota_reservation_put(c, dst, &quota_res);
-	bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
+	bch2_unlock_inodes(INODE_PAGECACHE_BLOCK, src, dst);
+	unlock_two_nondirectories(&src->v, &dst->v);
 
 	return bch2_err_class(ret);
 }
@@ -453,35 +453,33 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
 static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
 				struct bch_ioctl_subvolume arg)
 {
-	struct filename *name;
 	struct path path;
 	struct inode *dir;
-	struct dentry *victim;
 	int ret = 0;
 
 	if (arg.flags)
 		return -EINVAL;
 
-	name = getname((const char __user *)(unsigned long)arg.dst_ptr);
-	victim = filename_path_locked(arg.dirfd, name, &path);
-	putname(name);
-	if (IS_ERR(victim))
-		return PTR_ERR(victim);
+	ret = user_path_at(arg.dirfd,
+			(const char __user *)(unsigned long)arg.dst_ptr,
+			LOOKUP_FOLLOW, &path);
+	if (ret)
+		return ret;
 
-	if (victim->d_sb->s_fs_info != c) {
+	if (path.dentry->d_sb->s_fs_info != c) {
 		ret = -EXDEV;
 		goto err;
 	}
 
-	dir = d_inode(path.dentry);
-	ret = __bch2_unlink(dir, victim, true);
-	if (!ret) {
-		fsnotify_rmdir(dir, victim);
-		d_delete(victim);
-	}
-	inode_unlock(dir);
+	dir = path.dentry->d_parent->d_inode;
+
+	ret = __bch2_unlink(dir, path.dentry, true);
+	if (ret)
+		goto err;
+
+	fsnotify_rmdir(dir, path.dentry);
+	d_delete(path.dentry);
 err:
-	dput(victim);
 	path_put(&path);
 	return ret;
 }
@@ -1667,8 +1667,7 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
 		if (!first)
 			seq_putc(seq, ':');
 		first = false;
-		seq_puts(seq, "/dev/");
-		seq_puts(seq, ca->name);
+		seq_puts(seq, ca->disk_sb.sb_name);
 	}
 
 	return 0;
@@ -1901,7 +1900,7 @@ got_sb:
 	sb->s_flags	|= SB_POSIXACL;
 #endif
 
-	sb->s_shrink.seeks = 0;
+	sb->s_shrink->seeks = 0;
 
 	vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
 	ret = PTR_ERR_OR_ZERO(vinode);
@@ -77,9 +77,8 @@ static inline int ptrcmp(void *l, void *r)
 }
 
 enum bch_inode_lock_op {
-	INODE_LOCK		= (1U << 0),
-	INODE_PAGECACHE_BLOCK	= (1U << 1),
-	INODE_UPDATE_LOCK	= (1U << 2),
+	INODE_PAGECACHE_BLOCK	= (1U << 0),
+	INODE_UPDATE_LOCK	= (1U << 1),
 };
 
 #define bch2_lock_inodes(_locks, ...)					\
@@ -91,8 +90,6 @@ do {									\
 									\
 	for (i = 1; i < ARRAY_SIZE(a); i++)				\
 		if (a[i] != a[i - 1]) {					\
-			if ((_locks) & INODE_LOCK)			\
-				down_write_nested(&a[i]->v.i_rwsem, i);	\
 			if ((_locks) & INODE_PAGECACHE_BLOCK)		\
 				bch2_pagecache_block_get(a[i]);\
 			if ((_locks) & INODE_UPDATE_LOCK)		\
@@ -109,8 +106,6 @@ do {									\
 									\
 	for (i = 1; i < ARRAY_SIZE(a); i++)				\
 		if (a[i] != a[i - 1]) {					\
-			if ((_locks) & INODE_LOCK)			\
-				up_write(&a[i]->v.i_rwsem);		\
 			if ((_locks) & INODE_PAGECACHE_BLOCK)		\
 				bch2_pagecache_block_put(a[i]);\
 			if ((_locks) & INODE_UPDATE_LOCK)		\
@@ -826,6 +826,18 @@ fsck_err:
 	goto out;
 }
 
+static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0);
+	int ret = bkey_err(k);
+	if (ret)
+		return ret;
+
+	bch2_trans_iter_exit(trans, &iter);
+	return k.k->type == KEY_TYPE_set;
+}
+
 static int check_inode(struct btree_trans *trans,
 		       struct btree_iter *iter,
 		       struct bkey_s_c k,
@@ -890,6 +902,17 @@ static int check_inode(struct btree_trans *trans,
 		return 0;
 	}
 
+	if (u.bi_flags & BCH_INODE_unlinked &&
+	    c->sb.version >= bcachefs_metadata_version_deleted_inodes) {
+		ret = check_inode_deleted_list(trans, k.k->p);
+		if (ret)
+			return ret;
+
+		fsck_err_on(ret, c, unlinked_inode_not_on_deleted_list,
+			    "inode %llu:%u unlinked, but not on deleted list",
+			    u.bi_inum, k.k->p.snapshot);
+	}
+
 	if (u.bi_flags & BCH_INODE_unlinked &&
 	    (!c->sb.clean ||
 	     fsck_err(c, inode_unlinked_but_clean,
@@ -1157,10 +1157,6 @@ int bch2_delete_dead_inodes(struct bch_fs *c)
 again:
 	need_another_pass = false;
 
-	ret = bch2_btree_write_buffer_flush_sync(trans);
-	if (ret)
-		goto err;
-
 	/*
 	 * Weird transaction restart handling here because on successful delete,
 	 * bch2_inode_rm_snapshot() will return a nested transaction restart,
@@ -1191,8 +1187,12 @@ again:
 	}
 	bch2_trans_iter_exit(trans, &iter);
 
-	if (!ret && need_another_pass)
+	if (!ret && need_another_pass) {
+		ret = bch2_btree_write_buffer_flush_sync(trans);
+		if (ret)
+			goto err;
 		goto again;
+	}
 err:
 	bch2_trans_put(trans);
 
@@ -580,9 +580,9 @@ static inline void wp_update_state(struct write_point *wp, bool running)
 	__wp_update_state(wp, state);
 }
 
-static void bch2_write_index(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_write_index)
 {
-	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+	closure_type(op, struct bch_write_op, cl);
 	struct write_point *wp = op->wp;
 	struct workqueue_struct *wq = index_update_wq(op);
 	unsigned long flags;
@@ -1208,9 +1208,9 @@ static void __bch2_nocow_write_done(struct bch_write_op *op)
 		bch2_nocow_write_convert_unwritten(op);
 }
 
-static void bch2_nocow_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(bch2_nocow_write_done)
 {
-	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+	closure_type(op, struct bch_write_op, cl);
 
 	__bch2_nocow_write_done(op);
 	bch2_write_done(cl);
@@ -1363,7 +1363,7 @@ err:
 		op->insert_keys.top = op->insert_keys.keys;
 	} else if (op->flags & BCH_WRITE_SYNC) {
 		closure_sync(&op->cl);
-		bch2_nocow_write_done(&op->cl);
+		bch2_nocow_write_done(&op->cl.work);
 	} else {
 		/*
 		 * XXX
@@ -1566,9 +1566,9 @@ err:
  * If op->discard is true, instead of inserting the data it invalidates the
  * region of the cache represented by op->bio and op->inode.
  */
-void bch2_write(struct closure *cl)
+CLOSURE_CALLBACK(bch2_write)
 {
-	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+	closure_type(op, struct bch_write_op, cl);
 	struct bio *bio = &op->wbio.bio;
 	struct bch_fs *c = op->c;
 	unsigned data_len;
@@ -90,8 +90,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
 	op->devs_need_flush	= NULL;
 }
 
-void bch2_write(struct closure *);
+CLOSURE_CALLBACK(bch2_write);
 
 void bch2_write_point_do_index_updates(struct work_struct *);
 
 static inline struct bch_write_bio *wbio_init(struct bio *bio)
@@ -10,6 +10,7 @@
 #include "bkey_methods.h"
 #include "btree_gc.h"
 #include "btree_update.h"
+#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "error.h"
 #include "journal.h"
@@ -147,6 +148,7 @@ void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
 	bch2_journal_reclaim_fast(j);
 	if (write)
 		closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
+	wake_up(&j->wait);
 }
 
 /*
@@ -184,6 +186,8 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val)
 	/* Close out old buffer: */
 	buf->data->u64s		= cpu_to_le32(old.cur_entry_offset);
 
+	trace_journal_entry_close(c, vstruct_bytes(buf->data));
+
 	sectors = vstruct_blocks_plus(buf->data, c->block_bits,
 				      buf->u64s_reserved) << c->block_bits;
 	BUG_ON(sectors > buf->sectors);
@@ -328,6 +332,7 @@ static int journal_entry_open(struct journal *j)
 	buf->must_flush		= false;
 	buf->separate_flush	= false;
 	buf->flush_time		= 0;
+	buf->need_flush_to_write_buffer = true;
 
 	memset(buf->data, 0, sizeof(*buf->data));
 	buf->data->seq	= cpu_to_le64(journal_cur_seq(j));
@@ -764,6 +769,75 @@ void bch2_journal_block(struct journal *j)
 	journal_quiesce(j);
 }
 
+/*
+ * XXX: ideally this would not be closing the current journal entry, but
+ * otherwise we do not have a way to avoid racing with res_get() - j->blocked
+ * will race.
+ */
+static bool journal_reservations_stopped(struct journal *j)
+{
+	union journal_res_state s;
+
+	journal_entry_close(j);
+
+	s.v = atomic64_read_acquire(&j->reservations.counter);
+
+	return  s.buf0_count == 0 &&
+		s.buf1_count == 0 &&
+		s.buf2_count == 0 &&
+		s.buf3_count == 0;
+}
+
+void bch2_journal_block_reservations(struct journal *j)
+{
+	spin_lock(&j->lock);
+	j->blocked++;
+	spin_unlock(&j->lock);
+
+	wait_event(j->wait, journal_reservations_stopped(j));
+}
+
+static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq)
+{
+	spin_lock(&j->lock);
+	max_seq = min(max_seq, journal_cur_seq(j));
+
+	for (u64 seq = journal_last_unwritten_seq(j);
+	     seq <= max_seq;
+	     seq++) {
+		unsigned idx = seq & JOURNAL_BUF_MASK;
+		struct journal_buf *buf = j->buf + idx;
+		union journal_res_state s;
+
+		if (!buf->need_flush_to_write_buffer)
+			continue;
+
+		if (seq == journal_cur_seq(j))
+			__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+
+		s.v = atomic64_read_acquire(&j->reservations.counter);
+
+		if (journal_state_count(s, idx)) {
+			spin_unlock(&j->lock);
+			return ERR_PTR(-EAGAIN);
+		}
+
+		spin_unlock(&j->lock);
+		return buf;
+	}
+
+	spin_unlock(&j->lock);
+	return NULL;
+}
+
+struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq)
+{
+	struct journal_buf *ret;
+
+	wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, max_seq)) != ERR_PTR(-EAGAIN));
+	return ret;
+}
+
 /* allocate journal on a device: */
 
 static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
@@ -1215,6 +1289,7 @@ int bch2_fs_journal_init(struct journal *j)
 	static struct lock_class_key res_key;
 	unsigned i;
 
+	mutex_init(&j->buf_lock);
 	spin_lock_init(&j->lock);
 	spin_lock_init(&j->err_lock);
 	init_waitqueue_head(&j->wait);
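bch2_journal_block_reservations() above bumps j->blocked and then waits until every outstanding buffer reservation has been released. A condensed userspace analogue of that quiesce pattern, using a mutex and condition variable instead of the journal's atomic state (all names invented for the example):

/* Illustrative only: block new reservations, wait for outstanding ones. */
#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int blocked;		/* writers may not take new reservations */
static int outstanding;		/* reservations currently held */

static int res_get(void)
{
	pthread_mutex_lock(&lock);
	if (blocked) {
		pthread_mutex_unlock(&lock);
		return -1;	/* caller retries later */
	}
	outstanding++;
	pthread_mutex_unlock(&lock);
	return 0;
}

static void res_put(void)
{
	pthread_mutex_lock(&lock);
	if (!--outstanding)
		pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
}

static void block_reservations(void)
{
	pthread_mutex_lock(&lock);
	blocked++;
	while (outstanding)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	if (!res_get())
		res_put();
	block_reservations();
	return 0;
}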
@@ -259,7 +259,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u
 {
 	union journal_res_state s;
 
-	s.v = atomic64_sub_return(((union journal_res_state) {
+	s.v = atomic64_sub_return_release(((union journal_res_state) {
 				    .buf0_count = idx == 0,
 				    .buf1_count = idx == 1,
 				    .buf2_count = idx == 2,
@@ -427,6 +427,8 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
 
 void bch2_journal_unblock(struct journal *);
 void bch2_journal_block(struct journal *);
+void bch2_journal_block_reservations(struct journal *);
+struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq);
 
 void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
 void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
@ -4,6 +4,7 @@
|
|||||||
#include "alloc_foreground.h"
|
#include "alloc_foreground.h"
|
||||||
#include "btree_io.h"
|
#include "btree_io.h"
|
||||||
#include "btree_update_interior.h"
|
#include "btree_update_interior.h"
|
||||||
|
#include "btree_write_buffer.h"
|
||||||
#include "buckets.h"
|
#include "buckets.h"
|
||||||
#include "checksum.h"
|
#include "checksum.h"
|
||||||
#include "disk_groups.h"
|
#include "disk_groups.h"
|
||||||
@ -713,6 +714,22 @@ static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs
|
|||||||
journal_entry_btree_keys_to_text(out, c, entry);
|
journal_entry_btree_keys_to_text(out, c, entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int journal_entry_write_buffer_keys_validate(struct bch_fs *c,
|
||||||
|
struct jset *jset,
|
||||||
|
struct jset_entry *entry,
|
||||||
|
unsigned version, int big_endian,
|
||||||
|
enum bkey_invalid_flags flags)
|
||||||
|
{
|
||||||
|
return journal_entry_btree_keys_validate(c, jset, entry,
|
||||||
|
version, big_endian, READ);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct bch_fs *c,
|
||||||
|
struct jset_entry *entry)
|
||||||
|
{
|
||||||
|
journal_entry_btree_keys_to_text(out, c, entry);
|
||||||
|
}
|
||||||
|
|
||||||
struct jset_entry_ops {
|
struct jset_entry_ops {
|
||||||
int (*validate)(struct bch_fs *, struct jset *,
|
int (*validate)(struct bch_fs *, struct jset *,
|
||||||
struct jset_entry *, unsigned, int,
|
struct jset_entry *, unsigned, int,
|
||||||
@ -1025,10 +1042,9 @@ next_block:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bch2_journal_read_device(struct closure *cl)
|
static CLOSURE_CALLBACK(bch2_journal_read_device)
|
||||||
{
|
{
|
||||||
struct journal_device *ja =
|
closure_type(ja, struct journal_device, read);
|
||||||
container_of(cl, struct journal_device, read);
|
|
||||||
struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
|
struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
|
||||||
struct bch_fs *c = ca->fs;
|
struct bch_fs *c = ca->fs;
|
||||||
struct journal_list *jlist =
|
struct journal_list *jlist =
|
||||||
@ -1494,6 +1510,8 @@ done:
|
|||||||
|
|
||||||
static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
|
static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
|
||||||
{
|
{
|
||||||
|
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||||
|
|
||||||
/* we aren't holding j->lock: */
|
/* we aren't holding j->lock: */
|
||||||
unsigned new_size = READ_ONCE(j->buf_size_want);
|
unsigned new_size = READ_ONCE(j->buf_size_want);
|
||||||
void *new_buf;
|
void *new_buf;
|
||||||
@ -1501,6 +1519,11 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
|
|||||||
if (buf->buf_size >= new_size)
|
if (buf->buf_size >= new_size)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
size_t btree_write_buffer_size = new_size / 64;
|
||||||
|
|
||||||
|
if (bch2_btree_write_buffer_resize(c, btree_write_buffer_size))
|
||||||
|
return;
|
||||||
|
|
||||||
new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN);
|
new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN);
|
||||||
if (!new_buf)
|
if (!new_buf)
|
||||||
return;
|
return;
|
||||||
@@ -1520,9 +1543,9 @@ static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
     return j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK);
 }
 
-static void journal_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_done)
 {
-    struct journal *j = container_of(cl, struct journal, io);
+    closure_type(j, struct journal, io);
     struct bch_fs *c = container_of(j, struct bch_fs, journal);
     struct journal_buf *w = journal_last_unwritten_buf(j);
     struct bch_replicas_padded replicas;
@@ -1590,6 +1613,7 @@ static void journal_write_done(struct closure *cl)
     } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                        old.v, new.v)) != old.v);
 
+    bch2_journal_reclaim_fast(j);
     bch2_journal_space_available(j);
 
     track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
@@ -1641,9 +1665,9 @@ static void journal_write_endio(struct bio *bio)
     percpu_ref_put(&ca->io_ref);
 }
 
-static void do_journal_write(struct closure *cl)
+static CLOSURE_CALLBACK(do_journal_write)
 {
-    struct journal *j = container_of(cl, struct journal, io);
+    closure_type(j, struct journal, io);
     struct bch_fs *c = container_of(j, struct bch_fs, journal);
     struct bch_dev *ca;
     struct journal_buf *w = journal_last_unwritten_buf(j);
@@ -1693,9 +1717,11 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
     struct bch_fs *c = container_of(j, struct bch_fs, journal);
     struct jset_entry *start, *end, *i, *next, *prev = NULL;
     struct jset *jset = w->data;
+    struct journal_keys_to_wb wb = { NULL };
     unsigned sectors, bytes, u64s;
-    bool validate_before_checksum = false;
     unsigned long btree_roots_have = 0;
+    bool validate_before_checksum = false;
+    u64 seq = le64_to_cpu(jset->seq);
     int ret;
 
     /*
@@ -1723,9 +1749,28 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
      * to c->btree_roots we have to get any missing btree roots and
      * add them to this journal entry:
      */
-    if (i->type == BCH_JSET_ENTRY_btree_root) {
+    switch (i->type) {
+    case BCH_JSET_ENTRY_btree_root:
         bch2_journal_entry_to_btree_root(c, i);
         __set_bit(i->btree_id, &btree_roots_have);
+        break;
+    case BCH_JSET_ENTRY_write_buffer_keys:
+        EBUG_ON(!w->need_flush_to_write_buffer);
+
+        if (!wb.wb)
+            bch2_journal_keys_to_write_buffer_start(c, &wb, seq);
+
+        struct bkey_i *k;
+        jset_entry_for_each_key(i, k) {
+            ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k);
+            if (ret) {
+                bch2_fs_fatal_error(c, "-ENOMEM flushing journal keys to btree write buffer");
+                bch2_journal_keys_to_write_buffer_end(c, &wb);
+                return ret;
+            }
+        }
+        i->type = BCH_JSET_ENTRY_btree_keys;
+        break;
     }
 
     /* Can we merge with previous entry? */
@@ -1748,6 +1793,10 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
         memmove_u64s_down(prev, i, jset_u64s(u64s));
     }
 
+    if (wb.wb)
+        bch2_journal_keys_to_write_buffer_end(c, &wb);
+    w->need_flush_to_write_buffer = false;
+
     prev = prev ? vstruct_next(prev) : jset->start;
     jset->u64s = cpu_to_le32((u64 *) prev - jset->_data);
 
@@ -1755,8 +1804,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
 
     end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have);
 
-    bch2_journal_super_entries_add_common(c, &end,
-                    le64_to_cpu(jset->seq));
+    bch2_journal_super_entries_add_common(c, &end, seq);
     u64s = (u64 *) end - (u64 *) start;
     BUG_ON(u64s > j->entry_u64s_reserved);
 
@@ -1779,7 +1827,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
     SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
 
     if (!JSET_NO_FLUSH(jset) && journal_entry_empty(jset))
-        j->last_empty_seq = le64_to_cpu(jset->seq);
+        j->last_empty_seq = seq;
 
     if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
         validate_before_checksum = true;
@@ -1838,7 +1886,7 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *
         (!w->must_flush &&
          (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
          test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) {
         w->noflush = true;
         SET_JSET_NO_FLUSH(w->data, true);
         w->data->last_seq = 0;
         w->last_seq = 0;
@@ -1853,9 +1901,9 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *
     return 0;
 }
 
-void bch2_journal_write(struct closure *cl)
+CLOSURE_CALLBACK(bch2_journal_write)
 {
-    struct journal *j = container_of(cl, struct journal, io);
+    closure_type(j, struct journal, io);
     struct bch_fs *c = container_of(j, struct bch_fs, journal);
     struct bch_dev *ca;
     struct journal_buf *w = journal_last_unwritten_buf(j);
@@ -1875,9 +1923,11 @@ void bch2_journal_write(struct closure *cl)
     if (ret)
         goto err;
 
+    mutex_lock(&j->buf_lock);
     journal_buf_realloc(j, w);
 
     ret = bch2_journal_write_prep(j, w);
+    mutex_unlock(&j->buf_lock);
     if (ret)
         goto err;
 
@@ -60,6 +60,6 @@ void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
 
 int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
 
-void bch2_journal_write(struct closure *);
+CLOSURE_CALLBACK(bch2_journal_write);
 
 #endif /* _BCACHEFS_JOURNAL_IO_H */
@@ -3,6 +3,7 @@
 #include "bcachefs.h"
 #include "btree_key_cache.h"
 #include "btree_update.h"
+#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "errcode.h"
 #include "error.h"
@@ -50,20 +51,23 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
     return available;
 }
 
-static inline void journal_set_watermark(struct journal *j)
+void bch2_journal_set_watermark(struct journal *j)
 {
     struct bch_fs *c = container_of(j, struct bch_fs, journal);
     bool low_on_space = j->space[journal_space_clean].total * 4 <=
         j->space[journal_space_total].total;
     bool low_on_pin = fifo_free(&j->pin) < j->pin.size / 4;
-    unsigned watermark = low_on_space || low_on_pin
+    bool low_on_wb = bch2_btree_write_buffer_must_wait(c);
+    unsigned watermark = low_on_space || low_on_pin || low_on_wb
         ? BCH_WATERMARK_reclaim
         : BCH_WATERMARK_stripe;
 
     if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space],
                    &j->low_on_space_start, low_on_space) ||
         track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin],
-                   &j->low_on_pin_start, low_on_pin))
+                   &j->low_on_pin_start, low_on_pin) ||
+        track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full],
+                   &j->write_buffer_full_start, low_on_wb))
         trace_and_count(c, journal_full, c);
 
     swap(watermark, j->watermark);
@@ -230,7 +234,7 @@ void bch2_journal_space_available(struct journal *j)
     else
         clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
 
-    journal_set_watermark(j);
+    bch2_journal_set_watermark(j);
 out:
     j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
     j->cur_entry_error = ret;
@@ -303,6 +307,7 @@ void bch2_journal_reclaim_fast(struct journal *j)
      * all btree nodes got written out
      */
     while (!fifo_empty(&j->pin) &&
+           j->pin.front <= j->seq_ondisk &&
            !atomic_read(&fifo_peek_front(&j->pin).count)) {
         j->pin.front++;
         popped = true;
@@ -635,7 +640,6 @@ static u64 journal_seq_to_flush(struct journal *j)
 static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
 {
     struct bch_fs *c = container_of(j, struct bch_fs, journal);
-    bool kthread = (current->flags & PF_KTHREAD) != 0;
     u64 seq_to_flush;
     size_t min_nr, min_key_cache, nr_flushed;
     unsigned flags;
@@ -651,7 +655,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
     flags = memalloc_noreclaim_save();
 
     do {
-        if (kthread && kthread_should_stop())
+        if (kthread_should_stop())
             break;
 
         if (bch2_journal_error(j)) {
@@ -16,6 +16,7 @@ static inline void journal_reclaim_kick(struct journal *j)
 unsigned bch2_journal_dev_buckets_available(struct journal *,
                     struct journal_device *,
                     enum journal_space_from);
+void bch2_journal_set_watermark(struct journal *);
 void bch2_journal_space_available(struct journal *);
 
 static inline bool journal_pin_active(struct journal_entry_pin *pin)
@@ -36,6 +36,7 @@ struct journal_buf {
     bool noflush; /* write has already been kicked off, and was noflush */
     bool must_flush; /* something wants a flush */
     bool separate_flush;
+    bool need_flush_to_write_buffer;
 };
 
 /*
@@ -181,6 +182,12 @@ struct journal {
      */
     darray_u64 early_journal_entries;
 
+    /*
+     * Protects journal_buf->data, when accessing without a jorunal
+     * reservation: for synchronization between the btree write buffer code
+     * and the journal write path:
+     */
+    struct mutex buf_lock;
     /*
      * Two journal entries -- one is currently open for new entries, the
      * other is possibly being written out.
@@ -271,6 +278,7 @@ struct journal {
     u64 low_on_space_start;
     u64 low_on_pin_start;
     u64 max_in_flight_start;
+    u64 write_buffer_full_start;
 
     struct bch2_time_stats *flush_write_time;
     struct bch2_time_stats *noflush_write_time;
|
@ -27,6 +27,13 @@
|
|||||||
#include <linux/ioprio.h>
|
#include <linux/ioprio.h>
|
||||||
#include <linux/kthread.h>
|
#include <linux/kthread.h>
|
||||||
|
|
||||||
|
const char * const bch2_data_ops_strs[] = {
|
||||||
|
#define x(t, n, ...) [n] = #t,
|
||||||
|
BCH_DATA_OPS()
|
||||||
|
#undef x
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k)
|
static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k)
|
||||||
{
|
{
|
||||||
if (trace_move_extent_enabled()) {
|
if (trace_move_extent_enabled()) {
|
||||||
@ -163,12 +170,17 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
|
|||||||
atomic_read(&ctxt->write_sectors) != sectors_pending);
|
atomic_read(&ctxt->write_sectors) != sectors_pending);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
|
||||||
|
{
|
||||||
|
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
|
||||||
|
closure_sync(&ctxt->cl);
|
||||||
|
}
|
||||||
|
|
||||||
void bch2_moving_ctxt_exit(struct moving_context *ctxt)
|
void bch2_moving_ctxt_exit(struct moving_context *ctxt)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = ctxt->trans->c;
|
struct bch_fs *c = ctxt->trans->c;
|
||||||
|
|
||||||
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
|
bch2_moving_ctxt_flush_all(ctxt);
|
||||||
closure_sync(&ctxt->cl);
|
|
||||||
|
|
||||||
EBUG_ON(atomic_read(&ctxt->write_sectors));
|
EBUG_ON(atomic_read(&ctxt->write_sectors));
|
||||||
EBUG_ON(atomic_read(&ctxt->write_ios));
|
EBUG_ON(atomic_read(&ctxt->write_ios));
|
||||||
@ -216,7 +228,7 @@ void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c)
|
|||||||
trace_move_data(c, stats);
|
trace_move_data(c, stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_move_stats_init(struct bch_move_stats *stats, char *name)
|
void bch2_move_stats_init(struct bch_move_stats *stats, const char *name)
|
||||||
{
|
{
|
||||||
memset(stats, 0, sizeof(*stats));
|
memset(stats, 0, sizeof(*stats));
|
||||||
stats->data_type = BCH_DATA_user;
|
stats->data_type = BCH_DATA_user;
|
||||||
@ -484,8 +496,8 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
|
|||||||
struct bch_fs *c = ctxt->trans->c;
|
struct bch_fs *c = ctxt->trans->c;
|
||||||
u64 delay;
|
u64 delay;
|
||||||
|
|
||||||
if (ctxt->wait_on_copygc && !c->copygc_running) {
|
if (ctxt->wait_on_copygc && c->copygc_running) {
|
||||||
bch2_trans_unlock_long(ctxt->trans);
|
bch2_moving_ctxt_flush_all(ctxt);
|
||||||
wait_event_killable(c->copygc_running_wq,
|
wait_event_killable(c->copygc_running_wq,
|
||||||
!c->copygc_running ||
|
!c->copygc_running ||
|
||||||
kthread_should_stop());
|
kthread_should_stop());
|
||||||
@ -503,7 +515,7 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
|
|||||||
set_current_state(TASK_INTERRUPTIBLE);
|
set_current_state(TASK_INTERRUPTIBLE);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
|
if (kthread_should_stop()) {
|
||||||
__set_current_state(TASK_RUNNING);
|
__set_current_state(TASK_RUNNING);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -512,7 +524,7 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
|
|||||||
schedule_timeout(delay);
|
schedule_timeout(delay);
|
||||||
|
|
||||||
if (unlikely(freezing(current))) {
|
if (unlikely(freezing(current))) {
|
||||||
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
|
bch2_moving_ctxt_flush_all(ctxt);
|
||||||
try_to_freeze();
|
try_to_freeze();
|
||||||
}
|
}
|
||||||
} while (delay);
|
} while (delay);
|
||||||
@ -721,11 +733,10 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
|
|||||||
bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
|
bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
|
||||||
fragmentation = a->fragmentation_lru;
|
fragmentation = a->fragmentation_lru;
|
||||||
|
|
||||||
ret = bch2_btree_write_buffer_flush(trans);
|
ret = bch2_btree_write_buffer_tryflush(trans);
|
||||||
if (ret) {
|
bch_err_msg(c, ret, "flushing btree write buffer");
|
||||||
bch_err_msg(c, ret, "flushing btree write buffer");
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
}
|
|
||||||
|
|
||||||
while (!(ret = bch2_move_ratelimit(ctxt))) {
|
while (!(ret = bch2_move_ratelimit(ctxt))) {
|
||||||
bch2_trans_begin(trans);
|
bch2_trans_begin(trans);
|
||||||
@@ -856,18 +867,17 @@ typedef bool (*move_btree_pred)(struct bch_fs *, void *,
                 struct data_update_opts *);
 
 static int bch2_move_btree(struct bch_fs *c,
-               enum btree_id start_btree_id, struct bpos start_pos,
-               enum btree_id end_btree_id, struct bpos end_pos,
+               struct bbpos start,
+               struct bbpos end,
                move_btree_pred pred, void *arg,
                struct bch_move_stats *stats)
 {
-    bool kthread = (current->flags & PF_KTHREAD) != 0;
     struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
     struct moving_context ctxt;
     struct btree_trans *trans;
     struct btree_iter iter;
     struct btree *b;
-    enum btree_id id;
+    enum btree_id btree;
     struct data_update_opts data_opts;
     int ret = 0;
 
@@ -878,26 +888,26 @@ static int bch2_move_btree(struct bch_fs *c,
 
     stats->data_type = BCH_DATA_btree;
 
-    for (id = start_btree_id;
-         id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1);
-         id++) {
-        stats->pos = BBPOS(id, POS_MIN);
+    for (btree = start.btree;
+         btree <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
+         btree ++) {
+        stats->pos = BBPOS(btree, POS_MIN);
 
-        if (!bch2_btree_id_root(c, id)->b)
+        if (!bch2_btree_id_root(c, btree)->b)
             continue;
 
-        bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0,
+        bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, 0,
                       BTREE_ITER_PREFETCH);
 retry:
         ret = 0;
         while (bch2_trans_begin(trans),
                (b = bch2_btree_iter_peek_node(&iter)) &&
                !(ret = PTR_ERR_OR_ZERO(b))) {
-            if (kthread && kthread_should_stop())
+            if (kthread_should_stop())
                 break;
 
-            if ((cmp_int(id, end_btree_id) ?:
-                 bpos_cmp(b->key.k.p, end_pos)) > 0)
+            if ((cmp_int(btree, end.btree) ?:
+                 bpos_cmp(b->key.k.p, end.pos)) > 0)
                 break;
 
             stats->pos = BBPOS(iter.btree_id, iter.pos);
@@ -918,7 +928,7 @@ next:
 
         bch2_trans_iter_exit(trans, &iter);
 
-        if (kthread && kthread_should_stop())
+        if (kthread_should_stop())
             break;
     }
 
@@ -1034,8 +1044,8 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
     int ret;
 
     ret = bch2_move_btree(c,
-                  0, POS_MIN,
-                  BTREE_ID_NR, SPOS_MAX,
+                  BBPOS_MIN,
+                  BBPOS_MAX,
                   rewrite_old_nodes_pred, c, stats);
     if (!ret) {
         mutex_lock(&c->sb_lock);
@@ -1050,71 +1060,101 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
     return ret;
 }
 
+static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
+                     struct bkey_s_c k,
+                     struct bch_io_opts *io_opts,
+                     struct data_update_opts *data_opts)
+{
+    unsigned durability = bch2_bkey_durability(c, k);
+    unsigned replicas = bkey_is_btree_ptr(k.k)
+        ? c->opts.metadata_replicas
+        : io_opts->data_replicas;
+    const union bch_extent_entry *entry;
+    struct extent_ptr_decoded p;
+    unsigned i = 0;
+
+    bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
+        unsigned d = bch2_extent_ptr_durability(c, &p);
+
+        if (d && durability - d >= replicas) {
+            data_opts->kill_ptrs |= BIT(i);
+            durability -= d;
+        }
+
+        i++;
+    }
+
+    return data_opts->kill_ptrs != 0;
+}
+
+static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
+                       struct btree *b,
+                       struct bch_io_opts *io_opts,
+                       struct data_update_opts *data_opts)
+{
+    return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+
 int bch2_data_job(struct bch_fs *c,
           struct bch_move_stats *stats,
           struct bch_ioctl_data op)
 {
+    struct bbpos start = BBPOS(op.start_btree, op.start_pos);
+    struct bbpos end = BBPOS(op.end_btree, op.end_pos);
     int ret = 0;
 
+    if (op.op >= BCH_DATA_OP_NR)
+        return -EINVAL;
+
+    bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]);
+
     switch (op.op) {
-    case BCH_DATA_OP_REREPLICATE:
-        bch2_move_stats_init(stats, "rereplicate");
+    case BCH_DATA_OP_rereplicate:
         stats->data_type = BCH_DATA_journal;
         ret = bch2_journal_flush_device_pins(&c->journal, -1);
-        ret = bch2_move_btree(c,
-                      op.start_btree, op.start_pos,
-                      op.end_btree, op.end_pos,
+        ret = bch2_move_btree(c, start, end,
                       rereplicate_btree_pred, c, stats) ?: ret;
-        ret = bch2_replicas_gc2(c) ?: ret;
-
-        ret = bch2_move_data(c,
-                     (struct bbpos) { op.start_btree, op.start_pos },
-                     (struct bbpos) { op.end_btree, op.end_pos },
+        ret = bch2_move_data(c, start, end,
                      NULL,
                      stats,
                      writepoint_hashed((unsigned long) current),
                      true,
                      rereplicate_pred, c) ?: ret;
         ret = bch2_replicas_gc2(c) ?: ret;
-
-        bch2_move_stats_exit(stats, c);
         break;
-    case BCH_DATA_OP_MIGRATE:
+    case BCH_DATA_OP_migrate:
         if (op.migrate.dev >= c->sb.nr_devices)
             return -EINVAL;
 
-        bch2_move_stats_init(stats, "migrate");
         stats->data_type = BCH_DATA_journal;
        ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
-        ret = bch2_move_btree(c,
-                      op.start_btree, op.start_pos,
-                      op.end_btree, op.end_pos,
+        ret = bch2_move_btree(c, start, end,
                       migrate_btree_pred, &op, stats) ?: ret;
-        ret = bch2_replicas_gc2(c) ?: ret;
-
-        ret = bch2_move_data(c,
-                     (struct bbpos) { op.start_btree, op.start_pos },
-                     (struct bbpos) { op.end_btree, op.end_pos },
+        ret = bch2_move_data(c, start, end,
                      NULL,
                      stats,
                      writepoint_hashed((unsigned long) current),
                      true,
                      migrate_pred, &op) ?: ret;
         ret = bch2_replicas_gc2(c) ?: ret;
-
-        bch2_move_stats_exit(stats, c);
         break;
-    case BCH_DATA_OP_REWRITE_OLD_NODES:
-        bch2_move_stats_init(stats, "rewrite_old_nodes");
+    case BCH_DATA_OP_rewrite_old_nodes:
         ret = bch2_scan_old_btree_nodes(c, stats);
-        bch2_move_stats_exit(stats, c);
+        break;
+    case BCH_DATA_OP_drop_extra_replicas:
+        ret = bch2_move_btree(c, start, end,
+                      drop_extra_replicas_btree_pred, c, stats) ?: ret;
+        ret = bch2_move_data(c, start, end, NULL, stats,
+                     writepoint_hashed((unsigned long) current),
+                     true,
+                     drop_extra_replicas_pred, c) ?: ret;
+        ret = bch2_replicas_gc2(c) ?: ret;
         break;
     default:
        ret = -EINVAL;
     }
 
+    bch2_move_stats_exit(stats, c);
     return ret;
 }
 
|
@ -56,6 +56,8 @@ do { \
|
|||||||
typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c,
|
typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c,
|
||||||
struct bch_io_opts *, struct data_update_opts *);
|
struct bch_io_opts *, struct data_update_opts *);
|
||||||
|
|
||||||
|
extern const char * const bch2_data_ops_strs[];
|
||||||
|
|
||||||
void bch2_moving_ctxt_exit(struct moving_context *);
|
void bch2_moving_ctxt_exit(struct moving_context *);
|
||||||
void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *,
|
void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *,
|
||||||
struct bch_ratelimit *, struct bch_move_stats *,
|
struct bch_ratelimit *, struct bch_move_stats *,
|
||||||
@ -130,7 +132,7 @@ int bch2_data_job(struct bch_fs *,
|
|||||||
|
|
||||||
void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *);
|
void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *);
|
||||||
void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *);
|
void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *);
|
||||||
void bch2_move_stats_init(struct bch_move_stats *, char *);
|
void bch2_move_stats_init(struct bch_move_stats *, const char *);
|
||||||
|
|
||||||
void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *);
|
void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *);
|
||||||
|
|
||||||
|
@ -153,8 +153,11 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
|
|||||||
|
|
||||||
move_buckets_wait(ctxt, buckets_in_flight, false);
|
move_buckets_wait(ctxt, buckets_in_flight, false);
|
||||||
|
|
||||||
ret = bch2_btree_write_buffer_flush(trans);
|
ret = bch2_btree_write_buffer_tryflush(trans);
|
||||||
if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()",
|
if (bch2_err_matches(ret, EROFS))
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_tryflush()",
|
||||||
__func__, bch2_err_str(ret)))
|
__func__, bch2_err_str(ret)))
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
@ -233,11 +233,6 @@ enum fsck_err_opts {
|
|||||||
OPT_BOOL(), \
|
OPT_BOOL(), \
|
||||||
BCH2_NO_SB_OPT, true, \
|
BCH2_NO_SB_OPT, true, \
|
||||||
NULL, "Stash pointer to in memory btree node in btree ptr")\
|
NULL, "Stash pointer to in memory btree node in btree ptr")\
|
||||||
x(btree_write_buffer_size, u32, \
|
|
||||||
OPT_FS|OPT_MOUNT, \
|
|
||||||
OPT_UINT(16, (1U << 20) - 1), \
|
|
||||||
BCH2_NO_SB_OPT, 1U << 13, \
|
|
||||||
NULL, "Number of btree write buffer entries") \
|
|
||||||
x(gc_reserve_percent, u8, \
|
x(gc_reserve_percent, u8, \
|
||||||
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
|
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
|
||||||
OPT_UINT(5, 21), \
|
OPT_UINT(5, 21), \
|
||||||
|
@ -159,6 +159,8 @@ static int bch2_journal_replay(struct bch_fs *c)
|
|||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BUG_ON(!atomic_read(&keys->ref));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* First, attempt to replay keys in sorted order. This is more
|
* First, attempt to replay keys in sorted order. This is more
|
||||||
* efficient - better locality of btree access - but some might fail if
|
* efficient - better locality of btree access - but some might fail if
|
||||||
@ -218,14 +220,15 @@ static int bch2_journal_replay(struct bch_fs *c)
|
|||||||
bch2_trans_put(trans);
|
bch2_trans_put(trans);
|
||||||
trans = NULL;
|
trans = NULL;
|
||||||
|
|
||||||
|
if (!c->opts.keep_journal)
|
||||||
|
bch2_journal_keys_put_initial(c);
|
||||||
|
|
||||||
replay_now_at(j, j->replay_journal_seq_end);
|
replay_now_at(j, j->replay_journal_seq_end);
|
||||||
j->replay_journal_seq = 0;
|
j->replay_journal_seq = 0;
|
||||||
|
|
||||||
bch2_journal_set_replay_done(j);
|
bch2_journal_set_replay_done(j);
|
||||||
bch2_journal_flush_all_pins(j);
|
|
||||||
ret = bch2_journal_error(j);
|
|
||||||
|
|
||||||
if (keys->nr && !ret)
|
if (keys->nr)
|
||||||
bch2_journal_log_msg(c, "journal replay finished");
|
bch2_journal_log_msg(c, "journal replay finished");
|
||||||
err:
|
err:
|
||||||
if (trans)
|
if (trans)
|
||||||
@ -935,8 +938,12 @@ use_clean:
|
|||||||
|
|
||||||
bch2_move_stats_init(&stats, "recovery");
|
bch2_move_stats_init(&stats, "recovery");
|
||||||
|
|
||||||
bch_info(c, "scanning for old btree nodes");
|
struct printbuf buf = PRINTBUF;
|
||||||
ret = bch2_fs_read_write(c) ?:
|
bch2_version_to_text(&buf, c->sb.version_min);
|
||||||
|
bch_info(c, "scanning for old btree nodes: min_version %s", buf.buf);
|
||||||
|
printbuf_exit(&buf);
|
||||||
|
|
||||||
|
ret = bch2_fs_read_write_early(c) ?:
|
||||||
bch2_scan_old_btree_nodes(c, &stats);
|
bch2_scan_old_btree_nodes(c, &stats);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
@ -953,10 +960,8 @@ out:
|
|||||||
bch2_flush_fsck_errs(c);
|
bch2_flush_fsck_errs(c);
|
||||||
|
|
||||||
if (!c->opts.keep_journal &&
|
if (!c->opts.keep_journal &&
|
||||||
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) {
|
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
|
||||||
bch2_journal_keys_free(&c->journal_keys);
|
bch2_journal_keys_put_initial(c);
|
||||||
bch2_journal_entries_free(c);
|
|
||||||
}
|
|
||||||
kfree(clean);
|
kfree(clean);
|
||||||
|
|
||||||
if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) {
|
if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) {
|
||||||
|
@@ -11,7 +11,7 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
 
 /* Replicas tracking - in memory: */
 
-static void verify_replicas_entry(struct bch_replicas_entry *e)
+static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
     unsigned i;
@@ -26,7 +26,7 @@ static void verify_replicas_entry(struct bch_replicas_entry *e)
 #endif
 }
 
-void bch2_replicas_entry_sort(struct bch_replicas_entry *e)
+void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
 {
     bubble_sort(e->devs, e->nr_devs, u8_cmp);
 }
@@ -53,7 +53,7 @@ static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
 }
 
 void bch2_replicas_entry_to_text(struct printbuf *out,
-                 struct bch_replicas_entry *e)
+                 struct bch_replicas_entry_v1 *e)
 {
     unsigned i;
 
@@ -71,7 +71,7 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
 void bch2_cpu_replicas_to_text(struct printbuf *out,
                    struct bch_replicas_cpu *r)
 {
-    struct bch_replicas_entry *e;
+    struct bch_replicas_entry_v1 *e;
     bool first = true;
 
     for_each_cpu_replicas_entry(r, e) {
@@ -84,7 +84,7 @@ void bch2_cpu_replicas_to_text(struct printbuf *out,
 }
 
 static void extent_to_replicas(struct bkey_s_c k,
-                   struct bch_replicas_entry *r)
+                   struct bch_replicas_entry_v1 *r)
 {
     struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
     const union bch_extent_entry *entry;
@@ -104,7 +104,7 @@ static void extent_to_replicas(struct bkey_s_c k,
 }
 
 static void stripe_to_replicas(struct bkey_s_c k,
-                   struct bch_replicas_entry *r)
+                   struct bch_replicas_entry_v1 *r)
 {
     struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
     const struct bch_extent_ptr *ptr;
@@ -117,7 +117,7 @@ static void stripe_to_replicas(struct bkey_s_c k,
         r->devs[r->nr_devs++] = ptr->dev;
 }
 
-void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
+void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e,
                struct bkey_s_c k)
 {
     e->nr_devs = 0;
@@ -142,7 +142,7 @@ void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
     bch2_replicas_entry_sort(e);
 }
 
-void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
+void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
                   enum bch_data_type data_type,
                   struct bch_devs_list devs)
 {
@@ -164,7 +164,7 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
 
 static struct bch_replicas_cpu
 cpu_replicas_add_entry(struct bch_replicas_cpu *old,
-               struct bch_replicas_entry *new_entry)
+               struct bch_replicas_entry_v1 *new_entry)
 {
     unsigned i;
     struct bch_replicas_cpu new = {
@@ -194,7 +194,7 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
 }
 
 static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
-                       struct bch_replicas_entry *search)
+                       struct bch_replicas_entry_v1 *search)
 {
     int idx, entry_size = replicas_entry_bytes(search);
 
@@ -212,7 +212,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
 }
 
 int bch2_replicas_entry_idx(struct bch_fs *c,
-                struct bch_replicas_entry *search)
+                struct bch_replicas_entry_v1 *search)
 {
     bch2_replicas_entry_sort(search);
 
@@ -220,13 +220,13 @@ int bch2_replicas_entry_idx(struct bch_fs *c,
 }
 
 static bool __replicas_has_entry(struct bch_replicas_cpu *r,
-                 struct bch_replicas_entry *search)
+                 struct bch_replicas_entry_v1 *search)
 {
     return __replicas_entry_idx(r, search) >= 0;
 }
 
 bool bch2_replicas_marked(struct bch_fs *c,
-              struct bch_replicas_entry *search)
+              struct bch_replicas_entry_v1 *search)
 {
     bool marked;
 
@@ -343,7 +343,7 @@ err:
 static unsigned reserve_journal_replicas(struct bch_fs *c,
                      struct bch_replicas_cpu *r)
 {
-    struct bch_replicas_entry *e;
+    struct bch_replicas_entry_v1 *e;
     unsigned journal_res_u64s = 0;
 
     /* nr_inodes: */
@@ -368,7 +368,7 @@ static unsigned reserve_journal_replicas(struct bch_fs *c,
 
 noinline
 static int bch2_mark_replicas_slowpath(struct bch_fs *c,
-                       struct bch_replicas_entry *new_entry)
+                       struct bch_replicas_entry_v1 *new_entry)
 {
     struct bch_replicas_cpu new_r, new_gc;
     int ret = 0;
@@ -433,7 +433,7 @@ err:
     goto out;
 }
 
-int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry *r)
+int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
 {
     return likely(bch2_replicas_marked(c, r))
         ? 0 : bch2_mark_replicas_slowpath(c, r);
@@ -484,7 +484,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
 
 int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
 {
-    struct bch_replicas_entry *e;
+    struct bch_replicas_entry_v1 *e;
     unsigned i = 0;
 
     lockdep_assert_held(&c->replicas_gc_lock);
@@ -559,7 +559,7 @@ retry:
     }
 
     for (i = 0; i < c->replicas.nr; i++) {
-        struct bch_replicas_entry *e =
+        struct bch_replicas_entry_v1 *e =
             cpu_replicas_entry(&c->replicas, i);
 
         if (e->data_type == BCH_DATA_journal ||
@@ -590,7 +590,7 @@ retry:
 }
 
 int bch2_replicas_set_usage(struct bch_fs *c,
-                struct bch_replicas_entry *r,
+                struct bch_replicas_entry_v1 *r,
                 u64 sectors)
 {
     int ret, idx = bch2_replicas_entry_idx(c, r);
@@ -623,7 +623,7 @@ static int
 __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
                    struct bch_replicas_cpu *cpu_r)
 {
-    struct bch_replicas_entry *e, *dst;
+    struct bch_replicas_entry_v1 *e, *dst;
     unsigned nr = 0, entry_size = 0, idx = 0;
 
     for_each_replicas_entry(sb_r, e) {
@@ -661,7 +661,7 @@ __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
         nr++;
     }
 
-    entry_size += sizeof(struct bch_replicas_entry) -
+    entry_size += sizeof(struct bch_replicas_entry_v1) -
         sizeof(struct bch_replicas_entry_v0);
 
     cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
@@ -672,7 +672,7 @@ __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
     cpu_r->entry_size = entry_size;
 
     for_each_replicas_entry(sb_r, e) {
-        struct bch_replicas_entry *dst =
+        struct bch_replicas_entry_v1 *dst =
             cpu_replicas_entry(cpu_r, idx++);
 
         dst->data_type = e->data_type;
@@ -716,7 +716,7 @@ static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
 {
     struct bch_sb_field_replicas_v0 *sb_r;
     struct bch_replicas_entry_v0 *dst;
-    struct bch_replicas_entry *src;
+    struct bch_replicas_entry_v1 *src;
     size_t bytes;
 
     bytes = sizeof(struct bch_sb_field_replicas);
@@ -754,7 +754,7 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
                         struct bch_replicas_cpu *r)
 {
     struct bch_sb_field_replicas *sb_r;
-    struct bch_replicas_entry *dst, *src;
+    struct bch_replicas_entry_v1 *dst, *src;
     bool need_v1 = false;
     size_t bytes;
 
@@ -805,7 +805,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
           memcmp, NULL);
 
     for (i = 0; i < cpu_r->nr; i++) {
-        struct bch_replicas_entry *e =
+        struct bch_replicas_entry_v1 *e =
             cpu_replicas_entry(cpu_r, i);
 
         if (e->data_type >= BCH_DATA_NR) {
@@ -835,7 +835,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
         }
 
         if (i + 1 < cpu_r->nr) {
-            struct bch_replicas_entry *n =
+            struct bch_replicas_entry_v1 *n =
                 cpu_replicas_entry(cpu_r, i + 1);
 
             BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);
@@ -872,7 +872,7 @@ static void bch2_sb_replicas_to_text(struct printbuf *out,
                      struct bch_sb_field *f)
 {
     struct bch_sb_field_replicas *r = field_to_type(f, replicas);
-    struct bch_replicas_entry *e;
+    struct bch_replicas_entry_v1 *e;
     bool first = true;
 
     for_each_replicas_entry(r, e) {
@@ -934,7 +934,7 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
 bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
                unsigned flags, bool print)
 {
-    struct bch_replicas_entry *e;
+    struct bch_replicas_entry_v1 *e;
     bool ret = true;
 
     percpu_down_read(&c->mark_lock);
@@ -994,7 +994,7 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
     replicas_v0 = bch2_sb_field_get(sb, replicas_v0);
 
     if (replicas) {
-        struct bch_replicas_entry *r;
+        struct bch_replicas_entry_v1 *r;
 
         for_each_replicas_entry(replicas, r)
             for (i = 0; i < r->nr_devs; i++)
@@ -6,26 +6,26 @@
 #include "eytzinger.h"
 #include "replicas_types.h"
 
-void bch2_replicas_entry_sort(struct bch_replicas_entry *);
+void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
 void bch2_replicas_entry_to_text(struct printbuf *,
-                 struct bch_replicas_entry *);
+                 struct bch_replicas_entry_v1 *);
 void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
 
-static inline struct bch_replicas_entry *
+static inline struct bch_replicas_entry_v1 *
 cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
 {
     return (void *) r->entries + r->entry_size * i;
 }
 
 int bch2_replicas_entry_idx(struct bch_fs *,
-                struct bch_replicas_entry *);
+                struct bch_replicas_entry_v1 *);
 
-void bch2_devlist_to_replicas(struct bch_replicas_entry *,
+void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *,
                   enum bch_data_type,
                   struct bch_devs_list);
-bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry *);
+bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry_v1 *);
 int bch2_mark_replicas(struct bch_fs *,
-               struct bch_replicas_entry *);
+               struct bch_replicas_entry_v1 *);
 
 static inline struct replicas_delta *
 replicas_delta_next(struct replicas_delta *d)
@@ -35,9 +35,9 @@ replicas_delta_next(struct replicas_delta *d)
 
 int bch2_replicas_delta_list_mark(struct bch_fs *, struct replicas_delta_list *);
 
-void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c);
+void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *, struct bkey_s_c);
 
-static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e,
+static inline void bch2_replicas_entry_cached(struct bch_replicas_entry_v1 *e,
                           unsigned dev)
 {
     e->data_type = BCH_DATA_cached;
@@ -57,7 +57,7 @@ int bch2_replicas_gc_start(struct bch_fs *, unsigned);
 int bch2_replicas_gc2(struct bch_fs *);
 
 int bch2_replicas_set_usage(struct bch_fs *,
-                struct bch_replicas_entry *,
+                struct bch_replicas_entry_v1 *,
                 u64);
 
 #define for_each_cpu_replicas_entry(_r, _i) \
@@ -5,12 +5,12 @@
 struct bch_replicas_cpu {
     unsigned nr;
     unsigned entry_size;
-    struct bch_replicas_entry *entries;
+    struct bch_replicas_entry_v1 *entries;
 };
 
 struct replicas_delta {
     s64 delta;
-    struct bch_replicas_entry r;
+    struct bch_replicas_entry_v1 r;
 } __packed;
 
 struct replicas_delta_list {
@@ -235,7 +235,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,
     }
 
     for (i = 0; i < c->replicas.nr; i++) {
-        struct bch_replicas_entry *e =
+        struct bch_replicas_entry_v1 *e =
             cpu_replicas_entry(&c->replicas, i);
         struct jset_entry_data_usage *u =
             container_of(jset_entry_init(end, sizeof(*u) + e->nr_devs),
@@ -248,7 +248,8 @@
     x(root_inode_not_dir, 240) \
     x(dir_loop, 241) \
     x(hash_table_key_duplicate, 242) \
-    x(hash_table_key_wrong_offset, 243)
+    x(hash_table_key_wrong_offset, 243) \
+    x(unlinked_inode_not_on_deleted_list, 244)
 
 enum bch_sb_error_id {
 #define x(t, n) BCH_FSCK_ERR_##t = n,
@@ -324,7 +324,7 @@ bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
 }
 EXPORT_SYMBOL_GPL(six_relock_ip);
 
-#ifdef CONFIG_LOCK_SPIN_ON_OWNER
+#ifdef CONFIG_BCACHEFS_SIX_OPTIMISTIC_SPIN
 
 static inline bool six_owner_running(struct six_lock *lock)
 {
@@ -166,6 +166,7 @@ void bch2_free_super(struct bch_sb_handle *sb)
     if (!IS_ERR_OR_NULL(sb->bdev))
         blkdev_put(sb->bdev, sb->holder);
     kfree(sb->holder);
+    kfree(sb->sb_name);
 
     kfree(sb->sb);
     memset(sb, 0, sizeof(*sb));
@@ -657,12 +658,13 @@ reread:
     return 0;
 }
 
-int bch2_read_super(const char *path, struct bch_opts *opts,
-            struct bch_sb_handle *sb)
+int __bch2_read_super(const char *path, struct bch_opts *opts,
+              struct bch_sb_handle *sb, bool ignore_notbchfs_msg)
 {
     u64 offset = opt_get(*opts, sb);
     struct bch_sb_layout layout;
     struct printbuf err = PRINTBUF;
+    struct printbuf err2 = PRINTBUF;
     __le64 *i;
     int ret;
 #ifndef __KERNEL__
@@ -675,6 +677,10 @@ retry:
     if (!sb->holder)
         return -ENOMEM;
 
+    sb->sb_name = kstrdup(path, GFP_KERNEL);
+    if (!sb->sb_name)
+        return -ENOMEM;
+
 #ifndef __KERNEL__
     if (opt_get(*opts, direct_io) == false)
         sb->mode |= BLK_OPEN_BUFFERED;
@@ -721,8 +727,14 @@ retry:
     if (opt_defined(*opts, sb))
         goto err;
 
-    printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s\n",
+    prt_printf(&err2, "bcachefs (%s): error reading default superblock: %s\n",
            path, err.buf);
+    if (ret == -BCH_ERR_invalid_sb_magic && ignore_notbchfs_msg)
+        printk(KERN_INFO "%s", err2.buf);
+    else
+        printk(KERN_ERR "%s", err2.buf);
+
+    printbuf_exit(&err2);
     printbuf_reset(&err);
 
     /*
@@ -798,6 +810,20 @@ err_no_print:
     goto out;
 }
 
+int bch2_read_super(const char *path, struct bch_opts *opts,
+            struct bch_sb_handle *sb)
+{
+    return __bch2_read_super(path, opts, sb, false);
+}
+
+/* provide a silenced version for mount.bcachefs */
+
+int bch2_read_super_silent(const char *path, struct bch_opts *opts,
+               struct bch_sb_handle *sb)
+{
+    return __bch2_read_super(path, opts, sb, true);
+}
+
 /* write superblock: */
 
 static void write_super_endio(struct bio *bio)
@@ -74,6 +74,7 @@ void bch2_free_super(struct bch_sb_handle *);
 int bch2_sb_realloc(struct bch_sb_handle *, unsigned);
 
 int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *);
+int bch2_read_super_silent(const char *, struct bch_opts *, struct bch_sb_handle *);
 int bch2_write_super(struct bch_fs *);
 void __bch2_check_set_feature(struct bch_fs *, unsigned);
 
@@ -314,7 +314,8 @@ void bch2_fs_read_only(struct bch_fs *c)
 		BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
 		BUG_ON(atomic_read(&c->btree_cache.dirty));
 		BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
-		BUG_ON(c->btree_write_buffer.state.nr);
+		BUG_ON(c->btree_write_buffer.inc.keys.nr);
+		BUG_ON(c->btree_write_buffer.flushing.keys.nr);
 
 		bch_verbose(c, "marking filesystem clean");
 		bch2_fs_mark_clean(c);
@@ -504,8 +505,8 @@ static void __bch2_fs_free(struct bch_fs *c)
 	bch2_io_clock_exit(&c->io_clock[WRITE]);
 	bch2_io_clock_exit(&c->io_clock[READ]);
 	bch2_fs_compress_exit(c);
-	bch2_journal_keys_free(&c->journal_keys);
-	bch2_journal_entries_free(c);
+	bch2_journal_keys_put_initial(c);
+	BUG_ON(atomic_read(&c->journal_keys.ref));
 	bch2_fs_btree_write_buffer_exit(c);
 	percpu_free_rwsem(&c->mark_lock);
 	free_percpu(c->online_reserved);
@@ -704,6 +705,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 
 	init_rwsem(&c->gc_lock);
 	mutex_init(&c->gc_gens_lock);
+	atomic_set(&c->journal_keys.ref, 1);
+	c->journal_keys.initial_ref_held = true;
 
 	for (i = 0; i < BCH_TIME_STAT_NR; i++)
 		bch2_time_stats_init(&c->times[i]);
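bch2_fs_alloc() now starts c->journal_keys.ref at 1 and marks the initial reference as held; __bch2_fs_free() (two hunks up) drops it with bch2_journal_keys_put_initial() and asserts the count has reached zero. A rough sketch of the pairing this implies — the body is an assumption about the helper's shape, not the commit's implementation:

	static void journal_keys_put_initial_sketch(struct bch_fs *c)
	{
		if (!c->journal_keys.initial_ref_held)
			return;

		c->journal_keys.initial_ref_held = false;
		if (atomic_dec_and_test(&c->journal_keys.ref)) {
			/* last reference dropped: the pinned journal keys can be freed here */
		}
	}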
@@ -5,6 +5,7 @@
 struct bch_sb_handle {
 	struct bch_sb		*sb;
 	struct block_device	*bdev;
+	char			*sb_name;
 	struct bio		*bio;
 	void			*holder;
 	size_t			buffer_size;
@@ -496,7 +496,7 @@ STORE(bch2_fs)
 
 		sc.gfp_mask = GFP_KERNEL;
 		sc.nr_to_scan = strtoul_or_return(buf);
-		c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
+		c->btree_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
 	}
 
 	if (attr == &sysfs_btree_wakeup)
@@ -188,6 +188,25 @@ DEFINE_EVENT(bch_fs, journal_entry_full,
 	TP_ARGS(c)
 );
 
+TRACE_EVENT(journal_entry_close,
+	TP_PROTO(struct bch_fs *c, unsigned bytes),
+	TP_ARGS(c, bytes),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev		)
+		__field(u32,		bytes		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= c->dev;
+		__entry->bytes		= bytes;
+	),
+
+	TP_printk("%d,%d entry bytes %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->bytes)
+);
+
 DEFINE_EVENT(bio, journal_write,
 	TP_PROTO(struct bio *bio),
 	TP_ARGS(bio)
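TRACE_EVENT(journal_entry_close, ...) generates a trace_journal_entry_close() helper. A hypothetical call site (the real caller lives in the journal code and is not part of this hunk) would report how large a journal entry was when it was closed; entry_bytes here is an illustrative variable name:

	if (trace_journal_entry_close_enabled())
		trace_journal_entry_close(c, entry_bytes);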
@@ -1313,21 +1332,38 @@ TRACE_EVENT(write_buffer_flush,
 		  __entry->nr, __entry->size, __entry->skipped, __entry->fast)
 );
 
-TRACE_EVENT(write_buffer_flush_slowpath,
-	TP_PROTO(struct btree_trans *trans, size_t nr, size_t size),
-	TP_ARGS(trans, nr, size),
+TRACE_EVENT(write_buffer_flush_sync,
+	TP_PROTO(struct btree_trans *trans, unsigned long caller_ip),
+	TP_ARGS(trans, caller_ip),
 
 	TP_STRUCT__entry(
-		__field(size_t,		nr		)
-		__field(size_t,		size		)
+		__array(char,		trans_fn, 32	)
+		__field(unsigned long,	caller_ip	)
 	),
 
 	TP_fast_assign(
-		__entry->nr	= nr;
-		__entry->size	= size;
+		strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+		__entry->caller_ip	= caller_ip;
 	),
 
-	TP_printk("%zu/%zu", __entry->nr, __entry->size)
+	TP_printk("%s %pS", __entry->trans_fn, (void *) __entry->caller_ip)
+);
+
+TRACE_EVENT(write_buffer_flush_slowpath,
+	TP_PROTO(struct btree_trans *trans, size_t slowpath, size_t total),
+	TP_ARGS(trans, slowpath, total),
+
+	TP_STRUCT__entry(
+		__field(size_t,		slowpath	)
+		__field(size_t,		total		)
+	),
+
+	TP_fast_assign(
+		__entry->slowpath	= slowpath;
+		__entry->total		= total;
+	),
+
+	TP_printk("%zu/%zu", __entry->slowpath, __entry->total)
 );
 
 #endif /* _TRACE_BCACHEFS_H */
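The old write_buffer_flush_slowpath event (nr/size) is effectively split: write_buffer_flush_sync records which transaction requested a synchronous flush and from where, while the new slowpath event reports how many keys fell back to the slow path out of the total. Hypothetical call sites, with illustrative argument names:

	/* at the start of a synchronous flush request: */
	trace_write_buffer_flush_sync(trans, _RET_IP_);

	/* after a flush pass that could not insert some keys the fast way: */
	trace_write_buffer_flush_slowpath(trans, slowpath_keys, total_keys);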
@@ -36,7 +36,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
 		closure_debug_destroy(cl);
 
 		if (destructor)
-			destructor(cl);
+			destructor(&cl->work);
 
 		if (parent)
 			closure_put(parent);
@@ -108,8 +108,9 @@ struct closure_syncer {
 	int			done;
 };
 
-static void closure_sync_fn(struct closure *cl)
+static CLOSURE_CALLBACK(closure_sync_fn)
 {
+	struct closure *cl = container_of(ws, struct closure, work);
 	struct closure_syncer	*s = cl->s;
 	struct task_struct	*p;
 
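Closure callbacks now take a struct work_struct * so the same function pointer can be queued directly on a workqueue; CLOSURE_CALLBACK() hides the new signature, and the container_of() line added above recovers the closure from its embedded work item. A sketch of the resulting pattern, assuming the macro expands to roughly void name(struct work_struct *ws) (the authoritative definition is in the closure header):

	static CLOSURE_CALLBACK(example_fn)
	{
		struct closure *cl = container_of(ws, struct closure, work);

		/* ... do the work this closure represents ... */

		closure_return(cl);	/* drop our ref; run or queue the parent */
	}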
@@ -12,7 +12,12 @@
 static LIST_HEAD(shrinker_list);
 static DEFINE_MUTEX(shrinker_lock);
 
-int register_shrinker(struct shrinker *shrinker, const char *fmt, ...)
+struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...)
+{
+	return calloc(sizeof(struct shrinker), 1);
+}
+
+int shrinker_register(struct shrinker *shrinker)
 {
 	mutex_lock(&shrinker_lock);
 	list_add_tail(&shrinker->list, &shrinker_list);
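The userspace shim now mirrors the kernel's split shrinker API: allocate a shrinker, fill in its callbacks, then register it. A sketch of the expected calling sequence — the callback names and the private_data field are assumptions based on the kernel-side API, not code from this commit:

	struct shrinker *shrink = shrinker_alloc(0, "bcachefs-btree-cache");
	if (!shrink)
		return -ENOMEM;

	shrink->count_objects	= bch2_btree_cache_count;	/* assumed callback names */
	shrink->scan_objects	= bch2_btree_cache_scan;
	shrink->private_data	= c;
	shrinker_register(shrink);

	c->btree_cache.shrink	= shrink;	/* now a pointer, cf. the sysfs hunk above */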