Update bcachefs sources to feaca6edbd24 mean and variance: Promote to lib/math

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

parent 7465d1934c
commit 97c86db4f2
@@ -1 +1 @@
-8c94740b1bf8645d3398170f41c9c88b78332252
+feaca6edbd240bbd98d261097a97037c56a09eec
@@ -97,11 +97,11 @@ int cmd_fsck(int argc, char *argv[])
 		exit(8);
 	}
 
-	if (test_bit(BCH_FS_ERRORS_FIXED, &c->flags)) {
+	if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
 		fprintf(stderr, "%s: errors fixed\n", c->name);
 		ret |= 1;
 	}
-	if (test_bit(BCH_FS_ERROR, &c->flags)) {
+	if (test_bit(BCH_FS_error, &c->flags)) {
 		fprintf(stderr, "%s: still has errors\n", c->name);
 		ret |= 4;
 	}
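Editorial note, not part of the commit above: a minimal, self-contained sketch of the exit-status convention the fsck hunk relies on, where bit 0 means errors were corrected and bit 2 means errors remain, so callers can test individual bits. The variable names are hypothetical.

#include <stdio.h>

int main(void)
{
	int ret = 0;
	int errors_fixed = 1, errors_remaining = 1;	/* hypothetical fsck results */

	if (errors_fixed)
		ret |= 1;	/* bit 0: errors were found and corrected */
	if (errors_remaining)
		ret |= 4;	/* bit 2: errors remain uncorrected */

	printf("exit status %d (fixed=%d, uncorrected=%d)\n",
	       ret, !!(ret & 1), !!(ret & 4));
	return ret;
}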
@@ -136,15 +136,30 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
 }
 
 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
-				struct bkey_i_backpointer *bp_k,
+				struct bpos bucket,
 				struct bch_backpointer bp,
 				struct bkey_s_c orig_k,
 				bool insert)
 {
 	struct btree_iter bp_iter;
 	struct bkey_s_c k;
+	struct bkey_i_backpointer *bp_k;
 	int ret;
 
+	bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
+	ret = PTR_ERR_OR_ZERO(bp_k);
+	if (ret)
+		return ret;
+
+	bkey_backpointer_init(&bp_k->k_i);
+	bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
+	bp_k->v = bp;
+
+	if (!insert) {
+		bp_k->k.type = KEY_TYPE_deleted;
+		set_bkey_val_u64s(&bp_k->k, 0);
+	}
+
 	k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
 			       bp_k->k.p,
 			       BTREE_ITER_INTENT|
@@ -63,7 +63,7 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
 	return ret;
 }
 
-int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bkey_i_backpointer *,
+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bpos bucket,
 			struct bch_backpointer, struct bkey_s_c, bool);
 
 static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
@@ -72,28 +72,21 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
 				struct bkey_s_c orig_k,
 				bool insert)
 {
-	struct bch_fs *c = trans->c;
-	struct bkey_i_backpointer *bp_k;
-	int ret;
+	if (unlikely(bch2_backpointers_no_use_write_buffer))
+		return bch2_bucket_backpointer_mod_nowritebuffer(trans, bucket, bp, orig_k, insert);
 
-	bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
-	ret = PTR_ERR_OR_ZERO(bp_k);
-	if (ret)
-		return ret;
+	struct bkey_i_backpointer bp_k;
 
-	bkey_backpointer_init(&bp_k->k_i);
-	bp_k->k.p = bucket_pos_to_bp(c, bucket, bp.bucket_offset);
-	bp_k->v = bp;
+	bkey_backpointer_init(&bp_k.k_i);
+	bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
+	bp_k.v = bp;
 
 	if (!insert) {
-		bp_k->k.type = KEY_TYPE_deleted;
-		set_bkey_val_u64s(&bp_k->k, 0);
+		bp_k.k.type = KEY_TYPE_deleted;
+		set_bkey_val_u64s(&bp_k.k, 0);
 	}
 
-	if (unlikely(bch2_backpointers_no_use_write_buffer))
-		return bch2_bucket_backpointer_mod_nowritebuffer(trans, bp_k, bp, orig_k, insert);
-
-	return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i);
+	return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i);
 }
 
 static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
@@ -406,7 +406,6 @@ BCH_DEBUG_PARAMS_DEBUG()
 	x(blocked_journal_max_in_flight)	\
 	x(blocked_allocate)			\
 	x(blocked_allocate_open_bucket)		\
-	x(blocked_write_buffer_full)		\
 	x(nocow_lock_contended)
 
 enum bch_time_stats {
@@ -567,32 +566,38 @@ struct bch_dev {
 	struct io_count __percpu *io_done;
 };
 
-enum {
-	/* startup: */
-	BCH_FS_STARTED,
-	BCH_FS_MAY_GO_RW,
-	BCH_FS_RW,
-	BCH_FS_WAS_RW,
-
-	/* shutdown: */
-	BCH_FS_STOPPING,
-	BCH_FS_EMERGENCY_RO,
-	BCH_FS_GOING_RO,
-	BCH_FS_WRITE_DISABLE_COMPLETE,
-	BCH_FS_CLEAN_SHUTDOWN,
-
-	/* fsck passes: */
-	BCH_FS_FSCK_DONE,
-	BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */
-	BCH_FS_NEED_ANOTHER_GC,
-
-	BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS,
-
-	/* errors: */
-	BCH_FS_ERROR,
-	BCH_FS_TOPOLOGY_ERROR,
-	BCH_FS_ERRORS_FIXED,
-	BCH_FS_ERRORS_NOT_FIXED,
+/*
+ * fsck_done - kill?
+ *
+ * replace with something more general from enumated fsck passes/errors:
+ * initial_gc_unfixed
+ * error
+ * topology error
+ */
+
+#define BCH_FS_FLAGS()			\
+	x(started)			\
+	x(may_go_rw)			\
+	x(rw)				\
+	x(was_rw)			\
+	x(stopping)			\
+	x(emergency_ro)			\
+	x(going_ro)			\
+	x(write_disable_complete)	\
+	x(clean_shutdown)		\
+	x(fsck_done)			\
+	x(initial_gc_unfixed)		\
+	x(need_another_gc)		\
+	x(need_delete_dead_snapshots)	\
+	x(error)			\
+	x(topology_error)		\
+	x(errors_fixed)			\
+	x(errors_not_fixed)
+
+enum bch_fs_flags {
+#define x(n) BCH_FS_##n,
+	BCH_FS_FLAGS()
+#undef x
 };
 
 struct btree_debug {
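Editorial note, not from this commit: BCH_FS_FLAGS() above is an x-macro list, expanded once into an enum of lowercase flag names used by the new test_bit()/set_bit() call sites throughout this diff. A rough, abbreviated sketch of how such an expansion works (DEMO_* names are hypothetical):

#include <stdio.h>

#define DEMO_FS_FLAGS()		\
	x(started)		\
	x(rw)			\
	x(error)		\
	x(errors_fixed)

enum demo_fs_flags {
#define x(n)	DEMO_FS_##n,
	DEMO_FS_FLAGS()
#undef x
	DEMO_FS_FLAG_NR,
};

int main(void)
{
	/* DEMO_FS_started expands to 0, DEMO_FS_errors_fixed to 3 */
	printf("%d flags, errors_fixed = %d\n",
	       (int) DEMO_FS_FLAG_NR, (int) DEMO_FS_errors_fixed);
	return 0;
}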
@@ -1068,20 +1073,10 @@ static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
 #endif
 }
 
-static inline bool __bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
-{
-#ifdef BCH_WRITE_REF_DEBUG
-	return !test_bit(BCH_FS_GOING_RO, &c->flags) &&
-		atomic_long_inc_not_zero(&c->writes[ref]);
-#else
-	return percpu_ref_tryget(&c->writes);
-#endif
-}
-
 static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
 {
 #ifdef BCH_WRITE_REF_DEBUG
-	return !test_bit(BCH_FS_GOING_RO, &c->flags) &&
+	return !test_bit(BCH_FS_going_ro, &c->flags) &&
 		atomic_long_inc_not_zero(&c->writes[ref]);
 #else
 	return percpu_ref_tryget_live(&c->writes);
@@ -1100,7 +1095,7 @@ static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
 		if (atomic_long_read(&c->writes[i]))
 			return;
 
-	set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
+	set_bit(BCH_FS_write_disable_complete, &c->flags);
 	wake_up(&bch2_read_only_wait);
 #else
 	percpu_ref_put(&c->writes);
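Editorial note, not from this commit: the two write-ref hunks above show the debug variant, where every writer class gets its own counter so leaks can be attributed, versus the normal percpu-ref path. A simplified, hedged sketch of the same idea using plain C11 atomics follows; the real code additionally uses inc_not_zero against a base reference and percpu refs, which this deliberately omits.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum demo_write_ref { DEMO_REF_trans, DEMO_REF_node_rewrite, DEMO_REF_NR };

struct demo_fs {
	bool		going_ro;
	atomic_long	writes[DEMO_REF_NR];
};

/* try to take a write reference of a given class; refuse once going read-only */
static bool demo_write_ref_tryget(struct demo_fs *c, enum demo_write_ref ref)
{
	if (c->going_ro)
		return false;
	atomic_fetch_add(&c->writes[ref], 1);
	return true;
}

static void demo_write_ref_put(struct demo_fs *c, enum demo_write_ref ref)
{
	atomic_fetch_sub(&c->writes[ref], 1);
}

int main(void)
{
	struct demo_fs c = { .going_ro = false };

	if (demo_write_ref_tryget(&c, DEMO_REF_trans)) {
		puts("got write ref");
		demo_write_ref_put(&c, DEMO_REF_trans);
	}
	return 0;
}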
@@ -1539,7 +1539,7 @@ struct bch_sb_field_disk_groups {
 	x(move_extent_write, 36) \
 	x(move_extent_finish, 37) \
 	x(move_extent_fail, 38) \
-	x(move_extent_alloc_mem_fail, 39) \
+	x(move_extent_start_fail, 39) \
 	x(copygc, 40) \
 	x(copygc_wait, 41) \
 	x(gc_gens_end, 42) \
@@ -1576,7 +1576,9 @@ struct bch_sb_field_disk_groups {
 	x(write_super, 73) \
 	x(trans_restart_would_deadlock_recursion_limit, 74) \
 	x(trans_restart_write_buffer_flush, 75) \
-	x(trans_restart_split_race, 76)
+	x(trans_restart_split_race, 76) \
+	x(write_buffer_flush_slowpath, 77) \
+	x(write_buffer_flush_sync, 78)
 
 enum bch_persistent_counters {
 #define x(t, n, ...) BCH_COUNTER_##t,
@@ -2135,8 +2137,7 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
 	x(clock, 7) \
 	x(dev_usage, 8) \
 	x(log, 9) \
-	x(overwrite, 10) \
-	x(write_buffer_keys, 11)
+	x(overwrite, 10)
 
 enum {
 #define x(f, nr) BCH_JSET_ENTRY_##f = nr,
@@ -13,13 +13,6 @@
 
 #include <linux/prefetch.h>
 #include <linux/sched/mm.h>
-#include <linux/seq_buf.h>
-
-#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
-do { \
-	if (shrinker_counter) \
-		bc->not_freed_##counter++; \
-} while (0)
 
 const char * const bch2_btree_node_flags[] = {
 #define x(f) #f,
@@ -208,7 +201,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
  * this version is for btree nodes that have already been freed (we're not
  * reaping a real btree node)
  */
-static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 {
 	struct btree_cache *bc = &c->btree_cache;
 	int ret = 0;
@@ -218,64 +211,38 @@ wait_on_io:
 	if (b->flags & ((1U << BTREE_NODE_dirty)|
 			(1U << BTREE_NODE_read_in_flight)|
 			(1U << BTREE_NODE_write_in_flight))) {
-		if (!flush) {
-			if (btree_node_dirty(b))
-				BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
-			else if (btree_node_read_in_flight(b))
-				BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
-			else if (btree_node_write_in_flight(b))
-				BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
+		if (!flush)
 			return -BCH_ERR_ENOMEM_btree_node_reclaim;
-		}
 
 		/* XXX: waiting on IO with btree cache lock held */
 		bch2_btree_node_wait_on_read(b);
 		bch2_btree_node_wait_on_write(b);
 	}
 
-	if (!six_trylock_intent(&b->c.lock)) {
-		BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
+	if (!six_trylock_intent(&b->c.lock))
 		return -BCH_ERR_ENOMEM_btree_node_reclaim;
-	}
 
-	if (!six_trylock_write(&b->c.lock)) {
-		BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
+	if (!six_trylock_write(&b->c.lock))
 		goto out_unlock_intent;
-	}
 
 	/* recheck under lock */
 	if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
 			(1U << BTREE_NODE_write_in_flight))) {
-		if (!flush) {
-			if (btree_node_read_in_flight(b))
-				BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
-			else if (btree_node_write_in_flight(b))
-				BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
+		if (!flush)
 			goto out_unlock;
-		}
 		six_unlock_write(&b->c.lock);
 		six_unlock_intent(&b->c.lock);
 		goto wait_on_io;
 	}
 
-	if (btree_node_noevict(b)) {
-		BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
-		goto out_unlock;
-	}
-	if (btree_node_write_blocked(b)) {
-		BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
-		goto out_unlock;
-	}
-	if (btree_node_will_make_reachable(b)) {
-		BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
+	if (btree_node_noevict(b) ||
+	    btree_node_write_blocked(b) ||
+	    btree_node_will_make_reachable(b))
 		goto out_unlock;
-	}
 
 	if (btree_node_dirty(b)) {
-		if (!flush) {
-			BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
+		if (!flush)
 			goto out_unlock;
-		}
 		/*
 		 * Using the underscore version because we don't want to compact
 		 * bsets after the write, since this node is about to be evicted
@@ -305,14 +272,14 @@ out_unlock_intent:
 	goto out;
 }
 
-static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
 {
-	return __btree_node_reclaim(c, b, false, shrinker_counter);
+	return __btree_node_reclaim(c, b, false);
 }
 
 static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
 {
-	return __btree_node_reclaim(c, b, true, false);
+	return __btree_node_reclaim(c, b, true);
 }
 
 static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
@@ -360,12 +327,11 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 		if (touched >= nr)
 			goto out;
 
-		if (!btree_node_reclaim(c, b, true)) {
+		if (!btree_node_reclaim(c, b)) {
 			btree_node_data_free(c, b);
 			six_unlock_write(&b->c.lock);
 			six_unlock_intent(&b->c.lock);
 			freed++;
-			bc->freed++;
 		}
 	}
 restart:
@@ -374,11 +340,9 @@ restart:
 
 		if (btree_node_accessed(b)) {
 			clear_btree_node_accessed(b);
-			bc->not_freed_access_bit++;
-		} else if (!btree_node_reclaim(c, b, true)) {
+		} else if (!btree_node_reclaim(c, b)) {
 			freed++;
 			btree_node_data_free(c, b);
-			bc->freed++;
 
 			bch2_btree_node_hash_remove(bc, b);
 			six_unlock_write(&b->c.lock);
@@ -428,17 +392,6 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
 	return btree_cache_can_free(bc);
 }
 
-static void bch2_btree_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
-{
-	struct bch_fs *c = shrink->private_data;
-	char *cbuf;
-	size_t buflen = seq_buf_get_buf(s, &cbuf);
-	struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);
-
-	bch2_btree_cache_to_text(&out, &c->btree_cache);
-	seq_buf_commit(s, out.pos);
-}
-
 void bch2_fs_btree_cache_exit(struct bch_fs *c)
 {
 	struct btree_cache *bc = &c->btree_cache;
@@ -525,7 +478,6 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
 	bc->shrink = shrink;
 	shrink->count_objects = bch2_btree_cache_count;
 	shrink->scan_objects = bch2_btree_cache_scan;
-	shrink->to_text = bch2_btree_cache_shrinker_to_text;
 	shrink->seeks = 4;
 	shrink->private_data = c;
 	shrinker_register(shrink);
@@ -599,7 +551,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
 	struct btree *b;
 
 	list_for_each_entry_reverse(b, &bc->live, list)
-		if (!btree_node_reclaim(c, b, false))
+		if (!btree_node_reclaim(c, b))
 			return b;
 
 	while (1) {
@@ -635,7 +587,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
 	 * disk node. Check the freed list before allocating a new one:
 	 */
 	list_for_each_entry(b, freed, list)
-		if (!btree_node_reclaim(c, b, false)) {
+		if (!btree_node_reclaim(c, b)) {
 			list_del_init(&b->list);
 			goto got_node;
 		}
@@ -661,7 +613,7 @@ got_node:
 	 * the list. Check if there's any freed nodes there:
 	 */
 	list_for_each_entry(b2, &bc->freeable, list)
-		if (!btree_node_reclaim(c, b2, false)) {
+		if (!btree_node_reclaim(c, b2)) {
 			swap(b->data, b2->data);
 			swap(b->aux_data, b2->aux_data);
 			btree_node_to_freedlist(bc, b2);
@@ -1257,21 +1209,9 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struc
 	       stats.failed);
 }
 
-void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
+void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)
 {
-	prt_printf(out, "nr nodes:\t\t%u\n", bc->used);
-	prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&bc->dirty));
-	prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
-
-	prt_printf(out, "freed:\t\t\t\t%u\n", bc->freed);
-	prt_printf(out, "not freed, dirty:\t\t%u\n", bc->not_freed_dirty);
-	prt_printf(out, "not freed, write in flight:\t%u\n", bc->not_freed_write_in_flight);
-	prt_printf(out, "not freed, read in flight:\t%u\n", bc->not_freed_read_in_flight);
-	prt_printf(out, "not freed, lock intent failed:\t%u\n", bc->not_freed_lock_intent);
-	prt_printf(out, "not freed, lock write failed:\t%u\n", bc->not_freed_lock_write);
-	prt_printf(out, "not freed, access bit:\t\t%u\n", bc->not_freed_access_bit);
-	prt_printf(out, "not freed, no evict failed:\t%u\n", bc->not_freed_noevict);
-	prt_printf(out, "not freed, write blocked:\t%u\n", bc->not_freed_write_blocked);
-	prt_printf(out, "not freed, will make reachable:\t%u\n", bc->not_freed_will_make_reachable);
-
+	prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
+	prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
+	prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
 }
@@ -126,6 +126,6 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
 const char *bch2_btree_id_str(enum btree_id);
 void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
 void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
-void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
+void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *);
 
 #endif /* _BCACHEFS_BTREE_CACHE_H */
@@ -108,7 +108,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
 				ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
 				goto err;
 			} else {
-				set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
+				set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
 			}
 		}
 	}
@@ -134,7 +134,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
 			ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
 			goto err;
 		} else {
-			set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
+			set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
 		}
 	}
 
@@ -619,7 +619,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
 			g->data_type = 0;
 			g->dirty_sectors = 0;
 			g->cached_sectors = 0;
-			set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+			set_bit(BCH_FS_need_another_gc, &c->flags);
 		} else {
 			do_update = true;
 		}
@@ -664,7 +664,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
 			bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
 		if (data_type == BCH_DATA_btree) {
 			g->data_type = data_type;
-			set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+			set_bit(BCH_FS_need_another_gc, &c->flags);
 		} else {
 			do_update = true;
 		}
@@ -996,7 +996,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
 				/* Continue marking when opted to not
 				 * fix the error: */
 				ret = 0;
-				set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
+				set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
 				continue;
 			}
 		} else if (ret) {
@@ -1847,7 +1847,7 @@ again:
 #endif
 	c->gc_count++;
 
-	if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
+	if (test_bit(BCH_FS_need_another_gc, &c->flags) ||
 	    (!iter && bch2_test_restart_gc)) {
 		if (iter++ > 2) {
 			bch_info(c, "Unable to fix bucket gens, looping");
@@ -1859,7 +1859,7 @@ again:
 		 * XXX: make sure gens we fixed got saved
 		 */
 		bch_info(c, "Second GC pass needed, restarting:");
-		clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+		clear_bit(BCH_FS_need_another_gc, &c->flags);
 		__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
 
 		bch2_gc_stripes_reset(c, metadata_only);
@@ -781,7 +781,7 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat
 	struct btree_node_iter node_iter = l->iter;
 	struct bkey_packed *k;
 	struct bkey_buf tmp;
-	unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
+	unsigned nr = test_bit(BCH_FS_started, &c->flags)
 		? (path->level > 1 ? 0 : 2)
 		: (path->level > 1 ? 1 : 16);
 	bool was_locked = btree_node_locked(path, path->level);
@@ -816,7 +816,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p
 	struct bch_fs *c = trans->c;
 	struct bkey_s_c k;
 	struct bkey_buf tmp;
-	unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
+	unsigned nr = test_bit(BCH_FS_started, &c->flags)
 		? (path->level > 1 ? 0 : 2)
 		: (path->level > 1 ? 1 : 16);
 	bool was_locked = btree_node_locked(path, path->level);
@@ -177,7 +177,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
 	struct journal_keys *keys = &c->journal_keys;
 	size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
 
-	BUG_ON(test_bit(BCH_FS_RW, &c->flags));
+	BUG_ON(test_bit(BCH_FS_rw, &c->flags));
 
 	if (idx < keys->size &&
 	    journal_key_cmp(&n, &keys->d[idx]) == 0) {
@@ -13,7 +13,6 @@
 #include "trace.h"
 
 #include <linux/sched/mm.h>
-#include <linux/seq_buf.h>
 
 static inline bool btree_uses_pcpu_readers(enum btree_id id)
 {
@@ -779,7 +778,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 	ck->valid = true;
 
 	if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
-		EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
+		EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
 		set_bit(BKEY_CACHED_DIRTY, &ck->flags);
 		atomic_long_inc(&c->btree_key_cache.nr_dirty);
 
@@ -1008,7 +1007,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 
 	if (atomic_long_read(&bc->nr_dirty) &&
 	    !bch2_journal_error(&c->journal) &&
-	    test_bit(BCH_FS_WAS_RW, &c->flags))
+	    test_bit(BCH_FS_was_rw, &c->flags))
 		panic("btree key cache shutdown error: nr_dirty nonzero (%li)\n",
 		      atomic_long_read(&bc->nr_dirty));
 
@@ -1029,18 +1028,6 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
 	INIT_LIST_HEAD(&c->freed_nonpcpu);
 }
 
-static void bch2_btree_key_cache_shrinker_to_text(struct seq_buf *s, struct shrinker *shrink)
-{
-	struct bch_fs *c = shrink->private_data;
-	struct btree_key_cache *bc = &c->btree_key_cache;
-	char *cbuf;
-	size_t buflen = seq_buf_get_buf(s, &cbuf);
-	struct printbuf out = PRINTBUF_EXTERN(cbuf, buflen);
-
-	bch2_btree_key_cache_to_text(&out, bc);
-	seq_buf_commit(s, out.pos);
-}
-
 int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 {
 	struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
@@ -1064,7 +1051,6 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 	shrink->seeks = 0;
 	shrink->count_objects = bch2_btree_key_cache_count;
 	shrink->scan_objects = bch2_btree_key_cache_scan;
-	shrink->to_text = bch2_btree_key_cache_shrinker_to_text;
 	shrink->private_data = c;
 	shrinker_register(shrink);
 	return 0;
@@ -287,7 +287,7 @@ inline void bch2_btree_insert_key_leaf(struct btree_trans *trans,
 	bch2_btree_add_journal_pin(c, b, journal_seq);
 
 	if (unlikely(!btree_node_dirty(b))) {
-		EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
+		EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
 		set_btree_node_dirty_acct(c, b);
 	}
 
@@ -659,6 +659,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 		i->k->k.needs_whiteout = false;
 	}
 
+	if (trans->nr_wb_updates &&
+	    trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size)
+		return -BCH_ERR_btree_insert_need_flush_buffer;
+
 	/*
 	 * Don't get journal reservation until after we know insert will
 	 * succeed:
@@ -693,6 +697,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 	    bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
 		return -BCH_ERR_btree_insert_need_mark_replicas;
 
+	if (trans->nr_wb_updates) {
+		EBUG_ON(flags & BCH_TRANS_COMMIT_no_journal_res);
+
+		ret = bch2_btree_insert_keys_write_buffer(trans);
+		if (ret)
+			goto revert_fs_usage;
+	}
+
 	h = trans->hooks;
 	while (h) {
 		ret = h->fn(trans, h);
@@ -754,7 +766,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 
 	trans_for_each_wb_update(trans, wb) {
 		entry = bch2_journal_add_entry(j, &trans->journal_res,
-				       BCH_JSET_ENTRY_write_buffer_keys,
+				       BCH_JSET_ENTRY_btree_keys,
 				       wb->btree, 0,
 				       wb->k.k.u64s);
 		bkey_copy((struct bkey_i *) entry->start, &wb->k);
@ -938,6 +950,30 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
|
||||
|
||||
ret = bch2_trans_relock(trans);
|
||||
break;
|
||||
case -BCH_ERR_btree_insert_need_flush_buffer: {
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
|
||||
ret = 0;
|
||||
|
||||
if (wb->state.nr > wb->size * 3 / 4) {
|
||||
bch2_trans_unlock(trans);
|
||||
mutex_lock(&wb->flush_lock);
|
||||
|
||||
if (wb->state.nr > wb->size * 3 / 4) {
|
||||
bch2_trans_begin(trans);
|
||||
ret = bch2_btree_write_buffer_flush_locked(trans);
|
||||
mutex_unlock(&wb->flush_lock);
|
||||
if (!ret) {
|
||||
trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
|
||||
}
|
||||
} else {
|
||||
mutex_unlock(&wb->flush_lock);
|
||||
ret = bch2_trans_relock(trans);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
BUG_ON(ret >= 0);
|
||||
break;
|
||||
@@ -959,7 +995,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags)
 	int ret;
 
 	if (likely(!(flags & BCH_TRANS_COMMIT_lazy_rw)) ||
-	    test_bit(BCH_FS_STARTED, &c->flags))
+	    test_bit(BCH_FS_started, &c->flags))
 		return -BCH_ERR_erofs_trans_commit;
 
 	ret = drop_locks_do(trans, bch2_fs_read_write_early(c));
@@ -1024,7 +1060,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
 		return ret;
 	}
 
-	if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
+	if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) {
 		ret = do_bch2_trans_commit_to_journal_replay(trans);
 		goto out_reset;
 	}
@ -1036,7 +1072,21 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
||||
goto out_reset;
|
||||
}
|
||||
|
||||
EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
|
||||
if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 &&
|
||||
mutex_trylock(&c->btree_write_buffer.flush_lock)) {
|
||||
bch2_trans_begin(trans);
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
ret = bch2_btree_write_buffer_flush_locked(trans);
|
||||
mutex_unlock(&c->btree_write_buffer.flush_lock);
|
||||
if (!ret) {
|
||||
trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
|
||||
|
||||
trans->journal_u64s = trans->extra_journal_entries.nr;
|
||||
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
|
||||
|
@@ -162,16 +162,6 @@ struct btree_cache {
 	/* Number of elements in live + freeable lists */
 	unsigned used;
 	unsigned reserve;
-	unsigned freed;
-	unsigned not_freed_lock_intent;
-	unsigned not_freed_lock_write;
-	unsigned not_freed_dirty;
-	unsigned not_freed_read_in_flight;
-	unsigned not_freed_write_in_flight;
-	unsigned not_freed_noevict;
-	unsigned not_freed_write_blocked;
-	unsigned not_freed_will_make_reachable;
-	unsigned not_freed_access_bit;
 	atomic_t dirty;
 	struct shrinker *shrink;
 
@@ -693,20 +693,6 @@ int bch2_btree_delete_at(struct btree_trans *trans,
 	return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
 }
 
-int bch2_btree_delete_at_buffered(struct btree_trans *trans,
-				  enum btree_id btree, struct bpos pos)
-{
-	struct bkey_i *k;
-
-	k = bch2_trans_kmalloc(trans, sizeof(*k));
-	if (IS_ERR(k))
-		return PTR_ERR(k);
-
-	bkey_init(&k->k);
-	k->k.p = pos;
-	return bch2_trans_update_buffered(trans, btree, k);
-}
-
 int bch2_btree_delete(struct btree_trans *trans,
 		      enum btree_id btree, struct bpos pos,
 		      unsigned update_flags)
@@ -811,19 +797,13 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
 int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
 		       struct bpos pos, bool set)
 {
-	struct bkey_i *k;
-	int ret = 0;
+	struct bkey_i k;
 
-	k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
-	ret = PTR_ERR_OR_ZERO(k);
-	if (unlikely(ret))
-		return ret;
+	bkey_init(&k.k);
+	k.k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
+	k.k.p = pos;
 
-	bkey_init(&k->k);
-	k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
-	k->k.p = pos;
-
-	return bch2_trans_update_buffered(trans, btree, k);
+	return bch2_trans_update_buffered(trans, btree, &k);
 }
 
 __printf(2, 0)
@@ -47,7 +47,6 @@ enum bch_trans_commit_flags {
 int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
 				unsigned, unsigned);
 int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
-int bch2_btree_delete_at_buffered(struct btree_trans *, enum btree_id, struct bpos);
 int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned);
 
 int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
@@ -65,6 +64,12 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
 
 int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
 
+static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
+						enum btree_id btree, struct bpos pos)
+{
+	return bch2_btree_bit_mod(trans, btree, pos, false);
+}
+
 int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
 				     struct bpos, struct bpos);
 
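Editorial note, not from this commit: after the two hunks above, a buffered delete is just bch2_btree_bit_mod() with set == false, and the key is prepared entirely on the stack. A simplified stand-in sketch of that "present/absent bit" calling convention follows; the demo_* names only mirror the shape of the interface, not the real API.

#include <stdbool.h>
#include <stdio.h>

enum demo_key_type { DEMO_KEY_deleted, DEMO_KEY_set };

struct demo_key {
	enum demo_key_type	type;
	unsigned long long	pos;
};

/* stand-in for the buffered update path: just report what would be queued */
static int demo_update_buffered(const struct demo_key *k)
{
	printf("queue key: pos=%llu type=%s\n",
	       k->pos, k->type == DEMO_KEY_set ? "set" : "deleted");
	return 0;
}

static int demo_bit_mod(unsigned long long pos, bool set)
{
	struct demo_key k = {
		.type	= set ? DEMO_KEY_set : DEMO_KEY_deleted,
		.pos	= pos,
	};

	return demo_update_buffered(&k);
}

int main(void)
{
	demo_bit_mod(7, true);		/* mark position present */
	demo_bit_mod(7, false);		/* delete it again, as delete_at_buffered would */
	return 0;
}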
@@ -1082,8 +1082,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 			break;
 		}
 
+		/*
+		 * Always check for space for two keys, even if we won't have to
+		 * split at prior level - it might have been a merge instead:
+		 */
 		if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
-				BKEY_BTREE_PTR_U64s_MAX * (1 + split)))
+				BKEY_BTREE_PTR_U64s_MAX * 2))
 			break;
 
 		split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
@@ -2052,7 +2056,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
 	a->seq = b->data->keys.seq;
 	INIT_WORK(&a->work, async_btree_node_rewrite_work);
 
-	if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
+	if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) {
 		mutex_lock(&c->pending_node_rewrites_lock);
 		list_add(&a->list, &c->pending_node_rewrites);
 		mutex_unlock(&c->pending_node_rewrites_lock);
@@ -2060,7 +2064,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
 	}
 
 	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
-		if (test_bit(BCH_FS_STARTED, &c->flags)) {
+		if (test_bit(BCH_FS_started, &c->flags)) {
 			bch_err(c, "%s: error getting c->writes ref", __func__);
 			kfree(a);
 			return;
@ -7,130 +7,43 @@
|
||||
#include "btree_write_buffer.h"
|
||||
#include "error.h"
|
||||
#include "journal.h"
|
||||
#include "journal_io.h"
|
||||
#include "journal_reclaim.h"
|
||||
|
||||
#include <linux/prefetch.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
static int bch2_btree_write_buffer_journal_flush(struct journal *,
|
||||
struct journal_entry_pin *, u64);
|
||||
|
||||
static int bch2_journal_keys_to_write_buffer(struct bch_fs *, struct journal_buf *);
|
||||
|
||||
static inline bool __wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
|
||||
static int btree_write_buffered_key_cmp(const void *_l, const void *_r)
|
||||
{
|
||||
return (cmp_int(l->hi, r->hi) ?:
|
||||
cmp_int(l->mi, r->mi) ?:
|
||||
cmp_int(l->lo, r->lo)) >= 0;
|
||||
const struct btree_write_buffered_key *l = _l;
|
||||
const struct btree_write_buffered_key *r = _r;
|
||||
|
||||
return cmp_int(l->btree, r->btree) ?:
|
||||
bpos_cmp(l->k.k.p, r->k.k.p) ?:
|
||||
cmp_int(l->journal_seq, r->journal_seq) ?:
|
||||
cmp_int(l->journal_offset, r->journal_offset);
|
||||
}
|
||||
|
||||
static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
|
||||
static int btree_write_buffered_journal_cmp(const void *_l, const void *_r)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
int cmp;
|
||||
const struct btree_write_buffered_key *l = _l;
|
||||
const struct btree_write_buffered_key *r = _r;
|
||||
|
||||
asm("mov (%[l]), %%rax;"
|
||||
"sub (%[r]), %%rax;"
|
||||
"mov 8(%[l]), %%rax;"
|
||||
"sbb 8(%[r]), %%rax;"
|
||||
"mov 16(%[l]), %%rax;"
|
||||
"sbb 16(%[r]), %%rax;"
|
||||
: "=@ccae" (cmp)
|
||||
: [l] "r" (l), [r] "r" (r)
|
||||
: "rax", "cc");
|
||||
|
||||
EBUG_ON(cmp != __wb_key_cmp(l, r));
|
||||
return cmp;
|
||||
#else
|
||||
return __wb_key_cmp(l, r);
|
||||
#endif
|
||||
return cmp_int(l->journal_seq, r->journal_seq);
|
||||
}
|
||||
|
||||
/* Compare excluding idx, the low 24 bits: */
|
||||
static inline bool wb_key_eq(const void *_l, const void *_r)
|
||||
{
|
||||
const struct wb_key_ref *l = _l;
|
||||
const struct wb_key_ref *r = _r;
|
||||
|
||||
return !((l->hi ^ r->hi)|
|
||||
(l->mi ^ r->mi)|
|
||||
((l->lo >> 24) ^ (r->lo >> 24)));
|
||||
}
|
||||
|
||||
static noinline void wb_sort(struct wb_key_ref *base, size_t num)
|
||||
{
|
||||
size_t n = num, a = num / 2;
|
||||
|
||||
if (!a) /* num < 2 || size == 0 */
|
||||
return;
|
||||
|
||||
for (;;) {
|
||||
size_t b, c, d;
|
||||
|
||||
if (a) /* Building heap: sift down --a */
|
||||
--a;
|
||||
else if (--n) /* Sorting: Extract root to --n */
|
||||
swap(base[0], base[n]);
|
||||
else /* Sort complete */
|
||||
break;
|
||||
|
||||
/*
|
||||
* Sift element at "a" down into heap. This is the
|
||||
* "bottom-up" variant, which significantly reduces
|
||||
* calls to cmp_func(): we find the sift-down path all
|
||||
* the way to the leaves (one compare per level), then
|
||||
* backtrack to find where to insert the target element.
|
||||
*
|
||||
* Because elements tend to sift down close to the leaves,
|
||||
* this uses fewer compares than doing two per level
|
||||
* on the way down. (A bit more than half as many on
|
||||
* average, 3/4 worst-case.)
|
||||
*/
|
||||
for (b = a; c = 2*b + 1, (d = c + 1) < n;)
|
||||
b = wb_key_cmp(base + c, base + d) ? c : d;
|
||||
if (d == n) /* Special case last leaf with no sibling */
|
||||
b = c;
|
||||
|
||||
/* Now backtrack from "b" to the correct location for "a" */
|
||||
while (b != a && wb_key_cmp(base + a, base + b))
|
||||
b = (b - 1) / 2;
|
||||
c = b; /* Where "a" belongs */
|
||||
while (b != a) { /* Shift it into place */
|
||||
b = (b - 1) / 2;
|
||||
swap(base[b], base[c]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static noinline int wb_flush_one_slowpath(struct btree_trans *trans,
|
||||
static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct btree_write_buffered_key *wb)
|
||||
{
|
||||
bch2_btree_node_unlock_write(trans, iter->path, iter->path->l[0].b);
|
||||
|
||||
trans->journal_res.seq = wb->journal_seq;
|
||||
|
||||
return bch2_trans_update(trans, iter, &wb->k,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_no_check_rw|
|
||||
BCH_TRANS_COMMIT_no_journal_res|
|
||||
BCH_TRANS_COMMIT_journal_reclaim);
|
||||
}
|
||||
|
||||
static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct btree_write_buffered_key *wb,
|
||||
bool *write_locked, size_t *fast)
|
||||
unsigned commit_flags,
|
||||
bool *write_locked,
|
||||
size_t *fast)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_path *path;
|
||||
int ret;
|
||||
|
||||
EBUG_ON(!wb->journal_seq);
|
||||
EBUG_ON(!c->btree_write_buffer.flushing.pin.seq);
|
||||
EBUG_ON(c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);
|
||||
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -153,14 +66,46 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
|
||||
*write_locked = true;
|
||||
}
|
||||
|
||||
if (unlikely(!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s))) {
|
||||
if (!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s)) {
|
||||
bch2_btree_node_unlock_write(trans, path, path->l[0].b);
|
||||
*write_locked = false;
|
||||
return wb_flush_one_slowpath(trans, iter, wb);
|
||||
goto trans_commit;
|
||||
}
|
||||
|
||||
bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
|
||||
(*fast)++;
|
||||
return 0;
|
||||
trans_commit:
|
||||
trans->journal_res.seq = wb->journal_seq;
|
||||
|
||||
return bch2_trans_update(trans, iter, &wb->k,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
commit_flags|
|
||||
BCH_TRANS_COMMIT_no_check_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_no_journal_res|
|
||||
BCH_TRANS_COMMIT_journal_reclaim);
|
||||
}
|
||||
|
||||
static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
|
||||
{
|
||||
union btree_write_buffer_state old, new;
|
||||
u64 v = READ_ONCE(wb->state.v);
|
||||
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
|
||||
new.nr = 0;
|
||||
new.idx++;
|
||||
} while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
|
||||
|
||||
while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1)
|
||||
cpu_relax();
|
||||
|
||||
smp_mb();
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -192,79 +137,31 @@ btree_write_buffered_insert(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb)
|
||||
{
|
||||
struct bch_fs *c = container_of(wb, struct bch_fs, btree_write_buffer);
|
||||
struct journal *j = &c->journal;
|
||||
|
||||
if (!wb->inc.keys.nr)
|
||||
return;
|
||||
|
||||
bch2_journal_pin_add(j, wb->inc.keys.data[0].journal_seq, &wb->flushing.pin,
|
||||
bch2_btree_write_buffer_journal_flush);
|
||||
|
||||
darray_resize(&wb->flushing.keys, min_t(size_t, 1U << 20, wb->flushing.keys.nr + wb->inc.keys.nr));
|
||||
darray_resize(&wb->sorted, wb->flushing.keys.size);
|
||||
|
||||
if (!wb->flushing.keys.nr && wb->sorted.size >= wb->inc.keys.nr) {
|
||||
swap(wb->flushing.keys, wb->inc.keys);
|
||||
goto out;
|
||||
}
|
||||
|
||||
size_t nr = min(darray_room(wb->flushing.keys),
|
||||
wb->sorted.size - wb->flushing.keys.nr);
|
||||
nr = min(nr, wb->inc.keys.nr);
|
||||
|
||||
memcpy(&darray_top(wb->flushing.keys),
|
||||
wb->inc.keys.data,
|
||||
sizeof(wb->inc.keys.data[0]) * nr);
|
||||
|
||||
memmove(wb->inc.keys.data,
|
||||
wb->inc.keys.data + nr,
|
||||
sizeof(wb->inc.keys.data[0]) * (wb->inc.keys.nr - nr));
|
||||
|
||||
wb->flushing.keys.nr += nr;
|
||||
wb->inc.keys.nr -= nr;
|
||||
out:
|
||||
if (!wb->inc.keys.nr)
|
||||
bch2_journal_pin_drop(j, &wb->inc.pin);
|
||||
else
|
||||
bch2_journal_pin_update(j, wb->inc.keys.data[0].journal_seq, &wb->inc.pin,
|
||||
bch2_btree_write_buffer_journal_flush);
|
||||
|
||||
if (j->watermark) {
|
||||
spin_lock(&j->lock);
|
||||
bch2_journal_set_watermark(j);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
|
||||
}
|
||||
|
||||
static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
||||
int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct journal *j = &c->journal;
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
struct wb_key_ref *i;
|
||||
struct journal_entry_pin pin;
|
||||
struct btree_write_buffered_key *i, *keys;
|
||||
struct btree_iter iter = { NULL };
|
||||
size_t skipped = 0, fast = 0, slowpath = 0;
|
||||
size_t nr = 0, skipped = 0, fast = 0, slowpath = 0;
|
||||
bool write_locked = false;
|
||||
union btree_write_buffer_state s;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
bch2_trans_begin(trans);
|
||||
memset(&pin, 0, sizeof(pin));
|
||||
|
||||
mutex_lock(&wb->inc.lock);
|
||||
move_keys_from_inc_to_flushing(wb);
|
||||
mutex_unlock(&wb->inc.lock);
|
||||
bch2_journal_pin_copy(j, &pin, &wb->journal_pin,
|
||||
bch2_btree_write_buffer_journal_flush);
|
||||
bch2_journal_pin_drop(j, &wb->journal_pin);
|
||||
|
||||
for (size_t i = 0; i < wb->flushing.keys.nr; i++) {
|
||||
wb->sorted.data[i].idx = i;
|
||||
wb->sorted.data[i].btree = wb->flushing.keys.data[i].btree;
|
||||
memcpy(&wb->sorted.data[i].pos, &wb->flushing.keys.data[i].k.k.p, sizeof(struct bpos));
|
||||
}
|
||||
wb->sorted.nr = wb->flushing.keys.nr;
|
||||
s = btree_write_buffer_switch(wb);
|
||||
keys = wb->keys[s.idx];
|
||||
nr = s.nr;
|
||||
|
||||
if (race_fault())
|
||||
goto slowpath;
|
||||
|
||||
/*
|
||||
* We first sort so that we can detect and skip redundant updates, and
|
||||
@ -280,88 +177,83 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
||||
* If that happens, simply skip the key so we can optimistically insert
|
||||
* as many keys as possible in the fast path.
|
||||
*/
|
||||
wb_sort(wb->sorted.data, wb->sorted.nr);
|
||||
|
||||
darray_for_each(wb->sorted, i) {
|
||||
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
|
||||
|
||||
for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
|
||||
prefetch(&wb->flushing.keys.data[n->idx]);
|
||||
|
||||
BUG_ON(!k->journal_seq);
|
||||
|
||||
if (i + 1 < &darray_top(wb->sorted) &&
|
||||
wb_key_eq(i, i + 1)) {
|
||||
struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
|
||||
sort(keys, nr, sizeof(keys[0]),
|
||||
btree_write_buffered_key_cmp, NULL);
|
||||
|
||||
for (i = keys; i < keys + nr; i++) {
|
||||
if (i + 1 < keys + nr &&
|
||||
i[0].btree == i[1].btree &&
|
||||
bpos_eq(i[0].k.k.p, i[1].k.k.p)) {
|
||||
skipped++;
|
||||
n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
|
||||
k->journal_seq = 0;
|
||||
i->journal_seq = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (write_locked &&
|
||||
(iter.path->btree_id != k->btree ||
|
||||
bpos_gt(k->k.k.p, iter.path->l[0].b->key.k.p))) {
|
||||
(iter.path->btree_id != i->btree ||
|
||||
bpos_gt(i->k.k.p, iter.path->l[0].b->key.k.p))) {
|
||||
bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
|
||||
write_locked = false;
|
||||
}
|
||||
|
||||
if (!iter.path || iter.path->btree_id != k->btree) {
|
||||
if (!iter.path || iter.path->btree_id != i->btree) {
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
bch2_trans_iter_init(trans, &iter, k->btree, k->k.k.p,
|
||||
bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p,
|
||||
BTREE_ITER_INTENT|BTREE_ITER_ALL_SNAPSHOTS);
|
||||
}
|
||||
|
||||
bch2_btree_iter_set_pos(&iter, k->k.k.p);
|
||||
bch2_btree_iter_set_pos(&iter, i->k.k.p);
|
||||
iter.path->preserve = false;
|
||||
|
||||
do {
|
||||
if (race_fault()) {
|
||||
ret = -BCH_ERR_journal_reclaim_would_deadlock;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = wb_flush_one(trans, &iter, k, &write_locked, &fast);
|
||||
ret = bch2_btree_write_buffer_flush_one(trans, &iter, i, 0,
|
||||
&write_locked, &fast);
|
||||
if (!write_locked)
|
||||
bch2_trans_begin(trans);
|
||||
} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
|
||||
|
||||
if (!ret) {
|
||||
k->journal_seq = 0;
|
||||
} else if (ret == -BCH_ERR_journal_reclaim_would_deadlock) {
|
||||
if (ret == -BCH_ERR_journal_reclaim_would_deadlock) {
|
||||
slowpath++;
|
||||
ret = 0;
|
||||
} else
|
||||
continue;
|
||||
}
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
i->journal_seq = 0;
|
||||
}
|
||||
|
||||
if (write_locked)
|
||||
bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
trace_write_buffer_flush(trans, nr, skipped, fast, wb->size);
|
||||
|
||||
if (slowpath)
|
||||
goto slowpath;
|
||||
|
||||
bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
|
||||
out:
|
||||
bch2_journal_pin_drop(j, &pin);
|
||||
return ret;
|
||||
slowpath:
|
||||
trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, nr);
|
||||
|
||||
if (slowpath) {
|
||||
/*
|
||||
* Flush in the order they were present in the journal, so that
|
||||
* we can release journal pins:
|
||||
* The fastpath zapped the seq of keys that were successfully flushed so
|
||||
* Now sort the rest by journal seq and bump the journal pin as we go.
|
||||
* The slowpath zapped the seq of keys that were successfully flushed so
|
||||
* we can skip those here.
|
||||
*/
|
||||
trace_write_buffer_flush_slowpath(trans, slowpath, wb->flushing.keys.nr);
|
||||
sort(keys, nr, sizeof(keys[0]),
|
||||
btree_write_buffered_journal_cmp,
|
||||
NULL);
|
||||
|
||||
struct btree_write_buffered_key *i;
|
||||
darray_for_each(wb->flushing.keys, i) {
|
||||
for (i = keys; i < keys + nr; i++) {
|
||||
if (!i->journal_seq)
|
||||
continue;
|
||||
|
||||
bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
|
||||
bch2_journal_pin_update(j, i->journal_seq, &pin,
|
||||
bch2_btree_write_buffer_journal_flush);
|
||||
|
||||
bch2_trans_begin(trans);
|
||||
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BCH_WATERMARK_reclaim|
|
||||
BCH_TRANS_COMMIT_no_check_rw|
|
||||
@ -369,62 +261,27 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
||||
BCH_TRANS_COMMIT_no_journal_res|
|
||||
BCH_TRANS_COMMIT_journal_reclaim,
|
||||
btree_write_buffered_insert(trans, i));
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
err:
|
||||
bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
|
||||
trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
|
||||
bch2_journal_pin_drop(j, &wb->flushing.pin);
|
||||
wb->flushing.keys.nr = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq)
|
||||
{
|
||||
struct journal *j = &c->journal;
|
||||
struct journal_buf *buf;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&j->buf_lock);
|
||||
while ((buf = bch2_next_write_buffer_flush_journal_buf(j, seq)))
|
||||
if (bch2_journal_keys_to_write_buffer(c, buf)) {
|
||||
ret = -ENOMEM;
|
||||
if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)))
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&j->buf_lock);
|
||||
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
int ret = 0, fetch_from_journal_err;
|
||||
|
||||
trace_write_buffer_flush_sync(trans, _RET_IP_);
|
||||
retry:
|
||||
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer))
|
||||
return -BCH_ERR_erofs_no_writes;
|
||||
|
||||
trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_);
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
bch2_journal_block_reservations(&c->journal);
|
||||
fetch_from_journal_err = fetch_wb_keys_from_journal(c, U64_MAX);
|
||||
bch2_journal_unblock(&c->journal);
|
||||
|
||||
/*
|
||||
* On memory allocation failure, bch2_btree_write_buffer_flush_locked()
|
||||
* is not guaranteed to empty wb->inc:
|
||||
*/
|
||||
mutex_lock(&wb->flushing.lock);
|
||||
while (!ret &&
|
||||
(wb->flushing.keys.nr || wb->inc.keys.nr))
|
||||
ret = bch2_btree_write_buffer_flush_locked(trans);
|
||||
mutex_unlock(&wb->flushing.lock);
|
||||
|
||||
if (!ret && fetch_from_journal_err)
|
||||
goto retry;
|
||||
|
||||
mutex_lock(&c->btree_write_buffer.flush_lock);
|
||||
int ret = bch2_btree_write_buffer_flush_locked(trans);
|
||||
mutex_unlock(&c->btree_write_buffer.flush_lock);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -434,9 +291,9 @@ int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans)
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
int ret = 0;
|
||||
|
||||
if (mutex_trylock(&wb->flushing.lock)) {
|
||||
if (mutex_trylock(&wb->flush_lock)) {
|
||||
ret = bch2_btree_write_buffer_flush_locked(trans);
|
||||
mutex_unlock(&wb->flushing.lock);
|
||||
mutex_unlock(&wb->flush_lock);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -459,195 +316,85 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
int ret, fetch_from_journal_err;
|
||||
|
||||
do {
|
||||
fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq);
|
||||
|
||||
mutex_lock(&wb->flushing.lock);
|
||||
ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans));
|
||||
mutex_unlock(&wb->flushing.lock);
|
||||
} while (!ret &&
|
||||
(fetch_from_journal_err ||
|
||||
(wb->flushing.pin.seq && wb->flushing.pin.seq <= seq) ||
|
||||
(wb->inc.pin.seq && wb->inc.pin.seq <= seq)));
|
||||
mutex_lock(&wb->flush_lock);
|
||||
int ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans));
|
||||
mutex_unlock(&wb->flush_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
|
||||
static inline u64 btree_write_buffer_ref(int idx)
|
||||
{
|
||||
struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&wb->flushing.lock);
|
||||
do {
|
||||
ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans));
|
||||
} while (!ret && bch2_btree_write_buffer_should_flush(c));
|
||||
mutex_unlock(&wb->flushing.lock);
|
||||
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
|
||||
return ((union btree_write_buffer_state) {
|
||||
.ref0 = idx == 0,
|
||||
.ref1 = idx == 1,
|
||||
}).v;
|
||||
}
|
||||
|
||||
int __bch2_journal_key_to_wb(struct bch_fs *c,
|
||||
struct journal_keys_to_wb *dst,
|
||||
enum btree_id btree, struct bkey_i *k)
|
||||
int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
int ret;
|
||||
retry:
|
||||
ret = darray_make_room_gfp(&dst->wb->keys, 1, GFP_KERNEL);
|
||||
if (!ret && dst->wb == &wb->flushing)
|
||||
ret = darray_resize(&wb->sorted, wb->flushing.keys.size);
|
||||
|
||||
if (unlikely(ret)) {
|
||||
if (dst->wb == &c->btree_write_buffer.flushing) {
|
||||
mutex_unlock(&dst->wb->lock);
|
||||
dst->wb = &c->btree_write_buffer.inc;
|
||||
bch2_journal_pin_add(&c->journal, dst->seq, &dst->wb->pin,
|
||||
bch2_btree_write_buffer_journal_flush);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
dst->room = darray_room(dst->wb->keys);
|
||||
if (dst->wb == &wb->flushing)
|
||||
dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr);
|
||||
BUG_ON(!dst->room);
|
||||
BUG_ON(!dst->seq);
|
||||
|
||||
struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
|
||||
wb_k->journal_seq = dst->seq;
|
||||
wb_k->btree = btree;
|
||||
bkey_copy(&wb_k->k, k);
|
||||
dst->wb->keys.nr++;
|
||||
dst->room--;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_keys_to_wb *dst, u64 seq)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
|
||||
if (mutex_trylock(&wb->flushing.lock)) {
|
||||
mutex_lock(&wb->inc.lock);
|
||||
move_keys_from_inc_to_flushing(wb);
|
||||
|
||||
/*
|
||||
* Attempt to skip wb->inc, and add keys directly to
|
||||
* wb->flushing, saving us a copy later:
|
||||
*/
|
||||
|
||||
if (!wb->inc.keys.nr) {
|
||||
dst->wb = &wb->flushing;
|
||||
} else {
|
||||
mutex_unlock(&wb->flushing.lock);
|
||||
dst->wb = &wb->inc;
|
||||
}
|
||||
} else {
|
||||
mutex_lock(&wb->inc.lock);
|
||||
dst->wb = &wb->inc;
|
||||
}
|
||||
|
||||
dst->room = darray_room(dst->wb->keys);
|
||||
if (dst->wb == &wb->flushing)
|
||||
dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr);
|
||||
dst->seq = seq;
|
||||
|
||||
bch2_journal_pin_add(&c->journal, seq, &dst->wb->pin,
|
||||
bch2_btree_write_buffer_journal_flush);
|
||||
}
|
||||
|
||||
void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
|
||||
{
|
||||
struct btree_write_buffer *wb = &c->btree_write_buffer;
|
||||
|
||||
if (!dst->wb->keys.nr)
|
||||
bch2_journal_pin_drop(&c->journal, &dst->wb->pin);
|
||||
|
||||
if (bch2_btree_write_buffer_should_flush(c) &&
|
||||
__bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer) &&
|
||||
!queue_work(system_unbound_wq, &c->btree_write_buffer.flush_work))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
|
||||
|
||||
if (dst->wb == &wb->flushing)
|
||||
mutex_unlock(&wb->flushing.lock);
|
||||
mutex_unlock(&wb->inc.lock);
|
||||
}
|
||||
|
||||
static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf)
|
||||
{
|
||||
struct journal_keys_to_wb dst;
|
||||
struct jset_entry *entry;
|
||||
struct bkey_i *k;
|
||||
struct btree_write_buffered_key *i;
|
||||
union btree_write_buffer_state old, new;
|
||||
int ret = 0;
|
||||
u64 v;
|
||||
|
||||
bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq));
|
||||
trans_for_each_wb_update(trans, i) {
|
||||
EBUG_ON(i->k.k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
|
||||
|
||||
for_each_jset_entry_type(entry, buf->data, BCH_JSET_ENTRY_write_buffer_keys) {
|
||||
jset_entry_for_each_key(entry, k) {
|
||||
ret = bch2_journal_key_to_wb(c, &dst, entry->btree_id, k);
|
||||
if (ret)
|
||||
i->journal_seq = trans->journal_res.seq;
|
||||
i->journal_offset = trans->journal_res.offset;
|
||||
}
|
||||
|
||||
preempt_disable();
|
||||
v = READ_ONCE(wb->state.v);
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
|
||||
new.v += btree_write_buffer_ref(new.idx);
|
||||
new.nr += trans->nr_wb_updates;
|
||||
if (new.nr > wb->size) {
|
||||
ret = -BCH_ERR_btree_insert_need_flush_buffer;
|
||||
goto out;
|
||||
}
|
||||
} while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
|
||||
|
||||
entry->type = BCH_JSET_ENTRY_btree_keys;
|
||||
}
|
||||
memcpy(wb->keys[new.idx] + old.nr,
|
||||
trans->wb_updates,
|
||||
sizeof(trans->wb_updates[0]) * trans->nr_wb_updates);
|
||||
|
||||
buf->need_flush_to_write_buffer = false;
|
||||
bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &wb->journal_pin,
|
||||
bch2_btree_write_buffer_journal_flush);
|
||||
|
||||
atomic64_sub_return_release(btree_write_buffer_ref(new.idx), &wb->state.counter);
|
||||
out:
|
||||
bch2_journal_keys_to_write_buffer_end(c, &dst);
|
||||
preempt_enable();
|
||||
return ret;
|
||||
}

static int wb_keys_resize(struct btree_write_buffer_keys *wb, size_t new_size)
{
if (wb->keys.size >= new_size)
return 0;

if (!mutex_trylock(&wb->lock))
return -EINTR;

int ret = darray_resize(&wb->keys, new_size);
mutex_unlock(&wb->lock);
return ret;
}

int bch2_btree_write_buffer_resize(struct bch_fs *c, size_t new_size)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;

return wb_keys_resize(&wb->flushing, new_size) ?:
wb_keys_resize(&wb->inc, new_size);
}

void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;

BUG_ON((wb->inc.keys.nr || wb->flushing.keys.nr) &&
!bch2_journal_error(&c->journal));
BUG_ON(wb->state.nr && !bch2_journal_error(&c->journal));

darray_exit(&wb->sorted);
darray_exit(&wb->flushing.keys);
darray_exit(&wb->inc.keys);
kvfree(wb->keys[1]);
kvfree(wb->keys[0]);
}

int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;

mutex_init(&wb->inc.lock);
mutex_init(&wb->flushing.lock);
INIT_WORK(&wb->flush_work, bch2_btree_write_buffer_flush_work);
mutex_init(&wb->flush_lock);
wb->size = c->opts.btree_write_buffer_size;

/* Will be resized by journal as needed: */
unsigned initial_size = 1 << 16;
wb->keys[0] = kvmalloc_array(wb->size, sizeof(*wb->keys[0]), GFP_KERNEL);
wb->keys[1] = kvmalloc_array(wb->size, sizeof(*wb->keys[1]), GFP_KERNEL);
if (!wb->keys[0] || !wb->keys[1])
return -BCH_ERR_ENOMEM_fs_btree_write_buffer_init;

return darray_make_room(&wb->inc.keys, initial_size) ?:
darray_make_room(&wb->flushing.keys, initial_size) ?:
darray_make_room(&wb->sorted, initial_size);
return 0;
}

@ -2,59 +2,13 @@
#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H
#define _BCACHEFS_BTREE_WRITE_BUFFER_H

#include "bkey.h"

static inline bool bch2_btree_write_buffer_should_flush(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;

return wb->inc.keys.nr + wb->flushing.keys.nr > wb->inc.keys.size / 4;
}

static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;

return wb->inc.keys.nr > wb->inc.keys.size * 3 / 4;
}

struct btree_trans;
int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
int bch2_btree_write_buffer_flush_locked(struct btree_trans *);
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
int bch2_btree_write_buffer_tryflush(struct btree_trans *);

struct journal_keys_to_wb {
struct btree_write_buffer_keys *wb;
size_t room;
u64 seq;
};
int bch2_btree_insert_keys_write_buffer(struct btree_trans *);

int __bch2_journal_key_to_wb(struct bch_fs *,
struct journal_keys_to_wb *,
enum btree_id, struct bkey_i *);

static inline int bch2_journal_key_to_wb(struct bch_fs *c,
struct journal_keys_to_wb *dst,
enum btree_id btree, struct bkey_i *k)
{
EBUG_ON(!dst->seq);

if (unlikely(!dst->room))
return __bch2_journal_key_to_wb(c, dst, btree, k);

struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
wb_k->journal_seq = dst->seq;
wb_k->btree = btree;
bkey_copy(&wb_k->k, k);
dst->wb->keys.nr++;
dst->room--;
return 0;
}

void bch2_journal_keys_to_write_buffer_start(struct bch_fs *, struct journal_keys_to_wb *, u64);
void bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_to_wb *);

int bch2_btree_write_buffer_resize(struct bch_fs *, size_t);
void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
int bch2_fs_btree_write_buffer_init(struct bch_fs *);

@ -2,56 +2,43 @@
#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
#define _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H

#include "darray.h"
#include "journal_types.h"

#define BTREE_WRITE_BUFERED_VAL_U64s_MAX 4
#define BTREE_WRITE_BUFERED_U64s_MAX (BKEY_U64s + BTREE_WRITE_BUFERED_VAL_U64s_MAX)

struct wb_key_ref {
union {
struct {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
unsigned idx:24;
u8 pos[sizeof(struct bpos)];
enum btree_id btree:8;
#else
enum btree_id btree:8;
u8 pos[sizeof(struct bpos)];
unsigned idx:24;
#endif
} __packed;
struct {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
u64 lo;
u64 mi;
u64 hi;
#else
u64 hi;
u64 mi;
u64 lo;
#endif
};
};
};

struct btree_write_buffered_key {
enum btree_id btree:8;
u64 journal_seq:56;
u64 journal_seq;
unsigned journal_offset;
enum btree_id btree;
__BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX);
};

struct btree_write_buffer_keys {
DARRAY(struct btree_write_buffered_key) keys;
struct journal_entry_pin pin;
struct mutex lock;
union btree_write_buffer_state {
struct {
atomic64_t counter;
};

struct {
u64 v;
};

struct {
u64 nr:23;
u64 idx:1;
u64 ref0:20;
u64 ref1:20;
};
};

struct btree_write_buffer {
DARRAY(struct wb_key_ref) sorted;
struct btree_write_buffer_keys inc;
struct btree_write_buffer_keys flushing;
struct work_struct flush_work;
struct mutex flush_lock;
struct journal_entry_pin journal_pin;

union btree_write_buffer_state state;
size_t size;

struct btree_write_buffered_key *keys[2];
};

#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */

@ -334,7 +334,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
|
||||
static inline struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
|
||||
{
|
||||
return (struct bch_alloc_v4) {
|
||||
.gen = b.gen,
|
||||
@ -346,13 +346,12 @@ struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
|
||||
}
|
||||
|
||||
static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct bucket old, struct bucket new,
|
||||
u64 journal_seq, bool gc)
|
||||
struct bucket old, struct bucket new)
|
||||
{
|
||||
bch2_dev_usage_update(c, ca,
|
||||
bucket_m_to_alloc(old),
|
||||
bucket_m_to_alloc(new),
|
||||
journal_seq, gc);
|
||||
0, true);
|
||||
}
|
||||
|
||||
static inline int __update_replicas(struct bch_fs *c,
|
||||
@ -658,7 +657,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
err:
|
||||
bucket_unlock(g);
|
||||
if (!ret)
|
||||
bch2_dev_usage_update_m(c, ca, old, new, 0, true);
|
||||
bch2_dev_usage_update_m(c, ca, old, new);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
return ret;
|
||||
}
|
||||
@ -773,7 +772,6 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
u64 journal_seq = trans->journal_res.seq;
|
||||
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
|
||||
unsigned nr_data = s->nr_blocks - s->nr_redundant;
|
||||
bool parity = ptr_idx >= nr_data;
|
||||
@ -820,7 +818,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
||||
err:
|
||||
bucket_unlock(g);
|
||||
if (!ret)
|
||||
bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
|
||||
bch2_dev_usage_update_m(c, ca, old, new);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
@ -843,8 +841,12 @@ static int __mark_pointer(struct btree_trans *trans,
|
||||
return ret;
|
||||
|
||||
*dst_sectors += sectors;
|
||||
*bucket_data_type = *dirty_sectors || *cached_sectors
|
||||
? ptr_data_type : 0;
|
||||
|
||||
if (!*dirty_sectors && !*cached_sectors)
|
||||
*bucket_data_type = 0;
|
||||
else if (*bucket_data_type != BCH_DATA_stripe)
|
||||
*bucket_data_type = ptr_data_type;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -855,7 +857,6 @@ static int bch2_mark_pointer(struct btree_trans *trans,
|
||||
s64 sectors,
|
||||
unsigned flags)
|
||||
{
|
||||
u64 journal_seq = trans->journal_res.seq;
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
|
||||
struct bucket old, new, *g;
|
||||
@ -882,7 +883,7 @@ static int bch2_mark_pointer(struct btree_trans *trans,
|
||||
new = *g;
|
||||
bucket_unlock(g);
|
||||
if (!ret)
|
||||
bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
|
||||
bch2_dev_usage_update_m(c, ca, old, new);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
return ret;
|
||||
|
@ -418,7 +418,7 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
if (!test_bit(BCH_FS_STARTED, &c->flags))
|
||||
if (!test_bit(BCH_FS_started, &c->flags))
|
||||
return -EINVAL;
|
||||
|
||||
if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
|
||||
@ -492,7 +492,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
|
||||
if (!test_bit(BCH_FS_STARTED, &c->flags))
|
||||
if (!test_bit(BCH_FS_started, &c->flags))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&arg, user_arg, sizeof(arg)))
|
||||
@ -533,7 +533,7 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
|
||||
struct bch_dev *ca;
|
||||
int ret = 0;
|
||||
|
||||
if (!test_bit(BCH_FS_STARTED, &c->flags))
|
||||
if (!test_bit(BCH_FS_started, &c->flags))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&arg, user_arg, sizeof(arg)))
|
||||
@ -725,7 +725,7 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
|
||||
BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
|
||||
}
|
||||
|
||||
if (!test_bit(BCH_FS_STARTED, &c->flags))
|
||||
if (!test_bit(BCH_FS_started, &c->flags))
|
||||
return -EINVAL;
|
||||
|
||||
switch (cmd) {
|
||||
|
@ -1005,7 +1005,7 @@ static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
|
||||
unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
|
||||
int ret = 0;
|
||||
|
||||
ret = bch2_btree_write_buffer_flush_sync(trans);
|
||||
ret = bch2_btree_write_buffer_flush_nocheck_rw(trans);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -1415,7 +1415,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
if (test_bit(BCH_FS_GOING_RO, &c->flags)) {
|
||||
if (test_bit(BCH_FS_going_ro, &c->flags)) {
|
||||
h = ERR_PTR(-BCH_ERR_erofs_no_writes);
|
||||
goto found;
|
||||
}
|
||||
|
@ -150,6 +150,7 @@
|
||||
x(BCH_ERR_btree_insert_fail, btree_insert_need_mark_replicas) \
|
||||
x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \
|
||||
x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \
|
||||
x(BCH_ERR_btree_insert_fail, btree_insert_need_flush_buffer) \
|
||||
x(0, backpointer_to_overwritten_btree_node) \
|
||||
x(0, lock_fail_root_changed) \
|
||||
x(0, journal_reclaim_would_deadlock) \
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
bool bch2_inconsistent_error(struct bch_fs *c)
|
||||
{
|
||||
set_bit(BCH_FS_ERROR, &c->flags);
|
||||
set_bit(BCH_FS_error, &c->flags);
|
||||
|
||||
switch (c->opts.errors) {
|
||||
case BCH_ON_ERROR_continue:
|
||||
@ -26,8 +26,8 @@ bool bch2_inconsistent_error(struct bch_fs *c)
|
||||
|
||||
void bch2_topology_error(struct bch_fs *c)
|
||||
{
|
||||
set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags);
|
||||
if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
|
||||
set_bit(BCH_FS_topology_error, &c->flags);
|
||||
if (test_bit(BCH_FS_fsck_done, &c->flags))
|
||||
bch2_inconsistent_error(c);
|
||||
}
|
||||
|
||||
@ -114,7 +114,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
|
||||
{
|
||||
struct fsck_err_state *s;
|
||||
|
||||
if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
|
||||
if (test_bit(BCH_FS_fsck_done, &c->flags))
|
||||
return NULL;
|
||||
|
||||
list_for_each_entry(s, &c->fsck_error_msgs, list)
|
||||
@ -193,7 +193,7 @@ int bch2_fsck_err(struct bch_fs *c,
|
||||
prt_printf(out, bch2_log_msg(c, ""));
|
||||
#endif
|
||||
|
||||
if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) {
|
||||
if (test_bit(BCH_FS_fsck_done, &c->flags)) {
|
||||
if (c->opts.errors != BCH_ON_ERROR_continue ||
|
||||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
|
||||
prt_str(out, ", shutting down");
|
||||
@ -253,7 +253,7 @@ int bch2_fsck_err(struct bch_fs *c,
|
||||
if (print)
|
||||
bch2_print_string_as_lines(KERN_ERR, out->buf);
|
||||
|
||||
if (!test_bit(BCH_FS_FSCK_DONE, &c->flags) &&
|
||||
if (!test_bit(BCH_FS_fsck_done, &c->flags) &&
|
||||
(ret != -BCH_ERR_fsck_fix &&
|
||||
ret != -BCH_ERR_fsck_ignore))
|
||||
bch_err(c, "Unable to continue, halting");
|
||||
@ -271,10 +271,10 @@ int bch2_fsck_err(struct bch_fs *c,
|
||||
bch2_inconsistent_error(c);
|
||||
|
||||
if (ret == -BCH_ERR_fsck_fix) {
|
||||
set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
|
||||
set_bit(BCH_FS_errors_fixed, &c->flags);
|
||||
} else {
|
||||
set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags);
|
||||
set_bit(BCH_FS_ERROR, &c->flags);
|
||||
set_bit(BCH_FS_errors_not_fixed, &c->flags);
|
||||
set_bit(BCH_FS_error, &c->flags);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -638,7 +638,7 @@ do_io:
|
||||
/* Check for writing past i_size: */
|
||||
WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) >
|
||||
round_up(i_size, block_bytes(c)) &&
|
||||
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags),
|
||||
!test_bit(BCH_FS_emergency_ro, &c->flags),
|
||||
"writing past i_size: %llu > %llu (unrounded %llu)\n",
|
||||
bio_end_sector(&w->io->op.wbio.bio) << 9,
|
||||
round_up(i_size, block_bytes(c)),
|
||||
|
@ -448,7 +448,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
|
||||
bch2_btree_id_str(btree_id),
|
||||
pos.inode, pos.offset,
|
||||
i->id, n.id, n.equiv);
|
||||
set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags);
|
||||
set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
|
||||
return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots);
|
||||
}
|
||||
}
|
||||
|
@ -1173,7 +1173,7 @@ again:
|
||||
break;
|
||||
|
||||
if (ret) {
|
||||
if (!test_bit(BCH_FS_RW, &c->flags)) {
|
||||
if (!test_bit(BCH_FS_rw, &c->flags)) {
|
||||
bch2_trans_unlock(trans);
|
||||
bch2_fs_lazy_rw(c);
|
||||
}
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include "bkey_methods.h"
|
||||
#include "btree_gc.h"
|
||||
#include "btree_update.h"
|
||||
#include "btree_write_buffer.h"
|
||||
#include "buckets.h"
|
||||
#include "error.h"
|
||||
#include "journal.h"
|
||||
@ -148,7 +147,6 @@ void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
|
||||
bch2_journal_reclaim_fast(j);
|
||||
if (write)
|
||||
closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
|
||||
wake_up(&j->wait);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -332,7 +330,6 @@ static int journal_entry_open(struct journal *j)
|
||||
buf->must_flush = false;
|
||||
buf->separate_flush = false;
|
||||
buf->flush_time = 0;
|
||||
buf->need_flush_to_write_buffer = true;
|
||||
|
||||
memset(buf->data, 0, sizeof(*buf->data));
|
||||
buf->data->seq = cpu_to_le64(journal_cur_seq(j));
|
||||
@ -769,75 +766,6 @@ void bch2_journal_block(struct journal *j)
|
||||
journal_quiesce(j);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: ideally this would not be closing the current journal entry, but
|
||||
* otherwise we do not have a way to avoid racing with res_get() - j->blocked
|
||||
* will race.
|
||||
*/
|
||||
static bool journal_reservations_stopped(struct journal *j)
|
||||
{
|
||||
union journal_res_state s;
|
||||
|
||||
journal_entry_close(j);
|
||||
|
||||
s.v = atomic64_read_acquire(&j->reservations.counter);
|
||||
|
||||
return s.buf0_count == 0 &&
|
||||
s.buf1_count == 0 &&
|
||||
s.buf2_count == 0 &&
|
||||
s.buf3_count == 0;
|
||||
}
|
||||
|
||||
void bch2_journal_block_reservations(struct journal *j)
|
||||
{
|
||||
spin_lock(&j->lock);
|
||||
j->blocked++;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
wait_event(j->wait, journal_reservations_stopped(j));
|
||||
}
|
||||
|
||||
static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq)
|
||||
{
|
||||
spin_lock(&j->lock);
|
||||
max_seq = min(max_seq, journal_cur_seq(j));
|
||||
|
||||
for (u64 seq = journal_last_unwritten_seq(j);
|
||||
seq <= max_seq;
|
||||
seq++) {
|
||||
unsigned idx = seq & JOURNAL_BUF_MASK;
|
||||
struct journal_buf *buf = j->buf + idx;
|
||||
union journal_res_state s;
|
||||
|
||||
if (!buf->need_flush_to_write_buffer)
|
||||
continue;
|
||||
|
||||
if (seq == journal_cur_seq(j))
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
|
||||
|
||||
s.v = atomic64_read_acquire(&j->reservations.counter);
|
||||
|
||||
if (journal_state_count(s, idx)) {
|
||||
spin_unlock(&j->lock);
|
||||
return ERR_PTR(-EAGAIN);
|
||||
}
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
return buf;
|
||||
}
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq)
|
||||
{
|
||||
struct journal_buf *ret;
|
||||
|
||||
wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, max_seq)) != ERR_PTR(-EAGAIN));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* allocate journal on a device: */
|
||||
|
||||
static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
@ -1289,7 +1217,6 @@ int bch2_fs_journal_init(struct journal *j)
|
||||
static struct lock_class_key res_key;
|
||||
unsigned i;
|
||||
|
||||
mutex_init(&j->buf_lock);
|
||||
spin_lock_init(&j->lock);
|
||||
spin_lock_init(&j->err_lock);
|
||||
init_waitqueue_head(&j->wait);
|
||||
|
@ -259,7 +259,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u
|
||||
{
|
||||
union journal_res_state s;
|
||||
|
||||
s.v = atomic64_sub_return_release(((union journal_res_state) {
|
||||
s.v = atomic64_sub_return(((union journal_res_state) {
|
||||
.buf0_count = idx == 0,
|
||||
.buf1_count = idx == 1,
|
||||
.buf2_count = idx == 2,
|
||||
@ -427,8 +427,6 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
|
||||
|
||||
void bch2_journal_unblock(struct journal *);
|
||||
void bch2_journal_block(struct journal *);
|
||||
void bch2_journal_block_reservations(struct journal *);
|
||||
struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq);
|
||||
|
||||
void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
|
||||
void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include "alloc_foreground.h"
|
||||
#include "btree_io.h"
|
||||
#include "btree_update_interior.h"
|
||||
#include "btree_write_buffer.h"
|
||||
#include "buckets.h"
|
||||
#include "checksum.h"
|
||||
#include "disk_groups.h"
|
||||
@ -722,22 +721,6 @@ static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs
|
||||
journal_entry_btree_keys_to_text(out, c, entry);
|
||||
}
|
||||
|
||||
static int journal_entry_write_buffer_keys_validate(struct bch_fs *c,
|
||||
struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
unsigned version, int big_endian,
|
||||
enum bkey_invalid_flags flags)
|
||||
{
|
||||
return journal_entry_btree_keys_validate(c, jset, entry,
|
||||
version, big_endian, READ);
|
||||
}
|
||||
|
||||
static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
struct jset_entry *entry)
|
||||
{
|
||||
journal_entry_btree_keys_to_text(out, c, entry);
|
||||
}
|
||||
|
||||
struct jset_entry_ops {
|
||||
int (*validate)(struct bch_fs *, struct jset *,
|
||||
struct jset_entry *, unsigned, int,
|
||||
@ -1518,8 +1501,6 @@ done:
|
||||
|
||||
static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
|
||||
/* we aren't holding j->lock: */
|
||||
unsigned new_size = READ_ONCE(j->buf_size_want);
|
||||
void *new_buf;
|
||||
@ -1527,11 +1508,6 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
|
||||
if (buf->buf_size >= new_size)
|
||||
return;
|
||||
|
||||
size_t btree_write_buffer_size = new_size / 64;
|
||||
|
||||
if (bch2_btree_write_buffer_resize(c, btree_write_buffer_size))
|
||||
return;
|
||||
|
||||
new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN);
|
||||
if (!new_buf)
|
||||
return;
|
||||
@ -1621,7 +1597,6 @@ static CLOSURE_CALLBACK(journal_write_done)
|
||||
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
bch2_journal_reclaim_fast(j);
|
||||
bch2_journal_space_available(j);
|
||||
|
||||
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
|
||||
@ -1725,11 +1700,9 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct jset_entry *start, *end, *i, *next, *prev = NULL;
|
||||
struct jset *jset = w->data;
|
||||
struct journal_keys_to_wb wb = { NULL };
|
||||
unsigned sectors, bytes, u64s;
|
||||
unsigned long btree_roots_have = 0;
|
||||
bool validate_before_checksum = false;
|
||||
u64 seq = le64_to_cpu(jset->seq);
|
||||
unsigned long btree_roots_have = 0;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
@ -1757,28 +1730,9 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
|
||||
* to c->btree_roots we have to get any missing btree roots and
|
||||
* add them to this journal entry:
|
||||
*/
|
||||
switch (i->type) {
|
||||
case BCH_JSET_ENTRY_btree_root:
|
||||
if (i->type == BCH_JSET_ENTRY_btree_root) {
|
||||
bch2_journal_entry_to_btree_root(c, i);
|
||||
__set_bit(i->btree_id, &btree_roots_have);
|
||||
break;
|
||||
case BCH_JSET_ENTRY_write_buffer_keys:
|
||||
EBUG_ON(!w->need_flush_to_write_buffer);
|
||||
|
||||
if (!wb.wb)
|
||||
bch2_journal_keys_to_write_buffer_start(c, &wb, seq);
|
||||
|
||||
struct bkey_i *k;
|
||||
jset_entry_for_each_key(i, k) {
|
||||
ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k);
|
||||
if (ret) {
|
||||
bch2_fs_fatal_error(c, "-ENOMEM flushing journal keys to btree write buffer");
|
||||
bch2_journal_keys_to_write_buffer_end(c, &wb);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
i->type = BCH_JSET_ENTRY_btree_keys;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Can we merge with previous entry? */
|
||||
@ -1801,10 +1755,6 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
|
||||
memmove_u64s_down(prev, i, jset_u64s(u64s));
|
||||
}
|
||||
|
||||
if (wb.wb)
|
||||
bch2_journal_keys_to_write_buffer_end(c, &wb);
|
||||
w->need_flush_to_write_buffer = false;
|
||||
|
||||
prev = prev ? vstruct_next(prev) : jset->start;
|
||||
jset->u64s = cpu_to_le32((u64 *) prev - jset->_data);
|
||||
|
||||
@ -1812,7 +1762,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
|
||||
|
||||
end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have);
|
||||
|
||||
bch2_journal_super_entries_add_common(c, &end, seq);
|
||||
bch2_journal_super_entries_add_common(c, &end,
|
||||
le64_to_cpu(jset->seq));
|
||||
u64s = (u64 *) end - (u64 *) start;
|
||||
BUG_ON(u64s > j->entry_u64s_reserved);
|
||||
|
||||
@ -1835,7 +1786,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
|
||||
SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
|
||||
|
||||
if (!JSET_NO_FLUSH(jset) && journal_entry_empty(jset))
|
||||
j->last_empty_seq = seq;
|
||||
j->last_empty_seq = le64_to_cpu(jset->seq);
|
||||
|
||||
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
|
||||
validate_before_checksum = true;
|
||||
@ -1931,11 +1882,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
mutex_lock(&j->buf_lock);
|
||||
journal_buf_realloc(j, w);
|
||||
|
||||
ret = bch2_journal_write_prep(j, w);
|
||||
mutex_unlock(&j->buf_lock);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
|
@ -3,7 +3,6 @@
|
||||
#include "bcachefs.h"
|
||||
#include "btree_key_cache.h"
|
||||
#include "btree_update.h"
|
||||
#include "btree_write_buffer.h"
|
||||
#include "buckets.h"
|
||||
#include "errcode.h"
|
||||
#include "error.h"
|
||||
@ -51,23 +50,20 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
|
||||
return available;
|
||||
}
|
||||
|
||||
void bch2_journal_set_watermark(struct journal *j)
|
||||
static inline void journal_set_watermark(struct journal *j)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
bool low_on_space = j->space[journal_space_clean].total * 4 <=
|
||||
j->space[journal_space_total].total;
|
||||
bool low_on_pin = fifo_free(&j->pin) < j->pin.size / 4;
|
||||
bool low_on_wb = bch2_btree_write_buffer_must_wait(c);
|
||||
unsigned watermark = low_on_space || low_on_pin || low_on_wb
|
||||
unsigned watermark = low_on_space || low_on_pin
|
||||
? BCH_WATERMARK_reclaim
|
||||
: BCH_WATERMARK_stripe;
|
||||
|
||||
if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space],
|
||||
&j->low_on_space_start, low_on_space) ||
|
||||
track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin],
|
||||
&j->low_on_pin_start, low_on_pin) ||
|
||||
track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full],
|
||||
&j->write_buffer_full_start, low_on_wb))
|
||||
&j->low_on_pin_start, low_on_pin))
|
||||
trace_and_count(c, journal_full, c);
|
||||
|
||||
swap(watermark, j->watermark);
|
||||
@ -234,7 +230,7 @@ void bch2_journal_space_available(struct journal *j)
|
||||
else
|
||||
clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
|
||||
|
||||
bch2_journal_set_watermark(j);
|
||||
journal_set_watermark(j);
|
||||
out:
|
||||
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
|
||||
j->cur_entry_error = ret;
|
||||
@ -307,7 +303,6 @@ void bch2_journal_reclaim_fast(struct journal *j)
|
||||
* all btree nodes got written out
|
||||
*/
|
||||
while (!fifo_empty(&j->pin) &&
|
||||
j->pin.front <= j->seq_ondisk &&
|
||||
!atomic_read(&fifo_peek_front(&j->pin).count)) {
|
||||
j->pin.front++;
|
||||
popped = true;
|
||||
|
@ -16,7 +16,6 @@ static inline void journal_reclaim_kick(struct journal *j)
|
||||
unsigned bch2_journal_dev_buckets_available(struct journal *,
|
||||
struct journal_device *,
|
||||
enum journal_space_from);
|
||||
void bch2_journal_set_watermark(struct journal *);
|
||||
void bch2_journal_space_available(struct journal *);
|
||||
|
||||
static inline bool journal_pin_active(struct journal_entry_pin *pin)
|
||||
|
@ -267,7 +267,7 @@ retry:
|
||||
|
||||
while (!(ret = PTR_ERR_OR_ZERO(b)) &&
|
||||
b &&
|
||||
!test_bit(BCH_FS_STOPPING, &c->flags))
|
||||
!test_bit(BCH_FS_stopping, &c->flags))
|
||||
b = bch2_btree_iter_next_node(&iter);
|
||||
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
|
@ -36,7 +36,6 @@ struct journal_buf {
|
||||
bool noflush; /* write has already been kicked off, and was noflush */
|
||||
bool must_flush; /* something wants a flush */
|
||||
bool separate_flush;
|
||||
bool need_flush_to_write_buffer;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -182,12 +181,6 @@ struct journal {
|
||||
*/
|
||||
darray_u64 early_journal_entries;
|
||||
|
||||
/*
|
||||
* Protects journal_buf->data, when accessing without a journal
|
||||
* reservation: for synchronization between the btree write buffer code
|
||||
* and the journal write path:
|
||||
*/
|
||||
struct mutex buf_lock;
|
||||
/*
|
||||
* Two journal entries -- one is currently open for new entries, the
|
||||
* other is possibly being written out.
|
||||
@ -278,7 +271,6 @@ struct journal {
|
||||
u64 low_on_space_start;
|
||||
u64 low_on_pin_start;
|
||||
u64 max_in_flight_start;
|
||||
u64 write_buffer_full_start;
|
||||
|
||||
struct bch2_time_stats *flush_write_time;
|
||||
struct bch2_time_stats *noflush_write_time;
|
||||
|
@ -123,9 +123,11 @@ int bch2_check_lru_key(struct btree_trans *trans,
|
||||
if (lru_k.k->type != KEY_TYPE_set ||
|
||||
lru_pos_time(lru_k.k->p) != idx) {
|
||||
if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) {
|
||||
ret = bch2_btree_write_buffer_flush_sync(trans);
|
||||
if (!ret) {
|
||||
*last_flushed_pos = lru_k.k->p;
|
||||
ret = bch2_btree_write_buffer_flush_sync(trans) ?:
|
||||
-BCH_ERR_transaction_restart_write_buffer_flush;
|
||||
ret = -BCH_ERR_transaction_restart_write_buffer_flush;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -56,17 +56,6 @@ static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
|
||||
}
|
||||
}
|
||||
|
||||
static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
if (trace_move_extent_alloc_mem_fail_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
trace_move_extent_alloc_mem_fail(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
}
|
||||
|
||||
struct moving_io {
|
||||
struct list_head read_list;
|
||||
struct list_head io_list;
|
||||
@ -356,8 +345,16 @@ err:
|
||||
if (ret == -BCH_ERR_data_update_done)
|
||||
return 0;
|
||||
|
||||
this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
|
||||
trace_move_extent_alloc_mem_fail2(c, k);
|
||||
this_cpu_inc(c->counters[BCH_COUNTER_move_extent_start_fail]);
|
||||
if (trace_move_extent_start_fail_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
prt_str(&buf, ": ");
|
||||
prt_str(&buf, bch2_err_str(ret));
|
||||
trace_move_extent_start_fail(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -233,6 +233,11 @@ enum fsck_err_opts {
|
||||
OPT_BOOL(), \
|
||||
BCH2_NO_SB_OPT, true, \
|
||||
NULL, "Stash pointer to in memory btree node in btree ptr")\
|
||||
x(btree_write_buffer_size, u32, \
|
||||
OPT_FS|OPT_MOUNT, \
|
||||
OPT_UINT(16, (1U << 20) - 1), \
|
||||
BCH2_NO_SB_OPT, 1U << 13, \
|
||||
NULL, "Number of btree write buffer entries") \
|
||||
x(gc_reserve_percent, u8, \
|
||||
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
|
||||
OPT_UINT(5, 21), \
|
||||
|
@ -530,7 +530,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
|
||||
move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
|
||||
keys->gap = keys->nr;
|
||||
|
||||
set_bit(BCH_FS_MAY_GO_RW, &c->flags);
|
||||
set_bit(BCH_FS_may_go_rw, &c->flags);
|
||||
if (keys->nr)
|
||||
return bch2_fs_read_write_early(c);
|
||||
return 0;
|
||||
@ -876,13 +876,13 @@ use_clean:
|
||||
|
||||
/* If we fixed errors, verify that fs is actually clean now: */
|
||||
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
|
||||
test_bit(BCH_FS_ERRORS_FIXED, &c->flags) &&
|
||||
!test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags) &&
|
||||
!test_bit(BCH_FS_ERROR, &c->flags)) {
|
||||
test_bit(BCH_FS_errors_fixed, &c->flags) &&
|
||||
!test_bit(BCH_FS_errors_not_fixed, &c->flags) &&
|
||||
!test_bit(BCH_FS_error, &c->flags)) {
|
||||
bch2_flush_fsck_errs(c);
|
||||
|
||||
bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean");
|
||||
clear_bit(BCH_FS_ERRORS_FIXED, &c->flags);
|
||||
clear_bit(BCH_FS_errors_fixed, &c->flags);
|
||||
|
||||
c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
|
||||
|
||||
@ -890,13 +890,13 @@ use_clean:
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (test_bit(BCH_FS_ERRORS_FIXED, &c->flags) ||
|
||||
test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) {
|
||||
if (test_bit(BCH_FS_errors_fixed, &c->flags) ||
|
||||
test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
|
||||
bch_err(c, "Second fsck run was not clean");
|
||||
set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags);
|
||||
set_bit(BCH_FS_errors_not_fixed, &c->flags);
|
||||
}
|
||||
|
||||
set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
|
||||
set_bit(BCH_FS_errors_fixed, &c->flags);
|
||||
}
|
||||
|
||||
if (enabled_qtypes(c)) {
|
||||
@ -913,14 +913,14 @@ use_clean:
|
||||
write_sb = true;
|
||||
}
|
||||
|
||||
if (!test_bit(BCH_FS_ERROR, &c->flags)) {
|
||||
if (!test_bit(BCH_FS_error, &c->flags)) {
|
||||
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
|
||||
write_sb = true;
|
||||
}
|
||||
|
||||
if (c->opts.fsck &&
|
||||
!test_bit(BCH_FS_ERROR, &c->flags) &&
|
||||
!test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) {
|
||||
!test_bit(BCH_FS_error, &c->flags) &&
|
||||
!test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
|
||||
SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
|
||||
SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 0);
|
||||
write_sb = true;
|
||||
@ -954,7 +954,7 @@ use_clean:
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
set_bit(BCH_FS_FSCK_DONE, &c->flags);
|
||||
set_bit(BCH_FS_fsck_done, &c->flags);
|
||||
bch2_flush_fsck_errs(c);
|
||||
|
||||
if (!c->opts.keep_journal &&
|
||||
@ -962,7 +962,7 @@ out:
|
||||
bch2_journal_keys_put_initial(c);
|
||||
kfree(clean);
|
||||
|
||||
if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) {
|
||||
if (!ret && test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) {
|
||||
bch2_fs_read_write_early(c);
|
||||
bch2_delete_dead_snapshots_async(c);
|
||||
}
|
||||
@ -1001,8 +1001,8 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns);
|
||||
set_bit(BCH_FS_MAY_GO_RW, &c->flags);
|
||||
set_bit(BCH_FS_FSCK_DONE, &c->flags);
|
||||
set_bit(BCH_FS_may_go_rw, &c->flags);
|
||||
set_bit(BCH_FS_fsck_done, &c->flags);
|
||||
|
||||
for (i = 0; i < BTREE_ID_NR; i++)
|
||||
bch2_btree_root_alloc(c, i);
|
||||
|
@ -318,7 +318,7 @@ int bch2_mark_snapshot(struct btree_trans *trans,
|
||||
__set_is_ancestor_bitmap(c, id);
|
||||
|
||||
if (BCH_SNAPSHOT_DELETED(s.v)) {
|
||||
set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags);
|
||||
set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
|
||||
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots)
|
||||
bch2_delete_dead_snapshots_async(c);
|
||||
}
|
||||
@ -1376,10 +1376,10 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
|
||||
u32 *i, id;
|
||||
int ret = 0;
|
||||
|
||||
if (!test_and_clear_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags))
|
||||
if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
|
||||
return 0;
|
||||
|
||||
if (!test_bit(BCH_FS_STARTED, &c->flags)) {
|
||||
if (!test_bit(BCH_FS_started, &c->flags)) {
|
||||
ret = bch2_fs_read_write_early(c);
|
||||
if (ret) {
|
||||
bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
|
||||
@ -1680,7 +1680,7 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct
|
||||
if (BCH_SNAPSHOT_DELETED(snap.v) ||
|
||||
bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset ||
|
||||
(ret = bch2_snapshot_needs_delete(trans, k)) > 0) {
|
||||
set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags);
|
||||
set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -916,9 +916,9 @@ int bch2_write_super(struct bch_fs *c)
|
||||
|
||||
le64_add_cpu(&c->disk_sb.sb->seq, 1);
|
||||
|
||||
if (test_bit(BCH_FS_ERROR, &c->flags))
|
||||
if (test_bit(BCH_FS_error, &c->flags))
|
||||
SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
|
||||
if (test_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags))
|
||||
if (test_bit(BCH_FS_topology_error, &c->flags))
|
||||
SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1);
|
||||
|
||||
SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);
|
||||
|
@ -73,6 +73,13 @@ MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
|
||||
MODULE_DESCRIPTION("bcachefs filesystem");
|
||||
|
||||
const char * const bch2_fs_flag_strs[] = {
|
||||
#define x(n) #n,
|
||||
BCH_FS_FLAGS()
|
||||
#undef x
|
||||
NULL
|
||||
};
|
||||
|
||||
#define KTYPE(type) \
|
||||
static const struct attribute_group type ## _group = { \
|
||||
.attrs = type ## _files \
|
||||
@ -240,8 +247,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
|
||||
journal_cur_seq(&c->journal));
|
||||
|
||||
if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
|
||||
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
|
||||
set_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
|
||||
!test_bit(BCH_FS_emergency_ro, &c->flags))
|
||||
set_bit(BCH_FS_clean_shutdown, &c->flags);
|
||||
bch2_fs_journal_stop(&c->journal);
|
||||
|
||||
/*
|
||||
@ -256,19 +263,19 @@ static void bch2_writes_disabled(struct percpu_ref *writes)
|
||||
{
|
||||
struct bch_fs *c = container_of(writes, struct bch_fs, writes);
|
||||
|
||||
set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
|
||||
set_bit(BCH_FS_write_disable_complete, &c->flags);
|
||||
wake_up(&bch2_read_only_wait);
|
||||
}
|
||||
#endif
|
||||
|
||||
void bch2_fs_read_only(struct bch_fs *c)
|
||||
{
|
||||
if (!test_bit(BCH_FS_RW, &c->flags)) {
|
||||
if (!test_bit(BCH_FS_rw, &c->flags)) {
|
||||
bch2_journal_reclaim_stop(&c->journal);
|
||||
return;
|
||||
}
|
||||
|
||||
BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
|
||||
BUG_ON(test_bit(BCH_FS_write_disable_complete, &c->flags));
|
||||
|
||||
bch_verbose(c, "going read-only");
|
||||
|
||||
@ -276,7 +283,7 @@ void bch2_fs_read_only(struct bch_fs *c)
|
||||
* Block new foreground-end write operations from starting - any new
|
||||
* writes will return -EROFS:
|
||||
*/
|
||||
set_bit(BCH_FS_GOING_RO, &c->flags);
|
||||
set_bit(BCH_FS_going_ro, &c->flags);
|
||||
#ifndef BCH_WRITE_REF_DEBUG
|
||||
percpu_ref_kill(&c->writes);
|
||||
#else
|
||||
@ -296,36 +303,35 @@ void bch2_fs_read_only(struct bch_fs *c)
|
||||
* that going RO is complete:
|
||||
*/
|
||||
wait_event(bch2_read_only_wait,
|
||||
test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) ||
|
||||
test_bit(BCH_FS_EMERGENCY_RO, &c->flags));
|
||||
test_bit(BCH_FS_write_disable_complete, &c->flags) ||
|
||||
test_bit(BCH_FS_emergency_ro, &c->flags));
|
||||
|
||||
bool writes_disabled = test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
|
||||
bool writes_disabled = test_bit(BCH_FS_write_disable_complete, &c->flags);
|
||||
if (writes_disabled)
|
||||
bch_verbose(c, "finished waiting for writes to stop");
|
||||
|
||||
__bch2_fs_read_only(c);
|
||||
|
||||
wait_event(bch2_read_only_wait,
|
||||
test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
|
||||
test_bit(BCH_FS_write_disable_complete, &c->flags));
|
||||
|
||||
if (!writes_disabled)
|
||||
bch_verbose(c, "finished waiting for writes to stop");
|
||||
|
||||
clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
|
||||
clear_bit(BCH_FS_GOING_RO, &c->flags);
|
||||
clear_bit(BCH_FS_RW, &c->flags);
|
||||
clear_bit(BCH_FS_write_disable_complete, &c->flags);
|
||||
clear_bit(BCH_FS_going_ro, &c->flags);
|
||||
clear_bit(BCH_FS_rw, &c->flags);
|
||||
|
||||
if (!bch2_journal_error(&c->journal) &&
|
||||
!test_bit(BCH_FS_ERROR, &c->flags) &&
|
||||
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
|
||||
test_bit(BCH_FS_STARTED, &c->flags) &&
|
||||
test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags) &&
|
||||
!test_bit(BCH_FS_error, &c->flags) &&
|
||||
!test_bit(BCH_FS_emergency_ro, &c->flags) &&
|
||||
test_bit(BCH_FS_started, &c->flags) &&
|
||||
test_bit(BCH_FS_clean_shutdown, &c->flags) &&
|
||||
!c->opts.norecovery) {
|
||||
BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
|
||||
BUG_ON(atomic_read(&c->btree_cache.dirty));
|
||||
BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
|
||||
BUG_ON(c->btree_write_buffer.inc.keys.nr);
|
||||
BUG_ON(c->btree_write_buffer.flushing.keys.nr);
|
||||
BUG_ON(c->btree_write_buffer.state.nr);
|
||||
|
||||
bch_verbose(c, "marking filesystem clean");
|
||||
bch2_fs_mark_clean(c);
|
||||
@ -351,7 +357,7 @@ static void bch2_fs_read_only_async(struct bch_fs *c)
|
||||
|
||||
bool bch2_fs_emergency_read_only(struct bch_fs *c)
|
||||
{
|
||||
bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags);
|
||||
bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags);
|
||||
|
||||
bch2_journal_halt(&c->journal);
|
||||
bch2_fs_read_only_async(c);
|
||||
@ -392,12 +398,12 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
if (test_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags)) {
|
||||
if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) {
|
||||
bch_err(c, "cannot go rw, unfixed btree errors");
|
||||
return -BCH_ERR_erofs_unfixed_errors;
|
||||
}
|
||||
|
||||
if (test_bit(BCH_FS_RW, &c->flags))
|
||||
if (test_bit(BCH_FS_rw, &c->flags))
|
||||
return 0;
|
||||
|
||||
if (c->opts.norecovery)
|
||||
@ -420,7 +426,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
|
||||
clear_bit(BCH_FS_clean_shutdown, &c->flags);
|
||||
|
||||
/*
|
||||
* First journal write must be a flush write: after a clean shutdown we
|
||||
@ -434,8 +440,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
|
||||
bch2_dev_allocator_add(c, ca);
|
||||
bch2_recalc_capacity(c);
|
||||
|
||||
set_bit(BCH_FS_RW, &c->flags);
|
||||
set_bit(BCH_FS_WAS_RW, &c->flags);
|
||||
set_bit(BCH_FS_rw, &c->flags);
|
||||
set_bit(BCH_FS_was_rw, &c->flags);
|
||||
|
||||
#ifndef BCH_WRITE_REF_DEBUG
|
||||
percpu_ref_reinit(&c->writes);
|
||||
@ -468,7 +474,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
|
||||
bch2_do_pending_node_rewrites(c);
|
||||
return 0;
|
||||
err:
|
||||
if (test_bit(BCH_FS_RW, &c->flags))
|
||||
if (test_bit(BCH_FS_rw, &c->flags))
|
||||
bch2_fs_read_only(c);
|
||||
else
|
||||
__bch2_fs_read_only(c);
|
||||
@ -568,7 +574,7 @@ void __bch2_fs_stop(struct bch_fs *c)
|
||||
|
||||
bch_verbose(c, "shutting down");
|
||||
|
||||
set_bit(BCH_FS_STOPPING, &c->flags);
|
||||
set_bit(BCH_FS_stopping, &c->flags);
|
||||
|
||||
cancel_work_sync(&c->journal_seq_blacklist_gc_work);
|
||||
|
||||
@ -960,7 +966,7 @@ int bch2_fs_start(struct bch_fs *c)
|
||||
|
||||
down_write(&c->state_lock);
|
||||
|
||||
BUG_ON(test_bit(BCH_FS_STARTED, &c->flags));
|
||||
BUG_ON(test_bit(BCH_FS_started, &c->flags));
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
||||
@ -995,12 +1001,12 @@ int bch2_fs_start(struct bch_fs *c)
|
||||
goto err;
|
||||
}
|
||||
|
||||
set_bit(BCH_FS_STARTED, &c->flags);
|
||||
set_bit(BCH_FS_started, &c->flags);
|
||||
|
||||
if (c->opts.read_only || c->opts.nochanges) {
|
||||
bch2_fs_read_only(c);
|
||||
} else {
|
||||
ret = !test_bit(BCH_FS_RW, &c->flags)
|
||||
ret = !test_bit(BCH_FS_rw, &c->flags)
|
||||
? bch2_fs_read_write(c)
|
||||
: bch2_fs_read_write_late(c);
|
||||
if (ret)
|
||||
|
@ -8,6 +8,8 @@
|
||||
|
||||
#include <linux/math64.h>
|
||||
|
||||
extern const char * const bch2_fs_flag_strs[];
|
||||
|
||||
struct bch_fs *bch2_dev_to_fs(dev_t);
|
||||
struct bch_fs *bch2_uuid_to_fs(__uuid_t);
|
||||
|
||||
@ -37,8 +39,8 @@ int bch2_fs_read_write_early(struct bch_fs *);
|
||||
*/
|
||||
static inline void bch2_fs_lazy_rw(struct bch_fs *c)
|
||||
{
|
||||
if (!test_bit(BCH_FS_RW, &c->flags) &&
|
||||
!test_bit(BCH_FS_WAS_RW, &c->flags))
|
||||
if (!test_bit(BCH_FS_rw, &c->flags) &&
|
||||
!test_bit(BCH_FS_was_rw, &c->flags))
|
||||
bch2_fs_read_write_early(c);
|
||||
}
|
||||
|
||||
|
@ -145,6 +145,7 @@ rw_attribute(gc_gens_pos);
|
||||
|
||||
read_attribute(uuid);
|
||||
read_attribute(minor);
|
||||
read_attribute(flags);
|
||||
read_attribute(bucket_size);
|
||||
read_attribute(first_bucket);
|
||||
read_attribute(nbuckets);
|
||||
@ -268,7 +269,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
|
||||
|
||||
memset(s, 0, sizeof(s));
|
||||
|
||||
if (!test_bit(BCH_FS_STARTED, &c->flags))
|
||||
if (!test_bit(BCH_FS_started, &c->flags))
|
||||
return -EPERM;
|
||||
|
||||
trans = bch2_trans_get(c);
|
||||
@ -384,6 +385,9 @@ SHOW(bch2_fs)
|
||||
sysfs_print(minor, c->minor);
|
||||
sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
|
||||
|
||||
if (attr == &sysfs_flags)
|
||||
prt_bitflags(out, bch2_fs_flag_strs, c->flags);
|
||||
|
||||
sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
|
||||
|
||||
if (attr == &sysfs_btree_write_stats)
|
||||
@ -416,7 +420,7 @@ SHOW(bch2_fs)
|
||||
bch2_btree_updates_to_text(out, c);
|
||||
|
||||
if (attr == &sysfs_btree_cache)
|
||||
bch2_btree_cache_to_text(out, &c->btree_cache);
|
||||
bch2_btree_cache_to_text(out, c);
|
||||
|
||||
if (attr == &sysfs_btree_key_cache)
|
||||
bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
|
||||
@ -497,12 +501,12 @@ STORE(bch2_fs)
|
||||
|
||||
/* Debugging: */
|
||||
|
||||
if (!test_bit(BCH_FS_STARTED, &c->flags))
|
||||
if (!test_bit(BCH_FS_started, &c->flags))
|
||||
return -EPERM;
|
||||
|
||||
/* Debugging: */
|
||||
|
||||
if (!test_bit(BCH_FS_RW, &c->flags))
|
||||
if (!test_bit(BCH_FS_rw, &c->flags))
|
||||
return -EROFS;
|
||||
|
||||
if (attr == &sysfs_prune_cache) {
|
||||
@ -634,6 +638,7 @@ STORE(bch2_fs_internal)
|
||||
SYSFS_OPS(bch2_fs_internal);
|
||||
|
||||
struct attribute *bch2_fs_internal_files[] = {
|
||||
&sysfs_flags,
|
||||
&sysfs_journal_debug,
|
||||
&sysfs_btree_updates,
|
||||
&sysfs_btree_cache,
|
||||
|
@ -775,9 +775,9 @@ TRACE_EVENT(move_extent_fail,
|
||||
TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
|
||||
);
|
||||
|
||||
DEFINE_EVENT(str, move_extent_alloc_mem_fail,
|
||||
TP_PROTO(struct bch_fs *c, const char *k),
|
||||
TP_ARGS(c, k)
|
||||
DEFINE_EVENT(str, move_extent_start_fail,
|
||||
TP_PROTO(struct bch_fs *c, const char *str),
|
||||
TP_ARGS(c, str)
|
||||
);
|
||||
|
||||
TRACE_EVENT(move_data,