mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-02 00:00:03 +03:00
Update bcachefs sources to 75e8a078b8 bcachefs: improved flush_held_btree_writes()
This commit is contained in:
parent
17c5215c1c
commit
a4eb187a6f
@ -1 +1 @@
|
||||
09a546543006b60d44c4c51e7b40cd3ec7837a5e
|
||||
75e8a078b85703322fcf558f75a6845c0ef5dbb0
|
||||
|
@ -319,6 +319,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
|
||||
struct bkey_i_extent *e;
|
||||
BKEY_PADDED(k) k;
|
||||
u64 b = sector_to_bucket(ca, physical);
|
||||
struct bucket_mark m;
|
||||
struct disk_reservation res;
|
||||
unsigned sectors;
|
||||
int ret;
|
||||
@ -337,7 +338,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
|
||||
.gen = bucket(ca, b)->mark.gen,
|
||||
});
|
||||
|
||||
bucket_set_dirty(ca, b);
|
||||
bucket_cmpxchg(bucket(ca, b), m, m.dirty = true);
|
||||
|
||||
ret = bch2_disk_reservation_get(c, &res, sectors, 1,
|
||||
BCH_DISK_RESERVATION_NOFAIL);
|
||||
|
@ -128,6 +128,34 @@ static inline void put_alloc_field(struct bkey_i_alloc *a, void **p,
|
||||
*p += bytes;
|
||||
}
|
||||
|
||||
struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a)
|
||||
{
|
||||
struct bkey_alloc_unpacked ret = { .gen = a->gen };
|
||||
const void *d = a->data;
|
||||
unsigned idx = 0;
|
||||
|
||||
#define x(_name, _bits) ret._name = get_alloc_field(a, &d, idx++);
|
||||
BCH_ALLOC_FIELDS()
|
||||
#undef x
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_alloc_pack(struct bkey_i_alloc *dst,
|
||||
const struct bkey_alloc_unpacked src)
|
||||
{
|
||||
unsigned idx = 0;
|
||||
void *d = dst->v.data;
|
||||
|
||||
dst->v.fields = 0;
|
||||
dst->v.gen = src.gen;
|
||||
|
||||
#define x(_name, _bits) put_alloc_field(dst, &d, idx++, src._name);
|
||||
BCH_ALLOC_FIELDS()
|
||||
#undef x
|
||||
|
||||
set_bkey_val_bytes(&dst->k, (void *) d - (void *) &dst->v);
|
||||
}
|
||||
|
||||
static unsigned bch_alloc_val_u64s(const struct bch_alloc *a)
|
||||
{
|
||||
unsigned i, bytes = offsetof(struct bch_alloc, data);
|
||||
@ -173,15 +201,24 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
static void __alloc_read_key(struct bucket *g, const struct bch_alloc *a)
|
||||
{
|
||||
const void *d = a->data;
|
||||
unsigned idx = 0;
|
||||
unsigned idx = 0, data_type, dirty_sectors, cached_sectors;
|
||||
struct bucket_mark m;
|
||||
|
||||
g->_mark.gen = a->gen;
|
||||
g->gen_valid = 1;
|
||||
g->io_time[READ] = get_alloc_field(a, &d, idx++);
|
||||
g->io_time[WRITE] = get_alloc_field(a, &d, idx++);
|
||||
g->_mark.data_type = get_alloc_field(a, &d, idx++);
|
||||
g->_mark.dirty_sectors = get_alloc_field(a, &d, idx++);
|
||||
g->_mark.cached_sectors = get_alloc_field(a, &d, idx++);
|
||||
data_type = get_alloc_field(a, &d, idx++);
|
||||
dirty_sectors = get_alloc_field(a, &d, idx++);
|
||||
cached_sectors = get_alloc_field(a, &d, idx++);
|
||||
g->oldest_gen = get_alloc_field(a, &d, idx++);
|
||||
|
||||
bucket_cmpxchg(g, m, ({
|
||||
m.gen = a->gen;
|
||||
m.data_type = data_type;
|
||||
m.dirty_sectors = dirty_sectors;
|
||||
m.cached_sectors = cached_sectors;
|
||||
}));
|
||||
|
||||
g->gen_valid = 1;
|
||||
}
|
||||
|
||||
static void __alloc_write_key(struct bkey_i_alloc *a, struct bucket *g,
|
||||
@ -199,6 +236,7 @@ static void __alloc_write_key(struct bkey_i_alloc *a, struct bucket *g,
|
||||
put_alloc_field(a, &d, idx++, m.data_type);
|
||||
put_alloc_field(a, &d, idx++, m.dirty_sectors);
|
||||
put_alloc_field(a, &d, idx++, m.cached_sectors);
|
||||
put_alloc_field(a, &d, idx++, g->oldest_gen);
|
||||
|
||||
set_bkey_val_bytes(&a->k, (void *) d - (void *) &a->v);
|
||||
}
|
||||
@ -315,6 +353,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
BTREE_INSERT_USE_ALLOC_RESERVE|
|
||||
BTREE_INSERT_NOMARK|
|
||||
flags,
|
||||
BTREE_INSERT_ENTRY(iter, &a->k_i));
|
||||
if (ret)
|
||||
@ -358,7 +397,8 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
|
||||
? 0
|
||||
: bch2_btree_insert_at(c, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_JOURNAL_REPLAY,
|
||||
BTREE_INSERT_JOURNAL_REPLAY|
|
||||
BTREE_INSERT_NOMARK,
|
||||
BTREE_INSERT_ENTRY(&iter, k));
|
||||
err:
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
@ -824,6 +864,142 @@ static inline long next_alloc_bucket(struct bch_dev *ca)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* returns sequence number of most recent journal entry that updated this
|
||||
* bucket:
|
||||
*/
|
||||
static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
|
||||
{
|
||||
if (m.journal_seq_valid) {
|
||||
u64 journal_seq = atomic64_read(&c->journal.seq);
|
||||
u64 bucket_seq = journal_seq;
|
||||
|
||||
bucket_seq &= ~((u64) U16_MAX);
|
||||
bucket_seq |= m.journal_seq;
|
||||
|
||||
if (bucket_seq > journal_seq)
|
||||
bucket_seq -= 1 << 16;
|
||||
|
||||
return bucket_seq;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int bch2_invalidate_one_bucket2(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct btree_iter *iter,
|
||||
u64 *journal_seq, unsigned flags)
|
||||
{
|
||||
#if 0
|
||||
__BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
|
||||
#else
|
||||
/* hack: */
|
||||
__BKEY_PADDED(k, 8) alloc_key;
|
||||
#endif
|
||||
struct bkey_i_alloc *a;
|
||||
struct bkey_alloc_unpacked u;
|
||||
struct bucket_mark m;
|
||||
struct bkey_s_c k;
|
||||
bool invalidating_cached_data;
|
||||
size_t b;
|
||||
int ret;
|
||||
|
||||
BUG_ON(!ca->alloc_heap.used ||
|
||||
!ca->alloc_heap.data[0].nr);
|
||||
b = ca->alloc_heap.data[0].bucket;
|
||||
|
||||
/* first, put on free_inc and mark as owned by allocator: */
|
||||
percpu_down_read_preempt_disable(&c->mark_lock);
|
||||
spin_lock(&c->freelist_lock);
|
||||
|
||||
verify_not_on_freelist(c, ca, b);
|
||||
|
||||
BUG_ON(!fifo_push(&ca->free_inc, b));
|
||||
|
||||
bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);
|
||||
m = bucket(ca, b)->mark;
|
||||
|
||||
spin_unlock(&c->freelist_lock);
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
|
||||
bch2_btree_iter_cond_resched(iter);
|
||||
|
||||
BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
|
||||
|
||||
bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
|
||||
retry:
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
ret = btree_iter_err(k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (k.k && k.k->type == KEY_TYPE_alloc)
|
||||
u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
|
||||
else
|
||||
memset(&u, 0, sizeof(u));
|
||||
|
||||
invalidating_cached_data = u.cached_sectors != 0;
|
||||
|
||||
//BUG_ON(u.dirty_sectors);
|
||||
u.data_type = 0;
|
||||
u.dirty_sectors = 0;
|
||||
u.cached_sectors = 0;
|
||||
u.read_time = c->bucket_clock[READ].hand;
|
||||
u.write_time = c->bucket_clock[WRITE].hand;
|
||||
u.gen++;
|
||||
|
||||
a = bkey_alloc_init(&alloc_key.k);
|
||||
a->k.p = iter->pos;
|
||||
bch2_alloc_pack(a, u);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL,
|
||||
invalidating_cached_data ? journal_seq : NULL,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
BTREE_INSERT_USE_ALLOC_RESERVE|
|
||||
flags,
|
||||
BTREE_INSERT_ENTRY(iter, &a->k_i));
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
|
||||
if (!ret) {
|
||||
/* remove from alloc_heap: */
|
||||
struct alloc_heap_entry e, *top = ca->alloc_heap.data;
|
||||
|
||||
top->bucket++;
|
||||
top->nr--;
|
||||
|
||||
if (!top->nr)
|
||||
heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
|
||||
|
||||
/*
|
||||
* Make sure we flush the last journal entry that updated this
|
||||
* bucket (i.e. deleting the last reference) before writing to
|
||||
* this bucket again:
|
||||
*/
|
||||
*journal_seq = max(*journal_seq, bucket_journal_seq(c, m));
|
||||
} else {
|
||||
size_t b2;
|
||||
|
||||
/* remove from free_inc: */
|
||||
percpu_down_read_preempt_disable(&c->mark_lock);
|
||||
spin_lock(&c->freelist_lock);
|
||||
|
||||
bch2_mark_alloc_bucket(c, ca, b, false,
|
||||
gc_pos_alloc(c, NULL), 0);
|
||||
|
||||
BUG_ON(!fifo_pop_back(&ca->free_inc, b2));
|
||||
BUG_ON(b != b2);
|
||||
|
||||
spin_unlock(&c->freelist_lock);
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t bucket, u64 *flush_seq)
|
||||
{
|
||||
@ -844,18 +1020,7 @@ static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
|
||||
if (m.journal_seq_valid) {
|
||||
u64 journal_seq = atomic64_read(&c->journal.seq);
|
||||
u64 bucket_seq = journal_seq;
|
||||
|
||||
bucket_seq &= ~((u64) U16_MAX);
|
||||
bucket_seq |= m.journal_seq;
|
||||
|
||||
if (bucket_seq > journal_seq)
|
||||
bucket_seq -= 1 << 16;
|
||||
|
||||
*flush_seq = max(*flush_seq, bucket_seq);
|
||||
}
|
||||
*flush_seq = max(*flush_seq, bucket_journal_seq(c, m));
|
||||
|
||||
return m.cached_sectors != 0;
|
||||
}
|
||||
@ -868,7 +1033,6 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
|
||||
struct btree_iter iter;
|
||||
u64 journal_seq = 0;
|
||||
int ret = 0;
|
||||
long b;
|
||||
|
||||
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
||||
@ -876,14 +1040,11 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
|
||||
/* Only use nowait if we've already invalidated at least one bucket: */
|
||||
while (!ret &&
|
||||
!fifo_full(&ca->free_inc) &&
|
||||
(b = next_alloc_bucket(ca)) >= 0) {
|
||||
bool must_flush =
|
||||
bch2_invalidate_one_bucket(c, ca, b, &journal_seq);
|
||||
|
||||
ret = __bch2_alloc_write_key(c, ca, b, &iter,
|
||||
must_flush ? &journal_seq : NULL,
|
||||
!fifo_empty(&ca->free_inc) ? BTREE_INSERT_NOWAIT : 0);
|
||||
}
|
||||
ca->alloc_heap.used)
|
||||
ret = bch2_invalidate_one_bucket2(c, ca, &iter, &journal_seq,
|
||||
BTREE_INSERT_GC_LOCK_HELD|
|
||||
(!fifo_empty(&ca->free_inc)
|
||||
? BTREE_INSERT_NOWAIT : 0));
|
||||
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
@ -1305,24 +1466,16 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void flush_held_btree_writes(struct bch_fs *c)
|
||||
static bool flush_done(struct bch_fs *c)
|
||||
{
|
||||
struct bucket_table *tbl;
|
||||
struct rhash_head *pos;
|
||||
struct btree *b;
|
||||
bool nodes_blocked;
|
||||
bool nodes_unwritten;
|
||||
size_t i;
|
||||
struct closure cl;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
|
||||
again:
|
||||
pr_debug("flushing dirty btree nodes");
|
||||
cond_resched();
|
||||
closure_wait(&c->btree_interior_update_wait, &cl);
|
||||
|
||||
nodes_blocked = false;
|
||||
nodes_unwritten = false;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_cached_btree(b, c, tbl, i, pos)
|
||||
@ -1334,24 +1487,25 @@ again:
|
||||
six_unlock_read(&b->lock);
|
||||
goto again;
|
||||
} else {
|
||||
nodes_blocked = true;
|
||||
nodes_unwritten = true;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (c->btree_roots_dirty)
|
||||
if (c->btree_roots_dirty) {
|
||||
bch2_journal_meta(&c->journal);
|
||||
|
||||
if (nodes_blocked) {
|
||||
closure_sync(&cl);
|
||||
goto again;
|
||||
}
|
||||
|
||||
closure_wake_up(&c->btree_interior_update_wait);
|
||||
closure_sync(&cl);
|
||||
return !nodes_unwritten &&
|
||||
!bch2_btree_interior_updates_nr_pending(c);
|
||||
}
|
||||
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
!bch2_btree_interior_updates_nr_pending(c));
|
||||
static void flush_held_btree_writes(struct bch_fs *c)
|
||||
{
|
||||
clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
|
||||
|
||||
closure_wait_event(&c->btree_interior_update_wait, flush_done(c));
|
||||
}
|
||||
|
||||
static void allocator_start_issue_discards(struct bch_fs *c)
|
||||
@ -1470,7 +1624,6 @@ not_enough:
|
||||
&journal_seq);
|
||||
|
||||
fifo_push(&ca->free[RESERVE_BTREE], bu);
|
||||
bucket_set_dirty(ca, bu);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1517,7 +1670,6 @@ int bch2_fs_allocator_start(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
bool wrote;
|
||||
int ret;
|
||||
|
||||
down_read(&c->gc_lock);
|
||||
@ -1536,8 +1688,7 @@ int bch2_fs_allocator_start(struct bch_fs *c)
|
||||
}
|
||||
|
||||
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
|
||||
|
||||
return bch2_alloc_write(c, false, &wrote);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_fs_allocator_background_init(struct bch_fs *c)
|
||||
|
@ -5,6 +5,15 @@
|
||||
#include "alloc_types.h"
|
||||
#include "debug.h"
|
||||
|
||||
struct bkey_alloc_unpacked {
|
||||
u8 gen;
|
||||
#define x(_name, _bits) u##_bits _name;
|
||||
BCH_ALLOC_FIELDS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *);
|
||||
|
||||
#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
|
||||
|
||||
const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
|
@ -723,7 +723,7 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
|
||||
static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
|
||||
{
|
||||
u64 stranded = c->write_points_nr * c->bucket_size_max;
|
||||
u64 free = bch2_fs_sectors_free(c);
|
||||
u64 free = bch2_fs_usage_read_short(c).free;
|
||||
|
||||
return stranded * factor > free;
|
||||
}
|
||||
|
@ -396,8 +396,6 @@ struct bch_dev {
|
||||
struct bucket_array __rcu *buckets[2];
|
||||
unsigned long *buckets_nouse;
|
||||
unsigned long *buckets_written;
|
||||
/* most out of date gen in the btree */
|
||||
u8 *oldest_gens;
|
||||
struct rw_semaphore bucket_lock;
|
||||
|
||||
struct bch_dev_usage __percpu *usage[2];
|
||||
|
@ -821,11 +821,12 @@ struct bch_alloc {
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
#define BCH_ALLOC_FIELDS() \
|
||||
x(read_time, 2) \
|
||||
x(write_time, 2) \
|
||||
x(data_type, 1) \
|
||||
x(dirty_sectors, 2) \
|
||||
x(cached_sectors, 2)
|
||||
x(read_time, 16) \
|
||||
x(write_time, 16) \
|
||||
x(data_type, 8) \
|
||||
x(dirty_sectors, 16) \
|
||||
x(cached_sectors, 16) \
|
||||
x(oldest_gen, 8)
|
||||
|
||||
enum {
|
||||
#define x(name, bytes) BCH_ALLOC_FIELD_##name,
|
||||
@ -835,12 +836,12 @@ enum {
|
||||
};
|
||||
|
||||
static const unsigned BCH_ALLOC_FIELD_BYTES[] = {
|
||||
#define x(name, bytes) [BCH_ALLOC_FIELD_##name] = bytes,
|
||||
#define x(name, bits) [BCH_ALLOC_FIELD_##name] = bits / 8,
|
||||
BCH_ALLOC_FIELDS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
#define x(name, bytes) + bytes
|
||||
#define x(name, bits) + (bits / 8)
|
||||
static const unsigned BKEY_ALLOC_VAL_U64s_MAX =
|
||||
DIV_ROUND_UP(offsetof(struct bch_alloc, data)
|
||||
BCH_ALLOC_FIELDS(), sizeof(u64));
|
||||
|
@ -138,24 +138,24 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
|
||||
bkey_for_each_ptr(ptrs, ptr) {
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
size_t b = PTR_BUCKET_NR(ca, ptr);
|
||||
struct bucket *g = PTR_BUCKET(ca, ptr);
|
||||
struct bucket *g = PTR_BUCKET(ca, ptr, true);
|
||||
struct bucket *g2 = PTR_BUCKET(ca, ptr, false);
|
||||
|
||||
if (mustfix_fsck_err_on(!g->gen_valid, c,
|
||||
"found ptr with missing gen in alloc btree,\n"
|
||||
"type %u gen %u",
|
||||
k.k->type, ptr->gen)) {
|
||||
g->_mark.gen = ptr->gen;
|
||||
g->gen_valid = 1;
|
||||
bucket_set_dirty(ca, b);
|
||||
g2->_mark.gen = g->_mark.gen = ptr->gen;
|
||||
g2->_mark.dirty = g->_mark.dirty = true;
|
||||
g2->gen_valid = g->gen_valid = true;
|
||||
}
|
||||
|
||||
if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
|
||||
"%u ptr gen in the future: %u > %u",
|
||||
k.k->type, ptr->gen, g->mark.gen)) {
|
||||
g->_mark.gen = ptr->gen;
|
||||
g->gen_valid = 1;
|
||||
bucket_set_dirty(ca, b);
|
||||
g2->_mark.gen = g->_mark.gen = ptr->gen;
|
||||
g2->_mark.dirty = g->_mark.dirty = true;
|
||||
g2->gen_valid = g->gen_valid = true;
|
||||
set_bit(BCH_FS_FIXED_GENS, &c->flags);
|
||||
}
|
||||
}
|
||||
@ -163,10 +163,10 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
|
||||
bkey_for_each_ptr(ptrs, ptr) {
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
size_t b = PTR_BUCKET_NR(ca, ptr);
|
||||
struct bucket *g = PTR_BUCKET(ca, ptr, true);
|
||||
|
||||
if (gen_after(ca->oldest_gens[b], ptr->gen))
|
||||
ca->oldest_gens[b] = ptr->gen;
|
||||
if (gen_after(g->oldest_gen, ptr->gen))
|
||||
g->oldest_gen = ptr->gen;
|
||||
|
||||
*max_stale = max(*max_stale, ptr_stale(ca, ptr));
|
||||
}
|
||||
@ -230,12 +230,12 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
|
||||
|
||||
bch2_verify_btree_nr_keys(b);
|
||||
|
||||
gc_pos_set(c, gc_pos_btree_node(b));
|
||||
|
||||
ret = btree_gc_mark_node(c, b, &max_stale, initial);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
gc_pos_set(c, gc_pos_btree_node(b));
|
||||
|
||||
if (!initial) {
|
||||
if (max_stale > 64)
|
||||
bch2_btree_node_rewrite(c, &iter,
|
||||
@ -483,88 +483,38 @@ static void bch2_gc_free(struct bch_fs *c)
|
||||
percpu_up_write(&c->mark_lock);
|
||||
}
|
||||
|
||||
static void bch2_gc_done_nocheck(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
|
||||
{
|
||||
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
|
||||
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
|
||||
struct stripe *dst, *src;
|
||||
|
||||
c->ec_stripes_heap.used = 0;
|
||||
|
||||
while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
|
||||
(src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
|
||||
*dst = *src;
|
||||
|
||||
if (dst->alive)
|
||||
bch2_stripes_heap_insert(c, dst, dst_iter.pos);
|
||||
|
||||
genradix_iter_advance(&dst_iter, &c->stripes[0]);
|
||||
genradix_iter_advance(&src_iter, &c->stripes[1]);
|
||||
}
|
||||
}
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
struct bucket_array *src = __bucket_array(ca, 1);
|
||||
|
||||
memcpy(__bucket_array(ca, 0), src,
|
||||
sizeof(struct bucket_array) +
|
||||
sizeof(struct bucket) * src->nbuckets);
|
||||
};
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
|
||||
struct bch_dev_usage *dst = (void *)
|
||||
bch2_acc_percpu_u64s((void *) ca->usage[0], nr);
|
||||
struct bch_dev_usage *src = (void *)
|
||||
bch2_acc_percpu_u64s((void *) ca->usage[1], nr);
|
||||
|
||||
*dst = *src;
|
||||
}
|
||||
|
||||
{
|
||||
unsigned nr = sizeof(struct bch_fs_usage) / sizeof(u64) +
|
||||
c->replicas.nr;
|
||||
struct bch_fs_usage *dst = (void *)
|
||||
bch2_acc_percpu_u64s((void *) c->usage[0], nr);
|
||||
struct bch_fs_usage *src = (void *)
|
||||
bch2_acc_percpu_u64s((void *) c->usage[1], nr);
|
||||
|
||||
memcpy(&dst->s.gc_start[0],
|
||||
&src->s.gc_start[0],
|
||||
nr * sizeof(u64) - offsetof(typeof(*dst), s.gc_start));
|
||||
}
|
||||
}
|
||||
|
||||
static void bch2_gc_done(struct bch_fs *c, bool initial)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
bool verify = !initial ||
|
||||
(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO));
|
||||
unsigned i;
|
||||
|
||||
#define copy_field(_f, _msg, ...) \
|
||||
if (dst->_f != src->_f) { \
|
||||
bch_err(c, _msg ": got %llu, should be %llu, fixing" \
|
||||
, ##__VA_ARGS__, dst->_f, src->_f); \
|
||||
if (verify) \
|
||||
bch_err(c, _msg ": got %llu, should be %llu, fixing"\
|
||||
, ##__VA_ARGS__, dst->_f, src->_f); \
|
||||
dst->_f = src->_f; \
|
||||
}
|
||||
#define copy_stripe_field(_f, _msg, ...) \
|
||||
if (dst->_f != src->_f) { \
|
||||
bch_err_ratelimited(c, "stripe %zu has wrong "_msg \
|
||||
": got %u, should be %u, fixing", \
|
||||
dst_iter.pos, ##__VA_ARGS__, \
|
||||
dst->_f, src->_f); \
|
||||
if (verify) \
|
||||
bch_err_ratelimited(c, "stripe %zu has wrong "_msg\
|
||||
": got %u, should be %u, fixing", \
|
||||
dst_iter.pos, ##__VA_ARGS__, \
|
||||
dst->_f, src->_f); \
|
||||
dst->_f = src->_f; \
|
||||
dst->dirty = true; \
|
||||
}
|
||||
#define copy_bucket_field(_f) \
|
||||
if (dst->b[b].mark._f != src->b[b].mark._f) { \
|
||||
bch_err_ratelimited(c, "dev %u bucket %zu has wrong " #_f\
|
||||
": got %u, should be %u, fixing", \
|
||||
i, b, dst->b[b].mark._f, src->b[b].mark._f); \
|
||||
if (verify) \
|
||||
bch_err_ratelimited(c, "dev %u bucket %zu has wrong " #_f\
|
||||
": got %u, should be %u, fixing", i, b, \
|
||||
dst->b[b].mark._f, src->b[b].mark._f); \
|
||||
dst->b[b]._mark._f = src->b[b].mark._f; \
|
||||
dst->b[b]._mark.dirty = true; \
|
||||
}
|
||||
#define copy_dev_field(_f, _msg, ...) \
|
||||
copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__)
|
||||
@ -573,12 +523,6 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
|
||||
if (initial &&
|
||||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) {
|
||||
bch2_gc_done_nocheck(c);
|
||||
goto out;
|
||||
}
|
||||
|
||||
{
|
||||
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
|
||||
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
|
||||
@ -629,6 +573,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
|
||||
copy_bucket_field(stripe);
|
||||
copy_bucket_field(dirty_sectors);
|
||||
copy_bucket_field(cached_sectors);
|
||||
|
||||
if (dst->b[b].oldest_gen != src->b[b].oldest_gen) {
|
||||
dst->b[b].oldest_gen = src->b[b].oldest_gen;
|
||||
dst->b[b]._mark.dirty = true;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -641,44 +590,46 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
|
||||
unsigned b;
|
||||
|
||||
for (b = 0; b < BCH_DATA_NR; b++)
|
||||
copy_dev_field(buckets[b],
|
||||
"buckets[%s]", bch2_data_types[b]);
|
||||
copy_dev_field(buckets_alloc, "buckets_alloc");
|
||||
copy_dev_field(buckets_ec, "buckets_ec");
|
||||
copy_dev_field(buckets[b], "buckets[%s]",
|
||||
bch2_data_types[b]);
|
||||
copy_dev_field(buckets_alloc, "buckets_alloc");
|
||||
copy_dev_field(buckets_ec, "buckets_ec");
|
||||
copy_dev_field(buckets_unavailable, "buckets_unavailable");
|
||||
|
||||
for (b = 0; b < BCH_DATA_NR; b++)
|
||||
copy_dev_field(sectors[b],
|
||||
"sectors[%s]", bch2_data_types[b]);
|
||||
copy_dev_field(sectors_fragmented,
|
||||
"sectors_fragmented");
|
||||
copy_dev_field(sectors[b], "sectors[%s]",
|
||||
bch2_data_types[b]);
|
||||
copy_dev_field(sectors_fragmented, "sectors_fragmented");
|
||||
}
|
||||
|
||||
{
|
||||
unsigned nr = sizeof(struct bch_fs_usage) / sizeof(u64) +
|
||||
c->replicas.nr;
|
||||
unsigned nr = fs_usage_u64s(c);
|
||||
struct bch_fs_usage *dst = (void *)
|
||||
bch2_acc_percpu_u64s((void *) c->usage[0], nr);
|
||||
struct bch_fs_usage *src = (void *)
|
||||
bch2_acc_percpu_u64s((void *) c->usage[1], nr);
|
||||
|
||||
copy_fs_field(s.hidden, "hidden");
|
||||
copy_fs_field(s.data, "data");
|
||||
copy_fs_field(s.cached, "cached");
|
||||
copy_fs_field(s.reserved, "reserved");
|
||||
copy_fs_field(s.nr_inodes, "nr_inodes");
|
||||
copy_fs_field(hidden, "hidden");
|
||||
copy_fs_field(data, "data");
|
||||
copy_fs_field(cached, "cached");
|
||||
copy_fs_field(reserved, "reserved");
|
||||
copy_fs_field(nr_inodes, "nr_inodes");
|
||||
|
||||
for (i = 0; i < BCH_REPLICAS_MAX; i++)
|
||||
copy_fs_field(persistent_reserved[i],
|
||||
"persistent_reserved[%i]", i);
|
||||
|
||||
for (i = 0; i < c->replicas.nr; i++) {
|
||||
/*
|
||||
* XXX: print out replicas entry
|
||||
*/
|
||||
copy_fs_field(data[i], "data[%i]", i);
|
||||
struct bch_replicas_entry *e =
|
||||
cpu_replicas_entry(&c->replicas, i);
|
||||
char buf[80];
|
||||
|
||||
bch2_replicas_entry_to_text(&PBUF(buf), e);
|
||||
|
||||
copy_fs_field(replicas[i], "%s", buf);
|
||||
}
|
||||
}
|
||||
out:
|
||||
|
||||
percpu_up_write(&c->mark_lock);
|
||||
|
||||
#undef copy_fs_field
|
||||
@ -693,19 +644,18 @@ static int bch2_gc_start(struct bch_fs *c)
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
|
||||
/*
|
||||
* indicate to stripe code that we need to allocate for the gc stripes
|
||||
* radix tree, too
|
||||
*/
|
||||
gc_pos_set(c, gc_phase(GC_PHASE_START));
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
BUG_ON(c->usage[1]);
|
||||
|
||||
c->usage[1] = __alloc_percpu_gfp(sizeof(struct bch_fs_usage) +
|
||||
sizeof(u64) * c->replicas.nr,
|
||||
sizeof(u64),
|
||||
GFP_KERNEL);
|
||||
c->usage[1] = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
|
||||
sizeof(u64), GFP_KERNEL);
|
||||
percpu_up_write(&c->mark_lock);
|
||||
|
||||
if (!c->usage[1])
|
||||
@ -740,8 +690,12 @@ static int bch2_gc_start(struct bch_fs *c)
|
||||
dst->first_bucket = src->first_bucket;
|
||||
dst->nbuckets = src->nbuckets;
|
||||
|
||||
for (b = 0; b < src->nbuckets; b++)
|
||||
dst->b[b]._mark.gen = src->b[b].mark.gen;
|
||||
for (b = 0; b < src->nbuckets; b++) {
|
||||
dst->b[b]._mark.gen =
|
||||
dst->b[b].oldest_gen =
|
||||
src->b[b].mark.gen;
|
||||
dst->b[b].gen_valid = src->b[b].gen_valid;
|
||||
}
|
||||
};
|
||||
|
||||
percpu_up_write(&c->mark_lock);
|
||||
@ -800,6 +754,8 @@ out:
|
||||
if (iter++ <= 2) {
|
||||
bch_info(c, "Fixed gens, restarting mark and sweep:");
|
||||
clear_bit(BCH_FS_FIXED_GENS, &c->flags);
|
||||
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
|
||||
bch2_gc_free(c);
|
||||
goto again;
|
||||
}
|
||||
|
||||
|
@ -455,6 +455,7 @@ static inline bool btree_node_is_extents(struct btree *b)
|
||||
static inline bool btree_node_type_needs_gc(enum btree_node_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BKEY_TYPE_ALLOC:
|
||||
case BKEY_TYPE_BTREE:
|
||||
case BKEY_TYPE_EXTENTS:
|
||||
case BKEY_TYPE_INODES:
|
||||
@ -489,7 +490,6 @@ enum btree_insert_ret {
|
||||
/* leaf node needs to be split */
|
||||
BTREE_INSERT_BTREE_NODE_FULL,
|
||||
BTREE_INSERT_ENOSPC,
|
||||
BTREE_INSERT_NEED_GC_LOCK,
|
||||
BTREE_INSERT_NEED_MARK_REPLICAS,
|
||||
};
|
||||
|
||||
|
@ -81,6 +81,7 @@ enum {
|
||||
__BTREE_INSERT_USE_RESERVE,
|
||||
__BTREE_INSERT_USE_ALLOC_RESERVE,
|
||||
__BTREE_INSERT_JOURNAL_REPLAY,
|
||||
__BTREE_INSERT_NOMARK,
|
||||
__BTREE_INSERT_NOWAIT,
|
||||
__BTREE_INSERT_GC_LOCK_HELD,
|
||||
__BCH_HASH_SET_MUST_CREATE,
|
||||
@ -107,12 +108,12 @@ enum {
|
||||
#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE)
|
||||
#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
|
||||
|
||||
/*
|
||||
* Insert is for journal replay: don't get journal reservations, or mark extents
|
||||
* (bch_mark_key)
|
||||
*/
|
||||
/* Insert is for journal replay - don't get journal reservations: */
|
||||
#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY)
|
||||
|
||||
/* Don't call bch2_mark_key: */
|
||||
#define BTREE_INSERT_NOMARK (1 << __BTREE_INSERT_NOMARK)
|
||||
|
||||
/* Don't block on allocation failure (for new btree nodes: */
|
||||
#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT)
|
||||
#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD)
|
||||
|
@ -483,7 +483,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
|
||||
struct btree *b;
|
||||
struct disk_reservation disk_res = { 0, 0 };
|
||||
unsigned sectors = nr_nodes * c->opts.btree_node_size;
|
||||
int ret, disk_res_flags = BCH_DISK_RESERVATION_GC_LOCK_HELD;
|
||||
int ret, disk_res_flags = 0;
|
||||
|
||||
if (flags & BTREE_INSERT_NOFAIL)
|
||||
disk_res_flags |= BCH_DISK_RESERVATION_NOFAIL;
|
||||
@ -1086,8 +1086,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
|
||||
bch2_btree_node_free_index(as, NULL,
|
||||
bkey_i_to_s_c(&old->key),
|
||||
fs_usage);
|
||||
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
|
||||
gc_pos_btree_root(b->btree_id));
|
||||
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
|
||||
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
@ -1188,8 +1187,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
|
||||
bkey_disassemble(b, k, &tmp),
|
||||
fs_usage);
|
||||
|
||||
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
|
||||
gc_pos_btree_node(b));
|
||||
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
|
||||
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
@ -1564,7 +1562,8 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
|
||||
closure_init_stack(&cl);
|
||||
|
||||
/* Hack, because gc and splitting nodes doesn't mix yet: */
|
||||
if (!down_read_trylock(&c->gc_lock)) {
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
|
||||
!down_read_trylock(&c->gc_lock)) {
|
||||
if (flags & BTREE_INSERT_NOUNLOCK)
|
||||
return -EINTR;
|
||||
|
||||
@ -1607,7 +1606,8 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
|
||||
*/
|
||||
__bch2_btree_iter_downgrade(iter, 1);
|
||||
out:
|
||||
up_read(&c->gc_lock);
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
|
||||
up_read(&c->gc_lock);
|
||||
closure_sync(&cl);
|
||||
return ret;
|
||||
}
|
||||
@ -1685,7 +1685,8 @@ retry:
|
||||
}
|
||||
|
||||
/* We're changing btree topology, doesn't mix with gc: */
|
||||
if (!down_read_trylock(&c->gc_lock))
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
|
||||
!down_read_trylock(&c->gc_lock))
|
||||
goto err_cycle_gc_lock;
|
||||
|
||||
if (!bch2_btree_iter_upgrade(iter, U8_MAX,
|
||||
@ -1745,7 +1746,8 @@ retry:
|
||||
|
||||
bch2_btree_update_done(as);
|
||||
|
||||
up_read(&c->gc_lock);
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
|
||||
up_read(&c->gc_lock);
|
||||
out:
|
||||
bch2_btree_iter_verify_locks(iter);
|
||||
|
||||
@ -1776,7 +1778,8 @@ err_cycle_gc_lock:
|
||||
|
||||
err_unlock:
|
||||
six_unlock_intent(&m->lock);
|
||||
up_read(&c->gc_lock);
|
||||
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
|
||||
up_read(&c->gc_lock);
|
||||
err:
|
||||
BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK));
|
||||
|
||||
@ -1942,8 +1945,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
|
||||
ret = bch2_disk_reservation_add(c, &as->reserve->disk_res,
|
||||
c->opts.btree_node_size *
|
||||
bch2_bkey_nr_ptrs(bkey_i_to_s_c(&new_key->k_i)),
|
||||
BCH_DISK_RESERVATION_NOFAIL|
|
||||
BCH_DISK_RESERVATION_GC_LOCK_HELD);
|
||||
BCH_DISK_RESERVATION_NOFAIL);
|
||||
BUG_ON(ret);
|
||||
|
||||
parent = btree_node_parent(iter, b);
|
||||
@ -1989,8 +1991,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
|
||||
bch2_btree_node_free_index(as, NULL,
|
||||
bkey_i_to_s_c(&b->key),
|
||||
fs_usage);
|
||||
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
|
||||
gc_pos_btree_root(b->btree_id));
|
||||
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
|
||||
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
@ -415,6 +415,25 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
|
||||
btree_iter_cmp(l.iter, r.iter);
|
||||
}
|
||||
|
||||
static bool btree_trans_relock(struct btree_insert *trans)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
|
||||
trans_for_each_iter(trans, i)
|
||||
return bch2_btree_iter_relock(i->iter);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void btree_trans_unlock(struct btree_insert *trans)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
|
||||
trans_for_each_iter(trans, i) {
|
||||
bch2_btree_iter_unlock(i->iter);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Normal update interface: */
|
||||
|
||||
static enum btree_insert_ret
|
||||
@ -466,49 +485,12 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
|
||||
struct btree_iter *linked;
|
||||
unsigned u64s;
|
||||
int ret;
|
||||
|
||||
retry:
|
||||
trans_for_each_iter(trans, i)
|
||||
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
|
||||
|
||||
/* reserve space for deferred updates */
|
||||
__trans_for_each_entry(trans, i, i->deferred) {
|
||||
|
||||
}
|
||||
|
||||
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
|
||||
|
||||
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
|
||||
u64s = 0;
|
||||
trans_for_each_entry(trans, i)
|
||||
u64s += jset_u64s(i->k->k.u64s);
|
||||
|
||||
while ((ret = bch2_journal_res_get(&c->journal,
|
||||
&trans->journal_res, u64s,
|
||||
JOURNAL_RES_GET_NONBLOCK)) == -EAGAIN) {
|
||||
struct btree_iter *iter = NULL;
|
||||
|
||||
trans_for_each_iter(trans, i)
|
||||
iter = i->iter;
|
||||
|
||||
if (iter)
|
||||
bch2_btree_iter_unlock(iter);
|
||||
|
||||
ret = bch2_journal_res_get(&c->journal,
|
||||
&trans->journal_res, u64s,
|
||||
JOURNAL_RES_GET_CHECK);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (iter && !bch2_btree_iter_relock(iter)) {
|
||||
trans_restart(" (iter relock after journal res get blocked)");
|
||||
return -EINTR;
|
||||
}
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
multi_lock_write(c, trans);
|
||||
|
||||
if (race_fault()) {
|
||||
@ -536,6 +518,36 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
|
||||
}
|
||||
}
|
||||
|
||||
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
|
||||
u64s = 0;
|
||||
trans_for_each_entry(trans, i)
|
||||
u64s += jset_u64s(i->k->k.u64s);
|
||||
|
||||
ret = bch2_journal_res_get(&c->journal,
|
||||
&trans->journal_res, u64s,
|
||||
JOURNAL_RES_GET_NONBLOCK);
|
||||
if (likely(!ret))
|
||||
goto got_journal_res;
|
||||
if (ret != -EAGAIN)
|
||||
goto out;
|
||||
|
||||
multi_unlock_write(trans);
|
||||
btree_trans_unlock(trans);
|
||||
|
||||
ret = bch2_journal_res_get(&c->journal,
|
||||
&trans->journal_res, u64s,
|
||||
JOURNAL_RES_GET_CHECK);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!btree_trans_relock(trans)) {
|
||||
trans_restart(" (iter relock after journal res get blocked)");
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
goto retry;
|
||||
}
|
||||
got_journal_res:
|
||||
if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
|
||||
if (journal_seq_verify(c))
|
||||
trans_for_each_entry(trans, i)
|
||||
@ -623,6 +635,9 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
|
||||
/* for the sake of sanity: */
|
||||
BUG_ON(trans->nr > 1 && !(trans->flags & BTREE_INSERT_ATOMIC));
|
||||
|
||||
if (trans->flags & BTREE_INSERT_GC_LOCK_HELD)
|
||||
lockdep_assert_held(&c->gc_lock);
|
||||
|
||||
bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
|
||||
|
||||
trans_for_each_entry(trans, i)
|
||||
@ -715,18 +730,6 @@ err:
|
||||
ret = -EINTR;
|
||||
}
|
||||
break;
|
||||
case BTREE_INSERT_NEED_GC_LOCK:
|
||||
ret = -EINTR;
|
||||
|
||||
if (!down_read_trylock(&c->gc_lock)) {
|
||||
if (flags & BTREE_INSERT_NOUNLOCK)
|
||||
goto out;
|
||||
|
||||
bch2_btree_iter_unlock(trans->entries[0].iter);
|
||||
down_read(&c->gc_lock);
|
||||
}
|
||||
up_read(&c->gc_lock);
|
||||
break;
|
||||
case BTREE_INSERT_ENOSPC:
|
||||
ret = -ENOSPC;
|
||||
break;
|
||||
|
@ -116,14 +116,14 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c)
|
||||
void bch2_fs_usage_initialize(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_usage *usage;
|
||||
unsigned i, nr;
|
||||
unsigned i;
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
nr = sizeof(struct bch_fs_usage) / sizeof(u64) + c->replicas.nr;
|
||||
usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0], nr);
|
||||
usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0],
|
||||
fs_usage_u64s(c));
|
||||
|
||||
for (i = 0; i < BCH_REPLICAS_MAX; i++)
|
||||
usage->s.reserved += usage->persistent_reserved[i];
|
||||
usage->reserved += usage->persistent_reserved[i];
|
||||
|
||||
for (i = 0; i < c->replicas.nr; i++) {
|
||||
struct bch_replicas_entry *e =
|
||||
@ -132,10 +132,10 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
|
||||
switch (e->data_type) {
|
||||
case BCH_DATA_BTREE:
|
||||
case BCH_DATA_USER:
|
||||
usage->s.data += usage->data[i];
|
||||
usage->data += usage->replicas[i];
|
||||
break;
|
||||
case BCH_DATA_CACHED:
|
||||
usage->s.cached += usage->data[i];
|
||||
usage->cached += usage->replicas[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -143,44 +143,38 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
|
||||
percpu_up_write(&c->mark_lock);
|
||||
}
|
||||
|
||||
#define bch2_usage_read_raw(_stats) \
|
||||
({ \
|
||||
typeof(*this_cpu_ptr(_stats)) _acc; \
|
||||
\
|
||||
memset(&_acc, 0, sizeof(_acc)); \
|
||||
acc_u64s_percpu((u64 *) &_acc, \
|
||||
(u64 __percpu *) _stats, \
|
||||
sizeof(_acc) / sizeof(u64)); \
|
||||
\
|
||||
_acc; \
|
||||
})
|
||||
|
||||
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
return bch2_usage_read_raw(ca->usage[0]);
|
||||
struct bch_dev_usage ret;
|
||||
|
||||
memset(&ret, 0, sizeof(ret));
|
||||
acc_u64s_percpu((u64 *) &ret,
|
||||
(u64 __percpu *) ca->usage[0],
|
||||
sizeof(ret) / sizeof(u64));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_usage *ret;
|
||||
unsigned nr = READ_ONCE(c->replicas.nr);
|
||||
unsigned v, u64s = fs_usage_u64s(c);
|
||||
retry:
|
||||
ret = kzalloc(sizeof(*ret) + nr * sizeof(u64), GFP_NOFS);
|
||||
ret = kzalloc(u64s * sizeof(u64), GFP_NOFS);
|
||||
if (unlikely(!ret))
|
||||
return NULL;
|
||||
|
||||
percpu_down_read_preempt_disable(&c->mark_lock);
|
||||
|
||||
if (unlikely(nr < c->replicas.nr)) {
|
||||
nr = c->replicas.nr;
|
||||
v = fs_usage_u64s(c);
|
||||
if (unlikely(u64s != v)) {
|
||||
u64s = v;
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
kfree(ret);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
acc_u64s_percpu((u64 *) ret,
|
||||
(u64 __percpu *) c->usage[0],
|
||||
sizeof(*ret) / sizeof(u64) + nr);
|
||||
acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[0], u64s);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -197,27 +191,44 @@ static u64 avail_factor(u64 r)
|
||||
return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
|
||||
}
|
||||
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage)
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
|
||||
{
|
||||
return min(fs_usage.s.hidden +
|
||||
fs_usage.s.data +
|
||||
reserve_factor(fs_usage.s.reserved +
|
||||
fs_usage.s.online_reserved),
|
||||
return min(fs_usage->hidden +
|
||||
fs_usage->data +
|
||||
reserve_factor(fs_usage->reserved +
|
||||
fs_usage->online_reserved),
|
||||
c->capacity);
|
||||
}
|
||||
|
||||
static struct bch_fs_usage_short
|
||||
__bch2_fs_usage_read_short(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_usage_short ret;
|
||||
u64 data, reserved;
|
||||
|
||||
ret.capacity = c->capacity -
|
||||
percpu_u64_get(&c->usage[0]->hidden);
|
||||
|
||||
data = percpu_u64_get(&c->usage[0]->data);
|
||||
reserved = percpu_u64_get(&c->usage[0]->reserved) +
|
||||
percpu_u64_get(&c->usage[0]->online_reserved);
|
||||
|
||||
ret.used = min(ret.capacity, data + reserve_factor(reserved));
|
||||
ret.free = ret.capacity - ret.used;
|
||||
|
||||
ret.nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bch_fs_usage_short
|
||||
bch2_fs_usage_read_short(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_usage_summarized usage =
|
||||
bch2_usage_read_raw(&c->usage[0]->s);
|
||||
struct bch_fs_usage_short ret;
|
||||
|
||||
ret.capacity = READ_ONCE(c->capacity) - usage.hidden;
|
||||
ret.used = min(ret.capacity, usage.data +
|
||||
reserve_factor(usage.reserved +
|
||||
usage.online_reserved));
|
||||
ret.nr_inodes = usage.nr_inodes;
|
||||
percpu_down_read_preempt_disable(&c->mark_lock);
|
||||
ret = __bch2_fs_usage_read_short(c);
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -254,10 +265,9 @@ static bool bucket_became_unavailable(struct bucket_mark old,
|
||||
|
||||
int bch2_fs_usage_apply(struct bch_fs *c,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
struct disk_reservation *disk_res,
|
||||
struct gc_pos gc_pos)
|
||||
struct disk_reservation *disk_res)
|
||||
{
|
||||
s64 added = fs_usage->s.data + fs_usage->s.reserved;
|
||||
s64 added = fs_usage->data + fs_usage->reserved;
|
||||
s64 should_not_have_added;
|
||||
int ret = 0;
|
||||
|
||||
@ -277,19 +287,11 @@ int bch2_fs_usage_apply(struct bch_fs *c,
|
||||
|
||||
if (added > 0) {
|
||||
disk_res->sectors -= added;
|
||||
fs_usage->s.online_reserved -= added;
|
||||
fs_usage->online_reserved -= added;
|
||||
}
|
||||
|
||||
acc_u64s((u64 *) this_cpu_ptr(c->usage[0]),
|
||||
(u64 *) fs_usage,
|
||||
sizeof(*fs_usage) / sizeof(u64) + c->replicas.nr);
|
||||
|
||||
if (gc_visited(c, gc_pos)) {
|
||||
BUG_ON(!c->usage[1]);
|
||||
acc_u64s((u64 *) this_cpu_ptr(c->usage[1]),
|
||||
(u64 *) fs_usage,
|
||||
sizeof(*fs_usage) / sizeof(u64) + c->replicas.nr);
|
||||
}
|
||||
(u64 *) fs_usage, fs_usage_u64s(c));
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -300,7 +302,7 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage,
|
||||
int nr, s64 size)
|
||||
{
|
||||
if (type == BCH_DATA_SB || type == BCH_DATA_JOURNAL)
|
||||
fs_usage->s.hidden += size;
|
||||
fs_usage->hidden += size;
|
||||
|
||||
dev_usage->buckets[type] += nr;
|
||||
}
|
||||
@ -384,10 +386,10 @@ static inline void update_replicas(struct bch_fs *c,
|
||||
BUG_ON(!sectors);
|
||||
|
||||
if (r->data_type == BCH_DATA_CACHED)
|
||||
fs_usage->s.cached += sectors;
|
||||
fs_usage->cached += sectors;
|
||||
else
|
||||
fs_usage->s.data += sectors;
|
||||
fs_usage->data[idx] += sectors;
|
||||
fs_usage->data += sectors;
|
||||
fs_usage->replicas[idx] += sectors;
|
||||
}
|
||||
|
||||
static inline void update_cached_sectors(struct bch_fs *c,
|
||||
@ -401,15 +403,28 @@ static inline void update_cached_sectors(struct bch_fs *c,
|
||||
update_replicas(c, fs_usage, &r.e, sectors);
|
||||
}
|
||||
|
||||
static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, struct bucket_mark *old,
|
||||
bool gc)
|
||||
#define do_mark_fn(fn, c, pos, flags, ...) \
|
||||
({ \
|
||||
int gc, ret = 0; \
|
||||
\
|
||||
percpu_rwsem_assert_held(&c->mark_lock); \
|
||||
\
|
||||
for (gc = 0; gc < 2 && !ret; gc++) \
|
||||
if (!gc == !(flags & BCH_BUCKET_MARK_GC) || \
|
||||
(gc && gc_visited(c, pos))) \
|
||||
ret = fn(c, __VA_ARGS__, gc); \
|
||||
ret; \
|
||||
})
|
||||
|
||||
static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, struct bucket_mark *ret,
|
||||
bool gc)
|
||||
{
|
||||
struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
|
||||
struct bucket *g = __bucket(ca, b, gc);
|
||||
struct bucket_mark new;
|
||||
struct bucket_mark old, new;
|
||||
|
||||
*old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
|
||||
old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
|
||||
BUG_ON(!is_available_bucket(new));
|
||||
|
||||
new.owned_by_allocator = true;
|
||||
@ -420,26 +435,29 @@ static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
new.gen++;
|
||||
}));
|
||||
|
||||
if (old->cached_sectors)
|
||||
if (old.cached_sectors)
|
||||
update_cached_sectors(c, fs_usage, ca->dev_idx,
|
||||
-old->cached_sectors);
|
||||
-((s64) old.cached_sectors));
|
||||
|
||||
if (!gc)
|
||||
*ret = old;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, struct bucket_mark *old)
|
||||
{
|
||||
percpu_rwsem_assert_held(&c->mark_lock);
|
||||
|
||||
__bch2_invalidate_bucket(c, ca, b, old, false);
|
||||
do_mark_fn(__bch2_invalidate_bucket, c, gc_phase(GC_PHASE_START), 0,
|
||||
ca, b, old);
|
||||
|
||||
if (!old->owned_by_allocator && old->cached_sectors)
|
||||
trace_invalidate(ca, bucket_to_sector(ca, b),
|
||||
old->cached_sectors);
|
||||
}
|
||||
|
||||
static void __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, bool owned_by_allocator,
|
||||
bool gc)
|
||||
static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, bool owned_by_allocator,
|
||||
bool gc)
|
||||
{
|
||||
struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
|
||||
struct bucket *g = __bucket(ca, b, gc);
|
||||
@ -451,20 +469,70 @@ static void __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
BUG_ON(!gc &&
|
||||
!owned_by_allocator && !old.owned_by_allocator);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, bool owned_by_allocator,
|
||||
struct gc_pos pos, unsigned flags)
|
||||
{
|
||||
percpu_rwsem_assert_held(&c->mark_lock);
|
||||
do_mark_fn(__bch2_mark_alloc_bucket, c, pos, flags,
|
||||
ca, b, owned_by_allocator);
|
||||
}
|
||||
|
||||
if (!(flags & BCH_BUCKET_MARK_GC))
|
||||
__bch2_mark_alloc_bucket(c, ca, b, owned_by_allocator, false);
|
||||
static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
|
||||
bool inserting,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
unsigned journal_seq, unsigned flags,
|
||||
bool gc)
|
||||
{
|
||||
struct bkey_alloc_unpacked u;
|
||||
struct bch_dev *ca;
|
||||
struct bucket *g;
|
||||
struct bucket_mark old, m;
|
||||
|
||||
if ((flags & BCH_BUCKET_MARK_GC) ||
|
||||
gc_visited(c, pos))
|
||||
__bch2_mark_alloc_bucket(c, ca, b, owned_by_allocator, true);
|
||||
if (!inserting)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* alloc btree is read in by bch2_alloc_read, not gc:
|
||||
*/
|
||||
if (flags & BCH_BUCKET_MARK_GC)
|
||||
return 0;
|
||||
|
||||
u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
|
||||
ca = bch_dev_bkey_exists(c, k.k->p.inode);
|
||||
g = __bucket(ca, k.k->p.offset, gc);
|
||||
|
||||
/*
|
||||
* this should currently only be getting called from the bucket
|
||||
* invalidate path:
|
||||
*/
|
||||
BUG_ON(u.dirty_sectors);
|
||||
BUG_ON(u.cached_sectors);
|
||||
BUG_ON(!g->mark.owned_by_allocator);
|
||||
|
||||
old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
|
||||
m.gen = u.gen;
|
||||
m.data_type = u.data_type;
|
||||
m.dirty_sectors = u.dirty_sectors;
|
||||
m.cached_sectors = u.cached_sectors;
|
||||
}));
|
||||
|
||||
g->io_time[READ] = u.read_time;
|
||||
g->io_time[WRITE] = u.write_time;
|
||||
g->oldest_gen = u.oldest_gen;
|
||||
g->gen_valid = 1;
|
||||
|
||||
if (old.cached_sectors) {
|
||||
update_cached_sectors(c, fs_usage, ca->dev_idx,
|
||||
-old.cached_sectors);
|
||||
trace_invalidate(ca, bucket_to_sector(ca, k.k->p.offset),
|
||||
old.cached_sectors);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define checked_add(a, b) \
|
||||
@ -474,9 +542,9 @@ do { \
|
||||
BUG_ON((a) != _res); \
|
||||
} while (0)
|
||||
|
||||
static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, enum bch_data_type type,
|
||||
unsigned sectors, bool gc)
|
||||
static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, enum bch_data_type type,
|
||||
unsigned sectors, bool gc)
|
||||
{
|
||||
struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
|
||||
struct bucket *g = __bucket(ca, b, gc);
|
||||
@ -490,6 +558,8 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
new.data_type = type;
|
||||
checked_add(new.dirty_sectors, sectors);
|
||||
}));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
@ -501,15 +571,8 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
type != BCH_DATA_JOURNAL);
|
||||
|
||||
if (likely(c)) {
|
||||
percpu_rwsem_assert_held(&c->mark_lock);
|
||||
|
||||
if (!(flags & BCH_BUCKET_MARK_GC))
|
||||
__bch2_mark_metadata_bucket(c, ca, b, type, sectors,
|
||||
false);
|
||||
if ((flags & BCH_BUCKET_MARK_GC) ||
|
||||
gc_visited(c, pos))
|
||||
__bch2_mark_metadata_bucket(c, ca, b, type, sectors,
|
||||
true);
|
||||
do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags,
|
||||
ca, b, type, sectors);
|
||||
} else {
|
||||
struct bucket *g;
|
||||
struct bucket_mark new;
|
||||
@ -553,7 +616,7 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
|
||||
* loop, to avoid racing with the start of gc clearing all the marks - GC does
|
||||
* that with the gc pos seqlock held.
|
||||
*/
|
||||
static void bch2_mark_pointer(struct bch_fs *c,
|
||||
static bool bch2_mark_pointer(struct bch_fs *c,
|
||||
struct extent_ptr_decoded p,
|
||||
s64 sectors, enum bch_data_type data_type,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
@ -581,7 +644,7 @@ static void bch2_mark_pointer(struct bch_fs *c,
|
||||
BUG_ON(!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags));
|
||||
EBUG_ON(!p.ptr.cached &&
|
||||
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!p.ptr.cached)
|
||||
@ -612,6 +675,8 @@ static void bch2_mark_pointer(struct bch_fs *c,
|
||||
bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
|
||||
|
||||
BUG_ON(!gc && bucket_became_unavailable(old, new));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int bch2_mark_stripe_ptr(struct bch_fs *c,
|
||||
@ -694,13 +759,13 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
|
||||
s64 disk_sectors = data_type == BCH_DATA_BTREE
|
||||
? sectors
|
||||
: ptr_disk_sectors_delta(p, sectors);
|
||||
|
||||
bch2_mark_pointer(c, p, disk_sectors, data_type,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
bool stale = bch2_mark_pointer(c, p, disk_sectors, data_type,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
|
||||
if (p.ptr.cached) {
|
||||
update_cached_sectors(c, fs_usage, p.ptr.dev,
|
||||
disk_sectors);
|
||||
if (disk_sectors && !stale)
|
||||
update_cached_sectors(c, fs_usage, p.ptr.dev,
|
||||
disk_sectors);
|
||||
} else if (!p.ec_nr) {
|
||||
dirty_sectors += disk_sectors;
|
||||
r.e.devs[r.e.nr_devs++] = p.ptr.dev;
|
||||
@ -826,30 +891,31 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
unsigned journal_seq, unsigned flags,
|
||||
bool gc)
|
||||
{
|
||||
int ret = 0;
|
||||
if (!fs_usage || gc)
|
||||
fs_usage = this_cpu_ptr(c->usage[gc]);
|
||||
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_alloc:
|
||||
return bch2_mark_alloc(c, k, inserting,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
case KEY_TYPE_btree_ptr:
|
||||
ret = bch2_mark_extent(c, k, inserting
|
||||
? c->opts.btree_node_size
|
||||
: -c->opts.btree_node_size,
|
||||
BCH_DATA_BTREE,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
break;
|
||||
return bch2_mark_extent(c, k, inserting
|
||||
? c->opts.btree_node_size
|
||||
: -c->opts.btree_node_size,
|
||||
BCH_DATA_BTREE,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
case KEY_TYPE_extent:
|
||||
ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
break;
|
||||
return bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
case KEY_TYPE_stripe:
|
||||
ret = bch2_mark_stripe(c, k, inserting,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
break;
|
||||
return bch2_mark_stripe(c, k, inserting,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
case KEY_TYPE_inode:
|
||||
if (inserting)
|
||||
fs_usage->s.nr_inodes++;
|
||||
fs_usage->nr_inodes++;
|
||||
else
|
||||
fs_usage->s.nr_inodes--;
|
||||
break;
|
||||
fs_usage->nr_inodes--;
|
||||
return 0;
|
||||
case KEY_TYPE_reservation: {
|
||||
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
|
||||
|
||||
@ -857,15 +923,13 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
replicas = clamp_t(unsigned, replicas, 1,
|
||||
ARRAY_SIZE(fs_usage->persistent_reserved));
|
||||
|
||||
fs_usage->s.reserved += sectors;
|
||||
fs_usage->reserved += sectors;
|
||||
fs_usage->persistent_reserved[replicas - 1] += sectors;
|
||||
break;
|
||||
return 0;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_mark_key_locked(struct bch_fs *c,
|
||||
@ -875,26 +939,9 @@ int bch2_mark_key_locked(struct bch_fs *c,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
u64 journal_seq, unsigned flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!(flags & BCH_BUCKET_MARK_GC)) {
|
||||
ret = __bch2_mark_key(c, k, inserting, sectors,
|
||||
fs_usage ?: this_cpu_ptr(c->usage[0]),
|
||||
journal_seq, flags, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if ((flags & BCH_BUCKET_MARK_GC) ||
|
||||
gc_visited(c, pos)) {
|
||||
ret = __bch2_mark_key(c, k, inserting, sectors,
|
||||
this_cpu_ptr(c->usage[1]),
|
||||
journal_seq, flags, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return do_mark_fn(__bch2_mark_key, c, pos, flags,
|
||||
k, inserting, sectors, fs_usage,
|
||||
journal_seq, flags);
|
||||
}
|
||||
|
||||
int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
@ -932,7 +979,7 @@ void bch2_mark_update(struct btree_insert *trans,
|
||||
percpu_down_read_preempt_disable(&c->mark_lock);
|
||||
fs_usage = bch2_fs_usage_get_scratch(c);
|
||||
|
||||
if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
|
||||
if (!(trans->flags & BTREE_INSERT_NOMARK))
|
||||
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
|
||||
bpos_min(insert->k->k.p, b->key.k.p).offset -
|
||||
bkey_start_offset(&insert->k->k),
|
||||
@ -985,7 +1032,7 @@ void bch2_mark_update(struct btree_insert *trans,
|
||||
bch2_btree_node_iter_advance(&node_iter, b);
|
||||
}
|
||||
|
||||
if (bch2_fs_usage_apply(c, fs_usage, trans->disk_res, pos) &&
|
||||
if (bch2_fs_usage_apply(c, fs_usage, trans->disk_res) &&
|
||||
!warned_disk_usage &&
|
||||
!xchg(&warned_disk_usage, 1)) {
|
||||
char buf[200];
|
||||
@ -1026,13 +1073,13 @@ static u64 bch2_recalc_sectors_available(struct bch_fs *c)
|
||||
{
|
||||
percpu_u64_set(&c->pcpu->sectors_available, 0);
|
||||
|
||||
return avail_factor(bch2_fs_sectors_free(c));
|
||||
return avail_factor(__bch2_fs_usage_read_short(c).free);
|
||||
}
|
||||
|
||||
void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
|
||||
{
|
||||
percpu_down_read_preempt_disable(&c->mark_lock);
|
||||
this_cpu_sub(c->usage[0]->s.online_reserved,
|
||||
this_cpu_sub(c->usage[0]->online_reserved,
|
||||
res->sectors);
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
|
||||
@ -1071,38 +1118,22 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
|
||||
|
||||
out:
|
||||
pcpu->sectors_available -= sectors;
|
||||
this_cpu_add(c->usage[0]->s.online_reserved, sectors);
|
||||
this_cpu_add(c->usage[0]->online_reserved, sectors);
|
||||
res->sectors += sectors;
|
||||
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
return 0;
|
||||
|
||||
recalculate:
|
||||
/*
|
||||
* GC recalculates sectors_available when it starts, so that hopefully
|
||||
* we don't normally end up blocking here:
|
||||
*/
|
||||
|
||||
/*
|
||||
* Piss fuck, we can be called from extent_insert_fixup() with btree
|
||||
* locks held:
|
||||
*/
|
||||
|
||||
if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD)) {
|
||||
if (!(flags & BCH_DISK_RESERVATION_BTREE_LOCKS_HELD))
|
||||
down_read(&c->gc_lock);
|
||||
else if (!down_read_trylock(&c->gc_lock))
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
|
||||
sectors_available = bch2_recalc_sectors_available(c);
|
||||
|
||||
if (sectors <= sectors_available ||
|
||||
(flags & BCH_DISK_RESERVATION_NOFAIL)) {
|
||||
atomic64_set(&c->sectors_available,
|
||||
max_t(s64, 0, sectors_available - sectors));
|
||||
this_cpu_add(c->usage[0]->s.online_reserved, sectors);
|
||||
this_cpu_add(c->usage[0]->online_reserved, sectors);
|
||||
res->sectors += sectors;
|
||||
ret = 0;
|
||||
} else {
|
||||
@ -1112,9 +1143,6 @@ recalculate:
|
||||
|
||||
percpu_up_write(&c->mark_lock);
|
||||
|
||||
if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD))
|
||||
up_read(&c->gc_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1135,7 +1163,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
struct bucket_array *buckets = NULL, *old_buckets = NULL;
|
||||
unsigned long *buckets_nouse = NULL;
|
||||
unsigned long *buckets_written = NULL;
|
||||
u8 *oldest_gens = NULL;
|
||||
alloc_fifo free[RESERVE_NR];
|
||||
alloc_fifo free_inc;
|
||||
alloc_heap alloc_heap;
|
||||
@ -1161,8 +1188,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
if (!(buckets = kvpmalloc(sizeof(struct bucket_array) +
|
||||
nbuckets * sizeof(struct bucket),
|
||||
GFP_KERNEL|__GFP_ZERO)) ||
|
||||
!(oldest_gens = kvpmalloc(nbuckets * sizeof(u8),
|
||||
GFP_KERNEL|__GFP_ZERO)) ||
|
||||
!(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
|
||||
sizeof(unsigned long),
|
||||
GFP_KERNEL|__GFP_ZERO)) ||
|
||||
@ -1197,9 +1222,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
memcpy(buckets->b,
|
||||
old_buckets->b,
|
||||
n * sizeof(struct bucket));
|
||||
memcpy(oldest_gens,
|
||||
ca->oldest_gens,
|
||||
n * sizeof(u8));
|
||||
memcpy(buckets_nouse,
|
||||
ca->buckets_nouse,
|
||||
BITS_TO_LONGS(n) * sizeof(unsigned long));
|
||||
@ -1211,7 +1233,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
rcu_assign_pointer(ca->buckets[0], buckets);
|
||||
buckets = old_buckets;
|
||||
|
||||
swap(ca->oldest_gens, oldest_gens);
|
||||
swap(ca->buckets_nouse, buckets_nouse);
|
||||
swap(ca->buckets_written, buckets_written);
|
||||
|
||||
@ -1255,8 +1276,6 @@ err:
|
||||
BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
|
||||
kvpfree(buckets_written,
|
||||
BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
|
||||
kvpfree(oldest_gens,
|
||||
nbuckets * sizeof(u8));
|
||||
if (buckets)
|
||||
call_rcu(&old_buckets->rcu, buckets_free_rcu);
|
||||
|
||||
@ -1276,7 +1295,6 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
|
||||
BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
|
||||
kvpfree(ca->buckets_nouse,
|
||||
BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
|
||||
kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
|
||||
kvpfree(rcu_dereference_protected(ca->buckets[0], 1),
|
||||
sizeof(struct bucket_array) +
|
||||
ca->mi.nbuckets * sizeof(struct bucket));
|
||||
|
@@ -16,13 +16,14 @@

#define bucket_cmpxchg(g, new, expr) \
({ \
struct bucket *_g = g; \
u64 _v = atomic64_read(&(g)->_mark.v); \
struct bucket_mark _old; \
\
do { \
(new).v.counter = _old.v.counter = _v; \
expr; \
} while ((_v = atomic64_cmpxchg(&(g)->_mark.v, \
} while ((_v = atomic64_cmpxchg(&(_g)->_mark.v, \
_old.v.counter, \
(new).v.counter)) != _old.v.counter);\
_old; \
@@ -56,18 +57,6 @@ static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
return __bucket(ca, b, false);
}

static inline void bucket_set_dirty(struct bch_dev *ca, size_t b)
{
struct bucket *g;
struct bucket_mark m;

rcu_read_lock();
g = bucket(ca, b);
bucket_cmpxchg(g, m, m.dirty = true);
rcu_read_unlock();

}

static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca,
size_t b, int rw)
{
@@ -86,7 +75,9 @@ static inline u16 bucket_last_io(struct bch_fs *c, struct bucket *g, int rw)

static inline u8 bucket_gc_gen(struct bch_dev *ca, size_t b)
{
return bucket(ca, b)->mark.gen - ca->oldest_gens[b];
struct bucket *g = bucket(ca, b);

return g->mark.gen - g->oldest_gen;
}

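bucket_cmpxchg(), shown above, snapshots the packed 64-bit bucket mark, lets `expr` edit a local copy, and retries the compare-exchange until no other CPU changed the mark underneath. A compact userspace analogue of that loop, assuming a toy packed-mark union; all types and field names here are invented, not bcachefs's:

#include <stdatomic.h>
#include <stdint.h>

/* Toy packed bucket mark: several fields squeezed into one u64 so the whole
 * mark can be updated with a single compare-and-swap. */
union mark {
	struct {
		uint8_t  gen;
		uint8_t  data_type;
		uint16_t dirty_sectors;
		uint16_t cached_sectors;
		uint8_t  dirty;
	};
	uint64_t v;
};

static _Atomic uint64_t bucket_mark;

/* Same shape as bucket_cmpxchg(g, new, expr): snapshot the packed value,
 * apply the update ("expr") to a copy, retry if another thread raced us. */
static union mark mark_set_dirty(void)
{
	union mark old, new;
	uint64_t v = atomic_load(&bucket_mark);

	do {
		old.v = new.v = v;
		new.dirty = 1;				/* the "expr" part */
	} while (!atomic_compare_exchange_weak(&bucket_mark, &v, new.v));

	return old;					/* previous mark */
}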
static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
|
||||
@ -96,9 +87,10 @@ static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
|
||||
}
|
||||
|
||||
static inline struct bucket *PTR_BUCKET(struct bch_dev *ca,
|
||||
const struct bch_extent_ptr *ptr)
|
||||
const struct bch_extent_ptr *ptr,
|
||||
bool gc)
|
||||
{
|
||||
return bucket(ca, PTR_BUCKET_NR(ca, ptr));
|
||||
return __bucket(ca, PTR_BUCKET_NR(ca, ptr), gc);
|
||||
}
|
||||
|
||||
static inline struct bucket_mark ptr_bucket_mark(struct bch_dev *ca,
|
||||
@@ -219,31 +211,28 @@ static inline u64 dev_buckets_free(struct bch_fs *c, struct bch_dev *ca)

/* Filesystem usage: */

static inline unsigned fs_usage_u64s(struct bch_fs *c)
{

return sizeof(struct bch_fs_usage) / sizeof(u64) +
READ_ONCE(c->replicas.nr);
}

static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
{
struct bch_fs_usage *ret;

ret = this_cpu_ptr(c->usage_scratch);

memset(ret, 0, sizeof(*ret) + c->replicas.nr * sizeof(u64));
struct bch_fs_usage *ret = this_cpu_ptr(c->usage_scratch);

memset(ret, 0, fs_usage_u64s(c) * sizeof(u64));
return ret;
}

struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);

u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *);

struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *);

static inline u64 bch2_fs_sectors_free(struct bch_fs *c)
{
struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);

return usage.capacity - usage.used;
}

/* key/bucket marking: */

void bch2_bucket_seq_cleanup(struct bch_fs *);
|
||||
@ -257,8 +246,8 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
|
||||
size_t, enum bch_data_type, unsigned,
|
||||
struct gc_pos, unsigned);
|
||||
|
||||
#define BCH_BUCKET_MARK_NOATOMIC (1 << 0)
|
||||
#define BCH_BUCKET_MARK_GC (1 << 1)
|
||||
#define BCH_BUCKET_MARK_GC (1 << 0)
|
||||
#define BCH_BUCKET_MARK_NOATOMIC (1 << 1)
|
||||
|
||||
int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
|
||||
bool, s64, struct gc_pos,
|
||||
@ -268,7 +257,7 @@ int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
|
||||
struct bch_fs_usage *, u64, unsigned);
|
||||
void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
|
||||
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
|
||||
struct disk_reservation *, struct gc_pos);
|
||||
struct disk_reservation *);
|
||||
|
||||
/* disk reservations: */
|
||||
|
||||
@ -282,8 +271,6 @@ static inline void bch2_disk_reservation_put(struct bch_fs *c,
|
||||
}
|
||||
|
||||
#define BCH_DISK_RESERVATION_NOFAIL (1 << 0)
|
||||
#define BCH_DISK_RESERVATION_GC_LOCK_HELD (1 << 1)
|
||||
#define BCH_DISK_RESERVATION_BTREE_LOCKS_HELD (1 << 2)
|
||||
|
||||
int bch2_disk_reservation_add(struct bch_fs *,
|
||||
struct disk_reservation *,
|
||||
|
@@ -38,6 +38,7 @@ struct bucket {
};

u16 io_time[2];
u8 oldest_gen;
unsigned gen_valid:1;
};

@@ -62,35 +63,33 @@ struct bch_dev_usage {
struct bch_fs_usage {
/* all fields are in units of 512 byte sectors: */

/* summarized: */
struct bch_fs_usage_summarized {
u64 online_reserved;
u64 online_reserved;

/* fields after online_reserved are cleared/recalculated by gc: */
u64 gc_start[0];
/* fields after online_reserved are cleared/recalculated by gc: */
u64 gc_start[0];

u64 hidden;
u64 data;
u64 cached;
u64 reserved;
u64 nr_inodes;
u64 hidden;
u64 data;
u64 cached;
u64 reserved;
u64 nr_inodes;

/* XXX: add stats for compression ratio */
/* XXX: add stats for compression ratio */
#if 0
u64 uncompressed;
u64 compressed;
u64 uncompressed;
u64 compressed;
#endif
} s;

/* broken out: */

u64 persistent_reserved[BCH_REPLICAS_MAX];
u64 data[];
u64 replicas[];
};

struct bch_fs_usage_short {
u64 capacity;
u64 used;
u64 free;
u64 nr_inodes;
};

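The reworked struct bch_fs_usage above ends in a flexible `u64 replicas[]` array with one counter per entry in the filesystem's replicas table, which is why the helpers earlier size and clear it with `fs_usage_u64s(c) * sizeof(u64)` rather than `sizeof(struct bch_fs_usage)`. A simplified userspace sketch of that sizing scheme, not part of the patch; the fields and helper names are illustrative only:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef uint64_t u64;

/* Simplified stand-in for the new struct bch_fs_usage layout. */
struct fs_usage {
	u64 online_reserved;
	u64 hidden;
	u64 data;
	u64 cached;
	u64 reserved;
	u64 nr_inodes;
	u64 persistent_reserved[4];
	u64 replicas[];		/* one counter per replicas-table entry */
};

/* Size in u64s, mirroring fs_usage_u64s(): fixed part plus the per-replica tail. */
static unsigned fs_usage_u64s(unsigned replicas_nr)
{
	return sizeof(struct fs_usage) / sizeof(u64) + replicas_nr;
}

static struct fs_usage *fs_usage_alloc(unsigned replicas_nr)
{
	size_t bytes = fs_usage_u64s(replicas_nr) * sizeof(u64);
	struct fs_usage *u = malloc(bytes);

	if (u)
		memset(u, 0, bytes);
	return u;
}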
|
@ -402,10 +402,10 @@ static long bch2_ioctl_usage(struct bch_fs *c,
|
||||
if (!src)
|
||||
return -ENOMEM;
|
||||
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
dst.used = bch2_fs_sectors_used(c, src);
|
||||
dst.online_reserved = src->online_reserved;
|
||||
|
||||
dst.used = bch2_fs_sectors_used(c, *src);
|
||||
dst.online_reserved = src->s.online_reserved;
|
||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||
|
||||
for (i = 0; i < BCH_REPLICAS_MAX; i++) {
|
||||
dst.persistent_reserved[i] =
|
||||
|
@ -979,10 +979,8 @@ bch2_extent_can_insert(struct btree_insert *trans,
|
||||
|
||||
if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
|
||||
(sectors = bch2_extent_is_compressed(k))) {
|
||||
int flags = BCH_DISK_RESERVATION_BTREE_LOCKS_HELD;
|
||||
|
||||
if (trans->flags & BTREE_INSERT_NOFAIL)
|
||||
flags |= BCH_DISK_RESERVATION_NOFAIL;
|
||||
int flags = trans->flags & BTREE_INSERT_NOFAIL
|
||||
? BCH_DISK_RESERVATION_NOFAIL : 0;
|
||||
|
||||
switch (bch2_disk_reservation_add(trans->c,
|
||||
trans->disk_res,
|
||||
@ -991,8 +989,6 @@ bch2_extent_can_insert(struct btree_insert *trans,
|
||||
break;
|
||||
case -ENOSPC:
|
||||
return BTREE_INSERT_ENOSPC;
|
||||
case -EINTR:
|
||||
return BTREE_INSERT_NEED_GC_LOCK;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
@ -100,7 +100,7 @@ do { \
|
||||
({ \
|
||||
bool _r = !fifo_empty((fifo)); \
|
||||
if (_r) \
|
||||
(i) = (fifo)->data[--(fifo)->back & (fifo)->mask] \
|
||||
(i) = (fifo)->data[--(fifo)->back & (fifo)->mask]; \
|
||||
_r; \
|
||||
})
|
||||
|
||||
|
@ -17,23 +17,14 @@
|
||||
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
static bool journal_entry_is_open(struct journal *j)
|
||||
static bool __journal_entry_is_open(union journal_res_state state)
|
||||
{
|
||||
return j->reservations.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
|
||||
return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
|
||||
}
|
||||
|
||||
void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
|
||||
static bool journal_entry_is_open(struct journal *j)
|
||||
{
|
||||
struct journal_buf *w = journal_prev_buf(j);
|
||||
|
||||
atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
|
||||
|
||||
if (!need_write_just_set &&
|
||||
test_bit(JOURNAL_NEED_WRITE, &j->flags))
|
||||
bch2_time_stats_update(j->delay_time,
|
||||
j->need_write_time);
|
||||
|
||||
closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
|
||||
return __journal_entry_is_open(j->reservations);
|
||||
}
|
||||
|
||||
static void journal_pin_new_entry(struct journal *j, int count)
|
||||
@ -77,39 +68,71 @@ static inline bool journal_entry_empty(struct jset *j)
|
||||
return true;
|
||||
}
|
||||
|
||||
static enum {
|
||||
JOURNAL_ENTRY_ERROR,
|
||||
JOURNAL_ENTRY_INUSE,
|
||||
JOURNAL_ENTRY_CLOSED,
|
||||
JOURNAL_UNLOCKED,
|
||||
} journal_buf_switch(struct journal *j, bool need_write_just_set)
|
||||
void bch2_journal_halt(struct journal *j)
|
||||
{
|
||||
union journal_res_state old, new;
|
||||
u64 v = atomic64_read(&j->reservations.counter);
|
||||
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
|
||||
return;
|
||||
|
||||
new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
|
||||
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
journal_wake(j);
|
||||
closure_wake_up(&journal_cur_buf(j)->wait);
|
||||
}
|
||||
|
||||
/* journal entry close/open: */
|
||||
|
||||
void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set)
|
||||
{
|
||||
if (!need_write_just_set &&
|
||||
test_bit(JOURNAL_NEED_WRITE, &j->flags))
|
||||
bch2_time_stats_update(j->delay_time,
|
||||
j->need_write_time);
|
||||
|
||||
clear_bit(JOURNAL_NEED_WRITE, &j->flags);
|
||||
|
||||
closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if journal entry is now closed:
|
||||
*/
|
||||
static bool __journal_entry_close(struct journal *j)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct journal_buf *buf = journal_cur_buf(j);
|
||||
union journal_res_state old, new;
|
||||
u64 v = atomic64_read(&j->reservations.counter);
|
||||
bool set_need_write = false;
|
||||
unsigned sectors;
|
||||
|
||||
lockdep_assert_held(&j->lock);
|
||||
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL)
|
||||
return JOURNAL_ENTRY_CLOSED;
|
||||
return true;
|
||||
|
||||
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) {
|
||||
/* this entry will never be written: */
|
||||
closure_wake_up(&buf->wait);
|
||||
return JOURNAL_ENTRY_ERROR;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) {
|
||||
set_bit(JOURNAL_NEED_WRITE, &j->flags);
|
||||
j->need_write_time = local_clock();
|
||||
set_need_write = true;
|
||||
}
|
||||
|
||||
if (new.prev_buf_unwritten)
|
||||
return JOURNAL_ENTRY_INUSE;
|
||||
|
||||
/*
|
||||
* avoid race between setting buf->data->u64s and
|
||||
* journal_res_put starting write:
|
||||
*/
|
||||
journal_state_inc(&new);
|
||||
return false;
|
||||
|
||||
new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL;
|
||||
new.idx++;
|
||||
@ -119,15 +142,12 @@ static enum {
|
||||
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
clear_bit(JOURNAL_NEED_WRITE, &j->flags);
|
||||
|
||||
buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
|
||||
|
||||
j->prev_buf_sectors =
|
||||
vstruct_blocks_plus(buf->data, c->block_bits,
|
||||
buf->u64s_reserved) *
|
||||
c->opts.block_size;
|
||||
BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
|
||||
sectors = vstruct_blocks_plus(buf->data, c->block_bits,
|
||||
buf->u64s_reserved) << c->block_bits;
|
||||
BUG_ON(sectors > buf->sectors);
|
||||
buf->sectors = sectors;
|
||||
|
||||
bkey_extent_init(&buf->key);
|
||||
|
||||
@ -150,7 +170,6 @@ static enum {
|
||||
* Hence, we want update/set last_seq on the current journal entry right
|
||||
* before we open a new one:
|
||||
*/
|
||||
bch2_journal_reclaim_fast(j);
|
||||
buf->data->last_seq = cpu_to_le64(journal_last_seq(j));
|
||||
|
||||
if (journal_entry_empty(buf->data))
|
||||
@ -163,32 +182,22 @@ static enum {
|
||||
bch2_journal_buf_init(j);
|
||||
|
||||
cancel_delayed_work(&j->write_work);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
/* ugh - might be called from __journal_res_get() under wait_event() */
|
||||
__set_current_state(TASK_RUNNING);
|
||||
bch2_journal_buf_put(j, old.idx, need_write_just_set);
|
||||
bch2_journal_space_available(j);
|
||||
|
||||
return JOURNAL_UNLOCKED;
|
||||
bch2_journal_buf_put(j, old.idx, set_need_write);
|
||||
return true;
|
||||
}
|
||||
|
||||
void bch2_journal_halt(struct journal *j)
|
||||
static bool journal_entry_close(struct journal *j)
|
||||
{
|
||||
union journal_res_state old, new;
|
||||
u64 v = atomic64_read(&j->reservations.counter);
|
||||
bool ret;
|
||||
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
|
||||
return;
|
||||
spin_lock(&j->lock);
|
||||
ret = __journal_entry_close(j);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
|
||||
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
journal_wake(j);
|
||||
closure_wake_up(&journal_cur_buf(j)->wait);
|
||||
closure_wake_up(&journal_prev_buf(j)->wait);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -196,46 +205,39 @@ void bch2_journal_halt(struct journal *j)
* journal reservation - journal entry is open means journal is dirty:
*
* returns:
* 1: success
* 0: journal currently full (must wait)
* -EROFS: insufficient rw devices
* -EIO: journal error
* 0: success
* -ENOSPC: journal currently full, must invoke reclaim
* -EAGAIN: journal blocked, must wait
* -EROFS: insufficient rw devices or journal error
*/
static int journal_entry_open(struct journal *j)
{
struct journal_buf *buf = journal_cur_buf(j);
union journal_res_state old, new;
ssize_t u64s;
int sectors;
int u64s;
u64 v;

lockdep_assert_held(&j->lock);
BUG_ON(journal_entry_is_open(j));

if (!fifo_free(&j->pin))
return 0;
if (j->blocked)
return -EAGAIN;

sectors = bch2_journal_entry_sectors(j);
if (sectors <= 0)
return sectors;
if (j->cur_entry_error)
return j->cur_entry_error;

BUG_ON(!j->cur_entry_sectors);

buf->disk_sectors = sectors;
buf->u64s_reserved = j->entry_u64s_reserved;
buf->disk_sectors = j->cur_entry_sectors;
buf->sectors = min(buf->disk_sectors, buf->buf_size >> 9);

sectors = min_t(unsigned, sectors, buf->size >> 9);
j->cur_buf_sectors = sectors;

u64s = (sectors << 9) / sizeof(u64);

/* Subtract the journal header */
u64s -= sizeof(struct jset) / sizeof(u64);
u64s -= buf->u64s_reserved;
u64s = max_t(ssize_t, 0L, u64s);

BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL);
u64s = (int) (buf->sectors << 9) / sizeof(u64) -
journal_entry_overhead(j);
u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);

if (u64s <= le32_to_cpu(buf->data->u64s))
return 0;
return -ENOSPC;

/*
|
||||
* Must be set before marking the journal entry as open:
|
||||
@ -246,11 +248,14 @@ static int journal_entry_open(struct journal *j)
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
|
||||
EBUG_ON(journal_state_count(new, new.idx));
|
||||
|
||||
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
|
||||
return -EIO;
|
||||
return -EROFS;
|
||||
|
||||
/* Handle any already added entries */
|
||||
new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
|
||||
journal_state_inc(&new);
|
||||
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
@ -263,37 +268,22 @@ static int journal_entry_open(struct journal *j)
|
||||
&j->write_work,
|
||||
msecs_to_jiffies(j->write_delay_ms));
|
||||
journal_wake(j);
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool __journal_entry_close(struct journal *j)
|
||||
static bool journal_quiesced(struct journal *j)
|
||||
{
|
||||
bool set_need_write;
|
||||
union journal_res_state state = READ_ONCE(j->reservations);
|
||||
bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state);
|
||||
|
||||
if (!journal_entry_is_open(j)) {
|
||||
spin_unlock(&j->lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
set_need_write = !test_and_set_bit(JOURNAL_NEED_WRITE, &j->flags);
|
||||
if (set_need_write)
|
||||
j->need_write_time = local_clock();
|
||||
|
||||
switch (journal_buf_switch(j, set_need_write)) {
|
||||
case JOURNAL_ENTRY_INUSE:
|
||||
spin_unlock(&j->lock);
|
||||
return false;
|
||||
default:
|
||||
spin_unlock(&j->lock);
|
||||
case JOURNAL_UNLOCKED:
|
||||
return true;
|
||||
}
|
||||
if (!ret)
|
||||
journal_entry_close(j);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool journal_entry_close(struct journal *j)
|
||||
static void journal_quiesce(struct journal *j)
|
||||
{
|
||||
spin_lock(&j->lock);
|
||||
return __journal_entry_close(j);
|
||||
wait_event(j->wait, journal_quiesced(j));
|
||||
}
|
||||
|
||||
static void journal_write_work(struct work_struct *work)
|
||||
@ -337,7 +327,11 @@ retry:
|
||||
if (journal_res_get_fast(j, res, flags))
|
||||
return 0;
|
||||
|
||||
if (bch2_journal_error(j))
|
||||
return -EROFS;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
/*
|
||||
* Recheck after taking the lock, so we don't race with another thread
|
||||
* that just did journal_entry_open() and call journal_entry_close()
|
||||
@ -355,56 +349,43 @@ retry:
|
||||
*/
|
||||
buf = journal_cur_buf(j);
|
||||
if (journal_entry_is_open(j) &&
|
||||
buf->size >> 9 < buf->disk_sectors &&
|
||||
buf->size < JOURNAL_ENTRY_SIZE_MAX)
|
||||
j->buf_size_want = max(j->buf_size_want, buf->size << 1);
|
||||
buf->buf_size >> 9 < buf->disk_sectors &&
|
||||
buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
|
||||
j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
|
||||
|
||||
/*
|
||||
* Close the current journal entry if necessary, then try to start a new
|
||||
* one:
|
||||
*/
|
||||
switch (journal_buf_switch(j, false)) {
|
||||
case JOURNAL_ENTRY_ERROR:
|
||||
spin_unlock(&j->lock);
|
||||
return -EROFS;
|
||||
case JOURNAL_ENTRY_INUSE:
|
||||
if (journal_entry_is_open(j) &&
|
||||
!__journal_entry_close(j)) {
|
||||
/*
|
||||
* The current journal entry is still open, but we failed to get
|
||||
* a journal reservation because there's not enough space in it,
|
||||
* and we can't close it and start another because we haven't
|
||||
* finished writing out the previous entry:
|
||||
* We failed to get a reservation on the current open journal
|
||||
* entry because it's full, and we can't close it because
|
||||
* there's still a previous one in flight:
|
||||
*/
|
||||
spin_unlock(&j->lock);
|
||||
trace_journal_entry_full(c);
|
||||
goto blocked;
|
||||
case JOURNAL_ENTRY_CLOSED:
|
||||
break;
|
||||
case JOURNAL_UNLOCKED:
|
||||
goto retry;
|
||||
ret = -EAGAIN;
|
||||
} else {
|
||||
ret = journal_entry_open(j);
|
||||
}
|
||||
|
||||
/* We now have a new, closed journal buf - see if we can open it: */
|
||||
ret = journal_entry_open(j);
|
||||
if ((ret == -EAGAIN || ret == -ENOSPC) &&
|
||||
!j->res_get_blocked_start)
|
||||
j->res_get_blocked_start = local_clock() ?: 1;
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret)
|
||||
if (!ret)
|
||||
goto retry;
|
||||
if (ret == -ENOSPC) {
|
||||
/*
|
||||
* Journal is full - can't rely on reclaim from work item due to
|
||||
* freezing:
|
||||
*/
|
||||
trace_journal_full(c);
|
||||
if (!(flags & JOURNAL_RES_GET_NONBLOCK))
|
||||
bch2_journal_reclaim_work(&j->reclaim_work.work);
|
||||
ret = -EAGAIN;
|
||||
}
|
||||
|
||||
/* Journal's full, we have to wait */
|
||||
|
||||
/*
|
||||
* Direct reclaim - can't rely on reclaim from work item
|
||||
* due to freezing..
|
||||
*/
|
||||
bch2_journal_reclaim_work(&j->reclaim_work.work);
|
||||
|
||||
trace_journal_full(c);
|
||||
blocked:
|
||||
if (!j->res_get_blocked_start)
|
||||
j->res_get_blocked_start = local_clock() ?: 1;
|
||||
return -EAGAIN;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -422,7 +403,7 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
|
||||
{
|
||||
int ret;
|
||||
|
||||
wait_event(j->wait,
|
||||
closure_wait_event(&j->async_wait,
|
||||
(ret = __journal_res_get(j, res, flags)) != -EAGAIN ||
|
||||
(flags & JOURNAL_RES_GET_NONBLOCK));
|
||||
return ret;
|
||||
@ -441,9 +422,9 @@ void bch2_journal_entry_res_resize(struct journal *j,
|
||||
|
||||
j->entry_u64s_reserved += d;
|
||||
if (d <= 0)
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
|
||||
j->cur_entry_u64s -= d;
|
||||
j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d);
|
||||
smp_mb();
|
||||
state = READ_ONCE(j->reservations);
|
||||
|
||||
@ -454,15 +435,12 @@ void bch2_journal_entry_res_resize(struct journal *j,
|
||||
* Not enough room in current journal entry, have to flush it:
|
||||
*/
|
||||
__journal_entry_close(j);
|
||||
goto out;
|
||||
} else {
|
||||
journal_cur_buf(j)->u64s_reserved += d;
|
||||
}
|
||||
|
||||
journal_cur_buf(j)->u64s_reserved += d;
|
||||
out_unlock:
|
||||
spin_unlock(&j->lock);
|
||||
out:
|
||||
spin_unlock(&j->lock);
|
||||
res->u64s += d;
|
||||
return;
|
||||
}
|
||||
|
||||
/* journal flushing: */
|
||||
@ -492,47 +470,47 @@ int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
int ret;
|
||||
retry:
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
if (seq < journal_cur_seq(j) ||
|
||||
/*
|
||||
* Can't try to open more than one sequence number ahead:
|
||||
*/
|
||||
BUG_ON(journal_cur_seq(j) < seq && !journal_entry_is_open(j));
|
||||
|
||||
if (journal_cur_seq(j) > seq ||
|
||||
journal_entry_is_open(j)) {
|
||||
spin_unlock(&j->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (journal_cur_seq(j) < seq) {
|
||||
switch (journal_buf_switch(j, false)) {
|
||||
case JOURNAL_ENTRY_ERROR:
|
||||
spin_unlock(&j->lock);
|
||||
return -EROFS;
|
||||
case JOURNAL_ENTRY_INUSE:
|
||||
/* haven't finished writing out the previous one: */
|
||||
trace_journal_entry_full(c);
|
||||
goto blocked;
|
||||
case JOURNAL_ENTRY_CLOSED:
|
||||
break;
|
||||
case JOURNAL_UNLOCKED:
|
||||
goto retry;
|
||||
}
|
||||
if (journal_cur_seq(j) < seq &&
|
||||
!__journal_entry_close(j)) {
|
||||
/* haven't finished writing out the previous one: */
|
||||
trace_journal_entry_full(c);
|
||||
ret = -EAGAIN;
|
||||
} else {
|
||||
BUG_ON(journal_cur_seq(j) != seq);
|
||||
|
||||
ret = journal_entry_open(j);
|
||||
}
|
||||
|
||||
BUG_ON(journal_cur_seq(j) < seq);
|
||||
|
||||
ret = journal_entry_open(j);
|
||||
if (ret) {
|
||||
spin_unlock(&j->lock);
|
||||
return ret < 0 ? ret : 0;
|
||||
}
|
||||
blocked:
|
||||
if (!j->res_get_blocked_start)
|
||||
if ((ret == -EAGAIN || ret == -ENOSPC) &&
|
||||
!j->res_get_blocked_start)
|
||||
j->res_get_blocked_start = local_clock() ?: 1;
|
||||
|
||||
closure_wait(&j->async_wait, cl);
|
||||
if (ret == -EAGAIN || ret == -ENOSPC)
|
||||
closure_wait(&j->async_wait, cl);
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
bch2_journal_reclaim_work(&j->reclaim_work.work);
|
||||
return -EAGAIN;
|
||||
if (ret == -ENOSPC) {
|
||||
trace_journal_full(c);
|
||||
bch2_journal_reclaim_work(&j->reclaim_work.work);
|
||||
ret = -EAGAIN;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int journal_seq_error(struct journal *j, u64 seq)
|
||||
@ -615,8 +593,7 @@ void bch2_journal_flush_seq_async(struct journal *j, u64 seq,
|
||||
|
||||
if (seq == journal_cur_seq(j))
|
||||
__journal_entry_close(j);
|
||||
else
|
||||
spin_unlock(&j->lock);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
static int journal_seq_flushed(struct journal *j, u64 seq)
|
||||
@ -628,8 +605,7 @@ static int journal_seq_flushed(struct journal *j, u64 seq)
|
||||
|
||||
if (seq == journal_cur_seq(j))
|
||||
__journal_entry_close(j);
|
||||
else
|
||||
spin_unlock(&j->lock);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -721,6 +697,26 @@ int bch2_journal_flush(struct journal *j)
|
||||
return bch2_journal_flush_seq(j, seq);
|
||||
}
|
||||
|
||||
/* block/unlock the journal: */
|
||||
|
||||
void bch2_journal_unblock(struct journal *j)
|
||||
{
|
||||
spin_lock(&j->lock);
|
||||
j->blocked--;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
journal_wake(j);
|
||||
}
|
||||
|
||||
void bch2_journal_block(struct journal *j)
|
||||
{
|
||||
spin_lock(&j->lock);
|
||||
j->blocked++;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
journal_quiesce(j);
|
||||
}
|
||||
|
||||
/* allocate journal on a device: */
|
||||
|
||||
static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
@ -743,7 +739,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
goto err;
|
||||
|
||||
journal_buckets = bch2_sb_resize_journal(&ca->disk_sb,
|
||||
nr + sizeof(*journal_buckets) / sizeof(u64));
|
||||
nr + sizeof(*journal_buckets) / sizeof(u64));
|
||||
if (!journal_buckets)
|
||||
goto err;
|
||||
|
||||
@ -806,9 +802,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
ja->nr++;
|
||||
|
||||
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
|
||||
ca->mi.bucket_size,
|
||||
gc_phase(GC_PHASE_SB),
|
||||
0);
|
||||
ca->mi.bucket_size,
|
||||
gc_phase(GC_PHASE_SB),
|
||||
0);
|
||||
|
||||
if (c) {
|
||||
spin_unlock(&c->journal.lock);
|
||||
@ -859,7 +855,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
|
||||
*/
|
||||
|
||||
if (bch2_disk_reservation_get(c, &disk_res,
|
||||
bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
|
||||
bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
|
||||
mutex_unlock(&c->sb_lock);
|
||||
return -ENOSPC;
|
||||
}
|
||||
@ -930,8 +926,7 @@ void bch2_fs_journal_stop(struct journal *j)
|
||||
c->btree_roots_dirty)
|
||||
bch2_journal_meta(j);
|
||||
|
||||
BUG_ON(journal_entry_is_open(j) ||
|
||||
j->reservations.prev_buf_unwritten);
|
||||
journal_quiesce(j);
|
||||
|
||||
BUG_ON(!bch2_journal_error(j) &&
|
||||
test_bit(JOURNAL_NOT_EMPTY, &j->flags));
|
||||
@ -957,7 +952,7 @@ void bch2_fs_journal_start(struct journal *j)
|
||||
journal_pin_new_entry(j, 0);
|
||||
|
||||
/*
|
||||
* journal_buf_switch() only inits the next journal entry when it
|
||||
* __journal_entry_close() only inits the next journal entry when it
|
||||
* closes an open journal entry - the very first journal entry gets
|
||||
* initialized here:
|
||||
*/
|
||||
@ -966,6 +961,7 @@ void bch2_fs_journal_start(struct journal *j)
|
||||
|
||||
c->last_bucket_seq_cleanup = journal_cur_seq(j);
|
||||
|
||||
bch2_journal_space_available(j);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
/*
|
||||
@ -975,7 +971,7 @@ void bch2_fs_journal_start(struct journal *j)
|
||||
*/
|
||||
bch2_journal_seq_blacklist_write(j);
|
||||
|
||||
queue_delayed_work(system_freezable_wq, &j->reclaim_work, 0);
|
||||
queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0);
|
||||
}
|
||||
|
||||
/* init/exit: */
|
||||
@ -1021,8 +1017,8 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
|
||||
|
||||
void bch2_fs_journal_exit(struct journal *j)
|
||||
{
|
||||
kvpfree(j->buf[1].data, j->buf[1].size);
|
||||
kvpfree(j->buf[0].data, j->buf[0].size);
|
||||
kvpfree(j->buf[1].data, j->buf[1].buf_size);
|
||||
kvpfree(j->buf[0].data, j->buf[0].buf_size);
|
||||
free_fifo(&j->pin);
|
||||
}
|
||||
|
||||
@ -1046,8 +1042,8 @@ int bch2_fs_journal_init(struct journal *j)
|
||||
|
||||
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
|
||||
|
||||
j->buf[0].size = JOURNAL_ENTRY_SIZE_MIN;
|
||||
j->buf[1].size = JOURNAL_ENTRY_SIZE_MIN;
|
||||
j->buf[0].buf_size = JOURNAL_ENTRY_SIZE_MIN;
|
||||
j->buf[1].buf_size = JOURNAL_ENTRY_SIZE_MIN;
|
||||
j->write_delay_ms = 1000;
|
||||
j->reclaim_delay_ms = 100;
|
||||
|
||||
@ -1060,8 +1056,8 @@ int bch2_fs_journal_init(struct journal *j)
|
||||
{ .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
|
||||
|
||||
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
|
||||
!(j->buf[0].data = kvpmalloc(j->buf[0].size, GFP_KERNEL)) ||
|
||||
!(j->buf[1].data = kvpmalloc(j->buf[1].size, GFP_KERNEL))) {
|
||||
!(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) ||
|
||||
!(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@ -1078,35 +1074,54 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
|
||||
{
|
||||
struct printbuf out = _PBUF(buf, PAGE_SIZE);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
union journal_res_state *s = &j->reservations;
|
||||
union journal_res_state s;
|
||||
struct bch_dev *ca;
|
||||
unsigned iter;
|
||||
|
||||
rcu_read_lock();
|
||||
spin_lock(&j->lock);
|
||||
s = READ_ONCE(j->reservations);
|
||||
|
||||
pr_buf(&out,
|
||||
"active journal entries:\t%llu\n"
|
||||
"seq:\t\t\t%llu\n"
|
||||
"last_seq:\t\t%llu\n"
|
||||
"last_seq_ondisk:\t%llu\n"
|
||||
"reservation count:\t%u\n"
|
||||
"reservation offset:\t%u\n"
|
||||
"current entry u64s:\t%u\n"
|
||||
"io in flight:\t\t%i\n"
|
||||
"need write:\t\t%i\n"
|
||||
"dirty:\t\t\t%i\n"
|
||||
"replay done:\t\t%i\n",
|
||||
"current entry:\t\t",
|
||||
fifo_used(&j->pin),
|
||||
journal_cur_seq(j),
|
||||
journal_last_seq(j),
|
||||
j->last_seq_ondisk,
|
||||
journal_state_count(*s, s->idx),
|
||||
s->cur_entry_offset,
|
||||
j->cur_entry_u64s,
|
||||
s->prev_buf_unwritten,
|
||||
j->last_seq_ondisk);
|
||||
|
||||
switch (s.cur_entry_offset) {
|
||||
case JOURNAL_ENTRY_ERROR_VAL:
|
||||
pr_buf(&out, "error\n");
|
||||
break;
|
||||
case JOURNAL_ENTRY_CLOSED_VAL:
|
||||
pr_buf(&out, "closed\n");
|
||||
break;
|
||||
default:
|
||||
pr_buf(&out, "%u/%u\n",
|
||||
s.cur_entry_offset,
|
||||
j->cur_entry_u64s);
|
||||
break;
|
||||
}
|
||||
|
||||
pr_buf(&out,
|
||||
"current entry refs:\t%u\n"
|
||||
"prev entry unwritten:\t",
|
||||
journal_state_count(s, s.idx));
|
||||
|
||||
if (s.prev_buf_unwritten)
|
||||
pr_buf(&out, "yes, ref %u\n",
|
||||
journal_state_count(s, !s.idx));
|
||||
else
|
||||
pr_buf(&out, "no\n");
|
||||
|
||||
pr_buf(&out,
|
||||
"need write:\t\t%i\n"
|
||||
"replay done:\t\t%i\n",
|
||||
test_bit(JOURNAL_NEED_WRITE, &j->flags),
|
||||
journal_entry_is_open(j),
|
||||
test_bit(JOURNAL_REPLAY_DONE, &j->flags));
|
||||
|
||||
for_each_member_device_rcu(ca, c, iter,
|
||||
@ -1119,9 +1134,12 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
|
||||
pr_buf(&out,
|
||||
"dev %u:\n"
|
||||
"\tnr\t\t%u\n"
|
||||
"\tavailable\t%u:%u\n"
|
||||
"\tcur_idx\t\t%u (seq %llu)\n"
|
||||
"\tlast_idx\t%u (seq %llu)\n",
|
||||
iter, ja->nr,
|
||||
bch2_journal_dev_buckets_available(j, ja),
|
||||
ja->sectors_free,
|
||||
ja->cur_idx, ja->bucket_seq[ja->cur_idx],
|
||||
ja->last_idx, ja->bucket_seq[ja->last_idx]);
|
||||
}
|
||||
|
@ -178,6 +178,11 @@ static inline unsigned jset_u64s(unsigned u64s)
|
||||
return u64s + sizeof(struct jset_entry) / sizeof(u64);
|
||||
}
|
||||
|
||||
static inline int journal_entry_overhead(struct journal *j)
|
||||
{
|
||||
return sizeof(struct jset) / sizeof(u64) + j->entry_u64s_reserved;
|
||||
}
|
||||
|
||||
static inline struct jset_entry *
|
||||
bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
|
||||
{
|
||||
@ -222,7 +227,7 @@ static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *
|
||||
id, 0, k, k->k.u64s);
|
||||
}
|
||||
|
||||
void bch2_journal_buf_put_slowpath(struct journal *, bool);
|
||||
void __bch2_journal_buf_put(struct journal *, bool);
|
||||
|
||||
static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
|
||||
bool need_write_just_set)
|
||||
@ -233,17 +238,10 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
|
||||
.buf0_count = idx == 0,
|
||||
.buf1_count = idx == 1,
|
||||
}).v, &j->reservations.counter);
|
||||
|
||||
EBUG_ON(s.idx != idx && !s.prev_buf_unwritten);
|
||||
|
||||
/*
|
||||
* Do not initiate a journal write if the journal is in an error state
|
||||
* (previous journal entry write may have failed)
|
||||
*/
|
||||
if (s.idx != idx &&
|
||||
!journal_state_count(s, idx) &&
|
||||
s.cur_entry_offset != JOURNAL_ENTRY_ERROR_VAL)
|
||||
bch2_journal_buf_put_slowpath(j, need_write_just_set);
|
||||
if (!journal_state_count(s, idx)) {
|
||||
EBUG_ON(s.idx == idx || !s.prev_buf_unwritten);
|
||||
__bch2_journal_buf_put(j, need_write_just_set);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -291,6 +289,8 @@ static inline int journal_res_get_fast(struct journal *j,
|
||||
if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
|
||||
return 0;
|
||||
|
||||
EBUG_ON(!journal_state_count(new, new.idx));
|
||||
|
||||
if (flags & JOURNAL_RES_GET_CHECK)
|
||||
return 1;
|
||||
|
||||
@ -330,6 +330,8 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* journal_entry_res: */
|
||||
|
||||
void bch2_journal_entry_res_resize(struct journal *,
|
||||
struct journal_entry_res *,
|
||||
unsigned);
|
||||
@ -367,6 +369,9 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
|
||||
set_bit(JOURNAL_REPLAY_DONE, &j->flags);
|
||||
}
|
||||
|
||||
void bch2_journal_unblock(struct journal *);
|
||||
void bch2_journal_block(struct journal *);
|
||||
|
||||
ssize_t bch2_journal_print_debug(struct journal *, char *);
|
||||
ssize_t bch2_journal_print_pins(struct journal *, char *);
|
||||
|
||||
|
@ -825,7 +825,6 @@ fsck_err:
|
||||
int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
|
||||
{
|
||||
struct journal *j = &c->journal;
|
||||
struct journal_entry_pin_list *pin_list;
|
||||
struct bkey_i *k, *_n;
|
||||
struct jset_entry *entry;
|
||||
struct journal_replay *i, *n;
|
||||
@ -854,7 +853,8 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
|
||||
ret = bch2_btree_insert(c, entry->btree_id, k,
|
||||
&disk_res, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_JOURNAL_REPLAY);
|
||||
BTREE_INSERT_JOURNAL_REPLAY|
|
||||
BTREE_INSERT_NOMARK);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
@ -866,10 +866,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
pin_list = journal_seq_pin(j, j->replay_journal_seq);
|
||||
|
||||
if (atomic_dec_and_test(&pin_list->count))
|
||||
journal_wake(j);
|
||||
bch2_journal_pin_put(j, j->replay_journal_seq);
|
||||
}
|
||||
|
||||
j->replay_journal_seq = 0;
|
||||
@ -884,82 +881,6 @@ err:
|
||||
|
||||
/* journal write: */
|
||||
|
||||
static unsigned journal_dev_buckets_available(struct journal *j,
|
||||
struct journal_device *ja)
|
||||
{
|
||||
unsigned next = (ja->cur_idx + 1) % ja->nr;
|
||||
unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
|
||||
|
||||
/*
|
||||
* Don't use the last bucket unless writing the new last_seq
|
||||
* will make another bucket available:
|
||||
*/
|
||||
if (available &&
|
||||
journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
|
||||
--available;
|
||||
|
||||
return available;
|
||||
}
|
||||
|
||||
/* returns number of sectors available for next journal entry: */
|
||||
int bch2_journal_entry_sectors(struct journal *j)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bch_dev *ca;
|
||||
unsigned sectors_available = UINT_MAX;
|
||||
unsigned i, nr_online = 0, nr_devs = 0;
|
||||
|
||||
lockdep_assert_held(&j->lock);
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_member_device_rcu(ca, c, i,
|
||||
&c->rw_devs[BCH_DATA_JOURNAL]) {
|
||||
struct journal_device *ja = &ca->journal;
|
||||
unsigned buckets_this_device, sectors_this_device;
|
||||
|
||||
if (!ja->nr)
|
||||
continue;
|
||||
|
||||
buckets_this_device = journal_dev_buckets_available(j, ja);
|
||||
sectors_this_device = ja->sectors_free;
|
||||
|
||||
nr_online++;
|
||||
|
||||
/*
|
||||
* We that we don't allocate the space for a journal entry
|
||||
* until we write it out - thus, account for it here:
|
||||
*/
|
||||
if (j->prev_buf_sectors >= sectors_this_device) {
|
||||
if (!buckets_this_device)
|
||||
continue;
|
||||
|
||||
buckets_this_device--;
|
||||
sectors_this_device = ca->mi.bucket_size;
|
||||
}
|
||||
|
||||
sectors_this_device -= j->prev_buf_sectors;
|
||||
|
||||
if (buckets_this_device)
|
||||
sectors_this_device = ca->mi.bucket_size;
|
||||
|
||||
if (!sectors_this_device)
|
||||
continue;
|
||||
|
||||
sectors_available = min(sectors_available,
|
||||
sectors_this_device);
|
||||
nr_devs++;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (nr_online < c->opts.metadata_replicas_required)
|
||||
return -EROFS;
|
||||
|
||||
if (nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas))
|
||||
return 0;
|
||||
|
||||
return sectors_available;
|
||||
}
|
||||
|
||||
static void __journal_write_alloc(struct journal *j,
|
||||
struct journal_buf *w,
|
||||
struct dev_alloc_list *devs_sorted,
|
||||
@ -1033,7 +954,6 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
|
||||
devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe,
|
||||
&c->rw_devs[BCH_DATA_JOURNAL]);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
__journal_write_alloc(j, w, &devs_sorted,
|
||||
sectors, &replicas, replicas_want);
|
||||
|
||||
@ -1049,7 +969,7 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
|
||||
|
||||
if (sectors > ja->sectors_free &&
|
||||
sectors <= ca->mi.bucket_size &&
|
||||
journal_dev_buckets_available(j, ja)) {
|
||||
bch2_journal_dev_buckets_available(j, ja)) {
|
||||
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
|
||||
ja->sectors_free = ca->mi.bucket_size;
|
||||
}
|
||||
@ -1058,10 +978,6 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
|
||||
__journal_write_alloc(j, w, &devs_sorted,
|
||||
sectors, &replicas, replicas_want);
|
||||
done:
|
||||
if (replicas >= replicas_want)
|
||||
j->prev_buf_sectors = 0;
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS;
|
||||
@ -1116,17 +1032,17 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
|
||||
unsigned new_size = READ_ONCE(j->buf_size_want);
|
||||
void *new_buf;
|
||||
|
||||
if (buf->size >= new_size)
|
||||
if (buf->buf_size >= new_size)
|
||||
return;
|
||||
|
||||
new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN);
|
||||
if (!new_buf)
|
||||
return;
|
||||
|
||||
memcpy(new_buf, buf->data, buf->size);
|
||||
kvpfree(buf->data, buf->size);
|
||||
memcpy(new_buf, buf->data, buf->buf_size);
|
||||
kvpfree(buf->data, buf->buf_size);
|
||||
buf->data = new_buf;
|
||||
buf->size = new_size;
|
||||
buf->buf_size = new_size;
|
||||
}
|
||||
|
||||
static void journal_write_done(struct closure *cl)
|
||||
@ -1166,7 +1082,7 @@ static void journal_write_done(struct closure *cl)
|
||||
* Must come before signaling write completion, for
|
||||
* bch2_fs_journal_stop():
|
||||
*/
|
||||
mod_delayed_work(system_freezable_wq, &j->reclaim_work, 0);
|
||||
mod_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0);
|
||||
out:
|
||||
/* also must come before signalling write completion: */
|
||||
closure_debug_destroy(cl);
|
||||
@ -1220,20 +1136,22 @@ void bch2_journal_write(struct closure *cl)
|
||||
struct bch_extent_ptr *ptr;
|
||||
bool validate_before_checksum = false;
|
||||
unsigned i, sectors, bytes, u64s;
|
||||
int ret;
|
||||
|
||||
bch2_journal_pin_put(j, le64_to_cpu(w->data->seq));
|
||||
|
||||
journal_buf_realloc(j, w);
|
||||
jset = w->data;
|
||||
|
||||
j->write_start_time = local_clock();
|
||||
|
||||
start = vstruct_last(w->data);
|
||||
start = vstruct_last(jset);
|
||||
end = bch2_journal_super_entries_add_common(c, start);
|
||||
u64s = (u64 *) end - (u64 *) start;
|
||||
BUG_ON(u64s > j->entry_u64s_reserved);
|
||||
|
||||
le32_add_cpu(&w->data->u64s, u64s);
|
||||
BUG_ON(vstruct_sectors(jset, c->block_bits) >
|
||||
w->disk_sectors);
|
||||
le32_add_cpu(&jset->u64s, u64s);
|
||||
BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors);
|
||||
|
||||
journal_write_compact(jset);
|
||||
|
||||
@ -1271,12 +1189,28 @@ void bch2_journal_write(struct closure *cl)
|
||||
goto err;
|
||||
|
||||
sectors = vstruct_sectors(jset, c->block_bits);
|
||||
BUG_ON(sectors > j->prev_buf_sectors);
|
||||
BUG_ON(sectors > w->sectors);
|
||||
|
||||
bytes = vstruct_bytes(w->data);
|
||||
memset((void *) w->data + bytes, 0, (sectors << 9) - bytes);
|
||||
bytes = vstruct_bytes(jset);
|
||||
memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
|
||||
|
||||
if (journal_write_alloc(j, w, sectors)) {
|
||||
spin_lock(&j->lock);
|
||||
ret = journal_write_alloc(j, w, sectors);
|
||||
|
||||
/*
|
||||
* write is allocated, no longer need to account for it in
|
||||
* bch2_journal_space_available():
|
||||
*/
|
||||
w->sectors = 0;
|
||||
|
||||
/*
|
||||
* journal entry has been compacted and allocated, recalculate space
|
||||
* available:
|
||||
*/
|
||||
bch2_journal_space_available(j);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
if (ret) {
|
||||
bch2_journal_halt(j);
|
||||
bch_err(c, "Unable to allocate journal write");
|
||||
bch2_fatal_error(c);
|
||||
@ -1316,7 +1250,7 @@ void bch2_journal_write(struct closure *cl)
|
||||
trace_journal_write(bio);
|
||||
closure_bio_submit(bio, cl);
|
||||
|
||||
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq);
|
||||
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(jset->seq);
|
||||
}
|
||||
|
||||
for_each_rw_member(ca, c, i)
|
||||
|
@ -39,7 +39,6 @@ int bch2_journal_read(struct bch_fs *, struct list_head *);
|
||||
void bch2_journal_entries_free(struct list_head *);
|
||||
int bch2_journal_replay(struct bch_fs *, struct list_head *);
|
||||
|
||||
int bch2_journal_entry_sectors(struct journal *);
|
||||
void bch2_journal_write(struct closure *);
|
||||
|
||||
#endif /* _BCACHEFS_JOURNAL_IO_H */
|
||||
|
@@ -1,15 +1,213 @@

#include "bcachefs.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
#include "replicas.h"
#include "super.h"

/* Free space calculations: */

unsigned bch2_journal_dev_buckets_available(struct journal *j,
struct journal_device *ja)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned next = (ja->cur_idx + 1) % ja->nr;
unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;

/*
* Allocator startup needs some journal space before we can do journal
* replay:
*/
if (available &&
test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
available--;

/*
* Don't use the last bucket unless writing the new last_seq
* will make another bucket available:
*/
if (available &&
journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
--available;

return available;
}

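The modular arithmetic above treats a device's journal buckets as a ring: `cur_idx` is the bucket being written, `last_idx` is the oldest bucket still holding dirty entries, and the free span is everything strictly after the write position up to `last_idx`. A standalone check of that formula with invented numbers, not part of the patch:

#include <assert.h>

/* Free buckets in a ring of @nr journal buckets, as in
 * bch2_journal_dev_buckets_available() above (before its two adjustments). */
static unsigned ring_buckets_available(unsigned nr, unsigned cur_idx,
				       unsigned last_idx)
{
	unsigned next = (cur_idx + 1) % nr;

	return (last_idx + nr - next) % nr;
}

int main(void)
{
	/* 8 buckets, writing into bucket 5, bucket 2 is the oldest dirty one:
	 * buckets 6, 7, 0 and 1 are reusable -> 4 available. */
	assert(ring_buckets_available(8, 5, 2) == 4);

	/* Writer right behind the oldest dirty bucket: nothing available. */
	assert(ring_buckets_available(8, 1, 2) == 0);
	return 0;
}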
void bch2_journal_space_available(struct journal *j)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bch_dev *ca;
|
||||
unsigned sectors_next_entry = UINT_MAX;
|
||||
unsigned sectors_total = UINT_MAX;
|
||||
unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
|
||||
j->buf[1].buf_size >> 9);
|
||||
unsigned i, nr_online = 0, nr_devs = 0;
|
||||
unsigned unwritten_sectors = j->reservations.prev_buf_unwritten
|
||||
? journal_prev_buf(j)->sectors
|
||||
: 0;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&j->lock);
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_member_device_rcu(ca, c, i,
|
||||
&c->rw_devs[BCH_DATA_JOURNAL]) {
|
||||
struct journal_device *ja = &ca->journal;
|
||||
unsigned buckets_this_device, sectors_this_device;
|
||||
|
||||
if (!ja->nr)
|
||||
continue;
|
||||
|
||||
nr_online++;
|
||||
|
||||
buckets_this_device = bch2_journal_dev_buckets_available(j, ja);
|
||||
sectors_this_device = ja->sectors_free;
|
||||
|
||||
/*
|
||||
* We that we don't allocate the space for a journal entry
|
||||
* until we write it out - thus, account for it here:
|
||||
*/
|
||||
if (unwritten_sectors >= sectors_this_device) {
|
||||
if (!buckets_this_device)
|
||||
continue;
|
||||
|
||||
buckets_this_device--;
|
||||
sectors_this_device = ca->mi.bucket_size;
|
||||
}
|
||||
|
||||
sectors_this_device -= unwritten_sectors;
|
||||
|
||||
if (sectors_this_device < ca->mi.bucket_size &&
|
||||
buckets_this_device) {
|
||||
buckets_this_device--;
|
||||
sectors_this_device = ca->mi.bucket_size;
|
||||
}
|
||||
|
||||
if (!sectors_this_device)
|
||||
continue;
|
||||
|
||||
sectors_next_entry = min(sectors_next_entry,
|
||||
sectors_this_device);
|
||||
|
||||
sectors_total = min(sectors_total,
|
||||
buckets_this_device * ca->mi.bucket_size +
|
||||
sectors_this_device);
|
||||
|
||||
max_entry_size = min_t(unsigned, max_entry_size,
|
||||
ca->mi.bucket_size);
|
||||
|
||||
nr_devs++;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (nr_online < c->opts.metadata_replicas_required) {
|
||||
ret = -EROFS;
|
||||
sectors_next_entry = 0;
|
||||
} else if (!sectors_next_entry ||
|
||||
nr_devs < min_t(unsigned, nr_online,
|
||||
c->opts.metadata_replicas)) {
|
||||
ret = -ENOSPC;
|
||||
sectors_next_entry = 0;
|
||||
} else if (!fifo_free(&j->pin)) {
|
||||
ret = -ENOSPC;
|
||||
sectors_next_entry = 0;
|
||||
}
|
||||
|
||||
j->cur_entry_sectors = sectors_next_entry;
|
||||
j->cur_entry_error = ret;
|
||||
|
||||
if (!ret)
|
||||
journal_wake(j);
|
||||
}
|
||||
|
||||
/* Discards - last part of journal reclaim: */
|
||||
|
||||
static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
ret = ja->nr &&
|
||||
ja->last_idx != ja->cur_idx &&
|
||||
ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance ja->last_idx as long as it points to buckets that are no longer
|
||||
* dirty, issuing discards if necessary:
|
||||
*/
|
||||
static void journal_do_discards(struct journal *j)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bch_dev *ca;
|
||||
unsigned iter;
|
||||
|
||||
mutex_lock(&j->reclaim_lock);
|
||||
|
||||
for_each_rw_member(ca, c, iter) {
|
||||
struct journal_device *ja = &ca->journal;
|
||||
|
||||
while (should_discard_bucket(j, ja)) {
|
||||
if (ca->mi.discard &&
|
||||
blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
|
||||
blkdev_issue_discard(ca->disk_sb.bdev,
|
||||
bucket_to_sector(ca,
|
||||
ja->buckets[ja->last_idx]),
|
||||
ca->mi.bucket_size, GFP_NOIO, 0);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
ja->last_idx = (ja->last_idx + 1) % ja->nr;
|
||||
|
||||
bch2_journal_space_available(j);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&j->reclaim_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Journal entry pinning - machinery for holding a reference on a given journal
|
||||
* entry, holding it open to ensure it gets replayed during recovery:
|
||||
*/
|
||||
|
||||
static void bch2_journal_reclaim_fast(struct journal *j)
|
||||
{
|
||||
struct journal_entry_pin_list temp;
|
||||
bool popped = false;
|
||||
|
||||
lockdep_assert_held(&j->lock);
|
||||
|
||||
/*
|
||||
* Unpin journal entries whose reference counts reached zero, meaning
|
||||
* all btree nodes got written out
|
||||
*/
|
||||
while (!fifo_empty(&j->pin) &&
|
||||
!atomic_read(&fifo_peek_front(&j->pin).count)) {
|
||||
BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list));
|
||||
BUG_ON(!fifo_pop(&j->pin, temp));
|
||||
popped = true;
|
||||
}
|
||||
|
||||
if (popped)
|
||||
bch2_journal_space_available(j);
|
||||
}
|
||||
|
||||
void bch2_journal_pin_put(struct journal *j, u64 seq)
|
||||
{
|
||||
struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
|
||||
|
||||
if (atomic_dec_and_test(&pin_list->count)) {
|
||||
spin_lock(&j->lock);
|
||||
bch2_journal_reclaim_fast(j);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void __journal_pin_add(struct journal *j,
|
||||
u64 seq,
|
||||
struct journal_entry_pin *pin,
|
||||
@ -24,10 +222,7 @@ static inline void __journal_pin_add(struct journal *j,
|
||||
pin->seq = seq;
|
||||
pin->flush = flush_fn;
|
||||
|
||||
if (flush_fn)
|
||||
list_add(&pin->list, &pin_list->list);
|
||||
else
|
||||
INIT_LIST_HEAD(&pin->list);
|
||||
list_add(&pin->list, flush_fn ? &pin_list->list : &pin_list->flushed);
|
||||
|
||||
/*
|
||||
* If the journal is currently full, we might want to call flush_fn
|
||||
@ -129,88 +324,55 @@ void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
|
||||
* data off of a specific device:
|
||||
*/
|
||||
|
||||
/**
|
||||
* bch2_journal_reclaim_fast - do the fast part of journal reclaim
|
||||
*
|
||||
* Called from IO submission context, does not block. Cleans up after btree
|
||||
* write completions by advancing the journal pin and each cache's last_idx,
|
||||
* kicking off discards and background reclaim as necessary.
|
||||
*/
|
||||
void bch2_journal_reclaim_fast(struct journal *j)
|
||||
{
|
||||
struct journal_entry_pin_list temp;
|
||||
bool popped = false;
|
||||
|
||||
lockdep_assert_held(&j->lock);
|
||||
|
||||
/*
|
||||
* Unpin journal entries whose reference counts reached zero, meaning
|
||||
* all btree nodes got written out
|
||||
*/
|
||||
while (!fifo_empty(&j->pin) &&
|
||||
!atomic_read(&fifo_peek_front(&j->pin).count)) {
|
||||
BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list));
|
||||
BUG_ON(!fifo_pop(&j->pin, temp));
|
||||
popped = true;
|
||||
}
|
||||
|
||||
if (popped)
|
||||
journal_wake(j);
|
||||
}
|
||||
|
||||
static void journal_pin_mark_flushing(struct journal *j,
|
||||
struct journal_entry_pin *pin,
|
||||
u64 seq)
|
||||
{
|
||||
lockdep_assert_held(&j->reclaim_lock);
|
||||
|
||||
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
|
||||
BUG_ON(j->flush_in_progress);
|
||||
j->flush_in_progress = pin;
|
||||
}
|
||||
|
||||
static void journal_pin_flush(struct journal *j,
|
||||
struct journal_entry_pin *pin,
|
||||
u64 seq)
|
||||
{
|
||||
pin->flush(j, pin, seq);
|
||||
|
||||
BUG_ON(j->flush_in_progress != pin);
|
||||
j->flush_in_progress = NULL;
|
||||
wake_up(&j->pin_flush_wait);
|
||||
}
|
||||
|
||||
static struct journal_entry_pin *
|
||||
journal_get_next_pin(struct journal *j, u64 seq_to_flush, u64 *seq)
|
||||
journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
|
||||
{
|
||||
struct journal_entry_pin_list *pin_list;
|
||||
struct journal_entry_pin *ret = NULL;
|
||||
|
||||
/* no need to iterate over empty fifo entries: */
|
||||
bch2_journal_reclaim_fast(j);
|
||||
spin_lock(&j->lock);
|
||||
|
||||
BUG_ON(!atomic_read(&fifo_peek_front(&j->pin).count));
|
||||
|
||||
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq)
|
||||
if (*seq > seq_to_flush ||
|
||||
if (*seq > max_seq ||
|
||||
(ret = list_first_entry_or_null(&pin_list->list,
|
||||
struct journal_entry_pin, list)))
|
||||
break;
|
||||
|
||||
return ret;
|
||||
}
|
||||
if (ret) {
|
||||
list_move(&ret->list, &pin_list->flushed);
|
||||
BUG_ON(j->flush_in_progress);
|
||||
j->flush_in_progress = ret;
|
||||
j->last_flushed = jiffies;
|
||||
}
|
||||
|
||||
static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
ret = ja->nr &&
|
||||
(ja->last_idx != ja->cur_idx &&
|
||||
ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
||||
unsigned min_nr)
|
||||
{
|
||||
struct journal_entry_pin *pin;
|
||||
u64 seq;
|
||||
|
||||
lockdep_assert_held(&j->reclaim_lock);
|
||||
|
||||
while ((pin = journal_get_next_pin(j, min_nr
|
||||
? U64_MAX : seq_to_flush, &seq))) {
|
||||
if (min_nr)
|
||||
min_nr--;
|
||||
|
||||
pin->flush(j, pin, seq);
|
||||
|
||||
BUG_ON(j->flush_in_progress != pin);
|
||||
j->flush_in_progress = NULL;
|
||||
wake_up(&j->pin_flush_wait);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* bch2_journal_reclaim_work - free up journal buckets
|
||||
*
|
||||
@ -235,104 +397,44 @@ void bch2_journal_reclaim_work(struct work_struct *work)
|
||||
struct bch_fs, journal.reclaim_work);
|
||||
struct journal *j = &c->journal;
|
||||
struct bch_dev *ca;
|
||||
struct journal_entry_pin *pin;
|
||||
u64 seq, seq_to_flush = 0;
|
||||
unsigned iter, bucket_to_flush;
|
||||
unsigned long next_flush;
|
||||
bool reclaim_lock_held = false, need_flush;
|
||||
unsigned iter, bucket_to_flush, min_nr = 0;
|
||||
u64 seq_to_flush = 0;
|
||||
|
||||
journal_do_discards(j);
|
||||
|
||||
mutex_lock(&j->reclaim_lock);
|
||||
spin_lock(&j->lock);
|
||||
|
||||
/*
|
||||
* Advance last_idx to point to the oldest journal entry containing
|
||||
* btree node updates that have not yet been written out
|
||||
*/
|
||||
for_each_rw_member(ca, c, iter) {
|
||||
struct journal_device *ja = &ca->journal;
|
||||
|
||||
if (!ja->nr)
|
||||
continue;
|
||||
|
||||
while (should_discard_bucket(j, ja)) {
|
||||
if (!reclaim_lock_held) {
|
||||
/*
|
||||
* ugh:
|
||||
* might be called from __journal_res_get()
|
||||
* under wait_event() - have to go back to
|
||||
* TASK_RUNNING before doing something that
|
||||
* would block, but only if we're doing work:
|
||||
*/
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
mutex_lock(&j->reclaim_lock);
|
||||
reclaim_lock_held = true;
|
||||
/* recheck under reclaim_lock: */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ca->mi.discard &&
|
||||
blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
|
||||
blkdev_issue_discard(ca->disk_sb.bdev,
|
||||
bucket_to_sector(ca,
|
||||
ja->buckets[ja->last_idx]),
|
||||
ca->mi.bucket_size, GFP_NOIO, 0);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
ja->last_idx = (ja->last_idx + 1) % ja->nr;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
journal_wake(j);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write out enough btree nodes to free up 50% journal
|
||||
* buckets
|
||||
*/
|
||||
spin_lock(&j->lock);
|
||||
/* Try to keep the journal at most half full: */
|
||||
bucket_to_flush = (ja->cur_idx + (ja->nr >> 1)) % ja->nr;
|
||||
seq_to_flush = max_t(u64, seq_to_flush,
|
||||
ja->bucket_seq[bucket_to_flush]);
|
||||
spin_unlock(&j->lock);
|
||||
}

	/* Also flush if the pin fifo is more than half full */
	spin_lock(&j->lock);
	seq_to_flush = max_t(s64, seq_to_flush,
			     (s64) journal_cur_seq(j) -
			     (j->pin.size >> 1));
	spin_unlock(&j->lock);

	/*
	 * If it's been longer than j->reclaim_delay_ms since we last flushed,
	 * make sure to flush at least one journal pin:
	 */
	next_flush = j->last_flushed + msecs_to_jiffies(j->reclaim_delay_ms);
	need_flush = time_after(jiffies, next_flush);
	if (time_after(jiffies, j->last_flushed +
		       msecs_to_jiffies(j->reclaim_delay_ms)))
		min_nr = 1;

	while ((pin = journal_get_next_pin(j, need_flush
					   ? U64_MAX
					   : seq_to_flush, &seq))) {
		if (!reclaim_lock_held) {
			spin_unlock(&j->lock);
			__set_current_state(TASK_RUNNING);
			mutex_lock(&j->reclaim_lock);
			reclaim_lock_held = true;
			spin_lock(&j->lock);
			continue;
		}
	journal_flush_pins(j, seq_to_flush, min_nr);

		journal_pin_mark_flushing(j, pin, seq);
		spin_unlock(&j->lock);

		journal_pin_flush(j, pin, seq);

		need_flush = false;
		j->last_flushed = jiffies;

		spin_lock(&j->lock);
	}

	spin_unlock(&j->lock);

	if (reclaim_lock_held)
		mutex_unlock(&j->reclaim_lock);
	mutex_unlock(&j->reclaim_lock);

	if (!test_bit(BCH_FS_RO, &c->flags))
		queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work,
@ -341,8 +443,6 @@ void bch2_journal_reclaim_work(struct work_struct *work)

static int journal_flush_done(struct journal *j, u64 seq_to_flush)
{
	struct journal_entry_pin *pin;
	u64 pin_seq;
	int ret;

	ret = bch2_journal_error(j);
@ -350,16 +450,10 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
		return ret;

	mutex_lock(&j->reclaim_lock);

	journal_flush_pins(j, seq_to_flush, 0);

	spin_lock(&j->lock);

	while ((pin = journal_get_next_pin(j, seq_to_flush, &pin_seq))) {
		journal_pin_mark_flushing(j, pin, pin_seq);
		spin_unlock(&j->lock);

		journal_pin_flush(j, pin, pin_seq);

		spin_lock(&j->lock);
	}
	/*
	 * If journal replay hasn't completed, the unreplayed journal entries
	 * hold refs on their corresponding sequence numbers

@ -3,6 +3,10 @@

#define JOURNAL_PIN (32 * 1024)

unsigned bch2_journal_dev_buckets_available(struct journal *,
					    struct journal_device *);
void bch2_journal_space_available(struct journal *);

static inline bool journal_pin_active(struct journal_entry_pin *pin)
{
	return pin->seq != 0;
@ -16,6 +20,8 @@ journal_seq_pin(struct journal *j, u64 seq)
	return &j->pin.data[seq & j->pin.mask];
}

void bch2_journal_pin_put(struct journal *, u64);

void bch2_journal_pin_add(struct journal *, u64, struct journal_entry_pin *,
			  journal_pin_flush_fn);
void bch2_journal_pin_update(struct journal *, u64, struct journal_entry_pin *,
@ -27,7 +33,6 @@ void bch2_journal_pin_add_if_older(struct journal *,
				   journal_pin_flush_fn);
void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *);

void bch2_journal_reclaim_fast(struct journal *);
void bch2_journal_reclaim_work(struct work_struct *);

void bch2_journal_flush_pins(struct journal *, u64);

@ -21,8 +21,10 @@ struct journal_buf {

	struct closure_waitlist	wait;

	unsigned		size;
	unsigned		disk_sectors;
	unsigned		buf_size;	/* size in bytes of @data */
	unsigned		sectors;	/* maximum size for current entry */
	unsigned		disk_sectors;	/* maximum size entry could have been, if
						   buf_size was bigger */
	unsigned		u64s_reserved;
	/* bloom filter: */
	unsigned long		has_inode[1024 / sizeof(unsigned long)];
@ -128,9 +130,20 @@ struct journal {
	unsigned long		flags;

	union journal_res_state reservations;

	/* Max size of current journal entry */
	unsigned		cur_entry_u64s;
	unsigned		prev_buf_sectors;
	unsigned		cur_buf_sectors;
	unsigned		cur_entry_sectors;

	/*
	 * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
	 * insufficient devices:
	 */
	int			cur_entry_error;

	/* Reserved space in journal entry to be used just prior to write */
	unsigned		entry_u64s_reserved;

	unsigned		buf_size_want;

	/*
@ -141,6 +154,9 @@ struct journal {

	spinlock_t		lock;

	/* if nonzero, we may not open a new journal entry: */
	unsigned		blocked;

	/* Used when waiting because the journal was full */
	wait_queue_head_t	wait;
	struct closure_waitlist	async_wait;
@ -155,9 +171,6 @@ struct journal {
	u64			seq_ondisk;
	u64			last_seq_ondisk;

	/* Reserved space in journal entry to be used just prior to write */
	unsigned		entry_u64s_reserved;

	/*
	 * FIFO of journal entries whose btree updates have not yet been
	 * written out.

@ -82,7 +82,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
			       le64_to_cpu(u->v));
		break;
	case FS_USAGE_INODES:
		percpu_u64_set(&c->usage[0]->s.nr_inodes,
		percpu_u64_set(&c->usage[0]->nr_inodes,
			       le64_to_cpu(u->v));
		break;
	case FS_USAGE_KEY_VERSION:
@ -406,22 +406,19 @@ int bch2_fs_initialize(struct bch_fs *c)
	mutex_unlock(&c->sb_lock);

	set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
	set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);

	for (i = 0; i < BTREE_ID_NR; i++)
		bch2_btree_root_alloc(c, i);

	ret = bch2_gc(c, &journal, true);
	if (ret)
		goto err;

	set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);

	err = "unable to allocate journal buckets";
	for_each_online_member(ca, c, i)
		if (bch2_dev_journal_alloc(ca)) {
	for_each_online_member(ca, c, i) {
		ret = bch2_dev_journal_alloc(ca);
		if (ret) {
			percpu_ref_put(&ca->io_ref);
			goto err;
		}
	}

	/*
	 * journal_res_get() will crash if called before this has
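
The reworked journal-bucket allocation loop above now drops the device's io_ref before bailing out, since the iterator takes a reference per element. A userspace analogue of that error path (toy refcounts and hypothetical names, not bcachefs code):

#include <stdio.h>

struct dev {
	int refcount;
};

static int dev_journal_alloc(struct dev *d, int fail)
{
	return fail ? -12 /* -ENOMEM */ : 0;
}

int main(void)
{
	struct dev devs[3] = { { 0 }, { 0 }, { 0 } };
	int i, ret = 0;

	for (i = 0; i < 3; i++) {
		devs[i].refcount++;		/* iterator hands out a reference */

		ret = dev_journal_alloc(&devs[i], i == 1);
		if (ret) {
			devs[i].refcount--;	/* drop it before bailing out */
			goto err;
		}

		devs[i].refcount--;		/* normal end-of-iteration drop */
	}
err:
	for (i = 0; i < 3; i++)
		printf("dev %d refcount %d (ret %d)\n", i, devs[i].refcount, ret);
	return ret ? 1 : 0;
}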

@ -244,14 +244,14 @@ static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
	*dst = *src;

	for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
		if (!src->data[src_idx])
		if (!src->replicas[src_idx])
			continue;

		dst_idx = __replicas_entry_idx(dst_r,
				cpu_replicas_entry(src_r, src_idx));
		BUG_ON(dst_idx < 0);

		dst->data[dst_idx] = src->data[src_idx];
		dst->replicas[dst_idx] = src->replicas[src_idx];
	}
}

@ -261,39 +261,37 @@ static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
static int replicas_table_update(struct bch_fs *c,
				 struct bch_replicas_cpu *new_r)
{
	struct bch_fs_usage __percpu *new_usage[3] = { NULL, NULL, NULL };
	struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL };
	struct bch_fs_usage __percpu *new_scratch = NULL;
	unsigned bytes = sizeof(struct bch_fs_usage) +
		sizeof(u64) * new_r->nr;
	unsigned i;
	int ret = -ENOMEM;

	for (i = 0; i < 3; i++) {
		if (i < 2 && !c->usage[i])
			continue;
	if (!(new_usage[0] = __alloc_percpu_gfp(bytes, sizeof(u64),
						GFP_NOIO)) ||
	    (c->usage[1] &&
	     !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64),
						 GFP_NOIO))) ||
	    !(new_scratch = __alloc_percpu_gfp(bytes, sizeof(u64),
					       GFP_NOIO)))
		goto err;

		new_usage[i] = __alloc_percpu_gfp(bytes, sizeof(u64),
						  GFP_NOIO);
		if (!new_usage[i])
			goto err;
	}
	if (c->usage[0])
		__replicas_table_update(new_usage[0], new_r,
					c->usage[0], &c->replicas);
	if (c->usage[1])
		__replicas_table_update(new_usage[1], new_r,
					c->usage[1], &c->replicas);

	for (i = 0; i < 2; i++) {
		if (!c->usage[i])
			continue;

		__replicas_table_update(new_usage[i], new_r,
					c->usage[i], &c->replicas);

		swap(c->usage[i], new_usage[i]);
	}

	swap(c->usage_scratch, new_usage[2]);

	swap(c->replicas, *new_r);
	swap(c->usage[0], new_usage[0]);
	swap(c->usage[1], new_usage[1]);
	swap(c->usage_scratch, new_scratch);
	swap(c->replicas, *new_r);
	ret = 0;
err:
	for (i = 0; i < 3; i++)
		free_percpu(new_usage[i]);
	free_percpu(new_scratch);
	free_percpu(new_usage[1]);
	free_percpu(new_usage[0]);
	return ret;
}
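
replicas_table_update() rebuilds the per-filesystem usage tables when the replicas table changes: it allocates replacements sized for the new table, copies each nonzero counter to its entry's index in the new table (the job of __replicas_table_update()), and only then swaps the new tables in and frees the old ones. A simplified, single-threaded sketch of the remapping step, using plain arrays and integer ids in place of percpu counters and replicas entries:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* find an entry's index in a table of entries, -1 if absent */
static int entry_idx(const int *table, unsigned nr, int entry)
{
	for (unsigned i = 0; i < nr; i++)
		if (table[i] == entry)
			return (int) i;
	return -1;
}

int main(void)
{
	/* hypothetical replicas entries, identified here by plain ints */
	int old_table[] = { 10, 20, 30 };
	int new_table[] = { 5, 10, 20, 30 };
	uint64_t old_usage[] = { 100, 0, 300 };
	uint64_t new_usage[4] = { 0 };

	for (unsigned i = 0; i < 3; i++) {
		int dst;

		if (!old_usage[i])
			continue;

		dst = entry_idx(new_table, 4, old_table[i]);
		assert(dst >= 0);	/* every old entry exists in the new table */
		new_usage[dst] = old_usage[i];
	}

	/* the real code now swaps the new tables in and frees the old ones */
	for (unsigned i = 0; i < 4; i++)
		printf("new_usage[%u] = %llu\n", i,
		       (unsigned long long) new_usage[i]);
	return 0;
}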

@ -456,7 +454,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
		if (__replicas_has_entry(&c->replicas_gc, e))
			continue;

		v = percpu_u64_get(&c->usage[0]->data[i]);
		v = percpu_u64_get(&c->usage[0]->replicas[i]);
		if (!v)
			continue;

@ -557,7 +555,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
		BUG_ON(ret < 0);
	}

	percpu_u64_set(&c->usage[0]->data[idx], sectors);
	percpu_u64_set(&c->usage[0]->replicas[idx], sectors);

	return 0;
}
@ -974,5 +972,6 @@ int bch2_fs_replicas_init(struct bch_fs *c)
{
	c->journal.entry_u64s_reserved +=
		reserve_journal_replicas(c, &c->replicas);
	return 0;

	return replicas_table_update(c, &c->replicas);
}

@ -125,7 +125,7 @@ struct bch_hash_desc {
	bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c);
};

static inline struct btree_iter *
static __always_inline struct btree_iter *
bch2_hash_lookup(struct btree_trans *trans,
		 const struct bch_hash_desc desc,
		 const struct bch_hash_info *info,
@ -159,7 +159,7 @@ bch2_hash_lookup(struct btree_trans *trans,
	return IS_ERR(k.k) ? ERR_CAST(k.k) : ERR_PTR(-ENOENT);
}

static inline struct btree_iter *
static __always_inline struct btree_iter *
bch2_hash_hole(struct btree_trans *trans,
	       const struct bch_hash_desc desc,
	       const struct bch_hash_info *info,
@ -185,10 +185,11 @@ bch2_hash_hole(struct btree_trans *trans,
	return IS_ERR(k.k) ? ERR_CAST(k.k) : ERR_PTR(-ENOSPC);
}

static inline int bch2_hash_needs_whiteout(struct btree_trans *trans,
					   const struct bch_hash_desc desc,
					   const struct bch_hash_info *info,
					   struct btree_iter *start)
static __always_inline
int bch2_hash_needs_whiteout(struct btree_trans *trans,
			     const struct bch_hash_desc desc,
			     const struct bch_hash_info *info,
			     struct btree_iter *start)
{
	struct btree_iter *iter;
	struct bkey_s_c k;
@ -211,10 +212,11 @@ static inline int bch2_hash_needs_whiteout(struct btree_trans *trans,
	return btree_iter_err(k);
}

static inline int __bch2_hash_set(struct btree_trans *trans,
				  const struct bch_hash_desc desc,
				  const struct bch_hash_info *info,
				  u64 inode, struct bkey_i *insert, int flags)
static __always_inline
int __bch2_hash_set(struct btree_trans *trans,
		    const struct bch_hash_desc desc,
		    const struct bch_hash_info *info,
		    u64 inode, struct bkey_i *insert, int flags)
{
	struct btree_iter *iter, *slot = NULL;
	struct bkey_s_c k;
@ -276,10 +278,11 @@ static inline int bch2_hash_set(const struct bch_hash_desc desc,
				inode, insert, flags));
}

static inline int bch2_hash_delete_at(struct btree_trans *trans,
				      const struct bch_hash_desc desc,
				      const struct bch_hash_info *info,
				      struct btree_iter *iter)
static __always_inline
int bch2_hash_delete_at(struct btree_trans *trans,
			const struct bch_hash_desc desc,
			const struct bch_hash_info *info,
			struct btree_iter *iter)
{
	struct bkey_i *delete;
	int ret;
@ -300,10 +303,11 @@ static inline int bch2_hash_delete_at(struct btree_trans *trans,
	return 0;
}

static inline int bch2_hash_delete(struct btree_trans *trans,
				   const struct bch_hash_desc desc,
				   const struct bch_hash_info *info,
				   u64 inode, const void *key)
static __always_inline
int bch2_hash_delete(struct btree_trans *trans,
		     const struct bch_hash_desc desc,
		     const struct bch_hash_info *info,
		     u64 inode, const void *key)
{
	struct btree_iter *iter;
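
The hash helpers above are switched from static inline to __always_inline, presumably so that callers passing a compile-time-constant bch_hash_desc reliably get the descriptor's constants and function pointers folded away. The sketch below is only an illustration of that pattern with a made-up descriptor, not the bcachefs API:

#include <stdio.h>

#define __always_inline inline __attribute__((always_inline))

/* toy descriptor: a couple of constants plus a hash callback */
struct hash_desc {
	unsigned btree_id;
	unsigned (*hash_key)(const void *key);
};

static unsigned hash_u64(const void *key)
{
	return (unsigned) ((*(const unsigned long long *) key) * 2654435761u);
}

static __always_inline unsigned lookup_slot(const struct hash_desc desc,
					    const void *key)
{
	/* once inlined with a constant desc, the indirect call is resolved at
	 * compile time and btree_id becomes an immediate */
	return (desc.hash_key(key) ^ desc.btree_id) & 1023;
}

int main(void)
{
	static const struct hash_desc dirent_desc = {
		.btree_id	= 3,
		.hash_key	= hash_u64,
	};
	unsigned long long name_hash = 42;

	printf("slot %u\n", lookup_slot(dirent_desc, &name_hash));
	return 0;
}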

@ -136,7 +136,7 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
		sb->bio = bio;
	}

	new_sb = (void *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
	new_sb = (void *) __get_free_pages(GFP_NOFS|__GFP_ZERO, order);
	if (!new_sb)
		return -ENOMEM;

@ -923,7 +923,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
	percpu_down_read_preempt_disable(&c->mark_lock);

	{
		u64 nr_inodes = percpu_u64_get(&c->usage[0]->s.nr_inodes);
		u64 nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
		struct jset_entry_usage *u =
			container_of(entry, struct jset_entry_usage, entry);

@ -970,7 +970,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
	for (i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry *e =
			cpu_replicas_entry(&c->replicas, i);
		u64 sectors = percpu_u64_get(&c->usage[0]->data[i]);
		u64 sectors = percpu_u64_get(&c->usage[0]->replicas[i]);
		struct jset_entry_data_usage *u =
			container_of(entry, struct jset_entry_data_usage, entry);

@ -567,7 +567,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
{
	struct bch_sb_field_members *mi;
	struct bch_fs *c;
	unsigned i, iter_size, fs_usage_size;
	unsigned i, iter_size;
	const char *err;

	pr_verbose_init(opts, "");
@ -661,9 +661,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
		(btree_blocks(c) + 1) * 2 *
		sizeof(struct btree_node_iter_set);

	fs_usage_size = sizeof(struct bch_fs_usage) +
		sizeof(u64) * c->replicas.nr;

	if (!(c->wq = alloc_workqueue("bcachefs",
				WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
	    !(c->copygc_wq = alloc_workqueue("bcache_copygc",
@ -680,8 +677,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
			max(offsetof(struct btree_read_bio, bio),
			    offsetof(struct btree_write_bio, wbio.bio)),
			BIOSET_NEED_BVECS) ||
	    !(c->usage[0] = __alloc_percpu(fs_usage_size, sizeof(u64))) ||
	    !(c->usage_scratch = __alloc_percpu(fs_usage_size, sizeof(u64))) ||
	    !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
	    mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
					btree_bytes(c)) ||

@ -243,17 +243,17 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
	pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity);

	pr_buf(&out, "hidden:\t\t\t\t%llu\n",
	       fs_usage->s.hidden);
	       fs_usage->hidden);
	pr_buf(&out, "data:\t\t\t\t%llu\n",
	       fs_usage->s.data);
	       fs_usage->data);
	pr_buf(&out, "cached:\t\t\t\t%llu\n",
	       fs_usage->s.cached);
	       fs_usage->cached);
	pr_buf(&out, "reserved:\t\t\t%llu\n",
	       fs_usage->s.reserved);
	       fs_usage->reserved);
	pr_buf(&out, "nr_inodes:\t\t\t%llu\n",
	       fs_usage->s.nr_inodes);
	       fs_usage->nr_inodes);
	pr_buf(&out, "online reserved:\t\t%llu\n",
	       fs_usage->s.online_reserved);
	       fs_usage->online_reserved);

	for (i = 0;
	     i < ARRAY_SIZE(fs_usage->persistent_reserved);
@ -269,7 +269,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)

		pr_buf(&out, "\t");
		bch2_replicas_entry_to_text(&out, e);
		pr_buf(&out, ":\t%llu\n", fs_usage->data[i]);
		pr_buf(&out, ":\t%llu\n", fs_usage->replicas[i]);
	}

	percpu_up_read_preempt_enable(&c->mark_lock);
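
The renamed usage fields above are still read with percpu_u64_get() and written with percpu_u64_set(). A userspace analogue of those helpers, assuming (as the usage here suggests) that a read sums the per-cpu shards and a set collapses the value into a single shard:

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

static uint64_t percpu_get(const uint64_t shard[NR_CPUS])
{
	uint64_t sum = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		sum += shard[cpu];
	return sum;
}

static void percpu_set(uint64_t shard[NR_CPUS], uint64_t v)
{
	shard[0] = v;
	for (int cpu = 1; cpu < NR_CPUS; cpu++)
		shard[cpu] = 0;
}

int main(void)
{
	uint64_t nr_inodes[NR_CPUS] = { 3, 1, 0, 2 };

	printf("nr_inodes = %llu\n",
	       (unsigned long long) percpu_get(nr_inodes));	/* 6 */
	percpu_set(nr_inodes, 10);
	printf("nr_inodes = %llu\n",
	       (unsigned long long) percpu_get(nr_inodes));	/* 10 */
	return 0;
}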