Mirror of https://github.com/koverstreet/bcachefs-tools.git, synced 2025-02-02 00:00:03 +03:00
Update bcachefs sources to f05b3c1af9 bcachefs: Improve bucket_alloc_fail tracepoint
Commit 3765483ff0 (parent d34e731082)
@@ -1 +1 @@
-e48731a188639563444d475622782b7963df4b47
+f05b3c1af906802e46f9caca13fb6260d8293fdf
@@ -491,9 +491,30 @@ DEFINE_EVENT(bucket_alloc, bucket_alloc,
	TP_ARGS(ca, reserve)
);

DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
	TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
	TP_ARGS(ca, reserve)
TRACE_EVENT(bucket_alloc_fail,
	TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve,
		 u64 avail, u64 need_journal_commit),
	TP_ARGS(ca, reserve, avail, need_journal_commit),

	TP_STRUCT__entry(
		__field(dev_t,			dev			)
		__field(enum alloc_reserve,	reserve			)
		__field(u64,			avail			)
		__field(u64,			need_journal_commit	)
	),

	TP_fast_assign(
		__entry->dev			= ca->dev;
		__entry->reserve		= reserve;
		__entry->avail			= avail;
		__entry->need_journal_commit	= need_journal_commit;
	),

	TP_printk("%d,%d reserve %d avail %llu need_journal_commit %llu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->reserve,
		  __entry->avail,
		  __entry->need_journal_commit)
);

DEFINE_EVENT(bucket_alloc, open_bucket_alloc_fail,
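The reworked bucket_alloc_fail tracepoint above now reports, besides the device and reserve, how many buckets were available and how many were held back waiting on a journal commit. Going by its TP_printk format string, an emitted trace line would look roughly like the following (device numbers and counts are made up for illustration):

    bucket_alloc_fail: 254,32 reserve 0 avail 0 need_journal_commit 42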
(File diff suppressed because it is too large.)
@@ -8,8 +8,6 @@
#include "debug.h"
#include "super.h"

extern const char * const bch2_allocator_states[];

struct bkey_alloc_unpacked {
	u64		journal_seq;
	u64		bucket;
@@ -17,6 +15,8 @@ struct bkey_alloc_unpacked {
	u8		gen;
	u8		oldest_gen;
	u8		data_type;
	bool		need_discard:1;
	bool		need_inc_gen:1;
#define x(_name, _bits)	u##_bits _name;
	BCH_ALLOC_FIELDS_V2()
#undef  x
@@ -25,6 +25,50 @@ struct bkey_alloc_unpacked {
/* How out of date a pointer gen is allowed to be: */
#define BUCKET_GC_GEN_MAX	96U

static inline u8 alloc_gc_gen(struct bkey_alloc_unpacked a)
{
	return a.gen - a.oldest_gen;
}

enum bucket_state {
	BUCKET_free,
	BUCKET_need_gc_gens,
	BUCKET_need_discard,
	BUCKET_cached,
	BUCKET_dirty,
};

extern const char * const bch2_bucket_states[];

static inline enum bucket_state bucket_state(struct bkey_alloc_unpacked a)
{
	if (a.dirty_sectors || a.stripe)
		return BUCKET_dirty;
	if (a.cached_sectors)
		return BUCKET_cached;
	BUG_ON(a.data_type);
	if (a.need_discard)
		return BUCKET_need_discard;
	if (alloc_gc_gen(a) >= BUCKET_GC_GEN_MAX)
		return BUCKET_need_gc_gens;
	return BUCKET_free;
}

static inline u64 alloc_lru_idx(struct bkey_alloc_unpacked a)
{
	return bucket_state(a) == BUCKET_cached ? a.read_time : 0;
}

static inline u64 alloc_freespace_genbits(struct bkey_alloc_unpacked a)
{
	return ((u64) alloc_gc_gen(a) >> 4) << 56;
}

static inline struct bpos alloc_freespace_pos(struct bkey_alloc_unpacked a)
{
	return POS(a.dev, a.bucket | alloc_freespace_genbits(a));
}

/* returns true if not equal */
static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
					   struct bkey_alloc_unpacked r)
@@ -65,18 +109,21 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_alloc (struct bkey_ops) {		\
	.key_invalid	= bch2_alloc_v1_invalid,	\
	.val_to_text	= bch2_alloc_to_text,		\
	.trans_trigger	= bch2_trans_mark_alloc,	\
	.atomic_trigger	= bch2_mark_alloc,		\
}

#define bch2_bkey_ops_alloc_v2 (struct bkey_ops) {	\
	.key_invalid	= bch2_alloc_v2_invalid,	\
	.val_to_text	= bch2_alloc_to_text,		\
	.trans_trigger	= bch2_trans_mark_alloc,	\
	.atomic_trigger	= bch2_mark_alloc,		\
}

#define bch2_bkey_ops_alloc_v3 (struct bkey_ops) {	\
	.key_invalid	= bch2_alloc_v3_invalid,	\
	.val_to_text	= bch2_alloc_to_text,		\
	.trans_trigger	= bch2_trans_mark_alloc,	\
	.atomic_trigger	= bch2_mark_alloc,		\
}
@@ -87,44 +134,31 @@ static inline bool bkey_is_alloc(const struct bkey *k)
		k->type == KEY_TYPE_alloc_v3;
}

int bch2_alloc_read(struct bch_fs *, bool, bool);
int bch2_alloc_read(struct bch_fs *);

static inline void bch2_wake_allocator(struct bch_dev *ca)
int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c,
			  struct bkey_i *, unsigned);
int bch2_check_alloc_info(struct bch_fs *, bool);
void bch2_do_discards(struct bch_fs *);

static inline bool should_invalidate_buckets(struct bch_dev *ca)
{
	struct task_struct *p;
	struct bch_dev_usage u = bch2_dev_usage_read(ca);

	rcu_read_lock();
	p = rcu_dereference(ca->alloc_thread);
	if (p)
		wake_up_process(p);
	rcu_read_unlock();
	return u.d[BCH_DATA_cached].buckets &&
		u.buckets_unavailable + u.d[BCH_DATA_cached].buckets <
		ca->mi.nbuckets >> 7;
}

static inline void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca,
					   size_t bucket)
{
	if (bch2_expensive_debug_checks) {
		size_t iter;
		long i;
		unsigned j;
void bch2_do_invalidates(struct bch_fs *);

		for (j = 0; j < RESERVE_NR; j++)
			fifo_for_each_entry(i, &ca->free[j], iter)
				BUG_ON(i == bucket);
		fifo_for_each_entry(i, &ca->free_inc, iter)
			BUG_ON(i == bucket);
	}
}
int bch2_fs_freespace_init(struct bch_fs *);

void bch2_recalc_capacity(struct bch_fs *);

void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);

void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_stop(struct bch_dev *);
int bch2_dev_allocator_start(struct bch_dev *);

void bch2_fs_allocator_background_init(struct bch_fs *);

#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
@@ -14,13 +14,18 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "btree_iter.h"
#include "btree_update.h"
#include "btree_gc.h"
#include "buckets.h"
#include "buckets_waiting_for_journal.h"
#include "clock.h"
#include "debug.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "io.h"
#include "journal.h"

#include <linux/math64.h>
#include <linux/rculist.h>
@@ -78,7 +83,6 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
	percpu_down_read(&c->mark_lock);
	spin_lock(&ob->lock);

	bch2_mark_alloc_bucket(c, ca, ob->bucket, false);
	ob->valid = false;
	ob->data_type = 0;
@@ -178,39 +182,28 @@ static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
	}
}

/**
 * bch_bucket_alloc - allocate a single bucket from a specific device
 *
 * Returns index of bucket on success, 0 on failure
 * */
struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
				      enum alloc_reserve reserve,
				      bool may_alloc_partial,
				      struct closure *cl)
static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
					      enum alloc_reserve reserve,
					      struct bkey_alloc_unpacked a,
					      size_t *need_journal_commit,
					      struct closure *cl)
{
	struct open_bucket *ob;
	long b = 0;

	if (unlikely(ca->buckets_nouse && test_bit(a.bucket, ca->buckets_nouse)))
		return NULL;

	if (bch2_bucket_is_open(c, ca->dev_idx, a.bucket))
		return NULL;

	if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
			c->journal.flushed_seq_ondisk, ca->dev_idx, a.bucket)) {
		(*need_journal_commit)++;
		return NULL;
	}

	spin_lock(&c->freelist_lock);

	if (may_alloc_partial) {
		int i;

		for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
			ob = c->open_buckets + ca->open_buckets_partial[i];

			if (reserve <= ob->alloc_reserve) {
				array_remove_item(ca->open_buckets_partial,
						  ca->open_buckets_partial_nr,
						  i);
				ob->on_partial_list = false;
				ob->alloc_reserve = reserve;
				spin_unlock(&c->freelist_lock);
				return ob;
			}
		}
	}

	if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
		if (cl)
			closure_wait(&c->open_buckets_wait, cl);
@@ -219,36 +212,17 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
			c->blocked_allocate_open_bucket = local_clock();

		spin_unlock(&c->freelist_lock);

		trace_open_bucket_alloc_fail(ca, reserve);
		return ERR_PTR(-OPEN_BUCKETS_EMPTY);
	}

	if (likely(fifo_pop(&ca->free[RESERVE_NONE], b)))
		goto out;

	switch (reserve) {
	case RESERVE_BTREE_MOVINGGC:
	case RESERVE_MOVINGGC:
		if (fifo_pop(&ca->free[RESERVE_MOVINGGC], b))
			goto out;
		break;
	default:
		break;
	/* Recheck under lock: */
	if (bch2_bucket_is_open(c, ca->dev_idx, a.bucket)) {
		spin_unlock(&c->freelist_lock);
		return NULL;
	}

	if (cl)
		closure_wait(&c->freelist_wait, cl);

	if (!c->blocked_allocate)
		c->blocked_allocate = local_clock();

	spin_unlock(&c->freelist_lock);

	trace_bucket_alloc_fail(ca, reserve);
	return ERR_PTR(-FREELIST_EMPTY);
out:
	verify_not_on_freelist(c, ca, b);

	ob = bch2_open_bucket_alloc(c);

	spin_lock(&ob->lock);
@@ -257,8 +231,8 @@ out:
	ob->sectors_free = ca->mi.bucket_size;
	ob->alloc_reserve = reserve;
	ob->dev		= ca->dev_idx;
	ob->gen		= *bucket_gen(ca, b);
	ob->bucket	= b;
	ob->gen		= a.gen;
	ob->bucket	= a.bucket;
	spin_unlock(&ob->lock);

	ca->nr_open_buckets++;
@@ -280,12 +254,246 @@ out:
	spin_unlock(&c->freelist_lock);

	bch2_wake_allocator(ca);

	trace_bucket_alloc(ca, reserve);
	return ob;
}

static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca,
					    enum alloc_reserve reserve, u64 free_entry,
					    size_t *need_journal_commit,
					    struct closure *cl)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct open_bucket *ob;
	struct bkey_alloc_unpacked a;
	u64 b = free_entry & ~(~0ULL << 56);
	unsigned genbits = free_entry >> 56;
	struct printbuf buf = PRINTBUF;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(ca->dev_idx, b), BTREE_ITER_CACHED);
	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (ret) {
		ob = ERR_PTR(ret);
		goto err;
	}

	a = bch2_alloc_unpack(k);

	if (bch2_fs_inconsistent_on(bucket_state(a) != BUCKET_free, c,
			"non free bucket in freespace btree (state %s)\n"
			"  %s\n"
			"  at %llu (genbits %u)",
			bch2_bucket_states[bucket_state(a)],
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf),
			free_entry, genbits)) {
		ob = ERR_PTR(-EIO);
		goto err;
	}

	if (bch2_fs_inconsistent_on(genbits != (alloc_freespace_genbits(a) >> 56), c,
			"bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
			"  %s",
			genbits, alloc_freespace_genbits(a) >> 56,
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ob = ERR_PTR(-EIO);
		goto err;
	}

	if (bch2_fs_inconsistent_on(b < ca->mi.first_bucket || b >= ca->mi.nbuckets, c,
			"freespace btree has bucket outside allowed range (got %llu, valid %u-%llu)",
			b, ca->mi.first_bucket, ca->mi.nbuckets)) {
		ob = ERR_PTR(-EIO);
		goto err;
	}

	ob = __try_alloc_bucket(c, ca, reserve, a, need_journal_commit, cl);
err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ob;
}

static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch_dev *ca,
						    enum alloc_reserve reserve)
{
	struct open_bucket *ob;
	int i;

	spin_lock(&c->freelist_lock);

	for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
		ob = c->open_buckets + ca->open_buckets_partial[i];

		if (reserve <= ob->alloc_reserve) {
			array_remove_item(ca->open_buckets_partial,
					  ca->open_buckets_partial_nr,
					  i);
			ob->on_partial_list = false;
			ob->alloc_reserve = reserve;
			spin_unlock(&c->freelist_lock);
			return ob;
		}
	}

	spin_unlock(&c->freelist_lock);
	return NULL;
}
/*
 * This path is for before the freespace btree is initialized:
 *
 * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock &
 * journal buckets - journal buckets will be < ca->new_fs_bucket_idx
 */
static noinline struct open_bucket *
bch2_bucket_alloc_trans_early(struct btree_trans *trans,
			      struct bch_dev *ca,
			      enum alloc_reserve reserve,
			      u64 *b,
			      size_t *need_journal_commit,
			      struct closure *cl)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct open_bucket *ob = NULL;
	int ret;

	*b = max_t(u64, *b, ca->mi.first_bucket);
	*b = max_t(u64, *b, ca->new_fs_bucket_idx);

	for_each_btree_key(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, *b),
			   BTREE_ITER_SLOTS, k, ret) {
		struct bkey_alloc_unpacked a;

		if (bkey_cmp(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
			break;

		if (ca->new_fs_bucket_idx &&
		    is_superblock_bucket(ca, k.k->p.offset))
			continue;

		a = bch2_alloc_unpack(k);

		if (bucket_state(a) != BUCKET_free)
			continue;

		ob = __try_alloc_bucket(trans->c, ca, reserve, a,
					need_journal_commit, cl);
		if (ob)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	*b = iter.pos.offset;

	return ob ?: ERR_PTR(ret ?: -FREELIST_EMPTY);
}

static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
						   struct bch_dev *ca,
						   enum alloc_reserve reserve,
						   u64 *b,
						   size_t *need_journal_commit,
						   struct closure *cl)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct open_bucket *ob = NULL;
	int ret;

	if (unlikely(!ca->mi.freespace_initialized))
		return bch2_bucket_alloc_trans_early(trans, ca, reserve, b,
						     need_journal_commit, cl);

	BUG_ON(ca->new_fs_bucket_idx);

	for_each_btree_key(trans, iter, BTREE_ID_freespace,
			   POS(ca->dev_idx, *b), 0, k, ret) {
		if (k.k->p.inode != ca->dev_idx)
			break;

		for (*b = max(*b, bkey_start_offset(k.k));
		     *b != k.k->p.offset && !ob;
		     (*b)++) {
			if (btree_trans_too_many_iters(trans)) {
				ob = ERR_PTR(-EINTR);
				break;
			}

			ob = try_alloc_bucket(trans, ca, reserve, *b,
					      need_journal_commit, cl);
		}
		if (ob)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	return ob ?: ERR_PTR(ret);
}

/**
 * bch_bucket_alloc - allocate a single bucket from a specific device
 *
 * Returns index of bucket on success, 0 on failure
 * */
struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
				      enum alloc_reserve reserve,
				      bool may_alloc_partial,
				      struct closure *cl)
{
	struct open_bucket *ob = NULL;
	size_t need_journal_commit = 0;
	u64 avail = dev_buckets_available(ca, reserve);
	u64 b = 0;
	int ret;

	if (may_alloc_partial) {
		ob = try_alloc_partial_bucket(c, ca, reserve);
		if (ob)
			return ob;
	}
again:
	if (!avail) {
		if (cl) {
			closure_wait(&c->freelist_wait, cl);
			/* recheck after putting ourself on waitlist */
			avail = dev_buckets_available(ca, reserve);
			if (avail) {
				closure_wake_up(&c->freelist_wait);
				goto again;
			}
		}

		if (!c->blocked_allocate)
			c->blocked_allocate = local_clock();

		ob = ERR_PTR(-FREELIST_EMPTY);
		goto err;
	}

	ret = bch2_trans_do(c, NULL, NULL, 0,
			PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans,
							ca, reserve, &b,
							&need_journal_commit, cl)));

	if (need_journal_commit * 2 > avail)
		bch2_journal_flush_async(&c->journal, NULL);
err:
	if (!ob)
		ob = ERR_PTR(ret ?: -FREELIST_EMPTY);

	if (ob == ERR_PTR(-FREELIST_EMPTY)) {
		trace_bucket_alloc_fail(ca, reserve, avail, need_journal_commit);
		atomic_long_inc(&c->bucket_alloc_fail);
	}

	return ob;
}

static int __dev_stripe_cmp(struct dev_stripe_state *stripe,
			    unsigned l, unsigned r)
{
@@ -313,7 +521,7 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
			       struct dev_stripe_state *stripe)
{
	u64 *v = stripe->next_alloc + ca->dev_idx;
	u64 free_space = dev_buckets_available(ca);
	u64 free_space = dev_buckets_available(ca, RESERVE_NONE);
	u64 free_space_inv = free_space
		? div64_u64(1ULL << 48, free_space)
		: 1ULL << 48;
@@ -364,6 +572,7 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
{
	struct dev_alloc_list devs_sorted =
		bch2_dev_alloc_list(c, stripe, devs_may_alloc);
	unsigned dev;
	struct bch_dev *ca;
	int ret = -INSUFFICIENT_DEVICES;
	unsigned i;
@@ -373,30 +582,43 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
	for (i = 0; i < devs_sorted.nr; i++) {
		struct open_bucket *ob;

		ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
		dev = devs_sorted.devs[i];

		rcu_read_lock();
		ca = rcu_dereference(c->devs[dev]);
		if (ca)
			percpu_ref_get(&ca->ref);
		rcu_read_unlock();

		if (!ca)
			continue;

		if (!ca->mi.durability && *have_cache)
		if (!ca->mi.durability && *have_cache) {
			percpu_ref_put(&ca->ref);
			continue;
		}

		ob = bch2_bucket_alloc(c, ca, reserve,
				       flags & BUCKET_MAY_ALLOC_PARTIAL, cl);
		if (!IS_ERR(ob))
			bch2_dev_stripe_increment(ca, stripe);
		percpu_ref_put(&ca->ref);

		if (IS_ERR(ob)) {
			ret = PTR_ERR(ob);

			if (cl)
				return ret;
			break;
			continue;
		}

		add_new_bucket(c, ptrs, devs_may_alloc,
			       nr_effective, have_cache, flags, ob);

		bch2_dev_stripe_increment(ca, stripe);

		if (*nr_effective >= nr_replicas)
			return 0;
		if (*nr_effective >= nr_replicas) {
			ret = 0;
			break;
		}
	}

	return ret;
@@ -564,9 +786,6 @@ static int open_bucket_add_buckets(struct bch_fs *c,
	if (*nr_effective >= nr_replicas)
		return 0;

	percpu_down_read(&c->mark_lock);
	rcu_read_lock();

retry_blocking:
	/*
	 * Try nonblocking first, so that if one device is full we'll try from
@@ -580,9 +799,6 @@ retry_blocking:
		goto retry_blocking;
	}

	rcu_read_unlock();
	percpu_up_read(&c->mark_lock);

	return ret;
}

@@ -863,7 +1079,7 @@ err:
	case -INSUFFICIENT_DEVICES:
		return ERR_PTR(-EROFS);
	default:
		BUG();
		return ERR_PTR(ret);
	}
}
@@ -115,6 +115,20 @@ static inline bool bch2_bucket_is_open(struct bch_fs *c, unsigned dev, u64 bucket)
	return false;
}

static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64 bucket)
{
	bool ret;

	if (bch2_bucket_is_open(c, dev, bucket))
		return true;

	spin_lock(&c->freelist_lock);
	ret = bch2_bucket_is_open(c, dev, bucket);
	spin_unlock(&c->freelist_lock);

	return ret;
}

int bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *,
			  struct dev_stripe_state *, struct bch_devs_mask *,
			  unsigned, unsigned *, bool *, enum alloc_reserve,
@@ -10,18 +10,6 @@

struct ec_bucket_buf;

#define ALLOC_THREAD_STATES()		\
	x(stopped)			\
	x(running)			\
	x(blocked)			\
	x(blocked_full)

enum allocator_states {
#define x(n)	ALLOCATOR_##n,
	ALLOC_THREAD_STATES()
#undef x
};

enum alloc_reserve {
	RESERVE_BTREE_MOVINGGC	= -2,
	RESERVE_BTREE		= -1,
@@ -30,8 +18,6 @@ enum alloc_reserve {
	RESERVE_NR		= 2,
};

typedef FIFO(long)	alloc_fifo;

#define OPEN_BUCKETS_COUNT	1024

#define WRITE_POINT_HASH_NR	32
@@ -94,12 +80,4 @@ struct write_point_specifier {
	unsigned long		v;
};

struct alloc_heap_entry {
	size_t			bucket;
	size_t			nr;
	unsigned long		key;
};

typedef HEAP(struct alloc_heap_entry) alloc_heap;

#endif /* _BCACHEFS_ALLOC_TYPES_H */
@ -391,6 +391,9 @@ enum gc_phase {
|
||||
GC_PHASE_BTREE_reflink,
|
||||
GC_PHASE_BTREE_subvolumes,
|
||||
GC_PHASE_BTREE_snapshots,
|
||||
GC_PHASE_BTREE_lru,
|
||||
GC_PHASE_BTREE_freespace,
|
||||
GC_PHASE_BTREE_need_discard,
|
||||
|
||||
GC_PHASE_PENDING_DELETE,
|
||||
};
|
||||
@ -447,7 +450,7 @@ struct bch_dev {
|
||||
* gc_lock, for device resize - holding any is sufficient for access:
|
||||
* Or rcu_read_lock(), but only for ptr_stale():
|
||||
*/
|
||||
struct bucket_array __rcu *buckets[2];
|
||||
struct bucket_array __rcu *buckets_gc;
|
||||
struct bucket_gens __rcu *bucket_gens;
|
||||
u8 *oldest_gen;
|
||||
unsigned long *buckets_nouse;
|
||||
@ -459,34 +462,17 @@ struct bch_dev {
|
||||
|
||||
/* Allocator: */
|
||||
u64 new_fs_bucket_idx;
|
||||
struct task_struct __rcu *alloc_thread;
|
||||
|
||||
/*
|
||||
* free: Buckets that are ready to be used
|
||||
*
|
||||
* free_inc: Incoming buckets - these are buckets that currently have
|
||||
* cached data in them, and we can't reuse them until after we write
|
||||
* their new gen to disk. After prio_write() finishes writing the new
|
||||
* gens/prios, they'll be moved to the free list (and possibly discarded
|
||||
* in the process)
|
||||
*/
|
||||
alloc_fifo free[RESERVE_NR];
|
||||
alloc_fifo free_inc;
|
||||
unsigned nr_open_buckets;
|
||||
unsigned nr_btree_reserve;
|
||||
|
||||
open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
|
||||
open_bucket_idx_t open_buckets_partial_nr;
|
||||
|
||||
size_t fifo_last_bucket;
|
||||
|
||||
size_t inc_gen_needs_gc;
|
||||
size_t inc_gen_really_needs_gc;
|
||||
size_t buckets_waiting_on_journal;
|
||||
|
||||
enum allocator_states allocator_state;
|
||||
|
||||
alloc_heap alloc_heap;
|
||||
|
||||
atomic64_t rebalance_work;
|
||||
|
||||
struct journal_device journal;
|
||||
@ -508,8 +494,6 @@ struct bch_dev {
|
||||
enum {
|
||||
/* startup: */
|
||||
BCH_FS_ALLOC_CLEAN,
|
||||
BCH_FS_ALLOCATOR_RUNNING,
|
||||
BCH_FS_ALLOCATOR_STOPPING,
|
||||
BCH_FS_INITIAL_GC_DONE,
|
||||
BCH_FS_INITIAL_GC_UNFIXED,
|
||||
BCH_FS_TOPOLOGY_REPAIR_DONE,
|
||||
@ -773,6 +757,8 @@ struct bch_fs {
|
||||
unsigned write_points_nr;
|
||||
|
||||
struct buckets_waiting_for_journal buckets_waiting_for_journal;
|
||||
struct work_struct discard_work;
|
||||
struct work_struct invalidate_work;
|
||||
|
||||
/* GARBAGE COLLECTION */
|
||||
struct task_struct *gc_thread;
|
||||
@ -911,6 +897,7 @@ struct bch_fs {
|
||||
atomic_long_t read_realloc_races;
|
||||
atomic_long_t extent_migrate_done;
|
||||
atomic_long_t extent_migrate_raced;
|
||||
atomic_long_t bucket_alloc_fail;
|
||||
|
||||
unsigned btree_gc_periodic:1;
|
||||
unsigned copy_gc_enabled:1;
|
||||
|
@ -347,7 +347,9 @@ static inline void bkey_init(struct bkey *k)
|
||||
x(subvolume, 21) \
|
||||
x(snapshot, 22) \
|
||||
x(inode_v2, 23) \
|
||||
x(alloc_v3, 24)
|
||||
x(alloc_v3, 24) \
|
||||
x(set, 25) \
|
||||
x(lru, 26)
|
||||
|
||||
enum bch_bkey_type {
|
||||
#define x(name, nr) KEY_TYPE_##name = nr,
|
||||
@ -377,6 +379,10 @@ struct bch_hash_whiteout {
|
||||
struct bch_val v;
|
||||
};
|
||||
|
||||
struct bch_set {
|
||||
struct bch_val v;
|
||||
};
|
||||
|
||||
/* Extents */
|
||||
|
||||
/*
|
||||
@ -877,8 +883,8 @@ struct bch_alloc_v2 {
|
||||
#define BCH_ALLOC_FIELDS_V2() \
|
||||
x(read_time, 64) \
|
||||
x(write_time, 64) \
|
||||
x(dirty_sectors, 16) \
|
||||
x(cached_sectors, 16) \
|
||||
x(dirty_sectors, 32) \
|
||||
x(cached_sectors, 32) \
|
||||
x(stripe, 32) \
|
||||
x(stripe_redundancy, 8)
|
||||
|
||||
@ -893,11 +899,13 @@ struct bch_alloc_v3 {
|
||||
__u8 data[];
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
LE32_BITMASK(BCH_ALLOC_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
|
||||
LE32_BITMASK(BCH_ALLOC_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
|
||||
|
||||
enum {
|
||||
#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
|
||||
BCH_ALLOC_FIELDS_V1()
|
||||
#undef x
|
||||
BCH_ALLOC_FIELD_NR
|
||||
};
|
||||
|
||||
/* Quotas: */
|
||||
@ -1015,6 +1023,15 @@ LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
|
||||
/* True if a subvolume points to this snapshot node: */
|
||||
LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2)
|
||||
|
||||
/* LRU btree: */
|
||||
|
||||
struct bch_lru {
|
||||
struct bch_val v;
|
||||
__le64 idx;
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
#define LRU_ID_STRIPES (1U << 16)
|
||||
|
||||
/* Optional/variable size superblock sections: */
|
||||
|
||||
struct bch_sb_field {
|
||||
@ -1023,16 +1040,17 @@ struct bch_sb_field {
|
||||
__le32 type;
|
||||
};
|
||||
|
||||
#define BCH_SB_FIELDS() \
|
||||
x(journal, 0) \
|
||||
x(members, 1) \
|
||||
x(crypt, 2) \
|
||||
x(replicas_v0, 3) \
|
||||
x(quota, 4) \
|
||||
x(disk_groups, 5) \
|
||||
x(clean, 6) \
|
||||
x(replicas, 7) \
|
||||
x(journal_seq_blacklist, 8)
|
||||
#define BCH_SB_FIELDS() \
|
||||
x(journal, 0) \
|
||||
x(members, 1) \
|
||||
x(crypt, 2) \
|
||||
x(replicas_v0, 3) \
|
||||
x(quota, 4) \
|
||||
x(disk_groups, 5) \
|
||||
x(clean, 6) \
|
||||
x(replicas, 7) \
|
||||
x(journal_seq_blacklist, 8) \
|
||||
x(journal_v2, 9)
|
||||
|
||||
enum bch_sb_field_type {
|
||||
#define x(f, nr) BCH_SB_FIELD_##f = nr,
|
||||
@ -1041,6 +1059,14 @@ enum bch_sb_field_type {
|
||||
BCH_SB_FIELD_NR
|
||||
};
|
||||
|
||||
/*
|
||||
* Most superblock fields are replicated in all device's superblocks - a few are
|
||||
* not:
|
||||
*/
|
||||
#define BCH_SINGLE_DEVICE_SB_FIELDS \
|
||||
((1U << BCH_SB_FIELD_journal)| \
|
||||
(1U << BCH_SB_FIELD_journal_v2))
|
||||
|
||||
/* BCH_SB_FIELD_journal: */
|
||||
|
||||
struct bch_sb_field_journal {
|
||||
@ -1048,6 +1074,15 @@ struct bch_sb_field_journal {
|
||||
__le64 buckets[0];
|
||||
};
|
||||
|
||||
struct bch_sb_field_journal_v2 {
|
||||
struct bch_sb_field field;
|
||||
|
||||
struct bch_sb_field_journal_v2_entry {
|
||||
__le64 start;
|
||||
__le64 nr;
|
||||
} d[0];
|
||||
};
|
||||
|
||||
/* BCH_SB_FIELD_members: */
|
||||
|
||||
#define BCH_MIN_NR_NBUCKETS (1 << 6)
|
||||
@ -1069,6 +1104,8 @@ LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags[0], 14, 15)
|
||||
LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags[0], 15, 20)
|
||||
LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags[0], 20, 28)
|
||||
LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags[0], 28, 30)
|
||||
LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED,
|
||||
struct bch_member, flags[0], 30, 31)
|
||||
|
||||
#if 0
|
||||
LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
|
||||
@ -1287,7 +1324,8 @@ enum bcachefs_metadata_version {
|
||||
bcachefs_metadata_version_reflink_p_fix = 16,
|
||||
bcachefs_metadata_version_subvol_dirent = 17,
|
||||
bcachefs_metadata_version_inode_v2 = 18,
|
||||
bcachefs_metadata_version_max = 19,
|
||||
bcachefs_metadata_version_freespace = 19,
|
||||
bcachefs_metadata_version_max = 20,
|
||||
};
|
||||
|
||||
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
|
||||
@ -1804,7 +1842,10 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6);
|
||||
x(stripes, 6) \
|
||||
x(reflink, 7) \
|
||||
x(subvolumes, 8) \
|
||||
x(snapshots, 9)
|
||||
x(snapshots, 9) \
|
||||
x(lru, 10) \
|
||||
x(freespace, 11) \
|
||||
x(need_discard, 12)
|
||||
|
||||
enum btree_id {
|
||||
#define x(kwd, val) BTREE_ID_##kwd = val,
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "inode.h"
|
||||
#include "lru.h"
|
||||
#include "quota.h"
|
||||
#include "reflink.h"
|
||||
#include "subvolume.h"
|
||||
@ -85,6 +86,24 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
.val_to_text = key_type_inline_data_to_text, \
|
||||
}
|
||||
|
||||
static const char *key_type_set_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
if (bkey_val_bytes(k.k))
|
||||
return "nonempty value";
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
|
||||
{
|
||||
bch2_key_resize(l.k, l.k->size + r.k->size);
|
||||
return true;
|
||||
}
|
||||
|
||||
#define bch2_bkey_ops_set (struct bkey_ops) { \
|
||||
.key_invalid = key_type_set_invalid, \
|
||||
.key_merge = key_type_set_merge, \
|
||||
}
|
||||
|
||||
const struct bkey_ops bch2_bkey_ops[] = {
|
||||
#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
|
||||
BCH_BKEY_TYPES()
|
||||
@ -147,6 +166,15 @@ static unsigned bch2_key_types_allowed[] = {
|
||||
[BKEY_TYPE_snapshots] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_snapshot),
|
||||
[BKEY_TYPE_lru] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_lru),
|
||||
[BKEY_TYPE_freespace] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_set),
|
||||
[BKEY_TYPE_need_discard] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_set),
|
||||
[BKEY_TYPE_btree] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_btree_ptr)|
|
||||
|
@ -571,37 +571,37 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
|
||||
if (!p.ptr.cached) {
|
||||
g->_mark.gen = p.ptr.gen;
|
||||
g->gen_valid = true;
|
||||
g->gen = p.ptr.gen;
|
||||
} else {
|
||||
do_update = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (fsck_err_on(gen_cmp(p.ptr.gen, g->mark.gen) > 0, c,
|
||||
if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, c,
|
||||
"bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
|
||||
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
|
||||
p.ptr.gen, g->mark.gen,
|
||||
p.ptr.gen, g->gen,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
|
||||
if (!p.ptr.cached) {
|
||||
g->_mark.gen = p.ptr.gen;
|
||||
g->gen_valid = true;
|
||||
g->_mark.data_type = 0;
|
||||
g->_mark.dirty_sectors = 0;
|
||||
g->_mark.cached_sectors = 0;
|
||||
g->gen = p.ptr.gen;
|
||||
g->data_type = 0;
|
||||
g->dirty_sectors = 0;
|
||||
g->cached_sectors = 0;
|
||||
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
|
||||
} else {
|
||||
do_update = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (fsck_err_on(gen_cmp(g->mark.gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, c,
|
||||
if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, c,
|
||||
"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->mark.gen,
|
||||
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
|
||||
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
|
||||
p.ptr.gen,
|
||||
(printbuf_reset(&buf),
|
||||
@ -609,30 +609,30 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
do_update = true;
|
||||
|
||||
if (fsck_err_on(!p.ptr.cached &&
|
||||
gen_cmp(p.ptr.gen, g->mark.gen) < 0, c,
|
||||
gen_cmp(p.ptr.gen, g->gen) < 0, c,
|
||||
"bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
|
||||
bch2_data_types[ptr_data_type(k->k, &p.ptr)],
|
||||
p.ptr.gen, g->mark.gen,
|
||||
p.ptr.gen, g->gen,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
|
||||
do_update = true;
|
||||
|
||||
if (data_type != BCH_DATA_btree && p.ptr.gen != g->mark.gen)
|
||||
if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
|
||||
continue;
|
||||
|
||||
if (fsck_err_on(g->mark.data_type &&
|
||||
g->mark.data_type != data_type, c,
|
||||
if (fsck_err_on(g->data_type &&
|
||||
g->data_type != data_type, c,
|
||||
"bucket %u:%zu different types of data in same bucket: %s, %s\n"
|
||||
"while marking %s",
|
||||
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
|
||||
bch2_data_types[g->mark.data_type],
|
||||
bch2_data_types[g->data_type],
|
||||
bch2_data_types[data_type],
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
|
||||
if (data_type == BCH_DATA_btree) {
|
||||
g->_mark.data_type = data_type;
|
||||
g->data_type = data_type;
|
||||
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
|
||||
} else {
|
||||
do_update = true;
|
||||
@ -692,7 +692,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
struct bucket *g = PTR_GC_BUCKET(ca, ptr);
|
||||
|
||||
ptr->gen = g->mark.gen;
|
||||
ptr->gen = g->gen;
|
||||
}
|
||||
} else {
|
||||
bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
|
||||
@ -701,12 +701,12 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
|
||||
enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);
|
||||
|
||||
(ptr->cached &&
|
||||
(!g->gen_valid || gen_cmp(ptr->gen, g->mark.gen) > 0)) ||
|
||||
(!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
|
||||
(!ptr->cached &&
|
||||
gen_cmp(ptr->gen, g->mark.gen) < 0) ||
|
||||
gen_cmp(g->mark.gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
|
||||
(g->mark.data_type &&
|
||||
g->mark.data_type != data_type);
|
||||
gen_cmp(ptr->gen, g->gen) < 0) ||
|
||||
gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
|
||||
(g->data_type &&
|
||||
g->data_type != data_type);
|
||||
}));
|
||||
again:
|
||||
ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
|
||||
@ -1163,10 +1163,10 @@ static void bch2_gc_free(struct bch_fs *c)
|
||||
genradix_free(&c->gc_stripes);
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
kvpfree(rcu_dereference_protected(ca->buckets[1], 1),
|
||||
kvpfree(rcu_dereference_protected(ca->buckets_gc, 1),
|
||||
sizeof(struct bucket_array) +
|
||||
ca->mi.nbuckets * sizeof(struct bucket));
|
||||
ca->buckets[1] = NULL;
|
||||
ca->buckets_gc = NULL;
|
||||
|
||||
free_percpu(ca->usage_gc);
|
||||
ca->usage_gc = NULL;
|
||||
@ -1295,7 +1295,7 @@ static int bch2_gc_start(struct bch_fs *c,
|
||||
}
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
BUG_ON(ca->buckets[1]);
|
||||
BUG_ON(ca->buckets_gc);
|
||||
BUG_ON(ca->usage_gc);
|
||||
|
||||
ca->usage_gc = alloc_percpu(struct bch_dev_usage);
|
||||
@ -1315,9 +1315,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
|
||||
struct bucket *g;
|
||||
struct bucket gc;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_alloc_unpacked old_u, new_u, gc_u;
|
||||
struct bkey_alloc_unpacked old_u, new_u;
|
||||
struct bkey_alloc_buf *a;
|
||||
int ret;
|
||||
|
||||
@ -1329,39 +1329,27 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
old_u = new_u = bch2_alloc_unpack(k);
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
g = gc_bucket(ca, iter->pos.offset);
|
||||
gc_u = (struct bkey_alloc_unpacked) {
|
||||
.dev = iter->pos.inode,
|
||||
.bucket = iter->pos.offset,
|
||||
.gen = g->mark.gen,
|
||||
.data_type = g->mark.data_type,
|
||||
.dirty_sectors = g->mark.dirty_sectors,
|
||||
.cached_sectors = g->mark.cached_sectors,
|
||||
.read_time = g->io_time[READ],
|
||||
.write_time = g->io_time[WRITE],
|
||||
.stripe = g->stripe,
|
||||
.stripe_redundancy = g->stripe_redundancy,
|
||||
};
|
||||
gc = *gc_bucket(ca, iter->pos.offset);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
if (metadata_only &&
|
||||
gc_u.data_type != BCH_DATA_sb &&
|
||||
gc_u.data_type != BCH_DATA_journal &&
|
||||
gc_u.data_type != BCH_DATA_btree)
|
||||
gc.data_type != BCH_DATA_sb &&
|
||||
gc.data_type != BCH_DATA_journal &&
|
||||
gc.data_type != BCH_DATA_btree)
|
||||
return 0;
|
||||
|
||||
if (gen_after(old_u.gen, gc_u.gen))
|
||||
if (gen_after(old_u.gen, gc.gen))
|
||||
return 0;
|
||||
|
||||
#define copy_bucket_field(_f) \
|
||||
if (fsck_err_on(new_u._f != gc_u._f, c, \
|
||||
if (fsck_err_on(new_u._f != gc._f, c, \
|
||||
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
|
||||
": got %u, should be %u", \
|
||||
iter->pos.inode, iter->pos.offset, \
|
||||
new_u.gen, \
|
||||
bch2_data_types[new_u.data_type], \
|
||||
new_u._f, gc_u._f)) \
|
||||
new_u._f = gc_u._f; \
|
||||
gc.gen, \
|
||||
bch2_data_types[gc.data_type], \
|
||||
new_u._f, gc._f)) \
|
||||
new_u._f = gc._f; \
|
||||
|
||||
copy_bucket_field(gen);
|
||||
copy_bucket_field(data_type);
|
||||
@ -1379,7 +1367,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
if (IS_ERR(a))
|
||||
return PTR_ERR(a);
|
||||
|
||||
ret = bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN);
|
||||
ret = bch2_trans_update(trans, iter, &a->k, 0);
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
@ -1426,7 +1414,13 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
|
||||
static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bucket *g;
|
||||
struct bkey_alloc_unpacked u;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
|
||||
@ -1434,17 +1428,45 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
|
||||
GFP_KERNEL|__GFP_ZERO);
|
||||
if (!buckets) {
|
||||
percpu_ref_put(&ca->ref);
|
||||
percpu_up_write(&c->mark_lock);
|
||||
bch_err(c, "error allocating ca->buckets[gc]");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
buckets->first_bucket = ca->mi.first_bucket;
|
||||
buckets->nbuckets = ca->mi.nbuckets;
|
||||
rcu_assign_pointer(ca->buckets[1], buckets);
|
||||
rcu_assign_pointer(ca->buckets_gc, buckets);
|
||||
};
|
||||
|
||||
return bch2_alloc_read(c, true, metadata_only);
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
ca = bch_dev_bkey_exists(c, k.k->p.inode);
|
||||
g = gc_bucket(ca, k.k->p.offset);
|
||||
u = bch2_alloc_unpack(k);
|
||||
|
||||
g->gen_valid = 1;
|
||||
g->gen = u.gen;
|
||||
|
||||
if (metadata_only &&
|
||||
(u.data_type == BCH_DATA_user ||
|
||||
u.data_type == BCH_DATA_cached ||
|
||||
u.data_type == BCH_DATA_parity)) {
|
||||
g->data_type = u.data_type;
|
||||
g->dirty_sectors = u.dirty_sectors;
|
||||
g->cached_sectors = u.cached_sectors;
|
||||
g->stripe = u.stripe;
|
||||
g->stripe_redundancy = u.stripe_redundancy;
|
||||
}
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
if (ret)
|
||||
bch_err(c, "error reading alloc info at gc start: %i", ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
|
||||
@ -1453,17 +1475,17 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
|
||||
unsigned i;
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
struct bucket_array *buckets = __bucket_array(ca, true);
|
||||
struct bucket_array *buckets = gc_bucket_array(ca);
|
||||
struct bucket *g;
|
||||
|
||||
for_each_bucket(g, buckets) {
|
||||
if (metadata_only &&
|
||||
(g->mark.data_type == BCH_DATA_user ||
|
||||
g->mark.data_type == BCH_DATA_cached ||
|
||||
g->mark.data_type == BCH_DATA_parity))
|
||||
(g->data_type == BCH_DATA_user ||
|
||||
g->data_type == BCH_DATA_cached ||
|
||||
g->data_type == BCH_DATA_parity))
|
||||
continue;
|
||||
g->_mark.dirty_sectors = 0;
|
||||
g->_mark.cached_sectors = 0;
|
||||
g->dirty_sectors = 0;
|
||||
g->cached_sectors = 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
@ -1673,9 +1695,8 @@ static void bch2_gc_stripes_reset(struct bch_fs *c, bool metadata_only)
|
||||
*/
|
||||
int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
u64 start_time = local_clock();
|
||||
unsigned i, iter = 0;
|
||||
unsigned iter = 0;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
@ -1776,13 +1797,6 @@ out:
|
||||
trace_gc_end(c);
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
|
||||
|
||||
/*
|
||||
* Wake up allocator in case it was waiting for buckets
|
||||
* because of not being able to inc gens
|
||||
*/
|
||||
for_each_member_device(ca, c, i)
|
||||
bch2_wake_allocator(ca);
|
||||
|
||||
/*
|
||||
* At startup, allocations can happen directly instead of via the
|
||||
* allocator thread - issue wakeup in case they blocked on gc_lock:
|
||||
@ -1891,7 +1905,7 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i
|
||||
|
||||
u.oldest_gen = ca->oldest_gen[iter->pos.offset];
|
||||
|
||||
return bch2_alloc_write(trans, iter, &u, BTREE_TRIGGER_NORUN);
|
||||
return bch2_alloc_write(trans, iter, &u, 0);
|
||||
}
|
||||
|
||||
int bch2_gc_gens(struct bch_fs *c)
|
||||
|
@ -930,7 +930,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
|
||||
"error decrypting btree node: %i", ret))
|
||||
goto fsck_err;
|
||||
|
||||
btree_err_on(btree_node_is_extents(b) &&
|
||||
btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
|
||||
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
|
||||
BTREE_ERR_FATAL, c, NULL, b, NULL,
|
||||
"btree node does not have NEW_EXTENT_OVERWRITE set");
|
||||
|
@ -596,24 +596,9 @@ static inline enum btree_node_type btree_node_type(struct btree *b)
|
||||
return __btree_node_type(b->c.level, b->c.btree_id);
|
||||
}
|
||||
|
||||
static inline bool btree_node_type_is_extents(enum btree_node_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BKEY_TYPE_extents:
|
||||
case BKEY_TYPE_reflink:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool btree_node_is_extents(struct btree *b)
|
||||
{
|
||||
return btree_node_type_is_extents(btree_node_type(b));
|
||||
}
|
||||
|
||||
#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
|
||||
((1U << BKEY_TYPE_extents)| \
|
||||
(1U << BKEY_TYPE_alloc)| \
|
||||
(1U << BKEY_TYPE_inodes)| \
|
||||
(1U << BKEY_TYPE_stripes)| \
|
||||
(1U << BKEY_TYPE_reflink)| \
|
||||
@ -629,6 +614,16 @@ static inline bool btree_node_is_extents(struct btree *b)
|
||||
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
|
||||
BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
|
||||
|
||||
#define BTREE_ID_IS_EXTENTS \
|
||||
((1U << BTREE_ID_extents)| \
|
||||
(1U << BTREE_ID_reflink)| \
|
||||
(1U << BTREE_ID_freespace))
|
||||
|
||||
static inline bool btree_node_type_is_extents(enum btree_node_type type)
|
||||
{
|
||||
return (1U << type) & BTREE_ID_IS_EXTENTS;
|
||||
}
|
||||
|
||||
#define BTREE_ID_HAS_SNAPSHOTS \
|
||||
((1U << BTREE_ID_extents)| \
|
||||
(1U << BTREE_ID_inodes)| \
|
||||
|
@ -279,29 +279,24 @@ bch2_fs_usage_read_short(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int is_unavailable_bucket(struct bucket_mark m)
|
||||
static inline int is_unavailable_bucket(struct bkey_alloc_unpacked a)
|
||||
{
|
||||
return !is_available_bucket(m);
|
||||
return a.dirty_sectors || a.stripe;
|
||||
}
|
||||
|
||||
static inline int bucket_sectors_fragmented(struct bch_dev *ca,
|
||||
struct bucket_mark m)
|
||||
struct bkey_alloc_unpacked a)
|
||||
{
|
||||
return m.dirty_sectors
|
||||
? max(0, (int) ca->mi.bucket_size - (int) m.dirty_sectors)
|
||||
return a.dirty_sectors
|
||||
? max(0, (int) ca->mi.bucket_size - (int) a.dirty_sectors)
|
||||
: 0;
|
||||
}
|
||||
|
||||
static inline int is_stripe_data_bucket(struct bucket_mark m)
|
||||
static inline enum bch_data_type bucket_type(struct bkey_alloc_unpacked a)
|
||||
{
|
||||
return m.stripe && m.data_type != BCH_DATA_parity;
|
||||
}
|
||||
|
||||
static inline enum bch_data_type bucket_type(struct bucket_mark m)
|
||||
{
|
||||
return m.cached_sectors && !m.dirty_sectors
|
||||
return a.cached_sectors && !a.dirty_sectors
|
||||
? BCH_DATA_cached
|
||||
: m.data_type;
|
||||
: a.data_type;
|
||||
}
|
||||
|
||||
static inline void account_bucket(struct bch_fs_usage *fs_usage,
|
||||
@ -316,7 +311,8 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage,
|
||||
}
|
||||
|
||||
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct bucket_mark old, struct bucket_mark new,
|
||||
struct bkey_alloc_unpacked old,
|
||||
struct bkey_alloc_unpacked new,
|
||||
u64 journal_seq, bool gc)
|
||||
{
|
||||
struct bch_fs_usage *fs_usage;
|
||||
@ -347,9 +343,28 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
||||
u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
if (!is_available_bucket(old) && is_available_bucket(new))
|
||||
bch2_wake_allocator(ca);
|
||||
static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct bucket old, struct bucket new,
|
||||
u64 journal_seq, bool gc)
|
||||
{
|
||||
struct bkey_alloc_unpacked old_a = {
|
||||
.gen = old.gen,
|
||||
.data_type = old.data_type,
|
||||
.dirty_sectors = old.dirty_sectors,
|
||||
.cached_sectors = old.cached_sectors,
|
||||
.stripe = old.stripe,
|
||||
};
|
||||
struct bkey_alloc_unpacked new_a = {
|
||||
.gen = new.gen,
|
||||
.data_type = new.data_type,
|
||||
.dirty_sectors = new.dirty_sectors,
|
||||
.cached_sectors = new.cached_sectors,
|
||||
.stripe = new.stripe,
|
||||
};
|
||||
|
||||
bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
|
||||
}
|
||||
|
||||
static inline int __update_replicas(struct bch_fs *c,
|
||||
@ -484,19 +499,6 @@ static inline void update_cached_sectors_list(struct btree_trans *trans,
|
||||
update_replicas_list(trans, &r.e, sectors);
|
||||
}
|
||||
|
||||
void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, bool owned_by_allocator)
|
||||
{
|
||||
struct bucket *g = bucket(ca, b);
|
||||
struct bucket_mark old, new;
|
||||
|
||||
old = bucket_cmpxchg(g, new, ({
|
||||
new.owned_by_allocator = owned_by_allocator;
|
||||
}));
|
||||
|
||||
BUG_ON(owned_by_allocator == old.owned_by_allocator);
|
||||
}
|
||||
|
||||
int bch2_mark_alloc(struct btree_trans *trans,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
unsigned flags)
|
||||
@ -507,8 +509,6 @@ int bch2_mark_alloc(struct btree_trans *trans,
|
||||
struct bkey_alloc_unpacked old_u = bch2_alloc_unpack(old);
|
||||
struct bkey_alloc_unpacked new_u = bch2_alloc_unpack(new);
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, new_u.dev);
|
||||
struct bucket *g;
|
||||
struct bucket_mark old_m, m;
|
||||
int ret = 0;
|
||||
|
||||
if (bch2_trans_inconsistent_on(new_u.bucket < ca->mi.first_bucket ||
|
||||
@ -555,28 +555,46 @@ int bch2_mark_alloc(struct btree_trans *trans,
|
||||
}
|
||||
}
|
||||
|
||||
if (!new_u.data_type &&
|
||||
(!new_u.journal_seq || new_u.journal_seq < c->journal.flushed_seq_ondisk))
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
|
||||
if ((flags & BTREE_TRIGGER_INSERT) &&
|
||||
new_u.need_discard &&
|
||||
!new_u.journal_seq)
|
||||
bch2_do_discards(c);
|
||||
|
||||
if (!old_u.data_type &&
|
||||
new_u.data_type &&
|
||||
should_invalidate_buckets(ca))
|
||||
bch2_do_invalidates(c);
|
||||
|
||||
if (bucket_state(new_u) == BUCKET_need_gc_gens) {
|
||||
atomic_inc(&c->kick_gc);
|
||||
wake_up_process(c->gc_thread);
|
||||
}
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
if (!gc && new_u.gen != old_u.gen)
|
||||
*bucket_gen(ca, new_u.bucket) = new_u.gen;
|
||||
|
||||
g = __bucket(ca, new_u.bucket, gc);
|
||||
bch2_dev_usage_update(c, ca, old_u, new_u, journal_seq, gc);
|
||||
|
||||
old_m = bucket_cmpxchg(g, m, ({
|
||||
m.gen = new_u.gen;
|
||||
m.data_type = new_u.data_type;
|
||||
m.dirty_sectors = new_u.dirty_sectors;
|
||||
m.cached_sectors = new_u.cached_sectors;
|
||||
m.stripe = new_u.stripe != 0;
|
||||
}));
|
||||
if (gc) {
|
||||
struct bucket *g = gc_bucket(ca, new_u.bucket);
|
||||
|
||||
bch2_dev_usage_update(c, ca, old_m, m, journal_seq, gc);
|
||||
bucket_lock(g);
|
||||
|
||||
g->io_time[READ] = new_u.read_time;
|
||||
g->io_time[WRITE] = new_u.write_time;
|
||||
g->oldest_gen = new_u.oldest_gen;
|
||||
g->gen_valid = 1;
|
||||
g->stripe = new_u.stripe;
|
||||
g->stripe_redundancy = new_u.stripe_redundancy;
|
||||
g->gen_valid = 1;
|
||||
g->gen = new_u.gen;
|
||||
g->data_type = new_u.data_type;
|
||||
g->stripe = new_u.stripe;
|
||||
g->stripe_redundancy = new_u.stripe_redundancy;
|
||||
g->dirty_sectors = new_u.dirty_sectors;
|
||||
g->cached_sectors = new_u.cached_sectors;
|
||||
|
||||
bucket_unlock(g);
|
||||
}
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
/*
|
||||
@ -585,9 +603,9 @@ int bch2_mark_alloc(struct btree_trans *trans,
|
||||
*/
|
||||
|
||||
if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
|
||||
old_m.cached_sectors) {
|
||||
old_u.cached_sectors) {
|
||||
ret = update_cached_sectors(c, new, ca->dev_idx,
|
||||
-old_m.cached_sectors,
|
||||
-old_u.cached_sectors,
|
||||
journal_seq, gc);
|
||||
if (ret) {
|
||||
bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
|
||||
@ -595,29 +613,18 @@ int bch2_mark_alloc(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
trace_invalidate(ca, bucket_to_sector(ca, new_u.bucket),
|
||||
old_m.cached_sectors);
|
||||
old_u.cached_sectors);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define checked_add(a, b) \
|
||||
({ \
|
||||
unsigned _res = (unsigned) (a) + (b); \
|
||||
bool overflow = _res > U16_MAX; \
|
||||
if (overflow) \
|
||||
_res = U16_MAX; \
|
||||
(a) = _res; \
|
||||
overflow; \
|
||||
})
|
||||
|
||||
void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, enum bch_data_type data_type,
|
||||
unsigned sectors, struct gc_pos pos,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bucket *g;
|
||||
struct bucket_mark old, new;
|
||||
struct bucket old, new, *g;
|
||||
bool overflow;
|
||||
|
||||
BUG_ON(!(flags & BTREE_TRIGGER_GC));
|
||||
@ -632,10 +639,16 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
g = gc_bucket(ca, b);
|
||||
old = bucket_cmpxchg(g, new, ({
|
||||
new.data_type = data_type;
|
||||
overflow = checked_add(new.dirty_sectors, sectors);
|
||||
}));
|
||||
|
||||
bucket_lock(g);
|
||||
old = *g;
|
||||
|
||||
g->data_type = data_type;
|
||||
g->dirty_sectors += sectors;
|
||||
overflow = g->dirty_sectors < sectors;
|
||||
|
||||
new = *g;
|
||||
bucket_unlock(g);
|
||||
|
||||
bch2_fs_inconsistent_on(old.data_type &&
|
||||
old.data_type != data_type, c,
|
||||
@ -649,7 +662,7 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
bch2_data_types[old.data_type ?: data_type],
|
||||
old.dirty_sectors, sectors);
|
||||
|
||||
bch2_dev_usage_update(c, ca, old, new, 0, true);
|
||||
bch2_dev_usage_update_m(c, ca, old, new, 0, true);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
}
|
||||
|
||||
@ -669,7 +682,7 @@ static int check_bucket_ref(struct bch_fs *c,
|
||||
const struct bch_extent_ptr *ptr,
|
||||
s64 sectors, enum bch_data_type ptr_data_type,
|
||||
u8 b_gen, u8 bucket_data_type,
|
||||
u16 dirty_sectors, u16 cached_sectors)
|
||||
u32 dirty_sectors, u32 cached_sectors)
|
||||
{
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
|
||||
@ -737,7 +750,7 @@ static int check_bucket_ref(struct bch_fs *c,
|
||||
goto err;
|
||||
}
|
||||
|
||||
if ((unsigned) (bucket_sectors + sectors) > U16_MAX) {
|
||||
if ((unsigned) (bucket_sectors + sectors) > U32_MAX) {
|
||||
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
|
||||
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
|
||||
"while marking %s",
|
||||
@ -768,8 +781,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
||||
s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
|
||||
const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
struct bucket *g;
|
||||
struct bucket_mark new, old;
|
||||
struct bucket old, new, *g;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
@ -781,34 +793,38 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
||||
buf.atomic++;
|
||||
g = PTR_GC_BUCKET(ca, ptr);
|
||||
|
||||
if (g->mark.dirty_sectors ||
|
||||
if (g->dirty_sectors ||
|
||||
(g->stripe && g->stripe != k.k->p.offset)) {
|
||||
bch2_fs_inconsistent(c,
|
||||
"bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
|
||||
ptr->dev, PTR_BUCKET_NR(ca, ptr), g->mark.gen,
|
||||
ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
|
||||
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
old = bucket_cmpxchg(g, new, ({
|
||||
ret = check_bucket_ref(c, k, ptr, sectors, data_type,
|
||||
new.gen, new.data_type,
|
||||
new.dirty_sectors, new.cached_sectors);
|
||||
if (ret)
|
||||
goto err;
|
||||
bucket_lock(g);
|
||||
old = *g;
|
||||
|
||||
new.dirty_sectors += sectors;
|
||||
if (data_type)
|
||||
new.data_type = data_type;
|
||||
ret = check_bucket_ref(c, k, ptr, sectors, data_type,
|
||||
new.gen, new.data_type,
|
||||
new.dirty_sectors, new.cached_sectors);
|
||||
if (ret) {
|
||||
bucket_unlock(g);
|
||||
goto err;
|
||||
}
|
||||
|
||||
new.stripe = true;
|
||||
}));
|
||||
new.dirty_sectors += sectors;
|
||||
if (data_type)
|
||||
new.data_type = data_type;
|
||||
|
||||
g->stripe = k.k->p.offset;
|
||||
g->stripe_redundancy = s->nr_redundant;
|
||||
|
||||
bch2_dev_usage_update(c, ca, old, new, journal_seq, true);
|
||||
new = *g;
|
||||
bucket_unlock(g);
|
||||
|
||||
bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
|
||||
err:
|
||||
percpu_up_read(&c->mark_lock);
|
||||
printbuf_exit(&buf);
|
||||
@ -820,9 +836,9 @@ static int __mark_pointer(struct btree_trans *trans,
|
||||
const struct bch_extent_ptr *ptr,
|
||||
s64 sectors, enum bch_data_type ptr_data_type,
|
||||
u8 bucket_gen, u8 *bucket_data_type,
|
||||
u16 *dirty_sectors, u16 *cached_sectors)
|
||||
u32 *dirty_sectors, u32 *cached_sectors)
|
||||
{
|
||||
u16 *dst_sectors = !ptr->cached
|
||||
u32 *dst_sectors = !ptr->cached
|
||||
? dirty_sectors
|
||||
: cached_sectors;
|
||||
int ret = check_bucket_ref(trans->c, k, ptr, sectors, ptr_data_type,
|
||||
@ -846,11 +862,9 @@ static int bch2_mark_pointer(struct btree_trans *trans,
|
||||
{
|
||||
u64 journal_seq = trans->journal_res.seq;
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bucket_mark old, new;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
|
||||
struct bucket *g;
|
||||
struct bucket old, new, *g;
|
||||
u8 bucket_data_type;
|
||||
u64 v;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(!(flags & BTREE_TRIGGER_GC));
|
||||
@ -858,30 +872,27 @@ static int bch2_mark_pointer(struct btree_trans *trans,
|
||||
percpu_down_read(&c->mark_lock);
|
||||
g = PTR_GC_BUCKET(ca, &p.ptr);
|
||||
|
||||
v = atomic64_read(&g->_mark.v);
|
||||
do {
|
||||
new.v.counter = old.v.counter = v;
|
||||
bucket_data_type = new.data_type;
|
||||
bucket_lock(g);
|
||||
old = *g;
|
||||
|
||||
ret = __mark_pointer(trans, k, &p.ptr, sectors,
|
||||
data_type, new.gen,
|
||||
&bucket_data_type,
|
||||
&new.dirty_sectors,
|
||||
&new.cached_sectors);
|
||||
if (ret)
|
||||
goto err;
|
||||
bucket_data_type = g->data_type;
|
||||
|
||||
new.data_type = bucket_data_type;
|
||||
ret = __mark_pointer(trans, k, &p.ptr, sectors,
|
||||
data_type, g->gen,
|
||||
&bucket_data_type,
|
||||
&g->dirty_sectors,
|
||||
&g->cached_sectors);
|
||||
if (ret) {
|
||||
bucket_unlock(g);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (flags & BTREE_TRIGGER_NOATOMIC) {
|
||||
g->_mark = new;
|
||||
break;
|
||||
}
|
||||
} while ((v = atomic64_cmpxchg(&g->_mark.v,
|
||||
old.v.counter,
|
||||
new.v.counter)) != old.v.counter);
|
||||
g->data_type = bucket_data_type;
|
||||
|
||||
bch2_dev_usage_update(c, ca, old, new, journal_seq, true);
|
||||
new = *g;
|
||||
bucket_unlock(g);
|
||||
|
||||
bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
|
||||
err:
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
@ -2041,16 +2052,6 @@ recalculate:
|
||||
|
||||
/* Startup/shutdown: */
|
||||
|
||||
static void buckets_free_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct bucket_array *buckets =
|
||||
container_of(rcu, struct bucket_array, rcu);
|
||||
|
||||
kvpfree(buckets,
|
||||
sizeof(*buckets) +
|
||||
buckets->nbuckets * sizeof(struct bucket));
|
||||
}
|
||||
|
||||
static void bucket_gens_free_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct bucket_gens *buckets =
|
||||
@ -2061,46 +2062,19 @@ static void bucket_gens_free_rcu(struct rcu_head *rcu)
|
||||
|
||||
int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
{
|
||||
struct bucket_array *buckets = NULL, *old_buckets = NULL;
|
||||
struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL;
|
||||
unsigned long *buckets_nouse = NULL;
|
||||
alloc_fifo free[RESERVE_NR];
|
||||
alloc_fifo free_inc;
|
||||
alloc_heap alloc_heap;
|
||||
|
||||
size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
|
||||
ca->mi.bucket_size / btree_sectors(c));
|
||||
/* XXX: these should be tunable */
|
||||
size_t reserve_none = max_t(size_t, 1, nbuckets >> 9);
|
||||
size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 6);
|
||||
size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12),
|
||||
btree_reserve * 2);
|
||||
bool resize = ca->buckets[0] != NULL;
|
||||
bool resize = ca->bucket_gens != NULL;
|
||||
int ret = -ENOMEM;
|
||||
unsigned i;
|
||||
|
||||
memset(&free, 0, sizeof(free));
|
||||
memset(&free_inc, 0, sizeof(free_inc));
|
||||
memset(&alloc_heap, 0, sizeof(alloc_heap));
|
||||
|
||||
if (!(buckets = kvpmalloc(sizeof(struct bucket_array) +
|
||||
nbuckets * sizeof(struct bucket),
|
||||
GFP_KERNEL|__GFP_ZERO)) ||
|
||||
!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
|
||||
if (!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
|
||||
GFP_KERNEL|__GFP_ZERO)) ||
|
||||
(c->opts.buckets_nouse &&
|
||||
!(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
|
||||
sizeof(unsigned long),
|
||||
GFP_KERNEL|__GFP_ZERO))) ||
|
||||
!init_fifo(&free[RESERVE_MOVINGGC],
|
||||
copygc_reserve, GFP_KERNEL) ||
|
||||
!init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
|
||||
!init_fifo(&free_inc, free_inc_nr, GFP_KERNEL) ||
|
||||
!init_heap(&alloc_heap, ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL))
|
||||
GFP_KERNEL|__GFP_ZERO))))
|
||||
goto err;
|
||||
|
||||
buckets->first_bucket = ca->mi.first_bucket;
|
||||
buckets->nbuckets = nbuckets;
|
||||
bucket_gens->first_bucket = ca->mi.first_bucket;
|
||||
bucket_gens->nbuckets = nbuckets;
|
||||
|
||||
@ -2112,15 +2086,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
percpu_down_write(&c->mark_lock);
|
||||
}
|
||||
|
||||
old_buckets = bucket_array(ca);
|
||||
old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
|
||||
|
||||
if (resize) {
|
||||
size_t n = min(buckets->nbuckets, old_buckets->nbuckets);
|
||||
size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets);
|
||||
|
||||
memcpy(buckets->b,
|
||||
old_buckets->b,
|
||||
n * sizeof(struct bucket));
|
||||
memcpy(bucket_gens->b,
|
||||
old_bucket_gens->b,
|
||||
n);
|
||||
@ -2130,47 +2100,25 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
BITS_TO_LONGS(n) * sizeof(unsigned long));
|
||||
}
|
||||
|
||||
rcu_assign_pointer(ca->buckets[0], buckets);
|
||||
rcu_assign_pointer(ca->bucket_gens, bucket_gens);
|
||||
buckets = old_buckets;
|
||||
bucket_gens = old_bucket_gens;
|
||||
|
||||
swap(ca->buckets_nouse, buckets_nouse);
|
||||
|
||||
nbuckets = ca->mi.nbuckets;
|
||||
|
||||
if (resize) {
|
||||
percpu_up_write(&c->mark_lock);
|
||||
up_write(&ca->bucket_lock);
|
||||
up_write(&c->gc_lock);
|
||||
}
|
||||
|
||||
spin_lock(&c->freelist_lock);
|
||||
for (i = 0; i < RESERVE_NR; i++) {
|
||||
fifo_move(&free[i], &ca->free[i]);
|
||||
swap(ca->free[i], free[i]);
|
||||
}
|
||||
fifo_move(&free_inc, &ca->free_inc);
|
||||
swap(ca->free_inc, free_inc);
|
||||
spin_unlock(&c->freelist_lock);
|
||||
|
||||
/* with gc lock held, alloc_heap can't be in use: */
|
||||
swap(ca->alloc_heap, alloc_heap);
|
||||
|
||||
nbuckets = ca->mi.nbuckets;
|
||||
|
||||
if (resize)
|
||||
up_write(&ca->bucket_lock);
|
||||
|
||||
ret = 0;
|
||||
err:
|
||||
free_heap(&alloc_heap);
|
||||
free_fifo(&free_inc);
|
||||
for (i = 0; i < RESERVE_NR; i++)
|
||||
free_fifo(&free[i]);
|
||||
kvpfree(buckets_nouse,
|
||||
BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
|
||||
if (bucket_gens)
|
||||
call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu);
|
||||
if (buckets)
|
||||
call_rcu(&buckets->rcu, buckets_free_rcu);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -2179,17 +2127,10 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
free_heap(&ca->alloc_heap);
|
||||
free_fifo(&ca->free_inc);
|
||||
for (i = 0; i < RESERVE_NR; i++)
|
||||
free_fifo(&ca->free[i]);
|
||||
kvpfree(ca->buckets_nouse,
|
||||
BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
|
||||
kvpfree(rcu_dereference_protected(ca->bucket_gens, 1),
|
||||
sizeof(struct bucket_gens) + ca->mi.nbuckets);
|
||||
kvpfree(rcu_dereference_protected(ca->buckets[0], 1),
|
||||
sizeof(struct bucket_array) +
|
||||
ca->mi.nbuckets * sizeof(struct bucket));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
|
||||
free_percpu(ca->usage[i]);
|
||||
|
@ -15,54 +15,34 @@
|
||||
for (_b = (_buckets)->b + (_buckets)->first_bucket; \
|
||||
_b < (_buckets)->b + (_buckets)->nbuckets; _b++)
|
||||
|
||||
#define bucket_cmpxchg(g, new, expr) \
|
||||
({ \
|
||||
struct bucket *_g = g; \
|
||||
u64 _v = atomic64_read(&(g)->_mark.v); \
|
||||
struct bucket_mark _old; \
|
||||
\
|
||||
do { \
|
||||
(new).v.counter = _old.v.counter = _v; \
|
||||
expr; \
|
||||
} while ((_v = atomic64_cmpxchg(&(_g)->_mark.v, \
|
||||
_old.v.counter, \
|
||||
(new).v.counter)) != _old.v.counter);\
|
||||
_old; \
|
||||
})
|
||||
|
||||
static inline struct bucket_array *__bucket_array(struct bch_dev *ca,
|
||||
bool gc)
|
||||
static inline void bucket_unlock(struct bucket *b)
|
||||
{
|
||||
return rcu_dereference_check(ca->buckets[gc],
|
||||
smp_store_release(&b->lock, 0);
|
||||
}
|
||||
|
||||
static inline void bucket_lock(struct bucket *b)
|
||||
{
|
||||
while (xchg(&b->lock, 1))
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
|
||||
{
|
||||
return rcu_dereference_check(ca->buckets_gc,
|
||||
!ca->fs ||
|
||||
percpu_rwsem_is_held(&ca->fs->mark_lock) ||
|
||||
lockdep_is_held(&ca->fs->gc_lock) ||
|
||||
lockdep_is_held(&ca->bucket_lock));
|
||||
}
|
||||
|
||||
static inline struct bucket_array *bucket_array(struct bch_dev *ca)
|
||||
static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
|
||||
{
|
||||
return __bucket_array(ca, false);
|
||||
}
|
||||
|
||||
static inline struct bucket *__bucket(struct bch_dev *ca, size_t b, bool gc)
|
||||
{
|
||||
struct bucket_array *buckets = __bucket_array(ca, gc);
|
||||
struct bucket_array *buckets = gc_bucket_array(ca);
|
||||
|
||||
BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets);
|
||||
return buckets->b + b;
|
||||
}
|
||||
|
||||
static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
|
||||
{
|
||||
return __bucket(ca, b, true);
|
||||
}
|
||||
|
||||
static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
|
||||
{
|
||||
return __bucket(ca, b, false);
|
||||
}
|
||||
|
||||
static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
|
||||
{
|
||||
return rcu_dereference_check(ca->bucket_gens,
|
||||
@ -70,7 +50,6 @@ static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
|
||||
percpu_rwsem_is_held(&ca->fs->mark_lock) ||
|
||||
lockdep_is_held(&ca->fs->gc_lock) ||
|
||||
lockdep_is_held(&ca->bucket_lock));
|
||||
|
||||
}
|
||||
|
||||
static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
|
||||
@ -81,16 +60,6 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
|
||||
return gens->b + b;
|
||||
}
|
||||
|
||||
/*
|
||||
* bucket_gc_gen() returns the difference between the bucket's current gen and
|
||||
* the oldest gen of any pointer into that bucket in the btree.
|
||||
*/
|
||||
|
||||
static inline u8 bucket_gc_gen(struct bucket *g)
|
||||
{
|
||||
return g->mark.gen - g->oldest_gen;
|
||||
}
|
||||
|
||||
static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
|
||||
const struct bch_extent_ptr *ptr)
|
||||
{
|
||||
@ -141,62 +110,55 @@ static inline u8 ptr_stale(struct bch_dev *ca,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* bucket gc marks */
|
||||
|
||||
static inline bool is_available_bucket(struct bucket_mark mark)
|
||||
{
|
||||
return !mark.dirty_sectors && !mark.stripe;
|
||||
}
|
||||
|
||||
/* Device usage: */
|
||||
|
||||
struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *);
|
||||
|
||||
static inline u64 __dev_buckets_available(struct bch_dev *ca,
|
||||
struct bch_dev_usage stats)
|
||||
struct bch_dev_usage stats,
|
||||
enum alloc_reserve reserve)
|
||||
{
|
||||
u64 total = ca->mi.nbuckets - ca->mi.first_bucket;
|
||||
s64 total = ca->mi.nbuckets - ca->mi.first_bucket;
|
||||
s64 reserved = 0;
|
||||
|
||||
switch (reserve) {
|
||||
case RESERVE_NONE:
|
||||
reserved += ca->mi.nbuckets >> 6;
|
||||
fallthrough;
|
||||
case RESERVE_MOVINGGC:
|
||||
reserved += ca->nr_btree_reserve;
|
||||
fallthrough;
|
||||
case RESERVE_BTREE:
|
||||
reserved += ca->nr_btree_reserve;
|
||||
fallthrough;
|
||||
case RESERVE_BTREE_MOVINGGC:
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (WARN_ONCE(stats.buckets_unavailable > total,
|
||||
"buckets_unavailable overflow (%llu > %llu)\n",
|
||||
stats.buckets_unavailable, total))
|
||||
return 0;
|
||||
|
||||
return total - stats.buckets_unavailable;
|
||||
return max_t(s64, 0,
|
||||
total -
|
||||
stats.buckets_unavailable -
|
||||
ca->nr_open_buckets -
|
||||
reserved);
|
||||
}
|
||||
|
||||
static inline u64 dev_buckets_available(struct bch_dev *ca)
|
||||
static inline u64 dev_buckets_available(struct bch_dev *ca,
|
||||
enum alloc_reserve reserve)
|
||||
{
|
||||
return __dev_buckets_available(ca, bch2_dev_usage_read(ca));
|
||||
}
|
||||
|
||||
static inline u64 __dev_buckets_reclaimable(struct bch_dev *ca,
|
||||
struct bch_dev_usage stats)
|
||||
{
|
||||
struct bch_fs *c = ca->fs;
|
||||
s64 available = __dev_buckets_available(ca, stats);
|
||||
unsigned i;
|
||||
|
||||
spin_lock(&c->freelist_lock);
|
||||
for (i = 0; i < RESERVE_NR; i++)
|
||||
available -= fifo_used(&ca->free[i]);
|
||||
available -= fifo_used(&ca->free_inc);
|
||||
available -= ca->nr_open_buckets;
|
||||
spin_unlock(&c->freelist_lock);
|
||||
|
||||
return max(available, 0LL);
|
||||
}
|
||||
|
||||
static inline u64 dev_buckets_reclaimable(struct bch_dev *ca)
|
||||
{
|
||||
return __dev_buckets_reclaimable(ca, bch2_dev_usage_read(ca));
|
||||
return __dev_buckets_available(ca, bch2_dev_usage_read(ca), reserve);
|
||||
}
|
||||
|
||||
/* Filesystem usage: */
|
||||
|
||||
static inline unsigned fs_usage_u64s(struct bch_fs *c)
|
||||
{
|
||||
|
||||
return sizeof(struct bch_fs_usage) / sizeof(u64) +
|
||||
READ_ONCE(c->replicas.nr);
|
||||
}
|
||||
@ -224,7 +186,6 @@ bch2_fs_usage_read_short(struct bch_fs *);
|
||||
|
||||
void bch2_fs_usage_initialize(struct bch_fs *);
|
||||
|
||||
void bch2_mark_alloc_bucket(struct bch_fs *, struct bch_dev *, size_t, bool);
|
||||
void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
|
||||
size_t, enum bch_data_type, unsigned,
|
||||
struct gc_pos, unsigned);
|
||||
|
@ -7,32 +7,15 @@
|
||||
|
||||
#define BUCKET_JOURNAL_SEQ_BITS 16
|
||||
|
||||
struct bucket_mark {
|
||||
union {
|
||||
atomic64_t v;
|
||||
|
||||
struct {
|
||||
u8 gen;
|
||||
u8 data_type:3,
|
||||
owned_by_allocator:1,
|
||||
stripe:1;
|
||||
u16 dirty_sectors;
|
||||
u16 cached_sectors;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
struct bucket {
|
||||
union {
|
||||
struct bucket_mark _mark;
|
||||
const struct bucket_mark mark;
|
||||
};
|
||||
|
||||
u64 io_time[2];
|
||||
u8 oldest_gen;
|
||||
unsigned gen_valid:1;
|
||||
u8 stripe_redundancy;
|
||||
u32 stripe;
|
||||
u8 lock;
|
||||
u8 gen_valid:1;
|
||||
u8 data_type:7;
|
||||
u8 gen;
|
||||
u8 stripe_redundancy;
|
||||
u32 stripe;
|
||||
u32 dirty_sectors;
|
||||
u32 cached_sectors;
|
||||
};
|
||||
|
||||
struct bucket_array {
|
||||
@ -111,7 +94,7 @@ struct copygc_heap_entry {
|
||||
u8 dev;
|
||||
u8 gen;
|
||||
u8 replicas;
|
||||
u16 fragmentation;
|
||||
u32 fragmentation;
|
||||
u32 sectors;
|
||||
u64 offset;
|
||||
};
|
||||
|
@ -15,17 +15,26 @@ static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
unsigned ret = 0;
|
||||
unsigned ret = 0, lru = 0;
|
||||
|
||||
bkey_extent_entry_for_each(ptrs, entry) {
|
||||
switch (__extent_entry_type(entry)) {
|
||||
case BCH_EXTENT_ENTRY_ptr:
|
||||
/* Might also be updating LRU btree */
|
||||
if (entry->ptr.cached)
|
||||
lru++;
|
||||
|
||||
fallthrough;
|
||||
case BCH_EXTENT_ENTRY_stripe_ptr:
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
/*
|
||||
* Updating keys in the alloc btree may also update keys in the
|
||||
* freespace or discard btrees:
|
||||
*/
|
||||
return lru + ret * 2;
|
||||
}
|
||||
|
||||
static int count_iters_for_insert(struct btree_trans *trans,
|
||||
|
@ -15,8 +15,8 @@
|
||||
#include "journal.h"
|
||||
#include "journal_io.h"
|
||||
#include "journal_reclaim.h"
|
||||
#include "journal_sb.h"
|
||||
#include "journal_seq_blacklist.h"
|
||||
#include "super-io.h"
|
||||
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
@ -767,86 +767,75 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
bool new_fs, struct closure *cl)
|
||||
{
|
||||
struct bch_fs *c = ca->fs;
|
||||
struct journal *j = &c->journal;
|
||||
struct journal_device *ja = &ca->journal;
|
||||
struct bch_sb_field_journal *journal_buckets;
|
||||
u64 *new_bucket_seq = NULL, *new_buckets = NULL;
|
||||
struct open_bucket **ob = NULL;
|
||||
long *bu = NULL;
|
||||
unsigned i, nr_got = 0, nr_want = nr - ja->nr;
|
||||
unsigned old_nr = ja->nr;
|
||||
unsigned old_discard_idx = ja->discard_idx;
|
||||
unsigned old_dirty_idx_ondisk = ja->dirty_idx_ondisk;
|
||||
unsigned old_dirty_idx = ja->dirty_idx;
|
||||
unsigned old_cur_idx = ja->cur_idx;
|
||||
int ret = 0;
|
||||
|
||||
/* don't handle reducing nr of buckets yet: */
|
||||
if (nr <= ja->nr)
|
||||
return 0;
|
||||
bch2_journal_block(j);
|
||||
bch2_journal_flush_all_pins(j);
|
||||
|
||||
bu = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL);
|
||||
ob = kzalloc(nr_want * sizeof(*ob), GFP_KERNEL);
|
||||
new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL);
|
||||
new_bucket_seq = kzalloc(nr * sizeof(u64), GFP_KERNEL);
|
||||
if (!new_buckets || !new_bucket_seq) {
|
||||
if (!bu || !ob || !new_buckets || !new_bucket_seq) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
goto err_unblock;
|
||||
}
|
||||
|
||||
journal_buckets = bch2_sb_resize_journal(&ca->disk_sb,
|
||||
nr + sizeof(*journal_buckets) / sizeof(u64));
|
||||
if (!journal_buckets) {
|
||||
ret = -ENOSPC;
|
||||
goto err;
|
||||
for (nr_got = 0; nr_got < nr_want; nr_got++) {
|
||||
if (new_fs) {
|
||||
bu[nr_got] = bch2_bucket_alloc_new_fs(ca);
|
||||
if (bu[nr_got] < 0) {
|
||||
ret = -ENOSPC;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_NONE,
|
||||
false, cl);
|
||||
if (IS_ERR(ob[nr_got])) {
|
||||
ret = cl ? -EAGAIN : -ENOSPC;
|
||||
break;
|
||||
}
|
||||
|
||||
bu[nr_got] = ob[nr_got]->bucket;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nr_got)
|
||||
goto err_unblock;
|
||||
|
||||
/*
|
||||
* We may be called from the device add path, before the new device has
|
||||
* actually been added to the running filesystem:
|
||||
*/
|
||||
if (!new_fs)
|
||||
spin_lock(&c->journal.lock);
|
||||
spin_lock(&j->lock);
|
||||
|
||||
memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64));
|
||||
memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64));
|
||||
swap(new_buckets, ja->buckets);
|
||||
swap(new_bucket_seq, ja->bucket_seq);
|
||||
|
||||
if (!new_fs)
|
||||
spin_unlock(&c->journal.lock);
|
||||
for (i = 0; i < nr_got; i++) {
|
||||
unsigned pos = ja->discard_idx ?: ja->nr;
|
||||
long b = bu[i];
|
||||
|
||||
while (ja->nr < nr) {
|
||||
struct open_bucket *ob = NULL;
|
||||
unsigned pos;
|
||||
long b;
|
||||
|
||||
if (new_fs) {
|
||||
b = bch2_bucket_alloc_new_fs(ca);
|
||||
if (b < 0) {
|
||||
ret = -ENOSPC;
|
||||
goto err;
|
||||
}
|
||||
} else {
|
||||
rcu_read_lock();
|
||||
ob = bch2_bucket_alloc(c, ca, RESERVE_NONE,
|
||||
false, cl);
|
||||
rcu_read_unlock();
|
||||
if (IS_ERR(ob)) {
|
||||
ret = cl ? -EAGAIN : -ENOSPC;
|
||||
goto err;
|
||||
}
|
||||
|
||||
b = ob->bucket;
|
||||
}
|
||||
|
||||
if (c)
|
||||
spin_lock(&c->journal.lock);
|
||||
|
||||
/*
|
||||
* XXX
|
||||
* For resize at runtime, we should be writing the new
|
||||
* superblock before inserting into the journal array
|
||||
*/
|
||||
|
||||
pos = ja->discard_idx ?: ja->nr;
|
||||
__array_insert_item(ja->buckets, ja->nr, pos);
|
||||
__array_insert_item(ja->bucket_seq, ja->nr, pos);
|
||||
__array_insert_item(journal_buckets->buckets, ja->nr, pos);
|
||||
ja->nr++;
|
||||
|
||||
ja->buckets[pos] = b;
|
||||
ja->bucket_seq[pos] = 0;
|
||||
journal_buckets->buckets[pos] = cpu_to_le64(b);
|
||||
|
||||
if (pos <= ja->discard_idx)
|
||||
ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
|
||||
@ -856,29 +845,54 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
|
||||
if (pos <= ja->cur_idx)
|
||||
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
|
||||
}
|
||||
|
||||
if (c)
|
||||
spin_unlock(&c->journal.lock);
|
||||
ret = bch2_journal_buckets_to_sb(c, ca);
|
||||
if (ret) {
|
||||
/* Revert: */
|
||||
swap(new_buckets, ja->buckets);
|
||||
swap(new_bucket_seq, ja->bucket_seq);
|
||||
ja->nr = old_nr;
|
||||
ja->discard_idx = old_discard_idx;
|
||||
ja->dirty_idx_ondisk = old_dirty_idx_ondisk;
|
||||
ja->dirty_idx = old_dirty_idx;
|
||||
ja->cur_idx = old_cur_idx;
|
||||
}
|
||||
|
||||
if (!new_fs) {
|
||||
if (!new_fs)
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
bch2_journal_unblock(j);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (!new_fs) {
|
||||
for (i = 0; i < nr_got; i++) {
|
||||
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
bch2_trans_mark_metadata_bucket(&trans, ca,
|
||||
b, BCH_DATA_journal,
|
||||
bu[i], BCH_DATA_journal,
|
||||
ca->mi.bucket_size));
|
||||
|
||||
bch2_open_bucket_put(c, ob);
|
||||
|
||||
if (ret)
|
||||
if (ret) {
|
||||
bch2_fs_inconsistent(c, "error marking new journal buckets: %i", ret);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
}
|
||||
err:
|
||||
bch2_sb_resize_journal(&ca->disk_sb,
|
||||
ja->nr + sizeof(*journal_buckets) / sizeof(u64));
|
||||
if (ob && !new_fs)
|
||||
for (i = 0; i < nr_got; i++)
|
||||
bch2_open_bucket_put(c, ob[i]);
|
||||
|
||||
kfree(new_bucket_seq);
|
||||
kfree(new_buckets);
|
||||
kfree(ob);
|
||||
kfree(bu);
|
||||
|
||||
return ret;
|
||||
err_unblock:
|
||||
bch2_journal_unblock(j);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -891,11 +905,15 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct journal_device *ja = &ca->journal;
|
||||
struct closure cl;
|
||||
unsigned current_nr;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
/* don't handle reducing nr of buckets yet: */
|
||||
if (nr < ja->nr)
|
||||
return 0;
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
do {
|
||||
while (ja->nr != nr && (ret == 0 || ret == -EAGAIN)) {
|
||||
struct disk_reservation disk_res = { 0, 0 };
|
||||
|
||||
closure_sync(&cl);
|
||||
@ -923,7 +941,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
|
||||
if (ja->nr != current_nr)
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
} while (ret == -EAGAIN);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1092,9 +1110,20 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
|
||||
struct journal_device *ja = &ca->journal;
|
||||
struct bch_sb_field_journal *journal_buckets =
|
||||
bch2_sb_get_journal(sb);
|
||||
struct bch_sb_field_journal_v2 *journal_buckets_v2 =
|
||||
bch2_sb_get_journal_v2(sb);
|
||||
unsigned i;
|
||||
|
||||
ja->nr = bch2_nr_journal_buckets(journal_buckets);
|
||||
ja->nr = 0;
|
||||
|
||||
if (journal_buckets_v2) {
|
||||
unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
|
||||
|
||||
for (i = 0; i < nr; i++)
|
||||
ja->nr += le64_to_cpu(journal_buckets_v2->d[i].nr);
|
||||
} else if (journal_buckets) {
|
||||
ja->nr = bch2_nr_journal_buckets(journal_buckets);
|
||||
}
|
||||
|
||||
ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
|
||||
if (!ja->bucket_seq)
|
||||
@ -1109,8 +1138,18 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
|
||||
if (!ja->buckets)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < ja->nr; i++)
|
||||
ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]);
|
||||
if (journal_buckets_v2) {
|
||||
unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
|
||||
unsigned j, dst = 0;
|
||||
|
||||
for (i = 0; i < nr; i++)
|
||||
for (j = 0; j < le64_to_cpu(journal_buckets_v2->d[i].nr); j++)
|
||||
ja->buckets[dst++] =
|
||||
le64_to_cpu(journal_buckets_v2->d[i].start) + j;
|
||||
} else if (journal_buckets) {
|
||||
for (i = 0; i < ja->nr; i++)
|
||||
ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include "bcachefs.h"
|
||||
#include "alloc_background.h"
|
||||
#include "alloc_foreground.h"
|
||||
#include "btree_io.h"
|
||||
#include "btree_update_interior.h"
|
||||
@ -1372,6 +1373,9 @@ static void journal_write_done(struct closure *cl)
|
||||
if (!JSET_NO_FLUSH(w->data)) {
|
||||
j->flushed_seq_ondisk = seq;
|
||||
j->last_seq_ondisk = w->last_seq;
|
||||
|
||||
bch2_do_discards(c);
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
}
|
||||
} else if (!j->err_seq || seq < j->err_seq)
|
||||
j->err_seq = seq;
|
||||
|
libbcachefs/journal_sb.c (new file, 222 lines)
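This new file pulls the journal superblock-field code out of super-io.c (the old copy is deleted in the super-io.c hunk further down) and adds the v2 field, which records journal buckets as (start, nr) ranges instead of one u64 per bucket. A worked illustration of the encoding (hypothetical bucket numbers, not taken from the commit):

/*
 * ja->buckets = { 64, 65, 66, 67, 200, 201 }
 *
 * is written by bch2_journal_buckets_to_sb() as two range entries rather
 * than six u64s:
 *
 *   d[0] = { .start = 64,  .nr = 4 }
 *   d[1] = { .start = 200, .nr = 2 }
 *
 * bch2_dev_journal_init() (in the journal.c hunk above) expands each range
 * back into the flat ja->buckets[] array when the device is opened.
 */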
@ -0,0 +1,222 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "journal_sb.h"
|
||||
|
||||
#include <linux/sort.h>
|
||||
|
||||
/* BCH_SB_FIELD_journal: */
|
||||
|
||||
static int u64_cmp(const void *_l, const void *_r)
|
||||
{
|
||||
const u64 *l = _l;
|
||||
const u64 *r = _r;
|
||||
|
||||
return cmp_int(*l, *r);
|
||||
}
|
||||
|
||||
static int bch2_sb_journal_validate(struct bch_sb *sb,
|
||||
struct bch_sb_field *f,
|
||||
struct printbuf *err)
|
||||
{
|
||||
struct bch_sb_field_journal *journal = field_to_type(f, journal);
|
||||
struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx;
|
||||
int ret = -EINVAL;
|
||||
unsigned nr;
|
||||
unsigned i;
|
||||
u64 *b;
|
||||
|
||||
nr = bch2_nr_journal_buckets(journal);
|
||||
if (!nr)
|
||||
return 0;
|
||||
|
||||
b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL);
|
||||
if (!b)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < nr; i++)
|
||||
b[i] = le64_to_cpu(journal->buckets[i]);
|
||||
|
||||
sort(b, nr, sizeof(u64), u64_cmp, NULL);
|
||||
|
||||
if (!b[0]) {
|
||||
pr_buf(err, "journal bucket at sector 0");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (b[0] < le16_to_cpu(m->first_bucket)) {
|
||||
pr_buf(err, "journal bucket %llu before first bucket %u",
|
||||
b[0], le16_to_cpu(m->first_bucket));
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (b[nr - 1] >= le64_to_cpu(m->nbuckets)) {
|
||||
pr_buf(err, "journal bucket %llu past end of device (nbuckets %llu)",
|
||||
b[nr - 1], le64_to_cpu(m->nbuckets));
|
||||
goto err;
|
||||
}
|
||||
|
||||
for (i = 0; i + 1 < nr; i++)
|
||||
if (b[i] == b[i + 1]) {
|
||||
pr_buf(err, "duplicate journal buckets %llu", b[i]);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
err:
|
||||
kfree(b);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_sb_journal_to_text(struct printbuf *out, struct bch_sb *sb,
|
||||
struct bch_sb_field *f)
|
||||
{
|
||||
struct bch_sb_field_journal *journal = field_to_type(f, journal);
|
||||
unsigned i, nr = bch2_nr_journal_buckets(journal);
|
||||
|
||||
pr_buf(out, "Buckets: ");
|
||||
for (i = 0; i < nr; i++)
|
||||
pr_buf(out, " %llu", le64_to_cpu(journal->buckets[i]));
|
||||
pr_newline(out);
|
||||
}
|
||||
|
||||
const struct bch_sb_field_ops bch_sb_field_ops_journal = {
|
||||
.validate = bch2_sb_journal_validate,
|
||||
.to_text = bch2_sb_journal_to_text,
|
||||
};
|
||||
|
||||
struct u64_range {
|
||||
u64 start;
|
||||
u64 end;
|
||||
};
|
||||
|
||||
static int u64_range_cmp(const void *_l, const void *_r)
|
||||
{
|
||||
const struct u64_range *l = _l;
|
||||
const struct u64_range *r = _r;
|
||||
|
||||
return cmp_int(l->start, r->start);
|
||||
}
|
||||
|
||||
static int bch2_sb_journal_v2_validate(struct bch_sb *sb,
|
||||
struct bch_sb_field *f,
|
||||
struct printbuf *err)
|
||||
{
|
||||
struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2);
|
||||
struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx;
|
||||
int ret = -EINVAL;
|
||||
unsigned nr;
|
||||
unsigned i;
|
||||
struct u64_range *b;
|
||||
|
||||
nr = bch2_sb_field_journal_v2_nr_entries(journal);
|
||||
if (!nr)
|
||||
return 0;
|
||||
|
||||
b = kmalloc_array(sizeof(*b), nr, GFP_KERNEL);
|
||||
if (!b)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
b[i].start = le64_to_cpu(journal->d[i].start);
|
||||
b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr);
|
||||
}
|
||||
|
||||
sort(b, nr, sizeof(*b), u64_range_cmp, NULL);
|
||||
|
||||
if (!b[0].start) {
|
||||
pr_buf(err, "journal bucket at sector 0");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (b[0].start < le16_to_cpu(m->first_bucket)) {
|
||||
pr_buf(err, "journal bucket %llu before first bucket %u",
|
||||
b[0], le16_to_cpu(m->first_bucket));
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (b[nr - 1].end > le64_to_cpu(m->nbuckets)) {
|
||||
pr_buf(err, "journal bucket %llu past end of device (nbuckets %llu)",
|
||||
b[nr - 1], le64_to_cpu(m->nbuckets));
|
||||
goto err;
|
||||
}
|
||||
|
||||
for (i = 0; i + 1 < nr; i++) {
|
||||
if (b[i].end == b[i + 1].start) {
|
||||
pr_buf(err, "contiguous journal buckets ranges %llu-%llu, %llu-%llu",
|
||||
b[i].start, b[i].end, b[i + 1].start, b[i + 1].end);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (b[i].end > b[i + 1].start) {
|
||||
pr_buf(err, "duplicate journal buckets in ranges %llu-%llu, %llu-%llu",
|
||||
b[i].start, b[i].end, b[i + 1].start, b[i + 1].end);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
err:
|
||||
kfree(b);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_sb_journal_v2_to_text(struct printbuf *out, struct bch_sb *sb,
|
||||
struct bch_sb_field *f)
|
||||
{
|
||||
struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2);
|
||||
unsigned i, nr = bch2_sb_field_journal_v2_nr_entries(journal);
|
||||
|
||||
pr_buf(out, "Buckets: ");
|
||||
for (i = 0; i < nr; i++)
|
||||
pr_buf(out, " %llu-%llu",
|
||||
le64_to_cpu(journal->d[i].start),
|
||||
le64_to_cpu(journal->d[i].start) + le64_to_cpu(journal->d[i].nr));
|
||||
pr_newline(out);
|
||||
}
|
||||
|
||||
const struct bch_sb_field_ops bch_sb_field_ops_journal_v2 = {
|
||||
.validate = bch2_sb_journal_v2_validate,
|
||||
.to_text = bch2_sb_journal_v2_to_text,
|
||||
};
|
||||
|
||||
int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct journal_device *ja = &ca->journal;
|
||||
struct bch_sb_field_journal_v2 *j;
|
||||
unsigned i, dst = 0, nr = 1;
|
||||
|
||||
lockdep_assert_held(&c->sb_lock);
|
||||
|
||||
if (!ja->nr) {
|
||||
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
|
||||
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal_v2);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 0; i + 1 < ja->nr; i++)
|
||||
if (ja->buckets[i] + 1 != ja->buckets[i + 1])
|
||||
nr++;
|
||||
|
||||
j = bch2_sb_resize_journal_v2(&ca->disk_sb,
|
||||
(sizeof(*j) + sizeof(j->d[0]) * nr) / sizeof(u64));
|
||||
if (!j)
|
||||
return -ENOSPC;
|
||||
|
||||
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
|
||||
|
||||
j->d[dst].start = le64_to_cpu(ja->buckets[0]);
|
||||
j->d[dst].nr = le64_to_cpu(1);
|
||||
|
||||
for (i = 1; i < ja->nr; i++) {
|
||||
if (ja->buckets[i] == ja->buckets[i - 1] + 1) {
|
||||
le64_add_cpu(&j->d[dst].nr, 1);
|
||||
} else {
|
||||
dst++;
|
||||
j->d[dst].start = le64_to_cpu(ja->buckets[i]);
|
||||
j->d[dst].nr = le64_to_cpu(1);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
libbcachefs/journal_sb.h (new file, 24 lines)
@ -0,0 +1,24 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#include "super-io.h"
|
||||
#include "vstructs.h"
|
||||
|
||||
static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j)
|
||||
{
|
||||
return j
|
||||
? (__le64 *) vstruct_end(&j->field) - j->buckets
|
||||
: 0;
|
||||
}
|
||||
|
||||
static inline unsigned bch2_sb_field_journal_v2_nr_entries(struct bch_sb_field_journal_v2 *j)
|
||||
{
|
||||
if (!j)
|
||||
return 0;
|
||||
|
||||
return (struct bch_sb_field_journal_v2_entry *) vstruct_end(&j->field) - &j->d[0];
|
||||
}
|
||||
|
||||
extern const struct bch_sb_field_ops bch_sb_field_ops_journal;
|
||||
extern const struct bch_sb_field_ops bch_sb_field_ops_journal_v2;
|
||||
|
||||
int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *);
|
libbcachefs/lru.c (new file, 203 lines)
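lru.c implements the new BTREE_ID_lru btree: each key lives at POS(device index, time) and its value carries the bucket index as a back pointer, which bch2_check_lrus() below verifies against the matching alloc key (the bucket must be cached and its read_time must equal the key's offset). A rough caller-side sketch of the interface this file adds; the wrapper function and its name are illustrative, only bch2_lru_change() comes from the commit:

/* Sketch: move a bucket's LRU entry when its read_time changes. */
static int update_bucket_lru_sketch(struct btree_trans *trans, unsigned dev,
				    u64 bucket, u64 old_read_time,
				    u64 new_read_time)
{
	/*
	 * bch2_lru_change() deletes the entry at (dev, old_read_time) and
	 * inserts one at (dev, new_read_time) pointing back at the bucket.
	 * If that slot is already taken, lru_set() walks forward to the next
	 * free slot, which is why the new time is passed by pointer and may
	 * be updated.
	 */
	return bch2_lru_change(trans, dev, bucket, old_read_time, &new_read_time);
}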
@ -0,0 +1,203 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "alloc_background.h"
|
||||
#include "btree_iter.h"
|
||||
#include "btree_update.h"
|
||||
#include "error.h"
|
||||
#include "lru.h"
|
||||
#include "recovery.h"
|
||||
|
||||
const char *bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
const struct bch_lru *lru = bkey_s_c_to_lru(k).v;
|
||||
|
||||
if (bkey_val_bytes(k.k) < sizeof(*lru))
|
||||
return "incorrect value size";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void bch2_lru_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
const struct bch_lru *lru = bkey_s_c_to_lru(k).v;
|
||||
|
||||
pr_buf(out, "idx %llu", le64_to_cpu(lru->idx));
|
||||
}
|
||||
|
||||
static int lru_delete(struct btree_trans *trans, u64 id, u64 idx, u64 time)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u64 existing_idx;
|
||||
int ret = 0;
|
||||
|
||||
if (!time)
|
||||
return 0;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
|
||||
POS(id, time),
|
||||
BTREE_ITER_INTENT|
|
||||
BTREE_ITER_WITH_UPDATES);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type != KEY_TYPE_lru) {
|
||||
bch2_fs_inconsistent(c,
|
||||
"pointer to nonexistent lru %llu:%llu",
|
||||
id, time);
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
|
||||
existing_idx = le64_to_cpu(bkey_s_c_to_lru(k).v->idx);
|
||||
if (existing_idx != idx) {
|
||||
bch2_fs_inconsistent(c,
|
||||
"lru %llu:%llu with wrong backpointer: got %llu, should be %llu",
|
||||
id, time, existing_idx, idx);
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_btree_delete_at(trans, &iter, 0);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lru_set(struct btree_trans *trans, u64 lru_id, u64 idx, u64 *time)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_i_lru *lru;
|
||||
int ret = 0;
|
||||
|
||||
if (!*time)
|
||||
return 0;
|
||||
|
||||
for_each_btree_key_norestart(trans, iter, BTREE_ID_lru,
|
||||
POS(lru_id, *time),
|
||||
BTREE_ITER_SLOTS|
|
||||
BTREE_ITER_INTENT|
|
||||
BTREE_ITER_WITH_UPDATES, k, ret)
|
||||
if (bkey_deleted(k.k))
|
||||
break;
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
BUG_ON(iter.pos.inode != lru_id);
|
||||
*time = iter.pos.offset;
|
||||
|
||||
lru = bch2_trans_kmalloc(trans, sizeof(*lru));
|
||||
ret = PTR_ERR_OR_ZERO(lru);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bkey_lru_init(&lru->k_i);
|
||||
lru->k.p = iter.pos;
|
||||
lru->v.idx = cpu_to_le64(idx);
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, &lru->k_i, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_lru_change(struct btree_trans *trans, u64 id, u64 idx,
|
||||
u64 old_time, u64 *new_time)
|
||||
{
|
||||
if (old_time == *new_time)
|
||||
return 0;
|
||||
|
||||
return lru_delete(trans, id, idx, old_time) ?:
|
||||
lru_set(trans, id, idx, new_time);
|
||||
}
|
||||
|
||||
static int bch2_check_lru_key(struct btree_trans *trans,
|
||||
struct btree_iter *lru_iter, bool initial)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c lru_k, k;
|
||||
struct bkey_alloc_unpacked a;
|
||||
struct printbuf buf1 = PRINTBUF;
|
||||
struct printbuf buf2 = PRINTBUF;
|
||||
u64 idx;
|
||||
int ret;
|
||||
|
||||
lru_k = bch2_btree_iter_peek(lru_iter);
|
||||
if (!lru_k.k)
|
||||
return 0;
|
||||
|
||||
ret = bkey_err(lru_k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
idx = le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx);
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
|
||||
POS(lru_k.k->p.inode, idx), 0);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
a = bch2_alloc_unpack(k);
|
||||
|
||||
if (fsck_err_on(bucket_state(a) != BUCKET_cached ||
|
||||
a.read_time != lru_k.k->p.offset, c,
|
||||
"incorrect lru entry %s\n"
|
||||
" for %s",
|
||||
(bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
|
||||
(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) {
|
||||
struct bkey_i *update =
|
||||
bch2_trans_kmalloc(trans, sizeof(*update));
|
||||
|
||||
ret = PTR_ERR_OR_ZERO(update);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bkey_init(&update->k);
|
||||
update->k.p = lru_iter->pos;
|
||||
|
||||
ret = bch2_trans_update(trans, lru_iter, update, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
err:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
printbuf_exit(&buf2);
|
||||
printbuf_exit(&buf1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_check_lrus(struct bch_fs *c, bool initial)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||
bch2_check_lru_key(&trans, &iter, initial));
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
|
||||
}
|
libbcachefs/lru.h (new file, 17 lines)
@ -0,0 +1,17 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_LRU_H
|
||||
#define _BCACHEFS_LRU_H
|
||||
|
||||
const char *bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
#define bch2_bkey_ops_lru (struct bkey_ops) { \
|
||||
.key_invalid = bch2_lru_invalid, \
|
||||
.val_to_text = bch2_lru_to_text, \
|
||||
}
|
||||
|
||||
int bch2_lru_change(struct btree_trans *, u64, u64, u64, u64 *);
|
||||
|
||||
int bch2_check_lrus(struct bch_fs *, bool);
|
||||
|
||||
#endif /* _BCACHEFS_LRU_H */
|
@ -119,18 +119,6 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
|
||||
return DATA_SKIP;
|
||||
}
|
||||
|
||||
static bool have_copygc_reserve(struct bch_dev *ca)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
spin_lock(&ca->fs->freelist_lock);
|
||||
ret = fifo_full(&ca->free[RESERVE_MOVINGGC]) ||
|
||||
ca->allocator_state != ALLOCATOR_running;
|
||||
spin_unlock(&ca->fs->freelist_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int fragmentation_cmp(copygc_heap *heap,
|
||||
struct copygc_heap_entry l,
|
||||
struct copygc_heap_entry r)
|
||||
@ -165,7 +153,7 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
|
||||
.dev = iter.pos.inode,
|
||||
.gen = u.gen,
|
||||
.replicas = 1 + u.stripe_redundancy,
|
||||
.fragmentation = u.dirty_sectors * (1U << 15)
|
||||
.fragmentation = (u64) u.dirty_sectors * (1ULL << 31)
|
||||
/ ca->mi.bucket_size,
|
||||
.sectors = u.dirty_sectors,
|
||||
.offset = bucket_to_sector(ca, iter.pos.offset),
|
||||
@ -262,11 +250,10 @@ static int bch2_copygc(struct bch_fs *c)
|
||||
}
|
||||
|
||||
for_each_rw_member(ca, c, dev_idx) {
|
||||
closure_wait_event(&c->freelist_wait, have_copygc_reserve(ca));
|
||||
s64 avail = min(dev_buckets_available(ca, RESERVE_MOVINGGC),
|
||||
ca->mi.nbuckets >> 6);
|
||||
|
||||
spin_lock(&ca->fs->freelist_lock);
|
||||
sectors_reserved += fifo_used(&ca->free[RESERVE_MOVINGGC]) * ca->mi.bucket_size;
|
||||
spin_unlock(&ca->fs->freelist_lock);
|
||||
sectors_reserved += avail * ca->mi.bucket_size;
|
||||
}
|
||||
|
||||
ret = walk_buckets_to_copygc(c);
|
||||
@ -367,8 +354,8 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
|
||||
for_each_rw_member(ca, c, dev_idx) {
|
||||
struct bch_dev_usage usage = bch2_dev_usage_read(ca);
|
||||
|
||||
fragmented_allowed = ((__dev_buckets_reclaimable(ca, usage) *
|
||||
ca->mi.bucket_size) >> 1);
|
||||
fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_NONE) *
|
||||
ca->mi.bucket_size) >> 1);
|
||||
fragmented = usage.d[BCH_DATA_user].fragmented;
|
||||
|
||||
wait = min(wait, max(0LL, fragmented_allowed - fragmented));
|
||||
|
@ -265,7 +265,7 @@ enum opt_type {
|
||||
x(discard, u8, \
|
||||
OPT_FS|OPT_MOUNT|OPT_DEVICE, \
|
||||
OPT_BOOL(), \
|
||||
BCH2_NO_SB_OPT, false, \
|
||||
BCH2_NO_SB_OPT, true, \
|
||||
NULL, "Enable discard/TRIM support") \
|
||||
x(verbose, u8, \
|
||||
OPT_FS|OPT_MOUNT, \
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "journal_io.h"
|
||||
#include "journal_reclaim.h"
|
||||
#include "journal_seq_blacklist.h"
|
||||
#include "lru.h"
|
||||
#include "move.h"
|
||||
#include "quota.h"
|
||||
#include "recovery.h"
|
||||
@ -1027,8 +1028,8 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
bch_info(c, "filesystem version is prior to subvol_dirent - upgrading");
|
||||
c->opts.version_upgrade = true;
|
||||
c->opts.fsck = true;
|
||||
} else if (c->sb.version < bcachefs_metadata_version_inode_v2) {
|
||||
bch_info(c, "filesystem version is prior to inode_v2 - upgrading");
|
||||
} else if (c->sb.version < bcachefs_metadata_version_freespace) {
|
||||
bch_info(c, "filesystem version is prior to freespace - upgrading");
|
||||
c->opts.version_upgrade = true;
|
||||
}
|
||||
}
|
||||
@ -1137,7 +1138,7 @@ use_clean:
|
||||
err = "error reading allocation information";
|
||||
|
||||
down_read(&c->gc_lock);
|
||||
ret = bch2_alloc_read(c, false, false);
|
||||
ret = bch2_alloc_read(c);
|
||||
up_read(&c->gc_lock);
|
||||
|
||||
if (ret)
|
||||
@ -1165,13 +1166,27 @@ use_clean:
|
||||
bool metadata_only = c->opts.norecovery;
|
||||
|
||||
bch_info(c, "checking allocations");
|
||||
err = "error in mark and sweep";
|
||||
err = "error checking allocations";
|
||||
ret = bch2_gc(c, true, metadata_only);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "done checking allocations");
|
||||
}
|
||||
|
||||
if (c->opts.fsck &&
|
||||
c->sb.version >= bcachefs_metadata_version_freespace) {
|
||||
bch_info(c, "checking need_discard and freespace btrees");
|
||||
err = "error checking need_discard and freespace btrees";
|
||||
ret = bch2_check_alloc_info(c, true);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_check_lrus(c, true);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "done checking need_discard and freespace btrees");
|
||||
}
|
||||
|
||||
bch2_stripes_heap_start(c);
|
||||
|
||||
clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
|
||||
@ -1196,6 +1211,11 @@ use_clean:
|
||||
if (c->opts.verbose || !c->sb.clean)
|
||||
bch_info(c, "journal replay done");
|
||||
|
||||
err = "error initializing freespace";
|
||||
ret = bch2_fs_freespace_init(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
|
||||
bch2_fs_lazy_rw(c);
|
||||
|
||||
@ -1368,6 +1388,7 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
* Write out the superblock and journal buckets, now that we can do
|
||||
* btree updates
|
||||
*/
|
||||
bch_verbose(c, "marking superblocks");
|
||||
err = "error marking superblock and journal";
|
||||
for_each_member_device(ca, c, i) {
|
||||
ret = bch2_trans_mark_dev_sb(c, ca);
|
||||
@ -1379,6 +1400,12 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
ca->new_fs_bucket_idx = 0;
|
||||
}
|
||||
|
||||
bch_verbose(c, "initializing freespace");
|
||||
err = "error initializing freespace";
|
||||
ret = bch2_fs_freespace_init(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
err = "error creating root snapshot node";
|
||||
ret = bch2_fs_initialize_subvolumes(c);
|
||||
if (ret)
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "io.h"
|
||||
#include "journal.h"
|
||||
#include "journal_io.h"
|
||||
#include "journal_sb.h"
|
||||
#include "journal_seq_blacklist.h"
|
||||
#include "replicas.h"
|
||||
#include "quota.h"
|
||||
@ -424,7 +425,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
|
||||
memcpy(dst->compat, src->compat, sizeof(dst->compat));
|
||||
|
||||
for (i = 0; i < BCH_SB_FIELD_NR; i++) {
|
||||
if (i == BCH_SB_FIELD_journal)
|
||||
if ((1U << i) & BCH_SINGLE_DEVICE_SB_FIELDS)
|
||||
continue;
|
||||
|
||||
src_f = bch2_sb_field_get(src, i);
|
||||
@ -898,85 +899,6 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
|
||||
mutex_unlock(&c->sb_lock);
|
||||
}
|
||||
|
||||
/* BCH_SB_FIELD_journal: */
|
||||
|
||||
static int u64_cmp(const void *_l, const void *_r)
|
||||
{
|
||||
u64 l = *((const u64 *) _l), r = *((const u64 *) _r);
|
||||
|
||||
return l < r ? -1 : l > r ? 1 : 0;
|
||||
}
|
||||
|
||||
static int bch2_sb_journal_validate(struct bch_sb *sb,
|
||||
struct bch_sb_field *f,
|
||||
struct printbuf *err)
|
||||
{
|
||||
struct bch_sb_field_journal *journal = field_to_type(f, journal);
|
||||
struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx;
|
||||
int ret = -EINVAL;
|
||||
unsigned nr;
|
||||
unsigned i;
|
||||
u64 *b;
|
||||
|
||||
nr = bch2_nr_journal_buckets(journal);
|
||||
if (!nr)
|
||||
return 0;
|
||||
|
||||
b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL);
|
||||
if (!b)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < nr; i++)
|
||||
b[i] = le64_to_cpu(journal->buckets[i]);
|
||||
|
||||
sort(b, nr, sizeof(u64), u64_cmp, NULL);
|
||||
|
||||
if (!b[0]) {
|
||||
pr_buf(err, "journal bucket at sector 0");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (b[0] < le16_to_cpu(m->first_bucket)) {
|
||||
pr_buf(err, "journal bucket %llu before first bucket %u",
|
||||
b[0], le16_to_cpu(m->first_bucket));
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (b[nr - 1] >= le64_to_cpu(m->nbuckets)) {
|
||||
pr_buf(err, "journal bucket %llu past end of device (nbuckets %llu)",
|
||||
b[nr - 1], le64_to_cpu(m->nbuckets));
|
||||
goto err;
|
||||
}
|
||||
|
||||
for (i = 0; i + 1 < nr; i++)
|
||||
if (b[i] == b[i + 1]) {
|
||||
pr_buf(err, "duplicate journal buckets %llu", b[i]);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
err:
|
||||
kfree(b);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_sb_journal_to_text(struct printbuf *out, struct bch_sb *sb,
|
||||
struct bch_sb_field *f)
|
||||
{
|
||||
struct bch_sb_field_journal *journal = field_to_type(f, journal);
|
||||
unsigned i, nr = bch2_nr_journal_buckets(journal);
|
||||
|
||||
pr_buf(out, "Buckets: ");
|
||||
for (i = 0; i < nr; i++)
|
||||
pr_buf(out, " %llu", le64_to_cpu(journal->buckets[i]));
|
||||
pr_newline(out);
|
||||
}
|
||||
|
||||
static const struct bch_sb_field_ops bch_sb_field_ops_journal = {
|
||||
.validate = bch2_sb_journal_validate,
|
||||
.to_text = bch2_sb_journal_to_text,
|
||||
};
|
||||
|
||||
/* BCH_SB_FIELD_members: */
|
||||
|
||||
static int bch2_sb_members_validate(struct bch_sb *sb,
|
||||
@ -1130,6 +1052,11 @@ static void bch2_sb_members_to_text(struct printbuf *out, struct bch_sb *sb,
|
||||
pr_buf(out, "%llu", BCH_MEMBER_DISCARD(m));
|
||||
pr_newline(out);
|
||||
|
||||
pr_buf(out, "Freespace initialized:");
|
||||
pr_tab(out);
|
||||
pr_buf(out, "%llu", BCH_MEMBER_FREESPACE_INITIALIZED(m));
|
||||
pr_newline(out);
|
||||
|
||||
pr_indent_pop(out, 2);
|
||||
}
|
||||
}
|
||||
|
@ -75,15 +75,6 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
|
||||
__bch2_check_set_feature(c, feat);
|
||||
}
|
||||
|
||||
/* BCH_SB_FIELD_journal: */
|
||||
|
||||
static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j)
|
||||
{
|
||||
return j
|
||||
? (__le64 *) vstruct_end(&j->field) - j->buckets
|
||||
: 0;
|
||||
}
|
||||
|
||||
/* BCH_SB_FIELD_members: */
|
||||
|
||||
static inline bool bch2_member_exists(struct bch_member *m)
|
||||
@ -112,6 +103,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
|
||||
.durability = BCH_MEMBER_DURABILITY(mi)
|
||||
? BCH_MEMBER_DURABILITY(mi) - 1
|
||||
: 1,
|
||||
.freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
|
||||
.valid = !bch2_is_zero(mi->uuid.b, sizeof(uuid_le)),
|
||||
};
|
||||
}
|
||||
|
@ -199,17 +199,9 @@ static void __bch2_fs_read_only(struct bch_fs *c)
|
||||
*/
|
||||
bch2_journal_flush_all_pins(&c->journal);
|
||||
|
||||
/*
|
||||
* If the allocator threads didn't all start up, the btree updates to
|
||||
* write out alloc info aren't going to work:
|
||||
*/
|
||||
if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags))
|
||||
goto nowrote_alloc;
|
||||
|
||||
bch_verbose(c, "flushing journal and stopping allocators");
|
||||
|
||||
bch2_journal_flush_all_pins(&c->journal);
|
||||
set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
|
||||
|
||||
do {
|
||||
clean_passes++;
|
||||
@ -234,17 +226,11 @@ static void __bch2_fs_read_only(struct bch_fs *c)
|
||||
bch_verbose(c, "flushing journal and stopping allocators complete");
|
||||
|
||||
set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
|
||||
nowrote_alloc:
|
||||
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
!bch2_btree_interior_updates_nr_pending(c));
|
||||
flush_work(&c->btree_interior_update_work);
|
||||
|
||||
for_each_member_device(ca, c, i)
|
||||
bch2_dev_allocator_stop(ca);
|
||||
|
||||
clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
|
||||
clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
|
||||
|
||||
bch2_fs_journal_stop(&c->journal);
|
||||
|
||||
/*
|
||||
@ -280,10 +266,6 @@ void bch2_fs_read_only(struct bch_fs *c)
|
||||
/*
|
||||
* Block new foreground-end write operations from starting - any new
|
||||
* writes will return -EROFS:
|
||||
*
|
||||
* (This is really blocking new _allocations_, writes to previously
|
||||
* allocated space can still happen until stopping the allocator in
|
||||
* bch2_dev_allocator_stop()).
|
||||
*/
|
||||
percpu_ref_kill(&c->writes);
|
||||
|
||||
@ -412,19 +394,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
|
||||
bch2_dev_allocator_add(c, ca);
|
||||
bch2_recalc_capacity(c);
|
||||
|
||||
for_each_rw_member(ca, c, i) {
|
||||
ret = bch2_dev_allocator_start(ca);
|
||||
if (ret) {
|
||||
bch_err(c, "error starting allocator threads");
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
|
||||
|
||||
for_each_rw_member(ca, c, i)
|
||||
bch2_wake_allocator(ca);
|
||||
bch2_do_discards(c);
|
||||
|
||||
if (!early) {
|
||||
ret = bch2_fs_read_write_late(c);
|
||||
@ -941,20 +911,6 @@ int bch2_fs_start(struct bch_fs *c)
|
||||
|
||||
set_bit(BCH_FS_STARTED, &c->flags);
|
||||
|
||||
/*
|
||||
* Allocator threads don't start filling copygc reserve until after we
|
||||
* set BCH_FS_STARTED - wake them now:
|
||||
*
|
||||
* XXX ugly hack:
|
||||
* Need to set ca->allocator_state here instead of relying on the
|
||||
* allocator threads to do it to avoid racing with the copygc threads
|
||||
* checking it and thinking they have no alloc reserve:
|
||||
*/
|
||||
for_each_online_member(ca, c, i) {
|
||||
ca->allocator_state = ALLOCATOR_running;
|
||||
bch2_wake_allocator(ca);
|
||||
}
|
||||
|
||||
if (c->opts.read_only || c->opts.nochanges) {
|
||||
bch2_fs_read_only(c);
|
||||
} else {
|
||||
@ -1046,8 +1002,6 @@ static void bch2_dev_release(struct kobject *kobj)
|
||||
|
||||
static void bch2_dev_free(struct bch_dev *ca)
|
||||
{
|
||||
bch2_dev_allocator_stop(ca);
|
||||
|
||||
cancel_work_sync(&ca->io_error_work);
|
||||
|
||||
if (ca->kobj.state_in_sysfs &&
|
||||
@ -1162,6 +1116,9 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
|
||||
ca->mi = bch2_mi_to_cpu(member);
|
||||
ca->uuid = member->uuid;
|
||||
|
||||
ca->nr_btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
|
||||
ca->mi.bucket_size / btree_sectors(c));
|
||||
|
||||
if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete,
|
||||
0, GFP_KERNEL) ||
|
||||
percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
|
||||
@ -1211,12 +1168,6 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
|
||||
|
||||
ca->fs = c;
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_rw &&
|
||||
bch2_dev_allocator_start(ca)) {
|
||||
bch2_dev_free(ca);
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch2_dev_attach(c, ca, dev_idx);
|
||||
out:
|
||||
pr_verbose_init(c->opts, "ret %i", ret);
|
||||
@ -1402,14 +1353,13 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
|
||||
/*
|
||||
* The allocator thread itself allocates btree nodes, so stop it first:
|
||||
*/
|
||||
bch2_dev_allocator_stop(ca);
|
||||
bch2_dev_allocator_remove(c, ca);
|
||||
bch2_dev_journal_stop(&c->journal, ca);
|
||||
|
||||
bch2_copygc_start(c);
|
||||
}
|
||||
|
||||
static int __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
|
||||
static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
@ -1417,8 +1367,6 @@ static int __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
|
||||
|
||||
bch2_dev_allocator_add(c, ca);
|
||||
bch2_recalc_capacity(c);
|
||||
|
||||
return bch2_dev_allocator_start(ca);
|
||||
}
|
||||
|
||||
int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
|
||||
@ -1445,7 +1393,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
if (new_state == BCH_MEMBER_STATE_rw)
|
||||
ret = __bch2_dev_read_write(c, ca);
|
||||
__bch2_dev_read_write(c, ca);
|
||||
|
||||
rebalance_wakeup(c);
|
||||
|
||||
@ -1468,30 +1416,20 @@ int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
size_t i;
|
||||
struct bpos start = POS(ca->dev_idx, 0);
|
||||
struct bpos end = POS(ca->dev_idx, U64_MAX);
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for (i = 0; i < ca->mi.nbuckets; i++) {
|
||||
ret = lockrestart_do(&trans,
|
||||
bch2_btree_key_cache_flush(&trans,
|
||||
BTREE_ID_alloc, POS(ca->dev_idx, i)));
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
if (ret) {
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
|
||||
BTREE_TRIGGER_NORUN, NULL) ?:
|
||||
bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
|
||||
BTREE_TRIGGER_NORUN, NULL) ?:
|
||||
bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
|
||||
BTREE_TRIGGER_NORUN, NULL);
|
||||
if (ret)
|
||||
bch_err(c, "error %i removing dev alloc info", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return bch2_btree_delete_range(c, BTREE_ID_alloc,
|
||||
POS(ca->dev_idx, 0),
|
||||
POS(ca->dev_idx + 1, 0),
|
||||
0, NULL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
|
||||
@ -1709,15 +1647,16 @@ have_slot:
|
||||
goto err_late;
|
||||
}
|
||||
|
||||
ret = bch2_fs_freespace_init(c);
|
||||
if (ret) {
|
||||
bch_err(c, "device add error: error initializing free space: %i", ret);
|
||||
goto err_late;
|
||||
}
|
||||
|
||||
ca->new_fs_bucket_idx = 0;
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_rw) {
|
||||
ret = __bch2_dev_read_write(c, ca);
|
||||
if (ret) {
|
||||
bch_err(c, "device add error: error going RW on new device: %i", ret);
|
||||
goto err_late;
|
||||
}
|
||||
}
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_rw)
|
||||
__bch2_dev_read_write(c, ca);
|
||||
|
||||
up_write(&c->state_lock);
|
||||
return 0;
|
||||
@ -1777,11 +1716,8 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_rw) {
|
||||
ret = __bch2_dev_read_write(c, ca);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_rw)
|
||||
__bch2_dev_read_write(c, ca);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
mi = bch2_sb_get_members(c->disk_sb.sb);
|
||||
|
@ -32,6 +32,7 @@ struct bch_member_cpu {
|
||||
u8 discard;
|
||||
u8 data_allowed;
|
||||
u8 durability;
|
||||
u8 freespace_initialized;
|
||||
u8 valid;
|
||||
};
|
||||
|
||||
|
@ -170,7 +170,6 @@ read_attribute(congested);
|
||||
|
||||
read_attribute(btree_avg_write_size);
|
||||
|
||||
read_attribute(reserve_stats);
|
||||
read_attribute(btree_cache_size);
|
||||
read_attribute(compression_stats);
|
||||
read_attribute(journal_debug);
|
||||
@ -185,11 +184,11 @@ read_attribute(internal_uuid);
|
||||
|
||||
read_attribute(has_data);
|
||||
read_attribute(alloc_debug);
|
||||
write_attribute(wake_allocator);
|
||||
|
||||
read_attribute(read_realloc_races);
|
||||
read_attribute(extent_migrate_done);
|
||||
read_attribute(extent_migrate_raced);
|
||||
read_attribute(bucket_alloc_fail);
|
||||
|
||||
rw_attribute(discard);
|
||||
rw_attribute(label);
|
||||
@ -376,6 +375,8 @@ SHOW(bch2_fs)
|
||||
atomic_long_read(&c->extent_migrate_done));
|
||||
sysfs_print(extent_migrate_raced,
|
||||
atomic_long_read(&c->extent_migrate_raced));
|
||||
sysfs_print(bucket_alloc_fail,
|
||||
atomic_long_read(&c->bucket_alloc_fail));
|
||||
|
||||
sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic);
|
||||
|
||||
@ -572,6 +573,7 @@ struct attribute *bch2_fs_internal_files[] = {
|
||||
&sysfs_read_realloc_races,
|
||||
&sysfs_extent_migrate_done,
|
||||
&sysfs_extent_migrate_raced,
|
||||
&sysfs_bucket_alloc_fail,
|
||||
|
||||
&sysfs_gc_gens_pos,
|
||||
|
||||
@ -698,24 +700,6 @@ struct attribute *bch2_fs_time_stats_files[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static void reserve_stats_to_text(struct printbuf *out, struct bch_dev *ca)
|
||||
{
|
||||
enum alloc_reserve i;
|
||||
|
||||
spin_lock(&ca->fs->freelist_lock);
|
||||
|
||||
pr_buf(out, "free_inc:\t%zu\t%zu\n",
|
||||
fifo_used(&ca->free_inc),
|
||||
ca->free_inc.size);
|
||||
|
||||
for (i = 0; i < RESERVE_NR; i++)
|
||||
pr_buf(out, "free[%u]:\t%zu\t%zu\n", i,
|
||||
fifo_used(&ca->free[i]),
|
||||
ca->free[i].size);
|
||||
|
||||
spin_unlock(&ca->fs->freelist_lock);
|
||||
}
|
||||
|
||||
static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
|
||||
{
|
||||
struct bch_fs *c = ca->fs;
|
||||
@ -741,9 +725,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
|
||||
"ec\t%16llu\n"
|
||||
"available%15llu\n"
|
||||
"\n"
|
||||
"free_inc\t\t%zu/%zu\n"
|
||||
"free[RESERVE_MOVINGGC]\t%zu/%zu\n"
|
||||
"free[RESERVE_NONE]\t%zu/%zu\n"
|
||||
"freelist_wait\t\t%s\n"
|
||||
"open buckets allocated\t%u\n"
|
||||
"open buckets this dev\t%u\n"
|
||||
@ -751,13 +732,9 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
|
||||
"open_buckets_wait\t%s\n"
|
||||
"open_buckets_btree\t%u\n"
|
||||
"open_buckets_user\t%u\n"
|
||||
"btree reserve cache\t%u\n"
|
||||
"thread state:\t\t%s\n",
|
||||
"btree reserve cache\t%u\n",
|
||||
stats.buckets_ec,
|
||||
__dev_buckets_available(ca, stats),
|
||||
fifo_used(&ca->free_inc), ca->free_inc.size,
|
||||
fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
|
||||
fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size,
|
||||
__dev_buckets_available(ca, stats, RESERVE_NONE),
|
||||
c->freelist_wait.list.first ? "waiting" : "empty",
|
||||
OPEN_BUCKETS_COUNT - c->open_buckets_nr_free,
|
||||
ca->nr_open_buckets,
|
||||
@ -765,8 +742,7 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
|
||||
c->open_buckets_wait.list.first ? "waiting" : "empty",
|
||||
nr[BCH_DATA_btree],
|
||||
nr[BCH_DATA_user],
|
||||
c->btree_reserve_cache_nr,
|
||||
bch2_allocator_states[ca->allocator_state]);
|
||||
c->btree_reserve_cache_nr);
|
||||
}
|
||||
|
||||
static const char * const bch2_rw[] = {
|
||||
@ -841,9 +817,6 @@ SHOW(bch2_dev)
|
||||
clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
|
||||
* 100 / CONGESTED_MAX);
|
||||
|
||||
if (attr == &sysfs_reserve_stats)
|
||||
reserve_stats_to_text(out, ca);
|
||||
|
||||
if (attr == &sysfs_alloc_debug)
|
||||
dev_alloc_debug_to_text(out, ca);
|
||||
|
||||
@ -883,9 +856,6 @@ STORE(bch2_dev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_wake_allocator)
|
||||
bch2_wake_allocator(ca);
|
||||
|
||||
return size;
|
||||
}
|
||||
SYSFS_OPS(bch2_dev);
|
||||
@ -911,11 +881,8 @@ struct attribute *bch2_dev_files[] = {
|
||||
&sysfs_io_latency_stats_write,
|
||||
&sysfs_congested,
|
||||
|
||||
&sysfs_reserve_stats,
|
||||
|
||||
/* debug: */
|
||||
&sysfs_alloc_debug,
|
||||
&sysfs_wake_allocator,
|
||||
NULL
|
||||
};
|
||||