bcachefs-tools (mirror of https://github.com/koverstreet/bcachefs-tools.git, synced 2025-02-22 00:00:03 +03:00)

commit 35fca2f044 (parent 1c50d258e3)

    Update bcachefs sources to 99750eab4d bcachefs: Persist stripe blocks_used
@@ -1 +1 @@
bcca1c557b1897ecc3aeb1f89ab91865487d91ab
99750eab4d583132cf61f071082c7cf21f5295c0

include/asm/page.h (new, empty file)

@@ -37,6 +37,7 @@ typedef struct {
#define xchg_acquire(p, v) uatomic_xchg(p, v)
#define cmpxchg(p, old, new) uatomic_cmpxchg(p, old, new)
#define cmpxchg_acquire(p, old, new) uatomic_cmpxchg(p, old, new)
#define cmpxchg_release(p, old, new) uatomic_cmpxchg(p, old, new)

#define smp_mb__before_atomic() cmm_smp_mb__before_uatomic_add()
#define smp_mb__after_atomic() cmm_smp_mb__after_uatomic_add()

@@ -77,6 +78,16 @@ typedef struct {
__old; \
})

#define cmpxchg_release(p, old, new) \
({ \
typeof(*(p)) __old = (old); \
\
__atomic_compare_exchange_n((p), &__old, new, false, \
__ATOMIC_RELEASE, \
__ATOMIC_RELEASE); \
__old; \
})

#define smp_mb__before_atomic() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define smp_mb__after_atomic() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define smp_wmb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
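The two hunks above add a release-ordered compare-and-swap to both flavours of the userspace atomics shim (the liburcu-backed variant and the one built on the compiler's __atomic builtins). As a quick illustration of how a caller might use it, here is a hedged sketch, not code from the commit; the lock-word example is made up and assumes the patched header is included:

```c
/* Hypothetical lock word: 0 = unlocked, 1 = locked. */
static int lock_word;

static _Bool try_unlock(void)
{
	/*
	 * All stores made while holding the lock must be visible before
	 * the lock word can be observed as 0 again, hence release ordering
	 * on the CAS. cmpxchg_release() returns the previous value.
	 */
	return cmpxchg_release(&lock_word, 1, 0) == 1;
}
```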
@@ -1,34 +1,60 @@
#ifndef _LINUX_GENERIC_RADIX_TREE_H
#define _LINUX_GENERIC_RADIX_TREE_H

/*
 * Generic radix trees/sparse arrays:
/**
 * DOC: Generic radix trees/sparse arrays:
 *
 * A generic radix tree has all nodes of size PAGE_SIZE - both leaves and
 * interior nodes.
 * Very simple and minimalistic, supporting arbitrary size entries up to
 * PAGE_SIZE.
 *
 * A genradix is defined with the type it will store, like so:
 *
 * static GENRADIX(struct foo) foo_genradix;
 *
 * The main operations are:
 *
 * - genradix_init(radix) - initialize an empty genradix
 *
 * - genradix_free(radix) - free all memory owned by the genradix and
 *   reinitialize it
 *
 * - genradix_ptr(radix, idx) - gets a pointer to the entry at idx, returning
 *   NULL if that entry does not exist
 *
 * - genradix_ptr_alloc(radix, idx, gfp) - gets a pointer to an entry,
 *   allocating it if necessary
 *
 * - genradix_for_each(radix, iter, p) - iterate over each entry in a genradix
 *
 * The radix tree allocates one page of entries at a time, so entries may exist
 * that were never explicitly allocated - they will be initialized to all
 * zeroes.
 *
 * Internally, a genradix is just a radix tree of pages, and indexing works in
 * terms of byte offsets. The wrappers in this header file use sizeof on the
 * type the radix contains to calculate a byte offset from the index - see
 * __idx_to_offset.
 */

#include <asm/page.h>
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/log2.h>

struct genradix_node;
struct genradix_root;

struct __genradix {
struct genradix_node *root;
size_t depth;
struct genradix_root __rcu *root;
};

/*
 * NOTE: currently, sizeof(_type) must be a power of two and not larger than
 * PAGE_SIZE:
 * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE:
 */

#define __GENRADIX_INITIALIZER \
{ \
.tree = { \
.root = NULL, \
.depth = 0, \
} \
}
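Since the new kernel-doc comment above describes the genradix API only in prose, here is a small usage sketch of the documented operations. It is illustrative only: `struct foo`, the index 42, and the logging are made up, and it assumes a kernel-style environment with this header available.

```c
#include <linux/generic-radix-tree.h>

struct foo {
	u64 seq;
};

static GENRADIX(struct foo) foo_genradix;

static int foo_example(void)
{
	struct genradix_iter iter;
	struct foo *p;

	genradix_init(&foo_genradix);

	/* Allocate the page holding index 42 (if needed) and fill the entry: */
	p = genradix_ptr_alloc(&foo_genradix, 42, GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	p->seq = 1;

	/* Other entries in already-allocated pages read back as zeroes: */
	genradix_for_each(&foo_genradix, iter, p)
		pr_info("idx %zu seq %llu\n", iter.pos,
			(unsigned long long) p->seq);

	genradix_free(&foo_genradix);
	return 0;
}
```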
@@ -49,6 +75,12 @@ struct { \
#define DEFINE_GENRADIX(_name, _type) \
GENRADIX(_type) _name = __GENRADIX_INITIALIZER

/**
 * genradix_init - initialize a genradix
 * @_radix: genradix to initialize
 *
 * Does not fail
 */
#define genradix_init(_radix) \
do { \
*(_radix) = (typeof(*_radix)) __GENRADIX_INITIALIZER; \

@@ -56,11 +88,20 @@ do { \
void __genradix_free(struct __genradix *);

/**
 * genradix_free: free all memory owned by a genradix
 * @_radix: the genradix to free
 *
 * After freeing, @_radix will be reinitialized and empty
 */
#define genradix_free(_radix) __genradix_free(&(_radix)->tree)

static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
{
BUILD_BUG_ON(obj_size > PAGE_SIZE);
if (__builtin_constant_p(obj_size))
BUILD_BUG_ON(obj_size > PAGE_SIZE);
else
BUG_ON(obj_size > PAGE_SIZE);

if (!is_power_of_2(obj_size)) {
size_t objs_per_page = PAGE_SIZE / obj_size;

@@ -79,7 +120,13 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
void *__genradix_ptr(struct __genradix *, size_t);

/* Returns a pointer to element at @_idx */
/**
 * genradix_ptr - get a pointer to a genradix entry
 * @_radix: genradix to access
 * @_idx: index to fetch
 *
 * Returns a pointer to entry at @_idx, or NULL if that entry does not exist.
 */
#define genradix_ptr(_radix, _idx) \
(__genradix_cast(_radix) \
__genradix_ptr(&(_radix)->tree, \

@@ -87,7 +134,15 @@ void *__genradix_ptr(struct __genradix *, size_t);
void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t);

/* Returns a pointer to element at @_idx, allocating it if necessary */
/**
 * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it
 * if necessary
 * @_radix: genradix to access
 * @_idx: index to fetch
 * @_gfp: gfp mask
 *
 * Returns a pointer to entry at @_idx, or NULL on allocation failure
 */
#define genradix_ptr_alloc(_radix, _idx, _gfp) \
(__genradix_cast(_radix) \
__genradix_ptr_alloc(&(_radix)->tree, \

@@ -99,6 +154,11 @@ struct genradix_iter {
size_t pos;
};

/**
 * genradix_iter_init - initialize a genradix_iter
 * @_radix: genradix that will be iterated over
 * @_idx: index to start iterating from
 */
#define genradix_iter_init(_radix, _idx) \
((struct genradix_iter) { \
.pos = (_idx), \

@@ -107,6 +167,14 @@ struct genradix_iter {
void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);

/**
 * genradix_iter_peek - get first entry at or above iterator's current
 * position
 * @_iter: a genradix_iter
 * @_radix: genradix being iterated over
 *
 * If no more entries exist at or above @_iter's current position, returns NULL
 */
#define genradix_iter_peek(_iter, _radix) \
(__genradix_cast(_radix) \
__genradix_iter_peek(_iter, &(_radix)->tree, \

@@ -127,4 +195,37 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter,
#define genradix_iter_advance(_iter, _radix) \
__genradix_iter_advance(_iter, __genradix_obj_size(_radix))

#define genradix_for_each_from(_radix, _iter, _p, _start) \
for (_iter = genradix_iter_init(_radix, _start); \
(_p = genradix_iter_peek(&_iter, _radix)) != NULL; \
genradix_iter_advance(&_iter, _radix))

/**
 * genradix_for_each - iterate over entry in a genradix
 * @_radix: genradix to iterate over
 * @_iter: a genradix_iter to track current position
 * @_p: pointer to genradix entry type
 *
 * On every iteration, @_p will point to the current entry, and @_iter.pos
 * will be the current entry's index.
 */
#define genradix_for_each(_radix, _iter, _p) \
genradix_for_each_from(_radix, _iter, _p, 0)

int __genradix_prealloc(struct __genradix *, size_t, gfp_t);

/**
 * genradix_prealloc - preallocate entries in a generic radix tree
 * @_radix: genradix to preallocate
 * @_nr: number of entries to preallocate
 * @_gfp: gfp mask
 *
 * Returns 0 on success, -ENOMEM on failure
 */
#define genradix_prealloc(_radix, _nr, _gfp) \
__genradix_prealloc(&(_radix)->tree, \
__genradix_idx_to_offset(_radix, _nr + 1),\
_gfp)

#endif /* _LINUX_GENERIC_RADIX_TREE_H */
@@ -249,6 +249,9 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
bch2_alloc_read_key(c, bkey_i_to_s_c(k));
}

for_each_member_device(ca, c, i)
bch2_dev_usage_from_buckets(c, ca);

mutex_lock(&c->bucket_clock[READ].lock);
for_each_member_device(ca, c, i) {
down_read(&ca->bucket_lock);

@@ -280,35 +283,51 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
#endif
struct bkey_i_alloc *a = bkey_alloc_init(&alloc_key.k);
struct bucket *g;
struct bucket_mark m;
struct bucket_mark m, new;
int ret;

BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);

a->k.p = POS(ca->dev_idx, b);

bch2_btree_iter_set_pos(iter, a->k.p);

ret = bch2_btree_iter_traverse(iter);
if (ret)
return ret;

percpu_down_read_preempt_disable(&c->mark_lock);
g = bucket(ca, b);
m = bucket_cmpxchg(g, m, m.dirty = false);
m = READ_ONCE(g->mark);

if (!m.dirty) {
percpu_up_read_preempt_enable(&c->mark_lock);
return 0;
}

__alloc_write_key(a, g, m);
percpu_up_read_preempt_enable(&c->mark_lock);

bch2_btree_iter_cond_resched(iter);

bch2_btree_iter_set_pos(iter, a->k.p);

ret = bch2_btree_insert_at(c, NULL, journal_seq,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
flags,
BTREE_INSERT_ENTRY(iter, &a->k_i));
if (ret)
return ret;

if (!ret && ca->buckets_written)
new = m;
new.dirty = false;
atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);

if (ca->buckets_written)
set_bit(b, ca->buckets_written);

return ret;
return 0;
}

int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)

@@ -898,10 +917,19 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
for (i = 0; i < RESERVE_NR; i++)
if (fifo_push(&ca->free[i], bucket)) {
fifo_pop(&ca->free_inc, bucket);

closure_wake_up(&c->freelist_wait);
ca->allocator_blocked_full = false;

spin_unlock(&c->freelist_lock);
goto out;
}

if (!ca->allocator_blocked_full) {
ca->allocator_blocked_full = true;
closure_wake_up(&c->freelist_wait);
}

spin_unlock(&c->freelist_lock);

if ((current->flags & PF_KTHREAD) &&

@@ -1226,6 +1254,11 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
set_bit(ca->dev_idx, c->rw_devs[i].d);
}

void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca)
{
closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full);
}

/* stop allocator thread: */
void bch2_dev_allocator_stop(struct bch_dev *ca)
{

@@ -1333,6 +1366,24 @@ static void allocator_start_issue_discards(struct bch_fs *c)
ca->mi.bucket_size, GFP_NOIO, 0);
}

static int resize_free_inc(struct bch_dev *ca)
{
alloc_fifo free_inc;

if (!fifo_full(&ca->free_inc))
return 0;

if (!init_fifo(&free_inc,
ca->free_inc.size * 2,
GFP_KERNEL))
return -ENOMEM;

fifo_move(&free_inc, &ca->free_inc);
swap(free_inc, ca->free_inc);
free_fifo(&free_inc);
return 0;
}

static int __bch2_fs_allocator_start(struct bch_fs *c)
{
struct bch_dev *ca;

@@ -1408,6 +1459,12 @@ not_enough:
while (!fifo_full(&ca->free[RESERVE_BTREE]) &&
(bu = next_alloc_bucket(ca)) >= 0) {
ret = resize_free_inc(ca);
if (ret) {
percpu_ref_put(&ca->io_ref);
return ret;
}

bch2_invalidate_one_bucket(c, ca, bu,
&journal_seq);

@@ -51,6 +51,7 @@ void bch2_recalc_capacity(struct bch_fs *);
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);

void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_stop(struct bch_dev *);
int bch2_dev_allocator_start(struct bch_dev *);
@@ -106,6 +106,7 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr),
false, gc_pos_alloc(c, ob), 0);
ob->valid = false;
ob->type = 0;

spin_unlock(&ob->lock);
percpu_up_read_preempt_enable(&c->mark_lock);

@@ -141,6 +142,7 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
ob = c->open_buckets + c->open_buckets_freelist;
c->open_buckets_freelist = ob->freelist;
atomic_set(&ob->pin, 1);
ob->type = 0;

c->open_buckets_nr_free--;
return ob;

@@ -209,9 +211,9 @@ static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
case RESERVE_ALLOC:
return 0;
case RESERVE_BTREE:
return BTREE_NODE_RESERVE / 2;
return BTREE_NODE_OPEN_BUCKET_RESERVE;
default:
return BTREE_NODE_RESERVE;
return BTREE_NODE_OPEN_BUCKET_RESERVE * 2;
}
}

@@ -837,15 +839,17 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
{
struct write_point *wp;
struct open_bucket *ob;
unsigned nr_effective = 0;
struct open_buckets ptrs = { .nr = 0 };
bool have_cache = false;
unsigned write_points_nr;
int ret = 0, i;
struct open_buckets ptrs;
unsigned nr_effective, write_points_nr;
bool have_cache;
int ret, i;

BUG_ON(!nr_replicas || !nr_replicas_required);
retry:
ptrs.nr = 0;
nr_effective = 0;
write_points_nr = c->write_points_nr;
have_cache = false;

wp = writepoint_find(c, write_point.v);

@@ -85,6 +85,7 @@ static inline void bch2_open_bucket_get(struct bch_fs *c,
unsigned i;

open_bucket_for_each(c, &wp->ptrs, ob, i) {
ob->type = wp->type;
atomic_inc(&ob->pin);
ob_push(c, ptrs, ob);
}

@@ -55,9 +55,10 @@ struct open_bucket {
spinlock_t lock;
atomic_t pin;
u8 freelist;
bool valid;
bool on_partial_list;
u8 ec_idx;
u8 type;
unsigned valid:1;
unsigned on_partial_list:1;
unsigned sectors_free;
struct bch_extent_ptr ptr;
struct ec_stripe_new *ec;
@@ -330,6 +330,8 @@ enum bch_time_stats {
/* Size of the freelist we allocate btree nodes from: */
#define BTREE_NODE_RESERVE BTREE_RESERVE_MAX

#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX)

struct btree;

enum gc_phase {

@@ -426,7 +428,13 @@ struct bch_dev {
size_t inc_gen_needs_gc;
size_t inc_gen_really_needs_gc;

/*
 * XXX: this should be an enum for allocator state, so as to include
 * error state
 */
bool allocator_blocked;
bool allocator_blocked_full;

alloc_heap alloc_heap;

@@ -597,6 +605,7 @@ struct bch_fs {
struct workqueue_struct *wq;
/* copygc needs its own workqueue for index updates.. */
struct workqueue_struct *copygc_wq;
struct workqueue_struct *journal_reclaim_wq;

/* ALLOCATION */
struct delayed_work pd_controllers_update;
@@ -1010,11 +1010,8 @@ static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
nr_key_bits -= 64;
}

if (l_v != r_v)
return l_v < r_v ? -1 : 1;

if (!nr_key_bits)
return 0;
if (!nr_key_bits || l_v != r_v)
break;

l = next_word(l);
r = next_word(r);

@@ -1022,6 +1019,8 @@ static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
l_v = *l;
r_v = *r;
}

return (l_v > r_v) - (l_v < r_v);
}
#endif
@@ -483,31 +483,6 @@ static void bch2_gc_free(struct bch_fs *c)
percpu_up_write(&c->mark_lock);
}

/*
 * Accumulate percpu counters onto one cpu's copy - only valid when access
 * against any percpu counter is guarded against
 */
static u64 *acc_percpu_u64s(u64 __percpu *p, unsigned nr)
{
u64 *ret;
int cpu;

preempt_disable();
ret = this_cpu_ptr(p);
preempt_enable();

for_each_possible_cpu(cpu) {
u64 *i = per_cpu_ptr(p, cpu);

if (i != ret) {
acc_u64s(ret, i, nr);
memset(i, 0, nr * sizeof(u64));
}
}

return ret;
}

static void bch2_gc_done_nocheck(struct bch_fs *c)
{
struct bch_dev *ca;

@@ -543,9 +518,9 @@ static void bch2_gc_done_nocheck(struct bch_fs *c)
for_each_member_device(ca, c, i) {
unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
struct bch_dev_usage *dst = (void *)
acc_percpu_u64s((void *) ca->usage[0], nr);
bch2_acc_percpu_u64s((void *) ca->usage[0], nr);
struct bch_dev_usage *src = (void *)
acc_percpu_u64s((void *) ca->usage[1], nr);
bch2_acc_percpu_u64s((void *) ca->usage[1], nr);

*dst = *src;
}

@@ -554,9 +529,9 @@ static void bch2_gc_done_nocheck(struct bch_fs *c)
unsigned nr = sizeof(struct bch_fs_usage) / sizeof(u64) +
c->replicas.nr;
struct bch_fs_usage *dst = (void *)
acc_percpu_u64s((void *) c->usage[0], nr);
bch2_acc_percpu_u64s((void *) c->usage[0], nr);
struct bch_fs_usage *src = (void *)
acc_percpu_u64s((void *) c->usage[1], nr);
bch2_acc_percpu_u64s((void *) c->usage[1], nr);

memcpy(&dst->s.gc_start[0],
&src->s.gc_start[0],

@@ -582,6 +557,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
dst_iter.pos, ##__VA_ARGS__, \
dst->_f, src->_f); \
dst->_f = src->_f; \
dst->dirty = true; \
}
#define copy_bucket_field(_f) \
if (dst->b[b].mark._f != src->b[b].mark._f) { \

@@ -612,16 +588,18 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
(src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
BUG_ON(src_iter.pos != dst_iter.pos);

copy_stripe_field(alive, "alive");
copy_stripe_field(sectors, "sectors");
copy_stripe_field(algorithm, "algorithm");
copy_stripe_field(nr_blocks, "nr_blocks");
copy_stripe_field(nr_redundant, "nr_redundant");
copy_stripe_field(blocks_nonempty.counter,
copy_stripe_field(blocks_nonempty,
"blocks_nonempty");

for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++)
copy_stripe_field(block_sectors[i].counter,
copy_stripe_field(block_sectors[i],
"block_sectors[%u]", i);

if (dst->alive)

@@ -656,9 +634,9 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
for_each_member_device(ca, c, i) {
unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
struct bch_dev_usage *dst = (void *)
acc_percpu_u64s((void *) ca->usage[0], nr);
bch2_acc_percpu_u64s((void *) ca->usage[0], nr);
struct bch_dev_usage *src = (void *)
acc_percpu_u64s((void *) ca->usage[1], nr);
bch2_acc_percpu_u64s((void *) ca->usage[1], nr);
unsigned b;

for (b = 0; b < BCH_DATA_NR; b++)

@@ -678,9 +656,9 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
unsigned nr = sizeof(struct bch_fs_usage) / sizeof(u64) +
c->replicas.nr;
struct bch_fs_usage *dst = (void *)
acc_percpu_u64s((void *) c->usage[0], nr);
bch2_acc_percpu_u64s((void *) c->usage[0], nr);
struct bch_fs_usage *src = (void *)
acc_percpu_u64s((void *) c->usage[1], nr);
bch2_acc_percpu_u64s((void *) c->usage[1], nr);

copy_fs_field(s.hidden, "hidden");
copy_fs_field(s.data, "data");
@@ -109,7 +109,7 @@ static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
do {
seq = read_seqcount_begin(&c->gc_pos_lock);
ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
ret = gc_pos_cmp(pos, c->gc_pos) < 0;
} while (read_seqcount_retry(&c->gc_pos_lock, seq));

return ret;
@@ -77,6 +77,7 @@ enum {
__BTREE_INSERT_ATOMIC,
__BTREE_INSERT_NOUNLOCK,
__BTREE_INSERT_NOFAIL,
__BTREE_INSERT_NOCHECK_RW,
__BTREE_INSERT_USE_RESERVE,
__BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,

@@ -100,6 +101,8 @@ enum {
/* Don't check for -ENOSPC: */
#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL)

#define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW)

/* for copygc, or when merging btree nodes */
#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE)
#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
@@ -628,7 +628,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
trans_for_each_entry(trans, i)
btree_insert_entry_checks(c, i);

if (unlikely(!percpu_ref_tryget(&c->writes)))
if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
!percpu_ref_tryget(&c->writes)))
return -EROFS;
retry:
trans_for_each_iter(trans, i) {

@@ -658,7 +659,8 @@ retry:
trans_for_each_iter(trans, i)
bch2_btree_iter_downgrade(i->iter);
out:
percpu_ref_put(&c->writes);
if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
percpu_ref_put(&c->writes);

/* make sure we didn't drop or screw up locks: */
trans_for_each_iter(trans, i) {
@@ -151,7 +151,6 @@ retry:
acc_u64s_percpu((u64 *) ret,
(u64 __percpu *) c->usage[0],
sizeof(*ret) / sizeof(u64) + nr);
percpu_up_read_preempt_enable(&c->mark_lock);

return ret;
}

@@ -223,13 +222,14 @@ static bool bucket_became_unavailable(struct bucket_mark old,
!is_available_bucket(new);
}

void bch2_fs_usage_apply(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct disk_reservation *disk_res,
struct gc_pos gc_pos)
int bch2_fs_usage_apply(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct disk_reservation *disk_res,
struct gc_pos gc_pos)
{
s64 added = fs_usage->s.data + fs_usage->s.reserved;
s64 should_not_have_added;
int ret = 0;

percpu_rwsem_assert_held(&c->mark_lock);

@@ -242,6 +242,7 @@ void bch2_fs_usage_apply(struct bch_fs *c,
"disk usage increased without a reservation")) {
atomic64_sub(should_not_have_added, &c->sectors_available);
added -= should_not_have_added;
ret = -1;
}

if (added > 0) {

@@ -259,6 +260,8 @@ void bch2_fs_usage_apply(struct bch_fs *c,
(u64 *) fs_usage,
sizeof(*fs_usage) / sizeof(u64) + c->replicas.nr);
}

return ret;
}

static inline void account_bucket(struct bch_fs_usage *fs_usage,

@@ -363,10 +366,7 @@ static inline void update_cached_sectors(struct bch_fs *c,
{
struct bch_replicas_padded r;

r.e.data_type = BCH_DATA_CACHED;
r.e.nr_devs = 1;
r.e.nr_required = 1;
r.e.devs[0] = dev;
bch2_replicas_entry_cached(&r.e, dev);

update_replicas(c, fs_usage, &r.e, sectors);
}

@@ -382,7 +382,8 @@ static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
*old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
BUG_ON(!is_available_bucket(new));

new.owned_by_allocator = 1;
new.owned_by_allocator = true;
new.dirty = true;
new.data_type = 0;
new.cached_sectors = 0;
new.dirty_sectors = 0;

@@ -455,6 +456,7 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
type != BCH_DATA_JOURNAL);

bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
new.dirty = true;
new.data_type = type;
checked_add(new.dirty_sectors, sectors);
}));

@@ -480,13 +482,14 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
true);
} else {
struct bucket *g;
struct bucket_mark old, new;
struct bucket_mark new;

rcu_read_lock();

g = bucket(ca, b);
old = bucket_cmpxchg(g, new, ({
new.data_type = type;
bucket_cmpxchg(g, new, ({
new.dirty = true;
new.data_type = type;
checked_add(new.dirty_sectors, sectors);
}));

@@ -537,6 +540,8 @@ static void bch2_mark_pointer(struct bch_fs *c,
do {
new.v.counter = old.v.counter = v;

new.dirty = true;

/*
 * Check this after reading bucket mark to guard against
 * the allocator invalidating a bucket after we've already

@@ -591,9 +596,14 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
int blocks_nonempty_delta;
s64 parity_sectors;

BUG_ON(!sectors);

m = genradix_ptr(&c->stripes[gc], p.idx);

spin_lock(&c->ec_stripes_heap_lock);

if (!m || !m->alive) {
spin_unlock(&c->ec_stripes_heap_lock);
bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
(u64) p.idx);
return -1;

@@ -609,19 +619,21 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
parity_sectors = -parity_sectors;
sectors += parity_sectors;

new = atomic_add_return(sectors, &m->block_sectors[p.block]);
old = new - sectors;
old = m->block_sectors[p.block];
m->block_sectors[p.block] += sectors;
new = m->block_sectors[p.block];

blocks_nonempty_delta = (int) !!new - (int) !!old;
if (!blocks_nonempty_delta)
return 0;
if (blocks_nonempty_delta) {
m->blocks_nonempty += blocks_nonempty_delta;

atomic_add(blocks_nonempty_delta, &m->blocks_nonempty);
if (!gc)
bch2_stripes_heap_update(c, m, p.idx);
}

BUG_ON(atomic_read(&m->blocks_nonempty) < 0);
m->dirty = true;

if (!gc)
bch2_stripes_heap_update(c, m, p.idx);
spin_unlock(&c->ec_stripes_heap_lock);

update_replicas(c, fs_usage, &m->r.e, sectors);

@@ -629,8 +641,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
}

static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
s64 sectors,
enum bch_data_type data_type,
s64 sectors, enum bch_data_type data_type,
struct bch_fs_usage *fs_usage,
unsigned journal_seq, unsigned flags,
bool gc)

@@ -701,14 +712,13 @@ static void bucket_set_stripe(struct bch_fs *c,
BUG_ON(ptr_stale(ca, ptr));

old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
new.dirty = true;
new.stripe = enabled;
if (journal_seq) {
new.journal_seq_valid = 1;
new.journal_seq = journal_seq;
}
}));

BUG_ON(old.stripe == enabled);
}
}

@@ -723,22 +733,19 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
unsigned i;

spin_lock(&c->ec_stripes_heap_lock);

if (!m || (!inserting && !m->alive)) {
spin_unlock(&c->ec_stripes_heap_lock);
bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
idx);
return -1;
}

if (inserting && m->alive) {
bch_err_ratelimited(c, "error marking stripe %zu: already exists",
idx);
return -1;
}
if (m->alive)
bch2_stripes_heap_del(c, m, idx);

BUG_ON(atomic_read(&m->blocks_nonempty));

for (i = 0; i < EC_STRIPE_MAX; i++)
BUG_ON(atomic_read(&m->block_sectors[i]));
memset(m, 0, sizeof(*m));

if (inserting) {
m->sectors = le16_to_cpu(s.v->sectors);

@@ -754,7 +761,6 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
for (i = 0; i < s.v->nr_blocks; i++)
m->r.e.devs[i] = s.v->ptrs[i].dev;
}

/*
 * XXX: account for stripes somehow here

@@ -763,15 +769,23 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
update_replicas(c, fs_usage, &m->r.e, stripe_sectors);
#endif

if (!gc) {
if (inserting)
/* gc recalculates these fields: */
if (!(flags & BCH_BUCKET_MARK_GC)) {
for (i = 0; i < s.v->nr_blocks; i++) {
m->block_sectors[i] =
stripe_blockcount_get(s.v, i);
m->blocks_nonempty += !!m->block_sectors[i];
}
}

if (!gc)
bch2_stripes_heap_insert(c, m, idx);
else
bch2_stripes_heap_del(c, m, idx);
} else {
m->alive = inserting;
m->alive = true;
}

spin_unlock(&c->ec_stripes_heap_lock);

bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
return 0;
}

@@ -879,6 +893,8 @@ void bch2_mark_update(struct btree_insert *trans,
struct bch_fs_usage *fs_usage;
struct gc_pos pos = gc_pos_btree_node(b);
struct bkey_packed *_k;
u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
static int warned_disk_usage = 0;

if (!btree_node_type_needs_gc(iter->btree_id))
return;

@@ -939,7 +955,37 @@ void bch2_mark_update(struct btree_insert *trans,
bch2_btree_node_iter_advance(&node_iter, b);
}

bch2_fs_usage_apply(c, fs_usage, trans->disk_res, pos);
if (bch2_fs_usage_apply(c, fs_usage, trans->disk_res, pos) &&
!warned_disk_usage &&
!xchg(&warned_disk_usage, 1)) {
char buf[200];

pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);

pr_err("while inserting");
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert->k));
pr_err("%s", buf);
pr_err("overlapping with");

node_iter = iter->l[0].iter;
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
KEY_TYPE_discard))) {
struct bkey unpacked;
struct bkey_s_c k;

k = bkey_disassemble(b, _k, &unpacked);

if (btree_node_is_extents(b)
? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
: bkey_cmp(insert->k->k.p, k.k->p))
break;

bch2_bkey_val_to_text(&PBUF(buf), c, k);
pr_err("%s", buf);

bch2_btree_node_iter_advance(&node_iter, b);
}
}

percpu_up_read_preempt_enable(&c->mark_lock);
}
@@ -181,6 +181,8 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);

void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *);

static inline u64 __dev_buckets_available(struct bch_dev *ca,
struct bch_dev_usage stats)
{

@@ -264,8 +266,8 @@ int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *, struct gc_pos);
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *, struct gc_pos);

/* disk reservations: */
@@ -402,6 +402,8 @@ static long bch2_ioctl_usage(struct bch_fs *c,
if (!src)
return -ENOMEM;

percpu_up_read_preempt_enable(&c->mark_lock);

dst.used = bch2_fs_sectors_used(c, *src);
dst.online_reserved = src->s.online_reserved;

libbcachefs/ec.c (179 lines changed)
@@ -11,6 +11,7 @@
#include "ec.h"
#include "error.h"
#include "io.h"
#include "journal_io.h"
#include "keylist.h"
#include "super-io.h"
#include "util.h"

@@ -98,40 +99,6 @@ struct ec_bio {
/* Stripes btree keys: */

static unsigned stripe_csums_per_device(const struct bch_stripe *s)
{
return DIV_ROUND_UP(le16_to_cpu(s->sectors),
1 << s->csum_granularity_bits);
}

static unsigned stripe_csum_offset(const struct bch_stripe *s,
unsigned dev, unsigned csum_idx)
{
unsigned csum_bytes = bch_crc_bytes[s->csum_type];

return sizeof(struct bch_stripe) +
sizeof(struct bch_extent_ptr) * s->nr_blocks +
(dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes;
}

static unsigned stripe_blockcount_offset(const struct bch_stripe *s,
unsigned idx)
{
return stripe_csum_offset(s, s->nr_blocks, 0) +
sizeof(16) * idx;
}

static unsigned stripe_val_u64s(const struct bch_stripe *s)
{
return DIV_ROUND_UP(stripe_blockcount_offset(s, s->nr_blocks),
sizeof(u64));
}

static void *stripe_csum(struct bch_stripe *s, unsigned dev, unsigned csum_idx)
{
return (void *) s + stripe_csum_offset(s, dev, csum_idx);
}

const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;

@@ -164,8 +131,9 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
1U << s->csum_granularity_bits);

for (i = 0; i < s->nr_blocks; i++)
pr_buf(out, " %u:%llu", s->ptrs[i].dev,
(u64) s->ptrs[i].offset);
pr_buf(out, " %u:%llu:%u", s->ptrs[i].dev,
(u64) s->ptrs[i].offset,
stripe_blockcount_get(s, i));
}

static int ptr_matches_stripe(struct bch_fs *c,

@@ -609,29 +577,15 @@ static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
BUG_ON(h->data[m->heap_idx].idx != idx);
}

static inline unsigned stripe_entry_blocks(struct stripe *m)
{
return atomic_read(&m->blocks_nonempty);
}

void bch2_stripes_heap_update(struct bch_fs *c,
struct stripe *m, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
bool queue_delete;
size_t i;

spin_lock(&c->ec_stripes_heap_lock);

if (!m->alive) {
spin_unlock(&c->ec_stripes_heap_lock);
return;
}

heap_verify_backpointer(c, idx);

h->data[m->heap_idx].blocks_nonempty =
stripe_entry_blocks(m);
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;

i = m->heap_idx;
heap_sift_up(h, i, ec_stripes_heap_cmp,

@@ -641,44 +595,35 @@ void bch2_stripes_heap_update(struct bch_fs *c,
heap_verify_backpointer(c, idx);

queue_delete = stripe_idx_to_delete(c) >= 0;
spin_unlock(&c->ec_stripes_heap_lock);

if (queue_delete)
if (stripe_idx_to_delete(c) >= 0)
schedule_work(&c->ec_stripe_delete_work);
}

void bch2_stripes_heap_del(struct bch_fs *c,
struct stripe *m, size_t idx)
{
spin_lock(&c->ec_stripes_heap_lock);
heap_verify_backpointer(c, idx);

m->alive = false;
heap_del(&c->ec_stripes_heap, m->heap_idx,
ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
spin_unlock(&c->ec_stripes_heap_lock);
}

void bch2_stripes_heap_insert(struct bch_fs *c,
struct stripe *m, size_t idx)
{
spin_lock(&c->ec_stripes_heap_lock);

BUG_ON(heap_full(&c->ec_stripes_heap));

heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
.idx = idx,
.blocks_nonempty = stripe_entry_blocks(m),
.blocks_nonempty = m->blocks_nonempty,
}),
ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
m->alive = true;

heap_verify_backpointer(c, idx);

spin_unlock(&c->ec_stripes_heap_lock);
}

/* stripe deletion */

@@ -1217,6 +1162,116 @@ unlock:
mutex_unlock(&c->ec_new_stripe_lock);
}

static int __bch2_stripe_write_key(struct bch_fs *c,
struct btree_iter *iter,
struct stripe *m,
size_t idx,
struct bkey_i_stripe *new_key,
unsigned flags)
{
struct bkey_s_c k;
unsigned i;
int ret;

bch2_btree_iter_set_pos(iter, POS(0, idx));

k = bch2_btree_iter_peek_slot(iter);
ret = btree_iter_err(k);
if (ret)
return ret;

if (k.k->type != KEY_TYPE_stripe)
return -EIO;

bkey_reassemble(&new_key->k_i, k);

spin_lock(&c->ec_stripes_heap_lock);

for (i = 0; i < new_key->v.nr_blocks; i++)
stripe_blockcount_set(&new_key->v, i,
m->block_sectors[i]);
m->dirty = false;

spin_unlock(&c->ec_stripes_heap_lock);

return bch2_btree_insert_at(c, NULL, NULL,
BTREE_INSERT_NOFAIL|flags,
BTREE_INSERT_ENTRY(iter, &new_key->k_i));
}

int bch2_stripes_write(struct bch_fs *c, bool *wrote)
{
struct btree_iter iter;
struct genradix_iter giter;
struct bkey_i_stripe *new_key;
struct stripe *m;
int ret = 0;

new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL);
BUG_ON(!new_key);

bch2_btree_iter_init(&iter, c, BTREE_ID_EC, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);

genradix_for_each(&c->stripes[0], giter, m) {
if (!m->dirty)
continue;

ret = __bch2_stripe_write_key(c, &iter, m, giter.pos,
new_key, BTREE_INSERT_NOCHECK_RW);
if (ret)
break;

*wrote = true;
}

bch2_btree_iter_unlock(&iter);

kfree(new_key);

return ret;
}

static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k)
{
struct gc_pos pos = { 0 };

bch2_mark_key(c, k, true, 0, pos, NULL, 0, 0);
}

int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list)
{
struct journal_replay *r;
struct btree_iter iter;
struct bkey_s_c k;
int ret;

ret = bch2_fs_ec_start(c);
if (ret)
return ret;

for_each_btree_key(&iter, c, BTREE_ID_EC, POS_MIN, 0, k) {
bch2_stripe_read_key(c, k);
bch2_btree_iter_cond_resched(&iter);
}

ret = bch2_btree_iter_unlock(&iter);
if (ret)
return ret;

list_for_each_entry(r, journal_replay_list, list) {
struct bkey_i *k, *n;
struct jset_entry *entry;

for_each_jset_key(k, n, entry, &r->j)
if (entry->btree_id == BTREE_ID_EC)
bch2_stripe_read_key(c, bkey_i_to_s_c(k));
}

return 0;
}

int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
{
struct btree_iter iter;
@@ -13,6 +13,55 @@ void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
.val_to_text = bch2_stripe_to_text, \
}

static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
{
return DIV_ROUND_UP(le16_to_cpu(s->sectors),
1 << s->csum_granularity_bits);
}

static inline unsigned stripe_csum_offset(const struct bch_stripe *s,
unsigned dev, unsigned csum_idx)
{
unsigned csum_bytes = bch_crc_bytes[s->csum_type];

return sizeof(struct bch_stripe) +
sizeof(struct bch_extent_ptr) * s->nr_blocks +
(dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes;
}

static inline unsigned stripe_blockcount_offset(const struct bch_stripe *s,
unsigned idx)
{
return stripe_csum_offset(s, s->nr_blocks, 0) +
sizeof(u16) * idx;
}

static inline unsigned stripe_blockcount_get(const struct bch_stripe *s,
unsigned idx)
{
return le16_to_cpup((void *) s + stripe_blockcount_offset(s, idx));
}

static inline void stripe_blockcount_set(struct bch_stripe *s,
unsigned idx, unsigned v)
{
__le16 *p = (void *) s + stripe_blockcount_offset(s, idx);

*p = cpu_to_le16(v);
}

static inline unsigned stripe_val_u64s(const struct bch_stripe *s)
{
return DIV_ROUND_UP(stripe_blockcount_offset(s, s->nr_blocks),
sizeof(u64));
}

static inline void *stripe_csum(struct bch_stripe *s,
unsigned dev, unsigned csum_idx)
{
return (void *) s + stripe_csum_offset(s, dev, csum_idx);
}

struct bch_read_bio;

struct ec_stripe_buf {
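The helpers moved into the header above define where the persisted per-block sector counts live inside a stripe key: after the fixed `bch_stripe` header, the block pointers, and the per-device checksums, as an array of little-endian u16s. A hedged sketch of how they are exercised when the counts are written back (this mirrors what `__bch2_stripe_write_key()` does in the ec.c hunk earlier; the standalone helper name is made up):

```c
/*
 * Illustrative only: copy in-memory per-block sector counts into a stripe
 * key value before it is reinserted into the EC btree, assuming
 * block_sectors[] has at least v->nr_blocks entries.
 */
static void example_persist_blocks_used(struct bch_stripe *v,
					const u16 *block_sectors)
{
	unsigned i;

	for (i = 0; i < v->nr_blocks; i++)
		stripe_blockcount_set(v, i, block_sectors[i]);
}
```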
@@ -100,6 +149,9 @@ void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
void bch2_ec_flush_new_stripes(struct bch_fs *);

int bch2_stripes_read(struct bch_fs *, struct list_head *);
int bch2_stripes_write(struct bch_fs *, bool *);

int bch2_ec_mem_alloc(struct bch_fs *, bool);

int bch2_fs_ec_start(struct bch_fs *);

@@ -19,9 +19,10 @@ struct stripe {
u8 nr_blocks;
u8 nr_redundant;

u8 alive;
atomic_t blocks_nonempty;
atomic_t block_sectors[EC_STRIPE_MAX];
unsigned alive:1;
unsigned dirty:1;
u8 blocks_nonempty;
u16 block_sectors[EC_STRIPE_MAX];

struct bch_replicas_padded r;
};
@@ -1664,12 +1664,13 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
return ret == BCH_MERGE_MERGE;
}

int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)
bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
unsigned nr_replicas)
{
struct btree_iter iter;
struct bpos end = pos;
struct bkey_s_c k;
int ret = 0;
bool ret = true;

end.offset += size;

@@ -1678,8 +1679,8 @@ int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
break;

if (!bch2_extent_is_fully_allocated(k)) {
ret = -ENOSPC;
if (nr_replicas > bch2_bkey_nr_ptrs_allocated(k)) {
ret = false;
break;
}
}

@@ -1688,6 +1689,29 @@ int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)
return ret;
}

unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c k)
{
unsigned ret = 0;

switch (k.k->type) {
case KEY_TYPE_extent: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;

extent_for_each_ptr_decode(e, p, entry)
ret += !p.ptr.cached &&
p.crc.compression_type == BCH_COMPRESSION_NONE;
break;
}
case KEY_TYPE_reservation:
ret = bkey_s_c_to_reservation(k).v->nr_replicas;
break;
}

return ret;
}

/* KEY_TYPE_reservation: */

const char *bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k)

@@ -571,6 +571,7 @@ static inline void extent_save(struct btree *b, struct bkey_packed *dst,
BUG_ON(!bch2_bkey_pack_key(dst, src, f));
}

int bch2_check_range_allocated(struct bch_fs *, struct bpos, u64);
bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned);
unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);

#endif /* _BCACHEFS_EXTENTS_H */
@@ -262,18 +262,20 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
}
}

static inline size_t eytzinger0_find(void *base, size_t nr, size_t size,
eytzinger_cmp_fn cmp, const void *search)
{
size_t i = 0;
int res;

while (i < nr &&
(res = cmp(search, base + i * size, size)))
i = eytzinger0_child(i, res > 0);

return i;
}
#define eytzinger0_find(base, nr, size, _cmp, search) \
({ \
void *_base = (base); \
void *_search = (search); \
size_t _nr = (nr); \
size_t _size = (size); \
size_t _i = 0; \
int _res; \
\
while (_i < _nr && \
(_res = _cmp(_search, _base + _i * _size, _size))) \
_i = eytzinger0_child(_i, _res > 0); \
_i; \
})

void eytzinger0_sort(void *, size_t, size_t,
int (*cmp_func)(const void *, const void *, size_t),
@@ -253,7 +253,9 @@ static s64 sum_sector_overwrites(struct bkey_i *new, struct btree_iter *_iter,
BUG_ON(btree_iter_err(old));

if (allocating &&
!bch2_extent_is_fully_allocated(old))
!*allocating &&
bch2_bkey_nr_ptrs_allocated(old) <
bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(new)))
*allocating = true;

delta += (min(new->k.p.offset,

@@ -858,9 +860,7 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
{
struct bvec_iter iter;
struct bio_vec bv;
unsigned nr_ptrs = !bch2_extent_is_compressed(k)
? bch2_bkey_nr_dirty_ptrs(k)
: 0;
unsigned nr_ptrs = bch2_bkey_nr_ptrs_allocated(k);

bio_for_each_segment(bv, bio, iter) {
/* brand new pages, don't need to be locked: */

@@ -1759,6 +1759,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
struct bch_inode_info *inode = dio->iop.inode;
struct bio *bio = &dio->iop.op.wbio.bio;
struct bio_vec *bv;
loff_t offset;
bool sync;
long ret;
int i;

@@ -1770,12 +1771,16 @@ static long bch2_dio_write_loop(struct dio_write *dio)
__pagecache_block_get(&mapping->add_lock);

/* Write and invalidate pagecache range that we're writing to: */
ret = write_invalidate_inode_pages_range(mapping, req->ki_pos,
req->ki_pos + iov_iter_count(&dio->iter) - 1);
offset = req->ki_pos + (dio->iop.op.written << 9);
ret = write_invalidate_inode_pages_range(mapping,
offset,
offset + iov_iter_count(&dio->iter) - 1);
if (unlikely(ret))
goto err;

while (1) {
offset = req->ki_pos + (dio->iop.op.written << 9);

BUG_ON(current->pagecache_lock);
current->pagecache_lock = &mapping->add_lock;
if (kthread)

@@ -1792,13 +1797,12 @@ static long bch2_dio_write_loop(struct dio_write *dio)
/* gup might have faulted pages back in: */
ret = write_invalidate_inode_pages_range(mapping,
req->ki_pos + (dio->iop.op.written << 9),
req->ki_pos + iov_iter_count(&dio->iter) - 1);
offset,
offset + bio->bi_iter.bi_size - 1);
if (unlikely(ret))
goto err;

dio->iop.op.pos = POS(inode->v.i_ino,
(req->ki_pos >> 9) + dio->iop.op.written);
dio->iop.op.pos = POS(inode->v.i_ino, offset >> 9);

task_io_account_write(bio->bi_iter.bi_size);

@@ -1878,7 +1882,6 @@ static int bch2_direct_IO_write(struct kiocb *req,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct dio_write *dio;
struct bio *bio;
loff_t offset = req->ki_pos;
ssize_t ret;

lockdep_assert_held(&inode->v.i_rwsem);

@@ -1886,7 +1889,7 @@ static int bch2_direct_IO_write(struct kiocb *req,
if (unlikely(!iter->count))
return 0;

if (unlikely((offset|iter->count) & (block_bytes(c) - 1)))
if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
return -EINVAL;

bio = bio_alloc_bioset(GFP_KERNEL,

@@ -1898,7 +1901,7 @@ static int bch2_direct_IO_write(struct kiocb *req,
dio->mm = current->mm;
dio->loop = false;
dio->sync = is_sync_kiocb(req) ||
offset + iter->count > inode->v.i_size;
req->ki_pos + iter->count > inode->v.i_size;
dio->free_iov = false;
dio->quota_res.sectors = 0;
dio->iter = *iter;

@@ -1915,19 +1918,20 @@ static int bch2_direct_IO_write(struct kiocb *req,
if (unlikely(ret))
goto err;

dio->iop.op.nr_replicas = dio->iop.op.opts.data_replicas;

ret = bch2_disk_reservation_get(c, &dio->iop.op.res, iter->count >> 9,
dio->iop.op.opts.data_replicas, 0);
if (unlikely(ret)) {
if (bch2_check_range_allocated(c, POS(inode->v.i_ino,
offset >> 9),
iter->count >> 9))
if (!bch2_check_range_allocated(c, POS(inode->v.i_ino,
req->ki_pos >> 9),
iter->count >> 9,
dio->iop.op.opts.data_replicas))
goto err;

dio->iop.unalloc = true;
}

dio->iop.op.nr_replicas = dio->iop.op.res.nr_replicas;

return bch2_dio_write_loop(dio);
err:
bch2_disk_reservation_put(c, &dio->iop.op.res);
@@ -694,6 +694,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
}

list_for_each_entry(i, list, list) {
struct bch_replicas_padded replicas;
char buf[80];

bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);

ret = jset_validate_entries(c, &i->j, READ);
if (ret)
goto fsck_err;

@@ -705,11 +710,11 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
if (!degraded &&
(test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
fsck_err_on(!bch2_replicas_marked(c, BCH_DATA_JOURNAL,
i->devs, false), c,
"superblock not marked as containing replicas (type %u)",
BCH_DATA_JOURNAL))) {
ret = bch2_mark_replicas(c, BCH_DATA_JOURNAL, i->devs);
fsck_err_on(!bch2_replicas_marked(c, &replicas.e, false), c,
"superblock not marked as containing replicas %s",
(bch2_replicas_entry_to_text(&PBUF(buf),
&replicas.e), buf)))) {
ret = bch2_mark_replicas(c, &replicas.e);
if (ret)
return ret;
}

@@ -1108,6 +1113,7 @@ static void journal_write_done(struct closure *cl)
struct journal_buf *w = journal_prev_buf(j);
struct bch_devs_list devs =
bch2_bkey_devs(bkey_i_to_s_c(&w->key));
struct bch_replicas_padded replicas;
u64 seq = le64_to_cpu(w->data->seq);
u64 last_seq = le64_to_cpu(w->data->last_seq);

@@ -1118,7 +1124,9 @@ static void journal_write_done(struct closure *cl)
goto err;
}

if (bch2_mark_replicas(c, BCH_DATA_JOURNAL, devs))
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, devs);

if (bch2_mark_replicas(c, &replicas.e))
goto err;

spin_lock(&j->lock);
@@ -335,7 +335,7 @@ void bch2_journal_reclaim_work(struct work_struct *work)
mutex_unlock(&j->reclaim_lock);

if (!test_bit(BCH_FS_RO, &c->flags))
queue_delayed_work(system_freezable_wq, &j->reclaim_work,
queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work,
msecs_to_jiffies(j->reclaim_delay_ms));
}

@@ -387,7 +387,6 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_entry_pin_list *p;
struct bch_devs_list devs;
u64 iter, seq = 0;
int ret = 0;

@@ -412,12 +411,15 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
spin_lock(&j->lock);
while (!ret && seq < j->pin.back) {
struct bch_replicas_padded replicas;

seq = max(seq, journal_last_seq(j));
devs = journal_seq_pin(j, seq)->devs;
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL,
journal_seq_pin(j, seq)->devs);
seq++;

spin_unlock(&j->lock);
ret = bch2_mark_replicas(c, BCH_DATA_JOURNAL, devs);
ret = bch2_mark_replicas(c, &replicas.e);
spin_lock(&j->lock);
}
spin_unlock(&j->lock);
@@ -4,6 +4,7 @@
#include "bcachefs.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "extents.h"
#include "io.h"

@@ -152,6 +153,16 @@ retry:
bch2_btree_iter_unlock(&iter);
}

/* flush relevant btree updates */
while (1) {
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c) ||
c->btree_roots_dirty);
if (!bch2_btree_interior_updates_nr_pending(c))
break;
bch2_journal_meta(&c->journal);
}

ret = 0;
out:
ret = bch2_replicas_gc_end(c, ret);
@@ -3,6 +3,7 @@
#include "alloc_foreground.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "disk_groups.h"
#include "inode.h"

@@ -763,6 +764,16 @@ int bch2_data_job(struct bch_fs *c,
ret = bch2_journal_flush_device_pins(&c->journal, -1);

ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;

while (1) {
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c) ||
c->btree_roots_dirty);
if (!bch2_btree_interior_updates_nr_pending(c))
break;
bch2_journal_meta(&c->journal);
}

ret = bch2_gc_btree_replicas(c) ?: ret;

ret = bch2_move_data(c, NULL,
@@ -214,12 +214,12 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret)
goto err;

set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);

err = "cannot allocate memory";
ret = bch2_fs_ec_start(c);
ret = bch2_stripes_read(c, &journal);
if (ret)
goto err;
pr_info("stripes_read done");

set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);

bch_verbose(c, "starting mark and sweep:");
err = "error in recovery";
@@ -13,6 +13,16 @@ static inline int u8_cmp(u8 l, u8 r)
return (l > r) - (l < r);
}

static void verify_replicas_entry_sorted(struct bch_replicas_entry *e)
{
#ifdef CONFIG_BCACHES_DEBUG
unsigned i;

for (i = 0; i + 1 < e->nr_devs; i++)
BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}

static void replicas_entry_sort(struct bch_replicas_entry *e)
{
bubble_sort(e->devs, e->nr_devs, u8_cmp);
@@ -23,19 +33,13 @@ static void replicas_entry_sort(struct bch_replicas_entry *e)
(void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
_i = (void *) (_i) + (_r)->entry_size)

static inline struct bch_replicas_entry *
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
{
return (void *) r->entries + r->entry_size * i;
}

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
}

static void replicas_entry_to_text(struct printbuf *out,
struct bch_replicas_entry *e)
void bch2_replicas_entry_to_text(struct printbuf *out,
struct bch_replicas_entry *e)
{
unsigned i;

@@ -60,7 +64,7 @@ void bch2_cpu_replicas_to_text(struct printbuf *out,
pr_buf(out, " ");
first = false;

replicas_entry_to_text(out, e);
bch2_replicas_entry_to_text(out, e);
}
}

@@ -100,8 +104,8 @@ static void stripe_to_replicas(struct bkey_s_c k,
r->devs[r->nr_devs++] = ptr->dev;
}

static void bkey_to_replicas(struct bkey_s_c k,
struct bch_replicas_entry *e)
static void bkey_to_replicas(struct bch_replicas_entry *e,
struct bkey_s_c k)
{
e->nr_devs = 0;

@@ -119,11 +123,13 @@ static void bkey_to_replicas(struct bkey_s_c k,
stripe_to_replicas(k, e);
break;
}

replicas_entry_sort(e);
}

static inline void devlist_to_replicas(struct bch_devs_list devs,
enum bch_data_type data_type,
struct bch_replicas_entry *e)
void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
enum bch_data_type data_type,
struct bch_devs_list devs)
{
unsigned i;

@@ -137,6 +143,8 @@ static inline void devlist_to_replicas(struct bch_devs_list devs,

for (i = 0; i < devs.nr; i++)
e->devs[e->nr_devs++] = devs.devs[i];

replicas_entry_sort(e);
}

static struct bch_replicas_cpu
@@ -150,6 +158,9 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
replicas_entry_bytes(new_entry)),
};

BUG_ON(!new_entry->data_type);
verify_replicas_entry_sorted(new_entry);

new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO);
if (!new.entries)
return new;
@@ -175,13 +186,12 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
if (unlikely(entry_size > r->entry_size))
return -1;

replicas_entry_sort(search);

while (entry_size < r->entry_size)
((char *) search)[entry_size++] = 0;
verify_replicas_entry_sorted(search);

#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size)
idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
memcmp, search);
entry_cmp, search);
#undef entry_cmp

return idx < r->nr ? idx : -1;
}
@@ -189,6 +199,8 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
int bch2_replicas_entry_idx(struct bch_fs *c,
struct bch_replicas_entry *search)
{
replicas_entry_sort(search);

return __replicas_entry_idx(&c->replicas, search);
}

@@ -198,12 +210,17 @@ static bool __replicas_has_entry(struct bch_replicas_cpu *r,
return __replicas_entry_idx(r, search) >= 0;
}

static bool replicas_has_entry(struct bch_fs *c,
struct bch_replicas_entry *search,
bool check_gc_replicas)
bool bch2_replicas_marked(struct bch_fs *c,
struct bch_replicas_entry *search,
bool check_gc_replicas)
{
bool marked;

if (!search->nr_devs)
return true;

verify_replicas_entry_sorted(search);

percpu_down_read_preempt_disable(&c->mark_lock);
marked = __replicas_has_entry(&c->replicas, search) &&
(!check_gc_replicas ||
@@ -214,35 +231,31 @@ static bool replicas_has_entry(struct bch_fs *c,
return marked;
}

static void __replicas_table_update(struct bch_fs_usage __percpu *dst,
static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
struct bch_replicas_cpu *dst_r,
struct bch_fs_usage __percpu *src,
struct bch_fs_usage __percpu *src_p,
struct bch_replicas_cpu *src_r)
{
int src_idx, dst_idx, cpu;
unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
struct bch_fs_usage *dst, *src = (void *)
bch2_acc_percpu_u64s((void *) src_p, src_nr);
int src_idx, dst_idx;

preempt_disable();
dst = this_cpu_ptr(dst_p);
preempt_enable();

*dst = *src;

for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
u64 *dst_v, src_v = 0;

for_each_possible_cpu(cpu)
src_v += *per_cpu_ptr(&src->data[src_idx], cpu);
if (!src->data[src_idx])
continue;

dst_idx = __replicas_entry_idx(dst_r,
cpu_replicas_entry(src_r, src_idx));
BUG_ON(dst_idx < 0);

if (dst_idx < 0) {
BUG_ON(src_v);
continue;
}

preempt_disable();

dst_v = this_cpu_ptr(&dst->data[dst_idx]);
BUG_ON(*dst_v);

*dst_v = src_v;

preempt_enable();
dst->data[dst_idx] = src->data[src_idx];
}
}

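The rewritten __replicas_table_update() above relies on every old entry having an index in the new table. A minimal sketch of that index mapping, using only cpu_replicas_entry() and __replicas_entry_idx() from this file (the helper copy_usage_entry is illustrative):

/* Sketch only: move one usage counter from the old table layout to the new. */
static void copy_usage_entry(struct bch_fs_usage *dst, struct bch_replicas_cpu *dst_r,
			     struct bch_fs_usage *src, struct bch_replicas_cpu *src_r,
			     int src_idx)
{
	/* Find where this replicas entry lives in the new table... */
	int dst_idx = __replicas_entry_idx(dst_r,
				cpu_replicas_entry(src_r, src_idx));

	/* ...and copy its counter; the new table must contain every old entry. */
	BUG_ON(dst_idx < 0);
	dst->data[dst_idx] = src->data[src_idx];
}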
@@ -344,30 +357,32 @@ err:
return ret;
}

static int __bch2_mark_replicas(struct bch_fs *c,
struct bch_replicas_entry *devs)
int bch2_mark_replicas(struct bch_fs *c,
struct bch_replicas_entry *r)
{
return likely(replicas_has_entry(c, devs, true))
return likely(bch2_replicas_marked(c, r, true))
? 0
: bch2_mark_replicas_slowpath(c, devs);
: bch2_mark_replicas_slowpath(c, r);
}

int bch2_mark_replicas(struct bch_fs *c,
enum bch_data_type data_type,
struct bch_devs_list devs)
bool bch2_bkey_replicas_marked(struct bch_fs *c,
struct bkey_s_c k,
bool check_gc_replicas)
{
struct bch_replicas_padded search;
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
unsigned i;

if (!devs.nr)
return 0;
for (i = 0; i < cached.nr; i++) {
bch2_replicas_entry_cached(&search.e, cached.devs[i]);

memset(&search, 0, sizeof(search));
if (!bch2_replicas_marked(c, &search.e, check_gc_replicas))
return false;
}

BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
bkey_to_replicas(&search.e, k);

devlist_to_replicas(devs, data_type, &search.e);

return __bch2_mark_replicas(c, &search.e);
return bch2_replicas_marked(c, &search.e, check_gc_replicas);
}

int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
@@ -377,18 +392,17 @@ int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
unsigned i;
int ret;

memset(&search, 0, sizeof(search));
for (i = 0; i < cached.nr; i++) {
bch2_replicas_entry_cached(&search.e, cached.devs[i]);

for (i = 0; i < cached.nr; i++)
if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
bch2_dev_list_single(cached.devs[i]))))
ret = bch2_mark_replicas(c, &search.e);
if (ret)
return ret;
}

bkey_to_replicas(k, &search.e);
bkey_to_replicas(&search.e, k);

return search.e.nr_devs
? __bch2_mark_replicas(c, &search.e)
: 0;
return bch2_mark_replicas(c, &search.e);
}

int bch2_replicas_gc_end(struct bch_fs *c, int ret)
@@ -749,7 +763,7 @@ static void bch2_sb_replicas_to_text(struct printbuf *out,
pr_buf(out, " ");
first = false;

replicas_entry_to_text(out, e);
bch2_replicas_entry_to_text(out, e);
}
}

@@ -798,46 +812,6 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {

/* Query replicas: */

bool bch2_replicas_marked(struct bch_fs *c,
enum bch_data_type data_type,
struct bch_devs_list devs,
bool check_gc_replicas)
{
struct bch_replicas_padded search;

if (!devs.nr)
return true;

memset(&search, 0, sizeof(search));

devlist_to_replicas(devs, data_type, &search.e);

return replicas_has_entry(c, &search.e, check_gc_replicas);
}

bool bch2_bkey_replicas_marked(struct bch_fs *c,
struct bkey_s_c k,
bool check_gc_replicas)
{
struct bch_replicas_padded search;
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
unsigned i;

memset(&search, 0, sizeof(search));

for (i = 0; i < cached.nr; i++)
if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
bch2_dev_list_single(cached.devs[i]),
check_gc_replicas))
return false;

bkey_to_replicas(k, &search.e);

return search.e.nr_devs
? replicas_has_entry(c, &search.e, check_gc_replicas)
: true;
}

struct replicas_status __bch2_replicas_status(struct bch_fs *c,
struct bch_devs_mask online_devs)
{
@@ -4,17 +4,39 @@
#include "eytzinger.h"
#include "replicas_types.h"

void bch2_replicas_entry_to_text(struct printbuf *,
struct bch_replicas_entry *);
void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);

static inline struct bch_replicas_entry *
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
{
return (void *) r->entries + r->entry_size * i;
}

int bch2_replicas_entry_idx(struct bch_fs *,
struct bch_replicas_entry *);
bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
struct bch_devs_list, bool);

void bch2_devlist_to_replicas(struct bch_replicas_entry *,
enum bch_data_type,
struct bch_devs_list);
bool bch2_replicas_marked(struct bch_fs *,
struct bch_replicas_entry *, bool);
int bch2_mark_replicas(struct bch_fs *,
struct bch_replicas_entry *);

bool bch2_bkey_replicas_marked(struct bch_fs *,
struct bkey_s_c, bool);
int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
struct bch_devs_list);
int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);

void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e,
unsigned dev)
{
e->data_type = BCH_DATA_CACHED;
e->nr_devs = 1;
e->nr_required = 1;
e->devs[0] = dev;
}

struct replicas_status {
struct {
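The header now pairs the entry-based query/mark API with the small bch2_replicas_entry_cached() helper. A minimal sketch of how the two combine for cached pointers, assuming struct bch_replicas_padded from replicas.c above (the function cached_devs_marked and its arguments are illustrative):

/* Sketch only: check that every cached-copy device is a marked replica. */
static bool cached_devs_marked(struct bch_fs *c, struct bch_devs_list cached)
{
	struct bch_replicas_padded search;
	unsigned i;

	for (i = 0; i < cached.nr; i++) {
		/* Build a one-device BCH_DATA_CACHED entry for this pointer: */
		bch2_replicas_entry_cached(&search.e, cached.devs[i]);

		if (!bch2_replicas_marked(c, &search.e, true))
			return false;
	}

	return true;
}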
@@ -205,7 +205,9 @@ int bch2_congested(void *data, int bdi_bits)
static void __bch2_fs_read_only(struct bch_fs *c)
{
struct bch_dev *ca;
bool wrote;
unsigned i;
int ret;

bch2_rebalance_stop(c);

@@ -220,23 +222,42 @@ static void __bch2_fs_read_only(struct bch_fs *c)
*/
bch2_journal_flush_all_pins(&c->journal);

do {
ret = bch2_alloc_write(c, false, &wrote);
if (ret) {
bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
break;
}

ret = bch2_stripes_write(c, &wrote);
if (ret) {
bch2_fs_inconsistent(c, "error writing out stripes");
break;
}

for_each_member_device(ca, c, i)
bch2_dev_allocator_quiesce(c, ca);

bch2_journal_flush_all_pins(&c->journal);

/*
* We need to explicitly wait on btree interior updates to complete
* before stopping the journal, flushing all journal pins isn't
* sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
* interior updates have to drop their journal pin before they're
* fully complete:
*/
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
} while (wrote);

for_each_member_device(ca, c, i)
bch2_dev_allocator_stop(ca);

bch2_journal_flush_all_pins(&c->journal);

/*
* We need to explicitly wait on btree interior updates to complete
* before stopping the journal, flushing all journal pins isn't
* sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
* interior updates have to drop their journal pin before they're
* fully complete:
*/
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));

bch2_fs_journal_stop(&c->journal);

/* XXX: mark super that alloc info is persistent */

/*
* the journal kicks off btree writes via reclaim - wait for in flight
* writes after stopping journal:
@@ -420,6 +441,8 @@ static void bch2_fs_free(struct bch_fs *c)
kfree(c->replicas_gc.entries);
kfree(rcu_dereference_protected(c->disk_groups, 1));

if (c->journal_reclaim_wq)
destroy_workqueue(c->journal_reclaim_wq);
if (c->copygc_wq)
destroy_workqueue(c->copygc_wq);
if (c->wq)
@@ -638,6 +661,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcache_copygc",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
!(c->journal_reclaim_wq = alloc_workqueue("bcache_journal",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
percpu_ref_init(&c->writes, bch2_writes_disabled, 0, GFP_KERNEL) ||
mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1,
sizeof(struct btree_reserve)) ||
@@ -1297,8 +1322,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
if (data) {
char data_has_str[100];

bch2_string_opt_to_text(&PBUF(data_has_str),
bch2_data_types, data);
bch2_flags_to_text(&PBUF(data_has_str),
bch2_data_types, data);
bch_err(ca, "Remove failed, still has data (%s)", data_has_str);
ret = -EBUSY;
goto err;
@@ -234,17 +234,17 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
{
struct printbuf out = _PBUF(buf, PAGE_SIZE);
struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c);
unsigned replicas;
unsigned i;

if (!fs_usage)
return -ENOMEM;

pr_buf(&out, "capacity:\t\t%llu\n", c->capacity);

for (replicas = 0;
replicas < ARRAY_SIZE(fs_usage->persistent_reserved);
replicas++) {
pr_buf(&out, "%u replicas:\n", replicas + 1);
for (i = 0;
i < ARRAY_SIZE(fs_usage->persistent_reserved);
i++) {
pr_buf(&out, "%u replicas:\n", i + 1);
#if 0
for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
pr_buf(&out, "\t%s:\t\t%llu\n",
@@ -254,12 +254,23 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
stats.replicas[replicas].ec_data);
#endif
pr_buf(&out, "\treserved:\t%llu\n",
fs_usage->persistent_reserved[replicas]);
fs_usage->persistent_reserved[i]);
}

pr_buf(&out, "online reserved:\t%llu\n",
fs_usage->s.online_reserved);

for (i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);

pr_buf(&out, "\t");
bch2_replicas_entry_to_text(&out, e);
pr_buf(&out, ":\t%llu\n", fs_usage->data[i]);
}

percpu_up_read_preempt_enable(&c->mark_lock);

kfree(fs_usage);

return out.pos - buf;
@@ -797,6 +808,12 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
{
struct bch_fs *c = ca->fs;
struct bch_dev_usage stats = bch2_dev_usage_read(c, ca);
unsigned i, nr[BCH_DATA_NR];

memset(nr, 0, sizeof(nr));

for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].type]++;

return scnprintf(buf, PAGE_SIZE,
"free_inc: %zu/%zu\n"
@@ -823,7 +840,10 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
" copygc threshold: %llu\n"
"freelist_wait: %s\n"
"open buckets: %u/%u (reserved %u)\n"
"open_buckets_wait: %s\n",
"open_buckets_wait: %s\n"
"open_buckets_btree: %u\n"
"open_buckets_user: %u\n"
"btree reserve cache: %u\n",
fifo_used(&ca->free_inc), ca->free_inc.size,
fifo_used(&ca->free[RESERVE_BTREE]), ca->free[RESERVE_BTREE].size,
fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
@@ -845,8 +865,12 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
stats.sectors_fragmented,
ca->copygc_threshold,
c->freelist_wait.list.first ? "waiting" : "empty",
c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE,
c->open_buckets_wait.list.first ? "waiting" : "empty");
c->open_buckets_nr_free, OPEN_BUCKETS_COUNT,
BTREE_NODE_OPEN_BUCKET_RESERVE,
c->open_buckets_wait.list.first ? "waiting" : "empty",
nr[BCH_DATA_BTREE],
nr[BCH_DATA_USER],
c->btree_reserve_cache_nr);
}

static const char * const bch2_rw[] = {
@@ -133,6 +133,7 @@ void bch2_flags_to_text(struct printbuf *out,
const char * const list[], u64 flags)
{
unsigned bit, nr = 0;
bool first = true;

if (out->pos != out->end)
*out->pos = '\0';
@@ -141,7 +142,10 @@ void bch2_flags_to_text(struct printbuf *out,
nr++;

while (flags && (bit = __ffs(flags)) < nr) {
pr_buf(out, "%s,", list[bit]);
pr_buf(out, "%s", list[bit]);
if (!first)
pr_buf(out, ",");
first = false;
flags ^= 1 << bit;
}
}
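The hunk above stops emitting an unconditional trailing comma and instead inserts the separator between entries. A rough usage sketch of the corrected behaviour, assuming bch2_data_types[] names the data types as "btree", "user", and so on (the helper show_data_types and the example output are illustrative):

/* Sketch only: demonstrate the separator behaviour after this change. */
static void show_data_types(u64 data)
{
	char buf[100];

	bch2_flags_to_text(&PBUF(buf), bch2_data_types, data);
	/* e.g. btree + user bits set -> buf reads "btree,user", no trailing comma */
	pr_info("still has data (%s)\n", buf);
}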
@@ -894,3 +898,28 @@ void eytzinger0_find_test(void)
kfree(test_array);
}
#endif

/*
* Accumulate percpu counters onto one cpu's copy - only valid when access
* against any percpu counter is guarded against
*/
u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr)
{
u64 *ret;
int cpu;

preempt_disable();
ret = this_cpu_ptr(p);
preempt_enable();

for_each_possible_cpu(cpu) {
u64 *i = per_cpu_ptr(p, cpu);

if (i != ret) {
acc_u64s(ret, i, nr);
memset(i, 0, nr * sizeof(u64));
}
}

return ret;
}

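bch2_acc_percpu_u64s() above is what lets the new __replicas_table_update() read a whole percpu usage array through one CPU's copy. A minimal usage sketch under the same locking assumption stated in the comment (the function dump_counters and its parameters are illustrative):

/* Sketch only: fold a percpu array of counters into one readable copy. */
static void dump_counters(u64 __percpu *counters, unsigned nr)
{
	unsigned i;
	/* Caller must already exclude concurrent updaters, per the comment above. */
	u64 *summed = bch2_acc_percpu_u64s(counters, nr);

	for (i = 0; i < nr; i++)
		printk(KERN_INFO "counter %u = %llu\n", i, summed[i]);
}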
@@ -715,4 +715,6 @@ static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src,
acc_u64s(acc, per_cpu_ptr(src, cpu), nr);
}

u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned);

#endif /* _BCACHEFS_UTIL_H */
@@ -1,4 +1,5 @@

#include <linux/atomic.h>
#include <linux/export.h>
#include <linux/generic-radix-tree.h>
#include <linux/gfp.h>
@@ -16,7 +17,7 @@ struct genradix_node {
};
};

static inline unsigned genradix_depth_shift(unsigned depth)
static inline int genradix_depth_shift(unsigned depth)
{
return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth;
}
@@ -29,16 +30,34 @@ static inline size_t genradix_depth_size(unsigned depth)
return 1UL << genradix_depth_shift(depth);
}

/* depth that's needed for a genradix that can address up to ULONG_MAX: */
#define GENRADIX_MAX_DEPTH \
DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT)

#define GENRADIX_DEPTH_MASK \
((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))

unsigned genradix_root_to_depth(struct genradix_root *r)
{
return (unsigned long) r & GENRADIX_DEPTH_MASK;
}

struct genradix_node *genradix_root_to_node(struct genradix_root *r)
{
return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
}

/*
* Returns pointer to the specified byte @offset within @radix, or NULL if not
* allocated
*/
void *__genradix_ptr(struct __genradix *radix, size_t offset)
{
size_t level = radix->depth;
struct genradix_node *n = radix->root;
struct genradix_root *r = READ_ONCE(radix->root);
struct genradix_node *n = genradix_root_to_node(r);
unsigned level = genradix_root_to_depth(r);

if (offset >= genradix_depth_size(radix->depth))
if (ilog2(offset) >= genradix_depth_shift(level))
return NULL;

while (1) {
@@ -64,43 +83,60 @@ EXPORT_SYMBOL(__genradix_ptr);
void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
gfp_t gfp_mask)
{
struct genradix_node **n;
size_t level;
struct genradix_root *v = READ_ONCE(radix->root);
struct genradix_node *n, *new_node = NULL;
unsigned level;

/* Increase tree depth if necessary: */

while (offset >= genradix_depth_size(radix->depth)) {
struct genradix_node *new_root =
(void *) __get_free_page(gfp_mask|__GFP_ZERO);

if (!new_root)
return NULL;

new_root->children[0] = radix->root;
radix->root = new_root;
radix->depth++;
}

n = &radix->root;
level = radix->depth;

while (1) {
if (!*n) {
*n = (void *) __get_free_page(gfp_mask|__GFP_ZERO);
if (!*n)
struct genradix_root *r = v, *new_root;

n = genradix_root_to_node(r);
level = genradix_root_to_depth(r);

if (n && ilog2(offset) < genradix_depth_shift(level))
break;

if (!new_node) {
new_node = (void *)
__get_free_page(gfp_mask|__GFP_ZERO);
if (!new_node)
return NULL;
}

if (!level)
break;
new_node->children[0] = n;
new_root = ((struct genradix_root *)
((unsigned long) new_node | (n ? level + 1 : 0)));

level--;

n = &(*n)->children[offset >> genradix_depth_shift(level)];
offset &= genradix_depth_size(level) - 1;
if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) {
v = new_root;
new_node = NULL;
}
}

return &(*n)->data[offset];
while (level--) {
struct genradix_node **p =
&n->children[offset >> genradix_depth_shift(level)];
offset &= genradix_depth_size(level) - 1;

n = READ_ONCE(*p);
if (!n) {
if (!new_node) {
new_node = (void *)
__get_free_page(gfp_mask|__GFP_ZERO);
if (!new_node)
return NULL;
}

if (!(n = cmpxchg_release(p, NULL, new_node)))
swap(n, new_node);
}
}

if (new_node)
free_page((unsigned long) new_node);

return &n->data[offset];
}
EXPORT_SYMBOL(__genradix_ptr_alloc);

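The reworked allocator above is lockless: the tree depth lives in the low bits of the root pointer, so node and depth are read and published together with a single cmpxchg_release(). A small sketch of that tagging scheme, restating what genradix_root_to_node()/genradix_root_to_depth() decode (make_root is illustrative and not part of the commit):

/* Sketch only: pack a page-aligned node pointer and its depth into one word. */
static struct genradix_root *make_root(struct genradix_node *node, unsigned depth)
{
	/* Nodes are whole-page allocations, so the low bits are free for the
	 * depth (which must fit within GENRADIX_DEPTH_MASK). */
	return (struct genradix_root *) ((unsigned long) node | depth);
}

/* Decoding mirrors the helpers added above:
 *   node  = (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
 *   depth = (unsigned long) r & GENRADIX_DEPTH_MASK;
 */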
@@ -108,17 +144,19 @@ void *__genradix_iter_peek(struct genradix_iter *iter,
struct __genradix *radix,
size_t objs_per_page)
{
struct genradix_root *r;
struct genradix_node *n;
size_t level, i;

if (!radix->root)
return NULL;
unsigned level, i;
restart:
if (iter->offset >= genradix_depth_size(radix->depth))
r = READ_ONCE(radix->root);
if (!r)
return NULL;

n = radix->root;
level = radix->depth;
n = genradix_root_to_node(r);
level = genradix_root_to_depth(r);

if (ilog2(iter->offset) >= genradix_depth_shift(level))
return NULL;

while (level) {
level--;
@@ -157,11 +195,24 @@ static void genradix_free_recurse(struct genradix_node *n, unsigned level)
free_page((unsigned long) n);
}

int __genradix_prealloc(struct __genradix *radix, size_t size,
gfp_t gfp_mask)
{
size_t offset;

for (offset = 0; offset < size; offset += PAGE_SIZE)
if (!__genradix_ptr_alloc(radix, offset, gfp_mask))
return -ENOMEM;

return 0;
}
EXPORT_SYMBOL(__genradix_prealloc);

void __genradix_free(struct __genradix *radix)
{
genradix_free_recurse(radix->root, radix->depth);
struct genradix_root *r = xchg(&radix->root, NULL);

radix->root = NULL;
radix->depth = 0;
genradix_free_recurse(genradix_root_to_node(r),
genradix_root_to_depth(r));
}
EXPORT_SYMBOL(__genradix_free);