mirror of https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-23 00:00:02 +03:00

Update bcachefs sources to 2e70771b8d

This commit is contained in:
parent 1b495cf9e1
commit c35fbbc025

@@ -1 +1 @@
-846600a41b7853588796a5403b07347d36c5a65c
+2e70771b8dc0d0f2d0356a5a7d16cab9430cd49e
@@ -160,7 +160,8 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id,
 	struct bkey_s_c k;
 	char buf[512];
 
-	for_each_btree_key(&iter, c, btree_id, start, k) {
+	for_each_btree_key(&iter, c, btree_id, start,
+			   BTREE_ITER_PREFETCH, k) {
 		if (bkey_cmp(k.k->p, end) > 0)
 			break;
 
@@ -259,9 +259,7 @@ static void write_data(struct bch_fs *c,
 
 	closure_init_stack(&cl);
 
-	bio_init(&bio.bio);
-	bio.bio.bi_max_vecs = 1;
-	bio.bio.bi_io_vec = &bv;
+	bio_init(&bio.bio, &bv, 1);
 	bio.bio.bi_iter.bi_size = len;
 	bch2_bio_map(&bio.bio, buf);
 
@@ -9,6 +9,7 @@ enum wb_congested_state {
 };
 
 struct backing_dev_info {
 	struct list_head	bdi_list;
 	unsigned		ra_pages;
 	unsigned		capabilities;
 
@@ -451,11 +451,15 @@ static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
 	return bio_clone_bioset(bio, gfp_mask, NULL);
 }
 
-static inline void bio_init(struct bio *bio)
+static inline void bio_init(struct bio *bio, struct bio_vec *table,
+			    unsigned short max_vecs)
 {
 	memset(bio, 0, sizeof(*bio));
 	atomic_set(&bio->__bi_remaining, 1);
 	atomic_set(&bio->__bi_cnt, 1);
+
+	bio->bi_io_vec = table;
+	bio->bi_max_vecs = max_vecs;
 }
 
 #endif /* __LINUX_BIO_H */
@@ -90,8 +90,7 @@ DECLARE_EVENT_CLASS(bio,
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d %s %llu + %u",
@@ -156,8 +155,7 @@ TRACE_EVENT(write_throttle,
 		__entry->inode		= inode;
 		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf,
-			      bio->bi_iter.bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
 		__entry->delay		= delay;
 	),
 
@@ -233,11 +233,8 @@ static void pd_controllers_update(struct work_struct *work)
 
 static int prio_io(struct bch_dev *ca, uint64_t bucket, int op)
 {
-	bio_init(ca->bio_prio);
-	bio_set_op_attrs(ca->bio_prio, op, REQ_SYNC|REQ_META);
-
-	ca->bio_prio->bi_max_vecs = bucket_pages(ca);
-	ca->bio_prio->bi_io_vec	= ca->bio_prio->bi_inline_vecs;
+	bio_init(ca->bio_prio, ca->bio_prio->bi_inline_vecs, bucket_pages(ca));
+	ca->bio_prio->bi_opf	= op|REQ_SYNC|REQ_META;
 	ca->bio_prio->bi_iter.bi_sector	= bucket * ca->mi.bucket_size;
 	ca->bio_prio->bi_bdev		= ca->disk_sb.bdev;
 	ca->bio_prio->bi_iter.bi_size	= bucket_bytes(ca);
@@ -636,9 +633,10 @@ static inline bool can_inc_bucket_gen(struct bch_dev *ca, struct bucket *g)
 	return bucket_gc_gen(ca, g) < BUCKET_GC_GEN_MAX;
 }
 
-static bool bch2_can_invalidate_bucket(struct bch_dev *ca, struct bucket *g)
+static bool bch2_can_invalidate_bucket(struct bch_dev *ca, struct bucket *g,
+				       struct bucket_mark mark)
 {
-	if (!is_available_bucket(READ_ONCE(g->mark)))
+	if (!is_available_bucket(mark))
 		return false;
 
 	if (bucket_gc_gen(ca, g) >= BUCKET_GC_GEN_MAX - 1)
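The signature change above pairs with the callers that follow: they take one READ_ONCE() snapshot of g->mark and pass it in, so the availability check and whatever the caller then does with the mark agree on a single value. A minimal, self-contained sketch of that pattern with hypothetical simplified types (not the real bcachefs structures):

```c
/* Simplified model of the "snapshot the mark once" pattern -- hypothetical
 * types, not the real bcachefs structures. */
#include <stdbool.h>
#include <stdio.h>

struct bucket_mark {
	unsigned	gen;
	unsigned	dirty_sectors, cached_sectors;
	bool		owned_by_allocator;
};
struct bucket { struct bucket_mark mark; };

static bool is_available_bucket(struct bucket_mark m)
{
	return !m.owned_by_allocator && !m.dirty_sectors;
}

/* The caller passes the snapshot it already read, instead of this function
 * re-reading g->mark (which could have changed in the meantime). */
static bool can_invalidate_bucket(struct bucket *g, struct bucket_mark m)
{
	(void) g;
	return is_available_bucket(m);
}

int main(void)
{
	struct bucket b = { .mark = { .gen = 3 } };
	struct bucket_mark m = b.mark;	/* one snapshot (READ_ONCE() in the real code) */

	if (can_invalidate_bucket(&b, m))
		printf("invalidating bucket at gen %u\n", m.gen); /* check and use agree */
	return 0;
}
```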
@@ -679,24 +677,38 @@ static void bch2_invalidate_one_bucket(struct bch_dev *ca, struct bucket *g)
  * btree GC to rewrite nodes with stale pointers.
  */
 
-#define bucket_sort_key(g)						\
-({									\
-	unsigned long prio = g->read_prio - ca->min_prio[READ];	\
-	prio = (prio * 7) / (ca->fs->prio_clock[READ].hand -		\
-			     ca->min_prio[READ]);			\
-									\
-	(((prio + 1) * bucket_sectors_used(g)) << 8) | bucket_gc_gen(ca, g);\
-})
+static unsigned long bucket_sort_key(bucket_heap *h,
+				     struct bucket_heap_entry e)
+{
+	struct bch_dev *ca = container_of(h, struct bch_dev, alloc_heap);
+	struct bucket *g = ca->buckets + e.bucket;
+	unsigned long prio = g->read_prio - ca->min_prio[READ];
+	prio = (prio * 7) / (ca->fs->prio_clock[READ].hand -
+			     ca->min_prio[READ]);
+
+	return (prio + 1) * bucket_sectors_used(e.mark);
+}
+
+static inline int bucket_alloc_cmp(bucket_heap *h,
+				   struct bucket_heap_entry l,
+				   struct bucket_heap_entry r)
+{
+	return bucket_sort_key(h, l) - bucket_sort_key(h, r);
+}
+
+static inline long bucket_idx_cmp(bucket_heap *h,
+				  struct bucket_heap_entry l,
+				  struct bucket_heap_entry r)
+{
+	return l.bucket - r.bucket;
+}
 
 static void invalidate_buckets_lru(struct bch_dev *ca)
 {
 	struct bucket_heap_entry e;
 	struct bucket *g;
-	unsigned i;
 
 	mutex_lock(&ca->heap_lock);
 
-	ca->heap.used = 0;
+	ca->alloc_heap.used = 0;
 
 	mutex_lock(&ca->fs->bucket_lock);
 	bch2_recalc_min_prio(ca, READ);
@@ -708,37 +720,32 @@ static void invalidate_buckets_lru(struct bch_dev *ca)
 	 * all buckets have been visited.
 	 */
 	for_each_bucket(g, ca) {
-		if (!bch2_can_invalidate_bucket(ca, g))
+		struct bucket_mark m = READ_ONCE(g->mark);
+		struct bucket_heap_entry e = { g - ca->buckets, m };
+
+		if (!bch2_can_invalidate_bucket(ca, g, m))
 			continue;
 
-		bucket_heap_push(ca, g, bucket_sort_key(g));
+		heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
 	}
 
 	/* Sort buckets by physical location on disk for better locality */
-	for (i = 0; i < ca->heap.used; i++) {
-		struct bucket_heap_entry *e = &ca->heap.data[i];
-
-		e->val = e->g - ca->buckets;
-	}
-
-	heap_resort(&ca->heap, bucket_max_cmp);
+	heap_resort(&ca->alloc_heap, bucket_idx_cmp);
 
 	/*
 	 * If we run out of buckets to invalidate, bch2_allocator_thread() will
 	 * kick stuff and retry us
 	 */
 	while (!fifo_full(&ca->free_inc) &&
-	       heap_pop(&ca->heap, e, bucket_max_cmp)) {
-		BUG_ON(!bch2_can_invalidate_bucket(ca, e.g));
-		bch2_invalidate_one_bucket(ca, e.g);
-	}
+	       heap_pop(&ca->alloc_heap, e, bucket_idx_cmp))
+		bch2_invalidate_one_bucket(ca, &ca->buckets[e.bucket]);
 
 	mutex_unlock(&ca->fs->bucket_lock);
 	mutex_unlock(&ca->heap_lock);
 }
 
 static void invalidate_buckets_fifo(struct bch_dev *ca)
 {
+	struct bucket_mark m;
 	struct bucket *g;
 	size_t checked = 0;
 
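One detail worth noting in the hunk above: the heap comparators now receive the heap itself as their first argument, and bucket_sort_key() recovers the owning bch_dev from it with container_of(), since alloc_heap is embedded in struct bch_dev. A small standalone illustration of that container_of pattern, using hypothetical names rather than the real types:

```c
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *) ((char *) (ptr) - offsetof(type, member)))

/* Hypothetical stand-ins for bucket_heap and bch_dev. */
typedef struct { size_t used; } bucket_heap;

struct dev {
	int		id;
	bucket_heap	alloc_heap;	/* heap embedded in the device struct */
};

/* A comparator that only gets the heap pointer can still reach per-device
 * state, because the heap lives inside the device struct. */
static int cmp(bucket_heap *h, int l, int r)
{
	struct dev *d = container_of(h, struct dev, alloc_heap);

	printf("comparing on device %d\n", d->id);
	return l - r;
}

int main(void)
{
	struct dev d = { .id = 7 };

	return cmp(&d.alloc_heap, 1, 2) < 0 ? 0 : 1;
}
```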
@@ -748,8 +755,9 @@ static void invalidate_buckets_fifo(struct bch_dev *ca)
 			ca->fifo_last_bucket = ca->mi.first_bucket;
 
 		g = ca->buckets + ca->fifo_last_bucket++;
+		m = READ_ONCE(g->mark);
 
-		if (bch2_can_invalidate_bucket(ca, g))
+		if (bch2_can_invalidate_bucket(ca, g, m))
 			bch2_invalidate_one_bucket(ca, g);
 
 		if (++checked >= ca->mi.nbuckets)
@@ -759,6 +767,7 @@
 
 static void invalidate_buckets_random(struct bch_dev *ca)
 {
+	struct bucket_mark m;
 	struct bucket *g;
 	size_t checked = 0;
 
@@ -768,8 +777,9 @@ static void invalidate_buckets_random(struct bch_dev *ca)
 			ca->mi.first_bucket;
 
 		g = ca->buckets + n;
+		m = READ_ONCE(g->mark);
 
-		if (bch2_can_invalidate_bucket(ca, g))
+		if (bch2_can_invalidate_bucket(ca, g, m))
 			bch2_invalidate_one_bucket(ca, g);
 
 		if (++checked >= ca->mi.nbuckets / 2)
@@ -1,6 +1,7 @@
 #ifndef _BCACHE_ALLOC_H
 #define _BCACHE_ALLOC_H
 
+#include "bcachefs.h"
 #include "alloc_types.h"
 
 struct bkey;
@@ -1,5 +1,5 @@
-#ifndef _BCACHE_H
-#define _BCACHE_H
+#ifndef _BCACHEFS_H
+#define _BCACHEFS_H
 
 /*
  * SOME HIGH LEVEL CODE DOCUMENTATION:
@@ -418,8 +418,8 @@ struct bch_dev {
 	atomic_long_t		saturated_count;
 	size_t			inc_gen_needs_gc;
 
 	struct mutex		heap_lock;
-	DECLARE_HEAP(struct bucket_heap_entry, heap);
+	bucket_heap		alloc_heap;
+	bucket_heap		copygc_heap;
 
 	/* Moving GC: */
 	struct task_struct	*moving_gc_read;
@@ -803,4 +803,4 @@ static inline unsigned block_bytes(const struct bch_fs *c)
 	return c->sb.block_size << 9;
 }
 
-#endif /* _BCACHE_H */
+#endif /* _BCACHEFS_H */
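The struct bch_dev hunk above replaces the DECLARE_HEAP() field with named heap typedefs (bucket_heap here, io_timer_heap further down in this diff). A minimal sketch of how such a HEAP(type)/typedef pattern can look in C; the layout below is hypothetical and only illustrates the idea, it is not the actual definition from bcachefs' util.h:

```c
#include <stddef.h>
#include <stdio.h>

/* Hypothetical layout -- the real HEAP() lives in bcachefs' util.h and may
 * differ; this only shows the macro-to-typedef pattern. */
#define HEAP(type)							\
struct {								\
	size_t	size;		/* allocated slots */			\
	size_t	used;		/* slots currently occupied */		\
	type	*data;							\
}

struct bucket_heap_entry { size_t bucket; unsigned long key; };

/* Naming the expansion lets the type appear in prototypes and comparators. */
typedef HEAP(struct bucket_heap_entry) bucket_heap;

static int bucket_idx_cmp(bucket_heap *h, struct bucket_heap_entry l,
			  struct bucket_heap_entry r)
{
	(void) h;
	return (int) (l.bucket - r.bucket);
}

int main(void)
{
	struct bucket_heap_entry slots[4];
	bucket_heap h = { .size = 4, .used = 0, .data = slots };

	h.data[h.used++] = (struct bucket_heap_entry) { .bucket = 3 };
	printf("used=%zu cmp=%d\n", h.used,
	       bucket_idx_cmp(&h, h.data[0], h.data[0]));
	return 0;
}
```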
@ -1,15 +1,10 @@
|
||||
#ifndef _LINUX_BCACHE_H
|
||||
#define _LINUX_BCACHE_H
|
||||
#ifndef _BCACHEFS_FORMAT_H
|
||||
#define _BCACHEFS_FORMAT_H
|
||||
|
||||
/*
|
||||
* Bcache on disk data structures
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
typedef bool _Bool;
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <asm/types.h>
|
||||
#include <asm/byteorder.h>
|
||||
#include <linux/uuid.h>
|
||||
@ -230,8 +225,6 @@ struct bkey_i {
|
||||
};
|
||||
};
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
#define KEY(_inode, _offset, _size) \
|
||||
((struct bkey) { \
|
||||
.u64s = BKEY_U64s, \
|
||||
@ -240,24 +233,6 @@ struct bkey_i {
|
||||
.size = _size, \
|
||||
})
|
||||
|
||||
#else
|
||||
|
||||
static inline struct bkey KEY(__u64 inode, __u64 offset, __u64 size)
|
||||
{
|
||||
struct bkey ret;
|
||||
|
||||
memset(&ret, 0, sizeof(ret));
|
||||
ret.u64s = BKEY_U64s;
|
||||
ret.format = KEY_FORMAT_CURRENT;
|
||||
ret.p.inode = inode;
|
||||
ret.p.offset = offset;
|
||||
ret.size = size;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline void bkey_init(struct bkey *k)
|
||||
{
|
||||
*k = KEY(0, 0, 0);
|
||||
@ -1344,9 +1319,4 @@ struct btree_node_entry {
|
||||
};
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* _LINUX_BCACHE_H */
|
||||
|
||||
/* vim: set foldnestmax=2: */
|
||||
#endif /* _BCACHEFS_FORMAT_H */
|
||||
|
@ -473,7 +473,7 @@ void bch2_btree_keys_init(struct btree *b, bool *expensive_debug_checks)
|
||||
* in one cacheline in t->set (BSET_CACHELINE bytes).
|
||||
*
|
||||
* This means we don't have to store the full index of the key that a node in
|
||||
* the binary tree points to; eytzinger_to_inorder() gives us the cacheline, and
|
||||
* the binary tree points to; eytzinger1_to_inorder() gives us the cacheline, and
|
||||
* then bkey_float->m gives us the offset within that cacheline, in units of 8
|
||||
* bytes.
|
||||
*
|
||||
@ -534,7 +534,7 @@ static inline struct bkey_packed *tree_to_bkey(const struct btree *b,
|
||||
unsigned j)
|
||||
{
|
||||
return cacheline_to_bkey(b, t,
|
||||
__eytzinger_to_inorder(j, t->size, t->extra),
|
||||
__eytzinger1_to_inorder(j, t->size, t->extra),
|
||||
bkey_float(b, t, j)->key_offset);
|
||||
}
|
||||
|
||||
@ -882,7 +882,7 @@ retry:
|
||||
t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1;
|
||||
|
||||
/* First we figure out where the first key in each cacheline is */
|
||||
eytzinger_for_each(j, t->size) {
|
||||
eytzinger1_for_each(j, t->size) {
|
||||
while (bkey_to_cacheline(b, t, k) < cacheline)
|
||||
prev = k, k = bkey_next(k);
|
||||
|
||||
@ -905,7 +905,7 @@ retry:
|
||||
t->max_key = bkey_unpack_pos(b, k);
|
||||
|
||||
/* Then we build the tree */
|
||||
eytzinger_for_each(j, t->size)
|
||||
eytzinger1_for_each(j, t->size)
|
||||
make_bfloat(b, t, j, &min_key, &max_key);
|
||||
}
|
||||
|
||||
@ -996,7 +996,7 @@ static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t,
|
||||
|
||||
do {
|
||||
p = j ? tree_to_bkey(b, t,
|
||||
__inorder_to_eytzinger(j--,
|
||||
__inorder_to_eytzinger1(j--,
|
||||
t->size, t->extra))
|
||||
: btree_bkey_first(b, t);
|
||||
} while (p >= k);
|
||||
@ -1087,30 +1087,30 @@ static void ro_aux_tree_fix_invalidated_key(struct btree *b,
|
||||
|
||||
if (inorder &&
|
||||
inorder < t->size) {
|
||||
j = __inorder_to_eytzinger(inorder, t->size, t->extra);
|
||||
j = __inorder_to_eytzinger1(inorder, t->size, t->extra);
|
||||
|
||||
if (k == tree_to_bkey(b, t, j)) {
|
||||
/* Fix the node this key corresponds to */
|
||||
make_bfloat(b, t, j, &min_key, &max_key);
|
||||
|
||||
/* Children for which this key is the right boundary */
|
||||
for (j = eytzinger_left_child(j);
|
||||
for (j = eytzinger1_left_child(j);
|
||||
j < t->size;
|
||||
j = eytzinger_right_child(j))
|
||||
j = eytzinger1_right_child(j))
|
||||
make_bfloat(b, t, j, &min_key, &max_key);
|
||||
}
|
||||
}
|
||||
|
||||
if (inorder + 1 < t->size) {
|
||||
j = __inorder_to_eytzinger(inorder + 1, t->size, t->extra);
|
||||
j = __inorder_to_eytzinger1(inorder + 1, t->size, t->extra);
|
||||
|
||||
if (k == tree_to_prev_bkey(b, t, j)) {
|
||||
make_bfloat(b, t, j, &min_key, &max_key);
|
||||
|
||||
/* Children for which this key is the left boundary */
|
||||
for (j = eytzinger_right_child(j);
|
||||
for (j = eytzinger1_right_child(j);
|
||||
j < t->size;
|
||||
j = eytzinger_left_child(j))
|
||||
j = eytzinger1_left_child(j))
|
||||
make_bfloat(b, t, j, &min_key, &max_key);
|
||||
}
|
||||
}
|
||||
@ -1331,7 +1331,7 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
|
||||
p = bkey_float_get(base, n << 4);
|
||||
prefetch(p);
|
||||
} else if (n << 3 < t->size) {
|
||||
inorder = __eytzinger_to_inorder(n, t->size, t->extra);
|
||||
inorder = __eytzinger1_to_inorder(n, t->size, t->extra);
|
||||
p = bset_cacheline(b, t, inorder);
|
||||
#ifdef CONFIG_X86_64
|
||||
asm(".intel_syntax noprefix;"
|
||||
@ -1362,7 +1362,7 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
|
||||
&search, packed_search, n);
|
||||
} while (n < t->size);
|
||||
|
||||
inorder = __eytzinger_to_inorder(n >> 1, t->size, t->extra);
|
||||
inorder = __eytzinger1_to_inorder(n >> 1, t->size, t->extra);
|
||||
|
||||
/*
|
||||
* n would have been the node we recursed to - the low bit tells us if
|
||||
@ -1372,7 +1372,7 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
|
||||
return cacheline_to_bkey(b, t, inorder, f->key_offset);
|
||||
} else {
|
||||
if (--inorder) {
|
||||
n = eytzinger_prev(n >> 1, t->size);
|
||||
n = eytzinger1_prev(n >> 1, t->size);
|
||||
f = bkey_float_get(base, n);
|
||||
return cacheline_to_bkey(b, t, inorder, f->key_offset);
|
||||
} else
|
||||
@ -1790,7 +1790,7 @@ int bch2_bkey_print_bfloat(struct btree *b, struct bkey_packed *k,
|
||||
if (!bset_has_ro_aux_tree(t))
|
||||
goto out;
|
||||
|
||||
j = __inorder_to_eytzinger(bkey_to_cacheline(b, t, k), t->size, t->extra);
|
||||
j = __inorder_to_eytzinger1(bkey_to_cacheline(b, t, k), t->size, t->extra);
|
||||
if (j &&
|
||||
j < t->size &&
|
||||
k == tree_to_bkey(b, t, j))
|
||||
|
@ -163,10 +163,14 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
|
||||
goto out_unlock;
|
||||
|
||||
if (btree_node_dirty(b) ||
|
||||
btree_node_write_in_flight(b)) {
|
||||
btree_node_write_in_flight(b) ||
|
||||
btree_node_read_in_flight(b)) {
|
||||
if (!flush)
|
||||
goto out_unlock;
|
||||
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
|
||||
/*
|
||||
* Using the underscore version because we don't want to compact
|
||||
* bsets after the write, since this node is about to be evicted
|
||||
@ -582,7 +586,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_iter *iter,
|
||||
if (btree_node_read_locked(iter, level + 1))
|
||||
btree_node_unlock(iter, level + 1);
|
||||
|
||||
bch2_btree_node_read(c, b);
|
||||
bch2_btree_node_read(c, b, true);
|
||||
six_unlock_write(&b->lock);
|
||||
|
||||
if (lock_type == SIX_LOCK_read)
|
||||
@ -673,6 +677,9 @@ retry:
|
||||
}
|
||||
}
|
||||
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
|
||||
prefetch(b->aux_data);
|
||||
|
||||
for_each_bset(b, t) {
|
||||
@ -700,6 +707,44 @@ retry:
|
||||
return b;
|
||||
}
|
||||
|
||||
void bch2_btree_node_prefetch(struct btree_iter *iter,
|
||||
const struct bkey_i *k, unsigned level)
|
||||
{
|
||||
struct bch_fs *c = iter->c;
|
||||
struct btree *b;
|
||||
|
||||
BUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
|
||||
rcu_read_lock();
|
||||
b = mca_find(c, k);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (b)
|
||||
return;
|
||||
|
||||
b = bch2_btree_node_mem_alloc(c);
|
||||
if (IS_ERR(b))
|
||||
return;
|
||||
|
||||
bkey_copy(&b->key, k);
|
||||
if (bch2_btree_node_hash_insert(c, b, level, iter->btree_id)) {
|
||||
/* raced with another fill: */
|
||||
|
||||
/* mark as unhashed... */
|
||||
bkey_i_to_extent(&b->key)->v._data[0] = 0;
|
||||
|
||||
mutex_lock(&c->btree_cache_lock);
|
||||
list_add(&b->list, &c->btree_cache_freeable);
|
||||
mutex_unlock(&c->btree_cache_lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
bch2_btree_node_read(c, b, false);
|
||||
out:
|
||||
six_unlock_write(&b->lock);
|
||||
six_unlock_intent(&b->lock);
|
||||
}
|
||||
|
||||
int bch2_print_btree_node(struct bch_fs *c, struct btree *b,
|
||||
char *buf, size_t len)
|
||||
{
|
||||
|
@ -22,6 +22,9 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
|
||||
struct btree *bch2_btree_node_get(struct btree_iter *, const struct bkey_i *,
|
||||
unsigned, enum six_lock_type);
|
||||
|
||||
void bch2_btree_node_prefetch(struct btree_iter *, const struct bkey_i *,
|
||||
unsigned);
|
||||
|
||||
void bch2_fs_btree_exit(struct bch_fs *);
|
||||
int bch2_fs_btree_init(struct bch_fs *);
|
||||
|
||||
|
@ -225,7 +225,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
|
||||
|
||||
btree_node_range_checks_init(&r, depth);
|
||||
|
||||
for_each_btree_node(&iter, c, btree_id, POS_MIN, depth, b) {
|
||||
__for_each_btree_node(&iter, c, btree_id, POS_MIN,
|
||||
0, depth, BTREE_ITER_PREFETCH, b) {
|
||||
btree_node_range_checks(c, b, &r);
|
||||
|
||||
bch2_verify_btree_nr_keys(b);
|
||||
@ -779,7 +780,8 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
|
||||
*/
|
||||
memset(merge, 0, sizeof(merge));
|
||||
|
||||
__for_each_btree_node(&iter, c, btree_id, POS_MIN, 0, b, U8_MAX) {
|
||||
__for_each_btree_node(&iter, c, btree_id, POS_MIN,
|
||||
U8_MAX, 0, BTREE_ITER_PREFETCH, b) {
|
||||
memmove(merge + 1, merge,
|
||||
sizeof(merge) - sizeof(merge[0]));
|
||||
memmove(lock_seq + 1, lock_seq,
|
||||
@ -952,7 +954,7 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
|
||||
* We have to hit every btree node before starting journal replay, in
|
||||
* order for the journal seq blacklist machinery to work:
|
||||
*/
|
||||
for_each_btree_node(&iter, c, id, POS_MIN, 0, b) {
|
||||
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
|
||||
btree_node_range_checks(c, b, &r);
|
||||
|
||||
if (btree_node_has_ptrs(b)) {
|
||||
|
@ -1196,6 +1196,8 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
|
||||
|
||||
btree_node_reset_sib_u64s(b);
|
||||
out:
|
||||
clear_btree_node_read_in_flight(b);
|
||||
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
|
||||
mempool_free(iter, &c->fill_iter);
|
||||
return;
|
||||
err:
|
||||
@ -1206,13 +1208,48 @@ fsck_err:
|
||||
goto out;
|
||||
}
|
||||
|
||||
void bch2_btree_node_read(struct bch_fs *c, struct btree *b)
|
||||
static void btree_node_read_work(struct work_struct *work)
|
||||
{
|
||||
struct btree_read_bio *rb =
|
||||
container_of(work, struct btree_read_bio, work);
|
||||
|
||||
bch2_btree_node_read_done(rb->c, rb->bio.bi_private,
|
||||
rb->pick.ca, &rb->pick.ptr);
|
||||
|
||||
percpu_ref_put(&rb->pick.ca->io_ref);
|
||||
bio_put(&rb->bio);
|
||||
}
|
||||
|
||||
static void btree_node_read_endio(struct bio *bio)
|
||||
{
|
||||
struct btree *b = bio->bi_private;
|
||||
struct btree_read_bio *rb =
|
||||
container_of(bio, struct btree_read_bio, bio);
|
||||
|
||||
if (bch2_dev_fatal_io_err_on(bio->bi_error,
|
||||
rb->pick.ca, "IO error reading bucket %zu",
|
||||
PTR_BUCKET_NR(rb->pick.ca, &rb->pick.ptr)) ||
|
||||
bch2_meta_read_fault("btree")) {
|
||||
set_btree_node_read_error(b);
|
||||
percpu_ref_put(&rb->pick.ca->io_ref);
|
||||
bio_put(bio);
|
||||
return;
|
||||
}
|
||||
|
||||
INIT_WORK(&rb->work, btree_node_read_work);
|
||||
schedule_work(&rb->work);
|
||||
}
|
||||
|
||||
void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
|
||||
bool sync)
|
||||
{
|
||||
uint64_t start_time = local_clock();
|
||||
struct bio *bio;
|
||||
struct extent_pick_ptr pick;
|
||||
struct btree_read_bio *rb;
|
||||
struct bio *bio;
|
||||
|
||||
trace_btree_read(c, b);
|
||||
set_btree_node_read_in_flight(b);
|
||||
|
||||
pick = bch2_btree_pick_ptr(c, b);
|
||||
if (bch2_fs_fatal_err_on(!pick.ca, c,
|
||||
@ -1222,12 +1259,16 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b)
|
||||
}
|
||||
|
||||
bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_read_bio);
|
||||
rb = container_of(bio, struct btree_read_bio, bio);
|
||||
rb->c = c;
|
||||
rb->pick = pick;
|
||||
bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META;
|
||||
bio->bi_bdev = pick.ca->disk_sb.bdev;
|
||||
bio->bi_iter.bi_sector = pick.ptr.offset;
|
||||
bio->bi_iter.bi_size = btree_bytes(c);
|
||||
bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC);
|
||||
bch2_bio_map(bio, b->data);
|
||||
|
||||
if (sync) {
|
||||
submit_bio_wait(bio);
|
||||
|
||||
if (bch2_dev_fatal_io_err_on(bio->bi_error,
|
||||
@ -1243,6 +1284,11 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b)
|
||||
out:
|
||||
bio_put(bio);
|
||||
percpu_ref_put(&pick.ca->io_ref);
|
||||
} else {
|
||||
bio->bi_end_io = btree_node_read_endio;
|
||||
bio->bi_private = b;
|
||||
submit_bio(bio);
|
||||
}
|
||||
}
|
||||
|
||||
int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
|
||||
@ -1267,7 +1313,7 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
|
||||
bkey_copy(&b->key, k);
|
||||
BUG_ON(bch2_btree_node_hash_insert(c, b, level, id));
|
||||
|
||||
bch2_btree_node_read(c, b);
|
||||
bch2_btree_node_read(c, b, true);
|
||||
six_unlock_write(&b->lock);
|
||||
|
||||
if (btree_node_read_error(b)) {
|
||||
@ -1557,10 +1603,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
wbio->put_bio = true;
|
||||
wbio->order = order;
|
||||
wbio->used_mempool = used_mempool;
|
||||
bio->bi_opf = REQ_OP_WRITE|REQ_META|REQ_FUA;
|
||||
bio->bi_iter.bi_size = sectors_to_write << 9;
|
||||
bio->bi_end_io = btree_node_write_endio;
|
||||
bio->bi_private = b;
|
||||
bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META|WRITE_SYNC|REQ_FUA);
|
||||
|
||||
if (parent)
|
||||
closure_get(parent);
|
||||
|
@ -1,11 +1,20 @@
|
||||
#ifndef _BCACHE_BTREE_IO_H
|
||||
#define _BCACHE_BTREE_IO_H
|
||||
|
||||
#include "extents.h"
|
||||
|
||||
struct bch_fs;
|
||||
struct btree_write;
|
||||
struct btree;
|
||||
struct btree_iter;
|
||||
|
||||
struct btree_read_bio {
|
||||
struct bch_fs *c;
|
||||
struct extent_pick_ptr pick;
|
||||
struct work_struct work;
|
||||
struct bio bio;
|
||||
};
|
||||
|
||||
static inline void btree_node_io_unlock(struct btree *b)
|
||||
{
|
||||
EBUG_ON(!btree_node_write_in_flight(b));
|
||||
@ -64,7 +73,7 @@ void bch2_btree_init_next(struct bch_fs *, struct btree *,
|
||||
|
||||
void bch2_btree_node_read_done(struct bch_fs *, struct btree *,
|
||||
struct bch_dev *, const struct bch_extent_ptr *);
|
||||
void bch2_btree_node_read(struct bch_fs *, struct btree *);
|
||||
void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
|
||||
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
|
||||
const struct bkey_i *, unsigned);
|
||||
|
||||
|
@ -161,7 +161,8 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
*/
|
||||
if (type == SIX_LOCK_intent &&
|
||||
linked->nodes_locked != linked->nodes_intent_locked) {
|
||||
linked->locks_want = max(linked->locks_want,
|
||||
linked->locks_want = max_t(unsigned,
|
||||
linked->locks_want,
|
||||
iter->locks_want);
|
||||
return false;
|
||||
}
|
||||
@ -177,7 +178,8 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
*/
|
||||
if (linked->btree_id == iter->btree_id &&
|
||||
level > __fls(linked->nodes_locked)) {
|
||||
linked->locks_want = max(linked->locks_want,
|
||||
linked->locks_want = max_t(unsigned,
|
||||
linked->locks_want,
|
||||
iter->locks_want);
|
||||
return false;
|
||||
}
|
||||
@ -247,12 +249,10 @@ fail:
|
||||
|
||||
static int __bch2_btree_iter_unlock(struct btree_iter *iter)
|
||||
{
|
||||
BUG_ON(iter->error == -EINTR);
|
||||
|
||||
while (iter->nodes_locked)
|
||||
btree_node_unlock(iter, __ffs(iter->nodes_locked));
|
||||
|
||||
return iter->error;
|
||||
return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
|
||||
}
|
||||
|
||||
int bch2_btree_iter_unlock(struct btree_iter *iter)
|
||||
@ -285,7 +285,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
|
||||
? bch2_btree_node_iter_prev(&tmp, b)
|
||||
: bch2_btree_node_iter_prev_all(&tmp, b);
|
||||
if (k && btree_iter_pos_cmp_packed(b, &iter->pos, k,
|
||||
iter->is_extents)) {
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS)) {
|
||||
char buf[100];
|
||||
struct bkey uk = bkey_unpack_key(b, k);
|
||||
|
||||
@ -296,7 +296,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
|
||||
|
||||
k = bch2_btree_node_iter_peek_all(node_iter, b);
|
||||
if (k && !btree_iter_pos_cmp_packed(b, &iter->pos, k,
|
||||
iter->is_extents)) {
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS)) {
|
||||
char buf[100];
|
||||
struct bkey uk = bkey_unpack_key(b, k);
|
||||
|
||||
@ -340,7 +340,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
|
||||
/* didn't find the bset in the iterator - might have to readd it: */
|
||||
if (new_u64s &&
|
||||
btree_iter_pos_cmp_packed(b, &iter->pos, where,
|
||||
iter->is_extents))
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS))
|
||||
bch2_btree_node_iter_push(node_iter, b, where, end);
|
||||
return;
|
||||
found:
|
||||
@ -352,7 +352,7 @@ found:
|
||||
|
||||
if (new_u64s &&
|
||||
btree_iter_pos_cmp_packed(b, &iter->pos, where,
|
||||
iter->is_extents)) {
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS)) {
|
||||
set->k = offset;
|
||||
bch2_btree_node_iter_sort(node_iter, b);
|
||||
} else if (set->k < offset + clobber_u64s) {
|
||||
@ -388,7 +388,7 @@ found:
|
||||
*/
|
||||
if (b->level && new_u64s && !bkey_deleted(where) &&
|
||||
btree_iter_pos_cmp_packed(b, &iter->pos, where,
|
||||
iter->is_extents)) {
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS)) {
|
||||
struct bset_tree *t;
|
||||
struct bkey_packed *k;
|
||||
|
||||
@ -535,8 +535,8 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
|
||||
static inline void __btree_iter_init(struct btree_iter *iter,
|
||||
struct btree *b)
|
||||
{
|
||||
bch2_btree_node_iter_init(&iter->node_iters[b->level], b,
|
||||
iter->pos, iter->is_extents,
|
||||
bch2_btree_node_iter_init(&iter->node_iters[b->level], b, iter->pos,
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS,
|
||||
btree_node_is_extents(b));
|
||||
|
||||
/* Skip to first non whiteout: */
|
||||
@ -549,7 +549,8 @@ static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
|
||||
{
|
||||
return iter->btree_id == b->btree_id &&
|
||||
bkey_cmp(iter->pos, b->data->min_key) >= 0 &&
|
||||
btree_iter_pos_cmp(iter->pos, &b->key.k, iter->is_extents);
|
||||
btree_iter_pos_cmp(iter->pos, &b->key.k,
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS);
|
||||
}
|
||||
|
||||
static inline void btree_iter_node_set(struct btree_iter *iter,
|
||||
@ -695,6 +696,26 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
|
||||
}
|
||||
}
|
||||
|
||||
noinline
|
||||
static void btree_iter_prefetch(struct btree_iter *iter)
|
||||
{
|
||||
struct btree *b = iter->nodes[iter->level + 1];
|
||||
struct btree_node_iter node_iter = iter->node_iters[iter->level + 1];
|
||||
struct bkey_packed *k;
|
||||
BKEY_PADDED(k) tmp;
|
||||
unsigned nr = iter->level ? 1 : 8;
|
||||
|
||||
while (nr) {
|
||||
bch2_btree_node_iter_advance(&node_iter, b);
|
||||
k = bch2_btree_node_iter_peek(&node_iter, b);
|
||||
if (!k)
|
||||
break;
|
||||
|
||||
bch2_bkey_unpack(b, &tmp.k, k);
|
||||
bch2_btree_node_prefetch(iter, &tmp.k, iter->level);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int btree_iter_down(struct btree_iter *iter)
|
||||
{
|
||||
struct btree *b;
|
||||
@ -712,6 +733,10 @@ static inline int btree_iter_down(struct btree_iter *iter)
|
||||
iter->level = level;
|
||||
mark_btree_node_locked(iter, level, lock_type);
|
||||
btree_iter_node_set(iter, b);
|
||||
|
||||
if (iter->flags & BTREE_ITER_PREFETCH)
|
||||
btree_iter_prefetch(iter);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -791,7 +816,7 @@ out:
|
||||
io_error:
|
||||
BUG_ON(ret != -EIO);
|
||||
|
||||
iter->error = ret;
|
||||
iter->flags |= BTREE_ITER_ERROR;
|
||||
iter->nodes[iter->level] = NULL;
|
||||
goto out;
|
||||
}
|
||||
@ -834,7 +859,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
bch2_btree_node_relock(iter, iter->level) &&
|
||||
btree_iter_pos_cmp(iter->pos,
|
||||
&iter->nodes[iter->level]->key.k,
|
||||
iter->is_extents)))
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS)))
|
||||
btree_iter_up(iter);
|
||||
|
||||
/*
|
||||
@ -845,7 +870,8 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
struct bkey_s_c k;
|
||||
|
||||
while ((k = __btree_iter_peek_all(iter)).k &&
|
||||
!btree_iter_pos_cmp(iter->pos, k.k, iter->is_extents))
|
||||
!btree_iter_pos_cmp(iter->pos, k.k,
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS))
|
||||
__btree_iter_advance(iter);
|
||||
}
|
||||
|
||||
@ -875,7 +901,7 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
if (unlikely(!iter->nodes[iter->level]))
|
||||
return 0;
|
||||
|
||||
iter->at_end_of_leaf = false;
|
||||
iter->flags &= ~BTREE_ITER_AT_END_OF_LEAF;
|
||||
|
||||
ret = __bch2_btree_iter_traverse(iter);
|
||||
if (unlikely(ret))
|
||||
@ -891,7 +917,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
|
||||
struct btree *b;
|
||||
int ret;
|
||||
|
||||
EBUG_ON(iter->is_extents);
|
||||
EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
|
||||
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (ret)
|
||||
@ -912,7 +938,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
|
||||
struct btree *b;
|
||||
int ret;
|
||||
|
||||
EBUG_ON(iter->is_extents);
|
||||
EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
|
||||
|
||||
btree_iter_up(iter);
|
||||
|
||||
@ -964,12 +990,13 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
|
||||
|
||||
while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
|
||||
!btree_iter_pos_cmp_packed(b, &new_pos, k,
|
||||
iter->is_extents))
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS))
|
||||
bch2_btree_node_iter_advance(node_iter, b);
|
||||
|
||||
if (!k &&
|
||||
!btree_iter_pos_cmp(new_pos, &b->key.k, iter->is_extents))
|
||||
iter->at_end_of_leaf = true;
|
||||
!btree_iter_pos_cmp(new_pos, &b->key.k,
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS))
|
||||
iter->flags |= BTREE_ITER_AT_END_OF_LEAF;
|
||||
|
||||
iter->pos = new_pos;
|
||||
}
|
||||
@ -1006,6 +1033,9 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
|
||||
(iter->btree_id == BTREE_ID_EXTENTS));
|
||||
|
||||
while (1) {
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (unlikely(ret)) {
|
||||
@ -1019,7 +1049,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
* iter->pos should always be equal to the key we just
|
||||
* returned - except extents can straddle iter->pos:
|
||||
*/
|
||||
if (!iter->is_extents ||
|
||||
if (!(iter->flags & BTREE_ITER_IS_EXTENTS) ||
|
||||
bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
|
||||
bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
|
||||
return k;
|
||||
@ -1043,6 +1073,9 @@ struct bkey_s_c bch2_btree_iter_peek_with_holes(struct btree_iter *iter)
|
||||
struct bkey n;
|
||||
int ret;
|
||||
|
||||
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
|
||||
(iter->btree_id == BTREE_ID_EXTENTS));
|
||||
|
||||
while (1) {
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (unlikely(ret)) {
|
||||
@ -1057,7 +1090,7 @@ recheck:
|
||||
bkey_init(&n);
|
||||
n.p = iter->pos;
|
||||
|
||||
if (iter->is_extents) {
|
||||
if (iter->flags & BTREE_ITER_IS_EXTENTS) {
|
||||
if (n.p.offset == KEY_OFFSET_MAX) {
|
||||
iter->pos = bkey_successor(iter->pos);
|
||||
goto recheck;
|
||||
@ -1088,20 +1121,17 @@ recheck:
|
||||
|
||||
void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c,
|
||||
enum btree_id btree_id, struct bpos pos,
|
||||
unsigned locks_want, unsigned depth)
|
||||
unsigned locks_want, unsigned depth,
|
||||
unsigned flags)
|
||||
{
|
||||
iter->level = depth;
|
||||
/* bch2_bkey_ops isn't used much, this would be a cache miss */
|
||||
/* iter->is_extents = bch2_bkey_ops[btree_id]->is_extents; */
|
||||
iter->is_extents = btree_id == BTREE_ID_EXTENTS;
|
||||
iter->nodes_locked = 0;
|
||||
iter->nodes_intent_locked = 0;
|
||||
iter->locks_want = min(locks_want, BTREE_MAX_DEPTH);
|
||||
iter->btree_id = btree_id;
|
||||
iter->at_end_of_leaf = 0;
|
||||
iter->error = 0;
|
||||
iter->c = c;
|
||||
iter->pos = pos;
|
||||
iter->flags = flags;
|
||||
iter->btree_id = btree_id;
|
||||
iter->level = depth;
|
||||
iter->locks_want = min(locks_want, BTREE_MAX_DEPTH);
|
||||
iter->nodes_locked = 0;
|
||||
iter->nodes_intent_locked = 0;
|
||||
memset(iter->nodes, 0, sizeof(iter->nodes));
|
||||
iter->nodes[iter->level] = BTREE_ITER_NOT_END;
|
||||
iter->next = iter;
|
||||
|
@ -3,38 +3,39 @@
|
||||
|
||||
#include "btree_types.h"
|
||||
|
||||
|
||||
#define BTREE_ITER_INTENT (1 << 0)
|
||||
#define BTREE_ITER_WITH_HOLES (1 << 1)
|
||||
#define BTREE_ITER_PREFETCH (1 << 2)
|
||||
/*
|
||||
* Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
|
||||
* @pos or the first key strictly greater than @pos
|
||||
*/
|
||||
#define BTREE_ITER_IS_EXTENTS (1 << 3)
|
||||
/*
|
||||
* indicates we need to call bch2_btree_iter_traverse() to revalidate iterator:
|
||||
*/
|
||||
#define BTREE_ITER_AT_END_OF_LEAF (1 << 4)
|
||||
#define BTREE_ITER_ERROR (1 << 5)
|
||||
|
||||
/*
|
||||
* @pos - iterator's current position
|
||||
* @level - current btree depth
|
||||
* @locks_want - btree level below which we start taking intent locks
|
||||
* @nodes_locked - bitmask indicating which nodes in @nodes are locked
|
||||
* @nodes_intent_locked - bitmask indicating which locks are intent locks
|
||||
*/
|
||||
struct btree_iter {
|
||||
/* Current btree depth */
|
||||
u8 level;
|
||||
|
||||
/*
|
||||
* Used in bch2_btree_iter_traverse(), to indicate whether we're
|
||||
* searching for @pos or the first key strictly greater than @pos
|
||||
*/
|
||||
u8 is_extents;
|
||||
|
||||
/* Bitmasks for read/intent locks held per level */
|
||||
u8 nodes_locked;
|
||||
u8 nodes_intent_locked;
|
||||
|
||||
/* Btree level below which we start taking intent locks */
|
||||
u8 locks_want;
|
||||
|
||||
enum btree_id btree_id:8;
|
||||
|
||||
/*
|
||||
* indicates we need to call bch2_btree_iter_traverse() to revalidate
|
||||
* iterator:
|
||||
*/
|
||||
u8 at_end_of_leaf;
|
||||
|
||||
s8 error;
|
||||
|
||||
struct bch_fs *c;
|
||||
|
||||
/* Current position of the iterator */
|
||||
struct bpos pos;
|
||||
|
||||
u8 flags;
|
||||
enum btree_id btree_id:8;
|
||||
unsigned level:4,
|
||||
locks_want:4,
|
||||
nodes_locked:4,
|
||||
nodes_intent_locked:4;
|
||||
|
||||
u32 lock_seq[BTREE_MAX_DEPTH];
|
||||
|
||||
/*
|
||||
@ -166,22 +167,17 @@ void bch2_btree_iter_advance_pos(struct btree_iter *);
|
||||
void bch2_btree_iter_rewind(struct btree_iter *, struct bpos);
|
||||
|
||||
void __bch2_btree_iter_init(struct btree_iter *, struct bch_fs *,
|
||||
enum btree_id, struct bpos, unsigned , unsigned);
|
||||
enum btree_id, struct bpos,
|
||||
unsigned , unsigned, unsigned);
|
||||
|
||||
static inline void bch2_btree_iter_init(struct btree_iter *iter,
|
||||
struct bch_fs *c,
|
||||
enum btree_id btree_id,
|
||||
struct bpos pos)
|
||||
struct bch_fs *c, enum btree_id btree_id,
|
||||
struct bpos pos, unsigned flags)
|
||||
{
|
||||
__bch2_btree_iter_init(iter, c, btree_id, pos, 0, 0);
|
||||
}
|
||||
|
||||
static inline void bch2_btree_iter_init_intent(struct btree_iter *iter,
|
||||
struct bch_fs *c,
|
||||
enum btree_id btree_id,
|
||||
struct bpos pos)
|
||||
{
|
||||
__bch2_btree_iter_init(iter, c, btree_id, pos, 1, 0);
|
||||
__bch2_btree_iter_init(iter, c, btree_id, pos,
|
||||
flags & BTREE_ITER_INTENT ? 1 : 0, 0,
|
||||
btree_id == BTREE_ID_EXTENTS
|
||||
? BTREE_ITER_IS_EXTENTS : 0);
|
||||
}
|
||||
|
||||
void bch2_btree_iter_link(struct btree_iter *, struct btree_iter *);
|
||||
@ -216,45 +212,25 @@ static inline int btree_iter_cmp(const struct btree_iter *l,
|
||||
return __btree_iter_cmp(l->btree_id, l->pos, r);
|
||||
}
|
||||
|
||||
#define __for_each_btree_node(_iter, _c, _btree_id, _start, _depth, \
|
||||
_b, _locks_want) \
|
||||
for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), \
|
||||
_start, _locks_want, _depth), \
|
||||
(_iter)->is_extents = false, \
|
||||
#define __for_each_btree_node(_iter, _c, _btree_id, _start, \
|
||||
_locks_want, _depth, _flags, _b) \
|
||||
for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), _start, \
|
||||
_locks_want, _depth, _flags), \
|
||||
_b = bch2_btree_iter_peek_node(_iter); \
|
||||
(_b); \
|
||||
(_b) = bch2_btree_iter_next_node(_iter, _depth))
|
||||
|
||||
#define for_each_btree_node(_iter, _c, _btree_id, _start, _depth, _b) \
|
||||
__for_each_btree_node(_iter, _c, _btree_id, _start, _depth, _b, 0)
|
||||
#define for_each_btree_node(_iter, _c, _btree_id, _start, _flags, _b) \
|
||||
__for_each_btree_node(_iter, _c, _btree_id, _start, 0, 0, _flags, _b)
|
||||
|
||||
#define __for_each_btree_key(_iter, _c, _btree_id, _start, \
|
||||
_k, _locks_want) \
|
||||
for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), \
|
||||
_start, _locks_want, 0); \
|
||||
!IS_ERR_OR_NULL(((_k) = bch2_btree_iter_peek(_iter)).k); \
|
||||
#define for_each_btree_key(_iter, _c, _btree_id, _start, _flags, _k) \
|
||||
for (bch2_btree_iter_init((_iter), (_c), (_btree_id), \
|
||||
(_start), (_flags)); \
|
||||
!IS_ERR_OR_NULL(((_k) = (((_flags) & BTREE_ITER_WITH_HOLES)\
|
||||
? bch2_btree_iter_peek_with_holes(_iter)\
|
||||
: bch2_btree_iter_peek(_iter))).k); \
|
||||
bch2_btree_iter_advance_pos(_iter))
|
||||
|
||||
#define for_each_btree_key(_iter, _c, _btree_id, _start, _k) \
|
||||
__for_each_btree_key(_iter, _c, _btree_id, _start, _k, 0)
|
||||
|
||||
#define for_each_btree_key_intent(_iter, _c, _btree_id, _start, _k) \
|
||||
__for_each_btree_key(_iter, _c, _btree_id, _start, _k, 1)
|
||||
|
||||
#define __for_each_btree_key_with_holes(_iter, _c, _btree_id, \
|
||||
_start, _k, _locks_want) \
|
||||
for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), \
|
||||
_start, _locks_want, 0); \
|
||||
!IS_ERR_OR_NULL(((_k) = bch2_btree_iter_peek_with_holes(_iter)).k);\
|
||||
bch2_btree_iter_advance_pos(_iter))
|
||||
|
||||
#define for_each_btree_key_with_holes(_iter, _c, _btree_id, _start, _k) \
|
||||
__for_each_btree_key_with_holes(_iter, _c, _btree_id, _start, _k, 0)
|
||||
|
||||
#define for_each_btree_key_with_holes_intent(_iter, _c, _btree_id, \
|
||||
_start, _k) \
|
||||
__for_each_btree_key_with_holes(_iter, _c, _btree_id, _start, _k, 1)
|
||||
|
||||
static inline int btree_iter_err(struct bkey_s_c k)
|
||||
{
|
||||
return IS_ERR(k.k) ? PTR_ERR(k.k) : 0;
|
||||
|
@ -141,6 +141,7 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \
|
||||
{ clear_bit(BTREE_NODE_ ## flag, &b->flags); }
|
||||
|
||||
enum btree_flags {
|
||||
BTREE_NODE_read_in_flight,
|
||||
BTREE_NODE_read_error,
|
||||
BTREE_NODE_write_error,
|
||||
BTREE_NODE_dirty,
|
||||
@ -152,6 +153,7 @@ enum btree_flags {
|
||||
BTREE_NODE_just_written,
|
||||
};
|
||||
|
||||
BTREE_FLAG(read_in_flight);
|
||||
BTREE_FLAG(read_error);
|
||||
BTREE_FLAG(write_error);
|
||||
BTREE_FLAG(dirty);
|
||||
|
@ -2047,7 +2047,7 @@ unlock:
|
||||
* traversed again
|
||||
*/
|
||||
trans_for_each_entry(trans, i)
|
||||
if (i->iter->at_end_of_leaf)
|
||||
if (i->iter->flags & BTREE_ITER_AT_END_OF_LEAF)
|
||||
goto out;
|
||||
|
||||
trans_for_each_entry(trans, i)
|
||||
@ -2161,7 +2161,8 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
|
||||
struct btree_iter iter;
|
||||
int ret, ret2;
|
||||
|
||||
bch2_btree_iter_init_intent(&iter, c, id, bkey_start_pos(&k->k));
|
||||
bch2_btree_iter_init(&iter, c, id, bkey_start_pos(&k->k),
|
||||
BTREE_ITER_INTENT);
|
||||
|
||||
ret = bch2_btree_iter_traverse(&iter);
|
||||
if (unlikely(ret))
|
||||
@ -2187,7 +2188,8 @@ int bch2_btree_update(struct bch_fs *c, enum btree_id id,
|
||||
|
||||
EBUG_ON(id == BTREE_ID_EXTENTS);
|
||||
|
||||
bch2_btree_iter_init_intent(&iter, c, id, k->k.p);
|
||||
bch2_btree_iter_init(&iter, c, id, k->k.p,
|
||||
BTREE_ITER_INTENT);
|
||||
|
||||
u = bch2_btree_iter_peek_with_holes(&iter);
|
||||
ret = btree_iter_err(u);
|
||||
@ -2222,7 +2224,8 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
bch2_btree_iter_init_intent(&iter, c, id, start);
|
||||
bch2_btree_iter_init(&iter, c, id, start,
|
||||
BTREE_ITER_INTENT);
|
||||
|
||||
while ((k = bch2_btree_iter_peek(&iter)).k &&
|
||||
!(ret = btree_iter_err(k))) {
|
||||
@ -2248,7 +2251,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
||||
delete.k.p = iter.pos;
|
||||
delete.k.version = version;
|
||||
|
||||
if (iter.is_extents) {
|
||||
if (iter.flags & BTREE_ITER_IS_EXTENTS) {
|
||||
/*
|
||||
* The extents btree is special - KEY_TYPE_DISCARD is
|
||||
* used for deletions, not KEY_TYPE_DELETED. This is an
|
||||
|
@ -317,7 +317,6 @@ void bch2_invalidate_bucket(struct bch_dev *ca, struct bucket *g)
|
||||
new.data_type = 0;
|
||||
new.cached_sectors = 0;
|
||||
new.dirty_sectors = 0;
|
||||
new.copygc = 0;
|
||||
new.gen++;
|
||||
}));
|
||||
|
||||
|
@ -95,33 +95,6 @@ static inline u8 ptr_stale(const struct bch_dev *ca,
|
||||
return gen_after(PTR_BUCKET(ca, ptr)->mark.gen, ptr->gen);
|
||||
}
|
||||
|
||||
/* bucket heaps */
|
||||
|
||||
static inline bool bucket_min_cmp(struct bucket_heap_entry l,
|
||||
struct bucket_heap_entry r)
|
||||
{
|
||||
return l.val < r.val;
|
||||
}
|
||||
|
||||
static inline bool bucket_max_cmp(struct bucket_heap_entry l,
|
||||
struct bucket_heap_entry r)
|
||||
{
|
||||
return l.val > r.val;
|
||||
}
|
||||
|
||||
static inline void bucket_heap_push(struct bch_dev *ca, struct bucket *g,
|
||||
unsigned long val)
|
||||
{
|
||||
struct bucket_heap_entry new = { g, val };
|
||||
|
||||
if (!heap_full(&ca->heap))
|
||||
heap_add(&ca->heap, new, bucket_min_cmp);
|
||||
else if (bucket_min_cmp(new, heap_peek(&ca->heap))) {
|
||||
ca->heap.data[0] = new;
|
||||
heap_sift(&ca->heap, 0, bucket_min_cmp);
|
||||
}
|
||||
}
|
||||
|
||||
/* bucket gc marks */
|
||||
|
||||
/* The dirty and cached sector counts saturate. If this occurs,
|
||||
@ -129,14 +102,16 @@ static inline void bucket_heap_push(struct bch_dev *ca, struct bucket *g,
|
||||
* GC must be performed. */
|
||||
#define GC_MAX_SECTORS_USED ((1U << 15) - 1)
|
||||
|
||||
static inline bool bucket_unused(struct bucket *g)
|
||||
static inline unsigned bucket_sectors_used(struct bucket_mark mark)
|
||||
{
|
||||
return !g->mark.counter;
|
||||
return mark.dirty_sectors + mark.cached_sectors;
|
||||
}
|
||||
|
||||
static inline unsigned bucket_sectors_used(struct bucket *g)
|
||||
static inline bool bucket_unused(struct bucket_mark mark)
|
||||
{
|
||||
return g->mark.dirty_sectors + g->mark.cached_sectors;
|
||||
return !mark.owned_by_allocator &&
|
||||
!mark.data_type &&
|
||||
!bucket_sectors_used(mark);
|
||||
}
|
||||
|
||||
/* Per device stats: */
|
||||
|
@ -1,6 +1,8 @@
|
||||
#ifndef _BUCKETS_TYPES_H
|
||||
#define _BUCKETS_TYPES_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
enum bucket_data_type {
|
||||
BUCKET_DATA = 0,
|
||||
BUCKET_BTREE,
|
||||
@ -18,9 +20,6 @@ struct bucket_mark {
|
||||
struct {
|
||||
u8 gen;
|
||||
|
||||
/* generation copygc is going to move this bucket into */
|
||||
unsigned copygc:1;
|
||||
|
||||
unsigned journal_seq_valid:1;
|
||||
|
||||
/*
|
||||
@ -96,10 +95,12 @@ struct bch_fs_usage {
|
||||
};
|
||||
|
||||
struct bucket_heap_entry {
|
||||
struct bucket *g;
|
||||
unsigned long val;
|
||||
size_t bucket;
|
||||
struct bucket_mark mark;
|
||||
};
|
||||
|
||||
typedef HEAP(struct bucket_heap_entry) bucket_heap;
|
||||
|
||||
/*
|
||||
* A reservation for space on disk:
|
||||
*/
|
||||
|
@ -5,9 +5,11 @@
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/preempt.h>
|
||||
|
||||
static inline bool io_timer_cmp(struct io_timer *l, struct io_timer *r)
|
||||
static inline long io_timer_cmp(io_timer_heap *h,
|
||||
struct io_timer *l,
|
||||
struct io_timer *r)
|
||||
{
|
||||
return time_after(l->expire, r->expire);
|
||||
return l->expire - r->expire;
|
||||
}
|
||||
|
||||
void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
|
||||
|
@ -22,12 +22,14 @@ struct io_timer {
|
||||
/* Amount to buffer up on a percpu counter */
|
||||
#define IO_CLOCK_PCPU_SECTORS 128
|
||||
|
||||
typedef HEAP(struct io_timer *) io_timer_heap;
|
||||
|
||||
struct io_clock {
|
||||
atomic_long_t now;
|
||||
u16 __percpu *pcpu_buf;
|
||||
|
||||
spinlock_t timer_lock;
|
||||
DECLARE_HEAP(struct io_timer *, timers);
|
||||
io_timer_heap timers;
|
||||
};
|
||||
|
||||
#endif /* _BCACHE_CLOCK_TYPES_H */
|
||||
|
@ -60,9 +60,9 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
|
||||
|
||||
bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_read_bio);
|
||||
bio->bi_bdev = pick.ca->disk_sb.bdev;
|
||||
bio->bi_opf = REQ_OP_READ|REQ_META;
|
||||
bio->bi_iter.bi_sector = pick.ptr.offset;
|
||||
bio->bi_iter.bi_size = btree_bytes(c);
|
||||
bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC);
|
||||
bch2_bio_map(bio, n_sorted);
|
||||
|
||||
submit_bio_wait(bio);
|
||||
@ -212,7 +212,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
|
||||
if (!i->size)
|
||||
return i->ret;
|
||||
|
||||
bch2_btree_iter_init(&iter, i->c, i->id, i->from);
|
||||
bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH);
|
||||
|
||||
while ((k = bch2_btree_iter_peek(&iter)).k &&
|
||||
!(err = btree_iter_err(k))) {
|
||||
@ -314,7 +314,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
|
||||
if (!i->size)
|
||||
return i->ret;
|
||||
|
||||
bch2_btree_iter_init(&iter, i->c, i->id, i->from);
|
||||
bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH);
|
||||
|
||||
while ((k = bch2_btree_iter_peek(&iter)).k &&
|
||||
!(err = btree_iter_err(k))) {
|
||||
|
@ -214,11 +214,13 @@ int bch2_dirent_rename(struct bch_fs *c,
|
||||
bool need_whiteout;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
bch2_btree_iter_init_intent(&src_iter, c, BTREE_ID_DIRENTS, src_pos);
|
||||
bch2_btree_iter_init_intent(&dst_iter, c, BTREE_ID_DIRENTS, dst_pos);
|
||||
bch2_btree_iter_init(&src_iter, c, BTREE_ID_DIRENTS, src_pos,
|
||||
BTREE_ITER_INTENT);
|
||||
bch2_btree_iter_init(&dst_iter, c, BTREE_ID_DIRENTS, dst_pos,
|
||||
BTREE_ITER_INTENT);
|
||||
bch2_btree_iter_link(&src_iter, &dst_iter);
|
||||
|
||||
bch2_btree_iter_init(&whiteout_iter, c, BTREE_ID_DIRENTS, src_pos);
|
||||
bch2_btree_iter_init(&whiteout_iter, c, BTREE_ID_DIRENTS, src_pos, 0);
|
||||
bch2_btree_iter_link(&src_iter, &whiteout_iter);
|
||||
|
||||
if (mode == BCH_RENAME_EXCHANGE) {
|
||||
@ -376,7 +378,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(dir_inum, 0), k) {
|
||||
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(dir_inum, 0), 0, k) {
|
||||
if (k.k->p.inode > dir_inum)
|
||||
break;
|
||||
|
||||
@ -405,7 +407,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file,
|
||||
pr_debug("listing for %lu from %llu", inode->i_ino, ctx->pos);
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
|
||||
POS(inode->i_ino, ctx->pos), k) {
|
||||
POS(inode->i_ino, ctx->pos), 0, k) {
|
||||
if (k.k->type != BCH_DIRENT)
|
||||
continue;
|
||||
|
||||
|
@ -41,13 +41,13 @@ static void sort_key_next(struct btree_node_iter *iter,
|
||||
* Necessary for btree_sort_fixup() - if there are multiple keys that compare
|
||||
* equal in different sets, we have to process them newest to oldest.
|
||||
*/
|
||||
#define key_sort_cmp(l, r) \
|
||||
#define key_sort_cmp(h, l, r) \
|
||||
({ \
|
||||
int _c = bkey_cmp_packed(b, \
|
||||
bkey_cmp_packed(b, \
|
||||
__btree_node_offset_to_key(b, (l).k), \
|
||||
__btree_node_offset_to_key(b, (r).k)); \
|
||||
__btree_node_offset_to_key(b, (r).k)) \
|
||||
\
|
||||
_c ? _c > 0 : (l).k > (r).k; \
|
||||
?: (l).k - (r).k; \
|
||||
})
|
||||
|
||||
static inline bool should_drop_next_key(struct btree_node_iter *iter,
|
||||
@ -63,7 +63,7 @@ static inline bool should_drop_next_key(struct btree_node_iter *iter,
|
||||
return false;
|
||||
|
||||
if (iter->used > 2 &&
|
||||
key_sort_cmp(r[0], r[1]))
|
||||
key_sort_cmp(iter, r[0], r[1]) >= 0)
|
||||
r++;
|
||||
|
||||
/*
|
||||
@ -98,7 +98,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
|
||||
}
|
||||
|
||||
sort_key_next(iter, b, iter->data);
|
||||
heap_sift(iter, 0, key_sort_cmp);
|
||||
heap_sift_down(iter, 0, key_sort_cmp);
|
||||
}
|
||||
|
||||
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
|
||||
@ -754,27 +754,26 @@ static void extent_save(struct btree *b, struct btree_node_iter *iter,
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if l > r - unless l == r, in which case returns true if l is
|
||||
* older than r.
|
||||
* If keys compare equal, compare by pointer order:
|
||||
*
|
||||
* Necessary for sort_fix_overlapping() - if there are multiple keys that
|
||||
* compare equal in different sets, we have to process them newest to oldest.
|
||||
*/
|
||||
#define extent_sort_cmp(l, r) \
|
||||
#define extent_sort_cmp(h, l, r) \
|
||||
({ \
|
||||
struct bkey _ul = bkey_unpack_key(b, \
|
||||
__btree_node_offset_to_key(b, (l).k)); \
|
||||
struct bkey _ur = bkey_unpack_key(b, \
|
||||
__btree_node_offset_to_key(b, (r).k)); \
|
||||
\
|
||||
int _c = bkey_cmp(bkey_start_pos(&_ul), bkey_start_pos(&_ur)); \
|
||||
_c ? _c > 0 : (l).k < (r).k; \
|
||||
bkey_cmp(bkey_start_pos(&_ul), \
|
||||
bkey_start_pos(&_ur)) ?: (r).k - (l).k; \
|
||||
})
|
||||
|
||||
static inline void extent_sort_sift(struct btree_node_iter *iter,
|
||||
struct btree *b, size_t i)
|
||||
{
|
||||
heap_sift(iter, i, extent_sort_cmp);
|
||||
heap_sift_down(iter, i, extent_sort_cmp);
|
||||
}
|
||||
|
||||
static inline void extent_sort_next(struct btree_node_iter *iter,
|
||||
@ -782,7 +781,7 @@ static inline void extent_sort_next(struct btree_node_iter *iter,
|
||||
struct btree_node_iter_set *i)
|
||||
{
|
||||
sort_key_next(iter, b, i);
|
||||
heap_sift(iter, i - iter->data, extent_sort_cmp);
|
||||
heap_sift_down(iter, i - iter->data, extent_sort_cmp);
|
||||
}
|
||||
|
||||
static void extent_sort_append(struct bch_fs *c,
|
||||
@ -843,7 +842,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
|
||||
|
||||
_r = iter->data + 1;
|
||||
if (iter->used > 2 &&
|
||||
extent_sort_cmp(_r[0], _r[1]))
|
||||
extent_sort_cmp(iter, _r[0], _r[1]) >= 0)
|
||||
_r++;
|
||||
|
||||
rk = __btree_node_offset_to_key(b, _r->k);
|
||||
@ -1433,11 +1432,12 @@ stop:
|
||||
gc_pos_btree_node(b));
|
||||
|
||||
EBUG_ON(bkey_cmp(iter->pos, s->committed));
|
||||
EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) != iter->at_end_of_leaf);
|
||||
EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) !=
|
||||
!!(iter->flags & BTREE_ITER_AT_END_OF_LEAF));
|
||||
|
||||
bch2_cut_front(iter->pos, insert);
|
||||
|
||||
if (insert->k.size && iter->at_end_of_leaf)
|
||||
if (insert->k.size && (iter->flags & BTREE_ITER_AT_END_OF_LEAF))
|
||||
ret = BTREE_INSERT_NEED_TRAVERSE;
|
||||
|
||||
EBUG_ON(insert->k.size && ret == BTREE_INSERT_OK);
|
||||
@ -1596,9 +1596,10 @@ stop:
|
||||
|
||||
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
|
||||
EBUG_ON(bkey_cmp(iter->pos, s.committed));
|
||||
EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) != iter->at_end_of_leaf);
|
||||
EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) !=
|
||||
!!(iter->flags & BTREE_ITER_AT_END_OF_LEAF));
|
||||
|
||||
if (insert->k->k.size && iter->at_end_of_leaf)
|
||||
if (insert->k->k.size && (iter->flags & BTREE_ITER_AT_END_OF_LEAF))
ret = BTREE_INSERT_NEED_TRAVERSE;

EBUG_ON(insert->k->k.size && ret == BTREE_INSERT_OK);

@ -9,160 +9,162 @@
/*
* Traversal for trees in eytzinger layout - a full binary tree layed out in an
* array
*
* We used one based indexing, not zero based: with one based indexing, each
* level of the tree starts at a power of two - leading to better alignment -
* and it's what you want for implementing next/prev and to/from inorder.
*
* To/from inorder also uses 1 based indexing.
*
* Size parameter is treated as if we were using 0 based indexing, however:
* valid nodes, and inorder indices, are in the range [1..size)
*/

static inline unsigned eytzinger_child(unsigned j, unsigned child)
/*
* One based indexing version:
*
* With one based indexing each level of the tree starts at a power of two -
* good for cacheline alignment:
*
* Size parameter is treated as if we were using 0 based indexing, however:
* valid nodes, and inorder indices, are in the range [1..size) - that is, there
* are actually size - 1 elements
*/

static inline unsigned eytzinger1_child(unsigned i, unsigned child)
{
EBUG_ON(child > 1);

return (j << 1) + child;
return (i << 1) + child;
}

static inline unsigned eytzinger_left_child(unsigned j)
static inline unsigned eytzinger1_left_child(unsigned i)
{
return eytzinger_child(j, 0);
return eytzinger1_child(i, 0);
}

static inline unsigned eytzinger_right_child(unsigned j)
static inline unsigned eytzinger1_right_child(unsigned i)
{
return eytzinger_child(j, 1);
return eytzinger1_child(i, 1);
}

static inline unsigned eytzinger_first(unsigned size)
static inline unsigned eytzinger1_first(unsigned size)
{
return rounddown_pow_of_two(size - 1);
}

static inline unsigned eytzinger_last(unsigned size)
static inline unsigned eytzinger1_last(unsigned size)
{
return rounddown_pow_of_two(size) - 1;
}

/*
* eytzinger_next() and eytzinger_prev() have the nice properties that
* eytzinger1_next() and eytzinger1_prev() have the nice properties that
*
* eytzinger_next(0) == eytzinger_first())
* eytzinger_prev(0) == eytzinger_last())
* eytzinger1_next(0) == eytzinger1_first())
* eytzinger1_prev(0) == eytzinger1_last())
*
* eytzinger_prev(eytzinger_first()) == 0
* eytzinger_next(eytzinger_last()) == 0
* eytzinger1_prev(eytzinger1_first()) == 0
* eytzinger1_next(eytzinger1_last()) == 0
*/

static inline unsigned eytzinger_next(unsigned j, unsigned size)
static inline unsigned eytzinger1_next(unsigned i, unsigned size)
{
EBUG_ON(j >= size);
EBUG_ON(i >= size);

if (eytzinger_right_child(j) < size) {
j = eytzinger_right_child(j);
if (eytzinger1_right_child(i) < size) {
i = eytzinger1_right_child(i);

j <<= __fls(size) - __fls(j);
j >>= j >= size;
i <<= __fls(size) - __fls(i);
i >>= i >= size;
} else {
j >>= ffz(j) + 1;
i >>= ffz(i) + 1;
}

return j;
return i;
}

static inline unsigned eytzinger_prev(unsigned j, unsigned size)
static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
{
EBUG_ON(j >= size);
EBUG_ON(i >= size);

if (eytzinger_left_child(j) < size) {
j = eytzinger_left_child(j);
if (eytzinger1_left_child(i) < size) {
i = eytzinger1_left_child(i);

j <<= __fls(size) - __fls(j);
j -= 1;
j >>= j >= size;
i <<= __fls(size) - __fls(i);
i -= 1;
i >>= i >= size;
} else {
j >>= __ffs(j) + 1;
i >>= __ffs(i) + 1;
}

return j;
return i;
}

static inline unsigned eytzinger_extra(unsigned size)
static inline unsigned eytzinger1_extra(unsigned size)
{
return (size - rounddown_pow_of_two(size - 1)) << 1;
}

static inline unsigned __eytzinger_to_inorder(unsigned j, unsigned size,
static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
unsigned extra)
{
unsigned b = __fls(j);
unsigned b = __fls(i);
unsigned shift = __fls(size - 1) - b;
int s;

EBUG_ON(!j || j >= size);
EBUG_ON(!i || i >= size);

j ^= 1U << b;
j <<= 1;
j |= 1;
j <<= shift;
i ^= 1U << b;
i <<= 1;
i |= 1;
i <<= shift;

/*
* sign bit trick:
*
* if (j > extra)
* j -= (j - extra) >> 1;
* if (i > extra)
* i -= (i - extra) >> 1;
*/
s = extra - j;
j += (s >> 1) & (s >> 31);
s = extra - i;
i += (s >> 1) & (s >> 31);

return j;
return i;
}

static inline unsigned __inorder_to_eytzinger(unsigned j, unsigned size,
static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
unsigned extra)
{
unsigned shift;
int s;

EBUG_ON(!j || j >= size);
EBUG_ON(!i || i >= size);

/*
* sign bit trick:
*
* if (j > extra)
* j += j - extra;
* if (i > extra)
* i += i - extra;
*/
s = extra - j;
j -= s & (s >> 31);
s = extra - i;
i -= s & (s >> 31);

shift = __ffs(j);
shift = __ffs(i);

j >>= shift + 1;
j |= 1U << (__fls(size - 1) - shift);
i >>= shift + 1;
i |= 1U << (__fls(size - 1) - shift);

return j;
return i;
}

static inline unsigned eytzinger_to_inorder(unsigned j, unsigned size)
static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size)
{
return __eytzinger_to_inorder(j, size, eytzinger_extra(size));
return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size));
}

static inline unsigned inorder_to_eytzinger(unsigned j, unsigned size)
static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
{
return __inorder_to_eytzinger(j, size, eytzinger_extra(size));
return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size));
}

#define eytzinger_for_each(_i, _size) \
for ((_i) = eytzinger_first((_size)); \
#define eytzinger1_for_each(_i, _size) \
for ((_i) = eytzinger1_first((_size)); \
(_i) != 0; \
(_i) = eytzinger_next((_i), (_size)))
(_i) = eytzinger1_next((_i), (_size)))

#if 0
void eytzinger_test(void)
void eytzinger0_test(void)
{
unsigned i, j, size;

@ -172,20 +174,20 @@ void eytzinger_test(void)
if (!(size % 4096))
printk(KERN_INFO "tree size %u\n", size);

assert(eytzinger_prev(0, size) == eytzinger_last(size));
assert(eytzinger_next(0, size) == eytzinger_first(size));
assert(eytzinger1_prev(0, size) == eytzinger1_last(size));
assert(eytzinger1_next(0, size) == eytzinger1_first(size));

assert(eytzinger_prev(eytzinger_first(size), size) == 0);
assert(eytzinger_next(eytzinger_last(size), size) == 0);
assert(eytzinger1_prev(eytzinger1_first(size), size) == 0);
assert(eytzinger1_next(eytzinger1_last(size), size) == 0);

eytzinger_for_each(j, size) {
eytzinger1_for_each(j, size) {
assert(from_inorder(i, size) == j);
assert(to_inorder(j, size) == i);

if (j != eytzinger_last(size)) {
unsigned next = eytzinger_next(j, size);
if (j != eytzinger1_last(size)) {
unsigned next = eytzinger1_next(j, size);

assert(eytzinger_prev(next, size) == j);
assert(eytzinger1_prev(next, size) == j);
}
}
}
@ -193,4 +195,96 @@ void eytzinger_test(void)
}
#endif

/* Zero based indexing version: */

static inline unsigned eytzinger0_child(unsigned i, unsigned child)
{
EBUG_ON(child > 1);

return (i << 1) + 1 + child;
}

static inline unsigned eytzinger0_left_child(unsigned i)
{
return eytzinger0_child(i, 0);
}

static inline unsigned eytzinger0_right_child(unsigned i)
{
return eytzinger0_child(i, 1);
}

#if 0
static inline unsigned eytzinger0_first(unsigned size)
{
}

static inline unsigned eytzinger0_last(unsigned size)
{
}

static inline unsigned eytzinger0_next(unsigned i, unsigned size)
{
}

static inline unsigned eytzinger0_prev(unsigned i, unsigned size)
{
}
#endif

static inline unsigned eytzinger0_extra(unsigned size)
{
return (size + 1 - rounddown_pow_of_two(size)) << 1;
}

static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size,
unsigned extra)
{
return __eytzinger1_to_inorder(i + 1, size + 1, extra) - 1;
}

static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size,
unsigned extra)
{
return __inorder_to_eytzinger1(i + 1, size + 1, extra) - 1;
}

static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size)
{
return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size));
}

static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
{
return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size));
}

#define eytzinger0_find(base, _nr, _size, _cmp, _search) \
({ \
void *_base = base; \
size_t _i = 0; \
int _res; \
\
while (_i < (_nr) && \
(_res = _cmp(_search, _base + _i * (_size), _size))) \
_i = eytzinger0_child(_i, _res > 0); \
\
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { \
bool found1 = _i < _nr, found2 = false; \
unsigned _j; \
\
for (_j = 0; _j < _nr; _j++) \
if (!_cmp(_base + _j * (_size), _search, _size))\
found2 = true; \
\
BUG_ON(found1 != found2); \
} \
\
_i; \
})

void eytzinger0_sort(void *, size_t, size_t,
int (*cmp_func)(const void *, const void *, size_t),
void (*swap_func)(void *, void *, size_t));

#endif /* _EYTZINGER_H */
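The eytzinger comments in the header above are easier to follow with a concrete picture. Below is a small standalone sketch, not part of the commit: the names fill, search and SIZE are invented for illustration, and a full tree of 15 elements is used so the "extra" correction for a partial bottom row never comes into play. Slot i's children live at 2*i and 2*i + 1, an inorder walk of that implicit tree lays the keys down in sorted order, and a search that only compares and descends, the same shape as the eytzinger0_find() macro further up, reaches any key in O(log n) array hops.

/* Standalone illustration of the one-based eytzinger layout; not part
 * of the commit.  Compile with any C compiler and run: all asserts pass. */
#include <assert.h>

#define SIZE 16				/* valid indices are [1..SIZE) */
static int keys[SIZE];

/* inorder walk of the implicit tree: assigns 1, 2, 3, ... in sorted order */
static int fill(unsigned i, int next)
{
	if (i >= SIZE)
		return next;
	next = fill(2 * i, next);	/* left child */
	keys[i] = next++;
	return fill(2 * i + 1, next);	/* right child */
}

/* descend from the root, comparing and picking a child at each level */
static unsigned search(int key)
{
	unsigned i = 1;

	while (i < SIZE && keys[i] != key)
		i = 2 * i + (key > keys[i]);
	return i;			/* >= SIZE means not found */
}

int main(void)
{
	int k;

	fill(1, 1);			/* keys 1..15 in eytzinger order */
	for (k = 1; k < SIZE; k++)
		assert(keys[search(k)] == k);
	assert(search(100) >= SIZE);	/* a missing key falls off the tree */
	return 0;
}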
@ -282,10 +282,12 @@ static int bchfs_write_index_update(struct bch_write_op *wop)

BUG_ON(k->k.p.inode != op->ei->vfs_inode.i_ino);

bch2_btree_iter_init_intent(&extent_iter, wop->c, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k));
bch2_btree_iter_init_intent(&inode_iter, wop->c, BTREE_ID_INODES,
POS(extent_iter.pos.inode, 0));
bch2_btree_iter_init(&extent_iter, wop->c, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
BTREE_ITER_INTENT);
bch2_btree_iter_init(&inode_iter, wop->c, BTREE_ID_INODES,
POS(extent_iter.pos.inode, 0),
BTREE_ITER_INTENT);

hook.op = op;
hook.hook.fn = bchfs_extent_update_hook;
@ -786,7 +788,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
.mapping = mapping, .nr_pages = nr_pages
};

bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0);

INIT_LIST_HEAD(&readpages_iter.pages);
list_add(&readpages_iter.pages, pages);
@ -841,7 +843,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
bio_add_page_contig(&rbio->bio, page);

bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0);
bchfs_read(c, &iter, rbio, inode, NULL);
}

@ -1036,7 +1038,7 @@ do_io:
w->io->op.new_i_size = i_size;

if (wbc->sync_mode == WB_SYNC_ALL)
w->io->bio.bio.bi_opf |= WRITE_SYNC;
w->io->bio.bio.bi_opf |= REQ_SYNC;

/* Before unlocking the page, transfer reservation to w->io: */
old = page_state_cmpxchg(page_state(page), new, {
@ -1448,7 +1450,7 @@ start:
bio->bi_iter.bi_sector = offset >> 9;
bio->bi_private = dio;

ret = bio_get_user_pages(bio, iter, 1);
ret = bio_iov_iter_get_pages(bio, iter);
if (ret < 0) {
/* XXX: fault inject this path */
bio->bi_error = ret;
@ -1537,7 +1539,7 @@ static void bch2_do_direct_IO_write(struct dio_write *dio)

bio->bi_iter.bi_sector = (dio->offset + dio->written) >> 9;

ret = bio_get_user_pages(bio, &dio->iter, 0);
ret = bio_iov_iter_get_pages(bio, &dio->iter);
if (ret < 0) {
/*
* these didn't get initialized, but bch2_dio_write_done() will
@ -1908,7 +1910,7 @@ static int __bch2_truncate_page(struct address_space *mapping,
*/
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
POS(inode->i_ino,
index << (PAGE_SHIFT - 9)), k) {
index << (PAGE_SHIFT - 9)), 0, k) {
if (bkey_cmp(bkey_start_pos(k.k),
POS(inode->i_ino,
(index + 1) << (PAGE_SHIFT - 9))) >= 0)
@ -2122,10 +2124,11 @@ static long bch2_fcollapse(struct inode *inode, loff_t offset, loff_t len)
if ((offset | len) & (PAGE_SIZE - 1))
return -EINVAL;

bch2_btree_iter_init_intent(&dst, c, BTREE_ID_EXTENTS,
POS(inode->i_ino, offset >> 9));
bch2_btree_iter_init(&dst, c, BTREE_ID_EXTENTS,
POS(inode->i_ino, offset >> 9),
BTREE_ITER_INTENT);
/* position will be set from dst iter's position: */
bch2_btree_iter_init(&src, c, BTREE_ID_EXTENTS, POS_MIN);
bch2_btree_iter_init(&src, c, BTREE_ID_EXTENTS, POS_MIN, 0);
bch2_btree_iter_link(&src, &dst);

/*
@ -2249,7 +2252,8 @@ static long bch2_fallocate(struct inode *inode, int mode,
unsigned replicas = READ_ONCE(c->opts.data_replicas);
int ret;

bch2_btree_iter_init_intent(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_INTENT);

inode_lock(inode);
inode_dio_wait(inode);
@ -2459,7 +2463,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
return -ENXIO;

for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
POS(inode->i_ino, offset >> 9), k) {
POS(inode->i_ino, offset >> 9), 0, k) {
if (k.k->p.inode != inode->i_ino) {
break;
} else if (bkey_extent_is_data(k.k)) {
@ -2527,8 +2531,9 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
if (offset >= isize)
return -ENXIO;

for_each_btree_key_with_holes(&iter, c, BTREE_ID_EXTENTS,
POS(inode->i_ino, offset >> 9), k) {
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
POS(inode->i_ino, offset >> 9),
BTREE_ITER_WITH_HOLES, k) {
if (k.k->p.inode != inode->i_ino) {
next_hole = bch2_next_pagecache_hole(inode,
offset, MAX_LFS_FILESIZE);

@ -81,7 +81,8 @@ int __must_check __bch2_write_inode(struct bch_fs *c,

lockdep_assert_held(&ei->update_lock);

bch2_btree_iter_init_intent(&iter, c, BTREE_ID_INODES, POS(inum, 0));
bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inum, 0),
BTREE_ITER_INTENT);

do {
struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter);
@ -714,7 +715,7 @@ static int bch2_fiemap(struct inode *inode, struct fiemap_extent_info *info,
return -EINVAL;

for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
POS(inode->i_ino, start >> 9), k)
POS(inode->i_ino, start >> 9), 0, k)
if (bkey_extent_is_data(k.k) ||
k.k->type == BCH_RESERVATION) {
if (bkey_cmp(bkey_start_pos(k.k),
@ -990,7 +991,6 @@ static const struct file_operations bch_dir_file_operations = {
};

static const struct inode_operations bch_symlink_inode_operations = {
.readlink = generic_readlink,
.get_link = page_get_link,
.setattr = bch2_setattr,
.listxattr = bch2_xattr_list,

@ -134,8 +134,8 @@ struct hash_check {
static void hash_check_init(const struct bch_hash_desc desc,
struct hash_check *h, struct bch_fs *c)
{
bch2_btree_iter_init(&h->chain, c, desc.btree_id, POS_MIN);
bch2_btree_iter_init(&h->iter, c, desc.btree_id, POS_MIN);
bch2_btree_iter_init(&h->chain, c, desc.btree_id, POS_MIN, 0);
bch2_btree_iter_init(&h->iter, c, desc.btree_id, POS_MIN, 0);
}

static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
@ -251,7 +251,7 @@ static int check_extents(struct bch_fs *c)
int ret = 0;

for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
POS(BCACHE_ROOT_INO, 0), k) {
POS(BCACHE_ROOT_INO, 0), 0, k) {
if (k.k->type == KEY_TYPE_DISCARD)
continue;

@ -310,7 +310,7 @@ static int check_dirents(struct bch_fs *c)
hash_check_init(bch2_dirent_hash_desc, &h, c);

for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
POS(BCACHE_ROOT_INO, 0), k) {
POS(BCACHE_ROOT_INO, 0), 0, k) {
struct bkey_s_c_dirent d;
struct bch_inode_unpacked target;
bool have_target;
@ -444,7 +444,7 @@ static int check_xattrs(struct bch_fs *c)
hash_check_init(bch2_xattr_hash_desc, &h, c);

for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
POS(BCACHE_ROOT_INO, 0), k) {
POS(BCACHE_ROOT_INO, 0), 0, k) {
ret = walk_inode(c, &w, k.k->p.inode);
if (ret)
break;

@ -664,7 +664,7 @@ next:
goto up;

for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
POS(e->inum, e->offset + 1), k) {
POS(e->inum, e->offset + 1), 0, k) {
if (k.k->p.inode != e->inum)
break;

@ -712,7 +712,7 @@ up:
path.nr--;
}

for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
if (k.k->type != BCH_INODE_FS ||
!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode)))
continue;
@ -794,7 +794,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,

inc_link(c, links, range_start, range_end, BCACHE_ROOT_INO, false);

for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, k) {
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) {
switch (k.k->type) {
case BCH_DIRENT:
d = bkey_s_c_to_dirent(k);
@ -825,7 +825,7 @@ s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum)
struct bkey_s_c k;
u64 sectors = 0;

for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), k) {
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) {
if (k.k->p.inode != inum)
break;

@ -999,7 +999,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
int ret = 0, ret2 = 0;
u64 nlinks_pos;

bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0));
bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0), 0);
genradix_iter_init(&nlinks_iter);

while ((k = bch2_btree_iter_peek(&iter)).k &&

@ -276,7 +276,8 @@ int bch2_inode_create(struct bch_fs *c, struct bkey_i *inode,
if (*hint == min)
searched_from_start = true;
again:
bch2_btree_iter_init_intent(&iter, c, BTREE_ID_INODES, POS(*hint, 0));
bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(*hint, 0),
BTREE_ITER_INTENT);

while (1) {
struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter);
@ -376,8 +377,9 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
struct bkey_s_c k;
int ret = -ENOENT;

for_each_btree_key_with_holes(&iter, c, BTREE_ID_INODES,
POS(inode_nr, 0), k) {
for_each_btree_key(&iter, c, BTREE_ID_INODES,
POS(inode_nr, 0),
BTREE_ITER_WITH_HOLES, k) {
switch (k.k->type) {
case BCH_INODE_FS:
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
@ -400,7 +402,7 @@ int bch2_cached_dev_inode_find_by_uuid(struct bch_fs *c, uuid_le *uuid,
struct btree_iter iter;
struct bkey_s_c k;

for_each_btree_key(&iter, c, BTREE_ID_INODES, POS(0, 0), k) {
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS(0, 0), 0, k) {
if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
break;

@ -182,8 +182,9 @@ static int bch2_write_index_default(struct bch_write_op *op)
struct btree_iter iter;
int ret;

bch2_btree_iter_init_intent(&iter, op->c, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k));
bch2_btree_iter_init(&iter, op->c, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
BTREE_ITER_INTENT);

ret = bch2_btree_insert_list_at(&iter, keys, &op->res,
NULL, op_journal_seq(op),
@ -1112,9 +1113,9 @@ void bch2_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
if (promote_op) {
struct bio *promote_bio = &promote_op->write.wbio.bio;

bio_init(promote_bio);
promote_bio->bi_max_vecs = pages;
promote_bio->bi_io_vec = promote_bio->bi_inline_vecs;
bio_init(promote_bio,
promote_bio->bi_inline_vecs,
pages);
bounce = true;
/* could also set read_full */
}
@ -1265,8 +1266,9 @@ static void bch2_read_iter(struct bch_fs *c, struct bch_read_bio *rbio,
struct bkey_s_c k;
int ret;

for_each_btree_key_with_holes(&iter, c, BTREE_ID_EXTENTS,
POS(inode, bvec_iter.bi_sector), k) {
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
POS(inode, bvec_iter.bi_sector),
BTREE_ITER_WITH_HOLES, k) {
BKEY_PADDED(k) tmp;
struct extent_pick_ptr pick;
unsigned bytes, sectors;

@ -163,8 +163,7 @@ static void journal_seq_blacklist_flush(struct journal *j,
n = bl->entries[i];
mutex_unlock(&j->blacklist_lock);

bch2_btree_iter_init(&iter, c, n.btree_id, n.pos);
iter.is_extents = false;
__bch2_btree_iter_init(&iter, c, n.btree_id, n.pos, 0, 0, 0);
redo_peek:
b = bch2_btree_iter_peek_node(&iter);

@ -1921,6 +1920,9 @@ void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
struct journal_entry_pin *pin;
u64 pin_seq;

if (!test_bit(JOURNAL_STARTED, &j->flags))
return;

while ((pin = journal_get_next_pin(j, seq_to_flush, &pin_seq)))
pin->flush(j, pin, pin_seq);

@ -2374,9 +2376,9 @@ static void journal_write(struct closure *cl)
bio = ca->journal.bio;
bio_reset(bio);
bio->bi_bdev = ca->disk_sb.bdev;
bio->bi_opf = REQ_OP_FLUSH;
bio->bi_end_io = journal_write_endio;
bio->bi_private = ca;
bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
closure_bio_submit(bio, cl);
}

@ -97,7 +97,8 @@ int bch2_move_data_off_device(struct bch_dev *ca)
atomic_set(&ctxt.error_count, 0);
atomic_set(&ctxt.error_flags, 0);

bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_PREFETCH);

while (!bch2_move_ctxt_wait(&ctxt) &&
(k = bch2_btree_iter_peek(&iter)).k &&
@ -167,7 +168,7 @@ static int bch2_move_btree_off(struct bch_dev *ca, enum btree_id id)

closure_init_stack(&cl);

for_each_btree_node(&iter, c, id, POS_MIN, 0, b) {
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
retry:
if (!bch2_extent_has_device(e, ca->dev_idx))
@ -197,7 +198,7 @@ retry:
return ret; /* btree IO error */

if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
for_each_btree_node(&iter, c, id, POS_MIN, 0, b) {
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);

BUG_ON(bch2_extent_has_device(e, ca->dev_idx));
@ -341,7 +342,8 @@ int bch2_flag_data_bad(struct bch_dev *ca)
struct bkey_s_c_extent e;
struct btree_iter iter;

bch2_btree_iter_init(&iter, ca->fs, BTREE_ID_EXTENTS, POS_MIN);
bch2_btree_iter_init(&iter, ca->fs, BTREE_ID_EXTENTS,
POS_MIN, BTREE_ITER_PREFETCH);

while ((k = bch2_btree_iter_peek(&iter)).k &&
!(ret = btree_iter_err(k))) {

@ -54,8 +54,9 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
struct btree_iter iter;
int ret = 0;

bch2_btree_iter_init_intent(&iter, c, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k));
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
BTREE_ITER_INTENT);

while (1) {
struct bkey_s_extent insert =
@ -171,13 +172,12 @@ void bch2_migrate_write_init(struct bch_fs *c,
static void migrate_bio_init(struct moving_io *io, struct bio *bio,
unsigned sectors)
{
bio_init(bio);
bio_init(bio, io->bi_inline_vecs,
DIV_ROUND_UP(sectors, PAGE_SECTORS));
bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

bio->bi_iter.bi_size = sectors << 9;
bio->bi_max_vecs = DIV_ROUND_UP(sectors, PAGE_SECTORS);
bio->bi_private = &io->cl;
bio->bi_io_vec = io->bi_inline_vecs;
bch2_bio_map(bio, NULL);
}

@ -9,6 +9,7 @@
#include "buckets.h"
#include "clock.h"
#include "extents.h"
#include "eytzinger.h"
#include "io.h"
#include "keylist.h"
#include "move.h"
@ -18,20 +19,43 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/sort.h>
#include <linux/wait.h>

/* Moving GC - IO loop */

static int bucket_idx_cmp(const void *_l, const void *_r, size_t size)
{
const struct bucket_heap_entry *l = _l;
const struct bucket_heap_entry *r = _r;

if (l->bucket < r->bucket)
return -1;
if (l->bucket > r->bucket)
return 1;
return 0;
}

static const struct bch_extent_ptr *moving_pred(struct bch_dev *ca,
struct bkey_s_c k)
{
bucket_heap *h = &ca->copygc_heap;
const struct bch_extent_ptr *ptr;

if (bkey_extent_is_data(k.k) &&
(ptr = bch2_extent_has_device(bkey_s_c_to_extent(k),
ca->dev_idx)) &&
PTR_BUCKET(ca, ptr)->mark.copygc)
ca->dev_idx))) {
struct bucket_heap_entry search = {
.bucket = PTR_BUCKET_NR(ca, ptr)
};

size_t i = eytzinger0_find(h->data, h->used,
sizeof(h->data[0]),
bucket_idx_cmp, &search);

if (i < h->used)
return ptr;
}

return NULL;
}
@ -60,17 +84,19 @@ static void read_moving(struct bch_dev *ca, size_t buckets_to_move,
u64 sectors_to_move)
{
struct bch_fs *c = ca->fs;
struct bucket *g;
bucket_heap *h = &ca->copygc_heap;
struct moving_context ctxt;
struct btree_iter iter;
struct bkey_s_c k;
u64 sectors_not_moved = 0;
size_t buckets_not_moved = 0;
struct bucket_heap_entry *i;

bch2_ratelimit_reset(&ca->moving_gc_pd.rate);
bch2_move_ctxt_init(&ctxt, &ca->moving_gc_pd.rate,
SECTORS_IN_FLIGHT_PER_DEVICE);
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_PREFETCH);

while (1) {
if (kthread_should_stop())
@ -108,11 +134,14 @@ next:
buckets_to_move);

/* don't check this if we bailed out early: */
for_each_bucket(g, ca)
if (g->mark.copygc && bucket_sectors_used(g)) {
sectors_not_moved += bucket_sectors_used(g);
for (i = h->data; i < h->data + h->used; i++) {
struct bucket_mark m = READ_ONCE(ca->buckets[i->bucket].mark);

if (i->mark.gen == m.gen && bucket_sectors_used(m)) {
sectors_not_moved += bucket_sectors_used(m);
buckets_not_moved++;
}
}

if (sectors_not_moved)
bch_warn(c, "copygc finished but %llu/%llu sectors, %zu/%zu buckets not moved",
@ -138,15 +167,20 @@ static bool have_copygc_reserve(struct bch_dev *ca)
return ret;
}

static inline int sectors_used_cmp(bucket_heap *heap,
struct bucket_heap_entry l,
struct bucket_heap_entry r)
{
return bucket_sectors_used(l.mark) - bucket_sectors_used(r.mark);
}

static void bch2_moving_gc(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
struct bucket *g;
struct bucket_mark new;
u64 sectors_to_move;
u64 sectors_to_move = 0;
size_t buckets_to_move, buckets_unused = 0;
struct bucket_heap_entry e;
unsigned sectors_used, i;
struct bucket_heap_entry e, *i;
int reserve_sectors;

if (!have_copygc_reserve(ca)) {
@ -174,52 +208,47 @@ static void bch2_moving_gc(struct bch_dev *ca)
*/

/*
* We need bucket marks to be up to date, so gc can't be recalculating
* them, and we don't want the allocator invalidating a bucket after
* we've decided to evacuate it but before we set copygc:
* We need bucket marks to be up to date - gc can't be recalculating
* them:
*/
down_read(&c->gc_lock);
mutex_lock(&ca->heap_lock);
mutex_lock(&ca->fs->bucket_lock);

ca->heap.used = 0;
ca->copygc_heap.used = 0;
for_each_bucket(g, ca) {
bucket_cmpxchg(g, new, new.copygc = 0);
struct bucket_mark m = READ_ONCE(g->mark);
struct bucket_heap_entry e = { g - ca->buckets, m };

if (bucket_unused(g)) {
if (bucket_unused(m)) {
buckets_unused++;
continue;
}

if (g->mark.owned_by_allocator ||
g->mark.data_type != BUCKET_DATA)
if (m.owned_by_allocator ||
m.data_type != BUCKET_DATA)
continue;

sectors_used = bucket_sectors_used(g);

if (sectors_used >= ca->mi.bucket_size)
if (bucket_sectors_used(m) >= ca->mi.bucket_size)
continue;

bucket_heap_push(ca, g, sectors_used);
heap_add_or_replace(&ca->copygc_heap, e, -sectors_used_cmp);
}
up_read(&c->gc_lock);

sectors_to_move = 0;
for (i = 0; i < ca->heap.used; i++)
sectors_to_move += ca->heap.data[i].val;
for (i = ca->copygc_heap.data;
i < ca->copygc_heap.data + ca->copygc_heap.used;
i++)
sectors_to_move += bucket_sectors_used(i->mark);

while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) {
BUG_ON(!heap_pop(&ca->heap, e, bucket_min_cmp));
sectors_to_move -= e.val;
BUG_ON(!heap_pop(&ca->copygc_heap, e, -sectors_used_cmp));
sectors_to_move -= bucket_sectors_used(e.mark);
}

for (i = 0; i < ca->heap.used; i++)
bucket_cmpxchg(ca->heap.data[i].g, new, new.copygc = 1);
buckets_to_move = ca->copygc_heap.used;

buckets_to_move = ca->heap.used;

mutex_unlock(&ca->fs->bucket_lock);
mutex_unlock(&ca->heap_lock);
up_read(&c->gc_lock);
eytzinger0_sort(ca->copygc_heap.data,
ca->copygc_heap.used,
sizeof(ca->copygc_heap.data[0]),
bucket_idx_cmp, NULL);

read_moving(ca, buckets_to_move, sectors_to_move);
}
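The copygc rework above drops the per-bucket copygc mark in favour of a two-phase lookup: bch2_moving_gc() first collects candidate buckets by sectors used, then re-sorts the surviving copygc_heap entries by bucket index so that moving_pred() can test membership with eytzinger0_find() while scanning extents. Below is a rough standalone sketch of that shape, not from the repository: it uses libc qsort()/bsearch() as stand-ins for eytzinger0_sort()/eytzinger0_find(), and the struct and names are invented for illustration.

#include <stdio.h>
#include <stdlib.h>

struct candidate {
	unsigned long	bucket;		/* stand-in for bucket_heap_entry.bucket */
	unsigned	sectors_used;
};

static int idx_cmp(const void *_l, const void *_r)
{
	const struct candidate *l = _l, *r = _r;

	return (l->bucket > r->bucket) - (l->bucket < r->bucket);
}

int main(void)
{
	/* phase 1: pretend these survived the "fewest sectors used" selection */
	struct candidate c[] = {
		{ .bucket = 42, .sectors_used =  3 },
		{ .bucket =  7, .sectors_used = 10 },
		{ .bucket = 19, .sectors_used =  1 },
	};
	size_t n = sizeof(c) / sizeof(c[0]);

	/* phase 2: re-sort by bucket index so membership tests are O(log n) */
	qsort(c, n, sizeof(c[0]), idx_cmp);

	/* the extent scan then asks: is this pointer's bucket a candidate? */
	struct candidate search = { .bucket = 19 };
	struct candidate *hit = bsearch(&search, c, n, sizeof(c[0]), idx_cmp);

	printf("bucket 19 %s a copygc candidate\n", hit ? "is" : "is not");
	return 0;
}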
@ -169,7 +169,7 @@ static bool six_spin_on_owner(struct six_lock *lock, struct task_struct *owner)
break;
}

cpu_relax_lowlatency();
cpu_relax();
}
rcu_read_unlock();

@ -222,7 +222,7 @@ static bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
cpu_relax_lowlatency();
cpu_relax();
}

osq_unlock(&lock->osq);

@ -190,7 +190,7 @@ bch2_hash_lookup(const struct bch_hash_desc desc,
struct btree_iter *iter, const void *key)
{
bch2_btree_iter_init(iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)));
POS(inode, desc.hash_key(info, key)), 0);

return bch2_hash_lookup_at(desc, info, iter, key);
}
@ -201,8 +201,9 @@ bch2_hash_lookup_intent(const struct bch_hash_desc desc,
struct bch_fs *c, u64 inode,
struct btree_iter *iter, const void *key)
{
bch2_btree_iter_init_intent(iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)));
bch2_btree_iter_init(iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)),
BTREE_ITER_INTENT);

return bch2_hash_lookup_at(desc, info, iter, key);
}
@ -232,8 +233,9 @@ static inline struct bkey_s_c bch2_hash_hole(const struct bch_hash_desc desc,
struct btree_iter *iter,
const void *key)
{
bch2_btree_iter_init_intent(iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)));
bch2_btree_iter_init(iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)),
BTREE_ITER_INTENT);

return bch2_hash_hole_at(desc, iter);
}
@ -278,9 +280,11 @@ static inline int bch2_hash_set(const struct bch_hash_desc desc,
struct bkey_s_c k;
int ret;

bch2_btree_iter_init_intent(&hashed_slot, c, desc.btree_id,
POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))));
bch2_btree_iter_init_intent(&iter, c, desc.btree_id, hashed_slot.pos);
bch2_btree_iter_init(&hashed_slot, c, desc.btree_id,
POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))),
BTREE_ITER_INTENT);
bch2_btree_iter_init(&iter, c, desc.btree_id, hashed_slot.pos,
BTREE_ITER_INTENT);
bch2_btree_iter_link(&hashed_slot, &iter);
retry:
/*
@ -353,7 +357,7 @@ static inline int bch2_hash_delete_at(const struct bch_hash_desc desc,
int ret = -ENOENT;

bch2_btree_iter_init(&whiteout_iter, iter->c, desc.btree_id,
iter->pos);
iter->pos, 0);
bch2_btree_iter_link(iter, &whiteout_iter);

ret = bch2_hash_needs_whiteout(desc, info, &whiteout_iter, iter);
@ -382,10 +386,11 @@ static inline int bch2_hash_delete(const struct bch_hash_desc desc,
struct bkey_s_c k;
int ret = -ENOENT;

bch2_btree_iter_init_intent(&iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)));
bch2_btree_iter_init(&iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)),
BTREE_ITER_INTENT);
bch2_btree_iter_init(&whiteout_iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)));
POS(inode, desc.hash_key(info, key)), 0);
bch2_btree_iter_link(&iter, &whiteout_iter);
retry:
k = bch2_hash_lookup_at(desc, info, &iter, key);

@ -377,6 +377,7 @@ static void bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[WRITE]);
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
if (c->bdi.bdi_list.next)
bdi_destroy(&c->bdi);
lg_lock_free(&c->usage_lock);
free_percpu(c->usage_percpu);
@ -572,7 +573,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
sizeof(struct btree_interior_update)) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
bioset_init(&c->btree_read_bio, 1, 0) ||
bioset_init(&c->btree_read_bio, 1,
offsetof(struct btree_read_bio, bio)) ||
bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio)) ||
bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio)) ||
bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio)) ||
@ -984,7 +986,8 @@ static void bch2_dev_free(struct bch_dev *ca)
kfree(ca->bio_prio);
kvpfree(ca->buckets, ca->mi.nbuckets * sizeof(struct bucket));
kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
free_heap(&ca->heap);
free_heap(&ca->copygc_heap);
free_heap(&ca->alloc_heap);
free_fifo(&ca->free_inc);

for (i = 0; i < RESERVE_NR; i++)
@ -1105,7 +1108,6 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)

spin_lock_init(&ca->freelist_lock);
spin_lock_init(&ca->prio_buckets_lock);
mutex_init(&ca->heap_lock);
mutex_init(&ca->prio_write_lock);
bch2_dev_moving_gc_init(ca);

@ -1142,7 +1144,8 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
movinggc_reserve, GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
!init_fifo(&ca->free_inc, free_inc_reserve, GFP_KERNEL) ||
!init_heap(&ca->heap, heap_size, GFP_KERNEL) ||
!init_heap(&ca->alloc_heap, heap_size, GFP_KERNEL) ||
!init_heap(&ca->copygc_heap,heap_size, GFP_KERNEL) ||
!(ca->oldest_gens = kvpmalloc(ca->mi.nbuckets *
sizeof(u8),
GFP_KERNEL|__GFP_ZERO)) ||

@ -263,7 +263,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
if (!bch2_fs_running(c))
return -EPERM;

for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, k)
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
if (k.k->type == BCH_EXTENT) {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr;
@ -604,7 +604,7 @@ static unsigned bucket_priority_fn(struct bch_dev *ca, struct bucket *g,
static unsigned bucket_sectors_used_fn(struct bch_dev *ca, struct bucket *g,
void *private)
{
return bucket_sectors_used(g);
return bucket_sectors_used(g->mark);
}

static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, struct bucket *g,

@ -118,7 +118,8 @@ static s64 read_tiering(struct bch_fs *c, struct bch_tier *tier)

bch2_move_ctxt_init(&ctxt, &tier->pd.rate,
nr_devices * SECTORS_IN_FLIGHT_PER_DEVICE);
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_PREFETCH);

while (!kthread_should_stop() &&
!bch2_move_ctxt_wait(&ctxt) &&
@ -431,3 +431,104 @@ size_t bch_scnmemcpy(char *buf, size_t size, const char *src, size_t len)

return n;
}

#include "eytzinger.h"

static int alignment_ok(const void *base, size_t align)
{
return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
((unsigned long)base & (align - 1)) == 0;
}

static void u32_swap(void *a, void *b, size_t size)
{
u32 t = *(u32 *)a;
*(u32 *)a = *(u32 *)b;
*(u32 *)b = t;
}

static void u64_swap(void *a, void *b, size_t size)
{
u64 t = *(u64 *)a;
*(u64 *)a = *(u64 *)b;
*(u64 *)b = t;
}

static void generic_swap(void *a, void *b, size_t size)
{
char t;

do {
t = *(char *)a;
*(char *)a++ = *(char *)b;
*(char *)b++ = t;
} while (--size > 0);
}

static inline int do_cmp(void *base, size_t n, size_t size,
int (*cmp_func)(const void *, const void *, size_t),
size_t l, size_t r)
{
return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
base + inorder_to_eytzinger0(r, n) * size,
size);
}

static inline void do_swap(void *base, size_t n, size_t size,
void (*swap_func)(void *, void *, size_t),
size_t l, size_t r)
{
swap_func(base + inorder_to_eytzinger0(l, n) * size,
base + inorder_to_eytzinger0(r, n) * size,
size);
}

void eytzinger0_sort(void *base, size_t n, size_t size,
int (*cmp_func)(const void *, const void *, size_t),
void (*swap_func)(void *, void *, size_t))
{
int i, c, r;

if (!swap_func) {
if (size == 4 && alignment_ok(base, 4))
swap_func = u32_swap;
else if (size == 8 && alignment_ok(base, 8))
swap_func = u64_swap;
else
swap_func = generic_swap;
}

/* heapify */
for (i = n / 2 - 1; i >= 0; --i) {
for (r = i; r * 2 + 1 < n; r = c) {
c = r * 2 + 1;

if (c + 1 < n &&
do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
c++;

if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
break;

do_swap(base, n, size, swap_func, r, c);
}
}

/* sort */
for (i = n - 1; i > 0; --i) {
do_swap(base, n, size, swap_func, 0, i);

for (r = 0; r * 2 + 1 < i; r = c) {
c = r * 2 + 1;

if (c + 1 < i &&
do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
c++;

if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
break;

do_swap(base, n, size, swap_func, r, c);
}
}
}
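eytzinger0_sort() above is ordinary heapsort, except that do_cmp() and do_swap() address the array through inorder_to_eytzinger0(), so the result lands in eytzinger (search-tree) order rather than ascending index order. Here is a miniature standalone sketch of that indirection, not from the repository: a fixed permutation stands in for the inorder-to-eytzinger mapping of a 7-element array, and all names are invented for illustration.

/* Heapsort a plain int array "through" a position mapping, so the
 * result is sorted with respect to the mapping, not by raw index. */
#include <assert.h>
#include <stdio.h>

#define N 7
/* stand-in for inorder_to_eytzinger0(i, N): logical i -> physical slot */
static const int map[N] = { 3, 1, 4, 0, 5, 2, 6 };

static int a[N] = { 40, 20, 60, 10, 50, 30, 70 };

static int cmp(int l, int r)  { return a[map[l]] - a[map[r]]; }
static void swp(int l, int r) { int t = a[map[l]]; a[map[l]] = a[map[r]]; a[map[r]] = t; }

static void sift(int r, int end)
{
	int c;

	for (; r * 2 + 1 < end; r = c) {
		c = r * 2 + 1;
		if (c + 1 < end && cmp(c, c + 1) < 0)
			c++;			/* pick the larger child */
		if (cmp(r, c) >= 0)
			break;
		swp(r, c);
	}
}

int main(void)
{
	int i;

	for (i = N / 2 - 1; i >= 0; --i)	/* heapify */
		sift(i, N);
	for (i = N - 1; i > 0; --i) {		/* sort */
		swp(0, i);
		sift(0, i);
	}
	/* reading the array through the mapping now gives ascending order */
	for (i = 1; i < N; i++)
		assert(a[map[i - 1]] <= a[map[i]]);
	for (i = 0; i < N; i++)
		printf("%d ", a[i]);		/* physical order = tree order */
	printf("\n");
	return 0;
}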
@ -98,11 +98,13 @@ static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
?: __vmalloc(size, gfp_mask, PAGE_KERNEL);
}

#define DECLARE_HEAP(type, name) \
#define HEAP(type) \
struct { \
size_t size, used; \
type *data; \
} name
}

#define DECLARE_HEAP(type, name) HEAP(type) name

#define init_heap(heap, _size, gfp) \
({ \
@ -120,46 +122,62 @@ do { \

#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j])

#define heap_sift(h, i, cmp) \
do { \
size_t _r, _j = i; \
\
for (; _j * 2 + 1 < (h)->used; _j = _r) { \
_r = _j * 2 + 1; \
if (_r + 1 < (h)->used && \
cmp((h)->data[_r], (h)->data[_r + 1])) \
_r++; \
\
if (cmp((h)->data[_r], (h)->data[_j])) \
break; \
heap_swap(h, _r, _j); \
} \
} while (0)
#define heap_peek(h) \
({ \
EBUG_ON(!(h)->used); \
(h)->data[0]; \
})

#define heap_full(h) ((h)->used == (h)->size)

#define heap_sift_down(h, i, cmp) \
do { \
size_t _c, _j = i; \
\
for (; _j * 2 + 1 < (h)->used; _j = _c) { \
_c = _j * 2 + 1; \
if (_c + 1 < (h)->used && \
cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \
_c++; \
\
if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \
break; \
heap_swap(h, _c, _j); \
} \
} while (0)

#define heap_sift_up(h, i, cmp) \
do { \
while (i) { \
size_t p = (i - 1) / 2; \
if (cmp((h)->data[i], (h)->data[p])) \
if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \
break; \
heap_swap(h, i, p); \
i = p; \
} \
} while (0)

#define heap_add(h, d, cmp) \
#define heap_add(h, new, cmp) \
({ \
bool _r = !heap_full(h); \
if (_r) { \
size_t _i = (h)->used++; \
(h)->data[_i] = d; \
(h)->data[_i] = new; \
\
heap_sift_down(h, _i, cmp); \
heap_sift(h, _i, cmp); \
heap_sift_up(h, _i, cmp); \
} \
_r; \
})

#define heap_add_or_replace(h, new, cmp) \
do { \
if (!heap_add(h, new, cmp) && \
cmp(h, new, heap_peek(h)) >= 0) { \
(h)->data[0] = new; \
heap_sift_down(h, 0, cmp); \
} \
} while (0)

#define heap_del(h, i, cmp) \
do { \
size_t _i = (i); \
@ -167,8 +185,8 @@ do { \
BUG_ON(_i >= (h)->used); \
(h)->used--; \
heap_swap(h, _i, (h)->used); \
heap_sift_up(h, _i, cmp); \
heap_sift_down(h, _i, cmp); \
heap_sift(h, _i, cmp); \
} while (0)

#define heap_pop(h, d, cmp) \
@ -181,19 +199,11 @@ do { \
_r; \
})

#define heap_peek(h) \
({ \
EBUG_ON(!(h)->used); \
(h)->data[0]; \
})

#define heap_full(h) ((h)->used == (h)->size)

#define heap_resort(heap, cmp) \
do { \
ssize_t _i; \
for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \
heap_sift(heap, _i, cmp); \
heap_sift_down(heap, _i, cmp); \
} while (0)

/*
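The reworked heap macros above change the comparison callback to take the heap as its first argument and return an ordering, and add heap_add_or_replace(), which keeps a bounded set by overwriting the root once the heap is full and a better element arrives; the copygc change earlier in this commit uses it to collect its candidate buckets. Below is a small standalone sketch of that bounded-heap idea in plain C, not using the kernel macros and with all names invented: keep only the K smallest values seen, using a max-heap whose root is evicted when something smaller shows up.

#include <stdio.h>

#define K 3
static unsigned heap[K];
static unsigned used;

static void sift_down(unsigned i)
{
	for (;;) {
		unsigned c = 2 * i + 1;

		if (c >= used)
			break;
		if (c + 1 < used && heap[c + 1] > heap[c])
			c++;				/* larger child */
		if (heap[i] >= heap[c])
			break;
		unsigned t = heap[i]; heap[i] = heap[c]; heap[c] = t;
		i = c;
	}
}

static void add_or_replace(unsigned v)
{
	if (used < K) {
		unsigned i = used++;

		heap[i] = v;
		while (i && heap[i] > heap[(i - 1) / 2]) {	/* sift up */
			unsigned p = (i - 1) / 2;
			unsigned t = heap[i]; heap[i] = heap[p]; heap[p] = t;
			i = p;
		}
	} else if (v < heap[0]) {
		heap[0] = v;			/* evict current maximum */
		sift_down(0);
	}
}

int main(void)
{
	unsigned input[] = { 90, 12, 55, 7, 63, 30, 4 };
	unsigned i;

	for (i = 0; i < sizeof(input) / sizeof(input[0]); i++)
		add_or_replace(input[i]);

	/* heap now holds the three smallest (4, 7, 12); the root is their max */
	for (i = 0; i < used; i++)
		printf("%u ", heap[i]);
	printf("\n");
	return 0;
}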
@ -282,7 +282,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
ssize_t ret = 0;
size_t len;

for_each_btree_key(&iter, c, BTREE_ID_XATTRS, POS(inum, 0), k) {
for_each_btree_key(&iter, c, BTREE_ID_XATTRS, POS(inum, 0), 0, k) {
BUG_ON(k.k->p.inode < inum);

if (k.k->p.inode > inum)

@ -278,10 +278,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
return NULL;

bio = p + front_pad;
bio_init(bio);
bio_init(bio, bio->bi_inline_vecs, nr_iovecs);
bio->bi_pool = bs;
bio->bi_max_vecs = nr_iovecs;
bio->bi_io_vec = bio->bi_inline_vecs;

return bio;
}