Update bcachefs sources to 14e9ac5016 bcachefs: btree_iter fastpath

Kent Overstreet 2017-05-12 18:45:15 -08:00
parent a588eb0d9e
commit 565b4a74d6
39 changed files with 934 additions and 523 deletions
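The headline change in this update is the btree_iter fastpath: bch2_btree_iter_peek() and bch2_btree_iter_advance_pos() gain a BTREE_ITER_UPTODATE flag so repeated peeks can return the key cached in the iterator without re-traversing the tree, and anything that moves, rewinds or unlocks the iterator clears the flag (see the btree_iter.c and btree_iter.h hunks below). The toy program that follows is only an illustration of that caching pattern, not bcachefs code; every name in it is made up.

#include <stdio.h>

/*
 * Toy illustration of the "uptodate" fastpath pattern (not bcachefs code):
 * peek() remembers that its cached result is still valid and skips the
 * expensive lookup; anything that moves or invalidates the iterator clears
 * the flag so the next peek takes the slow path again.
 */
#define ITER_UPTODATE	(1 << 0)

struct toy_iter {
	unsigned	flags;
	int		pos;
	int		cached_key;	/* stands in for iter->k */
};

static int slow_lookup(int pos)
{
	printf("slow lookup at %d\n", pos);	/* stands in for a full traversal */
	return pos * 10;
}

static int toy_peek(struct toy_iter *iter)
{
	if (iter->flags & ITER_UPTODATE)	/* fastpath: cached key still valid */
		return iter->cached_key;

	iter->cached_key = slow_lookup(iter->pos);
	iter->flags |= ITER_UPTODATE;
	return iter->cached_key;
}

static void toy_set_pos(struct toy_iter *iter, int pos)
{
	iter->pos = pos;
	iter->flags &= ~ITER_UPTODATE;		/* cached result is now stale */
}

int main(void)
{
	struct toy_iter iter = { 0 };

	toy_peek(&iter);		/* slow path */
	toy_peek(&iter);		/* fastpath, no lookup */
	toy_set_pos(&iter, 3);
	toy_peek(&iter);		/* slow path again */
	return 0;
}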

View File

@ -1 +1 @@
43e3159567958ea70c8a95d98fdb6e881153a656
14e9ac5016803fc63c1216608c866bef16b4053e

View File

@ -250,7 +250,6 @@ static void write_data(struct bch_fs *c,
{
struct disk_reservation res;
struct bch_write_op op;
struct bch_write_bio bio;
struct bio_vec bv;
struct closure cl;
@ -259,15 +258,15 @@ static void write_data(struct bch_fs *c,
closure_init_stack(&cl);
bio_init(&bio.bio, &bv, 1);
bio.bio.bi_iter.bi_size = len;
bch2_bio_map(&bio.bio, buf);
bio_init(&op.wbio.bio, &bv, 1);
op.wbio.bio.bi_iter.bi_size = len;
bch2_bio_map(&op.wbio.bio, buf);
int ret = bch2_disk_reservation_get(c, &res, len >> 9, 0);
if (ret)
die("error reserving space in new filesystem: %s", strerror(-ret));
bch2_write_op_init(&op, c, &bio, res, c->write_points,
bch2_write_op_init(&op, c, res, c->write_points,
POS(dst_inode->inum, dst_offset >> 9), NULL, 0);
closure_call(&op.cl, bch2_write, NULL, &cl);
closure_sync(&cl);

View File

@ -166,4 +166,8 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
#define flush_cache_vmap(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) do { } while (0)
#ifdef __x86_64
#define CONFIG_X86_64 y
#endif
#endif /* _TOOLS_LINUX_COMPILER_H */

View File

@ -10,8 +10,14 @@
struct kmem_cache;
typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data);
typedef void (mempool_free_t)(void *element, void *pool_data);
typedef struct mempool_s {
size_t elem_size;
size_t elem_size;
void *pool_data;
mempool_alloc_t *alloc;
mempool_free_t *free;
} mempool_t;
static inline bool mempool_initialized(mempool_t *pool)
@ -60,24 +66,22 @@ static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t
return 0;
}
static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
{
mempool_t *pool = malloc(sizeof(*pool));
pool->elem_size = size;
return pool;
}
static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order)
{
pool->elem_size = PAGE_SIZE << order;
return 0;
}
static inline mempool_t *mempool_create_page_pool(int min_nr, int order)
static inline int mempool_init(mempool_t *pool, int min_nr,
mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn,
void *pool_data)
{
mempool_t *pool = malloc(sizeof(*pool));
pool->elem_size = PAGE_SIZE << order;
return pool;
pool->elem_size = (size_t) pool_data;
pool->pool_data = pool_data;
pool->alloc = alloc_fn;
pool->free = free_fn;
return 0;
}
#endif /* _LINUX_MEMPOOL_H */

View File

@ -43,9 +43,6 @@ static inline void *krealloc(void *old, size_t size, gfp_t flags)
#define kcalloc(n, size, flags) calloc(n, size)
#define kmalloc_array(n, size, flags) calloc(n, size)
#define vmalloc(size) malloc(size)
#define vzalloc(size) calloc(1, size)
#define kfree(p) free(p)
#define kvfree(p) free(p)
#define kzfree(p) free(p)
@ -89,8 +86,6 @@ do { \
#define VM_NO_GUARD 0x00000040 /* don't add guard page */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
#define PAGE_KERNEL 0
static inline void vunmap(const void *addr) {}
static inline void *vmap(struct page **pages, unsigned int count,

View File

@ -1,8 +1,41 @@
#ifndef __TOOLS_LINUX_VMALLOC_H
#define __TOOLS_LINUX_VMALLOC_H
#define vmalloc(size) malloc(size)
#define __vmalloc(size, flags, prot) malloc(size)
#include <stdlib.h>
#include <sys/mman.h>
#include "tools-util.h"
#define PAGE_KERNEL 0
#define PAGE_KERNEL_EXEC 1
#define vfree(p) free(p)
static inline void *__vmalloc(unsigned long size, gfp_t gfp_mask, unsigned prot)
{
void *p = aligned_alloc(PAGE_SIZE, size);
if (p && prot == PAGE_KERNEL_EXEC) {
if (mprotect(p, size, PROT_READ|PROT_WRITE|PROT_EXEC)) {
vfree(p);
p = NULL;
}
}
if (p && (gfp_mask & __GFP_ZERO))
memset(p, 0, size);
return p;
}
static inline void *vmalloc(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}
static inline void *vzalloc(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL|__GFP_ZERO, PAGE_KERNEL);
}
#endif /* __TOOLS_LINUX_VMALLOC_H */
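For reference, a minimal hypothetical use of the shim above: vzalloc() gives a zeroed, page-aligned buffer, and the PAGE_KERNEL_EXEC path is what the mprotect() call exists for (an executable region); since __vmalloc() here is just aligned_alloc(), vfree() maps straight to free(). This sketch only compiles inside bcachefs-tools, which provides this <linux/vmalloc.h>.

#include <linux/vmalloc.h>

static void vmalloc_shim_example(void)
{
	void *buf  = vzalloc(1 << 20);	/* zeroed, page-aligned allocation */
	void *code = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_EXEC);
					/* made read/write/execute via mprotect() */

	vfree(code);			/* vfree() is just free() in this shim */
	vfree(buf);
}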

View File

@ -361,7 +361,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
struct bucket *g, struct btree_iter *iter,
u64 *journal_seq)
{
struct bucket_mark m = READ_ONCE(g->mark);
struct bucket_mark m;
__BKEY_PADDED(k, DIV_ROUND_UP(sizeof(struct bch_alloc), 8)) alloc_key;
struct bkey_i_alloc *a;
u8 *d;
@ -374,6 +374,8 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
if (ret)
break;
/* read mark under btree node lock: */
m = READ_ONCE(g->mark);
a = bkey_alloc_init(&alloc_key.k);
a->k.p = iter->pos;
a->v.fields = 0;
@ -407,8 +409,6 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
struct btree_iter iter;
int ret;
lockdep_assert_held(&c->state_lock);
if (pos.inode >= c->sb.nr_devices || !c->devs[pos.inode])
return 0;

View File

@ -725,6 +725,10 @@ struct bch_fs {
struct work_struct read_retry_work;
spinlock_t read_retry_lock;
struct bio_list btree_write_error_list;
struct work_struct btree_write_error_work;
spinlock_t btree_write_error_lock;
/* ERRORS */
struct list_head fsck_errors;
struct mutex fsck_error_lock;

View File

@ -1082,7 +1082,8 @@ struct jset_entry {
__le16 u64s;
__u8 btree_id;
__u8 level;
__le32 flags; /* designates what this jset holds */
__u8 type; /* designates what this jset holds */
__u8 pad[3];
union {
struct bkey_i start[0];
@ -1092,7 +1093,6 @@ struct jset_entry {
#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
LE32_BITMASK(JOURNAL_ENTRY_TYPE, struct jset_entry, flags, 0, 8);
enum {
JOURNAL_ENTRY_BTREE_KEYS = 0,
JOURNAL_ENTRY_BTREE_ROOT = 1,

View File

@ -791,11 +791,9 @@ static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
unsigned dst_offset, unsigned dst_size,
bool *eax_zeroed)
{
unsigned byte = format->key_u64s * sizeof(u64);
unsigned bits = format->bits_per_field[field];
u64 offset = format->field_offset[field];
unsigned i, bit_offset = 0;
unsigned shl, shr;
unsigned i, byte, bit_offset, align, shl, shr;
if (!bits && !offset) {
if (!*eax_zeroed) {
@ -842,11 +840,12 @@ static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
return out;
}
bit_offset = format->key_u64s * 64;
for (i = 0; i <= field; i++)
bit_offset += format->bits_per_field[i];
bit_offset -= format->bits_per_field[i];
byte -= DIV_ROUND_UP(bit_offset, 8);
bit_offset = round_up(bit_offset, 8) - bit_offset;
byte = bit_offset / 8;
bit_offset -= byte * 8;
*eax_zeroed = false;
@ -857,6 +856,12 @@ static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
/* movzx eax, WORD PTR [rsi + imm8] */
I4(0x0f, 0xb7, 0x46, byte);
} else if (bit_offset + bits <= 32) {
align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
byte -= align;
bit_offset += align * 8;
BUG_ON(bit_offset + bits > 32);
/* mov eax, [rsi + imm8] */
I3(0x8b, 0x46, byte);
@ -874,6 +879,12 @@ static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
out += 4;
}
} else if (bit_offset + bits <= 64) {
align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7);
byte -= align;
bit_offset += align * 8;
BUG_ON(bit_offset + bits > 64);
/* mov rax, [rsi + imm8] */
I4(0x48, 0x8b, 0x46, byte);
@ -890,6 +901,12 @@ static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
I4(0x48, 0xc1, 0xe8, shr);
}
} else {
align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
byte -= align;
bit_offset += align * 8;
BUG_ON(bit_offset + bits > 96);
/* mov rax, [rsi + byte] */
I4(0x48, 0x8b, 0x46, byte);
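To make the new offset/alignment arithmetic above easier to follow, here is a small standalone program that reproduces just the calculation from the 32-bit branch. The field widths are made up; only the math is taken from the hunks above, not the emitted x86. It shows the load address being pulled down toward a 4-byte boundary as far as the field width allows, so the field still fits inside the single 32-bit read.

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define min(a, b)		((a) < (b) ? (a) : (b))

int main(void)
{
	/* hypothetical format: key_u64s = 2, compiling field 1 (15 bits) */
	unsigned bits_per_field[] = { 20, 15 };
	unsigned field = 1, bits = bits_per_field[field];
	unsigned key_u64s = 2;
	unsigned i, byte, bit_offset, align;

	bit_offset = key_u64s * 64;		/* 128 */
	for (i = 0; i <= field; i++)
		bit_offset -= bits_per_field[i];/* 128 - 20 - 15 = 93 */

	byte = bit_offset / 8;			/* 11: byte offset of the field's low bit */
	bit_offset -= byte * 8;			/* 5:  remaining bit offset within that byte */

	/*
	 * 32-bit branch: move the load address down (toward alignment) only as
	 * far as keeps bit_offset + bits within the loaded 32 bits.
	 */
	align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
	byte -= align;				/* 11 - 1 = 10 */
	bit_offset += align * 8;		/* 5 + 8 = 13 */

	printf("byte=%u bit_offset=%u field fits in 32 bits: %d\n",
	       byte, bit_offset, bit_offset + bits <= 32);
	return 0;
}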

View File

@ -41,7 +41,7 @@ static void __mca_data_free(struct bch_fs *c, struct btree *b)
{
EBUG_ON(btree_node_write_in_flight(b));
free_pages((unsigned long) b->data, btree_page_order(c));
kvpfree(b->data, btree_bytes(c));
b->data = NULL;
bch2_btree_keys_free(b);
}
@ -53,8 +53,6 @@ static void mca_data_free(struct bch_fs *c, struct btree *b)
list_move(&b->list, &c->btree_cache_freed);
}
#define PTR_HASH(_k) (bkey_i_to_extent_c(_k)->v._data[0])
static const struct rhashtable_params bch_btree_cache_params = {
.head_offset = offsetof(struct btree, hash),
.key_offset = offsetof(struct btree, key.v),
@ -63,20 +61,18 @@ static const struct rhashtable_params bch_btree_cache_params = {
static void mca_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
{
unsigned order = ilog2(btree_pages(c));
b->data = (void *) __get_free_pages(gfp, order);
b->data = kvpmalloc(btree_bytes(c), gfp);
if (!b->data)
goto err;
if (bch2_btree_keys_alloc(b, order, gfp))
if (bch2_btree_keys_alloc(b, btree_page_order(c), gfp))
goto err;
c->btree_cache_used++;
list_move(&b->list, &c->btree_cache_freeable);
return;
err:
free_pages((unsigned long) b->data, order);
kvpfree(b->data, btree_bytes(c));
b->data = NULL;
list_move(&b->list, &c->btree_cache_freed);
}
@ -91,7 +87,6 @@ static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
six_lock_init(&b->lock);
INIT_LIST_HEAD(&b->list);
INIT_LIST_HEAD(&b->write_blocked);
INIT_LIST_HEAD(&b->reachable);
mca_data_alloc(c, b, gfp);
return b->data ? b : NULL;
@ -101,10 +96,6 @@ static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
void bch2_btree_node_hash_remove(struct bch_fs *c, struct btree *b)
{
BUG_ON(btree_node_dirty(b));
b->nsets = 0;
rhashtable_remove_fast(&c->btree_cache_table, &b->hash,
bch_btree_cache_params);
@ -112,23 +103,27 @@ void bch2_btree_node_hash_remove(struct bch_fs *c, struct btree *b)
bkey_i_to_extent(&b->key)->v._data[0] = 0;
}
int __bch2_btree_node_hash_insert(struct bch_fs *c, struct btree *b)
{
return rhashtable_lookup_insert_fast(&c->btree_cache_table, &b->hash,
bch_btree_cache_params);
}
int bch2_btree_node_hash_insert(struct bch_fs *c, struct btree *b,
unsigned level, enum btree_id id)
{
int ret;
b->level = level;
b->btree_id = id;
ret = rhashtable_lookup_insert_fast(&c->btree_cache_table, &b->hash,
bch_btree_cache_params);
if (ret)
return ret;
mutex_lock(&c->btree_cache_lock);
list_add(&b->list, &c->btree_cache);
ret = __bch2_btree_node_hash_insert(c, b);
if (!ret)
list_add(&b->list, &c->btree_cache);
mutex_unlock(&c->btree_cache_lock);
return 0;
return ret;
}
__flatten
@ -155,8 +150,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
if (!six_trylock_write(&b->lock))
goto out_unlock_intent;
if (btree_node_write_error(b) ||
btree_node_noevict(b))
if (btree_node_noevict(b))
goto out_unlock;
if (!btree_node_may_write(b))
@ -328,7 +322,7 @@ void bch2_fs_btree_exit(struct bch_fs *c)
if (c->verify_data)
list_move(&c->verify_data->list, &c->btree_cache);
free_pages((unsigned long) c->verify_ondisk, ilog2(btree_pages(c)));
kvpfree(c->verify_ondisk, btree_bytes(c));
#endif
for (i = 0; i < BTREE_ID_NR; i++)
@ -384,8 +378,7 @@ int bch2_fs_btree_init(struct bch_fs *c)
#ifdef CONFIG_BCACHEFS_DEBUG
mutex_init(&c->verify_lock);
c->verify_ondisk = (void *)
__get_free_pages(GFP_KERNEL, ilog2(btree_pages(c)));
c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
if (!c->verify_ondisk)
return -ENOMEM;
@ -510,7 +503,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
BUG_ON(!six_trylock_intent(&b->lock));
BUG_ON(!six_trylock_write(&b->lock));
out_unlock:
BUG_ON(bkey_extent_is_data(&b->key.k) && PTR_HASH(&b->key));
BUG_ON(btree_node_hashed(b));
BUG_ON(btree_node_write_in_flight(b));
list_del_init(&b->list);
@ -554,6 +547,12 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_iter *iter,
struct bch_fs *c = iter->c;
struct btree *b;
/*
* Parent node must be locked, else we could read in a btree node that's
* been freed:
*/
BUG_ON(!btree_node_locked(iter, level + 1));
b = bch2_btree_node_mem_alloc(c);
if (IS_ERR(b))
return b;

View File

@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "btree_types.h"
#include "extents.h"
struct btree_iter;
@ -11,6 +12,7 @@ extern const char * const bch2_btree_ids[];
void bch2_recalc_btree_reserve(struct bch_fs *);
void bch2_btree_node_hash_remove(struct bch_fs *, struct btree *);
int __bch2_btree_node_hash_insert(struct bch_fs *, struct btree *);
int bch2_btree_node_hash_insert(struct bch_fs *, struct btree *,
unsigned, enum btree_id);
@ -28,6 +30,14 @@ void bch2_btree_node_prefetch(struct btree_iter *, const struct bkey_i *,
void bch2_fs_btree_exit(struct bch_fs *);
int bch2_fs_btree_init(struct bch_fs *);
#define PTR_HASH(_k) (bkey_i_to_extent_c(_k)->v._data[0])
/* is btree node in hash table? */
static inline bool btree_node_hashed(struct btree *b)
{
return bkey_extent_is_data(&b->key.k) && PTR_HASH(&b->key);
}
#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \
for ((_tbl) = rht_dereference_rcu((_c)->btree_cache_table.tbl, \
&(_c)->btree_cache_table), \

View File

@ -621,12 +621,10 @@ static void bch2_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
bch2_btree_interior_update_will_free_node(c, as, old_nodes[i]);
/* Repack everything with @new_format and sort down to one bset */
for (i = 0; i < nr_old_nodes; i++) {
for (i = 0; i < nr_old_nodes; i++)
new_nodes[i] =
__bch2_btree_node_alloc_replacement(c, old_nodes[i],
new_format, res);
list_add(&new_nodes[i]->reachable, &as->reachable_list);
}
new_format, as, res);
/*
* Conceptually we concatenate the nodes together and slice them
@ -663,7 +661,6 @@ static void bch2_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
set_btree_bset_end(n1, n1->set);
list_del_init(&n2->reachable);
six_unlock_write(&n2->lock);
bch2_btree_node_free_never_inserted(c, n2);
six_unlock_intent(&n2->lock);
@ -796,7 +793,8 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
memset(merge, 0, sizeof(merge));
__for_each_btree_node(&iter, c, btree_id, POS_MIN,
U8_MAX, 0, BTREE_ITER_PREFETCH, b) {
BTREE_MAX_DEPTH, 0,
BTREE_ITER_PREFETCH, b) {
memmove(merge + 1, merge,
sizeof(merge) - sizeof(merge[0]));
memmove(lock_seq + 1, lock_seq,

View File

@ -56,9 +56,9 @@ static void btree_bounce_free(struct bch_fs *c, unsigned order,
bool used_mempool, void *p)
{
if (used_mempool)
mempool_free(virt_to_page(p), &c->btree_bounce_pool);
mempool_free(p, &c->btree_bounce_pool);
else
free_pages((unsigned long) p, order);
vpfree(p, PAGE_SIZE << order);
}
static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
@ -66,7 +66,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
{
void *p;
BUG_ON(1 << order > btree_pages(c));
BUG_ON(order > btree_page_order(c));
*used_mempool = false;
p = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order);
@ -74,7 +74,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
return p;
*used_mempool = true;
return page_address(mempool_alloc(&c->btree_bounce_pool, GFP_NOIO));
return mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
}
typedef int (*sort_cmp_fn)(struct btree *,
@ -1183,7 +1183,7 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
if (bne->keys.seq == b->data->keys.seq)
goto err;
sorted = btree_bounce_alloc(c, ilog2(btree_pages(c)), &used_mempool);
sorted = btree_bounce_alloc(c, btree_page_order(c), &used_mempool);
sorted->keys.u64s = 0;
b->nr = btree_node_is_extents(b)
@ -1199,7 +1199,7 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
BUG_ON(b->nr.live_u64s != u64s);
btree_bounce_free(c, ilog2(btree_pages(c)), used_mempool, sorted);
btree_bounce_free(c, btree_page_order(c), used_mempool, sorted);
bch2_bset_build_aux_tree(b, b->set, false);
@ -1344,50 +1344,100 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)
{
struct btree_write *w = btree_prev_write(b);
/*
* Before calling bch2_btree_complete_write() - if the write errored, we
* have to halt new journal writes before they see this btree node
* write as completed:
*/
if (btree_node_write_error(b))
bch2_journal_halt(&c->journal);
bch2_btree_complete_write(c, b, w);
btree_node_io_unlock(b);
}
static void bch2_btree_node_write_error(struct bch_fs *c,
struct bch_write_bio *wbio)
{
struct btree *b = wbio->bio.bi_private;
struct closure *cl = wbio->cl;
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
struct bkey_i_extent *new_key;
bkey_copy(&tmp.k, &b->key);
new_key = bkey_i_to_extent(&tmp.k);
while (wbio->replicas_failed) {
unsigned idx = __fls(wbio->replicas_failed);
bch2_extent_drop_ptr_idx(extent_i_to_s(new_key), idx);
wbio->replicas_failed ^= 1 << idx;
}
if (!bch2_extent_nr_ptrs(extent_i_to_s_c(new_key)) ||
bch2_btree_node_update_key(c, b, new_key)) {
set_btree_node_noevict(b);
bch2_fatal_error(c);
}
bio_put(&wbio->bio);
btree_node_write_done(c, b);
if (cl)
closure_put(cl);
}
void bch2_btree_write_error_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs,
btree_write_error_work);
struct bio *bio;
while (1) {
spin_lock_irq(&c->read_retry_lock);
bio = bio_list_pop(&c->read_retry_list);
spin_unlock_irq(&c->read_retry_lock);
if (!bio)
break;
bch2_btree_node_write_error(c, to_wbio(bio));
}
}
static void btree_node_write_endio(struct bio *bio)
{
struct btree *b = bio->bi_private;
struct bch_write_bio *wbio = to_wbio(bio);
struct bch_fs *c = wbio->c;
struct bio *orig = wbio->split ? wbio->orig : NULL;
struct closure *cl = !wbio->split ? wbio->cl : NULL;
struct bch_dev *ca = wbio->ca;
struct btree *b = bio->bi_private;
struct bch_write_bio *wbio = to_wbio(bio);
struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL;
struct bch_write_bio *orig = parent ?: wbio;
struct closure *cl = !wbio->split ? wbio->cl : NULL;
struct bch_fs *c = wbio->c;
struct bch_dev *ca = wbio->ca;
if (bch2_dev_fatal_io_err_on(bio->bi_error, ca, "btree write") ||
if (bch2_dev_nonfatal_io_err_on(bio->bi_error, ca, "btree write") ||
bch2_meta_write_fault("btree"))
set_btree_node_write_error(b);
set_bit(wbio->ptr_idx, (unsigned long *) &orig->replicas_failed);
if (wbio->have_io_ref)
percpu_ref_put(&ca->io_ref);
if (wbio->bounce)
btree_bounce_free(c,
wbio->order,
wbio->used_mempool,
page_address(bio->bi_io_vec[0].bv_page));
if (wbio->put_bio)
if (parent) {
bio_put(bio);
if (orig) {
bio_endio(orig);
} else {
btree_node_write_done(c, b);
if (cl)
closure_put(cl);
bio_endio(&parent->bio);
return;
}
btree_bounce_free(c,
wbio->order,
wbio->used_mempool,
wbio->data);
if (wbio->replicas_failed) {
unsigned long flags;
spin_lock_irqsave(&c->btree_write_error_lock, flags);
bio_list_add(&c->read_retry_list, &wbio->bio);
spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
queue_work(c->wq, &c->btree_write_error_work);
return;
}
bio_put(bio);
btree_node_write_done(c, b);
if (cl)
closure_put(cl);
}
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
@ -1411,7 +1461,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
struct closure *parent,
enum six_lock_type lock_type_held)
{
struct bio *bio;
struct bch_write_bio *wbio;
struct bset_tree *t;
struct bset *i;
@ -1458,7 +1507,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
} while (cmpxchg_acquire(&b->flags, old, new) != old);
BUG_ON(!list_empty(&b->write_blocked));
BUG_ON(!list_empty_careful(&b->reachable) != !b->written);
BUG_ON((b->will_make_reachable != NULL) != !b->written);
BUG_ON(b->written >= c->sb.btree_node_size);
BUG_ON(bset_written(b, btree_bset_last(b)));
@ -1601,23 +1650,20 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
trace_btree_write(b, bytes_to_write, sectors_to_write);
bio = bio_alloc_bioset(GFP_NOIO, 1 << order, &c->bio_write);
wbio = to_wbio(bio);
wbio = wbio_init(bio_alloc_bioset(GFP_NOIO, 1 << order, &c->bio_write));
wbio->cl = parent;
wbio->bounce = true;
wbio->put_bio = true;
wbio->order = order;
wbio->used_mempool = used_mempool;
bio->bi_opf = REQ_OP_WRITE|REQ_META|REQ_FUA;
bio->bi_iter.bi_size = sectors_to_write << 9;
bio->bi_end_io = btree_node_write_endio;
bio->bi_private = b;
wbio->data = data;
wbio->bio.bi_opf = REQ_OP_WRITE|REQ_META|REQ_FUA;
wbio->bio.bi_iter.bi_size = sectors_to_write << 9;
wbio->bio.bi_end_io = btree_node_write_endio;
wbio->bio.bi_private = b;
if (parent)
closure_get(parent);
bch2_bio_map(bio, data);
bch2_bio_map(&wbio->bio, data);
/*
* If we're appending to a leaf node, we don't technically need FUA -

View File

@ -37,7 +37,7 @@ static inline void btree_node_wait_on_io(struct btree *b)
static inline bool btree_node_may_write(struct btree *b)
{
return list_empty_careful(&b->write_blocked) &&
list_empty_careful(&b->reachable);
!b->will_make_reachable;
}
enum compact_mode {
@ -79,6 +79,7 @@ int bch2_btree_root_read(struct bch_fs *, enum btree_id,
void bch2_btree_complete_write(struct bch_fs *, struct btree *,
struct btree_write *);
void bch2_btree_write_error_work(struct work_struct *);
void __bch2_btree_node_write(struct bch_fs *, struct btree *,
struct closure *, enum six_lock_type);

View File

@ -252,6 +252,8 @@ static int __bch2_btree_iter_unlock(struct btree_iter *iter)
while (iter->nodes_locked)
btree_node_unlock(iter, __ffs(iter->nodes_locked));
iter->flags &= ~BTREE_ITER_UPTODATE;
return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
}
@ -1006,16 +1008,30 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
iter->flags |= BTREE_ITER_AT_END_OF_LEAF;
iter->pos = new_pos;
iter->flags &= ~BTREE_ITER_UPTODATE;
}
void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{
EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0); /* XXX handle this */
iter->pos = new_pos;
iter->flags &= ~BTREE_ITER_UPTODATE;
}
void bch2_btree_iter_advance_pos(struct btree_iter *iter)
{
if (iter->flags & BTREE_ITER_UPTODATE &&
!(iter->flags & BTREE_ITER_WITH_HOLES)) {
struct bkey_s_c k;
__btree_iter_advance(iter);
k = __btree_iter_peek(iter);
if (likely(k.k)) {
iter->pos = bkey_start_pos(k.k);
return;
}
}
/*
* We use iter->k instead of iter->pos for extents: iter->pos will be
* equal to the start of the extent we returned, but we need to advance
@ -1032,6 +1048,7 @@ void bch2_btree_iter_rewind(struct btree_iter *iter, struct bpos pos)
BUG_ON(bkey_cmp(pos, iter->nodes[iter->level]->data->min_key) < 0);
iter->pos = pos;
iter->flags &= ~BTREE_ITER_UPTODATE;
__btree_iter_init(iter, iter->nodes[iter->level]);
}
@ -1043,6 +1060,17 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
if (iter->flags & BTREE_ITER_UPTODATE) {
struct btree *b = iter->nodes[0];
struct bkey_packed *k =
__bch2_btree_node_iter_peek_all(&iter->node_iters[0], b);
return (struct bkey_s_c) {
.k = &iter->k,
.v = bkeyp_val(&b->format, k)
};
}
while (1) {
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret)) {
@ -1058,7 +1086,9 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
*/
if (!(iter->flags & BTREE_ITER_IS_EXTENTS) ||
bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
iter->pos = bkey_start_pos(k.k);
iter->flags |= BTREE_ITER_UPTODATE;
return k;
}
@ -1083,6 +1113,8 @@ struct bkey_s_c bch2_btree_iter_peek_with_holes(struct btree_iter *iter)
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
iter->flags &= ~BTREE_ITER_UPTODATE;
while (1) {
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret)) {
@ -1131,12 +1163,15 @@ void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c,
unsigned locks_want, unsigned depth,
unsigned flags)
{
EBUG_ON(depth >= BTREE_MAX_DEPTH);
EBUG_ON(locks_want > BTREE_MAX_DEPTH);
iter->c = c;
iter->pos = pos;
iter->flags = flags;
iter->btree_id = btree_id;
iter->level = depth;
iter->locks_want = min(locks_want, BTREE_MAX_DEPTH);
iter->locks_want = locks_want;
iter->nodes_locked = 0;
iter->nodes_intent_locked = 0;
memset(iter->nodes, 0, sizeof(iter->nodes));

View File

@ -4,19 +4,20 @@
#include "btree_types.h"
#define BTREE_ITER_INTENT (1 << 0)
#define BTREE_ITER_UPTODATE (1 << 0)
#define BTREE_ITER_WITH_HOLES (1 << 1)
#define BTREE_ITER_PREFETCH (1 << 2)
#define BTREE_ITER_INTENT (1 << 2)
#define BTREE_ITER_PREFETCH (1 << 3)
/*
* Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
* @pos or the first key strictly greater than @pos
*/
#define BTREE_ITER_IS_EXTENTS (1 << 3)
#define BTREE_ITER_IS_EXTENTS (1 << 4)
/*
* indicates we need to call bch2_btree_iter_traverse() to revalidate iterator:
*/
#define BTREE_ITER_AT_END_OF_LEAF (1 << 4)
#define BTREE_ITER_ERROR (1 << 5)
#define BTREE_ITER_AT_END_OF_LEAF (1 << 5)
#define BTREE_ITER_ERROR (1 << 6)
/*
* @pos - iterator's current position
@ -223,17 +224,23 @@ static inline int btree_iter_cmp(const struct btree_iter *l,
#define for_each_btree_node(_iter, _c, _btree_id, _start, _flags, _b) \
__for_each_btree_node(_iter, _c, _btree_id, _start, 0, 0, _flags, _b)
static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
unsigned flags)
{
return flags & BTREE_ITER_WITH_HOLES
? bch2_btree_iter_peek_with_holes(iter)
: bch2_btree_iter_peek(iter);
}
#define for_each_btree_key(_iter, _c, _btree_id, _start, _flags, _k) \
for (bch2_btree_iter_init((_iter), (_c), (_btree_id), \
(_start), (_flags)); \
!IS_ERR_OR_NULL(((_k) = (((_flags) & BTREE_ITER_WITH_HOLES)\
? bch2_btree_iter_peek_with_holes(_iter)\
: bch2_btree_iter_peek(_iter))).k); \
for (bch2_btree_iter_init((_iter), (_c), (_btree_id), \
(_start), (_flags)); \
!IS_ERR_OR_NULL(((_k) = __bch2_btree_iter_peek(_iter, _flags)).k);\
bch2_btree_iter_advance_pos(_iter))
static inline int btree_iter_err(struct bkey_s_c k)
{
return IS_ERR(k.k) ? PTR_ERR(k.k) : 0;
return PTR_ERR_OR_ZERO(k.k);
}
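The reworked for_each_btree_key() above, which now goes through the __bch2_btree_iter_peek() helper, is used in the usual way. A rough sketch follows; the btree ID, range and error handling are chosen arbitrarily (it is not a call site from this commit), and it assumes bch2_btree_iter_init() sets up the appropriate iterator flags for the extents btree as elsewhere in the tree.

static int count_data_extents(struct bch_fs *c, u64 inum, u64 *nr)
{
	struct btree_iter iter;
	struct bkey_s_c k;

	*nr = 0;

	for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0),
			   BTREE_ITER_PREFETCH, k) {
		if (k.k->p.inode != inum)
			break;
		if (bkey_extent_is_data(k.k))
			(*nr)++;
	}

	/* drop locks and surface any iterator error (e.g. -EIO): */
	return bch2_btree_iter_unlock(&iter) ?: btree_iter_err(k);
}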
/*

View File

@ -116,7 +116,7 @@ struct btree {
* another write - because that write also won't yet be reachable and
* marking it as completed before it's reachable would be incorrect:
*/
struct list_head reachable;
struct btree_interior_update *will_make_reachable;
struct open_bucket *ob;
@ -143,7 +143,6 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \
enum btree_flags {
BTREE_NODE_read_in_flight,
BTREE_NODE_read_error,
BTREE_NODE_write_error,
BTREE_NODE_dirty,
BTREE_NODE_need_write,
BTREE_NODE_noevict,
@ -155,7 +154,6 @@ enum btree_flags {
BTREE_FLAG(read_in_flight);
BTREE_FLAG(read_error);
BTREE_FLAG(write_error);
BTREE_FLAG(dirty);
BTREE_FLAG(need_write);
BTREE_FLAG(noevict);

View File

@ -21,6 +21,11 @@
static void btree_interior_update_updated_root(struct bch_fs *,
struct btree_interior_update *,
enum btree_id);
static void btree_interior_update_will_make_reachable(struct bch_fs *,
struct btree_interior_update *,
struct btree *);
static void btree_interior_update_drop_new_node(struct bch_fs *,
struct btree *);
/* Calculate ideal packed bkey format for new btree nodes: */
@ -166,7 +171,7 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b,
BUG_ON(b == btree_node_root(c, b));
BUG_ON(b->ob);
BUG_ON(!list_empty(&b->write_blocked));
BUG_ON(!list_empty(&b->reachable));
BUG_ON(b->will_make_reachable);
clear_btree_node_noevict(b);
@ -191,6 +196,8 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
{
struct open_bucket *ob = b->ob;
btree_interior_update_drop_new_node(c, b);
b->ob = NULL;
clear_btree_node_dirty(b);
@ -299,6 +306,7 @@ mem_alloc:
static struct btree *bch2_btree_node_alloc(struct bch_fs *c,
unsigned level, enum btree_id id,
struct btree_interior_update *as,
struct btree_reserve *reserve)
{
struct btree *b;
@ -322,7 +330,7 @@ static struct btree *bch2_btree_node_alloc(struct bch_fs *c,
bch2_btree_build_aux_trees(b);
bch2_check_mark_super(c, bkey_i_to_s_c_extent(&b->key), BCH_DATA_BTREE);
btree_interior_update_will_make_reachable(c, as, b);
trace_btree_node_alloc(c, b);
return b;
@ -331,11 +339,12 @@ static struct btree *bch2_btree_node_alloc(struct bch_fs *c,
struct btree *__bch2_btree_node_alloc_replacement(struct bch_fs *c,
struct btree *b,
struct bkey_format format,
struct btree_interior_update *as,
struct btree_reserve *reserve)
{
struct btree *n;
n = bch2_btree_node_alloc(c, b->level, b->btree_id, reserve);
n = bch2_btree_node_alloc(c, b->level, b->btree_id, as, reserve);
n->data->min_key = b->data->min_key;
n->data->max_key = b->data->max_key;
@ -353,6 +362,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct bch_fs *c,
static struct btree *bch2_btree_node_alloc_replacement(struct bch_fs *c,
struct btree *b,
struct btree_interior_update *as,
struct btree_reserve *reserve)
{
struct bkey_format new_f = bch2_btree_calc_format(b);
@ -364,7 +374,7 @@ static struct btree *bch2_btree_node_alloc_replacement(struct bch_fs *c,
if (!bch2_btree_node_format_fits(c, b, &new_f))
new_f = b->format;
return __bch2_btree_node_alloc_replacement(c, b, new_f, reserve);
return __bch2_btree_node_alloc_replacement(c, b, new_f, as, reserve);
}
static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b,
@ -478,9 +488,10 @@ static void bch2_btree_set_root(struct btree_iter *iter, struct btree *b,
static struct btree *__btree_root_alloc(struct bch_fs *c, unsigned level,
enum btree_id id,
struct btree_interior_update *as,
struct btree_reserve *reserve)
{
struct btree *b = bch2_btree_node_alloc(c, level, id, reserve);
struct btree *b = bch2_btree_node_alloc(c, level, id, as, reserve);
b->data->min_key = POS_MIN;
b->data->max_key = POS_MAX;
@ -581,6 +592,11 @@ static struct btree_reserve *__bch2_btree_reserve_get(struct bch_fs *c,
goto err_free;
}
ret = bch2_check_mark_super(c, bkey_i_to_s_c_extent(&b->key),
BCH_DATA_BTREE);
if (ret)
goto err_free;
reserve->b[reserve->nr++] = b;
}
@ -608,11 +624,12 @@ struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
struct closure *writes)
{
struct closure cl;
struct btree_interior_update as;
struct btree_reserve *reserve;
struct closure cl;
struct btree *b;
LIST_HEAD(reachable_list);
memset(&as, 0, sizeof(as));
closure_init_stack(&cl);
while (1) {
@ -627,15 +644,14 @@ int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
closure_sync(&cl);
}
b = __btree_root_alloc(c, 0, id, reserve);
list_add(&b->reachable, &reachable_list);
b = __btree_root_alloc(c, 0, id, &as, reserve);
bch2_btree_node_write(c, b, writes, SIX_LOCK_intent);
bch2_btree_set_root_initial(c, b, reserve);
bch2_btree_open_bucket_put(c, b);
list_del_init(&b->reachable);
btree_interior_update_drop_new_node(c, b);
bch2_btree_open_bucket_put(c, b);
six_unlock_intent(&b->lock);
bch2_btree_reserve_put(c, reserve);
@ -819,9 +835,12 @@ void bch2_btree_journal_key(struct btree_insert *trans,
/* ick */
insert->k.needs_whiteout = false;
bch2_journal_add_keys(j, &trans->journal_res,
b->btree_id, insert);
b->btree_id, insert);
insert->k.needs_whiteout = needs_whiteout;
bch2_journal_set_has_inode(j, &trans->journal_res,
insert->k.p.inode);
if (trans->journal_seq)
*trans->journal_seq = seq;
btree_bset_last(b)->journal_seq = cpu_to_le64(seq);
@ -891,7 +910,6 @@ bch2_btree_interior_update_alloc(struct bch_fs *c)
as->c = c;
as->mode = BTREE_INTERIOR_NO_UPDATE;
INIT_LIST_HEAD(&as->write_blocked_list);
INIT_LIST_HEAD(&as->reachable_list);
bch2_keylist_init(&as->parent_keys, as->inline_keys,
ARRAY_SIZE(as->inline_keys));
@ -916,16 +934,16 @@ static void btree_interior_update_nodes_reachable(struct closure *cl)
struct btree_interior_update *as =
container_of(cl, struct btree_interior_update, cl);
struct bch_fs *c = as->c;
unsigned i;
bch2_journal_pin_drop(&c->journal, &as->journal);
mutex_lock(&c->btree_interior_update_lock);
while (!list_empty(&as->reachable_list)) {
struct btree *b = list_first_entry(&as->reachable_list,
struct btree, reachable);
list_del_init(&b->reachable);
while (as->nr_new_nodes) {
struct btree *b = as->new_nodes[--as->nr_new_nodes];
BUG_ON(b->will_make_reachable != as);
b->will_make_reachable = NULL;
mutex_unlock(&c->btree_interior_update_lock);
six_lock_read(&b->lock);
@ -934,9 +952,8 @@ static void btree_interior_update_nodes_reachable(struct closure *cl)
mutex_lock(&c->btree_interior_update_lock);
}
for (i = 0; i < as->nr_pending; i++)
bch2_btree_node_free_ondisk(c, &as->pending[i]);
as->nr_pending = 0;
while (as->nr_pending)
bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending]);
list_del(&as->list);
mutex_unlock(&c->btree_interior_update_lock);
@ -1185,6 +1202,68 @@ static void btree_interior_update_updated_root(struct bch_fs *c,
system_freezable_wq);
}
static void btree_interior_update_will_make_reachable(struct bch_fs *c,
struct btree_interior_update *as,
struct btree *b)
{
mutex_lock(&c->btree_interior_update_lock);
BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes));
BUG_ON(b->will_make_reachable);
as->new_nodes[as->nr_new_nodes++] = b;
b->will_make_reachable = as;
mutex_unlock(&c->btree_interior_update_lock);
}
static void __btree_interior_update_drop_new_node(struct btree *b)
{
struct btree_interior_update *as = b->will_make_reachable;
unsigned i;
BUG_ON(!as);
for (i = 0; i < as->nr_new_nodes; i++)
if (as->new_nodes[i] == b)
goto found;
BUG();
found:
as->nr_new_nodes--;
memmove(&as->new_nodes[i],
&as->new_nodes[i + 1],
sizeof(struct btree *) * (as->nr_new_nodes - i));
b->will_make_reachable = NULL;
}
static void btree_interior_update_drop_new_node(struct bch_fs *c,
struct btree *b)
{
mutex_lock(&c->btree_interior_update_lock);
__btree_interior_update_drop_new_node(b);
mutex_unlock(&c->btree_interior_update_lock);
}
static void bch2_btree_interior_update_add_node_reference(struct bch_fs *c,
struct btree_interior_update *as,
struct btree *b)
{
struct pending_btree_node_free *d;
mutex_lock(&c->btree_interior_update_lock);
/* Add this node to the list of nodes being freed: */
BUG_ON(as->nr_pending >= ARRAY_SIZE(as->pending));
d = &as->pending[as->nr_pending++];
d->index_update_done = false;
d->seq = b->data->keys.seq;
d->btree_id = b->btree_id;
d->level = b->level;
bkey_copy(&d->key, &b->key);
mutex_unlock(&c->btree_interior_update_lock);
}
/*
* @b is being split/rewritten: it may have pointers to not-yet-written btree
* nodes and thus outstanding btree_interior_updates - redirect @b's
@ -1196,10 +1275,11 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
{
struct closure *cl, *cl_n;
struct btree_interior_update *p, *n;
struct pending_btree_node_free *d;
struct btree_write *w;
struct bset_tree *t;
bch2_btree_interior_update_add_node_reference(c, as, b);
/*
* Does this node have data that hasn't been written in the journal?
*
@ -1213,16 +1293,6 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
mutex_lock(&c->btree_interior_update_lock);
/* Add this node to the list of nodes being freed: */
BUG_ON(as->nr_pending >= ARRAY_SIZE(as->pending));
d = &as->pending[as->nr_pending++];
d->index_update_done = false;
d->seq = b->data->keys.seq;
d->btree_id = b->btree_id;
d->level = b->level;
bkey_copy(&d->key, &b->key);
/*
* Does this node have any btree_interior_update operations preventing
* it from being written?
@ -1255,8 +1325,13 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
&as->journal, interior_update_flush);
bch2_journal_pin_drop(&c->journal, &w->journal);
if (!list_empty(&b->reachable))
list_del_init(&b->reachable);
w = btree_prev_write(b);
bch2_journal_pin_add_if_older(&c->journal, &w->journal,
&as->journal, interior_update_flush);
bch2_journal_pin_drop(&c->journal, &w->journal);
if (b->will_make_reachable)
__btree_interior_update_drop_new_node(b);
mutex_unlock(&c->btree_interior_update_lock);
}
@ -1301,7 +1376,7 @@ err:
#endif
}
static enum btree_insert_ret
static int
bch2_btree_insert_keys_interior(struct btree *b,
struct btree_iter *iter,
struct keylist *insert_keys,
@ -1324,7 +1399,7 @@ bch2_btree_insert_keys_interior(struct btree *b,
if (bch_keylist_u64s(insert_keys) >
bch_btree_keys_u64s_remaining(c, b)) {
bch2_btree_node_unlock_write(b, iter);
return BTREE_INSERT_BTREE_NODE_FULL;
return -1;
}
/* Don't screw up @iter's position: */
@ -1362,7 +1437,7 @@ bch2_btree_insert_keys_interior(struct btree *b,
bch2_btree_node_unlock_write(b, iter);
btree_node_interior_verify(b);
return BTREE_INSERT_OK;
return 0;
}
/*
@ -1373,13 +1448,13 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n
struct btree_reserve *reserve,
struct btree_interior_update *as)
{
struct bch_fs *c = iter->c;
size_t nr_packed = 0, nr_unpacked = 0;
struct btree *n2;
struct bset *set1, *set2;
struct bkey_packed *k, *prev = NULL;
n2 = bch2_btree_node_alloc(iter->c, n1->level, iter->btree_id, reserve);
list_add(&n2->reachable, &as->reachable_list);
n2 = bch2_btree_node_alloc(c, n1->level, iter->btree_id, as, reserve);
n2->data->max_key = n1->data->max_key;
n2->data->format = n1->format;
@ -1528,8 +1603,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
bch2_btree_interior_update_will_free_node(c, as, b);
n1 = bch2_btree_node_alloc_replacement(c, b, reserve);
list_add(&n1->reachable, &as->reachable_list);
n1 = bch2_btree_node_alloc_replacement(c, b, as, reserve);
if (b->level)
btree_split_insert_keys(iter, n1, insert_keys, reserve);
@ -1558,8 +1632,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
/* Depth increases, make a new root */
n3 = __btree_root_alloc(c, b->level + 1,
iter->btree_id,
reserve);
list_add(&n3->reachable, &as->reachable_list);
as, reserve);
n3->sib_u64s[0] = U16_MAX;
n3->sib_u64s[1] = U16_MAX;
@ -1641,16 +1714,10 @@ void bch2_btree_insert_node(struct btree *b,
BUG_ON(!b->level);
BUG_ON(!reserve || !as);
switch (bch2_btree_insert_keys_interior(b, iter, insert_keys,
as, reserve)) {
case BTREE_INSERT_OK:
break;
case BTREE_INSERT_BTREE_NODE_FULL:
if ((as->flags & BTREE_INTERIOR_UPDATE_MUST_REWRITE) ||
bch2_btree_insert_keys_interior(b, iter, insert_keys,
as, reserve))
btree_split(b, iter, insert_keys, reserve, as);
break;
default:
BUG();
}
}
static int bch2_btree_split_leaf(struct btree_iter *iter, unsigned flags)
@ -1859,8 +1926,7 @@ retry:
bch2_btree_interior_update_will_free_node(c, as, b);
bch2_btree_interior_update_will_free_node(c, as, m);
n = bch2_btree_node_alloc(c, b->level, b->btree_id, reserve);
list_add(&n->reachable, &as->reachable_list);
n = bch2_btree_node_alloc(c, b->level, b->btree_id, as, reserve);
n->data->min_key = prev->data->min_key;
n->data->max_key = next->data->max_key;
@ -1945,6 +2011,8 @@ btree_insert_key(struct btree_insert *trans,
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
iter->flags &= ~BTREE_ITER_UPTODATE;
ret = !btree_node_is_extents(b)
? bch2_insert_fixup_key(trans, insert)
: bch2_insert_fixup_extent(trans, insert);
@ -2383,8 +2451,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
bch2_btree_interior_update_will_free_node(c, as, b);
n = bch2_btree_node_alloc_replacement(c, b, reserve);
list_add(&n->reachable, &as->reachable_list);
n = bch2_btree_node_alloc_replacement(c, b, as, reserve);
bch2_btree_build_aux_trees(n);
six_unlock_write(&n->lock);
@ -2464,3 +2531,140 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
closure_sync(&cl);
return ret;
}
int bch2_btree_node_update_key(struct bch_fs *c, struct btree *b,
struct bkey_i_extent *new_key)
{
struct btree_interior_update *as;
struct btree_reserve *reserve = NULL;
struct btree *parent, *new_hash = NULL;
struct btree_iter iter;
struct closure cl;
bool must_rewrite_parent = false;
int ret;
__bch2_btree_iter_init(&iter, c, b->btree_id, b->key.k.p,
BTREE_MAX_DEPTH,
b->level, 0);
closure_init_stack(&cl);
if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
/* bch2_btree_reserve_get will unlock */
do {
ret = bch2_btree_node_cannibalize_lock(c, &cl);
closure_sync(&cl);
} while (ret == -EAGAIN);
BUG_ON(ret);
new_hash = bch2_btree_node_mem_alloc(c);
}
retry:
reserve = bch2_btree_reserve_get(c, b, 0,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE,
&cl);
closure_sync(&cl);
if (IS_ERR(reserve)) {
ret = PTR_ERR(reserve);
if (ret == -EAGAIN || ret == -EINTR)
goto retry;
goto err;
}
down_read(&c->gc_lock);
ret = bch2_btree_iter_traverse(&iter);
if (ret)
goto err;
mutex_lock(&c->btree_interior_update_lock);
/*
* Two corner cases that need to be thought about here:
*
* @b may not be reachable yet - there might be another interior update
* operation waiting on @b to be written, and we're gonna deliver the
* write completion to that interior update operation _before_
* persisting the new_key update
*
* That ends up working without us having to do anything special here:
* the reason is, we do kick off (and do the in memory updates) for the
* update for @new_key before we return, creating a new interior_update
* operation here.
*
* The new interior update operation here will in effect override the
* previous one. The previous one was going to terminate - make @b
* reachable - in one of two ways:
* - updating the btree root pointer
* In that case,
* no, this doesn't work. argh.
*/
if (b->will_make_reachable)
must_rewrite_parent = true;
/* other case: btree node being freed */
if (iter.nodes[b->level] != b) {
/* node has been freed: */
BUG_ON(btree_node_hashed(b));
mutex_unlock(&c->btree_interior_update_lock);
goto err;
}
mutex_unlock(&c->btree_interior_update_lock);
ret = bch2_check_mark_super(c, extent_i_to_s_c(new_key), BCH_DATA_BTREE);
if (ret)
goto err;
as = bch2_btree_interior_update_alloc(c);
if (must_rewrite_parent)
as->flags |= BTREE_INTERIOR_UPDATE_MUST_REWRITE;
bch2_btree_interior_update_add_node_reference(c, as, b);
if (new_hash) {
bkey_copy(&new_hash->key, &new_key->k_i);
BUG_ON(bch2_btree_node_hash_insert(c, new_hash,
b->level, b->btree_id));
}
parent = iter.nodes[b->level + 1];
if (parent) {
bch2_btree_insert_node(parent, &iter,
&keylist_single(&b->key),
reserve, as);
} else {
bch2_btree_set_root(&iter, b, as, reserve);
}
if (new_hash) {
mutex_lock(&c->btree_cache_lock);
bch2_btree_node_hash_remove(c, b);
bkey_copy(&b->key, &new_key->k_i);
__bch2_btree_node_hash_insert(c, b);
bch2_btree_node_hash_remove(c, new_hash);
mutex_unlock(&c->btree_cache_lock);
} else {
bkey_copy(&b->key, &new_key->k_i);
}
err:
if (!IS_ERR_OR_NULL(reserve))
bch2_btree_reserve_put(c, reserve);
if (new_hash) {
mutex_lock(&c->btree_cache_lock);
list_move(&b->list, &c->btree_cache_freeable);
mutex_unlock(&c->btree_cache_lock);
six_unlock_write(&new_hash->lock);
six_unlock_intent(&new_hash->lock);
}
bch2_btree_iter_unlock(&iter);
up_read(&c->gc_lock);
return ret;
}

View File

@ -76,6 +76,9 @@ struct btree_interior_update {
BTREE_INTERIOR_UPDATING_AS,
} mode;
unsigned flags;
struct btree_reserve *reserve;
/*
* BTREE_INTERIOR_UPDATING_NODE:
* The update that made the new nodes visible was a regular update to an
@ -86,7 +89,6 @@ struct btree_interior_update {
*/
struct btree *b;
struct list_head write_blocked_list;
struct list_head reachable_list;
/*
* BTREE_INTERIOR_UPDATING_AS: btree node we updated was freed, so now
@ -117,6 +119,10 @@ struct btree_interior_update {
struct pending_btree_node_free pending[BTREE_MAX_DEPTH + GC_MERGE_NODES];
unsigned nr_pending;
/* New nodes, that will be made reachable by this update: */
struct btree *new_nodes[BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES];
unsigned nr_new_nodes;
/* Only here to reduce stack usage on recursive splits: */
struct keylist parent_keys;
/*
@ -127,6 +133,8 @@ struct btree_interior_update {
u64 inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3];
};
#define BTREE_INTERIOR_UPDATE_MUST_REWRITE (1 << 0)
#define for_each_pending_btree_node_free(c, as, p) \
list_for_each_entry(as, &c->btree_interior_update_list, list) \
for (p = as->pending; p < as->pending + as->nr_pending; p++)
@ -138,6 +146,7 @@ void bch2_btree_open_bucket_put(struct bch_fs *c, struct btree *);
struct btree *__bch2_btree_node_alloc_replacement(struct bch_fs *,
struct btree *,
struct bkey_format,
struct btree_interior_update *,
struct btree_reserve *);
struct btree_interior_update *
@ -426,6 +435,8 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
__le64, unsigned);
int bch2_btree_node_update_key(struct bch_fs *, struct btree *,
struct bkey_i_extent *);
#endif /* _BCACHE_BTREE_INSERT_H */

View File

@ -153,6 +153,37 @@ unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k)
return nr_ptrs;
}
/* Doesn't cleanup redundant crcs */
void __bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
{
EBUG_ON(ptr < &e.v->start->ptr ||
ptr >= &extent_entry_last(e)->ptr);
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
memmove_u64s_down(ptr, ptr + 1,
(u64 *) extent_entry_last(e) - (u64 *) (ptr + 1));
e.k->u64s -= sizeof(*ptr) / sizeof(u64);
}
void bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
{
__bch2_extent_drop_ptr(e, ptr);
bch2_extent_drop_redundant_crcs(e);
}
void bch2_extent_drop_ptr_idx(struct bkey_s_extent e, unsigned idx)
{
struct bch_extent_ptr *ptr;
unsigned i = 0;
extent_for_each_ptr(e, ptr)
if (i++ == idx)
goto found;
BUG();
found:
bch2_extent_drop_ptr(e, ptr);
}
/* returns true if equal */
static bool crc_cmp(union bch_extent_crc *l, union bch_extent_crc *r)
{

View File

@ -552,24 +552,9 @@ static inline unsigned extent_current_nonce(struct bkey_s_c_extent e)
void bch2_extent_narrow_crcs(struct bkey_s_extent);
void bch2_extent_drop_redundant_crcs(struct bkey_s_extent);
/* Doesn't cleanup redundant crcs */
static inline void __bch2_extent_drop_ptr(struct bkey_s_extent e,
struct bch_extent_ptr *ptr)
{
EBUG_ON(ptr < &e.v->start->ptr ||
ptr >= &extent_entry_last(e)->ptr);
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
memmove_u64s_down(ptr, ptr + 1,
(u64 *) extent_entry_last(e) - (u64 *) (ptr + 1));
e.k->u64s -= sizeof(*ptr) / sizeof(u64);
}
static inline void bch2_extent_drop_ptr(struct bkey_s_extent e,
struct bch_extent_ptr *ptr)
{
__bch2_extent_drop_ptr(e, ptr);
bch2_extent_drop_redundant_crcs(e);
}
void __bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
void bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
void bch2_extent_drop_ptr_idx(struct bkey_s_extent, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_device(struct bkey_s_c_extent, unsigned);

View File

@ -871,9 +871,8 @@ static void bch2_writepage_io_free(struct closure *cl)
{
struct bch_writepage_io *io = container_of(cl,
struct bch_writepage_io, cl);
struct bio *bio = &io->bio.bio;
bio_put(bio);
bio_put(&io->op.op.wbio.bio);
}
static void bch2_writepage_io_done(struct closure *cl)
@ -881,7 +880,7 @@ static void bch2_writepage_io_done(struct closure *cl)
struct bch_writepage_io *io = container_of(cl,
struct bch_writepage_io, cl);
struct bch_fs *c = io->op.op.c;
struct bio *bio = &io->bio.bio;
struct bio *bio = &io->op.op.wbio.bio;
struct bio_vec *bvec;
unsigned i;
@ -940,11 +939,12 @@ static void bch2_writepage_io_done(struct closure *cl)
static void bch2_writepage_do_io(struct bch_writepage_state *w)
{
struct bch_writepage_io *io = w->io;
struct bio *bio = &io->op.op.wbio.bio;
w->io = NULL;
atomic_add(io->bio.bio.bi_vcnt, &io->op.op.c->writeback_pages);
atomic_add(bio->bi_vcnt, &io->op.op.c->writeback_pages);
io->op.op.pos.offset = io->bio.bio.bi_iter.bi_sector;
io->op.op.pos.offset = bio->bi_iter.bi_sector;
closure_call(&io->op.op.cl, bch2_write, NULL, &io->cl);
continue_at(&io->cl, bch2_writepage_io_done, NULL);
@ -970,13 +970,13 @@ alloc_io:
w->io = container_of(bio_alloc_bioset(GFP_NOFS,
BIO_MAX_PAGES,
bch2_writepage_bioset),
struct bch_writepage_io, bio.bio);
struct bch_writepage_io, op.op.wbio.bio);
closure_init(&w->io->cl, NULL);
w->io->op.ei = ei;
w->io->op.sectors_added = 0;
w->io->op.is_dio = false;
bch2_write_op_init(&w->io->op.op, c, &w->io->bio,
bch2_write_op_init(&w->io->op.op, c,
(struct disk_reservation) {
.nr_replicas = c->opts.data_replicas,
},
@ -987,7 +987,7 @@ alloc_io:
}
if (w->io->op.op.res.nr_replicas != nr_replicas ||
bio_add_page_contig(&w->io->bio.bio, page)) {
bio_add_page_contig(&w->io->op.op.wbio.bio, page)) {
bch2_writepage_do_io(w);
goto alloc_io;
}
@ -1038,7 +1038,7 @@ do_io:
w->io->op.new_i_size = i_size;
if (wbc->sync_mode == WB_SYNC_ALL)
w->io->bio.bio.bi_opf |= REQ_SYNC;
w->io->op.op.wbio.bio.bi_opf |= REQ_SYNC;
/* Before unlocking the page, transfer reservation to w->io: */
old = page_state_cmpxchg(page_state(page), new, {
@ -1110,7 +1110,7 @@ get_pages:
done_index = page->index;
if (w.io &&
!bio_can_add_page_contig(&w.io->bio.bio, page))
!bio_can_add_page_contig(&w.io->op.op.wbio.bio, page))
bch2_writepage_do_io(&w);
if (!w.io &&
@ -1495,7 +1495,7 @@ static long __bch2_dio_write_complete(struct dio_write *dio)
if (dio->iovec && dio->iovec != dio->inline_vecs)
kfree(dio->iovec);
bio_put(&dio->bio.bio);
bio_put(&dio->iop.op.wbio.bio);
return ret;
}
@ -1517,11 +1517,11 @@ static void bch2_dio_write_done(struct dio_write *dio)
if (dio->iop.op.error)
dio->error = dio->iop.op.error;
bio_for_each_segment_all(bv, &dio->bio.bio, i)
bio_for_each_segment_all(bv, &dio->iop.op.wbio.bio, i)
put_page(bv->bv_page);
if (dio->iter.count)
bio_reset(&dio->bio.bio);
bio_reset(&dio->iop.op.wbio.bio);
}
static void bch2_do_direct_IO_write(struct dio_write *dio)
@ -1529,7 +1529,7 @@ static void bch2_do_direct_IO_write(struct dio_write *dio)
struct file *file = dio->req->ki_filp;
struct inode *inode = file->f_inode;
struct bch_inode_info *ei = to_bch_ei(inode);
struct bio *bio = &dio->bio.bio;
struct bio *bio = &dio->iop.op.wbio.bio;
unsigned flags = 0;
int ret;
@ -1537,8 +1537,6 @@ static void bch2_do_direct_IO_write(struct dio_write *dio)
!dio->c->opts.journal_flush_disabled)
flags |= BCH_WRITE_FLUSH;
bio->bi_iter.bi_sector = (dio->offset + dio->written) >> 9;
ret = bio_iov_iter_get_pages(bio, &dio->iter);
if (ret < 0) {
/*
@ -1555,10 +1553,9 @@ static void bch2_do_direct_IO_write(struct dio_write *dio)
dio->iop.sectors_added = 0;
dio->iop.is_dio = true;
dio->iop.new_i_size = U64_MAX;
bch2_write_op_init(&dio->iop.op, dio->c, &dio->bio,
dio->res,
bch2_write_op_init(&dio->iop.op, dio->c, dio->res,
foreground_write_point(dio->c, inode->i_ino),
POS(inode->i_ino, bio->bi_iter.bi_sector),
POS(inode->i_ino, (dio->offset + dio->written) >> 9),
&ei->journal_seq, flags);
dio->iop.op.index_update_fn = bchfs_write_index_update;
@ -1619,7 +1616,7 @@ static int bch2_direct_IO_write(struct bch_fs *c, struct kiocb *req,
bio = bio_alloc_bioset(GFP_KERNEL,
iov_iter_npages(iter, BIO_MAX_PAGES),
bch2_dio_write_bioset);
dio = container_of(bio, struct dio_write, bio.bio);
dio = container_of(bio, struct dio_write, iop.op.wbio.bio);
dio->req = req;
dio->c = c;
dio->written = 0;

View File

@ -46,16 +46,16 @@ struct bchfs_write_op {
s64 sectors_added;
bool is_dio;
u64 new_i_size;
/* must be last: */
struct bch_write_op op;
};
struct bch_writepage_io {
struct closure cl;
/* must be last: */
struct bchfs_write_op op;
/* must come last: */
struct bch_write_bio bio;
};
extern struct bio_set *bch2_writepage_bioset;
@ -76,10 +76,8 @@ struct dio_write {
struct mm_struct *mm;
struct bchfs_write_op iop;
/* must be last: */
struct bch_write_bio bio;
struct bchfs_write_op iop;
};
extern struct bio_set *bch2_dio_write_bioset;

View File

@ -1458,7 +1458,7 @@ int __init bch2_vfs_init(void)
goto err;
bch2_writepage_bioset =
bioset_create(4, offsetof(struct bch_writepage_io, bio.bio));
bioset_create(4, offsetof(struct bch_writepage_io, op.op.wbio.bio));
if (!bch2_writepage_bioset)
goto err;
@ -1466,7 +1466,8 @@ int __init bch2_vfs_init(void)
if (!bch2_dio_read_bioset)
goto err;
bch2_dio_write_bioset = bioset_create(4, offsetof(struct dio_write, bio.bio));
bch2_dio_write_bioset =
bioset_create(4, offsetof(struct dio_write, iop.op.wbio.bio));
if (!bch2_dio_write_bioset)
goto err;

View File

@ -92,12 +92,10 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
const struct bch_extent_ptr *ptr;
struct bch_write_bio *n;
struct bch_dev *ca;
unsigned ptr_idx = 0;
BUG_ON(c->opts.nochanges);
wbio->split = false;
wbio->c = c;
extent_for_each_ptr(e, ptr) {
ca = c->devs[ptr->dev];
@ -107,24 +105,26 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
n->bio.bi_end_io = wbio->bio.bi_end_io;
n->bio.bi_private = wbio->bio.bi_private;
n->c = c;
n->orig = &wbio->bio;
n->bounce = false;
n->parent = wbio;
n->split = true;
n->bounce = false;
n->put_bio = true;
n->bio.bi_opf = wbio->bio.bi_opf;
__bio_inc_remaining(n->orig);
__bio_inc_remaining(&wbio->bio);
} else {
n = wbio;
n->split = false;
}
n->c = c;
n->ca = ca;
n->ptr_idx = ptr_idx++;
n->submit_time_us = local_clock_us();
n->bio.bi_iter.bi_sector = ptr->offset;
if (!journal_flushes_device(ca))
n->bio.bi_opf |= REQ_FUA;
n->ca = ca;
n->submit_time_us = local_clock_us();
n->bio.bi_iter.bi_sector = ptr->offset;
if (likely(percpu_ref_tryget(&ca->io_ref))) {
n->have_io_ref = true;
n->bio.bi_bdev = ca->disk_sb.bdev;
@ -250,10 +250,9 @@ static void bch2_write_index(struct closure *cl)
static void bch2_write_discard(struct closure *cl)
{
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct bio *bio = &op->bio->bio;
struct bpos end = op->pos;
end.offset += bio_sectors(bio);
end.offset += bio_sectors(&op->wbio.bio);
op->error = bch2_discard(op->c, op->pos, end, op->version,
&op->res, NULL, NULL);
@ -308,31 +307,28 @@ static void bch2_write_io_error(struct closure *cl)
static void bch2_write_endio(struct bio *bio)
{
struct closure *cl = bio->bi_private;
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct bch_write_bio *wbio = to_wbio(bio);
struct bch_fs *c = wbio->c;
struct bio *orig = wbio->orig;
struct bch_dev *ca = wbio->ca;
struct closure *cl = bio->bi_private;
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct bch_write_bio *wbio = to_wbio(bio);
struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL;
struct bch_fs *c = wbio->c;
struct bch_dev *ca = wbio->ca;
if (bch2_dev_nonfatal_io_err_on(bio->bi_error, ca,
"data write"))
"data write"))
set_closure_fn(cl, bch2_write_io_error, index_update_wq(op));
if (wbio->have_io_ref)
percpu_ref_put(&ca->io_ref);
if (bio->bi_error && orig)
orig->bi_error = bio->bi_error;
if (wbio->bounce)
bch2_bio_free_pages_pool(c, bio);
if (wbio->put_bio)
bio_put(bio);
if (orig)
bio_endio(orig);
if (parent)
bio_endio(&parent->bio);
else
closure_put(cl);
}
@ -380,11 +376,10 @@ static void init_append_extent(struct bch_write_op *op,
bch2_keylist_push(&op->insert_keys);
}
static int bch2_write_extent(struct bch_write_op *op,
struct open_bucket *ob,
struct bio *orig)
static int bch2_write_extent(struct bch_write_op *op, struct open_bucket *ob)
{
struct bch_fs *c = op->c;
struct bio *orig = &op->wbio.bio;
struct bio *bio;
struct bch_write_bio *wbio;
unsigned key_to_write_offset = op->insert_keys.top_p -
@ -392,11 +387,13 @@ static int bch2_write_extent(struct bch_write_op *op,
struct bkey_i *key_to_write;
unsigned csum_type = op->csum_type;
unsigned compression_type = op->compression_type;
int ret;
int ret, more;
/* don't refetch csum type/compression type */
barrier();
BUG_ON(!bio_sectors(orig));
/* Need to decompress data? */
if ((op->flags & BCH_WRITE_DATA_COMPRESSED) &&
(crc_uncompressed_size(NULL, &op->crc) != op->size ||
@ -421,11 +418,8 @@ static int bch2_write_extent(struct bch_write_op *op,
ob);
bio = orig;
wbio = to_wbio(bio);
wbio->orig = NULL;
wbio->bounce = false;
wbio->put_bio = false;
ret = 0;
wbio = wbio_init(bio);
more = 0;
} else if (csum_type != BCH_CSUM_NONE ||
compression_type != BCH_COMPRESSION_NONE) {
/* all units here in bytes */
@ -439,19 +433,18 @@ static int bch2_write_extent(struct bch_write_op *op,
bio = bio_alloc_bioset(GFP_NOIO,
DIV_ROUND_UP(output_available, PAGE_SIZE),
&c->bio_write);
wbio = wbio_init(bio);
wbio->bounce = true;
wbio->put_bio = true;
/* copy WRITE_SYNC flag */
wbio->bio.bi_opf = orig->bi_opf;
/*
* XXX: can't use mempool for more than
* BCH_COMPRESSED_EXTENT_MAX worth of pages
*/
bch2_bio_alloc_pages_pool(c, bio, output_available);
/* copy WRITE_SYNC flag */
bio->bi_opf = orig->bi_opf;
wbio = to_wbio(bio);
wbio->orig = NULL;
wbio->bounce = true;
wbio->put_bio = true;
do {
unsigned fragment_compression_type = compression_type;
size_t dst_len, src_len;
@ -504,45 +497,43 @@ static int bch2_write_extent(struct bch_write_op *op,
mempool_free(bio->bi_io_vec[--bio->bi_vcnt].bv_page,
&c->bio_bounce_pages);
ret = orig->bi_iter.bi_size != 0;
more = orig->bi_iter.bi_size != 0;
} else {
bio = bio_next_split(orig, ob->sectors_free, GFP_NOIO,
&c->bio_write);
wbio = to_wbio(bio);
wbio->orig = NULL;
wbio->bounce = false;
wbio = wbio_init(bio);
wbio->put_bio = bio != orig;
init_append_extent(op, bio_sectors(bio), bio_sectors(bio),
compression_type, 0,
(struct bch_csum) { 0 }, csum_type, ob);
ret = bio != orig;
more = bio != orig;
}
/* might have done a realloc... */
key_to_write = (void *) (op->insert_keys.keys_p + key_to_write_offset);
ret = bch2_check_mark_super(c, bkey_i_to_s_c_extent(key_to_write),
BCH_DATA_USER);
if (ret)
return ret;
bio->bi_end_io = bch2_write_endio;
bio->bi_private = &op->cl;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
closure_get(bio->bi_private);
/* might have done a realloc... */
key_to_write = (void *) (op->insert_keys.keys_p + key_to_write_offset);
bch2_check_mark_super(c, bkey_i_to_s_c_extent(key_to_write),
BCH_DATA_USER);
bch2_submit_wbio_replicas(to_wbio(bio), c, key_to_write);
return ret;
return more;
}
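bch2_write_extent() now reports through its return value whether the original bio still has data left after this extent (the split and bounce cases), so the caller keeps looping. A hedged, self-contained sketch of that contract, with sector counts standing in for real bios:

#include <stdio.h>

/* Consumes up to free_sectors; returns nonzero if data remains ("more"). */
static int write_extent(unsigned *remaining, unsigned free_sectors)
{
	unsigned n = *remaining < free_sectors ? *remaining : free_sectors;

	*remaining -= n;
	printf("wrote %u sectors, %u remaining\n", n, *remaining);
	return *remaining != 0;
}

int main(void)
{
	unsigned remaining = 300;	/* size of the original write */
	int more;

	do {
		/* each iteration stands in for one open bucket's worth of space */
		more = write_extent(&remaining, 128);
	} while (more);

	return 0;
}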
static void __bch2_write(struct closure *cl)
{
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct bch_fs *c = op->c;
struct bio *bio = &op->bio->bio;
unsigned open_bucket_nr = 0;
struct open_bucket *b;
int ret;
@ -550,22 +541,12 @@ static void __bch2_write(struct closure *cl)
memset(op->open_buckets, 0, sizeof(op->open_buckets));
if (op->flags & BCH_WRITE_DISCARD) {
op->flags |= BCH_WRITE_DONE;
bch2_write_discard(cl);
bio_put(bio);
op->flags |= BCH_WRITE_DONE;
continue_at(cl, bch2_write_done, index_update_wq(op));
}
/*
* Journal writes are marked REQ_PREFLUSH; if the original write was a
* flush, it'll wait on the journal write.
*/
bio->bi_opf &= ~(REQ_PREFLUSH|REQ_FUA);
do {
EBUG_ON(bio->bi_iter.bi_sector != op->pos.offset);
EBUG_ON(!bio_sectors(bio));
if (open_bucket_nr == ARRAY_SIZE(op->open_buckets))
continue_at(cl, bch2_write_index, index_update_wq(op));
@ -622,7 +603,7 @@ static void __bch2_write(struct closure *cl)
b - c->open_buckets > U8_MAX);
op->open_buckets[open_bucket_nr++] = b - c->open_buckets;
ret = bch2_write_extent(op, b, bio);
ret = bch2_write_extent(op, b);
bch2_alloc_sectors_done(c, op->wp, b);
@ -703,16 +684,13 @@ void bch2_wake_delayed_writes(unsigned long data)
* after the data is written it calls bch_journal, and after the keys have been
* added to the next journal write they're inserted into the btree.
*
* It inserts the data in op->bio; bi_sector is used for the key offset, and
* op->inode is used for the key inode.
*
* If op->discard is true, instead of inserting the data it invalidates the
* region of the cache represented by op->bio and op->inode.
*/
void bch2_write(struct closure *cl)
{
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct bio *bio = &op->bio->bio;
struct bio *bio = &op->wbio.bio;
struct bch_fs *c = op->c;
u64 inode = op->pos.inode;
@ -742,7 +720,7 @@ void bch2_write(struct closure *cl)
spin_lock_irqsave(&c->foreground_write_pd_lock, flags);
bch2_ratelimit_increment(&c->foreground_write_pd.rate,
				       bio->bi_iter.bi_size);
delay = bch2_ratelimit_delay(&c->foreground_write_pd.rate);
@ -776,15 +754,14 @@ void bch2_write(struct closure *cl)
}
void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
struct bch_write_bio *bio, struct disk_reservation res,
struct write_point *wp, struct bpos pos,
u64 *journal_seq, unsigned flags)
struct disk_reservation res,
struct write_point *wp, struct bpos pos,
u64 *journal_seq, unsigned flags)
{
EBUG_ON(res.sectors && !res.nr_replicas);
op->c = c;
op->io_wq = index_update_wq(op);
op->bio = bio;
op->written = 0;
op->error = 0;
op->flags = flags;
@ -983,7 +960,7 @@ static void cache_promote_done(struct closure *cl)
struct cache_promote_op *op =
container_of(cl, struct cache_promote_op, cl);
bch2_bio_free_pages_pool(op->write.op.c, &op->write.wbio.bio);
bch2_bio_free_pages_pool(op->write.op.c, &op->write.op.wbio.bio);
kfree(op);
}
@ -1020,7 +997,7 @@ static void __bch2_read_endio(struct work_struct *work)
trace_promote(&rbio->bio);
/* we now own pages: */
swap(promote->write.wbio.bio.bi_vcnt, rbio->bio.bi_vcnt);
swap(promote->write.op.wbio.bio.bi_vcnt, rbio->bio.bi_vcnt);
rbio->promote = NULL;
bch2_rbio_done(rbio);
@ -1112,7 +1089,7 @@ void bch2_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
promote_op = kmalloc(sizeof(*promote_op) +
sizeof(struct bio_vec) * pages, GFP_NOIO);
if (promote_op) {
struct bio *promote_bio = &promote_op->write.wbio.bio;
struct bio *promote_bio = &promote_op->write.op.wbio.bio;
bio_init(promote_bio,
promote_bio->bi_inline_vecs,
@ -1204,7 +1181,7 @@ void bch2_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
rbio->bio.bi_end_io = bch2_read_endio;
if (promote_op) {
struct bio *promote_bio = &promote_op->write.wbio.bio;
struct bio *promote_bio = &promote_op->write.op.wbio.bio;
promote_bio->bi_iter = rbio->bio.bi_iter;
memcpy(promote_bio->bi_io_vec, rbio->bio.bi_io_vec,
@ -1367,12 +1344,11 @@ void bch2_read_retry_work(struct work_struct *work)
read_retry_work);
struct bch_read_bio *rbio;
struct bio *bio;
unsigned long flags;
while (1) {
spin_lock_irqsave(&c->read_retry_lock, flags);
spin_lock_irq(&c->read_retry_lock);
bio = bio_list_pop(&c->read_retry_list);
spin_unlock_irqrestore(&c->read_retry_lock, flags);
spin_unlock_irq(&c->read_retry_lock);
if (!bio)
break;

View File

@ -41,11 +41,18 @@ static inline struct write_point *foreground_write_point(struct bch_fs *c,
}
void bch2_write_op_init(struct bch_write_op *, struct bch_fs *,
struct bch_write_bio *,
struct disk_reservation, struct write_point *,
struct bpos, u64 *, unsigned);
void bch2_write(struct closure *);
static inline struct bch_write_bio *wbio_init(struct bio *bio)
{
struct bch_write_bio *wbio = to_wbio(bio);
memset(wbio, 0, offsetof(struct bch_write_bio, bio));
return wbio;
}
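wbio_init() zeroes only the bch_write_bio fields that precede the embedded bio, because the bio itself has already been set up (and, being variable-sized, must stay the last member). A hedged sketch of the same offsetof() trick with made-up types:

#include <stddef.h>
#include <string.h>
#include <stdio.h>

struct inner { int initialized; };

struct outer {
	int		flags;
	void		*data;
	struct inner	inner;		/* must be last, like bio in bch_write_bio */
};

static struct outer *outer_init(struct outer *o)
{
	memset(o, 0, offsetof(struct outer, inner));	/* inner is preserved */
	return o;
}

int main(void)
{
	struct outer o = { .flags = 3, .inner.initialized = 1 };

	outer_init(&o);
	printf("flags=%d inner=%d\n", o.flags, o.inner.initialized);	/* prints: flags=0 inner=1 */
	return 0;
}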
struct cache_promote_op;
struct extent_pick_ptr;

View File

@ -66,37 +66,30 @@ struct bch_write_bio {
struct bch_fs *c;
struct bch_dev *ca;
union {
struct bio *orig;
struct closure *cl;
struct bch_write_bio *parent;
struct closure *cl;
};
unsigned submit_time_us;
u8 ptr_idx;
u8 replicas_failed;
u8 order;
unsigned split:1,
bounce:1,
put_bio:1,
have_io_ref:1;
have_io_ref:1,
used_mempool:1;
/* Only for btree writes: */
unsigned used_mempool:1;
u8 order;
unsigned submit_time_us;
void *data;
struct bio bio;
};
struct bch_replace_info {
struct extent_insert_hook hook;
/* How many insertions succeeded */
unsigned successes;
/* How many insertions failed */
unsigned failures;
BKEY_PADDED(key);
};
struct bch_write_op {
struct closure cl;
	struct bch_fs		*c;
struct workqueue_struct *io_wq;
struct bch_write_bio *bio;
unsigned written; /* sectors */
@ -141,6 +134,9 @@ struct bch_write_op {
struct keylist insert_keys;
u64 inline_keys[BKEY_EXTENT_U64s_MAX * 2];
/* Must be last: */
struct bch_write_bio wbio;
};
#endif /* _BCACHE_IO_TYPES_H */

View File

@ -53,15 +53,15 @@ static inline u64 journal_pin_seq(struct journal *j,
return last_seq(j) + fifo_entry_idx(&j->pin, pin_list);
}
static inline void bch2_journal_add_entry(struct journal_buf *buf,
const void *data, size_t u64s,
unsigned type, enum btree_id id,
unsigned level)
static inline void bch2_journal_add_entry_noreservation(struct journal_buf *buf,
unsigned type, enum btree_id id,
unsigned level,
const void *data, size_t u64s)
{
struct jset *jset = buf->data;
bch2_journal_add_entry_at(buf, data, u64s, type, id, level,
le32_to_cpu(jset->u64s));
bch2_journal_add_entry_at(buf, le32_to_cpu(jset->u64s),
type, id, level, data, u64s);
le32_add_cpu(&jset->u64s, jset_u64s(u64s));
}
@ -97,8 +97,9 @@ static void bch2_journal_add_btree_root(struct journal_buf *buf,
enum btree_id id, struct bkey_i *k,
unsigned level)
{
bch2_journal_add_entry(buf, k, k->k.u64s,
JOURNAL_ENTRY_BTREE_ROOT, id, level);
bch2_journal_add_entry_noreservation(buf,
JOURNAL_ENTRY_BTREE_ROOT, id, level,
k, k->k.u64s);
}
static void journal_seq_blacklist_flush(struct journal *j,
@ -416,13 +417,8 @@ static void journal_entry_null_range(void *start, void *end)
{
struct jset_entry *entry;
for (entry = start; entry != end; entry = vstruct_next(entry)) {
entry->u64s = 0;
entry->btree_id = 0;
entry->level = 0;
entry->flags = 0;
SET_JOURNAL_ENTRY_TYPE(entry, 0);
}
for (entry = start; entry != end; entry = vstruct_next(entry))
memset(entry, 0, sizeof(*entry));
}
static int journal_validate_key(struct bch_fs *c, struct jset *j,
@ -514,7 +510,7 @@ static int __journal_entry_validate(struct bch_fs *c, struct jset *j,
break;
}
switch (JOURNAL_ENTRY_TYPE(entry)) {
switch (entry->type) {
case JOURNAL_ENTRY_BTREE_KEYS:
vstruct_for_each(entry, k) {
ret = journal_validate_key(c, j, entry, k,
@ -555,8 +551,8 @@ static int __journal_entry_validate(struct bch_fs *c, struct jset *j,
break;
default:
journal_entry_err(c, "invalid journal entry type %llu",
JOURNAL_ENTRY_TYPE(entry));
journal_entry_err(c, "invalid journal entry type %u",
entry->type);
journal_entry_null_range(entry, vstruct_next(entry));
break;
}
@ -1426,9 +1422,9 @@ void bch2_journal_start(struct bch_fs *c)
*/
list_for_each_entry(bl, &j->seq_blacklist, list)
if (!bl->written) {
bch2_journal_add_entry(journal_cur_buf(j), &bl->seq, 1,
bch2_journal_add_entry_noreservation(journal_cur_buf(j),
JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED,
0, 0);
0, 0, &bl->seq, 1);
journal_pin_add_entry(j,
&fifo_peek_back(&j->pin),
@ -2083,8 +2079,8 @@ static void journal_write_compact(struct jset *jset)
if (prev &&
i->btree_id == prev->btree_id &&
i->level == prev->level &&
JOURNAL_ENTRY_TYPE(i) == JOURNAL_ENTRY_TYPE(prev) &&
JOURNAL_ENTRY_TYPE(i) == JOURNAL_ENTRY_BTREE_KEYS &&
i->type == prev->type &&
i->type == JOURNAL_ENTRY_BTREE_KEYS &&
le16_to_cpu(prev->u64s) + u64s <= U16_MAX) {
memmove_u64s_down(vstruct_next(prev),
i->_data,
@ -2238,8 +2234,9 @@ static void journal_write(struct closure *cl)
closure_return_with_destructor(cl, journal_write_done);
}
bch2_check_mark_super(c, bkey_i_to_s_c_extent(&j->key),
BCH_DATA_JOURNAL);
if (bch2_check_mark_super(c, bkey_i_to_s_c_extent(&j->key),
BCH_DATA_JOURNAL))
goto err;
/*
* XXX: we really should just disable the entire journal in nochanges

View File

@ -125,7 +125,7 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
struct jset_entry *entry, unsigned type)
{
while (entry < vstruct_last(jset)) {
if (JOURNAL_ENTRY_TYPE(entry) == type)
if (entry->type == type)
return entry;
entry = vstruct_next(entry);
@ -187,8 +187,12 @@ static inline void journal_state_inc(union journal_res_state *s)
s->buf1_count += s->idx == 1;
}
static inline void bch2_journal_set_has_inode(struct journal_buf *buf, u64 inum)
static inline void bch2_journal_set_has_inode(struct journal *j,
struct journal_res *res,
u64 inum)
{
struct journal_buf *buf = &j->buf[res->idx];
set_bit(hash_64(inum, ilog2(sizeof(buf->has_inode) * 8)), buf->has_inode);
}
@ -202,38 +206,44 @@ static inline unsigned jset_u64s(unsigned u64s)
}
static inline void bch2_journal_add_entry_at(struct journal_buf *buf,
const void *data, size_t u64s,
unsigned offset,
unsigned type, enum btree_id id,
unsigned level, unsigned offset)
unsigned level,
const void *data, size_t u64s)
{
struct jset_entry *entry = vstruct_idx(buf->data, offset);
entry->u64s = cpu_to_le16(u64s);
memset(entry, 0, sizeof(*entry));
entry->u64s = cpu_to_le16(u64s);
entry->btree_id = id;
entry->level = level;
entry->flags = 0;
SET_JOURNAL_ENTRY_TYPE(entry, type);
entry->level = level;
entry->type = type;
memcpy_u64s(entry->_data, data, u64s);
}
static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res,
unsigned type, enum btree_id id,
unsigned level,
const void *data, unsigned u64s)
{
struct journal_buf *buf = &j->buf[res->idx];
unsigned actual = jset_u64s(u64s);
EBUG_ON(!res->ref);
BUG_ON(actual > res->u64s);
bch2_journal_add_entry_at(buf, res->offset, type,
id, level, data, u64s);
res->offset += actual;
res->u64s -= actual;
}
static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *res,
enum btree_id id, const struct bkey_i *k)
{
struct journal_buf *buf = &j->buf[res->idx];
unsigned actual = jset_u64s(k->k.u64s);
EBUG_ON(!res->ref);
BUG_ON(actual > res->u64s);
bch2_journal_set_has_inode(buf, k->k.p.inode);
bch2_journal_add_entry_at(buf, k, k->k.u64s,
JOURNAL_ENTRY_BTREE_KEYS, id,
0, res->offset);
res->offset += actual;
res->u64s -= actual;
bch2_journal_add_entry(j, res, JOURNAL_ENTRY_BTREE_KEYS,
id, 0, k, k->k.u64s);
}
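bch2_journal_add_entry() charges each entry against the caller's reservation: jset_u64s(payload) covers the payload plus the jset_entry header. A hedged sketch of that bookkeeping; the one-u64 header size below is an assumption for illustration (the real value is sizeof(struct jset_entry) / sizeof(u64)):

#include <stdio.h>

#define JSET_ENTRY_HEADER_U64S	1u	/* assumed header size, in u64s */

static unsigned jset_u64s(unsigned u64s)
{
	return u64s + JSET_ENTRY_HEADER_U64S;
}

struct journal_res { unsigned offset, u64s; };

static void add_entry(struct journal_res *res, unsigned payload_u64s)
{
	unsigned actual = jset_u64s(payload_u64s);

	/* the real code has BUG_ON(actual > res->u64s) here */
	res->offset += actual;
	res->u64s   -= actual;
}

int main(void)
{
	struct journal_res res = { .offset = 0, .u64s = 16 };

	add_entry(&res, 6);	/* e.g. a bkey of 6 u64s */
	add_entry(&res, 0);	/* empty entry, as in bch2_journal_res_put() */
	printf("offset=%u remaining=%u\n", res.offset, res.u64s);	/* prints: offset=8 remaining=8 */
	return 0;
}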
void bch2_journal_buf_put_slowpath(struct journal *, bool);
@ -272,13 +282,10 @@ static inline void bch2_journal_res_put(struct journal *j,
lock_release(&j->res_map, 0, _RET_IP_);
while (res->u64s) {
bch2_journal_add_entry_at(&j->buf[res->idx], NULL, 0,
JOURNAL_ENTRY_BTREE_KEYS,
0, 0, res->offset);
res->offset += jset_u64s(0);
res->u64s -= jset_u64s(0);
}
while (res->u64s)
bch2_journal_add_entry(j, res,
JOURNAL_ENTRY_BTREE_KEYS,
0, 0, NULL, 0);
bch2_journal_buf_put(j, res->idx, false);

View File

@ -128,9 +128,12 @@ int bch2_move_data_off_device(struct bch_dev *ca)
seen_key_count++;
continue;
next:
if (bkey_extent_is_data(k.k))
bch2_check_mark_super(c, bkey_s_c_to_extent(k),
BCH_DATA_USER);
if (bkey_extent_is_data(k.k)) {
ret = bch2_check_mark_super(c, bkey_s_c_to_extent(k),
BCH_DATA_USER);
if (ret)
break;
}
bch2_btree_iter_advance_pos(&iter);
bch2_btree_iter_cond_resched(&iter);
@ -386,9 +389,12 @@ int bch2_flag_data_bad(struct bch_dev *ca)
*/
continue;
advance:
if (bkey_extent_is_data(k.k))
bch2_check_mark_super(c, bkey_s_c_to_extent(k),
BCH_DATA_USER);
if (bkey_extent_is_data(k.k)) {
ret = bch2_check_mark_super(c, bkey_s_c_to_extent(k),
BCH_DATA_USER);
if (ret)
break;
}
bch2_btree_iter_advance_pos(&iter);
}

View File

@ -155,11 +155,8 @@ void bch2_migrate_write_init(struct bch_fs *c,
(move_ptr && move_ptr->cached))
flags |= BCH_WRITE_CACHED;
bch2_write_op_init(&m->op, c, &m->wbio,
(struct disk_reservation) { 0 },
wp,
bkey_start_pos(k.k),
NULL, flags);
bch2_write_op_init(&m->op, c, (struct disk_reservation) { 0 }, wp,
bkey_start_pos(k.k), NULL, flags);
if (m->move)
m->op.alloc_reserve = RESERVE_MOVINGGC;
@ -194,7 +191,7 @@ static void moving_io_destructor(struct closure *cl)
atomic_sub(io->write.key.k.size, &ctxt->sectors_in_flight);
wake_up(&ctxt->wait);
bio_for_each_segment_all(bv, &io->write.wbio.bio, i)
bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i)
if (bv->bv_page)
__free_page(bv->bv_page);
@ -307,9 +304,7 @@ int bch2_data_move(struct bch_fs *c,
return -ENOMEM;
}
migrate_bio_init(io, &io->write.wbio.bio, k.k->size);
bio_get(&io->write.wbio.bio);
io->write.wbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
migrate_bio_init(io, &io->write.op.wbio.bio, k.k->size);
bch2_migrate_write_init(c, &io->write, wp, k, move_ptr, 0);

View File

@ -19,7 +19,6 @@ struct migrate_write {
bool move;
struct bch_extent_ptr move_ptr;
struct bch_write_op op;
struct bch_write_bio wbio;
};
void bch2_migrate_write_init(struct bch_fs *,

View File

@ -783,6 +783,12 @@ out:
/* replica information: */
static inline struct bch_replicas_cpu_entry *
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
{
return (void *) r->entries + r->entry_size * i;
}
static inline struct bch_replicas_entry *
replicas_entry_next(struct bch_replicas_entry *i)
{
@ -794,6 +800,24 @@ replicas_entry_next(struct bch_replicas_entry *i)
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
(_i) = replicas_entry_next(_i))
static inline bool replicas_test_dev(struct bch_replicas_cpu_entry *e,
unsigned dev)
{
return (e->devs[dev >> 3] & (1 << (dev & 7))) != 0;
}
static inline void replicas_set_dev(struct bch_replicas_cpu_entry *e,
unsigned dev)
{
e->devs[dev >> 3] |= 1 << (dev & 7);
}
static inline unsigned replicas_dev_slots(struct bch_replicas_cpu *r)
{
return (r->entry_size -
offsetof(struct bch_replicas_cpu_entry, devs)) * 8;
}
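replicas_set_dev()/replicas_test_dev() treat the devs[] array as a bitmap indexed by device number, which is why replicas_dev_slots() is simply the number of bits that fit in entry_size. A hedged standalone sketch of the encoding:

#include <stdio.h>
#include <string.h>

#define MAX_DEVS	64

static void set_dev(unsigned char *devs, unsigned dev)
{
	devs[dev >> 3] |= 1 << (dev & 7);
}

static int test_dev(const unsigned char *devs, unsigned dev)
{
	return (devs[dev >> 3] & (1 << (dev & 7))) != 0;
}

int main(void)
{
	unsigned char devs[MAX_DEVS / 8];

	memset(devs, 0, sizeof(devs));
	set_dev(devs, 0);
	set_dev(devs, 11);
	printf("%d %d %d\n",
	       test_dev(devs, 0), test_dev(devs, 5), test_dev(devs, 11));	/* prints: 1 0 1 */
	return 0;
}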
static void bch2_sb_replicas_nr_entries(struct bch_sb_field_replicas *r,
unsigned *nr,
unsigned *bytes,
@ -879,6 +903,29 @@ static int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
return 0;
}
static void bkey_to_replicas(struct bkey_s_c_extent e,
enum bch_data_types data_type,
struct bch_replicas_cpu_entry *r,
unsigned *max_dev)
{
const struct bch_extent_ptr *ptr;
BUG_ON(!data_type ||
data_type == BCH_DATA_SB ||
data_type >= BCH_DATA_NR);
memset(r, 0, sizeof(*r));
r->data_type = data_type;
*max_dev = 0;
extent_for_each_ptr(e, ptr)
if (!ptr->cached) {
*max_dev = max_t(unsigned, *max_dev, ptr->dev);
replicas_set_dev(r, ptr->dev);
}
}
/*
* for when gc of replica information is in progress:
*/
@ -887,14 +934,11 @@ static int bch2_update_gc_replicas(struct bch_fs *c,
struct bkey_s_c_extent e,
enum bch_data_types data_type)
{
const struct bch_extent_ptr *ptr;
struct bch_replicas_cpu_entry *new_e;
struct bch_replicas_cpu_entry new_e;
struct bch_replicas_cpu *new;
unsigned i, nr, entry_size, max_dev = 0;
unsigned i, nr, entry_size, max_dev;
extent_for_each_ptr(e, ptr)
if (!ptr->cached)
max_dev = max_t(unsigned, max_dev, ptr->dev);
bkey_to_replicas(e, data_type, &new_e, &max_dev);
entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
DIV_ROUND_UP(max_dev + 1, 8);
@ -914,12 +958,9 @@ static int bch2_update_gc_replicas(struct bch_fs *c,
cpu_replicas_entry(gc_r, i),
gc_r->entry_size);
new_e = cpu_replicas_entry(new, nr - 1);
new_e->data_type = data_type;
extent_for_each_ptr(e, ptr)
if (!ptr->cached)
replicas_set_dev(new_e, ptr->dev);
memcpy(cpu_replicas_entry(new, nr - 1),
&new_e,
new->entry_size);
eytzinger0_sort(new->entries,
new->nr,
@ -931,8 +972,38 @@ static int bch2_update_gc_replicas(struct bch_fs *c,
return 0;
}
int bch2_check_mark_super_slowpath(struct bch_fs *c, struct bkey_s_c_extent e,
enum bch_data_types data_type)
static bool replicas_has_extent(struct bch_replicas_cpu *r,
struct bkey_s_c_extent e,
enum bch_data_types data_type)
{
struct bch_replicas_cpu_entry search;
unsigned max_dev;
bkey_to_replicas(e, data_type, &search, &max_dev);
return max_dev < replicas_dev_slots(r) &&
eytzinger0_find(r->entries, r->nr,
r->entry_size,
memcmp, &search) < r->nr;
}
bool bch2_sb_has_replicas(struct bch_fs *c, struct bkey_s_c_extent e,
enum bch_data_types data_type)
{
bool ret;
rcu_read_lock();
ret = replicas_has_extent(rcu_dereference(c->replicas),
e, data_type);
rcu_read_unlock();
return ret;
}
noinline
static int bch2_check_mark_super_slowpath(struct bch_fs *c,
struct bkey_s_c_extent e,
enum bch_data_types data_type)
{
struct bch_replicas_cpu *gc_r;
const struct bch_extent_ptr *ptr;
@ -996,6 +1067,25 @@ err:
return ret;
}
int bch2_check_mark_super(struct bch_fs *c, struct bkey_s_c_extent e,
enum bch_data_types data_type)
{
struct bch_replicas_cpu *gc_r;
bool marked;
rcu_read_lock();
marked = replicas_has_extent(rcu_dereference(c->replicas),
e, data_type) &&
(!(gc_r = rcu_dereference(c->replicas_gc)) ||
replicas_has_extent(gc_r, e, data_type));
rcu_read_unlock();
if (marked)
return 0;
return bch2_check_mark_super_slowpath(c, e, data_type);
}
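bch2_check_mark_super() is now a fast path: it checks the current (and in-progress GC) replicas tables under rcu_read_lock() and only calls the out-of-line slowpath, which takes the heavier locks and rewrites the table, on a miss. A hedged analogy in plain C, with a pthread rwlock standing in for RCU and a bitmap standing in for the replicas table:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static unsigned long marked;	/* bitmap standing in for the replicas table */

static bool has_entry(unsigned bit)
{
	bool ret;

	pthread_rwlock_rdlock(&lock);
	ret = marked & (1UL << bit);
	pthread_rwlock_unlock(&lock);
	return ret;
}

static int mark_slowpath(unsigned bit)
{
	pthread_rwlock_wrlock(&lock);
	marked |= 1UL << bit;		/* the real slowpath also updates the superblock */
	pthread_rwlock_unlock(&lock);
	return 0;
}

static int check_mark(unsigned bit)
{
	if (has_entry(bit))
		return 0;		/* common case: read side only */
	return mark_slowpath(bit);
}

int main(void)
{
	printf("%d %d\n", check_mark(3), check_mark(3));	/* second call takes the fast path */
	return 0;
}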
struct replicas_status __bch2_replicas_status(struct bch_fs *c,
struct bch_dev *dev_to_offline)
{

View File

@ -121,92 +121,10 @@ const char *bch2_read_super(struct bcache_superblock *,
struct bch_opts, const char *);
void bch2_write_super(struct bch_fs *);
static inline bool replicas_test_dev(struct bch_replicas_cpu_entry *e,
unsigned dev)
{
return (e->devs[dev >> 3] & (1 << (dev & 7))) != 0;
}
static inline void replicas_set_dev(struct bch_replicas_cpu_entry *e,
unsigned dev)
{
e->devs[dev >> 3] |= 1 << (dev & 7);
}
static inline unsigned replicas_dev_slots(struct bch_replicas_cpu *r)
{
return (r->entry_size -
offsetof(struct bch_replicas_cpu_entry, devs)) * 8;
}
static inline struct bch_replicas_cpu_entry *
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
{
return (void *) r->entries + r->entry_size * i;
}
int bch2_check_mark_super_slowpath(struct bch_fs *, struct bkey_s_c_extent,
enum bch_data_types);
static inline bool replicas_has_extent(struct bch_replicas_cpu *r,
struct bkey_s_c_extent e,
enum bch_data_types data_type)
{
const struct bch_extent_ptr *ptr;
struct bch_replicas_cpu_entry search = {
.data_type = data_type,
};
unsigned max_dev = 0;
BUG_ON(!data_type ||
data_type == BCH_DATA_SB ||
data_type >= BCH_DATA_NR);
extent_for_each_ptr(e, ptr)
if (!ptr->cached) {
max_dev = max_t(unsigned, max_dev, ptr->dev);
replicas_set_dev(&search, ptr->dev);
}
return max_dev < replicas_dev_slots(r) &&
eytzinger0_find(r->entries, r->nr,
r->entry_size,
memcmp, &search) < r->nr;
}
static inline bool bch2_sb_has_replicas(struct bch_fs *c,
struct bkey_s_c_extent e,
enum bch_data_types data_type)
{
bool ret;
rcu_read_lock();
ret = replicas_has_extent(rcu_dereference(c->replicas),
e, data_type);
rcu_read_unlock();
return ret;
}
static inline int bch2_check_mark_super(struct bch_fs *c,
struct bkey_s_c_extent e,
enum bch_data_types data_type)
{
struct bch_replicas_cpu *gc_r;
bool marked;
rcu_read_lock();
marked = replicas_has_extent(rcu_dereference(c->replicas),
e, data_type) &&
(!(gc_r = rcu_dereference(c->replicas_gc)) ||
replicas_has_extent(gc_r, e, data_type));
rcu_read_unlock();
if (marked)
return 0;
return bch2_check_mark_super_slowpath(c, e, data_type);
}
bool bch2_sb_has_replicas(struct bch_fs *, struct bkey_s_c_extent,
enum bch_data_types);
int bch2_check_mark_super(struct bch_fs *, struct bkey_s_c_extent,
enum bch_data_types);
struct replicas_status {
struct {

View File

@ -517,10 +517,15 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mutex_init(&c->btree_interior_update_lock);
mutex_init(&c->bio_bounce_pages_lock);
mutex_init(&c->zlib_workspace_lock);
bio_list_init(&c->read_retry_list);
spin_lock_init(&c->read_retry_lock);
INIT_WORK(&c->read_retry_work, bch2_read_retry_work);
mutex_init(&c->zlib_workspace_lock);
bio_list_init(&c->btree_write_error_list);
spin_lock_init(&c->btree_write_error_lock);
INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work);
INIT_LIST_HEAD(&c->fsck_errors);
mutex_init(&c->fsck_error_lock);
@ -593,8 +598,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
PAGE_SECTORS, 0) ||
!(c->usage_percpu = alloc_percpu(struct bch_fs_usage)) ||
lg_lock_init(&c->usage_lock) ||
mempool_init_page_pool(&c->btree_bounce_pool, 1,
ilog2(btree_pages(c))) ||
mempool_init_vp_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) ||
bdi_setup_and_register(&c->bdi, "bcachefs") ||
bch2_io_clock_init(&c->io_clock[READ]) ||
bch2_io_clock_init(&c->io_clock[WRITE]) ||
@ -1345,11 +1349,13 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
}
}
static bool bch2_fs_may_start(struct bch_fs *c, int flags)
static bool bch2_fs_may_start(struct bch_fs *c)
{
struct replicas_status s;
struct bch_sb_field_members *mi;
unsigned i;
unsigned i, flags = c->opts.degraded
? BCH_FORCE_IF_DEGRADED
: 0;
if (!c->opts.degraded) {
mutex_lock(&c->sb_lock);
@ -1773,7 +1779,7 @@ const char *bch2_fs_open(char * const *devices, unsigned nr_devices,
mutex_unlock(&c->sb_lock);
err = "insufficient devices";
if (!bch2_fs_may_start(c, 0))
if (!bch2_fs_may_start(c))
goto err;
if (!c->opts.nostart) {
@ -1844,7 +1850,7 @@ static const char *__bch2_fs_open_incremental(struct bcache_superblock *sb,
}
mutex_unlock(&c->sb_lock);
if (!c->opts.nostart && bch2_fs_may_start(c, 0)) {
if (!c->opts.nostart && bch2_fs_may_start(c)) {
err = __bch2_fs_start(c);
if (err)
goto err;

View File

@ -577,3 +577,17 @@ void sort_cmp_size(void *base, size_t num, size_t size,
}
}
}
void mempool_free_vp(void *element, void *pool_data)
{
size_t size = (size_t) pool_data;
vpfree(element, size);
}
void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data)
{
size_t size = (size_t) pool_data;
return vpmalloc(size, gfp_mask);
}

View File

@ -79,23 +79,43 @@ do { \
(__builtin_types_compatible_p(typeof(_val), _type) || \
__builtin_types_compatible_p(typeof(_val), const _type))
static inline void vpfree(void *p, size_t size)
{
if (is_vmalloc_addr(p))
vfree(p);
else
free_pages((unsigned long) p, get_order(size));
}
static inline void *vpmalloc(size_t size, gfp_t gfp_mask)
{
return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
get_order(size)) ?:
__vmalloc(size, gfp_mask, PAGE_KERNEL);
}
static inline void kvpfree(void *p, size_t size)
{
if (size < PAGE_SIZE)
kfree(p);
else if (is_vmalloc_addr(p))
vfree(p);
else
free_pages((unsigned long) p, get_order(size));
vpfree(p, size);
}
static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
{
return size < PAGE_SIZE ? kmalloc(size, gfp_mask)
: (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
get_order(size))
?: __vmalloc(size, gfp_mask, PAGE_KERNEL);
return size < PAGE_SIZE
? kmalloc(size, gfp_mask)
: vpmalloc(size, gfp_mask);
}
void mempool_free_vp(void *element, void *pool_data);
void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data);
static inline int mempool_init_vp_pool(mempool_t *pool, int min_nr, size_t size)
{
return mempool_init(pool, min_nr, mempool_alloc_vp,
mempool_free_vp, (void *) size);
}
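mempool_init_vp_pool() smuggles the element size through pool_data so that mempool_alloc_vp()/mempool_free_vp() can recover it; this is what lets super.c switch btree_bounce_pool from a page pool to arbitrarily sized vp allocations. A hedged usage sketch, assuming the surrounding tools headers are in scope and that the userspace mempool shim's mempool_alloc()/mempool_free() dispatch through pool->alloc/pool->free; bounce_example() is a made-up name:

static int bounce_example(void)
{
	mempool_t pool;
	void *buf;

	if (mempool_init_vp_pool(&pool, 1, 1 << 20))	/* 1MB elements */
		return -ENOMEM;

	buf = mempool_alloc(&pool, GFP_NOIO);		/* assumed to land in mempool_alloc_vp() */
	if (!buf)
		return -ENOMEM;

	memset(buf, 0, 1 << 20);
	mempool_free(buf, &pool);			/* assumed to land in mempool_free_vp() */
	return 0;
}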
#define HEAP(type) \

View File

@ -1,5 +1,6 @@
#include <string.h>
#include <sys/mman.h>
#include <linux/math64.h>
#include <linux/printk.h>
@ -163,6 +164,8 @@ static void sched_init(void)
{
struct task_struct *p = malloc(sizeof(*p));
mlockall(MCL_CURRENT|MCL_FUTURE);
memset(p, 0, sizeof(*p));
p->state = TASK_RUNNING;