mirror of https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-22 00:00:03 +03:00
Update bcachefs sources to 14e9ac5016 bcachefs: btree_iter fastpath
This commit is contained in:
parent a588eb0d9e
commit 565b4a74d6
@@ -1 +1 @@
43e3159567958ea70c8a95d98fdb6e881153a656
14e9ac5016803fc63c1216608c866bef16b4053e
@@ -250,7 +250,6 @@ static void write_data(struct bch_fs *c,
{
struct disk_reservation res;
struct bch_write_op op;
struct bch_write_bio bio;
struct bio_vec bv;
struct closure cl;

@@ -259,15 +258,15 @@ static void write_data(struct bch_fs *c,

closure_init_stack(&cl);

bio_init(&bio.bio, &bv, 1);
bio.bio.bi_iter.bi_size = len;
bch2_bio_map(&bio.bio, buf);
bio_init(&op.wbio.bio, &bv, 1);
op.wbio.bio.bi_iter.bi_size = len;
bch2_bio_map(&op.wbio.bio, buf);

int ret = bch2_disk_reservation_get(c, &res, len >> 9, 0);
if (ret)
die("error reserving space in new filesystem: %s", strerror(-ret));

bch2_write_op_init(&op, c, &bio, res, c->write_points,
bch2_write_op_init(&op, c, res, c->write_points,
POS(dst_inode->inum, dst_offset >> 9), NULL, 0);
closure_call(&op.cl, bch2_write, NULL, &cl);
closure_sync(&cl);
@@ -166,4 +166,8 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
#define flush_cache_vmap(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) do { } while (0)

#ifdef __x86_64
#define CONFIG_X86_64 y
#endif

#endif /* _TOOLS_LINUX_COMPILER_H */
@@ -10,8 +10,14 @@

struct kmem_cache;

typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data);
typedef void (mempool_free_t)(void *element, void *pool_data);

typedef struct mempool_s {
size_t elem_size;
size_t elem_size;
void *pool_data;
mempool_alloc_t *alloc;
mempool_free_t *free;
} mempool_t;

static inline bool mempool_initialized(mempool_t *pool)
@@ -60,24 +66,22 @@ static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t
return 0;
}

static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
{
mempool_t *pool = malloc(sizeof(*pool));
pool->elem_size = size;
return pool;
}

static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order)
{
pool->elem_size = PAGE_SIZE << order;
return 0;
}

static inline mempool_t *mempool_create_page_pool(int min_nr, int order)
static inline int mempool_init(mempool_t *pool, int min_nr,
mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn,
void *pool_data)
{
mempool_t *pool = malloc(sizeof(*pool));
pool->elem_size = PAGE_SIZE << order;
return pool;
pool->elem_size = (size_t) pool_data;
pool->pool_data = pool_data;
pool->alloc = alloc_fn;
pool->free = free_fn;
return 0;
}

#endif /* _LINUX_MEMPOOL_H */
@@ -43,9 +43,6 @@ static inline void *krealloc(void *old, size_t size, gfp_t flags)
#define kcalloc(n, size, flags) calloc(n, size)
#define kmalloc_array(n, size, flags) calloc(n, size)

#define vmalloc(size) malloc(size)
#define vzalloc(size) calloc(1, size)

#define kfree(p) free(p)
#define kvfree(p) free(p)
#define kzfree(p) free(p)
@@ -89,8 +86,6 @@ do { \
#define VM_NO_GUARD 0x00000040 /* don't add guard page */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */

#define PAGE_KERNEL 0

static inline void vunmap(const void *addr) {}

static inline void *vmap(struct page **pages, unsigned int count,
@@ -1,8 +1,41 @@
#ifndef __TOOLS_LINUX_VMALLOC_H
#define __TOOLS_LINUX_VMALLOC_H

#define vmalloc(size) malloc(size)
#define __vmalloc(size, flags, prot) malloc(size)
#include <stdlib.h>
#include <sys/mman.h>

#include "tools-util.h"

#define PAGE_KERNEL 0
#define PAGE_KERNEL_EXEC 1

#define vfree(p) free(p)

static inline void *__vmalloc(unsigned long size, gfp_t gfp_mask, unsigned prot)
{
void *p = aligned_alloc(PAGE_SIZE, size);

if (p && prot == PAGE_KERNEL_EXEC) {
if (mprotect(p, size, PROT_READ|PROT_WRITE|PROT_EXEC)) {
vfree(p);
p = NULL;
}
}

if (p && (gfp_mask & __GFP_ZERO))
memset(p, 0, size);

return p;
}

static inline void *vmalloc(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}

static inline void *vzalloc(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL|__GFP_ZERO, PAGE_KERNEL);
}

#endif /* __TOOLS_LINUX_VMALLOC_H */
@@ -361,7 +361,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
struct bucket *g, struct btree_iter *iter,
u64 *journal_seq)
{
struct bucket_mark m = READ_ONCE(g->mark);
struct bucket_mark m;
__BKEY_PADDED(k, DIV_ROUND_UP(sizeof(struct bch_alloc), 8)) alloc_key;
struct bkey_i_alloc *a;
u8 *d;
@@ -374,6 +374,8 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
if (ret)
break;

/* read mark under btree node lock: */
m = READ_ONCE(g->mark);
a = bkey_alloc_init(&alloc_key.k);
a->k.p = iter->pos;
a->v.fields = 0;
@@ -407,8 +409,6 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
struct btree_iter iter;
int ret;

lockdep_assert_held(&c->state_lock);

if (pos.inode >= c->sb.nr_devices || !c->devs[pos.inode])
return 0;
@@ -725,6 +725,10 @@ struct bch_fs {
struct work_struct read_retry_work;
spinlock_t read_retry_lock;

struct bio_list btree_write_error_list;
struct work_struct btree_write_error_work;
spinlock_t btree_write_error_lock;

/* ERRORS */
struct list_head fsck_errors;
struct mutex fsck_error_lock;
@@ -1082,7 +1082,8 @@ struct jset_entry {
__le16 u64s;
__u8 btree_id;
__u8 level;
__le32 flags; /* designates what this jset holds */
__u8 type; /* designates what this jset holds */
__u8 pad[3];

union {
struct bkey_i start[0];
@@ -1092,7 +1093,6 @@ struct jset_entry {

#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))

LE32_BITMASK(JOURNAL_ENTRY_TYPE, struct jset_entry, flags, 0, 8);
enum {
JOURNAL_ENTRY_BTREE_KEYS = 0,
JOURNAL_ENTRY_BTREE_ROOT = 1,
@@ -791,11 +791,9 @@ static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
unsigned dst_offset, unsigned dst_size,
bool *eax_zeroed)
{
unsigned byte = format->key_u64s * sizeof(u64);
unsigned bits = format->bits_per_field[field];
u64 offset = format->field_offset[field];
unsigned i, bit_offset = 0;
unsigned shl, shr;
unsigned i, byte, bit_offset, align, shl, shr;

if (!bits && !offset) {
if (!*eax_zeroed) {
@@ -842,11 +840,12 @@ static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
return out;
}

bit_offset = format->key_u64s * 64;
for (i = 0; i <= field; i++)
bit_offset += format->bits_per_field[i];
bit_offset -= format->bits_per_field[i];

byte -= DIV_ROUND_UP(bit_offset, 8);
bit_offset = round_up(bit_offset, 8) - bit_offset;
byte = bit_offset / 8;
bit_offset -= byte * 8;

*eax_zeroed = false;

@@ -857,6 +856,12 @@ static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
/* movzx eax, WORD PTR [rsi + imm8] */
I4(0x0f, 0xb7, 0x46, byte);
} else if (bit_offset + bits <= 32) {
align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
byte -= align;
bit_offset += align * 8;

BUG_ON(bit_offset + bits > 32);

/* mov eax, [rsi + imm8] */
I3(0x8b, 0x46, byte);

@@ -874,6 +879,12 @@ static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
out += 4;
}
} else if (bit_offset + bits <= 64) {
align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7);
byte -= align;
bit_offset += align * 8;

BUG_ON(bit_offset + bits > 64);

/* mov rax, [rsi + imm8] */
I4(0x48, 0x8b, 0x46, byte);

@@ -890,6 +901,12 @@ static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
I4(0x48, 0xc1, 0xe8, shr);
}
} else {
align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
byte -= align;
bit_offset += align * 8;

BUG_ON(bit_offset + bits > 96);

/* mov rax, [rsi + byte] */
I4(0x48, 0x8b, 0x46, byte);
@@ -41,7 +41,7 @@ static void __mca_data_free(struct bch_fs *c, struct btree *b)
{
EBUG_ON(btree_node_write_in_flight(b));

free_pages((unsigned long) b->data, btree_page_order(c));
kvpfree(b->data, btree_bytes(c));
b->data = NULL;
bch2_btree_keys_free(b);
}
@@ -53,8 +53,6 @@ static void mca_data_free(struct bch_fs *c, struct btree *b)
list_move(&b->list, &c->btree_cache_freed);
}

#define PTR_HASH(_k) (bkey_i_to_extent_c(_k)->v._data[0])

static const struct rhashtable_params bch_btree_cache_params = {
.head_offset = offsetof(struct btree, hash),
.key_offset = offsetof(struct btree, key.v),
@@ -63,20 +61,18 @@ static const struct rhashtable_params bch_btree_cache_params = {

static void mca_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
{
unsigned order = ilog2(btree_pages(c));

b->data = (void *) __get_free_pages(gfp, order);
b->data = kvpmalloc(btree_bytes(c), gfp);
if (!b->data)
goto err;

if (bch2_btree_keys_alloc(b, order, gfp))
if (bch2_btree_keys_alloc(b, btree_page_order(c), gfp))
goto err;

c->btree_cache_used++;
list_move(&b->list, &c->btree_cache_freeable);
return;
err:
free_pages((unsigned long) b->data, order);
kvpfree(b->data, btree_bytes(c));
b->data = NULL;
list_move(&b->list, &c->btree_cache_freed);
}
@@ -91,7 +87,6 @@ static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
six_lock_init(&b->lock);
INIT_LIST_HEAD(&b->list);
INIT_LIST_HEAD(&b->write_blocked);
INIT_LIST_HEAD(&b->reachable);

mca_data_alloc(c, b, gfp);
return b->data ? b : NULL;
@@ -101,10 +96,6 @@ static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)

void bch2_btree_node_hash_remove(struct bch_fs *c, struct btree *b)
{
BUG_ON(btree_node_dirty(b));

b->nsets = 0;

rhashtable_remove_fast(&c->btree_cache_table, &b->hash,
bch_btree_cache_params);

@@ -112,23 +103,27 @@ void bch2_btree_node_hash_remove(struct bch_fs *c, struct btree *b)
bkey_i_to_extent(&b->key)->v._data[0] = 0;
}

int __bch2_btree_node_hash_insert(struct bch_fs *c, struct btree *b)
{
return rhashtable_lookup_insert_fast(&c->btree_cache_table, &b->hash,
bch_btree_cache_params);
}

int bch2_btree_node_hash_insert(struct bch_fs *c, struct btree *b,
unsigned level, enum btree_id id)
{
int ret;

b->level = level;
b->btree_id = id;

ret = rhashtable_lookup_insert_fast(&c->btree_cache_table, &b->hash,
bch_btree_cache_params);
if (ret)
return ret;

mutex_lock(&c->btree_cache_lock);
list_add(&b->list, &c->btree_cache);
ret = __bch2_btree_node_hash_insert(c, b);
if (!ret)
list_add(&b->list, &c->btree_cache);
mutex_unlock(&c->btree_cache_lock);

return 0;
return ret;
}

__flatten
@@ -155,8 +150,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
if (!six_trylock_write(&b->lock))
goto out_unlock_intent;

if (btree_node_write_error(b) ||
btree_node_noevict(b))
if (btree_node_noevict(b))
goto out_unlock;

if (!btree_node_may_write(b))
@@ -328,7 +322,7 @@ void bch2_fs_btree_exit(struct bch_fs *c)
if (c->verify_data)
list_move(&c->verify_data->list, &c->btree_cache);

free_pages((unsigned long) c->verify_ondisk, ilog2(btree_pages(c)));
kvpfree(c->verify_ondisk, btree_bytes(c));
#endif

for (i = 0; i < BTREE_ID_NR; i++)
@@ -384,8 +378,7 @@ int bch2_fs_btree_init(struct bch_fs *c)
#ifdef CONFIG_BCACHEFS_DEBUG
mutex_init(&c->verify_lock);

c->verify_ondisk = (void *)
__get_free_pages(GFP_KERNEL, ilog2(btree_pages(c)));
c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
if (!c->verify_ondisk)
return -ENOMEM;

@@ -510,7 +503,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
BUG_ON(!six_trylock_intent(&b->lock));
BUG_ON(!six_trylock_write(&b->lock));
out_unlock:
BUG_ON(bkey_extent_is_data(&b->key.k) && PTR_HASH(&b->key));
BUG_ON(btree_node_hashed(b));
BUG_ON(btree_node_write_in_flight(b));

list_del_init(&b->list);
@@ -554,6 +547,12 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_iter *iter,
struct bch_fs *c = iter->c;
struct btree *b;

/*
* Parent node must be locked, else we could read in a btree node that's
* been freed:
*/
BUG_ON(!btree_node_locked(iter, level + 1));

b = bch2_btree_node_mem_alloc(c);
if (IS_ERR(b))
return b;
@@ -3,6 +3,7 @@

#include "bcachefs.h"
#include "btree_types.h"
#include "extents.h"

struct btree_iter;

@@ -11,6 +12,7 @@ extern const char * const bch2_btree_ids[];
void bch2_recalc_btree_reserve(struct bch_fs *);

void bch2_btree_node_hash_remove(struct bch_fs *, struct btree *);
int __bch2_btree_node_hash_insert(struct bch_fs *, struct btree *);
int bch2_btree_node_hash_insert(struct bch_fs *, struct btree *,
unsigned, enum btree_id);

@@ -28,6 +30,14 @@ void bch2_btree_node_prefetch(struct btree_iter *, const struct bkey_i *,
void bch2_fs_btree_exit(struct bch_fs *);
int bch2_fs_btree_init(struct bch_fs *);

#define PTR_HASH(_k) (bkey_i_to_extent_c(_k)->v._data[0])

/* is btree node in hash table? */
static inline bool btree_node_hashed(struct btree *b)
{
return bkey_extent_is_data(&b->key.k) && PTR_HASH(&b->key);
}

#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \
for ((_tbl) = rht_dereference_rcu((_c)->btree_cache_table.tbl, \
&(_c)->btree_cache_table), \
@@ -621,12 +621,10 @@ static void bch2_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
bch2_btree_interior_update_will_free_node(c, as, old_nodes[i]);

/* Repack everything with @new_format and sort down to one bset */
for (i = 0; i < nr_old_nodes; i++) {
for (i = 0; i < nr_old_nodes; i++)
new_nodes[i] =
__bch2_btree_node_alloc_replacement(c, old_nodes[i],
new_format, res);
list_add(&new_nodes[i]->reachable, &as->reachable_list);
}
new_format, as, res);

/*
* Conceptually we concatenate the nodes together and slice them
@@ -663,7 +661,6 @@ static void bch2_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],

set_btree_bset_end(n1, n1->set);

list_del_init(&n2->reachable);
six_unlock_write(&n2->lock);
bch2_btree_node_free_never_inserted(c, n2);
six_unlock_intent(&n2->lock);
@@ -796,7 +793,8 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
memset(merge, 0, sizeof(merge));

__for_each_btree_node(&iter, c, btree_id, POS_MIN,
U8_MAX, 0, BTREE_ITER_PREFETCH, b) {
BTREE_MAX_DEPTH, 0,
BTREE_ITER_PREFETCH, b) {
memmove(merge + 1, merge,
sizeof(merge) - sizeof(merge[0]));
memmove(lock_seq + 1, lock_seq,
@ -56,9 +56,9 @@ static void btree_bounce_free(struct bch_fs *c, unsigned order,
|
||||
bool used_mempool, void *p)
|
||||
{
|
||||
if (used_mempool)
|
||||
mempool_free(virt_to_page(p), &c->btree_bounce_pool);
|
||||
mempool_free(p, &c->btree_bounce_pool);
|
||||
else
|
||||
free_pages((unsigned long) p, order);
|
||||
vpfree(p, PAGE_SIZE << order);
|
||||
}
|
||||
|
||||
static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
|
||||
@ -66,7 +66,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
|
||||
{
|
||||
void *p;
|
||||
|
||||
BUG_ON(1 << order > btree_pages(c));
|
||||
BUG_ON(order > btree_page_order(c));
|
||||
|
||||
*used_mempool = false;
|
||||
p = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order);
|
||||
@ -74,7 +74,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
|
||||
return p;
|
||||
|
||||
*used_mempool = true;
|
||||
return page_address(mempool_alloc(&c->btree_bounce_pool, GFP_NOIO));
|
||||
return mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
|
||||
}
|
||||
|
||||
typedef int (*sort_cmp_fn)(struct btree *,
|
||||
@ -1183,7 +1183,7 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
|
||||
if (bne->keys.seq == b->data->keys.seq)
|
||||
goto err;
|
||||
|
||||
sorted = btree_bounce_alloc(c, ilog2(btree_pages(c)), &used_mempool);
|
||||
sorted = btree_bounce_alloc(c, btree_page_order(c), &used_mempool);
|
||||
sorted->keys.u64s = 0;
|
||||
|
||||
b->nr = btree_node_is_extents(b)
|
||||
@ -1199,7 +1199,7 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
|
||||
|
||||
BUG_ON(b->nr.live_u64s != u64s);
|
||||
|
||||
btree_bounce_free(c, ilog2(btree_pages(c)), used_mempool, sorted);
|
||||
btree_bounce_free(c, btree_page_order(c), used_mempool, sorted);
|
||||
|
||||
bch2_bset_build_aux_tree(b, b->set, false);
|
||||
|
||||
@ -1344,50 +1344,100 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
struct btree_write *w = btree_prev_write(b);
|
||||
|
||||
/*
|
||||
* Before calling bch2_btree_complete_write() - if the write errored, we
|
||||
* have to halt new journal writes before they see this btree node
|
||||
* write as completed:
|
||||
*/
|
||||
if (btree_node_write_error(b))
|
||||
bch2_journal_halt(&c->journal);
|
||||
|
||||
bch2_btree_complete_write(c, b, w);
|
||||
btree_node_io_unlock(b);
|
||||
}
|
||||
|
||||
static void bch2_btree_node_write_error(struct bch_fs *c,
|
||||
struct bch_write_bio *wbio)
|
||||
{
|
||||
struct btree *b = wbio->bio.bi_private;
|
||||
struct closure *cl = wbio->cl;
|
||||
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
|
||||
struct bkey_i_extent *new_key;
|
||||
|
||||
bkey_copy(&tmp.k, &b->key);
|
||||
new_key = bkey_i_to_extent(&tmp.k);
|
||||
|
||||
while (wbio->replicas_failed) {
|
||||
unsigned idx = __fls(wbio->replicas_failed);
|
||||
|
||||
bch2_extent_drop_ptr_idx(extent_i_to_s(new_key), idx);
|
||||
wbio->replicas_failed ^= 1 << idx;
|
||||
}
|
||||
|
||||
if (!bch2_extent_nr_ptrs(extent_i_to_s_c(new_key)) ||
|
||||
bch2_btree_node_update_key(c, b, new_key)) {
|
||||
set_btree_node_noevict(b);
|
||||
bch2_fatal_error(c);
|
||||
}
|
||||
|
||||
bio_put(&wbio->bio);
|
||||
btree_node_write_done(c, b);
|
||||
if (cl)
|
||||
closure_put(cl);
|
||||
}
|
||||
|
||||
void bch2_btree_write_error_work(struct work_struct *work)
|
||||
{
|
||||
struct bch_fs *c = container_of(work, struct bch_fs,
|
||||
btree_write_error_work);
|
||||
struct bio *bio;
|
||||
|
||||
while (1) {
|
||||
spin_lock_irq(&c->read_retry_lock);
|
||||
bio = bio_list_pop(&c->read_retry_list);
|
||||
spin_unlock_irq(&c->read_retry_lock);
|
||||
|
||||
if (!bio)
|
||||
break;
|
||||
|
||||
bch2_btree_node_write_error(c, to_wbio(bio));
|
||||
}
|
||||
}
|
||||
|
||||
static void btree_node_write_endio(struct bio *bio)
|
||||
{
|
||||
struct btree *b = bio->bi_private;
|
||||
struct bch_write_bio *wbio = to_wbio(bio);
|
||||
struct bch_fs *c = wbio->c;
|
||||
struct bio *orig = wbio->split ? wbio->orig : NULL;
|
||||
struct closure *cl = !wbio->split ? wbio->cl : NULL;
|
||||
struct bch_dev *ca = wbio->ca;
|
||||
struct btree *b = bio->bi_private;
|
||||
struct bch_write_bio *wbio = to_wbio(bio);
|
||||
struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL;
|
||||
struct bch_write_bio *orig = parent ?: wbio;
|
||||
struct closure *cl = !wbio->split ? wbio->cl : NULL;
|
||||
struct bch_fs *c = wbio->c;
|
||||
struct bch_dev *ca = wbio->ca;
|
||||
|
||||
if (bch2_dev_fatal_io_err_on(bio->bi_error, ca, "btree write") ||
|
||||
if (bch2_dev_nonfatal_io_err_on(bio->bi_error, ca, "btree write") ||
|
||||
bch2_meta_write_fault("btree"))
|
||||
set_btree_node_write_error(b);
|
||||
set_bit(wbio->ptr_idx, (unsigned long *) &orig->replicas_failed);
|
||||
|
||||
if (wbio->have_io_ref)
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
|
||||
if (wbio->bounce)
|
||||
btree_bounce_free(c,
|
||||
wbio->order,
|
||||
wbio->used_mempool,
|
||||
page_address(bio->bi_io_vec[0].bv_page));
|
||||
|
||||
if (wbio->put_bio)
|
||||
if (parent) {
|
||||
bio_put(bio);
|
||||
|
||||
if (orig) {
|
||||
bio_endio(orig);
|
||||
} else {
|
||||
btree_node_write_done(c, b);
|
||||
if (cl)
|
||||
closure_put(cl);
|
||||
bio_endio(&parent->bio);
|
||||
return;
|
||||
}
|
||||
|
||||
btree_bounce_free(c,
|
||||
wbio->order,
|
||||
wbio->used_mempool,
|
||||
wbio->data);
|
||||
|
||||
if (wbio->replicas_failed) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&c->btree_write_error_lock, flags);
|
||||
bio_list_add(&c->read_retry_list, &wbio->bio);
|
||||
spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
|
||||
queue_work(c->wq, &c->btree_write_error_work);
|
||||
return;
|
||||
}
|
||||
|
||||
bio_put(bio);
|
||||
btree_node_write_done(c, b);
|
||||
if (cl)
|
||||
closure_put(cl);
|
||||
}
|
||||
|
||||
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
|
||||
@ -1411,7 +1461,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
struct closure *parent,
|
||||
enum six_lock_type lock_type_held)
|
||||
{
|
||||
struct bio *bio;
|
||||
struct bch_write_bio *wbio;
|
||||
struct bset_tree *t;
|
||||
struct bset *i;
|
||||
@ -1458,7 +1507,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
} while (cmpxchg_acquire(&b->flags, old, new) != old);
|
||||
|
||||
BUG_ON(!list_empty(&b->write_blocked));
|
||||
BUG_ON(!list_empty_careful(&b->reachable) != !b->written);
|
||||
BUG_ON((b->will_make_reachable != NULL) != !b->written);
|
||||
|
||||
BUG_ON(b->written >= c->sb.btree_node_size);
|
||||
BUG_ON(bset_written(b, btree_bset_last(b)));
|
||||
@ -1601,23 +1650,20 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
|
||||
trace_btree_write(b, bytes_to_write, sectors_to_write);
|
||||
|
||||
bio = bio_alloc_bioset(GFP_NOIO, 1 << order, &c->bio_write);
|
||||
|
||||
wbio = to_wbio(bio);
|
||||
wbio = wbio_init(bio_alloc_bioset(GFP_NOIO, 1 << order, &c->bio_write));
|
||||
wbio->cl = parent;
|
||||
wbio->bounce = true;
|
||||
wbio->put_bio = true;
|
||||
wbio->order = order;
|
||||
wbio->used_mempool = used_mempool;
|
||||
bio->bi_opf = REQ_OP_WRITE|REQ_META|REQ_FUA;
|
||||
bio->bi_iter.bi_size = sectors_to_write << 9;
|
||||
bio->bi_end_io = btree_node_write_endio;
|
||||
bio->bi_private = b;
|
||||
wbio->data = data;
|
||||
wbio->bio.bi_opf = REQ_OP_WRITE|REQ_META|REQ_FUA;
|
||||
wbio->bio.bi_iter.bi_size = sectors_to_write << 9;
|
||||
wbio->bio.bi_end_io = btree_node_write_endio;
|
||||
wbio->bio.bi_private = b;
|
||||
|
||||
if (parent)
|
||||
closure_get(parent);
|
||||
|
||||
bch2_bio_map(bio, data);
|
||||
bch2_bio_map(&wbio->bio, data);
|
||||
|
||||
/*
|
||||
* If we're appending to a leaf node, we don't technically need FUA -
|
||||
|
@ -37,7 +37,7 @@ static inline void btree_node_wait_on_io(struct btree *b)
|
||||
static inline bool btree_node_may_write(struct btree *b)
|
||||
{
|
||||
return list_empty_careful(&b->write_blocked) &&
|
||||
list_empty_careful(&b->reachable);
|
||||
!b->will_make_reachable;
|
||||
}
|
||||
|
||||
enum compact_mode {
|
||||
@ -79,6 +79,7 @@ int bch2_btree_root_read(struct bch_fs *, enum btree_id,
|
||||
|
||||
void bch2_btree_complete_write(struct bch_fs *, struct btree *,
|
||||
struct btree_write *);
|
||||
void bch2_btree_write_error_work(struct work_struct *);
|
||||
|
||||
void __bch2_btree_node_write(struct bch_fs *, struct btree *,
|
||||
struct closure *, enum six_lock_type);
|
||||
|
@ -252,6 +252,8 @@ static int __bch2_btree_iter_unlock(struct btree_iter *iter)
|
||||
while (iter->nodes_locked)
|
||||
btree_node_unlock(iter, __ffs(iter->nodes_locked));
|
||||
|
||||
iter->flags &= ~BTREE_ITER_UPTODATE;
|
||||
|
||||
return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
|
||||
}
|
||||
|
||||
@ -1006,16 +1008,30 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
|
||||
iter->flags |= BTREE_ITER_AT_END_OF_LEAF;
|
||||
|
||||
iter->pos = new_pos;
|
||||
iter->flags &= ~BTREE_ITER_UPTODATE;
|
||||
}
|
||||
|
||||
void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
|
||||
{
|
||||
EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0); /* XXX handle this */
|
||||
iter->pos = new_pos;
|
||||
iter->flags &= ~BTREE_ITER_UPTODATE;
|
||||
}
|
||||
|
||||
void bch2_btree_iter_advance_pos(struct btree_iter *iter)
|
||||
{
|
||||
if (iter->flags & BTREE_ITER_UPTODATE &&
|
||||
!(iter->flags & BTREE_ITER_WITH_HOLES)) {
|
||||
struct bkey_s_c k;
|
||||
|
||||
__btree_iter_advance(iter);
|
||||
k = __btree_iter_peek(iter);
|
||||
if (likely(k.k)) {
|
||||
iter->pos = bkey_start_pos(k.k);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We use iter->k instead of iter->pos for extents: iter->pos will be
|
||||
* equal to the start of the extent we returned, but we need to advance
|
||||
@ -1032,6 +1048,7 @@ void bch2_btree_iter_rewind(struct btree_iter *iter, struct bpos pos)
|
||||
BUG_ON(bkey_cmp(pos, iter->nodes[iter->level]->data->min_key) < 0);
|
||||
|
||||
iter->pos = pos;
|
||||
iter->flags &= ~BTREE_ITER_UPTODATE;
|
||||
__btree_iter_init(iter, iter->nodes[iter->level]);
|
||||
}
|
||||
|
||||
@ -1043,6 +1060,17 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
|
||||
(iter->btree_id == BTREE_ID_EXTENTS));
|
||||
|
||||
if (iter->flags & BTREE_ITER_UPTODATE) {
|
||||
struct btree *b = iter->nodes[0];
|
||||
struct bkey_packed *k =
|
||||
__bch2_btree_node_iter_peek_all(&iter->node_iters[0], b);
|
||||
|
||||
return (struct bkey_s_c) {
|
||||
.k = &iter->k,
|
||||
.v = bkeyp_val(&b->format, k)
|
||||
};
|
||||
}
|
||||
|
||||
while (1) {
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (unlikely(ret)) {
|
||||
@ -1058,7 +1086,9 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
*/
|
||||
if (!(iter->flags & BTREE_ITER_IS_EXTENTS) ||
|
||||
bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
|
||||
bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
|
||||
iter->pos = bkey_start_pos(k.k);
|
||||
|
||||
iter->flags |= BTREE_ITER_UPTODATE;
|
||||
return k;
|
||||
}
|
||||
|
||||
@ -1083,6 +1113,8 @@ struct bkey_s_c bch2_btree_iter_peek_with_holes(struct btree_iter *iter)
|
||||
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
|
||||
(iter->btree_id == BTREE_ID_EXTENTS));
|
||||
|
||||
iter->flags &= ~BTREE_ITER_UPTODATE;
|
||||
|
||||
while (1) {
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (unlikely(ret)) {
|
||||
@ -1131,12 +1163,15 @@ void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c,
|
||||
unsigned locks_want, unsigned depth,
|
||||
unsigned flags)
|
||||
{
|
||||
EBUG_ON(depth >= BTREE_MAX_DEPTH);
|
||||
EBUG_ON(locks_want > BTREE_MAX_DEPTH);
|
||||
|
||||
iter->c = c;
|
||||
iter->pos = pos;
|
||||
iter->flags = flags;
|
||||
iter->btree_id = btree_id;
|
||||
iter->level = depth;
|
||||
iter->locks_want = min(locks_want, BTREE_MAX_DEPTH);
|
||||
iter->locks_want = locks_want;
|
||||
iter->nodes_locked = 0;
|
||||
iter->nodes_intent_locked = 0;
|
||||
memset(iter->nodes, 0, sizeof(iter->nodes));
|
||||
|
@ -4,19 +4,20 @@
|
||||
#include "btree_types.h"
|
||||
|
||||
|
||||
#define BTREE_ITER_INTENT (1 << 0)
|
||||
#define BTREE_ITER_UPTODATE (1 << 0)
|
||||
#define BTREE_ITER_WITH_HOLES (1 << 1)
|
||||
#define BTREE_ITER_PREFETCH (1 << 2)
|
||||
#define BTREE_ITER_INTENT (1 << 2)
|
||||
#define BTREE_ITER_PREFETCH (1 << 3)
|
||||
/*
|
||||
* Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
|
||||
* @pos or the first key strictly greater than @pos
|
||||
*/
|
||||
#define BTREE_ITER_IS_EXTENTS (1 << 3)
|
||||
#define BTREE_ITER_IS_EXTENTS (1 << 4)
|
||||
/*
|
||||
* indicates we need to call bch2_btree_iter_traverse() to revalidate iterator:
|
||||
*/
|
||||
#define BTREE_ITER_AT_END_OF_LEAF (1 << 4)
|
||||
#define BTREE_ITER_ERROR (1 << 5)
|
||||
#define BTREE_ITER_AT_END_OF_LEAF (1 << 5)
|
||||
#define BTREE_ITER_ERROR (1 << 6)
|
||||
|
||||
/*
|
||||
* @pos - iterator's current position
|
||||
@ -223,17 +224,23 @@ static inline int btree_iter_cmp(const struct btree_iter *l,
|
||||
#define for_each_btree_node(_iter, _c, _btree_id, _start, _flags, _b) \
|
||||
__for_each_btree_node(_iter, _c, _btree_id, _start, 0, 0, _flags, _b)
|
||||
|
||||
static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
|
||||
unsigned flags)
|
||||
{
|
||||
return flags & BTREE_ITER_WITH_HOLES
|
||||
? bch2_btree_iter_peek_with_holes(iter)
|
||||
: bch2_btree_iter_peek(iter);
|
||||
}
|
||||
|
||||
#define for_each_btree_key(_iter, _c, _btree_id, _start, _flags, _k) \
|
||||
for (bch2_btree_iter_init((_iter), (_c), (_btree_id), \
|
||||
(_start), (_flags)); \
|
||||
!IS_ERR_OR_NULL(((_k) = (((_flags) & BTREE_ITER_WITH_HOLES)\
|
||||
? bch2_btree_iter_peek_with_holes(_iter)\
|
||||
: bch2_btree_iter_peek(_iter))).k); \
|
||||
for (bch2_btree_iter_init((_iter), (_c), (_btree_id), \
|
||||
(_start), (_flags)); \
|
||||
!IS_ERR_OR_NULL(((_k) = __bch2_btree_iter_peek(_iter, _flags)).k);\
|
||||
bch2_btree_iter_advance_pos(_iter))
|
||||
|
||||
static inline int btree_iter_err(struct bkey_s_c k)
|
||||
{
|
||||
return IS_ERR(k.k) ? PTR_ERR(k.k) : 0;
|
||||
return PTR_ERR_OR_ZERO(k.k);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -116,7 +116,7 @@ struct btree {
|
||||
* another write - because that write also won't yet be reachable and
|
||||
* marking it as completed before it's reachable would be incorrect:
|
||||
*/
|
||||
struct list_head reachable;
|
||||
struct btree_interior_update *will_make_reachable;
|
||||
|
||||
struct open_bucket *ob;
|
||||
|
||||
@ -143,7 +143,6 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \
|
||||
enum btree_flags {
|
||||
BTREE_NODE_read_in_flight,
|
||||
BTREE_NODE_read_error,
|
||||
BTREE_NODE_write_error,
|
||||
BTREE_NODE_dirty,
|
||||
BTREE_NODE_need_write,
|
||||
BTREE_NODE_noevict,
|
||||
@ -155,7 +154,6 @@ enum btree_flags {
|
||||
|
||||
BTREE_FLAG(read_in_flight);
|
||||
BTREE_FLAG(read_error);
|
||||
BTREE_FLAG(write_error);
|
||||
BTREE_FLAG(dirty);
|
||||
BTREE_FLAG(need_write);
|
||||
BTREE_FLAG(noevict);
|
||||
|
@ -21,6 +21,11 @@
|
||||
static void btree_interior_update_updated_root(struct bch_fs *,
|
||||
struct btree_interior_update *,
|
||||
enum btree_id);
|
||||
static void btree_interior_update_will_make_reachable(struct bch_fs *,
|
||||
struct btree_interior_update *,
|
||||
struct btree *);
|
||||
static void btree_interior_update_drop_new_node(struct bch_fs *,
|
||||
struct btree *);
|
||||
|
||||
/* Calculate ideal packed bkey format for new btree nodes: */
|
||||
|
||||
@ -166,7 +171,7 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b,
|
||||
BUG_ON(b == btree_node_root(c, b));
|
||||
BUG_ON(b->ob);
|
||||
BUG_ON(!list_empty(&b->write_blocked));
|
||||
BUG_ON(!list_empty(&b->reachable));
|
||||
BUG_ON(b->will_make_reachable);
|
||||
|
||||
clear_btree_node_noevict(b);
|
||||
|
||||
@ -191,6 +196,8 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
|
||||
{
|
||||
struct open_bucket *ob = b->ob;
|
||||
|
||||
btree_interior_update_drop_new_node(c, b);
|
||||
|
||||
b->ob = NULL;
|
||||
|
||||
clear_btree_node_dirty(b);
|
||||
@ -299,6 +306,7 @@ mem_alloc:
|
||||
|
||||
static struct btree *bch2_btree_node_alloc(struct bch_fs *c,
|
||||
unsigned level, enum btree_id id,
|
||||
struct btree_interior_update *as,
|
||||
struct btree_reserve *reserve)
|
||||
{
|
||||
struct btree *b;
|
||||
@ -322,7 +330,7 @@ static struct btree *bch2_btree_node_alloc(struct bch_fs *c,
|
||||
|
||||
bch2_btree_build_aux_trees(b);
|
||||
|
||||
bch2_check_mark_super(c, bkey_i_to_s_c_extent(&b->key), BCH_DATA_BTREE);
|
||||
btree_interior_update_will_make_reachable(c, as, b);
|
||||
|
||||
trace_btree_node_alloc(c, b);
|
||||
return b;
|
||||
@ -331,11 +339,12 @@ static struct btree *bch2_btree_node_alloc(struct bch_fs *c,
|
||||
struct btree *__bch2_btree_node_alloc_replacement(struct bch_fs *c,
|
||||
struct btree *b,
|
||||
struct bkey_format format,
|
||||
struct btree_interior_update *as,
|
||||
struct btree_reserve *reserve)
|
||||
{
|
||||
struct btree *n;
|
||||
|
||||
n = bch2_btree_node_alloc(c, b->level, b->btree_id, reserve);
|
||||
n = bch2_btree_node_alloc(c, b->level, b->btree_id, as, reserve);
|
||||
|
||||
n->data->min_key = b->data->min_key;
|
||||
n->data->max_key = b->data->max_key;
|
||||
@ -353,6 +362,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct bch_fs *c,
|
||||
|
||||
static struct btree *bch2_btree_node_alloc_replacement(struct bch_fs *c,
|
||||
struct btree *b,
|
||||
struct btree_interior_update *as,
|
||||
struct btree_reserve *reserve)
|
||||
{
|
||||
struct bkey_format new_f = bch2_btree_calc_format(b);
|
||||
@ -364,7 +374,7 @@ static struct btree *bch2_btree_node_alloc_replacement(struct bch_fs *c,
|
||||
if (!bch2_btree_node_format_fits(c, b, &new_f))
|
||||
new_f = b->format;
|
||||
|
||||
return __bch2_btree_node_alloc_replacement(c, b, new_f, reserve);
|
||||
return __bch2_btree_node_alloc_replacement(c, b, new_f, as, reserve);
|
||||
}
|
||||
|
||||
static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b,
|
||||
@ -478,9 +488,10 @@ static void bch2_btree_set_root(struct btree_iter *iter, struct btree *b,
|
||||
|
||||
static struct btree *__btree_root_alloc(struct bch_fs *c, unsigned level,
|
||||
enum btree_id id,
|
||||
struct btree_interior_update *as,
|
||||
struct btree_reserve *reserve)
|
||||
{
|
||||
struct btree *b = bch2_btree_node_alloc(c, level, id, reserve);
|
||||
struct btree *b = bch2_btree_node_alloc(c, level, id, as, reserve);
|
||||
|
||||
b->data->min_key = POS_MIN;
|
||||
b->data->max_key = POS_MAX;
|
||||
@ -581,6 +592,11 @@ static struct btree_reserve *__bch2_btree_reserve_get(struct bch_fs *c,
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
ret = bch2_check_mark_super(c, bkey_i_to_s_c_extent(&b->key),
|
||||
BCH_DATA_BTREE);
|
||||
if (ret)
|
||||
goto err_free;
|
||||
|
||||
reserve->b[reserve->nr++] = b;
|
||||
}
|
||||
|
||||
@ -608,11 +624,12 @@ struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
|
||||
int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
||||
struct closure *writes)
|
||||
{
|
||||
struct closure cl;
|
||||
struct btree_interior_update as;
|
||||
struct btree_reserve *reserve;
|
||||
struct closure cl;
|
||||
struct btree *b;
|
||||
LIST_HEAD(reachable_list);
|
||||
|
||||
memset(&as, 0, sizeof(as));
|
||||
closure_init_stack(&cl);
|
||||
|
||||
while (1) {
|
||||
@ -627,15 +644,14 @@ int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
||||
closure_sync(&cl);
|
||||
}
|
||||
|
||||
b = __btree_root_alloc(c, 0, id, reserve);
|
||||
list_add(&b->reachable, &reachable_list);
|
||||
b = __btree_root_alloc(c, 0, id, &as, reserve);
|
||||
|
||||
bch2_btree_node_write(c, b, writes, SIX_LOCK_intent);
|
||||
|
||||
bch2_btree_set_root_initial(c, b, reserve);
|
||||
bch2_btree_open_bucket_put(c, b);
|
||||
|
||||
list_del_init(&b->reachable);
|
||||
btree_interior_update_drop_new_node(c, b);
|
||||
bch2_btree_open_bucket_put(c, b);
|
||||
six_unlock_intent(&b->lock);
|
||||
|
||||
bch2_btree_reserve_put(c, reserve);
|
||||
@ -819,9 +835,12 @@ void bch2_btree_journal_key(struct btree_insert *trans,
|
||||
/* ick */
|
||||
insert->k.needs_whiteout = false;
|
||||
bch2_journal_add_keys(j, &trans->journal_res,
|
||||
b->btree_id, insert);
|
||||
b->btree_id, insert);
|
||||
insert->k.needs_whiteout = needs_whiteout;
|
||||
|
||||
bch2_journal_set_has_inode(j, &trans->journal_res,
|
||||
insert->k.p.inode);
|
||||
|
||||
if (trans->journal_seq)
|
||||
*trans->journal_seq = seq;
|
||||
btree_bset_last(b)->journal_seq = cpu_to_le64(seq);
|
||||
@ -891,7 +910,6 @@ bch2_btree_interior_update_alloc(struct bch_fs *c)
|
||||
as->c = c;
|
||||
as->mode = BTREE_INTERIOR_NO_UPDATE;
|
||||
INIT_LIST_HEAD(&as->write_blocked_list);
|
||||
INIT_LIST_HEAD(&as->reachable_list);
|
||||
|
||||
bch2_keylist_init(&as->parent_keys, as->inline_keys,
|
||||
ARRAY_SIZE(as->inline_keys));
|
||||
@ -916,16 +934,16 @@ static void btree_interior_update_nodes_reachable(struct closure *cl)
|
||||
struct btree_interior_update *as =
|
||||
container_of(cl, struct btree_interior_update, cl);
|
||||
struct bch_fs *c = as->c;
|
||||
unsigned i;
|
||||
|
||||
bch2_journal_pin_drop(&c->journal, &as->journal);
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
|
||||
while (!list_empty(&as->reachable_list)) {
|
||||
struct btree *b = list_first_entry(&as->reachable_list,
|
||||
struct btree, reachable);
|
||||
list_del_init(&b->reachable);
|
||||
while (as->nr_new_nodes) {
|
||||
struct btree *b = as->new_nodes[--as->nr_new_nodes];
|
||||
|
||||
BUG_ON(b->will_make_reachable != as);
|
||||
b->will_make_reachable = NULL;
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
||||
six_lock_read(&b->lock);
|
||||
@ -934,9 +952,8 @@ static void btree_interior_update_nodes_reachable(struct closure *cl)
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
}
|
||||
|
||||
for (i = 0; i < as->nr_pending; i++)
|
||||
bch2_btree_node_free_ondisk(c, &as->pending[i]);
|
||||
as->nr_pending = 0;
|
||||
while (as->nr_pending)
|
||||
bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending]);
|
||||
|
||||
list_del(&as->list);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
@ -1185,6 +1202,68 @@ static void btree_interior_update_updated_root(struct bch_fs *c,
|
||||
system_freezable_wq);
|
||||
}
|
||||
|
||||
static void btree_interior_update_will_make_reachable(struct bch_fs *c,
|
||||
struct btree_interior_update *as,
|
||||
struct btree *b)
|
||||
{
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes));
|
||||
BUG_ON(b->will_make_reachable);
|
||||
|
||||
as->new_nodes[as->nr_new_nodes++] = b;
|
||||
b->will_make_reachable = as;
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
}
|
||||
|
||||
static void __btree_interior_update_drop_new_node(struct btree *b)
|
||||
{
|
||||
struct btree_interior_update *as = b->will_make_reachable;
|
||||
unsigned i;
|
||||
|
||||
BUG_ON(!as);
|
||||
|
||||
for (i = 0; i < as->nr_new_nodes; i++)
|
||||
if (as->new_nodes[i] == b)
|
||||
goto found;
|
||||
|
||||
BUG();
|
||||
found:
|
||||
as->nr_new_nodes--;
|
||||
memmove(&as->new_nodes[i],
|
||||
&as->new_nodes[i + 1],
|
||||
sizeof(struct btree *) * (as->nr_new_nodes - i));
|
||||
b->will_make_reachable = NULL;
|
||||
}
|
||||
|
||||
static void btree_interior_update_drop_new_node(struct bch_fs *c,
|
||||
struct btree *b)
|
||||
{
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
__btree_interior_update_drop_new_node(b);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
}
|
||||
|
||||
static void bch2_btree_interior_update_add_node_reference(struct bch_fs *c,
|
||||
struct btree_interior_update *as,
|
||||
struct btree *b)
|
||||
{
|
||||
struct pending_btree_node_free *d;
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
|
||||
/* Add this node to the list of nodes being freed: */
|
||||
BUG_ON(as->nr_pending >= ARRAY_SIZE(as->pending));
|
||||
|
||||
d = &as->pending[as->nr_pending++];
|
||||
d->index_update_done = false;
|
||||
d->seq = b->data->keys.seq;
|
||||
d->btree_id = b->btree_id;
|
||||
d->level = b->level;
|
||||
bkey_copy(&d->key, &b->key);
|
||||
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* @b is being split/rewritten: it may have pointers to not-yet-written btree
|
||||
* nodes and thus outstanding btree_interior_updates - redirect @b's
|
||||
@ -1196,10 +1275,11 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
|
||||
{
|
||||
struct closure *cl, *cl_n;
|
||||
struct btree_interior_update *p, *n;
|
||||
struct pending_btree_node_free *d;
|
||||
struct btree_write *w;
|
||||
struct bset_tree *t;
|
||||
|
||||
bch2_btree_interior_update_add_node_reference(c, as, b);
|
||||
|
||||
/*
|
||||
* Does this node have data that hasn't been written in the journal?
|
||||
*
|
||||
@ -1213,16 +1293,6 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
|
||||
/* Add this node to the list of nodes being freed: */
|
||||
BUG_ON(as->nr_pending >= ARRAY_SIZE(as->pending));
|
||||
|
||||
d = &as->pending[as->nr_pending++];
|
||||
d->index_update_done = false;
|
||||
d->seq = b->data->keys.seq;
|
||||
d->btree_id = b->btree_id;
|
||||
d->level = b->level;
|
||||
bkey_copy(&d->key, &b->key);
|
||||
|
||||
/*
|
||||
* Does this node have any btree_interior_update operations preventing
|
||||
* it from being written?
|
||||
@ -1255,8 +1325,13 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
|
||||
&as->journal, interior_update_flush);
|
||||
bch2_journal_pin_drop(&c->journal, &w->journal);
|
||||
|
||||
if (!list_empty(&b->reachable))
|
||||
list_del_init(&b->reachable);
|
||||
w = btree_prev_write(b);
|
||||
bch2_journal_pin_add_if_older(&c->journal, &w->journal,
|
||||
&as->journal, interior_update_flush);
|
||||
bch2_journal_pin_drop(&c->journal, &w->journal);
|
||||
|
||||
if (b->will_make_reachable)
|
||||
__btree_interior_update_drop_new_node(b);
|
||||
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
}
|
||||
@ -1301,7 +1376,7 @@ err:
|
||||
#endif
|
||||
}
|
||||
|
||||
static enum btree_insert_ret
|
||||
static int
|
||||
bch2_btree_insert_keys_interior(struct btree *b,
|
||||
struct btree_iter *iter,
|
||||
struct keylist *insert_keys,
|
||||
@ -1324,7 +1399,7 @@ bch2_btree_insert_keys_interior(struct btree *b,
|
||||
if (bch_keylist_u64s(insert_keys) >
|
||||
bch_btree_keys_u64s_remaining(c, b)) {
|
||||
bch2_btree_node_unlock_write(b, iter);
|
||||
return BTREE_INSERT_BTREE_NODE_FULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Don't screw up @iter's position: */
|
||||
@ -1362,7 +1437,7 @@ bch2_btree_insert_keys_interior(struct btree *b,
|
||||
bch2_btree_node_unlock_write(b, iter);
|
||||
|
||||
btree_node_interior_verify(b);
|
||||
return BTREE_INSERT_OK;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1373,13 +1448,13 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n
|
||||
struct btree_reserve *reserve,
|
||||
struct btree_interior_update *as)
|
||||
{
|
||||
struct bch_fs *c = iter->c;
|
||||
size_t nr_packed = 0, nr_unpacked = 0;
|
||||
struct btree *n2;
|
||||
struct bset *set1, *set2;
|
||||
struct bkey_packed *k, *prev = NULL;
|
||||
|
||||
n2 = bch2_btree_node_alloc(iter->c, n1->level, iter->btree_id, reserve);
|
||||
list_add(&n2->reachable, &as->reachable_list);
|
||||
n2 = bch2_btree_node_alloc(c, n1->level, iter->btree_id, as, reserve);
|
||||
|
||||
n2->data->max_key = n1->data->max_key;
|
||||
n2->data->format = n1->format;
|
||||
@ -1528,8 +1603,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
|
||||
|
||||
bch2_btree_interior_update_will_free_node(c, as, b);
|
||||
|
||||
n1 = bch2_btree_node_alloc_replacement(c, b, reserve);
|
||||
list_add(&n1->reachable, &as->reachable_list);
|
||||
n1 = bch2_btree_node_alloc_replacement(c, b, as, reserve);
|
||||
|
||||
if (b->level)
|
||||
btree_split_insert_keys(iter, n1, insert_keys, reserve);
|
||||
@ -1558,8 +1632,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
|
||||
/* Depth increases, make a new root */
|
||||
n3 = __btree_root_alloc(c, b->level + 1,
|
||||
iter->btree_id,
|
||||
reserve);
|
||||
list_add(&n3->reachable, &as->reachable_list);
|
||||
as, reserve);
|
||||
|
||||
n3->sib_u64s[0] = U16_MAX;
|
||||
n3->sib_u64s[1] = U16_MAX;
|
||||
@ -1641,16 +1714,10 @@ void bch2_btree_insert_node(struct btree *b,
|
||||
BUG_ON(!b->level);
|
||||
BUG_ON(!reserve || !as);
|
||||
|
||||
switch (bch2_btree_insert_keys_interior(b, iter, insert_keys,
|
||||
as, reserve)) {
|
||||
case BTREE_INSERT_OK:
|
||||
break;
|
||||
case BTREE_INSERT_BTREE_NODE_FULL:
|
||||
if ((as->flags & BTREE_INTERIOR_UPDATE_MUST_REWRITE) ||
|
||||
bch2_btree_insert_keys_interior(b, iter, insert_keys,
|
||||
as, reserve))
|
||||
btree_split(b, iter, insert_keys, reserve, as);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static int bch2_btree_split_leaf(struct btree_iter *iter, unsigned flags)
|
||||
@ -1859,8 +1926,7 @@ retry:
|
||||
bch2_btree_interior_update_will_free_node(c, as, b);
|
||||
bch2_btree_interior_update_will_free_node(c, as, m);
|
||||
|
||||
n = bch2_btree_node_alloc(c, b->level, b->btree_id, reserve);
|
||||
list_add(&n->reachable, &as->reachable_list);
|
||||
n = bch2_btree_node_alloc(c, b->level, b->btree_id, as, reserve);
|
||||
|
||||
n->data->min_key = prev->data->min_key;
|
||||
n->data->max_key = next->data->max_key;
|
||||
@ -1945,6 +2011,8 @@ btree_insert_key(struct btree_insert *trans,
|
||||
int old_live_u64s = b->nr.live_u64s;
|
||||
int live_u64s_added, u64s_added;
|
||||
|
||||
iter->flags &= ~BTREE_ITER_UPTODATE;
|
||||
|
||||
ret = !btree_node_is_extents(b)
|
||||
? bch2_insert_fixup_key(trans, insert)
|
||||
: bch2_insert_fixup_extent(trans, insert);
|
||||
@ -2383,8 +2451,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
|
||||
|
||||
bch2_btree_interior_update_will_free_node(c, as, b);
|
||||
|
||||
n = bch2_btree_node_alloc_replacement(c, b, reserve);
|
||||
list_add(&n->reachable, &as->reachable_list);
|
||||
n = bch2_btree_node_alloc_replacement(c, b, as, reserve);
|
||||
|
||||
bch2_btree_build_aux_trees(n);
|
||||
six_unlock_write(&n->lock);
|
||||
@ -2464,3 +2531,140 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
|
||||
closure_sync(&cl);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_btree_node_update_key(struct bch_fs *c, struct btree *b,
|
||||
struct bkey_i_extent *new_key)
|
||||
{
|
||||
struct btree_interior_update *as;
|
||||
struct btree_reserve *reserve = NULL;
|
||||
struct btree *parent, *new_hash = NULL;
|
||||
struct btree_iter iter;
|
||||
struct closure cl;
|
||||
bool must_rewrite_parent = false;
|
||||
int ret;
|
||||
|
||||
__bch2_btree_iter_init(&iter, c, b->btree_id, b->key.k.p,
|
||||
BTREE_MAX_DEPTH,
|
||||
b->level, 0);
|
||||
closure_init_stack(&cl);
|
||||
|
||||
if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
|
||||
/* bch2_btree_reserve_get will unlock */
|
||||
do {
|
||||
ret = bch2_btree_node_cannibalize_lock(c, &cl);
|
||||
closure_sync(&cl);
|
||||
} while (ret == -EAGAIN);
|
||||
|
||||
BUG_ON(ret);
|
||||
|
||||
new_hash = bch2_btree_node_mem_alloc(c);
|
||||
}
|
||||
retry:
|
||||
reserve = bch2_btree_reserve_get(c, b, 0,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
BTREE_INSERT_USE_ALLOC_RESERVE,
|
||||
&cl);
|
||||
closure_sync(&cl);
|
||||
if (IS_ERR(reserve)) {
|
||||
ret = PTR_ERR(reserve);
|
||||
if (ret == -EAGAIN || ret == -EINTR)
|
||||
goto retry;
|
||||
goto err;
|
||||
}
|
||||
|
||||
down_read(&c->gc_lock);
|
||||
|
||||
ret = bch2_btree_iter_traverse(&iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
|
||||
/*
|
||||
* Two corner cases that need to be thought about here:
|
||||
*
|
||||
* @b may not be reachable yet - there might be another interior update
|
||||
* operation waiting on @b to be written, and we're gonna deliver the
|
||||
* write completion to that interior update operation _before_
|
||||
* persisting the new_key update
|
||||
*
|
||||
* That ends up working without us having to do anything special here:
|
||||
* the reason is, we do kick off (and do the in memory updates) for the
|
||||
* update for @new_key before we return, creating a new interior_update
|
||||
* operation here.
|
||||
*
|
||||
* The new interior update operation here will in effect override the
|
||||
* previous one. The previous one was going to terminate - make @b
|
||||
* reachable - in one of two ways:
|
||||
* - updating the btree root pointer
|
||||
* In that case,
|
||||
* no, this doesn't work. argh.
|
||||
*/
|
||||
|
||||
if (b->will_make_reachable)
|
||||
must_rewrite_parent = true;
|
||||
|
||||
/* other case: btree node being freed */
|
||||
if (iter.nodes[b->level] != b) {
|
||||
/* node has been freed: */
|
||||
BUG_ON(btree_node_hashed(b));
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
goto err;
|
||||
}
|
||||
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
||||
ret = bch2_check_mark_super(c, extent_i_to_s_c(new_key), BCH_DATA_BTREE);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
as = bch2_btree_interior_update_alloc(c);
|
||||
|
||||
if (must_rewrite_parent)
|
||||
as->flags |= BTREE_INTERIOR_UPDATE_MUST_REWRITE;
|
||||
|
||||
bch2_btree_interior_update_add_node_reference(c, as, b);
|
||||
|
||||
if (new_hash) {
|
||||
bkey_copy(&new_hash->key, &new_key->k_i);
|
||||
BUG_ON(bch2_btree_node_hash_insert(c, new_hash,
|
||||
b->level, b->btree_id));
|
||||
}
|
||||
|
||||
parent = iter.nodes[b->level + 1];
|
||||
if (parent) {
|
||||
bch2_btree_insert_node(parent, &iter,
|
||||
&keylist_single(&b->key),
|
||||
reserve, as);
|
||||
} else {
|
||||
bch2_btree_set_root(&iter, b, as, reserve);
|
||||
}
|
||||
|
||||
if (new_hash) {
|
||||
mutex_lock(&c->btree_cache_lock);
|
||||
bch2_btree_node_hash_remove(c, b);
|
||||
|
||||
bkey_copy(&b->key, &new_key->k_i);
|
||||
__bch2_btree_node_hash_insert(c, b);
|
||||
|
||||
bch2_btree_node_hash_remove(c, new_hash);
|
||||
mutex_unlock(&c->btree_cache_lock);
|
||||
} else {
|
||||
bkey_copy(&b->key, &new_key->k_i);
|
||||
}
|
||||
err:
|
||||
if (!IS_ERR_OR_NULL(reserve))
|
||||
bch2_btree_reserve_put(c, reserve);
|
||||
if (new_hash) {
|
||||
mutex_lock(&c->btree_cache_lock);
|
||||
list_move(&b->list, &c->btree_cache_freeable);
|
||||
mutex_unlock(&c->btree_cache_lock);
|
||||
|
||||
six_unlock_write(&new_hash->lock);
|
||||
six_unlock_intent(&new_hash->lock);
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
up_read(&c->gc_lock);
|
||||
return ret;
|
||||
}
|
||||
|
@ -76,6 +76,9 @@ struct btree_interior_update {
|
||||
BTREE_INTERIOR_UPDATING_AS,
|
||||
} mode;
|
||||
|
||||
unsigned flags;
|
||||
struct btree_reserve *reserve;
|
||||
|
||||
/*
|
||||
* BTREE_INTERIOR_UPDATING_NODE:
|
||||
* The update that made the new nodes visible was a regular update to an
|
||||
@ -86,7 +89,6 @@ struct btree_interior_update {
|
||||
*/
|
||||
struct btree *b;
|
||||
struct list_head write_blocked_list;
|
||||
struct list_head reachable_list;
|
||||
|
||||
/*
|
||||
* BTREE_INTERIOR_UPDATING_AS: btree node we updated was freed, so now
|
||||
@ -117,6 +119,10 @@ struct btree_interior_update {
|
||||
struct pending_btree_node_free pending[BTREE_MAX_DEPTH + GC_MERGE_NODES];
|
||||
unsigned nr_pending;
|
||||
|
||||
/* New nodes, that will be made reachable by this update: */
|
||||
struct btree *new_nodes[BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES];
|
||||
unsigned nr_new_nodes;
|
||||
|
||||
/* Only here to reduce stack usage on recursive splits: */
|
||||
struct keylist parent_keys;
|
||||
/*
|
||||
@ -127,6 +133,8 @@ struct btree_interior_update {
|
||||
u64 inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3];
|
||||
};
|
||||
|
||||
#define BTREE_INTERIOR_UPDATE_MUST_REWRITE (1 << 0)
|
||||
|
||||
#define for_each_pending_btree_node_free(c, as, p) \
|
||||
list_for_each_entry(as, &c->btree_interior_update_list, list) \
|
||||
for (p = as->pending; p < as->pending + as->nr_pending; p++)
|
||||
@ -138,6 +146,7 @@ void bch2_btree_open_bucket_put(struct bch_fs *c, struct btree *);
|
||||
struct btree *__bch2_btree_node_alloc_replacement(struct bch_fs *,
|
||||
struct btree *,
|
||||
struct bkey_format,
|
||||
struct btree_interior_update *,
|
||||
struct btree_reserve *);
|
||||
|
||||
struct btree_interior_update *
|
||||
@ -426,6 +435,8 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
|
||||
|
||||
int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
|
||||
__le64, unsigned);
|
||||
int bch2_btree_node_update_key(struct bch_fs *, struct btree *,
|
||||
struct bkey_i_extent *);
|
||||
|
||||
#endif /* _BCACHE_BTREE_INSERT_H */
|
||||
|
||||
|
@ -153,6 +153,37 @@ unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k)
|
||||
return nr_ptrs;
|
||||
}
|
||||
|
||||
/* Doesn't cleanup redundant crcs */
|
||||
void __bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
|
||||
{
|
||||
EBUG_ON(ptr < &e.v->start->ptr ||
|
||||
ptr >= &extent_entry_last(e)->ptr);
|
||||
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
|
||||
memmove_u64s_down(ptr, ptr + 1,
|
||||
(u64 *) extent_entry_last(e) - (u64 *) (ptr + 1));
|
||||
e.k->u64s -= sizeof(*ptr) / sizeof(u64);
|
||||
}
|
||||
|
||||
void bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
|
||||
{
|
||||
__bch2_extent_drop_ptr(e, ptr);
|
||||
bch2_extent_drop_redundant_crcs(e);
|
||||
}
|
||||
|
||||
void bch2_extent_drop_ptr_idx(struct bkey_s_extent e, unsigned idx)
|
||||
{
|
||||
struct bch_extent_ptr *ptr;
|
||||
unsigned i = 0;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (i++ == idx)
|
||||
goto found;
|
||||
|
||||
BUG();
|
||||
found:
|
||||
bch2_extent_drop_ptr(e, ptr);
|
||||
}
|
||||
|
||||
/* returns true if equal */
|
||||
static bool crc_cmp(union bch_extent_crc *l, union bch_extent_crc *r)
|
||||
{
|
||||
|
@ -552,24 +552,9 @@ static inline unsigned extent_current_nonce(struct bkey_s_c_extent e)
|
||||
void bch2_extent_narrow_crcs(struct bkey_s_extent);
|
||||
void bch2_extent_drop_redundant_crcs(struct bkey_s_extent);
|
||||
|
||||
/* Doesn't cleanup redundant crcs */
|
||||
static inline void __bch2_extent_drop_ptr(struct bkey_s_extent e,
|
||||
struct bch_extent_ptr *ptr)
|
||||
{
|
||||
EBUG_ON(ptr < &e.v->start->ptr ||
|
||||
ptr >= &extent_entry_last(e)->ptr);
|
||||
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
|
||||
memmove_u64s_down(ptr, ptr + 1,
|
||||
(u64 *) extent_entry_last(e) - (u64 *) (ptr + 1));
|
||||
e.k->u64s -= sizeof(*ptr) / sizeof(u64);
|
||||
}
|
||||
|
||||
static inline void bch2_extent_drop_ptr(struct bkey_s_extent e,
|
||||
struct bch_extent_ptr *ptr)
|
||||
{
|
||||
__bch2_extent_drop_ptr(e, ptr);
|
||||
bch2_extent_drop_redundant_crcs(e);
|
||||
}
|
||||
void __bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
|
||||
void bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
|
||||
void bch2_extent_drop_ptr_idx(struct bkey_s_extent, unsigned);
|
||||
|
||||
const struct bch_extent_ptr *
|
||||
bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
|
||||
|
@ -871,9 +871,8 @@ static void bch2_writepage_io_free(struct closure *cl)
|
||||
{
|
||||
struct bch_writepage_io *io = container_of(cl,
|
||||
struct bch_writepage_io, cl);
|
||||
struct bio *bio = &io->bio.bio;
|
||||
|
||||
bio_put(bio);
|
||||
bio_put(&io->op.op.wbio.bio);
|
||||
}
|
||||
|
||||
static void bch2_writepage_io_done(struct closure *cl)
|
||||
@ -881,7 +880,7 @@ static void bch2_writepage_io_done(struct closure *cl)
|
||||
struct bch_writepage_io *io = container_of(cl,
|
||||
struct bch_writepage_io, cl);
|
||||
struct bch_fs *c = io->op.op.c;
|
||||
struct bio *bio = &io->bio.bio;
|
||||
struct bio *bio = &io->op.op.wbio.bio;
|
||||
struct bio_vec *bvec;
|
||||
unsigned i;
|
||||
|
||||
@ -940,11 +939,12 @@ static void bch2_writepage_io_done(struct closure *cl)
|
||||
static void bch2_writepage_do_io(struct bch_writepage_state *w)
|
||||
{
|
||||
struct bch_writepage_io *io = w->io;
|
||||
struct bio *bio = &io->op.op.wbio.bio;
|
||||
|
||||
w->io = NULL;
|
||||
atomic_add(io->bio.bio.bi_vcnt, &io->op.op.c->writeback_pages);
|
||||
atomic_add(bio->bi_vcnt, &io->op.op.c->writeback_pages);
|
||||
|
||||
io->op.op.pos.offset = io->bio.bio.bi_iter.bi_sector;
|
||||
io->op.op.pos.offset = bio->bi_iter.bi_sector;
|
||||
|
||||
closure_call(&io->op.op.cl, bch2_write, NULL, &io->cl);
|
||||
continue_at(&io->cl, bch2_writepage_io_done, NULL);
|
||||
@ -970,13 +970,13 @@ alloc_io:
|
||||
w->io = container_of(bio_alloc_bioset(GFP_NOFS,
|
||||
BIO_MAX_PAGES,
|
||||
bch2_writepage_bioset),
|
||||
struct bch_writepage_io, bio.bio);
|
||||
struct bch_writepage_io, op.op.wbio.bio);
|
||||
|
||||
closure_init(&w->io->cl, NULL);
|
||||
w->io->op.ei = ei;
|
||||
w->io->op.sectors_added = 0;
|
||||
w->io->op.is_dio = false;
|
||||
bch2_write_op_init(&w->io->op.op, c, &w->io->bio,
|
||||
bch2_write_op_init(&w->io->op.op, c,
|
||||
(struct disk_reservation) {
|
||||
.nr_replicas = c->opts.data_replicas,
|
||||
},
|
||||
@ -987,7 +987,7 @@ alloc_io:
|
||||
}
|
||||
|
||||
if (w->io->op.op.res.nr_replicas != nr_replicas ||
|
||||
bio_add_page_contig(&w->io->bio.bio, page)) {
|
||||
bio_add_page_contig(&w->io->op.op.wbio.bio, page)) {
|
||||
bch2_writepage_do_io(w);
|
||||
goto alloc_io;
|
||||
}
|
||||
@ -1038,7 +1038,7 @@ do_io:
|
||||
w->io->op.new_i_size = i_size;
|
||||
|
||||
if (wbc->sync_mode == WB_SYNC_ALL)
|
||||
w->io->bio.bio.bi_opf |= REQ_SYNC;
|
||||
w->io->op.op.wbio.bio.bi_opf |= REQ_SYNC;
|
||||
|
||||
/* Before unlocking the page, transfer reservation to w->io: */
|
||||
old = page_state_cmpxchg(page_state(page), new, {
|
||||
@ -1110,7 +1110,7 @@ get_pages:
|
||||
done_index = page->index;
|
||||
|
||||
if (w.io &&
|
||||
!bio_can_add_page_contig(&w.io->bio.bio, page))
|
||||
!bio_can_add_page_contig(&w.io->op.op.wbio.bio, page))
|
||||
bch2_writepage_do_io(&w);
|
||||
|
||||
if (!w.io &&
|
||||
@ -1495,7 +1495,7 @@ static long __bch2_dio_write_complete(struct dio_write *dio)
|
||||
if (dio->iovec && dio->iovec != dio->inline_vecs)
|
||||
kfree(dio->iovec);
|
||||
|
||||
bio_put(&dio->bio.bio);
|
||||
bio_put(&dio->iop.op.wbio.bio);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1517,11 +1517,11 @@ static void bch2_dio_write_done(struct dio_write *dio)
|
||||
if (dio->iop.op.error)
|
||||
dio->error = dio->iop.op.error;
|
||||
|
||||
bio_for_each_segment_all(bv, &dio->bio.bio, i)
|
||||
bio_for_each_segment_all(bv, &dio->iop.op.wbio.bio, i)
|
||||
put_page(bv->bv_page);
|
||||
|
||||
if (dio->iter.count)
|
||||
bio_reset(&dio->bio.bio);
|
||||
bio_reset(&dio->iop.op.wbio.bio);
|
||||
}
|
||||
|
||||
static void bch2_do_direct_IO_write(struct dio_write *dio)
|
||||
@ -1529,7 +1529,7 @@ static void bch2_do_direct_IO_write(struct dio_write *dio)
|
||||
struct file *file = dio->req->ki_filp;
|
||||
struct inode *inode = file->f_inode;
|
||||
struct bch_inode_info *ei = to_bch_ei(inode);
|
||||
struct bio *bio = &dio->bio.bio;
|
||||
struct bio *bio = &dio->iop.op.wbio.bio;
|
||||
unsigned flags = 0;
|
||||
int ret;
|
||||
|
||||
@ -1537,8 +1537,6 @@ static void bch2_do_direct_IO_write(struct dio_write *dio)
|
||||
!dio->c->opts.journal_flush_disabled)
|
||||
flags |= BCH_WRITE_FLUSH;
|
||||
|
||||
bio->bi_iter.bi_sector = (dio->offset + dio->written) >> 9;
|
||||
|
||||
ret = bio_iov_iter_get_pages(bio, &dio->iter);
|
||||
if (ret < 0) {
|
||||
/*
|
||||
@ -1555,10 +1553,9 @@ static void bch2_do_direct_IO_write(struct dio_write *dio)
|
||||
dio->iop.sectors_added = 0;
|
||||
dio->iop.is_dio = true;
|
||||
dio->iop.new_i_size = U64_MAX;
|
||||
bch2_write_op_init(&dio->iop.op, dio->c, &dio->bio,
|
||||
dio->res,
|
||||
bch2_write_op_init(&dio->iop.op, dio->c, dio->res,
|
||||
foreground_write_point(dio->c, inode->i_ino),
|
||||
POS(inode->i_ino, bio->bi_iter.bi_sector),
|
||||
POS(inode->i_ino, (dio->offset + dio->written) >> 9),
|
||||
&ei->journal_seq, flags);
|
||||
dio->iop.op.index_update_fn = bchfs_write_index_update;
|
||||
|
||||
@ -1619,7 +1616,7 @@ static int bch2_direct_IO_write(struct bch_fs *c, struct kiocb *req,
|
||||
bio = bio_alloc_bioset(GFP_KERNEL,
|
||||
iov_iter_npages(iter, BIO_MAX_PAGES),
|
||||
bch2_dio_write_bioset);
|
||||
dio = container_of(bio, struct dio_write, bio.bio);
|
||||
dio = container_of(bio, struct dio_write, iop.op.wbio.bio);
|
||||
dio->req = req;
|
||||
dio->c = c;
|
||||
dio->written = 0;
|
||||
|
@ -46,16 +46,16 @@ struct bchfs_write_op {
|
||||
s64 sectors_added;
|
||||
bool is_dio;
|
||||
u64 new_i_size;
|
||||
|
||||
/* must be last: */
|
||||
struct bch_write_op op;
|
||||
};
|
||||
|
||||
struct bch_writepage_io {
|
||||
struct closure cl;
|
||||
|
||||
/* must be last: */
|
||||
struct bchfs_write_op op;
|
||||
|
||||
/* must come last: */
|
||||
struct bch_write_bio bio;
|
||||
};
|
||||
|
||||
extern struct bio_set *bch2_writepage_bioset;
|
||||
@ -76,10 +76,8 @@ struct dio_write {
|
||||
|
||||
struct mm_struct *mm;
|
||||
|
||||
struct bchfs_write_op iop;
|
||||
|
||||
/* must be last: */
|
||||
struct bch_write_bio bio;
|
||||
struct bchfs_write_op iop;
|
||||
};
|
||||
|
||||
extern struct bio_set *bch2_dio_write_bioset;
|
||||
|
@ -1458,7 +1458,7 @@ int __init bch2_vfs_init(void)
|
||||
goto err;
|
||||
|
||||
bch2_writepage_bioset =
|
||||
bioset_create(4, offsetof(struct bch_writepage_io, bio.bio));
|
||||
bioset_create(4, offsetof(struct bch_writepage_io, op.op.wbio.bio));
|
||||
if (!bch2_writepage_bioset)
|
||||
goto err;
|
||||
|
||||
@ -1466,7 +1466,8 @@ int __init bch2_vfs_init(void)
|
||||
if (!bch2_dio_read_bioset)
|
||||
goto err;
|
||||
|
||||
bch2_dio_write_bioset = bioset_create(4, offsetof(struct dio_write, bio.bio));
|
||||
bch2_dio_write_bioset =
|
||||
bioset_create(4, offsetof(struct dio_write, iop.op.wbio.bio));
|
||||
if (!bch2_dio_write_bioset)
|
||||
goto err;
|
||||
|
||||
|
libbcachefs/io.c
@ -92,12 +92,10 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
|
||||
const struct bch_extent_ptr *ptr;
|
||||
struct bch_write_bio *n;
|
||||
struct bch_dev *ca;
|
||||
unsigned ptr_idx = 0;
|
||||
|
||||
BUG_ON(c->opts.nochanges);
|
||||
|
||||
wbio->split = false;
|
||||
wbio->c = c;
|
||||
|
||||
extent_for_each_ptr(e, ptr) {
|
||||
ca = c->devs[ptr->dev];
|
||||
|
||||
@ -107,24 +105,26 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
|
||||
|
||||
n->bio.bi_end_io = wbio->bio.bi_end_io;
|
||||
n->bio.bi_private = wbio->bio.bi_private;
|
||||
n->c = c;
|
||||
n->orig = &wbio->bio;
|
||||
n->bounce = false;
|
||||
n->parent = wbio;
|
||||
n->split = true;
|
||||
n->bounce = false;
|
||||
n->put_bio = true;
|
||||
n->bio.bi_opf = wbio->bio.bi_opf;
|
||||
__bio_inc_remaining(n->orig);
|
||||
__bio_inc_remaining(&wbio->bio);
|
||||
} else {
|
||||
n = wbio;
|
||||
n->split = false;
|
||||
}
|
||||
|
||||
n->c = c;
|
||||
n->ca = ca;
|
||||
n->ptr_idx = ptr_idx++;
|
||||
n->submit_time_us = local_clock_us();
|
||||
n->bio.bi_iter.bi_sector = ptr->offset;
|
||||
|
||||
if (!journal_flushes_device(ca))
|
||||
n->bio.bi_opf |= REQ_FUA;
|
||||
|
||||
n->ca = ca;
|
||||
n->submit_time_us = local_clock_us();
|
||||
n->bio.bi_iter.bi_sector = ptr->offset;
|
||||
|
||||
if (likely(percpu_ref_tryget(&ca->io_ref))) {
|
||||
n->have_io_ref = true;
|
||||
n->bio.bi_bdev = ca->disk_sb.bdev;
|
||||
@ -250,10 +250,9 @@ static void bch2_write_index(struct closure *cl)
|
||||
static void bch2_write_discard(struct closure *cl)
|
||||
{
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
struct bio *bio = &op->bio->bio;
|
||||
struct bpos end = op->pos;
|
||||
|
||||
end.offset += bio_sectors(bio);
|
||||
end.offset += bio_sectors(&op->wbio.bio);
|
||||
|
||||
op->error = bch2_discard(op->c, op->pos, end, op->version,
|
||||
&op->res, NULL, NULL);
|
||||
@ -308,31 +307,28 @@ static void bch2_write_io_error(struct closure *cl)
|
||||
|
||||
static void bch2_write_endio(struct bio *bio)
|
||||
{
|
||||
struct closure *cl = bio->bi_private;
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
struct bch_write_bio *wbio = to_wbio(bio);
|
||||
struct bch_fs *c = wbio->c;
|
||||
struct bio *orig = wbio->orig;
|
||||
struct bch_dev *ca = wbio->ca;
|
||||
struct closure *cl = bio->bi_private;
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
struct bch_write_bio *wbio = to_wbio(bio);
|
||||
struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL;
|
||||
struct bch_fs *c = wbio->c;
|
||||
struct bch_dev *ca = wbio->ca;
|
||||
|
||||
if (bch2_dev_nonfatal_io_err_on(bio->bi_error, ca,
|
||||
"data write"))
|
||||
"data write"))
|
||||
set_closure_fn(cl, bch2_write_io_error, index_update_wq(op));
|
||||
|
||||
if (wbio->have_io_ref)
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
|
||||
if (bio->bi_error && orig)
|
||||
orig->bi_error = bio->bi_error;
|
||||
|
||||
if (wbio->bounce)
|
||||
bch2_bio_free_pages_pool(c, bio);
|
||||
|
||||
if (wbio->put_bio)
|
||||
bio_put(bio);
|
||||
|
||||
if (orig)
|
||||
bio_endio(orig);
|
||||
if (parent)
|
||||
bio_endio(&parent->bio);
|
||||
else
|
||||
closure_put(cl);
|
||||
}
|
||||
@ -380,11 +376,10 @@ static void init_append_extent(struct bch_write_op *op,
|
||||
bch2_keylist_push(&op->insert_keys);
|
||||
}
|
||||
|
||||
static int bch2_write_extent(struct bch_write_op *op,
|
||||
struct open_bucket *ob,
|
||||
struct bio *orig)
|
||||
static int bch2_write_extent(struct bch_write_op *op, struct open_bucket *ob)
|
||||
{
|
||||
struct bch_fs *c = op->c;
|
||||
struct bio *orig = &op->wbio.bio;
|
||||
struct bio *bio;
|
||||
struct bch_write_bio *wbio;
|
||||
unsigned key_to_write_offset = op->insert_keys.top_p -
|
||||
@ -392,11 +387,13 @@ static int bch2_write_extent(struct bch_write_op *op,
|
||||
struct bkey_i *key_to_write;
|
||||
unsigned csum_type = op->csum_type;
|
||||
unsigned compression_type = op->compression_type;
|
||||
int ret;
|
||||
int ret, more;
|
||||
|
||||
/* don't refetch csum type/compression type */
|
||||
barrier();
|
||||
|
||||
BUG_ON(!bio_sectors(orig));
|
||||
|
||||
/* Need to decompress data? */
|
||||
if ((op->flags & BCH_WRITE_DATA_COMPRESSED) &&
|
||||
(crc_uncompressed_size(NULL, &op->crc) != op->size ||
|
||||
@ -421,11 +418,8 @@ static int bch2_write_extent(struct bch_write_op *op,
|
||||
ob);
|
||||
|
||||
bio = orig;
|
||||
wbio = to_wbio(bio);
|
||||
wbio->orig = NULL;
|
||||
wbio->bounce = false;
|
||||
wbio->put_bio = false;
|
||||
ret = 0;
|
||||
wbio = wbio_init(bio);
|
||||
more = 0;
|
||||
} else if (csum_type != BCH_CSUM_NONE ||
|
||||
compression_type != BCH_COMPRESSION_NONE) {
|
||||
/* all units here in bytes */
|
||||
@ -439,19 +433,18 @@ static int bch2_write_extent(struct bch_write_op *op,
|
||||
bio = bio_alloc_bioset(GFP_NOIO,
|
||||
DIV_ROUND_UP(output_available, PAGE_SIZE),
|
||||
&c->bio_write);
|
||||
wbio = wbio_init(bio);
|
||||
wbio->bounce = true;
|
||||
wbio->put_bio = true;
|
||||
/* copy WRITE_SYNC flag */
|
||||
wbio->bio.bi_opf = orig->bi_opf;
|
||||
|
||||
/*
|
||||
* XXX: can't use mempool for more than
|
||||
* BCH_COMPRESSED_EXTENT_MAX worth of pages
|
||||
*/
|
||||
bch2_bio_alloc_pages_pool(c, bio, output_available);
|
||||
|
||||
/* copy WRITE_SYNC flag */
|
||||
bio->bi_opf = orig->bi_opf;
|
||||
wbio = to_wbio(bio);
|
||||
wbio->orig = NULL;
|
||||
wbio->bounce = true;
|
||||
wbio->put_bio = true;
|
||||
|
||||
do {
|
||||
unsigned fragment_compression_type = compression_type;
|
||||
size_t dst_len, src_len;
|
||||
@ -504,45 +497,43 @@ static int bch2_write_extent(struct bch_write_op *op,
|
||||
mempool_free(bio->bi_io_vec[--bio->bi_vcnt].bv_page,
|
||||
&c->bio_bounce_pages);
|
||||
|
||||
ret = orig->bi_iter.bi_size != 0;
|
||||
more = orig->bi_iter.bi_size != 0;
|
||||
} else {
|
||||
bio = bio_next_split(orig, ob->sectors_free, GFP_NOIO,
|
||||
&c->bio_write);
|
||||
|
||||
wbio = to_wbio(bio);
|
||||
wbio->orig = NULL;
|
||||
wbio->bounce = false;
|
||||
wbio = wbio_init(bio);
|
||||
wbio->put_bio = bio != orig;
|
||||
|
||||
init_append_extent(op, bio_sectors(bio), bio_sectors(bio),
|
||||
compression_type, 0,
|
||||
(struct bch_csum) { 0 }, csum_type, ob);
|
||||
|
||||
ret = bio != orig;
|
||||
more = bio != orig;
|
||||
}
|
||||
|
||||
/* might have done a realloc... */
|
||||
|
||||
key_to_write = (void *) (op->insert_keys.keys_p + key_to_write_offset);
|
||||
|
||||
ret = bch2_check_mark_super(c, bkey_i_to_s_c_extent(key_to_write),
|
||||
BCH_DATA_USER);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bio->bi_end_io = bch2_write_endio;
|
||||
bio->bi_private = &op->cl;
|
||||
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
|
||||
|
||||
closure_get(bio->bi_private);
|
||||
|
||||
/* might have done a realloc... */
|
||||
|
||||
key_to_write = (void *) (op->insert_keys.keys_p + key_to_write_offset);
|
||||
|
||||
bch2_check_mark_super(c, bkey_i_to_s_c_extent(key_to_write),
|
||||
BCH_DATA_USER);
|
||||
|
||||
bch2_submit_wbio_replicas(to_wbio(bio), c, key_to_write);
|
||||
return ret;
|
||||
return more;
|
||||
}
|
||||
|
||||
static void __bch2_write(struct closure *cl)
|
||||
{
|
||||
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
|
||||
struct bch_fs *c = op->c;
|
||||
struct bio *bio = &op->bio->bio;
|
||||
unsigned open_bucket_nr = 0;
|
||||
struct open_bucket *b;
|
||||
int ret;
|
||||
@ -550,22 +541,12 @@ static void __bch2_write(struct closure *cl)
|
||||
memset(op->open_buckets, 0, sizeof(op->open_buckets));
|
||||
|
||||
if (op->flags & BCH_WRITE_DISCARD) {
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
bch2_write_discard(cl);
|
||||
bio_put(bio);
|
||||
op->flags |= BCH_WRITE_DONE;
|
||||
continue_at(cl, bch2_write_done, index_update_wq(op));
|
||||
}
|
||||
|
||||
/*
|
||||
* Journal writes are marked REQ_PREFLUSH; if the original write was a
|
||||
* flush, it'll wait on the journal write.
|
||||
*/
|
||||
bio->bi_opf &= ~(REQ_PREFLUSH|REQ_FUA);
|
||||
|
||||
do {
|
||||
EBUG_ON(bio->bi_iter.bi_sector != op->pos.offset);
|
||||
EBUG_ON(!bio_sectors(bio));
|
||||
|
||||
if (open_bucket_nr == ARRAY_SIZE(op->open_buckets))
|
||||
continue_at(cl, bch2_write_index, index_update_wq(op));
|
||||
|
||||
@ -622,7 +603,7 @@ static void __bch2_write(struct closure *cl)
|
||||
b - c->open_buckets > U8_MAX);
|
||||
op->open_buckets[open_bucket_nr++] = b - c->open_buckets;
|
||||
|
||||
ret = bch2_write_extent(op, b, bio);
|
||||
ret = bch2_write_extent(op, b);
|
||||
|
||||
bch2_alloc_sectors_done(c, op->wp, b);
|
||||
|
||||
@ -703,16 +684,13 @@ void bch2_wake_delayed_writes(unsigned long data)
 * after the data is written it calls bch_journal, and after the keys have been
 * added to the next journal write they're inserted into the btree.
 *
 * It inserts the data in op->bio; bi_sector is used for the key offset, and
 * op->inode is used for the key inode.
 *
 * If op->discard is true, instead of inserting the data it invalidates the
 * region of the cache represented by op->bio and op->inode.
 */
void bch2_write(struct closure *cl)
{
	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
	struct bio *bio = &op->bio->bio;
	struct bio *bio = &op->wbio.bio;
	struct bch_fs *c = op->c;
	u64 inode = op->pos.inode;

@ -742,7 +720,7 @@ void bch2_write(struct closure *cl)

	spin_lock_irqsave(&c->foreground_write_pd_lock, flags);
	bch2_ratelimit_increment(&c->foreground_write_pd.rate,
				 bio->bi_iter.bi_size);
				 bio->bi_iter.bi_size);

	delay = bch2_ratelimit_delay(&c->foreground_write_pd.rate);

@ -776,15 +754,14 @@ void bch2_write(struct closure *cl)
}

void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
			struct bch_write_bio *bio, struct disk_reservation res,
			struct write_point *wp, struct bpos pos,
			u64 *journal_seq, unsigned flags)
			struct disk_reservation res,
			struct write_point *wp, struct bpos pos,
			u64 *journal_seq, unsigned flags)
{
	EBUG_ON(res.sectors && !res.nr_replicas);

	op->c = c;
	op->io_wq = index_update_wq(op);
	op->bio = bio;
	op->written = 0;
	op->error = 0;
	op->flags = flags;
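Illustrative sketch (not from this commit): with the bch_write_bio embedded in the op, bch2_write_op_init() no longer takes a bio argument and submission goes through the op's closure. The wrapper function below is an assumption; it presumes op->wbio.bio has already been set up with the pages to write.

/*
 * Illustrative only: the op owns its write bio (op->wbio), so init and
 * submission need no separate struct bch_write_bio argument.
 */
static void example_submit_write(struct bch_write_op *op, struct bch_fs *c,
				 struct disk_reservation res, struct bpos pos,
				 u64 *journal_seq, struct closure *parent)
{
	bch2_write_op_init(op, c, res,
			   foreground_write_point(c, pos.inode),
			   pos, journal_seq, 0);

	/* bch2_write() consumes op->wbio.bio and signals `parent` when done */
	closure_call(&op->cl, bch2_write, NULL, parent);
}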
@ -983,7 +960,7 @@ static void cache_promote_done(struct closure *cl)
|
||||
struct cache_promote_op *op =
|
||||
container_of(cl, struct cache_promote_op, cl);
|
||||
|
||||
bch2_bio_free_pages_pool(op->write.op.c, &op->write.wbio.bio);
|
||||
bch2_bio_free_pages_pool(op->write.op.c, &op->write.op.wbio.bio);
|
||||
kfree(op);
|
||||
}
|
||||
|
||||
@ -1020,7 +997,7 @@ static void __bch2_read_endio(struct work_struct *work)
|
||||
trace_promote(&rbio->bio);
|
||||
|
||||
/* we now own pages: */
|
||||
swap(promote->write.wbio.bio.bi_vcnt, rbio->bio.bi_vcnt);
|
||||
swap(promote->write.op.wbio.bio.bi_vcnt, rbio->bio.bi_vcnt);
|
||||
rbio->promote = NULL;
|
||||
|
||||
bch2_rbio_done(rbio);
|
||||
@ -1112,7 +1089,7 @@ void bch2_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
|
||||
promote_op = kmalloc(sizeof(*promote_op) +
|
||||
sizeof(struct bio_vec) * pages, GFP_NOIO);
|
||||
if (promote_op) {
|
||||
struct bio *promote_bio = &promote_op->write.wbio.bio;
|
||||
struct bio *promote_bio = &promote_op->write.op.wbio.bio;
|
||||
|
||||
bio_init(promote_bio,
|
||||
promote_bio->bi_inline_vecs,
|
||||
@ -1204,7 +1181,7 @@ void bch2_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
|
||||
rbio->bio.bi_end_io = bch2_read_endio;
|
||||
|
||||
if (promote_op) {
|
||||
struct bio *promote_bio = &promote_op->write.wbio.bio;
|
||||
struct bio *promote_bio = &promote_op->write.op.wbio.bio;
|
||||
|
||||
promote_bio->bi_iter = rbio->bio.bi_iter;
|
||||
memcpy(promote_bio->bi_io_vec, rbio->bio.bi_io_vec,
|
||||
@ -1367,12 +1344,11 @@ void bch2_read_retry_work(struct work_struct *work)
|
||||
read_retry_work);
|
||||
struct bch_read_bio *rbio;
|
||||
struct bio *bio;
|
||||
unsigned long flags;
|
||||
|
||||
while (1) {
|
||||
spin_lock_irqsave(&c->read_retry_lock, flags);
|
||||
spin_lock_irq(&c->read_retry_lock);
|
||||
bio = bio_list_pop(&c->read_retry_list);
|
||||
spin_unlock_irqrestore(&c->read_retry_lock, flags);
|
||||
spin_unlock_irq(&c->read_retry_lock);
|
||||
|
||||
if (!bio)
|
||||
break;
|
||||
|
@ -41,11 +41,18 @@ static inline struct write_point *foreground_write_point(struct bch_fs *c,
}

void bch2_write_op_init(struct bch_write_op *, struct bch_fs *,
			struct bch_write_bio *,
			struct disk_reservation, struct write_point *,
			struct bpos, u64 *, unsigned);
void bch2_write(struct closure *);

static inline struct bch_write_bio *wbio_init(struct bio *bio)
{
	struct bch_write_bio *wbio = to_wbio(bio);

	memset(wbio, 0, offsetof(struct bch_write_bio, bio));
	return wbio;
}

struct cache_promote_op;

struct extent_pick_ptr;
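A note on wbio_init() above (illustrative, not from this commit): it zeroes every bch_write_bio field that precedes the embedded struct bio, so a caller can reset the wrapper state of a freshly allocated bio without touching the bio itself. A hedged usage sketch; the bioset name and helper are assumptions.

/*
 * Illustrative only: wrap a bio allocated from an assumed bioset built over
 * struct bch_write_bio, clearing the wrapper fields (c, ca, flags, ...).
 */
static struct bch_write_bio *example_wbio_alloc(struct bio_set *my_bioset,
						unsigned nr_vecs)
{
	struct bio *bio = bio_alloc_bioset(GFP_NOIO, nr_vecs, my_bioset);
	struct bch_write_bio *wbio = wbio_init(bio);

	wbio->put_bio = true;	/* free the bio when the write completes */
	return wbio;
}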
@ -66,37 +66,30 @@ struct bch_write_bio {
	struct bch_fs		*c;
	struct bch_dev		*ca;
	union {
		struct bio		*orig;
		struct closure		*cl;
		struct bch_write_bio	*parent;
		struct closure		*cl;
	};

	unsigned		submit_time_us;
	u8			ptr_idx;
	u8			replicas_failed;
	u8			order;

	unsigned		split:1,
				bounce:1,
				put_bio:1,
				have_io_ref:1;
				have_io_ref:1,
				used_mempool:1;

	/* Only for btree writes: */
	unsigned		used_mempool:1;
	u8			order;
	unsigned		submit_time_us;
	void			*data;

	struct bio		bio;
};

struct bch_replace_info {
	struct extent_insert_hook	hook;
	/* How many insertions succeeded */
	unsigned			successes;
	/* How many insertions failed */
	unsigned			failures;
	BKEY_PADDED(key);
};

struct bch_write_op {
	struct closure		cl;
	struct bch_fs		*c;
	struct bch_fs		*c;
	struct workqueue_struct	*io_wq;
	struct bch_write_bio	*bio;

	unsigned		written; /* sectors */

@ -141,6 +134,9 @@ struct bch_write_op {

	struct keylist		insert_keys;
	u64			inline_keys[BKEY_EXTENT_U64s_MAX * 2];

	/* Must be last: */
	struct bch_write_bio	wbio;
};

#endif /* _BCACHE_IO_TYPES_H */
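Why wbio has to stay the last member (context, not from this commit): struct bio keeps its inline bio_vecs past the end of the declared structure, so every wrapper that embeds it must put it last; the fs-io and dio hunks above then recover the outer object from a bioset-allocated bio with container_of(). A minimal sketch of that pattern, with the wrapper type and bioset as assumptions.

/*
 * Illustrative only: the bioset is created at the byte offset of the
 * innermost struct bio, so bio_alloc_bioset() returns a bio sitting at the
 * tail of a fully allocated wrapper that container_of() can walk back to.
 */
struct example_write {
	struct closure		cl;
	/* must be last: */
	struct bch_write_op	op;	/* op itself ends in op.wbio.bio */
};

static struct bio_set *example_write_bioset;

static int example_write_bioset_init(void)
{
	example_write_bioset =
		bioset_create(4, offsetof(struct example_write, op.wbio.bio));
	return example_write_bioset ? 0 : -ENOMEM;
}

static struct example_write *example_write_alloc(void)
{
	struct bio *bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES,
					   example_write_bioset);

	return container_of(bio, struct example_write, op.wbio.bio);
}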
|
@ -53,15 +53,15 @@ static inline u64 journal_pin_seq(struct journal *j,
|
||||
return last_seq(j) + fifo_entry_idx(&j->pin, pin_list);
|
||||
}
|
||||
|
||||
static inline void bch2_journal_add_entry(struct journal_buf *buf,
|
||||
const void *data, size_t u64s,
|
||||
unsigned type, enum btree_id id,
|
||||
unsigned level)
|
||||
static inline void bch2_journal_add_entry_noreservation(struct journal_buf *buf,
|
||||
unsigned type, enum btree_id id,
|
||||
unsigned level,
|
||||
const void *data, size_t u64s)
|
||||
{
|
||||
struct jset *jset = buf->data;
|
||||
|
||||
bch2_journal_add_entry_at(buf, data, u64s, type, id, level,
|
||||
le32_to_cpu(jset->u64s));
|
||||
bch2_journal_add_entry_at(buf, le32_to_cpu(jset->u64s),
|
||||
type, id, level, data, u64s);
|
||||
le32_add_cpu(&jset->u64s, jset_u64s(u64s));
|
||||
}
|
||||
|
||||
@ -97,8 +97,9 @@ static void bch2_journal_add_btree_root(struct journal_buf *buf,
|
||||
enum btree_id id, struct bkey_i *k,
|
||||
unsigned level)
|
||||
{
|
||||
bch2_journal_add_entry(buf, k, k->k.u64s,
|
||||
JOURNAL_ENTRY_BTREE_ROOT, id, level);
|
||||
bch2_journal_add_entry_noreservation(buf,
|
||||
JOURNAL_ENTRY_BTREE_ROOT, id, level,
|
||||
k, k->k.u64s);
|
||||
}
|
||||
|
||||
static void journal_seq_blacklist_flush(struct journal *j,
|
||||
@ -416,13 +417,8 @@ static void journal_entry_null_range(void *start, void *end)
|
||||
{
|
||||
struct jset_entry *entry;
|
||||
|
||||
for (entry = start; entry != end; entry = vstruct_next(entry)) {
|
||||
entry->u64s = 0;
|
||||
entry->btree_id = 0;
|
||||
entry->level = 0;
|
||||
entry->flags = 0;
|
||||
SET_JOURNAL_ENTRY_TYPE(entry, 0);
|
||||
}
|
||||
for (entry = start; entry != end; entry = vstruct_next(entry))
|
||||
memset(entry, 0, sizeof(*entry));
|
||||
}
|
||||
|
||||
static int journal_validate_key(struct bch_fs *c, struct jset *j,
|
||||
@ -514,7 +510,7 @@ static int __journal_entry_validate(struct bch_fs *c, struct jset *j,
|
||||
break;
|
||||
}
|
||||
|
||||
switch (JOURNAL_ENTRY_TYPE(entry)) {
|
||||
switch (entry->type) {
|
||||
case JOURNAL_ENTRY_BTREE_KEYS:
|
||||
vstruct_for_each(entry, k) {
|
||||
ret = journal_validate_key(c, j, entry, k,
|
||||
@ -555,8 +551,8 @@ static int __journal_entry_validate(struct bch_fs *c, struct jset *j,
|
||||
|
||||
break;
|
||||
default:
|
||||
journal_entry_err(c, "invalid journal entry type %llu",
|
||||
JOURNAL_ENTRY_TYPE(entry));
|
||||
journal_entry_err(c, "invalid journal entry type %u",
|
||||
entry->type);
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
break;
|
||||
}
|
||||
@ -1426,9 +1422,9 @@ void bch2_journal_start(struct bch_fs *c)
|
||||
*/
|
||||
list_for_each_entry(bl, &j->seq_blacklist, list)
|
||||
if (!bl->written) {
|
||||
bch2_journal_add_entry(journal_cur_buf(j), &bl->seq, 1,
|
||||
bch2_journal_add_entry_noreservation(journal_cur_buf(j),
|
||||
JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED,
|
||||
0, 0);
|
||||
0, 0, &bl->seq, 1);
|
||||
|
||||
journal_pin_add_entry(j,
|
||||
&fifo_peek_back(&j->pin),
|
||||
@ -2083,8 +2079,8 @@ static void journal_write_compact(struct jset *jset)
|
||||
if (prev &&
|
||||
i->btree_id == prev->btree_id &&
|
||||
i->level == prev->level &&
|
||||
JOURNAL_ENTRY_TYPE(i) == JOURNAL_ENTRY_TYPE(prev) &&
|
||||
JOURNAL_ENTRY_TYPE(i) == JOURNAL_ENTRY_BTREE_KEYS &&
|
||||
i->type == prev->type &&
|
||||
i->type == JOURNAL_ENTRY_BTREE_KEYS &&
|
||||
le16_to_cpu(prev->u64s) + u64s <= U16_MAX) {
|
||||
memmove_u64s_down(vstruct_next(prev),
|
||||
i->_data,
|
||||
@ -2238,8 +2234,9 @@ static void journal_write(struct closure *cl)
|
||||
closure_return_with_destructor(cl, journal_write_done);
|
||||
}
|
||||
|
||||
bch2_check_mark_super(c, bkey_i_to_s_c_extent(&j->key),
|
||||
BCH_DATA_JOURNAL);
|
||||
if (bch2_check_mark_super(c, bkey_i_to_s_c_extent(&j->key),
|
||||
BCH_DATA_JOURNAL))
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* XXX: we really should just disable the entire journal in nochanges
|
||||
|
@ -125,7 +125,7 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
|
||||
struct jset_entry *entry, unsigned type)
|
||||
{
|
||||
while (entry < vstruct_last(jset)) {
|
||||
if (JOURNAL_ENTRY_TYPE(entry) == type)
|
||||
if (entry->type == type)
|
||||
return entry;
|
||||
|
||||
entry = vstruct_next(entry);
|
||||
@ -187,8 +187,12 @@ static inline void journal_state_inc(union journal_res_state *s)
|
||||
s->buf1_count += s->idx == 1;
|
||||
}
|
||||
|
||||
static inline void bch2_journal_set_has_inode(struct journal_buf *buf, u64 inum)
|
||||
static inline void bch2_journal_set_has_inode(struct journal *j,
|
||||
struct journal_res *res,
|
||||
u64 inum)
|
||||
{
|
||||
struct journal_buf *buf = &j->buf[res->idx];
|
||||
|
||||
set_bit(hash_64(inum, ilog2(sizeof(buf->has_inode) * 8)), buf->has_inode);
|
||||
}
|
||||
|
||||
@ -202,38 +206,44 @@ static inline unsigned jset_u64s(unsigned u64s)
}

static inline void bch2_journal_add_entry_at(struct journal_buf *buf,
					     const void *data, size_t u64s,
					     unsigned offset,
					     unsigned type, enum btree_id id,
					     unsigned level, unsigned offset)
					     unsigned level,
					     const void *data, size_t u64s)
{
	struct jset_entry *entry = vstruct_idx(buf->data, offset);

	entry->u64s = cpu_to_le16(u64s);
	memset(entry, 0, sizeof(*entry));
	entry->u64s = cpu_to_le16(u64s);
	entry->btree_id = id;
	entry->level = level;
	entry->flags = 0;
	SET_JOURNAL_ENTRY_TYPE(entry, type);
	entry->level = level;
	entry->type = type;

	memcpy_u64s(entry->_data, data, u64s);
}

static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res,
					  unsigned type, enum btree_id id,
					  unsigned level,
					  const void *data, unsigned u64s)
{
	struct journal_buf *buf = &j->buf[res->idx];
	unsigned actual = jset_u64s(u64s);

	EBUG_ON(!res->ref);
	BUG_ON(actual > res->u64s);

	bch2_journal_add_entry_at(buf, res->offset, type,
				  id, level, data, u64s);
	res->offset += actual;
	res->u64s -= actual;
}

static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *res,
					 enum btree_id id, const struct bkey_i *k)
{
	struct journal_buf *buf = &j->buf[res->idx];
	unsigned actual = jset_u64s(k->k.u64s);

	EBUG_ON(!res->ref);
	BUG_ON(actual > res->u64s);

	bch2_journal_set_has_inode(buf, k->k.p.inode);

	bch2_journal_add_entry_at(buf, k, k->k.u64s,
				  JOURNAL_ENTRY_BTREE_KEYS, id,
				  0, res->offset);

	res->offset += actual;
	res->u64s -= actual;
	bch2_journal_add_entry(j, res, JOURNAL_ENTRY_BTREE_KEYS,
			       id, 0, k, k->k.u64s);
}

void bch2_journal_buf_put_slowpath(struct journal *, bool);
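Illustrative sketch (not from this commit) of how a reservation is consumed by the helpers above: each call appends one jset_entry, advancing res->offset and shrinking res->u64s by jset_u64s() of the payload. The surrounding function and the origin of `res` are assumptions.

/*
 * Illustrative only: `res` is assumed to have been reserved elsewhere; it
 * carries a buffer index, a write offset and a remaining budget in u64s.
 */
static void example_journal_two_keys(struct journal *j, struct journal_res *res,
				     enum btree_id id,
				     const struct bkey_i *k1,
				     const struct bkey_i *k2)
{
	/* each key costs jset_u64s(k->k.u64s), i.e. entry header plus payload */
	bch2_journal_add_keys(j, res, id, k1);
	bch2_journal_add_keys(j, res, id, k2);

	/*
	 * res->offset now points past the second entry; whatever is left in
	 * res->u64s is padded out with empty JOURNAL_ENTRY_BTREE_KEYS entries
	 * by bch2_journal_res_put() (see the following hunk).
	 */
}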
@ -272,13 +282,10 @@ static inline void bch2_journal_res_put(struct journal *j,
|
||||
|
||||
lock_release(&j->res_map, 0, _RET_IP_);
|
||||
|
||||
while (res->u64s) {
|
||||
bch2_journal_add_entry_at(&j->buf[res->idx], NULL, 0,
|
||||
JOURNAL_ENTRY_BTREE_KEYS,
|
||||
0, 0, res->offset);
|
||||
res->offset += jset_u64s(0);
|
||||
res->u64s -= jset_u64s(0);
|
||||
}
|
||||
while (res->u64s)
|
||||
bch2_journal_add_entry(j, res,
|
||||
JOURNAL_ENTRY_BTREE_KEYS,
|
||||
0, 0, NULL, 0);
|
||||
|
||||
bch2_journal_buf_put(j, res->idx, false);
|
||||
|
||||
|
@ -128,9 +128,12 @@ int bch2_move_data_off_device(struct bch_dev *ca)
|
||||
seen_key_count++;
|
||||
continue;
|
||||
next:
|
||||
if (bkey_extent_is_data(k.k))
|
||||
bch2_check_mark_super(c, bkey_s_c_to_extent(k),
|
||||
BCH_DATA_USER);
|
||||
if (bkey_extent_is_data(k.k)) {
|
||||
ret = bch2_check_mark_super(c, bkey_s_c_to_extent(k),
|
||||
BCH_DATA_USER);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_btree_iter_advance_pos(&iter);
|
||||
bch2_btree_iter_cond_resched(&iter);
|
||||
|
||||
@ -386,9 +389,12 @@ int bch2_flag_data_bad(struct bch_dev *ca)
|
||||
*/
|
||||
continue;
|
||||
advance:
|
||||
if (bkey_extent_is_data(k.k))
|
||||
bch2_check_mark_super(c, bkey_s_c_to_extent(k),
|
||||
BCH_DATA_USER);
|
||||
if (bkey_extent_is_data(k.k)) {
|
||||
ret = bch2_check_mark_super(c, bkey_s_c_to_extent(k),
|
||||
BCH_DATA_USER);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_btree_iter_advance_pos(&iter);
|
||||
}
|
||||
|
||||
|
@ -155,11 +155,8 @@ void bch2_migrate_write_init(struct bch_fs *c,
|
||||
(move_ptr && move_ptr->cached))
|
||||
flags |= BCH_WRITE_CACHED;
|
||||
|
||||
bch2_write_op_init(&m->op, c, &m->wbio,
|
||||
(struct disk_reservation) { 0 },
|
||||
wp,
|
||||
bkey_start_pos(k.k),
|
||||
NULL, flags);
|
||||
bch2_write_op_init(&m->op, c, (struct disk_reservation) { 0 }, wp,
|
||||
bkey_start_pos(k.k), NULL, flags);
|
||||
|
||||
if (m->move)
|
||||
m->op.alloc_reserve = RESERVE_MOVINGGC;
|
||||
@ -194,7 +191,7 @@ static void moving_io_destructor(struct closure *cl)
|
||||
atomic_sub(io->write.key.k.size, &ctxt->sectors_in_flight);
|
||||
wake_up(&ctxt->wait);
|
||||
|
||||
bio_for_each_segment_all(bv, &io->write.wbio.bio, i)
|
||||
bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i)
|
||||
if (bv->bv_page)
|
||||
__free_page(bv->bv_page);
|
||||
|
||||
@ -307,9 +304,7 @@ int bch2_data_move(struct bch_fs *c,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
migrate_bio_init(io, &io->write.wbio.bio, k.k->size);
|
||||
bio_get(&io->write.wbio.bio);
|
||||
io->write.wbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
|
||||
migrate_bio_init(io, &io->write.op.wbio.bio, k.k->size);
|
||||
|
||||
bch2_migrate_write_init(c, &io->write, wp, k, move_ptr, 0);
|
||||
|
||||
|
@ -19,7 +19,6 @@ struct migrate_write {
|
||||
bool move;
|
||||
struct bch_extent_ptr move_ptr;
|
||||
struct bch_write_op op;
|
||||
struct bch_write_bio wbio;
|
||||
};
|
||||
|
||||
void bch2_migrate_write_init(struct bch_fs *,
|
||||
|
@ -783,6 +783,12 @@ out:

/* replica information: */

static inline struct bch_replicas_cpu_entry *
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
{
	return (void *) r->entries + r->entry_size * i;
}

static inline struct bch_replicas_entry *
replicas_entry_next(struct bch_replicas_entry *i)
{

@ -794,6 +800,24 @@ replicas_entry_next(struct bch_replicas_entry *i)
	     (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
	     (_i) = replicas_entry_next(_i))

static inline bool replicas_test_dev(struct bch_replicas_cpu_entry *e,
				     unsigned dev)
{
	return (e->devs[dev >> 3] & (1 << (dev & 7))) != 0;
}

static inline void replicas_set_dev(struct bch_replicas_cpu_entry *e,
				    unsigned dev)
{
	e->devs[dev >> 3] |= 1 << (dev & 7);
}

static inline unsigned replicas_dev_slots(struct bch_replicas_cpu *r)
{
	return (r->entry_size -
		offsetof(struct bch_replicas_cpu_entry, devs)) * 8;
}

static void bch2_sb_replicas_nr_entries(struct bch_sb_field_replicas *r,
					unsigned *nr,
					unsigned *bytes,
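The per-entry device bitmap above stores one bit per device index: device 11, for example, lands in byte 11 >> 3 = 1, bit 11 & 7 = 3. A small illustrative check (not from this commit); the entry is assumed zeroed and sized for at least 16 devices.

/*
 * Illustrative only: exercises the bit packing of replicas_set_dev() and
 * replicas_test_dev() on a zeroed entry with room for at least 16 devices.
 */
static void example_replicas_bitmap(struct bch_replicas_cpu_entry *e)
{
	replicas_set_dev(e, 11);		/* e->devs[1] |= 1 << 3 */

	BUG_ON(!replicas_test_dev(e, 11));
	BUG_ON(replicas_test_dev(e, 12));	/* neighbouring bit stays clear */
}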
@ -879,6 +903,29 @@ static int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bkey_to_replicas(struct bkey_s_c_extent e,
|
||||
enum bch_data_types data_type,
|
||||
struct bch_replicas_cpu_entry *r,
|
||||
unsigned *max_dev)
|
||||
{
|
||||
const struct bch_extent_ptr *ptr;
|
||||
|
||||
BUG_ON(!data_type ||
|
||||
data_type == BCH_DATA_SB ||
|
||||
data_type >= BCH_DATA_NR);
|
||||
|
||||
memset(r, 0, sizeof(*r));
|
||||
r->data_type = data_type;
|
||||
|
||||
*max_dev = 0;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (!ptr->cached) {
|
||||
*max_dev = max_t(unsigned, *max_dev, ptr->dev);
|
||||
replicas_set_dev(r, ptr->dev);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* for when gc of replica information is in progress:
|
||||
*/
|
||||
@ -887,14 +934,11 @@ static int bch2_update_gc_replicas(struct bch_fs *c,
|
||||
struct bkey_s_c_extent e,
|
||||
enum bch_data_types data_type)
|
||||
{
|
||||
const struct bch_extent_ptr *ptr;
|
||||
struct bch_replicas_cpu_entry *new_e;
|
||||
struct bch_replicas_cpu_entry new_e;
|
||||
struct bch_replicas_cpu *new;
|
||||
unsigned i, nr, entry_size, max_dev = 0;
|
||||
unsigned i, nr, entry_size, max_dev;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (!ptr->cached)
|
||||
max_dev = max_t(unsigned, max_dev, ptr->dev);
|
||||
bkey_to_replicas(e, data_type, &new_e, &max_dev);
|
||||
|
||||
entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
|
||||
DIV_ROUND_UP(max_dev + 1, 8);
|
||||
@ -914,12 +958,9 @@ static int bch2_update_gc_replicas(struct bch_fs *c,
|
||||
cpu_replicas_entry(gc_r, i),
|
||||
gc_r->entry_size);
|
||||
|
||||
new_e = cpu_replicas_entry(new, nr - 1);
|
||||
new_e->data_type = data_type;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (!ptr->cached)
|
||||
replicas_set_dev(new_e, ptr->dev);
|
||||
memcpy(cpu_replicas_entry(new, nr - 1),
|
||||
&new_e,
|
||||
new->entry_size);
|
||||
|
||||
eytzinger0_sort(new->entries,
|
||||
new->nr,
|
||||
@ -931,8 +972,38 @@ static int bch2_update_gc_replicas(struct bch_fs *c,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_check_mark_super_slowpath(struct bch_fs *c, struct bkey_s_c_extent e,
|
||||
enum bch_data_types data_type)
|
||||
static bool replicas_has_extent(struct bch_replicas_cpu *r,
|
||||
struct bkey_s_c_extent e,
|
||||
enum bch_data_types data_type)
|
||||
{
|
||||
struct bch_replicas_cpu_entry search;
|
||||
unsigned max_dev;
|
||||
|
||||
bkey_to_replicas(e, data_type, &search, &max_dev);
|
||||
|
||||
return max_dev < replicas_dev_slots(r) &&
|
||||
eytzinger0_find(r->entries, r->nr,
|
||||
r->entry_size,
|
||||
memcmp, &search) < r->nr;
|
||||
}
|
||||
|
||||
bool bch2_sb_has_replicas(struct bch_fs *c, struct bkey_s_c_extent e,
|
||||
enum bch_data_types data_type)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = replicas_has_extent(rcu_dereference(c->replicas),
|
||||
e, data_type);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
noinline
|
||||
static int bch2_check_mark_super_slowpath(struct bch_fs *c,
|
||||
struct bkey_s_c_extent e,
|
||||
enum bch_data_types data_type)
|
||||
{
|
||||
struct bch_replicas_cpu *gc_r;
|
||||
const struct bch_extent_ptr *ptr;
|
||||
@ -996,6 +1067,25 @@ err:
	return ret;
}

int bch2_check_mark_super(struct bch_fs *c, struct bkey_s_c_extent e,
			  enum bch_data_types data_type)
{
	struct bch_replicas_cpu *gc_r;
	bool marked;

	rcu_read_lock();
	marked = replicas_has_extent(rcu_dereference(c->replicas),
				     e, data_type) &&
		(!(gc_r = rcu_dereference(c->replicas_gc)) ||
		 replicas_has_extent(gc_r, e, data_type));
	rcu_read_unlock();

	if (marked)
		return 0;

	return bch2_check_mark_super_slowpath(c, e, data_type);
}

struct replicas_status __bch2_replicas_status(struct bch_fs *c,
					      struct bch_dev *dev_to_offline)
{
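Since bch2_check_mark_super() is now out of line and its result is checked by the io.c and migrate hunks above, callers are expected to propagate its error before inserting the key. A minimal sketch of that calling pattern (the wrapper function is an assumption, not from this commit).

/*
 * Illustrative only: make sure the devices referenced by a user-data extent
 * are recorded in the superblock replica list before the key goes in.
 */
static int example_mark_before_insert(struct bch_fs *c, struct bkey_i *k)
{
	if (!bkey_extent_is_data(&k->k))
		return 0;

	/* caller aborts the insert on error */
	return bch2_check_mark_super(c, bkey_i_to_s_c_extent(k),
				     BCH_DATA_USER);
}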
@ -121,92 +121,10 @@ const char *bch2_read_super(struct bcache_superblock *,
|
||||
struct bch_opts, const char *);
|
||||
void bch2_write_super(struct bch_fs *);
|
||||
|
||||
static inline bool replicas_test_dev(struct bch_replicas_cpu_entry *e,
|
||||
unsigned dev)
|
||||
{
|
||||
return (e->devs[dev >> 3] & (1 << (dev & 7))) != 0;
|
||||
}
|
||||
|
||||
static inline void replicas_set_dev(struct bch_replicas_cpu_entry *e,
|
||||
unsigned dev)
|
||||
{
|
||||
e->devs[dev >> 3] |= 1 << (dev & 7);
|
||||
}
|
||||
|
||||
static inline unsigned replicas_dev_slots(struct bch_replicas_cpu *r)
|
||||
{
|
||||
return (r->entry_size -
|
||||
offsetof(struct bch_replicas_cpu_entry, devs)) * 8;
|
||||
}
|
||||
|
||||
static inline struct bch_replicas_cpu_entry *
|
||||
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
|
||||
{
|
||||
return (void *) r->entries + r->entry_size * i;
|
||||
}
|
||||
|
||||
int bch2_check_mark_super_slowpath(struct bch_fs *, struct bkey_s_c_extent,
|
||||
enum bch_data_types);
|
||||
|
||||
static inline bool replicas_has_extent(struct bch_replicas_cpu *r,
|
||||
struct bkey_s_c_extent e,
|
||||
enum bch_data_types data_type)
|
||||
{
|
||||
const struct bch_extent_ptr *ptr;
|
||||
struct bch_replicas_cpu_entry search = {
|
||||
.data_type = data_type,
|
||||
};
|
||||
unsigned max_dev = 0;
|
||||
|
||||
BUG_ON(!data_type ||
|
||||
data_type == BCH_DATA_SB ||
|
||||
data_type >= BCH_DATA_NR);
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (!ptr->cached) {
|
||||
max_dev = max_t(unsigned, max_dev, ptr->dev);
|
||||
replicas_set_dev(&search, ptr->dev);
|
||||
}
|
||||
|
||||
return max_dev < replicas_dev_slots(r) &&
|
||||
eytzinger0_find(r->entries, r->nr,
|
||||
r->entry_size,
|
||||
memcmp, &search) < r->nr;
|
||||
}
|
||||
|
||||
static inline bool bch2_sb_has_replicas(struct bch_fs *c,
|
||||
struct bkey_s_c_extent e,
|
||||
enum bch_data_types data_type)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = replicas_has_extent(rcu_dereference(c->replicas),
|
||||
e, data_type);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int bch2_check_mark_super(struct bch_fs *c,
|
||||
struct bkey_s_c_extent e,
|
||||
enum bch_data_types data_type)
|
||||
{
|
||||
struct bch_replicas_cpu *gc_r;
|
||||
bool marked;
|
||||
|
||||
rcu_read_lock();
|
||||
marked = replicas_has_extent(rcu_dereference(c->replicas),
|
||||
e, data_type) &&
|
||||
(!(gc_r = rcu_dereference(c->replicas_gc)) ||
|
||||
replicas_has_extent(gc_r, e, data_type));
|
||||
rcu_read_unlock();
|
||||
|
||||
if (marked)
|
||||
return 0;
|
||||
|
||||
return bch2_check_mark_super_slowpath(c, e, data_type);
|
||||
}
|
||||
bool bch2_sb_has_replicas(struct bch_fs *, struct bkey_s_c_extent,
|
||||
enum bch_data_types);
|
||||
int bch2_check_mark_super(struct bch_fs *, struct bkey_s_c_extent,
|
||||
enum bch_data_types);
|
||||
|
||||
struct replicas_status {
|
||||
struct {
|
||||
|
@ -517,10 +517,15 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
mutex_init(&c->btree_interior_update_lock);
|
||||
|
||||
mutex_init(&c->bio_bounce_pages_lock);
|
||||
mutex_init(&c->zlib_workspace_lock);
|
||||
|
||||
bio_list_init(&c->read_retry_list);
|
||||
spin_lock_init(&c->read_retry_lock);
|
||||
INIT_WORK(&c->read_retry_work, bch2_read_retry_work);
|
||||
mutex_init(&c->zlib_workspace_lock);
|
||||
|
||||
bio_list_init(&c->btree_write_error_list);
|
||||
spin_lock_init(&c->btree_write_error_lock);
|
||||
INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work);
|
||||
|
||||
INIT_LIST_HEAD(&c->fsck_errors);
|
||||
mutex_init(&c->fsck_error_lock);
|
||||
@ -593,8 +598,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
PAGE_SECTORS, 0) ||
|
||||
!(c->usage_percpu = alloc_percpu(struct bch_fs_usage)) ||
|
||||
lg_lock_init(&c->usage_lock) ||
|
||||
mempool_init_page_pool(&c->btree_bounce_pool, 1,
|
||||
ilog2(btree_pages(c))) ||
|
||||
mempool_init_vp_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) ||
|
||||
bdi_setup_and_register(&c->bdi, "bcachefs") ||
|
||||
bch2_io_clock_init(&c->io_clock[READ]) ||
|
||||
bch2_io_clock_init(&c->io_clock[WRITE]) ||
|
||||
@ -1345,11 +1349,13 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
|
||||
}
|
||||
}
|
||||
|
||||
static bool bch2_fs_may_start(struct bch_fs *c, int flags)
|
||||
static bool bch2_fs_may_start(struct bch_fs *c)
|
||||
{
|
||||
struct replicas_status s;
|
||||
struct bch_sb_field_members *mi;
|
||||
unsigned i;
|
||||
unsigned i, flags = c->opts.degraded
|
||||
? BCH_FORCE_IF_DEGRADED
|
||||
: 0;
|
||||
|
||||
if (!c->opts.degraded) {
|
||||
mutex_lock(&c->sb_lock);
|
||||
@ -1773,7 +1779,7 @@ const char *bch2_fs_open(char * const *devices, unsigned nr_devices,
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
err = "insufficient devices";
|
||||
if (!bch2_fs_may_start(c, 0))
|
||||
if (!bch2_fs_may_start(c))
|
||||
goto err;
|
||||
|
||||
if (!c->opts.nostart) {
|
||||
@ -1844,7 +1850,7 @@ static const char *__bch2_fs_open_incremental(struct bcache_superblock *sb,
|
||||
}
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
if (!c->opts.nostart && bch2_fs_may_start(c, 0)) {
|
||||
if (!c->opts.nostart && bch2_fs_may_start(c)) {
|
||||
err = __bch2_fs_start(c);
|
||||
if (err)
|
||||
goto err;
|
||||
|
@ -577,3 +577,17 @@ void sort_cmp_size(void *base, size_t num, size_t size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mempool_free_vp(void *element, void *pool_data)
|
||||
{
|
||||
size_t size = (size_t) pool_data;
|
||||
|
||||
vpfree(element, size);
|
||||
}
|
||||
|
||||
void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data)
|
||||
{
|
||||
size_t size = (size_t) pool_data;
|
||||
|
||||
return vpmalloc(size, gfp_mask);
|
||||
}
|
||||
|
@ -79,23 +79,43 @@ do { \
	(__builtin_types_compatible_p(typeof(_val), _type) || \
	 __builtin_types_compatible_p(typeof(_val), const _type))

static inline void vpfree(void *p, size_t size)
{
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		free_pages((unsigned long) p, get_order(size));
}

static inline void *vpmalloc(size_t size, gfp_t gfp_mask)
{
	return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
					 get_order(size)) ?:
		__vmalloc(size, gfp_mask, PAGE_KERNEL);
}

static inline void kvpfree(void *p, size_t size)
{
	if (size < PAGE_SIZE)
		kfree(p);
	else if (is_vmalloc_addr(p))
		vfree(p);
	else
		free_pages((unsigned long) p, get_order(size));

		vpfree(p, size);
}

static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
{
	return size < PAGE_SIZE ? kmalloc(size, gfp_mask)
		: (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
					    get_order(size))
		?: __vmalloc(size, gfp_mask, PAGE_KERNEL);
	return size < PAGE_SIZE
		? kmalloc(size, gfp_mask)
		: vpmalloc(size, gfp_mask);
}

void mempool_free_vp(void *element, void *pool_data);
void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data);

static inline int mempool_init_vp_pool(mempool_t *pool, int min_nr, size_t size)
{
	return mempool_init(pool, min_nr, mempool_alloc_vp,
			    mempool_free_vp, (void *) size);
}

#define HEAP(type) \
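For context (not from this commit): the vp helpers let callers size buffers and pools without caring whether the memory comes from the page allocator or vmalloc, which is what the super.c hunk above relies on for the btree bounce pool. A hedged sketch; the function, the element size and the assumption that mempool_alloc()/mempool_free() route through the pool's callbacks are illustrative.

/*
 * Illustrative only: a pool of `bytes`-sized buffers backed by
 * mempool_alloc_vp()/mempool_free_vp(), plus a one-off kvp allocation.
 */
static int example_vp_usage(mempool_t *pool, size_t bytes)
{
	void *buf;
	int ret = mempool_init_vp_pool(pool, 1, bytes);

	if (ret)
		return ret;

	buf = mempool_alloc(pool, GFP_NOIO);	/* assumed to call mempool_alloc_vp() */
	if (buf)
		mempool_free(buf, pool);

	buf = kvpmalloc(bytes, GFP_KERNEL);	/* kmalloc below PAGE_SIZE, else vpmalloc */
	if (!buf)
		return -ENOMEM;
	kvpfree(buf, bytes);
	return 0;
}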
@ -1,5 +1,6 @@
|
||||
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <linux/math64.h>
|
||||
#include <linux/printk.h>
|
||||
@ -163,6 +164,8 @@ static void sched_init(void)
|
||||
{
|
||||
struct task_struct *p = malloc(sizeof(*p));
|
||||
|
||||
mlockall(MCL_CURRENT|MCL_FUTURE);
|
||||
|
||||
memset(p, 0, sizeof(*p));
|
||||
|
||||
p->state = TASK_RUNNING;
|
||||
|