commit 905297ad0f
parent c1c4d03aa6

Update bcachefs sources to d863521e4078 bcachefs: BCH_IOCTL_DISK_SET_STATE_v2

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
@@ -1 +1 @@
-8ffa87fa39d7a2639991676b61464321df9011a9
+d863521e4078ad402b9eb34f962ce8b713c3d9c8
@@ -79,6 +79,7 @@ fn main() {
.rustified_enum("fsck_err_opts")
.allowlist_type("nonce")
.no_debug("bch_replicas_padded")
.no_debug("jset")
.newtype_enum("bch_kdf_types")
.rustified_enum("bch_key_types")
.opaque_type("gendisk")
@@ -2,6 +2,7 @@
#define _PERF_BITOPS_H

#include <string.h>
#include <linux/bits.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <stdlib.h>
@@ -34,7 +35,7 @@ static inline int __bitmap_weight(const unsigned long *bitmap, int bits)
w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits));

return w;
}
}

static inline int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits)
@@ -156,6 +157,49 @@ static inline unsigned long find_next_zero_bit(const unsigned long *addr, unsign
return _find_next_bit(addr, size, offset, ~0UL);
}

#define FIND_NEXT_BIT(FETCH, MUNGE, size, start) \
({ \
unsigned long mask, idx, tmp, sz = (size), __start = (start); \
\
if (unlikely(__start >= sz)) \
goto out; \
\
mask = MUNGE(BITMAP_FIRST_WORD_MASK(__start)); \
idx = __start / BITS_PER_LONG; \
\
for (tmp = (FETCH) & mask; !tmp; tmp = (FETCH)) { \
if ((idx + 1) * BITS_PER_LONG >= sz) \
goto out; \
idx++; \
} \
\
sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(tmp)), sz); \
out: \
sz; \
})
static inline unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
unsigned long nbits, unsigned long start)
{
return FIND_NEXT_BIT(addr1[idx] & ~addr2[idx], /* nop */, nbits, start);
}

static inline unsigned long find_next_andnot_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
unsigned long offset)
{
if (small_const_nbits(size)) {
unsigned long val;

if (unlikely(offset >= size))
return size;

val = *addr1 & ~*addr2 & __GENMASK(size - 1, offset);
return val ? __ffs(val) : size;
}

return _find_next_andnot_bit(addr1, addr2, size, offset);
}

#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
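/*
 * find_next_andnot_bit() returns the index of the first bit that is set in
 * *addr1 and clear in *addr2, starting at offset, or size if there is none -
 * exactly the "mismatched but not empty" bucket scan the backpointers fsck
 * below wants.  A minimal usage sketch, assuming this header is included
 * (hypothetical bitmaps, for illustration):
 */
#include <stdio.h>

int main(void)
{
	unsigned long mismatch[1] = { 0b1110 };	/* bits 1, 2, 3 set */
	unsigned long empty[1]    = { 0b0100 };	/* bit 2 set */

	for (unsigned long i = find_next_andnot_bit(mismatch, empty, 4, 0);
	     i < 4;
	     i = find_next_andnot_bit(mismatch, empty, 4, i + 1))
		printf("%lu\n", i);		/* prints 1, then 3 */
	return 0;
}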
@@ -9,6 +9,8 @@
#include <linux/compiler.h>
#include <linux/slab.h>

#define alloc_hooks(_do, ...) _do

struct kmem_cache;

typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data);
@@ -46,7 +48,10 @@ extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,

extern int mempool_resize(mempool_t *pool, int new_min_nr);
extern void mempool_destroy(mempool_t *pool);
extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc;
extern void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask) __malloc;
#define mempool_alloc(...) \
alloc_hooks(mempool_alloc_noprof(__VA_ARGS__))

extern void mempool_free(void *element, mempool_t *pool);

/*
@@ -23,11 +23,15 @@ struct sysinfo {
__u32 mem_unit; /* Memory unit size in bytes */
};

static inline void si_meminfo(struct sysinfo *val)
{
BUG_ON(syscall(SYS_sysinfo, val));
}

extern unsigned long _totalram_pages;
static inline unsigned long totalram_pages(void)
{
return _totalram_pages;
}

#endif /* _TOOLS_LINUX_MM_H */
@@ -1,6 +1,8 @@
#ifndef _LINUX_SCHED_MM_H
#define _LINUX_SCHED_MM_H

#include <linux/mm.h>

#define PF_MEMALLOC 0x00000800 /* Allocating memory */
#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */
@@ -27,5 +27,6 @@ static inline void sort(void *base, size_t num, size_t size,
}

#define sort_nonatomic(...) sort(__VA_ARGS__)
#define sort_r_nonatomic(...) sort_r(__VA_ARGS__)

#endif
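/*
 * sort_r() is the variant of sort() whose comparator receives a third,
 * opaque context argument; sort_r_nonatomic() maps onto it here.  This is
 * what lets journal_sort_key_cmp() further down reach the struct bch_fs *
 * it now needs.  A minimal sketch of the calling convention (hypothetical
 * data, for illustration):
 */
static int cmp_mod_base(const void *_l, const void *_r, const void *priv)
{
	const int *base = priv;	/* context threaded through by sort_r() */

	return cmp_int((*(const int *) _l + *base) % 16,
		       (*(const int *) _r + *base) % 16);
}

static void sort_r_example(void)
{
	int vals[] = { 7, 2, 9 };
	int base = 5;

	sort_r_nonatomic(vals, ARRAY_SIZE(vals), sizeof(vals[0]),
			 cmp_mod_base, NULL, &base);
}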
@@ -532,10 +532,6 @@ static int check_bp_exists(struct btree_trans *trans,
struct btree_iter other_extent_iter = {};
CLASS(printbuf, buf)();

if (bpos_lt(bp->k.p, s->bp_start) ||
bpos_gt(bp->k.p, s->bp_end))
return 0;

CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp->k.p, 0);
struct bkey_s_c bp_k = bch2_btree_iter_peek_slot(&bp_iter);
int ret = bkey_err(bp_k);
@@ -690,6 +686,10 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
struct bkey_i_backpointer bp;
bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);

if (bpos_lt(bp.k.p, s->bp_start) ||
bpos_gt(bp.k.p, s->bp_end))
continue;

int ret = !empty
? check_bp_exists(trans, s, &bp, k)
: bch2_bucket_backpointer_mod(trans, k, &bp, true);
@@ -809,8 +809,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
for (enum btree_id btree_id = 0;
btree_id < btree_id_nr_alive(c);
btree_id++) {
/* btree_type_has_ptrs should probably include BTREE_ID_stripes,
 * definitely here... */
int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

ret = commit_do(trans, NULL, NULL,
@@ -899,7 +897,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b

struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);

if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen &&
if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointer_bucket_gen &&
(bp.v->bucket_gen != a->gen ||
bp.v->pad)) {
ret = bch2_backpointer_del(trans, bp_k.k->p);
@@ -931,6 +929,14 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
if (sectors[ALLOC_dirty] != a->dirty_sectors ||
sectors[ALLOC_cached] != a->cached_sectors ||
sectors[ALLOC_stripe] != a->stripe_sectors) {
/*
* Post 1.14 upgrade, we assume that backpointers are mostly
* correct and a sector count mismatch is probably due to a
* write buffer race
*
* Pre upgrade, we expect all the buckets to be wrong, a write
* buffer flush is pointless:
*/
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) {
ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
if (ret)
@@ -978,12 +984,22 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
goto next;

struct bpos bucket = bp_pos_to_bucket(ca, pos);
u64 next = ca->mi.nbuckets;
u64 next = min(bucket.offset, ca->mi.nbuckets);

unsigned long *bitmap = READ_ONCE(ca->bucket_backpointer_mismatch.buckets);
if (bitmap)
next = min_t(u64, next,
find_next_bit(bitmap, ca->mi.nbuckets, bucket.offset));
unsigned long *mismatch = READ_ONCE(ca->bucket_backpointer_mismatch.buckets);
unsigned long *empty = READ_ONCE(ca->bucket_backpointer_empty.buckets);
/*
* Find the first bucket with mismatches - but
* not empty buckets; we don't need to pin those
* because we just recreate all backpointers in
* those buckets
*/
if (mismatch && empty)
next = find_next_andnot_bit(mismatch, empty, ca->mi.nbuckets, next);
else if (mismatch)
next = find_next_bit(mismatch, ca->mi.nbuckets, next);
else
next = ca->mi.nbuckets;

bucket.offset = next;
if (bucket.offset == ca->mi.nbuckets)
@@ -1110,17 +1126,18 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
if (ret)
goto err;

u64 nr_buckets = 0, nr_mismatches = 0;
u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0;
for_each_member_device(c, ca) {
nr_buckets += ca->mi.nbuckets;
nr_mismatches += ca->bucket_backpointer_mismatch.nr;
nr_empty += ca->bucket_backpointer_empty.nr;
}

if (!nr_mismatches)
goto err;

bch_info(c, "scanning for missing backpointers in %llu/%llu buckets",
nr_mismatches, nr_buckets);
bch_info(c, "scanning for missing backpointers in %llu/%llu buckets, %llu buckets with no backpointers",
nr_mismatches - nr_empty, nr_buckets, nr_empty);

while (1) {
ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end);
@@ -71,6 +71,7 @@ struct bch_ioctl_incremental {
#define BCH_IOCTL_DISK_ONLINE _IOW(0xbc, 6, struct bch_ioctl_disk)
#define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc, 7, struct bch_ioctl_disk)
#define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc, 8, struct bch_ioctl_disk_set_state)
#define BCH_IOCTL_DISK_SET_STATE_v2 _IOW(0xbc, 22, struct bch_ioctl_disk_set_state_v2)
#define BCH_IOCTL_DATA _IOW(0xbc, 10, struct bch_ioctl_data)
#define BCH_IOCTL_FS_USAGE _IOWR(0xbc, 11, struct bch_ioctl_fs_usage)
#define BCH_IOCTL_DEV_USAGE _IOWR(0xbc, 11, struct bch_ioctl_dev_usage)
@@ -93,6 +94,12 @@ struct bch_ioctl_incremental {

#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *)

struct bch_ioctl_err_msg {
__u64 msg_ptr;
__u32 msg_len;
__u32 pad;
};

/*
* BCH_IOCTL_QUERY_UUID: get filesystem UUID
*
@@ -181,6 +188,14 @@ struct bch_ioctl_disk_set_state {
__u64 dev;
};

struct bch_ioctl_disk_set_state_v2 {
__u32 flags;
__u8 new_state;
__u8 pad[3];
__u64 dev;
struct bch_ioctl_err_msg err;
};
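/*
 * The v2 argument embeds a bch_ioctl_err_msg pointing at a caller-owned
 * buffer; on failure the kernel copies a human-readable error message back
 * into it.  A hedged userspace sketch of filling it in (hypothetical buffer
 * size and device index, for illustration):
 */
char errbuf[512];

struct bch_ioctl_disk_set_state_v2 arg = {
	.flags     = BCH_BY_INDEX,
	.new_state = BCH_MEMBER_STATE_ro,
	.dev       = dev_idx,
	.err = {
		.msg_ptr = (__u64)(unsigned long) errbuf,
		.msg_len = sizeof(errbuf),
	},
};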
#define BCH_DATA_OPS() \
x(scrub, 0) \
x(rereplicate, 1) \

@@ -2,6 +2,8 @@
#ifndef _BCACHEFS_BKEY_BUF_H
#define _BCACHEFS_BKEY_BUF_H

#include <linux/mempool.h>

#include "bcachefs.h"
#include "bkey.h"
@@ -10,41 +12,49 @@ struct bkey_buf {
u64 onstack[12];
};

static inline void bch2_bkey_buf_realloc(struct bkey_buf *s,
struct bch_fs *c, unsigned u64s)
static inline int bch2_bkey_buf_realloc_noprof(struct bkey_buf *s,
struct bch_fs *c, unsigned u64s)
{
if (s->k == (void *) s->onstack &&
u64s > ARRAY_SIZE(s->onstack)) {
s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS);
s->k = mempool_alloc_noprof(&c->large_bkey_pool, GFP_NOFS);
memcpy(s->k, s->onstack, sizeof(s->onstack));
}
}

static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s,
struct bch_fs *c,
struct bkey_s_c k)
return 0; /* for alloc_hooks() macro */
}
#define bch2_bkey_buf_realloc(...) alloc_hooks(bch2_bkey_buf_realloc_noprof(__VA_ARGS__))

static inline int bch2_bkey_buf_reassemble_noprof(struct bkey_buf *s,
struct bch_fs *c,
struct bkey_s_c k)
{
bch2_bkey_buf_realloc(s, c, k.k->u64s);
bkey_reassemble(s->k, k);
return 0;
}
#define bch2_bkey_buf_reassemble(...) alloc_hooks(bch2_bkey_buf_reassemble_noprof(__VA_ARGS__))

static inline void bch2_bkey_buf_copy(struct bkey_buf *s,
struct bch_fs *c,
struct bkey_i *src)
static inline int bch2_bkey_buf_copy_noprof(struct bkey_buf *s,
struct bch_fs *c,
struct bkey_i *src)
{
bch2_bkey_buf_realloc(s, c, src->k.u64s);
bkey_copy(s->k, src);
return 0;
}
#define bch2_bkey_buf_copy(...) alloc_hooks(bch2_bkey_buf_copy_noprof(__VA_ARGS__))

static inline void bch2_bkey_buf_unpack(struct bkey_buf *s,
struct bch_fs *c,
struct btree *b,
struct bkey_packed *src)
static inline int bch2_bkey_buf_unpack_noprof(struct bkey_buf *s,
struct bch_fs *c,
struct btree *b,
struct bkey_packed *src)
{
bch2_bkey_buf_realloc(s, c, BKEY_U64s +
bkeyp_val_u64s(&b->format, src));
bch2_bkey_buf_realloc(s, c, BKEY_U64s + bkeyp_val_u64s(&b->format, src));
bch2_bkey_unpack(b, s->k, src);
return 0;
}
#define bch2_bkey_buf_unpack(...) alloc_hooks(bch2_bkey_buf_unpack_noprof(__VA_ARGS__))
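/*
 * The _noprof + alloc_hooks() split follows the kernel's memory-allocation-
 * profiling convention: the real work lives in foo_noprof(), and foo() is a
 * macro wrapping it in alloc_hooks() so allocations get attributed to the
 * call site (the userspace shim above defines alloc_hooks() to a no-op).
 * That is also why these helpers now return int - alloc_hooks() needs an
 * expression to wrap.  The same pattern, on a hypothetical helper:
 */
static inline int my_buf_grow_noprof(struct my_buf *b, unsigned u64s)
{
	/* ...allocate via the _noprof allocator variants... */
	return 0;	/* value exists only for alloc_hooks() */
}
#define my_buf_grow(...) alloc_hooks(my_buf_grow_noprof(__VA_ARGS__))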
static inline void bch2_bkey_buf_init(struct bkey_buf *s)
{

@@ -356,7 +356,7 @@ again:
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
iter.prefetch = true;

while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) {
BUG_ON(bpos_lt(k.k->p, b->data->min_key));
BUG_ON(bpos_gt(k.k->p, b->data->max_key));
@@ -470,7 +470,7 @@ again:
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
iter.prefetch = true;

while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) {
bch2_bkey_buf_reassemble(&cur_k, c, k);
bch2_btree_and_journal_iter_advance(&iter);

@@ -1014,6 +1014,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
k = bkey_p_next(k);
continue;
drop_this_key:
ret = 0;
next_good_key = k->u64s;

if (!next_good_key ||

@@ -650,7 +650,7 @@ static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, str
i->old_v = bch2_btree_path_peek_slot(trans->paths + i->path, &i->old_k).v;

if (unlikely(trans->journal_replay_not_finished)) {
struct bkey_i *j_k =
const struct bkey_i *j_k =
bch2_journal_keys_peek_slot(c, i->btree_id, i->level,
i->k->k.p);

@@ -848,7 +848,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p
break;

bch2_btree_and_journal_iter_advance(jiter);
k = bch2_btree_and_journal_iter_peek(jiter);
k = bch2_btree_and_journal_iter_peek(c, jiter);
if (!k.k)
break;

@@ -898,7 +898,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,

__bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos);

k = bch2_btree_and_journal_iter_peek(&jiter);
k = bch2_btree_and_journal_iter_peek(c, &jiter);
if (!k.k) {
CLASS(printbuf, buf)();

@@ -2120,10 +2120,10 @@ void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_
}
}

static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
struct btree_iter *iter,
struct bpos search_pos,
struct bpos end_pos)
static const struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
struct btree_iter *iter,
struct bpos search_pos,
struct bpos end_pos)
{
struct btree_path *path = btree_iter_path(trans, iter);

@@ -2139,7 +2139,7 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
struct btree_iter *iter)
{
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos);
const struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos);

if (k) {
iter->k = k->k;
@@ -2156,7 +2156,7 @@ void btree_trans_peek_journal(struct btree_trans *trans,
struct bkey_s_c *k)
{
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *next_journal =
const struct bkey_i *next_journal =
bch2_btree_journal_peek(trans, iter, search_key,
k->k ? k->k->p : path_l(path)->b->key.k.p);
if (next_journal) {
@@ -2165,10 +2165,10 @@ void btree_trans_peek_journal(struct btree_trans *trans,
}
}

static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans,
struct btree_iter *iter,
struct bpos search_key,
struct bpos end_pos)
static const struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans,
struct btree_iter *iter,
struct bpos search_key,
struct bpos end_pos)
{
struct btree_path *path = btree_iter_path(trans, iter);

@@ -2186,7 +2186,7 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans,
struct bkey_s_c *k)
{
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *next_journal =
const struct bkey_i *next_journal =
bch2_btree_journal_peek_prev(trans, iter, search_key,
k->k ? k->k->p : path_l(path)->b->data->min_key);
@@ -46,21 +46,22 @@ static size_t __bch2_journal_key_search(struct journal_keys *keys,
enum btree_id id, unsigned level,
struct bpos pos)
{
struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys);
size_t l = 0, r = keys->nr, m;

while (l < r) {
m = l + ((r - l) >> 1);
if (__journal_key_cmp(id, level, pos, idx_to_key(keys, m)) > 0)
if (__journal_key_cmp(c, id, level, pos, idx_to_key(keys, m)) > 0)
l = m + 1;
else
r = m;
}

BUG_ON(l < keys->nr &&
__journal_key_cmp(id, level, pos, idx_to_key(keys, l)) > 0);
__journal_key_cmp(c, id, level, pos, idx_to_key(keys, l)) > 0);

BUG_ON(l &&
__journal_key_cmp(id, level, pos, idx_to_key(keys, l - 1)) <= 0);
__journal_key_cmp(c, id, level, pos, idx_to_key(keys, l - 1)) <= 0);

return l;
}
@@ -72,10 +73,20 @@ static size_t bch2_journal_key_search(struct journal_keys *keys,
return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos));
}

static inline struct journal_key_range_overwritten *__overwrite_range(struct journal_keys *keys, u32 idx)
{
return idx ? keys->overwrites.data + idx : NULL;
}

static inline struct journal_key_range_overwritten *overwrite_range(struct journal_keys *keys, u32 idx)
{
return idx ? rcu_dereference(keys->overwrites.data) + idx : NULL;
}

/* Returns first non-overwritten key >= search key: */
struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id,
unsigned level, struct bpos pos,
struct bpos end_pos, size_t *idx)
const struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id,
unsigned level, struct bpos pos,
struct bpos end_pos, size_t *idx)
{
struct journal_keys *keys = &c->journal_keys;
unsigned iters = 0;
@@ -87,7 +98,7 @@ search:
*idx = __bch2_journal_key_search(keys, btree_id, level, pos);

while (*idx &&
__journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
__journal_key_cmp(c, btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
--(*idx);
iters++;
if (iters == 10) {
@@ -96,23 +107,23 @@ search:
}
}

struct bkey_i *ret = NULL;
const struct bkey_i *ret = NULL;
rcu_read_lock(); /* for overwritten_ranges */

while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
if (__journal_key_cmp(btree_id, level, end_pos, k) < 0)
if (__journal_key_cmp(c, btree_id, level, end_pos, k) < 0)
break;

if (k->overwritten) {
if (k->overwritten_range)
*idx = rcu_dereference(k->overwritten_range)->end;
*idx = overwrite_range(keys, k->overwritten_range)->end;
else
*idx += 1;
continue;
}

if (__journal_key_cmp(btree_id, level, pos, k) <= 0) {
ret = k->k;
if (__journal_key_cmp(c, btree_id, level, pos, k) <= 0) {
ret = journal_key_k(c, k);
break;
}

@@ -129,9 +140,9 @@ search:
return ret;
}

struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id,
unsigned level, struct bpos pos,
struct bpos end_pos, size_t *idx)
const struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id,
unsigned level, struct bpos pos,
struct bpos end_pos, size_t *idx)
{
struct journal_keys *keys = &c->journal_keys;
unsigned iters = 0;
@@ -146,7 +157,7 @@ search:
*idx = __bch2_journal_key_search(keys, btree_id, level, pos);

while (*idx < keys->nr &&
__journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) {
__journal_key_cmp(c, btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) {
(*idx)++;
iters++;
if (iters == 10) {
@@ -158,25 +169,25 @@ search:
if (*idx == keys->nr)
--(*idx);

struct bkey_i *ret = NULL;
const struct bkey_i *ret = NULL;
rcu_read_lock(); /* for overwritten_ranges */

while (true) {
k = idx_to_key(keys, *idx);
if (__journal_key_cmp(btree_id, level, end_pos, k) > 0)
if (__journal_key_cmp(c, btree_id, level, end_pos, k) > 0)
break;

if (k->overwritten) {
if (k->overwritten_range)
*idx = rcu_dereference(k->overwritten_range)->start;
*idx = overwrite_range(keys, k->overwritten_range)->start;
if (!*idx)
break;
--(*idx);
continue;
}

if (__journal_key_cmp(btree_id, level, pos, k) >= 0) {
ret = k->k;
if (__journal_key_cmp(c, btree_id, level, pos, k) >= 0) {
ret = journal_key_k(c, k);
break;
}

@@ -194,8 +205,8 @@ search:
return ret;
}

struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id,
unsigned level, struct bpos pos)
const struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id,
unsigned level, struct bpos pos)
{
size_t idx = 0;

@@ -264,13 +275,8 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
struct journal_key n = {
.btree_id = id,
.level = level,
.k = k,
.allocated = true,
/*
* Ensure these keys are done last by journal replay, to unblock
* journal reclaim:
*/
.journal_seq = U64_MAX,
.allocated_k = k,
};
struct journal_keys *keys = &c->journal_keys;
size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
@@ -278,8 +284,8 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
BUG_ON(test_bit(BCH_FS_rw, &c->flags));

if (idx < keys->size &&
journal_key_cmp(&n, &keys->data[idx]) == 0) {
struct bkey_i *o = keys->data[idx].k;
journal_key_cmp(c, &n, &keys->data[idx]) == 0) {
struct bkey_i *o = journal_key_k(c, &keys->data[idx]);

if (k->k.type == KEY_TYPE_accounting &&
o->k.type == KEY_TYPE_accounting) {
@@ -291,7 +297,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
}

if (keys->data[idx].allocated)
kfree(keys->data[idx].k);
kfree(keys->data[idx].allocated_k);
keys->data[idx] = n;
return 0;
}
@@ -376,17 +382,20 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree,
unsigned level, struct bpos pos)
{
struct journal_keys *keys = &trans->c->journal_keys;
size_t idx = bch2_journal_key_search(keys, btree, level, pos);

if (!trans->journal_replay_not_finished)
return false;

return (idx < keys->size &&
keys->data[idx].btree_id == btree &&
keys->data[idx].level == level &&
bpos_eq(keys->data[idx].k->k.p, pos) &&
bkey_deleted(&keys->data[idx].k->k));
struct bch_fs *c = trans->c;
struct journal_keys *keys = &c->journal_keys;
size_t idx = bch2_journal_key_search(keys, btree, level, pos);

if (idx >= keys->size ||
keys->data[idx].btree_id != btree ||
keys->data[idx].level != level)
return false;

struct bkey_i *k = journal_key_k(c, &keys->data[idx]);
return bpos_eq(k->k.p, pos) && bkey_deleted(&k->k);
}

static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos)
@@ -403,9 +412,9 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos
bool next_overwritten = next && next->overwritten;

struct journal_key_range_overwritten *prev_range =
prev_overwritten ? prev->overwritten_range : NULL;
prev_overwritten ? __overwrite_range(keys, prev->overwritten_range) : NULL;
struct journal_key_range_overwritten *next_range =
next_overwritten ? next->overwritten_range : NULL;
next_overwritten ? __overwrite_range(keys, next->overwritten_range) : NULL;

BUG_ON(prev_range && prev_range->end != idx);
BUG_ON(next_range && next_range->start != idx + 1);
@@ -413,37 +422,47 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos
if (prev_range && next_range) {
prev_range->end = next_range->end;

keys->data[pos].overwritten_range = prev_range;
keys->data[pos].overwritten_range = prev->overwritten_range;

u32 old = next->overwritten_range;

for (size_t i = next_range->start; i < next_range->end; i++) {
struct journal_key *ip = keys->data + idx_to_pos(keys, i);
BUG_ON(ip->overwritten_range != next_range);
ip->overwritten_range = prev_range;
BUG_ON(ip->overwritten_range != old);
ip->overwritten_range = prev->overwritten_range;
}

kfree_rcu_mightsleep(next_range);
} else if (prev_range) {
prev_range->end++;
k->overwritten_range = prev_range;
k->overwritten_range = prev->overwritten_range;
if (next_overwritten) {
prev_range->end++;
next->overwritten_range = prev_range;
next->overwritten_range = prev->overwritten_range;
}
} else if (next_range) {
next_range->start--;
k->overwritten_range = next_range;
k->overwritten_range = next->overwritten_range;
if (prev_overwritten) {
next_range->start--;
prev->overwritten_range = next_range;
prev->overwritten_range = next->overwritten_range;
}
} else if (prev_overwritten || next_overwritten) {
struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL);
if (!r)
/* 0 is a sentinel value */
if (darray_resize_rcu(&keys->overwrites, max(keys->overwrites.nr + 1, 2)))
return;

r->start = idx - (size_t) prev_overwritten;
r->end = idx + 1 + (size_t) next_overwritten;
if (!keys->overwrites.nr)
darray_push(&keys->overwrites, (struct journal_key_range_overwritten) {});

darray_push(&keys->overwrites, ((struct journal_key_range_overwritten) {
.start = idx - (size_t) prev_overwritten,
.end = idx + 1 + (size_t) next_overwritten,
}));

smp_wmb();
u32 r = keys->overwrites.nr - 1;

k->overwritten_range = r;

rcu_assign_pointer(k->overwritten_range, r);
if (prev_overwritten)
prev->overwritten_range = r;
if (next_overwritten)
@@ -457,11 +476,15 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
struct journal_keys *keys = &c->journal_keys;
size_t idx = bch2_journal_key_search(keys, btree, level, pos);

if (idx < keys->size &&
keys->data[idx].btree_id == btree &&
keys->data[idx].level == level &&
bpos_eq(keys->data[idx].k->k.p, pos) &&
!keys->data[idx].overwritten) {
if (idx >= keys->size ||
keys->data[idx].btree_id != btree ||
keys->data[idx].level != level ||
keys->data[idx].overwritten)
return;

struct bkey_i *k = journal_key_k(c, &keys->data[idx]);

if (bpos_eq(k->k.p, pos)) {
guard(mutex)(&keys->overwrite_lock);
__bch2_journal_key_overwritten(keys, idx);
}
@@ -476,7 +499,7 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)
}
}

static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
static struct bkey_s_c bch2_journal_iter_peek(struct bch_fs *c, struct journal_iter *iter)
{
journal_iter_verify(iter);

@@ -490,10 +513,10 @@ static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
BUG_ON(cmp);

if (!k->overwritten)
return bkey_i_to_s_c(k->k);
return bkey_i_to_s_c(journal_key_k(c, k));

if (k->overwritten_range)
iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end);
iter->idx = idx_to_pos(iter->keys, overwrite_range(iter->keys, k->overwritten_range)->end);
else
bch2_journal_iter_advance(iter);
}
@@ -554,7 +577,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter

while (nr--) {
bch2_btree_and_journal_iter_advance(&iter);
struct bkey_s_c k = bch2_btree_and_journal_iter_peek(&iter);
struct bkey_s_c k = bch2_btree_and_journal_iter_peek(c, &iter);
if (!k.k)
break;

@@ -565,7 +588,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter
bch2_bkey_buf_exit(&tmp, c);
}

struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct bch_fs *c, struct btree_and_journal_iter *iter)
{
struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret;
size_t iters = 0;
@@ -586,7 +609,7 @@ again:
bch2_journal_iter_advance_btree(iter);

if (iter->trans->journal_replay_not_finished)
while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
while ((journal_k = bch2_journal_iter_peek(c, &iter->journal)).k &&
bpos_lt(journal_k.k->p, iter->pos))
bch2_journal_iter_advance(&iter->journal);

@@ -658,15 +681,22 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
/*
* When keys compare equal, oldest compares first:
*/
static int journal_sort_key_cmp(const void *_l, const void *_r)
static int journal_sort_key_cmp(const void *_l, const void *_r, const void *priv)
{
struct bch_fs *c = (void *) priv;
const struct journal_key *l = _l;
const struct journal_key *r = _r;
int rewind = l->rewind && r->rewind ? -1 : 1;

return journal_key_cmp(l, r) ?:
((cmp_int(l->journal_seq, r->journal_seq) ?:
cmp_int(l->journal_offset, r->journal_offset)) * rewind);
int cmp = journal_key_cmp(c, l, r);
if (cmp)
return cmp;

if (l->allocated || r->allocated)
return cmp_int(l->allocated, r->allocated);

return ((cmp_int(l->journal_seq_offset, r->journal_seq_offset) ?:
cmp_int(l->journal_offset, r->journal_offset)) * rewind);
}

void bch2_journal_keys_put(struct bch_fs *c)
@@ -680,20 +710,16 @@ void bch2_journal_keys_put(struct bch_fs *c)

move_gap(keys, keys->nr);

darray_for_each(*keys, i) {
if (i->overwritten_range &&
(i == &darray_last(*keys) ||
i->overwritten_range != i[1].overwritten_range))
kfree(i->overwritten_range);

darray_for_each(*keys, i)
if (i->allocated)
kfree(i->k);
}
kfree(i->allocated_k);

kvfree(keys->data);
keys->data = NULL;
keys->nr = keys->gap = keys->size = 0;

darray_exit(&keys->overwrites);

struct journal_replay **i;
struct genradix_iter iter;

@@ -704,8 +730,10 @@ void bch2_journal_keys_put(struct bch_fs *c)

static void __journal_keys_sort(struct journal_keys *keys)
{
sort_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]),
journal_sort_key_cmp, NULL);
struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys);

sort_r_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]),
journal_sort_key_cmp, NULL, c);

cond_resched();

@@ -717,9 +745,10 @@ static void __journal_keys_sort(struct journal_keys *keys)
* compare each individual accounting key against the version in
* the btree during replay:
*/
if (src->k->k.type != KEY_TYPE_accounting &&
struct bkey_i *k = journal_key_k(c, src);
if (k->k.type != KEY_TYPE_accounting &&
src + 1 < &darray_top(*keys) &&
!journal_key_cmp(src, src + 1))
!journal_key_cmp(c, src, src + 1))
continue;

*dst++ = *src;
@@ -763,8 +792,7 @@ int bch2_journal_keys_sort(struct bch_fs *c)
.btree_id = entry->btree_id,
.level = entry->level,
.rewind = rewind,
.k = k,
.journal_seq = le64_to_cpu(i->j.seq),
.journal_seq_offset = journal_entry_radix_idx(c, le64_to_cpu(i->j.seq)),
.journal_offset = k->_data - i->j._data,
};

@@ -801,13 +829,18 @@ void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree,

move_gap(keys, keys->nr);

darray_for_each(*keys, i)
darray_for_each(*keys, i) {
struct bkey_i *k = journal_key_k(c, i);

if (!(i->btree_id == btree &&
i->level >= level_min &&
i->level <= level_max &&
bpos_ge(i->k->k.p, start) &&
bpos_le(i->k->k.p, end)))
bpos_ge(k->k.p, start) &&
bpos_le(k->k.p, end)))
keys->data[dst++] = *i;
else if (i->allocated)
kfree(i->allocated_k);
}
keys->nr = keys->gap = dst;
}

@@ -825,7 +858,7 @@ void bch2_journal_keys_dump(struct bch_fs *c)
prt_printf(&buf, "btree=");
bch2_btree_id_to_text(&buf, i->btree_id);
prt_printf(&buf, " l=%u ", i->level);
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k));
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(journal_key_k(c, i)));
pr_err("%s", buf.buf);
}
}

@@ -29,6 +29,22 @@ struct btree_and_journal_iter {
bool fail_if_too_many_whiteouts;
};

static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq)
{
return (seq - c->journal_entries_base_seq) & (~0U >> 1);
}

static inline struct bkey_i *journal_key_k(struct bch_fs *c,
const struct journal_key *k)
{
if (k->allocated)
return k->allocated_k;

struct journal_replay *i = *genradix_ptr(&c->journal_entries, k->journal_seq_offset);

return (struct bkey_i *) (i->j._data + k->journal_offset);
}
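/*
 * Callers that used to dereference i->k directly now go through this
 * accessor: keys still resident in the read-in journal buffers are stored
 * as a (sequence offset, entry offset) pair rather than as a pointer, while
 * separately-allocated keys short-circuit via allocated_k.  The conversion
 * pattern at call sites (illustrative):
 *
 *	struct bkey_i *k = i->k;			// before
 *	struct bkey_i *k = journal_key_k(c, i);		// after
 */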
static inline int __journal_key_btree_cmp(enum btree_id l_btree_id,
unsigned l_level,
const struct journal_key *r)
@@ -37,25 +53,28 @@ static inline int __journal_key_btree_cmp(enum btree_id l_btree_id,
cmp_int(l_btree_id, r->btree_id);
}

static inline int __journal_key_cmp(enum btree_id l_btree_id,
static inline int __journal_key_cmp(struct bch_fs *c,
enum btree_id l_btree_id,
unsigned l_level,
struct bpos l_pos,
const struct journal_key *r)
{
return __journal_key_btree_cmp(l_btree_id, l_level, r) ?:
bpos_cmp(l_pos, r->k->k.p);
bpos_cmp(l_pos, journal_key_k(c, r)->k.p);
}

static inline int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)
static inline int journal_key_cmp(struct bch_fs *c,
const struct journal_key *l, const struct journal_key *r)
{
return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r);
return __journal_key_cmp(c, l->btree_id, l->level,
journal_key_k(c, l)->k.p, r);
}

struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id,
const struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *, enum btree_id,
unsigned, struct bpos, struct bpos, size_t *);
struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id,
const struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *, enum btree_id,
unsigned, struct bpos, struct bpos, size_t *);
struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id,
const struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id,
unsigned, struct bpos);

int bch2_btree_and_journal_iter_prefetch(struct btree_trans *, struct btree_path *,
@@ -71,7 +90,7 @@ bool bch2_key_deleted_in_journal(struct btree_trans *, enum btree_id, unsigned,
void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, unsigned, struct bpos);

void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct bch_fs *, struct btree_and_journal_iter *);

void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *,

@@ -2,21 +2,47 @@
#ifndef _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H
#define _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H

struct journal_ptr {
bool csum_good;
struct bch_csum csum;
u8 dev;
u32 bucket;
u32 bucket_offset;
u64 sector;
};

/*
* Only used for holding the journal entries we read in btree_journal_read()
* during cache_registration
*/
struct journal_replay {
DARRAY_PREALLOCATED(struct journal_ptr, 8) ptrs;

bool csum_good;
bool ignore_blacklisted;
bool ignore_not_dirty;
/* must be last: */
struct jset j;
};

struct journal_key_range_overwritten {
size_t start, end;
};

struct journal_key {
u64 journal_seq;
u32 journal_offset;
union {
struct {
u32 journal_seq_offset;
u32 journal_offset;
};
struct bkey_i *allocated_k;
};
enum btree_id btree_id:8;
unsigned level:8;
bool allocated:1;
bool overwritten:1;
bool rewind:1;
struct journal_key_range_overwritten __rcu *
overwritten_range;
struct bkey_i *k;
u32 overwritten_range;
};
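/*
 * The union is a size optimization: the 8-byte bkey pointer and the 8-byte
 * overwritten_range pointer are both replaced by 4-byte indices (into the
 * journal-entry radix tree and the overwrites darray, index 0 meaning
 * "none").  Roughly, assuming LP64 and typical packing - exact sizes are
 * compiler-dependent:
 *
 *	before: 8 (journal_seq) + 4 (journal_offset) + 4 (ids/flags)
 *	      + 8 (overwritten_range ptr) + 8 (k ptr)	= 32 bytes
 *	after:  8 (union) + 4 (ids/flags)
 *	      + 4 (overwritten_range index)		= 16 bytes
 *
 * which can matter because journal_keys may hold a very large number of
 * entries during recovery.
 */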
struct journal_keys {
@@ -31,7 +57,9 @@ struct journal_keys {
size_t gap;
atomic_t ref;
bool initial_ref_held;

struct mutex overwrite_lock;
DARRAY(struct journal_key_range_overwritten) overwrites;
};

#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H */

@@ -54,7 +54,7 @@ static void verify_update_old_key(struct btree_trans *trans, struct btree_insert
struct bkey_s_c k = bch2_btree_path_peek_slot_exact(trans->paths + i->path, &u);

if (unlikely(trans->journal_replay_not_finished)) {
struct bkey_i *j_k =
const struct bkey_i *j_k =
bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p);

if (j_k)

@@ -403,7 +403,7 @@ __btree_trans_update_by_path(struct btree_trans *trans,
i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0;

if (unlikely(trans->journal_replay_not_finished)) {
struct bkey_i *j_k =
const struct bkey_i *j_k =
bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p);

if (j_k) {

@@ -95,7 +95,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
if (!b->c.level)
goto out;

while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
while ((k = bch2_btree_and_journal_iter_peek(c, &iter)).k) {
if (k.k->type != KEY_TYPE_btree_ptr_v2)
goto out;

@@ -111,7 +111,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
CLASS(printbuf, buf)();
int ret = 0;

CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev);
CLASS(bch2_dev_tryget_noerror, ca)(c, p.ptr.dev);
if (!ca) {
if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID,
trans, ptr_to_invalid_device,

@@ -287,11 +287,44 @@ static long bch2_ioctl_disk_set_state(struct bch_fs *c,
if (IS_ERR(ca))
return PTR_ERR(ca);

int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags);
CLASS(printbuf, err)();
int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags, &err);
bch_err_msg(ca, ret, "setting device state");
return ret;
}

static long bch2_ioctl_disk_set_state_v2(struct bch_fs *c,
struct bch_ioctl_disk_set_state_v2 arg)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;

if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
BCH_FORCE_IF_METADATA_LOST|
BCH_FORCE_IF_DEGRADED|
BCH_BY_INDEX)) ||
arg.pad[0] || arg.pad[1] || arg.pad[2] ||
arg.new_state >= BCH_MEMBER_STATE_NR)
return -EINVAL;

CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags);
if (IS_ERR(ca))
return PTR_ERR(ca);

CLASS(printbuf, err)();
int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags, &err);
if (ret) {
if (err.pos > arg.err.msg_len)
return -ERANGE;

prt_printf(&err, "\nerror=%s", bch2_err_str(ret));
ret = copy_to_user_errcode((void __user *)(ulong)arg.err.msg_ptr,
err.buf,
err.pos) ?: ret;
}
return ret;
}
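/*
 * Continuing the userspace sketch from the header above: the caller issues
 * the v2 ioctl and, on failure, reads back the message the kernel copied
 * into its buffer (-ERANGE signals the buffer was too small).  Hypothetical
 * file descriptor, for illustration:
 */
if (ioctl(fs_fd, BCH_IOCTL_DISK_SET_STATE_v2, &arg)) {
	if (errno == ERANGE)
		fprintf(stderr, "error message larger than buffer\n");
	else
		fprintf(stderr, "%.*s\n", (int) sizeof(errbuf), errbuf);
}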
struct bch_data_ctx {
struct thread_with_file thr;

@@ -692,6 +725,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
BCH_IOCTL(disk_offline, struct bch_ioctl_disk);
case BCH_IOCTL_DISK_SET_STATE:
BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state);
case BCH_IOCTL_DISK_SET_STATE_v2:
BCH_IOCTL(disk_set_state_v2, struct bch_ioctl_disk_set_state_v2);
case BCH_IOCTL_DATA:
BCH_IOCTL(data, struct bch_ioctl_data);
case BCH_IOCTL_DISK_RESIZE:

@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/log2.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "darray.h"

int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp,
bool rcu)
{
if (new_size > d->size) {
new_size = roundup_pow_of_two(new_size);
@@ -20,18 +22,25 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_
if (unlikely(check_mul_overflow(new_size, element_size, &bytes)))
return -ENOMEM;

void *data = likely(bytes < INT_MAX)
void *old = d->data;
void *new = likely(bytes < INT_MAX)
? kvmalloc_noprof(bytes, gfp)
: vmalloc_noprof(bytes);
if (!data)
if (!new)
return -ENOMEM;

if (d->size)
memcpy(data, d->data, d->size * element_size);
if (d->data != d->preallocated)
kvfree(d->data);
d->data = data;
memcpy(new, old, d->size * element_size);

rcu_assign_pointer(d->data, new);
d->size = new_size;

if (old != d->preallocated) {
if (!rcu)
kvfree(old);
else
kvfree_rcu_mightsleep(old);
}
}

return 0;

@@ -34,17 +34,17 @@ typedef DARRAY(s16) darray_s16;
typedef DARRAY(s32) darray_s32;
typedef DARRAY(s64) darray_s64;

int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t);
int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t, bool);

#define __bch2_darray_resize(...) alloc_hooks(__bch2_darray_resize_noprof(__VA_ARGS__))

#define __darray_resize(_d, _element_size, _new_size, _gfp) \
#define __darray_resize(_d, _element_size, _new_size, _gfp, _rcu) \
(unlikely((_new_size) > (_d)->size) \
? __bch2_darray_resize((_d), (_element_size), (_new_size), (_gfp))\
? __bch2_darray_resize((_d), (_element_size), (_new_size), (_gfp), _rcu)\
: 0)

#define darray_resize_gfp(_d, _new_size, _gfp) \
__darray_resize((darray_char *) (_d), sizeof((_d).data[0]), (_new_size), _gfp)
__darray_resize((darray_char *) (_d), sizeof((_d).data[0]), (_new_size), _gfp, false)

#define darray_resize(_d, _new_size) \
darray_resize_gfp(_d, _new_size, GFP_KERNEL)
@@ -55,6 +55,12 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t, bool);
#define darray_make_room(_d, _more) \
darray_make_room_gfp(_d, _more, GFP_KERNEL)

#define darray_resize_rcu(_d, _new_size) \
__darray_resize((darray_char *) (_d), sizeof((_d).data[0]), (_new_size), GFP_KERNEL, true)

#define darray_make_room_rcu(_d, _more) \
darray_resize_rcu((_d), (_d).nr + (_more))
#define darray_room(_d) ((_d).size - (_d).nr)

#define darray_top(_d) ((_d).data[(_d).nr])
@@ -107,8 +113,11 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t, bool);
#define __darray_for_each(_d, _i) \
for ((_i) = (_d).data; _i < (_d).data + (_d).nr; _i++)

#define darray_for_each_from(_d, _i, _start) \
for (typeof(&(_d).data[0]) _i = _start; _i < (_d).data + (_d).nr; _i++)

#define darray_for_each(_d, _i) \
for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++)
darray_for_each_from(_d, _i, (_d).data)

#define darray_for_each_reverse(_d, _i) \
for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i)

@@ -734,6 +734,37 @@ invalid_device:
goto fsck_err;
}

static struct journal_key *accumulate_newer_accounting_keys(struct bch_fs *c, struct journal_key *i)
{
struct journal_keys *keys = &c->journal_keys;
struct bkey_i *k = journal_key_k(c, i);

darray_for_each_from(*keys, j, i + 1) {
if (journal_key_cmp(c, i, j))
return j;

struct bkey_i *n = journal_key_k(c, j);
if (n->k.type == KEY_TYPE_accounting) {
WARN_ON(bversion_cmp(k->k.bversion, n->k.bversion) >= 0);

bch2_accounting_accumulate(bkey_i_to_accounting(k),
bkey_i_to_s_c_accounting(n));
j->overwritten = true;
}
}

return &darray_top(*keys);
}

static struct journal_key *accumulate_and_read_journal_accounting(struct btree_trans *trans, struct journal_key *i)
{
struct bch_fs *c = trans->c;
struct journal_key *next = accumulate_newer_accounting_keys(c, i);

int ret = accounting_read_key(trans, bkey_i_to_s_c(journal_key_k(c, i)));
return ret ? ERR_PTR(ret) : next;
}
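/*
 * Together these implement one side of a merge join: the journal keys are
 * already sorted, so bch2_accounting_read() below can walk the accounting
 * btree and the journal window [jk, end) in lockstep, folding each run of
 * same-position accounting keys into the first one and marking the rest
 * overwritten.  The shape of the loop, schematically (simplified, not the
 * exact code):
 *
 *	for_each_btree_key(k) {
 *		while (jk < end && journal key position < k.k->p)
 *			jk = accumulate_and_read_journal_accounting(trans, jk);
 *		...		// then handle k itself
 *	}
 */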
|
||||
|
||||
/*
|
||||
* At startup time, initialize the in memory accounting from the btree (and
|
||||
* journal)
|
||||
@ -759,80 +790,76 @@ int bch2_accounting_read(struct bch_fs *c)
|
||||
percpu_memset(c->usage, 0, sizeof(*c->usage));
|
||||
}
|
||||
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
struct journal_key *jk = keys->data;
|
||||
|
||||
while (jk < &darray_top(*keys) &&
|
||||
__journal_key_cmp(c, BTREE_ID_accounting, 0, POS_MIN, jk) > 0)
|
||||
jk++;
|
||||
|
||||
struct journal_key *end = jk;
|
||||
while (end < &darray_top(*keys) &&
|
||||
__journal_key_cmp(c, BTREE_ID_accounting, 0, SPOS_MAX, end) > 0)
|
||||
end++;
|
||||
|
||||
struct btree_iter iter;
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN,
|
||||
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots);
|
||||
iter.flags &= ~BTREE_ITER_with_journal;
|
||||
int ret = for_each_btree_key_continue(trans, iter,
|
||||
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
|
||||
struct bkey u;
|
||||
struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u);
|
||||
struct bkey u;
|
||||
struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u);
|
||||
|
||||
if (k.k->type != KEY_TYPE_accounting)
|
||||
continue;
|
||||
if (k.k->type != KEY_TYPE_accounting)
|
||||
continue;
|
||||
|
||||
struct disk_accounting_pos acc_k;
|
||||
bpos_to_disk_accounting_pos(&acc_k, k.k->p);
|
||||
while (jk < end &&
|
||||
__journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) > 0)
|
||||
jk = accumulate_and_read_journal_accounting(trans, jk);
|
||||
|
||||
if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
|
||||
break;
|
||||
while (jk < end &&
|
||||
__journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0 &&
|
||||
bversion_cmp(journal_key_k(c, jk)->k.bversion, k.k->bversion) <= 0) {
|
||||
jk->overwritten = true;
|
||||
jk++;
|
||||
}
|
||||
|
||||
if (!bch2_accounting_is_mem(&acc_k)) {
|
||||
struct disk_accounting_pos next;
|
||||
memset(&next, 0, sizeof(next));
|
||||
next.type = acc_k.type + 1;
|
||||
bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
|
||||
continue;
|
||||
}
|
||||
if (jk < end &&
|
||||
__journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0)
|
||||
jk = accumulate_and_read_journal_accounting(trans, jk);
|
||||
|
||||
accounting_read_key(trans, k);
|
||||
}));
|
||||
struct disk_accounting_pos acc_k;
|
||||
bpos_to_disk_accounting_pos(&acc_k, k.k->p);
|
||||
|
||||
if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
|
||||
break;
|
||||
|
||||
if (!bch2_accounting_is_mem(&acc_k)) {
|
||||
struct disk_accounting_pos next_acc;
|
||||
memset(&next_acc, 0, sizeof(next_acc));
|
||||
next_acc.type = acc_k.type + 1;
|
||||
struct bpos next = disk_accounting_pos_to_bpos(&next_acc);
|
||||
if (jk < end)
|
||||
next = bpos_min(next, journal_key_k(c, jk)->k.p);
|
||||
|
||||
bch2_btree_iter_set_pos(&iter, next);
|
||||
continue;
|
||||
}
|
||||
|
||||
accounting_read_key(trans, k);
|
||||
}));
|
||||
bch2_trans_iter_exit(&iter);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
while (jk < end)
|
||||
jk = accumulate_and_read_journal_accounting(trans, jk);
|
||||
|
||||
struct journal_key *dst = keys->data;
|
||||
move_gap(keys, keys->nr);
|
||||
|
||||
darray_for_each(*keys, i) {
|
||||
if (i->k->k.type == KEY_TYPE_accounting) {
|
||||
struct disk_accounting_pos acc_k;
|
||||
bpos_to_disk_accounting_pos(&acc_k, i->k->k.p);
|
||||
|
||||
if (!bch2_accounting_is_mem(&acc_k))
|
||||
continue;
|
||||
|
||||
struct bkey_s_c k = bkey_i_to_s_c(i->k);
|
||||
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr,
|
||||
sizeof(acc->k.data[0]),
|
||||
accounting_pos_cmp, &k.k->p);
|
||||
|
||||
bool applied = idx < acc->k.nr &&
|
||||
bversion_cmp(acc->k.data[idx].bversion, k.k->bversion) >= 0;
|
||||
|
||||
if (applied)
|
||||
continue;
|
||||
|
||||
if (i + 1 < &darray_top(*keys) &&
|
||||
i[1].k->k.type == KEY_TYPE_accounting &&
|
||||
!journal_key_cmp(i, i + 1)) {
|
||||
WARN_ON(bversion_cmp(i[0].k->k.bversion, i[1].k->k.bversion) >= 0);
|
||||
|
||||
i[1].journal_seq = i[0].journal_seq;
|
||||
|
||||
bch2_accounting_accumulate(bkey_i_to_accounting(i[1].k),
|
||||
bkey_s_c_to_accounting(k));
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = accounting_read_key(trans, k);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
*dst++ = *i;
|
||||
}
|
||||
darray_for_each(*keys, i)
|
||||
if (!i->overwritten)
|
||||
*dst++ = *i;
|
||||
keys->gap = keys->nr = dst - keys->data;
|
||||
|
||||
guard(percpu_write)(&c->mark_lock);
|
||||
|
||||
@ -141,14 +141,16 @@ void bch2_io_error_work(struct work_struct *work)
|
||||
if (ca->mi.state >= BCH_MEMBER_STATE_ro)
|
||||
return;
|
||||
|
||||
bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
|
||||
BCH_FORCE_IF_DEGRADED);
|
||||
CLASS(printbuf, buf)();
|
||||
__bch2_log_msg_start(ca->name, &buf);
|
||||
|
||||
prt_printf(&buf, "writes erroring for %u seconds, setting %s ro",
|
||||
c->opts.write_error_timeout,
|
||||
dev ? "device" : "filesystem");
|
||||
prt_printf(&buf, "writes erroring for %u seconds\n",
|
||||
c->opts.write_error_timeout);
|
||||
|
||||
bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
|
||||
BCH_FORCE_IF_DEGRADED, &buf);
|
||||
|
||||
prt_printf(&buf, "setting %s ro", dev ? "device" : "filesystem");
|
||||
if (!dev)
|
||||
bch2_fs_emergency_read_only2(c, &buf);
|
||||
|
||||
|
||||
@ -120,6 +120,7 @@ static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
|
||||
INIT_LIST_HEAD(&p->flushed[i]);
|
||||
atomic_set(&p->count, count);
|
||||
p->devs.nr = 0;
|
||||
p->bytes = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -264,6 +265,11 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
|
||||
/* Close out old buffer: */
|
||||
buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
|
||||
|
||||
struct journal_entry_pin_list *pin_list =
|
||||
journal_seq_pin(j, journal_cur_seq(j));
|
||||
pin_list->bytes = roundup_pow_of_two(vstruct_bytes(buf->data));
|
||||
j->dirty_entry_bytes += pin_list->bytes;
|
||||
|
||||
if (trace_journal_entry_close_enabled() && trace) {
|
||||
CLASS(printbuf, err)();
|
||||
guard(printbuf_atomic)(&err);
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
#include "alloc_background.h"
|
||||
#include "alloc_foreground.h"
|
||||
#include "btree_io.h"
|
||||
#include "btree_journal_iter.h"
|
||||
#include "btree_update_interior.h"
|
||||
#include "btree_write_buffer.h"
|
||||
#include "buckets.h"
|
||||
@ -106,11 +107,6 @@ static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *cs
|
||||
return !bch2_crc_cmp(j->csum, *csum);
|
||||
}
|
||||
|
||||
static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq)
|
||||
{
|
||||
return (seq - c->journal_entries_base_seq) & (~0U >> 1);
|
||||
}
|
||||
|
||||
static void __journal_replay_free(struct bch_fs *c,
|
||||
struct journal_replay *i)
|
||||
{
|
||||
@ -195,6 +191,23 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
}
}

/* Drop overwrites, log entries if we don't need them: */
if (!c->opts.retain_recovery_info &&
!c->opts.journal_rewind) {
struct jset_entry *dst = j->start;
vstruct_for_each_safe(j, src) {
if (src->type == BCH_JSET_ENTRY_log ||
src->type == BCH_JSET_ENTRY_overwrite)
continue;

memcpy(dst, src, vstruct_bytes(src));
dst = vstruct_next(dst);
}

j->u64s = cpu_to_le32((u64 *) dst - j->_data);
bytes = vstruct_bytes(j);
}

jlist->last_seq = max(jlist->last_seq, last_seq);

_i = genradix_ptr_alloc(&c->journal_entries,
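The new block compacts a jset in place: a write pointer (dst) trails the read cursor (src), log and overwrite entries are simply skipped, and u64s is recomputed from how far dst advanced. The same two-pointer compaction, reduced to a standalone array filter:

#include <stdio.h>

/* drop negative values in place; dst trails src, exactly like the
 * vstruct_for_each_safe() loop above that skips log/overwrite entries */
static int compact(int *v, int n)
{
	int *dst = v;

	for (int *src = v; src < v + n; src++) {
		if (*src < 0)		/* an entry type we don't keep */
			continue;
		*dst++ = *src;		/* memcpy(dst, src, ...) in the kernel */
	}
	return (int)(dst - v);		/* new length, cf. the j->u64s recompute */
}

int main(void)
{
	int v[] = { 1, -2, 3, -4, 5 };
	int n = compact(v, 5);

	for (int i = 0; i < n; i++)
		printf("%d ", v[i]);
	printf("\n");
	return 0;
}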
@ -7,29 +7,6 @@
void bch2_journal_pos_from_member_info_set(struct bch_fs *);
void bch2_journal_pos_from_member_info_resume(struct bch_fs *);

struct journal_ptr {
bool csum_good;
struct bch_csum csum;
u8 dev;
u32 bucket;
u32 bucket_offset;
u64 sector;
};

/*
* Only used for holding the journal entries we read in btree_journal_read()
* during cache_registration
*/
struct journal_replay {
DARRAY_PREALLOCATED(struct journal_ptr, 8) ptrs;

bool csum_good;
bool ignore_blacklisted;
bool ignore_not_dirty;
/* must be last: */
struct jset j;
};

static inline bool journal_replay_ignore(struct journal_replay *i)
{
return !i || i->ignore_blacklisted || i->ignore_not_dirty;

@ -148,6 +148,9 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne

BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space));

size_t mem_limit = max_t(ssize_t, 0,
(totalram_pages() * PAGE_SIZE) / 4 - j->dirty_entry_bytes);

for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
if (!ca->journal.nr ||
!ca->mi.durability)
@ -180,6 +183,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
* @nr_devs_want largest devices:
*/
space = dev_space[nr_devs_want - 1];
space.total = min(space.total, mem_limit >> 9);
space.next_entry = min(space.next_entry, min_bucket_size);
return space;
}
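__journal_space_available() now also caps journal space by memory: at most a quarter of RAM, minus bytes already dirty, clamped at zero so the subtraction can't go negative, then converted from bytes to 512-byte sectors with >> 9 before being applied to space.total. A standalone version of just that arithmetic:

#include <stdint.h>
#include <stdio.h>

/* memory cap on journal space, in 512-byte sectors */
static uint64_t journal_mem_limit_sectors(uint64_t totalram_bytes,
					  uint64_t dirty_entry_bytes)
{
	int64_t limit = (int64_t)(totalram_bytes / 4) - (int64_t)dirty_entry_bytes;

	if (limit < 0)			/* the max_t(ssize_t, 0, ...) clamp */
		limit = 0;
	return (uint64_t)limit >> 9;	/* bytes -> sectors */
}

int main(void)
{
	/* 16 GiB of RAM, 1 GiB of dirty journal entries */
	uint64_t s = journal_mem_limit_sectors(16ULL << 30, 1ULL << 30);

	printf("limit: %llu sectors\n", (unsigned long long)s);
	return 0;
}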
@ -328,9 +332,17 @@ void bch2_journal_reclaim_fast(struct journal *j)
* Unpin journal entries whose reference counts reached zero, meaning
* all btree nodes got written out
*/
struct journal_entry_pin_list *pin_list;
while (!fifo_empty(&j->pin) &&
j->pin.front <= j->seq_ondisk &&
!atomic_read(&fifo_peek_front(&j->pin).count)) {
!atomic_read(&(pin_list = &fifo_peek_front(&j->pin))->count)) {

if (WARN_ON(j->dirty_entry_bytes < pin_list->bytes))
pin_list->bytes = j->dirty_entry_bytes;

j->dirty_entry_bytes -= pin_list->bytes;
pin_list->bytes = 0;

j->pin.front++;
popped = true;
}

@ -71,6 +71,7 @@ struct journal_entry_pin_list {
struct list_head flushed[JOURNAL_PIN_TYPE_NR];
atomic_t count;
struct bch_devs_list devs;
size_t bytes;
};

struct journal;
@ -253,6 +254,7 @@ struct journal {
u64 front, back, size, mask;
struct journal_entry_pin_list *data;
} pin;
size_t dirty_entry_bytes;

struct journal_space space[journal_space_nr];


@ -525,7 +525,7 @@ int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, enum bch_opt_id
switch (id) {
case Opt_state:
if (ca)
return bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED);
return bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED, NULL);
break;

case Opt_compression:

@ -181,9 +181,12 @@ void bch2_reconstruct_alloc(struct bch_fs *c)
*/
static void zero_out_btree_mem_ptr(struct journal_keys *keys)
{
darray_for_each(*keys, i)
if (i->k->k.type == KEY_TYPE_btree_ptr_v2)
bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0;
struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys);
darray_for_each(*keys, i) {
struct bkey_i *k = journal_key_k(c, i);
if (k->k.type == KEY_TYPE_btree_ptr_v2)
bkey_i_to_btree_ptr_v2(k)->v.mem_ptr = 0;
}
}

/* journal replay: */
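zero_out_btree_mem_ptr() no longer dereferences i->k directly: keys are resolved through journal_key_k(c, i). The hunks that follow suggest why — keys read from the journal are now addressed relative to c->journal_entries_base_seq via journal_seq_offset, while keys allocated during repair carry their own allocation (k->allocated). A speculative sketch of the shape such an accessor could take; the field names here are assumptions for illustration, not the real bcachefs definition:

#include <stdbool.h>
#include <stddef.h>

struct bkey_i;	/* opaque here; we only pass pointers around */

/* hypothetical journal_key layout -- an assumption based on the
 * k->allocated / journal_seq_offset usage in this commit */
struct journal_key_sketch {
	bool		allocated;	/* separately allocated (repair)? */
	struct bkey_i	*alloc_k;	/* valid if allocated */
	size_t		buf_offset;	/* else: offset into the journal buffer */
};

static struct bkey_i *journal_key_k_sketch(char *journal_buf,
					   struct journal_key_sketch *k)
{
	return k->allocated
		? k->alloc_k
		: (struct bkey_i *)(journal_buf + k->buf_offset);
}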
@ -201,8 +204,10 @@ static void replay_now_at(struct journal *j, u64 seq)
static int bch2_journal_replay_accounting_key(struct btree_trans *trans,
struct journal_key *k)
{
struct bch_fs *c = trans->c;
struct bkey_i *bk = journal_key_k(c, k);
struct btree_iter iter;
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p,
BTREE_MAX_DEPTH, k->level,
BTREE_ITER_intent);
int ret = bch2_btree_iter_traverse(&iter);
@ -213,14 +218,14 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans,
struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), &u);

/* Has this delta already been applied to the btree? */
if (bversion_cmp(old.k->bversion, k->k->k.bversion) >= 0) {
if (bversion_cmp(old.k->bversion, bk->k.bversion) >= 0) {
ret = 0;
goto out;
}

struct bkey_i *new = k->k;
struct bkey_i *new = bk;
if (old.k->type == KEY_TYPE_accounting) {
new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(k->k));
new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(bk));
ret = PTR_ERR_OR_ZERO(new);
if (ret)
goto out;
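The bversion_cmp() check above is what makes accounting replay idempotent: if the btree's copy already reflects this key's version, the delta is skipped instead of being applied twice. A toy model of that invariant:

#include <stdint.h>
#include <stdio.h>

struct acct { uint64_t version; int64_t value; };

/* apply a versioned delta only if it hasn't been applied yet,
 * mirroring the bversion_cmp(old, new) >= 0 early-out above */
static void replay_delta(struct acct *a, uint64_t version, int64_t delta)
{
	if (a->version >= version)
		return;			/* already applied */
	a->value += delta;
	a->version = version;
}

int main(void)
{
	struct acct a = { .version = 0, .value = 0 };

	replay_delta(&a, 1, 10);
	replay_delta(&a, 1, 10);	/* duplicate replay is a no-op */
	printf("value=%lld version=%llu\n",
	       (long long)a.value, (unsigned long long)a.version);
	return 0;
}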
@ -229,7 +234,8 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans,
bkey_s_c_to_accounting(old));
}

trans->journal_res.seq = k->journal_seq;
if (!k->allocated)
trans->journal_res.seq = c->journal_entries_base_seq + k->journal_seq_offset;

ret = bch2_trans_update(trans, &iter, new, BTREE_TRIGGER_norun);
out:
@ -240,6 +246,7 @@ out:
static int bch2_journal_replay_key(struct btree_trans *trans,
struct journal_key *k)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
unsigned iter_flags =
BTREE_ITER_intent|
@ -250,7 +257,8 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
if (k->overwritten)
return 0;

trans->journal_res.seq = k->journal_seq;
if (!k->allocated)
trans->journal_res.seq = c->journal_entries_base_seq + k->journal_seq_offset;

/*
* BTREE_UPDATE_key_cache_reclaim disables key cache lookup/update to
@ -265,7 +273,8 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
else
update_flags |= BTREE_UPDATE_key_cache_reclaim;

bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
struct bkey_i *bk = journal_key_k(c, k);
bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p,
BTREE_MAX_DEPTH, k->level,
iter_flags);
ret = bch2_btree_iter_traverse(&iter);
@ -274,13 +283,11 @@ static int bch2_journal_replay_key(struct btree_trans *trans,

struct btree_path *path = btree_iter_path(trans, &iter);
if (unlikely(!btree_path_node(path, k->level))) {
struct bch_fs *c = trans->c;

CLASS(printbuf, buf)();
prt_str(&buf, "btree=");
bch2_btree_id_to_text(&buf, k->btree_id);
prt_printf(&buf, " level=%u ", k->level);
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k));
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(bk));

if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)|
BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) {
@ -297,7 +304,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
}

bch2_trans_iter_exit(&iter);
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
bch2_trans_node_iter_init(trans, &iter, k->btree_id, bk->k.p,
BTREE_MAX_DEPTH, 0, iter_flags);
ret = bch2_btree_iter_traverse(&iter) ?:
bch2_btree_increase_depth(trans, iter.path, 0) ?:
@ -309,17 +316,17 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
if (k->overwritten)
goto out;

if (k->k->k.type == KEY_TYPE_accounting) {
struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, k->k->k.u64s);
if (bk->k.type == KEY_TYPE_accounting) {
struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, bk->k.u64s);
ret = PTR_ERR_OR_ZERO(n);
if (ret)
goto out;

bkey_copy(n, k->k);
bkey_copy(n, bk);
goto out;
}

ret = bch2_trans_update(trans, &iter, k->k, update_flags);
ret = bch2_trans_update(trans, &iter, bk, update_flags);
out:
bch2_trans_iter_exit(&iter);
return ret;
@ -330,13 +337,9 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
const struct journal_key *l = *((const struct journal_key **)_l);
const struct journal_key *r = *((const struct journal_key **)_r);

/*
* Map 0 to U64_MAX, so that keys with journal_seq == 0 come last
*
* journal_seq == 0 means that the key comes from early repair, and
* should be inserted last so as to avoid overflowing the journal
*/
return cmp_int(l->journal_seq - 1, r->journal_seq - 1);
return !l->allocated && !r->allocated
? cmp_int(l->journal_seq_offset, r->journal_seq_offset)
: cmp_int(l->allocated, r->allocated);
}

DEFINE_DARRAY_NAMED(darray_journal_keys, struct journal_key *)
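The old comparator relied on unsigned wraparound — cmp_int(l->journal_seq - 1, ...) maps seq 0 to U64_MAX so that repair keys sort last. The replacement states the intent directly: journal keys sort by sequence offset, and allocated (repair) keys sort after all of them. A standalone version of the two-level comparator, usable with qsort():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct key { bool allocated; uint64_t journal_seq_offset; };

static int cmp_u64(uint64_t l, uint64_t r) { return (l > r) - (l < r); }

/* journal keys first, ordered by sequence offset; allocated (repair)
 * keys compare equal among themselves and sort to the end */
static int journal_sort_seq_cmp(const void *_l, const void *_r)
{
	const struct key *l = _l, *r = _r;

	return !l->allocated && !r->allocated
		? cmp_u64(l->journal_seq_offset, r->journal_seq_offset)
		: cmp_u64(l->allocated, r->allocated);
}

int main(void)
{
	struct key keys[] = { { true, 0 }, { false, 7 }, { false, 3 } };

	qsort(keys, 3, sizeof(keys[0]), journal_sort_seq_cmp);
	for (int i = 0; i < 3; i++)
		printf("allocated=%d offset=%llu\n", keys[i].allocated,
		       (unsigned long long)keys[i].journal_seq_offset);
	return 0;
}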
@ -368,7 +371,9 @@ int bch2_journal_replay(struct bch_fs *c)
* flush accounting keys until we're done
*/
darray_for_each(*keys, k) {
if (!(k->k->k.type == KEY_TYPE_accounting && !k->allocated))
struct bkey_i *bk = journal_key_k(trans->c, k);

if (!(bk->k.type == KEY_TYPE_accounting && !k->allocated))
continue;

cond_resched();
@ -411,7 +416,6 @@ int bch2_journal_replay(struct bch_fs *c)
BCH_TRANS_COMMIT_skip_accounting_apply|
(!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0),
bch2_journal_replay_key(trans, k));
BUG_ON(!ret && !k->overwritten && k->k->k.type != KEY_TYPE_accounting);
if (ret) {
ret = darray_push(&keys_sorted, k);
if (ret)
@ -433,8 +437,8 @@ int bch2_journal_replay(struct bch_fs *c)

struct journal_key *k = *kp;

if (k->journal_seq)
replay_now_at(j, k->journal_seq);
if (!k->allocated)
replay_now_at(j, c->journal_entries_base_seq + k->journal_seq_offset);
else
replay_now_at(j, j->replay_journal_seq_end);


@ -784,7 +784,7 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
/* Query replicas: */

bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, bool print)
unsigned flags, struct printbuf *err)
{
struct bch_replicas_entry_v1 *e;

@ -823,16 +823,14 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
: BCH_FORCE_IF_DATA_DEGRADED;

if (dflags & ~flags) {
if (print) {
CLASS(printbuf, buf)();

bch2_replicas_entry_to_text(&buf, e);
bch_err(c, "insufficient devices online (%u) for replicas entry %s",
nr_online, buf.buf);
if (err) {
prt_printf(err, "insufficient devices online (%u) for replicas entry ",
nr_online);
bch2_replicas_entry_to_text(err, e);
prt_newline(err);
}
return false;
}

}

return true;
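bch2_have_enough_devs() swaps its bool print flag for a caller-supplied struct printbuf *err: pass NULL to suppress diagnostics (as bch2_write_super() does below), or a buffer to collect them for the caller to log or propagate (as bch2_fs_may_start() does). The same pattern in miniature, with a plain char buffer standing in for the printbuf:

#include <stdbool.h>
#include <stdio.h>

/* the callee reports detail only if the caller provided somewhere to put it */
static bool have_enough_devs(int nr_online, int nr_required,
			     char *err, size_t errlen)
{
	if (nr_online >= nr_required)
		return true;
	if (err)
		snprintf(err, errlen,
			 "insufficient devices online (%d), need %d\n",
			 nr_online, nr_required);
	return false;
}

int main(void)
{
	char err[128];

	/* caller that wants the diagnostic */
	if (!have_enough_devs(1, 2, err, sizeof(err)))
		fputs(err, stderr);

	/* caller that only wants the boolean (cf. the NULL callers below) */
	printf("ok=%d\n", have_enough_devs(1, 2, NULL, 0));
	return 0;
}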
@ -44,7 +44,7 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry_v1 *e,
}

bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask,
unsigned, bool);
unsigned, struct printbuf *);

unsigned bch2_sb_dev_has_data(struct bch_sb *, unsigned);
unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);

@ -129,6 +129,7 @@ static inline void __maybe_unused check_bch_counter_ids_unique(void) {
#define x(t, n, ...) case (n):
BCH_PERSISTENT_COUNTERS()
#undef x
;
}
}


@ -36,10 +36,12 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)

void bch2_dev_missing_atomic(struct bch_fs *c, unsigned dev)
{
if (dev != BCH_SB_MEMBER_INVALID)
if (dev != BCH_SB_MEMBER_INVALID) {
bch2_fs_inconsistent(c, "pointer to %s device %u",
test_bit(dev, c->devs_removed.d)
? "removed" : "nonexistent", dev);
dump_stack();
}
}

void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket)

@ -90,7 +90,7 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v
bch2_version_to_text(&buf, version);
prt_str(&buf, " currently not enabled, allowed up to ");
bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
prt_printf(&buf, "\n set version_upgrade=incompatible to enable");
prt_printf(&buf, "\n set version_upgrade=incompat to enable");

bch_notice(c, "%s", buf.buf);
}
@ -1189,13 +1189,13 @@ int bch2_write_super(struct bch_fs *c)
nr_wrote = dev_mask_nr(&sb_written);

can_mount_with_written =
bch2_have_enough_devs(c, sb_written, degraded_flags, false);
bch2_have_enough_devs(c, sb_written, degraded_flags, NULL);

for (unsigned i = 0; i < ARRAY_SIZE(sb_written.d); i++)
sb_written.d[i] = ~sb_written.d[i];

can_mount_without_written =
bch2_have_enough_devs(c, sb_written, degraded_flags, false);
bch2_have_enough_devs(c, sb_written, degraded_flags, NULL);

/*
* If we would be able to mount _without_ the devices we successfully

@ -1368,10 +1368,14 @@ static bool bch2_fs_may_start(struct bch_fs *c)
return false;
}
break;
}
}
}

return bch2_have_enough_devs(c, c->online_devs, flags, true);
CLASS(printbuf, err)();
bool ret = bch2_have_enough_devs(c, c->online_devs, flags, &err);
if (!ret)
bch2_print_str(c, KERN_ERR, err.buf);
return ret;
}

int bch2_fs_start(struct bch_fs *c)
@ -1833,7 +1837,8 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
* because we got an error or what have you?
*/
bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags)
enum bch_member_state new_state, int flags,
struct printbuf *err)
{
struct bch_devs_mask new_online_devs;
int nr_rw = 0, required;
@ -1870,7 +1875,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
new_online_devs = c->online_devs;
__clear_bit(ca->dev_idx, new_online_devs.d);

return bch2_have_enough_devs(c, new_online_devs, flags, false);
return bch2_have_enough_devs(c, new_online_devs, flags, err);
default:
BUG();
}
@ -1904,14 +1909,15 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
}

int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags)
enum bch_member_state new_state, int flags,
struct printbuf *err)
{
int ret = 0;

if (ca->mi.state == new_state)
return 0;

if (!bch2_dev_state_allowed(c, ca, new_state, flags))
if (!bch2_dev_state_allowed(c, ca, new_state, flags, err))
return bch_err_throw(c, device_state_not_allowed);

if (new_state != BCH_MEMBER_STATE_rw)
@ -1934,10 +1940,11 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
}

int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags)
enum bch_member_state new_state, int flags,
struct printbuf *err)
{
guard(rwsem_write)(&c->state_lock);
return __bch2_dev_set_state(c, ca, new_state, flags);
return __bch2_dev_set_state(c, ca, new_state, flags, err);
}

/* Device add/removal: */
@ -1957,7 +1964,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
*/
bch2_dev_put(ca);

if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) {
if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) {
bch_err(ca, "Cannot remove without losing data");
ret = bch_err_throw(c, device_state_not_allowed);
goto err;
@ -2278,7 +2285,7 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
return 0;
}

if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) {
if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) {
bch_err(ca, "Cannot offline required disk");
return bch_err_throw(c, device_state_not_allowed);
}
@ -2455,10 +2462,14 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise)

struct bch_dev *ca = bdev_to_bch_dev(c, bdev);
if (ca) {
CLASS(printbuf, buf)();
__bch2_log_msg_start(ca->name, &buf);
prt_printf(&buf, "offline from block layer\n");

bool dev = bch2_dev_state_allowed(c, ca,
BCH_MEMBER_STATE_failed,
BCH_FORCE_IF_DEGRADED);

BCH_FORCE_IF_DEGRADED,
&buf);
if (!dev && sb) {
if (!surprise)
sync_filesystem(sb);
@ -2466,11 +2477,6 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
evict_inodes(sb);
}

CLASS(printbuf, buf)();
__bch2_log_msg_start(ca->name, &buf);

prt_printf(&buf, "offline from block layer");

if (dev) {
__bch2_dev_offline(c, ca);
} else {

@ -17,11 +17,14 @@ struct bch_fs *bch2_dev_to_fs(dev_t);
struct bch_fs *bch2_uuid_to_fs(__uuid_t);

bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,
enum bch_member_state, int);
enum bch_member_state, int,
struct printbuf *);
int __bch2_dev_set_state(struct bch_fs *, struct bch_dev *,
enum bch_member_state, int);
enum bch_member_state, int,
struct printbuf *);
int bch2_dev_set_state(struct bch_fs *, struct bch_dev *,
enum bch_member_state, int);
enum bch_member_state, int,
struct printbuf *);

int bch2_dev_fail(struct bch_dev *, int);
int bch2_dev_remove(struct bch_fs *, struct bch_dev *, int);

@ -116,10 +116,15 @@ static int shrinker_thread(void *arg)
}

struct task_struct *shrinker_task;
unsigned long _totalram_pages;

__attribute__((constructor(103)))
static void shrinker_thread_init(void)
{
struct sysinfo info;
si_meminfo(&info);
_totalram_pages = info.totalram >> PAGE_SHIFT;

shrinker_task = kthread_run(shrinker_thread, NULL, "shrinkers");
BUG_ON(IS_ERR(shrinker_task));
}
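The userspace shim caches the machine's RAM size once at startup: a constructor runs before main(), queries si_meminfo(), and stores the page count that totalram_pages() — used by __journal_space_available() above — presumably reads back. A hedged standalone sketch of the same constructor-based caching, using POSIX sysconf() in place of the tools' sysinfo wrapper:

#include <stdio.h>
#include <unistd.h>

static unsigned long _totalram_pages;

/* runs before main(), like the constructor(103) in the hunk above */
__attribute__((constructor))
static void totalram_init(void)
{
	_totalram_pages = (unsigned long)sysconf(_SC_PHYS_PAGES);
}

/* stand-in for the kernel's totalram_pages() accessor */
static unsigned long totalram_pages(void)
{
	return _totalram_pages;
}

int main(void)
{
	printf("total RAM: %lu pages\n", totalram_pages());
	return 0;
}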