Mirror of https://github.com/koverstreet/bcachefs-tools.git, synced 2025-02-22 00:00:03 +03:00

Update bcachefs sources to 2f4e24d856 bcachefs: Split out dev_buckets_free()

commit 319abbc653
parent 3a0ae34333
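The helper named in the subject line, dev_buckets_free(), is added to libbcachefs/buckets.h and called from bch2_bucket_alloc() in the hunks below. As a rough, self-contained model of the split (plain integers standing in for struct bch_dev_usage / struct bch_dev; names prefixed toy_ are illustrative only, not the real API):

#include <inttypes.h>
#include <stdio.h>

/* Toy stand-in for the bucket counters kept in struct bch_dev_usage / struct bch_dev. */
struct toy_usage {
	int64_t free, cached, need_gc_gens, need_discard;
	int64_t nr_open, reserved;
};

static int64_t clamp0(int64_t v) { return v > 0 ? v : 0; }

/* Models the new dev_buckets_free(): only open and reserved buckets are subtracted. */
static int64_t toy_dev_buckets_free(const struct toy_usage *u)
{
	return clamp0(u->free - u->nr_open - u->reserved);
}

/* Models __dev_buckets_available(): cached/need_gc_gens/need_discard buckets are subtracted too. */
static int64_t toy_dev_buckets_available(const struct toy_usage *u)
{
	return clamp0(u->free - u->cached - u->need_gc_gens -
		      u->need_discard - u->nr_open - u->reserved);
}

int main(void)
{
	struct toy_usage u = {
		.free = 1000, .cached = 100, .need_gc_gens = 10,
		.need_discard = 50, .nr_open = 5, .reserved = 20,
	};

	/* Prints "free 975, available 815" for these example counts. */
	printf("free %" PRId64 ", available %" PRId64 "\n",
	       toy_dev_buckets_free(&u), toy_dev_buckets_available(&u));
	return 0;
}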
@@ -1 +1 @@
c76f7e91e8939751ccc96ca2f8f6bfe6dd368d93
2f4e24d85692600a698d78938a213f27593bda25

@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_key_cache.h"

@@ -37,8 +38,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {

struct bkey_alloc_unpacked {
	u64 journal_seq;
	u64 bucket;
	u8 dev;
	u8 gen;
	u8 oldest_gen;
	u8 data_type;

@@ -194,11 +193,7 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,

static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
{
	struct bkey_alloc_unpacked ret = {
		.dev	= k.k->p.inode,
		.bucket	= k.k->p.offset,
		.gen	= 0,
	};
	struct bkey_alloc_unpacked ret = { .gen = 0 };

	switch (k.k->type) {
	case KEY_TYPE_alloc:

@ -215,48 +210,6 @@ static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
|
||||
{
|
||||
if (k.k->type == KEY_TYPE_alloc_v4) {
|
||||
*out = *bkey_s_c_to_alloc_v4(k).v;
|
||||
} else {
|
||||
struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
|
||||
|
||||
*out = (struct bch_alloc_v4) {
|
||||
.journal_seq = u.journal_seq,
|
||||
.flags = u.need_discard,
|
||||
.gen = u.gen,
|
||||
.oldest_gen = u.oldest_gen,
|
||||
.data_type = u.data_type,
|
||||
.stripe_redundancy = u.stripe_redundancy,
|
||||
.dirty_sectors = u.dirty_sectors,
|
||||
.cached_sectors = u.cached_sectors,
|
||||
.io_time[READ] = u.read_time,
|
||||
.io_time[WRITE] = u.write_time,
|
||||
.stripe = u.stripe,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_i_alloc_v4 *ret;
|
||||
|
||||
if (k.k->type == KEY_TYPE_alloc_v4) {
|
||||
ret = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
|
||||
if (!IS_ERR(ret))
|
||||
bkey_reassemble(&ret->k_i, k);
|
||||
} else {
|
||||
ret = bch2_trans_kmalloc(trans, sizeof(*ret));
|
||||
if (!IS_ERR(ret)) {
|
||||
bkey_alloc_v4_init(&ret->k_i);
|
||||
ret->k.p = k.k->p;
|
||||
bch2_alloc_to_v4(k, &ret->v);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bkey_i_alloc_v4 *
|
||||
bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct bpos pos)
|
||||
@ -339,9 +292,15 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
{
|
||||
struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
|
||||
|
||||
if (bkey_val_bytes(k.k) != sizeof(struct bch_alloc_v4)) {
|
||||
prt_printf(err, "bad val size (%zu != %zu)",
|
||||
bkey_val_bytes(k.k), sizeof(struct bch_alloc_v4));
|
||||
if (alloc_v4_u64s(a.v) != bkey_val_u64s(k.k)) {
|
||||
prt_printf(err, "bad val size (%lu != %u)",
|
||||
bkey_val_u64s(k.k), alloc_v4_u64s(a.v));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
|
||||
BCH_ALLOC_V4_NR_BACKPOINTERS(a.v)) {
|
||||
prt_printf(err, "invalid backpointers_start");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -401,9 +360,19 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline u64 swab40(u64 x)
|
||||
{
|
||||
return (((x & 0x00000000ffULL) << 32)|
|
||||
((x & 0x000000ff00ULL) << 16)|
|
||||
((x & 0x0000ff0000ULL) >> 0)|
|
||||
((x & 0x00ff000000ULL) >> 16)|
|
||||
((x & 0xff00000000ULL) >> 32));
|
||||
}
|
||||
|
||||
void bch2_alloc_v4_swab(struct bkey_s k)
|
||||
{
|
||||
struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;
|
||||
struct bch_backpointer *bp, *bps;
|
||||
|
||||
a->journal_seq = swab64(a->journal_seq);
|
||||
a->flags = swab32(a->flags);
|
||||
@ -413,25 +382,135 @@ void bch2_alloc_v4_swab(struct bkey_s k)
|
||||
a->io_time[1] = swab64(a->io_time[1]);
|
||||
a->stripe = swab32(a->stripe);
|
||||
a->nr_external_backpointers = swab32(a->nr_external_backpointers);
|
||||
|
||||
bps = alloc_v4_backpointers(a);
|
||||
for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) {
|
||||
bp->bucket_offset = swab40(bp->bucket_offset);
|
||||
bp->bucket_len = swab32(bp->bucket_len);
|
||||
bch2_bpos_swab(&bp->pos);
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct bch_alloc_v4 a;
|
||||
struct bch_alloc_v4 _a;
|
||||
const struct bch_alloc_v4 *a = &_a;
|
||||
const struct bch_backpointer *bps;
|
||||
unsigned i;
|
||||
|
||||
bch2_alloc_to_v4(k, &a);
|
||||
if (k.k->type == KEY_TYPE_alloc_v4)
|
||||
a = bkey_s_c_to_alloc_v4(k).v;
|
||||
else
|
||||
bch2_alloc_to_v4(k, &_a);
|
||||
|
||||
prt_printf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu need_inc_gen %llu",
|
||||
a.gen, a.oldest_gen, bch2_data_types[a.data_type],
|
||||
a.journal_seq,
|
||||
BCH_ALLOC_V4_NEED_DISCARD(&a),
|
||||
BCH_ALLOC_V4_NEED_INC_GEN(&a));
|
||||
prt_printf(out, " dirty_sectors %u", a.dirty_sectors);
|
||||
prt_printf(out, " cached_sectors %u", a.cached_sectors);
|
||||
prt_printf(out, " stripe %u", a.stripe);
|
||||
prt_printf(out, " stripe_redundancy %u", a.stripe_redundancy);
|
||||
prt_printf(out, " read_time %llu", a.io_time[READ]);
|
||||
prt_printf(out, " write_time %llu", a.io_time[WRITE]);
|
||||
prt_newline(out);
|
||||
printbuf_indent_add(out, 2);
|
||||
|
||||
prt_printf(out, "gen %u oldest_gen %u data_type %s",
|
||||
a->gen, a->oldest_gen, bch2_data_types[a->data_type]);
|
||||
prt_newline(out);
|
||||
prt_printf(out, "journal_seq %llu", a->journal_seq);
|
||||
prt_newline(out);
|
||||
prt_printf(out, "need_discard %llu", BCH_ALLOC_V4_NEED_DISCARD(a));
|
||||
prt_newline(out);
|
||||
prt_printf(out, "need_inc_gen %llu", BCH_ALLOC_V4_NEED_INC_GEN(a));
|
||||
prt_newline(out);
|
||||
prt_printf(out, "dirty_sectors %u", a->dirty_sectors);
|
||||
prt_newline(out);
|
||||
prt_printf(out, "cached_sectors %u", a->cached_sectors);
|
||||
prt_newline(out);
|
||||
prt_printf(out, "stripe %u", a->stripe);
|
||||
prt_newline(out);
|
||||
prt_printf(out, "stripe_redundancy %u", a->stripe_redundancy);
|
||||
prt_newline(out);
|
||||
prt_printf(out, "io_time[READ] %llu", a->io_time[READ]);
|
||||
prt_newline(out);
|
||||
prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]);
|
||||
prt_newline(out);
|
||||
prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a));
|
||||
printbuf_indent_add(out, 2);
|
||||
|
||||
bps = alloc_v4_backpointers_c(a);
|
||||
for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a); i++) {
|
||||
prt_newline(out);
|
||||
bch2_backpointer_to_text(out, &bps[i]);
|
||||
}
|
||||
|
||||
printbuf_indent_sub(out, 4);
|
||||
}
|
||||
|
||||
void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
|
||||
{
|
||||
if (k.k->type == KEY_TYPE_alloc_v4) {
|
||||
int d;
|
||||
|
||||
*out = *bkey_s_c_to_alloc_v4(k).v;
|
||||
|
||||
d = (int) BCH_ALLOC_V4_U64s -
|
||||
(int) (BCH_ALLOC_V4_BACKPOINTERS_START(out) ?: BCH_ALLOC_V4_U64s_V0);
|
||||
if (unlikely(d > 0)) {
|
||||
memset((u64 *) out + BCH_ALLOC_V4_BACKPOINTERS_START(out),
|
||||
0,
|
||||
d * sizeof(u64));
|
||||
SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
|
||||
}
|
||||
} else {
|
||||
struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
|
||||
|
||||
*out = (struct bch_alloc_v4) {
|
||||
.journal_seq = u.journal_seq,
|
||||
.flags = u.need_discard,
|
||||
.gen = u.gen,
|
||||
.oldest_gen = u.oldest_gen,
|
||||
.data_type = u.data_type,
|
||||
.stripe_redundancy = u.stripe_redundancy,
|
||||
.dirty_sectors = u.dirty_sectors,
|
||||
.cached_sectors = u.cached_sectors,
|
||||
.io_time[READ] = u.read_time,
|
||||
.io_time[WRITE] = u.write_time,
|
||||
.stripe = u.stripe,
|
||||
};
|
||||
|
||||
SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
|
||||
}
|
||||
}
|
||||
|
||||
struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
|
||||
{
|
||||
unsigned bytes = k.k->type == KEY_TYPE_alloc_v4
|
||||
? bkey_bytes(k.k)
|
||||
: sizeof(struct bkey_i_alloc_v4);
|
||||
struct bkey_i_alloc_v4 *ret;
|
||||
|
||||
/*
|
||||
* Reserve space for one more backpointer here:
|
||||
* Not sketchy at doing it this way, nope...
|
||||
*/
|
||||
ret = bch2_trans_kmalloc(trans, bytes + sizeof(struct bch_backpointer));
|
||||
if (IS_ERR(ret))
|
||||
return ret;
|
||||
|
||||
if (k.k->type == KEY_TYPE_alloc_v4) {
|
||||
bkey_reassemble(&ret->k_i, k);
|
||||
|
||||
if (BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v) < BCH_ALLOC_V4_U64s) {
|
||||
struct bch_backpointer *src, *dst;
|
||||
|
||||
src = alloc_v4_backpointers(&ret->v);
|
||||
SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
|
||||
dst = alloc_v4_backpointers(&ret->v);
|
||||
|
||||
memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
|
||||
sizeof(struct bch_backpointer));
|
||||
memset(src, 0, dst - src);
|
||||
set_alloc_v4_u64s(ret);
|
||||
}
|
||||
} else {
|
||||
bkey_alloc_v4_init(&ret->k_i);
|
||||
ret->k.p = k.k->p;
|
||||
bch2_alloc_to_v4(k, &ret->v);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_alloc_read(struct bch_fs *c)
|
||||
@ -1052,6 +1131,7 @@ static void bch2_do_discards_work(struct work_struct *work)
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]);
|
||||
discarded++;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
@@ -70,6 +70,22 @@ static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_
	return pos;
}

static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a)
{
	unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
			BCH_ALLOC_V4_U64s_V0) +
		BCH_ALLOC_V4_NR_BACKPOINTERS(a) *
		(sizeof(struct bch_backpointer) / sizeof(u64));

	BUG_ON(ret > U8_MAX - BKEY_U64s);
	return ret;
}

static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a)
{
	set_bkey_val_u64s(&a->k, alloc_v4_u64s(&a->v));
}

struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update(struct btree_trans *, struct btree_iter *, struct bpos);

@@ -143,6 +159,16 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,

void bch2_do_invalidates(struct bch_fs *);

static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
{
	return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a));
}

static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a)
{
	return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a));
}

int bch2_fs_freespace_init(struct bch_fs *);

void bch2_recalc_capacity(struct bch_fs *);

@@ -506,7 +506,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
	int ret;
again:
	usage = bch2_dev_usage_read(ca);
	avail = __dev_buckets_available(ca, usage, reserve);
	avail = dev_buckets_free(ca, usage, reserve);

	if (usage.d[BCH_DATA_need_discard].buckets > avail)
		bch2_do_discards(c);

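The new libbcachefs/backpointers.c file that follows keys backpointers by device and by the bucket's start sector shifted left by MAX_EXTENT_COMPRESS_RATIO_SHIFT, leaving room for multiple extents (e.g. from compression) that resolve to the same starting sector. A toy, self-contained sketch of that position round trip; bucket_size and the toy_ helpers are made-up stand-ins for the real sector_to_bucket()/bucket_to_sector() on struct bch_dev:

#include <assert.h>
#include <stdint.h>

#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10

static const uint64_t bucket_size = 512;	/* sectors per bucket (example value) */

/* Like bucket_pos_to_bp(): scale the bucket's start sector, then add the offset. */
static uint64_t toy_bucket_to_bp_offset(uint64_t bucket, uint64_t bucket_offset)
{
	return ((bucket * bucket_size) << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset;
}

/* Like bp_pos_to_bucket(): undo the shift, then map the sector back to its bucket. */
static uint64_t toy_bp_offset_to_bucket(uint64_t bp_offset)
{
	return (bp_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) / bucket_size;
}

int main(void)
{
	uint64_t bp = toy_bucket_to_bp_offset(123, 45);

	assert(toy_bp_offset_to_bucket(bp) == 123);	/* the round trip recovers the bucket */
	return 0;
}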
libbcachefs/backpointers.c (new file, 891 lines)
@@ -0,0 +1,891 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "error.h"

#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10

/*
 * Convert from pos in backpointer btree to pos of corresponding bucket in alloc
 * btree:
 */
static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c,
					   struct bpos bp_pos)
{
	struct bch_dev *ca = bch_dev_bkey_exists(c, bp_pos.inode);
	u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;

	return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
}

/*
 * Convert from pos in alloc btree + bucket offset to pos in backpointer btree:
 */
static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
					   struct bpos bucket,
					   u64 bucket_offset)
{
	struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);

	return POS(bucket.inode,
		   (bucket_to_sector(ca, bucket.offset) <<
		    MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
}

void bch2_extent_ptr_to_bp(struct bch_fs *c,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, struct extent_ptr_decoded p,
|
||||
struct bpos *bucket_pos, struct bch_backpointer *bp)
|
||||
{
|
||||
enum bch_data_type data_type = level ? BCH_DATA_btree : BCH_DATA_user;
|
||||
s64 sectors = level ? btree_sectors(c) : k.k->size;
|
||||
u32 bucket_offset;
|
||||
|
||||
*bucket_pos = PTR_BUCKET_POS_OFFSET(c, &p.ptr, &bucket_offset);
|
||||
*bp = (struct bch_backpointer) {
|
||||
.btree_id = btree_id,
|
||||
.level = level,
|
||||
.data_type = data_type,
|
||||
.bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
|
||||
p.crc.offset,
|
||||
.bucket_len = ptr_disk_sectors(sectors, p),
|
||||
.pos = k.k->p,
|
||||
};
|
||||
}
|
||||
|
||||
static bool extent_matches_bp(struct bch_fs *c,
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k,
|
||||
struct bpos bucket,
|
||||
struct bch_backpointer bp)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
struct bpos bucket2;
|
||||
struct bch_backpointer bp2;
|
||||
|
||||
if (p.ptr.cached)
|
||||
continue;
|
||||
|
||||
bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
|
||||
&bucket2, &bp2);
|
||||
if (!bpos_cmp(bucket, bucket2) &&
|
||||
!memcmp(&bp, &bp2, sizeof(bp)))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k,
|
||||
int rw, struct printbuf *err)
|
||||
{
|
||||
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
|
||||
struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
|
||||
|
||||
if (bkey_val_bytes(bp.k) < sizeof(*bp.v)) {
|
||||
prt_str(err, "incorrect value size");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (bpos_cmp(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset))) {
|
||||
prt_str(err, "backpointer at wrong pos");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
|
||||
{
|
||||
prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
|
||||
bch2_btree_ids[bp->btree_id],
|
||||
bp->level,
|
||||
(u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
|
||||
(u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
|
||||
bp->bucket_len);
|
||||
bch2_bpos_to_text(out, bp->pos);
|
||||
}
|
||||
|
||||
void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
|
||||
}
|
||||
|
||||
void bch2_backpointer_swab(struct bkey_s k)
|
||||
{
|
||||
struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
|
||||
|
||||
bp.v->bucket_offset = swab32(bp.v->bucket_offset);
|
||||
bp.v->bucket_len = swab32(bp.v->bucket_len);
|
||||
bch2_bpos_swab(&bp.v->pos);
|
||||
}
|
||||
|
||||
#define BACKPOINTER_OFFSET_MAX ((1ULL << 40) - 1)
|
||||
|
||||
static inline int backpointer_cmp(struct bch_backpointer l, struct bch_backpointer r)
|
||||
{
|
||||
return cmp_int(l.bucket_offset, r.bucket_offset);
|
||||
}
|
||||
|
||||
static int bch2_backpointer_del_by_offset(struct btree_trans *trans,
|
||||
struct bpos bucket,
|
||||
u64 bp_offset,
|
||||
struct bch_backpointer bp)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
if (bp_offset < BACKPOINTER_OFFSET_MAX) {
|
||||
struct bch_backpointer *bps;
|
||||
struct bkey_i_alloc_v4 *a;
|
||||
unsigned i, nr;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
|
||||
bucket,
|
||||
BTREE_ITER_INTENT|
|
||||
BTREE_ITER_SLOTS|
|
||||
BTREE_ITER_WITH_UPDATES);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type != KEY_TYPE_alloc_v4) {
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
a = bch2_alloc_to_v4_mut(trans, k);
|
||||
ret = PTR_ERR_OR_ZERO(a);
|
||||
if (ret)
|
||||
goto err;
|
||||
bps = alloc_v4_backpointers(&a->v);
|
||||
nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
if (bps[i].bucket_offset == bp_offset)
|
||||
goto found;
|
||||
if (bps[i].bucket_offset > bp_offset)
|
||||
break;
|
||||
}
|
||||
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
found:
|
||||
if (memcmp(&bps[i], &bp, sizeof(bp))) {
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
array_remove_item(bps, nr, i);
|
||||
SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
|
||||
set_alloc_v4_u64s(a);
|
||||
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
|
||||
} else {
|
||||
bp_offset -= BACKPOINTER_OFFSET_MAX;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_backpointers,
|
||||
bucket_pos_to_bp(c, bucket, bp_offset),
|
||||
BTREE_ITER_INTENT|
|
||||
BTREE_ITER_SLOTS|
|
||||
BTREE_ITER_WITH_UPDATES);
|
||||
k = bch2_btree_iter_peek_slot(&iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type != KEY_TYPE_backpointer ||
|
||||
memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
|
||||
ret = -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_btree_delete_at(trans, &iter, 0);
|
||||
}
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_bucket_backpointer_del(struct btree_trans *trans,
|
||||
struct bkey_i_alloc_v4 *a,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c orig_k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
|
||||
unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
|
||||
struct btree_iter bp_iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
int cmp = backpointer_cmp(bps[i], bp) ?:
|
||||
memcmp(&bps[i], &bp, sizeof(bp));
|
||||
if (!cmp)
|
||||
goto found;
|
||||
if (cmp >= 0)
|
||||
break;
|
||||
}
|
||||
|
||||
goto btree;
|
||||
found:
|
||||
array_remove_item(bps, nr, i);
|
||||
SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
|
||||
set_alloc_v4_u64s(a);
|
||||
return 0;
|
||||
btree:
|
||||
bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
|
||||
bucket_pos_to_bp(c, a->k.p, bp.bucket_offset),
|
||||
BTREE_ITER_INTENT|
|
||||
BTREE_ITER_SLOTS|
|
||||
BTREE_ITER_WITH_UPDATES);
|
||||
k = bch2_btree_iter_peek_slot(&bp_iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type != KEY_TYPE_backpointer ||
|
||||
memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_printf(&buf, "backpointer not found when deleting");
|
||||
prt_newline(&buf);
|
||||
printbuf_indent_add(&buf, 2);
|
||||
|
||||
prt_printf(&buf, "searching for ");
|
||||
bch2_backpointer_to_text(&buf, &bp);
|
||||
prt_newline(&buf);
|
||||
|
||||
prt_printf(&buf, "got ");
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
prt_newline(&buf);
|
||||
|
||||
prt_str(&buf, "alloc ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
|
||||
prt_newline(&buf);
|
||||
|
||||
prt_printf(&buf, "for ");
|
||||
bch2_bkey_val_to_text(&buf, c, orig_k);
|
||||
|
||||
if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
|
||||
bch_err(c, "%s", buf.buf);
|
||||
} else {
|
||||
ret = -EIO;
|
||||
bch2_trans_inconsistent(trans, "%s", buf.buf);
|
||||
}
|
||||
printbuf_exit(&buf);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_btree_delete_at(trans, &bp_iter, 0);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &bp_iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_bucket_backpointer_add(struct btree_trans *trans,
|
||||
struct bkey_i_alloc_v4 *a,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c orig_k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_dev *ca;
|
||||
struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
|
||||
unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
|
||||
struct bkey_i_backpointer *bp_k;
|
||||
struct btree_iter bp_iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
/* Check for duplicates: */
|
||||
for (i = 0; i < nr; i++) {
|
||||
int cmp = backpointer_cmp(bps[i], bp);
|
||||
if (cmp >= 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if ((i &&
|
||||
(bps[i - 1].bucket_offset +
|
||||
bps[i - 1].bucket_len > bp.bucket_offset)) ||
|
||||
(i < nr &&
|
||||
(bp.bucket_offset + bp.bucket_len > bps[i].bucket_offset))) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_printf(&buf, "overlapping backpointer found when inserting ");
|
||||
bch2_backpointer_to_text(&buf, &bp);
|
||||
prt_newline(&buf);
|
||||
printbuf_indent_add(&buf, 2);
|
||||
|
||||
prt_printf(&buf, "into ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
|
||||
prt_newline(&buf);
|
||||
|
||||
prt_printf(&buf, "for ");
|
||||
bch2_bkey_val_to_text(&buf, c, orig_k);
|
||||
|
||||
if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
|
||||
bch_err(c, "%s", buf.buf);
|
||||
else {
|
||||
bch2_trans_inconsistent(trans, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
if (nr < BCH_ALLOC_V4_NR_BACKPOINTERS_MAX) {
|
||||
array_insert_item(bps, nr, i, bp);
|
||||
SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
|
||||
set_alloc_v4_u64s(a);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Overflow: use backpointer btree */
|
||||
bp_k = bch2_trans_kmalloc(trans, sizeof(*bp_k));
|
||||
ret = PTR_ERR_OR_ZERO(bp_k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ca = bch_dev_bkey_exists(c, a->k.p.inode);
|
||||
|
||||
bkey_backpointer_init(&bp_k->k_i);
|
||||
bp_k->k.p = bucket_pos_to_bp(c, a->k.p, bp.bucket_offset);
|
||||
bp_k->v = bp;
|
||||
|
||||
bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_k->k.p,
|
||||
BTREE_ITER_INTENT|
|
||||
BTREE_ITER_SLOTS|
|
||||
BTREE_ITER_WITH_UPDATES);
|
||||
k = bch2_btree_iter_peek_slot(&bp_iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (k.k->type) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_printf(&buf, "existing btree backpointer key found when inserting ");
|
||||
bch2_backpointer_to_text(&buf, &bp);
|
||||
prt_newline(&buf);
|
||||
printbuf_indent_add(&buf, 2);
|
||||
|
||||
prt_printf(&buf, "found ");
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
prt_newline(&buf);
|
||||
|
||||
prt_printf(&buf, "for ");
|
||||
bch2_bkey_val_to_text(&buf, c, orig_k);
|
||||
|
||||
if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
|
||||
bch_err(c, "%s", buf.buf);
|
||||
else {
|
||||
bch2_trans_inconsistent(trans, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &bp_iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the next backpointer >= *bp_offset:
|
||||
*/
|
||||
int bch2_get_next_backpointer(struct btree_trans *trans,
|
||||
struct bpos bucket, int gen,
|
||||
u64 *bp_offset,
|
||||
struct bch_backpointer *dst)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bpos bp_pos =
|
||||
bucket_pos_to_bp(c, bucket,
|
||||
max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX);
|
||||
struct bpos bp_end_pos =
|
||||
bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
|
||||
struct btree_iter alloc_iter, bp_iter = { NULL };
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_c_alloc_v4 a;
|
||||
size_t i;
|
||||
int ret;
|
||||
|
||||
bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
|
||||
bucket, BTREE_ITER_CACHED);
|
||||
k = bch2_btree_iter_peek_slot(&alloc_iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (k.k->type != KEY_TYPE_alloc_v4)
|
||||
goto done;
|
||||
|
||||
a = bkey_s_c_to_alloc_v4(k);
|
||||
if (gen >= 0 && a.v->gen != gen)
|
||||
goto done;
|
||||
|
||||
for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); i++) {
|
||||
if (alloc_v4_backpointers_c(a.v)[i].bucket_offset < *bp_offset)
|
||||
continue;
|
||||
|
||||
*dst = alloc_v4_backpointers_c(a.v)[i];
|
||||
*bp_offset = dst->bucket_offset;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for_each_btree_key(trans, bp_iter, BTREE_ID_backpointers,
|
||||
bp_pos, 0, k, ret) {
|
||||
if (bpos_cmp(k.k->p, bp_end_pos) >= 0)
|
||||
break;
|
||||
|
||||
if (k.k->type != KEY_TYPE_backpointer)
|
||||
continue;
|
||||
|
||||
*dst = *bkey_s_c_to_backpointer(k).v;
|
||||
*bp_offset = dst->bucket_offset + BACKPOINTER_OFFSET_MAX;
|
||||
goto out;
|
||||
}
|
||||
done:
|
||||
*bp_offset = U64_MAX;
|
||||
out:
|
||||
bch2_trans_iter_exit(trans, &bp_iter);
|
||||
bch2_trans_iter_exit(trans, &alloc_iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void backpointer_not_found(struct btree_trans *trans,
|
||||
struct bpos bucket,
|
||||
u64 bp_offset,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c k,
|
||||
const char *thing_it_points_to)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
|
||||
thing_it_points_to);
|
||||
prt_printf(&buf, "bucket: ");
|
||||
bch2_bpos_to_text(&buf, bucket);
|
||||
prt_printf(&buf, "\n ");
|
||||
|
||||
if (bp_offset >= BACKPOINTER_OFFSET_MAX) {
|
||||
struct bpos bp_pos =
|
||||
bucket_pos_to_bp(c, bucket,
|
||||
bp_offset - BACKPOINTER_OFFSET_MAX);
|
||||
prt_printf(&buf, "backpointer pos: ");
|
||||
bch2_bpos_to_text(&buf, bp_pos);
|
||||
prt_printf(&buf, "\n ");
|
||||
}
|
||||
|
||||
bch2_backpointer_to_text(&buf, &bp);
|
||||
prt_printf(&buf, "\n ");
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
|
||||
bch_err(c, "%s", buf.buf);
|
||||
else
|
||||
bch2_trans_inconsistent(trans, "%s", buf.buf);
|
||||
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bpos bucket,
|
||||
u64 bp_offset,
|
||||
struct bch_backpointer bp)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_s_c k;
|
||||
|
||||
bch2_trans_node_iter_init(trans, iter,
|
||||
bp.btree_id,
|
||||
bp.pos,
|
||||
0,
|
||||
min(bp.level, c->btree_roots[bp.btree_id].level),
|
||||
0);
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
if (bkey_err(k)) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
return k;
|
||||
}
|
||||
|
||||
if (bp.level == c->btree_roots[bp.btree_id].level + 1)
|
||||
k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key);
|
||||
|
||||
if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
|
||||
return k;
|
||||
|
||||
backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
|
||||
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
return bkey_s_c_null;
|
||||
}
|
||||
|
||||
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bpos bucket,
|
||||
u64 bp_offset,
|
||||
struct bch_backpointer bp)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree *b;
|
||||
struct bkey_s_c k;
|
||||
|
||||
BUG_ON(!bp.level);
|
||||
|
||||
bch2_trans_node_iter_init(trans, iter,
|
||||
bp.btree_id,
|
||||
bp.pos,
|
||||
0,
|
||||
bp.level - 1,
|
||||
0);
|
||||
b = bch2_btree_iter_peek_node(iter);
|
||||
if (IS_ERR(b)) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
return b;
|
||||
}
|
||||
|
||||
if (extent_matches_bp(c, bp.btree_id, bp.level,
|
||||
bkey_i_to_s_c(&b->key),
|
||||
bucket, bp))
|
||||
return b;
|
||||
|
||||
if (!btree_node_will_make_reachable(b))
|
||||
backpointer_not_found(trans, bucket, bp_offset,
|
||||
bp, k, "btree node");
|
||||
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter alloc_iter = { NULL };
|
||||
struct bch_dev *ca;
|
||||
struct bkey_s_c k, alloc_k;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
k = bch2_btree_iter_peek(bp_iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!k.k)
|
||||
return 0;
|
||||
|
||||
if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
|
||||
"backpointer for mising device:\n%s",
|
||||
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
|
||||
ret = bch2_btree_delete_at(trans, bp_iter, 0);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ca = bch_dev_bkey_exists(c, k.k->p.inode);
|
||||
|
||||
bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
|
||||
bp_pos_to_bucket(c, k.k->p), 0);
|
||||
|
||||
alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
|
||||
ret = bkey_err(alloc_k);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
|
||||
"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
|
||||
alloc_iter.pos.inode, alloc_iter.pos.offset,
|
||||
(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
|
||||
ret = bch2_btree_delete_at(trans, bp_iter, 0);
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &alloc_iter);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* verify that every backpointer has a corresponding alloc key */
|
||||
int bch2_check_btree_backpointers(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
bch2_trans_iter_init(&trans, &iter, BTREE_ID_backpointers, POS_MIN, 0);
|
||||
|
||||
do {
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
bch2_check_btree_backpointer(&trans, &iter));
|
||||
if (ret)
|
||||
break;
|
||||
} while (bch2_btree_iter_advance(&iter));
|
||||
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_bp_exists(struct btree_trans *trans,
|
||||
struct bpos bucket_pos,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c orig_k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter alloc_iter, bp_iter = { NULL };
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct bkey_s_c alloc_k, bp_k;
|
||||
int ret;
|
||||
|
||||
bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, bucket_pos, 0);
|
||||
alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
|
||||
ret = bkey_err(alloc_k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (alloc_k.k->type == KEY_TYPE_alloc_v4) {
|
||||
struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(alloc_k);
|
||||
const struct bch_backpointer *bps = alloc_v4_backpointers_c(a.v);
|
||||
unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(a.v);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
int cmp = backpointer_cmp(bps[i], bp) ?:
|
||||
memcmp(&bps[i], &bp, sizeof(bp));
|
||||
if (!cmp)
|
||||
goto out;
|
||||
if (cmp >= 0)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
goto missing;
|
||||
}
|
||||
|
||||
bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
|
||||
bucket_pos_to_bp(c, bucket_pos, bp.bucket_offset),
|
||||
0);
|
||||
bp_k = bch2_btree_iter_peek_slot(&bp_iter);
|
||||
ret = bkey_err(bp_k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (bp_k.k->type != KEY_TYPE_backpointer ||
|
||||
memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp)))
|
||||
goto missing;
|
||||
out:
|
||||
err:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &bp_iter);
|
||||
bch2_trans_iter_exit(trans, &alloc_iter);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
missing:
|
||||
prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
|
||||
bch2_btree_ids[bp.btree_id], bp.level);
|
||||
bch2_bkey_val_to_text(&buf, c, orig_k);
|
||||
prt_printf(&buf, "\nin alloc key ");
|
||||
bch2_bkey_val_to_text(&buf, c, alloc_k);
|
||||
|
||||
if (c->sb.version < bcachefs_metadata_version_backpointers ||
|
||||
fsck_err(c, "%s", buf.buf)) {
|
||||
struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, alloc_k);
|
||||
|
||||
ret = PTR_ERR_OR_ZERO(a) ?:
|
||||
bch2_bucket_backpointer_add(trans, a, bp, orig_k) ?:
|
||||
bch2_trans_update(trans, &alloc_iter, &a->k_i, 0);
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int check_extent_to_backpointers(struct btree_trans *trans,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_ptrs_c ptrs;
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
k = bch2_btree_iter_peek_all_levels(iter);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!k.k)
|
||||
return 0;
|
||||
|
||||
ptrs = bch2_bkey_ptrs_c(k);
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
struct bpos bucket_pos;
|
||||
struct bch_backpointer bp;
|
||||
|
||||
if (p.ptr.cached)
|
||||
continue;
|
||||
|
||||
bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level,
|
||||
k, p, &bucket_pos, &bp);
|
||||
|
||||
ret = check_bp_exists(trans, bucket_pos, bp, k);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_btree_root_to_backpointers(struct btree_trans *trans,
|
||||
enum btree_id btree_id)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct btree *b;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_ptrs_c ptrs;
|
||||
struct extent_ptr_decoded p;
|
||||
const union bch_extent_entry *entry;
|
||||
int ret;
|
||||
|
||||
bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
|
||||
c->btree_roots[btree_id].level, 0);
|
||||
b = bch2_btree_iter_peek_node(&iter);
|
||||
ret = PTR_ERR_OR_ZERO(b);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
BUG_ON(b != btree_node_root(c, b));
|
||||
|
||||
k = bkey_i_to_s_c(&b->key);
|
||||
ptrs = bch2_bkey_ptrs_c(k);
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
struct bpos bucket_pos;
|
||||
struct bch_backpointer bp;
|
||||
|
||||
if (p.ptr.cached)
|
||||
continue;
|
||||
|
||||
bch2_extent_ptr_to_bp(c, iter.btree_id, iter.path->level + 1,
|
||||
k, p, &bucket_pos, &bp);
|
||||
|
||||
ret = check_bp_exists(trans, bucket_pos, bp, k);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_check_extents_to_backpointers(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
enum btree_id btree_id;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
|
||||
bch2_trans_node_iter_init(&trans, &iter, btree_id, POS_MIN, 0,
|
||||
0,
|
||||
BTREE_ITER_ALL_LEVELS|
|
||||
BTREE_ITER_PREFETCH);
|
||||
|
||||
do {
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
check_extent_to_backpointers(&trans, &iter));
|
||||
if (ret)
|
||||
break;
|
||||
} while (!bch2_btree_iter_advance(&iter));
|
||||
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
check_btree_root_to_backpointers(&trans, btree_id));
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_one_backpointer(struct btree_trans *trans,
|
||||
struct bpos bucket,
|
||||
u64 *bp_offset)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bch_backpointer bp;
|
||||
struct bkey_s_c k;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret;
|
||||
|
||||
ret = bch2_get_next_backpointer(trans, bucket, -1,
|
||||
bp_offset, &bp);
|
||||
if (ret || *bp_offset == U64_MAX)
|
||||
return ret;
|
||||
|
||||
k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (fsck_err_on(!k.k, trans->c,
|
||||
"%s backpointer points to missing extent\n%s",
|
||||
*bp_offset < BACKPOINTER_OFFSET_MAX ? "alloc" : "btree",
|
||||
(bch2_backpointer_to_text(&buf, &bp), buf.buf))) {
|
||||
ret = bch2_backpointer_del_by_offset(trans, bucket, *bp_offset, bp);
|
||||
if (ret == -ENOENT)
|
||||
bch_err(trans->c, "backpointer at %llu not found", *bp_offset);
|
||||
}
|
||||
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
fsck_err:
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_check_backpointers_to_extents(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
u64 bp_offset = 0;
|
||||
|
||||
while (!(ret = __bch2_trans_do(&trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
check_one_backpointer(&trans, iter.pos, &bp_offset))) &&
|
||||
bp_offset < U64_MAX)
|
||||
bp_offset++;
|
||||
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
bch2_trans_exit(&trans);
|
||||
return ret < 0 ? ret : 0;
|
||||
}
|
libbcachefs/backpointers.h (new file, 38 lines)
@@ -0,0 +1,38 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H

#include "super.h"

int bch2_backpointer_invalid(const struct bch_fs *, struct bkey_s_c k,
			     int, struct printbuf *);
void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *);
void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
void bch2_backpointer_swab(struct bkey_s);

#define bch2_bkey_ops_backpointer (struct bkey_ops) {	\
	.key_invalid = bch2_backpointer_invalid,	\
	.val_to_text = bch2_backpointer_k_to_text,	\
	.swab = bch2_backpointer_swab,			\
}

void bch2_extent_ptr_to_bp(struct bch_fs *, enum btree_id, unsigned,
			   struct bkey_s_c, struct extent_ptr_decoded,
			   struct bpos *, struct bch_backpointer *);

int bch2_bucket_backpointer_del(struct btree_trans *, struct bkey_i_alloc_v4 *,
				struct bch_backpointer, struct bkey_s_c);
int bch2_bucket_backpointer_add(struct btree_trans *, struct bkey_i_alloc_v4 *,
				struct bch_backpointer, struct bkey_s_c);
int bch2_get_next_backpointer(struct btree_trans *, struct bpos, int,
			      u64 *, struct bch_backpointer *);
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *,
					 struct bpos, u64, struct bch_backpointer);
struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *,
					struct bpos, u64, struct bch_backpointer);

int bch2_check_btree_backpointers(struct bch_fs *);
int bch2_check_extents_to_backpointers(struct bch_fs *);
int bch2_check_backpointers_to_extents(struct bch_fs *);

#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */

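The bch2_bkey_ops_backpointer macro above follows bcachefs's usual pattern of describing each key type with a small ops struct built from a compound literal. A generic, self-contained illustration of that designated-initializer pattern (all names here are hypothetical; this is not the actual bkey_methods.c wiring):

#include <stdio.h>

struct key_ops {
	int  (*key_invalid)(const void *key);
	void (*val_to_text)(const void *key);
};

static int demo_invalid(const void *key)  { (void)key; return 0; }
static void demo_to_text(const void *key) { (void)key; printf("demo key\n"); }

/* Per-type ops described with designated initializers, like bch2_bkey_ops_backpointer. */
#define demo_ops ((struct key_ops) {		\
	.key_invalid	= demo_invalid,		\
	.val_to_text	= demo_to_text,		\
})

int main(void)
{
	struct key_ops ops = demo_ops;

	if (!ops.key_invalid(NULL))	/* 0 means "valid" in this toy */
		ops.val_to_text(NULL);
	return 0;
}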
@@ -509,6 +509,7 @@ enum {
	BCH_FS_TOPOLOGY_REPAIR_DONE,
	BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */
	BCH_FS_CHECK_LRUS_DONE,
	BCH_FS_CHECK_BACKPOINTERS_DONE,
	BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE,
	BCH_FS_FSCK_DONE,
	BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */

@@ -365,7 +365,8 @@ static inline void bkey_init(struct bkey *k)
	x(alloc_v3, 24) \
	x(set, 25) \
	x(lru, 26) \
	x(alloc_v4, 27)
	x(alloc_v4, 27) \
	x(backpointer, 28)

enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,

@ -886,6 +887,12 @@ struct bch_alloc {
|
||||
x(stripe, 32) \
|
||||
x(stripe_redundancy, 8)
|
||||
|
||||
enum {
|
||||
#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
|
||||
BCH_ALLOC_FIELDS_V1()
|
||||
#undef x
|
||||
};
|
||||
|
||||
struct bch_alloc_v2 {
|
||||
struct bch_val v;
|
||||
__u8 nr_fields;
|
||||
@ -914,6 +921,9 @@ struct bch_alloc_v3 {
|
||||
__u8 data[];
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
|
||||
LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
|
||||
|
||||
struct bch_alloc_v4 {
|
||||
struct bch_val v;
|
||||
__u64 journal_seq;
|
||||
@@ -927,22 +937,27 @@ struct bch_alloc_v4 {
	__u64 io_time[2];
	__u32 stripe;
	__u32 nr_external_backpointers;
	struct bpos backpointers[0];
} __attribute__((packed, aligned(8)));

LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
#define BCH_ALLOC_V4_U64s_V0 6
#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(u64))

BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8)
BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14)

enum {
#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
	BCH_ALLOC_FIELDS_V1()
#undef x
};
#define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX 40

struct bch_backpointer {
	struct bch_val v;
	__u8 btree_id;
	__u8 level;
	__u8 data_type;
	__u64 bucket_offset:40;
	__u32 bucket_len;
	struct bpos pos;
} __attribute__((packed, aligned(8)));

/* Quotas: */

@ -1326,7 +1341,8 @@ struct bch_sb_field_disk_groups {
|
||||
x(io_read, 0) \
|
||||
x(io_write, 1) \
|
||||
x(io_move, 2) \
|
||||
x(bucket_invalidate, 3)
|
||||
x(bucket_invalidate, 3) \
|
||||
x(bucket_discard, 4)
|
||||
|
||||
enum bch_persistent_counters {
|
||||
#define x(t, n, ...) BCH_COUNTER_##t,
|
||||
@ -1407,7 +1423,8 @@ struct bch_sb_field_journal_seq_blacklist {
|
||||
x(inode_v2, 18) \
|
||||
x(freespace, 19) \
|
||||
x(alloc_v4, 20) \
|
||||
x(new_data_types, 21)
|
||||
x(new_data_types, 21) \
|
||||
x(backpointers, 22)
|
||||
|
||||
enum bcachefs_metadata_version {
|
||||
bcachefs_metadata_version_min = 9,
|
||||
|
@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "backpointers.h"
|
||||
#include "bkey_methods.h"
|
||||
#include "btree_types.h"
|
||||
#include "alloc_background.h"
|
||||
@ -191,6 +192,9 @@ static unsigned bch2_key_types_allowed[] = {
|
||||
[BKEY_TYPE_need_discard] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_set),
|
||||
[BKEY_TYPE_backpointers] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_backpointer),
|
||||
[BKEY_TYPE_btree] =
|
||||
(1U << KEY_TYPE_deleted)|
|
||||
(1U << KEY_TYPE_btree_ptr)|
|
||||
|
@@ -376,7 +376,7 @@ struct btree_trans_commit_hook {
	struct btree_trans_commit_hook *next;
};

#define BTREE_TRANS_MEM_MAX (1U << 14)
#define BTREE_TRANS_MEM_MAX (1U << 16)

struct btree_trans {
	struct bch_fs *c;

@@ -638,6 +638,11 @@ static inline bool btree_type_has_snapshots(enum btree_id id)
	return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
}

static inline bool btree_type_has_ptrs(enum btree_id id)
{
	return (1 << id) & BTREE_ID_HAS_PTRS;
}

static inline bool btree_node_type_needs_gc(enum btree_node_type type)
{
	return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type);

@ -7,6 +7,7 @@
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "alloc_background.h"
|
||||
#include "backpointers.h"
|
||||
#include "bset.h"
|
||||
#include "btree_gc.h"
|
||||
#include "btree_update.h"
|
||||
@ -655,16 +656,6 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
|
||||
{
|
||||
EBUG_ON(sectors < 0);
|
||||
|
||||
return crc_is_compressed(p.crc)
|
||||
? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
|
||||
p.crc.uncompressed_size)
|
||||
: sectors;
|
||||
}
|
||||
|
||||
static int check_bucket_ref(struct bch_fs *c,
|
||||
struct bkey_s_c k,
|
||||
const struct bch_extent_ptr *ptr,
|
||||
@ -1368,21 +1359,43 @@ need_mark:
|
||||
/* trans_mark: */
|
||||
|
||||
static int bch2_trans_mark_pointer(struct btree_trans *trans,
|
||||
struct bkey_s_c k, struct extent_ptr_decoded p,
|
||||
s64 sectors, enum bch_data_type data_type)
|
||||
enum btree_id btree_id, unsigned level,
|
||||
struct bkey_s_c k, struct extent_ptr_decoded p,
|
||||
unsigned flags)
|
||||
{
|
||||
bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
|
||||
struct btree_iter iter;
|
||||
struct bkey_i_alloc_v4 *a;
|
||||
struct bpos bucket_pos;
|
||||
struct bch_backpointer bp;
|
||||
s64 sectors;
|
||||
int ret;
|
||||
|
||||
a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr));
|
||||
bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket_pos, &bp);
|
||||
sectors = bp.bucket_len;
|
||||
if (!insert)
|
||||
sectors = -sectors;
|
||||
|
||||
a = bch2_trans_start_alloc_update(trans, &iter, bucket_pos);
|
||||
if (IS_ERR(a))
|
||||
return PTR_ERR(a);
|
||||
|
||||
ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
|
||||
ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type,
|
||||
a->v.gen, &a->v.data_type,
|
||||
&a->v.dirty_sectors, &a->v.cached_sectors) ?:
|
||||
bch2_trans_update(trans, &iter, &a->k_i, 0);
|
||||
&a->v.dirty_sectors, &a->v.cached_sectors);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (!p.ptr.cached) {
|
||||
ret = insert
|
||||
? bch2_bucket_backpointer_add(trans, a, bp, k)
|
||||
: bch2_bucket_backpointer_del(trans, a, bp, k);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
@ -1476,8 +1489,7 @@ int bch2_trans_mark_extent(struct btree_trans *trans,
|
||||
if (flags & BTREE_TRIGGER_OVERWRITE)
|
||||
disk_sectors = -disk_sectors;
|
||||
|
||||
ret = bch2_trans_mark_pointer(trans, k, p,
|
||||
disk_sectors, data_type);
|
||||
ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
|
@ -75,6 +75,15 @@ static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c,
|
||||
return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
|
||||
}
|
||||
|
||||
static inline struct bpos PTR_BUCKET_POS_OFFSET(const struct bch_fs *c,
|
||||
const struct bch_extent_ptr *ptr,
|
||||
u32 *bucket_offset)
|
||||
{
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
|
||||
return POS(ptr->dev, sector_to_bucket_and_offset(ca, ptr->offset, bucket_offset));
|
||||
}
|
||||
|
||||
static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca,
|
||||
const struct bch_extent_ptr *ptr)
|
||||
{
|
||||
@ -90,6 +99,16 @@ static inline enum bch_data_type ptr_data_type(const struct bkey *k,
|
||||
return ptr->cached ? BCH_DATA_cached : BCH_DATA_user;
|
||||
}
|
||||
|
||||
static inline s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
|
||||
{
|
||||
EBUG_ON(sectors < 0);
|
||||
|
||||
return crc_is_compressed(p.crc)
|
||||
? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
|
||||
p.crc.uncompressed_size)
|
||||
: sectors;
|
||||
}
|
||||
|
||||
static inline int gen_cmp(u8 a, u8 b)
|
||||
{
|
||||
return (s8) (a - b);
|
||||
@@ -144,12 +163,25 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reser
	return reserved;
}

static inline u64 dev_buckets_free(struct bch_dev *ca,
				   struct bch_dev_usage usage,
				   enum alloc_reserve reserve)
{
	return max_t(s64, 0,
		     usage.d[BCH_DATA_free].buckets -
		     ca->nr_open_buckets -
		     bch2_dev_buckets_reserved(ca, reserve));
}

static inline u64 __dev_buckets_available(struct bch_dev *ca,
					  struct bch_dev_usage usage,
					  enum alloc_reserve reserve)
{
	return max_t(s64, 0,
		     usage.d[BCH_DATA_free].buckets -
		     usage.d[BCH_DATA_cached].buckets -
		     usage.d[BCH_DATA_need_gc_gens].buckets -
		     usage.d[BCH_DATA_need_discard].buckets -
		     ca->nr_open_buckets -
		     bch2_dev_buckets_reserved(ca, reserve));
}

@ -95,7 +95,7 @@ struct copygc_heap_entry {
|
||||
u8 replicas;
|
||||
u32 fragmentation;
|
||||
u32 sectors;
|
||||
u64 offset;
|
||||
u64 bucket;
|
||||
};
|
||||
|
||||
typedef HEAP(struct copygc_heap_entry) copygc_heap;
|
||||
|
@ -1015,47 +1015,6 @@ static int check_subvols(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Checking for overlapping extents needs to be reimplemented
|
||||
*/
|
||||
#if 0
|
||||
static int fix_overlapping_extent(struct btree_trans *trans,
|
||||
struct bkey_s_c k, struct bpos cut_at)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_i *u;
|
||||
int ret;
|
||||
|
||||
u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
|
||||
ret = PTR_ERR_OR_ZERO(u);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bkey_reassemble(u, k);
|
||||
bch2_cut_front(cut_at, u);
|
||||
|
||||
|
||||
/*
|
||||
* We don't want to go through the extent_handle_overwrites path:
|
||||
*
|
||||
* XXX: this is going to screw up disk accounting, extent triggers
|
||||
* assume things about extent overwrites - we should be running the
|
||||
* triggers manually here
|
||||
*/
|
||||
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p,
|
||||
BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
|
||||
|
||||
BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS);
|
||||
ret = bch2_btree_iter_traverse(&iter) ?:
|
||||
bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bpos pos)
|
||||
@ -1150,9 +1109,72 @@ fsck_err:
|
||||
return ret ?: ret2;
|
||||
}
|
||||
|
||||
struct extent_end {
|
||||
u32 snapshot;
|
||||
u64 offset;
|
||||
};
|
||||
|
||||
typedef DARRAY(struct extent_end) extent_ends;
|
||||
|
||||
static int extent_ends_at(extent_ends *extent_ends,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct extent_end *i, n = (struct extent_end) {
|
||||
.snapshot = k.k->p.snapshot,
|
||||
.offset = k.k->p.offset,
|
||||
};
|
||||
|
||||
darray_for_each(*extent_ends, i) {
|
||||
if (i->snapshot == k.k->p.snapshot) {
|
||||
*i = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (i->snapshot >= k.k->p.snapshot)
|
||||
break;
|
||||
}
|
||||
|
||||
return darray_insert_item(extent_ends, i - extent_ends->data, n);
|
||||
}
|
||||
|
||||
static int check_extent_start(struct btree_trans *trans,
|
||||
struct snapshots_seen *s,
|
||||
extent_ends *extent_ends,
|
||||
struct bkey_s_c k,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct extent_end *i;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
darray_for_each(*extent_ends, i) {
|
||||
if (fsck_err_on(i->offset > bkey_start_offset(k.k) &&
|
||||
key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot), c,
|
||||
"overlapping extents: extent in snapshot %u ends at %llu overlaps with\n%s",
|
||||
i->snapshot,
|
||||
i->offset,
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
|
||||
struct bkey_i *update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
|
||||
if ((ret = PTR_ERR_OR_ZERO(update)))
|
||||
goto err;
|
||||
bkey_reassemble(update, k);
|
||||
ret = bch2_trans_update_extent(trans, iter, update, 0);
|
||||
if (!ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
err:
|
||||
fsck_err:
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
|
||||
struct inode_walker *inode,
|
||||
struct snapshots_seen *s)
|
||||
struct snapshots_seen *s,
|
||||
extent_ends *extent_ends)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_s_c k;
|
||||
@ -1182,6 +1204,8 @@ peek:
|
||||
goto out;
|
||||
|
||||
if (inode->cur_inum != k.k->p.inode) {
|
||||
extent_ends->nr = 0;
|
||||
|
||||
ret = check_i_sectors(trans, inode);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -1195,20 +1219,7 @@ peek:
|
||||
*/
|
||||
goto peek;
|
||||
}
|
||||
#if 0
|
||||
if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
|
||||
char buf1[200];
|
||||
char buf2[200];
|
||||
|
||||
bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
|
||||
bch2_bkey_val_to_text(&PBUF(buf2), c, k);
|
||||
|
||||
if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) {
|
||||
ret = fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
ret = __walk_inode(trans, inode, k.k->p);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
@ -1259,13 +1270,17 @@ peek:
|
||||
}
|
||||
}
|
||||
|
||||
ret = check_extent_start(trans, s, extent_ends, k, iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (bkey_extent_is_allocation(k.k))
|
||||
for_each_visible_inode(c, s, inode, k.k->p.snapshot, i)
|
||||
i->count += k.k->size;
|
||||
#if 0
|
||||
bch2_bkey_buf_reassemble(&prev, c, k);
|
||||
#endif
|
||||
|
||||
ret = extent_ends_at(extent_ends, k);
|
||||
if (ret)
|
||||
goto err;
|
||||
out:
|
||||
err:
|
||||
fsck_err:
|
||||
@ -1287,13 +1302,9 @@ static int check_extents(struct bch_fs *c)
|
||||
struct snapshots_seen s;
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
extent_ends extent_ends = { 0 };
|
||||
int ret = 0;
|
||||
|
||||
#if 0
|
||||
struct bkey_buf prev;
|
||||
bch2_bkey_buf_init(&prev);
|
||||
prev.k->k = KEY(0, 0, 0);
|
||||
#endif
|
||||
snapshots_seen_init(&s);
|
||||
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
|
||||
|
||||
@ -1309,14 +1320,12 @@ static int check_extents(struct bch_fs *c)
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
check_extent(&trans, &iter, &w, &s));
|
||||
check_extent(&trans, &iter, &w, &s, &extent_ends));
|
||||
if (ret)
|
||||
break;
|
||||
} while (bch2_btree_iter_advance(&iter));
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
#if 0
|
||||
bch2_bkey_buf_exit(&prev, c);
|
||||
#endif
|
||||
darray_exit(&extent_ends);
|
||||
inode_walker_exit(&w);
|
||||
bch2_trans_exit(&trans);
|
||||
snapshots_seen_exit(&s);
|
||||
|
@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_gc.h"
#include "btree_update.h"
@ -9,6 +11,7 @@
#include "buckets.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "inode.h"
#include "io.h"
#include "journal_reclaim.h"
@ -632,6 +635,70 @@ err:
	return ret;
}

static int move_ratelimit(struct btree_trans *trans,
			  struct moving_context *ctxt,
			  struct bch_ratelimit *rate)
{
	u64 delay;

	do {
		delay = rate ? bch2_ratelimit_delay(rate) : 0;

		if (delay) {
			bch2_trans_unlock(trans);
			set_current_state(TASK_INTERRUPTIBLE);
		}

		if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			return 1;
		}

		if (delay)
			schedule_timeout(delay);

		if (unlikely(freezing(current))) {
			move_ctxt_wait_event(ctxt, trans, list_empty(&ctxt->reads));
			try_to_freeze();
		}
	} while (delay);

	move_ctxt_wait_event(ctxt, trans,
		atomic_read(&ctxt->write_sectors) <
		SECTORS_IN_FLIGHT_PER_DEVICE);

	move_ctxt_wait_event(ctxt, trans,
		atomic_read(&ctxt->read_sectors) <
		SECTORS_IN_FLIGHT_PER_DEVICE);

	return 0;
}

static int move_get_io_opts(struct btree_trans *trans,
			    struct bch_io_opts *io_opts,
			    struct bkey_s_c k, u64 *cur_inum)
{
	struct bch_inode_unpacked inode;
	int ret;

	if (*cur_inum == k.k->p.inode)
		return 0;

	*io_opts = bch2_opts_to_inode_opts(trans->c->opts);

	ret = lookup_inode(trans,
			   SPOS(0, k.k->p.inode, k.k->p.snapshot),
			   &inode);
	if (ret == -EINTR)
		return ret;

	if (!ret)
		bch2_io_opts_apply(io_opts, bch2_inode_opts_get(&inode));

	*cur_inum = k.k->p.inode;
	return 0;
}

static int __bch2_move_data(struct bch_fs *c,
			    struct moving_context *ctxt,
			    struct bch_ratelimit *rate,
@ -642,7 +709,6 @@ static int __bch2_move_data(struct bch_fs *c,
			    struct bch_move_stats *stats,
			    enum btree_id btree_id)
{
	bool kthread = (current->flags & PF_KTHREAD) != 0;
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct bkey_buf sk;
	struct btree_trans trans;
@ -650,7 +716,7 @@ static int __bch2_move_data(struct bch_fs *c,
	struct bkey_s_c k;
	struct data_opts data_opts;
	enum data_cmd data_cmd;
	u64 delay, cur_inum = U64_MAX;
	u64 cur_inum = U64_MAX;
	int ret = 0, ret2;

	bch2_bkey_buf_init(&sk);
@ -667,37 +733,7 @@ static int __bch2_move_data(struct bch_fs *c,
	if (rate)
		bch2_ratelimit_reset(rate);

	while (1) {
		do {
			delay = rate ? bch2_ratelimit_delay(rate) : 0;

			if (delay) {
				bch2_trans_unlock(&trans);
				set_current_state(TASK_INTERRUPTIBLE);
			}

			if (kthread && (ret = kthread_should_stop())) {
				__set_current_state(TASK_RUNNING);
				goto out;
			}

			if (delay)
				schedule_timeout(delay);

			if (unlikely(freezing(current))) {
				move_ctxt_wait_event(ctxt, &trans, list_empty(&ctxt->reads));
				try_to_freeze();
			}
		} while (delay);

		move_ctxt_wait_event(ctxt, &trans,
			atomic_read(&ctxt->write_sectors) <
			SECTORS_IN_FLIGHT_PER_DEVICE);

		move_ctxt_wait_event(ctxt, &trans,
			atomic_read(&ctxt->read_sectors) <
			SECTORS_IN_FLIGHT_PER_DEVICE);

	while (!move_ratelimit(&trans, ctxt, rate)) {
		bch2_trans_begin(&trans);

		k = bch2_btree_iter_peek(&iter);
@ -718,23 +754,9 @@ static int __bch2_move_data(struct bch_fs *c,
		if (!bkey_extent_is_direct_data(k.k))
			goto next_nondata;

		if (btree_id == BTREE_ID_extents &&
		    cur_inum != k.k->p.inode) {
			struct bch_inode_unpacked inode;

			io_opts = bch2_opts_to_inode_opts(c->opts);

			ret = lookup_inode(&trans,
					   SPOS(0, k.k->p.inode, k.k->p.snapshot),
					   &inode);
			if (ret == -EINTR)
				continue;

			if (!ret)
				bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode));

			cur_inum = k.k->p.inode;
		}
		ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
		if (ret)
			continue;

		switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
		case DATA_SKIP:
@ -779,7 +801,6 @@ next:
next_nondata:
		bch2_btree_iter_advance(&iter);
	}
out:

	bch2_trans_iter_exit(&trans, &iter);
	bch2_trans_exit(&trans);
@ -848,7 +869,6 @@ int bch2_move_data(struct bch_fs *c,
			break;
	}


	move_ctxt_wait_event(&ctxt, NULL, list_empty(&ctxt.reads));
	closure_sync(&ctxt.cl);

@ -862,6 +882,167 @@ int bch2_move_data(struct bch_fs *c,
	return ret;
}

static int verify_bucket_evacuated(struct btree_trans *trans, struct bpos bucket, int gen)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
			     bucket, BTREE_ITER_CACHED);
	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);

	if (!ret && k.k->type == KEY_TYPE_alloc_v4) {
		struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);

		if (a.v->gen == gen &&
		    a.v->dirty_sectors) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "failed to evacuate bucket ");
			bch2_bkey_val_to_text(&buf, c, k);

			bch_err_ratelimited(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_evacuate_bucket(struct bch_fs *c,
			 struct bpos bucket, int gen,
			 struct bch_ratelimit *rate,
			 struct write_point_specifier wp,
			 enum data_cmd data_cmd,
			 struct data_opts *data_opts,
			 struct bch_move_stats *stats)
{
	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
	struct moving_context ctxt = { .stats = stats };
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_buf sk;
	struct bch_backpointer bp;
	u64 bp_offset = 0, cur_inum = U64_MAX;
	int ret = 0;

	bch2_bkey_buf_init(&sk);
	bch2_trans_init(&trans, c, 0, 0);
	progress_list_add(c, stats);
	closure_init_stack(&ctxt.cl);
	INIT_LIST_HEAD(&ctxt.reads);
	init_waitqueue_head(&ctxt.wait);

	stats->data_type = BCH_DATA_user;

	while (!(ret = move_ratelimit(&trans, &ctxt, rate))) {
		bch2_trans_begin(&trans);

		ret = bch2_get_next_backpointer(&trans, bucket, gen,
						&bp_offset, &bp);
		if (ret == -EINTR)
			continue;
		if (ret)
			goto err;
		if (bp_offset == U64_MAX)
			break;

		if (!bp.level) {
			struct bkey_s_c k;

			k = bch2_backpointer_get_key(&trans, &iter,
						bucket, bp_offset, bp);
			ret = bkey_err(k);
			if (ret == -EINTR)
				continue;
			if (ret)
				goto err;
			if (!k.k)
				continue;

			bch2_bkey_buf_reassemble(&sk, c, k);
			k = bkey_i_to_s_c(sk.k);
			bch2_trans_iter_exit(&trans, &iter);

			ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
			if (ret)
				continue;

			data_opts->target = io_opts.background_target;
			data_opts->rewrite_dev = bucket.inode;

			ret = bch2_move_extent(&trans, &ctxt, wp, io_opts, bp.btree_id, k,
					       data_cmd, *data_opts);
			if (ret == -EINTR)
				continue;
			if (ret == -ENOMEM) {
				/* memory allocation failure, wait for some IO to finish */
				bch2_move_ctxt_wait_for_io(&ctxt, &trans);
				continue;
			}
			if (ret)
				goto err;

			if (rate)
				bch2_ratelimit_increment(rate, k.k->size);
			atomic64_add(k.k->size, &stats->sectors_seen);
		} else {
			struct btree *b;

			b = bch2_backpointer_get_node(&trans, &iter,
						bucket, bp_offset, bp);
			ret = PTR_ERR_OR_ZERO(b);
			if (ret == -EINTR)
				continue;
			if (ret)
				goto err;
			if (!b)
				continue;

			ret = bch2_btree_node_rewrite(&trans, &iter, b, 0);
			bch2_trans_iter_exit(&trans, &iter);

			if (ret == -EINTR)
				continue;
			if (ret)
				goto err;

			if (rate)
				bch2_ratelimit_increment(rate, c->opts.btree_node_size >> 9);
			atomic64_add(c->opts.btree_node_size >> 9, &stats->sectors_seen);
			atomic64_add(c->opts.btree_node_size >> 9, &stats->sectors_moved);
		}

		bp_offset++;
	}

	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) {
		bch2_trans_unlock(&trans);
		move_ctxt_wait_event(&ctxt, NULL, list_empty(&ctxt.reads));
		closure_sync(&ctxt.cl);
		lockrestart_do(&trans, verify_bucket_evacuated(&trans, bucket, gen));
	}
err:
	bch2_trans_exit(&trans);
	bch2_bkey_buf_exit(&sk, c);

	move_ctxt_wait_event(&ctxt, NULL, list_empty(&ctxt.reads));
	closure_sync(&ctxt.cl);
	progress_list_del(c, stats);

	EBUG_ON(atomic_read(&ctxt.write_sectors));

	trace_move_data(c,
			atomic64_read(&stats->sectors_moved),
			atomic64_read(&stats->keys_moved));

	return ret;
}

typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
					  struct btree *, struct bch_io_opts *,
					  struct data_opts *);

@ -62,6 +62,12 @@ int bch2_move_data(struct bch_fs *,
		   move_pred_fn, void *,
		   struct bch_move_stats *);

int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int,
			 struct bch_ratelimit *,
			 struct write_point_specifier,
			 enum data_cmd,
			 struct data_opts *,
			 struct bch_move_stats *);
int bch2_data_job(struct bch_fs *,
		  struct bch_move_stats *,
		  struct bch_ioctl_data);

@ -30,80 +30,6 @@
#include <linux/sort.h>
#include <linux/wait.h>

static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
{
	const struct copygc_heap_entry *l = _l;
	const struct copygc_heap_entry *r = _r;

	return cmp_int(l->dev, r->dev) ?:
		cmp_int(l->offset, r->offset);
}

static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
				 struct bkey_s_c k,
				 struct bch_io_opts *io_opts,
				 struct data_opts *data_opts)
{
	copygc_heap *h = &c->copygc_heap;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p = { 0 };

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
		struct copygc_heap_entry search = {
			.dev = p.ptr.dev,
			.offset = p.ptr.offset,
		};
		ssize_t i;

		if (p.ptr.cached)
			continue;

		i = eytzinger0_find_le(h->data, h->used,
				       sizeof(h->data[0]),
				       bucket_offset_cmp, &search);
#if 0
		/* eytzinger search verify code: */
		ssize_t j = -1, k;

		for (k = 0; k < h->used; k++)
			if (h->data[k].offset <= ptr->offset &&
			    (j < 0 || h->data[k].offset > h->data[j].offset))
				j = k;

		BUG_ON(i != j);
#endif
		if (i >= 0 &&
		    p.ptr.dev == h->data[i].dev &&
		    p.ptr.offset < h->data[i].offset + ca->mi.bucket_size &&
		    p.ptr.gen == h->data[i].gen) {
			/*
			 * We need to use the journal reserve here, because
			 *  - journal reclaim depends on btree key cache
			 *    flushing to make forward progress,
			 *  - which has to make forward progress when the
			 *    journal is pre-reservation full,
			 *  - and depends on allocation - meaning allocator and
			 *    copygc
			 */

			data_opts->target = io_opts->background_target;
			data_opts->nr_replicas = 1;
			data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE|
				JOURNAL_WATERMARK_copygc;
			data_opts->rewrite_dev = p.ptr.dev;

			if (p.has_ec)
				data_opts->nr_replicas += p.ec.redundancy;

			return DATA_REWRITE;
		}
	}

	return DATA_SKIP;
}

static inline int fragmentation_cmp(copygc_heap *heap,
				    struct copygc_heap_entry l,
				    struct copygc_heap_entry r)
@ -111,7 +37,7 @@ static inline int fragmentation_cmp(copygc_heap *heap,
	return cmp_int(l.fragmentation, r.fragmentation);
}

static int walk_buckets_to_copygc(struct bch_fs *c)
static int find_buckets_to_copygc(struct bch_fs *c)
{
	copygc_heap *h = &c->copygc_heap;
	struct btree_trans trans;
@ -122,6 +48,14 @@ static int walk_buckets_to_copygc(struct bch_fs *c)

	bch2_trans_init(&trans, c, 0, 0);

	/*
	 * Find buckets with lowest sector counts, skipping completely
	 * empty buckets, by building a maxheap sorted by sector count,
	 * and repeatedly replacing the maximum element until all
	 * buckets have been visited.
	 */
	h->used = 0;

	for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		struct bch_dev *ca = bch_dev_bkey_exists(c, iter.pos.inode);
@ -129,7 +63,8 @@ static int walk_buckets_to_copygc(struct bch_fs *c)

		bch2_alloc_to_v4(k, &a);

		if (a.data_type != BCH_DATA_user ||
		if ((a.data_type != BCH_DATA_btree &&
		     a.data_type != BCH_DATA_user) ||
		    a.dirty_sectors >= ca->mi.bucket_size ||
		    bch2_bucket_is_open(c, iter.pos.inode, iter.pos.offset))
			continue;
@ -141,7 +76,7 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
			.fragmentation = div_u64((u64) a.dirty_sectors * (1ULL << 31),
						 ca->mi.bucket_size),
			.sectors = a.dirty_sectors,
			.offset = bucket_to_sector(ca, iter.pos.offset),
			.bucket = iter.pos.offset,
		};
		heap_add_or_replace(h, e, -fragmentation_cmp, NULL);

@ -152,77 +87,22 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
	return ret;
}

static int bucket_inorder_cmp(const void *_l, const void *_r)
{
	const struct copygc_heap_entry *l = _l;
	const struct copygc_heap_entry *r = _r;

	return cmp_int(l->dev, r->dev) ?: cmp_int(l->offset, r->offset);
}

static int check_copygc_was_done(struct bch_fs *c,
				 u64 *sectors_not_moved,
				 u64 *buckets_not_moved)
{
	copygc_heap *h = &c->copygc_heap;
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_alloc_v4 a;
	struct copygc_heap_entry *i;
	int ret = 0;

	sort(h->data, h->used, sizeof(h->data[0]), bucket_inorder_cmp, NULL);

	bch2_trans_init(&trans, c, 0, 0);
	bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN, 0);

	for (i = h->data; i < h->data + h->used; i++) {
		struct bch_dev *ca = bch_dev_bkey_exists(c, i->dev);

		bch2_btree_iter_set_pos(&iter, POS(i->dev, sector_to_bucket(ca, i->offset)));

		ret = lockrestart_do(&trans,
				bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
		if (ret)
			break;

		bch2_alloc_to_v4(k, &a);

		if (a.gen == i->gen && a.dirty_sectors) {
			*sectors_not_moved += a.dirty_sectors;
			*buckets_not_moved += 1;
		}
	}
	bch2_trans_iter_exit(&trans, &iter);

	bch2_trans_exit(&trans);
	return ret;
}

static int bch2_copygc(struct bch_fs *c)
{
	copygc_heap *h = &c->copygc_heap;
	struct copygc_heap_entry e, *i;
	struct copygc_heap_entry e;
	struct bch_move_stats move_stats;
	u64 sectors_to_move = 0, sectors_to_write = 0, sectors_not_moved = 0;
	u64 sectors_reserved = 0;
	u64 buckets_to_move, buckets_not_moved = 0;
	struct bch_dev *ca;
	unsigned dev_idx;
	size_t heap_size = 0;
	struct data_opts data_opts = {
		.nr_replicas = 1,
		.btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
	};
	int ret;

	bch_move_stats_init(&move_stats, "copygc");

	/*
	 * Find buckets with lowest sector counts, skipping completely
	 * empty buckets, by building a maxheap sorted by sector count,
	 * and repeatedly replacing the maximum element until all
	 * buckets have been visited.
	 */
	h->used = 0;

	for_each_rw_member(ca, c, dev_idx)
		heap_size += ca->mi.nbuckets >> 7;

@ -234,21 +114,7 @@ static int bch2_copygc(struct bch_fs *c)
		}
	}

	for_each_rw_member(ca, c, dev_idx) {
		struct bch_dev_usage usage = bch2_dev_usage_read(ca);

		u64 avail = max_t(s64, 0,
				  usage.d[BCH_DATA_free].buckets +
				  usage.d[BCH_DATA_need_discard].buckets -
				  ca->nr_open_buckets -
				  bch2_dev_buckets_reserved(ca, RESERVE_movinggc));

		avail = min(avail, ca->mi.nbuckets >> 6);

		sectors_reserved += avail * ca->mi.bucket_size;
	}

	ret = walk_buckets_to_copygc(c);
	ret = find_buckets_to_copygc(c);
	if (ret) {
		bch2_fs_fatal_error(c, "error walking buckets to copygc!");
		return ret;
@ -259,68 +125,24 @@ static int bch2_copygc(struct bch_fs *c)
		return 0;
	}

	/*
	 * Our btree node allocations also come out of RESERVE_movingc:
	 */
	sectors_reserved = (sectors_reserved * 3) / 4;
	if (!sectors_reserved) {
		bch2_fs_fatal_error(c, "stuck, ran out of copygc reserve!");
		return -1;
	}
	heap_resort(h, fragmentation_cmp, NULL);

	for (i = h->data; i < h->data + h->used; i++) {
		sectors_to_move += i->sectors;
		sectors_to_write += i->sectors * i->replicas;
	}

	while (sectors_to_write > sectors_reserved) {
	while (h->used) {
		BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL));
		sectors_to_write -= e.sectors * e.replicas;
		/* not correct w.r.t. device removal */

		ret = bch2_evacuate_bucket(c, POS(e.dev, e.bucket), e.gen, NULL,
					   writepoint_ptr(&c->copygc_write_point),
					   DATA_REWRITE, &data_opts,
					   &move_stats);
		if (ret < 0)
			bch_err(c, "error %i from bch2_move_data() in copygc", ret);
		if (ret)
			return ret;
	}

	buckets_to_move = h->used;

	if (!buckets_to_move) {
		bch_err_ratelimited(c, "copygc cannot run - sectors_reserved %llu!",
				    sectors_reserved);
		return 0;
	}

	eytzinger0_sort(h->data, h->used,
			sizeof(h->data[0]),
			bucket_offset_cmp, NULL);

	ret = bch2_move_data(c,
			     0, POS_MIN,
			     BTREE_ID_NR, POS_MAX,
			     NULL,
			     writepoint_ptr(&c->copygc_write_point),
			     copygc_pred, NULL,
			     &move_stats);
	if (ret < 0)
		bch_err(c, "error %i from bch2_move_data() in copygc", ret);
	if (ret)
		return ret;

	ret = check_copygc_was_done(c, &sectors_not_moved, &buckets_not_moved);
	if (ret) {
		bch_err(c, "error %i from check_copygc_was_done()", ret);
		return ret;
	}

	if (sectors_not_moved)
		bch_warn_ratelimited(c,
			"copygc finished but %llu/%llu sectors, %llu/%llu buckets not moved (move stats: moved %llu sectors, raced %llu keys, %llu sectors)",
			sectors_not_moved, sectors_to_move,
			buckets_not_moved, buckets_to_move,
			atomic64_read(&move_stats.sectors_moved),
			atomic64_read(&move_stats.keys_raced),
			atomic64_read(&move_stats.sectors_raced));

	trace_copygc(c,
		     atomic64_read(&move_stats.sectors_moved), sectors_not_moved,
		     buckets_to_move, buckets_not_moved);
	return 0;
	trace_copygc(c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
	return ret;
}

/*

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "alloc_background.h"
#include "btree_gc.h"
@ -1075,8 +1076,8 @@ int bch2_fs_recovery(struct bch_fs *c)
	}

	if (!c->opts.nochanges) {
		if (c->sb.version < bcachefs_metadata_version_new_data_types) {
			bch_info(c, "version prior to new_data_types, upgrade and fsck required");
		if (c->sb.version < bcachefs_metadata_version_backpointers) {
			bch_info(c, "version prior to backpointers, upgrade and fsck required");
			c->opts.version_upgrade = true;
			c->opts.fsck = true;
			c->opts.fix_errors = FSCK_OPT_YES;
@ -1254,6 +1255,28 @@ use_clean:
		bch_verbose(c, "done checking lrus");
		set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);

		bch_info(c, "checking backpointers to alloc keys");
		err = "error checking backpointers to alloc keys";
		ret = bch2_check_btree_backpointers(c);
		if (ret)
			goto err;
		bch_verbose(c, "done checking backpointers to alloc keys");

		bch_info(c, "checking backpointers to extents");
		err = "error checking backpointers to extents";
		ret = bch2_check_backpointers_to_extents(c);
		if (ret)
			goto err;
		bch_verbose(c, "done checking backpointers to extents");

		bch_info(c, "checking extents to backpointers");
		err = "error checking extents to backpointers";
		ret = bch2_check_extents_to_backpointers(c);
		if (ret)
			goto err;
		bch_verbose(c, "done checking extents to backpointers");
		set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags);

		bch_info(c, "checking alloc to lru refs");
		err = "error checking alloc to lru refs";
		ret = bch2_check_alloc_to_lru_refs(c);
@ -1265,6 +1288,7 @@ use_clean:
	set_bit(BCH_FS_MAY_GO_RW, &c->flags);
	set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
	set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
	set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags);
	set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
	set_bit(BCH_FS_FSCK_DONE, &c->flags);

@ -1417,6 +1441,9 @@ int bch2_fs_initialize(struct bch_fs *c)
	c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
	c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);

	if (c->sb.version < bcachefs_metadata_version_backpointers)
		c->opts.version_upgrade = true;

	if (c->opts.version_upgrade) {
		c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
		c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);

@ -1433,6 +1433,8 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
					BTREE_TRIGGER_NORUN, NULL) ?:
		bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
					BTREE_TRIGGER_NORUN, NULL) ?:
		bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end,
					BTREE_TRIGGER_NORUN, NULL) ?:
		bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
					BTREE_TRIGGER_NORUN, NULL);
	if (ret)