Update bcachefs sources to 720f644e63 bcachefs: Improve reflink repair code

This commit is contained in:
Kent Overstreet 2021-10-19 13:50:45 -04:00
parent a77c35a1c4
commit f10fc03854
11 changed files with 107 additions and 80 deletions

View File

@ -1 +1 @@
710cd382bf5f50ab8114a7cc22d78b5b2f574529 720f644e63e0f5b24bb69f2ffb70cdc2dd162810

View File

@ -913,24 +913,18 @@ struct bch_stripe {
struct bch_reflink_p { struct bch_reflink_p {
struct bch_val v; struct bch_val v;
__le64 idx; __le64 idx;
/*
* A reflink pointer might point to an indirect extent which is then __le32 reservation_generation;
* later split (by copygc or rebalance). If we only pointed to part of __u8 nr_replicas;
* the original indirect extent, and then one of the fragments is __u8 pad[3];
* outside the range we point to, we'd leak a refcount: so when creating };
* reflink pointers, we need to store pad values to remember the full
* range we were taking a reference on.
*/
__le32 front_pad;
__le32 back_pad;
} __attribute__((packed, aligned(8)));
struct bch_reflink_v { struct bch_reflink_v {
struct bch_val v; struct bch_val v;
__le64 refcount; __le64 refcount;
union bch_extent_entry start[0]; union bch_extent_entry start[0];
__u64 _data[0]; __u64 _data[0];
} __attribute__((packed, aligned(8))); };
struct bch_indirect_inline_data { struct bch_indirect_inline_data {
struct bch_val v; struct bch_val v;

View File

@ -738,7 +738,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum btree_id btree_id,
*max_stale = max(*max_stale, ptr_stale(ca, ptr)); *max_stale = max(*max_stale, ptr_stale(ca, ptr));
} }
bch2_mark_key(c, *k, flags); ret = bch2_mark_key(c, *k, flags);
fsck_err: fsck_err:
err: err:
if (ret) if (ret)

View File

@ -1688,7 +1688,7 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
struct btree_path *path; struct btree_path *path;
struct btree_insert_entry *i; struct btree_insert_entry *i;
unsigned idx; unsigned idx;
char buf[300]; char buf1[300], buf2[300];
btree_trans_verify_sorted(trans); btree_trans_verify_sorted(trans);
@ -1697,7 +1697,7 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
path->idx, path->ref, path->intent_ref, path->idx, path->ref, path->intent_ref,
path->preserve ? " preserve" : "", path->preserve ? " preserve" : "",
bch2_btree_ids[path->btree_id], bch2_btree_ids[path->btree_id],
(bch2_bpos_to_text(&PBUF(buf), path->pos), buf), (bch2_bpos_to_text(&PBUF(buf1), path->pos), buf1),
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG
(void *) path->ip_allocated (void *) path->ip_allocated
#else #else
@ -1705,11 +1705,16 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
#endif #endif
); );
trans_for_each_update(trans, i) trans_for_each_update(trans, i) {
printk(KERN_ERR "update: btree %s %s %pS\n", struct bkey u;
struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u);
printk(KERN_ERR "update: btree %s %pS\n old %s\n new %s",
bch2_btree_ids[i->btree_id], bch2_btree_ids[i->btree_id],
(bch2_bkey_val_to_text(&PBUF(buf), trans->c, bkey_i_to_s_c(i->k)), buf), (void *) i->ip_allocated,
(void *) i->ip_allocated); (bch2_bkey_val_to_text(&PBUF(buf1), trans->c, old), buf1),
(bch2_bkey_val_to_text(&PBUF(buf2), trans->c, bkey_i_to_s_c(i->k)), buf2));
}
} }
static struct btree_path *btree_path_alloc(struct btree_trans *trans, static struct btree_path *btree_path_alloc(struct btree_trans *trans,

View File

@ -14,6 +14,7 @@
#include "ec.h" #include "ec.h"
#include "error.h" #include "error.h"
#include "movinggc.h" #include "movinggc.h"
#include "recovery.h"
#include "reflink.h" #include "reflink.h"
#include "replicas.h" #include "replicas.h"
#include "subvolume.h" #include "subvolume.h"
@ -1111,10 +1112,9 @@ static s64 __bch2_mark_reflink_p(struct bch_fs *c, struct bkey_s_c_reflink_p p,
{ {
struct reflink_gc *r; struct reflink_gc *r;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
s64 ret = 0;
while (1) { while (*r_idx < c->reflink_gc_nr) {
if (*r_idx >= c->reflink_gc_nr)
goto not_found;
r = genradix_ptr(&c->reflink_gc_table, *r_idx); r = genradix_ptr(&c->reflink_gc_table, *r_idx);
BUG_ON(!r); BUG_ON(!r);
@ -1123,16 +1123,49 @@ static s64 __bch2_mark_reflink_p(struct bch_fs *c, struct bkey_s_c_reflink_p p,
(*r_idx)++; (*r_idx)++;
} }
if (*r_idx >= c->reflink_gc_nr ||
idx < r->offset - r->size) {
ret = p.k->size;
goto not_found;
}
BUG_ON((s64) r->refcount + add < 0); BUG_ON((s64) r->refcount + add < 0);
r->refcount += add; r->refcount += add;
return r->offset - idx; return r->offset - idx;
not_found: not_found:
bch2_fs_inconsistent(c, if ((flags & BTREE_TRIGGER_GC) &&
"%llu:%llu len %u points to nonexistent indirect extent %llu", (flags & BTREE_TRIGGER_NOATOMIC)) {
p.k->p.inode, p.k->p.offset, p.k->size, idx); /*
bch2_inconsistent_error(c); * XXX: we're replacing the entire reflink pointer with an error
return -EIO; * key, we should just be replacing the part that was missing:
*/
if (fsck_err(c, "%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx)) {
struct bkey_i_error *new;
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (!new) {
bch_err(c, "%s: error allocating new key", __func__);
return -ENOMEM;
}
bkey_init(&new->k);
new->k.type = KEY_TYPE_error;
new->k.p = p.k->p;
new->k.size = p.k->size;
ret = bch2_journal_key_insert(c, BTREE_ID_extents, 0, &new->k_i);
}
} else {
bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
bch2_inconsistent_error(c);
ret = -EIO;
}
fsck_err:
return ret;
} }
static int bch2_mark_reflink_p(struct bch_fs *c, static int bch2_mark_reflink_p(struct bch_fs *c,
@ -1143,10 +1176,8 @@ static int bch2_mark_reflink_p(struct bch_fs *c,
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
struct reflink_gc *ref; struct reflink_gc *ref;
size_t l, r, m; size_t l, r, m;
u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); u64 idx = le64_to_cpu(p.v->idx);
u64 sectors = (u64) le32_to_cpu(p.v->front_pad) + unsigned sectors = p.k->size;
le32_to_cpu(p.v->back_pad) +
p.k->size;
s64 ret = 0; s64 ret = 0;
BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) == BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) ==
@ -1166,7 +1197,7 @@ static int bch2_mark_reflink_p(struct bch_fs *c,
while (sectors) { while (sectors) {
ret = __bch2_mark_reflink_p(c, p, idx, flags, &l); ret = __bch2_mark_reflink_p(c, p, idx, flags, &l);
if (ret < 0) if (ret <= 0)
return ret; return ret;
ret = min_t(s64, ret, sectors); ret = min_t(s64, ret, sectors);
@ -1722,33 +1753,12 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
bch2_fs_inconsistent(c, bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu", "%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx); p.k->p.inode, p.k->p.offset, p.k->size, idx);
bch2_inconsistent_error(c);
ret = -EIO; ret = -EIO;
goto err; goto err;
} }
if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) { BUG_ON(!*refcount && (flags & BTREE_TRIGGER_OVERWRITE));
bch2_fs_inconsistent(c,
"%llu:%llu len %u idx %llu indirect extent refcount underflow",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
ret = -EIO;
goto err;
}
if (flags & BTREE_TRIGGER_INSERT) {
struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
u64 pad;
pad = max_t(s64, le32_to_cpu(v->front_pad),
le64_to_cpu(v->idx) - bkey_start_offset(k.k));
BUG_ON(pad > U32_MAX);
v->front_pad = cpu_to_le32(pad);
pad = max_t(s64, le32_to_cpu(v->back_pad),
k.k->p.offset - p.k->size - le64_to_cpu(v->idx));
BUG_ON(pad > U32_MAX);
v->back_pad = cpu_to_le32(pad);
}
le64_add_cpu(refcount, add); le64_add_cpu(refcount, add);
if (!*refcount) { if (!*refcount) {
@ -1771,20 +1781,10 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c k, unsigned flags) struct bkey_s_c k, unsigned flags)
{ {
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
u64 idx, sectors; u64 idx = le64_to_cpu(p.v->idx);
unsigned sectors = p.k->size;
s64 ret = 0; s64 ret = 0;
if (flags & BTREE_TRIGGER_INSERT) {
struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
v->front_pad = v->back_pad = 0;
}
idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
sectors = (u64) le32_to_cpu(p.v->front_pad) +
le32_to_cpu(p.v->back_pad) +
p.k->size;
while (sectors) { while (sectors) {
ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags); ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags);
if (ret < 0) if (ret < 0)

View File

@ -183,6 +183,7 @@ int __bch2_dirent_read_target(struct btree_trans *trans,
u32 *subvol, u32 *snapshot, u64 *inum, u32 *subvol, u32 *snapshot, u64 *inum,
bool is_fsck) bool is_fsck)
{ {
struct bch_subvolume s;
int ret = 0; int ret = 0;
*subvol = 0; *subvol = 0;
@ -191,9 +192,6 @@ int __bch2_dirent_read_target(struct btree_trans *trans,
if (likely(d.v->d_type != DT_SUBVOL)) { if (likely(d.v->d_type != DT_SUBVOL)) {
*inum = le64_to_cpu(d.v->d_inum); *inum = le64_to_cpu(d.v->d_inum);
} else { } else {
struct bch_subvolume s;
int ret;
*subvol = le64_to_cpu(d.v->d_inum); *subvol = le64_to_cpu(d.v->d_inum);
ret = bch2_subvolume_get(trans, *subvol, !is_fsck, BTREE_ITER_CACHED, &s); ret = bch2_subvolume_get(trans, *subvol, !is_fsck, BTREE_ITER_CACHED, &s);

View File

@ -1050,6 +1050,8 @@ static int inode_backpointer_exists(struct btree_trans *trans,
{ {
struct btree_iter iter; struct btree_iter iter;
struct bkey_s_c k; struct bkey_s_c k;
u32 target_subvol, target_snapshot;
u64 target_inum;
int ret; int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
@ -1061,7 +1063,15 @@ static int inode_backpointer_exists(struct btree_trans *trans,
if (k.k->type != KEY_TYPE_dirent) if (k.k->type != KEY_TYPE_dirent)
goto out; goto out;
ret = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum) == inode->bi_inum; ret = __bch2_dirent_read_target(trans, bkey_s_c_to_dirent(k),
&target_subvol,
&target_snapshot,
&target_inum,
true);
if (ret)
goto out;
ret = target_inum == inode->bi_inum;
out: out:
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret; return ret;
@ -1754,7 +1764,17 @@ static int check_path(struct btree_trans *trans,
snapshot = snapshot_t(c, snapshot)->equiv; snapshot = snapshot_t(c, snapshot)->equiv;
p->nr = 0; p->nr = 0;
while (inode->bi_inum != BCACHEFS_ROOT_INO) { while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
if (inode->bi_parent_subvol) {
u64 inum;
ret = subvol_lookup(trans, inode->bi_parent_subvol,
&snapshot, &inum);
if (ret)
break;
}
ret = lockrestart_do(trans, ret = lockrestart_do(trans,
inode_backpointer_exists(trans, inode, snapshot)); inode_backpointer_exists(trans, inode, snapshot));
if (ret < 0) if (ret < 0)

View File

@ -653,7 +653,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
atomic_long_read(&c->btree_key_cache.nr_dirty), atomic_long_read(&c->btree_key_cache.nr_dirty),
atomic_long_read(&c->btree_key_cache.nr_keys)); atomic_long_read(&c->btree_key_cache.nr_keys));
min_key_cache = min(bch2_nr_btree_keys_need_flush(c), 128UL); min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128);
nr_flushed = journal_flush_pins(j, seq_to_flush, nr_flushed = journal_flush_pins(j, seq_to_flush,
min_nr, min_key_cache); min_nr, min_key_cache);

View File

@ -32,9 +32,6 @@ const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k)
if (bkey_val_bytes(p.k) != sizeof(*p.v)) if (bkey_val_bytes(p.k) != sizeof(*p.v))
return "incorrect value size"; return "incorrect value size";
if (le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad))
return "idx < front_pad";
return NULL; return NULL;
} }
@ -169,9 +166,15 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
if (ret) if (ret)
goto err; goto err;
/*
* orig is in a bkey_buf which statically allocates 5 64s for the val,
* so we know it will be big enough:
*/
orig->k.type = KEY_TYPE_reflink_p; orig->k.type = KEY_TYPE_reflink_p;
r_p = bkey_i_to_reflink_p(orig); r_p = bkey_i_to_reflink_p(orig);
set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
memset(&r_p->v, 0, sizeof(r_p->v));
r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k));
ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, 0); ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, 0);

View File

@ -886,6 +886,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
u32 *new_snapshotid, u32 *new_snapshotid,
bool ro) bool ro)
{ {
struct bch_fs *c = trans->c;
struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL }; struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
struct bkey_i_subvolume *new_subvol = NULL; struct bkey_i_subvolume *new_subvol = NULL;
struct bkey_i_subvolume *src_subvol = NULL; struct bkey_i_subvolume *src_subvol = NULL;
@ -897,7 +898,13 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
if (bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0) if (bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0)
break; break;
if (bkey_deleted(k.k))
/*
* bch2_subvolume_delete() doesn't flush the btree key cache -
* ideally it would but that's tricky
*/
if (bkey_deleted(k.k) &&
!bch2_btree_key_cache_find(c, BTREE_ID_subvolumes, dst_iter.pos))
goto found_slot; goto found_slot;
} }
@ -925,7 +932,7 @@ found_slot:
goto err; goto err;
if (k.k->type != KEY_TYPE_subvolume) { if (k.k->type != KEY_TYPE_subvolume) {
bch_err(trans->c, "subvolume %u not found", src_subvolid); bch_err(c, "subvolume %u not found", src_subvolid);
ret = -ENOENT; ret = -ENOENT;
goto err; goto err;
} }

View File

@ -75,7 +75,7 @@ static inline void snapshots_seen_init(struct snapshots_seen *s)
static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id) static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id)
{ {
if (s->nr == s->size) { if (s->nr == s->size) {
size_t new_size = max(s->size, 128UL) * 2; size_t new_size = max(s->size, (size_t) 128) * 2;
u32 *d = krealloc(s->d, new_size * sizeof(s->d[0]), GFP_KERNEL); u32 *d = krealloc(s->d, new_size * sizeof(s->d[0]), GFP_KERNEL);
if (!d) { if (!d) {