Update bcachefs sources to ece184f718 bcachefs: Reflink

commit 72a408f848
parent f5ec33b556
Author: Kent Overstreet
Date:   2019-08-21 13:17:42 -04:00
36 changed files with 1894 additions and 887 deletions


@ -1 +1 @@
22776fe9902b0b06d6aa18cd4c7f0c5ad35a95fa
ece184f718c2b678738bc2c42906e90eeb8ba7dc



@ -232,7 +232,7 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k, ret)
bch2_mark_key(c, k, 0, NULL, 0,
bch2_mark_key(c, k, 0, 0, NULL, 0,
BCH_BUCKET_MARK_ALLOC_READ|
BCH_BUCKET_MARK_NOATOMIC);
@ -244,7 +244,8 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
for_each_journal_key(*journal_keys, j)
if (j->btree_id == BTREE_ID_ALLOC)
bch2_mark_key(c, bkey_i_to_s_c(j->k), 0, NULL, 0,
bch2_mark_key(c, bkey_i_to_s_c(j->k),
0, 0, NULL, 0,
BCH_BUCKET_MARK_ALLOC_READ|
BCH_BUCKET_MARK_NOATOMIC);


@ -359,6 +359,7 @@ enum gc_phase {
GC_PHASE_BTREE_XATTRS,
GC_PHASE_BTREE_ALLOC,
GC_PHASE_BTREE_QUOTAS,
GC_PHASE_BTREE_REFLINK,
GC_PHASE_PENDING_DELETE,
GC_PHASE_ALLOC,
@ -746,6 +747,9 @@ struct bch_fs {
struct work_struct ec_stripe_delete_work;
struct llist_head ec_stripe_delete_list;
/* REFLINK */
u64 reflink_hint;
/* VFS IO PATH - fs-io.c */
struct bio_set writepage_bioset;
struct bio_set dio_write_bioset;


@ -336,7 +336,9 @@ static inline void bkey_init(struct bkey *k)
x(xattr, 11) \
x(alloc, 12) \
x(quota, 13) \
x(stripe, 14)
x(stripe, 14) \
x(reflink_p, 15) \
x(reflink_v, 16)
enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr,
@ -891,6 +893,24 @@ struct bch_stripe {
struct bch_extent_ptr ptrs[0];
} __attribute__((packed, aligned(8)));
/* Reflink: */
struct bch_reflink_p {
struct bch_val v;
__le64 idx;
__le32 reservation_generation;
__u8 nr_replicas;
__u8 pad[3];
};
struct bch_reflink_v {
struct bch_val v;
__le64 refcount;
union bch_extent_entry start[0];
__u64 _data[0];
};
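
Editorial note: the two structs above are the whole on-disk format for reflink. A reflink_p is a pointer, with idx addressing the new REFLINK btree at inode 0, and the reflink_v found there carries the refcount plus ordinary extent entries. A minimal lookup sketch (not part of the patch), reusing the iterator helpers as they appear at call sites later in this diff, error handling omitted:

static struct bkey_s_c reflink_p_resolve(struct btree_trans *trans,
					 struct bkey_s_c_reflink_p p)
{
	/* indirect extents live in BTREE_ID_REFLINK at inode 0, offset idx: */
	struct btree_iter *iter =
		bch2_trans_get_iter(trans, BTREE_ID_REFLINK,
				    POS(0, le64_to_cpu(p.v->idx)),
				    BTREE_ITER_SLOTS);

	/* on success the slot holds a KEY_TYPE_reflink_v: */
	return bch2_btree_iter_peek_slot(iter);
}
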
/* Optional/variable size superblock sections: */
struct bch_sb_field {
@ -1293,6 +1313,7 @@ enum bch_sb_features {
BCH_FEATURE_ATOMIC_NLINK = 3, /* should have gone under compat */
BCH_FEATURE_EC = 4,
BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3 = 5,
BCH_FEATURE_REFLINK = 6,
BCH_FEATURE_NR,
};
@ -1480,7 +1501,8 @@ LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
x(XATTRS, 3, "xattrs") \
x(ALLOC, 4, "alloc") \
x(QUOTAS, 5, "quotas") \
x(EC, 6, "erasure_coding")
x(EC, 6, "erasure_coding") \
x(REFLINK, 7, "reflink")
enum btree_id {
#define x(kwd, val, name) BTREE_ID_##kwd = val,


@ -50,7 +50,7 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64));
}
#define bkey_val_end(_k) vstruct_idx((_k).v, bkey_val_u64s((_k).k))
#define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k)))
#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted)
@ -552,6 +552,8 @@ BKEY_VAL_ACCESSORS(xattr);
BKEY_VAL_ACCESSORS(alloc);
BKEY_VAL_ACCESSORS(quota);
BKEY_VAL_ACCESSORS(stripe);
BKEY_VAL_ACCESSORS(reflink_p);
BKEY_VAL_ACCESSORS(reflink_v);
/* byte order helpers */


@ -10,9 +10,10 @@
#include "extents.h"
#include "inode.h"
#include "quota.h"
#include "reflink.h"
#include "xattr.h"
const char * const bch_bkey_types[] = {
const char * const bch2_bkey_types[] = {
#define x(name, nr) #name,
BCH_BKEY_TYPES()
#undef x
@ -159,7 +160,8 @@ void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
{
pr_buf(out, "u64s %u type %u ", k->u64s, k->type);
pr_buf(out, "u64s %u type %s ", k->u64s,
bch2_bkey_types[k->type]);
bch2_bpos_to_text(out, k->p);
@ -174,8 +176,6 @@ void bch2_val_to_text(struct printbuf *out, struct bch_fs *c,
if (likely(ops->val_to_text))
ops->val_to_text(out, c, k);
else
pr_buf(out, " %s", bch_bkey_types[k.k->type]);
}
void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,


@ -9,7 +9,7 @@ struct btree;
struct bkey;
enum btree_node_type;
extern const char * const bch_bkey_types[];
extern const char * const bch2_bkey_types[];
enum merge_result {
BCH_MERGE_NOMERGE,


@ -24,6 +24,16 @@
static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *,
struct btree *);
static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
{
unsigned n = ARRAY_SIZE(iter->data);
while (n && __btree_node_iter_set_end(iter, n - 1))
--n;
return n;
}
struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k)
{
unsigned offset = __btree_node_key_to_offset(b, k);
@ -110,7 +120,8 @@ void bch2_dump_btree_node_iter(struct btree *b,
{
struct btree_node_iter_set *set;
printk(KERN_ERR "btree node iter with %u sets:\n", b->nsets);
printk(KERN_ERR "btree node iter with %u/%u sets:\n",
__btree_node_iter_used(iter), b->nsets);
btree_node_iter_for_each(iter, set) {
struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
@ -119,8 +130,8 @@ void bch2_dump_btree_node_iter(struct btree *b,
char buf[100];
bch2_bkey_to_text(&PBUF(buf), &uk);
printk(KERN_ERR "set %zu key %zi/%u: %s\n", t - b->set,
k->_data - bset(b, t)->_data, bset(b, t)->u64s, buf);
printk(KERN_ERR "set %zu key %u: %s\n",
t - b->set, set->k, buf);
}
}
@ -182,8 +193,12 @@ void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
struct btree *b)
{
struct btree_node_iter_set *set, *s2;
struct bkey_packed *k, *p;
struct bset_tree *t;
if (bch2_btree_node_iter_end(iter))
return;
/* Verify no duplicates: */
btree_node_iter_for_each(iter, set)
btree_node_iter_for_each(iter, s2)
@ -204,6 +219,18 @@ found:
btree_node_iter_for_each(iter, set)
BUG_ON(set != iter->data &&
btree_node_iter_cmp(b, set[-1], set[0]) > 0);
k = bch2_btree_node_iter_peek_all(iter, b);
for_each_bset(b, t) {
if (iter->data[0].end == t->end_offset)
continue;
p = bch2_bkey_prev_all(b, t,
bch2_btree_node_iter_bset_pos(iter, b, t));
BUG_ON(p && bkey_iter_cmp(b, k, p) < 0);
}
}
void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
@ -1669,25 +1696,13 @@ void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
__bch2_btree_node_iter_advance(iter, b);
}
static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
{
unsigned n = ARRAY_SIZE(iter->data);
while (n && __btree_node_iter_set_end(iter, n - 1))
--n;
return n;
}
/*
* Expensive:
*/
struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *iter,
struct btree *b,
unsigned min_key_type)
struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter,
struct btree *b)
{
struct bkey_packed *k, *prev = NULL;
struct bkey_packed *orig_pos = bch2_btree_node_iter_peek_all(iter, b);
struct btree_node_iter_set *set;
struct bset_tree *t;
unsigned end = 0;
@ -1695,9 +1710,8 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite
bch2_btree_node_iter_verify(iter, b);
for_each_bset(b, t) {
k = bch2_bkey_prev_filter(b, t,
bch2_btree_node_iter_bset_pos(iter, b, t),
min_key_type);
k = bch2_bkey_prev_all(b, t,
bch2_btree_node_iter_bset_pos(iter, b, t));
if (k &&
(!prev || bkey_iter_cmp(b, k, prev) > 0)) {
prev = k;
@ -1706,7 +1720,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite
}
if (!prev)
goto out;
return NULL;
/*
* We're manually memmoving instead of just calling sort() to ensure the
@ -1727,18 +1741,20 @@ found:
iter->data[0].k = __btree_node_key_to_offset(b, prev);
iter->data[0].end = end;
out:
if (btree_keys_expensive_checks(b)) {
struct btree_node_iter iter2 = *iter;
if (prev)
__bch2_btree_node_iter_advance(&iter2, b);
bch2_btree_node_iter_verify(iter, b);
return prev;
}
while ((k = bch2_btree_node_iter_peek_all(&iter2, b)) != orig_pos) {
BUG_ON(k->type >= min_key_type);
__bch2_btree_node_iter_advance(&iter2, b);
}
}
struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *iter,
struct btree *b,
unsigned min_key_type)
{
struct bkey_packed *prev;
do {
prev = bch2_btree_node_iter_prev_all(iter, b);
} while (prev && prev->type < min_key_type);
return prev;
}


@ -528,15 +528,11 @@ bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b)
return ret;
}
struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *,
struct btree *);
struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *,
struct btree *, unsigned);
static inline struct bkey_packed *
bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, struct btree *b)
{
return bch2_btree_node_iter_prev_filter(iter, b, 0);
}
static inline struct bkey_packed *
bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
{


@ -171,7 +171,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
*max_stale = max(*max_stale, ptr_stale(ca, ptr));
}
bch2_mark_key(c, k, k.k->size, NULL, 0, flags);
bch2_mark_key(c, k, 0, k.k->size, NULL, 0, flags);
fsck_err:
return ret;
}
@ -418,7 +418,8 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
for_each_pending_btree_node_free(c, as, d)
if (d->index_update_done)
bch2_mark_key(c, bkey_i_to_s_c(&d->key), 0, NULL, 0,
bch2_mark_key(c, bkey_i_to_s_c(&d->key),
0, 0, NULL, 0,
BCH_BUCKET_MARK_GC);
mutex_unlock(&c->btree_interior_update_lock);


@ -86,7 +86,7 @@ void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
struct btree_iter *linked;
unsigned readers = 0;
EBUG_ON(btree_node_read_locked(iter, b->level));
EBUG_ON(!btree_node_intent_locked(iter, b->level));
trans_for_each_iter(iter->trans, linked)
if (linked->l[b->level].b == b &&
@ -496,6 +496,23 @@ static inline void __bch2_btree_iter_verify(struct btree_iter *iter,
#endif
static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
struct btree *b,
struct bset_tree *t,
struct bkey_packed *k)
{
struct btree_node_iter_set *set;
btree_node_iter_for_each(iter, set)
if (set->end == t->end_offset) {
set->k = __btree_node_key_to_offset(b, k);
bch2_btree_node_iter_sort(iter, b);
return;
}
bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
}
static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
struct btree *b,
struct btree_node_iter *node_iter,
@ -527,7 +544,8 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
bch2_btree_node_iter_peek_all(node_iter, b),
&iter->k);
}
return;
goto iter_current_key_not_modified;
found:
set->end = t->end_offset;
@ -569,60 +587,42 @@ found:
bkey_disassemble(l->b, k, &iter->k);
}
iter_current_key_not_modified:
/*
* Interior nodes are special because iterators for interior nodes don't
* obey the usual invariants regarding the iterator position:
*
* We may have whiteouts that compare greater than the iterator
* position, and logically should be in the iterator, but that we
* skipped past to find the first live key greater than the iterator
* position. This becomes an issue when we insert a new key that is
* greater than the current iterator position, but smaller than the
* whiteouts we've already skipped past - this happens in the course of
* a btree split.
*
* We have to rewind the iterator past to before those whiteouts here,
* else bkey_node_iter_prev() is not going to work and who knows what
* else would happen. And we have to do it manually, because here we've
* already done the insert and the iterator is currently inconsistent:
*
* We've got multiple competing invariants, here - we have to be careful
* about rewinding iterators for interior nodes, because they should
* always point to the key for the child node the btree iterator points
* to.
* When a new key is added, and the node iterator now points to that
* key, the iterator might have skipped past deleted keys that should
* come after the key the iterator now points to. We have to rewind to
* before those deleted keys - otherwise bch2_btree_node_iter_prev_all()
* breaks:
*/
if (b->level && new_u64s &&
btree_iter_pos_cmp(iter, b, where) > 0) {
if (!bch2_btree_node_iter_end(node_iter) &&
(b->level ||
(iter->flags & BTREE_ITER_IS_EXTENTS))) {
struct bset_tree *t;
struct bkey_packed *k;
struct bkey_packed *k, *k2, *p;
k = bch2_btree_node_iter_peek_all(node_iter, b);
for_each_bset(b, t) {
if (bch2_bkey_to_bset(b, where) == t)
bool set_pos = false;
if (node_iter->data[0].end == t->end_offset)
continue;
k = bch2_bkey_prev_all(b, t,
bch2_btree_node_iter_bset_pos(node_iter, b, t));
if (k &&
bkey_iter_cmp(b, k, where) > 0) {
struct btree_node_iter_set *set;
unsigned offset =
__btree_node_key_to_offset(b, bkey_next(k));
k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);
btree_node_iter_for_each(node_iter, set)
if (set->k == offset) {
set->k = __btree_node_key_to_offset(b, k);
bch2_btree_node_iter_sort(node_iter, b);
goto next_bset;
}
bch2_btree_node_iter_push(node_iter, b, k,
btree_bkey_last(b, t));
while ((p = bch2_bkey_prev_all(b, t, k2)) &&
bkey_iter_cmp(b, k, p) < 0) {
k2 = p;
set_pos = true;
}
next_bset:
t = t;
if (set_pos)
btree_node_iter_set_set_pos(node_iter,
b, t, k2);
}
}
bch2_btree_node_iter_verify(node_iter, b);
}
void bch2_btree_node_iter_fix(struct btree_iter *iter,
@ -1436,8 +1436,7 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
recheck:
while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k &&
bkey_deleted(k.k) &&
bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0)
bkey_cmp(k.k->p, iter->pos) <= 0)
bch2_btree_node_iter_advance(&l->iter, l->b);
/*
@ -1477,6 +1476,8 @@ recheck:
EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
EBUG_ON(bkey_deleted(k.k));
iter->uptodate = BTREE_ITER_UPTODATE;
__bch2_btree_iter_verify(iter, l->b);
return k;
}
@ -1507,6 +1508,8 @@ recheck:
iter->k = n;
iter->uptodate = BTREE_ITER_UPTODATE;
__bch2_btree_iter_verify(iter, l->b);
return (struct bkey_s_c) { &iter->k, NULL };
}
@ -1539,19 +1542,18 @@ recheck:
goto recheck;
}
if (k.k &&
!bkey_deleted(k.k) &&
!bkey_cmp(iter->pos, k.k->p)) {
iter->uptodate = BTREE_ITER_UPTODATE;
return k;
} else {
if (!k.k ||
bkey_deleted(k.k) ||
bkey_cmp(iter->pos, k.k->p)) {
/* hole */
bkey_init(&iter->k);
iter->k.p = iter->pos;
iter->uptodate = BTREE_ITER_UPTODATE;
return (struct bkey_s_c) { &iter->k, NULL };
k = (struct bkey_s_c) { &iter->k, NULL };
}
iter->uptodate = BTREE_ITER_UPTODATE;
__bch2_btree_iter_verify(iter, l->b);
return k;
}
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
@ -1779,6 +1781,12 @@ found:
iter->flags &= ~(BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
iter->flags |= flags & (BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
if ((iter->flags & BTREE_ITER_INTENT) &&
!bch2_btree_iter_upgrade(iter, 1)) {
trace_trans_restart_upgrade(trans->ip);
return ERR_PTR(-EINTR);
}
}
BUG_ON(iter->btree_id != btree_id);


@ -242,7 +242,7 @@ static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
(_start), (_flags))) ?: \
PTR_ERR_OR_ZERO(((_k) = \
__bch2_btree_iter_peek(_iter, _flags)).k); \
!ret && (_k).k; \
!_ret && (_k).k; \
(_ret) = PTR_ERR_OR_ZERO(((_k) = \
__bch2_btree_iter_next(_iter, _flags)).k))


@ -461,7 +461,13 @@ static inline enum btree_node_type btree_node_type(struct btree *b)
static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
return type == BKEY_TYPE_EXTENTS;
switch (type) {
case BKEY_TYPE_EXTENTS:
case BKEY_TYPE_REFLINK:
return true;
default:
return false;
}
}
static inline bool btree_node_is_extents(struct btree *b)
@ -477,6 +483,7 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type)
case BKEY_TYPE_EXTENTS:
case BKEY_TYPE_INODES:
case BKEY_TYPE_EC:
case BKEY_TYPE_REFLINK:
return true;
default:
return false;


@ -194,7 +194,7 @@ found:
: gc_pos_btree_root(as->btree_id)) >= 0 &&
gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0)
bch2_mark_key_locked(c, bkey_i_to_s_c(&d->key),
0, NULL, 0,
0, 0, NULL, 0,
BCH_BUCKET_MARK_OVERWRITE|
BCH_BUCKET_MARK_GC);
}
@ -266,11 +266,12 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
{
BUG_ON(!pending->index_update_done);
bch2_mark_key(c, bkey_i_to_s_c(&pending->key), 0, NULL, 0,
BCH_BUCKET_MARK_OVERWRITE);
bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
0, 0, NULL, 0, BCH_BUCKET_MARK_OVERWRITE);
if (gc_visited(c, gc_phase(GC_PHASE_PENDING_DELETE)))
bch2_mark_key(c, bkey_i_to_s_c(&pending->key), 0, NULL, 0,
bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
0, 0, NULL, 0,
BCH_BUCKET_MARK_OVERWRITE|
BCH_BUCKET_MARK_GC);
}
@ -1077,11 +1078,11 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
0, fs_usage, 0,
0, 0, fs_usage, 0,
BCH_BUCKET_MARK_INSERT);
if (gc_visited(c, gc_pos_btree_root(b->btree_id)))
bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
0, NULL, 0,
0, 0, NULL, 0,
BCH_BUCKET_MARK_INSERT|
BCH_BUCKET_MARK_GC);
@ -1175,12 +1176,12 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
0, fs_usage, 0,
0, 0, fs_usage, 0,
BCH_BUCKET_MARK_INSERT);
if (gc_visited(c, gc_pos_btree_node(b)))
bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
0, NULL, 0,
0, 0, NULL, 0,
BCH_BUCKET_MARK_INSERT|
BCH_BUCKET_MARK_GC);
@ -2003,11 +2004,11 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
0, fs_usage, 0,
0, 0, fs_usage, 0,
BCH_BUCKET_MARK_INSERT);
if (gc_visited(c, gc_pos_btree_root(b->btree_id)))
bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
0, NULL, 0,
0, 0, NULL, 0,
BCH_BUCKET_MARK_INSERT|
BCH_BUCKET_MARK_GC);


@ -400,8 +400,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
BUG_ON(i->iter->level);
BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) &&
!bch2_extent_is_atomic(i->k, i->iter));
bkey_cmp(i->k->k.p, i->iter->l[0].b->key.k.p) > 0);
EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) &&
!(trans->flags & BTREE_INSERT_ATOMIC));
}
@ -522,7 +521,8 @@ static inline bool update_triggers_transactional(struct btree_trans *trans,
{
return likely(!(trans->flags & BTREE_INSERT_MARK_INMEM)) &&
(i->iter->btree_id == BTREE_ID_EXTENTS ||
i->iter->btree_id == BTREE_ID_INODES);
i->iter->btree_id == BTREE_ID_INODES ||
i->iter->btree_id == BTREE_ID_REFLINK);
}
static inline bool update_has_triggers(struct btree_trans *trans,
@ -923,8 +923,6 @@ out_noupdates:
bch2_trans_unlink_iters(trans, ~trans->iters_touched|
trans->iters_unlink_on_commit);
trans->iters_touched = 0;
} else {
bch2_trans_unlink_iters(trans, trans->iters_unlink_on_commit);
}
trans->nr_updates = 0;
trans->mem_top = 0;
@ -1033,7 +1031,10 @@ retry:
/* create the biggest key we can */
bch2_key_resize(&delete.k, max_sectors);
bch2_cut_back(end, &delete.k);
bch2_extent_trim_atomic(&delete, iter);
ret = bch2_extent_trim_atomic(&delete, iter);
if (ret)
break;
}
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &delete));


@ -405,7 +405,8 @@ int bch2_fs_usage_apply(struct bch_fs *c,
*/
should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0);
if (WARN_ONCE(should_not_have_added > 0,
"disk usage increased without a reservation")) {
"disk usage increased by %lli without a reservation",
should_not_have_added)) {
atomic64_sub(should_not_have_added, &c->sectors_available);
added -= should_not_have_added;
ret = -1;
@ -810,23 +811,24 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
}
static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
s64 delta)
unsigned offset, s64 delta,
unsigned flags)
{
if (delta > 0) {
/*
* marking a new extent, which _will have size_ @delta
*
* in the bch2_mark_update -> BCH_EXTENT_OVERLAP_MIDDLE
* case, we haven't actually created the key we'll be inserting
* yet (for the split) - so we don't want to be using
* k->size/crc.live_size here:
*/
return __ptr_disk_sectors(p, delta);
} else {
BUG_ON(-delta > p.crc.live_size);
if (flags & BCH_BUCKET_MARK_OVERWRITE_SPLIT) {
BUG_ON(offset + -delta > p.crc.live_size);
return (s64) __ptr_disk_sectors(p, p.crc.live_size + delta) -
(s64) ptr_disk_sectors(p);
return -((s64) ptr_disk_sectors(p)) +
__ptr_disk_sectors(p, offset) +
__ptr_disk_sectors(p, p.crc.live_size -
offset + delta);
} else if (flags & BCH_BUCKET_MARK_OVERWRITE) {
BUG_ON(offset + -delta > p.crc.live_size);
return -((s64) ptr_disk_sectors(p)) +
__ptr_disk_sectors(p, p.crc.live_size +
delta);
} else {
return ptr_disk_sectors(p);
}
}
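
A worked example may help here (editorial, assumed values; for uncompressed data __ptr_disk_sectors(p, n) is just n): overwriting the middle 8 sectors of a 16-sector extent at offset 4 arrives with delta = -8 and BCH_BUCKET_MARK_OVERWRITE_SPLIT, and the delta works out to:

	-ptr_disk_sectors(p)			-16
	+ __ptr_disk_sectors(p, offset)		 +4	surviving front fragment
	+ __ptr_disk_sectors(p, 16 - 4 - 8)	 +4	surviving back fragment
						 -8	sectors freed on disk
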
@ -970,7 +972,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
spin_unlock(&c->ec_stripes_heap_lock);
bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
(u64) p.idx);
return -1;
return -EIO;
}
BUG_ON(m->r.e.data_type != data_type);
@ -1005,7 +1007,8 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
}
static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
s64 sectors, enum bch_data_type data_type,
unsigned offset, s64 sectors,
enum bch_data_type data_type,
struct bch_fs_usage *fs_usage,
unsigned journal_seq, unsigned flags)
{
@ -1026,7 +1029,7 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
s64 disk_sectors = data_type == BCH_DATA_BTREE
? sectors
: ptr_disk_sectors_delta(p, sectors);
: ptr_disk_sectors_delta(p, offset, sectors, flags);
bool stale = bch2_mark_pointer(c, p, disk_sectors, data_type,
fs_usage, journal_seq, flags);
@ -1115,7 +1118,8 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
}
int bch2_mark_key_locked(struct bch_fs *c,
struct bkey_s_c k, s64 sectors,
struct bkey_s_c k,
unsigned offset, s64 sectors,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
{
@ -1136,11 +1140,12 @@ int bch2_mark_key_locked(struct bch_fs *c,
? c->opts.btree_node_size
: -c->opts.btree_node_size;
ret = bch2_mark_extent(c, k, sectors, BCH_DATA_BTREE,
ret = bch2_mark_extent(c, k, offset, sectors, BCH_DATA_BTREE,
fs_usage, journal_seq, flags);
break;
case KEY_TYPE_extent:
ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
case KEY_TYPE_reflink_v:
ret = bch2_mark_extent(c, k, offset, sectors, BCH_DATA_USER,
fs_usage, journal_seq, flags);
break;
case KEY_TYPE_stripe:
@ -1171,14 +1176,14 @@ int bch2_mark_key_locked(struct bch_fs *c,
}
int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
s64 sectors,
unsigned offset, s64 sectors,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
{
int ret;
percpu_down_read(&c->mark_lock);
ret = bch2_mark_key_locked(c, k, sectors,
ret = bch2_mark_key_locked(c, k, offset, sectors,
fs_usage, journal_seq, flags);
percpu_up_read(&c->mark_lock);
@ -1194,8 +1199,11 @@ inline int bch2_mark_overwrite(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree *b = iter->l[0].b;
unsigned offset = 0;
s64 sectors = 0;
flags |= BCH_BUCKET_MARK_OVERWRITE;
if (btree_node_is_extents(b)
? bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0
: bkey_cmp(new->k.p, old.k->p))
@ -1204,35 +1212,33 @@ inline int bch2_mark_overwrite(struct btree_trans *trans,
if (btree_node_is_extents(b)) {
switch (bch2_extent_overlap(&new->k, old.k)) {
case BCH_EXTENT_OVERLAP_ALL:
offset = 0;
sectors = -((s64) old.k->size);
break;
case BCH_EXTENT_OVERLAP_BACK:
offset = bkey_start_offset(&new->k) -
bkey_start_offset(old.k);
sectors = bkey_start_offset(&new->k) -
old.k->p.offset;
break;
case BCH_EXTENT_OVERLAP_FRONT:
offset = 0;
sectors = bkey_start_offset(old.k) -
new->k.p.offset;
break;
case BCH_EXTENT_OVERLAP_MIDDLE:
sectors = old.k->p.offset - new->k.p.offset;
BUG_ON(sectors <= 0);
bch2_mark_key_locked(c, old, sectors,
fs_usage, trans->journal_res.seq,
BCH_BUCKET_MARK_INSERT|flags);
sectors = bkey_start_offset(&new->k) -
old.k->p.offset;
offset = bkey_start_offset(&new->k) -
bkey_start_offset(old.k);
sectors = -((s64) new->k.size);
flags |= BCH_BUCKET_MARK_OVERWRITE_SPLIT;
break;
}
BUG_ON(sectors >= 0);
}
return bch2_mark_key_locked(c, old, sectors, fs_usage,
trans->journal_res.seq,
BCH_BUCKET_MARK_OVERWRITE|flags) ?: 1;
return bch2_mark_key_locked(c, old, offset, sectors, fs_usage,
trans->journal_res.seq, flags) ?: 1;
}
int bch2_mark_update(struct btree_trans *trans,
@ -1252,8 +1258,7 @@ int bch2_mark_update(struct btree_trans *trans,
if (!(trans->flags & BTREE_INSERT_NOMARK_INSERT))
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
bpos_min(insert->k->k.p, b->key.k.p).offset -
bkey_start_offset(&insert->k->k),
0, insert->k->k.size,
fs_usage, trans->journal_res.seq,
BCH_BUCKET_MARK_INSERT|flags);
@ -1300,7 +1305,8 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
xchg(&warned_disk_usage, 1))
return;
pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
bch_err(c, "disk usage increased more than %llu sectors reserved",
disk_res_sectors);
trans_for_each_update_iter(trans, i) {
struct btree_iter *iter = i->iter;
@ -1315,7 +1321,7 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
node_iter = iter->l[0].iter;
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
KEY_TYPE_discard))) {
KEY_TYPE_discard))) {
struct bkey unpacked;
struct bkey_s_c k;
@ -1341,15 +1347,20 @@ static int trans_get_key(struct btree_trans *trans,
struct btree_iter **iter,
struct bkey_s_c *k)
{
unsigned i;
struct btree_insert_entry *i;
int ret;
for (i = 0; i < trans->nr_updates; i++)
if (!trans->updates[i].deferred &&
trans->updates[i].iter->btree_id == btree_id &&
!bkey_cmp(pos, trans->updates[i].iter->pos)) {
*iter = trans->updates[i].iter;
*k = bkey_i_to_s_c(trans->updates[i].k);
for (i = trans->updates;
i < trans->updates + trans->nr_updates;
i++)
if (!i->deferred &&
i->iter->btree_id == btree_id &&
(btree_node_type_is_extents(btree_id)
? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 &&
bkey_cmp(pos, i->k->k.p) < 0
: !bkey_cmp(pos, i->iter->pos))) {
*iter = i->iter;
*k = bkey_i_to_s_c(i->k);
return 0;
}
@ -1358,6 +1369,8 @@ static int trans_get_key(struct btree_trans *trans,
if (IS_ERR(*iter))
return PTR_ERR(*iter);
bch2_trans_iter_free_on_commit(trans, *iter);
*k = bch2_btree_iter_peek_slot(*iter);
ret = bkey_err(*k);
if (ret)
@ -1460,6 +1473,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
struct bch_extent_stripe_ptr p,
s64 sectors, enum bch_data_type data_type)
{
struct bch_fs *c = trans->c;
struct bch_replicas_padded r;
struct btree_iter *iter;
struct bkey_i *new_k;
@ -1476,10 +1490,10 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
return ret;
if (k.k->type != KEY_TYPE_stripe) {
bch_err_ratelimited(trans->c,
"pointer to nonexistent stripe %llu",
(u64) p.idx);
ret = -1;
bch2_fs_inconsistent(c,
"pointer to nonexistent stripe %llu",
(u64) p.idx);
ret = -EIO;
goto out;
}
@ -1511,8 +1525,9 @@ out:
}
static int bch2_trans_mark_extent(struct btree_trans *trans,
struct bkey_s_c k,
s64 sectors, enum bch_data_type data_type)
struct bkey_s_c k, unsigned offset,
s64 sectors, unsigned flags,
enum bch_data_type data_type)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@ -1532,7 +1547,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
s64 disk_sectors = data_type == BCH_DATA_BTREE
? sectors
: ptr_disk_sectors_delta(p, sectors);
: ptr_disk_sectors_delta(p, offset, sectors, flags);
ret = bch2_trans_mark_pointer(trans, p, disk_sectors,
data_type);
@ -1566,8 +1581,86 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
return 0;
}
int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c_reflink_p p,
u64 idx, unsigned sectors,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter;
struct bkey_i *new_k;
struct bkey_s_c k;
struct bkey_i_reflink_v *r_v;
s64 ret;
ret = trans_get_key(trans, BTREE_ID_REFLINK,
POS(0, idx), &iter, &k);
if (ret)
return ret;
if (k.k->type != KEY_TYPE_reflink_v) {
bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
ret = -EIO;
goto err;
}
if ((flags & BCH_BUCKET_MARK_OVERWRITE) &&
(bkey_start_offset(k.k) < idx ||
k.k->p.offset > idx + sectors))
goto out;
bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
new_k = trans_update_key(trans, iter, k.k->u64s);
ret = PTR_ERR_OR_ZERO(new_k);
if (ret)
goto err;
bkey_reassemble(new_k, k);
r_v = bkey_i_to_reflink_v(new_k);
le64_add_cpu(&r_v->v.refcount,
!(flags & BCH_BUCKET_MARK_OVERWRITE) ? 1 : -1);
if (!r_v->v.refcount) {
r_v->k.type = KEY_TYPE_deleted;
set_bkey_val_u64s(&r_v->k, 0);
}
out:
ret = k.k->p.offset - idx;
err:
bch2_trans_iter_put(trans, iter);
return ret;
}
static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c_reflink_p p, unsigned offset,
s64 sectors, unsigned flags)
{
u64 idx = le64_to_cpu(p.v->idx) + offset;
s64 ret = 0;
sectors = abs(sectors);
BUG_ON(offset + sectors > p.k->size);
while (sectors) {
ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags);
if (ret < 0)
break;
idx += ret;
sectors = max_t(s64, 0LL, sectors - ret);
ret = 0;
}
return ret;
}
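
Since __bch2_trans_mark_reflink_p() returns how far it advanced (k.k->p.offset - idx), the loop above walks one indirect extent per pass. An editorial trace with a hypothetical layout: p.v->idx = 100, offset = 0, sectors = 24, indirect extents covering [100,110) and [110,130):

	pass 1: returns 110 - 100 = 10; idx -> 110, sectors -> 14
	pass 2: returns 130 - 110 = 20; sectors -> max(0, 14 - 20) = 0

The max_t() clamp matters because the final indirect extent may extend past the range being marked.
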
int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
unsigned offset, s64 sectors, unsigned flags)
{
struct replicas_delta_list *d;
struct bch_fs *c = trans->c;
@ -1578,11 +1671,12 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
? c->opts.btree_node_size
: -c->opts.btree_node_size;
return bch2_trans_mark_extent(trans, k, sectors,
BCH_DATA_BTREE);
return bch2_trans_mark_extent(trans, k, offset, sectors,
flags, BCH_DATA_BTREE);
case KEY_TYPE_extent:
return bch2_trans_mark_extent(trans, k, sectors,
BCH_DATA_USER);
case KEY_TYPE_reflink_v:
return bch2_trans_mark_extent(trans, k, offset, sectors,
flags, BCH_DATA_USER);
case KEY_TYPE_inode:
d = replicas_deltas_realloc(trans, 0);
@ -1604,6 +1698,10 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
d->fs_usage.persistent_reserved[replicas - 1] += sectors;
return 0;
}
case KEY_TYPE_reflink_p:
return bch2_trans_mark_reflink_p(trans,
bkey_s_c_to_reflink_p(k),
offset, sectors, flags);
default:
return 0;
}
@ -1621,11 +1719,8 @@ int bch2_trans_mark_update(struct btree_trans *trans,
if (!btree_node_type_needs_gc(iter->btree_id))
return 0;
ret = bch2_trans_mark_key(trans,
bkey_i_to_s_c(insert),
bpos_min(insert->k.p, b->key.k.p).offset -
bkey_start_offset(&insert->k),
BCH_BUCKET_MARK_INSERT);
ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(insert),
0, insert->k.size, BCH_BUCKET_MARK_INSERT);
if (ret)
return ret;
@ -1633,7 +1728,9 @@ int bch2_trans_mark_update(struct btree_trans *trans,
KEY_TYPE_discard))) {
struct bkey unpacked;
struct bkey_s_c k;
unsigned offset = 0;
s64 sectors = 0;
unsigned flags = BCH_BUCKET_MARK_OVERWRITE;
k = bkey_disassemble(b, _k, &unpacked);
@ -1645,35 +1742,32 @@ int bch2_trans_mark_update(struct btree_trans *trans,
if (btree_node_is_extents(b)) {
switch (bch2_extent_overlap(&insert->k, k.k)) {
case BCH_EXTENT_OVERLAP_ALL:
offset = 0;
sectors = -((s64) k.k->size);
break;
case BCH_EXTENT_OVERLAP_BACK:
offset = bkey_start_offset(&insert->k) -
bkey_start_offset(k.k);
sectors = bkey_start_offset(&insert->k) -
k.k->p.offset;
break;
case BCH_EXTENT_OVERLAP_FRONT:
offset = 0;
sectors = bkey_start_offset(k.k) -
insert->k.p.offset;
break;
case BCH_EXTENT_OVERLAP_MIDDLE:
sectors = k.k->p.offset - insert->k.p.offset;
BUG_ON(sectors <= 0);
ret = bch2_trans_mark_key(trans, k, sectors,
BCH_BUCKET_MARK_INSERT);
if (ret)
return ret;
sectors = bkey_start_offset(&insert->k) -
k.k->p.offset;
offset = bkey_start_offset(&insert->k) -
bkey_start_offset(k.k);
sectors = -((s64) insert->k.size);
flags |= BCH_BUCKET_MARK_OVERWRITE_SPLIT;
break;
}
BUG_ON(sectors >= 0);
}
ret = bch2_trans_mark_key(trans, k, sectors,
BCH_BUCKET_MARK_OVERWRITE);
ret = bch2_trans_mark_key(trans, k, offset, sectors, flags);
if (ret)
return ret;


@ -251,14 +251,15 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
#define BCH_BUCKET_MARK_INSERT (1 << 0)
#define BCH_BUCKET_MARK_OVERWRITE (1 << 1)
#define BCH_BUCKET_MARK_BUCKET_INVALIDATE (1 << 2)
#define BCH_BUCKET_MARK_GC (1 << 3)
#define BCH_BUCKET_MARK_ALLOC_READ (1 << 4)
#define BCH_BUCKET_MARK_NOATOMIC (1 << 5)
#define BCH_BUCKET_MARK_OVERWRITE_SPLIT (1 << 2)
#define BCH_BUCKET_MARK_BUCKET_INVALIDATE (1 << 3)
#define BCH_BUCKET_MARK_GC (1 << 4)
#define BCH_BUCKET_MARK_ALLOC_READ (1 << 5)
#define BCH_BUCKET_MARK_NOATOMIC (1 << 6)
int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, s64,
int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, unsigned, s64,
struct bch_fs_usage *, u64, unsigned);
int bch2_mark_key(struct bch_fs *, struct bkey_s_c, s64,
int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned, s64,
struct bch_fs_usage *, u64, unsigned);
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *, unsigned);
@ -272,7 +273,8 @@ int bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
void bch2_replicas_delta_list_apply(struct bch_fs *,
struct bch_fs_usage *,
struct replicas_delta_list *);
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, s64, unsigned);
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
unsigned, s64, unsigned);
int bch2_trans_mark_update(struct btree_trans *,
struct btree_iter *iter,
struct bkey_i *insert);
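
Editorial note on the signature change: the new unsigned parameter is the offset into the extent being marked, and it precedes the signed sector count. A call sketch (hypothetical wrapper name) matching the converted call sites above:

/* mark an entire newly-visible key, as in bch2_gc_mark_key(): */
static int mark_whole_key_sketch(struct bch_fs *c, struct bkey_s_c k,
				 unsigned flags)
{
	return bch2_mark_key(c, k, 0, k.k->size, NULL, 0, flags);
}
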


@ -162,19 +162,20 @@ static int extent_matches_stripe(struct bch_fs *c,
struct bch_stripe *v,
struct bkey_s_c k)
{
struct bkey_s_c_extent e;
const struct bch_extent_ptr *ptr;
int idx;
if (!bkey_extent_is_data(k.k))
return -1;
switch (k.k->type) {
case KEY_TYPE_extent: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr;
int idx;
e = bkey_s_c_to_extent(k);
extent_for_each_ptr(e, ptr) {
idx = ptr_matches_stripe(c, v, ptr);
if (idx >= 0)
return idx;
extent_for_each_ptr(e, ptr) {
idx = ptr_matches_stripe(c, v, ptr);
if (idx >= 0)
return idx;
}
break;
}
}
return -1;
@ -182,19 +183,20 @@ static int extent_matches_stripe(struct bch_fs *c,
static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
{
struct bkey_s_c_extent e;
const union bch_extent_entry *entry;
switch (k.k->type) {
case KEY_TYPE_extent: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
if (!bkey_extent_is_data(k.k))
return false;
extent_for_each_entry(e, entry)
if (extent_entry_type(entry) ==
BCH_EXTENT_ENTRY_stripe_ptr &&
entry->stripe_ptr.idx == idx)
return true;
e = bkey_s_c_to_extent(k);
extent_for_each_entry(e, entry)
if (extent_entry_type(entry) ==
BCH_EXTENT_ENTRY_stripe_ptr &&
entry->stripe_ptr.idx == idx)
return true;
break;
}
}
return false;
}
@ -1310,7 +1312,7 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
break;
}
bch2_mark_key(c, k, 0, NULL, 0,
bch2_mark_key(c, k, 0, 0, NULL, 0,
BCH_BUCKET_MARK_ALLOC_READ|
BCH_BUCKET_MARK_NOATOMIC);
}


@ -250,6 +250,33 @@ void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev);
}
const struct bch_extent_ptr *
bch2_bkey_has_device(struct bkey_s_c k, unsigned dev)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
bkey_for_each_ptr(ptrs, ptr)
if (ptr->dev == dev)
return ptr;
return NULL;
}
bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
bkey_for_each_ptr(ptrs, ptr)
if (bch2_dev_in_target(c, ptr->dev, target) &&
(!ptr->cached ||
!ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)))
return true;
return false;
}
/* extent specific utility code */
const struct bch_extent_ptr *
@ -280,20 +307,6 @@ bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group
return NULL;
}
const struct bch_extent_ptr *
bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target)
{
const struct bch_extent_ptr *ptr;
extent_for_each_ptr(e, ptr)
if (bch2_dev_in_target(c, ptr->dev, target) &&
(!ptr->cached ||
!ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)))
return ptr;
return NULL;
}
unsigned bch2_extent_is_compressed(struct bkey_s_c k)
{
unsigned ret = 0;
@ -314,16 +327,17 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c k)
return ret;
}
bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e,
struct bch_extent_ptr m, u64 offset)
bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k,
struct bch_extent_ptr m, u64 offset)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
extent_for_each_ptr_decode(e, p, entry)
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
if (p.ptr.dev == m.dev &&
p.ptr.gen == m.gen &&
(s64) p.ptr.offset + p.crc.offset - bkey_start_offset(e.k) ==
(s64) p.ptr.offset + p.crc.offset - bkey_start_offset(k.k) ==
(s64) m.offset - offset)
return true;
@ -390,16 +404,17 @@ static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
bch2_csum_type_is_encryption(n.csum_type);
}
bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent e,
bool bch2_can_narrow_extent_crcs(struct bkey_s_c k,
struct bch_extent_crc_unpacked n)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *i;
if (!n.csum_type)
return false;
extent_for_each_crc(e, crc, i)
bkey_for_each_crc(k.k, ptrs, crc, i)
if (can_narrow_crc(crc, n))
return true;
@ -415,9 +430,9 @@ bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent e,
* currently live (so that readers won't have to bounce) while we've got the
* checksum we need:
*/
bool bch2_extent_narrow_crcs(struct bkey_i_extent *e,
struct bch_extent_crc_unpacked n)
bool bch2_bkey_narrow_crcs(struct bkey_i *k, struct bch_extent_crc_unpacked n)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
struct bch_extent_crc_unpacked u;
struct extent_ptr_decoded p;
union bch_extent_entry *i;
@ -425,7 +440,7 @@ bool bch2_extent_narrow_crcs(struct bkey_i_extent *e,
/* Find a checksum entry that covers only live data: */
if (!n.csum_type) {
extent_for_each_crc(extent_i_to_s(e), u, i)
bkey_for_each_crc(&k->k, ptrs, u, i)
if (!u.compression_type &&
u.csum_type &&
u.live_size == u.uncompressed_size) {
@ -437,15 +452,15 @@ bool bch2_extent_narrow_crcs(struct bkey_i_extent *e,
found:
BUG_ON(n.compression_type);
BUG_ON(n.offset);
BUG_ON(n.live_size != e->k.size);
BUG_ON(n.live_size != k->k.size);
restart_narrow_pointers:
extent_for_each_ptr_decode(extent_i_to_s(e), p, i)
bkey_for_each_ptr_decode(&k->k, ptrs, p, i)
if (can_narrow_crc(p.crc, n)) {
bch2_bkey_drop_ptr(extent_i_to_s(e).s, &i->ptr);
bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr);
p.ptr.offset += p.crc.offset;
p.crc = n;
bch2_extent_ptr_decoded_append(e, &p);
bch2_extent_ptr_decoded_append(k, &p);
ret = true;
goto restart_narrow_pointers;
}
@ -708,44 +723,48 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
/* Extents */
bool __bch2_cut_front(struct bpos where, struct bkey_s k)
void __bch2_cut_front(struct bpos where, struct bkey_s k)
{
u64 len = 0;
u64 sub;
if (bkey_cmp(where, bkey_start_pos(k.k)) <= 0)
return false;
return;
EBUG_ON(bkey_cmp(where, k.k->p) > 0);
len = k.k->p.offset - where.offset;
sub = where.offset - bkey_start_offset(k.k);
BUG_ON(len > k.k->size);
k.k->size -= sub;
/*
* Don't readjust offset if the key size is now 0, because that could
* cause offset to point to the next bucket:
*/
if (!len)
if (!k.k->size)
k.k->type = KEY_TYPE_deleted;
else if (bkey_extent_is_data(k.k)) {
struct bkey_s_extent e = bkey_s_to_extent(k);
switch (k.k->type) {
case KEY_TYPE_deleted:
case KEY_TYPE_discard:
case KEY_TYPE_error:
case KEY_TYPE_cookie:
break;
case KEY_TYPE_extent:
case KEY_TYPE_reflink_v: {
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry;
bool seen_crc = false;
extent_for_each_entry(e, entry) {
bkey_extent_entry_for_each(ptrs, entry) {
switch (extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr:
if (!seen_crc)
entry->ptr.offset += e.k->size - len;
entry->ptr.offset += sub;
break;
case BCH_EXTENT_ENTRY_crc32:
entry->crc32.offset += e.k->size - len;
entry->crc32.offset += sub;
break;
case BCH_EXTENT_ENTRY_crc64:
entry->crc64.offset += e.k->size - len;
entry->crc64.offset += sub;
break;
case BCH_EXTENT_ENTRY_crc128:
entry->crc128.offset += e.k->size - len;
entry->crc128.offset += sub;
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
break;
@ -754,11 +773,20 @@ bool __bch2_cut_front(struct bpos where, struct bkey_s k)
if (extent_entry_is_crc(entry))
seen_crc = true;
}
break;
}
case KEY_TYPE_reflink_p: {
struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k);
k.k->size = len;
return true;
le64_add_cpu(&p.v->idx, sub);
break;
}
case KEY_TYPE_reservation:
break;
default:
BUG();
}
}
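
An editorial example of the reflink_p case above, with assumed numbers: cutting the front 8 sectors off a 16-sector reflink_p with idx = 100 leaves size = 8 and, via the le64_add_cpu() above, idx = 108, so the pointer still addresses the matching range of its indirect extent.
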
bool bch2_cut_back(struct bpos where, struct bkey *k)
@ -772,8 +800,6 @@ bool bch2_cut_back(struct bpos where, struct bkey *k)
len = where.offset - bkey_start_offset(k);
BUG_ON(len > k->size);
k->p = where;
k->size = len;
@ -897,6 +923,16 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_extent_merge_inline(c, iter, bkey_to_packed(insert), k, false))
return;
/*
* may have skipped past some deleted extents greater than the insert
* key, before we got to a non-deleted extent and knew we could bail out -
* rewind the iterator a bit if necessary:
*/
node_iter = l->iter;
while ((k = bch2_btree_node_iter_prev_all(&node_iter, l->b)) &&
bkey_cmp_left_packed(l->b, k, &insert->k.p) > 0)
l->iter = node_iter;
k = bch2_btree_node_iter_bset_pos(&l->iter, l->b, bset_tree_last(l->b));
bch2_bset_insert(l->b, &l->iter, k, insert, 0);
@ -921,47 +957,131 @@ static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
return ret;
}
static inline struct bpos
bch2_extent_atomic_end(struct bkey_i *insert, struct btree_iter *iter)
static int __bch2_extent_atomic_end(struct btree_trans *trans,
struct bkey_s_c k,
unsigned offset,
struct bpos *end,
unsigned *nr_iters,
unsigned max_iters)
{
int ret = 0;
switch (k.k->type) {
case KEY_TYPE_extent:
*nr_iters += bch2_bkey_nr_alloc_ptrs(k);
if (*nr_iters >= max_iters) {
*end = bpos_min(*end, k.k->p);
return 0;
}
break;
case KEY_TYPE_reflink_p: {
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
u64 idx = le64_to_cpu(p.v->idx);
unsigned sectors = end->offset - bkey_start_offset(p.k);
struct btree_iter *iter;
struct bkey_s_c r_k;
for_each_btree_key(trans, iter,
BTREE_ID_REFLINK, POS(0, idx + offset),
BTREE_ITER_SLOTS, r_k, ret) {
if (bkey_cmp(bkey_start_pos(r_k.k),
POS(0, idx + sectors)) >= 0)
break;
*nr_iters += 1;
if (*nr_iters >= max_iters) {
struct bpos pos = bkey_start_pos(k.k);
pos.offset += r_k.k->p.offset - idx;
*end = bpos_min(*end, pos);
break;
}
}
bch2_trans_iter_put(trans, iter);
break;
}
}
return ret;
}
int bch2_extent_atomic_end(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert,
struct bpos *end)
{
struct btree *b = iter->l[0].b;
struct btree_node_iter node_iter = iter->l[0].iter;
struct bkey_packed *_k;
unsigned nr_alloc_ptrs =
unsigned nr_iters =
bch2_bkey_nr_alloc_ptrs(bkey_i_to_s_c(insert));
int ret = 0;
BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0);
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
*end = bpos_min(insert->k.p, b->key.k.p);
ret = __bch2_extent_atomic_end(trans, bkey_i_to_s_c(insert),
0, end, &nr_iters, 10);
if (ret)
return ret;
while (nr_iters < 20 &&
(_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
KEY_TYPE_discard))) {
struct bkey unpacked;
struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
unsigned offset = 0;
if (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0)
if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0)
break;
nr_alloc_ptrs += bch2_bkey_nr_alloc_ptrs(k);
if (bkey_cmp(bkey_start_pos(&insert->k),
bkey_start_pos(k.k)) > 0)
offset = bkey_start_offset(&insert->k) -
bkey_start_offset(k.k);
if (nr_alloc_ptrs > 20) {
BUG_ON(bkey_cmp(k.k->p, bkey_start_pos(&insert->k)) <= 0);
return bpos_min(insert->k.p, k.k->p);
}
ret = __bch2_extent_atomic_end(trans, k, offset,
end, &nr_iters, 20);
if (ret)
return ret;
if (nr_iters >= 20)
break;
bch2_btree_node_iter_advance(&node_iter, b);
}
return bpos_min(insert->k.p, b->key.k.p);
return 0;
}
void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
{
bch2_cut_back(bch2_extent_atomic_end(k, iter), &k->k);
struct bpos end;
int ret;
ret = bch2_extent_atomic_end(iter->trans, iter, k, &end);
if (ret)
return ret;
bch2_cut_back(end, &k->k);
return 0;
}
bool bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
{
return !bkey_cmp(bch2_extent_atomic_end(k, iter), k->k.p);
struct bpos end;
int ret;
ret = bch2_extent_atomic_end(iter->trans, iter, k, &end);
if (ret)
return ret;
return !bkey_cmp(end, k->k.p);
}
enum btree_insert_ret
@ -1185,19 +1305,6 @@ next:
overlap == BCH_EXTENT_OVERLAP_MIDDLE)
break;
}
/*
* may have skipped past some deleted extents greater than the insert
* key, before we got to a non deleted extent and knew we could bail out
* rewind the iterator a bit if necessary:
*/
{
struct btree_node_iter node_iter = l->iter;
while ((_k = bch2_btree_node_iter_prev_all(&node_iter, l->b)) &&
bkey_cmp_left_packed(l->b, _k, &insert->k.p) > 0)
l->iter = node_iter;
}
}
/**
@ -1394,9 +1501,12 @@ static void bch2_extent_crc_pack(union bch_extent_crc *dst,
#undef set_common_fields
}
static void bch2_extent_crc_init(union bch_extent_crc *crc,
struct bch_extent_crc_unpacked new)
static void bch2_extent_crc_append(struct bkey_i *k,
struct bch_extent_crc_unpacked new)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
union bch_extent_crc *crc = (void *) ptrs.end;
if (bch_crc_bytes[new.csum_type] <= 4 &&
new.uncompressed_size - 1 <= CRC32_SIZE_MAX &&
new.nonce <= CRC32_NONCE_MAX)
@ -1413,54 +1523,53 @@ static void bch2_extent_crc_init(union bch_extent_crc *crc,
BUG();
bch2_extent_crc_pack(crc, new);
k->k.u64s += extent_entry_u64s(ptrs.end);
EBUG_ON(bkey_val_u64s(&k->k) > BKEY_EXTENT_VAL_U64s_MAX);
}
void bch2_extent_crc_append(struct bkey_i_extent *e,
struct bch_extent_crc_unpacked new)
{
bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new);
__extent_entry_push(e);
}
static inline void __extent_entry_insert(struct bkey_i_extent *e,
static inline void __extent_entry_insert(struct bkey_i *k,
union bch_extent_entry *dst,
union bch_extent_entry *new)
{
union bch_extent_entry *end = extent_entry_last(extent_i_to_s(e));
union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
memmove_u64s_up((u64 *) dst + extent_entry_u64s(new),
dst, (u64 *) end - (u64 *) dst);
e->k.u64s += extent_entry_u64s(new);
k->k.u64s += extent_entry_u64s(new);
memcpy(dst, new, extent_entry_bytes(new));
}
void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e,
void bch2_extent_ptr_decoded_append(struct bkey_i *k,
struct extent_ptr_decoded *p)
{
struct bch_extent_crc_unpacked crc = bch2_extent_crc_unpack(&e->k, NULL);
struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
struct bch_extent_crc_unpacked crc =
bch2_extent_crc_unpack(&k->k, NULL);
union bch_extent_entry *pos;
unsigned i;
if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
pos = e->v.start;
pos = ptrs.start;
goto found;
}
extent_for_each_crc(extent_i_to_s(e), crc, pos)
bkey_for_each_crc(&k->k, ptrs, crc, pos)
if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
pos = extent_entry_next(pos);
goto found;
}
bch2_extent_crc_append(e, p->crc);
pos = extent_entry_last(extent_i_to_s(e));
bch2_extent_crc_append(k, p->crc);
pos = bkey_val_end(bkey_i_to_s(k));
found:
p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
__extent_entry_insert(e, pos, to_entry(&p->ptr));
__extent_entry_insert(k, pos, to_entry(&p->ptr));
for (i = 0; i < p->ec_nr; i++) {
p->ec[i].type = 1 << BCH_EXTENT_ENTRY_stripe_ptr;
__extent_entry_insert(e, pos, to_entry(&p->ec[i]));
__extent_entry_insert(k, pos, to_entry(&p->ec[i]));
}
}
@ -1487,17 +1596,17 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
return false;
}
void bch2_extent_mark_replicas_cached(struct bch_fs *c,
struct bkey_s_extent e,
unsigned target,
unsigned nr_desired_replicas)
void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k,
unsigned target,
unsigned nr_desired_replicas)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry;
struct extent_ptr_decoded p;
int extra = bch2_bkey_durability(c, e.s_c) - nr_desired_replicas;
int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas;
if (target && extra > 0)
extent_for_each_ptr_decode(e, p, entry) {
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
int n = bch2_extent_ptr_durability(c, p);
if (n && n <= extra &&
@ -1508,7 +1617,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
}
if (extra > 0)
extent_for_each_ptr_decode(e, p, entry) {
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
int n = bch2_extent_ptr_durability(c, p);
if (n && n <= extra) {


@ -12,7 +12,8 @@ struct btree_insert_entry;
/* extent entries: */
#define extent_entry_last(_e) bkey_val_end(_e)
#define extent_entry_last(_e) \
((typeof(&(_e).v->start[0])) bkey_val_end(_e))
#define entry_to_ptr(_entry) \
({ \
@ -258,6 +259,27 @@ out: \
__bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \
_ptr, _entry)
#define bkey_crc_next(_k, _start, _end, _crc, _iter) \
({ \
__bkey_extent_entry_for_each_from(_iter, _end, _iter) \
if (extent_entry_is_crc(_iter)) { \
(_crc) = bch2_extent_crc_unpack(_k, \
entry_to_crc(_iter)); \
break; \
} \
\
(_iter) < (_end); \
})
#define __bkey_for_each_crc(_k, _start, _end, _crc, _iter) \
for ((_crc) = bch2_extent_crc_unpack(_k, NULL), \
(_iter) = (_start); \
bkey_crc_next(_k, _start, _end, _crc, _iter); \
(_iter) = extent_entry_next(_iter))
#define bkey_for_each_crc(_k, _p, _crc, _iter) \
__bkey_for_each_crc(_k, (_p).start, (_p).end, _crc, _iter)
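
A usage sketch for the new iterator (editorial; the helper name is hypothetical, but the calling convention mirrors the call sites converted elsewhere in this patch):

static bool bkey_has_checksummed_data(struct bkey_s_c k)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	struct bch_extent_crc_unpacked crc;
	const union bch_extent_entry *i;

	/* visits each crc entry, unpacked, in key order: */
	bkey_for_each_crc(k.k, ptrs, crc, i)
		if (crc.csum_type)
			return true;
	return false;
}
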
/* utility code common to all keys with pointers: */
static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
@ -267,7 +289,7 @@ static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k);
return (struct bkey_ptrs_c) {
to_entry(&e.v->start[0]),
to_entry(bkey_val_end(e))
to_entry(extent_entry_last(e))
};
}
case KEY_TYPE_extent: {
@ -284,6 +306,14 @@ static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
to_entry(&s.v->ptrs[s.v->nr_blocks]),
};
}
case KEY_TYPE_reflink_v: {
struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
return (struct bkey_ptrs_c) {
r.v->start,
bkey_val_end(r),
};
}
default:
return (struct bkey_ptrs_c) { NULL, NULL };
}
@ -337,18 +367,6 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
return ret;
}
static inline bool bch2_bkey_has_device(struct bkey_s_c k, unsigned dev)
{
struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
bkey_for_each_ptr(p, ptr)
if (ptr->dev == dev)
return ptr;
return NULL;
}
unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c);
unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
@ -359,6 +377,11 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
struct bch_io_failures *,
struct extent_ptr_decoded *);
void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr);
void bch2_bkey_drop_device(struct bkey_s, unsigned);
const struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s_c, unsigned);
bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c);
@ -410,8 +433,10 @@ enum merge_result bch2_reservation_merge(struct bch_fs *,
.key_merge = bch2_reservation_merge, \
}
void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
bool bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);
int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *,
struct bkey_i *, struct bpos *);
int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);
enum btree_insert_ret
bch2_extent_can_insert(struct btree_trans *, struct btree_insert_entry *,
@ -419,52 +444,46 @@ bch2_extent_can_insert(struct btree_trans *, struct btree_insert_entry *,
void bch2_insert_fixup_extent(struct btree_trans *,
struct btree_insert_entry *);
void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
unsigned, unsigned);
void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
unsigned, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned);
unsigned bch2_extent_is_compressed(struct bkey_s_c);
bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent,
struct bch_extent_ptr, u64);
bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
struct bch_extent_ptr, u64);
static inline bool bkey_extent_is_data(const struct bkey *k)
{
switch (k->type) {
case KEY_TYPE_btree_ptr:
case KEY_TYPE_extent:
case KEY_TYPE_reflink_p:
case KEY_TYPE_reflink_v:
return true;
default:
return false;
}
}
/*
* Should extent be counted under inode->i_sectors?
*/
static inline bool bkey_extent_is_allocation(const struct bkey *k)
{
switch (k->type) {
case KEY_TYPE_extent:
case KEY_TYPE_reservation:
case KEY_TYPE_reflink_p:
case KEY_TYPE_reflink_v:
return true;
default:
return false;
}
}
static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k)
{
return bkey_extent_is_allocation(k.k) &&
!bch2_extent_is_compressed(k);
}
void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr);
void bch2_bkey_drop_device(struct bkey_s, unsigned);
/* Extent entry iteration: */
#define extent_for_each_entry_from(_e, _entry, _start) \
@ -480,45 +499,16 @@ void bch2_bkey_drop_device(struct bkey_s, unsigned);
#define extent_for_each_ptr(_e, _ptr) \
__bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr)
#define extent_crc_next(_e, _crc, _iter) \
({ \
extent_for_each_entry_from(_e, _iter, _iter) \
if (extent_entry_is_crc(_iter)) { \
(_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_iter));\
break; \
} \
\
(_iter) < extent_entry_last(_e); \
})
#define extent_for_each_crc(_e, _crc, _iter) \
for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL), \
(_iter) = (_e).v->start; \
extent_crc_next(_e, _crc, _iter); \
(_iter) = extent_entry_next(_iter))
#define extent_for_each_ptr_decode(_e, _ptr, _entry) \
__bkey_for_each_ptr_decode((_e).k, (_e).v->start, \
extent_entry_last(_e), _ptr, _entry)
void bch2_extent_crc_append(struct bkey_i_extent *,
struct bch_extent_crc_unpacked);
void bch2_extent_ptr_decoded_append(struct bkey_i_extent *,
void bch2_extent_ptr_decoded_append(struct bkey_i *,
struct extent_ptr_decoded *);
static inline void __extent_entry_push(struct bkey_i_extent *e)
{
union bch_extent_entry *entry = extent_entry_last(extent_i_to_s(e));
EBUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) >
BKEY_EXTENT_VAL_U64s_MAX);
e->k.u64s += extent_entry_u64s(entry);
}
bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent,
bool bch2_can_narrow_extent_crcs(struct bkey_s_c,
struct bch_extent_crc_unpacked);
bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked);
bool bch2_bkey_narrow_crcs(struct bkey_i *, struct bch_extent_crc_unpacked);
union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
struct bch_extent_ptr *);
@ -540,11 +530,11 @@ do { \
} \
} while (0)
bool __bch2_cut_front(struct bpos, struct bkey_s);
void __bch2_cut_front(struct bpos, struct bkey_s);
static inline bool bch2_cut_front(struct bpos where, struct bkey_i *k)
static inline void bch2_cut_front(struct bpos where, struct bkey_i *k)
{
return __bch2_cut_front(where, bkey_i_to_s(k));
__bch2_cut_front(where, bkey_i_to_s(k));
}
bool bch2_cut_back(struct bpos, struct bkey *);

File diff suppressed because it is too large


@ -9,6 +9,22 @@
#include <linux/uio.h>
struct quota_res;
int bch2_extent_update(struct btree_trans *,
struct bch_inode_info *,
struct disk_reservation *,
struct quota_res *,
struct btree_iter *,
struct bkey_i *,
u64, bool, bool, s64 *);
int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
struct bpos, struct bch_inode_info *, u64);
int __must_check bch2_write_inode_size(struct bch_fs *,
struct bch_inode_info *,
loff_t, unsigned);
int bch2_writepage(struct page *, struct writeback_control *);
int bch2_readpage(struct file *, struct page *);
@ -30,6 +46,9 @@ int bch2_fsync(struct file *, loff_t, loff_t, int);
int bch2_truncate(struct bch_inode_info *, struct iattr *);
long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
loff_t bch2_remap_file_range(struct file *, loff_t, struct file *,
loff_t, loff_t, unsigned);
loff_t bch2_llseek(struct file *, loff_t, int);
vm_fault_t bch2_page_mkwrite(struct vm_fault *);

View File

@ -1068,16 +1068,20 @@ static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode)
return 0;
}
static int bch2_fill_extent(struct fiemap_extent_info *info,
const struct bkey_i *k, unsigned flags)
static int bch2_fill_extent(struct bch_fs *c,
struct fiemap_extent_info *info,
struct bkey_s_c k, unsigned flags)
{
if (bkey_extent_is_data(&k->k)) {
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
if (bkey_extent_is_data(k.k)) {
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
int ret;
extent_for_each_ptr_decode(e, p, entry) {
if (k.k->type == KEY_TYPE_reflink_v)
flags |= FIEMAP_EXTENT_SHARED;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
int flags2 = 0;
u64 offset = p.ptr.offset;
@ -1086,23 +1090,23 @@ static int bch2_fill_extent(struct fiemap_extent_info *info,
else
offset += p.crc.offset;
if ((offset & (PAGE_SECTORS - 1)) ||
(e.k->size & (PAGE_SECTORS - 1)))
if ((offset & (c->opts.block_size - 1)) ||
(k.k->size & (c->opts.block_size - 1)))
flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
ret = fiemap_fill_next_extent(info,
bkey_start_offset(e.k) << 9,
bkey_start_offset(k.k) << 9,
offset << 9,
e.k->size << 9, flags|flags2);
k.k->size << 9, flags|flags2);
if (ret)
return ret;
}
return 0;
} else if (k->k.type == KEY_TYPE_reservation) {
} else if (k.k->type == KEY_TYPE_reservation) {
return fiemap_fill_next_extent(info,
bkey_start_offset(&k->k) << 9,
0, k->k.size << 9,
bkey_start_offset(k.k) << 9,
0, k.k->size << 9,
flags|
FIEMAP_EXTENT_DELALLOC|
FIEMAP_EXTENT_UNWRITTEN);
@ -1119,7 +1123,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
BKEY_PADDED(k) tmp;
BKEY_PADDED(k) cur, prev;
unsigned offset_into_extent, sectors;
bool have_extent = false;
int ret = 0;
@ -1128,27 +1133,58 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
bch2_trans_init(&trans, c, 0, 0);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(ei->v.i_ino, start >> 9), 0, k, ret)
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS(ei->v.i_ino, start >> 9),
BTREE_ITER_SLOTS);
while (bkey_cmp(iter->pos, POS(ei->v.i_ino, (start + len) >> 9)) < 0) {
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
bkey_reassemble(&cur.k, k);
k = bkey_i_to_s_c(&cur.k);
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
ret = bch2_read_indirect_extent(&trans, iter,
&offset_into_extent, &cur.k);
if (ret)
break;
sectors = min(sectors, k.k->size - offset_into_extent);
bch2_cut_front(POS(k.k->p.inode,
bkey_start_offset(k.k) + offset_into_extent),
&cur.k);
bch2_key_resize(&cur.k.k, sectors);
cur.k.k.p.offset = iter->pos.offset + cur.k.k.size;
if (bkey_extent_is_data(k.k) ||
k.k->type == KEY_TYPE_reservation) {
if (bkey_cmp(bkey_start_pos(k.k),
POS(ei->v.i_ino, (start + len) >> 9)) >= 0)
break;
if (have_extent) {
ret = bch2_fill_extent(info, &tmp.k, 0);
ret = bch2_fill_extent(c, info,
bkey_i_to_s_c(&prev.k), 0);
if (ret)
break;
}
bkey_reassemble(&tmp.k, k);
bkey_copy(&prev.k, &cur.k);
have_extent = true;
}
if (!ret && have_extent)
ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST);
bch2_btree_iter_set_pos(iter,
POS(iter->pos.inode,
iter->pos.offset + sectors));
}
if (!ret && have_extent)
ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k),
FIEMAP_EXTENT_LAST);
err:
ret = bch2_trans_exit(&trans) ?: ret;
return ret < 0 ? ret : 0;
}
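bch2_fiemap() now walks extent slots by hand instead of using for_each_btree_key(): each key is reassembled into a private copy, indirect extents are resolved via bch2_read_indirect_extent(), and reflink_v-backed ranges are reported with FIEMAP_EXTENT_SHARED. Extents are emitted one behind the iterator (prev vs. cur) so the final call can carry FIEMAP_EXTENT_LAST.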
@ -1196,6 +1232,7 @@ static const struct file_operations bch_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = bch2_compat_fs_ioctl,
#endif
.remap_file_range = bch2_remap_file_range,
};
static const struct inode_operations bch_file_inode_operations = {
@ -1712,9 +1749,8 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
goto out;
}
/* XXX: blocksize */
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_blocksize = block_bytes(c);
sb->s_blocksize_bits = ilog2(block_bytes(c));
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_op = &bch_super_operations;
sb->s_export_op = &bch_export_ops;

View File

@ -259,6 +259,8 @@ int bch2_write_index_default(struct bch_write_op *op)
bch2_verify_keylist_sorted(keys);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
retry:
bch2_trans_begin(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
@ -269,7 +271,9 @@ int bch2_write_index_default(struct bch_write_op *op)
bkey_copy(&split.k, bch2_keylist_front(keys));
bch2_extent_trim_atomic(&split.k, iter);
ret = bch2_extent_trim_atomic(&split.k, iter);
if (ret)
break;
bch2_trans_update(&trans,
BTREE_INSERT_ENTRY(iter, &split.k));
@ -286,6 +290,11 @@ int bch2_write_index_default(struct bch_write_op *op)
bch2_keylist_pop_front(keys);
} while (!bch2_keylist_empty(keys));
if (ret == -EINTR) {
ret = 0;
goto retry;
}
bch2_trans_exit(&trans);
return ret;
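The -EINTR handling added above is the btree transaction restart idiom used throughout bcachefs: bch2_trans_commit() returns -EINTR when the transaction must be restarted (for instance after dropping locks), and the caller loops back to bch2_trans_begin(). Schematically (a sketch, not code from this commit):

retry:
	bch2_trans_begin(&trans);

	/* ... rebuild updates against the iterators ... */

	ret = bch2_trans_commit(&trans, NULL, NULL,
				BTREE_INSERT_ATOMIC|
				BTREE_INSERT_NOFAIL);
	if (ret == -EINTR)
		goto retry;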
@ -426,7 +435,7 @@ static void init_append_extent(struct bch_write_op *op,
p.ptr.cached = !ca->mi.durability ||
(op->flags & BCH_WRITE_CACHED) != 0;
p.ptr.offset += ca->mi.bucket_size - ob->sectors_free;
bch2_extent_ptr_decoded_append(e, &p);
bch2_extent_ptr_decoded_append(&e->k_i, &p);
BUG_ON(crc.compressed_size > ob->sectors_free);
ob->sectors_free -= crc.compressed_size;
@ -954,17 +963,13 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
struct bch_io_opts opts,
unsigned flags)
{
if (!bkey_extent_is_data(k.k))
return false;
if (!(flags & BCH_READ_MAY_PROMOTE))
return false;
if (!opts.promote_target)
return false;
if (bch2_extent_has_target(c, bkey_s_c_to_extent(k),
opts.promote_target))
if (bch2_bkey_has_target(c, k, opts.promote_target))
return false;
if (bch2_target_congested(c, opts.promote_target)) {
@ -1028,6 +1033,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
noinline
static struct promote_op *__promote_alloc(struct bch_fs *c,
enum btree_id btree_id,
struct bpos pos,
struct extent_ptr_decoded *pick,
struct bch_io_opts opts,
@ -1084,6 +1090,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
(struct data_opts) {
.target = opts.promote_target
},
btree_id,
bkey_s_c_null);
BUG_ON(ret);
@ -1121,7 +1128,11 @@ static inline struct promote_op *promote_alloc(struct bch_fs *c,
if (!should_promote(c, k, pos, opts, flags))
return NULL;
promote = __promote_alloc(c, pos, pick, opts, sectors, rbio);
promote = __promote_alloc(c,
k.k->type == KEY_TYPE_reflink_v
? BTREE_ID_REFLINK
: BTREE_ID_EXTENTS,
pos, pick, opts, sectors, rbio);
if (!promote)
return NULL;
@ -1222,17 +1233,16 @@ retry:
k = bkey_i_to_s_c(&tmp.k);
bch2_trans_unlock(&trans);
if (!bkey_extent_is_data(k.k) ||
!bch2_extent_matches_ptr(c, bkey_i_to_s_c_extent(&tmp.k),
rbio->pick.ptr,
rbio->pos.offset -
rbio->pick.crc.offset)) {
if (!bch2_bkey_matches_ptr(c, bkey_i_to_s_c(&tmp.k),
rbio->pick.ptr,
rbio->pos.offset -
rbio->pick.crc.offset)) {
/* extent we wanted to read no longer exists: */
rbio->hole = true;
goto out;
}
ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
ret = __bch2_read_extent(c, rbio, bvec_iter, k, 0, failed, flags);
if (ret == READ_RETRY)
goto retry;
if (ret)
@ -1255,26 +1265,40 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
struct bkey_s_c k;
int ret;
bch2_trans_init(&trans, c, 0, 0);
flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE;
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(inode, bvec_iter.bi_sector),
BTREE_ITER_SLOTS, k, ret) {
BKEY_PADDED(k) tmp;
unsigned bytes;
unsigned bytes, sectors, offset_into_extent;
bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k);
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
ret = bch2_read_indirect_extent(&trans, iter,
&offset_into_extent, &tmp.k);
if (ret)
break;
sectors = min(sectors, k.k->size - offset_into_extent);
bch2_trans_unlock(&trans);
bytes = min_t(unsigned, bvec_iter.bi_size,
(k.k->p.offset - bvec_iter.bi_sector) << 9);
bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
swap(bvec_iter.bi_size, bytes);
ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
ret = __bch2_read_extent(c, rbio, bvec_iter, k,
offset_into_extent, failed, flags);
switch (ret) {
case READ_RETRY:
goto retry;
@ -1355,7 +1379,6 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_i_extent *e;
BKEY_PADDED(k) new;
struct bch_extent_crc_unpacked new_crc;
u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
@ -1374,34 +1397,30 @@ retry:
if (IS_ERR_OR_NULL(k.k))
goto out;
if (!bkey_extent_is_data(k.k))
goto out;
bkey_reassemble(&new.k, k);
e = bkey_i_to_extent(&new.k);
k = bkey_i_to_s_c(&new.k);
if (!bch2_extent_matches_ptr(c, extent_i_to_s_c(e),
rbio->pick.ptr, data_offset) ||
bversion_cmp(e->k.version, rbio->version))
if (bversion_cmp(k.k->version, rbio->version) ||
!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
goto out;
/* Extent was merged? */
if (bkey_start_offset(&e->k) < data_offset ||
e->k.p.offset > data_offset + rbio->pick.crc.uncompressed_size)
if (bkey_start_offset(k.k) < data_offset ||
k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
goto out;
if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
rbio->pick.crc, NULL, &new_crc,
bkey_start_offset(&e->k) - data_offset, e->k.size,
bkey_start_offset(k.k) - data_offset, k.k->size,
rbio->pick.crc.csum_type)) {
bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
goto out;
}
if (!bch2_extent_narrow_crcs(e, new_crc))
if (!bch2_bkey_narrow_crcs(&new.k, new_crc))
goto out;
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &e->k_i));
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &new.k));
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
@ -1412,15 +1431,6 @@ out:
bch2_trans_exit(&trans);
}
static bool should_narrow_crcs(struct bkey_s_c k,
struct extent_ptr_decoded *pick,
unsigned flags)
{
return !(flags & BCH_READ_IN_RETRY) &&
bkey_extent_is_data(k.k) &&
bch2_can_narrow_extent_crcs(bkey_s_c_to_extent(k), pick->crc);
}
/* Inner part that may run in process context */
static void __bch2_read_endio(struct work_struct *work)
{
@ -1455,7 +1465,7 @@ static void __bch2_read_endio(struct work_struct *work)
goto nodecode;
/* Adjust crc to point to subset of data we want: */
crc.offset += rbio->bvec_iter.bi_sector - rbio->pos.offset;
crc.offset += rbio->offset_into_extent;
crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
if (crc.compression_type != BCH_COMPRESSION_NONE) {
@ -1564,8 +1574,51 @@ static void bch2_read_endio(struct bio *bio)
bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
}
int bch2_read_indirect_extent(struct btree_trans *trans,
struct btree_iter *extent_iter,
unsigned *offset_into_extent,
struct bkey_i *orig_k)
{
struct btree_iter *iter;
struct bkey_s_c k;
u64 reflink_offset;
int ret;
if (orig_k->k.type != KEY_TYPE_reflink_p)
return 0;
reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k)->v.idx) +
*offset_into_extent;
iter = __bch2_trans_get_iter(trans, BTREE_ID_REFLINK,
POS(0, reflink_offset),
BTREE_ITER_SLOTS, 1);
ret = PTR_ERR_OR_ZERO(iter);
if (ret)
return ret;
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
if (k.k->type != KEY_TYPE_reflink_v) {
__bcache_io_error(trans->c,
"pointer to nonexistent indirect extent");
ret = -EIO;
goto err;
}
*offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
bkey_reassemble(orig_k, k);
err:
bch2_trans_iter_put(trans, iter);
return ret;
}
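/*
 * Callers resolve a reflink pointer with the pattern this commit
 * repeats in bch2_read(), the retry paths and fiemap: compute how far
 * into the extent the iterator is, let bch2_read_indirect_extent()
 * swap a reflink_p key for the reflink_v it points at (making the
 * offset relative to the indirect extent), then clamp the IO to what
 * both extents cover:
 *
 *	offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
 *	sectors = k.k->size - offset_into_extent;
 *
 *	ret = bch2_read_indirect_extent(&trans, iter,
 *					&offset_into_extent, &tmp.k);
 *	if (ret)
 *		goto err;
 *
 *	sectors = min(sectors, k.k->size - offset_into_extent);
 */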
int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bvec_iter iter, struct bkey_s_c k,
unsigned offset_into_extent,
struct bch_io_failures *failed, unsigned flags)
{
struct extent_ptr_decoded pick;
@ -1598,7 +1651,6 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS)
goto hole;
iter.bi_sector = pos.offset;
iter.bi_size = pick.crc.compressed_size << 9;
goto noclone;
}
@ -1607,13 +1659,13 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
bio_flagged(&orig->bio, BIO_CHAIN))
flags |= BCH_READ_MUST_CLONE;
narrow_crcs = should_narrow_crcs(k, &pick, flags);
narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
bch2_can_narrow_extent_crcs(k, pick.crc);
if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
flags |= BCH_READ_MUST_BOUNCE;
EBUG_ON(bkey_start_offset(k.k) > iter.bi_sector ||
k.k->p.offset < bvec_iter_end_sector(iter));
BUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
if (pick.crc.compression_type != BCH_COMPRESSION_NONE ||
(pick.crc.csum_type != BCH_CSUM_NONE &&
@ -1634,15 +1686,17 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
(bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
bvec_iter_sectors(iter) != pick.crc.live_size ||
pick.crc.offset ||
iter.bi_sector != pos.offset));
offset_into_extent));
pos.offset += offset_into_extent;
pick.ptr.offset += pick.crc.offset +
(iter.bi_sector - pos.offset);
offset_into_extent;
offset_into_extent = 0;
pick.crc.compressed_size = bvec_iter_sectors(iter);
pick.crc.uncompressed_size = bvec_iter_sectors(iter);
pick.crc.offset = 0;
pick.crc.live_size = bvec_iter_sectors(iter);
pos.offset = iter.bi_sector;
offset_into_extent = 0;
}
if (rbio) {
@ -1697,6 +1751,7 @@ noclone:
else
rbio->end_io = orig->bio.bi_end_io;
rbio->bvec_iter = iter;
rbio->offset_into_extent = offset_into_extent;
rbio->flags = flags;
rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
rbio->narrow_crcs = narrow_crcs;
@ -1815,45 +1870,67 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
rbio->c = c;
rbio->start_time = local_clock();
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(inode, rbio->bio.bi_iter.bi_sector),
BTREE_ITER_SLOTS, k, ret) {
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS(inode, rbio->bio.bi_iter.bi_sector),
BTREE_ITER_SLOTS);
while (1) {
BKEY_PADDED(k) tmp;
unsigned bytes;
unsigned bytes, sectors, offset_into_extent;
bch2_btree_iter_set_pos(iter,
POS(inode, rbio->bio.bi_iter.bi_sector));
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k);
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
ret = bch2_read_indirect_extent(&trans, iter,
&offset_into_extent, &tmp.k);
if (ret)
goto err;
/*
* With indirect extents, the amount of data to read is the min
* of the original extent and the indirect extent:
*/
sectors = min(sectors, k.k->size - offset_into_extent);
/*
* Unlock the iterator while the btree node's lock is still in
* cache, before doing the IO:
*/
bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k);
bch2_trans_unlock(&trans);
bytes = min_t(unsigned, rbio->bio.bi_iter.bi_size,
(k.k->p.offset - rbio->bio.bi_iter.bi_sector) << 9);
bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
swap(rbio->bio.bi_iter.bi_size, bytes);
if (rbio->bio.bi_iter.bi_size == bytes)
flags |= BCH_READ_LAST_FRAGMENT;
bch2_read_extent(c, rbio, k, flags);
bch2_read_extent(c, rbio, k, offset_into_extent, flags);
if (flags & BCH_READ_LAST_FRAGMENT)
return;
break;
swap(rbio->bio.bi_iter.bi_size, bytes);
bio_advance(&rbio->bio, bytes);
}
/*
* If we get here, it better have been because there was an error
* reading a btree node
*/
BUG_ON(!ret);
bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret);
out:
bch2_trans_exit(&trans);
return;
err:
bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret);
bch2_rbio_done(rbio);
goto out;
}
void bch2_fs_io_exit(struct bch_fs *c)

View File

@ -95,9 +95,8 @@ struct bch_devs_mask;
struct cache_promote_op;
struct extent_ptr_decoded;
int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
struct bkey_s_c, struct bch_io_failures *, unsigned);
void bch2_read(struct bch_fs *, struct bch_read_bio *, u64);
int bch2_read_indirect_extent(struct btree_trans *, struct btree_iter *,
unsigned *, struct bkey_i *);
enum bch_read_flags {
BCH_READ_RETRY_IF_STALE = 1 << 0,
@ -112,14 +111,22 @@ enum bch_read_flags {
BCH_READ_IN_RETRY = 1 << 7,
};
int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *,
struct bvec_iter, struct bkey_s_c, unsigned,
struct bch_io_failures *, unsigned);
static inline void bch2_read_extent(struct bch_fs *c,
struct bch_read_bio *rbio,
struct bkey_s_c k,
unsigned offset_into_extent,
unsigned flags)
{
__bch2_read_extent(c, rbio, rbio->bio.bi_iter, k, NULL, flags);
__bch2_read_extent(c, rbio, rbio->bio.bi_iter, k,
offset_into_extent, NULL, flags);
}
void bch2_read(struct bch_fs *, struct bch_read_bio *, u64);
static inline struct bch_read_bio *rbio_init(struct bio *bio,
struct bch_io_opts opts)
{

View File

@ -38,6 +38,8 @@ struct bch_read_bio {
*/
struct bvec_iter bvec_iter;
unsigned offset_into_extent;
u16 flags;
union {
struct {

View File

@ -34,7 +34,8 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
return 0;
}
static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags,
enum btree_id btree_id)
{
struct btree_trans trans;
struct btree_iter *iter;
@ -44,13 +45,12 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS_MIN, BTREE_ITER_PREFETCH);
iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
BTREE_ITER_PREFETCH);
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k))) {
if (!bkey_extent_is_data(k.k) ||
!bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
if (!bch2_bkey_has_device(k, dev_idx)) {
ret = bch2_mark_bkey_replicas(c, k);
if (ret)
break;
@ -99,6 +99,12 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
return ret;
}
static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
return __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_EXTENTS) ?:
__bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_REFLINK);
}
static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
struct btree_trans trans;

View File

@ -64,13 +64,14 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
iter = bch2_trans_get_iter(&trans, m->btree_id,
bkey_start_pos(&bch2_keylist_front(keys)->k),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
while (1) {
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
struct bkey_i_extent *insert, *new =
struct bkey_i *insert;
struct bkey_i_extent *new =
bkey_i_to_extent(bch2_keylist_front(keys));
BKEY_PADDED(k) _new, _insert;
const union bch_extent_entry *entry;
@ -83,32 +84,29 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
break;
if (bversion_cmp(k.k->version, new->k.version) ||
!bkey_extent_is_data(k.k) ||
!bch2_extent_matches_ptr(c, bkey_s_c_to_extent(k),
m->ptr, m->offset))
!bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
goto nomatch;
if (m->data_cmd == DATA_REWRITE &&
!bch2_extent_has_device(bkey_s_c_to_extent(k),
m->data_opts.rewrite_dev))
!bch2_bkey_has_device(k, m->data_opts.rewrite_dev))
goto nomatch;
bkey_reassemble(&_insert.k, k);
insert = bkey_i_to_extent(&_insert.k);
insert = &_insert.k;
bkey_copy(&_new.k, bch2_keylist_front(keys));
new = bkey_i_to_extent(&_new.k);
bch2_cut_front(iter->pos, &insert->k_i);
bch2_cut_front(iter->pos, insert);
bch2_cut_back(new->k.p, &insert->k);
bch2_cut_back(insert->k.p, &new->k);
if (m->data_cmd == DATA_REWRITE)
bch2_bkey_drop_device(extent_i_to_s(insert).s,
bch2_bkey_drop_device(bkey_i_to_s(insert),
m->data_opts.rewrite_dev);
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) {
if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
/*
* raced with another move op? extent already
* has a pointer to the device we just wrote
@ -124,18 +122,18 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
if (!did_work)
goto nomatch;
bch2_extent_narrow_crcs(insert,
bch2_bkey_narrow_crcs(insert,
(struct bch_extent_crc_unpacked) { 0 });
bch2_extent_normalize(c, extent_i_to_s(insert).s);
bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert),
op->opts.background_target,
op->opts.data_replicas);
bch2_extent_normalize(c, bkey_i_to_s(insert));
bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert),
op->opts.background_target,
op->opts.data_replicas);
/*
* If we're not fully overwriting @k, and it's compressed, we
* need a reservation for all the pointers in @insert
*/
nr = bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) -
nr = bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(insert)) -
m->nr_ptrs_reserved;
if (insert->k.size < k.k->size &&
@ -151,7 +149,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
}
bch2_trans_update(&trans,
BTREE_INSERT_ENTRY(iter, &insert->k_i));
BTREE_INSERT_ENTRY(iter, insert));
ret = bch2_trans_commit(&trans, &op->res,
op_journal_seq(op),
@ -216,10 +214,12 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
struct bch_io_opts io_opts,
enum data_cmd data_cmd,
struct data_opts data_opts,
enum btree_id btree_id,
struct bkey_s_c k)
{
int ret;
m->btree_id = btree_id;
m->data_cmd = data_cmd;
m->data_opts = data_opts;
m->nr_ptrs_reserved = 0;
@ -267,11 +267,12 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
break;
}
case DATA_REWRITE: {
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
unsigned compressed_sectors = 0;
extent_for_each_ptr_decode(bkey_s_c_to_extent(k), p, entry)
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
if (!p.ptr.cached &&
p.crc.compression_type != BCH_COMPRESSION_NONE &&
bch2_dev_in_target(c, p.ptr.dev, data_opts.target))
@ -395,14 +396,16 @@ static int bch2_move_extent(struct bch_fs *c,
struct moving_context *ctxt,
struct write_point_specifier wp,
struct bch_io_opts io_opts,
struct bkey_s_c_extent e,
enum btree_id btree_id,
struct bkey_s_c k,
enum data_cmd data_cmd,
struct data_opts data_opts)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
struct moving_io *io;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
unsigned sectors = e.k->size, pages;
unsigned sectors = k.k->size, pages;
int ret = -ENOMEM;
move_ctxt_wait_event(ctxt,
@ -414,7 +417,7 @@ static int bch2_move_extent(struct bch_fs *c,
SECTORS_IN_FLIGHT_PER_DEVICE);
/* write path might have to decompress data: */
extent_for_each_ptr_decode(e, p, entry)
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
@ -424,8 +427,8 @@ static int bch2_move_extent(struct bch_fs *c,
goto err;
io->write.ctxt = ctxt;
io->read_sectors = e.k->size;
io->write_sectors = e.k->size;
io->read_sectors = k.k->size;
io->write_sectors = k.k->size;
bio_init(&io->write.op.wbio.bio, io->bi_inline_vecs, pages);
bio_set_prio(&io->write.op.wbio.bio,
@ -442,18 +445,18 @@ static int bch2_move_extent(struct bch_fs *c,
io->rbio.bio.bi_iter.bi_size = sectors << 9;
bio_set_op_attrs(&io->rbio.bio, REQ_OP_READ, 0);
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(e.k);
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
io->rbio.bio.bi_end_io = move_read_endio;
ret = bch2_migrate_write_init(c, &io->write, wp, io_opts,
data_cmd, data_opts, e.s_c);
data_cmd, data_opts, btree_id, k);
if (ret)
goto err_free_pages;
atomic64_inc(&ctxt->stats->keys_moved);
atomic64_add(e.k->size, &ctxt->stats->sectors_moved);
atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
trace_move_extent(e.k);
trace_move_extent(k.k);
atomic_add(io->read_sectors, &ctxt->read_sectors);
list_add_tail(&io->list, &ctxt->reads);
@ -463,7 +466,7 @@ static int bch2_move_extent(struct bch_fs *c,
* ctxt when doing wakeup
*/
closure_get(&ctxt->cl);
bch2_read_extent(c, &io->rbio, e.s_c,
bch2_read_extent(c, &io->rbio, k, 0,
BCH_READ_NODECODE|
BCH_READ_LAST_FRAGMENT);
return 0;
@ -472,20 +475,21 @@ err_free_pages:
err_free:
kfree(io);
err:
trace_move_alloc_fail(e.k);
trace_move_alloc_fail(k.k);
return ret;
}
int bch2_move_data(struct bch_fs *c,
struct bch_ratelimit *rate,
struct write_point_specifier wp,
struct bpos start,
struct bpos end,
move_pred_fn pred, void *arg,
struct bch_move_stats *stats)
static int __bch2_move_data(struct bch_fs *c,
struct moving_context *ctxt,
struct bch_ratelimit *rate,
struct write_point_specifier wp,
struct bpos start,
struct bpos end,
move_pred_fn pred, void *arg,
struct bch_move_stats *stats,
enum btree_id btree_id)
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct moving_context ctxt = { .stats = stats };
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
BKEY_PADDED(k) tmp;
struct btree_trans trans;
@ -496,17 +500,13 @@ int bch2_move_data(struct bch_fs *c,
u64 delay, cur_inum = U64_MAX;
int ret = 0, ret2;
closure_init_stack(&ctxt.cl);
INIT_LIST_HEAD(&ctxt.reads);
init_waitqueue_head(&ctxt.wait);
bch2_trans_init(&trans, c, 0, 0);
stats->data_type = BCH_DATA_USER;
stats->btree_id = BTREE_ID_EXTENTS;
stats->btree_id = btree_id;
stats->pos = POS_MIN;
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start,
iter = bch2_trans_get_iter(&trans, btree_id, start,
BTREE_ITER_PREFETCH);
if (rate)
@ -531,7 +531,7 @@ int bch2_move_data(struct bch_fs *c,
if (unlikely(freezing(current))) {
bch2_trans_unlock(&trans);
move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
try_to_freeze();
}
} while (delay);
@ -582,13 +582,12 @@ peek:
k = bkey_i_to_s_c(&tmp.k);
bch2_trans_unlock(&trans);
ret2 = bch2_move_extent(c, &ctxt, wp, io_opts,
bkey_s_c_to_extent(k),
ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k,
data_cmd, data_opts);
if (ret2) {
if (ret2 == -ENOMEM) {
/* memory allocation failure, wait for some IO to finish */
bch2_move_ctxt_wait_for_io(&ctxt);
bch2_move_ctxt_wait_for_io(ctxt);
continue;
}
@ -606,7 +605,32 @@ next_nondata:
bch2_trans_cond_resched(&trans);
}
out:
bch2_trans_exit(&trans);
ret = bch2_trans_exit(&trans) ?: ret;
return ret;
}
int bch2_move_data(struct bch_fs *c,
struct bch_ratelimit *rate,
struct write_point_specifier wp,
struct bpos start,
struct bpos end,
move_pred_fn pred, void *arg,
struct bch_move_stats *stats)
{
struct moving_context ctxt = { .stats = stats };
int ret;
closure_init_stack(&ctxt.cl);
INIT_LIST_HEAD(&ctxt.reads);
init_waitqueue_head(&ctxt.wait);
stats->data_type = BCH_DATA_USER;
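	/*
	 * With reflink, user data lives in two btrees: run the same
	 * walk over BTREE_ID_EXTENTS and then BTREE_ID_REFLINK, sharing
	 * one moving_context so that in-flight IO accounting and the
	 * final wait below cover both passes.
	 */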
ret = __bch2_move_data(c, &ctxt, rate, wp, start, end,
pred, arg, stats, BTREE_ID_EXTENTS) ?:
__bch2_move_data(c, &ctxt, rate, wp, start, end,
pred, arg, stats, BTREE_ID_REFLINK);
move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
closure_sync(&ctxt.cl);

View File

@ -25,6 +25,7 @@ struct data_opts {
};
struct migrate_write {
enum btree_id btree_id;
enum data_cmd data_cmd;
struct data_opts data_opts;
@ -44,7 +45,7 @@ int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
struct write_point_specifier,
struct bch_io_opts,
enum data_cmd, struct data_opts,
struct bkey_s_c);
enum btree_id, struct bkey_s_c);
typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
struct bkey_s_c,

View File

@ -69,26 +69,19 @@ static bool __copygc_pred(struct bch_dev *ca,
struct bkey_s_c k)
{
copygc_heap *h = &ca->copygc_heap;
const struct bch_extent_ptr *ptr =
bch2_bkey_has_device(k, ca->dev_idx);
switch (k.k->type) {
case KEY_TYPE_extent: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr =
bch2_extent_has_device(e, ca->dev_idx);
if (ptr) {
struct copygc_heap_entry search = { .offset = ptr->offset };
if (ptr) {
struct copygc_heap_entry search = { .offset = ptr->offset };
ssize_t i = eytzinger0_find_le(h->data, h->used,
sizeof(h->data[0]),
bucket_offset_cmp, &search);
ssize_t i = eytzinger0_find_le(h->data, h->used,
sizeof(h->data[0]),
bucket_offset_cmp, &search);
return (i >= 0 &&
ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
ptr->gen == h->data[i].gen);
}
break;
}
return (i >= 0 &&
ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
ptr->gen == h->data[i].gen);
}
return false;

View File

@ -38,9 +38,9 @@ void bch2_rebalance_add_key(struct bch_fs *c,
struct bkey_s_c k,
struct bch_io_opts *io_opts)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
struct bkey_s_c_extent e;
if (!bkey_extent_is_data(k.k))
return;
@ -49,9 +49,7 @@ void bch2_rebalance_add_key(struct bch_fs *c,
!io_opts->background_compression)
return;
e = bkey_s_c_to_extent(k);
extent_for_each_ptr_decode(e, p, entry)
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
if (rebalance_ptr_pred(c, p, io_opts)) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);

View File

@ -236,7 +236,8 @@ static void replay_now_at(struct journal *j, u64 seq)
bch2_journal_pin_put(j, j->replay_journal_seq++);
}
static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id,
struct bkey_i *k)
{
struct btree_trans trans;
struct btree_iter *iter, *split_iter;
@ -247,6 +248,7 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
struct bkey_i *split;
struct bpos atomic_end;
bool split_compressed = false;
int ret;
@ -254,7 +256,7 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
retry:
bch2_trans_begin(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
iter = bch2_trans_get_iter(&trans, btree_id,
bkey_start_pos(&k->k),
BTREE_ITER_INTENT);
@ -273,9 +275,14 @@ retry:
if (ret)
goto err;
ret = bch2_extent_atomic_end(&trans, split_iter,
k, &atomic_end);
if (ret)
goto err;
if (!split_compressed &&
bch2_extent_is_compressed(bkey_i_to_s_c(k)) &&
!bch2_extent_is_atomic(k, split_iter)) {
bkey_cmp(atomic_end, k->k.p) < 0) {
ret = bch2_disk_reservation_add(c, &disk_res,
k->k.size *
bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(k)),
@ -287,7 +294,7 @@ retry:
bkey_copy(split, k);
bch2_cut_front(split_iter->pos, split);
bch2_extent_trim_atomic(split, split_iter);
bch2_cut_back(atomic_end, &split->k);
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(split_iter, split));
bch2_btree_iter_set_pos(iter, split->k.p);
@ -295,7 +302,7 @@ retry:
if (split_compressed) {
ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k),
-((s64) k->k.size),
0, -((s64) k->k.size),
BCH_BUCKET_MARK_OVERWRITE) ?:
bch2_trans_commit(&trans, &disk_res, NULL,
BTREE_INSERT_ATOMIC|
@ -335,22 +342,17 @@ static int bch2_journal_replay(struct bch_fs *c,
for_each_journal_key(keys, i) {
replay_now_at(j, keys.journal_seq_base + i->journal_seq);
switch (i->btree_id) {
case BTREE_ID_ALLOC:
if (i->btree_id == BTREE_ID_ALLOC)
ret = bch2_alloc_replay_key(c, i->k);
break;
case BTREE_ID_EXTENTS:
ret = bch2_extent_replay_key(c, i->k);
break;
default:
else if (btree_node_type_is_extents(i->btree_id))
ret = bch2_extent_replay_key(c, i->btree_id, i->k);
else
ret = bch2_btree_insert(c, i->btree_id, i->k,
NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_JOURNAL_REPLAY|
BTREE_INSERT_NOMARK);
break;
}
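		/*
		 * Note: replay now dispatches on
		 * btree_node_type_is_extents() instead of hard-coding
		 * BTREE_ID_EXTENTS, so keys in the new REFLINK btree
		 * take the extent replay path (splitting, disk
		 * reservations) as well.
		 */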
if (ret) {
bch_err(c, "journal replay: error %d while replaying key",

libbcachefs/reflink.c Normal file (300 lines)
View File

@ -0,0 +1,300 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "btree_update.h"
#include "extents.h"
#include "fs.h"
#include "fs-io.h"
#include "reflink.h"
#include <linux/sched/signal.h>
/* reflink pointers */
const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
if (bkey_val_bytes(p.k) != sizeof(*p.v))
return "incorrect value size";
return NULL;
}
void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
pr_buf(out, "idx %llu", le64_to_cpu(p.v->idx));
}
enum merge_result bch2_reflink_p_merge(struct bch_fs *c,
struct bkey_s _l, struct bkey_s _r)
{
struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l);
struct bkey_s_reflink_p r = bkey_s_to_reflink_p(_r);
if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx))
return BCH_MERGE_NOMERGE;
if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) {
bch2_key_resize(l.k, KEY_SIZE_MAX);
__bch2_cut_front(l.k->p, _r);
return BCH_MERGE_PARTIAL;
}
bch2_key_resize(l.k, l.k->size + r.k->size);
return BCH_MERGE_MERGE;
}
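/*
 * Example: a reflink_p covering 8 sectors at idx 100 merges with a
 * right neighbour whose idx is 108. If the combined size would exceed
 * KEY_SIZE_MAX, the left key grows to the maximum and the remainder
 * stays in the right key (BCH_MERGE_PARTIAL).
 */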
/* indirect extents */
const char *bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
if (bkey_val_bytes(r.k) < sizeof(*r.v))
return "incorrect value size";
return bch2_bkey_ptrs_invalid(c, k);
}
void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
pr_buf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount));
bch2_bkey_ptrs_to_text(out, c, k);
}
/*
* bch2_remap_range() depends on bch2_extent_update(), which depends on various
* things tied to the linux vfs for inode updates, for now:
*/
#ifndef NO_BCACHEFS_FS
static int bch2_make_extent_indirect(struct btree_trans *trans,
struct btree_iter *extent_iter,
struct bkey_i_extent *e)
{
struct bch_fs *c = trans->c;
struct btree_iter *reflink_iter;
struct bkey_s_c k;
struct bkey_i_reflink_v *r_v;
struct bkey_i_reflink_p *r_p;
int ret;
for_each_btree_key(trans, reflink_iter, BTREE_ID_REFLINK,
POS(0, c->reflink_hint),
BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) {
if (reflink_iter->pos.inode) {
bch2_btree_iter_set_pos(reflink_iter, POS_MIN);
continue;
}
if (bkey_deleted(k.k) && e->k.size <= k.k->size)
break;
}
if (ret)
goto err;
/* rewind iter to start of hole, if necessary: */
bch2_btree_iter_set_pos(reflink_iter, bkey_start_pos(k.k));
r_v = bch2_trans_kmalloc(trans, sizeof(*r_v) + bkey_val_bytes(&e->k));
ret = PTR_ERR_OR_ZERO(r_v);
if (ret)
goto err;
bkey_reflink_v_init(&r_v->k_i);
r_v->k.p = reflink_iter->pos;
bch2_key_resize(&r_v->k, e->k.size);
r_v->k.version = e->k.version;
set_bkey_val_u64s(&r_v->k, bkey_val_u64s(&r_v->k) +
bkey_val_u64s(&e->k));
r_v->v.refcount = 0;
memcpy(r_v->v.start, e->v.start, bkey_val_bytes(&e->k));
bch2_trans_update(trans, BTREE_INSERT_ENTRY(reflink_iter, &r_v->k_i));
r_p = bch2_trans_kmalloc(trans, sizeof(*r_p));
if (IS_ERR(r_p))
return PTR_ERR(r_p);
e->k.type = KEY_TYPE_reflink_p;
r_p = bkey_i_to_reflink_p(&e->k_i);
set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k));
bch2_trans_update(trans, BTREE_INSERT_ENTRY(extent_iter, &r_p->k_i));
err:
if (!IS_ERR(reflink_iter)) {
c->reflink_hint = reflink_iter->pos.offset;
bch2_trans_iter_put(trans, reflink_iter);
}
return ret;
}
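/*
 * The function above does the actual migration: find a hole in the
 * REFLINK btree big enough for the extent (c->reflink_hint avoids
 * rescanning from the start each time), copy the extent's value into
 * a new reflink_v there (refcount starts at 0; references are
 * accounted when the reflink_p keys are marked), then rewrite the
 * original key in place as a reflink_p whose idx is the start offset
 * of the new indirect extent.
 */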
static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
{
struct bkey_s_c k = bch2_btree_iter_peek(iter);
while (1) {
if (bkey_err(k))
return k;
if (bkey_cmp(iter->pos, end) >= 0)
return bkey_s_c_null;
if (k.k->type == KEY_TYPE_extent ||
k.k->type == KEY_TYPE_reflink_p)
return k;
k = bch2_btree_iter_next(iter);
}
}
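/*
 * get_next_src() returns only extent and reflink_p keys; holes and any
 * other key types in the source range show up as gaps, which the loop
 * below punches into the destination with bch2_fpunch_at().
 */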
s64 bch2_remap_range(struct bch_fs *c,
struct bch_inode_info *dst_inode,
struct bpos dst_start, struct bpos src_start,
u64 remap_sectors, u64 new_i_size)
{
struct btree_trans trans;
struct btree_iter *dst_iter, *src_iter;
struct bkey_s_c src_k;
BKEY_PADDED(k) new_dst, new_src;
struct bpos dst_end = dst_start, src_end = src_start;
struct bpos dst_want, src_want;
u64 src_done, dst_done;
int ret = 0;
if (!(c->sb.features & (1ULL << BCH_FEATURE_REFLINK))) {
mutex_lock(&c->sb_lock);
if (!(c->sb.features & (1ULL << BCH_FEATURE_REFLINK))) {
c->disk_sb.sb->features[0] |=
cpu_to_le64(1ULL << BCH_FEATURE_REFLINK);
bch2_write_super(c);
}
mutex_unlock(&c->sb_lock);
}
dst_end.offset += remap_sectors;
src_end.offset += remap_sectors;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
src_iter = __bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start,
BTREE_ITER_INTENT, 1);
dst_iter = __bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, dst_start,
BTREE_ITER_INTENT, 2);
while (1) {
bch2_trans_begin_updates(&trans);
trans.mem_top = 0;
if (fatal_signal_pending(current)) {
ret = -EINTR;
goto err;
}
src_k = get_next_src(src_iter, src_end);
ret = bkey_err(src_k);
if (ret)
goto btree_err;
src_done = bpos_min(src_iter->pos, src_end).offset -
src_start.offset;
dst_want = POS(dst_start.inode, dst_start.offset + src_done);
if (bkey_cmp(dst_iter->pos, dst_want) < 0) {
ret = bch2_fpunch_at(&trans, dst_iter, dst_want,
dst_inode, new_i_size);
if (ret)
goto btree_err;
continue;
}
BUG_ON(bkey_cmp(dst_iter->pos, dst_want));
if (!bkey_cmp(dst_iter->pos, dst_end))
break;
if (src_k.k->type == KEY_TYPE_extent) {
bkey_reassemble(&new_src.k, src_k);
src_k = bkey_i_to_s_c(&new_src.k);
bch2_cut_front(src_iter->pos, &new_src.k);
bch2_cut_back(src_end, &new_src.k.k);
ret = bch2_make_extent_indirect(&trans, src_iter,
bkey_i_to_extent(&new_src.k));
if (ret)
goto btree_err;
BUG_ON(src_k.k->type != KEY_TYPE_reflink_p);
}
if (src_k.k->type == KEY_TYPE_reflink_p) {
struct bkey_s_c_reflink_p src_p =
bkey_s_c_to_reflink_p(src_k);
struct bkey_i_reflink_p *dst_p =
bkey_reflink_p_init(&new_dst.k);
u64 offset = le64_to_cpu(src_p.v->idx) +
(src_iter->pos.offset -
bkey_start_offset(src_k.k));
dst_p->v.idx = cpu_to_le64(offset);
} else {
BUG();
}
new_dst.k.k.p = dst_iter->pos;
bch2_key_resize(&new_dst.k.k,
min(src_k.k->p.offset - src_iter->pos.offset,
dst_end.offset - dst_iter->pos.offset));
ret = bch2_extent_update(&trans, dst_inode, NULL, NULL,
dst_iter, &new_dst.k,
new_i_size, false, true, NULL);
if (ret)
goto btree_err;
dst_done = dst_iter->pos.offset - dst_start.offset;
src_want = POS(src_start.inode, src_start.offset + dst_done);
bch2_btree_iter_set_pos(src_iter, src_want);
btree_err:
if (ret == -EINTR)
ret = 0;
if (ret)
goto err;
}
BUG_ON(bkey_cmp(dst_iter->pos, dst_end));
err:
BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0);
dst_done = dst_iter->pos.offset - dst_start.offset;
new_i_size = min(dst_iter->pos.offset << 9, new_i_size);
ret = bch2_trans_exit(&trans) ?: ret;
mutex_lock(&dst_inode->ei_update_lock);
if (dst_inode->v.i_size < new_i_size) {
i_size_write(&dst_inode->v, new_i_size);
ret = bch2_write_inode_size(c, dst_inode, new_i_size,
ATTR_MTIME|ATTR_CTIME);
}
mutex_unlock(&dst_inode->ei_update_lock);
return dst_done ?: ret;
}
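/*
 * Loop invariant: src_done/dst_done measure each side's progress
 * through the range in sectors, and each iterator is stepped to the
 * other side's equivalent position (src_want/dst_want), keeping source
 * and destination in lockstep even as extents are split or holes are
 * punched.
 */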
#endif /* NO_BCACHEFS_FS */

libbcachefs/reflink.h Normal file (32 lines)
View File

@ -0,0 +1,32 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_REFLINK_H
#define _BCACHEFS_REFLINK_H
const char *bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
enum merge_result bch2_reflink_p_merge(struct bch_fs *,
struct bkey_s, struct bkey_s);
#define bch2_bkey_ops_reflink_p (struct bkey_ops) { \
.key_invalid = bch2_reflink_p_invalid, \
.val_to_text = bch2_reflink_p_to_text, \
.key_merge = bch2_reflink_p_merge, \
}
const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
#define bch2_bkey_ops_reflink_v (struct bkey_ops) { \
.key_invalid = bch2_reflink_v_invalid, \
.val_to_text = bch2_reflink_v_to_text, \
}
#ifndef NO_BCACHEFS_FS
s64 bch2_remap_range(struct bch_fs *, struct bch_inode_info *,
struct bpos, struct bpos, u64, u64);
#endif /* NO_BCACHEFS_FS */
#endif /* _BCACHEFS_REFLINK_H */

View File

@ -113,6 +113,7 @@ void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
extent_to_replicas(k, e);
break;
case KEY_TYPE_extent:
case KEY_TYPE_reflink_v:
e->data_type = BCH_DATA_USER;
extent_to_replicas(k, e);
break;