Update bcachefs sources to 3e0c5b0722 fixup! bcachefs: Btree write buffer

Kent Overstreet 2023-02-12 21:40:59 -05:00
parent 157ea20eb2
commit e160e9b979
32 changed files with 1051 additions and 298 deletions

.bcachefs_revision

@@ -1 +1 @@
-dab31ca168df1c7a492a0a2a626ba6cd983b2a7a
+3e0c5b0722d7fccf0ba83435c5da8892b00c0fe0

include/trace/events/bcachefs.h

@@ -1145,6 +1145,51 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced,
 		  __entry->new_u64s)
 );
 
+DEFINE_EVENT(transaction_event,	trans_restart_write_buffer_flush,
+	TP_PROTO(struct btree_trans *trans,
+		 unsigned long caller_ip),
+	TP_ARGS(trans, caller_ip)
+);
+
+TRACE_EVENT(write_buffer_flush,
+	TP_PROTO(struct btree_trans *trans, size_t nr, size_t skipped, size_t fast, size_t size),
+	TP_ARGS(trans, nr, skipped, fast, size),
+
+	TP_STRUCT__entry(
+		__field(size_t,	nr	)
+		__field(size_t,	skipped	)
+		__field(size_t,	fast	)
+		__field(size_t,	size	)
+	),
+
+	TP_fast_assign(
+		__entry->nr	= nr;
+		__entry->skipped = skipped;
+		__entry->fast	= fast;
+		__entry->size	= size;
+	),
+
+	TP_printk("%zu/%zu skipped %zu fast %zu",
+		  __entry->nr, __entry->size, __entry->skipped, __entry->fast)
+);
+
+TRACE_EVENT(write_buffer_flush_slowpath,
+	TP_PROTO(struct btree_trans *trans, size_t nr, size_t size),
+	TP_ARGS(trans, nr, size),
+
+	TP_STRUCT__entry(
+		__field(size_t,	nr	)
+		__field(size_t,	size	)
+	),
+
+	TP_fast_assign(
+		__entry->nr	= nr;
+		__entry->size	= size;
+	),
+
+	TP_printk("%zu/%zu", __entry->nr, __entry->size)
+);
+
 #endif /* _TRACE_BCACHE_H */
 
 /* This part must be outside protection */

libbcachefs/alloc_background.c

@@ -414,14 +414,13 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
 	prt_newline(out);
 	prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]);
 	prt_newline(out);
-	prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a));
-	prt_newline(out);
 
-	if (k.k->type == KEY_TYPE_alloc_v4) {
+	if (BCH_ALLOC_V4_NR_BACKPOINTERS(a)) {
 		struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k);
 		const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v);
 
+		prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a_raw.v));
+		prt_newline(out);
+
 		prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v));
 		printbuf_indent_add(out, 2);

@@ -674,7 +673,7 @@ int bch2_bucket_gens_init(struct bch_fs *c)
 			ret = commit_do(&trans, NULL, NULL,
 					BTREE_INSERT_NOFAIL|
 					BTREE_INSERT_LAZY_RW,
-				__bch2_btree_insert(&trans, BTREE_ID_bucket_gens, &g.k_i));
+				__bch2_btree_insert(&trans, BTREE_ID_bucket_gens, &g.k_i, 0));
 			if (ret)
 				break;
 			have_bucket_gens_key = false;

@@ -694,7 +693,7 @@ int bch2_bucket_gens_init(struct bch_fs *c)
 		ret = commit_do(&trans, NULL, NULL,
 				BTREE_INSERT_NOFAIL|
 				BTREE_INSERT_LAZY_RW,
-			__bch2_btree_insert(&trans, BTREE_ID_bucket_gens, &g.k_i));
+			__bch2_btree_insert(&trans, BTREE_ID_bucket_gens, &g.k_i, 0));
 
 	bch2_trans_exit(&trans);

@@ -1934,7 +1933,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
 		freespace->k.p = k.k->p;
 		freespace->k.size = k.k->size;
 
-		ret = __bch2_btree_insert(&trans, BTREE_ID_freespace, freespace) ?:
+		ret = __bch2_btree_insert(&trans, BTREE_ID_freespace, freespace, 0) ?:
 			bch2_trans_commit(&trans, NULL, NULL,
 					  BTREE_INSERT_LAZY_RW|
 					  BTREE_INSERT_NOFAIL);

libbcachefs/backpointers.c

@@ -5,42 +5,11 @@
 #include "backpointers.h"
 #include "btree_cache.h"
 #include "btree_update.h"
+#include "btree_write_buffer.h"
 #include "error.h"
 
 #include <linux/mm.h>
 
-/*
- * Convert from pos in backpointer btree to pos of corresponding bucket in alloc
- * btree:
- */
-static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c,
-					   struct bpos bp_pos)
-{
-	struct bch_dev *ca = bch_dev_bkey_exists(c, bp_pos.inode);
-	u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
-
-	return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
-}
-
-/*
- * Convert from pos in alloc btree + bucket offset to pos in backpointer btree:
- */
-static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
-					   struct bpos bucket,
-					   u64 bucket_offset)
-{
-	struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
-	struct bpos ret;
-
-	ret = POS(bucket.inode,
-		  (bucket_to_sector(ca, bucket.offset) <<
-		   MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
-
-	BUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
-
-	return ret;
-}
-
 static bool extent_matches_bp(struct bch_fs *c,
 			      enum btree_id btree_id, unsigned level,
 			      struct bkey_s_c k,
@@ -200,51 +169,53 @@ err:
 	return ret;
 }
 
-int bch2_bucket_backpointer_del(struct btree_trans *trans,
+bool bch2_bucket_backpointer_del(struct btree_trans *trans,
 				struct bkey_i_alloc_v4 *a,
-				struct bch_backpointer bp,
-				struct bkey_s_c orig_k)
+				struct bch_backpointer bp)
 {
-	struct bch_fs *c = trans->c;
 	struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
 	unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
-	struct btree_iter bp_iter;
-	struct bkey_s_c k;
-	int ret;
 
 	for (i = 0; i < nr; i++) {
 		int cmp = backpointer_cmp(bps[i], bp) ?:
 			memcmp(&bps[i], &bp, sizeof(bp));
-		if (!cmp)
-			goto found;
+		if (!cmp) {
+			array_remove_item(bps, nr, i);
+			SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
+			set_alloc_v4_u64s(a);
+			return true;
+		}
 		if (cmp >= 0)
 			break;
 	}
 
-	goto btree;
-found:
-	array_remove_item(bps, nr, i);
-	SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
-	set_alloc_v4_u64s(a);
-	return 0;
-btree:
-	bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
-			     bucket_pos_to_bp(c, a->k.p, bp.bucket_offset),
-			     BTREE_ITER_INTENT|
-			     BTREE_ITER_SLOTS|
-			     BTREE_ITER_WITH_UPDATES);
-	k = bch2_btree_iter_peek_slot(&bp_iter);
-	ret = bkey_err(k);
-	if (ret)
-		goto err;
+	return false;
+}
 
-	if (k.k->type != KEY_TYPE_backpointer ||
-	    memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
-		struct printbuf buf = PRINTBUF;
+static noinline int backpointer_mod_err(struct btree_trans *trans,
+					struct bch_backpointer bp,
+					struct bkey_s_c bp_k,
+					struct bkey_s_c orig_k,
+					bool insert)
+{
+	struct bch_fs *c = trans->c;
+	struct printbuf buf = PRINTBUF;
 
-		if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
-			goto err;
+	if (insert) {
+		prt_printf(&buf, "existing backpointer found when inserting ");
+		bch2_backpointer_to_text(&buf, &bp);
+		prt_newline(&buf);
+		printbuf_indent_add(&buf, 2);
+
+		prt_printf(&buf, "found ");
+		bch2_bkey_val_to_text(&buf, c, bp_k);
+		prt_newline(&buf);
+
+		prt_printf(&buf, "for ");
+		bch2_bkey_val_to_text(&buf, c, orig_k);
 
+		bch_err(c, "%s", buf.buf);
+	} else if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
 		prt_printf(&buf, "backpointer not found when deleting");
 		prt_newline(&buf);
 		printbuf_indent_add(&buf, 2);
@@ -254,85 +225,51 @@ btree:
 		prt_newline(&buf);
 
 		prt_printf(&buf, "got ");
-		bch2_bkey_val_to_text(&buf, c, k);
-		prt_newline(&buf);
-
-		prt_str(&buf, "alloc ");
-		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
+		bch2_bkey_val_to_text(&buf, c, bp_k);
 		prt_newline(&buf);
 
 		prt_printf(&buf, "for ");
 		bch2_bkey_val_to_text(&buf, c, orig_k);
 
 		bch_err(c, "%s", buf.buf);
-		bch2_inconsistent_error(c);
-		ret = -EIO;
-		printbuf_exit(&buf);
-		goto err;
 	}
 
-	ret = bch2_btree_delete_at(trans, &bp_iter, 0);
-err:
-	bch2_trans_iter_exit(trans, &bp_iter);
-	return ret;
+	printbuf_exit(&buf);
+
+	if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
+		bch2_inconsistent_error(c);
+		return -EIO;
+	} else {
+		return 0;
+	}
 }
 
-int bch2_bucket_backpointer_add(struct btree_trans *trans,
+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
 				struct bkey_i_alloc_v4 *a,
 				struct bch_backpointer bp,
-				struct bkey_s_c orig_k)
+				struct bkey_s_c orig_k,
+				bool insert)
 {
 	struct bch_fs *c = trans->c;
-	struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
-	unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
 	struct bkey_i_backpointer *bp_k;
 	struct btree_iter bp_iter;
 	struct bkey_s_c k;
 	int ret;
 
-	/* Check for duplicates: */
-	for (i = 0; i < nr; i++) {
-		int cmp = backpointer_cmp(bps[i], bp);
-		if (cmp >= 0)
-			break;
-	}
-
-	if ((i &&
-	     (bps[i - 1].bucket_offset +
-	      bps[i - 1].bucket_len > bp.bucket_offset)) ||
-	    (i < nr &&
-	     (bp.bucket_offset + bp.bucket_len > bps[i].bucket_offset))) {
-		struct printbuf buf = PRINTBUF;
-
-		prt_printf(&buf, "overlapping backpointer found when inserting ");
-		bch2_backpointer_to_text(&buf, &bp);
-		prt_newline(&buf);
-		printbuf_indent_add(&buf, 2);
-
-		prt_printf(&buf, "into ");
-		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
-		prt_newline(&buf);
-
-		prt_printf(&buf, "for ");
-		bch2_bkey_val_to_text(&buf, c, orig_k);
-
-		bch_err(c, "%s", buf.buf);
-		printbuf_exit(&buf);
-
-		if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
-			bch2_inconsistent_error(c);
-			return -EIO;
-		}
-	}
-
-	if (nr < BCH_ALLOC_V4_NR_BACKPOINTERS_MAX) {
-		array_insert_item(bps, nr, i, bp);
-		SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr);
-		set_alloc_v4_u64s(a);
-		return 0;
-	}
-
-	/* Overflow: use backpointer btree */
+	bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
+	ret = PTR_ERR_OR_ZERO(bp_k);
+	if (ret)
+		return ret;
+
+	bkey_backpointer_init(&bp_k->k_i);
+	bp_k->k.p = bucket_pos_to_bp(c, a->k.p, bp.bucket_offset);
+	bp_k->v = bp;
+
+	if (!insert) {
+		bp_k->k.type = KEY_TYPE_deleted;
+		set_bkey_val_u64s(&bp_k->k, 0);
+	}
+
 	bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
 			     bucket_pos_to_bp(c, a->k.p, bp.bucket_offset),
 			     BTREE_ITER_INTENT|
@@ -343,37 +280,15 @@ int bch2_bucket_backpointer_add(struct btree_trans *trans,
 	if (ret)
 		goto err;
 
-	if (k.k->type) {
-		struct printbuf buf = PRINTBUF;
-
-		prt_printf(&buf, "existing btree backpointer key found when inserting ");
-		bch2_backpointer_to_text(&buf, &bp);
-		prt_newline(&buf);
-		printbuf_indent_add(&buf, 2);
-
-		prt_printf(&buf, "found ");
-		bch2_bkey_val_to_text(&buf, c, k);
-		prt_newline(&buf);
-
-		prt_printf(&buf, "for ");
-		bch2_bkey_val_to_text(&buf, c, orig_k);
-
-		bch_err(c, "%s", buf.buf);
-		printbuf_exit(&buf);
-
-		if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
-			bch2_inconsistent_error(c);
-			ret = -EIO;
+	if (insert
+	    ? k.k->type
+	    : (k.k->type != KEY_TYPE_backpointer ||
+	       memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
+		ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
+		if (ret)
 			goto err;
-		}
 	}
 
-	bp_k = bch2_bkey_alloc(trans, &bp_iter, backpointer);
-	ret = PTR_ERR_OR_ZERO(bp_k);
-	if (ret)
-		goto err;
-
-	bp_k->v = bp;
-
 	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
 err:
 	bch2_trans_iter_exit(trans, &bp_iter);
@@ -457,6 +372,9 @@ static void backpointer_not_found(struct btree_trans *trans,
 	struct bch_fs *c = trans->c;
 	struct printbuf buf = PRINTBUF;
 
+	if (likely(!bch2_backpointers_no_use_write_buffer))
+		return;
+
 	prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
 		   thing_it_points_to);
 	prt_printf(&buf, "bucket: ");
@@ -512,27 +430,30 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
 
 	bch2_trans_iter_exit(trans, iter);
 
-	if (bp.level) {
-		struct btree *b;
+	if (unlikely(bch2_backpointers_no_use_write_buffer)) {
+		if (bp.level) {
+			struct btree *b;
 
-		/*
-		 * If a backpointer for a btree node wasn't found, it may be
-		 * because it was overwritten by a new btree node that hasn't
-		 * been written out yet - backpointer_get_node() checks for
-		 * this:
-		 */
-		b = bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
-		if (!IS_ERR_OR_NULL(b))
-			return bkey_i_to_s_c(&b->key);
+			/*
+			 * If a backpointer for a btree node wasn't found, it may be
+			 * because it was overwritten by a new btree node that hasn't
+			 * been written out yet - backpointer_get_node() checks for
+			 * this:
+			 */
+			b = bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
+			if (!IS_ERR_OR_NULL(b))
+				return bkey_i_to_s_c(&b->key);
 
-		bch2_trans_iter_exit(trans, iter);
+			bch2_trans_iter_exit(trans, iter);
 
-		if (IS_ERR(b))
-			return bkey_s_c_err(PTR_ERR(b));
-		return bkey_s_c_null;
-	}
+			if (IS_ERR(b))
+				return bkey_s_c_err(PTR_ERR(b));
+			return bkey_s_c_null;
+		}
 
-	backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
+		backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
+	}
 
 	return bkey_s_c_null;
 }
@@ -633,7 +554,8 @@ static int check_bp_exists(struct btree_trans *trans,
 			   struct bch_backpointer bp,
 			   struct bkey_s_c orig_k,
 			   struct bpos bucket_start,
-			   struct bpos bucket_end)
+			   struct bpos bucket_end,
+			   struct bpos *last_flushed_pos)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter alloc_iter, bp_iter = { NULL };

@@ -677,8 +599,15 @@ static int check_bp_exists(struct btree_trans *trans,
 		goto err;
 
 	if (bp_k.k->type != KEY_TYPE_backpointer ||
-	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp)))
+	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
+		if (!bpos_eq(*last_flushed_pos, orig_k.k->p)) {
+			*last_flushed_pos = orig_k.k->p;
+			ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+				-BCH_ERR_transaction_restart_write_buffer_flush;
+			goto out;
+		}
 		goto missing;
+	}
 out:
 err:
 fsck_err:
@@ -687,11 +616,12 @@ fsck_err:
 	printbuf_exit(&buf);
 	return ret;
 missing:
 	prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
 	       bch2_btree_ids[bp.btree_id], bp.level);
 	bch2_bkey_val_to_text(&buf, c, orig_k);
-	prt_printf(&buf, "\nin alloc key ");
-	bch2_bkey_val_to_text(&buf, c, alloc_k);
+	prt_printf(&buf, "\nbp pos ");
+	bch2_bpos_to_text(&buf, bp_iter.pos);
 
 	if (c->sb.version < bcachefs_metadata_version_backpointers ||
 	    c->opts.reconstruct_alloc ||

@@ -699,8 +629,7 @@ missing:
 		struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, alloc_k);
 
 		ret = PTR_ERR_OR_ZERO(a) ?:
-			bch2_bucket_backpointer_add(trans, a, bp, orig_k) ?:
-			bch2_trans_update(trans, &alloc_iter, &a->k_i, 0);
+			bch2_bucket_backpointer_mod(trans, a, bp, orig_k, true);
 	}
 
 	goto out;
@@ -709,7 +638,8 @@ missing:
 static int check_extent_to_backpointers(struct btree_trans *trans,
 					struct btree_iter *iter,
 					struct bpos bucket_start,
-					struct bpos bucket_end)
+					struct bpos bucket_end,
+					struct bpos *last_flushed_pos)
 {
 	struct bch_fs *c = trans->c;
 	struct bkey_ptrs_c ptrs;

@@ -736,7 +666,9 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
 		bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level,
 				      k, p, &bucket_pos, &bp);
 
-		ret = check_bp_exists(trans, bucket_pos, bp, k, bucket_start, bucket_end);
+		ret = check_bp_exists(trans, bucket_pos, bp, k,
+				      bucket_start, bucket_end,
+				      last_flushed_pos);
 		if (ret)
 			return ret;
 	}
@@ -747,7 +679,8 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
 static int check_btree_root_to_backpointers(struct btree_trans *trans,
 					    enum btree_id btree_id,
 					    struct bpos bucket_start,
-					    struct bpos bucket_end)
+					    struct bpos bucket_end,
+					    struct bpos *last_flushed_pos)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter;

@@ -779,7 +712,9 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
 		bch2_extent_ptr_to_bp(c, iter.btree_id, iter.path->level + 1,
 				      k, p, &bucket_pos, &bp);
 
-		ret = check_bp_exists(trans, bucket_pos, bp, k, bucket_start, bucket_end);
+		ret = check_bp_exists(trans, bucket_pos, bp, k,
+				      bucket_start, bucket_end,
+				      last_flushed_pos);
 		if (ret)
 			goto err;
 	}
@@ -859,6 +794,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
 {
 	struct btree_iter iter;
 	enum btree_id btree_id;
+	struct bpos last_flushed_pos = SPOS_MAX;
 	int ret = 0;
 
 	for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {

@@ -874,7 +810,8 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
 					BTREE_INSERT_LAZY_RW|
 					BTREE_INSERT_NOFAIL,
 					check_extent_to_backpointers(trans, &iter,
-								bucket_start, bucket_end));
+								bucket_start, bucket_end,
+								&last_flushed_pos));
 			if (ret)
 				break;
 		} while (!bch2_btree_iter_advance(&iter));

@@ -888,7 +825,8 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
 				      BTREE_INSERT_LAZY_RW|
 				      BTREE_INSERT_NOFAIL,
 				      check_btree_root_to_backpointers(trans, btree_id,
-								bucket_start, bucket_end));
+								bucket_start, bucket_end,
+								&last_flushed_pos));
 		if (ret)
 			break;
 	}

libbcachefs/backpointers.h

@@ -2,6 +2,8 @@
 #ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
 #define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
 
+#include "btree_iter.h"
+#include "btree_update.h"
 #include "buckets.h"
 #include "super.h"
@@ -19,6 +21,81 @@ void bch2_backpointer_swab(struct bkey_s);
 
 #define MAX_EXTENT_COMPRESS_RATIO_SHIFT		10
 
+/*
+ * Convert from pos in backpointer btree to pos of corresponding bucket in alloc
+ * btree:
+ */
+static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c,
+					   struct bpos bp_pos)
+{
+	struct bch_dev *ca = bch_dev_bkey_exists(c, bp_pos.inode);
+	u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
+
+	return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
+}
+
+/*
+ * Convert from pos in alloc btree + bucket offset to pos in backpointer btree:
+ */
+static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
+					   struct bpos bucket,
+					   u64 bucket_offset)
+{
+	struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
+	struct bpos ret;
+
+	ret = POS(bucket.inode,
+		  (bucket_to_sector(ca, bucket.offset) <<
+		   MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+
+	BUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
+
+	return ret;
+}
+
+bool bch2_bucket_backpointer_del(struct btree_trans *,
+				 struct bkey_i_alloc_v4 *,
+				 struct bch_backpointer);
+
+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *,
+				struct bkey_i_alloc_v4 *,
+				struct bch_backpointer, struct bkey_s_c, bool);
+
+static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
+				struct bkey_i_alloc_v4 *a,
+				struct bch_backpointer bp,
+				struct bkey_s_c orig_k,
+				bool insert)
+{
+	struct bch_fs *c = trans->c;
+	struct bkey_i_backpointer *bp_k;
+	int ret;
+
+	if (!insert &&
+	    unlikely(BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v)) &&
+	    bch2_bucket_backpointer_del(trans, a, bp))
+		return 0;
+
+	if (unlikely(bch2_backpointers_no_use_write_buffer))
+		return bch2_bucket_backpointer_mod_nowritebuffer(trans, a, bp, orig_k, insert);
+
+	bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
+	ret = PTR_ERR_OR_ZERO(bp_k);
+	if (ret)
+		return ret;
+
+	bkey_backpointer_init(&bp_k->k_i);
+	bp_k->k.p = bucket_pos_to_bp(c, a->k.p, bp.bucket_offset);
+	bp_k->v = bp;
+
+	if (!insert) {
+		bp_k->k.type = KEY_TYPE_deleted;
+		set_bkey_val_u64s(&bp_k->k, 0);
+	}
+
+	return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i);
+}
+
 static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
 			   enum btree_id btree_id, unsigned level,
 			   struct bkey_s_c k, struct extent_ptr_decoded p,

@@ -40,10 +117,6 @@ static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
 	};
 }
 
-int bch2_bucket_backpointer_del(struct btree_trans *, struct bkey_i_alloc_v4 *,
-				struct bch_backpointer, struct bkey_s_c);
-int bch2_bucket_backpointer_add(struct btree_trans *, struct bkey_i_alloc_v4 *,
-				struct bch_backpointer, struct bkey_s_c);
 int bch2_get_next_backpointer(struct btree_trans *, struct bpos, int,
 			      u64 *, struct bch_backpointer *, unsigned);
 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *,
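Editorial aside: bp_pos_to_bucket() and bucket_pos_to_bp() above are exact inverses as long as bucket_offset stays below the bucket's sector count shifted left by MAX_EXTENT_COMPRESS_RATIO_SHIFT, which is what lets bucket_pos_to_bp() BUG_ON() the round trip. A standalone sketch of just the arithmetic, with hypothetical fixed-geometry stand-ins for sector_to_bucket()/bucket_to_sector() (not the real device-geometry helpers):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_EXTENT_COMPRESS_RATIO_SHIFT	10

/* Assumed fixed geometry for the sketch: 2048 sectors per bucket. */
static const uint64_t bucket_size = 2048;

static uint64_t bucket_to_sector(uint64_t bucket) { return bucket * bucket_size; }
static uint64_t sector_to_bucket(uint64_t sector) { return sector / bucket_size; }

/* bucket + offset -> backpointer btree offset, as in bucket_pos_to_bp(): */
static uint64_t bp_offset(uint64_t bucket, uint64_t bucket_offset)
{
	return (bucket_to_sector(bucket) << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset;
}

/* backpointer btree offset -> bucket, as in bp_pos_to_bucket(): */
static uint64_t bp_bucket(uint64_t offset)
{
	return sector_to_bucket(offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT);
}

int main(void)
{
	/* bucket_offset may exceed the bucket's sector count by up to the
	 * max compression ratio - hence the shift: */
	uint64_t bucket = 42;
	uint64_t bucket_offset = (bucket_size << MAX_EXTENT_COMPRESS_RATIO_SHIFT) - 1;

	assert(bp_bucket(bp_offset(bucket, bucket_offset)) == bucket);
	printf("bucket %llu offset %llu -> bp offset %llu\n",
	       (unsigned long long) bucket,
	       (unsigned long long) bucket_offset,
	       (unsigned long long) bp_offset(bucket, bucket_offset));
	return 0;
}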

libbcachefs/bcachefs.h

@@ -316,7 +316,10 @@ do {									\
 			  "done in memory")				\
 	BCH_DEBUG_PARAM(verify_all_btree_replicas,			\
 			"When reading btree nodes, read all replicas and "\
-			"compare them")
+			"compare them")					\
+	BCH_DEBUG_PARAM(backpointers_no_use_write_buffer,		\
+			"Don't use the write buffer for backpointers, enabling "\
+			"extra runtime checks")
 
 /* Parameters that should only be compiled in debug mode: */
 #define BCH_DEBUG_PARAMS_DEBUG()					\

@@ -393,6 +396,7 @@ enum bch_time_stats {
 
 #include "alloc_types.h"
 #include "btree_types.h"
+#include "btree_write_buffer_types.h"
 #include "buckets_types.h"
 #include "buckets_waiting_for_journal_types.h"
 #include "clock_types.h"

@@ -581,6 +585,7 @@ struct btree_transaction_stats {
 	struct bch2_time_stats	lock_hold_times;
 	struct mutex		lock;
 	unsigned		nr_max_paths;
+	unsigned		wb_updates_size;
 	unsigned		max_mem;
 	char			*max_paths_text;
 };

@@ -775,6 +780,9 @@ struct bch_fs {
 	struct workqueue_struct	*btree_interior_update_worker;
 	struct work_struct	btree_interior_update_work;
 
+	struct list_head	pending_node_rewrites;
+	struct mutex		pending_node_rewrites_lock;
+
 	/* btree_io.c: */
 	spinlock_t		btree_write_error_lock;
 	struct btree_write_stats {

@@ -795,6 +803,8 @@ struct bch_fs {
 	struct btree_key_cache	btree_key_cache;
 	unsigned		btree_key_cache_btrees;
 
+	struct btree_write_buffer btree_write_buffer;
+
 	struct workqueue_struct	*btree_update_wq;
 	struct workqueue_struct	*btree_io_complete_wq;
 	/* copygc needs its own workqueue for index updates.. */

libbcachefs/bcachefs_format.h

@@ -1472,7 +1472,8 @@ struct bch_sb_field_disk_groups {
 	x(trans_traverse_all,					71)	\
 	x(transaction_commit,					72)	\
 	x(write_super,						73)	\
-	x(trans_restart_would_deadlock_recursion_limit,		74)
+	x(trans_restart_would_deadlock_recursion_limit,		74)	\
+	x(trans_restart_write_buffer_flush,			75)
 
 enum bch_persistent_counters {
 #define x(t, n, ...) BCH_COUNTER_##t,

@@ -1707,6 +1708,7 @@ LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
 LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
 LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
 LE64_BITMASK(BCH_SB_NOCOW,		struct bch_sb, flags[4], 33, 34);
+LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE,	struct bch_sb, flags[4], 34, 54);
 
 /*
  * Features:

libbcachefs/btree_io.c

@@ -1246,9 +1246,7 @@ start:
 	bio_put(&rb->bio);
 	printbuf_exit(&buf);
 
-	if (!btree_node_read_error(b) &&
-	    (saw_error ||
-	     btree_node_need_rewrite(b))) {
+	if (saw_error && !btree_node_read_error(b)) {
 		struct printbuf buf = PRINTBUF;
 
 		bch2_bpos_to_text(&buf, b->key.k.p);

libbcachefs/btree_iter.c

@@ -1374,6 +1374,7 @@ noinline __cold
 void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
 {
 	struct btree_insert_entry *i;
+	struct btree_write_buffered_key *wb;
 
 	prt_printf(buf, "transaction updates for %s journal seq %llu",
 	       trans->fn, trans->journal_res.seq);

@@ -1398,6 +1399,17 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
 		prt_newline(buf);
 	}
 
+	trans_for_each_wb_update(trans, wb) {
+		prt_printf(buf, "update: btree=%s wb=1 %pS",
+		       bch2_btree_ids[wb->btree],
+		       (void *) i->ip_allocated);
+		prt_newline(buf);
+
+		prt_printf(buf, "  new ");
+		bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(&wb->k));
+		prt_newline(buf);
+	}
+
 	printbuf_indent_sub(buf, 2);
 }

@@ -2931,8 +2943,11 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
 			trans->mem_bytes = expected_mem_bytes;
 		}
 	}
-	if (s)
+
+	if (s) {
 		trans->nr_max_paths = s->nr_max_paths;
+		trans->wb_updates_size = s->wb_updates_size;
+	}
 
 	trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
 	trans->srcu_lock_time	= jiffies;

libbcachefs/btree_types.h

@@ -9,6 +9,7 @@
 //#include "bkey_methods.h"
 #include "buckets_types.h"
 #include "darray.h"
+#include "errcode.h"
 #include "journal_types.h"
 #include "replicas_types.h"

@@ -429,6 +430,8 @@ struct btree_trans {
 	u8			fn_idx;
 	u8			nr_sorted;
 	u8			nr_updates;
+	u8			nr_wb_updates;
+	u8			wb_updates_size;
 	bool			used_mempool:1;
 	bool			in_traverse_all:1;
 	bool			paths_sorted:1;

@@ -459,6 +462,7 @@ struct btree_trans {
 	u8			sorted[BTREE_ITER_MAX + 8];
 	struct btree_path	*paths;
 	struct btree_insert_entry *updates;
+	struct btree_write_buffered_key *wb_updates;
 
 	/* update path: */
 	struct btree_trans_commit_hook *hooks;

libbcachefs/btree_update.h

@@ -15,6 +15,9 @@ bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
 				struct bkey_i *);
 void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
 
+void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
+				struct bkey_i *, u64);
+
 enum btree_insert_flags {
 	/* First two bits for journal watermark: */
 	__BTREE_INSERT_NOFAIL = 2,

@@ -55,7 +58,8 @@ int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
 				unsigned, unsigned);
 int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
 
-int __bch2_btree_insert(struct btree_trans *, enum btree_id, struct bkey_i *);
+int __bch2_btree_insert(struct btree_trans *, enum btree_id, struct bkey_i *,
+			enum btree_update_flags);
 int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
 		     struct disk_reservation *, u64 *, int flags);
 
@@ -77,6 +81,8 @@ int bch2_trans_update_extent(struct btree_trans *, struct btree_iter *,
 
 int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
 				   struct bkey_i *, enum btree_update_flags);
+int __must_check bch2_trans_update_buffered(struct btree_trans *,
+					    enum btree_id, struct bkey_i *);
 
 void bch2_trans_commit_hook(struct btree_trans *,
 			    struct btree_trans_commit_hook *);

@@ -142,6 +148,11 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
 	     (_i) < (_trans)->updates + (_trans)->nr_updates;		\
 	     (_i)++)
 
+#define trans_for_each_wb_update(_trans, _i)				\
+	for ((_i) = (_trans)->wb_updates;				\
+	     (_i) < (_trans)->wb_updates + (_trans)->nr_wb_updates;	\
+	     (_i)++)
+
 static inline void bch2_trans_reset_updates(struct btree_trans *trans)
 {
 	struct btree_insert_entry *i;

@@ -151,6 +162,8 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans)
 
 	trans->extra_journal_res	= 0;
 	trans->nr_updates		= 0;
+	trans->nr_wb_updates		= 0;
+	trans->wb_updates		= NULL;
 	trans->hooks			= NULL;
 	trans->extra_journal_entries.nr	= 0;

libbcachefs/btree_update_interior.c

@@ -1997,6 +1997,7 @@ err:
 struct async_btree_rewrite {
 	struct bch_fs		*c;
 	struct work_struct	work;
+	struct list_head	list;
 	enum btree_id		btree_id;
 	unsigned		level;
 	struct bpos		pos;

@@ -2056,15 +2057,10 @@ void async_btree_node_rewrite_work(struct work_struct *work)
 void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
 {
 	struct async_btree_rewrite *a;
+	int ret;
 
-	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
-		bch_err(c, "%s: error getting c->writes ref", __func__);
-		return;
-	}
-
 	a = kmalloc(sizeof(*a), GFP_NOFS);
 	if (!a) {
-		bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
 		bch_err(c, "%s: error allocating memory", __func__);
 		return;
 	}

@@ -2074,11 +2070,63 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
 	a->level	= b->c.level;
 	a->pos		= b->key.k.p;
 	a->seq		= b->data->keys.seq;
-
 	INIT_WORK(&a->work, async_btree_node_rewrite_work);
 
+	if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) {
+		mutex_lock(&c->pending_node_rewrites_lock);
+		list_add(&a->list, &c->pending_node_rewrites);
+		mutex_unlock(&c->pending_node_rewrites_lock);
+		return;
+	}
+
+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
+		if (test_bit(BCH_FS_STARTED, &c->flags)) {
+			bch_err(c, "%s: error getting c->writes ref", __func__);
+			kfree(a);
+			return;
+		}
+
+		ret = bch2_fs_read_write_early(c);
+		if (ret) {
+			bch_err(c, "%s: error going read-write: %s",
+				__func__, bch2_err_str(ret));
+			kfree(a);
+			return;
+		}
+
+		bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
+	}
+
 	queue_work(c->btree_interior_update_worker, &a->work);
 }
 
+void bch2_do_pending_node_rewrites(struct bch_fs *c)
+{
+	struct async_btree_rewrite *a, *n;
+
+	mutex_lock(&c->pending_node_rewrites_lock);
+	list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) {
+		list_del(&a->list);
+
+		bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
+		queue_work(c->btree_interior_update_worker, &a->work);
+	}
+	mutex_unlock(&c->pending_node_rewrites_lock);
+}
+
+void bch2_free_pending_node_rewrites(struct bch_fs *c)
+{
+	struct async_btree_rewrite *a, *n;
+
+	mutex_lock(&c->pending_node_rewrites_lock);
+	list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) {
+		list_del(&a->list);
+
+		kfree(a);
+	}
+	mutex_unlock(&c->pending_node_rewrites_lock);
+}
+
 static int __bch2_btree_node_update_key(struct btree_trans *trans,
 					struct btree_iter *iter,
 					struct btree *b, struct btree *new_hash,

@@ -2416,6 +2464,9 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c)
 	mutex_init(&c->btree_interior_update_lock);
 	INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work);
 
+	INIT_LIST_HEAD(&c->pending_node_rewrites);
+	mutex_init(&c->pending_node_rewrites_lock);
+
 	c->btree_interior_update_worker =
 		alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
 	if (!c->btree_interior_update_worker)

libbcachefs/btree_update_interior.h

@@ -318,6 +318,9 @@ void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *);
 struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
 					struct jset_entry *, struct jset_entry *);
 
+void bch2_do_pending_node_rewrites(struct bch_fs *);
+void bch2_free_pending_node_rewrites(struct bch_fs *);
+
 void bch2_fs_btree_interior_update_exit(struct bch_fs *);
 int bch2_fs_btree_interior_update_init(struct bch_fs *);

libbcachefs/btree_update_leaf.c

@@ -8,6 +8,7 @@
 #include "btree_iter.h"
 #include "btree_key_cache.h"
 #include "btree_locking.h"
+#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "debug.h"
 #include "errcode.h"

@@ -100,9 +101,6 @@ inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 
-	if (path->cached)
-		return;
-
 	if (unlikely(btree_node_just_written(b)) &&
 	    bch2_btree_post_write_cleanup(c, b))
 		bch2_trans_node_reinit_iter(trans, b);

@@ -252,25 +250,26 @@ inline void bch2_btree_add_journal_pin(struct bch_fs *c,
 /**
  * btree_insert_key - insert one key into a leaf node
  */
-static void btree_insert_key_leaf(struct btree_trans *trans,
-				  struct btree_insert_entry *insert)
+inline void bch2_btree_insert_key_leaf(struct btree_trans *trans,
+				       struct btree_path *path,
+				       struct bkey_i *insert,
+				       u64 journal_seq)
 {
 	struct bch_fs *c = trans->c;
-	struct btree *b = insert_l(insert)->b;
+	struct btree *b = path_l(path)->b;
 	struct bset_tree *t = bset_tree_last(b);
 	struct bset *i = bset(b, t);
 	int old_u64s = bset_u64s(t);
 	int old_live_u64s = b->nr.live_u64s;
 	int live_u64s_added, u64s_added;
 
-	if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
-					&insert_l(insert)->iter, insert->k)))
+	if (unlikely(!bch2_btree_bset_insert_key(trans, path, b,
+					&path_l(path)->iter, insert)))
 		return;
 
-	i->journal_seq = cpu_to_le64(max(trans->journal_res.seq,
-					 le64_to_cpu(i->journal_seq)));
+	i->journal_seq = cpu_to_le64(max(journal_seq, le64_to_cpu(i->journal_seq)));
 
-	bch2_btree_add_journal_pin(c, b, trans->journal_res.seq);
+	bch2_btree_add_journal_pin(c, b, journal_seq);
 
 	if (unlikely(!btree_node_dirty(b)))
 		set_btree_node_dirty_acct(c, b);

@@ -288,6 +287,12 @@ static void btree_insert_key_leaf(struct btree_trans *trans,
 		bch2_trans_node_reinit_iter(trans, b);
 }
 
+static void btree_insert_key_leaf(struct btree_trans *trans,
+				  struct btree_insert_entry *insert)
+{
+	bch2_btree_insert_key_leaf(trans, insert->path, insert->k, trans->journal_res.seq);
+}
+
 /* Cached btree updates: */
 
 /* Normal update interface: */
@@ -594,6 +599,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i;
+	struct btree_write_buffered_key *wb;
 	struct btree_trans_commit_hook *h;
 	unsigned u64s = 0;
 	bool marking = false;

@@ -638,6 +644,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 		marking = true;
 	}
 
+	if (trans->nr_wb_updates &&
+	    trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size)
+		return -BCH_ERR_btree_insert_need_flush_buffer;
+
 	/*
 	 * Don't get journal reservation until after we know insert will
 	 * succeed:

@@ -674,17 +684,25 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 	    bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
 		return -BCH_ERR_btree_insert_need_mark_replicas;
 
+	if (trans->nr_wb_updates) {
+		EBUG_ON(flags & BTREE_INSERT_JOURNAL_REPLAY);
+
+		ret = bch2_btree_insert_keys_write_buffer(trans);
+		if (ret)
+			goto revert_fs_usage;
+	}
+
 	trans_for_each_update(trans, i)
 		if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) {
 			ret = run_one_mem_trigger(trans, i, i->flags);
 			if (ret)
-				return ret;
+				goto fatal_err;
 		}
 
 	if (unlikely(c->gc_pos.phase)) {
 		ret = bch2_trans_commit_run_gc_triggers(trans);
 		if (ret)
-			return ret;
+			goto fatal_err;
 	}
 
 	if (unlikely(trans->extra_journal_entries.nr)) {

@@ -697,10 +715,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 	}
 
 	if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
-		trans_for_each_update(trans, i) {
-			struct journal *j = &c->journal;
-			struct jset_entry *entry;
+		struct journal *j = &c->journal;
+		struct jset_entry *entry;
 
+		trans_for_each_update(trans, i) {
 			if (i->key_cache_already_flushed)
 				continue;
 
@@ -725,6 +743,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 			bkey_copy(&entry->start[0], i->k);
 		}
 
+		trans_for_each_wb_update(trans, wb) {
+			entry = bch2_journal_add_entry(j, &trans->journal_res,
+					       BCH_JSET_ENTRY_btree_keys,
+					       wb->btree, 0,
+					       wb->k.k.u64s);
+			bkey_copy(&entry->start[0], &wb->k);
+		}
+
 		if (trans->journal_seq)
 			*trans->journal_seq = trans->journal_res.seq;
 	}

@@ -742,6 +768,12 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 		}
 	}
 
+	return 0;
+fatal_err:
+	bch2_fatal_error(c);
+revert_fs_usage:
+	if (trans->fs_usage_deltas)
+		bch2_trans_fs_usage_revert(trans, trans->fs_usage_deltas);
 	return ret;
 }
@@ -769,7 +801,8 @@ static inline int trans_lock_write(struct btree_trans *trans)
 		if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
 			return trans_lock_write_fail(trans, i);
 
-		bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
+		if (!i->cached)
+			bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
 	}
 
 	return 0;

@@ -778,9 +811,13 @@ static inline int trans_lock_write(struct btree_trans *trans)
 static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
 {
 	struct btree_insert_entry *i;
+	struct btree_write_buffered_key *wb;
 
 	trans_for_each_update(trans, i)
 		bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p);
+
+	trans_for_each_wb_update(trans, wb)
+		bch2_journal_key_overwritten(trans->c, wb->btree, 0, wb->k.k.p);
 }
 
 #ifdef CONFIG_BCACHEFS_DEBUG

@@ -821,10 +858,11 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i;
-	struct printbuf buf = PRINTBUF;
 	int ret, u64s_delta = 0;
 
 #ifdef CONFIG_BCACHEFS_DEBUG
+	struct printbuf buf = PRINTBUF;
+
 	trans_for_each_update(trans, i) {
 		int rw = (flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE;
 
@@ -833,8 +871,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
 			return bch2_trans_commit_bkey_invalid(trans, flags, i, &buf);
 		btree_insert_entry_checks(trans, i);
 	}
-#endif
 	printbuf_exit(&buf);
+#endif
 
 	trans_for_each_update(trans, i) {
 		if (i->cached)
@@ -962,6 +1000,30 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
 		if (ret)
 			trace_and_count(c, trans_restart_journal_reclaim, trans, trace_ip);
 		break;
+	case -BCH_ERR_btree_insert_need_flush_buffer: {
+		struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+		ret = 0;
+
+		if (wb->state.nr > wb->size * 3 / 4) {
+			bch2_trans_reset_updates(trans);
+			bch2_trans_unlock(trans);
+
+			mutex_lock(&wb->flush_lock);
+
+			if (wb->state.nr > wb->size * 3 / 4)
+				ret = __bch2_btree_write_buffer_flush(trans,
+						flags|BTREE_INSERT_NOCHECK_RW, true);
+			else
+				mutex_unlock(&wb->flush_lock);
+
+			if (!ret) {
+				trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
+				ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
+			}
+		}
+		break;
+	}
 	default:
 		BUG_ON(ret >= 0);
 		break;
@@ -1023,10 +1085,12 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i = NULL;
+	struct btree_write_buffered_key *wb;
 	unsigned u64s;
 	int ret = 0;
 
 	if (!trans->nr_updates &&
+	    !trans->nr_wb_updates &&
 	    !trans->extra_journal_entries.nr)
 		goto out_reset;
 
@@ -1049,6 +1113,20 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
 		goto out_reset;
 	}
 
+	if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 &&
+	    mutex_trylock(&c->btree_write_buffer.flush_lock)) {
+		bch2_trans_begin(trans);
+		bch2_trans_unlock(trans);
+
+		ret = __bch2_btree_write_buffer_flush(trans,
+					flags|BTREE_INSERT_NOCHECK_RW, true);
+		if (!ret) {
+			trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
+			ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
+		}
+		goto out;
+	}
+
 	EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
 
 	memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));

@@ -1089,6 +1167,9 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
 			trans->journal_u64s += jset_u64s(i->old_k.u64s);
 	}
 
+	trans_for_each_wb_update(trans, wb)
+		trans->journal_u64s += jset_u64s(wb->k.k.u64s);
+
 	if (trans->extra_journal_res) {
 		ret = bch2_disk_reservation_add(c, trans->disk_res,
 				trans->extra_journal_res,
@@ -1606,6 +1687,59 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
 	return bch2_trans_update_by_path(trans, path, k, flags);
 }
 
+int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
+					    enum btree_id btree,
+					    struct bkey_i *k)
+{
+	struct btree_write_buffered_key *i;
+	int ret;
+
+	EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size);
+	EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
+
+	trans_for_each_wb_update(trans, i) {
+		if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) {
+			bkey_copy(&i->k, k);
+			return 0;
+		}
+	}
+
+	if (!trans->wb_updates ||
+	    trans->nr_wb_updates == trans->wb_updates_size) {
+		struct btree_write_buffered_key *u;
+
+		if (trans->nr_wb_updates == trans->wb_updates_size) {
+			struct btree_transaction_stats *s = btree_trans_stats(trans);
+
+			BUG_ON(trans->wb_updates_size > U8_MAX / 2);
+			trans->wb_updates_size = max(1, trans->wb_updates_size * 2);
+			if (s)
+				s->wb_updates_size = trans->wb_updates_size;
+		}
+
+		u = bch2_trans_kmalloc_nomemzero(trans,
+					trans->wb_updates_size *
+					sizeof(struct btree_write_buffered_key));
+		ret = PTR_ERR_OR_ZERO(u);
+		if (ret)
+			return ret;
+
+		if (trans->nr_wb_updates)
+			memcpy(u, trans->wb_updates, trans->nr_wb_updates *
+			       sizeof(struct btree_write_buffered_key));
+		trans->wb_updates = u;
+	}
+
+	trans->wb_updates[trans->nr_wb_updates] = (struct btree_write_buffered_key) {
+		.btree	= btree,
+	};
+
+	bkey_copy(&trans->wb_updates[trans->nr_wb_updates].k, k);
+	trans->nr_wb_updates++;
+
+	return 0;
+}
+
 void bch2_trans_commit_hook(struct btree_trans *trans,
 			    struct btree_trans_commit_hook *h)
 {
@@ -1614,7 +1748,8 @@ void bch2_trans_commit_hook(struct btree_trans *trans,
 }
 
 int __bch2_btree_insert(struct btree_trans *trans,
-			enum btree_id id, struct bkey_i *k)
+			enum btree_id id,
+			struct bkey_i *k, enum btree_update_flags flags)
 {
 	struct btree_iter iter;
 	int ret;

@@ -1622,7 +1757,7 @@ int __bch2_btree_insert(struct btree_trans *trans,
 	bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k),
 			     BTREE_ITER_INTENT);
 	ret   = bch2_btree_iter_traverse(&iter) ?:
-		bch2_trans_update(trans, &iter, k, 0);
+		bch2_trans_update(trans, &iter, k, flags);
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }

@@ -1640,7 +1775,7 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
 		      u64 *journal_seq, int flags)
 {
 	return bch2_trans_do(c, disk_res, journal_seq, flags,
-			     __bch2_btree_insert(&trans, id, k));
+			     __bch2_btree_insert(&trans, id, k, 0));
 }
 
 int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter,
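Editorial aside: bch2_trans_update_buffered() lets a caller queue a key without holding a btree iterator, which is exactly how bch2_bucket_backpointer_mod() in backpointers.h above uses it. A minimal sketch of a caller, assuming an already-encoded key; the function name is illustrative and not part of this commit:

/*
 * bch2_trans_do() opens a transaction named `trans`, runs the expression,
 * commits, and retries on transaction restart - which is how the
 * write-buffer-flush restart raised by a full buffer gets absorbed.
 */
static int example_buffered_insert(struct bch_fs *c, struct bkey_i *k)
{
	return bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
			     bch2_trans_update_buffered(&trans, BTREE_ID_backpointers, k));
}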

libbcachefs/btree_write_buffer.c (new file)

@@ -0,0 +1,328 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "error.h"
#include "journal.h"
#include "journal_reclaim.h"
#include <linux/sort.h>
static int btree_write_buffered_key_cmp(const void *_l, const void *_r)
{
const struct btree_write_buffered_key *l = _l;
const struct btree_write_buffered_key *r = _r;
return cmp_int(l->btree, r->btree) ?:
bpos_cmp(l->k.k.p, r->k.k.p) ?:
cmp_int(l->journal_seq, r->journal_seq) ?:
cmp_int(l->journal_offset, r->journal_offset);
}
static int btree_write_buffered_journal_cmp(const void *_l, const void *_r)
{
const struct btree_write_buffered_key *l = _l;
const struct btree_write_buffered_key *r = _r;
return cmp_int(l->journal_seq, r->journal_seq);
}
static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
struct btree_iter *iter,
struct btree_write_buffered_key *wb,
unsigned commit_flags,
bool *write_locked,
size_t *fast)
{
struct bch_fs *c = trans->c;
struct btree_path *path;
int ret;
ret = bch2_btree_iter_traverse(iter);
if (ret)
return ret;
path = iter->path;
if (!*write_locked) {
ret = bch2_btree_node_lock_write(trans, path, &path->l[0].b->c);
if (ret)
return ret;
bch2_btree_node_prep_for_write(trans, path, path->l[0].b);
*write_locked = true;
}
if (!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s)) {
bch2_btree_node_unlock_write(trans, path, path->l[0].b);
*write_locked = false;
goto trans_commit;
}
bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
(*fast)++;
return 0;
trans_commit:
return bch2_trans_update(trans, iter, &wb->k, 0) ?:
bch2_trans_commit(trans, NULL, NULL,
commit_flags|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_RECLAIM);
}
static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
{
union btree_write_buffer_state old, new;
u64 v = READ_ONCE(wb->state.v);
do {
old.v = new.v = v;
new.nr = 0;
new.idx++;
} while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);
while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1)
cpu_relax();
return old;
}
int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags,
bool locked)
{
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct journal_entry_pin pin;
struct btree_write_buffered_key *i, *dst, *keys;
struct btree_iter iter = { NULL };
size_t nr = 0, skipped = 0, fast = 0;
bool write_locked = false;
union btree_write_buffer_state s;
int ret = 0;
memset(&pin, 0, sizeof(pin));
if (!locked && !mutex_trylock(&wb->flush_lock))
return 0;
bch2_journal_pin_copy(j, &pin, &wb->journal_pin, NULL);
bch2_journal_pin_drop(j, &wb->journal_pin);
s = btree_write_buffer_switch(wb);
keys = wb->keys[s.idx];
nr = s.nr;
/*
* We first sort so that we can detect and skip redundant updates, and
* then we attempt to flush in sorted btree order, as this is most
* efficient.
*
* However, since we're not flushing in the order they appear in the
* journal we won't be able to drop our journal pin until everything is
* flushed - which means this could deadlock the journal, if we weren't
* passing BTREE_INSERT_JOURNAL_RECLAIM. This causes the update to fail
* if it would block taking a journal reservation.
*
* If that happens, we sort them by the order they appeared in the
* journal - after dropping redundant entries - and then restart
* flushing, this time dropping journal pins as we go.
*/
sort(keys, nr, sizeof(keys[0]),
btree_write_buffered_key_cmp, NULL);
for (i = keys; i < keys + nr; i++) {
if (i + 1 < keys + nr &&
i[0].btree == i[1].btree &&
bpos_eq(i[0].k.k.p, i[1].k.k.p)) {
skipped++;
continue;
}
if (write_locked &&
(iter.path->btree_id != i->btree ||
bpos_gt(i->k.k.p, iter.path->l[0].b->key.k.p))) {
bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
write_locked = false;
}
if (!iter.path || iter.path->btree_id != i->btree) {
bch2_trans_iter_exit(trans, &iter);
bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p, BTREE_ITER_INTENT);
}
bch2_btree_iter_set_pos(&iter, i->k.k.p);
iter.path->preserve = false;
do {
ret = bch2_btree_write_buffer_flush_one(trans, &iter, i,
commit_flags, &write_locked, &fast);
if (!write_locked)
bch2_trans_begin(trans);
} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
if (ret)
break;
}
if (write_locked)
bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
bch2_trans_iter_exit(trans, &iter);
trace_write_buffer_flush(trans, nr, skipped, fast, wb->size);
if (ret == -BCH_ERR_journal_reclaim_would_deadlock)
goto slowpath;
bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
out:
bch2_journal_pin_drop(j, &pin);
mutex_unlock(&wb->flush_lock);
return ret;
slowpath:
trace_write_buffer_flush_slowpath(trans, i - keys, nr);
dst = keys;
for (; i < keys + nr; i++) {
if (i + 1 < keys + nr &&
i[0].btree == i[1].btree &&
bpos_eq(i[0].k.k.p, i[1].k.k.p))
continue;
*dst = *i;
dst++;
}
nr = dst - keys;
sort(keys, nr, sizeof(keys[0]),
btree_write_buffered_journal_cmp,
NULL);
for (i = keys; i < keys + nr; i++) {
if (i->journal_seq > pin.seq) {
struct journal_entry_pin pin2;
memset(&pin2, 0, sizeof(pin2));
bch2_journal_pin_add(j, i->journal_seq, &pin2, NULL);
bch2_journal_pin_drop(j, &pin);
bch2_journal_pin_copy(j, &pin, &pin2, NULL);
bch2_journal_pin_drop(j, &pin2);
}
ret = commit_do(trans, NULL, NULL,
commit_flags|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_RECLAIM|
JOURNAL_WATERMARK_reserved,
__bch2_btree_insert(trans, i->btree, &i->k, 0));
if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)))
break;
}
goto out;
}
int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
{
bch2_trans_unlock(trans);
mutex_lock(&trans->c->btree_write_buffer.flush_lock);
return __bch2_btree_write_buffer_flush(trans, 0, true);
}
int bch2_btree_write_buffer_flush(struct btree_trans *trans)
{
return __bch2_btree_write_buffer_flush(trans, 0, false);
}
static int bch2_btree_write_buffer_journal_flush(struct journal *j,
struct journal_entry_pin *_pin, u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct btree_write_buffer *wb = &c->btree_write_buffer;
mutex_lock(&wb->flush_lock);
return bch2_trans_run(c,
__bch2_btree_write_buffer_flush(&trans, BTREE_INSERT_NOCHECK_RW, true));
}
static inline u64 btree_write_buffer_ref(int idx)
{
	return ((union btree_write_buffer_state) {
		.ref0 = idx == 0,
		.ref1 = idx == 1,
	}).v;
}

int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct btree_write_buffer *wb = &c->btree_write_buffer;
	struct btree_write_buffered_key *i;
	union btree_write_buffer_state old, new;
	int ret = 0;
	u64 v;

	trans_for_each_wb_update(trans, i) {
		EBUG_ON(i->k.k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);

		i->journal_seq		= trans->journal_res.seq;
		i->journal_offset	= trans->journal_res.offset;
	}

	preempt_disable();
	v = READ_ONCE(wb->state.v);
	do {
		old.v = new.v = v;

		new.v += btree_write_buffer_ref(new.idx);
		new.nr += trans->nr_wb_updates;
		if (new.nr > wb->size) {
			ret = -BCH_ERR_btree_insert_need_flush_buffer;
			goto out;
		}
	} while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v);

	memcpy(wb->keys[new.idx] + old.nr,
	       trans->wb_updates,
	       sizeof(trans->wb_updates[0]) * trans->nr_wb_updates);

	bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &wb->journal_pin,
			     bch2_btree_write_buffer_journal_flush);

	atomic64_sub_return_release(btree_write_buffer_ref(new.idx), &wb->state.counter);
out:
	preempt_enable();
	return ret;
}
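
The update path packs slot count, active buffer index and two per-buffer reference counts into a single u64 so that one 64-bit cmpxchg both reserves space and pins the active buffer while keys are copied in. A rough userspace model of that idiom using C11 atomics — names and sizes here are illustrative, not the kernel code:

/*
 * Userspace model (not bcachefs code) of the packed-state reservation:
 * one 64-bit CAS reserves n slots and takes a reference on the active
 * buffer; the reference is dropped with an atomic subtract once the
 * caller has finished copying its keys in.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

union wb_state {
	uint64_t v;
	struct {
		uint64_t nr:23;		/* slots used in the active buffer */
		uint64_t idx:1;		/* which of the two buffers is active */
		uint64_t ref0:20;	/* writers still copying into buffer 0 */
		uint64_t ref1:20;	/* writers still copying into buffer 1 */
	};
};

static _Atomic uint64_t state;

static uint64_t wb_ref(unsigned idx)
{
	union wb_state s = { .ref0 = idx == 0, .ref1 = idx == 1 };
	return s.v;
}

/* returns the slot index reserved, or -1 if the buffer is full */
static long wb_reserve(unsigned n, size_t size)
{
	union wb_state old, new;
	uint64_t v = atomic_load(&state);

	do {
		old.v = new.v = v;
		new.v += wb_ref(new.idx);	/* pin the active buffer */
		new.nr += n;
		if (new.nr > size)
			return -1;		/* caller must flush first */
	} while (!atomic_compare_exchange_weak(&state, &v, new.v));

	/* ... memcpy keys into buffer new.idx at offset old.nr ... */

	atomic_fetch_sub(&state, wb_ref(new.idx));	/* unpin */
	return old.nr;
}

int main(void)
{
	printf("reserved at %ld\n", wb_reserve(4, 8192));
	printf("reserved at %ld\n", wb_reserve(4, 8192));
	return 0;
}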
void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
{
	struct btree_write_buffer *wb = &c->btree_write_buffer;

	BUG_ON(wb->state.nr && !bch2_journal_error(&c->journal));

	kvfree(wb->keys[1]);
	kvfree(wb->keys[0]);
}

int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
{
	struct btree_write_buffer *wb = &c->btree_write_buffer;

	mutex_init(&wb->flush_lock);
	wb->size = c->opts.btree_write_buffer_size;

	wb->keys[0] = kvmalloc_array(wb->size, sizeof(*wb->keys[0]), GFP_KERNEL);
	wb->keys[1] = kvmalloc_array(wb->size, sizeof(*wb->keys[1]), GFP_KERNEL);
	if (!wb->keys[0] || !wb->keys[1])
		return -ENOMEM;

	return 0;
}


@@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H
#define _BCACHEFS_BTREE_WRITE_BUFFER_H

int __bch2_btree_write_buffer_flush(struct btree_trans *, unsigned, bool);
int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
int bch2_btree_write_buffer_flush(struct btree_trans *);

int bch2_btree_insert_keys_write_buffer(struct btree_trans *);

void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
int bch2_fs_btree_write_buffer_init(struct bch_fs *);

#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_H */


@@ -0,0 +1,44 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
#define _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H

#include "journal_types.h"

#define BTREE_WRITE_BUFERED_VAL_U64s_MAX	4
#define BTREE_WRITE_BUFERED_U64s_MAX	(BKEY_U64s + BTREE_WRITE_BUFERED_VAL_U64s_MAX)

struct btree_write_buffered_key {
	u64			journal_seq;
	unsigned		journal_offset;
	enum btree_id		btree;
	__BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX);
};

union btree_write_buffer_state {
	struct {
		atomic64_t	counter;
	};

	struct {
		u64		v;
	};

	struct {
		u64		nr:23;
		u64		idx:1;
		u64		ref0:20;
		u64		ref1:20;
	};
};

struct btree_write_buffer {
	struct mutex		flush_lock;
	struct journal_entry_pin journal_pin;

	union btree_write_buffer_state state;
	size_t			size;

	struct btree_write_buffered_key *keys[2];
};

#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */
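
The bitfields are sized to fill the word exactly (23 + 1 + 20 + 20 = 64), which is what makes the whole state readable and updatable with plain 64-bit atomics. A standalone compile-time check against a stand-in copy of the union (assumed layout, not part of the patch):

/* Standalone check (not bcachefs code) that a stand-in copy of the
 * state union packs into one machine word: 23 + 1 + 20 + 20 = 64. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

union wb_state {
	uint64_t v;
	struct {
		uint64_t nr:23;
		uint64_t idx:1;
		uint64_t ref0:20;
		uint64_t ref1:20;
	};
};

static_assert(sizeof(union wb_state) == sizeof(uint64_t),
	      "state must fit in one 64-bit word");

int main(void)
{
	printf("sizeof(union wb_state) = %zu\n", sizeof(union wb_state));
	return 0;
}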


@@ -663,13 +663,14 @@ err:
 	return ret;
 }
 
-static int check_bucket_ref(struct bch_fs *c,
+static int check_bucket_ref(struct btree_trans *trans,
 			    struct bkey_s_c k,
 			    const struct bch_extent_ptr *ptr,
 			    s64 sectors, enum bch_data_type ptr_data_type,
 			    u8 b_gen, u8 bucket_data_type,
 			    u32 dirty_sectors, u32 cached_sectors)
 {
+	struct bch_fs *c = trans->c;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
 	size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
 	u16 bucket_sectors = !ptr->cached
@@ -756,9 +757,12 @@ static int check_bucket_ref(struct bch_fs *c,
 		ret = -EIO;
 		goto err;
 	}
-err:
+out:
 	printbuf_exit(&buf);
 	return ret;
+err:
+	bch2_dump_trans_updates(trans);
+	goto out;
 }
 
 static int mark_stripe_bucket(struct btree_trans *trans,
@@ -800,7 +804,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
 	bucket_lock(g);
 	old = *g;
 
-	ret = check_bucket_ref(c, k, ptr, sectors, data_type,
+	ret = check_bucket_ref(trans, k, ptr, sectors, data_type,
 			       g->gen, g->data_type,
 			       g->dirty_sectors, g->cached_sectors);
 	if (ret)
@@ -832,7 +836,7 @@ static int __mark_pointer(struct btree_trans *trans,
 	u32 *dst_sectors = !ptr->cached
 		? dirty_sectors
 		: cached_sectors;
-	int ret = check_bucket_ref(trans->c, k, ptr, sectors, ptr_data_type,
+	int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
 				   bucket_gen, *bucket_data_type,
 				   *dirty_sectors, *cached_sectors);
@@ -1220,7 +1224,8 @@ not_found:
 		new->k.p		= bkey_start_pos(p.k);
 		new->k.p.offset += *idx - start;
 		bch2_key_resize(&new->k, next_idx - *idx);
-		ret = __bch2_btree_insert(trans, BTREE_ID_extents, &new->k_i);
+		ret = __bch2_btree_insert(trans, BTREE_ID_extents, &new->k_i,
+					  BTREE_TRIGGER_NORUN);
 	}
 
 	*idx = next_idx;
@@ -1269,6 +1274,47 @@ int bch2_mark_reflink_p(struct btree_trans *trans,
 	return ret;
 }
 
+void bch2_trans_fs_usage_revert(struct btree_trans *trans,
+				struct replicas_delta_list *deltas)
+{
+	struct bch_fs *c = trans->c;
+	struct bch_fs_usage *dst;
+	struct replicas_delta *d, *top = (void *) deltas->d + deltas->used;
+	s64 added = 0;
+	unsigned i;
+
+	percpu_down_read(&c->mark_lock);
+	preempt_disable();
+	dst = fs_usage_ptr(c, trans->journal_res.seq, false);
+
+	/* revert changes: */
+	for (d = deltas->d; d != top; d = replicas_delta_next(d)) {
+		switch (d->r.data_type) {
+		case BCH_DATA_btree:
+		case BCH_DATA_user:
+		case BCH_DATA_parity:
+			added += d->delta;
+		}
+
+		BUG_ON(__update_replicas(c, dst, &d->r, -d->delta));
+	}
+
+	dst->nr_inodes -= deltas->nr_inodes;
+
+	for (i = 0; i < BCH_REPLICAS_MAX; i++) {
+		added				-= deltas->persistent_reserved[i];
+		dst->reserved			-= deltas->persistent_reserved[i];
+		dst->persistent_reserved[i]	-= deltas->persistent_reserved[i];
+	}
+
+	if (added > 0) {
+		trans->disk_res->sectors += added;
+		this_cpu_add(*c->online_reserved, added);
+	}
+
+	preempt_enable();
+	percpu_up_read(&c->mark_lock);
+}
+
 int bch2_trans_fs_usage_apply(struct btree_trans *trans,
 			      struct replicas_delta_list *deltas)
 {
@@ -1349,7 +1395,7 @@ need_mark:
 
 /* trans_mark: */
 
-static int bch2_trans_mark_pointer(struct btree_trans *trans,
+static inline int bch2_trans_mark_pointer(struct btree_trans *trans,
 			enum btree_id btree_id, unsigned level,
 			struct bkey_s_c k, struct extent_ptr_decoded p,
 			unsigned flags)
@@ -1378,9 +1424,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
 		goto err;
 
 	if (!p.ptr.cached) {
-		ret = insert
-			? bch2_bucket_backpointer_add(trans, a, bp, k)
-			: bch2_bucket_backpointer_del(trans, a, bp, k);
+		ret = bch2_bucket_backpointer_mod(trans, a, bp, k, insert);
 		if (ret)
 			goto err;
 	}
@@ -1518,7 +1562,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
 	if (IS_ERR(a))
 		return PTR_ERR(a);
 
-	ret = check_bucket_ref(c, s.s_c, ptr, sectors, data_type,
+	ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type,
 			       a->v.gen, a->v.data_type,
 			       a->v.dirty_sectors, a->v.cached_sectors);
 	if (ret)


@@ -248,6 +248,7 @@ int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, struct
 int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
 int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
 
+void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *);
 int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
 
 int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,


@@ -309,9 +309,11 @@ void bch2_data_update_exit(struct data_update *update)
 		bch2_bkey_ptrs_c(bkey_i_to_s_c(update->k.k));
 	const struct bch_extent_ptr *ptr;
 
-	bkey_for_each_ptr(ptrs, ptr)
+	bkey_for_each_ptr(ptrs, ptr) {
 		bch2_bucket_nocow_unlock(&c->nocow_locks,
 					 PTR_BUCKET_POS(c, ptr), 0);
+		percpu_ref_put(&bch_dev_bkey_exists(c, ptr->dev)->ref);
+	}
 
 	bch2_bkey_buf_exit(&update->k, c);
 	bch2_disk_reservation_put(c, &update->op.res);
@@ -411,6 +413,7 @@ int bch2_data_update_init(struct btree_trans *trans,
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 	const union bch_extent_entry *entry;
 	struct extent_ptr_decoded p;
+	const struct bch_extent_ptr *ptr;
 	unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
 	unsigned int ptrs_locked = 0;
 	int ret;
@@ -436,6 +439,9 @@ int bch2_data_update_init(struct btree_trans *trans,
 	if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE)
 		m->op.alloc_reserve = RESERVE_movinggc;
 
+	bkey_for_each_ptr(ptrs, ptr)
+		percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
+
 	i = 0;
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
 		bool locked;
@@ -508,7 +514,8 @@ err:
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
 		if ((1U << i) & ptrs_locked)
 			bch2_bucket_nocow_unlock(&c->nocow_locks,
 						 PTR_BUCKET_POS(c, &p.ptr), 0);
+		percpu_ref_put(&bch_dev_bkey_exists(c, p.ptr.dev)->ref);
 		i++;
 	}


@@ -9,6 +9,7 @@
 #include "bset.h"
 #include "btree_gc.h"
 #include "btree_update.h"
+#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "disk_groups.h"
 #include "ec.h"
@@ -921,13 +922,16 @@ static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
 	bch2_trans_init(&trans, c, 0, 0);
 
+	ret = bch2_btree_write_buffer_flush(&trans);
+	if (ret)
+		goto err;
+
 	for (i = 0; i < nr_data; i++) {
 		ret = ec_stripe_update_bucket(&trans, s, i);
 		if (ret)
 			break;
 	}
-
+err:
 	bch2_trans_exit(&trans);
 
 	return ret;


@@ -42,6 +42,7 @@
 	x(BCH_ERR_transaction_restart,	transaction_restart_key_cache_realloced)\
 	x(BCH_ERR_transaction_restart,	transaction_restart_journal_preres_get)	\
 	x(BCH_ERR_transaction_restart,	transaction_restart_split_race)		\
+	x(BCH_ERR_transaction_restart,	transaction_restart_write_buffer_flush)	\
 	x(BCH_ERR_transaction_restart,	transaction_restart_nested)		\
 	x(0,				no_btree_node)				\
 	x(BCH_ERR_no_btree_node,	no_btree_node_relock)			\
@@ -58,6 +59,7 @@
 	x(BCH_ERR_btree_insert_fail,	btree_insert_need_mark_replicas)	\
 	x(BCH_ERR_btree_insert_fail,	btree_insert_need_journal_res)		\
 	x(BCH_ERR_btree_insert_fail,	btree_insert_need_journal_reclaim)	\
+	x(BCH_ERR_btree_insert_fail,	btree_insert_need_flush_buffer)		\
 	x(0,				backpointer_to_overwritten_btree_node)	\
 	x(0,				lock_fail_root_changed)			\
 	x(0,				journal_reclaim_would_deadlock)		\


@@ -1797,7 +1797,8 @@ static int check_root_trans(struct btree_trans *trans)
 		ret = commit_do(trans, NULL, NULL,
 				BTREE_INSERT_NOFAIL|
 				BTREE_INSERT_LAZY_RW,
-			__bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i));
+			__bch2_btree_insert(trans, BTREE_ID_subvolumes,
+					    &root_subvol.k_i, 0));
 		if (ret) {
 			bch_err(c, "error writing root subvol: %s", bch2_err_str(ret));
 			goto err;


@@ -684,7 +684,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
 
 			bio_set_dev(&n->bio, ca->disk_sb.bdev);
 
-			if (IS_ENABLED(CONFIG_BCACHEFS_NO_IO) && type != BCH_DATA_btree) {
+			if (type != BCH_DATA_btree && unlikely(c->opts.no_data_io)) {
 				bio_endio(&n->bio);
 				continue;
 			}
@@ -2356,8 +2356,7 @@ static void __bch2_read_endio(struct work_struct *work)
 	}
 
 	csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
-	if (bch2_crc_cmp(csum, rbio->pick.crc.csum) &&
-	    !IS_ENABLED(CONFIG_BCACHEFS_NO_IO))
+	if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io)
 		goto csum_err;
 
 	/*
@@ -2808,7 +2807,7 @@ get_bio:
 			 bio_sectors(&rbio->bio));
 		bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
 
-		if (IS_ENABLED(CONFIG_BCACHEFS_NO_IO)) {
+		if (unlikely(c->opts.no_data_io)) {
 			if (likely(!(flags & BCH_READ_IN_RETRY)))
 				bio_endio(&rbio->bio);
 		} else {


@@ -769,6 +769,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 	if (c) {
 		bch2_journal_flush_all_pins(&c->journal);
 		bch2_journal_block(&c->journal);
+		mutex_lock(&c->sb_lock);
 	}
 
 	bu = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL);
@@ -849,6 +850,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 	if (!new_fs)
 		spin_unlock(&c->journal.lock);
 
+	if (ja->nr != old_nr && !new_fs)
+		bch2_write_super(c);
+
 	if (c)
 		bch2_journal_unblock(&c->journal);
@@ -868,6 +872,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 		}
 	}
 err:
+	if (c)
+		mutex_unlock(&c->sb_lock);
+
 	if (ob && !new_fs)
 		for (i = 0; i < nr_got; i++)
 			bch2_open_bucket_put(c, ob[i]);
@@ -893,7 +900,6 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
 {
 	struct journal_device *ja = &ca->journal;
 	struct closure cl;
-	unsigned current_nr;
 	int ret = 0;
 
 	/* don't handle reducing nr of buckets yet: */
@@ -902,44 +908,44 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
 
 	closure_init_stack(&cl);
 
-	while (ja->nr != nr && (ret == 0 || ret == -BCH_ERR_bucket_alloc_blocked)) {
+	while (ja->nr != nr) {
 		struct disk_reservation disk_res = { 0, 0 };
 
-		closure_sync(&cl);
-
-		mutex_lock(&c->sb_lock);
-		current_nr = ja->nr;
-
 		/*
 		 * note: journal buckets aren't really counted as _sectors_ used yet, so
 		 * we don't need the disk reservation to avoid the BUG_ON() in buckets.c
 		 * when space used goes up without a reservation - but we do need the
 		 * reservation to ensure we'll actually be able to allocate:
+		 *
+		 * XXX: that's not right, disk reservations only ensure a
+		 * filesystem-wide allocation will succeed, this is a device
+		 * specific allocation - we can hang here:
 		 */
 
 		ret = bch2_disk_reservation_get(c, &disk_res,
 						bucket_to_sector(ca, nr - ja->nr), 1, 0);
-		if (ret) {
-			mutex_unlock(&c->sb_lock);
-			return ret;
-		}
+		if (ret)
+			break;
 
 		ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl);
 
 		bch2_disk_reservation_put(c, &disk_res);
 
-		if (ja->nr != current_nr)
-			bch2_write_super(c);
-		mutex_unlock(&c->sb_lock);
+		closure_sync(&cl);
+
+		if (ret && ret != -BCH_ERR_bucket_alloc_blocked)
+			break;
 	}
 
+	if (ret)
+		bch_err(c, "%s: err %s", __func__, bch2_err_str(ret));
+
 	return ret;
 }
 
 int bch2_dev_journal_alloc(struct bch_dev *ca)
 {
 	unsigned nr;
-	int ret;
 
 	if (dynamic_fault("bcachefs:add:journal_alloc"))
 		return -ENOMEM;
@@ -956,15 +962,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
 		     min(1 << 13,
 			 (1 << 24) / ca->mi.bucket_size));
 
-	if (ca->fs)
-		mutex_lock(&ca->fs->sb_lock);
-
-	ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
-
-	if (ca->fs)
-		mutex_unlock(&ca->fs->sb_lock);
-
-	return ret;
+	return __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
 }
 
 /* startup/shutdown: */


@@ -8,6 +8,7 @@
 #include "btree_gc.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
+#include "btree_write_buffer.h"
 #include "disk_groups.h"
 #include "ec.h"
 #include "errcode.h"
@@ -680,10 +681,19 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 			bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
 	bch2_trans_iter_exit(&trans, &iter);
 
-	if (!ret) {
-		a = bch2_alloc_to_v4(k, &a_convert);
-		dirty_sectors = a->dirty_sectors;
-		bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
+	if (ret) {
+		bch_err(c, "%s: error looking up alloc key: %s", __func__, bch2_err_str(ret));
+		goto err;
+	}
+
+	a = bch2_alloc_to_v4(k, &a_convert);
+	dirty_sectors = a->dirty_sectors;
+	bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
+
+	ret = bch2_btree_write_buffer_flush(&trans);
+	if (ret) {
+		bch_err(c, "%s: error flushing btree write buffer: %s", __func__, bch2_err_str(ret));
+		goto err;
 	}
 
 	while (!(ret = move_ratelimit(&trans, ctxt))) {
@@ -712,7 +722,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 			if (ret)
 				goto err;
 			if (!k.k)
-				continue;
+				goto next;
 
 			bch2_bkey_buf_reassemble(&sk, c, k);
 			k = bkey_i_to_s_c(sk.k);
@@ -763,7 +773,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 			if (ret)
 				goto err;
 			if (!b)
-				continue;
+				goto next;
 
 			ret = bch2_btree_node_rewrite(&trans, &iter, b, 0);
 			bch2_trans_iter_exit(&trans, &iter);
@@ -779,7 +789,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 			atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen);
 			atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved);
 		}
-
+next:
 		bp_offset++;
 	}


@@ -206,6 +206,11 @@ enum opt_type {
 	  OPT_BOOL(),							\
 	  BCH2_NO_SB_OPT,		true,				\
 	  NULL,		"Stash pointer to in memory btree node in btree ptr")\
+	x(btree_write_buffer_size,	u32,				\
+	  OPT_FS|OPT_MOUNT,						\
+	  OPT_UINT(16, (1U << 20) - 1),					\
+	  BCH2_NO_SB_OPT,		1U << 13,			\
+	  NULL,		"Number of btree write buffer entries")		\
 	x(gc_reserve_percent,		u8,				\
 	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,			\
 	  OPT_UINT(5, 21),						\
@@ -399,6 +404,12 @@ enum opt_type {
 	  NULL,		"Nocow mode: Writes will be done in place when possible.\n"\
 			"Snapshots and reflink will still caused writes to be COW\n"\
 			"Implicitly disables data checksumming, compression and encryption")\
+	x(no_data_io,			u8,				\
+	  OPT_FS|OPT_MOUNT,						\
+	  OPT_BOOL(),							\
+	  BCH2_NO_SB_OPT,		false,				\
+	  NULL,		"Skip submit_bio() for data reads and writes, "	\
+			"for performance testing purposes")		\
 	x(fs_size,			u64,				\
 	  OPT_DEVICE,							\
 	  OPT_UINT(0, S64_MAX),						\


@@ -974,9 +974,15 @@ static int read_btree_roots(struct bch_fs *c)
 		}
 	}
 
-	for (i = 0; i < BTREE_ID_NR; i++)
-		if (!c->btree_roots[i].b)
+	for (i = 0; i < BTREE_ID_NR; i++) {
+		struct btree_root *r = &c->btree_roots[i];
+
+		if (!r->b) {
+			r->alive = false;
+			r->level = 0;
 			bch2_btree_root_alloc(c, i);
+		}
+	}
 fsck_err:
 	return ret;
 }


@@ -299,13 +299,6 @@ static int replicas_table_update(struct bch_fs *c,
 
 	memset(new_usage, 0, sizeof(new_usage));
 
-	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
-		if (!(new_usage[i] = __alloc_percpu_gfp(bytes,
-					sizeof(u64), GFP_KERNEL)))
-			goto err;
-
-	memset(new_usage, 0, sizeof(new_usage));
-
 	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
 		if (!(new_usage[i] = __alloc_percpu_gfp(bytes,
 					sizeof(u64), GFP_KERNEL)))


@@ -16,6 +16,7 @@
 #include "btree_key_cache.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
+#include "btree_write_buffer.h"
 #include "buckets_waiting_for_journal.h"
 #include "chardev.h"
 #include "checksum.h"
@@ -418,6 +419,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 	bch2_do_discards(c);
 	bch2_do_invalidates(c);
 	bch2_do_stripe_deletes(c);
+	bch2_do_pending_node_rewrites(c);
 	return 0;
 err:
 	__bch2_fs_read_only(c);
@@ -446,6 +448,7 @@ static void __bch2_fs_free(struct bch_fs *c)
 	for (i = 0; i < BCH_TIME_STAT_NR; i++)
 		bch2_time_stats_exit(&c->times[i]);
 
+	bch2_free_pending_node_rewrites(c);
 	bch2_fs_counters_exit(c);
 	bch2_fs_snapshots_exit(c);
 	bch2_fs_quota_exit(c);
@@ -465,6 +468,7 @@ static void __bch2_fs_free(struct bch_fs *c)
 	bch2_fs_compress_exit(c);
 	bch2_journal_keys_free(&c->journal_keys);
 	bch2_journal_entries_free(c);
+	bch2_fs_btree_write_buffer_exit(c);
 	percpu_free_rwsem(&c->mark_lock);
 	free_percpu(c->online_reserved);
@@ -817,6 +821,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	    bch2_fs_btree_iter_init(c) ?:
 	    bch2_fs_btree_interior_update_init(c) ?:
 	    bch2_fs_buckets_waiting_for_journal_init(c) ?:
+	    bch2_fs_btree_write_buffer_init(c) ?:
 	    bch2_fs_subvolumes_init(c) ?:
 	    bch2_fs_io_init(c) ?:
 	    bch2_fs_nocow_locking_init(c) ?:


@@ -473,14 +473,14 @@ SHOW(bch2_fs)
 	if (attr == &sysfs_data_jobs)
 		data_progress_to_text(out, c);
 
-	if (attr == &sysfs_nocow_lock_table)
-		bch2_nocow_locks_to_text(out, &c->nocow_locks);
-
 #ifdef BCH_WRITE_REF_DEBUG
 	if (attr == &sysfs_write_refs)
 		bch2_write_refs_to_text(out, c);
 #endif
 
+	if (attr == &sysfs_nocow_lock_table)
+		bch2_nocow_locks_to_text(out, &c->nocow_locks);
+
 	return 0;
 }
@@ -664,10 +664,10 @@ struct attribute *bch2_fs_internal_files[] = {
 	&sysfs_stripes_heap,
 	&sysfs_open_buckets,
 	&sysfs_write_points,
-	&sysfs_nocow_lock_table,
 #ifdef BCH_WRITE_REF_DEBUG
 	&sysfs_write_refs,
 #endif
+	&sysfs_nocow_lock_table,
 	&sysfs_io_timers_read,
 	&sysfs_io_timers_write,


@@ -592,7 +592,7 @@ static int rand_insert(struct bch_fs *c, u64 nr)
 		k.k.p.snapshot = U32_MAX;
 
 		ret = commit_do(&trans, NULL, NULL, 0,
-			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i));
+			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i, 0));
 		if (ret) {
 			bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
 			break;
@@ -621,14 +621,14 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr)
 		}
 
 		ret = commit_do(&trans, NULL, NULL, 0,
-			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[0].k_i) ?:
-			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[1].k_i) ?:
-			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[2].k_i) ?:
-			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[3].k_i) ?:
-			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[4].k_i) ?:
-			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[5].k_i) ?:
-			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[6].k_i) ?:
-			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[7].k_i));
+			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?:
+			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?:
+			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?:
+			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[3].k_i, 0) ?:
+			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[4].k_i, 0) ?:
+			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[5].k_i, 0) ?:
+			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?:
+			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[7].k_i, 0));
 		if (ret) {
 			bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
 			break;