mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-22 00:00:03 +03:00
Update bcachefs sources to 3ca08ab51ec9 bcachefs: six locks: Simplify optimistic spinning
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
a613340b26
commit
7fd6c3ffe4
@ -1 +1 @@
|
||||
d464ec667b2b9de097e39d1505b45aafd87a9552
|
||||
3ca08ab51ec996180c20105489176b8c4327240c
|
||||
|
@ -278,4 +278,7 @@ static inline void dump_stack(void) {}
|
||||
#define unsafe_memcpy(dst, src, bytes, justification) \
|
||||
memcpy(dst, src, bytes)
|
||||
|
||||
#define DECLARE_FLEX_ARRAY(TYPE, NAME) \
|
||||
__DECLARE_FLEX_ARRAY(TYPE, NAME)
|
||||
|
||||
#endif
|
||||
|
@ -98,4 +98,15 @@ static inline void hlist_del_init(struct hlist_node *n)
|
||||
pos; \
|
||||
pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
|
||||
|
||||
static inline size_t list_count_nodes(struct list_head *head)
|
||||
{
|
||||
struct list_head *pos;
|
||||
size_t count = 0;
|
||||
|
||||
list_for_each(pos, head)
|
||||
count++;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
#endif /* _LIST_LIST_H */
|
||||
|
@ -561,8 +561,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
|
||||
|
||||
if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_lazy_rw,
|
||||
bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
|
||||
if (ret)
|
||||
break;
|
||||
@ -581,8 +581,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
|
||||
|
||||
if (have_bucket_gens_key && !ret)
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_lazy_rw,
|
||||
bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
|
||||
|
||||
bch2_trans_put(trans);
|
||||
@ -1267,7 +1267,7 @@ delete:
|
||||
ret = bch2_btree_delete_extent_at(trans, iter,
|
||||
iter->btree_id == BTREE_ID_freespace ? 1 : 0, 0) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW);
|
||||
BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1422,8 +1422,8 @@ int bch2_check_alloc_info(struct bch_fs *c)
|
||||
}
|
||||
|
||||
ret = bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW);
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_lazy_rw);
|
||||
if (ret)
|
||||
goto bkey_err;
|
||||
|
||||
@ -1453,7 +1453,7 @@ bkey_err:
|
||||
for_each_btree_key_commit(trans, iter,
|
||||
BTREE_ID_bucket_gens, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
|
||||
bch2_check_bucket_gens_key(trans, &iter, k));
|
||||
err:
|
||||
bch2_trans_put(trans);
|
||||
@ -1546,7 +1546,7 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
|
||||
ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
|
||||
POS_MIN, BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
|
||||
bch2_check_alloc_to_lru_ref(trans, &iter)));
|
||||
if (ret)
|
||||
bch_err_fn(c, ret);
|
||||
@ -1655,7 +1655,7 @@ write:
|
||||
ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BCH_WATERMARK_btree|
|
||||
BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -1760,7 +1760,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
|
||||
BTREE_TRIGGER_BUCKET_INVALIDATE) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BCH_WATERMARK_btree|
|
||||
BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -1884,8 +1884,8 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
ret = bch2_bucket_do_index(trans, k, a, true) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_lazy_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
if (ret)
|
||||
goto bkey_err;
|
||||
|
||||
@ -1905,8 +1905,8 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_lazy_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
if (ret)
|
||||
goto bkey_err;
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "backpointers.h"
|
||||
#include "btree_cache.h"
|
||||
#include "btree_update.h"
|
||||
#include "btree_update_interior.h"
|
||||
#include "btree_write_buffer.h"
|
||||
#include "error.h"
|
||||
|
||||
@ -220,18 +221,22 @@ out:
|
||||
static void backpointer_not_found(struct btree_trans *trans,
|
||||
struct bpos bp_pos,
|
||||
struct bch_backpointer bp,
|
||||
struct bkey_s_c k,
|
||||
const char *thing_it_points_to)
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
|
||||
|
||||
/*
|
||||
* If we're using the btree write buffer, the backpointer we were
|
||||
* looking at may have already been deleted - failure to find what it
|
||||
* pointed to is not an error:
|
||||
*/
|
||||
if (likely(!bch2_backpointers_no_use_write_buffer))
|
||||
return;
|
||||
|
||||
prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
|
||||
thing_it_points_to);
|
||||
bp.level ? "btree node" : "extent");
|
||||
prt_printf(&buf, "bucket: ");
|
||||
bch2_bpos_to_text(&buf, bucket);
|
||||
prt_printf(&buf, "\n ");
|
||||
@ -257,16 +262,15 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
|
||||
struct bch_backpointer bp,
|
||||
unsigned iter_flags)
|
||||
{
|
||||
if (likely(!bp.level)) {
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_root *r = bch2_btree_id_root(c, bp.btree_id);
|
||||
struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
|
||||
struct bkey_s_c k;
|
||||
|
||||
bch2_trans_node_iter_init(trans, iter,
|
||||
bp.btree_id,
|
||||
bp.pos,
|
||||
0,
|
||||
min(bp.level, r->level),
|
||||
0, 0,
|
||||
iter_flags);
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
if (bkey_err(k)) {
|
||||
@ -274,39 +278,21 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
|
||||
return k;
|
||||
}
|
||||
|
||||
if (bp.level == r->level + 1)
|
||||
k = bkey_i_to_s_c(&r->key);
|
||||
|
||||
if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
|
||||
return k;
|
||||
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
backpointer_not_found(trans, bp_pos, bp, k);
|
||||
return bkey_s_c_null;
|
||||
} else {
|
||||
struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
|
||||
|
||||
if (unlikely(bch2_backpointers_no_use_write_buffer)) {
|
||||
if (bp.level) {
|
||||
struct btree *b;
|
||||
|
||||
/*
|
||||
* If a backpointer for a btree node wasn't found, it may be
|
||||
* because it was overwritten by a new btree node that hasn't
|
||||
* been written out yet - backpointer_get_node() checks for
|
||||
* this:
|
||||
*/
|
||||
b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
|
||||
if (!IS_ERR_OR_NULL(b))
|
||||
return bkey_i_to_s_c(&b->key);
|
||||
|
||||
if (IS_ERR_OR_NULL(b)) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
|
||||
if (IS_ERR(b))
|
||||
return bkey_s_c_err(PTR_ERR(b));
|
||||
return bkey_s_c_null;
|
||||
return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
|
||||
}
|
||||
|
||||
backpointer_not_found(trans, bp_pos, bp, k, "extent");
|
||||
return bkey_i_to_s_c(&b->key);
|
||||
}
|
||||
|
||||
return bkey_s_c_null;
|
||||
}
|
||||
|
||||
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
|
||||
@ -327,19 +313,20 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
|
||||
bp.level - 1,
|
||||
0);
|
||||
b = bch2_btree_iter_peek_node(iter);
|
||||
if (IS_ERR(b))
|
||||
if (IS_ERR_OR_NULL(b))
|
||||
goto err;
|
||||
|
||||
if (b && extent_matches_bp(c, bp.btree_id, bp.level,
|
||||
BUG_ON(b->c.level != bp.level - 1);
|
||||
|
||||
if (extent_matches_bp(c, bp.btree_id, bp.level,
|
||||
bkey_i_to_s_c(&b->key),
|
||||
bucket, bp))
|
||||
return b;
|
||||
|
||||
if (b && btree_node_will_make_reachable(b)) {
|
||||
if (btree_node_will_make_reachable(b)) {
|
||||
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
|
||||
} else {
|
||||
backpointer_not_found(trans, bp_pos, bp,
|
||||
bkey_i_to_s_c(&b->key), "btree node");
|
||||
backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
|
||||
b = NULL;
|
||||
}
|
||||
err:
|
||||
@ -395,7 +382,7 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
|
||||
ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter,
|
||||
BTREE_ID_backpointers, POS_MIN, 0, k,
|
||||
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_check_btree_backpointer(trans, &iter, k)));
|
||||
if (ret)
|
||||
bch_err_fn(c, ret);
|
||||
@ -642,8 +629,8 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
||||
|
||||
do {
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BCH_TRANS_COMMIT_lazy_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
check_extent_to_backpointers(trans, &iter,
|
||||
bucket_start, bucket_end,
|
||||
&last_flushed));
|
||||
@ -657,8 +644,8 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
||||
break;
|
||||
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BCH_TRANS_COMMIT_lazy_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
check_btree_root_to_backpointers(trans, btree_id,
|
||||
bucket_start, bucket_end,
|
||||
&last_flushed));
|
||||
@ -797,7 +784,8 @@ static int check_one_backpointer(struct btree_trans *trans,
|
||||
|
||||
if (fsck_err_on(!k.k, c,
|
||||
backpointer_to_missing_ptr,
|
||||
"backpointer for missing extent\n %s",
|
||||
"backpointer for missing %s\n %s",
|
||||
bp.v->level ? "btree node" : "extent",
|
||||
(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
|
||||
ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
|
||||
goto out;
|
||||
@ -819,7 +807,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
|
||||
|
||||
return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
|
||||
POS_MIN, BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc,
|
||||
check_one_backpointer(trans, start, end,
|
||||
bkey_s_c_to_backpointer(k),
|
||||
&last_flushed_pos));
|
||||
|
@ -401,7 +401,9 @@ BCH_DEBUG_PARAMS_DEBUG()
|
||||
x(journal_flush_write) \
|
||||
x(journal_noflush_write) \
|
||||
x(journal_flush_seq) \
|
||||
x(blocked_journal) \
|
||||
x(blocked_journal_low_on_space) \
|
||||
x(blocked_journal_low_on_pin) \
|
||||
x(blocked_journal_max_in_flight) \
|
||||
x(blocked_allocate) \
|
||||
x(blocked_allocate_open_bucket) \
|
||||
x(nocow_lock_contended)
|
||||
@ -617,7 +619,7 @@ struct journal_seq_blacklist_table {
|
||||
u64 start;
|
||||
u64 end;
|
||||
bool dirty;
|
||||
} entries[0];
|
||||
} entries[];
|
||||
};
|
||||
|
||||
struct journal_keys {
|
||||
|
@ -2256,7 +2256,8 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6);
|
||||
enum btree_id_flags {
|
||||
BTREE_ID_EXTENTS = BIT(0),
|
||||
BTREE_ID_SNAPSHOTS = BIT(1),
|
||||
BTREE_ID_DATA = BIT(2),
|
||||
BTREE_ID_SNAPSHOT_FIELD = BIT(2),
|
||||
BTREE_ID_DATA = BIT(3),
|
||||
};
|
||||
|
||||
#define BCH_BTREE_IDS() \
|
||||
@ -2311,12 +2312,12 @@ enum btree_id_flags {
|
||||
BIT_ULL(KEY_TYPE_bucket_gens)) \
|
||||
x(snapshot_trees, 15, 0, \
|
||||
BIT_ULL(KEY_TYPE_snapshot_tree)) \
|
||||
x(deleted_inodes, 16, BTREE_ID_SNAPSHOTS, \
|
||||
x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(logged_ops, 17, 0, \
|
||||
BIT_ULL(KEY_TYPE_logged_op_truncate)| \
|
||||
BIT_ULL(KEY_TYPE_logged_op_finsert)) \
|
||||
x(rebalance_work, 18, BTREE_ID_SNAPSHOTS, \
|
||||
x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
|
||||
BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))
|
||||
|
||||
enum btree_id {
|
||||
|
@ -186,15 +186,20 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
|
||||
if (type != BKEY_TYPE_btree) {
|
||||
enum btree_id btree = type - 1;
|
||||
|
||||
bkey_fsck_err_on(!btree_type_has_snapshots(btree) &&
|
||||
k.k->p.snapshot, c, err,
|
||||
bkey_snapshot_nonzero,
|
||||
"nonzero snapshot");
|
||||
|
||||
bkey_fsck_err_on(btree_type_has_snapshots(btree) &&
|
||||
!k.k->p.snapshot, c, err,
|
||||
if (btree_type_has_snapshots(btree)) {
|
||||
bkey_fsck_err_on(!k.k->p.snapshot, c, err,
|
||||
bkey_snapshot_zero,
|
||||
"snapshot == 0");
|
||||
} else if (!btree_type_has_snapshot_field(btree)) {
|
||||
bkey_fsck_err_on(k.k->p.snapshot, c, err,
|
||||
bkey_snapshot_nonzero,
|
||||
"nonzero snapshot");
|
||||
} else {
|
||||
/*
|
||||
* btree uses snapshot field but it's not required to be
|
||||
* nonzero
|
||||
*/
|
||||
}
|
||||
|
||||
bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err,
|
||||
bkey_at_pos_max,
|
||||
|
@ -93,7 +93,6 @@ static inline int bch2_mark_key(struct btree_trans *trans,
|
||||
enum btree_update_flags {
|
||||
__BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END,
|
||||
__BTREE_UPDATE_NOJOURNAL,
|
||||
__BTREE_UPDATE_PREJOURNAL,
|
||||
__BTREE_UPDATE_KEY_CACHE_RECLAIM,
|
||||
|
||||
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
|
||||
@ -108,7 +107,6 @@ enum btree_update_flags {
|
||||
|
||||
#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
|
||||
#define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL)
|
||||
#define BTREE_UPDATE_PREJOURNAL (1U << __BTREE_UPDATE_PREJOURNAL)
|
||||
#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
|
||||
|
||||
#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
|
||||
|
@ -1502,7 +1502,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
|
||||
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
|
||||
POS(ca->dev_idx, ca->mi.first_bucket),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_LAZY_RW,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
|
||||
bch2_alloc_write_key(trans, &iter, k, metadata_only));
|
||||
|
||||
if (ret < 0) {
|
||||
@ -1659,7 +1659,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
|
||||
ret = for_each_btree_key_commit(trans, iter,
|
||||
BTREE_ID_reflink, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_gc_write_reflink_key(trans, &iter, k, &idx));
|
||||
|
||||
c->reflink_gc_nr = 0;
|
||||
@ -1783,7 +1783,7 @@ static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
|
||||
ret = for_each_btree_key_commit(trans, iter,
|
||||
BTREE_ID_stripes, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_gc_write_stripes_key(trans, &iter, k));
|
||||
|
||||
bch2_trans_put(trans);
|
||||
@ -2019,7 +2019,7 @@ int bch2_gc_gens(struct bch_fs *c)
|
||||
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
|
||||
k,
|
||||
NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
gc_btree_gens_key(trans, &iter, k));
|
||||
if (ret && !bch2_err_matches(ret, EROFS))
|
||||
bch_err_fn(c, ret);
|
||||
@ -2032,7 +2032,7 @@ int bch2_gc_gens(struct bch_fs *c)
|
||||
BTREE_ITER_PREFETCH,
|
||||
k,
|
||||
NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_alloc_write_oldest_gen(trans, &iter, k));
|
||||
if (ret && !bch2_err_matches(ret, EROFS))
|
||||
bch_err_fn(c, ret);
|
||||
|
@ -1801,9 +1801,9 @@ static void btree_node_write_work(struct work_struct *work)
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
|
||||
BCH_WATERMARK_reclaim|
|
||||
BTREE_INSERT_JOURNAL_RECLAIM|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_NOCHECK_RW,
|
||||
BCH_TRANS_COMMIT_journal_reclaim|
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_no_check_rw,
|
||||
!wbio->wbio.failed.nr));
|
||||
if (ret)
|
||||
goto err;
|
||||
|
@ -257,7 +257,7 @@ static void bch2_btree_iter_verify(struct btree_iter *iter)
|
||||
|
||||
BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
|
||||
(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
|
||||
!btree_type_has_snapshots(iter->btree_id));
|
||||
!btree_type_has_snapshot_field(iter->btree_id));
|
||||
|
||||
if (iter->update_path)
|
||||
bch2_btree_path_verify(trans, iter->update_path);
|
||||
@ -1214,8 +1214,6 @@ __bch2_btree_path_set_pos(struct btree_trans *trans,
|
||||
struct btree_path *path, struct bpos new_pos,
|
||||
bool intent, unsigned long ip, int cmp)
|
||||
{
|
||||
unsigned level = path->level;
|
||||
|
||||
bch2_trans_verify_not_in_restart(trans);
|
||||
EBUG_ON(!path->ref);
|
||||
|
||||
@ -1231,7 +1229,7 @@ __bch2_btree_path_set_pos(struct btree_trans *trans,
|
||||
goto out;
|
||||
}
|
||||
|
||||
level = btree_path_up_until_good_node(trans, path, cmp);
|
||||
unsigned level = btree_path_up_until_good_node(trans, path, cmp);
|
||||
|
||||
if (btree_path_node(path, level)) {
|
||||
struct btree_path_level *l = &path->l[level];
|
||||
@ -2835,8 +2833,9 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
|
||||
|
||||
static inline void check_srcu_held_too_long(struct btree_trans *trans)
|
||||
{
|
||||
WARN(time_after(jiffies, trans->srcu_lock_time + HZ * 10),
|
||||
"btree trans held srcu lock (delaying memory reclaim) by more than 10 seconds");
|
||||
WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
|
||||
"btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
|
||||
(jiffies - trans->srcu_lock_time) / HZ);
|
||||
}
|
||||
|
||||
void bch2_trans_srcu_unlock(struct btree_trans *trans)
|
||||
@ -3088,8 +3087,6 @@ void bch2_trans_put(struct btree_trans *trans)
|
||||
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
|
||||
}
|
||||
|
||||
bch2_journal_preres_put(&c->journal, &trans->journal_preres);
|
||||
|
||||
kfree(trans->extra_journal_entries.data);
|
||||
|
||||
if (trans->fs_usage_deltas) {
|
||||
|
@ -416,7 +416,7 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
|
||||
flags |= BTREE_ITER_IS_EXTENTS;
|
||||
|
||||
if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
|
||||
!btree_type_has_snapshots(btree_id))
|
||||
!btree_type_has_snapshot_field(btree_id))
|
||||
flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
|
||||
|
||||
if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) &&
|
||||
|
@ -90,10 +90,13 @@ static void bkey_cached_free(struct btree_key_cache *bc,
|
||||
ck->btree_trans_barrier_seq =
|
||||
start_poll_synchronize_srcu(&c->btree_trans_barrier);
|
||||
|
||||
if (ck->c.lock.readers)
|
||||
if (ck->c.lock.readers) {
|
||||
list_move_tail(&ck->list, &bc->freed_pcpu);
|
||||
else
|
||||
bc->nr_freed_pcpu++;
|
||||
} else {
|
||||
list_move_tail(&ck->list, &bc->freed_nonpcpu);
|
||||
bc->nr_freed_nonpcpu++;
|
||||
}
|
||||
atomic_long_inc(&bc->nr_freed);
|
||||
|
||||
kfree(ck->k);
|
||||
@ -110,6 +113,8 @@ static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
|
||||
{
|
||||
struct bkey_cached *pos;
|
||||
|
||||
bc->nr_freed_nonpcpu++;
|
||||
|
||||
list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
|
||||
if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
|
||||
pos->btree_trans_barrier_seq)) {
|
||||
@ -159,6 +164,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
|
||||
#else
|
||||
mutex_lock(&bc->lock);
|
||||
list_move_tail(&ck->list, &bc->freed_nonpcpu);
|
||||
bc->nr_freed_nonpcpu++;
|
||||
mutex_unlock(&bc->lock);
|
||||
#endif
|
||||
} else {
|
||||
@ -218,6 +224,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
|
||||
f->nr < ARRAY_SIZE(f->objs) / 2) {
|
||||
ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
|
||||
list_del_init(&ck->list);
|
||||
bc->nr_freed_nonpcpu--;
|
||||
f->objs[f->nr++] = ck;
|
||||
}
|
||||
|
||||
@ -230,6 +237,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
|
||||
if (!list_empty(&bc->freed_nonpcpu)) {
|
||||
ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
|
||||
list_del_init(&ck->list);
|
||||
bc->nr_freed_nonpcpu--;
|
||||
}
|
||||
mutex_unlock(&bc->lock);
|
||||
#endif
|
||||
@ -649,8 +657,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
|
||||
BTREE_TRIGGER_NORUN) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BCH_TRANS_COMMIT_no_check_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
(ck->journal.seq == journal_last_seq(j)
|
||||
? BCH_WATERMARK_reclaim
|
||||
: 0)|
|
||||
@ -665,7 +673,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
||||
goto out;
|
||||
|
||||
bch2_journal_pin_drop(j, &ck->journal);
|
||||
bch2_journal_preres_put(j, &ck->res);
|
||||
|
||||
BUG_ON(!btree_node_locked(c_iter.path, 0));
|
||||
|
||||
@ -728,7 +735,7 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
|
||||
|
||||
ret = commit_do(trans, NULL, NULL, 0,
|
||||
btree_key_cache_flush_pos(trans, key, seq,
|
||||
BTREE_INSERT_JOURNAL_RECLAIM, false));
|
||||
BCH_TRANS_COMMIT_journal_reclaim, false));
|
||||
unlock:
|
||||
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
|
||||
|
||||
@ -763,18 +770,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
|
||||
|
||||
BUG_ON(insert->k.u64s > ck->u64s);
|
||||
|
||||
if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
|
||||
int difference;
|
||||
|
||||
BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s);
|
||||
|
||||
difference = jset_u64s(insert->k.u64s) - ck->res.u64s;
|
||||
if (difference > 0) {
|
||||
trans->journal_preres.u64s -= difference;
|
||||
ck->res.u64s += difference;
|
||||
}
|
||||
}
|
||||
|
||||
bkey_copy(ck->k, insert);
|
||||
ck->valid = true;
|
||||
|
||||
@ -852,6 +847,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
* Newest freed entries are at the end of the list - once we hit one
|
||||
* that's too new to be freed, we can bail out:
|
||||
*/
|
||||
scanned += bc->nr_freed_nonpcpu;
|
||||
|
||||
list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
|
||||
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
|
||||
ck->btree_trans_barrier_seq))
|
||||
@ -861,13 +858,15 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
six_lock_exit(&ck->c.lock);
|
||||
kmem_cache_free(bch2_key_cache, ck);
|
||||
atomic_long_dec(&bc->nr_freed);
|
||||
scanned++;
|
||||
freed++;
|
||||
bc->nr_freed_nonpcpu--;
|
||||
}
|
||||
|
||||
if (scanned >= nr)
|
||||
goto out;
|
||||
|
||||
scanned += bc->nr_freed_pcpu;
|
||||
|
||||
list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
|
||||
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
|
||||
ck->btree_trans_barrier_seq))
|
||||
@ -877,8 +876,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
six_lock_exit(&ck->c.lock);
|
||||
kmem_cache_free(bch2_key_cache, ck);
|
||||
atomic_long_dec(&bc->nr_freed);
|
||||
scanned++;
|
||||
freed++;
|
||||
bc->nr_freed_pcpu--;
|
||||
}
|
||||
|
||||
if (scanned >= nr)
|
||||
@ -985,6 +984,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
|
||||
}
|
||||
#endif
|
||||
|
||||
BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu);
|
||||
BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu);
|
||||
|
||||
list_splice(&bc->freed_pcpu, &items);
|
||||
list_splice(&bc->freed_nonpcpu, &items);
|
||||
|
||||
@ -994,7 +996,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
|
||||
cond_resched();
|
||||
|
||||
bch2_journal_pin_drop(&c->journal, &ck->journal);
|
||||
bch2_journal_preres_put(&c->journal, &ck->res);
|
||||
|
||||
list_del(&ck->list);
|
||||
kfree(ck->k);
|
||||
|
34
libbcachefs/btree_key_cache_types.h
Normal file
34
libbcachefs/btree_key_cache_types.h
Normal file
@ -0,0 +1,34 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
|
||||
#define _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
|
||||
|
||||
struct btree_key_cache_freelist {
|
||||
struct bkey_cached *objs[16];
|
||||
unsigned nr;
|
||||
};
|
||||
|
||||
struct btree_key_cache {
|
||||
struct mutex lock;
|
||||
struct rhashtable table;
|
||||
bool table_init_done;
|
||||
|
||||
struct list_head freed_pcpu;
|
||||
size_t nr_freed_pcpu;
|
||||
struct list_head freed_nonpcpu;
|
||||
size_t nr_freed_nonpcpu;
|
||||
|
||||
struct shrinker shrink;
|
||||
unsigned shrink_iter;
|
||||
struct btree_key_cache_freelist __percpu *pcpu_freed;
|
||||
|
||||
atomic_long_t nr_freed;
|
||||
atomic_long_t nr_keys;
|
||||
atomic_long_t nr_dirty;
|
||||
};
|
||||
|
||||
struct bkey_cached_key {
|
||||
u32 btree_id;
|
||||
struct bpos pos;
|
||||
} __packed __aligned(4);
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_KEY_CACHE_TYPES_H */
|
@ -78,6 +78,53 @@ inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
|
||||
bch2_btree_init_next(trans, b);
|
||||
}
|
||||
|
||||
static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
|
||||
{
|
||||
while (--i >= trans->updates) {
|
||||
if (same_leaf_as_prev(trans, i))
|
||||
continue;
|
||||
|
||||
bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
|
||||
}
|
||||
|
||||
trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
|
||||
}
|
||||
|
||||
static inline int bch2_trans_lock_write(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
|
||||
EBUG_ON(trans->write_locked);
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
if (same_leaf_as_prev(trans, i))
|
||||
continue;
|
||||
|
||||
if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
|
||||
return trans_lock_write_fail(trans, i);
|
||||
|
||||
if (!i->cached)
|
||||
bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
|
||||
}
|
||||
|
||||
trans->write_locked = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void bch2_trans_unlock_write(struct btree_trans *trans)
|
||||
{
|
||||
if (likely(trans->write_locked)) {
|
||||
struct btree_insert_entry *i;
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
if (!same_leaf_as_prev(trans, i))
|
||||
bch2_btree_node_unlock_write_inlined(trans, i->path,
|
||||
insert_l(i)->b);
|
||||
trans->write_locked = false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Inserting into a given leaf node (last stage of insert): */
|
||||
|
||||
/* Handle overwrites and do insert, for non extents: */
|
||||
@ -269,23 +316,13 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
|
||||
BUG_ON(i->level != i->path->level);
|
||||
BUG_ON(i->btree_id != i->path->btree_id);
|
||||
EBUG_ON(!i->level &&
|
||||
btree_type_has_snapshots(i->btree_id) &&
|
||||
!(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
|
||||
test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
|
||||
i->k->k.p.snapshot &&
|
||||
bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
|
||||
}
|
||||
|
||||
static noinline int
|
||||
bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags,
|
||||
unsigned long trace_ip)
|
||||
{
|
||||
return drop_locks_do(trans,
|
||||
bch2_journal_preres_get(&trans->c->journal,
|
||||
&trans->journal_preres,
|
||||
trans->journal_preres_u64s,
|
||||
(flags & BCH_WATERMARK_MASK)));
|
||||
}
|
||||
|
||||
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
|
||||
unsigned flags)
|
||||
{
|
||||
@ -320,6 +357,45 @@ static inline int btree_key_can_insert(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
noinline static int
|
||||
btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
|
||||
struct btree_path *path, unsigned new_u64s)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_insert_entry *i;
|
||||
struct bkey_cached *ck = (void *) path->l[0].b;
|
||||
struct bkey_i *new_k;
|
||||
int ret;
|
||||
|
||||
bch2_trans_unlock_write(trans);
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
|
||||
if (!new_k) {
|
||||
bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
|
||||
bch2_btree_id_str(path->btree_id), new_u64s);
|
||||
return -BCH_ERR_ENOMEM_btree_key_cache_insert;
|
||||
}
|
||||
|
||||
ret = bch2_trans_relock(trans) ?:
|
||||
bch2_trans_lock_write(trans);
|
||||
if (unlikely(ret)) {
|
||||
kfree(new_k);
|
||||
return ret;
|
||||
}
|
||||
|
||||
memcpy(new_k, ck->k, ck->u64s * sizeof(u64));
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
if (i->old_v == &ck->k->v)
|
||||
i->old_v = &new_k->v;
|
||||
|
||||
kfree(ck->k);
|
||||
ck->u64s = new_u64s;
|
||||
ck->k = new_k;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags,
|
||||
struct btree_path *path, unsigned u64s)
|
||||
{
|
||||
@ -333,7 +409,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
|
||||
|
||||
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
|
||||
bch2_btree_key_cache_must_wait(c) &&
|
||||
!(flags & BTREE_INSERT_JOURNAL_RECLAIM))
|
||||
!(flags & BCH_TRANS_COMMIT_journal_reclaim))
|
||||
return -BCH_ERR_btree_insert_need_journal_reclaim;
|
||||
|
||||
/*
|
||||
@ -346,12 +422,9 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
|
||||
return 0;
|
||||
|
||||
new_u64s = roundup_pow_of_two(u64s);
|
||||
new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
|
||||
if (!new_k) {
|
||||
bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
|
||||
bch2_btree_id_str(path->btree_id), new_u64s);
|
||||
return -BCH_ERR_ENOMEM_btree_key_cache_insert;
|
||||
}
|
||||
new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT);
|
||||
if (unlikely(!new_k))
|
||||
return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s);
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
if (i->old_v == &ck->k->v)
|
||||
@ -583,6 +656,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
|
||||
*stopped_at = i;
|
||||
return ret;
|
||||
}
|
||||
|
||||
i->k->k.needs_whiteout = false;
|
||||
}
|
||||
|
||||
if (trans->nr_wb_updates &&
|
||||
@ -593,7 +668,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
|
||||
* Don't get journal reservation until after we know insert will
|
||||
* succeed:
|
||||
*/
|
||||
if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
|
||||
if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) {
|
||||
ret = bch2_trans_journal_res_get(trans,
|
||||
(flags & BCH_WATERMARK_MASK)|
|
||||
JOURNAL_RES_GET_NONBLOCK);
|
||||
@ -602,8 +677,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
|
||||
|
||||
if (unlikely(trans->journal_transaction_names))
|
||||
journal_transaction_name(trans);
|
||||
} else {
|
||||
trans->journal_res.seq = c->journal.replay_journal_seq;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -612,7 +685,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
|
||||
*/
|
||||
|
||||
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
|
||||
!(flags & BTREE_INSERT_JOURNAL_REPLAY)) {
|
||||
!(flags & BCH_TRANS_COMMIT_no_journal_res)) {
|
||||
if (bch2_journal_seq_verify)
|
||||
trans_for_each_update(trans, i)
|
||||
i->k->k.version.lo = trans->journal_res.seq;
|
||||
@ -626,7 +699,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
|
||||
return -BCH_ERR_btree_insert_need_mark_replicas;
|
||||
|
||||
if (trans->nr_wb_updates) {
|
||||
EBUG_ON(flags & BTREE_INSERT_JOURNAL_REPLAY);
|
||||
EBUG_ON(flags & BCH_TRANS_COMMIT_no_journal_res);
|
||||
|
||||
ret = bch2_btree_insert_keys_write_buffer(trans);
|
||||
if (ret)
|
||||
@ -663,7 +736,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
|
||||
trans->journal_res.u64s -= trans->extra_journal_entries.nr;
|
||||
}
|
||||
|
||||
if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
|
||||
if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) {
|
||||
struct journal *j = &c->journal;
|
||||
struct jset_entry *entry;
|
||||
|
||||
@ -705,15 +778,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
|
||||
}
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
i->k->k.needs_whiteout = false;
|
||||
|
||||
if (!i->cached) {
|
||||
u64 seq = trans->journal_res.seq;
|
||||
|
||||
if (i->flags & BTREE_UPDATE_PREJOURNAL)
|
||||
seq = i->seq;
|
||||
|
||||
bch2_btree_insert_key_leaf(trans, i->path, i->k, seq);
|
||||
bch2_btree_insert_key_leaf(trans, i->path, i->k, trans->journal_res.seq);
|
||||
} else if (!i->key_cache_already_flushed)
|
||||
bch2_btree_insert_key_cached(trans, flags, i);
|
||||
else {
|
||||
@ -731,37 +797,6 @@ revert_fs_usage:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
|
||||
{
|
||||
while (--i >= trans->updates) {
|
||||
if (same_leaf_as_prev(trans, i))
|
||||
continue;
|
||||
|
||||
bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
|
||||
}
|
||||
|
||||
trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
|
||||
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
|
||||
}
|
||||
|
||||
static inline int trans_lock_write(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
|
||||
trans_for_each_update(trans, i) {
|
||||
if (same_leaf_as_prev(trans, i))
|
||||
continue;
|
||||
|
||||
if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
|
||||
return trans_lock_write_fail(trans, i);
|
||||
|
||||
if (!i->cached)
|
||||
bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
@ -799,6 +834,12 @@ static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int bch2_trans_commit_journal_pin_flush(struct journal *j,
|
||||
struct journal_entry_pin *_pin, u64 seq)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get journal reservation, take write locks, and attempt to do btree update(s):
|
||||
*/
|
||||
@ -829,15 +870,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
|
||||
}
|
||||
}
|
||||
|
||||
ret = bch2_journal_preres_get(&c->journal,
|
||||
&trans->journal_preres, trans->journal_preres_u64s,
|
||||
(flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK);
|
||||
if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
|
||||
ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
ret = trans_lock_write(trans);
|
||||
ret = bch2_trans_lock_write(trans);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
@ -846,19 +879,18 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
|
||||
if (!ret && unlikely(trans->journal_replay_not_finished))
|
||||
bch2_drop_overwrites_from_journal(trans);
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
if (!same_leaf_as_prev(trans, i))
|
||||
bch2_btree_node_unlock_write_inlined(trans, i->path,
|
||||
insert_l(i)->b);
|
||||
bch2_trans_unlock_write(trans);
|
||||
|
||||
if (!ret && trans->journal_pin)
|
||||
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
|
||||
trans->journal_pin, NULL);
|
||||
trans->journal_pin,
|
||||
bch2_trans_commit_journal_pin_flush);
|
||||
|
||||
/*
|
||||
* Drop journal reservation after dropping write locks, since dropping
|
||||
* the journal reservation may kick off a journal write:
|
||||
*/
|
||||
if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res)))
|
||||
bch2_journal_res_put(&c->journal, &trans->journal_res);
|
||||
|
||||
return ret;
|
||||
@ -896,7 +928,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
|
||||
* XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
|
||||
* flag
|
||||
*/
|
||||
if ((flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
|
||||
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
|
||||
(flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) {
|
||||
ret = -BCH_ERR_journal_reclaim_would_deadlock;
|
||||
break;
|
||||
@ -931,7 +963,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
|
||||
if (wb->state.nr > wb->size * 3 / 4) {
|
||||
bch2_trans_begin(trans);
|
||||
ret = __bch2_btree_write_buffer_flush(trans,
|
||||
flags|BTREE_INSERT_NOCHECK_RW, true);
|
||||
flags|BCH_TRANS_COMMIT_no_check_rw, true);
|
||||
if (!ret) {
|
||||
trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
|
||||
@ -951,8 +983,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
|
||||
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
|
||||
|
||||
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) &&
|
||||
!(flags & BTREE_INSERT_NOWAIT) &&
|
||||
(flags & BTREE_INSERT_NOFAIL), c,
|
||||
(flags & BCH_TRANS_COMMIT_no_enospc), c,
|
||||
"%s: incorrectly got %s\n", __func__, bch2_err_str(ret));
|
||||
|
||||
return ret;
|
||||
@ -964,7 +995,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags)
|
||||
struct bch_fs *c = trans->c;
|
||||
int ret;
|
||||
|
||||
if (likely(!(flags & BTREE_INSERT_LAZY_RW)) ||
|
||||
if (likely(!(flags & BCH_TRANS_COMMIT_lazy_rw)) ||
|
||||
test_bit(BCH_FS_STARTED, &c->flags))
|
||||
return -BCH_ERR_erofs_trans_commit;
|
||||
|
||||
@ -1002,7 +1033,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_insert_entry *i = NULL;
|
||||
struct btree_write_buffered_key *wb;
|
||||
unsigned u64s;
|
||||
int ret = 0;
|
||||
|
||||
if (!trans->nr_updates &&
|
||||
@ -1010,9 +1040,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
||||
!trans->extra_journal_entries.nr)
|
||||
goto out_reset;
|
||||
|
||||
if (flags & BTREE_INSERT_GC_LOCK_HELD)
|
||||
lockdep_assert_held(&c->gc_lock);
|
||||
|
||||
ret = bch2_trans_commit_run_triggers(trans);
|
||||
if (ret)
|
||||
goto out_reset;
|
||||
@ -1021,7 +1048,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
||||
struct printbuf buf = PRINTBUF;
|
||||
enum bkey_invalid_flags invalid_flags = 0;
|
||||
|
||||
if (!(flags & BTREE_INSERT_JOURNAL_REPLAY))
|
||||
if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
|
||||
invalid_flags |= BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT;
|
||||
|
||||
if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
|
||||
@ -1039,7 +1066,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
||||
goto out_reset;
|
||||
}
|
||||
|
||||
if (!(flags & BTREE_INSERT_NOCHECK_RW) &&
|
||||
if (!(flags & BCH_TRANS_COMMIT_no_check_rw) &&
|
||||
unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) {
|
||||
ret = bch2_trans_commit_get_rw_cold(trans, flags);
|
||||
if (ret)
|
||||
@ -1052,7 +1079,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
ret = __bch2_btree_write_buffer_flush(trans,
|
||||
flags|BTREE_INSERT_NOCHECK_RW, true);
|
||||
flags|BCH_TRANS_COMMIT_no_check_rw, true);
|
||||
if (!ret) {
|
||||
trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
|
||||
@ -1062,13 +1089,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
||||
|
||||
EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
|
||||
|
||||
memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
|
||||
|
||||
trans->journal_u64s = trans->extra_journal_entries.nr;
|
||||
trans->journal_preres_u64s = 0;
|
||||
|
||||
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
|
||||
|
||||
if (trans->journal_transaction_names)
|
||||
trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
|
||||
|
||||
@ -1084,16 +1106,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
||||
if (i->key_cache_already_flushed)
|
||||
continue;
|
||||
|
||||
/* we're going to journal the key being updated: */
|
||||
u64s = jset_u64s(i->k->k.u64s);
|
||||
if (i->cached &&
|
||||
likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
|
||||
trans->journal_preres_u64s += u64s;
|
||||
|
||||
if (i->flags & BTREE_UPDATE_NOJOURNAL)
|
||||
continue;
|
||||
|
||||
trans->journal_u64s += u64s;
|
||||
/* we're going to journal the key being updated: */
|
||||
trans->journal_u64s += jset_u64s(i->k->k.u64s);
|
||||
|
||||
/* and we're also going to log the overwrite: */
|
||||
if (trans->journal_transaction_names)
|
||||
@ -1106,13 +1123,14 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
||||
if (trans->extra_journal_res) {
|
||||
ret = bch2_disk_reservation_add(c, trans->disk_res,
|
||||
trans->extra_journal_res,
|
||||
(flags & BTREE_INSERT_NOFAIL)
|
||||
(flags & BCH_TRANS_COMMIT_no_enospc)
|
||||
? BCH_DISK_RESERVATION_NOFAIL : 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
retry:
|
||||
bch2_trans_verify_not_in_restart(trans);
|
||||
if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res)))
|
||||
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
|
||||
|
||||
ret = do_bch2_trans_commit(trans, flags, &i, _RET_IP_);
|
||||
@ -1125,9 +1143,7 @@ retry:
|
||||
|
||||
trace_and_count(c, transaction_commit, trans, _RET_IP_);
|
||||
out:
|
||||
bch2_journal_preres_put(&c->journal, &trans->journal_preres);
|
||||
|
||||
if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
|
||||
if (likely(!(flags & BCH_TRANS_COMMIT_no_check_rw)))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
|
||||
out_reset:
|
||||
if (!ret)
|
||||
@ -1140,5 +1156,17 @@ err:
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* We might have done another transaction commit in the error path -
|
||||
* i.e. btree write buffer flush - which will have made use of
|
||||
* trans->journal_res, but with BCH_TRANS_COMMIT_no_journal_res that is
|
||||
* how the journal sequence number to pin is passed in - so we must
|
||||
* restart:
|
||||
*/
|
||||
if (flags & BCH_TRANS_COMMIT_no_journal_res) {
|
||||
ret = -BCH_ERR_transaction_restart_nested;
|
||||
goto out;
|
||||
}
|
||||
|
||||
goto retry;
|
||||
}
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <linux/list.h>
|
||||
#include <linux/rhashtable.h>
|
||||
|
||||
//#include "bkey_methods.h"
|
||||
#include "btree_key_cache_types.h"
|
||||
#include "buckets_types.h"
|
||||
#include "darray.h"
|
||||
#include "errcode.h"
|
||||
@ -322,31 +322,6 @@ struct btree_iter {
|
||||
#endif
|
||||
};
|
||||
|
||||
struct btree_key_cache_freelist {
|
||||
struct bkey_cached *objs[16];
|
||||
unsigned nr;
|
||||
};
|
||||
|
||||
struct btree_key_cache {
|
||||
struct mutex lock;
|
||||
struct rhashtable table;
|
||||
bool table_init_done;
|
||||
struct list_head freed_pcpu;
|
||||
struct list_head freed_nonpcpu;
|
||||
struct shrinker shrink;
|
||||
unsigned shrink_iter;
|
||||
struct btree_key_cache_freelist __percpu *pcpu_freed;
|
||||
|
||||
atomic_long_t nr_freed;
|
||||
atomic_long_t nr_keys;
|
||||
atomic_long_t nr_dirty;
|
||||
};
|
||||
|
||||
struct bkey_cached_key {
|
||||
u32 btree_id;
|
||||
struct bpos pos;
|
||||
} __packed __aligned(4);
|
||||
|
||||
#define BKEY_CACHED_ACCESSED 0
|
||||
#define BKEY_CACHED_DIRTY 1
|
||||
|
||||
@ -362,7 +337,6 @@ struct bkey_cached {
|
||||
struct rhash_head hash;
|
||||
struct list_head list;
|
||||
|
||||
struct journal_preres res;
|
||||
struct journal_entry_pin journal;
|
||||
u64 seq;
|
||||
|
||||
@ -392,7 +366,6 @@ struct btree_insert_entry {
|
||||
u8 old_btree_u64s;
|
||||
struct bkey_i *k;
|
||||
struct btree_path *path;
|
||||
u64 seq;
|
||||
/* key being overwritten: */
|
||||
struct bkey old_k;
|
||||
const struct bch_val *old_v;
|
||||
@ -441,6 +414,7 @@ struct btree_trans {
|
||||
bool journal_replay_not_finished:1;
|
||||
bool is_initial_gc:1;
|
||||
bool notrace_relock_fail:1;
|
||||
bool write_locked:1;
|
||||
enum bch_errcode restarted:16;
|
||||
u32 restart_count;
|
||||
unsigned long last_begin_ip;
|
||||
@ -472,11 +446,9 @@ struct btree_trans {
|
||||
struct journal_entry_pin *journal_pin;
|
||||
|
||||
struct journal_res journal_res;
|
||||
struct journal_preres journal_preres;
|
||||
u64 *journal_seq;
|
||||
struct disk_reservation *disk_res;
|
||||
unsigned journal_u64s;
|
||||
unsigned journal_preres_u64s;
|
||||
struct replicas_delta_list *fs_usage_deltas;
|
||||
};
|
||||
|
||||
@ -717,6 +689,17 @@ static inline bool btree_type_has_snapshots(enum btree_id id)
|
||||
return (1U << id) & mask;
|
||||
}
|
||||
|
||||
static inline bool btree_type_has_snapshot_field(enum btree_id id)
|
||||
{
|
||||
const unsigned mask = 0
|
||||
#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
|
||||
BCH_BTREE_IDS()
|
||||
#undef x
|
||||
;
|
||||
|
||||
return (1U << id) & mask;
|
||||
}
|
||||
|
||||
static inline bool btree_type_has_ptrs(enum btree_id id)
|
||||
{
|
||||
const unsigned mask = 0
|
||||
|
@ -380,21 +380,12 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_insert_entry *i, n;
|
||||
u64 seq = 0;
|
||||
int cmp;
|
||||
|
||||
EBUG_ON(!path->should_be_locked);
|
||||
EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
|
||||
EBUG_ON(!bpos_eq(k->k.p, path->pos));
|
||||
|
||||
/*
|
||||
* The transaction journal res hasn't been allocated at this point.
|
||||
* That occurs at commit time. Reuse the seq field to pass in the seq
|
||||
* of a prejournaled key.
|
||||
*/
|
||||
if (flags & BTREE_UPDATE_PREJOURNAL)
|
||||
seq = trans->journal_res.seq;
|
||||
|
||||
n = (struct btree_insert_entry) {
|
||||
.flags = flags,
|
||||
.bkey_type = __btree_node_type(path->level, path->btree_id),
|
||||
@ -403,7 +394,6 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
|
||||
.cached = path->cached,
|
||||
.path = path,
|
||||
.k = k,
|
||||
.seq = seq,
|
||||
.ip_allocated = ip,
|
||||
};
|
||||
|
||||
@ -431,7 +421,6 @@ bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
|
||||
i->cached = n.cached;
|
||||
i->k = n.k;
|
||||
i->path = n.path;
|
||||
i->seq = n.seq;
|
||||
i->ip_allocated = n.ip_allocated;
|
||||
} else {
|
||||
array_insert_item(trans->updates, trans->nr_updates,
|
||||
@ -542,18 +531,6 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
|
||||
return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a transaction update for a key that has already been journaled.
|
||||
*/
|
||||
int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq,
|
||||
struct btree_iter *iter, struct bkey_i *k,
|
||||
enum btree_update_flags flags)
|
||||
{
|
||||
trans->journal_res.seq = seq;
|
||||
return bch2_trans_update(trans, iter, k, flags|BTREE_UPDATE_NOJOURNAL|
|
||||
BTREE_UPDATE_PREJOURNAL);
|
||||
}
|
||||
|
||||
int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
|
||||
enum btree_id btree,
|
||||
struct bkey_i *k)
|
||||
@ -792,7 +769,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
|
||||
bch2_trans_commit(trans, &disk_res, journal_seq,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
bch2_disk_reservation_put(trans->c, &disk_res);
|
||||
err:
|
||||
/*
|
||||
@ -897,7 +874,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
|
||||
ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
|
||||
} else {
|
||||
ret = bch2_trans_do(c, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|commit_flags,
|
||||
BCH_TRANS_COMMIT_lazy_rw|commit_flags,
|
||||
__bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args));
|
||||
}
|
||||
|
||||
|
@ -21,37 +21,28 @@ void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
|
||||
void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
|
||||
struct bkey_i *, u64);
|
||||
|
||||
enum btree_insert_flags {
|
||||
#define BCH_TRANS_COMMIT_FLAGS() \
|
||||
x(no_enospc, "don't check for enospc") \
|
||||
x(no_check_rw, "don't attempt to take a ref on c->writes") \
|
||||
x(lazy_rw, "go read-write if we haven't yet - only for use in recovery") \
|
||||
x(no_journal_res, "don't take a journal reservation, instead " \
|
||||
"pin journal entry referred to by trans->journal_res.seq") \
|
||||
x(journal_reclaim, "operation required for journal reclaim; may return error" \
|
||||
"instead of deadlocking if BCH_WATERMARK_reclaim not specified")\
|
||||
|
||||
enum __bch_trans_commit_flags {
|
||||
/* First bits for bch_watermark: */
|
||||
__BTREE_INSERT_NOFAIL = BCH_WATERMARK_BITS,
|
||||
__BTREE_INSERT_NOCHECK_RW,
|
||||
__BTREE_INSERT_LAZY_RW,
|
||||
__BTREE_INSERT_JOURNAL_REPLAY,
|
||||
__BTREE_INSERT_JOURNAL_RECLAIM,
|
||||
__BTREE_INSERT_NOWAIT,
|
||||
__BTREE_INSERT_GC_LOCK_HELD,
|
||||
__BCH_HASH_SET_MUST_CREATE,
|
||||
__BCH_HASH_SET_MUST_REPLACE,
|
||||
__BCH_TRANS_COMMIT_FLAGS_START = BCH_WATERMARK_BITS,
|
||||
#define x(n, ...) __BCH_TRANS_COMMIT_##n,
|
||||
BCH_TRANS_COMMIT_FLAGS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
/* Don't check for -ENOSPC: */
|
||||
#define BTREE_INSERT_NOFAIL BIT(__BTREE_INSERT_NOFAIL)
|
||||
|
||||
#define BTREE_INSERT_NOCHECK_RW BIT(__BTREE_INSERT_NOCHECK_RW)
|
||||
#define BTREE_INSERT_LAZY_RW BIT(__BTREE_INSERT_LAZY_RW)
|
||||
|
||||
/* Insert is for journal replay - don't get journal reservations: */
|
||||
#define BTREE_INSERT_JOURNAL_REPLAY BIT(__BTREE_INSERT_JOURNAL_REPLAY)
|
||||
|
||||
/* Insert is being called from journal reclaim path: */
|
||||
#define BTREE_INSERT_JOURNAL_RECLAIM BIT(__BTREE_INSERT_JOURNAL_RECLAIM)
|
||||
|
||||
/* Don't block on allocation failure (for new btree nodes: */
|
||||
#define BTREE_INSERT_NOWAIT BIT(__BTREE_INSERT_NOWAIT)
|
||||
#define BTREE_INSERT_GC_LOCK_HELD BIT(__BTREE_INSERT_GC_LOCK_HELD)
|
||||
|
||||
#define BCH_HASH_SET_MUST_CREATE BIT(__BCH_HASH_SET_MUST_CREATE)
|
||||
#define BCH_HASH_SET_MUST_REPLACE BIT(__BCH_HASH_SET_MUST_REPLACE)
|
||||
enum bch_trans_commit_flags {
|
||||
#define x(n, ...) BCH_TRANS_COMMIT_##n = BIT(__BCH_TRANS_COMMIT_##n),
|
||||
BCH_TRANS_COMMIT_FLAGS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
|
||||
unsigned, unsigned);
|
||||
|
@ -475,9 +475,6 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
|
||||
/*
|
||||
* Protects reaping from the btree node cache and using the btree node
|
||||
* open bucket reserve:
|
||||
*
|
||||
* BTREE_INSERT_NOWAIT only applies to btree node allocation, not
|
||||
* blocking on this lock:
|
||||
*/
|
||||
ret = bch2_btree_cache_cannibalize_lock(c, cl);
|
||||
if (ret)
|
||||
@ -487,8 +484,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
|
||||
struct prealloc_nodes *p = as->prealloc_nodes + interior;
|
||||
|
||||
while (p->nr < nr_nodes[interior]) {
|
||||
b = __bch2_btree_node_alloc(trans, &as->disk_res,
|
||||
flags & BTREE_INSERT_NOWAIT ? NULL : cl,
|
||||
b = __bch2_btree_node_alloc(trans, &as->disk_res, cl,
|
||||
interior, flags);
|
||||
if (IS_ERR(b)) {
|
||||
ret = PTR_ERR(b);
|
||||
@ -513,8 +509,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
|
||||
up_read(&c->gc_lock);
|
||||
as->took_gc_lock = false;
|
||||
|
||||
bch2_journal_preres_put(&c->journal, &as->journal_preres);
|
||||
|
||||
bch2_journal_pin_drop(&c->journal, &as->journal);
|
||||
bch2_journal_pin_flush(&c->journal, &as->journal);
|
||||
bch2_disk_reservation_put(c, &as->disk_res);
|
||||
@ -646,9 +640,9 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
*/
|
||||
ret = commit_do(trans, &as->disk_res, &journal_seq,
|
||||
BCH_WATERMARK_reclaim|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_JOURNAL_RECLAIM,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_no_check_rw|
|
||||
BCH_TRANS_COMMIT_journal_reclaim,
|
||||
btree_update_nodes_written_trans(trans, as));
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
@ -734,8 +728,6 @@ err:
|
||||
|
||||
bch2_journal_pin_drop(&c->journal, &as->journal);
|
||||
|
||||
bch2_journal_preres_put(&c->journal, &as->journal_preres);
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
for (i = 0; i < as->nr_new_nodes; i++) {
|
||||
b = as->new_nodes[i];
|
||||
@ -818,6 +810,12 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
}
|
||||
|
||||
static int bch2_update_reparent_journal_pin_flush(struct journal *j,
|
||||
struct journal_entry_pin *_pin, u64 seq)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void btree_update_reparent(struct btree_update *as,
|
||||
struct btree_update *child)
|
||||
{
|
||||
@ -828,7 +826,8 @@ static void btree_update_reparent(struct btree_update *as,
|
||||
child->b = NULL;
|
||||
child->mode = BTREE_INTERIOR_UPDATING_AS;
|
||||
|
||||
bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL);
|
||||
bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal,
|
||||
bch2_update_reparent_journal_pin_flush);
|
||||
}
|
||||
|
||||
static void btree_update_updated_root(struct btree_update *as, struct btree *b)
|
||||
@ -937,6 +936,12 @@ static void bch2_btree_update_get_open_buckets(struct btree_update *as, struct b
|
||||
b->ob.v[--b->ob.nr];
|
||||
}
|
||||
|
||||
static int bch2_btree_update_will_free_node_journal_pin_flush(struct journal *j,
|
||||
struct journal_entry_pin *_pin, u64 seq)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* @b is being split/rewritten: it may have pointers to not-yet-written btree
|
||||
* nodes and thus outstanding btree_updates - redirect @b's
|
||||
@ -988,11 +993,13 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
|
||||
* when the new nodes are persistent and reachable on disk:
|
||||
*/
|
||||
w = btree_current_write(b);
|
||||
bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL);
|
||||
bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal,
|
||||
bch2_btree_update_will_free_node_journal_pin_flush);
|
||||
bch2_journal_pin_drop(&c->journal, &w->journal);
|
||||
|
||||
w = btree_prev_write(b);
|
||||
bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL);
|
||||
bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal,
|
||||
bch2_btree_update_will_free_node_journal_pin_flush);
|
||||
bch2_journal_pin_drop(&c->journal, &w->journal);
|
||||
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
@ -1042,7 +1049,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_update *as;
|
||||
u64 start_time = local_clock();
|
||||
int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
|
||||
int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc)
|
||||
? BCH_DISK_RESERVATION_NOFAIL : 0;
|
||||
unsigned nr_nodes[2] = { 0, 0 };
|
||||
unsigned update_level = level;
|
||||
@ -1061,7 +1068,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
flags &= ~BCH_WATERMARK_MASK;
|
||||
flags |= watermark;
|
||||
|
||||
if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
|
||||
if (flags & BCH_TRANS_COMMIT_journal_reclaim)
|
||||
journal_flags |= JOURNAL_RES_GET_NONBLOCK;
|
||||
journal_flags |= watermark;
|
||||
|
||||
@ -1087,9 +1094,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
|
||||
}
|
||||
|
||||
if (flags & BTREE_INSERT_GC_LOCK_HELD)
|
||||
lockdep_assert_held(&c->gc_lock);
|
||||
else if (!down_read_trylock(&c->gc_lock)) {
|
||||
if (!down_read_trylock(&c->gc_lock)) {
|
||||
ret = drop_locks_do(trans, (down_read(&c->gc_lock), 0));
|
||||
if (ret) {
|
||||
up_read(&c->gc_lock);
|
||||
@ -1103,7 +1108,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
as->c = c;
|
||||
as->start_time = start_time;
|
||||
as->mode = BTREE_INTERIOR_NO_UPDATE;
|
||||
as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD);
|
||||
as->took_gc_lock = true;
|
||||
as->btree_id = path->btree_id;
|
||||
as->update_level = update_level;
|
||||
INIT_LIST_HEAD(&as->list);
|
||||
@ -1129,27 +1134,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
|
||||
BTREE_UPDATE_JOURNAL_RES,
|
||||
journal_flags|JOURNAL_RES_GET_NONBLOCK);
|
||||
if (ret) {
|
||||
if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
|
||||
ret = -BCH_ERR_journal_reclaim_would_deadlock;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = drop_locks_do(trans,
|
||||
bch2_journal_preres_get(&c->journal, &as->journal_preres,
|
||||
BTREE_UPDATE_JOURNAL_RES,
|
||||
journal_flags));
|
||||
if (ret == -BCH_ERR_journal_preres_get_blocked) {
|
||||
trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
|
||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
|
||||
}
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_disk_reservation_get(c, &as->disk_res,
|
||||
(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
|
||||
c->opts.metadata_replicas,
|
||||
@ -1167,7 +1151,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
* flag
|
||||
*/
|
||||
if (bch2_err_matches(ret, ENOSPC) &&
|
||||
(flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
|
||||
(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
|
||||
watermark != BCH_WATERMARK_reclaim) {
|
||||
ret = -BCH_ERR_journal_reclaim_would_deadlock;
|
||||
goto err;
|
||||
@ -1855,7 +1839,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
|
||||
parent = btree_node_parent(path, b);
|
||||
as = bch2_btree_update_start(trans, path, level, false,
|
||||
BTREE_INSERT_NOFAIL|flags);
|
||||
BCH_TRANS_COMMIT_no_enospc|flags);
|
||||
ret = PTR_ERR_OR_ZERO(as);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -1941,7 +1925,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
|
||||
struct btree_update *as;
|
||||
int ret;
|
||||
|
||||
flags |= BTREE_INSERT_NOFAIL;
|
||||
flags |= BCH_TRANS_COMMIT_no_enospc;
|
||||
|
||||
parent = btree_node_parent(iter->path, b);
|
||||
as = bch2_btree_update_start(trans, iter->path, b->c.level,
|
||||
@ -2418,23 +2402,17 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry
|
||||
|
||||
struct jset_entry *
|
||||
bch2_btree_roots_to_journal_entries(struct bch_fs *c,
|
||||
struct jset_entry *start,
|
||||
struct jset_entry *end)
|
||||
struct jset_entry *end,
|
||||
unsigned long skip)
|
||||
{
|
||||
struct jset_entry *entry;
|
||||
unsigned long have = 0;
|
||||
unsigned i;
|
||||
|
||||
for (entry = start; entry < end; entry = vstruct_next(entry))
|
||||
if (entry->type == BCH_JSET_ENTRY_btree_root)
|
||||
__set_bit(entry->btree_id, &have);
|
||||
|
||||
mutex_lock(&c->btree_root_lock);
|
||||
|
||||
for (i = 0; i < btree_id_nr_alive(c); i++) {
|
||||
struct btree_root *r = bch2_btree_id_root(c, i);
|
||||
|
||||
if (r->alive && !test_bit(i, &have)) {
|
||||
if (r->alive && !test_bit(i, &skip)) {
|
||||
journal_entry_set(end, BCH_JSET_ENTRY_btree_root,
|
||||
i, r->level, &r->key, r->key.k.u64s);
|
||||
end = vstruct_next(end);
|
||||
|
@ -55,7 +55,6 @@ struct btree_update {
|
||||
unsigned update_level;
|
||||
|
||||
struct disk_reservation disk_res;
|
||||
struct journal_preres journal_preres;
|
||||
|
||||
/*
|
||||
* BTREE_INTERIOR_UPDATING_NODE:
|
||||
@ -325,7 +324,7 @@ bool bch2_btree_interior_updates_flush(struct bch_fs *);
|
||||
|
||||
void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *);
|
||||
struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
|
||||
struct jset_entry *, struct jset_entry *);
|
||||
struct jset_entry *, unsigned long);
|
||||
|
||||
void bch2_do_pending_node_rewrites(struct bch_fs *);
|
||||
void bch2_free_pending_node_rewrites(struct bch_fs *);
|
||||
|
@ -9,9 +9,11 @@
|
||||
#include "journal.h"
|
||||
#include "journal_reclaim.h"
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
static int bch2_btree_write_buffer_journal_flush(struct journal *,
|
||||
struct journal_entry_pin *, u64);
|
||||
|
||||
static int btree_write_buffered_key_cmp(const void *_l, const void *_r)
|
||||
{
|
||||
const struct btree_write_buffered_key *l = _l;
|
||||
@ -46,6 +48,13 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* We can't clone a path that has write locks: unshare it now, before
|
||||
* set_pos and traverse():
|
||||
*/
|
||||
if (iter->path->ref > 1)
|
||||
iter->path = __bch2_btree_path_make_mut(trans, iter->path, true, _THIS_IP_);
|
||||
|
||||
path = iter->path;
|
||||
|
||||
if (!*write_locked) {
|
||||
@ -65,24 +74,18 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans,
|
||||
|
||||
bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
|
||||
(*fast)++;
|
||||
|
||||
if (path->ref > 1) {
|
||||
/*
|
||||
* We can't clone a path that has write locks: if the path is
|
||||
* shared, unlock before set_pos(), traverse():
|
||||
*/
|
||||
bch2_btree_node_unlock_write(trans, path, path->l[0].b);
|
||||
*write_locked = false;
|
||||
}
|
||||
return 0;
|
||||
trans_commit:
|
||||
return bch2_trans_update_seq(trans, wb->journal_seq, iter, &wb->k,
|
||||
trans->journal_res.seq = wb->journal_seq;
|
||||
|
||||
return bch2_trans_update(trans, iter, &wb->k,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
commit_flags|
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_JOURNAL_RECLAIM);
|
||||
BCH_TRANS_COMMIT_no_check_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_no_journal_res|
|
||||
BCH_TRANS_COMMIT_journal_reclaim);
|
||||
}
|
||||
|
||||
static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb)
|
||||
@ -125,8 +128,10 @@ btree_write_buffered_insert(struct btree_trans *trans,
|
||||
bch2_trans_iter_init(trans, &iter, wb->btree, bkey_start_pos(&wb->k.k),
|
||||
BTREE_ITER_CACHED|BTREE_ITER_INTENT);
|
||||
|
||||
trans->journal_res.seq = wb->journal_seq;
|
||||
|
||||
ret = bch2_btree_iter_traverse(&iter) ?:
|
||||
bch2_trans_update_seq(trans, wb->journal_seq, &iter, &wb->k,
|
||||
bch2_trans_update(trans, &iter, &wb->k,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
@ -151,7 +156,8 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
|
||||
if (!locked && !mutex_trylock(&wb->flush_lock))
|
||||
return 0;
|
||||
|
||||
bch2_journal_pin_copy(j, &pin, &wb->journal_pin, NULL);
|
||||
bch2_journal_pin_copy(j, &pin, &wb->journal_pin,
|
||||
bch2_btree_write_buffer_journal_flush);
|
||||
bch2_journal_pin_drop(j, &wb->journal_pin);
|
||||
|
||||
s = btree_write_buffer_switch(wb);
|
||||
@ -169,7 +175,7 @@ int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_f
|
||||
* However, since we're not flushing in the order they appear in the
|
||||
* journal we won't be able to drop our journal pin until everything is
|
||||
* flushed - which means this could deadlock the journal if we weren't
|
||||
* passing BTREE_INSERT_JOURNAL_RECLAIM. This causes the update to fail
|
||||
* passing BCH_TRANS_COMMIT_journal_reclaim. This causes the update to fail
|
||||
* if it would block taking a journal reservation.
|
||||
*
|
||||
* If that happens, simply skip the key so we can optimistically insert
|
||||
@ -253,21 +259,14 @@ slowpath:
|
||||
if (!i->journal_seq)
|
||||
continue;
|
||||
|
||||
if (i->journal_seq > pin.seq) {
|
||||
struct journal_entry_pin pin2;
|
||||
|
||||
memset(&pin2, 0, sizeof(pin2));
|
||||
|
||||
bch2_journal_pin_add(j, i->journal_seq, &pin2, NULL);
|
||||
bch2_journal_pin_drop(j, &pin);
|
||||
bch2_journal_pin_copy(j, &pin, &pin2, NULL);
|
||||
bch2_journal_pin_drop(j, &pin2);
|
||||
}
|
||||
bch2_journal_pin_update(j, i->journal_seq, &pin,
|
||||
bch2_btree_write_buffer_journal_flush);
|
||||
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
commit_flags|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_JOURNAL_RECLAIM,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_no_journal_res|
|
||||
BCH_TRANS_COMMIT_journal_reclaim,
|
||||
btree_write_buffered_insert(trans, i));
|
||||
if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)))
|
||||
break;
|
||||
@ -297,7 +296,7 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
|
||||
mutex_lock(&wb->flush_lock);
|
||||
|
||||
return bch2_trans_run(c,
|
||||
__bch2_btree_write_buffer_flush(trans, BTREE_INSERT_NOCHECK_RW, true));
|
||||
__bch2_btree_write_buffer_flush(trans, BCH_TRANS_COMMIT_no_check_rw, true));
|
||||
}
|
||||
|
||||
static inline u64 btree_write_buffer_ref(int idx)
|
||||
|
21
libbcachefs/darray.c
Normal file
21
libbcachefs/darray.c
Normal file
@ -0,0 +1,21 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/log2.h>
|
||||
#include <linux/slab.h>
|
||||
#include "darray.h"
|
||||
|
||||
/*
 * Out-of-line slow path for growing a darray.
 *
 * If @new_size exceeds the current capacity (d->size), the requested size is
 * rounded up to a power of two and the backing buffer is reallocated with
 * krealloc_array() using @gfp. On success d->data and d->size are updated.
 *
 * Returns 0 on success (including when no growth was needed), or -ENOMEM if
 * the reallocation fails, in which case @d is left unmodified.
 */
int __bch2_darray_resize(darray_void *d, size_t element_size, size_t new_size, gfp_t gfp)
{
	void *data;

	/* Already big enough: nothing to do */
	if (new_size <= d->size)
		return 0;

	new_size = roundup_pow_of_two(new_size);

	data = krealloc_array(d->data, new_size, element_size, gfp);
	if (!data)
		return -ENOMEM;

	d->data = data;
	d->size = new_size;
	return 0;
}
|
@ -8,7 +8,6 @@
|
||||
* Inspired by CCAN's darray
|
||||
*/
|
||||
|
||||
#include "util.h"
|
||||
#include <linux/slab.h>
|
||||
|
||||
#define DARRAY(type) \
|
||||
@ -19,20 +18,25 @@ struct { \
|
||||
|
||||
typedef DARRAY(void) darray_void;
|
||||
|
||||
static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp)
|
||||
int __bch2_darray_resize(darray_void *, size_t, size_t, gfp_t);
|
||||
|
||||
static inline int __darray_resize(darray_void *d, size_t element_size,
|
||||
size_t new_size, gfp_t gfp)
|
||||
{
|
||||
if (d->nr + more > d->size) {
|
||||
size_t new_size = roundup_pow_of_two(d->nr + more);
|
||||
void *data = krealloc_array(d->data, new_size, t_size, gfp);
|
||||
|
||||
if (!data)
|
||||
return -ENOMEM;
|
||||
|
||||
d->data = data;
|
||||
d->size = new_size;
|
||||
return unlikely(new_size > d->size)
|
||||
? __bch2_darray_resize(d, element_size, new_size, gfp)
|
||||
: 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
/*
 * Type-generic wrapper around __darray_resize(): casts @_d to darray_void *
 * and passes the size of one element of @_d's data array, the requested
 * capacity @_new_size, and the allocation flags @_gfp.
 */
#define darray_resize_gfp(_d, _new_size, _gfp) \
|
||||
__darray_resize((darray_void *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp)
|
||||
|
||||
/* Same as darray_resize_gfp(), with the allocation flags fixed to GFP_KERNEL. */
#define darray_resize(_d, _new_size) \
|
||||
darray_resize_gfp(_d, _new_size, GFP_KERNEL)
|
||||
|
||||
/*
 * Make sure the darray can hold @more elements on top of the d->nr it
 * already contains, by asking __darray_resize() for a total capacity of
 * d->nr + more. Returns whatever __darray_resize() returns.
 */
static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp)
{
	size_t wanted = d->nr + more;

	return __darray_resize(d, t_size, wanted, gfp);
}
|
||||
|
||||
#define darray_make_room_gfp(_d, _more, _gfp) \
|
||||
@ -41,6 +45,8 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more,
|
||||
#define darray_make_room(_d, _more) \
|
||||
darray_make_room_gfp(_d, _more, GFP_KERNEL)
|
||||
|
||||
#define darray_room(_d) ((_d).size - (_d).nr)
|
||||
|
||||
#define darray_top(_d) ((_d).data[(_d).nr])
|
||||
|
||||
#define darray_push_gfp(_d, _item, _gfp) \
|
||||
|
@ -239,6 +239,34 @@ restart_drop_extra_replicas:
|
||||
|
||||
next_pos = insert->k.p;
|
||||
|
||||
/*
|
||||
* Check for nonce offset inconsistency:
|
||||
* This is debug code - we've been seeing this bug rarely, and
|
||||
* it's been hard to reproduce, so this should give us some more
|
||||
* information when it does occur:
|
||||
*/
|
||||
struct printbuf err = PRINTBUF;
|
||||
int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err);
|
||||
printbuf_exit(&err);
|
||||
|
||||
if (invalid) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_str(&buf, "about to insert invalid key in data update path");
|
||||
prt_str(&buf, "\nold: ");
|
||||
bch2_bkey_val_to_text(&buf, c, old);
|
||||
prt_str(&buf, "\nk: ");
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
prt_str(&buf, "\nnew: ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
|
||||
|
||||
bch2_print_string_as_lines(KERN_ERR, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
|
||||
bch2_fatal_error(c);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
|
||||
k.k->p, bkey_start_pos(&insert->k)) ?:
|
||||
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
|
||||
@ -250,8 +278,8 @@ restart_drop_extra_replicas:
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, &op->res,
|
||||
NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BCH_TRANS_COMMIT_no_check_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
m->data_opts.btree_insert_flags);
|
||||
if (!ret) {
|
||||
bch2_btree_iter_set_pos(&iter, next_pos);
|
||||
|
@ -201,7 +201,8 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
|
||||
int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
|
||||
const struct bch_hash_info *hash_info,
|
||||
u8 type, const struct qstr *name, u64 dst_inum,
|
||||
u64 *dir_offset, int flags)
|
||||
u64 *dir_offset,
|
||||
bch_str_hash_flags_t str_hash_flags)
|
||||
{
|
||||
struct bkey_i_dirent *dirent;
|
||||
int ret;
|
||||
@ -212,7 +213,7 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
|
||||
return ret;
|
||||
|
||||
ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
|
||||
dir, &dirent->k_i, flags);
|
||||
dir, &dirent->k_i, str_hash_flags);
|
||||
*dir_offset = dirent->k.p.offset;
|
||||
|
||||
return ret;
|
||||
|
@ -37,7 +37,8 @@ int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
|
||||
|
||||
int bch2_dirent_create(struct btree_trans *, subvol_inum,
|
||||
const struct bch_hash_info *, u8,
|
||||
const struct qstr *, u64, u64 *, int);
|
||||
const struct qstr *, u64, u64 *,
|
||||
bch_str_hash_flags_t);
|
||||
|
||||
static inline unsigned vfs_d_type(unsigned type)
|
||||
{
|
||||
|
@ -555,6 +555,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
|
||||
case TARGET_DEV: {
|
||||
struct bch_dev *ca;
|
||||
|
||||
out->atomic++;
|
||||
rcu_read_lock();
|
||||
ca = t.dev < c->sb.nr_devices
|
||||
? rcu_dereference(c->devs[t.dev])
|
||||
@ -570,6 +571,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
out->atomic--;
|
||||
break;
|
||||
}
|
||||
case TARGET_GROUP:
|
||||
@ -580,7 +582,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
|
||||
static void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
|
||||
{
|
||||
struct target t = target_decode(v);
|
||||
|
||||
|
@ -150,6 +150,7 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
|
||||
if (i < nr_data)
|
||||
prt_printf(out, "#%u", stripe_blockcount_get(s, i));
|
||||
prt_printf(out, " gen %u", ptr->gen);
|
||||
if (ptr_stale(ca, ptr))
|
||||
prt_printf(out, " stale");
|
||||
}
|
||||
@ -303,16 +304,21 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
|
||||
struct bch_csum got = ec_block_checksum(buf, i, offset);
|
||||
|
||||
if (bch2_crc_cmp(want, got)) {
|
||||
struct printbuf buf2 = PRINTBUF;
|
||||
struct printbuf err = PRINTBUF;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev);
|
||||
|
||||
bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&buf->key));
|
||||
prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n",
|
||||
want.hi, want.lo,
|
||||
got.hi, got.lo,
|
||||
bch2_csum_types[v->csum_type]);
|
||||
prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i);
|
||||
bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
|
||||
bch_err_ratelimited(ca, "%s", err.buf);
|
||||
printbuf_exit(&err);
|
||||
|
||||
bch_err_ratelimited(c,
|
||||
"stripe checksum error for %ps at %u:%u: csum type %u, expected %llx got %llx\n%s",
|
||||
(void *) _RET_IP_, i, j, v->csum_type,
|
||||
want.lo, got.lo, buf2.buf);
|
||||
printbuf_exit(&buf2);
|
||||
clear_bit(i, buf->valid);
|
||||
|
||||
bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -475,14 +481,10 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Convenience wrapper: run get_stripe_key_trans() for stripe @idx via
 * bch2_trans_run() on filesystem @c, filling in @stripe, and return its result.
 * NOTE(review): `trans` is presumably declared by the bch2_trans_run() macro -
 * confirm against its definition.
 */
static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
{
	int ret = bch2_trans_run(c, get_stripe_key_trans(trans, idx, stripe));

	return ret;
}
|
||||
|
||||
/* recovery read path: */
|
||||
int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
|
||||
int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct ec_stripe_buf *buf;
|
||||
struct closure cl;
|
||||
struct bch_stripe *v;
|
||||
@ -497,7 +499,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
|
||||
if (!buf)
|
||||
return -BCH_ERR_ENOMEM_ec_read_extent;
|
||||
|
||||
ret = get_stripe_key(c, rbio->pick.ec.idx, buf);
|
||||
ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf));
|
||||
if (ret) {
|
||||
bch_err_ratelimited(c,
|
||||
"error doing reconstruct read: error %i looking up stripe", ret);
|
||||
@ -801,7 +803,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
|
||||
if (!idx)
|
||||
break;
|
||||
|
||||
ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
ec_stripe_delete(trans, idx));
|
||||
if (ret) {
|
||||
bch_err_fn(c, ret);
|
||||
@ -981,8 +983,8 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
|
||||
|
||||
while (1) {
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BCH_TRANS_COMMIT_no_check_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
ec_stripe_update_extent(trans, bucket_pos, bucket.gen,
|
||||
s, &bp_pos));
|
||||
if (ret)
|
||||
@ -1119,8 +1121,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
|
||||
}
|
||||
|
||||
ret = bch2_trans_do(c, &s->res, NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BCH_TRANS_COMMIT_no_check_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
ec_stripe_key_update(trans,
|
||||
bkey_i_to_stripe(&s->new_stripe.key),
|
||||
!s->have_existing_stripe));
|
||||
@ -1371,6 +1373,15 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
|
||||
h->nr_active_devs++;
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* If we only have redundancy + 1 devices, we're better off with just
|
||||
* replication:
|
||||
*/
|
||||
if (h->nr_active_devs < h->redundancy + 2)
|
||||
bch_err(c, "insufficient devices available to create stripe (have %u, need %u) - mismatched bucket sizes?",
|
||||
h->nr_active_devs, h->redundancy + 2);
|
||||
|
||||
list_add(&h->list, &c->ec_stripe_head_list);
|
||||
return h;
|
||||
}
|
||||
@ -1422,6 +1433,11 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
|
||||
|
||||
h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark);
|
||||
found:
|
||||
if (!IS_ERR_OR_NULL(h) &&
|
||||
h->nr_active_devs < h->redundancy + 2) {
|
||||
mutex_unlock(&h->lock);
|
||||
h = NULL;
|
||||
}
|
||||
mutex_unlock(&c->ec_stripe_head_lock);
|
||||
return h;
|
||||
}
|
||||
@ -1679,8 +1695,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
|
||||
int ret;
|
||||
|
||||
h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark);
|
||||
if (!h)
|
||||
bch_err(c, "no stripe head");
|
||||
if (IS_ERR_OR_NULL(h))
|
||||
return h;
|
||||
|
||||
|
@ -199,7 +199,7 @@ struct ec_stripe_head {
|
||||
struct ec_stripe_new *s;
|
||||
};
|
||||
|
||||
int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *);
|
||||
int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *);
|
||||
|
||||
void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *);
|
||||
|
||||
|
@ -73,7 +73,6 @@
|
||||
x(ENOMEM, ENOMEM_fsck_add_nlink) \
|
||||
x(ENOMEM, ENOMEM_journal_key_insert) \
|
||||
x(ENOMEM, ENOMEM_journal_keys_sort) \
|
||||
x(ENOMEM, ENOMEM_journal_replay) \
|
||||
x(ENOMEM, ENOMEM_read_superblock_clean) \
|
||||
x(ENOMEM, ENOMEM_fs_alloc) \
|
||||
x(ENOMEM, ENOMEM_fs_name_alloc) \
|
||||
|
@ -13,7 +13,7 @@
|
||||
|
||||
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
|
||||
loff_t start, u64 end,
|
||||
int fgp_flags, gfp_t gfp,
|
||||
fgf_t fgp_flags, gfp_t gfp,
|
||||
folios *fs)
|
||||
{
|
||||
struct folio *f;
|
||||
|
@ -7,7 +7,7 @@
|
||||
typedef DARRAY(struct folio *) folios;
|
||||
|
||||
int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t,
|
||||
u64, int, gfp_t, folios *);
|
||||
u64, fgf_t, gfp_t, folios *);
|
||||
int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t);
|
||||
|
||||
/*
|
||||
|
@ -93,7 +93,7 @@ retry:
|
||||
BTREE_ITER_INTENT) ?:
|
||||
(set ? set(trans, inode, &inode_u, p) : 0) ?:
|
||||
bch2_inode_write(trans, &iter, &inode_u) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
|
||||
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||
|
||||
/*
|
||||
* the btree node lock protects inode->ei_inode, not ei_update_lock;
|
||||
@ -452,7 +452,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
|
||||
bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
|
||||
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_unlink_trans(trans,
|
||||
inode_inum(dir), &dir_u,
|
||||
&inode_u, &dentry->d_name,
|
||||
@ -717,7 +717,7 @@ retry:
|
||||
|
||||
ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
btree_err:
|
||||
bch2_trans_iter_exit(trans, &inode_iter);
|
||||
|
||||
@ -1922,10 +1922,7 @@ out:
|
||||
return dget(sb->s_root);
|
||||
|
||||
err_put_super:
|
||||
sb->s_fs_info = NULL;
|
||||
c->vfs_sb = NULL;
|
||||
deactivate_locked_super(sb);
|
||||
bch2_fs_stop(c);
|
||||
return ERR_PTR(bch2_err_class(ret));
|
||||
}
|
||||
|
||||
@ -1933,10 +1930,7 @@ static void bch2_kill_sb(struct super_block *sb)
|
||||
{
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
|
||||
if (c)
|
||||
c->vfs_sb = NULL;
|
||||
generic_shutdown_super(sb);
|
||||
if (c)
|
||||
bch2_fs_free(c);
|
||||
}
|
||||
|
||||
|
@ -208,8 +208,8 @@ static int fsck_write_inode(struct btree_trans *trans,
|
||||
u32 snapshot)
|
||||
{
|
||||
int ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_lazy_rw,
|
||||
__write_inode(trans, inode, snapshot));
|
||||
if (ret)
|
||||
bch_err_fn(trans->c, ret);
|
||||
@ -354,8 +354,8 @@ static int reattach_inode(struct btree_trans *trans,
|
||||
u32 inode_snapshot)
|
||||
{
|
||||
int ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BCH_TRANS_COMMIT_lazy_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc,
|
||||
__reattach_inode(trans, inode, inode_snapshot));
|
||||
bch_err_msg(trans->c, ret, "reattaching inode %llu", inode->bi_inum);
|
||||
return ret;
|
||||
@ -757,8 +757,8 @@ static int hash_redo_key(struct btree_trans *trans,
|
||||
BCH_HASH_SET_MUST_CREATE,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW);
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_lazy_rw);
|
||||
}
|
||||
|
||||
static int hash_check_key(struct btree_trans *trans,
|
||||
@ -992,7 +992,7 @@ int bch2_check_inodes(struct bch_fs *c)
|
||||
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
|
||||
POS_MIN,
|
||||
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
|
||||
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc,
|
||||
check_inode(trans, &iter, k, &prev, &s, full));
|
||||
|
||||
snapshots_seen_exit(&s);
|
||||
@ -1226,7 +1226,7 @@ static int overlapping_extents_found(struct btree_trans *trans,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE,
|
||||
k1, k2) ?:
|
||||
bch2_trans_commit(trans, &res, NULL,
|
||||
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc);
|
||||
bch2_disk_reservation_put(c, &res);
|
||||
|
||||
if (ret)
|
||||
@ -1465,7 +1465,7 @@ int bch2_check_extents(struct bch_fs *c)
|
||||
POS(BCACHEFS_ROOT_INO, 0),
|
||||
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
|
||||
&res, NULL,
|
||||
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({
|
||||
BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, ({
|
||||
bch2_disk_reservation_put(c, &res);
|
||||
check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
|
||||
check_extent_overbig(trans, &iter, k);
|
||||
@ -1494,7 +1494,7 @@ int bch2_check_indirect_extents(struct bch_fs *c)
|
||||
POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k,
|
||||
&res, NULL,
|
||||
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({
|
||||
BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, ({
|
||||
bch2_disk_reservation_put(c, &res);
|
||||
check_extent_overbig(trans, &iter, k);
|
||||
}));
|
||||
@ -1854,7 +1854,7 @@ int bch2_check_dirents(struct bch_fs *c)
|
||||
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
|
||||
k,
|
||||
NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
|
||||
BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc,
|
||||
check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s));
|
||||
|
||||
bch2_trans_put(trans);
|
||||
@ -1918,7 +1918,7 @@ int bch2_check_xattrs(struct bch_fs *c)
|
||||
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
|
||||
k,
|
||||
NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
|
||||
BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc,
|
||||
check_xattr(trans, &iter, k, &hash_info, &inode)));
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
@ -1949,8 +1949,8 @@ static int check_root_trans(struct btree_trans *trans)
|
||||
root_subvol.v.snapshot = cpu_to_le32(snapshot);
|
||||
root_subvol.v.inode = cpu_to_le64(inum);
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_lazy_rw,
|
||||
bch2_btree_insert_trans(trans, BTREE_ID_subvolumes,
|
||||
&root_subvol.k_i, 0));
|
||||
bch_err_msg(c, ret, "writing root subvol");
|
||||
@ -1986,8 +1986,8 @@ int bch2_check_root(struct bch_fs *c)
|
||||
int ret;
|
||||
|
||||
ret = bch2_trans_do(c, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_lazy_rw,
|
||||
check_root_trans(trans));
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
@ -2116,8 +2116,8 @@ static int check_path(struct btree_trans *trans,
|
||||
return 0;
|
||||
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_lazy_rw,
|
||||
remove_backpointer(trans, inode));
|
||||
if (ret) {
|
||||
bch_err(c, "error removing dirent: %i", ret);
|
||||
@ -2398,7 +2398,7 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
|
||||
for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
|
||||
POS(0, range_start),
|
||||
BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
|
||||
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc,
|
||||
check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)));
|
||||
if (ret < 0) {
|
||||
bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
|
||||
@ -2483,7 +2483,7 @@ int bch2_fix_reflink_p(struct bch_fs *c)
|
||||
BTREE_ID_extents, POS_MIN,
|
||||
BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|
|
||||
BTREE_ITER_ALL_SNAPSHOTS, k,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
|
||||
fix_reflink_p_key(trans, &iter, k)));
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
|
@ -830,7 +830,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans,
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, &delete, 0) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
err:
|
||||
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
break;
|
||||
@ -893,7 +893,7 @@ retry:
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
@ -1057,7 +1057,7 @@ retry:
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
@ -1091,7 +1091,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
|
||||
|
||||
ret = bch2_inode_unpack(k, &inode);
|
||||
if (ret)
|
||||
goto err;
|
||||
goto out;
|
||||
|
||||
if (fsck_err_on(S_ISDIR(inode.bi_mode), c,
|
||||
deleted_inode_is_dir,
|
||||
@ -1109,38 +1109,45 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
|
||||
!fsck_err(c,
|
||||
deleted_inode_but_clean,
|
||||
"filesystem marked as clean but have deleted inode %llu:%u",
|
||||
pos.offset, pos.snapshot))
|
||||
return 0;
|
||||
pos.offset, pos.snapshot)) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (bch2_snapshot_is_internal_node(c, pos.snapshot)) {
|
||||
struct bpos new_min_pos;
|
||||
|
||||
ret = bch2_propagate_key_to_snapshot_leaves(trans, inode_iter.btree_id, k, &new_min_pos);
|
||||
if (ret)
|
||||
goto err;
|
||||
goto out;
|
||||
|
||||
inode.bi_flags &= ~BCH_INODE_unlinked;
|
||||
|
||||
ret = bch2_inode_write_flags(trans, &inode_iter, &inode,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_lazy_rw);
|
||||
bch_err_msg(c, ret, "clearing inode unlinked flag");
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* We'll need another write buffer flush to pick up the new
|
||||
* unlinked inodes in the snapshot leaves:
|
||||
*/
|
||||
*need_another_pass = true;
|
||||
return 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
return 1;
|
||||
err:
|
||||
ret = 1;
|
||||
out:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &inode_iter);
|
||||
return ret;
|
||||
delete:
|
||||
return bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);
|
||||
ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);
|
||||
goto out;
|
||||
}
|
||||
|
||||
int bch2_delete_dead_inodes(struct bch_fs *c)
|
||||
|
@ -256,7 +256,7 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
|
||||
u64 new_i_size = le64_to_cpu(op->v.new_i_size);
|
||||
int ret;
|
||||
|
||||
ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
truncate_set_isize(trans, inum, new_i_size));
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -378,7 +378,7 @@ case LOGGED_OP_FINSERT_start:
|
||||
op->v.state = LOGGED_OP_FINSERT_shift_extents;
|
||||
|
||||
if (insert) {
|
||||
ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
adjust_i_size(trans, inum, src_offset, len) ?:
|
||||
bch2_logged_op_update(trans, &op->k_i));
|
||||
if (ret)
|
||||
@ -390,7 +390,7 @@ case LOGGED_OP_FINSERT_start:
|
||||
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
goto err;
|
||||
|
||||
ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_logged_op_update(trans, &op->k_i));
|
||||
}
|
||||
|
||||
@ -455,7 +455,7 @@ case LOGGED_OP_FINSERT_shift_extents:
|
||||
bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
|
||||
bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
|
||||
bch2_logged_op_update(trans, &op->k_i) ?:
|
||||
bch2_trans_commit(trans, &disk_res, NULL, BTREE_INSERT_NOFAIL);
|
||||
bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||
btree_err:
|
||||
bch2_disk_reservation_put(c, &disk_res);
|
||||
|
||||
@ -470,12 +470,12 @@ btree_err:
|
||||
op->v.state = LOGGED_OP_FINSERT_finish;
|
||||
|
||||
if (!insert) {
|
||||
ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
adjust_i_size(trans, inum, src_offset, shift) ?:
|
||||
bch2_logged_op_update(trans, &op->k_i));
|
||||
} else {
|
||||
/* We need an inode update to update bi_journal_seq for fsync: */
|
||||
ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
adjust_i_size(trans, inum, 0, 0) ?:
|
||||
bch2_logged_op_update(trans, &op->k_i));
|
||||
}
|
||||
|
@ -526,7 +526,7 @@ out:
|
||||
|
||||
static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
|
||||
{
|
||||
bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
bch2_trans_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
__bch2_rbio_narrow_crcs(trans, rbio));
|
||||
}
|
||||
|
||||
@ -1025,7 +1025,7 @@ get_bio:
|
||||
trans->notrace_relock_fail = true;
|
||||
} else {
|
||||
/* Attempting reconstruct read: */
|
||||
if (bch2_ec_read_extent(c, rbio)) {
|
||||
if (bch2_ec_read_extent(trans, rbio)) {
|
||||
bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
|
||||
goto out;
|
||||
}
|
||||
|
@ -202,6 +202,17 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
|
||||
struct btree_iter iter;
|
||||
struct bkey_i *k;
|
||||
struct bkey_i_inode_v3 *inode;
|
||||
/*
|
||||
* Crazy performance optimization:
|
||||
* Every extent update needs to also update the inode: the inode trigger
|
||||
* will set bi->journal_seq to the journal sequence number of this
|
||||
* transaction - for fsync.
|
||||
*
|
||||
* But if that's the only reason we're updating the inode (we're not
|
||||
* updating bi_size or bi_sectors), then we don't need the inode update
|
||||
* to be journalled - if we crash, the bi_journal_seq update will be
|
||||
* lost, but that's fine.
|
||||
*/
|
||||
unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL;
|
||||
int ret;
|
||||
|
||||
@ -305,8 +316,8 @@ int bch2_extent_update(struct btree_trans *trans,
|
||||
i_sectors_delta) ?:
|
||||
bch2_trans_update(trans, iter, k, 0) ?:
|
||||
bch2_trans_commit(trans, disk_res, NULL,
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_no_check_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
@ -1165,7 +1176,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
|
||||
ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents,
|
||||
bkey_start_pos(&orig->k), orig->k.p,
|
||||
BTREE_ITER_INTENT, k,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL, ({
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
|
||||
bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size);
|
||||
}));
|
||||
|
||||
|
@ -361,11 +361,6 @@ static int journal_entry_open(struct journal *j)
|
||||
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
if (j->res_get_blocked_start)
|
||||
bch2_time_stats_update(j->blocked_time,
|
||||
j->res_get_blocked_start);
|
||||
j->res_get_blocked_start = 0;
|
||||
|
||||
mod_delayed_work(c->io_complete_wq,
|
||||
&j->write_work,
|
||||
msecs_to_jiffies(c->opts.journal_flush_delay));
|
||||
@ -465,15 +460,12 @@ retry:
|
||||
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
|
||||
ret = journal_entry_open(j);
|
||||
|
||||
if (ret == JOURNAL_ERR_max_in_flight)
|
||||
if (ret == JOURNAL_ERR_max_in_flight) {
|
||||
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
|
||||
&j->max_in_flight_start, true);
|
||||
trace_and_count(c, journal_entry_full, c);
|
||||
unlock:
|
||||
if ((ret && ret != JOURNAL_ERR_insufficient_devices) &&
|
||||
!j->res_get_blocked_start) {
|
||||
j->res_get_blocked_start = local_clock() ?: 1;
|
||||
trace_and_count(c, journal_full, c);
|
||||
}
|
||||
|
||||
unlock:
|
||||
can_discard = j->can_discard;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
@ -526,36 +518,6 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* journal_preres: */
|
||||
|
||||
static bool journal_preres_available(struct journal *j,
|
||||
struct journal_preres *res,
|
||||
unsigned new_u64s,
|
||||
unsigned flags)
|
||||
{
|
||||
bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true);
|
||||
|
||||
if (!ret && mutex_trylock(&j->reclaim_lock)) {
|
||||
bch2_journal_reclaim(j);
|
||||
mutex_unlock(&j->reclaim_lock);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __bch2_journal_preres_get(struct journal *j,
|
||||
struct journal_preres *res,
|
||||
unsigned new_u64s,
|
||||
unsigned flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
closure_wait_event(&j->preres_wait,
|
||||
(ret = bch2_journal_error(j)) ||
|
||||
journal_preres_available(j, res, new_u64s, flags));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* journal_entry_res: */
|
||||
|
||||
void bch2_journal_entry_res_resize(struct journal *j,
|
||||
@ -1290,6 +1252,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
||||
union journal_res_state s;
|
||||
struct bch_dev *ca;
|
||||
unsigned long now = jiffies;
|
||||
u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes;
|
||||
u64 seq;
|
||||
unsigned i;
|
||||
|
||||
@ -1306,11 +1269,13 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
||||
prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
|
||||
prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
|
||||
prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
|
||||
prt_printf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining);
|
||||
prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]);
|
||||
prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved);
|
||||
prt_printf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes);
|
||||
prt_printf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes);
|
||||
prt_printf(out, "average write size:\t");
|
||||
prt_human_readable_u64(out, nr_writes ? div64_u64(j->entry_bytes_written, nr_writes) : 0);
|
||||
prt_newline(out);
|
||||
prt_printf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim);
|
||||
prt_printf(out, "nr background reclaim:\t%llu\n", j->nr_background_reclaim);
|
||||
prt_printf(out, "reclaim kicked:\t\t%u\n", j->reclaim_kicked);
|
||||
|
@ -395,104 +395,6 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* journal_preres: */
|
||||
|
||||
static inline void journal_set_watermark(struct journal *j)
|
||||
{
|
||||
union journal_preres_state s = READ_ONCE(j->prereserved);
|
||||
unsigned watermark = BCH_WATERMARK_stripe;
|
||||
|
||||
if (fifo_free(&j->pin) < j->pin.size / 4)
|
||||
watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
|
||||
if (fifo_free(&j->pin) < j->pin.size / 8)
|
||||
watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
|
||||
|
||||
if (s.reserved > s.remaining)
|
||||
watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
|
||||
if (!s.remaining)
|
||||
watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
|
||||
|
||||
if (watermark == j->watermark)
|
||||
return;
|
||||
|
||||
swap(watermark, j->watermark);
|
||||
if (watermark > j->watermark)
|
||||
journal_wake(j);
|
||||
}
|
||||
|
||||
static inline void bch2_journal_preres_put(struct journal *j,
|
||||
struct journal_preres *res)
|
||||
{
|
||||
union journal_preres_state s = { .reserved = res->u64s };
|
||||
|
||||
if (!res->u64s)
|
||||
return;
|
||||
|
||||
s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
|
||||
res->u64s = 0;
|
||||
|
||||
if (unlikely(s.waiting)) {
|
||||
clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)),
|
||||
(unsigned long *) &j->prereserved.v);
|
||||
closure_wake_up(&j->preres_wait);
|
||||
}
|
||||
|
||||
if (s.reserved <= s.remaining && j->watermark)
|
||||
journal_set_watermark(j);
|
||||
}
|
||||
|
||||
int __bch2_journal_preres_get(struct journal *,
|
||||
struct journal_preres *, unsigned, unsigned);
|
||||
|
||||
static inline int bch2_journal_preres_get_fast(struct journal *j,
|
||||
struct journal_preres *res,
|
||||
unsigned new_u64s,
|
||||
unsigned flags,
|
||||
bool set_waiting)
|
||||
{
|
||||
int d = new_u64s - res->u64s;
|
||||
union journal_preres_state old, new;
|
||||
u64 v = atomic64_read(&j->prereserved.counter);
|
||||
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
|
||||
int ret;
|
||||
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
ret = 0;
|
||||
|
||||
if (watermark == BCH_WATERMARK_reclaim ||
|
||||
new.reserved + d < new.remaining) {
|
||||
new.reserved += d;
|
||||
ret = 1;
|
||||
} else if (set_waiting && !new.waiting)
|
||||
new.waiting = true;
|
||||
else
|
||||
return 0;
|
||||
} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
if (ret)
|
||||
res->u64s += d;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int bch2_journal_preres_get(struct journal *j,
|
||||
struct journal_preres *res,
|
||||
unsigned new_u64s,
|
||||
unsigned flags)
|
||||
{
|
||||
if (new_u64s <= res->u64s)
|
||||
return 0;
|
||||
|
||||
if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false))
|
||||
return 0;
|
||||
|
||||
if (flags & JOURNAL_RES_GET_NONBLOCK)
|
||||
return -BCH_ERR_journal_preres_get_blocked;
|
||||
|
||||
return __bch2_journal_preres_get(j, res, new_u64s, flags);
|
||||
}
|
||||
|
||||
/* journal_entry_res: */
|
||||
|
||||
void bch2_journal_entry_res_resize(struct journal *,
|
||||
|
@ -1079,6 +1079,12 @@ found:
|
||||
|
||||
if (ja->bucket_seq[ja->cur_idx] &&
|
||||
ja->sectors_free == ca->mi.bucket_size) {
|
||||
#if 0
|
||||
/*
|
||||
* Debug code for ZNS support, where we (probably) want to be
|
||||
* correlated where we stopped in the journal to the zone write
|
||||
* points:
|
||||
*/
|
||||
bch_err(c, "ja->sectors_free == ca->mi.bucket_size");
|
||||
bch_err(c, "cur_idx %u/%u", ja->cur_idx, ja->nr);
|
||||
for (i = 0; i < 3; i++) {
|
||||
@ -1086,6 +1092,7 @@ found:
|
||||
|
||||
bch_err(c, "bucket_seq[%u] = %llu", idx, ja->bucket_seq[idx]);
|
||||
}
|
||||
#endif
|
||||
ja->sectors_free = 0;
|
||||
}
|
||||
|
||||
@ -1585,6 +1592,9 @@ static void journal_write_done(struct closure *cl)
|
||||
|
||||
bch2_journal_space_available(j);
|
||||
|
||||
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
|
||||
&j->max_in_flight_start, false);
|
||||
|
||||
closure_wake_up(&w->wait);
|
||||
journal_wake(j);
|
||||
|
||||
@ -1678,9 +1688,15 @@ static void do_journal_write(struct closure *cl)
|
||||
continue_at(cl, journal_write_done, c->io_complete_wq);
|
||||
}
|
||||
|
||||
static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset)
|
||||
static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
|
||||
{
|
||||
struct jset_entry *i, *next, *prev = NULL;
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct jset_entry *start, *end, *i, *next, *prev = NULL;
|
||||
struct jset *jset = w->data;
|
||||
unsigned sectors, bytes, u64s;
|
||||
bool validate_before_checksum = false;
|
||||
unsigned long btree_roots_have = 0;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Simple compaction, dropping empty jset_entries (from journal
|
||||
@ -1697,8 +1713,20 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset
|
||||
if (!u64s)
|
||||
continue;
|
||||
|
||||
if (i->type == BCH_JSET_ENTRY_btree_root)
|
||||
/*
|
||||
* New btree roots are set by journalling them; when the journal
|
||||
* entry gets written we have to propagate them to
|
||||
* c->btree_roots
|
||||
*
|
||||
* But, every journal entry we write has to contain all the
|
||||
* btree roots (at least for now); so after we copy btree roots
|
||||
* to c->btree_roots we have to get any missing btree roots and
|
||||
* add them to this journal entry:
|
||||
*/
|
||||
if (i->type == BCH_JSET_ENTRY_btree_root) {
|
||||
bch2_journal_entry_to_btree_root(c, i);
|
||||
__set_bit(i->btree_id, &btree_roots_have);
|
||||
}
|
||||
|
||||
/* Can we merge with previous entry? */
|
||||
if (prev &&
|
||||
@ -1722,85 +1750,10 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset
|
||||
|
||||
prev = prev ? vstruct_next(prev) : jset->start;
|
||||
jset->u64s = cpu_to_le32((u64 *) prev - jset->_data);
|
||||
}
|
||||
|
||||
void bch2_journal_write(struct closure *cl)
|
||||
{
|
||||
struct journal *j = container_of(cl, struct journal, io);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bch_dev *ca;
|
||||
struct journal_buf *w = journal_last_unwritten_buf(j);
|
||||
struct bch_replicas_padded replicas;
|
||||
struct jset_entry *start, *end;
|
||||
struct jset *jset;
|
||||
struct bio *bio;
|
||||
struct printbuf journal_debug_buf = PRINTBUF;
|
||||
bool validate_before_checksum = false;
|
||||
unsigned i, sectors, bytes, u64s, nr_rw_members = 0;
|
||||
int ret;
|
||||
|
||||
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
|
||||
|
||||
journal_buf_realloc(j, w);
|
||||
jset = w->data;
|
||||
|
||||
j->write_start_time = local_clock();
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
/*
|
||||
* If the journal is in an error state - we did an emergency shutdown -
|
||||
* we prefer to continue doing journal writes. We just mark them as
|
||||
* noflush so they'll never be used, but they'll still be visible by the
|
||||
* list_journal tool - this helps in debugging.
|
||||
*
|
||||
* There's a caveat: the first journal write after marking the
|
||||
* superblock dirty must always be a flush write, because on startup
|
||||
* from a clean shutdown we didn't necessarily read the journal and the
|
||||
* new journal write might overwrite whatever was in the journal
|
||||
* previously - we can't leave the journal without any flush writes in
|
||||
* it.
|
||||
*
|
||||
* So if we're in an error state, and we're still starting up, we don't
|
||||
* write anything at all.
|
||||
*/
|
||||
if (!test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags) &&
|
||||
(bch2_journal_error(j) ||
|
||||
w->noflush ||
|
||||
(!w->must_flush &&
|
||||
(jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
|
||||
test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)))) {
|
||||
w->noflush = true;
|
||||
SET_JSET_NO_FLUSH(jset, true);
|
||||
jset->last_seq = 0;
|
||||
w->last_seq = 0;
|
||||
|
||||
j->nr_noflush_writes++;
|
||||
} else if (!bch2_journal_error(j)) {
|
||||
j->last_flush_write = jiffies;
|
||||
j->nr_flush_writes++;
|
||||
clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags);
|
||||
} else {
|
||||
spin_unlock(&j->lock);
|
||||
goto err;
|
||||
}
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
/*
|
||||
* New btree roots are set by journalling them; when the journal entry
|
||||
* gets written we have to propagate them to c->btree_roots
|
||||
*
|
||||
* But, every journal entry we write has to contain all the btree roots
|
||||
* (at least for now); so after we copy btree roots to c->btree_roots we
|
||||
* have to get any missing btree roots and add them to this journal
|
||||
* entry:
|
||||
*/
|
||||
|
||||
bch2_journal_entries_postprocess(c, jset);
|
||||
|
||||
start = end = vstruct_last(jset);
|
||||
|
||||
end = bch2_btree_roots_to_journal_entries(c, jset->start, end);
|
||||
end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have);
|
||||
|
||||
bch2_journal_super_entries_add_common(c, &end,
|
||||
le64_to_cpu(jset->seq));
|
||||
@ -1816,7 +1769,7 @@ void bch2_journal_write(struct closure *cl)
|
||||
bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
|
||||
vstruct_bytes(jset), w->sectors << 9,
|
||||
u64s, w->u64s_reserved, j->entry_u64s_reserved);
|
||||
goto err;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
jset->magic = cpu_to_le64(jset_magic(c));
|
||||
@ -1835,37 +1788,119 @@ void bch2_journal_write(struct closure *cl)
|
||||
validate_before_checksum = true;
|
||||
|
||||
if (validate_before_checksum &&
|
||||
jset_validate(c, NULL, jset, 0, WRITE))
|
||||
goto err;
|
||||
(ret = jset_validate(c, NULL, jset, 0, WRITE)))
|
||||
return ret;
|
||||
|
||||
ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
|
||||
jset->encrypted_start,
|
||||
vstruct_end(jset) - (void *) jset->encrypted_start);
|
||||
if (bch2_fs_fatal_err_on(ret, c,
|
||||
"error decrypting journal entry: %i", ret))
|
||||
goto err;
|
||||
return ret;
|
||||
|
||||
jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
|
||||
journal_nonce(jset), jset);
|
||||
|
||||
if (!validate_before_checksum &&
|
||||
jset_validate(c, NULL, jset, 0, WRITE))
|
||||
goto err;
|
||||
(ret = jset_validate(c, NULL, jset, 0, WRITE)))
|
||||
return ret;
|
||||
|
||||
memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
|
||||
|
||||
retry_alloc:
|
||||
spin_lock(&j->lock);
|
||||
ret = journal_write_alloc(j, w);
|
||||
|
||||
if (ret && j->can_discard) {
|
||||
spin_unlock(&j->lock);
|
||||
bch2_journal_do_discards(j);
|
||||
goto retry_alloc;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *w)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
int error = bch2_journal_error(j);
|
||||
|
||||
/*
|
||||
* If the journal is in an error state - we did an emergency shutdown -
|
||||
* we prefer to continue doing journal writes. We just mark them as
|
||||
* noflush so they'll never be used, but they'll still be visible by the
|
||||
* list_journal tool - this helps in debugging.
|
||||
*
|
||||
* There's a caveat: the first journal write after marking the
|
||||
* superblock dirty must always be a flush write, because on startup
|
||||
* from a clean shutdown we didn't necessarily read the journal and the
|
||||
* new journal write might overwrite whatever was in the journal
|
||||
* previously - we can't leave the journal without any flush writes in
|
||||
* it.
|
||||
*
|
||||
* So if we're in an error state, and we're still starting up, we don't
|
||||
* write anything at all.
|
||||
*/
|
||||
if (error && test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags))
|
||||
return -EIO;
|
||||
|
||||
if (error ||
|
||||
w->noflush ||
|
||||
(!w->must_flush &&
|
||||
(jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
|
||||
test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) {
|
||||
w->noflush = true;
|
||||
SET_JSET_NO_FLUSH(w->data, true);
|
||||
w->data->last_seq = 0;
|
||||
w->last_seq = 0;
|
||||
|
||||
j->nr_noflush_writes++;
|
||||
} else {
|
||||
j->last_flush_write = jiffies;
|
||||
j->nr_flush_writes++;
|
||||
clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_journal_write(struct closure *cl)
|
||||
{
|
||||
struct journal *j = container_of(cl, struct journal, io);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bch_dev *ca;
|
||||
struct journal_buf *w = journal_last_unwritten_buf(j);
|
||||
struct bch_replicas_padded replicas;
|
||||
struct bio *bio;
|
||||
struct printbuf journal_debug_buf = PRINTBUF;
|
||||
unsigned i, nr_rw_members = 0;
|
||||
int ret;
|
||||
|
||||
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
|
||||
|
||||
j->write_start_time = local_clock();
|
||||
|
||||
spin_lock(&j->lock);
|
||||
ret = bch2_journal_write_pick_flush(j, w);
|
||||
spin_unlock(&j->lock);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
journal_buf_realloc(j, w);
|
||||
|
||||
ret = bch2_journal_write_prep(j, w);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
j->entry_bytes_written += vstruct_bytes(w->data);
|
||||
|
||||
while (1) {
|
||||
spin_lock(&j->lock);
|
||||
ret = journal_write_alloc(j, w);
|
||||
if (!ret || !j->can_discard)
|
||||
break;
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
bch2_journal_do_discards(j);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
__bch2_journal_debug_to_text(&journal_debug_buf, j);
|
||||
spin_unlock(&j->lock);
|
||||
bch_err(c, "Unable to allocate journal write:\n%s",
|
||||
journal_debug_buf.buf);
|
||||
printbuf_exit(&journal_debug_buf);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* write is allocated, no longer need to account for it in
|
||||
@ -1880,13 +1915,6 @@ retry_alloc:
|
||||
bch2_journal_space_available(j);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
if (ret) {
|
||||
bch_err(c, "Unable to allocate journal write:\n%s",
|
||||
journal_debug_buf.buf);
|
||||
printbuf_exit(&journal_debug_buf);
|
||||
goto err;
|
||||
}
|
||||
|
||||
w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
|
||||
|
||||
if (c->opts.nochanges)
|
||||
@ -1908,7 +1936,7 @@ retry_alloc:
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (!JSET_NO_FLUSH(jset) && w->separate_flush) {
|
||||
if (!JSET_NO_FLUSH(w->data) && w->separate_flush) {
|
||||
for_each_rw_member(ca, c, i) {
|
||||
percpu_ref_get(&ca->io_ref);
|
||||
|
||||
|
@ -50,16 +50,25 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
|
||||
return available;
|
||||
}
|
||||
|
||||
static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
|
||||
static inline void journal_set_watermark(struct journal *j)
|
||||
{
|
||||
union journal_preres_state old, new;
|
||||
u64 v = atomic64_read(&j->prereserved.counter);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
bool low_on_space = j->space[journal_space_clean].total * 4 <=
|
||||
j->space[journal_space_total].total;
|
||||
bool low_on_pin = fifo_free(&j->pin) < j->pin.size / 4;
|
||||
unsigned watermark = low_on_space || low_on_pin
|
||||
? BCH_WATERMARK_reclaim
|
||||
: BCH_WATERMARK_stripe;
|
||||
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
new.remaining = u64s_remaining;
|
||||
} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space],
|
||||
&j->low_on_space_start, low_on_space) ||
|
||||
track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin],
|
||||
&j->low_on_pin_start, low_on_pin))
|
||||
trace_and_count(c, journal_full, c);
|
||||
|
||||
swap(watermark, j->watermark);
|
||||
if (watermark > j->watermark)
|
||||
journal_wake(j);
|
||||
}
|
||||
|
||||
static struct journal_space
|
||||
@ -162,7 +171,6 @@ void bch2_journal_space_available(struct journal *j)
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bch_dev *ca;
|
||||
unsigned clean, clean_ondisk, total;
|
||||
s64 u64s_remaining = 0;
|
||||
unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
|
||||
j->buf[1].buf_size >> 9);
|
||||
unsigned i, nr_online = 0, nr_devs_want;
|
||||
@ -222,16 +230,10 @@ void bch2_journal_space_available(struct journal *j)
|
||||
else
|
||||
clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
|
||||
|
||||
u64s_remaining = (u64) clean << 6;
|
||||
u64s_remaining -= (u64) total << 3;
|
||||
u64s_remaining = max(0LL, u64s_remaining);
|
||||
u64s_remaining /= 4;
|
||||
u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
|
||||
journal_set_watermark(j);
|
||||
out:
|
||||
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
|
||||
j->cur_entry_error = ret;
|
||||
journal_set_remaining(j, u64s_remaining);
|
||||
journal_set_watermark(j);
|
||||
|
||||
if (!ret)
|
||||
journal_wake(j);
|
||||
@ -369,15 +371,36 @@ static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
|
||||
return JOURNAL_PIN_other;
|
||||
}
|
||||
|
||||
void bch2_journal_pin_set(struct journal *j, u64 seq,
|
||||
static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq,
|
||||
struct journal_entry_pin *pin,
|
||||
journal_pin_flush_fn flush_fn,
|
||||
enum journal_pin_type type)
|
||||
{
|
||||
struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
|
||||
|
||||
/*
|
||||
* flush_fn is how we identify journal pins in debugfs, so must always
|
||||
* exist, even if it doesn't do anything:
|
||||
*/
|
||||
BUG_ON(!flush_fn);
|
||||
|
||||
atomic_inc(&pin_list->count);
|
||||
pin->seq = seq;
|
||||
pin->flush = flush_fn;
|
||||
list_add(&pin->list, &pin_list->list[type]);
|
||||
}
|
||||
|
||||
void bch2_journal_pin_copy(struct journal *j,
|
||||
struct journal_entry_pin *dst,
|
||||
struct journal_entry_pin *src,
|
||||
journal_pin_flush_fn flush_fn)
|
||||
{
|
||||
struct journal_entry_pin_list *pin_list;
|
||||
bool reclaim;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
u64 seq = READ_ONCE(src->seq);
|
||||
|
||||
if (seq < journal_last_seq(j)) {
|
||||
/*
|
||||
* bch2_journal_pin_copy() raced with bch2_journal_pin_drop() on
|
||||
@ -389,18 +412,34 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
|
||||
return;
|
||||
}
|
||||
|
||||
pin_list = journal_seq_pin(j, seq);
|
||||
reclaim = __journal_pin_drop(j, dst);
|
||||
|
||||
bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn));
|
||||
|
||||
if (reclaim)
|
||||
bch2_journal_reclaim_fast(j);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
/*
|
||||
* If the journal is currently full, we might want to call flush_fn
|
||||
* immediately:
|
||||
*/
|
||||
journal_wake(j);
|
||||
}
|
||||
|
||||
void bch2_journal_pin_set(struct journal *j, u64 seq,
|
||||
struct journal_entry_pin *pin,
|
||||
journal_pin_flush_fn flush_fn)
|
||||
{
|
||||
bool reclaim;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
BUG_ON(seq < journal_last_seq(j));
|
||||
|
||||
reclaim = __journal_pin_drop(j, pin);
|
||||
|
||||
atomic_inc(&pin_list->count);
|
||||
pin->seq = seq;
|
||||
pin->flush = flush_fn;
|
||||
|
||||
if (flush_fn)
|
||||
list_add(&pin->list, &pin_list->list[journal_pin_type(flush_fn)]);
|
||||
else
|
||||
list_add(&pin->list, &pin_list->flushed);
|
||||
bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn));
|
||||
|
||||
if (reclaim)
|
||||
bch2_journal_reclaim_fast(j);
|
||||
@ -555,11 +594,6 @@ static u64 journal_seq_to_flush(struct journal *j)
|
||||
/* Try to keep the journal at most half full: */
|
||||
nr_buckets = ja->nr / 2;
|
||||
|
||||
/* And include pre-reservations: */
|
||||
nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
|
||||
(ca->mi.bucket_size << 6) -
|
||||
journal_entry_overhead(j));
|
||||
|
||||
nr_buckets = min(nr_buckets, ja->nr);
|
||||
|
||||
bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
|
||||
@ -638,10 +672,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
||||
msecs_to_jiffies(c->opts.journal_reclaim_delay)))
|
||||
min_nr = 1;
|
||||
|
||||
if (j->prereserved.reserved * 4 > j->prereserved.remaining)
|
||||
min_nr = 1;
|
||||
|
||||
if (fifo_free(&j->pin) <= 32)
|
||||
if (j->watermark != BCH_WATERMARK_stripe)
|
||||
min_nr = 1;
|
||||
|
||||
if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
|
||||
@ -652,8 +683,6 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
||||
trace_and_count(c, journal_reclaim_start, c,
|
||||
direct, kicked,
|
||||
min_nr, min_key_cache,
|
||||
j->prereserved.reserved,
|
||||
j->prereserved.remaining,
|
||||
atomic_read(&c->btree_cache.dirty),
|
||||
c->btree_cache.used,
|
||||
atomic_long_read(&c->btree_key_cache.nr_dirty),
|
||||
@ -805,6 +834,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
|
||||
|
||||
bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
|
||||
{
|
||||
/* time_stats this */
|
||||
bool did_work = false;
|
||||
|
||||
if (!test_bit(JOURNAL_STARTED, &j->flags))
|
||||
|
@ -47,17 +47,10 @@ static inline void bch2_journal_pin_add(struct journal *j, u64 seq,
|
||||
bch2_journal_pin_set(j, seq, pin, flush_fn);
|
||||
}
|
||||
|
||||
static inline void bch2_journal_pin_copy(struct journal *j,
|
||||
struct journal_entry_pin *dst,
|
||||
struct journal_entry_pin *src,
|
||||
journal_pin_flush_fn flush_fn)
|
||||
{
|
||||
/* Guard against racing with journal_pin_drop(src): */
|
||||
u64 seq = READ_ONCE(src->seq);
|
||||
|
||||
if (seq)
|
||||
bch2_journal_pin_add(j, seq, dst, flush_fn);
|
||||
}
|
||||
void bch2_journal_pin_copy(struct journal *,
|
||||
struct journal_entry_pin *,
|
||||
struct journal_entry_pin *,
|
||||
journal_pin_flush_fn);
|
||||
|
||||
static inline void bch2_journal_pin_update(struct journal *j, u64 seq,
|
||||
struct journal_entry_pin *pin,
|
||||
|
@ -76,14 +76,6 @@ struct journal_res {
|
||||
u64 seq;
|
||||
};
|
||||
|
||||
/*
|
||||
* For reserving space in the journal prior to getting a reservation on a
|
||||
* particular journal entry:
|
||||
*/
|
||||
struct journal_preres {
|
||||
unsigned u64s;
|
||||
};
|
||||
|
||||
union journal_res_state {
|
||||
struct {
|
||||
atomic64_t counter;
|
||||
@ -104,22 +96,6 @@ union journal_res_state {
|
||||
};
|
||||
};
|
||||
|
||||
union journal_preres_state {
|
||||
struct {
|
||||
atomic64_t counter;
|
||||
};
|
||||
|
||||
struct {
|
||||
u64 v;
|
||||
};
|
||||
|
||||
struct {
|
||||
u64 waiting:1,
|
||||
reserved:31,
|
||||
remaining:32;
|
||||
};
|
||||
};
|
||||
|
||||
/* bytes: */
|
||||
#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
|
||||
#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
|
||||
@ -180,8 +156,6 @@ struct journal {
|
||||
union journal_res_state reservations;
|
||||
enum bch_watermark watermark;
|
||||
|
||||
union journal_preres_state prereserved;
|
||||
|
||||
} __aligned(SMP_CACHE_BYTES);
|
||||
|
||||
unsigned long flags;
|
||||
@ -288,15 +262,18 @@ struct journal {
|
||||
|
||||
unsigned long last_flush_write;
|
||||
|
||||
u64 res_get_blocked_start;
|
||||
u64 write_start_time;
|
||||
|
||||
u64 nr_flush_writes;
|
||||
u64 nr_noflush_writes;
|
||||
u64 entry_bytes_written;
|
||||
|
||||
u64 low_on_space_start;
|
||||
u64 low_on_pin_start;
|
||||
u64 max_in_flight_start;
|
||||
|
||||
struct bch2_time_stats *flush_write_time;
|
||||
struct bch2_time_stats *noflush_write_time;
|
||||
struct bch2_time_stats *blocked_time;
|
||||
struct bch2_time_stats *flush_seq_time;
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
|
@ -85,13 +85,13 @@ static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
|
||||
|
||||
int bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
|
||||
{
|
||||
return commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
__bch2_logged_op_start(trans, k));
|
||||
}
|
||||
|
||||
void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
|
||||
{
|
||||
int ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0));
|
||||
/*
|
||||
* This needs to be a fatal error because we've left an unfinished
|
||||
|
@ -155,7 +155,7 @@ int bch2_check_lrus(struct bch_fs *c)
|
||||
ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter,
|
||||
BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
|
||||
bch2_check_lru_key(trans, &iter, k, &last_flushed_pos)));
|
||||
if (ret)
|
||||
bch_err_fn(c, ret);
|
||||
|
@ -90,7 +90,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
|
||||
ret = for_each_btree_key_commit(trans, iter, id, POS_MIN,
|
||||
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags));
|
||||
if (ret)
|
||||
break;
|
||||
|
@ -263,7 +263,7 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans,
|
||||
|
||||
return bch2_trans_relock(trans) ?:
|
||||
bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
|
||||
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||
}
|
||||
|
||||
int bch2_move_extent(struct moving_context *ctxt,
|
||||
|
@ -370,6 +370,7 @@ static int bch2_copygc_thread(void *arg)
|
||||
if (min_member_capacity == U64_MAX)
|
||||
min_member_capacity = 128 * 2048;
|
||||
|
||||
bch2_trans_unlock_long(ctxt.trans);
|
||||
bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6),
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ err:
|
||||
|
||||
int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
|
||||
{
|
||||
int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
|
||||
int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
|
||||
__bch2_set_rebalance_needs_scan(trans, inum));
|
||||
rebalance_wakeup(c);
|
||||
return ret;
|
||||
@ -125,7 +125,7 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans,
|
||||
|
||||
extent_entry_drop(bkey_i_to_s(n),
|
||||
(void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n)));
|
||||
return bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
|
||||
return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||
}
|
||||
|
||||
static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
|
||||
@ -273,7 +273,7 @@ static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
|
||||
r->state = BCH_REBALANCE_scanning;
|
||||
|
||||
ret = __bch2_move_data(ctxt, r->scan_start, r->scan_end, rebalance_pred, NULL) ?:
|
||||
commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_clear_rebalance_needs_scan(trans, inum, cookie));
|
||||
|
||||
bch2_move_stats_exit(&r->scan_stats, trans->c);
|
||||
|
@ -98,6 +98,11 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
|
||||
unsigned update_flags = BTREE_TRIGGER_NORUN;
|
||||
int ret;
|
||||
|
||||
if (k->overwritten)
|
||||
return 0;
|
||||
|
||||
trans->journal_res.seq = k->journal_seq;
|
||||
|
||||
/*
|
||||
* BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to
|
||||
* keep the key cache coherent with the underlying btree. Nothing
|
||||
@ -139,27 +144,14 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
|
||||
static int bch2_journal_replay(struct bch_fs *c)
|
||||
{
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
struct journal_key **keys_sorted, *k;
|
||||
DARRAY(struct journal_key *) keys_sorted = { 0 };
|
||||
struct journal_key **kp;
|
||||
struct journal *j = &c->journal;
|
||||
u64 start_seq = c->journal_replay_seq_start;
|
||||
u64 end_seq = c->journal_replay_seq_start;
|
||||
size_t i;
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
int ret;
|
||||
|
||||
move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
|
||||
keys->gap = keys->nr;
|
||||
|
||||
keys_sorted = kvmalloc_array(keys->nr, sizeof(*keys_sorted), GFP_KERNEL);
|
||||
if (!keys_sorted)
|
||||
return -BCH_ERR_ENOMEM_journal_replay;
|
||||
|
||||
for (i = 0; i < keys->nr; i++)
|
||||
keys_sorted[i] = &keys->d[i];
|
||||
|
||||
sort(keys_sorted, keys->nr,
|
||||
sizeof(keys_sorted[0]),
|
||||
journal_sort_seq_cmp, NULL);
|
||||
|
||||
if (keys->nr) {
|
||||
ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)",
|
||||
keys->nr, start_seq, end_seq);
|
||||
@ -167,27 +159,61 @@ static int bch2_journal_replay(struct bch_fs *c)
|
||||
goto err;
|
||||
}
|
||||
|
||||
for (i = 0; i < keys->nr; i++) {
|
||||
k = keys_sorted[i];
|
||||
|
||||
/*
|
||||
* First, attempt to replay keys in sorted order. This is more
|
||||
* efficient, but some might fail if that would cause a journal
|
||||
* deadlock.
|
||||
*/
|
||||
for (size_t i = 0; i < keys->nr; i++) {
|
||||
cond_resched();
|
||||
|
||||
replay_now_at(j, k->journal_seq);
|
||||
struct journal_key *k = keys->d + i;
|
||||
|
||||
ret = bch2_trans_do(c, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
(!k->allocated
|
||||
? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim
|
||||
: 0),
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
BCH_TRANS_COMMIT_journal_reclaim|
|
||||
(!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0),
|
||||
bch2_journal_replay_key(trans, k));
|
||||
BUG_ON(!ret && !k->overwritten);
|
||||
if (ret) {
|
||||
bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s",
|
||||
bch2_btree_id_str(k->btree_id), k->level, bch2_err_str(ret));
|
||||
ret = darray_push(&keys_sorted, k);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Now, replay any remaining keys in the order in which they appear in
|
||||
* the journal, unpinning those journal entries as we go:
|
||||
*/
|
||||
sort(keys_sorted.data, keys_sorted.nr,
|
||||
sizeof(keys_sorted.data[0]),
|
||||
journal_sort_seq_cmp, NULL);
|
||||
|
||||
darray_for_each(keys_sorted, kp) {
|
||||
cond_resched();
|
||||
|
||||
struct journal_key *k = *kp;
|
||||
|
||||
replay_now_at(j, k->journal_seq);
|
||||
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
(!k->allocated
|
||||
? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim
|
||||
: 0),
|
||||
bch2_journal_replay_key(trans, k));
|
||||
bch_err_msg(c, ret, "while replaying key at btree %s level %u:",
|
||||
bch2_btree_id_str(k->btree_id), k->level);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
BUG_ON(!k->overwritten);
|
||||
}
|
||||
|
||||
bch2_trans_put(trans);
|
||||
trans = NULL;
|
||||
|
||||
replay_now_at(j, j->replay_journal_seq_end);
|
||||
j->replay_journal_seq = 0;
|
||||
|
||||
@ -198,9 +224,9 @@ static int bch2_journal_replay(struct bch_fs *c)
|
||||
if (keys->nr && !ret)
|
||||
bch2_journal_log_msg(c, "journal replay finished");
|
||||
err:
|
||||
kvfree(keys_sorted);
|
||||
|
||||
if (ret)
|
||||
if (trans)
|
||||
bch2_trans_put(trans);
|
||||
darray_exit(&keys_sorted);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
@ -468,7 +494,7 @@ err:
|
||||
noinline_for_stack
|
||||
static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
|
||||
{
|
||||
int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
|
||||
int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
|
||||
__bch2_fs_upgrade_for_subvolumes(trans));
|
||||
if (ret)
|
||||
bch_err_fn(c, ret);
|
||||
@ -489,7 +515,19 @@ static int bch2_check_allocations(struct bch_fs *c)
|
||||
|
||||
static int bch2_set_may_go_rw(struct bch_fs *c)
|
||||
{
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
|
||||
/*
|
||||
* After we go RW, the journal keys buffer can't be modified (except for
|
||||
* setting journal_key->overwritten: it will be accessed by multiple
|
||||
* threads
|
||||
*/
|
||||
move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
|
||||
keys->gap = keys->nr;
|
||||
|
||||
set_bit(BCH_FS_MAY_GO_RW, &c->flags);
|
||||
if (keys->nr)
|
||||
return bch2_fs_read_write_early(c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -390,7 +390,7 @@ s64 bch2_remap_range(struct bch_fs *c,
|
||||
inode_u.bi_size = new_i_size;
|
||||
ret2 = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
}
|
||||
|
||||
bch2_trans_iter_exit(trans, &inode_iter);
|
||||
|
@ -376,7 +376,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
|
||||
|
||||
entry = sb_clean->start;
|
||||
bch2_journal_super_entries_add_common(c, &entry, 0);
|
||||
entry = bch2_btree_roots_to_journal_entries(c, entry, entry);
|
||||
entry = bch2_btree_roots_to_journal_entries(c, entry, 0);
|
||||
BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
|
||||
|
||||
memset(entry, 0,
|
||||
|
@ -70,7 +70,7 @@ static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
|
||||
prt_tab(out);
|
||||
prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i]));
|
||||
prt_tab(out);
|
||||
bch2_prt_date_seconds(out, le64_to_cpu(e->entries[i].last_error_time));
|
||||
bch2_prt_datetime(out, le64_to_cpu(e->entries[i].last_error_time));
|
||||
prt_newline(out);
|
||||
}
|
||||
}
|
||||
|
@ -230,7 +230,7 @@ static void member_to_text(struct printbuf *out,
|
||||
prt_printf(out, "Last mount:");
|
||||
prt_tab(out);
|
||||
if (m.last_mount)
|
||||
bch2_prt_date_seconds(out, le64_to_cpu(m.last_mount));
|
||||
bch2_prt_datetime(out, le64_to_cpu(m.last_mount));
|
||||
else
|
||||
prt_printf(out, "(never)");
|
||||
prt_newline(out);
|
||||
|
@ -590,7 +590,7 @@ int bch2_check_snapshot_trees(struct bch_fs *c)
|
||||
for_each_btree_key_commit(trans, iter,
|
||||
BTREE_ID_snapshot_trees, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc,
|
||||
check_snapshot_tree(trans, &iter, k)));
|
||||
|
||||
if (ret)
|
||||
@ -868,7 +868,7 @@ int bch2_check_snapshots(struct bch_fs *c)
|
||||
for_each_btree_key_reverse_commit(trans, iter,
|
||||
BTREE_ID_snapshots, POS_MAX,
|
||||
BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc,
|
||||
check_snapshot(trans, &iter, k)));
|
||||
if (ret)
|
||||
bch_err_fn(c, ret);
|
||||
@ -959,7 +959,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
|
||||
parent_id, id))
|
||||
goto err;
|
||||
|
||||
parent->v.children[i] = le32_to_cpu(child_id);
|
||||
parent->v.children[i] = cpu_to_le32(child_id);
|
||||
|
||||
normalize_snapshot_child_pointers(&parent->v);
|
||||
}
|
||||
@ -1449,12 +1449,12 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
|
||||
ret = for_each_btree_key_commit(trans, iter,
|
||||
id, POS_MIN,
|
||||
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
|
||||
&res, NULL, BTREE_INSERT_NOFAIL,
|
||||
&res, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
|
||||
for_each_btree_key_commit(trans, iter,
|
||||
id, POS_MIN,
|
||||
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
|
||||
&res, NULL, BTREE_INSERT_NOFAIL,
|
||||
&res, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
move_key_to_correct_snapshot(trans, &iter, k));
|
||||
|
||||
bch2_disk_reservation_put(c, &res);
|
||||
@ -1489,7 +1489,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
|
||||
*/
|
||||
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
|
||||
BTREE_ITER_INTENT, k,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
|
||||
if (ret)
|
||||
goto err_create_lock;
|
||||
|
@ -15,6 +15,16 @@
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/sha2.h>
|
||||
|
||||
/*
 * Flags for the bch2_hash_set*() family. Declared __bitwise so that these
 * cannot be silently mixed with other flag words.
 */
typedef unsigned __bitwise bch_str_hash_flags_t;

/* Bit numbers; the usable masks are the BCH_HASH_SET_* macros below. */
enum bch_str_hash_flags {
	__BCH_HASH_SET_MUST_CREATE,
	__BCH_HASH_SET_MUST_REPLACE,
};

/*
 * The expansions are fully parenthesised so that each macro behaves as a
 * single primary expression wherever it is used.
 *
 * MUST_CREATE:  fail with -EEXIST if the key already exists.
 * MUST_REPLACE: fail with -BCH_ERR_ENOENT_str_hash_set_must_replace if the
 *               key does not already exist.
 */
#define BCH_HASH_SET_MUST_CREATE	((__force bch_str_hash_flags_t) BIT(__BCH_HASH_SET_MUST_CREATE))
#define BCH_HASH_SET_MUST_REPLACE	((__force bch_str_hash_flags_t) BIT(__BCH_HASH_SET_MUST_REPLACE))
|
||||
|
||||
static inline enum bch_str_hash_type
|
||||
bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
|
||||
{
|
||||
@ -246,7 +256,7 @@ int bch2_hash_set_snapshot(struct btree_trans *trans,
|
||||
const struct bch_hash_info *info,
|
||||
subvol_inum inum, u32 snapshot,
|
||||
struct bkey_i *insert,
|
||||
int flags,
|
||||
bch_str_hash_flags_t str_hash_flags,
|
||||
int update_flags)
|
||||
{
|
||||
struct btree_iter iter, slot = { NULL };
|
||||
@ -269,7 +279,7 @@ int bch2_hash_set_snapshot(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
if (!slot.path &&
|
||||
!(flags & BCH_HASH_SET_MUST_REPLACE))
|
||||
!(str_hash_flags & BCH_HASH_SET_MUST_REPLACE))
|
||||
bch2_trans_copy_iter(&slot, &iter);
|
||||
|
||||
if (k.k->type != KEY_TYPE_hash_whiteout)
|
||||
@ -287,16 +297,16 @@ found:
|
||||
found = true;
|
||||
not_found:
|
||||
|
||||
if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) {
|
||||
if (!found && (str_hash_flags & BCH_HASH_SET_MUST_REPLACE)) {
|
||||
ret = -BCH_ERR_ENOENT_str_hash_set_must_replace;
|
||||
} else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) {
|
||||
} else if (found && (str_hash_flags & BCH_HASH_SET_MUST_CREATE)) {
|
||||
ret = -EEXIST;
|
||||
} else {
|
||||
if (!found && slot.path)
|
||||
swap(iter, slot);
|
||||
|
||||
insert->k.p = iter.pos;
|
||||
ret = bch2_trans_update(trans, &iter, insert, 0);
|
||||
ret = bch2_trans_update(trans, &iter, insert, update_flags);
|
||||
}
|
||||
|
||||
goto out;
|
||||
@ -307,7 +317,8 @@ int bch2_hash_set(struct btree_trans *trans,
|
||||
const struct bch_hash_desc desc,
|
||||
const struct bch_hash_info *info,
|
||||
subvol_inum inum,
|
||||
struct bkey_i *insert, int flags)
|
||||
struct bkey_i *insert,
|
||||
bch_str_hash_flags_t str_hash_flags)
|
||||
{
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
@ -319,7 +330,7 @@ int bch2_hash_set(struct btree_trans *trans,
|
||||
insert->k.p.inode = inum.inum;
|
||||
|
||||
return bch2_hash_set_snapshot(trans, desc, info, inum,
|
||||
snapshot, insert, flags, 0);
|
||||
snapshot, insert, str_hash_flags, 0);
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
|
@ -89,7 +89,7 @@ int bch2_check_subvols(struct bch_fs *c)
|
||||
ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter,
|
||||
BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc,
|
||||
check_subvol(trans, &iter, k)));
|
||||
if (ret)
|
||||
bch_err_fn(c, ret);
|
||||
@ -219,7 +219,7 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d
|
||||
BTREE_ITER_CACHED, &s)) ?:
|
||||
for_each_btree_key_commit(trans, iter,
|
||||
BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_subvolume_reparent(trans, &iter, k,
|
||||
subvolid_to_delete, le32_to_cpu(s.parent)));
|
||||
}
|
||||
@ -256,7 +256,7 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
|
||||
static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
|
||||
{
|
||||
return bch2_subvolumes_reparent(trans, subvolid) ?:
|
||||
commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
|
||||
commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
__bch2_subvolume_delete(trans, subvolid));
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,7 @@ struct snapshot_t {
|
||||
};
|
||||
|
||||
struct snapshot_table {
|
||||
struct snapshot_t s[0];
|
||||
DECLARE_FLEX_ARRAY(struct snapshot_t, s);
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
|
@ -1183,7 +1183,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
|
||||
prt_printf(out, "Created:");
|
||||
prt_tab(out);
|
||||
if (sb->time_base_lo)
|
||||
bch2_prt_date_seconds(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
|
||||
bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
|
||||
else
|
||||
prt_printf(out, "(not set)");
|
||||
prt_newline(out);
|
||||
|
@ -641,7 +641,9 @@ static int bch2_fs_online(struct bch_fs *c)
|
||||
ret = kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ?:
|
||||
kobject_add(&c->internal, &c->kobj, "internal") ?:
|
||||
kobject_add(&c->opts_dir, &c->kobj, "options") ?:
|
||||
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
|
||||
kobject_add(&c->time_stats, &c->kobj, "time_stats") ?:
|
||||
#endif
|
||||
kobject_add(&c->counters_kobj, &c->kobj, "counters") ?:
|
||||
bch2_opts_create_sysfs_files(&c->opts_dir);
|
||||
if (ret) {
|
||||
@ -750,7 +752,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
|
||||
c->journal.flush_write_time = &c->times[BCH_TIME_journal_flush_write];
|
||||
c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write];
|
||||
c->journal.blocked_time = &c->times[BCH_TIME_blocked_journal];
|
||||
c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq];
|
||||
|
||||
bch2_fs_btree_cache_init_early(&c->btree_cache);
|
||||
|
@ -196,10 +196,9 @@ DEFINE_EVENT(bio, journal_write,
|
||||
TRACE_EVENT(journal_reclaim_start,
|
||||
TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
|
||||
u64 min_nr, u64 min_key_cache,
|
||||
u64 prereserved, u64 prereserved_total,
|
||||
u64 btree_cache_dirty, u64 btree_cache_total,
|
||||
u64 btree_key_cache_dirty, u64 btree_key_cache_total),
|
||||
TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total,
|
||||
TP_ARGS(c, direct, kicked, min_nr, min_key_cache,
|
||||
btree_cache_dirty, btree_cache_total,
|
||||
btree_key_cache_dirty, btree_key_cache_total),
|
||||
|
||||
@ -209,8 +208,6 @@ TRACE_EVENT(journal_reclaim_start,
|
||||
__field(bool, kicked )
|
||||
__field(u64, min_nr )
|
||||
__field(u64, min_key_cache )
|
||||
__field(u64, prereserved )
|
||||
__field(u64, prereserved_total )
|
||||
__field(u64, btree_cache_dirty )
|
||||
__field(u64, btree_cache_total )
|
||||
__field(u64, btree_key_cache_dirty )
|
||||
@ -223,22 +220,18 @@ TRACE_EVENT(journal_reclaim_start,
|
||||
__entry->kicked = kicked;
|
||||
__entry->min_nr = min_nr;
|
||||
__entry->min_key_cache = min_key_cache;
|
||||
__entry->prereserved = prereserved;
|
||||
__entry->prereserved_total = prereserved_total;
|
||||
__entry->btree_cache_dirty = btree_cache_dirty;
|
||||
__entry->btree_cache_total = btree_cache_total;
|
||||
__entry->btree_key_cache_dirty = btree_key_cache_dirty;
|
||||
__entry->btree_key_cache_total = btree_key_cache_total;
|
||||
),
|
||||
|
||||
TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
|
||||
TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu btree cache %llu/%llu key cache %llu/%llu",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->direct,
|
||||
__entry->kicked,
|
||||
__entry->min_nr,
|
||||
__entry->min_key_cache,
|
||||
__entry->prereserved,
|
||||
__entry->prereserved_total,
|
||||
__entry->btree_cache_dirty,
|
||||
__entry->btree_cache_total,
|
||||
__entry->btree_key_cache_dirty,
|
||||
|
@ -315,6 +315,57 @@ int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifndef __KERNEL__
|
||||
#include <time.h>
|
||||
/*
 * Userspace variant: print @sec (seconds since the epoch) to @out as a
 * ctime()-style date string.
 *
 * ctime_r() returns NULL when the time cannot be represented, in which case
 * the contents of buf are unspecified; fall back to printing the raw number
 * of seconds rather than passing an uninitialised buffer to prt_str().
 */
void bch2_prt_datetime(struct printbuf *out, time64_t sec)
{
	time_t t = sec;
	char buf[64];

	if (ctime_r(&t, buf))
		prt_str(out, buf);
	else
		prt_u64(out, sec);
}
|
||||
#else
|
||||
/*
 * Kernel variant: print @sec (seconds since the epoch) to @out as a
 * formatted date and time.
 *
 * The timestamp is formatted with the %ptT specifier, which takes a pointer
 * to a time64_t. Previously the formatted string was built and then
 * discarded, and the raw seconds value was printed instead.
 */
void bch2_prt_datetime(struct printbuf *out, time64_t sec)
{
	char buf[64];

	snprintf(buf, sizeof(buf), "%ptT", &sec);
	prt_str(out, buf);
}
|
||||
#endif
|
||||
|
||||
static const struct time_unit {
|
||||
const char *name;
|
||||
u64 nsecs;
|
||||
} time_units[] = {
|
||||
{ "ns", 1 },
|
||||
{ "us", NSEC_PER_USEC },
|
||||
{ "ms", NSEC_PER_MSEC },
|
||||
{ "s", NSEC_PER_SEC },
|
||||
{ "m", (u64) NSEC_PER_SEC * 60},
|
||||
{ "h", (u64) NSEC_PER_SEC * 3600},
|
||||
{ "eon", U64_MAX },
|
||||
};
|
||||
|
||||
/*
 * Choose the unit in which to display a duration of @ns nanoseconds.
 *
 * Starting from the smallest unit, step up to the next larger one for as
 * long as @ns is at least (next unit length << 1); never step past the last
 * table entry.
 */
static const struct time_unit *pick_time_units(u64 ns)
{
	const struct time_unit *last = time_units + ARRAY_SIZE(time_units) - 1;
	const struct time_unit *unit = time_units;

	while (unit < last &&
	       ns >= unit[1].nsecs << 1)
		unit++;

	return unit;
}
|
||||
|
||||
/*
 * Print @ns to @out as "<value> <unit>", using the unit chosen by
 * pick_time_units().
 *
 * The unit length is a u64 that does not fit in 32 bits for "m", "h" and
 * "eon", so the full 64-by-64 divide is required: div_u64() takes a 32-bit
 * divisor and would silently truncate it.
 */
void bch2_pr_time_units(struct printbuf *out, u64 ns)
{
	const struct time_unit *u = pick_time_units(ns);

	prt_printf(out, "%llu %s", div64_u64(ns, u->nsecs), u->name);
}
|
||||
|
||||
/* time stats: */
|
||||
|
||||
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
|
||||
@ -359,6 +410,7 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
|
||||
mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration);
|
||||
stats->max_duration = max(stats->max_duration, duration);
|
||||
stats->min_duration = min(stats->min_duration, duration);
|
||||
stats->total_duration += duration;
|
||||
bch2_quantiles_update(&stats->quantiles, duration);
|
||||
}
|
||||
|
||||
@ -372,22 +424,26 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
|
||||
}
|
||||
}
|
||||
|
||||
static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
|
||||
static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
|
||||
struct bch2_time_stat_buffer *b)
|
||||
{
|
||||
struct bch2_time_stat_buffer_entry *i;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&stats->lock, flags);
|
||||
for (i = b->entries;
|
||||
for (struct bch2_time_stat_buffer_entry *i = b->entries;
|
||||
i < b->entries + ARRAY_SIZE(b->entries);
|
||||
i++)
|
||||
bch2_time_stats_update_one(stats, i->start, i->end);
|
||||
spin_unlock_irqrestore(&stats->lock, flags);
|
||||
|
||||
b->nr = 0;
|
||||
}
|
||||
|
||||
static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
|
||||
struct bch2_time_stat_buffer *b)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&stats->lock, flags);
|
||||
__bch2_time_stats_clear_buffer(stats, b);
|
||||
spin_unlock_irqrestore(&stats->lock, flags);
|
||||
}
|
||||
|
||||
void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
|
||||
{
|
||||
unsigned long flags;
|
||||
@ -423,40 +479,6 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
|
||||
preempt_enable();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static const struct time_unit {
|
||||
const char *name;
|
||||
u64 nsecs;
|
||||
} time_units[] = {
|
||||
{ "ns", 1 },
|
||||
{ "us", NSEC_PER_USEC },
|
||||
{ "ms", NSEC_PER_MSEC },
|
||||
{ "s", NSEC_PER_SEC },
|
||||
{ "m", (u64) NSEC_PER_SEC * 60},
|
||||
{ "h", (u64) NSEC_PER_SEC * 3600},
|
||||
{ "eon", U64_MAX },
|
||||
};
|
||||
|
||||
static const struct time_unit *pick_time_units(u64 ns)
|
||||
{
|
||||
const struct time_unit *u;
|
||||
|
||||
for (u = time_units;
|
||||
u + 1 < time_units + ARRAY_SIZE(time_units) &&
|
||||
ns >= u[1].nsecs << 1;
|
||||
u++)
|
||||
;
|
||||
|
||||
return u;
|
||||
}
|
||||
|
||||
void bch2_pr_time_units(struct printbuf *out, u64 ns)
|
||||
{
|
||||
const struct time_unit *u = pick_time_units(ns);
|
||||
|
||||
prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
|
||||
}
|
||||
|
||||
static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
|
||||
{
|
||||
@ -467,26 +489,6 @@ static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
|
||||
prt_printf(out, "%s", u->name);
|
||||
}
|
||||
|
||||
#ifndef __KERNEL__
|
||||
#include <time.h>
|
||||
void bch2_prt_date_seconds(struct printbuf *out, time64_t sec)
|
||||
{
|
||||
time_t t = sec;
|
||||
char buf[64];
|
||||
ctime_r(&t, buf);
|
||||
prt_str(out, buf);
|
||||
}
|
||||
#else
|
||||
void bch2_prt_date_seconds(struct printbuf *out, time64_t sec)
|
||||
{
|
||||
char buf[64];
|
||||
snprintf(buf, sizeof(buf), "%ptT", &sec);
|
||||
prt_u64(out, sec);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define TABSTOP_SIZE 12
|
||||
|
||||
static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns)
|
||||
{
|
||||
prt_str(out, name);
|
||||
@ -495,12 +497,24 @@ static inline void pr_name_and_units(struct printbuf *out, const char *name, u64
|
||||
prt_newline(out);
|
||||
}
|
||||
|
||||
#define TABSTOP_SIZE 12
|
||||
|
||||
void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats)
|
||||
{
|
||||
const struct time_unit *u;
|
||||
s64 f_mean = 0, d_mean = 0;
|
||||
u64 q, last_q = 0, f_stddev = 0, d_stddev = 0;
|
||||
int i;
|
||||
|
||||
if (stats->buffer) {
|
||||
int cpu;
|
||||
|
||||
spin_lock_irq(&stats->lock);
|
||||
for_each_possible_cpu(cpu)
|
||||
__bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
|
||||
spin_unlock_irq(&stats->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* avoid divide by zero
|
||||
*/
|
||||
@ -546,6 +560,7 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
|
||||
|
||||
pr_name_and_units(out, "min:", stats->min_duration);
|
||||
pr_name_and_units(out, "max:", stats->max_duration);
|
||||
pr_name_and_units(out, "total:", stats->total_duration);
|
||||
|
||||
prt_printf(out, "mean:");
|
||||
prt_tab(out);
|
||||
@ -603,6 +618,9 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
|
||||
last_q = q;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) {}
|
||||
#endif
|
||||
|
||||
void bch2_time_stats_exit(struct bch2_time_stats *stats)
|
||||
{
|
||||
|
@ -244,7 +244,7 @@ do { \
|
||||
#define prt_bitflags(...) bch2_prt_bitflags(__VA_ARGS__)
|
||||
|
||||
void bch2_pr_time_units(struct printbuf *, u64);
|
||||
void bch2_prt_date_seconds(struct printbuf *, time64_t);
|
||||
void bch2_prt_datetime(struct printbuf *, time64_t);
|
||||
|
||||
#ifdef __KERNEL__
|
||||
static inline void uuid_unparse_lower(u8 *uuid, char *out)
|
||||
@ -372,8 +372,9 @@ struct bch2_time_stat_buffer {
|
||||
struct bch2_time_stats {
|
||||
spinlock_t lock;
|
||||
/* all fields are in nanoseconds */
|
||||
u64 max_duration;
|
||||
u64 min_duration;
|
||||
u64 max_duration;
|
||||
u64 total_duration;
|
||||
u64 max_freq;
|
||||
u64 min_freq;
|
||||
u64 last_event;
|
||||
@ -388,15 +389,39 @@ struct bch2_time_stats {
|
||||
|
||||
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
|
||||
void __bch2_time_stats_update(struct bch2_time_stats *stats, u64, u64);
|
||||
#else
|
||||
static inline void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) {}
|
||||
#endif
|
||||
|
||||
static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start)
|
||||
{
|
||||
__bch2_time_stats_update(stats, start, local_clock());
|
||||
}
|
||||
|
||||
static inline bool track_event_change(struct bch2_time_stats *stats,
|
||||
u64 *start, bool v)
|
||||
{
|
||||
if (v != !!*start) {
|
||||
if (!v) {
|
||||
bch2_time_stats_update(stats, *start);
|
||||
*start = 0;
|
||||
} else {
|
||||
*start = local_clock() ?: 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
static inline void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) {}
|
||||
static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start) {}
|
||||
static inline bool track_event_change(struct bch2_time_stats *stats,
|
||||
u64 *start, bool v)
|
||||
{
|
||||
bool ret = v && !*start;
|
||||
*start = v;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
void bch2_time_stats_to_text(struct printbuf *, struct bch2_time_stats *);
|
||||
|
||||
void bch2_time_stats_exit(struct bch2_time_stats *);
|
||||
|
Loading…
Reference in New Issue
Block a user