mirror of https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-22 00:00:03 +03:00
Update bcachefs sources to 9736cbbc5cc3 bcachefs: bs > ps support
Some checks are pending
build / bcachefs-tools-deb (ubuntu-22.04) (push) Waiting to run
build / bcachefs-tools-deb (ubuntu-24.04) (push) Waiting to run
build / bcachefs-tools-rpm (push) Waiting to run
build / bcachefs-tools-msrv (push) Waiting to run
Nix-Tests / nix-flake-check (push) Waiting to run
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent 3e15e96cb9
commit dd1a882d17
@@ -1 +1 @@
63bbe0ca416791095c994aba7bea388e947dd60a
9736cbbc5cc39f6c666befdd787788b6ce6497f6

2  Cargo.lock  generated

@@ -68,7 +68,7 @@ checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"

[[package]]
name = "bcachefs-tools"
version = "1.12.0"
version = "1.20.0"
dependencies = [
"anyhow",
"bch_bindgen",

0  include/linux/unicode.h  Normal file
@@ -871,6 +871,9 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (data_type_is_empty(new_a->data_type) &&
BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
if (new_a->oldest_gen == new_a->gen &&
!bch2_bucket_sectors_total(*new_a))
new_a->oldest_gen++;
new_a->gen++;
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
alloc_data_type_set(new_a, new_a->data_type);

@@ -889,26 +892,20 @@ int bch2_trigger_alloc(struct btree_trans *trans,
!new_a->io_time[READ])
new_a->io_time[READ] = bch2_current_io_time(c, READ);

u64 old_lru = alloc_lru_idx_read(*old_a);
u64 new_lru = alloc_lru_idx_read(*new_a);
if (old_lru != new_lru) {
ret = bch2_lru_change(trans, new.k->p.inode,
bucket_to_u64(new.k->p),
old_lru, new_lru);
if (ret)
goto err;
}
ret = bch2_lru_change(trans, new.k->p.inode,
bucket_to_u64(new.k->p),
alloc_lru_idx_read(*old_a),
alloc_lru_idx_read(*new_a));
if (ret)
goto err;

old_lru = alloc_lru_idx_fragmentation(*old_a, ca);
new_lru = alloc_lru_idx_fragmentation(*new_a, ca);
if (old_lru != new_lru) {
ret = bch2_lru_change(trans,
BCH_LRU_FRAGMENTATION_START,
bucket_to_u64(new.k->p),
old_lru, new_lru);
if (ret)
goto err;
}
ret = bch2_lru_change(trans,
BCH_LRU_BUCKET_FRAGMENTATION,
bucket_to_u64(new.k->p),
alloc_lru_idx_fragmentation(*old_a, ca),
alloc_lru_idx_fragmentation(*new_a, ca));
if (ret)
goto err;

if (old_a->gen != new_a->gen) {
ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
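The hunk above drops the caller-side old_lru != new_lru checks: bch2_lru_change() is now handed both the old and the new index and can decide for itself whether anything actually moved. A minimal userspace model of that idea follows; the struct and helper names are illustrative stand-ins, not the kernel types (in bcachefs, alloc_lru_idx_read() returns a nonzero index only for cached buckets, keyed by the last read time).

/* Hedged userspace model, not the kernel implementation: shows why the
 * helper can be called unconditionally once it sees both indexes. */
#include <stdio.h>

struct alloc { int data_type_cached; unsigned long long io_time_read; };

/* mirrors the idea behind alloc_lru_idx_read(): only cached buckets live
 * on the read-time LRU, keyed by their last read time */
static unsigned long long lru_idx_read(struct alloc a)
{
	return a.data_type_cached ? a.io_time_read : 0;
}

static int lru_change(unsigned long long old_idx, unsigned long long new_idx)
{
	if (old_idx == new_idx)
		return 0;		/* nothing to move */
	if (old_idx)
		printf("  delete lru entry at %llu\n", old_idx);
	if (new_idx)
		printf("  insert lru entry at %llu\n", new_idx);
	return 0;
}

int main(void)
{
	struct alloc old = { .data_type_cached = 1, .io_time_read = 100 };
	struct alloc new = { .data_type_cached = 1, .io_time_read = 250 };

	printf("read-time LRU update:\n");
	lru_change(lru_idx_read(old), lru_idx_read(new));
	return 0;
}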
@@ -1705,7 +1702,8 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,

u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca);
if (lru_idx) {
ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
ret = bch2_lru_check_set(trans, BCH_LRU_BUCKET_FRAGMENTATION,
bucket_to_u64(alloc_k.k->p),
lru_idx, alloc_k, last_flushed);
if (ret)
goto err;

@@ -1735,7 +1733,9 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = &a_mut->v;
}

ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
ret = bch2_lru_check_set(trans, alloc_k.k->p.inode,
bucket_to_u64(alloc_k.k->p),
a->io_time[READ],
alloc_k, last_flushed);
if (ret)
goto err;

@@ -1757,7 +1757,8 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?:
bch2_check_stripe_to_lru_refs(c);

bch2_bkey_buf_exit(&last_flushed, c);
bch_err_fn(c, ret);
@@ -2058,16 +2059,71 @@ put_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}

static int invalidate_one_bp(struct btree_trans *trans,
struct bch_dev *ca,
struct bkey_s_c_backpointer bp,
struct bkey_buf *last_flushed)
{
struct btree_iter extent_iter;
struct bkey_s_c extent_k =
bch2_backpointer_get_key(trans, bp, &extent_iter, 0, last_flushed);
int ret = bkey_err(extent_k);
if (ret)
return ret;

struct bkey_i *n =
bch2_bkey_make_mut(trans, &extent_iter, &extent_k,
BTREE_UPDATE_internal_snapshot_node);
ret = PTR_ERR_OR_ZERO(n);
if (ret)
goto err;

bch2_bkey_drop_device(bkey_i_to_s(n), ca->dev_idx);
err:
bch2_trans_iter_exit(trans, &extent_iter);
return ret;
}

static int invalidate_one_bucket_by_bps(struct btree_trans *trans,
struct bch_dev *ca,
struct bpos bucket,
u8 gen,
struct bkey_buf *last_flushed)
{
struct bpos bp_start = bucket_pos_to_bp_start(ca, bucket);
struct bpos bp_end = bucket_pos_to_bp_end(ca, bucket);

return for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
bp_start, bp_end, 0, k,
NULL, NULL,
BCH_WATERMARK_btree|
BCH_TRANS_COMMIT_no_enospc, ({
if (k.k->type != KEY_TYPE_backpointer)
continue;

struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

if (bp.v->bucket_gen != gen)
continue;

/* filter out bps with gens that don't match */

invalidate_one_bp(trans, ca, bp, last_flushed);
}));
}
|
||||
|
||||
noinline_for_stack
|
||||
static int invalidate_one_bucket(struct btree_trans *trans,
|
||||
struct bch_dev *ca,
|
||||
struct btree_iter *lru_iter,
|
||||
struct bkey_s_c lru_k,
|
||||
struct bkey_buf *last_flushed,
|
||||
s64 *nr_to_invalidate)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_i_alloc_v4 *a = NULL;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
|
||||
unsigned cached_sectors;
|
||||
struct btree_iter alloc_iter = {};
|
||||
int ret = 0;
|
||||
|
||||
if (*nr_to_invalidate <= 0)
|
||||
@ -2084,35 +2140,37 @@ static int invalidate_one_bucket(struct btree_trans *trans,
|
||||
if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
|
||||
return 0;
|
||||
|
||||
a = bch2_trans_start_alloc_update(trans, bucket, BTREE_TRIGGER_bucket_invalidate);
|
||||
ret = PTR_ERR_OR_ZERO(a);
|
||||
struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter,
|
||||
BTREE_ID_alloc, bucket,
|
||||
BTREE_ITER_cached);
|
||||
ret = bkey_err(alloc_k);
|
||||
if (ret)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
struct bch_alloc_v4 a_convert;
|
||||
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
|
||||
|
||||
/* We expect harmless races here due to the btree write buffer: */
|
||||
if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v))
|
||||
if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(*a))
|
||||
goto out;
|
||||
|
||||
BUG_ON(a->v.data_type != BCH_DATA_cached);
|
||||
BUG_ON(a->v.dirty_sectors);
|
||||
/*
|
||||
* Impossible since alloc_lru_idx_read() only returns nonzero if the
|
||||
* bucket is supposed to be on the cached bucket LRU (i.e.
|
||||
* BCH_DATA_cached)
|
||||
*
|
||||
* bch2_lru_validate() also disallows lru keys with lru_pos_time() == 0
|
||||
*/
|
||||
BUG_ON(a->data_type != BCH_DATA_cached);
|
||||
BUG_ON(a->dirty_sectors);
|
||||
|
||||
if (!a->v.cached_sectors)
|
||||
if (!a->cached_sectors)
|
||||
bch_err(c, "invalidating empty bucket, confused");
|
||||
|
||||
cached_sectors = a->v.cached_sectors;
|
||||
unsigned cached_sectors = a->cached_sectors;
|
||||
u8 gen = a->gen;
|
||||
|
||||
SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
|
||||
a->v.gen++;
|
||||
a->v.data_type = 0;
|
||||
a->v.dirty_sectors = 0;
|
||||
a->v.stripe_sectors = 0;
|
||||
a->v.cached_sectors = 0;
|
||||
a->v.io_time[READ] = bch2_current_io_time(c, READ);
|
||||
a->v.io_time[WRITE] = bch2_current_io_time(c, WRITE);
|
||||
|
||||
ret = bch2_trans_commit(trans, NULL, NULL,
|
||||
BCH_WATERMARK_btree|
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
ret = invalidate_one_bucket_by_bps(trans, ca, bucket, gen, last_flushed);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -2120,6 +2178,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
|
||||
--*nr_to_invalidate;
|
||||
out:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &alloc_iter);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
}
|
||||
@ -2146,6 +2205,10 @@ static void bch2_do_invalidates_work(struct work_struct *work)
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
int ret = 0;
|
||||
|
||||
struct bkey_buf last_flushed;
|
||||
bch2_bkey_buf_init(&last_flushed);
|
||||
bkey_init(&last_flushed.k->k);
|
||||
|
||||
ret = bch2_btree_write_buffer_tryflush(trans);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -2170,7 +2233,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
|
||||
if (!k.k)
|
||||
break;
|
||||
|
||||
ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
|
||||
ret = invalidate_one_bucket(trans, ca, &iter, k, &last_flushed, &nr_to_invalidate);
|
||||
restart_err:
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
continue;
|
||||
@ -2183,6 +2246,7 @@ restart_err:
|
||||
err:
|
||||
bch2_trans_put(trans);
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
bch2_bkey_buf_exit(&last_flushed, c);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
|
||||
}
|
||||
|
||||
|
@ -131,7 +131,7 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
|
||||
if (a.stripe)
|
||||
return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe;
|
||||
if (bch2_bucket_sectors_dirty(a))
|
||||
return data_type;
|
||||
return bucket_data_type(data_type);
|
||||
if (a.cached_sectors)
|
||||
return BCH_DATA_cached;
|
||||
if (BCH_ALLOC_V4_NEED_DISCARD(&a))
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "checksum.h"
|
||||
#include "disk_accounting.h"
|
||||
#include "error.h"
|
||||
#include "progress.h"
|
||||
|
||||
#include <linux/mm.h>
|
||||
|
||||
@ -518,6 +519,22 @@ check_existing_bp:
|
||||
if (!other_extent.k)
|
||||
goto missing;
|
||||
|
||||
rcu_read_lock();
|
||||
struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp->k.p.inode);
|
||||
if (ca) {
|
||||
struct bkey_ptrs_c other_extent_ptrs = bch2_bkey_ptrs_c(other_extent);
|
||||
bkey_for_each_ptr(other_extent_ptrs, ptr)
|
||||
if (ptr->dev == bp->k.p.inode &&
|
||||
dev_ptr_stale_rcu(ca, ptr)) {
|
||||
ret = drop_dev_and_update(trans, other_bp.v->btree_id,
|
||||
other_extent, bp->k.p.inode);
|
||||
if (ret)
|
||||
goto err;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (bch2_extents_match(orig_k, other_extent)) {
|
||||
printbuf_reset(&buf);
|
||||
prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
|
||||
@ -594,9 +611,6 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
|
||||
struct extent_ptr_decoded p;
|
||||
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
if (p.ptr.cached)
|
||||
continue;
|
||||
|
||||
if (p.ptr.dev == BCH_SB_MEMBER_INVALID)
|
||||
continue;
|
||||
|
||||
@ -604,9 +618,11 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
|
||||
struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
|
||||
bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches);
|
||||
bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty);
|
||||
|
||||
bool stale = p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr));
|
||||
rcu_read_unlock();
|
||||
|
||||
if (check || empty) {
|
||||
if ((check || empty) && !stale) {
|
||||
struct bkey_i_backpointer bp;
|
||||
bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
|
||||
|
||||
@ -719,71 +735,6 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct progress_indicator_state {
|
||||
unsigned long next_print;
|
||||
u64 nodes_seen;
|
||||
u64 nodes_total;
|
||||
struct btree *last_node;
|
||||
};
|
||||
|
||||
static inline void progress_init(struct progress_indicator_state *s,
|
||||
struct bch_fs *c,
|
||||
u64 btree_id_mask)
|
||||
{
|
||||
memset(s, 0, sizeof(*s));
|
||||
|
||||
s->next_print = jiffies + HZ * 10;
|
||||
|
||||
for (unsigned i = 0; i < BTREE_ID_NR; i++) {
|
||||
if (!(btree_id_mask & BIT_ULL(i)))
|
||||
continue;
|
||||
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_btree,
|
||||
.btree.id = i,
|
||||
};
|
||||
|
||||
u64 v;
|
||||
bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
|
||||
s->nodes_total += div64_ul(v, btree_sectors(c));
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool progress_update_p(struct progress_indicator_state *s)
|
||||
{
|
||||
bool ret = time_after_eq(jiffies, s->next_print);
|
||||
|
||||
if (ret)
|
||||
s->next_print = jiffies + HZ * 10;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void progress_update_iter(struct btree_trans *trans,
|
||||
struct progress_indicator_state *s,
|
||||
struct btree_iter *iter,
|
||||
const char *msg)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree *b = path_l(btree_iter_path(trans, iter))->b;
|
||||
|
||||
s->nodes_seen += b != s->last_node;
|
||||
s->last_node = b;
|
||||
|
||||
if (progress_update_p(s)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
unsigned percent = s->nodes_total
|
||||
? div64_u64(s->nodes_seen * 100, s->nodes_total)
|
||||
: 0;
|
||||
|
||||
prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ",
|
||||
msg, percent, s->nodes_seen, s->nodes_total);
|
||||
bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos));
|
||||
|
||||
bch_info(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
}
|
||||
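The progress_* helpers removed above are replaced by the bch2_progress_* calls and the new progress.h include seen elsewhere in this commit; what they implement is a rate-limited progress line: count nodes as the iterator walks and print a percentage at most once every ten seconds. A rough userspace sketch of the same pattern follows, with the throttle based on time() instead of jiffies and the first line printed immediately for demonstration purposes.

/* Hedged userspace sketch of the throttled progress-report pattern. */
#include <stdio.h>
#include <time.h>

struct progress {
	time_t next_print;
	unsigned long long seen, total;
};

static void progress_init(struct progress *s, unsigned long long total)
{
	s->next_print = time(NULL);	/* kernel version waits 10s before the first line */
	s->seen = 0;
	s->total = total;
}

static void progress_update(struct progress *s, const char *msg)
{
	s->seen++;
	if (time(NULL) < s->next_print)
		return;
	s->next_print = time(NULL) + 10;	/* at most one line every 10 seconds */

	unsigned percent = s->total ? (unsigned) (s->seen * 100 / s->total) : 0;
	printf("%s: %u%%, done %llu/%llu nodes\n", msg, percent, s->seen, s->total);
}

int main(void)
{
	struct progress p;
	progress_init(&p, 1000);
	for (int i = 0; i < 1000; i++)
		progress_update(&p, "extents_to_backpointers");
	return 0;
}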
|
||||
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
||||
struct extents_to_bp_state *s)
|
||||
{
|
||||
@ -791,7 +742,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
||||
struct progress_indicator_state progress;
|
||||
int ret = 0;
|
||||
|
||||
progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));
|
||||
bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));
|
||||
|
||||
for (enum btree_id btree_id = 0;
|
||||
btree_id < btree_id_nr_alive(c);
|
||||
@ -810,7 +761,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
||||
BTREE_ITER_prefetch);
|
||||
|
||||
ret = for_each_btree_key_continue(trans, iter, 0, k, ({
|
||||
progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
|
||||
bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
|
||||
check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||
}));
|
||||
@ -905,9 +856,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Cached pointers don't have backpointers: */
|
||||
|
||||
if (sectors[ALLOC_dirty] != a->dirty_sectors ||
|
||||
sectors[ALLOC_cached] != a->cached_sectors ||
|
||||
sectors[ALLOC_stripe] != a->stripe_sectors) {
|
||||
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) {
|
||||
ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
|
||||
@ -916,6 +866,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
|
||||
}
|
||||
|
||||
if (sectors[ALLOC_dirty] > a->dirty_sectors ||
|
||||
sectors[ALLOC_cached] > a->cached_sectors ||
|
||||
sectors[ALLOC_stripe] > a->stripe_sectors) {
|
||||
ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
|
||||
-BCH_ERR_transaction_restart_nested;
|
||||
@ -923,7 +874,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
|
||||
}
|
||||
|
||||
if (!sectors[ALLOC_dirty] &&
|
||||
!sectors[ALLOC_stripe])
|
||||
!sectors[ALLOC_stripe] &&
|
||||
!sectors[ALLOC_cached])
|
||||
__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty);
|
||||
else
|
||||
__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
|
||||
@ -1210,11 +1162,11 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
|
||||
|
||||
bch2_bkey_buf_init(&last_flushed);
|
||||
bkey_init(&last_flushed.k->k);
|
||||
progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));
|
||||
bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));
|
||||
|
||||
int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers,
|
||||
POS_MIN, BTREE_ITER_prefetch, k, ({
|
||||
progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
|
||||
bch2_progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
|
||||
check_one_backpointer(trans, start, end, k, &last_flushed);
|
||||
}));
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
|
||||
#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
|
||||
#ifndef _BCACHEFS_BACKPOINTERS_H
|
||||
#define _BCACHEFS_BACKPOINTERS_H
|
||||
|
||||
#include "btree_cache.h"
|
||||
#include "btree_iter.h"
|
||||
@ -123,7 +123,12 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
|
||||
return BCH_DATA_btree;
|
||||
case KEY_TYPE_extent:
|
||||
case KEY_TYPE_reflink_v:
|
||||
return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user;
|
||||
if (p.has_ec)
|
||||
return BCH_DATA_stripe;
|
||||
if (p.ptr.cached)
|
||||
return BCH_DATA_cached;
|
||||
else
|
||||
return BCH_DATA_user;
|
||||
case KEY_TYPE_stripe: {
|
||||
const struct bch_extent_ptr *ptr = &entry->ptr;
|
||||
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
|
||||
@ -147,7 +152,20 @@ static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
|
||||
struct bkey_i_backpointer *bp)
|
||||
{
|
||||
bkey_backpointer_init(&bp->k_i);
|
||||
bp->k.p = POS(p.ptr.dev, ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset);
|
||||
bp->k.p.inode = p.ptr.dev;
|
||||
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
bp->k.p.offset = ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset;
|
||||
else {
|
||||
/*
|
||||
* Put stripe backpointers where they won't collide with the
|
||||
* extent backpointers within the stripe:
|
||||
*/
|
||||
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
|
||||
bp->k.p.offset = ((u64) (p.ptr.offset + le16_to_cpu(s.v->sectors)) <<
|
||||
MAX_EXTENT_COMPRESS_RATIO_SHIFT) - 1;
|
||||
}
|
||||
|
||||
bp->v = (struct bch_backpointer) {
|
||||
.btree_id = btree_id,
|
||||
.level = level,
|
||||
|
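The change to bch2_extent_ptr_to_bp() above gives stripe keys a dedicated backpointer slot at the very end of the range covered by the stripe block, so they do not land on top of the backpointers of extents stored inside the stripe. A small arithmetic sketch of the two offset formulas, using an illustrative shift value in place of the real MAX_EXTENT_COMPRESS_RATIO_SHIFT constant from the bcachefs headers:

/* Hedged sketch: backpointer key offsets, SHIFT value assumed for illustration. */
#include <stdio.h>

#define SHIFT 10

int main(void)
{
	unsigned long long ptr_offset = 4096;	/* device sector the pointer points at */
	unsigned crc_offset = 3;		/* offset into the uncompressed extent */
	unsigned stripe_sectors = 128;		/* sectors per stripe block */

	/* ordinary extent/reflink_v backpointer */
	unsigned long long extent_bp = (ptr_offset << SHIFT) + crc_offset;

	/* stripe backpointer: placed at the end of the stripe block's range */
	unsigned long long stripe_bp = ((ptr_offset + stripe_sectors) << SHIFT) - 1;

	printf("extent bp offset: %llu\n", extent_bp);
	printf("stripe bp offset: %llu\n", stripe_bp);
	/* per the source comment, extent bps inside the stripe sort below this
	 * value, so the stripe's own bp does not collide with them */
	return 0;
}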
@ -203,6 +203,7 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/zstd.h>
|
||||
#include <linux/unicode.h>
|
||||
|
||||
#include "bcachefs_format.h"
|
||||
#include "btree_journal_iter_types.h"
|
||||
@ -444,6 +445,7 @@ BCH_DEBUG_PARAMS_DEBUG()
|
||||
x(btree_node_sort) \
|
||||
x(btree_node_read) \
|
||||
x(btree_node_read_done) \
|
||||
x(btree_node_write) \
|
||||
x(btree_interior_update_foreground) \
|
||||
x(btree_interior_update_total) \
|
||||
x(btree_gc) \
|
||||
@ -456,6 +458,7 @@ BCH_DEBUG_PARAMS_DEBUG()
|
||||
x(blocked_journal_low_on_space) \
|
||||
x(blocked_journal_low_on_pin) \
|
||||
x(blocked_journal_max_in_flight) \
|
||||
x(blocked_journal_max_open) \
|
||||
x(blocked_key_cache_flush) \
|
||||
x(blocked_allocate) \
|
||||
x(blocked_allocate_open_bucket) \
|
||||
@ -697,6 +700,8 @@ enum bch_write_ref {
|
||||
BCH_WRITE_REF_NR,
|
||||
};
|
||||
|
||||
#define BCH_FS_DEFAULT_UTF8_ENCODING UNICODE_AGE(12, 1, 0)
|
||||
|
||||
struct bch_fs {
|
||||
struct closure cl;
|
||||
|
||||
@ -781,6 +786,9 @@ struct bch_fs {
|
||||
u64 btrees_lost_data;
|
||||
} sb;
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
struct unicode_map *cf_encoding;
|
||||
#endif
|
||||
|
||||
struct bch_sb_handle disk_sb;
|
||||
|
||||
|
@ -686,7 +686,11 @@ struct bch_sb_field_ext {
|
||||
x(inode_depth, BCH_VERSION(1, 17)) \
|
||||
x(persistent_inode_cursors, BCH_VERSION(1, 18)) \
|
||||
x(autofix_errors, BCH_VERSION(1, 19)) \
|
||||
x(directory_size, BCH_VERSION(1, 20))
|
||||
x(directory_size, BCH_VERSION(1, 20)) \
|
||||
x(cached_backpointers, BCH_VERSION(1, 21)) \
|
||||
x(stripe_backpointers, BCH_VERSION(1, 22)) \
|
||||
x(stripe_lru, BCH_VERSION(1, 23)) \
|
||||
x(casefolding, BCH_VERSION(1, 24))
|
||||
|
||||
enum bcachefs_metadata_version {
|
||||
bcachefs_metadata_version_min = 9,
|
||||
@ -908,7 +912,8 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u
|
||||
x(journal_no_flush, 16) \
|
||||
x(alloc_v2, 17) \
|
||||
x(extents_across_btree_nodes, 18) \
|
||||
x(incompat_version_field, 19)
|
||||
x(incompat_version_field, 19) \
|
||||
x(casefolding, 20)
|
||||
|
||||
#define BCH_SB_FEATURES_ALWAYS \
|
||||
(BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \
|
||||
|
@ -234,6 +234,11 @@ enum bch_data_event {
|
||||
BCH_DATA_EVENT_NR = 1,
|
||||
};
|
||||
|
||||
enum data_progress_data_type_special {
|
||||
DATA_PROGRESS_DATA_TYPE_phys = 254,
|
||||
DATA_PROGRESS_DATA_TYPE_done = 255,
|
||||
};
|
||||
|
||||
struct bch_ioctl_data_progress {
|
||||
__u8 data_type;
|
||||
__u8 btree_id;
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "journal.h"
|
||||
#include "keylist.h"
|
||||
#include "move.h"
|
||||
#include "progress.h"
|
||||
#include "recovery_passes.h"
|
||||
#include "reflink.h"
|
||||
#include "recovery.h"
|
||||
@ -656,7 +657,9 @@ fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool initial)
|
||||
static int bch2_gc_btree(struct btree_trans *trans,
|
||||
struct progress_indicator_state *progress,
|
||||
enum btree_id btree, bool initial)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
unsigned target_depth = btree_node_type_has_triggers(__btree_node_type(0, btree)) ? 0 : 1;
|
||||
@ -673,6 +676,7 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in
|
||||
BTREE_ITER_prefetch);
|
||||
|
||||
ret = for_each_btree_key_continue(trans, iter, 0, k, ({
|
||||
bch2_progress_update_iter(trans, progress, &iter, "check_allocations");
|
||||
gc_pos_set(c, gc_pos_btree(btree, level, k.k->p));
|
||||
bch2_gc_mark_key(trans, btree, level, &prev, &iter, k, initial);
|
||||
}));
|
||||
@ -717,22 +721,24 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
|
||||
static int bch2_gc_btrees(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
enum btree_id ids[BTREE_ID_NR];
|
||||
struct printbuf buf = PRINTBUF;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
for (i = 0; i < BTREE_ID_NR; i++)
|
||||
struct progress_indicator_state progress;
|
||||
bch2_progress_init(&progress, c, ~0ULL);
|
||||
|
||||
enum btree_id ids[BTREE_ID_NR];
|
||||
for (unsigned i = 0; i < BTREE_ID_NR; i++)
|
||||
ids[i] = i;
|
||||
bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);
|
||||
|
||||
for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
|
||||
for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
|
||||
unsigned btree = i < BTREE_ID_NR ? ids[i] : i;
|
||||
|
||||
if (IS_ERR_OR_NULL(bch2_btree_id_root(c, btree)->b))
|
||||
continue;
|
||||
|
||||
ret = bch2_gc_btree(trans, btree, true);
|
||||
ret = bch2_gc_btree(trans, &progress, btree, true);
|
||||
}
|
||||
|
||||
printbuf_exit(&buf);
|
||||
|
@ -997,7 +997,7 @@ drop_this_key:
|
||||
}
|
||||
got_good_key:
|
||||
le16_add_cpu(&i->u64s, -next_good_key);
|
||||
memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k);
|
||||
memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k);
|
||||
set_btree_node_need_rewrite(b);
|
||||
}
|
||||
fsck_err:
|
||||
@ -2016,7 +2016,7 @@ static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
|
||||
bch2_journal_pin_drop(&c->journal, &w->journal);
|
||||
}
|
||||
|
||||
static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
|
||||
{
|
||||
struct btree_write *w = btree_prev_write(b);
|
||||
unsigned long old, new;
|
||||
@ -2024,6 +2024,9 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
|
||||
bch2_btree_complete_write(c, b, w);
|
||||
|
||||
if (start_time)
|
||||
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_write], start_time);
|
||||
|
||||
old = READ_ONCE(b->flags);
|
||||
do {
|
||||
new = old;
|
||||
@ -2054,7 +2057,7 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
|
||||
}
|
||||
|
||||
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
|
||||
@ -2062,7 +2065,7 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
|
||||
/* we don't need transaction context anymore after we got the lock. */
|
||||
bch2_trans_put(trans);
|
||||
__btree_node_write_done(c, b);
|
||||
__btree_node_write_done(c, b, start_time);
|
||||
six_unlock_read(&b->c.lock);
|
||||
}
|
||||
|
||||
@ -2072,6 +2075,7 @@ static void btree_node_write_work(struct work_struct *work)
|
||||
container_of(work, struct btree_write_bio, work);
|
||||
struct bch_fs *c = wbio->wbio.c;
|
||||
struct btree *b = wbio->wbio.bio.bi_private;
|
||||
u64 start_time = wbio->start_time;
|
||||
int ret = 0;
|
||||
|
||||
btree_bounce_free(c,
|
||||
@ -2104,7 +2108,7 @@ static void btree_node_write_work(struct work_struct *work)
|
||||
}
|
||||
out:
|
||||
bio_put(&wbio->wbio.bio);
|
||||
btree_node_write_done(c, b);
|
||||
btree_node_write_done(c, b, start_time);
|
||||
return;
|
||||
err:
|
||||
set_btree_node_noevict(b);
|
||||
@ -2208,6 +2212,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
|
||||
bool validate_before_checksum = false;
|
||||
enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
|
||||
void *data;
|
||||
u64 start_time = local_clock();
|
||||
int ret;
|
||||
|
||||
if (flags & BTREE_WRITE_ALREADY_STARTED)
|
||||
@ -2416,6 +2421,7 @@ do_write:
|
||||
wbio->data = data;
|
||||
wbio->data_bytes = bytes;
|
||||
wbio->sector_offset = b->written;
|
||||
wbio->start_time = start_time;
|
||||
wbio->wbio.c = c;
|
||||
wbio->wbio.used_mempool = used_mempool;
|
||||
wbio->wbio.first_btree_write = !b->written;
|
||||
@ -2443,7 +2449,7 @@ err:
|
||||
b->written += sectors_to_write;
|
||||
nowrite:
|
||||
btree_bounce_free(c, bytes, used_mempool, data);
|
||||
__btree_node_write_done(c, b);
|
||||
__btree_node_write_done(c, b, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -52,6 +52,7 @@ struct btree_write_bio {
|
||||
void *data;
|
||||
unsigned data_bytes;
|
||||
unsigned sector_offset;
|
||||
u64 start_time;
|
||||
struct bch_write_bio wbio;
|
||||
};
|
||||
|
||||
|
@ -2357,6 +2357,12 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
|
||||
bch2_btree_iter_verify_entry_exit(iter);
|
||||
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX));
|
||||
|
||||
ret = trans_maybe_inject_restart(trans, _RET_IP_);
|
||||
if (unlikely(ret)) {
|
||||
k = bkey_s_c_err(ret);
|
||||
goto out_no_locked;
|
||||
}
|
||||
|
||||
if (iter->update_path) {
|
||||
bch2_path_put_nokeep(trans, iter->update_path,
|
||||
iter->flags & BTREE_ITER_intent);
|
||||
@ -2622,6 +2628,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
|
||||
bch2_btree_iter_verify_entry_exit(iter);
|
||||
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));
|
||||
|
||||
int ret = trans_maybe_inject_restart(trans, _RET_IP_);
|
||||
if (unlikely(ret)) {
|
||||
k = bkey_s_c_err(ret);
|
||||
goto out_no_locked;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
k = __bch2_btree_iter_peek_prev(iter, search_key);
|
||||
if (unlikely(!k.k))
|
||||
@ -2749,6 +2761,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
bch2_btree_iter_verify_entry_exit(iter);
|
||||
EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache));
|
||||
|
||||
ret = trans_maybe_inject_restart(trans, _RET_IP_);
|
||||
if (unlikely(ret)) {
|
||||
k = bkey_s_c_err(ret);
|
||||
goto out_no_locked;
|
||||
}
|
||||
|
||||
/* extents can't span inode numbers: */
|
||||
if ((iter->flags & BTREE_ITER_is_extents) &&
|
||||
unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
|
||||
@ -3106,6 +3124,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
|
||||
|
||||
WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
|
||||
|
||||
ret = trans_maybe_inject_restart(trans, _RET_IP_);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
struct btree_transaction_stats *s = btree_trans_stats(trans);
|
||||
s->max_mem = max(s->max_mem, new_bytes);
|
||||
|
||||
@ -3163,7 +3185,8 @@ out_new_mem:
|
||||
|
||||
if (old_bytes) {
|
||||
trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
|
||||
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
|
||||
return ERR_PTR(btree_trans_restart_ip(trans,
|
||||
BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
|
||||
}
|
||||
out_change_top:
|
||||
p = trans->mem + trans->mem_top;
|
||||
@ -3271,6 +3294,14 @@ u32 bch2_trans_begin(struct btree_trans *trans)
|
||||
|
||||
trans->last_begin_ip = _RET_IP_;
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
|
||||
if (trans->restarted) {
|
||||
trans->restart_count_this_trans++;
|
||||
} else {
|
||||
trans->restart_count_this_trans = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
trans_set_locked(trans, false);
|
||||
|
||||
if (trans->restarted) {
|
||||
|
@@ -355,6 +355,18 @@ static int btree_trans_restart(struct btree_trans *trans, int err)
return btree_trans_restart_ip(trans, err, _THIS_IP_);
}

static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip)
{
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) {
	trace_and_count(trans->c, trans_restart_injected, trans, ip);
	return btree_trans_restart_ip(trans,
		BCH_ERR_transaction_restart_fault_inject, ip);
}
#endif
return 0;
}
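The injection test above fires when the low min(63, 10 + restart_count_this_trans) bits of the nanosecond clock are all zero, so a fresh transaction gets a forced restart roughly once every 2^10 attempts, and every consecutive restart of the same transaction halves the odds of injecting another one, which keeps fault injection from livelocking a transaction. A standalone sketch of the same mask arithmetic (userspace, no kernel headers):

/* Hedged sketch: the probability math behind the mask test above. */
#include <stdio.h>

static int would_inject(unsigned long long now_ns, unsigned restarts)
{
	unsigned k = 10 + restarts;
	if (k > 63)
		k = 63;
	unsigned long long mask = ~(~0ULL << k);	/* low k bits of the clock */
	return !(now_ns & mask);			/* true roughly once per 2^k */
}

int main(void)
{
	for (unsigned r = 0; r <= 4; r++)
		printf("restarts so far: %u -> injection rate ~ 1/%llu\n",
		       r, 1ULL << (10 + r));

	/* a timestamp whose low bits happen to all be zero triggers an injection */
	printf("inject at t=0x1234000? %d\n", would_inject(0x1234000ULL, 0));
	return 0;
}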
bool bch2_btree_node_upgrade(struct btree_trans *,
|
||||
struct btree_path *, unsigned);
|
||||
|
||||
@ -739,7 +751,7 @@ transaction_restart: \
|
||||
if (!_ret2) \
|
||||
bch2_trans_verify_not_restarted(_trans, _restart_count);\
|
||||
\
|
||||
_ret2 ?: trans_was_restarted(_trans, _restart_count); \
|
||||
_ret2 ?: trans_was_restarted(_trans, _orig_restart_count); \
|
||||
})
|
||||
|
||||
#define for_each_btree_key_max_continue(_trans, _iter, \
|
||||
|
@ -336,6 +336,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
|
||||
BUG_ON(i->cached != path->cached);
|
||||
BUG_ON(i->level != path->level);
|
||||
BUG_ON(i->btree_id != path->btree_id);
|
||||
BUG_ON(i->bkey_type != __btree_node_type(path->level, path->btree_id));
|
||||
EBUG_ON(!i->level &&
|
||||
btree_type_has_snapshots(i->btree_id) &&
|
||||
!(i->flags & BTREE_UPDATE_internal_snapshot_node) &&
|
||||
@ -517,69 +518,45 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
|
||||
}
|
||||
}
|
||||
|
||||
static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
|
||||
unsigned *btree_id_updates_start)
|
||||
{
|
||||
bool trans_trigger_run;
|
||||
|
||||
/*
|
||||
* Running triggers will append more updates to the list of updates as
|
||||
* we're walking it:
|
||||
*/
|
||||
do {
|
||||
trans_trigger_run = false;
|
||||
|
||||
for (unsigned i = *btree_id_updates_start;
|
||||
i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
|
||||
i++) {
|
||||
if (trans->updates[i].btree_id < btree_id) {
|
||||
*btree_id_updates_start = i;
|
||||
continue;
|
||||
}
|
||||
|
||||
int ret = run_one_trans_trigger(trans, trans->updates + i);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret)
|
||||
trans_trigger_run = true;
|
||||
}
|
||||
} while (trans_trigger_run);
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
BUG_ON(!(i->flags & BTREE_TRIGGER_norun) &&
|
||||
i->btree_id == btree_id &&
|
||||
btree_node_type_has_trans_triggers(i->bkey_type) &&
|
||||
(!i->insert_trigger_run || !i->overwrite_trigger_run));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
|
||||
{
|
||||
unsigned btree_id = 0, btree_id_updates_start = 0;
|
||||
int ret = 0;
|
||||
unsigned sort_id_start = 0;
|
||||
|
||||
/*
|
||||
*
|
||||
* For a given btree, this algorithm runs insert triggers before
|
||||
* overwrite triggers: this is so that when extents are being moved
|
||||
* (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before
|
||||
* they are re-added.
|
||||
*/
|
||||
for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
|
||||
if (btree_id == BTREE_ID_alloc)
|
||||
continue;
|
||||
while (sort_id_start < trans->nr_updates) {
|
||||
unsigned i, sort_id = trans->updates[sort_id_start].sort_order;
|
||||
bool trans_trigger_run;
|
||||
|
||||
ret = run_btree_triggers(trans, btree_id, &btree_id_updates_start);
|
||||
if (ret)
|
||||
return ret;
|
||||
/*
|
||||
* For a given btree, this algorithm runs insert triggers before
|
||||
* overwrite triggers: this is so that when extents are being
|
||||
* moved (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop
|
||||
* references before they are re-added.
|
||||
*
|
||||
* Running triggers will append more updates to the list of
|
||||
* updates as we're walking it:
|
||||
*/
|
||||
do {
|
||||
trans_trigger_run = false;
|
||||
|
||||
for (i = sort_id_start;
|
||||
i < trans->nr_updates && trans->updates[i].sort_order <= sort_id;
|
||||
i++) {
|
||||
if (trans->updates[i].sort_order < sort_id) {
|
||||
sort_id_start = i;
|
||||
continue;
|
||||
}
|
||||
|
||||
int ret = run_one_trans_trigger(trans, trans->updates + i);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret)
|
||||
trans_trigger_run = true;
|
||||
}
|
||||
} while (trans_trigger_run);
|
||||
|
||||
sort_id_start = i;
|
||||
}
|
||||
|
||||
btree_id_updates_start = 0;
|
||||
ret = run_btree_triggers(trans, BTREE_ID_alloc, &btree_id_updates_start);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
trans_for_each_update(trans, i)
|
||||
BUG_ON(!(i->flags & BTREE_TRIGGER_norun) &&
|
||||
@ -999,6 +976,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
||||
|
||||
bch2_trans_verify_not_unlocked_or_in_restart(trans);
|
||||
|
||||
ret = trans_maybe_inject_restart(trans, _RET_IP_);
|
||||
if (unlikely(ret))
|
||||
goto out_reset;
|
||||
|
||||
if (!trans->nr_updates &&
|
||||
!trans->journal_entries_u64s)
|
||||
goto out_reset;
|
||||
|
@ -423,6 +423,7 @@ static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
|
||||
|
||||
struct btree_insert_entry {
|
||||
unsigned flags;
|
||||
u8 sort_order;
|
||||
u8 bkey_type;
|
||||
enum btree_id btree_id:8;
|
||||
u8 level:4;
|
||||
@ -509,6 +510,9 @@ struct btree_trans {
|
||||
bool notrace_relock_fail:1;
|
||||
enum bch_errcode restarted:16;
|
||||
u32 restart_count;
|
||||
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
|
||||
u32 restart_count_this_trans;
|
||||
#endif
|
||||
|
||||
u64 last_begin_time;
|
||||
unsigned long last_begin_ip;
|
||||
@ -850,6 +854,18 @@ static inline bool btree_type_uses_write_buffer(enum btree_id btree)
|
||||
return BIT_ULL(btree) & mask;
|
||||
}
|
||||
|
||||
static inline u8 btree_trigger_order(enum btree_id btree)
{
switch (btree) {
case BTREE_ID_alloc:
	return U8_MAX;
case BTREE_ID_stripes:
	return U8_MAX - 1;
default:
	return btree;
}
}
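btree_trigger_order() feeds the new sort_order field (see the btree_insert_entry and btree_insert_entry_cmp changes later in this commit), so alloc and stripes updates sort, and therefore run their triggers, after everything else, while all other btrees keep their natural order. A small illustration with qsort and made-up btree ids; the enum values here are not the real BTREE_ID_* numbers:

/* Hedged sketch: how sort_order pushes alloc/stripes updates to the end. */
#include <stdio.h>
#include <stdlib.h>

enum { ID_extents = 0, ID_alloc = 1, ID_stripes = 2, ID_backpointers = 3 }; /* illustrative */

static unsigned char trigger_order(int btree)
{
	switch (btree) {
	case ID_alloc:   return 255;	/* U8_MAX     */
	case ID_stripes: return 254;	/* U8_MAX - 1 */
	default:         return (unsigned char) btree;
	}
}

struct entry { int btree; unsigned char sort_order; };

static int cmp(const void *l, const void *r)
{
	const struct entry *a = l, *b = r;
	return (a->sort_order > b->sort_order) - (a->sort_order < b->sort_order);
}

int main(void)
{
	struct entry e[] = {
		{ ID_alloc, 0 }, { ID_extents, 0 }, { ID_stripes, 0 }, { ID_backpointers, 0 },
	};
	for (unsigned i = 0; i < 4; i++)
		e[i].sort_order = trigger_order(e[i].btree);

	qsort(e, 4, sizeof(e[0]), cmp);

	for (unsigned i = 0; i < 4; i++)
		printf("%d ", e[i].btree);	/* extents, backpointers, stripes, alloc */
	printf("\n");
	return 0;
}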
|
||||
struct btree_root {
|
||||
struct btree *b;
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
||||
static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
|
||||
const struct btree_insert_entry *r)
|
||||
{
|
||||
return cmp_int(l->btree_id, r->btree_id) ?:
|
||||
return cmp_int(l->sort_order, r->sort_order) ?:
|
||||
cmp_int(l->cached, r->cached) ?:
|
||||
-cmp_int(l->level, r->level) ?:
|
||||
bpos_cmp(l->k->k.p, r->k->k.p);
|
||||
@ -397,6 +397,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
|
||||
|
||||
n = (struct btree_insert_entry) {
|
||||
.flags = flags,
|
||||
.sort_order = btree_trigger_order(path->btree_id),
|
||||
.bkey_type = __btree_node_type(path->level, path->btree_id),
|
||||
.btree_id = path->btree_id,
|
||||
.level = path->level,
|
||||
|
@ -681,9 +681,11 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
|
||||
b = as->old_nodes[i];
|
||||
|
||||
bch2_trans_begin(trans);
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
|
||||
seq = b->data ? b->data->keys.seq : 0;
|
||||
six_unlock_read(&b->c.lock);
|
||||
bch2_trans_unlock_long(trans);
|
||||
|
||||
if (seq == as->old_nodes_seq[i])
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
|
||||
|
@ -282,12 +282,12 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct bt
|
||||
{
|
||||
struct bset_tree *t = bset_tree_last(b);
|
||||
struct btree_node_entry *bne = max(write_block(b),
|
||||
(void *) btree_bkey_last(b, bset_tree_last(b)));
|
||||
(void *) btree_bkey_last(b, t));
|
||||
ssize_t remaining_space =
|
||||
__bch2_btree_u64s_remaining(b, bne->keys.start);
|
||||
|
||||
if (unlikely(bset_written(b, bset(b, t)))) {
|
||||
if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
|
||||
if (b->written + block_sectors(c) <= btree_sectors(c))
|
||||
return bne;
|
||||
} else {
|
||||
if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) &&
|
||||
|
@ -590,11 +590,9 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (!p.ptr.cached) {
|
||||
ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (flags & BTREE_TRIGGER_gc) {
|
||||
@ -674,10 +672,10 @@ err:
|
||||
return -BCH_ERR_ENOMEM_mark_stripe_ptr;
|
||||
}
|
||||
|
||||
mutex_lock(&c->ec_stripes_heap_lock);
|
||||
gc_stripe_lock(m);
|
||||
|
||||
if (!m || !m->alive) {
|
||||
mutex_unlock(&c->ec_stripes_heap_lock);
|
||||
gc_stripe_unlock(m);
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s",
|
||||
@ -693,7 +691,7 @@ err:
|
||||
.type = BCH_DISK_ACCOUNTING_replicas,
|
||||
};
|
||||
memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e));
|
||||
mutex_unlock(&c->ec_stripes_heap_lock);
|
||||
gc_stripe_unlock(m);
|
||||
|
||||
acc.replicas.data_type = data_type;
|
||||
int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, true);
|
||||
|
@ -39,33 +39,6 @@ static inline u64 sector_to_bucket_and_offset(const struct bch_dev *ca, sector_t
|
||||
for (_b = (_buckets)->b + (_buckets)->first_bucket; \
|
||||
_b < (_buckets)->b + (_buckets)->nbuckets; _b++)
|
||||
|
||||
/*
|
||||
* Ugly hack alert:
|
||||
*
|
||||
* We need to cram a spinlock in a single byte, because that's what we have left
|
||||
* in struct bucket, and we care about the size of these - during fsck, we need
|
||||
* in memory state for every single bucket on every device.
|
||||
*
|
||||
* We used to do
|
||||
* while (xchg(&b->lock, 1) cpu_relax();
|
||||
* but, it turns out not all architectures support xchg on a single byte.
|
||||
*
|
||||
* So now we use bit_spin_lock(), with fun games since we can't burn a whole
|
||||
* ulong for this - we just need to make sure the lock bit always ends up in the
|
||||
* first byte.
|
||||
*/
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
#define BUCKET_LOCK_BITNR 0
|
||||
#else
|
||||
#define BUCKET_LOCK_BITNR (BITS_PER_LONG - 1)
|
||||
#endif
|
||||
|
||||
union ulong_byte_assert {
|
||||
ulong ulong;
|
||||
u8 byte;
|
||||
};
|
||||
|
||||
static inline void bucket_unlock(struct bucket *b)
|
||||
{
|
||||
BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte);
|
||||
|
@@ -7,6 +7,33 @@

#define BUCKET_JOURNAL_SEQ_BITS 16

/*
 * Ugly hack alert:
 *
 * We need to cram a spinlock in a single byte, because that's what we have left
 * in struct bucket, and we care about the size of these - during fsck, we need
 * in memory state for every single bucket on every device.
 *
 * We used to do
 * while (xchg(&b->lock, 1) cpu_relax();
 * but, it turns out not all architectures support xchg on a single byte.
 *
 * So now we use bit_spin_lock(), with fun games since we can't burn a whole
 * ulong for this - we just need to make sure the lock bit always ends up in the
 * first byte.
 */

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define BUCKET_LOCK_BITNR 0
#else
#define BUCKET_LOCK_BITNR (BITS_PER_LONG - 1)
#endif

union ulong_byte_assert {
	ulong	ulong;
	u8	byte;
};
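As the comment moved in above explains, the lock has to fit in the one spare byte of struct bucket, so the bit number is picked per endianness to keep the lock bit inside the first byte of the containing word, and the kernel side then takes it with bit_spin_lock(). A userspace check of that first-byte property, mirroring the ulong_byte_assert union:

/* Hedged sketch: verifying the "lock bit lands in byte 0" assumption. */
#include <stdio.h>

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define BUCKET_LOCK_BITNR 0
#else
#define BUCKET_LOCK_BITNR (sizeof(unsigned long) * 8 - 1)
#endif

union ulong_byte {
	unsigned long ul;
	unsigned char byte;
};

int main(void)
{
	union ulong_byte u = { .ul = 1UL << BUCKET_LOCK_BITNR };

	/* byte 0 is the only byte that changes when the lock bit is set,
	 * so spinning on it never clobbers the rest of struct bucket */
	printf("lock bit lands in first byte: %s\n", u.byte ? "yes" : "no");
	return 0;
}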
||||
struct bucket {
|
||||
u8 lock;
|
||||
u8 gen_valid:1;
|
||||
|
@ -315,8 +315,10 @@ static int bch2_data_thread(void *arg)
|
||||
ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
|
||||
if (ctx->thr.ret == -BCH_ERR_device_offline)
|
||||
ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline;
|
||||
else
|
||||
else {
|
||||
ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done;
|
||||
ctx->stats.data_type = (int) DATA_PROGRESS_DATA_TYPE_done;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -271,8 +271,8 @@ int bch2_bio_uncompress_inplace(struct bch_write_op *op,
|
||||
if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
|
||||
crc->compressed_size << 9 > c->opts.encoded_extent_max) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "error rewriting existing data: extent too big");
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"extent too big to decompress");
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -EIO;
|
||||
@ -283,8 +283,8 @@ int bch2_bio_uncompress_inplace(struct bch_write_op *op,
|
||||
if (__bio_uncompress(c, bio, data.b, *crc)) {
|
||||
if (!c->opts.no_data_io) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "error rewriting existing data: decompression error");
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"decompression error");
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
@ -93,7 +93,7 @@ static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struc
|
||||
return true;
|
||||
}
|
||||
|
||||
static noinline void trace_move_extent_finish2(struct data_update *u,
|
||||
static noinline void trace_io_move_finish2(struct data_update *u,
|
||||
struct bkey_i *new,
|
||||
struct bkey_i *insert)
|
||||
{
|
||||
@ -113,11 +113,11 @@ static noinline void trace_move_extent_finish2(struct data_update *u,
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
|
||||
prt_newline(&buf);
|
||||
|
||||
trace_move_extent_finish(c, buf.buf);
|
||||
trace_io_move_finish(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
static void trace_move_extent_fail2(struct data_update *m,
|
||||
static void trace_io_move_fail2(struct data_update *m,
|
||||
struct bkey_s_c new,
|
||||
struct bkey_s_c wrote,
|
||||
struct bkey_i *insert,
|
||||
@ -128,7 +128,7 @@ static void trace_move_extent_fail2(struct data_update *m,
|
||||
struct printbuf buf = PRINTBUF;
|
||||
unsigned rewrites_found = 0;
|
||||
|
||||
if (!trace_move_extent_fail_enabled())
|
||||
if (!trace_io_move_fail_enabled())
|
||||
return;
|
||||
|
||||
prt_str(&buf, msg);
|
||||
@ -168,7 +168,7 @@ static void trace_move_extent_fail2(struct data_update *m,
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
|
||||
}
|
||||
|
||||
trace_move_extent_fail(c, buf.buf);
|
||||
trace_io_move_fail(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
@ -216,7 +216,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
new = bkey_i_to_extent(bch2_keylist_front(keys));
|
||||
|
||||
if (!bch2_extents_match(k, old)) {
|
||||
trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i),
|
||||
trace_io_move_fail2(m, k, bkey_i_to_s_c(&new->k_i),
|
||||
NULL, "no match:");
|
||||
goto nowork;
|
||||
}
|
||||
@ -256,7 +256,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
|
||||
if (m->data_opts.rewrite_ptrs &&
|
||||
!rewrites_found &&
|
||||
bch2_bkey_durability(c, k) >= m->op.opts.data_replicas) {
|
||||
trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "no rewrites found:");
|
||||
trace_io_move_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "no rewrites found:");
|
||||
goto nowork;
|
||||
}
|
||||
|
||||
@ -273,7 +273,7 @@ restart_drop_conflicting_replicas:
|
||||
}
|
||||
|
||||
if (!bkey_val_u64s(&new->k)) {
|
||||
trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "new replicas conflicted:");
|
||||
trace_io_move_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "new replicas conflicted:");
|
||||
goto nowork;
|
||||
}
|
||||
|
||||
@ -342,6 +342,7 @@ restart_drop_extra_replicas:
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_str(&buf, "about to insert invalid key in data update path");
|
||||
prt_printf(&buf, "\nop.nonce: %u", m->op.nonce);
|
||||
prt_str(&buf, "\nold: ");
|
||||
bch2_bkey_val_to_text(&buf, c, old);
|
||||
prt_str(&buf, "\nk: ");
|
||||
@ -386,9 +387,9 @@ restart_drop_extra_replicas:
|
||||
if (!ret) {
|
||||
bch2_btree_iter_set_pos(&iter, next_pos);
|
||||
|
||||
this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size);
|
||||
if (trace_move_extent_finish_enabled())
|
||||
trace_move_extent_finish2(m, &new->k_i, insert);
|
||||
this_cpu_add(c->counters[BCH_COUNTER_io_move_finish], new->k.size);
|
||||
if (trace_io_move_finish_enabled())
|
||||
trace_io_move_finish2(m, &new->k_i, insert);
|
||||
}
|
||||
err:
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
@ -410,7 +411,7 @@ nowork:
|
||||
&m->stats->sectors_raced);
|
||||
}
|
||||
|
||||
count_event(c, move_extent_fail);
|
||||
count_event(c, io_move_fail);
|
||||
|
||||
bch2_btree_iter_advance(&iter);
|
||||
goto next;
|
||||
@ -438,7 +439,7 @@ void bch2_data_update_read_done(struct data_update *m)
|
||||
m->op.crc = m->rbio.pick.crc;
|
||||
m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
|
||||
|
||||
this_cpu_add(m->op.c->counters[BCH_COUNTER_move_extent_write], m->k.k->k.size);
|
||||
this_cpu_add(m->op.c->counters[BCH_COUNTER_io_move_write], m->k.k->k.size);
|
||||
|
||||
closure_call(&m->op.cl, bch2_write, NULL, NULL);
|
||||
}
|
||||
@ -672,12 +673,46 @@ static bool can_allocate_without_blocking(struct bch_fs *c,
|
||||
return nr_replicas >= m->op.nr_replicas;
|
||||
}
|
||||
|
||||
int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
|
||||
struct bch_io_opts *io_opts)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(m->k.k));
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
|
||||
/* write path might have to decompress data: */
|
||||
unsigned buf_bytes = 0;
|
||||
bkey_for_each_ptr_decode(&m->k.k->k, ptrs, p, entry)
|
||||
buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9);
|
||||
|
||||
unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
|
||||
|
||||
m->bvecs = kmalloc_array(nr_vecs, sizeof*(m->bvecs), GFP_KERNEL);
|
||||
if (!m->bvecs)
|
||||
return -ENOMEM;
|
||||
|
||||
bio_init(&m->rbio.bio, NULL, m->bvecs, nr_vecs, REQ_OP_READ);
|
||||
bio_init(&m->op.wbio.bio, NULL, m->bvecs, nr_vecs, 0);
|
||||
|
||||
if (bch2_bio_alloc_pages(&m->op.wbio.bio, buf_bytes, GFP_KERNEL)) {
|
||||
kfree(m->bvecs);
|
||||
m->bvecs = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rbio_init(&m->rbio.bio, c, *io_opts, NULL);
|
||||
m->rbio.bio.bi_iter.bi_size = buf_bytes;
|
||||
m->rbio.bio.bi_iter.bi_sector = bkey_start_offset(&m->k.k->k);
|
||||
m->op.wbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
|
||||
return 0;
|
||||
}
|
||||
|
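bch2_data_update_bios_init() above sizes a single bounce buffer from the largest uncompressed pointer in the extent (sectors shifted to bytes) and allocates DIV_ROUND_UP(buf_bytes, PAGE_SIZE) bio vecs for it. A quick userspace rendering of that arithmetic, assuming a 4 KiB page size for illustration:

/* Hedged sketch of the buffer/vec sizing arithmetic; sizes are examples. */
#include <stdio.h>

#define PAGE_SIZE 4096u

int main(void)
{
	/* uncompressed sizes of each pointer in the extent, in 512-byte sectors */
	unsigned crc_uncompressed_size[] = { 64, 256, 128 };
	unsigned buf_bytes = 0;

	for (unsigned i = 0; i < 3; i++) {
		unsigned bytes = crc_uncompressed_size[i] << 9;
		if (bytes > buf_bytes)
			buf_bytes = bytes;
	}

	unsigned nr_vecs = (buf_bytes + PAGE_SIZE - 1) / PAGE_SIZE;	/* DIV_ROUND_UP */

	printf("buf_bytes = %u, nr_vecs = %u\n", buf_bytes, nr_vecs);	/* 131072, 32 */
	return 0;
}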
||||
int bch2_data_update_init(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct moving_context *ctxt,
|
||||
struct data_update *m,
|
||||
struct write_point_specifier wp,
|
||||
struct bch_io_opts io_opts,
|
||||
struct bch_io_opts *io_opts,
|
||||
struct data_update_opts data_opts,
|
||||
enum btree_id btree_id,
|
||||
struct bkey_s_c k)
|
||||
@ -704,7 +739,7 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
m->ctxt = ctxt;
|
||||
m->stats = ctxt ? ctxt->stats : NULL;
|
||||
|
||||
bch2_write_op_init(&m->op, c, io_opts);
|
||||
bch2_write_op_init(&m->op, c, *io_opts);
|
||||
m->op.pos = bkey_start_pos(k.k);
|
||||
m->op.version = k.k->bversion;
|
||||
m->op.target = data_opts.target;
|
||||
@ -715,7 +750,7 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
BCH_WRITE_data_encoded|
|
||||
BCH_WRITE_move|
|
||||
m->data_opts.write_flags;
|
||||
m->op.compression_opt = io_opts.background_compression;
|
||||
m->op.compression_opt = io_opts->background_compression;
|
||||
m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;
|
||||
|
||||
unsigned durability_have = 0, durability_removing = 0;
|
||||
@ -753,7 +788,7 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
ptr_bit <<= 1;
|
||||
}
|
||||
|
||||
unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have));
|
||||
unsigned durability_required = max(0, (int) (io_opts->data_replicas - durability_have));
|
||||
|
||||
/*
|
||||
* If current extent durability is less than io_opts.data_replicas,
|
||||
@ -786,7 +821,7 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
m->data_opts.rewrite_ptrs = 0;
|
||||
/* if iter == NULL, it's just a promote */
|
||||
if (iter)
|
||||
ret = bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &m->data_opts);
|
||||
ret = bch2_extent_drop_ptrs(trans, iter, k, io_opts, &m->data_opts);
|
||||
if (!ret)
|
||||
ret = -BCH_ERR_data_update_done_no_writes_needed;
|
||||
goto out_bkey_buf_exit;
|
||||
@ -824,33 +859,11 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
goto out_nocow_unlock;
|
||||
}
|
||||
|
||||
/* write path might have to decompress data: */
|
||||
unsigned buf_bytes = 0;
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
|
||||
buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9);
|
||||
|
||||
unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
|
||||
|
||||
m->bvecs = kmalloc_array(nr_vecs, sizeof*(m->bvecs), GFP_KERNEL);
|
||||
if (!m->bvecs)
|
||||
goto enomem;
|
||||
|
||||
bio_init(&m->rbio.bio, NULL, m->bvecs, nr_vecs, REQ_OP_READ);
|
||||
bio_init(&m->op.wbio.bio, NULL, m->bvecs, nr_vecs, 0);
|
||||
|
||||
if (bch2_bio_alloc_pages(&m->op.wbio.bio, buf_bytes, GFP_KERNEL))
|
||||
goto enomem;
|
||||
|
||||
rbio_init(&m->rbio.bio, c, io_opts, NULL);
|
||||
m->rbio.bio.bi_iter.bi_size = buf_bytes;
|
||||
m->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
|
||||
m->op.wbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
|
||||
ret = bch2_data_update_bios_init(m, c, io_opts);
|
||||
if (ret)
|
||||
goto out_nocow_unlock;
|
||||
|
||||
return 0;
|
||||
enomem:
|
||||
ret = -ENOMEM;
|
||||
kfree(m->bvecs);
|
||||
m->bvecs = NULL;
|
||||
out_nocow_unlock:
|
||||
if (c->opts.nocow_enabled)
|
||||
bkey_nocow_unlock(c, k);
|
||||
|
@ -51,12 +51,15 @@ int bch2_extent_drop_ptrs(struct btree_trans *,
|
||||
struct bch_io_opts *,
|
||||
struct data_update_opts *);
|
||||
|
||||
int bch2_data_update_bios_init(struct data_update *, struct bch_fs *,
|
||||
struct bch_io_opts *);
|
||||
|
||||
void bch2_data_update_exit(struct data_update *);
|
||||
int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
|
||||
struct moving_context *,
|
||||
struct data_update *,
|
||||
struct write_point_specifier,
|
||||
struct bch_io_opts, struct data_update_opts,
|
||||
struct bch_io_opts *, struct data_update_opts,
|
||||
enum btree_id, struct bkey_s_c);
|
||||
void bch2_data_update_opts_normalize(struct bkey_s_c, struct data_update_opts *);
|
||||
|
||||
|
@ -13,6 +13,40 @@
|
||||
|
||||
#include <linux/dcache.h>
|
||||
|
||||
static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
|
||||
const struct qstr *str, struct qstr *out_cf)
|
||||
{
|
||||
*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1);
|
||||
int ret = PTR_ERR_OR_ZERO(buf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = utf8_casefold(info->cf_encoding, str, buf, BCH_NAME_MAX + 1);
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
|
||||
*out_cf = (struct qstr) QSTR_INIT(buf, ret);
|
||||
return 0;
|
||||
#else
|
||||
return -EOPNOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int bch2_maybe_casefold(struct btree_trans *trans,
|
||||
const struct bch_hash_info *info,
|
||||
const struct qstr *str, struct qstr *out_cf)
|
||||
{
|
||||
if (likely(!info->cf_encoding)) {
|
||||
*out_cf = *str;
|
||||
return 0;
|
||||
} else {
|
||||
return bch2_casefold(trans, info, str, out_cf);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
|
||||
{
|
||||
if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
|
||||
@ -28,13 +62,38 @@ static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
|
||||
#endif
|
||||
|
||||
return bkey_bytes -
|
||||
offsetof(struct bch_dirent, d_name) -
|
||||
(d.v->d_casefold
|
||||
? offsetof(struct bch_dirent, d_cf_name_block.d_names)
|
||||
: offsetof(struct bch_dirent, d_name)) -
|
||||
trailing_nuls;
|
||||
}
|
||||
|
||||
struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
|
||||
{
|
||||
return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
|
||||
if (d.v->d_casefold) {
|
||||
unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len);
|
||||
return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[0], name_len);
|
||||
} else {
|
||||
return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
|
||||
}
|
||||
}
|
||||
|
||||
static struct qstr bch2_dirent_get_casefold_name(struct bkey_s_c_dirent d)
|
||||
{
|
||||
if (d.v->d_casefold) {
|
||||
unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len);
|
||||
unsigned cf_name_len = le16_to_cpu(d.v->d_cf_name_block.d_cf_name_len);
|
||||
return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[name_len], cf_name_len);
|
||||
} else {
|
||||
return (struct qstr) QSTR_INIT(NULL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct qstr bch2_dirent_get_lookup_name(struct bkey_s_c_dirent d)
|
||||
{
|
||||
return d.v->d_casefold
|
||||
? bch2_dirent_get_casefold_name(d)
|
||||
: bch2_dirent_get_name(d);
|
||||
}
|
||||
|
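The casefold getters above imply a single name block: the stored bytes hold the original name immediately followed by its casefolded form, with d_name_len and d_cf_name_len selecting each slice, and lookups hash and compare the casefolded slice. A userspace model of that packing, using an illustrative struct rather than the real on-disk bch_dirent layout:

/* Hedged userspace model of the casefolded dirent name block. */
#include <stdio.h>
#include <string.h>

struct cf_name_block {
	unsigned short d_name_len;
	unsigned short d_cf_name_len;
	char d_names[64];	/* original name, then casefolded name */
};

int main(void)
{
	struct cf_name_block b = { 0 };
	const char *name = "ReadMe.TXT";
	const char *cf   = "readme.txt";	/* what utf8_casefold() would yield */

	b.d_name_len = strlen(name);
	b.d_cf_name_len = strlen(cf);
	memcpy(b.d_names, name, b.d_name_len);
	memcpy(b.d_names + b.d_name_len, cf, b.d_cf_name_len);

	printf("stored name: %.*s\n", b.d_name_len, b.d_names);
	printf("lookup name: %.*s\n", b.d_cf_name_len, b.d_names + b.d_name_len);
	return 0;
}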
||||
static u64 bch2_dirent_hash(const struct bch_hash_info *info,
|
||||
@ -57,7 +116,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
|
||||
static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
|
||||
struct qstr name = bch2_dirent_get_name(d);
|
||||
struct qstr name = bch2_dirent_get_lookup_name(d);
|
||||
|
||||
return bch2_dirent_hash(info, &name);
|
||||
}
|
||||
@ -65,7 +124,7 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
|
||||
static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
|
||||
{
|
||||
struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
|
||||
const struct qstr l_name = bch2_dirent_get_name(l);
|
||||
const struct qstr l_name = bch2_dirent_get_lookup_name(l);
|
||||
const struct qstr *r_name = _r;
|
||||
|
||||
return !qstr_eq(l_name, *r_name);
|
||||
@ -75,8 +134,8 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
|
||||
{
|
||||
struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
|
||||
struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
|
||||
const struct qstr l_name = bch2_dirent_get_name(l);
|
||||
const struct qstr r_name = bch2_dirent_get_name(r);
|
||||
const struct qstr l_name = bch2_dirent_get_lookup_name(l);
|
||||
const struct qstr r_name = bch2_dirent_get_lookup_name(r);
|
||||
|
||||
return !qstr_eq(l_name, r_name);
|
||||
}
|
||||
@ -104,17 +163,19 @@ int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
struct bkey_validate_context from)
|
||||
{
|
||||
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
|
||||
unsigned name_block_len = bch2_dirent_name_bytes(d);
|
||||
struct qstr d_name = bch2_dirent_get_name(d);
|
||||
struct qstr d_cf_name = bch2_dirent_get_casefold_name(d);
|
||||
int ret = 0;
|
||||
|
||||
bkey_fsck_err_on(!d_name.len,
|
||||
c, dirent_empty_name,
|
||||
"empty name");
|
||||
|
||||
bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len),
|
||||
bkey_fsck_err_on(d_name.len + d_cf_name.len > name_block_len,
|
||||
c, dirent_val_too_big,
|
||||
"value too big (%zu > %u)",
|
||||
bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));
|
||||
"dirent names exceed bkey size (%d + %d > %d)",
|
||||
d_name.len, d_cf_name.len, name_block_len);
|
||||
|
||||
/*
|
||||
* Check new keys don't exceed the max length
|
||||
@ -142,6 +203,18 @@ int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
le64_to_cpu(d.v->d_inum) == d.k->p.inode,
|
||||
c, dirent_to_itself,
|
||||
"dirent points to own directory");
|
||||
|
||||
if (d.v->d_casefold) {
|
||||
bkey_fsck_err_on(from.from == BKEY_VALIDATE_commit &&
|
||||
d_cf_name.len > BCH_NAME_MAX,
|
||||
c, dirent_cf_name_too_big,
|
||||
"dirent w/ cf name too big (%u > %u)",
|
||||
d_cf_name.len, BCH_NAME_MAX);
|
||||
|
||||
bkey_fsck_err_on(d_cf_name.len != strnlen(d_cf_name.name, d_cf_name.len),
|
||||
c, dirent_stray_data_after_cf_name,
|
||||
"dirent has stray data after cf name's NUL");
|
||||
}
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
@ -163,15 +236,14 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
|
||||
prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type));
|
||||
}
|
||||
|
||||
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
|
||||
subvol_inum dir, u8 type,
|
||||
const struct qstr *name, u64 dst)
|
||||
static struct bkey_i_dirent *dirent_alloc_key(struct btree_trans *trans,
|
||||
subvol_inum dir,
|
||||
u8 type,
|
||||
int name_len, int cf_name_len,
|
||||
u64 dst)
|
||||
{
|
||||
struct bkey_i_dirent *dirent;
|
||||
unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len);
|
||||
|
||||
if (name->len > BCH_NAME_MAX)
|
||||
return ERR_PTR(-ENAMETOOLONG);
|
||||
unsigned u64s = BKEY_U64s + dirent_val_u64s(name_len, cf_name_len);
|
||||
|
||||
BUG_ON(u64s > U8_MAX);
|
||||
|
||||
@ -190,14 +262,65 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
dirent->v.d_type = type;
|
||||
dirent->v.d_unused = 0;
|
||||
dirent->v.d_casefold = cf_name_len ? 1 : 0;
|
||||
|
||||
memcpy(dirent->v.d_name, name->name, name->len);
|
||||
memset(dirent->v.d_name + name->len, 0,
|
||||
bkey_val_bytes(&dirent->k) -
|
||||
offsetof(struct bch_dirent, d_name) -
|
||||
name->len);
|
||||
return dirent;
|
||||
}
|
||||
|
||||
EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
|
||||
static void dirent_init_regular_name(struct bkey_i_dirent *dirent,
|
||||
const struct qstr *name)
|
||||
{
|
||||
EBUG_ON(dirent->v.d_casefold);
|
||||
|
||||
memcpy(&dirent->v.d_name[0], name->name, name->len);
|
||||
memset(&dirent->v.d_name[name->len], 0,
|
||||
bkey_val_bytes(&dirent->k) -
|
||||
offsetof(struct bch_dirent, d_name) -
|
||||
name->len);
|
||||
}
|
||||
|
||||
static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent,
|
||||
const struct qstr *name,
|
||||
const struct qstr *cf_name)
|
||||
{
|
||||
EBUG_ON(!dirent->v.d_casefold);
|
||||
EBUG_ON(!cf_name->len);
|
||||
|
||||
dirent->v.d_cf_name_block.d_name_len = name->len;
|
||||
dirent->v.d_cf_name_block.d_cf_name_len = cf_name->len;
|
||||
memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
|
||||
memcpy(&dirent->v.d_cf_name_block.d_names[name->len], cf_name->name, cf_name->len);
|
||||
memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_name->len], 0,
|
||||
bkey_val_bytes(&dirent->k) -
|
||||
offsetof(struct bch_dirent, d_cf_name_block.d_names) -
|
||||
name->len + cf_name->len);
|
||||
|
||||
EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_name->len);
|
||||
}
|
||||
|
||||
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
|
||||
subvol_inum dir,
|
||||
u8 type,
|
||||
const struct qstr *name,
|
||||
const struct qstr *cf_name,
|
||||
u64 dst)
|
||||
{
|
||||
struct bkey_i_dirent *dirent;
|
||||
|
||||
if (name->len > BCH_NAME_MAX)
|
||||
return ERR_PTR(-ENAMETOOLONG);
|
||||
|
||||
dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst);
|
||||
if (IS_ERR(dirent))
|
||||
return dirent;
|
||||
|
||||
if (cf_name)
|
||||
dirent_init_casefolded_name(dirent, name, cf_name);
|
||||
else
|
||||
dirent_init_regular_name(dirent, name);
|
||||
|
||||
EBUG_ON(bch2_dirent_get_name(dirent_i_to_s_c(dirent)).len != name->len);
|
||||
|
||||
return dirent;
|
||||
}
|
||||
@ -213,7 +336,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
|
||||
struct bkey_i_dirent *dirent;
|
||||
int ret;
|
||||
|
||||
dirent = dirent_create_key(trans, dir_inum, type, name, dst_inum);
|
||||
dirent = dirent_create_key(trans, dir_inum, type, name, NULL, dst_inum);
|
||||
ret = PTR_ERR_OR_ZERO(dirent);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -233,16 +356,28 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
|
||||
const struct bch_hash_info *hash_info,
|
||||
u8 type, const struct qstr *name, u64 dst_inum,
|
||||
u64 *dir_offset,
|
||||
u64 *i_size,
|
||||
enum btree_iter_update_trigger_flags flags)
|
||||
{
|
||||
struct bkey_i_dirent *dirent;
|
||||
int ret;
|
||||
|
||||
dirent = dirent_create_key(trans, dir, type, name, dst_inum);
|
||||
if (hash_info->cf_encoding) {
|
||||
struct qstr cf_name;
|
||||
ret = bch2_casefold(trans, hash_info, name, &cf_name);
|
||||
if (ret)
|
||||
return ret;
|
||||
dirent = dirent_create_key(trans, dir, type, name, &cf_name, dst_inum);
|
||||
} else {
|
||||
dirent = dirent_create_key(trans, dir, type, name, NULL, dst_inum);
|
||||
}
|
||||
|
||||
ret = PTR_ERR_OR_ZERO(dirent);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
*i_size += bkey_bytes(&dirent->k);
|
||||
|
||||
ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
|
||||
dir, &dirent->k_i, flags);
|
||||
*dir_offset = dirent->k.p.offset;
|
||||
@ -275,12 +410,13 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
|
||||
}
|
||||
|
||||
int bch2_dirent_rename(struct btree_trans *trans,
|
||||
subvol_inum src_dir, struct bch_hash_info *src_hash,
|
||||
subvol_inum dst_dir, struct bch_hash_info *dst_hash,
|
||||
subvol_inum src_dir, struct bch_hash_info *src_hash, u64 *src_dir_i_size,
|
||||
subvol_inum dst_dir, struct bch_hash_info *dst_hash, u64 *dst_dir_i_size,
|
||||
const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset,
|
||||
const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset,
|
||||
enum bch_rename_mode mode)
|
||||
{
|
||||
struct qstr src_name_lookup, dst_name_lookup;
|
||||
struct btree_iter src_iter = { NULL };
|
||||
struct btree_iter dst_iter = { NULL };
|
||||
struct bkey_s_c old_src, old_dst = bkey_s_c_null;
|
||||
@ -295,8 +431,11 @@ int bch2_dirent_rename(struct btree_trans *trans,
|
||||
memset(dst_inum, 0, sizeof(*dst_inum));
|
||||
|
||||
/* Lookup src: */
|
||||
ret = bch2_maybe_casefold(trans, src_hash, src_name, &src_name_lookup);
|
||||
if (ret)
|
||||
goto out;
|
||||
old_src = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc,
|
||||
src_hash, src_dir, src_name,
|
||||
src_hash, src_dir, &src_name_lookup,
|
||||
BTREE_ITER_intent);
|
||||
ret = bkey_err(old_src);
|
||||
if (ret)
|
||||
@ -308,6 +447,9 @@ int bch2_dirent_rename(struct btree_trans *trans,
|
||||
goto out;
|
||||
|
||||
/* Lookup dst: */
|
||||
ret = bch2_maybe_casefold(trans, dst_hash, dst_name, &dst_name_lookup);
|
||||
if (ret)
|
||||
goto out;
|
||||
if (mode == BCH_RENAME) {
|
||||
/*
|
||||
* Note that we're _not_ checking if the target already exists -
|
||||
@ -315,12 +457,12 @@ int bch2_dirent_rename(struct btree_trans *trans,
|
||||
* correctness:
|
||||
*/
|
||||
ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
|
||||
dst_hash, dst_dir, dst_name);
|
||||
dst_hash, dst_dir, &dst_name_lookup);
|
||||
if (ret)
|
||||
goto out;
|
||||
} else {
|
||||
old_dst = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
|
||||
dst_hash, dst_dir, dst_name,
|
||||
dst_hash, dst_dir, &dst_name_lookup,
|
||||
BTREE_ITER_intent);
|
||||
ret = bkey_err(old_dst);
|
||||
if (ret)
|
||||
@ -336,7 +478,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
|
||||
*src_offset = dst_iter.pos.offset;
|
||||
|
||||
/* Create new dst key: */
|
||||
new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0);
|
||||
new_dst = dirent_create_key(trans, dst_dir, 0, dst_name,
|
||||
dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0);
|
||||
ret = PTR_ERR_OR_ZERO(new_dst);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -346,7 +489,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
|
||||
|
||||
/* Create new src key: */
|
||||
if (mode == BCH_RENAME_EXCHANGE) {
|
||||
new_src = dirent_create_key(trans, src_dir, 0, src_name, 0);
|
||||
new_src = dirent_create_key(trans, src_dir, 0, src_name,
|
||||
src_hash->cf_encoding ? &src_name_lookup : NULL, 0);
|
||||
ret = PTR_ERR_OR_ZERO(new_src);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -406,6 +550,14 @@ int bch2_dirent_rename(struct btree_trans *trans,
|
||||
new_src->v.d_type == DT_SUBVOL)
|
||||
new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol);
|
||||
|
||||
if (old_dst.k)
|
||||
*dst_dir_i_size -= bkey_bytes(old_dst.k);
|
||||
*src_dir_i_size -= bkey_bytes(old_src.k);
|
||||
|
||||
if (mode == BCH_RENAME_EXCHANGE)
|
||||
*src_dir_i_size += bkey_bytes(&new_src->k);
|
||||
*dst_dir_i_size += bkey_bytes(&new_dst->k);
|
||||
|
||||
ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -465,9 +617,14 @@ int bch2_dirent_lookup_trans(struct btree_trans *trans,
|
||||
const struct qstr *name, subvol_inum *inum,
|
||||
unsigned flags)
|
||||
{
|
||||
struct qstr lookup_name;
|
||||
int ret = bch2_maybe_casefold(trans, hash_info, name, &lookup_name);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
struct bkey_s_c k = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
|
||||
hash_info, dir, name, flags);
|
||||
int ret = bkey_err(k);
|
||||
hash_info, dir, &lookup_name, flags);
|
||||
ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
@@ -25,15 +25,13 @@ struct bch_inode_info;

struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d);

static inline unsigned dirent_val_u64s(unsigned len)
static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len)
{
	return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len,
			    sizeof(u64));
}
	unsigned bytes = cf_len
		? offsetof(struct bch_dirent, d_cf_name_block.d_names) + len + cf_len
		: offsetof(struct bch_dirent, d_name) + len;

static inline unsigned int dirent_occupied_size(const struct qstr *name)
{
	return (BKEY_U64s + dirent_val_u64s(name->len)) * sizeof(u64);
	return DIV_ROUND_UP(bytes, sizeof(u64));
}

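With casefolding, a dirent value now has to hold both the original name and the folded copy, and dirent_val_u64s() rounds that byte count up to whole u64s. A standalone sketch of the size math follows; the two offset constants are assumptions standing in for the fixed part of the value, not the on-disk ABI.

#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/*
 * Assumed offsets: bytes of fixed fields before d_name, and before
 * d_cf_name_block.d_names (one pad byte plus two 16-bit length fields).
 */
#define DEMO_OFF_D_NAME		9u
#define DEMO_OFF_D_NAMES	(DEMO_OFF_D_NAME + 1 + 2 + 2)

static unsigned demo_val_u64s(unsigned len, unsigned cf_len)
{
	unsigned bytes = cf_len
		? DEMO_OFF_D_NAMES + len + cf_len
		: DEMO_OFF_D_NAME + len;

	return DIV_ROUND_UP(bytes, 8u);
}

int main(void)
{
	/* "Makefile": 8-byte name, with and without an 8-byte casefolded copy */
	printf("plain:      %u u64s\n", demo_val_u64s(8, 0));
	printf("casefolded: %u u64s\n", demo_val_u64s(8, 8));
	return 0;
}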
||||
int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
|
||||
@ -52,7 +50,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
|
||||
enum btree_iter_update_trigger_flags);
|
||||
int bch2_dirent_create(struct btree_trans *, subvol_inum,
|
||||
const struct bch_hash_info *, u8,
|
||||
const struct qstr *, u64, u64 *,
|
||||
const struct qstr *, u64, u64 *, u64 *,
|
||||
enum btree_iter_update_trigger_flags);
|
||||
|
||||
static inline unsigned vfs_d_type(unsigned type)
|
||||
@ -67,8 +65,8 @@ enum bch_rename_mode {
|
||||
};
|
||||
|
||||
int bch2_dirent_rename(struct btree_trans *,
|
||||
subvol_inum, struct bch_hash_info *,
|
||||
subvol_inum, struct bch_hash_info *,
|
||||
subvol_inum, struct bch_hash_info *, u64 *,
|
||||
subvol_inum, struct bch_hash_info *, u64 *,
|
||||
const struct qstr *, subvol_inum *, u64 *,
|
||||
const struct qstr *, subvol_inum *, u64 *,
|
||||
enum bch_rename_mode);
|
@@ -29,9 +29,25 @@ struct bch_dirent {
	 * Copy of mode bits 12-15 from the target inode - so userspace can get
	 * the filetype without having to do a stat()
	 */
	__u8		d_type;
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8		d_type:5,
			d_unused:2,
			d_casefold:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u8		d_casefold:1,
			d_unused:2,
			d_type:5;
#endif

	__u8		d_name[];
	union {
		struct {
			__u8		d_pad;
			__le16		d_name_len;
			__le16		d_cf_name_len;
			__u8		d_names[0];
		} d_cf_name_block __packed;
		__u8		d_name[0];
	} __packed;
} __packed __aligned(8);

#define DT_SUBVOL	16

@ -210,11 +210,13 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *
|
||||
static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p,
|
||||
u64 *v, unsigned nr)
|
||||
{
|
||||
percpu_down_read(&c->mark_lock);
|
||||
struct bch_accounting_mem *acc = &c->accounting;
|
||||
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
|
||||
accounting_pos_cmp, &p);
|
||||
|
||||
bch2_accounting_mem_read_counters(acc, idx, v, nr, false);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
}
|
||||
|
||||
static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset)
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "io_read.h"
|
||||
#include "io_write.h"
|
||||
#include "keylist.h"
|
||||
#include "lru.h"
|
||||
#include "recovery.h"
|
||||
#include "replicas.h"
|
||||
#include "super-io.h"
|
||||
@ -298,10 +299,22 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
||||
struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
|
||||
|
||||
if (flags & BTREE_TRIGGER_transactional) {
|
||||
struct extent_ptr_decoded p = {
|
||||
.ptr = *ptr,
|
||||
.crc = bch2_extent_crc_unpack(s.k, NULL),
|
||||
};
|
||||
struct bkey_i_backpointer bp;
|
||||
bch2_extent_ptr_to_bp(c, BTREE_ID_stripes, 0, s.s_c, p,
|
||||
(const union bch_extent_entry *) ptr, &bp);
|
||||
|
||||
struct bkey_i_alloc_v4 *a =
|
||||
bch2_trans_start_alloc_update(trans, bucket, 0);
|
||||
ret = PTR_ERR_OR_ZERO(a) ?:
|
||||
__mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags);
|
||||
ret = PTR_ERR_OR_ZERO(a) ?:
|
||||
__mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags) ?:
|
||||
bch2_bucket_backpointer_mod(trans, s.s_c, &bp,
|
||||
!(flags & BTREE_TRIGGER_overwrite));
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (flags & BTREE_TRIGGER_gc) {
|
||||
@ -399,6 +412,15 @@ int bch2_trigger_stripe(struct btree_trans *trans,
|
||||
(new_s->nr_blocks != old_s->nr_blocks ||
|
||||
new_s->nr_redundant != old_s->nr_redundant));
|
||||
|
||||
if (flags & BTREE_TRIGGER_transactional) {
|
||||
int ret = bch2_lru_change(trans,
|
||||
BCH_LRU_STRIPE_FRAGMENTATION,
|
||||
idx,
|
||||
stripe_lru_pos(old_s),
|
||||
stripe_lru_pos(new_s));
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
|
||||
/*
|
||||
@ -1163,6 +1185,10 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX
|
||||
* can we kill this and delete stripes from the trigger?
|
||||
*/
|
||||
static void ec_stripe_delete_work(struct work_struct *work)
|
||||
{
|
||||
struct bch_fs *c =
|
||||
@ -1380,8 +1406,12 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
|
||||
if (bp_k.k->type != KEY_TYPE_backpointer)
|
||||
continue;
|
||||
|
||||
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
|
||||
if (bp.v->btree_id == BTREE_ID_stripes)
|
||||
continue;
|
||||
|
||||
ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s,
|
||||
bkey_s_c_to_backpointer(bp_k), &last_flushed);
|
||||
bp, &last_flushed);
|
||||
}));
|
||||
|
||||
bch2_bkey_buf_exit(&last_flushed, c);
|
||||
@ -2503,3 +2533,40 @@ int bch2_fs_ec_init(struct bch_fs *c)
|
||||
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
|
||||
BIOSET_NEED_BVECS);
|
||||
}
|
||||
|
||||
static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans,
|
||||
struct bkey_s_c k,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
return 0;
|
||||
|
||||
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
|
||||
|
||||
u64 lru_idx = stripe_lru_pos(s.v);
|
||||
if (lru_idx) {
|
||||
int ret = bch2_lru_check_set(trans, BCH_LRU_STRIPE_FRAGMENTATION,
|
||||
k.k->p.offset, lru_idx, k, last_flushed);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_check_stripe_to_lru_refs(struct bch_fs *c)
|
||||
{
|
||||
struct bkey_buf last_flushed;
|
||||
|
||||
bch2_bkey_buf_init(&last_flushed);
|
||||
bkey_init(&last_flushed.k->k);
|
||||
|
||||
int ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter, BTREE_ID_stripes,
|
||||
POS_MIN, BTREE_ITER_prefetch, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_check_stripe_to_lru_ref(trans, k, &last_flushed)));
|
||||
|
||||
bch2_bkey_buf_exit(&last_flushed, c);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
@@ -92,6 +92,31 @@ static inline void stripe_csum_set(struct bch_stripe *s,
	memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]);
}

#define STRIPE_LRU_POS_EMPTY	1

static inline u64 stripe_lru_pos(const struct bch_stripe *s)
{
	if (!s)
		return 0;

	unsigned blocks_empty = 0, blocks_nonempty = 0;

	for (unsigned i = 0; i < s->nr_blocks; i++) {
		blocks_empty += !stripe_blockcount_get(s, i);
		blocks_nonempty += !!stripe_blockcount_get(s, i);
	}

	/* Will be picked up by the stripe_delete worker */
	if (!blocks_nonempty)
		return STRIPE_LRU_POS_EMPTY;

	if (!blocks_empty)
		return 0;

	/* invert: more blocks empty = reuse first */
	return LRU_TIME_MAX - blocks_empty;
}

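The fragmentation LRU position above is easy to sanity-check outside the kernel: fully empty stripes get the fixed STRIPE_LRU_POS_EMPTY slot, fully used stripes stay off the LRU, and partially empty stripes sort so that emptier stripes come first. A standalone sketch with a mock per-block count array in place of stripe_blockcount_get(); the LRU_TIME_MAX value here is an assumption for illustration.

#include <stdio.h>
#include <stdint.h>

#define DEMO_LRU_TIME_MAX	((1ULL << 48) - 1)	/* illustrative, not the kernel constant */
#define STRIPE_LRU_POS_EMPTY	1

/* blockcount[] stands in for stripe_blockcount_get(): used sectors per block */
static uint64_t demo_lru_pos(const unsigned *blockcount, unsigned nr_blocks)
{
	unsigned blocks_empty = 0, blocks_nonempty = 0;

	for (unsigned i = 0; i < nr_blocks; i++) {
		blocks_empty	+= !blockcount[i];
		blocks_nonempty	+= !!blockcount[i];
	}

	if (!blocks_nonempty)		/* reclaimable by the stripe_delete worker */
		return STRIPE_LRU_POS_EMPTY;
	if (!blocks_empty)		/* fully used: not on the fragmentation LRU */
		return 0;

	/* more empty blocks -> smaller position -> reused sooner */
	return DEMO_LRU_TIME_MAX - blocks_empty;
}

int main(void)
{
	unsigned empty[4]   = { 0, 0, 0, 0 };
	unsigned full[4]    = { 8, 8, 8, 8 };
	unsigned partial[4] = { 8, 0, 0, 8 };

	printf("empty:   %llu\n", (unsigned long long) demo_lru_pos(empty, 4));
	printf("full:    %llu\n", (unsigned long long) demo_lru_pos(full, 4));
	printf("partial: %llu\n", (unsigned long long) demo_lru_pos(partial, 4));
	return 0;
}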
||||
static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr,
|
||||
const struct bch_extent_ptr *data_ptr,
|
||||
unsigned sectors)
|
||||
@ -132,6 +157,20 @@ static inline bool bch2_ptr_matches_stripe_m(const struct gc_stripe *m,
|
||||
m->sectors);
|
||||
}
|
||||
|
||||
static inline void gc_stripe_unlock(struct gc_stripe *s)
|
||||
{
|
||||
BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte);
|
||||
|
||||
clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &s->lock);
|
||||
wake_up_bit((void *) &s->lock, BUCKET_LOCK_BITNR);
|
||||
}
|
||||
|
||||
static inline void gc_stripe_lock(struct gc_stripe *s)
|
||||
{
|
||||
wait_on_bit_lock((void *) &s->lock, BUCKET_LOCK_BITNR,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
struct bch_read_bio;
|
||||
|
||||
struct ec_stripe_buf {
|
||||
@ -268,4 +307,6 @@ void bch2_fs_ec_exit(struct bch_fs *);
|
||||
void bch2_fs_ec_init_early(struct bch_fs *);
|
||||
int bch2_fs_ec_init(struct bch_fs *);
|
||||
|
||||
int bch2_check_stripe_to_lru_refs(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_EC_H */
|
||||
|
@ -20,12 +20,11 @@ struct stripe {
|
||||
};
|
||||
|
||||
struct gc_stripe {
|
||||
u8 lock;
|
||||
unsigned alive:1; /* does a corresponding key exist in stripes btree? */
|
||||
u16 sectors;
|
||||
|
||||
u8 nr_blocks;
|
||||
u8 nr_redundant;
|
||||
|
||||
unsigned alive:1; /* does a corresponding key exist in stripes btree? */
|
||||
u16 block_sectors[BCH_BKEY_PTRS_MAX];
|
||||
struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX];
|
||||
|
||||
|
@ -530,35 +530,59 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
|
||||
mutex_unlock(&c->fsck_error_msgs_lock);
|
||||
}
|
||||
|
||||
int bch2_inum_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum)
|
||||
int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
|
||||
subvol_inum inum, u64 offset)
|
||||
{
|
||||
u32 restart_count = trans->restart_count;
|
||||
int ret = 0;
|
||||
|
||||
/* XXX: we don't yet attempt to print paths when we don't know the subvol */
|
||||
if (inum.subvol)
|
||||
ret = lockrestart_do(trans, bch2_inum_to_path(trans, inum, out));
|
||||
if (inum.subvol) {
|
||||
ret = bch2_inum_to_path(trans, inum, out);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
return ret;
|
||||
}
|
||||
if (!inum.subvol || ret)
|
||||
prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum);
|
||||
prt_printf(out, " offset %llu: ", offset);
|
||||
|
||||
return trans_was_restarted(trans, restart_count);
|
||||
}
|
||||
|
||||
int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
|
||||
subvol_inum inum, u64 offset)
|
||||
{
|
||||
int ret = bch2_inum_err_msg_trans(trans, out, inum);
|
||||
prt_printf(out, " offset %llu: ", offset);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_inum_err_msg(struct bch_fs *c, struct printbuf *out, subvol_inum inum)
|
||||
{
|
||||
bch2_trans_run(c, bch2_inum_err_msg_trans(trans, out, inum));
|
||||
}
|
||||
|
||||
void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out,
|
||||
subvol_inum inum, u64 offset)
|
||||
{
|
||||
bch2_trans_run(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset));
|
||||
bch2_trans_do(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset));
|
||||
}
|
||||
|
||||
int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
|
||||
struct bpos pos)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
int ret = 0;
|
||||
|
||||
if (!bch2_snapshot_is_leaf(c, pos.snapshot))
|
||||
prt_str(out, "(multiple snapshots) ");
|
||||
|
||||
subvol_inum inum = {
|
||||
.subvol = bch2_snapshot_tree_oldest_subvol(c, pos.snapshot),
|
||||
.inum = pos.inode,
|
||||
};
|
||||
|
||||
if (inum.subvol) {
|
||||
ret = bch2_inum_to_path(trans, inum, out);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!inum.subvol || ret)
|
||||
prt_printf(out, "inum %llu:%u", pos.inode, pos.snapshot);
|
||||
|
||||
prt_printf(out, " offset %llu: ", pos.offset << 8);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out,
|
||||
struct bpos pos)
|
||||
{
|
||||
bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
|
||||
}
|
||||
|
@ -238,10 +238,11 @@ void bch2_io_error(struct bch_dev *, enum bch_member_error_type);
|
||||
_ret; \
|
||||
})
|
||||
|
||||
int bch2_inum_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum);
|
||||
int bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum, u64);
|
||||
|
||||
void bch2_inum_err_msg(struct bch_fs *, struct printbuf *, subvol_inum);
|
||||
void bch2_inum_offset_err_msg(struct bch_fs *, struct printbuf *, subvol_inum, u64);
|
||||
|
||||
int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *, struct printbuf *, struct bpos);
|
||||
void bch2_inum_snap_offset_err_msg(struct bch_fs *, struct printbuf *, struct bpos);
|
||||
|
||||
#endif /* _BCACHEFS_ERROR_H */
|
||||
|
@ -148,87 +148,97 @@ static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *pr
|
||||
return cmp(a, b, priv);
|
||||
}
|
||||
|
||||
static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
|
||||
static inline int eytzinger1_do_cmp(void *base1, size_t n, size_t size,
|
||||
cmp_r_func_t cmp_func, const void *priv,
|
||||
size_t l, size_t r)
|
||||
{
|
||||
return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
|
||||
base + inorder_to_eytzinger0(r, n) * size,
|
||||
return do_cmp(base1 + inorder_to_eytzinger1(l, n) * size,
|
||||
base1 + inorder_to_eytzinger1(r, n) * size,
|
||||
cmp_func, priv);
|
||||
}
|
||||
|
||||
static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
|
||||
static inline void eytzinger1_do_swap(void *base1, size_t n, size_t size,
|
||||
swap_r_func_t swap_func, const void *priv,
|
||||
size_t l, size_t r)
|
||||
{
|
||||
do_swap(base + inorder_to_eytzinger0(l, n) * size,
|
||||
base + inorder_to_eytzinger0(r, n) * size,
|
||||
do_swap(base1 + inorder_to_eytzinger1(l, n) * size,
|
||||
base1 + inorder_to_eytzinger1(r, n) * size,
|
||||
size, swap_func, priv);
|
||||
}
|
||||
|
||||
static void eytzinger1_sort_r(void *base1, size_t n, size_t size,
|
||||
cmp_r_func_t cmp_func,
|
||||
swap_r_func_t swap_func,
|
||||
const void *priv)
|
||||
{
|
||||
unsigned i, j, k;
|
||||
|
||||
/* called from 'sort' without swap function, let's pick the default */
|
||||
if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
|
||||
swap_func = NULL;
|
||||
|
||||
if (!swap_func) {
|
||||
if (is_aligned(base1, size, 8))
|
||||
swap_func = SWAP_WORDS_64;
|
||||
else if (is_aligned(base1, size, 4))
|
||||
swap_func = SWAP_WORDS_32;
|
||||
else
|
||||
swap_func = SWAP_BYTES;
|
||||
}
|
||||
|
||||
/* heapify */
|
||||
for (i = n / 2; i >= 1; --i) {
|
||||
/* Find the sift-down path all the way to the leaves. */
|
||||
for (j = i; k = j * 2, k < n;)
|
||||
j = eytzinger1_do_cmp(base1, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
|
||||
|
||||
/* Special case for the last leaf with no sibling. */
|
||||
if (j * 2 == n)
|
||||
j *= 2;
|
||||
|
||||
/* Backtrack to the correct location. */
|
||||
while (j != i && eytzinger1_do_cmp(base1, n, size, cmp_func, priv, i, j) >= 0)
|
||||
j /= 2;
|
||||
|
||||
/* Shift the element into its correct place. */
|
||||
for (k = j; j != i;) {
|
||||
j /= 2;
|
||||
eytzinger1_do_swap(base1, n, size, swap_func, priv, j, k);
|
||||
}
|
||||
}
|
||||
|
||||
/* sort */
|
||||
for (i = n; i > 1; --i) {
|
||||
eytzinger1_do_swap(base1, n, size, swap_func, priv, 1, i);
|
||||
|
||||
/* Find the sift-down path all the way to the leaves. */
|
||||
for (j = 1; k = j * 2, k + 1 < i;)
|
||||
j = eytzinger1_do_cmp(base1, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
|
||||
|
||||
/* Special case for the last leaf with no sibling. */
|
||||
if (j * 2 + 1 == i)
|
||||
j *= 2;
|
||||
|
||||
/* Backtrack to the correct location. */
|
||||
while (j >= 1 && eytzinger1_do_cmp(base1, n, size, cmp_func, priv, 1, j) >= 0)
|
||||
j /= 2;
|
||||
|
||||
/* Shift the element into its correct place. */
|
||||
for (k = j; j > 1;) {
|
||||
j /= 2;
|
||||
eytzinger1_do_swap(base1, n, size, swap_func, priv, j, k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void eytzinger0_sort_r(void *base, size_t n, size_t size,
|
||||
cmp_r_func_t cmp_func,
|
||||
swap_r_func_t swap_func,
|
||||
const void *priv)
|
||||
{
|
||||
int i, j, k;
|
||||
void *base1 = base - size;
|
||||
|
||||
/* called from 'sort' without swap function, let's pick the default */
|
||||
if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
|
||||
swap_func = NULL;
|
||||
|
||||
if (!swap_func) {
|
||||
if (is_aligned(base, size, 8))
|
||||
swap_func = SWAP_WORDS_64;
|
||||
else if (is_aligned(base, size, 4))
|
||||
swap_func = SWAP_WORDS_32;
|
||||
else
|
||||
swap_func = SWAP_BYTES;
|
||||
}
|
||||
|
||||
/* heapify */
|
||||
for (i = n / 2 - 1; i >= 0; --i) {
|
||||
/* Find the sift-down path all the way to the leaves. */
|
||||
for (j = i; k = j * 2 + 1, k + 1 < n;)
|
||||
j = eytzinger0_do_cmp(base, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
|
||||
|
||||
/* Special case for the last leaf with no sibling. */
|
||||
if (j * 2 + 2 == n)
|
||||
j = j * 2 + 1;
|
||||
|
||||
/* Backtrack to the correct location. */
|
||||
while (j != i && eytzinger0_do_cmp(base, n, size, cmp_func, priv, i, j) >= 0)
|
||||
j = (j - 1) / 2;
|
||||
|
||||
/* Shift the element into its correct place. */
|
||||
for (k = j; j != i;) {
|
||||
j = (j - 1) / 2;
|
||||
eytzinger0_do_swap(base, n, size, swap_func, priv, j, k);
|
||||
}
|
||||
}
|
||||
|
||||
/* sort */
|
||||
for (i = n - 1; i > 0; --i) {
|
||||
eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
|
||||
|
||||
/* Find the sift-down path all the way to the leaves. */
|
||||
for (j = 0; k = j * 2 + 1, k + 1 < i;)
|
||||
j = eytzinger0_do_cmp(base, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1;
|
||||
|
||||
/* Special case for the last leaf with no sibling. */
|
||||
if (j * 2 + 2 == i)
|
||||
j = j * 2 + 1;
|
||||
|
||||
/* Backtrack to the correct location. */
|
||||
while (j && eytzinger0_do_cmp(base, n, size, cmp_func, priv, 0, j) >= 0)
|
||||
j = (j - 1) / 2;
|
||||
|
||||
/* Shift the element into its correct place. */
|
||||
for (k = j; j;) {
|
||||
j = (j - 1) / 2;
|
||||
eytzinger0_do_swap(base, n, size, swap_func, priv, j, k);
|
||||
}
|
||||
}
|
||||
return eytzinger1_sort_r(base1, n, size, cmp_func, swap_func, priv);
|
||||
}
|
||||
|
||||
void eytzinger0_sort(void *base, size_t n, size_t size,
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <linux/log2.h>
|
||||
|
||||
#ifdef EYTZINGER_DEBUG
|
||||
#include <linux/bug.h>
|
||||
#define EYTZINGER_BUG_ON(cond) BUG_ON(cond)
|
||||
#else
|
||||
#define EYTZINGER_BUG_ON(cond)
|
||||
@ -56,24 +57,14 @@ static inline unsigned eytzinger1_last(unsigned size)
|
||||
return rounddown_pow_of_two(size + 1) - 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* eytzinger1_next() and eytzinger1_prev() have the nice properties that
|
||||
*
|
||||
* eytzinger1_next(0) == eytzinger1_first())
|
||||
* eytzinger1_prev(0) == eytzinger1_last())
|
||||
*
|
||||
* eytzinger1_prev(eytzinger1_first()) == 0
|
||||
* eytzinger1_next(eytzinger1_last()) == 0
|
||||
*/
|
||||
|
||||
static inline unsigned eytzinger1_next(unsigned i, unsigned size)
|
||||
{
|
||||
EYTZINGER_BUG_ON(i > size);
|
||||
EYTZINGER_BUG_ON(i == 0 || i > size);
|
||||
|
||||
if (eytzinger1_right_child(i) <= size) {
|
||||
i = eytzinger1_right_child(i);
|
||||
|
||||
i <<= __fls(size + 1) - __fls(i);
|
||||
i <<= __fls(size) - __fls(i);
|
||||
i >>= i > size;
|
||||
} else {
|
||||
i >>= ffz(i) + 1;
|
||||
@ -84,12 +75,12 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)
|
||||
|
||||
static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
|
||||
{
|
||||
EYTZINGER_BUG_ON(i > size);
|
||||
EYTZINGER_BUG_ON(i == 0 || i > size);
|
||||
|
||||
if (eytzinger1_left_child(i) <= size) {
|
||||
i = eytzinger1_left_child(i) + 1;
|
||||
|
||||
i <<= __fls(size + 1) - __fls(i);
|
||||
i <<= __fls(size) - __fls(i);
|
||||
i -= 1;
|
||||
i >>= i > size;
|
||||
} else {
|
||||
@ -243,73 +234,63 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
|
||||
(_i) != -1; \
|
||||
(_i) = eytzinger0_next((_i), (_size)))
|
||||
|
||||
#define eytzinger0_for_each_prev(_i, _size) \
|
||||
for (unsigned (_i) = eytzinger0_last((_size)); \
|
||||
(_i) != -1; \
|
||||
(_i) = eytzinger0_prev((_i), (_size)))
|
||||
|
||||
/* return greatest node <= @search, or -1 if not found */
static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
				     cmp_func_t cmp, const void *search)
{
	unsigned i, n = 0;
	void *base1 = base - size;
	unsigned n = 1;

	if (!nr)
		return -1;

	do {
		i = n;
		n = eytzinger0_child(i, cmp(base + i * size, search) <= 0);
	} while (n < nr);

	if (n & 1) {
		/*
		 * @i was greater than @search, return previous node:
		 *
		 * if @i was leftmost/smallest element,
		 * eytzinger0_prev(eytzinger0_first())) returns -1, as expected
		 */
		return eytzinger0_prev(i, nr);
	} else {
		return i;
	}
	while (n <= nr)
		n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0);
	n >>= __ffs(n) + 1;
	return n - 1;
}

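The rewritten search walks the 1-indexed (eytzinger1) layout directly: descend right whenever the current element is <= the search key, then strip the trailing "went left" bits so the result lands on the last ancestor where the walk went right. The standalone trace below runs that traversal over a hand-built 7-element eytzinger array; eytzinger1_child() is assumed to be 2*i + right, and libc ffs() stands in for __ffs(n) + 1.

#include <stdio.h>
#include <strings.h>	/* ffs() */

static inline unsigned eyt1_child(unsigned i, unsigned right)
{
	return 2 * i + right;
}

/* greatest element <= search, returned as a 0-based eytzinger index, or -1 */
static int demo_find_le(const int *e0, unsigned nr, int search)
{
	const int *e1 = e0 - 1;		/* 1-indexed view, mirroring base1 = base - size */
	unsigned n = 1;

	while (n <= nr)
		n = eyt1_child(n, e1[n] <= search);
	n >>= ffs(n);			/* drop trailing zeros plus the lowest set bit */
	return (int) n - 1;
}

int main(void)
{
	/* eytzinger layout of the sorted values 1..7 */
	const int e[7] = { 4, 2, 6, 1, 3, 5, 7 };

	for (int search = 0; search <= 8; search++) {
		int idx = demo_find_le(e, 7, search);
		printf("search %d -> idx %d (%s)\n", search, idx,
		       idx >= 0 ? "found" : "none <= search");
	}
	return 0;
}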
||||
/* return smallest node > @search, or -1 if not found */
|
||||
static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
|
||||
cmp_func_t cmp, const void *search)
|
||||
{
|
||||
ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
|
||||
void *base1 = base - size;
|
||||
unsigned n = 1;
|
||||
|
||||
/*
|
||||
* if eytitzinger0_find_le() returned -1 - no element was <= search - we
|
||||
* want to return the first element; next/prev identities mean this work
|
||||
* as expected
|
||||
*
|
||||
* similarly if find_le() returns last element, we should return -1;
|
||||
* identities mean this all works out:
|
||||
*/
|
||||
return eytzinger0_next(idx, nr);
|
||||
while (n <= nr)
|
||||
n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0);
|
||||
n >>= __ffs(n + 1) + 1;
|
||||
return n - 1;
|
||||
}
|
||||
|
||||
/* return smallest node >= @search, or -1 if not found */
|
||||
static inline int eytzinger0_find_ge(void *base, size_t nr, size_t size,
|
||||
cmp_func_t cmp, const void *search)
|
||||
{
|
||||
ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
|
||||
void *base1 = base - size;
|
||||
unsigned n = 1;
|
||||
|
||||
if (idx < nr && !cmp(base + idx * size, search))
|
||||
return idx;
|
||||
|
||||
return eytzinger0_next(idx, nr);
|
||||
while (n <= nr)
|
||||
n = eytzinger1_child(n, cmp(base1 + n * size, search) < 0);
|
||||
n >>= __ffs(n + 1) + 1;
|
||||
return n - 1;
|
||||
}
|
||||
|
||||
#define eytzinger0_find(base, nr, size, _cmp, search) \
|
||||
({ \
|
||||
void *_base = (base); \
|
||||
size_t _size = (size); \
|
||||
void *_base1 = (void *)(base) - _size; \
|
||||
const void *_search = (search); \
|
||||
size_t _nr = (nr); \
|
||||
size_t _size = (size); \
|
||||
size_t _i = 0; \
|
||||
size_t _i = 1; \
|
||||
int _res; \
|
||||
\
|
||||
while (_i < _nr && \
|
||||
(_res = _cmp(_search, _base + _i * _size))) \
|
||||
_i = eytzinger0_child(_i, _res > 0); \
|
||||
_i; \
|
||||
while (_i <= _nr && \
|
||||
(_res = _cmp(_search, _base1 + _i * _size))) \
|
||||
_i = eytzinger1_child(_i, _res > 0); \
|
||||
_i - 1; \
|
||||
})
|
||||
|
||||
void eytzinger0_sort_r(void *, size_t, size_t,
|
||||
|
@ -47,6 +47,10 @@ int bch2_create_trans(struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/* Inherit casefold state from parent. */
|
||||
if (S_ISDIR(mode))
|
||||
new_inode->bi_flags |= dir_u->bi_flags & BCH_INODE_casefolded;
|
||||
|
||||
if (!(flags & BCH_CREATE_SNAPSHOT)) {
|
||||
/* Normal create path - allocate a new inode: */
|
||||
bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
|
||||
@ -152,18 +156,15 @@ int bch2_create_trans(struct btree_trans *trans,
|
||||
if (is_subdir_for_nlink(new_inode))
|
||||
dir_u->bi_nlink++;
|
||||
dir_u->bi_mtime = dir_u->bi_ctime = now;
|
||||
dir_u->bi_size += dirent_occupied_size(name);
|
||||
|
||||
ret = bch2_inode_write(trans, &dir_iter, dir_u);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_dirent_create(trans, dir, &dir_hash,
|
||||
dir_type,
|
||||
name,
|
||||
dir_target,
|
||||
&dir_offset,
|
||||
STR_HASH_must_create|BTREE_ITER_with_updates);
|
||||
ret = bch2_dirent_create(trans, dir, &dir_hash,
|
||||
dir_type,
|
||||
name,
|
||||
dir_target,
|
||||
&dir_offset,
|
||||
&dir_u->bi_size,
|
||||
STR_HASH_must_create|BTREE_ITER_with_updates) ?:
|
||||
bch2_inode_write(trans, &dir_iter, dir_u);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -221,13 +222,14 @@ int bch2_link_trans(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
dir_u->bi_mtime = dir_u->bi_ctime = now;
|
||||
dir_u->bi_size += dirent_occupied_size(name);
|
||||
|
||||
dir_hash = bch2_hash_info_init(c, dir_u);
|
||||
|
||||
ret = bch2_dirent_create(trans, dir, &dir_hash,
|
||||
mode_to_type(inode_u->bi_mode),
|
||||
name, inum.inum, &dir_offset,
|
||||
name, inum.inum,
|
||||
&dir_offset,
|
||||
&dir_u->bi_size,
|
||||
STR_HASH_must_create);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -266,8 +268,16 @@ int bch2_unlink_trans(struct btree_trans *trans,
|
||||
|
||||
dir_hash = bch2_hash_info_init(c, dir_u);
|
||||
|
||||
ret = bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash,
|
||||
name, &inum, BTREE_ITER_intent);
|
||||
struct bkey_s_c dirent_k =
|
||||
bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
|
||||
&dir_hash, dir, name, BTREE_ITER_intent);
|
||||
ret = bkey_err(dirent_k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(dirent_k), &inum);
|
||||
if (ret > 0)
|
||||
ret = -ENOENT;
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -324,7 +334,7 @@ int bch2_unlink_trans(struct btree_trans *trans,
|
||||
|
||||
dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
|
||||
dir_u->bi_nlink -= is_subdir_for_nlink(inode_u);
|
||||
dir_u->bi_size -= dirent_occupied_size(name);
|
||||
dir_u->bi_size -= bkey_bytes(dirent_k.k);
|
||||
|
||||
ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
|
||||
&dir_hash, &dirent_iter,
|
||||
@ -420,8 +430,8 @@ int bch2_rename_trans(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
ret = bch2_dirent_rename(trans,
|
||||
src_dir, &src_hash,
|
||||
dst_dir, &dst_hash,
|
||||
src_dir, &src_hash, &src_dir_u->bi_size,
|
||||
dst_dir, &dst_hash, &dst_dir_u->bi_size,
|
||||
src_name, &src_inum, &src_offset,
|
||||
dst_name, &dst_inum, &dst_offset,
|
||||
mode);
|
||||
@ -463,14 +473,6 @@ int bch2_rename_trans(struct btree_trans *trans,
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (mode == BCH_RENAME) {
|
||||
src_dir_u->bi_size -= dirent_occupied_size(src_name);
|
||||
dst_dir_u->bi_size += dirent_occupied_size(dst_name);
|
||||
}
|
||||
|
||||
if (mode == BCH_RENAME_OVERWRITE)
|
||||
src_dir_u->bi_size -= dirent_occupied_size(src_name);
|
||||
|
||||
if (src_inode_u->bi_parent_subvol)
|
||||
src_inode_u->bi_parent_subvol = dst_dir.subvol;
|
||||
|
||||
|
@ -110,11 +110,18 @@ static int readpage_bio_extend(struct btree_trans *trans,
|
||||
if (!get_more)
|
||||
break;
|
||||
|
||||
unsigned sectors_remaining = sectors_this_extent - bio_sectors(bio);
|
||||
|
||||
if (sectors_remaining < PAGE_SECTORS << mapping_min_folio_order(iter->mapping))
|
||||
break;
|
||||
|
||||
unsigned order = ilog2(rounddown_pow_of_two(sectors_remaining) / PAGE_SECTORS);
|
||||
|
||||
folio = xa_load(&iter->mapping->i_pages, folio_offset);
|
||||
if (folio && !xa_is_value(folio))
|
||||
break;
|
||||
|
||||
folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0);
|
||||
folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), order);
|
||||
if (!folio)
|
||||
break;
|
||||
|
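The readahead change above scales the folio allocation order with how much of the extent remains: round the remaining sectors down to a power of two, convert to pages, take log2. A standalone sketch of that arithmetic follows; PAGE_SECTORS is taken as 8 (4 KiB pages, 512-byte sectors), which is an assumption for the example, and the two helpers are simplified reimplementations of the kernel macros.

#include <stdio.h>

#define PAGE_SECTORS	8u	/* 4 KiB pages / 512-byte sectors (assumed) */

static unsigned demo_rounddown_pow_of_two(unsigned n)
{
	unsigned r = 1;
	while (r * 2 <= n)
		r *= 2;
	return r;
}

static unsigned demo_ilog2(unsigned n)
{
	unsigned l = 0;
	while (n >>= 1)
		l++;
	return l;
}

int main(void)
{
	unsigned remaining[] = { 8, 24, 64, 200 };	/* sectors left in the extent */

	for (unsigned i = 0; i < 4; i++) {
		unsigned order = demo_ilog2(demo_rounddown_pow_of_two(remaining[i]) / PAGE_SECTORS);
		printf("%3u sectors remaining -> order %u (%u pages)\n",
		       remaining[i], order, 1u << order);
	}
	return 0;
}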
||||
@ -230,7 +237,8 @@ err:
|
||||
|
||||
if (ret) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9);
|
||||
lockrestart_do(trans,
|
||||
bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9));
|
||||
prt_printf(&buf, "read error %i from btree lookup", ret);
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
@@ -54,6 +54,31 @@ static int bch2_inode_flags_set(struct btree_trans *trans,
	    (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
		return -EINVAL;

	if ((newflags ^ oldflags) & BCH_INODE_casefolded) {
#ifdef CONFIG_UNICODE
		int ret = 0;
		/* Not supported on individual files. */
		if (!S_ISDIR(bi->bi_mode))
			return -EOPNOTSUPP;

		/*
		 * Make sure the dir is empty, as otherwise we'd need to
		 * rehash everything and update the dirent keys.
		 */
		ret = bch2_empty_dir_trans(trans, inode_inum(inode));
		if (ret < 0)
			return ret;

		if (!bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding))
			return -EOPNOTSUPP;

		bch2_check_set_feature(c, BCH_FEATURE_casefolding);
#else
		printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
		return -EOPNOTSUPP;
#endif
	}

||||
if (s->set_projinherit) {
|
||||
bi->bi_fields_set &= ~(1 << Inode_opt_project);
|
||||
bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
|
||||
|
@ -6,19 +6,21 @@
|
||||
|
||||
/* bcachefs inode flags -> vfs inode flags: */
|
||||
static const __maybe_unused unsigned bch_flags_to_vfs[] = {
|
||||
[__BCH_INODE_sync] = S_SYNC,
|
||||
[__BCH_INODE_immutable] = S_IMMUTABLE,
|
||||
[__BCH_INODE_append] = S_APPEND,
|
||||
[__BCH_INODE_noatime] = S_NOATIME,
|
||||
[__BCH_INODE_sync] = S_SYNC,
|
||||
[__BCH_INODE_immutable] = S_IMMUTABLE,
|
||||
[__BCH_INODE_append] = S_APPEND,
|
||||
[__BCH_INODE_noatime] = S_NOATIME,
|
||||
[__BCH_INODE_casefolded] = S_CASEFOLD,
|
||||
};
|
||||
|
||||
/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
|
||||
static const __maybe_unused unsigned bch_flags_to_uflags[] = {
|
||||
[__BCH_INODE_sync] = FS_SYNC_FL,
|
||||
[__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
|
||||
[__BCH_INODE_append] = FS_APPEND_FL,
|
||||
[__BCH_INODE_nodump] = FS_NODUMP_FL,
|
||||
[__BCH_INODE_noatime] = FS_NOATIME_FL,
|
||||
[__BCH_INODE_sync] = FS_SYNC_FL,
|
||||
[__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
|
||||
[__BCH_INODE_append] = FS_APPEND_FL,
|
||||
[__BCH_INODE_nodump] = FS_NODUMP_FL,
|
||||
[__BCH_INODE_noatime] = FS_NOATIME_FL,
|
||||
[__BCH_INODE_casefolded] = FS_CASEFOLD_FL,
|
||||
};
|
||||
|
||||
/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
|
||||
|
@ -698,6 +698,23 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
|
||||
if (IS_ERR(inode))
|
||||
inode = NULL;
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
if (!inode && IS_CASEFOLDED(vdir)) {
|
||||
/*
|
||||
* Do not cache a negative dentry in casefolded directories
|
||||
* as it would need to be invalidated in the following situation:
|
||||
* - Lookup file "blAH" in a casefolded directory
|
||||
* - Creation of file "BLAH" in a casefolded directory
|
||||
* - Lookup file "blAH" in a casefolded directory
|
||||
* which would fail if we had a negative dentry.
|
||||
*
|
||||
* We should come back to this when VFS has a method to handle
|
||||
* this edgecase.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
return d_splice_alias(&inode->v, dentry);
|
||||
}
|
||||
|
||||
@ -1802,7 +1819,8 @@ static void bch2_vfs_inode_init(struct btree_trans *trans,
|
||||
break;
|
||||
}
|
||||
|
||||
mapping_set_large_folios(inode->v.i_mapping);
|
||||
mapping_set_folio_min_order(inode->v.i_mapping,
|
||||
get_order(trans->c->opts.block_size));
|
||||
}
|
||||
|
||||
static void bch2_free_inode(struct inode *vinode)
|
||||
|
@ -823,6 +823,7 @@ struct inode_walker_entry {
|
||||
struct bch_inode_unpacked inode;
|
||||
u32 snapshot;
|
||||
u64 count;
|
||||
u64 i_size;
|
||||
};
|
||||
|
||||
struct inode_walker {
|
||||
@ -910,8 +911,9 @@ found:
|
||||
if (k.k->p.snapshot != i->snapshot && !is_whiteout) {
|
||||
struct inode_walker_entry new = *i;
|
||||
|
||||
new.snapshot = k.k->p.snapshot;
|
||||
new.count = 0;
|
||||
new.snapshot = k.k->p.snapshot;
|
||||
new.count = 0;
|
||||
new.i_size = 0;
|
||||
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
@ -1116,37 +1118,6 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_directory_size(struct btree_trans *trans,
|
||||
struct bch_inode_unpacked *inode_u,
|
||||
struct bkey_s_c inode_k, bool *write_inode)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u64 new_size = 0;
|
||||
int ret;
|
||||
|
||||
for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents,
|
||||
SPOS(inode_k.k->p.offset, 0, inode_k.k->p.snapshot),
|
||||
POS(inode_k.k->p.offset, U64_MAX),
|
||||
0, k, ret) {
|
||||
if (k.k->type != KEY_TYPE_dirent)
|
||||
continue;
|
||||
|
||||
struct bkey_s_c_dirent dirent = bkey_s_c_to_dirent(k);
|
||||
struct qstr name = bch2_dirent_get_name(dirent);
|
||||
|
||||
new_size += dirent_occupied_size(&name);
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
if (!ret && inode_u->bi_size != new_size) {
|
||||
inode_u->bi_size = new_size;
|
||||
*write_inode = true;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_inode(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k,
|
||||
@ -1335,16 +1306,6 @@ static int check_inode(struct btree_trans *trans,
|
||||
u.bi_journal_seq = journal_cur_seq(&c->journal);
|
||||
do_update = true;
|
||||
}
|
||||
|
||||
if (S_ISDIR(u.bi_mode)) {
|
||||
ret = check_directory_size(trans, &u, k, &do_update);
|
||||
|
||||
fsck_err_on(ret,
|
||||
trans, directory_size_mismatch,
|
||||
"directory inode %llu:%u with the mismatch directory size",
|
||||
u.bi_inum, k.k->p.snapshot);
|
||||
ret = 0;
|
||||
}
|
||||
do_update:
|
||||
if (do_update) {
|
||||
ret = __bch2_fsck_write_inode(trans, &u);
|
||||
@ -2017,10 +1978,31 @@ fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
|
||||
static int check_dir_i_size_notnested(struct btree_trans *trans, struct inode_walker *w)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
int ret = 0;
|
||||
|
||||
darray_for_each(w->inodes, i)
|
||||
if (fsck_err_on(i->inode.bi_size != i->i_size,
|
||||
trans, inode_dir_wrong_nlink,
|
||||
"directory %llu:%u with wrong i_size: got %llu, should be %llu",
|
||||
w->last_pos.inode, i->snapshot, i->inode.bi_size, i->i_size)) {
|
||||
i->inode.bi_size = i->i_size;
|
||||
ret = bch2_fsck_write_inode(trans, &i->inode);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
fsck_err:
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_subdir_dirents_count(struct btree_trans *trans, struct inode_walker *w)
|
||||
{
|
||||
u32 restart_count = trans->restart_count;
|
||||
return check_subdir_count_notnested(trans, w) ?:
|
||||
check_dir_i_size_notnested(trans, w) ?:
|
||||
trans_was_restarted(trans, restart_count);
|
||||
}
|
||||
|
||||
@ -2367,7 +2349,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
|
||||
goto out;
|
||||
|
||||
if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) {
|
||||
ret = check_subdir_count(trans, dir);
|
||||
ret = check_subdir_dirents_count(trans, dir);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
@ -2457,9 +2439,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (d.v->d_type == DT_DIR)
|
||||
for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
|
||||
for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) {
|
||||
if (d.v->d_type == DT_DIR)
|
||||
i->count++;
|
||||
i->i_size += bkey_bytes(d.k);
|
||||
}
|
||||
out:
|
||||
err:
|
||||
fsck_err:
|
||||
|
@ -137,7 +137,8 @@ enum inode_opt_id {
|
||||
x(i_sectors_dirty, 6) \
|
||||
x(unlinked, 7) \
|
||||
x(backptr_untrusted, 8) \
|
||||
x(has_child_snapshot, 9)
|
||||
x(has_child_snapshot, 9) \
|
||||
x(casefolded, 10)
|
||||
|
||||
/* bits 20+ reserved for packed fields below: */
|
||||
|
||||
|
@ -115,7 +115,8 @@ err:
|
||||
bch2_increment_clock(c, sectors_allocated, WRITE);
|
||||
if (should_print_err(ret)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9);
|
||||
lockrestart_do(trans,
|
||||
bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9));
|
||||
prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
|
@ -181,7 +181,7 @@ static noinline void promote_start(struct bch_read_bio *rbio)
|
||||
{
|
||||
struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
|
||||
|
||||
trace_and_count(op->write.op.c, read_promote, &rbio->bio);
|
||||
trace_and_count(op->write.op.c, io_read_promote, &rbio->bio);
|
||||
|
||||
INIT_WORK(&op->work, promote_start_work);
|
||||
queue_work(rbio->c->write_ref_wq, &op->work);
|
||||
@ -243,7 +243,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
|
||||
|
||||
ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
|
||||
writepoint_hashed((unsigned long) current),
|
||||
orig->opts,
|
||||
&orig->opts,
|
||||
update_opts,
|
||||
btree_id, k);
|
||||
/*
|
||||
@ -320,7 +320,7 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
|
||||
*read_full = promote_full;
|
||||
return promote;
|
||||
nopromote:
|
||||
trace_read_nopromote(c, ret);
|
||||
trace_io_read_nopromote(c, ret);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -329,9 +329,10 @@ nopromote:
|
||||
static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
|
||||
struct bch_read_bio *rbio, struct bpos read_pos)
|
||||
{
|
||||
return bch2_inum_offset_err_msg_trans(trans, out,
|
||||
(subvol_inum) { rbio->subvol, read_pos.inode },
|
||||
read_pos.offset << 9);
|
||||
return lockrestart_do(trans,
|
||||
bch2_inum_offset_err_msg_trans(trans, out,
|
||||
(subvol_inum) { rbio->subvol, read_pos.inode },
|
||||
read_pos.offset << 9));
|
||||
}
|
||||
|
||||
static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out,
|
||||
@ -463,7 +464,9 @@ static void bch2_rbio_retry(struct work_struct *work)
|
||||
};
|
||||
struct bch_io_failures failed = { .nr = 0 };
|
||||
|
||||
trace_and_count(c, read_retry, &rbio->bio);
|
||||
trace_io_read_retry(&rbio->bio);
|
||||
this_cpu_add(c->counters[BCH_COUNTER_io_read_retry],
|
||||
bvec_iter_sectors(rbio->bvec_iter));
|
||||
|
||||
if (rbio->retry == READ_RETRY_AVOID)
|
||||
bch2_mark_io_failure(&failed, &rbio->pick);
|
||||
@ -802,7 +805,7 @@ static void bch2_read_endio(struct bio *bio)
|
||||
|
||||
if (((rbio->flags & BCH_READ_retry_if_stale) && race_fault()) ||
|
||||
(ca && dev_ptr_stale(ca, &rbio->pick.ptr))) {
|
||||
trace_and_count(c, read_reuse_race, &rbio->bio);
|
||||
trace_and_count(c, io_read_reuse_race, &rbio->bio);
|
||||
|
||||
if (rbio->flags & BCH_READ_retry_if_stale)
|
||||
bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
|
||||
@ -891,6 +894,8 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
|
||||
swap(iter.bi_size, bytes);
|
||||
bio_advance_iter(&orig->bio, &iter, bytes);
|
||||
zero_fill_bio_iter(&orig->bio, iter);
|
||||
this_cpu_add(c->counters[BCH_COUNTER_io_read_inline],
|
||||
bvec_iter_sectors(iter));
|
||||
goto out_read_done;
|
||||
}
|
||||
retry_pick:
|
||||
@ -979,6 +984,7 @@ retry_pick:
|
||||
*/
|
||||
struct data_update *u = container_of(orig, struct data_update, rbio);
|
||||
if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) {
|
||||
BUG();
|
||||
if (ca)
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
goto hole;
|
||||
@ -1077,10 +1083,12 @@ retry_pick:
|
||||
rbio->bio.bi_end_io = bch2_read_endio;
|
||||
|
||||
if (rbio->bounce)
|
||||
trace_and_count(c, read_bounce, &rbio->bio);
|
||||
trace_and_count(c, io_read_bounce, &rbio->bio);
|
||||
|
||||
if (!(flags & BCH_READ_data_update))
|
||||
this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
|
||||
else
|
||||
this_cpu_add(c->counters[BCH_COUNTER_io_move_read], bio_sectors(&rbio->bio));
|
||||
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
|
||||
|
||||
/*
|
||||
@ -1093,7 +1101,7 @@ retry_pick:
|
||||
|
||||
if (!(flags & (BCH_READ_in_retry|BCH_READ_last_fragment))) {
|
||||
bio_inc_remaining(&orig->bio);
|
||||
trace_and_count(c, read_split, &orig->bio);
|
||||
trace_and_count(c, io_read_split, &orig->bio);
|
||||
}
|
||||
|
||||
if (!rbio->pick.idx) {
|
||||
@ -1170,6 +1178,8 @@ err:
|
||||
goto out_read_done;
|
||||
|
||||
hole:
|
||||
this_cpu_add(c->counters[BCH_COUNTER_io_read_hole],
|
||||
bvec_iter_sectors(iter));
|
||||
/*
|
||||
* won't normally happen in the BCH_READ_data_update
|
||||
* (bch2_move_extent()) path, but if we retry and the extent we wanted
|
||||
@ -1270,7 +1280,9 @@ err:
|
||||
|
||||
if (ret) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9);
|
||||
lockrestart_do(trans,
|
||||
bch2_inum_offset_err_msg_trans(trans, &buf, inum,
|
||||
bvec_iter.bi_sector << 9));
|
||||
prt_printf(&buf, "read error %i from btree lookup", ret);
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
|
@ -396,19 +396,61 @@ static int bch2_write_index_default(struct bch_write_op *op)
|
||||
|
||||
/* Writes */
|
||||
|
||||
static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op,
|
||||
u64 offset)
|
||||
void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
|
||||
struct bch_write_op *op, u64 offset, const char *fmt, ...)
|
||||
{
|
||||
bch2_inum_offset_err_msg(op->c, out,
|
||||
(subvol_inum) { op->subvol, op->pos.inode, },
|
||||
offset << 9);
|
||||
prt_printf(out, "write error%s: ",
|
||||
op->flags & BCH_WRITE_move ? "(internal move)" : "");
|
||||
if (op->subvol)
|
||||
lockrestart_do(trans,
|
||||
bch2_inum_offset_err_msg_trans(trans, out,
|
||||
(subvol_inum) { op->subvol, op->pos.inode, },
|
||||
offset << 9));
|
||||
else {
|
||||
struct bpos pos = op->pos;
|
||||
pos.offset = offset;
|
||||
lockrestart_do(trans, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
|
||||
}
|
||||
|
||||
prt_str(out, "write error: ");
|
||||
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
prt_vprintf(out, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (op->flags & BCH_WRITE_move) {
|
||||
struct data_update *u = container_of(op, struct data_update, op);
|
||||
|
||||
prt_printf(out, "\n from internal move ");
|
||||
bch2_bkey_val_to_text(out, op->c, bkey_i_to_s_c(u->k.k));
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
|
||||
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, u64 offset,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
__bch2_write_op_error(out, op, op->pos.offset);
|
||||
if (op->subvol)
|
||||
bch2_inum_offset_err_msg(op->c, out,
|
||||
(subvol_inum) { op->subvol, op->pos.inode, },
|
||||
offset << 9);
|
||||
else {
|
||||
struct bpos pos = op->pos;
|
||||
pos.offset = offset;
|
||||
bch2_inum_snap_offset_err_msg(op->c, out, pos);
|
||||
}
|
||||
|
||||
prt_str(out, "write error: ");
|
||||
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
prt_vprintf(out, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (op->flags & BCH_WRITE_move) {
|
||||
struct data_update *u = container_of(op, struct data_update, op);
|
||||
|
||||
prt_printf(out, "\n from internal move ");
|
||||
bch2_bkey_val_to_text(out, op->c, bkey_i_to_s_c(u->k.k));
|
||||
}
|
||||
}
|
||||
|
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
@ -551,8 +593,8 @@ static void __bch2_write_index(struct bch_write_op *op)
			struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);

			struct printbuf buf = PRINTBUF;
			__bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k));
			prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
			bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k),
					    "btree update error: %s", bch2_err_str(ret));
			bch_err_ratelimited(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}
@ -1104,8 +1146,8 @@ do_write:
csum_err:
	{
		struct printbuf buf = PRINTBUF;
		bch2_write_op_error(&buf, op);
		prt_printf(&buf, "error verifying existing checksum while rewriting existing data (memory corruption?)");
		bch2_write_op_error(&buf, op, op->pos.offset,
				    "error verifying existing checksum while rewriting existing data (memory corruption?)");
		bch_err_ratelimited(c, "%s", buf.buf);
		printbuf_exit(&buf);
	}
@ -1201,8 +1243,8 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
			struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);

			struct printbuf buf = PRINTBUF;
			__bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k));
			prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
			bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k),
						  "btree update error: %s", bch2_err_str(ret));
			bch_err_ratelimited(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}
|
||||
@ -1369,8 +1411,8 @@ err:
|
||||
|
||||
if (ret) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "%s(): btree lookup error: %s", __func__, bch2_err_str(ret));
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"%s(): btree lookup error: %s", __func__, bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
op->error = ret;
|
||||
@ -1492,8 +1534,8 @@ err:
|
||||
if (unlikely(ret < 0)) {
|
||||
if (!(op->flags & BCH_WRITE_alloc_nowait)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "%s(): %s", __func__, bch2_err_str(ret));
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"%s(): %s", __func__, bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
@ -1624,8 +1666,8 @@ CLOSURE_CALLBACK(bch2_write)
|
||||
|
||||
if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "misaligned write");
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"misaligned write");
|
||||
printbuf_exit(&buf);
|
||||
op->error = -EIO;
|
||||
goto err;
|
||||
|
@ -20,7 +20,13 @@ static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw
|
||||
void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
|
||||
enum bch_data_type, const struct bkey_i *, bool);
|
||||
|
||||
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op);
|
||||
__printf(5, 6)
|
||||
void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
|
||||
struct bch_write_op *op, u64, const char *, ...);
|
||||
|
||||
__printf(4, 5)
|
||||
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, u64,
|
||||
const char *, ...);
|
||||
|
||||
#define BCH_WRITE_FLAGS() \
|
||||
x(alloc_nowait) \
|
||||
|
@ -56,11 +56,18 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
|
||||
prt_printf(out, "seq:\t%llu\n", seq);
|
||||
printbuf_indent_add(out, 2);
|
||||
|
||||
prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i));
|
||||
if (!buf->write_started)
|
||||
prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i & JOURNAL_STATE_BUF_MASK));
|
||||
|
||||
prt_printf(out, "size:\t");
|
||||
prt_human_readable_u64(out, vstruct_bytes(buf->data));
|
||||
prt_newline(out);
|
||||
struct closure *cl = &buf->io;
|
||||
int r = atomic_read(&cl->remaining);
|
||||
prt_printf(out, "io:\t%pS r %i\n", cl->fn, r & CLOSURE_REMAINING_MASK);
|
||||
|
||||
if (buf->data) {
|
||||
prt_printf(out, "size:\t");
|
||||
prt_human_readable_u64(out, vstruct_bytes(buf->data));
|
||||
prt_newline(out);
|
||||
}
|
||||
|
||||
prt_printf(out, "expires:\t");
|
||||
prt_printf(out, "%li jiffies\n", buf->expires - jiffies);
|
||||
@ -87,6 +94,9 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
|
||||
|
||||
static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
|
||||
{
|
||||
lockdep_assert_held(&j->lock);
|
||||
out->atomic++;
|
||||
|
||||
if (!out->nr_tabstops)
|
||||
printbuf_tabstop_push(out, 24);
|
||||
|
||||
@ -95,6 +105,8 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
|
||||
seq++)
|
||||
bch2_journal_buf_to_text(out, j, seq);
|
||||
prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? "open" : "closed");
|
||||
|
||||
--out->atomic;
|
||||
}
|
||||
|
||||
static inline struct journal_buf *
|
||||
@ -104,10 +116,8 @@ journal_seq_to_buf(struct journal *j, u64 seq)
|
||||
|
||||
EBUG_ON(seq > journal_cur_seq(j));
|
||||
|
||||
if (journal_seq_unwritten(j, seq)) {
|
||||
if (journal_seq_unwritten(j, seq))
|
||||
buf = j->buf + (seq & JOURNAL_BUF_MASK);
|
||||
EBUG_ON(le64_to_cpu(buf->data->seq) != seq);
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
@ -195,7 +205,8 @@ void bch2_journal_do_writes(struct journal *j)
|
||||
if (w->write_started)
|
||||
continue;
|
||||
|
||||
if (!journal_state_count(j->reservations, idx)) {
|
||||
if (!journal_state_seq_count(j, j->reservations, seq)) {
|
||||
j->seq_write_started = seq;
|
||||
w->write_started = true;
|
||||
closure_call(&w->io, bch2_journal_write, j->wq, NULL);
|
||||
}
|
||||
@ -391,6 +402,9 @@ static int journal_entry_open(struct journal *j)
|
||||
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
|
||||
return JOURNAL_ERR_max_in_flight;
|
||||
|
||||
if (atomic64_read(&j->seq) - j->seq_write_started == JOURNAL_STATE_BUF_NR)
|
||||
return JOURNAL_ERR_max_open;
|
||||
|
||||
if (journal_cur_seq(j) >= JOURNAL_SEQ_MAX) {
|
||||
bch_err(c, "cannot start: journal seq overflow");
|
||||
if (bch2_fs_emergency_read_only_locked(c))
|
||||
@ -398,8 +412,16 @@ static int journal_entry_open(struct journal *j)
|
||||
return JOURNAL_ERR_insufficient_devices; /* -EROFS */
|
||||
}
|
||||
|
||||
if (!j->free_buf && !buf->data)
|
||||
return JOURNAL_ERR_enomem; /* will retry after write completion frees up a buf */
|
||||
|
||||
BUG_ON(!j->cur_entry_sectors);
|
||||
|
||||
if (!buf->data) {
|
||||
swap(buf->data, j->free_buf);
|
||||
swap(buf->buf_size, j->free_buf_size);
|
||||
}
|
||||
|
||||
buf->expires =
|
||||
(journal_cur_seq(j) == j->flushed_seq_ondisk
|
||||
? jiffies
|
||||
@ -464,7 +486,7 @@ static int journal_entry_open(struct journal *j)
|
||||
|
||||
new.idx++;
|
||||
BUG_ON(journal_state_count(new, new.idx));
|
||||
BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_BUF_MASK));
|
||||
BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_STATE_BUF_MASK));
|
||||
|
||||
journal_state_inc(&new);
|
||||
|
||||
@ -514,6 +536,33 @@ static void journal_write_work(struct work_struct *work)
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
static void journal_buf_prealloc(struct journal *j)
{
	if (j->free_buf &&
	    j->free_buf_size >= j->buf_size_want)
		return;

	unsigned buf_size = j->buf_size_want;

	spin_unlock(&j->lock);
	void *buf = kvmalloc(buf_size, GFP_NOFS);
	spin_lock(&j->lock);

	if (buf &&
	    (!j->free_buf ||
	     buf_size > j->free_buf_size)) {
		swap(buf, j->free_buf);
		swap(buf_size, j->free_buf_size);
	}

	if (unlikely(buf)) {
		spin_unlock(&j->lock);
		/* kvfree can sleep */
		kvfree(buf);
		spin_lock(&j->lock);
	}
}
||||
|
||||
static int __journal_res_get(struct journal *j, struct journal_res *res,
|
||||
unsigned flags)
|
||||
{
|
||||
@ -544,6 +593,8 @@ retry:
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
journal_buf_prealloc(j);
|
||||
|
||||
/*
|
||||
* Recheck after taking the lock, so we don't race with another thread
|
||||
* that just did journal_entry_open() and call bch2_journal_entry_close()
|
||||
@ -571,20 +622,43 @@ unlock:
|
||||
can_discard = j->can_discard;
|
||||
spin_unlock(&j->lock);
|
||||
out:
|
||||
if (likely(!ret))
|
||||
return 0;
|
||||
if (ret == JOURNAL_ERR_retry)
|
||||
goto retry;
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
if (journal_error_check_stuck(j, ret, flags))
|
||||
ret = -BCH_ERR_journal_res_get_blocked;
|
||||
|
||||
if (ret == JOURNAL_ERR_max_in_flight &&
|
||||
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true)) {
|
||||
|
||||
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true) &&
|
||||
trace_journal_entry_full_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_printbuf_make_room(&buf, 4096);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
|
||||
bch2_journal_bufs_to_text(&buf, j);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
trace_journal_entry_full(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
count_event(c, journal_entry_full);
|
||||
}
|
||||
|
||||
if (ret == JOURNAL_ERR_max_open &&
|
||||
track_event_change(&c->times[BCH_TIME_blocked_journal_max_open], true) &&
|
||||
trace_journal_entry_full_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_printbuf_make_room(&buf, 4096);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
|
||||
bch2_journal_bufs_to_text(&buf, j);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
trace_journal_entry_full(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
count_event(c, journal_entry_full);
|
||||
@ -907,7 +981,7 @@ int bch2_journal_meta(struct journal *j)
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
|
||||
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_journal))
|
||||
return -EROFS;
|
||||
return -BCH_ERR_erofs_no_writes;
|
||||
|
||||
int ret = __bch2_journal_meta(j);
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_journal);
|
||||
@ -951,7 +1025,8 @@ static void __bch2_journal_block(struct journal *j)
|
||||
new.cur_entry_offset = JOURNAL_ENTRY_BLOCKED_VAL;
|
||||
} while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v));
|
||||
|
||||
journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset);
|
||||
if (old.cur_entry_offset < JOURNAL_ENTRY_BLOCKED_VAL)
|
||||
journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset);
|
||||
}
|
||||
}
|
||||
|
||||
@ -992,7 +1067,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
|
||||
*blocked = true;
|
||||
}
|
||||
|
||||
ret = journal_state_count(s, idx) > open
|
||||
ret = journal_state_count(s, idx & JOURNAL_STATE_BUF_MASK) > open
|
||||
? ERR_PTR(-EAGAIN)
|
||||
: buf;
|
||||
break;
|
||||
@ -1342,6 +1417,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
|
||||
j->replay_journal_seq_end = cur_seq;
|
||||
j->last_seq_ondisk = last_seq;
|
||||
j->flushed_seq_ondisk = cur_seq - 1;
|
||||
j->seq_write_started = cur_seq - 1;
|
||||
j->seq_ondisk = cur_seq - 1;
|
||||
j->pin.front = last_seq;
|
||||
j->pin.back = cur_seq;
|
||||
@ -1474,6 +1550,7 @@ void bch2_fs_journal_exit(struct journal *j)
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
|
||||
kvfree(j->buf[i].data);
|
||||
kvfree(j->free_buf);
|
||||
free_fifo(&j->pin);
|
||||
}
|
||||
|
||||
@ -1500,13 +1577,13 @@ int bch2_fs_journal_init(struct journal *j)
|
||||
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)))
|
||||
return -BCH_ERR_ENOMEM_journal_pin_fifo;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) {
|
||||
j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN;
|
||||
j->buf[i].data = kvmalloc(j->buf[i].buf_size, GFP_KERNEL);
|
||||
if (!j->buf[i].data)
|
||||
return -BCH_ERR_ENOMEM_journal_buf;
|
||||
j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN;
|
||||
j->free_buf = kvmalloc(j->free_buf_size, GFP_KERNEL);
|
||||
if (!j->free_buf)
|
||||
return -BCH_ERR_ENOMEM_journal_buf;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
|
||||
j->buf[i].idx = i;
|
||||
}
|
||||
|
||||
j->pin.front = j->pin.back = 1;
|
||||
|
||||
@ -1556,6 +1633,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
||||
prt_printf(out, "average write size:\t");
|
||||
prt_human_readable_u64(out, nr_writes ? div64_u64(j->entry_bytes_written, nr_writes) : 0);
|
||||
prt_newline(out);
|
||||
prt_printf(out, "free buf:\t%u\n", j->free_buf ? j->free_buf_size : 0);
|
||||
prt_printf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim);
|
||||
prt_printf(out, "nr background reclaim:\t%llu\n", j->nr_background_reclaim);
|
||||
prt_printf(out, "reclaim kicked:\t%u\n", j->reclaim_kicked);
|
||||
|
@ -121,11 +121,6 @@ static inline void journal_wake(struct journal *j)
|
||||
closure_wake_up(&j->async_wait);
|
||||
}
|
||||
|
||||
static inline struct journal_buf *journal_cur_buf(struct journal *j)
|
||||
{
|
||||
return j->buf + j->reservations.idx;
|
||||
}
|
||||
|
||||
/* Sequence number of oldest dirty journal entry */
|
||||
|
||||
static inline u64 journal_last_seq(struct journal *j)
|
||||
@ -143,6 +138,15 @@ static inline u64 journal_last_unwritten_seq(struct journal *j)
|
||||
return j->seq_ondisk + 1;
|
||||
}
|
||||
|
||||
static inline struct journal_buf *journal_cur_buf(struct journal *j)
{
	unsigned idx = (journal_cur_seq(j) &
			JOURNAL_BUF_MASK &
			~JOURNAL_STATE_BUF_MASK) + j->reservations.idx;

	return j->buf + idx;
}

static inline int journal_state_count(union journal_res_state s, int idx)
{
	switch (idx) {
@ -154,6 +158,15 @@ static inline int journal_state_count(union journal_res_state s, int idx)
	BUG();
}

static inline int journal_state_seq_count(struct journal *j,
					  union journal_res_state s, u64 seq)
{
	if (journal_cur_seq(j) - seq <= JOURNAL_STATE_BUF_NR)
		return journal_state_count(s, seq & JOURNAL_STATE_BUF_MASK);
	else
		return 0;
}
|
||||
|
||||
static inline void journal_state_inc(union journal_res_state *s)
|
||||
{
|
||||
s->buf0_count += s->idx == 0;
|
||||
@ -269,7 +282,7 @@ void bch2_journal_buf_put_final(struct journal *, u64);
|
||||
|
||||
static inline void __bch2_journal_buf_put(struct journal *j, u64 seq)
|
||||
{
|
||||
unsigned idx = seq & JOURNAL_BUF_MASK;
|
||||
unsigned idx = seq & JOURNAL_STATE_BUF_MASK;
|
||||
union journal_res_state s;
|
||||
|
||||
s = journal_state_buf_put(j, idx);
|
||||
@ -279,7 +292,7 @@ static inline void __bch2_journal_buf_put(struct journal *j, u64 seq)
|
||||
|
||||
static inline void bch2_journal_buf_put(struct journal *j, u64 seq)
|
||||
{
|
||||
unsigned idx = seq & JOURNAL_BUF_MASK;
|
||||
unsigned idx = seq & JOURNAL_STATE_BUF_MASK;
|
||||
union journal_res_state s;
|
||||
|
||||
s = journal_state_buf_put(j, idx);
|
||||
@ -365,9 +378,7 @@ static inline int journal_res_get_fast(struct journal *j,
|
||||
res->ref = true;
|
||||
res->offset = old.cur_entry_offset;
|
||||
res->seq = journal_cur_seq(j);
|
||||
res->seq -= (res->seq - old.idx) & JOURNAL_BUF_MASK;
|
||||
|
||||
EBUG_ON(res->seq != le64_to_cpu(j->buf[old.idx].data->seq));
|
||||
res->seq -= (res->seq - old.idx) & JOURNAL_STATE_BUF_MASK;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -394,6 +405,7 @@ out:
|
||||
(flags & JOURNAL_RES_GET_NONBLOCK) != 0,
|
||||
NULL, _THIS_IP_);
|
||||
EBUG_ON(!res->ref);
|
||||
BUG_ON(!res->seq);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -1515,7 +1515,7 @@ static void __journal_write_alloc(struct journal *j,
|
||||
* @j: journal object
|
||||
* @w: journal buf (entry to be written)
|
||||
*
|
||||
* Returns: 0 on success, or -EROFS on failure
|
||||
* Returns: 0 on success, or -BCH_ERR_insufficient_devices on failure
|
||||
*/
|
||||
static int journal_write_alloc(struct journal *j, struct journal_buf *w)
|
||||
{
|
||||
@ -1624,8 +1624,7 @@ static CLOSURE_CALLBACK(journal_write_done)
|
||||
} else {
|
||||
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
|
||||
w->devs_written);
|
||||
if (bch2_mark_replicas(c, &replicas.e))
|
||||
err = -EIO;
|
||||
err = bch2_mark_replicas(c, &replicas.e);
|
||||
}
|
||||
|
||||
if (err)
|
||||
@ -1640,6 +1639,21 @@ static CLOSURE_CALLBACK(journal_write_done)
|
||||
j->err_seq = seq;
|
||||
w->write_done = true;
|
||||
|
||||
if (!j->free_buf || j->free_buf_size < w->buf_size) {
|
||||
swap(j->free_buf, w->data);
|
||||
swap(j->free_buf_size, w->buf_size);
|
||||
}
|
||||
|
||||
if (w->data) {
|
||||
void *buf = w->data;
|
||||
w->data = NULL;
|
||||
w->buf_size = 0;
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
kvfree(buf);
|
||||
spin_lock(&j->lock);
|
||||
}
|
||||
|
||||
bool completed = false;
|
||||
|
||||
for (seq = journal_last_unwritten_seq(j);
|
||||
@ -1649,7 +1663,7 @@ static CLOSURE_CALLBACK(journal_write_done)
|
||||
if (!w->write_done)
|
||||
break;
|
||||
|
||||
if (!j->err_seq && !JSET_NO_FLUSH(w->data)) {
|
||||
if (!j->err_seq && !w->noflush) {
|
||||
j->flushed_seq_ondisk = seq;
|
||||
j->last_seq_ondisk = w->last_seq;
|
||||
|
||||
@ -1973,7 +1987,7 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *
|
||||
* write anything at all.
|
||||
*/
|
||||
if (error && test_bit(JOURNAL_need_flush_write, &j->flags))
|
||||
return -EIO;
|
||||
return error;
|
||||
|
||||
if (error ||
|
||||
w->noflush ||
|
||||
|
@ -384,12 +384,16 @@ void bch2_journal_pin_drop(struct journal *j,
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
|
||||
static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin,
|
||||
journal_pin_flush_fn fn)
|
||||
{
|
||||
if (fn == bch2_btree_node_flush0 ||
|
||||
fn == bch2_btree_node_flush1)
|
||||
return JOURNAL_PIN_TYPE_btree;
|
||||
else if (fn == bch2_btree_key_cache_journal_flush)
|
||||
fn == bch2_btree_node_flush1) {
|
||||
unsigned idx = fn == bch2_btree_node_flush1;
|
||||
struct btree *b = container_of(pin, struct btree, writes[idx].journal);
|
||||
|
||||
return JOURNAL_PIN_TYPE_btree0 - b->c.level;
|
||||
} else if (fn == bch2_btree_key_cache_journal_flush)
|
||||
return JOURNAL_PIN_TYPE_key_cache;
|
||||
else
|
||||
return JOURNAL_PIN_TYPE_other;
|
||||
@ -441,7 +445,7 @@ void bch2_journal_pin_copy(struct journal *j,
|
||||
|
||||
bool reclaim = __journal_pin_drop(j, dst);
|
||||
|
||||
bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn));
|
||||
bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(dst, flush_fn));
|
||||
|
||||
if (reclaim)
|
||||
bch2_journal_reclaim_fast(j);
|
||||
@ -465,7 +469,7 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
|
||||
|
||||
bool reclaim = __journal_pin_drop(j, pin);
|
||||
|
||||
bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn));
|
||||
bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
|
||||
|
||||
if (reclaim)
|
||||
bch2_journal_reclaim_fast(j);
|
||||
@ -587,7 +591,7 @@ static size_t journal_flush_pins(struct journal *j,
|
||||
spin_lock(&j->lock);
|
||||
/* Pin might have been dropped or rearmed: */
|
||||
if (likely(!err && !j->flush_in_progress_dropped))
|
||||
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]);
|
||||
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]);
|
||||
j->flush_in_progress = NULL;
|
||||
j->flush_in_progress_dropped = false;
|
||||
spin_unlock(&j->lock);
|
||||
@ -869,18 +873,13 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
|
||||
|
||||
mutex_lock(&j->reclaim_lock);
|
||||
|
||||
if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
|
||||
BIT(JOURNAL_PIN_TYPE_key_cache)|
|
||||
BIT(JOURNAL_PIN_TYPE_other))) {
|
||||
*did_work = true;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
|
||||
BIT(JOURNAL_PIN_TYPE_btree))) {
|
||||
*did_work = true;
|
||||
goto unlock;
|
||||
}
|
||||
for (int type = JOURNAL_PIN_TYPE_NR - 1;
|
||||
type >= 0;
|
||||
--type)
|
||||
if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) {
|
||||
*did_work = true;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (seq_to_flush > journal_cur_seq(j))
|
||||
bch2_journal_entry_close(j);
|
||||
|
@ -231,15 +231,14 @@ bool bch2_blacklist_entries_gc(struct bch_fs *c)
|
||||
struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
|
||||
BUG_ON(nr != t->nr);
|
||||
|
||||
unsigned i;
|
||||
for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr);
|
||||
src < bl->start + nr;
|
||||
src++, i = eytzinger0_next(i, nr)) {
|
||||
src = bl->start;
|
||||
eytzinger0_for_each(i, nr) {
|
||||
BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
|
||||
BUG_ON(t->entries[i].end != le64_to_cpu(src->end));
|
||||
|
||||
if (t->entries[i].dirty || t->entries[i].end >= c->journal.oldest_seq_found_ondisk)
|
||||
*dst++ = *src;
|
||||
src++;
|
||||
}
|
||||
|
||||
unsigned new_nr = dst - bl->start;
|
||||
|
@ -12,7 +12,11 @@
|
||||
/* btree write buffer steals 8 bits for its own purposes: */
|
||||
#define JOURNAL_SEQ_MAX ((1ULL << 56) - 1)
|
||||
|
||||
#define JOURNAL_BUF_BITS 2
|
||||
#define JOURNAL_STATE_BUF_BITS 2
|
||||
#define JOURNAL_STATE_BUF_NR (1U << JOURNAL_STATE_BUF_BITS)
|
||||
#define JOURNAL_STATE_BUF_MASK (JOURNAL_STATE_BUF_NR - 1)
|
||||
|
||||
#define JOURNAL_BUF_BITS 4
|
||||
#define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS)
|
||||
#define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1)
|
||||
|
||||
@ -53,7 +57,10 @@ struct journal_buf {
|
||||
*/
|
||||
|
||||
enum journal_pin_type {
|
||||
JOURNAL_PIN_TYPE_btree,
|
||||
JOURNAL_PIN_TYPE_btree3,
|
||||
JOURNAL_PIN_TYPE_btree2,
|
||||
JOURNAL_PIN_TYPE_btree1,
|
||||
JOURNAL_PIN_TYPE_btree0,
|
||||
JOURNAL_PIN_TYPE_key_cache,
|
||||
JOURNAL_PIN_TYPE_other,
|
||||
JOURNAL_PIN_TYPE_NR,
|
||||
@ -150,9 +157,11 @@ enum journal_flags {
|
||||
x(retry) \
|
||||
x(blocked) \
|
||||
x(max_in_flight) \
|
||||
x(max_open) \
|
||||
x(journal_full) \
|
||||
x(journal_pin_full) \
|
||||
x(journal_stuck) \
|
||||
x(enomem) \
|
||||
x(insufficient_devices)
|
||||
|
||||
enum journal_errors {
|
||||
@ -215,6 +224,8 @@ struct journal {
|
||||
* other is possibly being written out.
|
||||
*/
|
||||
struct journal_buf buf[JOURNAL_BUF_NR];
|
||||
void *free_buf;
|
||||
unsigned free_buf_size;
|
||||
|
||||
spinlock_t lock;
|
||||
|
||||
@ -232,6 +243,7 @@ struct journal {
|
||||
/* Sequence number of most recent journal entry (last entry in @pin) */
|
||||
atomic64_t seq;
|
||||
|
||||
u64 seq_write_started;
|
||||
/* seq, last_seq from the most recent journal entry successfully written */
|
||||
u64 seq_ondisk;
|
||||
u64 flushed_seq_ondisk;
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "btree_iter.h"
|
||||
#include "btree_update.h"
|
||||
#include "btree_write_buffer.h"
|
||||
#include "ec.h"
|
||||
#include "error.h"
|
||||
#include "lru.h"
|
||||
#include "recovery.h"
|
||||
@ -59,9 +60,9 @@ int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time
|
||||
return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set);
|
||||
}
|
||||
|
||||
int bch2_lru_change(struct btree_trans *trans,
|
||||
u16 lru_id, u64 dev_bucket,
|
||||
u64 old_time, u64 new_time)
|
||||
int __bch2_lru_change(struct btree_trans *trans,
|
||||
u16 lru_id, u64 dev_bucket,
|
||||
u64 old_time, u64 new_time)
|
||||
{
|
||||
if (old_time == new_time)
|
||||
return 0;
|
||||
@ -78,7 +79,9 @@ static const char * const bch2_lru_types[] = {
|
||||
};
|
||||
|
||||
int bch2_lru_check_set(struct btree_trans *trans,
|
||||
u16 lru_id, u64 time,
|
||||
u16 lru_id,
|
||||
u64 dev_bucket,
|
||||
u64 time,
|
||||
struct bkey_s_c referring_k,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
@ -87,9 +90,7 @@ int bch2_lru_check_set(struct btree_trans *trans,
|
||||
struct btree_iter lru_iter;
|
||||
struct bkey_s_c lru_k =
|
||||
bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
|
||||
lru_pos(lru_id,
|
||||
bucket_to_u64(referring_k.k->p),
|
||||
time), 0);
|
||||
lru_pos(lru_id, dev_bucket, time), 0);
|
||||
int ret = bkey_err(lru_k);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -104,7 +105,7 @@ int bch2_lru_check_set(struct btree_trans *trans,
|
||||
" %s",
|
||||
bch2_lru_types[lru_type(lru_k)],
|
||||
(bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) {
|
||||
ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time);
|
||||
ret = bch2_lru_set(trans, lru_id, dev_bucket, time);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
@ -116,49 +117,73 @@ fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct bbpos lru_pos_to_bp(struct bkey_s_c lru_k)
|
||||
{
|
||||
enum bch_lru_type type = lru_type(lru_k);
|
||||
|
||||
switch (type) {
|
||||
case BCH_LRU_read:
|
||||
case BCH_LRU_fragmentation:
|
||||
return BBPOS(BTREE_ID_alloc, u64_to_bucket(lru_k.k->p.offset));
|
||||
case BCH_LRU_stripes:
|
||||
return BBPOS(BTREE_ID_stripes, POS(0, lru_k.k->p.offset));
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static u64 bkey_lru_type_idx(struct bch_fs *c,
|
||||
enum bch_lru_type type,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bch_alloc_v4 a_convert;
|
||||
const struct bch_alloc_v4 *a;
|
||||
|
||||
switch (type) {
|
||||
case BCH_LRU_read:
|
||||
a = bch2_alloc_to_v4(k, &a_convert);
|
||||
return alloc_lru_idx_read(*a);
|
||||
case BCH_LRU_fragmentation: {
|
||||
a = bch2_alloc_to_v4(k, &a_convert);
|
||||
|
||||
rcu_read_lock();
|
||||
struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.k->p.inode);
|
||||
u64 idx = ca
|
||||
? alloc_lru_idx_fragmentation(*a, ca)
|
||||
: 0;
|
||||
rcu_read_unlock();
|
||||
return idx;
|
||||
}
|
||||
case BCH_LRU_stripes:
|
||||
return k.k->type == KEY_TYPE_stripe
|
||||
? stripe_lru_pos(bkey_s_c_to_stripe(k).v)
|
||||
: 0;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static int bch2_check_lru_key(struct btree_trans *trans,
|
||||
struct btree_iter *lru_iter,
|
||||
struct bkey_s_c lru_k,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bch_alloc_v4 a_convert;
|
||||
const struct bch_alloc_v4 *a;
|
||||
struct printbuf buf1 = PRINTBUF;
|
||||
struct printbuf buf2 = PRINTBUF;
|
||||
enum bch_lru_type type = lru_type(lru_k);
|
||||
struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset);
|
||||
u64 idx;
|
||||
int ret;
|
||||
|
||||
struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_pos);
|
||||
struct bbpos bp = lru_pos_to_bp(lru_k);
|
||||
|
||||
if (fsck_err_on(!ca,
|
||||
trans, lru_entry_to_invalid_bucket,
|
||||
"lru key points to nonexistent device:bucket %llu:%llu",
|
||||
alloc_pos.inode, alloc_pos.offset))
|
||||
return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false);
|
||||
|
||||
k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0);
|
||||
ret = bkey_err(k);
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, bp.btree, bp.pos, 0);
|
||||
int ret = bkey_err(k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
a = bch2_alloc_to_v4(k, &a_convert);
|
||||
enum bch_lru_type type = lru_type(lru_k);
|
||||
u64 idx = bkey_lru_type_idx(c, type, k);
|
||||
|
||||
switch (type) {
|
||||
case BCH_LRU_read:
|
||||
idx = alloc_lru_idx_read(*a);
|
||||
break;
|
||||
case BCH_LRU_fragmentation:
|
||||
idx = alloc_lru_idx_fragmentation(*a, ca);
|
||||
break;
|
||||
}
|
||||
|
||||
if (lru_k.k->type != KEY_TYPE_set ||
|
||||
lru_pos_time(lru_k.k->p) != idx) {
|
||||
if (lru_pos_time(lru_k.k->p) != idx) {
|
||||
ret = bch2_btree_write_buffer_maybe_flush(trans, lru_k, last_flushed);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -176,7 +201,6 @@ static int bch2_check_lru_key(struct btree_trans *trans,
|
||||
err:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
bch2_dev_put(ca);
|
||||
printbuf_exit(&buf2);
|
||||
printbuf_exit(&buf1);
|
||||
return ret;
|
||||
|
@ -28,9 +28,14 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l)
{
	u16 lru_id = l.k->p.inode >> 48;

	if (lru_id == BCH_LRU_FRAGMENTATION_START)
	switch (lru_id) {
	case BCH_LRU_BUCKET_FRAGMENTATION:
		return BCH_LRU_fragmentation;
		return BCH_LRU_read;
	case BCH_LRU_STRIPE_FRAGMENTATION:
		return BCH_LRU_stripes;
	default:
		return BCH_LRU_read;
	}
}
|
||||
|
||||
int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, struct bkey_validate_context);
|
||||
@ -46,10 +51,19 @@ void bch2_lru_pos_to_text(struct printbuf *, struct bpos);
|
||||
|
||||
int bch2_lru_del(struct btree_trans *, u16, u64, u64);
int bch2_lru_set(struct btree_trans *, u16, u64, u64);
int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
int __bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);

static inline int bch2_lru_change(struct btree_trans *trans,
				  u16 lru_id, u64 dev_bucket,
				  u64 old_time, u64 new_time)
{
	return old_time != new_time
		? __bch2_lru_change(trans, lru_id, dev_bucket, old_time, new_time)
		: 0;
}

struct bkey_buf;
int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *);
int bch2_lru_check_set(struct btree_trans *, u16, u64, u64, struct bkey_s_c, struct bkey_buf *);
|
||||
|
||||
int bch2_check_lrus(struct bch_fs *);
|
||||
|
||||
|
@ -9,7 +9,8 @@ struct bch_lru {
|
||||
|
||||
#define BCH_LRU_TYPES() \
|
||||
x(read) \
|
||||
x(fragmentation)
|
||||
x(fragmentation) \
|
||||
x(stripes)
|
||||
|
||||
enum bch_lru_type {
|
||||
#define x(n) BCH_LRU_##n,
|
||||
@ -17,7 +18,8 @@ enum bch_lru_type {
|
||||
#undef x
|
||||
};
|
||||
|
||||
#define BCH_LRU_FRAGMENTATION_START ((1U << 16) - 1)
|
||||
#define BCH_LRU_BUCKET_FRAGMENTATION ((1U << 16) - 1)
|
||||
#define BCH_LRU_STRIPE_FRAGMENTATION ((1U << 16) - 2)
|
||||
|
||||
#define LRU_TIME_BITS 48
|
||||
#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "keylist.h"
|
||||
#include "migrate.h"
|
||||
#include "move.h"
|
||||
#include "progress.h"
|
||||
#include "replicas.h"
|
||||
#include "super-io.h"
|
||||
|
||||
@ -76,7 +77,9 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
static int bch2_dev_usrdata_drop(struct bch_fs *c,
|
||||
struct progress_indicator_state *progress,
|
||||
unsigned dev_idx, int flags)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
enum btree_id id;
|
||||
@ -88,8 +91,10 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
|
||||
ret = for_each_btree_key_commit(trans, iter, id, POS_MIN,
|
||||
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags));
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
|
||||
bch2_progress_update_iter(trans, progress, &iter, "dropping user data");
|
||||
bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags);
|
||||
}));
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
@ -99,7 +104,9 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
static int bch2_dev_metadata_drop(struct bch_fs *c,
|
||||
struct progress_indicator_state *progress,
|
||||
unsigned dev_idx, int flags)
|
||||
{
|
||||
struct btree_trans *trans;
|
||||
struct btree_iter iter;
|
||||
@ -125,6 +132,8 @@ retry:
|
||||
while (bch2_trans_begin(trans),
|
||||
(b = bch2_btree_iter_peek_node(&iter)) &&
|
||||
!(ret = PTR_ERR_OR_ZERO(b))) {
|
||||
bch2_progress_update_iter(trans, progress, &iter, "dropping metadata");
|
||||
|
||||
if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
|
||||
goto next;
|
||||
|
||||
@ -169,6 +178,11 @@ err:
|
||||
|
||||
int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
{
|
||||
return bch2_dev_usrdata_drop(c, dev_idx, flags) ?:
|
||||
bch2_dev_metadata_drop(c, dev_idx, flags);
|
||||
struct progress_indicator_state progress;
|
||||
bch2_progress_init(&progress, c,
|
||||
BIT_ULL(BTREE_ID_extents)|
|
||||
BIT_ULL(BTREE_ID_reflink));
|
||||
|
||||
return bch2_dev_usrdata_drop(c, &progress, dev_idx, flags) ?:
|
||||
bch2_dev_metadata_drop(c, &progress, dev_idx, flags);
|
||||
}
|
||||
|
@ -38,28 +38,28 @@ const char * const bch2_data_ops_strs[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k,
|
||||
static void trace_io_move2(struct bch_fs *c, struct bkey_s_c k,
|
||||
struct bch_io_opts *io_opts,
|
||||
struct data_update_opts *data_opts)
|
||||
{
|
||||
if (trace_move_extent_enabled()) {
|
||||
if (trace_io_move_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
prt_newline(&buf);
|
||||
bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts);
|
||||
trace_move_extent(c, buf.buf);
|
||||
trace_io_move(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
}
|
||||
|
||||
static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
|
||||
static void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
if (trace_move_extent_read_enabled()) {
|
||||
if (trace_io_move_read_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
trace_move_extent_read(c, buf.buf);
|
||||
trace_io_move_read(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
}
|
||||
@ -89,7 +89,12 @@ static void move_free(struct moving_io *io)
	wake_up(&ctxt->wait);
	mutex_unlock(&ctxt->lock);

	bch2_data_update_exit(&io->write);
	if (!io->write.data_opts.scrub) {
		bch2_data_update_exit(&io->write);
	} else {
		bch2_bio_free_pages_pool(io->write.op.c, &io->write.op.wbio.bio);
		kfree(io->write.bvecs);
	}
	kfree(io);
}
|
||||
|
||||
@ -127,12 +132,12 @@ static void move_write(struct moving_io *io)
|
||||
return;
|
||||
}
|
||||
|
||||
if (trace_move_extent_write_enabled()) {
|
||||
if (trace_io_move_write_enabled()) {
|
||||
struct bch_fs *c = io->write.op.c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
|
||||
trace_move_extent_write(c, buf.buf);
|
||||
trace_io_move_write(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
@ -268,7 +273,8 @@ int bch2_move_extent(struct moving_context *ctxt,
|
||||
struct bch_fs *c = trans->c;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
trace_move_extent2(c, k, &io_opts, &data_opts);
|
||||
trace_io_move2(c, k, &io_opts, &data_opts);
|
||||
this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
|
||||
|
||||
if (ctxt->stats)
|
||||
ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos);
|
||||
@ -300,15 +306,21 @@ int bch2_move_extent(struct moving_context *ctxt,
|
||||
|
||||
if (!data_opts.scrub) {
|
||||
ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
|
||||
io_opts, data_opts, iter->btree_id, k);
|
||||
&io_opts, data_opts, iter->btree_id, k);
|
||||
if (ret)
|
||||
goto err_free;
|
||||
|
||||
io->write.op.end_io = move_write_done;
|
||||
} else {
|
||||
bch2_bkey_buf_init(&io->write.k);
|
||||
bch2_bkey_buf_reassemble(&io->write.k, c, k);
|
||||
|
||||
io->write.op.c = c;
|
||||
io->write.data_opts = data_opts;
|
||||
|
||||
ret = bch2_data_update_bios_init(&io->write, c, &io_opts);
|
||||
if (ret)
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
io->write.rbio.bio.bi_end_io = move_read_endio;
|
||||
@ -327,9 +339,7 @@ int bch2_move_extent(struct moving_context *ctxt,
|
||||
atomic_inc(&io->b->count);
|
||||
}
|
||||
|
||||
this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
|
||||
this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
|
||||
trace_move_extent_read2(c, k);
|
||||
trace_io_move_read2(c, k);
|
||||
|
||||
mutex_lock(&ctxt->lock);
|
||||
atomic_add(io->read_sectors, &ctxt->read_sectors);
|
||||
@ -363,15 +373,15 @@ err:
|
||||
bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
return ret;
|
||||
|
||||
count_event(c, move_extent_start_fail);
|
||||
count_event(c, io_move_start_fail);
|
||||
|
||||
if (trace_move_extent_start_fail_enabled()) {
|
||||
if (trace_io_move_start_fail_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
prt_str(&buf, ": ");
|
||||
prt_str(&buf, bch2_err_str(ret));
|
||||
trace_move_extent_start_fail(c, buf.buf);
|
||||
trace_io_move_start_fail(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
return ret;
|
||||
@ -764,6 +774,9 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
|
||||
if (!(data_types & BIT(bp.v->data_type)))
|
||||
goto next;
|
||||
|
||||
if (!bp.v->level && bp.v->btree_id == BTREE_ID_stripes)
|
||||
goto next;
|
||||
|
||||
k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed);
|
||||
ret = bkey_err(k);
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
@ -849,6 +862,7 @@ static int bch2_move_data_phys(struct bch_fs *c,
|
||||
|
||||
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
|
||||
ctxt.stats->phys = true;
|
||||
ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys;
|
||||
|
||||
int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
|
||||
bch2_moving_ctxt_exit(&ctxt);
|
||||
@ -1038,14 +1052,6 @@ static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
|
||||
return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
|
||||
}
|
||||
|
||||
static bool migrate_btree_pred(struct bch_fs *c, void *arg,
|
||||
struct btree *b,
|
||||
struct bch_io_opts *io_opts,
|
||||
struct data_update_opts *data_opts)
|
||||
{
|
||||
return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ancient versions of bcachefs produced packed formats which could represent
|
||||
* keys that the in memory format cannot represent; this checks for those
|
||||
@ -1174,6 +1180,12 @@ int bch2_data_job(struct bch_fs *c,
|
||||
|
||||
switch (op.op) {
|
||||
case BCH_DATA_OP_scrub:
|
||||
/*
|
||||
* prevent tests from spuriously failing, make sure we see all
|
||||
* btree nodes that need to be repaired
|
||||
*/
|
||||
bch2_btree_interior_updates_flush(c);
|
||||
|
||||
ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX,
|
||||
op.scrub.data_types,
|
||||
NULL,
|
||||
@ -1202,14 +1214,14 @@ int bch2_data_job(struct bch_fs *c,
|
||||
|
||||
stats->data_type = BCH_DATA_journal;
|
||||
ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
|
||||
ret = bch2_move_btree(c, start, end,
|
||||
migrate_btree_pred, &op, stats) ?: ret;
|
||||
ret = bch2_move_data(c, start, end,
|
||||
NULL,
|
||||
stats,
|
||||
writepoint_hashed((unsigned long) current),
|
||||
true,
|
||||
migrate_pred, &op) ?: ret;
|
||||
ret = bch2_move_data_phys(c, op.migrate.dev, 0, U64_MAX,
|
||||
~0,
|
||||
NULL,
|
||||
stats,
|
||||
writepoint_hashed((unsigned long) current),
|
||||
true,
|
||||
migrate_pred, &op) ?: ret;
|
||||
bch2_btree_interior_updates_flush(c);
|
||||
ret = bch2_replicas_gc2(c) ?: ret;
|
||||
break;
|
||||
case BCH_DATA_OP_rewrite_old_nodes:
|
||||
|
@ -168,8 +168,8 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
|
||||
bch2_trans_begin(trans);
|
||||
|
||||
ret = for_each_btree_key_max(trans, iter, BTREE_ID_lru,
|
||||
lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
|
||||
lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
|
||||
lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, 0, 0),
|
||||
lru_pos(BCH_LRU_BUCKET_FRAGMENTATION, U64_MAX, LRU_TIME_MAX),
|
||||
0, k, ({
|
||||
struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
|
||||
int ret2 = 0;
|
||||
|
@ -197,7 +197,7 @@ enum fsck_err_opts {
|
||||
BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_OPT_siphash, \
|
||||
NULL, "Hash function for directory entries and xattrs")\
|
||||
x(metadata_target, u16, \
|
||||
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
|
||||
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
|
||||
OPT_FN(bch2_opt_target), \
|
||||
BCH_SB_METADATA_TARGET, 0, \
|
||||
"(target)", "Device or label for metadata writes") \
|
||||
|
63
libbcachefs/progress.c
Normal file
@ -0,0 +1,63 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include "bcachefs.h"
|
||||
#include "bbpos.h"
|
||||
#include "disk_accounting.h"
|
||||
#include "progress.h"
|
||||
|
||||
void bch2_progress_init(struct progress_indicator_state *s,
|
||||
struct bch_fs *c,
|
||||
u64 btree_id_mask)
|
||||
{
|
||||
memset(s, 0, sizeof(*s));
|
||||
|
||||
s->next_print = jiffies + HZ * 10;
|
||||
|
||||
for (unsigned i = 0; i < BTREE_ID_NR; i++) {
|
||||
if (!(btree_id_mask & BIT_ULL(i)))
|
||||
continue;
|
||||
|
||||
struct disk_accounting_pos acc = {
|
||||
.type = BCH_DISK_ACCOUNTING_btree,
|
||||
.btree.id = i,
|
||||
};
|
||||
|
||||
u64 v;
|
||||
bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
|
||||
s->nodes_total += div64_ul(v, btree_sectors(c));
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool progress_update_p(struct progress_indicator_state *s)
|
||||
{
|
||||
bool ret = time_after_eq(jiffies, s->next_print);
|
||||
|
||||
if (ret)
|
||||
s->next_print = jiffies + HZ * 10;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_progress_update_iter(struct btree_trans *trans,
|
||||
struct progress_indicator_state *s,
|
||||
struct btree_iter *iter,
|
||||
const char *msg)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree *b = path_l(btree_iter_path(trans, iter))->b;
|
||||
|
||||
s->nodes_seen += b != s->last_node;
|
||||
s->last_node = b;
|
||||
|
||||
if (progress_update_p(s)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
unsigned percent = s->nodes_total
|
||||
? div64_u64(s->nodes_seen * 100, s->nodes_total)
|
||||
: 0;
|
||||
|
||||
prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ",
|
||||
msg, percent, s->nodes_seen, s->nodes_total);
|
||||
bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos));
|
||||
|
||||
bch_info(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
}
|
29
libbcachefs/progress.h
Normal file
@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_PROGRESS_H
#define _BCACHEFS_PROGRESS_H

/*
 * Lame progress indicators
 *
 * We don't like to use these because they print to the dmesg console, which is
 * spammy - we much prefer to be wired up to a userspace program (e.g. via
 * thread_with_file) and have it print the progress indicator.
 *
 * But some code is old and doesn't support that, or runs in a context where
 * that's not yet practical (mount).
 */

struct progress_indicator_state {
	unsigned long		next_print;
	u64			nodes_seen;
	u64			nodes_total;
	struct btree		*last_node;
};

void bch2_progress_init(struct progress_indicator_state *, struct bch_fs *, u64);
void bch2_progress_update_iter(struct btree_trans *,
			       struct progress_indicator_state *,
			       struct btree_iter *,
			       const char *);

#endif /* _BCACHEFS_PROGRESS_H */
@ -172,7 +172,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans,
|
||||
bool should_commit)
|
||||
{
|
||||
if (REFLINK_P_ERROR(p.v))
|
||||
return -BCH_ERR_missing_indirect_extent;
|
||||
return 0;
|
||||
|
||||
struct bch_fs *c = trans->c;
|
||||
u64 live_start = REFLINK_P_IDX(p.v);
|
||||
@ -185,12 +185,21 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans,
|
||||
BUG_ON(missing_start < refd_start);
|
||||
BUG_ON(missing_end > refd_end);
|
||||
|
||||
if (fsck_err(trans, reflink_p_to_missing_reflink_v,
|
||||
"pointer to missing indirect extent\n"
|
||||
" %s\n"
|
||||
" missing range %llu-%llu",
|
||||
(bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf),
|
||||
missing_start, missing_end)) {
|
||||
struct bpos missing_pos = bkey_start_pos(p.k);
|
||||
missing_pos.offset += missing_start - live_start;
|
||||
|
||||
prt_printf(&buf, "pointer to missing indirect extent in ");
|
||||
ret = bch2_inum_snap_offset_err_msg_trans(trans, &buf, missing_pos);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
prt_printf(&buf, "-%llu\n ", (missing_pos.offset + (missing_end - missing_start)) << 9);
|
||||
bch2_bkey_val_to_text(&buf, c, p.s_c);
|
||||
|
||||
prt_printf(&buf, "\n missing reflink btree range %llu-%llu",
|
||||
missing_start, missing_end);
|
||||
|
||||
if (fsck_err(trans, reflink_p_to_missing_reflink_v, "%s", buf.buf)) {
|
||||
struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p);
|
||||
ret = PTR_ERR_OR_ZERO(new);
|
||||
if (ret)
|
||||
@ -259,8 +268,6 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans,
|
||||
return k;
|
||||
|
||||
if (unlikely(!bkey_extent_is_reflink_data(k.k))) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
|
||||
unsigned size = min((u64) k.k->size,
|
||||
REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) -
|
||||
reflink_offset);
|
||||
@ -268,14 +275,16 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans,
|
||||
|
||||
int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset,
|
||||
k.k->p.offset, should_commit);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
return bkey_s_c_err(ret);
|
||||
}
|
||||
} else if (unlikely(REFLINK_P_ERROR(p.v))) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
|
||||
int ret = bch2_indirect_extent_not_missing(trans, p, should_commit);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
bch2_trans_iter_exit(trans, iter);
|
||||
return bkey_s_c_err(ret);
|
||||
}
|
||||
}
|
||||
|
||||
*offset_into_extent = reflink_offset - bkey_start_offset(k.k);
|
||||
@ -300,7 +309,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (bkey_deleted(k.k)) {
|
||||
if (!bkey_refcount_c(k)) {
|
||||
if (!(flags & BTREE_TRIGGER_overwrite))
|
||||
ret = -BCH_ERR_missing_indirect_extent;
|
||||
goto next;
|
||||
|
@ -9,8 +9,20 @@ enum counters_flags {
|
||||
|
||||
#define BCH_PERSISTENT_COUNTERS() \
|
||||
x(io_read, 0, TYPE_SECTORS) \
|
||||
x(io_read_inline, 80, TYPE_SECTORS) \
|
||||
x(io_read_hole, 81, TYPE_SECTORS) \
|
||||
x(io_read_promote, 30, TYPE_COUNTER) \
|
||||
x(io_read_bounce, 31, TYPE_COUNTER) \
|
||||
x(io_read_split, 33, TYPE_COUNTER) \
|
||||
x(io_read_reuse_race, 34, TYPE_COUNTER) \
|
||||
x(io_read_retry, 32, TYPE_COUNTER) \
|
||||
x(io_write, 1, TYPE_SECTORS) \
|
||||
x(io_move, 2, TYPE_SECTORS) \
|
||||
x(io_move_read, 35, TYPE_SECTORS) \
|
||||
x(io_move_write, 36, TYPE_SECTORS) \
|
||||
x(io_move_finish, 37, TYPE_SECTORS) \
|
||||
x(io_move_fail, 38, TYPE_COUNTER) \
|
||||
x(io_move_start_fail, 39, TYPE_COUNTER) \
|
||||
x(bucket_invalidate, 3, TYPE_COUNTER) \
|
||||
x(bucket_discard, 4, TYPE_COUNTER) \
|
||||
x(bucket_discard_fast, 79, TYPE_COUNTER) \
|
||||
@ -39,16 +51,6 @@ enum counters_flags {
|
||||
x(journal_reclaim_finish, 27, TYPE_COUNTER) \
|
||||
x(journal_reclaim_start, 28, TYPE_COUNTER) \
|
||||
x(journal_write, 29, TYPE_COUNTER) \
|
||||
x(read_promote, 30, TYPE_COUNTER) \
|
||||
x(read_bounce, 31, TYPE_COUNTER) \
|
||||
x(read_split, 33, TYPE_COUNTER) \
|
||||
x(read_retry, 32, TYPE_COUNTER) \
|
||||
x(read_reuse_race, 34, TYPE_COUNTER) \
|
||||
x(move_extent_read, 35, TYPE_SECTORS) \
|
||||
x(move_extent_write, 36, TYPE_SECTORS) \
|
||||
x(move_extent_finish, 37, TYPE_SECTORS) \
|
||||
x(move_extent_fail, 38, TYPE_COUNTER) \
|
||||
x(move_extent_start_fail, 39, TYPE_COUNTER) \
|
||||
x(copygc, 40, TYPE_COUNTER) \
|
||||
x(copygc_wait, 41, TYPE_COUNTER) \
|
||||
x(gc_gens_end, 42, TYPE_COUNTER) \
|
||||
|
@ -92,8 +92,14 @@
|
||||
BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \
|
||||
BCH_FSCK_ERR_accounting_key_junk_at_end) \
|
||||
x(directory_size, \
|
||||
BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
|
||||
BIT_ULL(BCH_RECOVERY_PASS_check_dirents), \
|
||||
BCH_FSCK_ERR_directory_size_mismatch) \
|
||||
x(cached_backpointers, \
|
||||
BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
|
||||
BCH_FSCK_ERR_ptr_to_missing_backpointer) \
|
||||
x(stripe_backpointers, \
|
||||
BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
|
||||
BCH_FSCK_ERR_ptr_to_missing_backpointer)
|
||||
|
||||
#define DOWNGRADE_TABLE() \
|
||||
x(bucket_stripe_sectors, \
|
||||
|
@ -180,9 +180,9 @@ enum bch_fsck_flags {
|
||||
x(ptr_crc_nonce_mismatch, 162, 0) \
|
||||
x(ptr_stripe_redundant, 163, 0) \
|
||||
x(reservation_key_nr_replicas_invalid, 164, 0) \
|
||||
x(reflink_v_refcount_wrong, 165, 0) \
|
||||
x(reflink_v_refcount_wrong, 165, FSCK_AUTOFIX) \
|
||||
x(reflink_v_pos_bad, 292, 0) \
|
||||
x(reflink_p_to_missing_reflink_v, 166, 0) \
|
||||
x(reflink_p_to_missing_reflink_v, 166, FSCK_AUTOFIX) \
|
||||
x(reflink_refcount_underflow, 293, 0) \
|
||||
x(stripe_pos_bad, 167, 0) \
|
||||
x(stripe_val_size_bad, 168, 0) \
|
||||
@ -314,7 +314,9 @@ enum bch_fsck_flags {
|
||||
x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \
|
||||
x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \
|
||||
x(directory_size_mismatch, 303, FSCK_AUTOFIX) \
|
||||
x(MAX, 304, 0)
|
||||
x(dirent_cf_name_too_big, 304, 0) \
|
||||
x(dirent_stray_data_after_cf_name, 305, 0) \
|
||||
x(MAX, 306, 0)
|
||||
|
||||
enum bch_sb_error_id {
|
||||
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
|
||||
|
@ -146,8 +146,9 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
|
||||
id = get_ancestor_below(t, id, ancestor);
|
||||
if (likely(ancestor >= IS_ANCESTOR_BITMAP))
|
||||
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
|
||||
id = get_ancestor_below(t, id, ancestor);
|
||||
|
||||
ret = id && id < ancestor
|
||||
? test_ancestor_bitmap(t, id, ancestor)
|
||||
@ -389,7 +390,7 @@ static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
|
||||
u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
|
||||
{
|
||||
u32 id = snapshot_root;
|
||||
u32 subvol = 0, s;
|
||||
|
@ -105,6 +105,7 @@ static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
|
||||
return id;
|
||||
}
|
||||
|
||||
u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *, u32);
|
||||
u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32);
|
||||
|
||||
static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
|
||||
|
@ -50,7 +50,7 @@ static noinline int fsck_rename_dirent(struct btree_trans *trans,
|
||||
for (unsigned i = 0; i < 1000; i++) {
|
||||
unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u",
|
||||
old_name.len, old_name.name, i);
|
||||
unsigned u64s = BKEY_U64s + dirent_val_u64s(len);
|
||||
unsigned u64s = BKEY_U64s + dirent_val_u64s(len, 0);
|
||||
|
||||
if (u64s > U8_MAX)
|
||||
return -EINVAL;
|
||||
|
@ -34,6 +34,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
|
||||
|
||||
struct bch_hash_info {
|
||||
u8 type;
|
||||
struct unicode_map *cf_encoding;
|
||||
/*
|
||||
* For crc32 or crc64 string hashes the first key value of
|
||||
* the siphash_key (k0) is used as the key.
|
||||
@ -47,6 +48,9 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
|
||||
/* XXX ick */
|
||||
struct bch_hash_info info = {
|
||||
.type = INODE_STR_HASH(bi),
|
||||
#ifdef CONFIG_UNICODE
|
||||
.cf_encoding = !!(bi->bi_flags & BCH_INODE_casefolded) ? c->cf_encoding : NULL,
|
||||
#endif
|
||||
.siphash_key = { .k0 = bi->bi_hash_seed }
|
||||
};
|
||||
|
||||
|
@ -387,12 +387,6 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
|
||||
|
||||
block_size = le16_to_cpu(sb->block_size);
|
||||
|
||||
if (block_size > PAGE_SECTORS) {
|
||||
prt_printf(out, "Block size too big (got %u, max %u)",
|
||||
block_size, PAGE_SECTORS);
|
||||
return -BCH_ERR_invalid_sb_block_size;
|
||||
}
|
||||
|
||||
if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) {
|
||||
prt_printf(out, "Bad user UUID (got zeroes)");
|
||||
return -BCH_ERR_invalid_sb_uuid;
|
||||
|
@ -837,6 +837,25 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
/* Default encoding until we can potentially have more as an option. */
|
||||
c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
|
||||
if (IS_ERR(c->cf_encoding)) {
|
||||
printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
|
||||
unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
|
||||
unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
|
||||
unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
#else
|
||||
if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
|
||||
printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n");
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
#endif
|
||||
|
||||
pr_uuid(&name, c->sb.user_uuid.b);
|
||||
ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0;
|
||||
if (ret)
|
||||
|
@ -146,6 +146,7 @@ write_attribute(trigger_journal_writes);
|
||||
write_attribute(trigger_btree_cache_shrink);
|
||||
write_attribute(trigger_btree_key_cache_shrink);
|
||||
write_attribute(trigger_freelist_wakeup);
|
||||
write_attribute(trigger_btree_updates);
|
||||
read_attribute(gc_gens_pos);
|
||||
|
||||
read_attribute(uuid);
|
||||
@ -411,6 +412,9 @@ STORE(bch2_fs)
|
||||
|
||||
/* Debugging: */
|
||||
|
||||
if (attr == &sysfs_trigger_btree_updates)
|
||||
queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
|
||||
|
||||
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
|
||||
return -EROFS;
|
||||
|
||||
@ -580,6 +584,7 @@ struct attribute *bch2_fs_internal_files[] = {
|
||||
&sysfs_trigger_btree_cache_shrink,
|
||||
&sysfs_trigger_btree_key_cache_shrink,
|
||||
&sysfs_trigger_freelist_wakeup,
|
||||
&sysfs_trigger_btree_updates,
|
||||
|
||||
&sysfs_gc_gens_pos,
|
||||
|
||||
|
@ -295,12 +295,12 @@ TRACE_EVENT(write_super,
|
||||
|
||||
/* io.c: */
|
||||
|
||||
DEFINE_EVENT(bio, read_promote,
|
||||
DEFINE_EVENT(bio, io_read_promote,
|
||||
TP_PROTO(struct bio *bio),
|
||||
TP_ARGS(bio)
|
||||
);
|
||||
|
||||
TRACE_EVENT(read_nopromote,
|
||||
TRACE_EVENT(io_read_nopromote,
|
||||
TP_PROTO(struct bch_fs *c, int ret),
|
||||
TP_ARGS(c, ret),
|
||||
|
||||
@ -319,22 +319,22 @@ TRACE_EVENT(read_nopromote,
|
||||
__entry->ret)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bio, read_bounce,
|
||||
DEFINE_EVENT(bio, io_read_bounce,
|
||||
TP_PROTO(struct bio *bio),
|
||||
TP_ARGS(bio)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bio, read_split,
|
||||
DEFINE_EVENT(bio, io_read_split,
|
||||
TP_PROTO(struct bio *bio),
|
||||
TP_ARGS(bio)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bio, read_retry,
|
||||
DEFINE_EVENT(bio, io_read_retry,
|
||||
TP_PROTO(struct bio *bio),
|
||||
TP_ARGS(bio)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bio, read_reuse_race,
|
||||
DEFINE_EVENT(bio, io_read_reuse_race,
|
||||
TP_PROTO(struct bio *bio),
|
||||
TP_ARGS(bio)
|
||||
);
|
||||
@ -797,32 +797,32 @@ TRACE_EVENT(bucket_invalidate,
|
||||
|
||||
/* Moving IO */
|
||||
|
||||
DEFINE_EVENT(fs_str, move_extent,
|
||||
DEFINE_EVENT(fs_str, io_move,
|
||||
TP_PROTO(struct bch_fs *c, const char *str),
|
||||
TP_ARGS(c, str)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(fs_str, move_extent_read,
|
||||
DEFINE_EVENT(fs_str, io_move_read,
|
||||
TP_PROTO(struct bch_fs *c, const char *str),
|
||||
TP_ARGS(c, str)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(fs_str, move_extent_write,
|
||||
DEFINE_EVENT(fs_str, io_move_write,
|
||||
TP_PROTO(struct bch_fs *c, const char *str),
|
||||
TP_ARGS(c, str)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(fs_str, move_extent_finish,
|
||||
DEFINE_EVENT(fs_str, io_move_finish,
|
||||
TP_PROTO(struct bch_fs *c, const char *str),
|
||||
TP_ARGS(c, str)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(fs_str, move_extent_fail,
|
||||
DEFINE_EVENT(fs_str, io_move_fail,
|
||||
TP_PROTO(struct bch_fs *c, const char *str),
|
||||
TP_ARGS(c, str)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(fs_str, move_extent_start_fail,
|
||||
DEFINE_EVENT(fs_str, io_move_start_fail,
|
||||
TP_PROTO(struct bch_fs *c, const char *str),
|
||||
TP_ARGS(c, str)
|
||||
);
|
||||
|
@ -473,10 +473,10 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
u64 last_q = 0;

prt_printf(out, "quantiles (%s):\t", u->name);
eytzinger0_for_each(i, NR_QUANTILES) {
bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
eytzinger0_for_each(j, NR_QUANTILES) {
bool is_last = eytzinger0_next(j, NR_QUANTILES) == -1;

u64 q = max(quantiles->entries[i].m, last_q);
u64 q = max(quantiles->entries[j].m, last_q);
prt_printf(out, "%llu ", div64_u64(q, u->nsecs));
if (is_last)
prt_newline(out);
@ -701,9 +701,9 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
#if 0
void eytzinger1_test(void)
{
unsigned inorder, eytz, size;
unsigned inorder, size;

pr_info("1 based eytzinger test:");
pr_info("1 based eytzinger test:\n");

for (size = 2;
size < 65536;
@ -711,13 +711,7 @@ void eytzinger1_test(void)
unsigned extra = eytzinger1_extra(size);

if (!(size % 4096))
pr_info("tree size %u", size);

BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size));
BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size));

BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0);
BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0);
pr_info("tree size %u\n", size);

inorder = 1;
eytzinger1_for_each(eytz, size) {
@ -728,15 +722,16 @@ void eytzinger1_test(void)

inorder++;
}
BUG_ON(inorder - 1 != size);
}
}

void eytzinger0_test(void)
{
unsigned inorder, eytz, size;
unsigned inorder, size;

pr_info("0 based eytzinger test:");
pr_info("0 based eytzinger test:\n");

for (size = 1;
size < 65536;
@ -744,13 +739,7 @@ void eytzinger0_test(void)
unsigned extra = eytzinger0_extra(size);

if (!(size % 4096))
pr_info("tree size %u", size);

BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size));
BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size));

BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1);
BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1);
pr_info("tree size %u\n", size);

inorder = 0;
eytzinger0_for_each(eytz, size) {
@ -761,37 +750,171 @@ void eytzinger0_test(void)

inorder++;
}
BUG_ON(inorder != size);

inorder = size - 1;
eytzinger0_for_each_prev(eytz, size) {
BUG_ON(eytz != eytzinger0_first(size) &&
eytzinger0_next(eytzinger0_prev(eytz, size), size) != eytz);

inorder--;
}
BUG_ON(inorder != -1);
}
}

static inline int cmp_u16(const void *_l, const void *_r, size_t size)
static inline int cmp_u16(const void *_l, const void *_r)
{
const u16 *l = _l, *r = _r;

return (*l > *r) - (*r - *l);
return (*l > *r) - (*r > *l);
}

static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search)
static void eytzinger0_find_test_le(u16 *test_array, unsigned nr, u16 search)
{
int i, c1 = -1, c2 = -1;
ssize_t r;
int r, s;
bool bad;

r = eytzinger0_find_le(test_array, nr,
sizeof(test_array[0]),
cmp_u16, &search);
if (r >= 0)
c1 = test_array[r];

for (i = 0; i < nr; i++)
if (test_array[i] <= search && test_array[i] > c2)
c2 = test_array[i];

if (c1 != c2) {
eytzinger0_for_each(i, nr)
pr_info("[%3u] = %12u", i, test_array[i]);
pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i",
i, r, c1, c2);
if (r >= 0) {
if (test_array[r] > search) {
bad = true;
} else {
s = eytzinger0_next(r, nr);
bad = s >= 0 && test_array[s] <= search;
}
} else {
s = eytzinger0_last(nr);
bad = s >= 0 && test_array[s] <= search;
}

if (bad) {
s = -1;
eytzinger0_for_each_prev(j, nr) {
if (test_array[j] <= search) {
s = j;
break;
}
}

eytzinger0_for_each(j, nr)
pr_info("[%3u] = %12u\n", j, test_array[j]);
pr_info("find_le(%12u) = %3i should be %3i\n",
search, r, s);
BUG();
}
}

static void eytzinger0_find_test_gt(u16 *test_array, unsigned nr, u16 search)
{
int r, s;
bool bad;

r = eytzinger0_find_gt(test_array, nr,
sizeof(test_array[0]),
cmp_u16, &search);
if (r >= 0) {
if (test_array[r] <= search) {
bad = true;
} else {
s = eytzinger0_prev(r, nr);
bad = s >= 0 && test_array[s] > search;
}
} else {
s = eytzinger0_first(nr);
bad = s >= 0 && test_array[s] > search;
}

if (bad) {
s = -1;
eytzinger0_for_each(j, nr) {
if (test_array[j] > search) {
s = j;
break;
}
}

eytzinger0_for_each(j, nr)
pr_info("[%3u] = %12u\n", j, test_array[j]);
pr_info("find_gt(%12u) = %3i should be %3i\n",
search, r, s);
BUG();
}
}

static void eytzinger0_find_test_ge(u16 *test_array, unsigned nr, u16 search)
{
int r, s;
bool bad;

r = eytzinger0_find_ge(test_array, nr,
sizeof(test_array[0]),
cmp_u16, &search);
if (r >= 0) {
if (test_array[r] < search) {
bad = true;
} else {
s = eytzinger0_prev(r, nr);
bad = s >= 0 && test_array[s] >= search;
}
} else {
s = eytzinger0_first(nr);
bad = s >= 0 && test_array[s] >= search;
}

if (bad) {
s = -1;
eytzinger0_for_each(j, nr) {
if (test_array[j] >= search) {
s = j;
break;
}
}

eytzinger0_for_each(j, nr)
pr_info("[%3u] = %12u\n", j, test_array[j]);
pr_info("find_ge(%12u) = %3i should be %3i\n",
search, r, s);
BUG();
}
}

static void eytzinger0_find_test_eq(u16 *test_array, unsigned nr, u16 search)
{
unsigned r;
int s;
bool bad;

r = eytzinger0_find(test_array, nr,
sizeof(test_array[0]),
cmp_u16, &search);

if (r < nr) {
bad = test_array[r] != search;
} else {
s = eytzinger0_find_le(test_array, nr,
sizeof(test_array[0]),
cmp_u16, &search);
bad = s >= 0 && test_array[s] == search;
}

if (bad) {
eytzinger0_for_each(j, nr)
pr_info("[%3u] = %12u\n", j, test_array[j]);
pr_info("find(%12u) = %3i is incorrect\n",
search, r);
BUG();
}
}

static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search)
{
eytzinger0_find_test_le(test_array, nr, search);
eytzinger0_find_test_gt(test_array, nr, search);
eytzinger0_find_test_ge(test_array, nr, search);
eytzinger0_find_test_eq(test_array, nr, search);
}

void eytzinger0_find_test(void)
@ -800,15 +923,18 @@ void eytzinger0_find_test(void)
u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL);

for (nr = 1; nr < allocated; nr++) {
pr_info("testing %u elems", nr);
u16 prev = 0;

pr_info("testing %u elems\n", nr);

get_random_bytes(test_array, nr * sizeof(test_array[0]));
eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL);

/* verify array is sorted correctly: */
eytzinger0_for_each(i, nr)
BUG_ON(i != eytzinger0_last(nr) &&
test_array[i] > test_array[eytzinger0_next(i, nr)]);
eytzinger0_for_each(j, nr) {
BUG_ON(test_array[j] < prev);
prev = test_array[j];
}

for (i = 0; i < U16_MAX; i += 1 << 12)
eytzinger0_find_test_val(test_array, nr, i);