Update bcachefs sources to 864591728963 bcachefs: Dropped superblock write is no longer a fatal error

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Author: Kent Overstreet 2024-12-08 22:31:09 -05:00
parent 634c812a1e
commit 1055935ffe
61 changed files with 1196 additions and 904 deletions

View File

@ -1 +1 @@
55a65a994ed5fba038fda00f78416faf6f308bb8
864591728963d416c49e502bfee56a283eda31a5

View File

@ -13,7 +13,7 @@ cc-cross-prefix = $(firstword $(foreach c, $(1), \
$(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c))))
# output directory for tests below
TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$
TMPOUT = .tmp_$$$$
# try-run
# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)

View File

@ -6,6 +6,7 @@
#define __LINUX_BLK_TYPES_H
#include <linux/atomic.h>
#include <linux/backing-dev.h>
#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/kobject.h>

View File

@ -26,6 +26,7 @@ extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
extern void kmemleak_update_trace(const void *ptr) __ref;
extern void kmemleak_not_leak(const void *ptr) __ref;
extern void kmemleak_transient_leak(const void *ptr) __ref;
extern void kmemleak_ignore(const void *ptr) __ref;
extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
extern void kmemleak_no_scan(const void *ptr) __ref;
@ -93,6 +94,9 @@ static inline void kmemleak_update_trace(const void *ptr)
static inline void kmemleak_not_leak(const void *ptr)
{
}
static inline void kmemleak_transient_leak(const void *ptr)
{
}
static inline void kmemleak_ignore(const void *ptr)
{
}

View File

@ -20,6 +20,7 @@
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/workqueue.h>

View File

@ -933,8 +933,6 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
u64 transaction_seq = trans->journal_res.seq;
BUG_ON(!transaction_seq);
BUG_ON(transaction_seq < new_a->journal_seq_nonempty);
BUG_ON(transaction_seq < new_a->journal_seq_empty);
if (log_fsck_err_on(transaction_seq && new_a->journal_seq_nonempty > transaction_seq,
trans, alloc_key_journal_seq_in_future,

View File

@ -107,14 +107,10 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
return;
}
percpu_down_read(&c->mark_lock);
spin_lock(&ob->lock);
ob->valid = false;
ob->data_type = 0;
spin_unlock(&ob->lock);
percpu_up_read(&c->mark_lock);
spin_lock(&c->freelist_lock);
bch2_open_bucket_hash_remove(c, ob);

View File

@ -178,7 +178,7 @@ static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos)
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}
static int bch2_backpointers_maybe_flush(struct btree_trans *trans,
static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans,
struct bkey_s_c visiting_k,
struct bkey_buf *last_flushed)
{
@ -201,17 +201,30 @@ static int backpointer_target_not_found(struct btree_trans *trans,
* looking at may have already been deleted - failure to find what it
* pointed to is not an error:
*/
ret = bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed);
ret = last_flushed
? bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed)
: 0;
if (ret)
return ret;
prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
bp.v->level ? "btree node" : "extent");
bch2_bkey_val_to_text(&buf, c, bp.s_c);
prt_printf(&buf, "\n ");
prt_printf(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, target_k);
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry)
if (p.ptr.dev == bp.k->p.inode) {
prt_printf(&buf, "\n ");
struct bkey_i_backpointer bp2;
bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2);
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i));
}
if (fsck_err(trans, backpointer_to_missing_ptr,
"%s", buf.buf))
ret = bch2_backpointer_del(trans, bp.k->p);
@ -491,7 +504,7 @@ check_existing_bp:
struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k);
struct bkey_s_c other_extent =
bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, &s->last_flushed);
bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL);
ret = bkey_err(other_extent);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
ret = 0;
@ -553,11 +566,11 @@ check_existing_bp:
goto err;
missing:
printbuf_reset(&buf);
prt_str(&buf, "missing backpointer ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i));
prt_newline(&buf);
prt_str(&buf, "missing backpointer\n for: ");
bch2_bkey_val_to_text(&buf, c, orig_k);
prt_printf(&buf, "\n got: ");
prt_printf(&buf, "\n want: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i));
prt_printf(&buf, "\n got: ");
bch2_bkey_val_to_text(&buf, c, bp_k);
if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
@ -586,12 +599,16 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
rcu_read_lock();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches);
bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty);
rcu_read_unlock();
if (check) {
if (check || empty) {
struct bkey_i_backpointer bp;
bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
int ret = check_bp_exists(trans, s, &bp, k);
int ret = check
? check_bp_exists(trans, s, &bp, k)
: bch2_bucket_backpointer_mod(trans, k, &bp, true);
if (ret)
return ret;
}
@ -825,12 +842,15 @@ static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t
}
}
static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, struct bkey_s_c alloc_k,
struct bkey_buf *last_flushed)
static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);
static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
struct bkey_buf *last_flushed)
{
int ret = 0;
struct bch_fs *c = trans->c;
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
bool need_commit = false;
if (a->data_type == BCH_DATA_sb ||
a->data_type == BCH_DATA_journal ||
@ -846,6 +866,7 @@ static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, stru
struct btree_iter iter;
struct bkey_s_c bp_k;
int ret = 0;
for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
bucket_pos_to_bp_start(ca, alloc_k.k->p),
bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
@ -854,6 +875,17 @@ static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, stru
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen &&
(bp.v->bucket_gen != a->gen ||
bp.v->pad)) {
ret = bch2_backpointer_del(trans, bp_k.k->p);
if (ret)
break;
need_commit = true;
continue;
}
if (bp.v->bucket_gen != a->gen)
continue;
@ -863,30 +895,40 @@ static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, stru
if (ret)
goto err;
/* Cached pointers don't have backpointers: */
if (sectors[ALLOC_dirty] != a->dirty_sectors ||
sectors[ALLOC_stripe] != a->stripe_sectors) {
ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
if (need_commit) {
ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
if (ret)
goto err;
}
__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
/* Cached pointers don't have backpointers: */
if (sectors[ALLOC_dirty] != a->dirty_sectors ||
sectors[ALLOC_stripe] != a->stripe_sectors) {
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) {
ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
if (ret)
goto err;
}
if (sectors[ALLOC_dirty] > a->dirty_sectors ||
sectors[ALLOC_stripe] > a->stripe_sectors) {
ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
-BCH_ERR_transaction_restart_nested;
goto err;
}
if (!sectors[ALLOC_dirty] &&
!sectors[ALLOC_stripe])
__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty);
else
__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
}
err:
bch2_dev_put(ca);
return ret;
}
static int check_bucket_backpointer_mismatches(struct btree_trans *trans,
struct bkey_buf *last_flushed)
{
return for_each_btree_key(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k, ({
check_bucket_backpointer_mismatch_one(trans, k, last_flushed);
}));
}
static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
{
switch (k.k->type) {
@ -896,6 +938,9 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
rcu_read_lock();
struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key;
while (pos.inode <= k.k->p.inode) {
if (pos.inode >= c->sb.nr_devices)
break;
struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode);
if (!ca)
goto next;
@ -941,7 +986,7 @@ err:
}
static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans,
struct bpos start, struct bpos *end)
struct bpos start, struct bpos *end)
{
struct bch_fs *c = trans->c;
int ret = 0;
@ -1022,7 +1067,11 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
sizeof(unsigned long),
GFP_KERNEL);
if (!ca->bucket_backpointer_mismatches) {
ca->bucket_backpointer_empty = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
sizeof(unsigned long),
GFP_KERNEL);
if (!ca->bucket_backpointer_mismatches ||
!ca->bucket_backpointer_empty) {
bch2_dev_put(ca);
ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
goto err_free_bitmaps;
@ -1035,21 +1084,25 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
bch2_bkey_buf_init(&s.last_flushed);
bkey_init(&s.last_flushed.k->k);
ret = check_bucket_backpointer_mismatches(trans, &s.last_flushed);
ret = for_each_btree_key(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k, ({
check_bucket_backpointer_mismatch(trans, k, &s.last_flushed);
}));
if (ret)
goto err;
u64 nr_buckets = 0, nr_mismatches = 0;
u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0;
for_each_member_device(c, ca) {
nr_buckets += ca->mi.nbuckets;
nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets);
nr_buckets += ca->mi.nbuckets;
nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets);
nr_empty += bitmap_weight(ca->bucket_backpointer_empty, ca->mi.nbuckets);
}
if (!nr_mismatches)
if (!nr_mismatches && !nr_empty)
goto err;
bch_info(c, "scanning for missing backpointers in %llu/%llu buckets",
nr_mismatches, nr_buckets);
nr_mismatches + nr_empty, nr_buckets);
while (1) {
ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end);
@ -1086,6 +1139,8 @@ err:
bch2_btree_cache_unpin(c);
err_free_bitmaps:
for_each_member_device(c, ca) {
kvfree(ca->bucket_backpointer_empty);
ca->bucket_backpointer_empty = NULL;
kvfree(ca->bucket_backpointer_mismatches);
ca->bucket_backpointer_mismatches = NULL;
}
@ -1122,6 +1177,25 @@ static int check_one_backpointer(struct btree_trans *trans,
return ret;
}
static int check_bucket_backpointers_to_extents(struct btree_trans *trans,
struct bch_dev *ca, struct bpos bucket)
{
u32 restart_count = trans->restart_count;
struct bkey_buf last_flushed;
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers,
bucket_pos_to_bp_start(ca, bucket),
bucket_pos_to_bp_end(ca, bucket),
0, k,
check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed)
);
bch2_bkey_buf_exit(&last_flushed, trans->c);
return ret ?: trans_was_restarted(trans, restart_count);
}
static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
struct bbpos start,
struct bbpos end)

View File

@ -547,17 +547,16 @@ struct bch_dev {
/*
* Buckets:
* Per-bucket arrays are protected by c->mark_lock, bucket_lock and
* gc_gens_lock, for device resize - holding any is sufficient for
* access: Or rcu_read_lock(), but only for dev_ptr_stale():
* Per-bucket arrays are protected by either rcu_read_lock or
* state_lock, for device resize.
*/
GENRADIX(struct bucket) buckets_gc;
struct bucket_gens __rcu *bucket_gens;
u8 *oldest_gen;
unsigned long *buckets_nouse;
struct rw_semaphore bucket_lock;
unsigned long *bucket_backpointer_mismatches;
unsigned long *bucket_backpointer_empty;
struct bch_dev_usage __percpu *usage;

View File

@ -684,7 +684,8 @@ struct bch_sb_field_ext {
x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \
x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \
x(inode_depth, BCH_VERSION(1, 17)) \
x(persistent_inode_cursors, BCH_VERSION(1, 18))
x(persistent_inode_cursors, BCH_VERSION(1, 18)) \
x(autofix_errors, BCH_VERSION(1, 19))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@ -1287,14 +1288,18 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6);
/* Btree: */
enum btree_id_flags {
BTREE_ID_EXTENTS = BIT(0),
BTREE_ID_SNAPSHOTS = BIT(1),
BTREE_ID_SNAPSHOT_FIELD = BIT(2),
BTREE_ID_DATA = BIT(3),
BTREE_IS_extents = BIT(0),
BTREE_IS_snapshots = BIT(1),
BTREE_IS_snapshot_field = BIT(2),
BTREE_IS_data = BIT(3),
BTREE_IS_write_buffer = BIT(4),
};
#define BCH_BTREE_IDS() \
x(extents, 0, BTREE_ID_EXTENTS|BTREE_ID_SNAPSHOTS|BTREE_ID_DATA,\
x(extents, 0, \
BTREE_IS_extents| \
BTREE_IS_snapshots| \
BTREE_IS_data, \
BIT_ULL(KEY_TYPE_whiteout)| \
BIT_ULL(KEY_TYPE_error)| \
BIT_ULL(KEY_TYPE_cookie)| \
@ -1302,17 +1307,20 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_reservation)| \
BIT_ULL(KEY_TYPE_reflink_p)| \
BIT_ULL(KEY_TYPE_inline_data)) \
x(inodes, 1, BTREE_ID_SNAPSHOTS, \
x(inodes, 1, \
BTREE_IS_snapshots, \
BIT_ULL(KEY_TYPE_whiteout)| \
BIT_ULL(KEY_TYPE_inode)| \
BIT_ULL(KEY_TYPE_inode_v2)| \
BIT_ULL(KEY_TYPE_inode_v3)| \
BIT_ULL(KEY_TYPE_inode_generation)) \
x(dirents, 2, BTREE_ID_SNAPSHOTS, \
x(dirents, 2, \
BTREE_IS_snapshots, \
BIT_ULL(KEY_TYPE_whiteout)| \
BIT_ULL(KEY_TYPE_hash_whiteout)| \
BIT_ULL(KEY_TYPE_dirent)) \
x(xattrs, 3, BTREE_ID_SNAPSHOTS, \
x(xattrs, 3, \
BTREE_IS_snapshots, \
BIT_ULL(KEY_TYPE_whiteout)| \
BIT_ULL(KEY_TYPE_cookie)| \
BIT_ULL(KEY_TYPE_hash_whiteout)| \
@ -1326,7 +1334,9 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_quota)) \
x(stripes, 6, 0, \
BIT_ULL(KEY_TYPE_stripe)) \
x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \
x(reflink, 7, \
BTREE_IS_extents| \
BTREE_IS_data, \
BIT_ULL(KEY_TYPE_reflink_v)| \
BIT_ULL(KEY_TYPE_indirect_inline_data)| \
BIT_ULL(KEY_TYPE_error)) \
@ -1334,29 +1344,38 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_subvolume)) \
x(snapshots, 9, 0, \
BIT_ULL(KEY_TYPE_snapshot)) \
x(lru, 10, 0, \
x(lru, 10, \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_set)) \
x(freespace, 11, BTREE_ID_EXTENTS, \
x(freespace, 11, \
BTREE_IS_extents, \
BIT_ULL(KEY_TYPE_set)) \
x(need_discard, 12, 0, \
BIT_ULL(KEY_TYPE_set)) \
x(backpointers, 13, 0, \
x(backpointers, 13, \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_backpointer)) \
x(bucket_gens, 14, 0, \
BIT_ULL(KEY_TYPE_bucket_gens)) \
x(snapshot_trees, 15, 0, \
BIT_ULL(KEY_TYPE_snapshot_tree)) \
x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \
x(deleted_inodes, 16, \
BTREE_IS_snapshot_field| \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_set)) \
x(logged_ops, 17, 0, \
BIT_ULL(KEY_TYPE_logged_op_truncate)| \
BIT_ULL(KEY_TYPE_logged_op_finsert)| \
BIT_ULL(KEY_TYPE_inode_alloc_cursor)) \
x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
x(rebalance_work, 18, \
BTREE_IS_snapshot_field| \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \
x(subvolume_children, 19, 0, \
BIT_ULL(KEY_TYPE_set)) \
x(accounting, 20, BTREE_ID_SNAPSHOT_FIELD, \
x(accounting, 20, \
BTREE_IS_snapshot_field| \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_accounting)) \
enum btree_id {

View File

@ -733,16 +733,8 @@ static int bch2_gc_btrees(struct bch_fs *c)
continue;
ret = bch2_gc_btree(trans, btree, true);
if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
trans, btree_node_read_error,
"btree node read error for %s",
(printbuf_reset(&buf),
bch2_btree_id_to_text(&buf, btree),
buf.buf)))
ret = bch2_btree_lost_data(c, btree);
}
fsck_err:
printbuf_exit(&buf);
bch2_trans_put(trans);
bch_err_fn(c, ret);
@ -811,7 +803,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
old = bch2_alloc_to_v4(k, &old_convert);
gc = new = *old;
percpu_down_read(&c->mark_lock);
__bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset));
old_gc = gc;
@ -822,7 +813,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
gc.data_type = old->data_type;
gc.dirty_sectors = old->dirty_sectors;
}
percpu_up_read(&c->mark_lock);
/*
* gc.data_type doesn't yet include need_discard & need_gc_gen states -
@ -840,11 +830,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
* safe w.r.t. transaction restarts, so fixup the gc_bucket so
* we don't run it twice:
*/
percpu_down_read(&c->mark_lock);
struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
gc_m->data_type = gc.data_type;
gc_m->dirty_sectors = gc.dirty_sectors;
percpu_up_read(&c->mark_lock);
}
if (fsck_err_on(new.data_type != gc.data_type,
@ -1088,7 +1076,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
return -EROFS;
percpu_down_read(&c->mark_lock);
rcu_read_lock();
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
@ -1097,7 +1084,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
if (dev_ptr_stale(ca, ptr) > 16) {
rcu_read_unlock();
percpu_up_read(&c->mark_lock);
goto update;
}
}
@ -1112,7 +1098,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
*gen = ptr->gen;
}
rcu_read_unlock();
percpu_up_read(&c->mark_lock);
return 0;
update:
u = bch2_bkey_make_mut(trans, iter, &k, 0);
@ -1141,7 +1126,6 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev
return ret;
a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
alloc_data_type_set(&a_mut->v, a_mut->v.data_type);
return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
}
@ -1254,9 +1238,16 @@ void bch2_gc_gens_async(struct bch_fs *c)
bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
}
void bch2_fs_gc_init(struct bch_fs *c)
void bch2_fs_btree_gc_exit(struct bch_fs *c)
{
}
int bch2_fs_btree_gc_init(struct bch_fs *c)
{
seqcount_init(&c->gc_pos_lock);
INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);
init_rwsem(&c->gc_lock);
mutex_init(&c->gc_gens_lock);
return 0;
}

View File

@ -82,6 +82,8 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *);
int bch2_gc_gens(struct bch_fs *);
void bch2_gc_gens_async(struct bch_fs *);
void bch2_fs_gc_init(struct bch_fs *);
void bch2_fs_btree_gc_exit(struct bch_fs *);
int bch2_fs_btree_gc_init(struct bch_fs *);
#endif /* _BCACHEFS_BTREE_GC_H */

View File

@ -489,8 +489,8 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
if (b->nsets == MAX_BSETS &&
!btree_node_write_in_flight(b) &&
should_compact_all(c, b)) {
bch2_btree_node_write(c, b, SIX_LOCK_write,
BTREE_WRITE_init_next_bset);
bch2_btree_node_write_trans(trans, b, SIX_LOCK_write,
BTREE_WRITE_init_next_bset);
reinit_iter = true;
}
@ -2345,6 +2345,34 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
}
}
void bch2_btree_node_write_trans(struct btree_trans *trans, struct btree *b,
enum six_lock_type lock_type_held,
unsigned flags)
{
struct bch_fs *c = trans->c;
if (lock_type_held == SIX_LOCK_intent ||
(lock_type_held == SIX_LOCK_read &&
six_lock_tryupgrade(&b->c.lock))) {
__bch2_btree_node_write(c, b, flags);
/* don't cycle lock unnecessarily: */
if (btree_node_just_written(b) &&
six_trylock_write(&b->c.lock)) {
bch2_btree_post_write_cleanup(c, b);
__bch2_btree_node_unlock_write(trans, b);
}
if (lock_type_held == SIX_LOCK_read)
six_lock_downgrade(&b->c.lock);
} else {
__bch2_btree_node_write(c, b, flags);
if (lock_type_held == SIX_LOCK_write &&
btree_node_just_written(b))
bch2_btree_post_write_cleanup(c, b);
}
}
static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
{
struct bucket_table *tbl;

View File

@ -144,11 +144,13 @@ enum btree_write_flags {
void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
void bch2_btree_node_write(struct bch_fs *, struct btree *,
enum six_lock_type, unsigned);
void bch2_btree_node_write_trans(struct btree_trans *, struct btree *,
enum six_lock_type, unsigned);
static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
static inline void btree_node_write_if_need(struct btree_trans *trans, struct btree *b,
enum six_lock_type lock_held)
{
bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
bch2_btree_node_write_trans(trans, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
}
bool bch2_btree_flush_all_reads(struct bch_fs *);

View File

@ -699,6 +699,19 @@ void bch2_trans_node_add(struct btree_trans *trans,
bch2_trans_revalidate_updates_in_node(trans, b);
}
void bch2_trans_node_drop(struct btree_trans *trans,
struct btree *b)
{
struct btree_path *path;
unsigned i, level = b->c.level;
trans_for_each_path(trans, path, i)
if (path->l[level].b == b) {
btree_node_unlock(trans, path, level);
path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
}
}
/*
* A btree node has been modified in such a way as to invalidate iterators - fix
* them:
@ -1854,7 +1867,7 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *
!bkey_eq(path->pos, ck->key.pos));
*u = ck->k->k;
k = bkey_i_to_s_c(ck->k);
k = (struct bkey_s_c) { u, &ck->k->v };
}
return k;
@ -2144,21 +2157,18 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
}
static noinline
struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
void btree_trans_peek_journal(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c *k)
{
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *next_journal =
bch2_btree_journal_peek(trans, iter,
k.k ? k.k->p : path_l(path)->b->key.k.p);
k->k ? k->k->p : path_l(path)->b->key.k.p);
if (next_journal) {
iter->k = next_journal->k;
k = bkey_i_to_s_c(next_journal);
*k = bkey_i_to_s_c(next_journal);
}
return k;
}
static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans,
@ -2175,21 +2185,19 @@ static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans,
}
static noinline
struct bkey_s_c btree_trans_peek_prev_journal(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
void btree_trans_peek_prev_journal(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c *k)
{
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *next_journal =
bch2_btree_journal_peek_prev(trans, iter,
k.k ? k.k->p : path_l(path)->b->key.k.p);
k->k ? k->k->p : path_l(path)->b->key.k.p);
if (next_journal) {
iter->k = next_journal->k;
k = bkey_i_to_s_c(next_journal);
*k = bkey_i_to_s_c(next_journal);
}
return k;
}
/*
@ -2234,10 +2242,15 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path);
k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u);
if (k.k && !bkey_err(k)) {
iter->k = u;
k.k = &iter->k;
}
if (!k.k)
return k;
if ((iter->flags & BTREE_ITER_all_snapshots) &&
!bpos_eq(pos, k.k->p))
return bkey_s_c_null;
iter->k = u;
k.k = &iter->k;
return k;
}
@ -2260,7 +2273,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
/* ensure that iter->k is consistent with iter->pos: */
bch2_btree_iter_set_pos(iter, iter->pos);
k = bkey_s_c_err(ret);
goto out;
break;
}
struct btree_path *path = btree_iter_path(trans, iter);
@ -2270,7 +2283,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
/* No btree nodes at requested level: */
bch2_btree_iter_set_pos(iter, SPOS_MAX);
k = bkey_s_c_null;
goto out;
break;
}
btree_path_set_should_be_locked(trans, path);
@ -2281,15 +2294,14 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
k.k &&
(k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
k = k2;
ret = bkey_err(k);
if (ret) {
if (bkey_err(k)) {
bch2_btree_iter_set_pos(iter, iter->pos);
goto out;
break;
}
}
if (unlikely(iter->flags & BTREE_ITER_with_journal))
k = btree_trans_peek_journal(trans, iter, k);
btree_trans_peek_journal(trans, iter, &k);
if (unlikely((iter->flags & BTREE_ITER_with_updates) &&
trans->nr_updates))
@ -2318,12 +2330,11 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
/* End of btree: */
bch2_btree_iter_set_pos(iter, SPOS_MAX);
k = bkey_s_c_null;
goto out;
break;
}
}
out:
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(iter);
return k;
}
@ -2424,7 +2435,8 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
continue;
}
if (bkey_whiteout(k.k)) {
if (bkey_whiteout(k.k) &&
!(iter->flags & BTREE_ITER_key_cache_fill)) {
search_key = bkey_successor(iter, k.k->p);
continue;
}
@ -2547,7 +2559,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
}
if (unlikely(iter->flags & BTREE_ITER_with_journal))
k = btree_trans_peek_prev_journal(trans, iter, k);
btree_trans_peek_prev_journal(trans, iter, &k);
if (unlikely((iter->flags & BTREE_ITER_with_updates) &&
trans->nr_updates))
@ -2784,6 +2796,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k);
if (unlikely(!k.k))
goto out_no_locked;
if (unlikely(k.k->type == KEY_TYPE_whiteout &&
(iter->flags & BTREE_ITER_filter_snapshots) &&
!(iter->flags & BTREE_ITER_key_cache_fill)))
iter->k.type = KEY_TYPE_deleted;
} else {
struct bpos next;
struct bpos end = iter->pos;
@ -3028,7 +3045,7 @@ void bch2_trans_iter_init_outlined(struct btree_trans *trans,
unsigned flags)
{
bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
bch2_btree_iter_flags(trans, btree_id, flags),
bch2_btree_iter_flags(trans, btree_id, 0, flags),
_RET_IP_);
}
@ -3044,8 +3061,11 @@ void bch2_trans_node_iter_init(struct btree_trans *trans,
flags |= BTREE_ITER_snapshot_field;
flags |= BTREE_ITER_all_snapshots;
if (!depth && btree_id_cached(trans->c, btree_id))
flags |= BTREE_ITER_with_key_cache;
bch2_trans_iter_init_common(trans, iter, btree_id, pos, locks_want, depth,
__bch2_btree_iter_flags(trans, btree_id, flags),
bch2_btree_iter_flags(trans, btree_id, depth, flags),
_RET_IP_);
iter->min_depth = depth;

View File

@ -372,6 +372,7 @@ static inline void bch2_btree_path_downgrade(struct btree_trans *trans,
void bch2_trans_downgrade(struct btree_trans *);
void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct btree *);
void bch2_trans_node_drop(struct btree_trans *trans, struct btree *);
void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *);
int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
@ -446,10 +447,17 @@ static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 sna
void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
unsigned btree_id,
unsigned flags)
static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans,
unsigned btree_id,
unsigned level,
unsigned flags)
{
if (level || !btree_id_cached(trans->c, btree_id)) {
flags &= ~BTREE_ITER_cached;
flags &= ~BTREE_ITER_with_key_cache;
} else if (!(flags & BTREE_ITER_cached))
flags |= BTREE_ITER_with_key_cache;
if (!(flags & (BTREE_ITER_all_snapshots|BTREE_ITER_not_extents)) &&
btree_id_is_extents(btree_id))
flags |= BTREE_ITER_is_extents;
@ -468,19 +476,6 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
return flags;
}
static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans,
unsigned btree_id,
unsigned flags)
{
if (!btree_id_cached(trans->c, btree_id)) {
flags &= ~BTREE_ITER_cached;
flags &= ~BTREE_ITER_with_key_cache;
} else if (!(flags & BTREE_ITER_cached))
flags |= BTREE_ITER_with_key_cache;
return __bch2_btree_iter_flags(trans, btree_id, flags);
}
static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
struct btree_iter *iter,
unsigned btree_id, struct bpos pos,
@ -517,7 +512,7 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans,
if (__builtin_constant_p(btree_id) &&
__builtin_constant_p(flags))
bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
bch2_btree_iter_flags(trans, btree_id, flags),
bch2_btree_iter_flags(trans, btree_id, 0, flags),
_THIS_IP_);
else
bch2_trans_iter_init_outlined(trans, iter, btree_id, pos, flags);

View File

@ -197,7 +197,9 @@ out:
return ck;
}
static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *path,
static int btree_key_cache_create(struct btree_trans *trans,
struct btree_path *path,
struct btree_path *ck_path,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
@ -217,7 +219,7 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
key_u64s = min(256U, (key_u64s * 3) / 2);
key_u64s = roundup_pow_of_two(key_u64s);
struct bkey_cached *ck = bkey_cached_alloc(trans, path, key_u64s);
struct bkey_cached *ck = bkey_cached_alloc(trans, ck_path, key_u64s);
int ret = PTR_ERR_OR_ZERO(ck);
if (ret)
return ret;
@ -226,19 +228,19 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
ck = bkey_cached_reuse(bc);
if (unlikely(!ck)) {
bch_err(c, "error allocating memory for key cache item, btree %s",
bch2_btree_id_str(path->btree_id));
bch2_btree_id_str(ck_path->btree_id));
return -BCH_ERR_ENOMEM_btree_key_cache_create;
}
}
ck->c.level = 0;
ck->c.btree_id = path->btree_id;
ck->key.btree_id = path->btree_id;
ck->key.pos = path->pos;
ck->c.btree_id = ck_path->btree_id;
ck->key.btree_id = ck_path->btree_id;
ck->key.pos = ck_path->pos;
ck->flags = 1U << BKEY_CACHED_ACCESSED;
if (unlikely(key_u64s > ck->u64s)) {
mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED);
struct bkey_i *new_k = allocate_dropping_locks(trans, ret,
kmalloc(key_u64s * sizeof(u64), _gfp));
@ -258,22 +260,29 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
bkey_reassemble(ck->k, k);
ret = bch2_btree_node_lock_write(trans, path, &path_l(path)->b->c);
if (unlikely(ret))
goto err;
ret = rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params);
bch2_btree_node_unlock_write(trans, path, path_l(path)->b);
if (unlikely(ret)) /* raced with another fill? */
goto err;
atomic_long_inc(&bc->nr_keys);
six_unlock_write(&ck->c.lock);
enum six_lock_type lock_want = __btree_lock_want(path, 0);
enum six_lock_type lock_want = __btree_lock_want(ck_path, 0);
if (lock_want == SIX_LOCK_read)
six_lock_downgrade(&ck->c.lock);
btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
path->uptodate = BTREE_ITER_UPTODATE;
btree_path_cached_set(trans, ck_path, ck, (enum btree_node_locked_type) lock_want);
ck_path->uptodate = BTREE_ITER_UPTODATE;
return 0;
err:
bkey_cached_free(bc, ck);
mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED);
return ret;
}
@ -293,6 +302,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
int ret;
bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos,
BTREE_ITER_intent|
BTREE_ITER_key_cache_fill|
BTREE_ITER_cached_nofill);
iter.flags &= ~BTREE_ITER_with_journal;
@ -306,9 +316,19 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
if (unlikely(ret))
goto out;
ret = btree_key_cache_create(trans, ck_path, k);
ret = btree_key_cache_create(trans, btree_iter_path(trans, &iter), ck_path, k);
if (ret)
goto err;
if (trace_key_cache_fill_enabled()) {
struct printbuf buf = PRINTBUF;
bch2_bpos_to_text(&buf, ck_path->pos);
prt_char(&buf, ' ');
bch2_bkey_val_to_text(&buf, trans->c, k);
trace_key_cache_fill(trans, buf.buf);
printbuf_exit(&buf);
}
out:
/* We're not likely to need this iterator again: */
bch2_set_btree_iter_dontneed(&iter);
@ -593,8 +613,18 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
bkey_cached_free(bc, ck);
mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
path->should_be_locked = false;
struct btree_path *path2;
unsigned i;
trans_for_each_path(trans, path2, i)
if (path2->l[0].b == (void *) ck) {
__bch2_btree_path_unlock(trans, path2);
path2->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_drop);
path2->should_be_locked = false;
btree_path_set_dirty(path2, BTREE_ITER_NEED_TRAVERSE);
}
bch2_trans_verify_locks(trans);
}
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,

View File

@ -818,6 +818,17 @@ void bch2_trans_unlock_long(struct btree_trans *trans)
bch2_trans_srcu_unlock(trans);
}
void bch2_trans_unlock_write(struct btree_trans *trans)
{
struct btree_path *path;
unsigned i;
trans_for_each_path(trans, path, i)
for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++)
if (btree_node_write_locked(path, l))
bch2_btree_node_unlock_write(trans, path, path->l[l].b);
}
int __bch2_trans_mutex_lock(struct btree_trans *trans,
struct mutex *lock)
{
@ -856,6 +867,9 @@ void bch2_btree_path_verify_locks(struct btree_path *path)
(want == BTREE_NODE_UNLOCKED ||
have != BTREE_NODE_WRITE_LOCKED) &&
want != have);
BUG_ON(btree_node_locked(path, l) &&
path->l[l].lock_seq != six_lock_seq(&path->l[l].b->c.lock));
}
}

View File

@ -16,6 +16,7 @@
void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags);
void bch2_trans_unlock_noassert(struct btree_trans *);
void bch2_trans_unlock_write(struct btree_trans *);
static inline bool is_btree_node(struct btree_path *path, unsigned l)
{
@ -75,13 +76,6 @@ static inline void mark_btree_node_locked_noreset(struct btree_path *path,
path->nodes_locked |= (type + 1) << (level << 1);
}
static inline void mark_btree_node_unlocked(struct btree_path *path,
unsigned level)
{
EBUG_ON(btree_node_write_locked(path, level));
mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED);
}
static inline void mark_btree_node_locked(struct btree_trans *trans,
struct btree_path *path,
unsigned level,
@ -124,19 +118,25 @@ static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
/* unlock: */
void bch2_btree_node_unlock_write(struct btree_trans *,
struct btree_path *, struct btree *);
static inline void btree_node_unlock(struct btree_trans *trans,
struct btree_path *path, unsigned level)
{
int lock_type = btree_node_locked_type(path, level);
EBUG_ON(level >= BTREE_MAX_DEPTH);
EBUG_ON(lock_type == BTREE_NODE_WRITE_LOCKED);
if (lock_type != BTREE_NODE_UNLOCKED) {
if (unlikely(lock_type == BTREE_NODE_WRITE_LOCKED)) {
bch2_btree_node_unlock_write(trans, path, path->l[level].b);
lock_type = BTREE_NODE_INTENT_LOCKED;
}
six_unlock_type(&path->l[level].b->c.lock, lock_type);
btree_trans_lock_hold_time_update(trans, path, level);
mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED);
}
mark_btree_node_unlocked(path, level);
}
static inline int btree_path_lowest_level_locked(struct btree_path *path)
@ -162,28 +162,32 @@ static inline void __bch2_btree_path_unlock(struct btree_trans *trans,
* Updates the saved lock sequence number, so that bch2_btree_node_relock() will
* succeed:
*/
static inline void
__bch2_btree_node_unlock_write(struct btree_trans *trans, struct btree *b)
{
if (!b->c.lock.write_lock_recurse) {
struct btree_path *linked;
unsigned i;
trans_for_each_path_with_node(trans, b, linked, i)
linked->l[b->c.level].lock_seq++;
}
six_unlock_write(&b->c.lock);
}
static inline void
bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_path *path,
struct btree *b)
{
struct btree_path *linked;
unsigned i;
EBUG_ON(path->l[b->c.level].b != b);
EBUG_ON(path->l[b->c.level].lock_seq != six_lock_seq(&b->c.lock));
EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write);
mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
trans_for_each_path_with_node(trans, b, linked, i)
linked->l[b->c.level].lock_seq++;
six_unlock_write(&b->c.lock);
__bch2_btree_node_unlock_write(trans, b);
}
void bch2_btree_node_unlock_write(struct btree_trans *,
struct btree_path *, struct btree *);
int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);
/* lock: */

View File

@ -152,7 +152,7 @@ static inline void found_btree_node_swap(void *_l, void *_r, void *arg)
swap(*l, *r);
}
const struct min_heap_callbacks found_btree_node_heap_cbs = {
static const struct min_heap_callbacks found_btree_node_heap_cbs = {
.less = found_btree_node_cmp_pos_less,
.swp = found_btree_node_swap,
};

View File

@ -133,7 +133,7 @@ static inline int bch2_trans_lock_write(struct btree_trans *trans)
return 0;
}
static inline void bch2_trans_unlock_write(struct btree_trans *trans)
static inline void bch2_trans_unlock_updates_write(struct btree_trans *trans)
{
if (likely(trans->write_locked)) {
trans_for_each_update(trans, i)
@ -249,7 +249,7 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
new |= 1 << BTREE_NODE_need_write;
} while (!try_cmpxchg(&b->flags, &old, new));
btree_node_write_if_need(c, b, SIX_LOCK_read);
btree_node_write_if_need(trans, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
bch2_trans_put(trans);
@ -384,7 +384,7 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
struct bkey_i *new_k;
int ret;
bch2_trans_unlock_write(trans);
bch2_trans_unlock_updates_write(trans);
bch2_trans_unlock(trans);
new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
@ -479,8 +479,7 @@ static int run_one_mem_trigger(struct btree_trans *trans,
old, flags);
}
static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i,
bool overwrite)
static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i)
{
verify_update_old_key(trans, i);
@ -507,10 +506,10 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
return bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k),
BTREE_TRIGGER_insert|
BTREE_TRIGGER_overwrite|flags) ?: 1;
} else if (overwrite && !i->overwrite_trigger_run) {
} else if (!i->overwrite_trigger_run) {
i->overwrite_trigger_run = true;
return bch2_key_trigger_old(trans, i->btree_id, i->level, old, flags) ?: 1;
} else if (!overwrite && !i->insert_trigger_run) {
} else if (!i->insert_trigger_run) {
i->insert_trigger_run = true;
return bch2_key_trigger_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), flags) ?: 1;
} else {
@ -519,39 +518,45 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
}
static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
unsigned btree_id_start)
unsigned *btree_id_updates_start)
{
for (int overwrite = 1; overwrite >= 0; --overwrite) {
bool trans_trigger_run;
bool trans_trigger_run;
/*
* Running triggers will append more updates to the list of updates as
* we're walking it:
*/
do {
trans_trigger_run = false;
/*
* Running triggers will append more updates to the list of updates as
* we're walking it:
*/
do {
trans_trigger_run = false;
for (unsigned i = btree_id_start;
i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
i++) {
if (trans->updates[i].btree_id != btree_id)
continue;
int ret = run_one_trans_trigger(trans, trans->updates + i, overwrite);
if (ret < 0)
return ret;
if (ret)
trans_trigger_run = true;
for (unsigned i = *btree_id_updates_start;
i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
i++) {
if (trans->updates[i].btree_id < btree_id) {
*btree_id_updates_start = i;
continue;
}
} while (trans_trigger_run);
}
int ret = run_one_trans_trigger(trans, trans->updates + i);
if (ret < 0)
return ret;
if (ret)
trans_trigger_run = true;
}
} while (trans_trigger_run);
trans_for_each_update(trans, i)
BUG_ON(!(i->flags & BTREE_TRIGGER_norun) &&
i->btree_id == btree_id &&
btree_node_type_has_trans_triggers(i->bkey_type) &&
(!i->insert_trigger_run || !i->overwrite_trigger_run));
return 0;
}
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
{
unsigned btree_id = 0, btree_id_start = 0;
unsigned btree_id = 0, btree_id_updates_start = 0;
int ret = 0;
/*
@ -565,27 +570,15 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
if (btree_id == BTREE_ID_alloc)
continue;
while (btree_id_start < trans->nr_updates &&
trans->updates[btree_id_start].btree_id < btree_id)
btree_id_start++;
ret = run_btree_triggers(trans, btree_id, btree_id_start);
ret = run_btree_triggers(trans, btree_id, &btree_id_updates_start);
if (ret)
return ret;
}
for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
struct btree_insert_entry *i = trans->updates + idx;
if (i->btree_id > BTREE_ID_alloc)
break;
if (i->btree_id == BTREE_ID_alloc) {
ret = run_btree_triggers(trans, BTREE_ID_alloc, idx);
if (ret)
return ret;
break;
}
}
btree_id_updates_start = 0;
ret = run_btree_triggers(trans, BTREE_ID_alloc, &btree_id_updates_start);
if (ret)
return ret;
#ifdef CONFIG_BCACHEFS_DEBUG
trans_for_each_update(trans, i)
@ -875,7 +868,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
if (!ret && unlikely(trans->journal_replay_not_finished))
bch2_drop_overwrites_from_journal(trans);
bch2_trans_unlock_write(trans);
bch2_trans_unlock_updates_write(trans);
if (!ret && trans->journal_pin)
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,

View File

@ -790,53 +790,64 @@ static inline bool btree_node_type_has_triggers(enum btree_node_type type)
return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS;
}
static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
BCH_BTREE_IDS()
#undef x
;
return BIT_ULL(type) & mask;
}
static inline bool btree_id_is_extents(enum btree_id btree)
{
return btree_node_type_is_extents(__btree_node_type(0, btree));
}
static inline bool btree_type_has_snapshots(enum btree_id id)
{
const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_extents)) << nr)
BCH_BTREE_IDS()
#undef x
;
return BIT_ULL(id) & mask;
return BIT_ULL(btree) & mask;
}
static inline bool btree_type_has_snapshot_field(enum btree_id id)
static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
return type != BKEY_TYPE_btree && btree_id_is_extents(type - 1);
}
static inline bool btree_type_has_snapshots(enum btree_id btree)
{
const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_snapshots)) << nr)
BCH_BTREE_IDS()
#undef x
;
return BIT_ULL(id) & mask;
return BIT_ULL(btree) & mask;
}
static inline bool btree_type_has_ptrs(enum btree_id id)
static inline bool btree_type_has_snapshot_field(enum btree_id btree)
{
const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_IS_snapshot_field|BTREE_IS_snapshots))) << nr)
BCH_BTREE_IDS()
#undef x
;
return BIT_ULL(id) & mask;
return BIT_ULL(btree) & mask;
}
static inline bool btree_type_has_ptrs(enum btree_id btree)
{
const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_data)) << nr)
BCH_BTREE_IDS()
#undef x
;
return BIT_ULL(btree) & mask;
}
static inline bool btree_type_uses_write_buffer(enum btree_id btree)
{
const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_write_buffer)) << nr)
BCH_BTREE_IDS()
#undef x
;
return BIT_ULL(btree) & mask;
}
struct btree_root {

View File

@ -823,10 +823,17 @@ int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
return bch2_trans_update_buffered(trans, btree, &k);
}
static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf, unsigned u64s)
int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)
{
unsigned u64s = DIV_ROUND_UP(buf->pos, sizeof(u64));
prt_chars(buf, '\0', u64s * sizeof(u64) - buf->pos);
int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
if (ret)
return ret;
struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s));
int ret = PTR_ERR_OR_ZERO(e);
ret = PTR_ERR_OR_ZERO(e);
if (ret)
return ret;
@ -862,7 +869,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
c->journal.early_journal_entries.nr += jset_u64s(u64s);
} else {
ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags,
__bch2_trans_log_msg(trans, &buf, u64s));
bch2_trans_log_msg(trans, &buf));
}
err:
printbuf_exit(&buf);

View File

@ -159,6 +159,7 @@ void bch2_trans_commit_hook(struct btree_trans *,
struct btree_trans_commit_hook *);
int __bch2_trans_commit(struct btree_trans *, unsigned);
int bch2_trans_log_msg(struct btree_trans *, struct printbuf *);
__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);

View File

@ -238,7 +238,6 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
struct btree *b)
{
struct bch_fs *c = trans->c;
unsigned i, level = b->c.level;
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
@ -249,13 +248,9 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
mutex_unlock(&c->btree_cache.lock);
six_unlock_write(&b->c.lock);
mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
trans_for_each_path(trans, path, i)
if (path->l[level].b == b) {
btree_node_unlock(trans, path, level);
path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
}
bch2_trans_node_drop(trans, b);
}
static void bch2_btree_node_free_never_used(struct btree_update *as,
@ -264,8 +259,6 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
{
struct bch_fs *c = as->c;
struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL];
struct btree_path *path;
unsigned i, level = b->c.level;
BUG_ON(!list_empty(&b->write_blocked));
BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as));
@ -287,11 +280,7 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
six_unlock_intent(&b->c.lock);
trans_for_each_path(trans, path, i)
if (path->l[level].b == b) {
btree_node_unlock(trans, path, level);
path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
}
bch2_trans_node_drop(trans, b);
}
static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
@ -803,7 +792,7 @@ err:
mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
six_unlock_write(&b->c.lock);
btree_node_write_if_need(c, b, SIX_LOCK_intent);
btree_node_write_if_need(trans, b, SIX_LOCK_intent);
btree_node_unlock(trans, path, b->c.level);
bch2_path_put(trans, path_idx, true);
}
@ -824,7 +813,7 @@ err:
b = as->new_nodes[i];
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
btree_node_write_if_need(c, b, SIX_LOCK_read);
btree_node_write_if_need(trans, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
}
@ -1709,14 +1698,14 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
if (n3) {
bch2_btree_update_get_open_buckets(as, n3);
bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
bch2_btree_node_write_trans(trans, n3, SIX_LOCK_intent, 0);
}
if (n2) {
bch2_btree_update_get_open_buckets(as, n2);
bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
bch2_btree_node_write_trans(trans, n2, SIX_LOCK_intent, 0);
}
bch2_btree_update_get_open_buckets(as, n1);
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
bch2_btree_node_write_trans(trans, n1, SIX_LOCK_intent, 0);
/*
* The old node must be freed (in memory) _before_ unlocking the new
@ -1911,7 +1900,7 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans *
BUG_ON(ret);
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0);
bch2_trans_node_add(trans, path, n);
six_unlock_intent(&n->c.lock);
@ -2104,7 +2093,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_trans_verify_paths(trans);
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0);
bch2_btree_node_free_inmem(trans, trans->paths + path, b);
bch2_btree_node_free_inmem(trans, trans->paths + sib_path, m);
@ -2181,7 +2170,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
bch2_btree_interior_update_will_free_node(as, b);
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0);
bch2_btree_node_free_inmem(trans, btree_iter_path(trans, iter), b);
@ -2291,7 +2280,8 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
bool now = false, pending = false;
spin_lock(&c->btree_node_rewrites_lock);
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_journal_replay &&
bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
list_add(&a->list, &c->btree_node_rewrites);
now = true;
} else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) {

View File

@ -312,6 +312,8 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
darray_for_each(wb->sorted, i) {
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
BUG_ON(!btree_type_uses_write_buffer(k->btree));
for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
prefetch(&wb->flushing.keys.data[n->idx]);
@ -632,6 +634,14 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
bch2_bkey_buf_init(&tmp);
if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) {
if (trace_write_buffer_maybe_flush_enabled()) {
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, referring_k);
trace_write_buffer_maybe_flush(trans, _RET_IP_, buf.buf);
printbuf_exit(&buf);
}
bch2_bkey_buf_reassemble(&tmp, c, referring_k);
if (bkey_is_btree_ptr(referring_k.k)) {

View File

@ -262,8 +262,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
int ret = 0;
percpu_down_read(&c->mark_lock);
bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) {
ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update);
if (ret)
@ -364,7 +362,6 @@ found:
bch_info(c, "new key %s", buf.buf);
}
percpu_up_read(&c->mark_lock);
struct btree_iter iter;
bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level,
BTREE_ITER_intent|BTREE_ITER_all_snapshots);
@ -373,8 +370,6 @@ found:
BTREE_UPDATE_internal_snapshot_node|
BTREE_TRIGGER_norun);
bch2_trans_iter_exit(trans, &iter);
percpu_down_read(&c->mark_lock);
if (ret)
goto err;
@ -382,7 +377,6 @@ found:
bch2_btree_node_update_key_early(trans, btree, level - 1, k, new);
}
err:
percpu_up_read(&c->mark_lock);
printbuf_exit(&buf);
return ret;
}
@ -547,7 +541,8 @@ static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca,
struct bkey_s_c k,
const struct extent_ptr_decoded *p,
s64 sectors, enum bch_data_type ptr_data_type,
struct bch_alloc_v4 *a)
struct bch_alloc_v4 *a,
bool insert)
{
u32 *dst_sectors = p->has_ec ? &a->stripe_sectors :
!p->ptr.cached ? &a->dirty_sectors :
@ -557,8 +552,8 @@ static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca,
if (ret)
return ret;
alloc_data_type_set(a, ptr_data_type);
if (insert)
alloc_data_type_set(a, ptr_data_type);
return 0;
}
@ -591,7 +586,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
if (flags & BTREE_TRIGGER_transactional) {
struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);
ret = PTR_ERR_OR_ZERO(a) ?:
__mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v);
__mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert);
if (ret)
goto err;
@ -603,22 +598,19 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
}
if (flags & BTREE_TRIGGER_gc) {
percpu_down_read(&c->mark_lock);
struct bucket *g = gc_bucket(ca, bucket.offset);
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
p.ptr.dev,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = -BCH_ERR_trigger_pointer;
goto err_unlock;
goto err;
}
bucket_lock(g);
struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old;
ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new);
ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert);
alloc_to_bucket(g, new);
bucket_unlock(g);
err_unlock:
percpu_up_read(&c->mark_lock);
if (!ret)
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
@ -996,11 +988,10 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
struct bch_fs *c = trans->c;
int ret = 0;
percpu_down_read(&c->mark_lock);
struct bucket *g = gc_bucket(ca, b);
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s",
ca->dev_idx, bch2_data_type_str(data_type)))
goto err_unlock;
goto err;
bucket_lock(g);
struct bch_alloc_v4 old = bucket_m_to_alloc(*g);
@ -1010,26 +1001,24 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
"different types of data in same bucket: %s, %s",
bch2_data_type_str(g->data_type),
bch2_data_type_str(data_type)))
goto err;
goto err_unlock;
if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
"bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size",
ca->dev_idx, b, g->gen,
bch2_data_type_str(g->data_type ?: data_type),
g->dirty_sectors, sectors))
goto err;
goto err_unlock;
g->data_type = data_type;
g->dirty_sectors += sectors;
struct bch_alloc_v4 new = bucket_m_to_alloc(*g);
bucket_unlock(g);
percpu_up_read(&c->mark_lock);
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
return ret;
err:
bucket_unlock(g);
err_unlock:
percpu_up_read(&c->mark_lock);
bucket_unlock(g);
err:
return -BCH_ERR_metadata_bucket_inconsistency;
}
@ -1269,7 +1258,7 @@ int bch2_buckets_nouse_alloc(struct bch_fs *c)
for_each_member_device(c, ca) {
BUG_ON(ca->buckets_nouse);
ca->buckets_nouse = kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) *
ca->buckets_nouse = bch2_kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) *
sizeof(unsigned long),
GFP_KERNEL|__GFP_ZERO);
if (!ca->buckets_nouse) {
@ -1295,10 +1284,14 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
bool resize = ca->bucket_gens != NULL;
int ret;
BUG_ON(resize && ca->buckets_nouse);
if (resize)
lockdep_assert_held(&c->state_lock);
bucket_gens = kvmalloc(struct_size(bucket_gens, b, nbuckets),
GFP_KERNEL|__GFP_ZERO);
if (resize && ca->buckets_nouse)
return -BCH_ERR_no_resize_with_buckets_nouse;
bucket_gens = bch2_kvmalloc(struct_size(bucket_gens, b, nbuckets),
GFP_KERNEL|__GFP_ZERO);
if (!bucket_gens) {
ret = -BCH_ERR_ENOMEM_bucket_gens;
goto err;
@ -1309,11 +1302,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
bucket_gens->nbuckets_minus_first =
bucket_gens->nbuckets - bucket_gens->first_bucket;
if (resize) {
down_write(&ca->bucket_lock);
percpu_down_write(&c->mark_lock);
}
old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
if (resize) {
@ -1331,11 +1319,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
nbuckets = ca->mi.nbuckets;
if (resize) {
percpu_up_write(&c->mark_lock);
up_write(&ca->bucket_lock);
}
ret = 0;
err:
if (bucket_gens)

View File

@ -82,16 +82,15 @@ static inline void bucket_lock(struct bucket *b)
static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
{
return genradix_ptr(&ca->buckets_gc, b);
return bucket_valid(ca, b)
? genradix_ptr(&ca->buckets_gc, b)
: NULL;
}
static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
{
return rcu_dereference_check(ca->bucket_gens,
!ca->fs ||
percpu_rwsem_is_held(&ca->fs->mark_lock) ||
lockdep_is_held(&ca->fs->state_lock) ||
lockdep_is_held(&ca->bucket_lock));
lockdep_is_held(&ca->fs->state_lock));
}
static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)

View File

@ -14,21 +14,13 @@ static inline bool io_timer_cmp(const void *l, const void *r, void __always_unus
return (*_l)->expire < (*_r)->expire;
}
static inline void io_timer_swp(void *l, void *r, void __always_unused *args)
{
struct io_timer **_l = (struct io_timer **)l;
struct io_timer **_r = (struct io_timer **)r;
swap(*_l, *_r);
}
static const struct min_heap_callbacks callbacks = {
.less = io_timer_cmp,
.swp = NULL,
};
void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
{
const struct min_heap_callbacks callbacks = {
.less = io_timer_cmp,
.swp = io_timer_swp,
};
spin_lock(&clock->timer_lock);
if (time_after_eq64((u64) atomic64_read(&clock->now), timer->expire)) {
@ -48,11 +40,6 @@ out:
void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
{
const struct min_heap_callbacks callbacks = {
.less = io_timer_cmp,
.swp = io_timer_swp,
};
spin_lock(&clock->timer_lock);
for (size_t i = 0; i < clock->timers.nr; i++)
@ -142,10 +129,6 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
static struct io_timer *get_expired_timer(struct io_clock *clock, u64 now)
{
struct io_timer *ret = NULL;
const struct min_heap_callbacks callbacks = {
.less = io_timer_cmp,
.swp = io_timer_swp,
};
if (clock->timers.nr &&
time_after_eq64(now, clock->timers.data[0]->expire)) {


@ -620,7 +620,7 @@ int bch2_data_update_init(struct btree_trans *trans,
* and we have to check for this because we go rw before repairing the
* snapshots table - just skip it, we can move it later.
*/
if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot)))
if (unlikely(k.k->p.snapshot && !bch2_snapshot_exists(c, k.k->p.snapshot)))
return -BCH_ERR_data_update_done;
if (!bkey_get_dev_refs(c, k))


@ -79,6 +79,8 @@ static inline void accounting_key_init(struct bkey_i *k, struct disk_accounting_
memcpy_u64s_small(acc->v.d, d, nr);
}
static int bch2_accounting_update_sb_one(struct bch_fs *, struct bpos);
int bch2_disk_accounting_mod(struct btree_trans *trans,
struct disk_accounting_pos *k,
s64 *d, unsigned nr, bool gc)
@ -96,9 +98,16 @@ int bch2_disk_accounting_mod(struct btree_trans *trans,
accounting_key_init(&k_i.k, k, d, nr);
return likely(!gc)
? bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k)
: bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
if (unlikely(gc)) {
int ret = bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
if (ret == -BCH_ERR_btree_insert_need_mark_replicas)
ret = drop_locks_do(trans,
bch2_accounting_update_sb_one(trans->c, disk_accounting_pos_to_bpos(k))) ?:
bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
return ret;
} else {
return bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k);
}
}
int bch2_mod_dev_cached_sectors(struct btree_trans *trans,
@ -471,32 +480,6 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc
return ret;
}
void bch2_fs_accounting_to_text(struct printbuf *out, struct bch_fs *c)
{
struct bch_accounting_mem *acc = &c->accounting;
percpu_down_read(&c->mark_lock);
out->atomic++;
eytzinger0_for_each(i, acc->k.nr) {
struct disk_accounting_pos acc_k;
bpos_to_disk_accounting_pos(&acc_k, acc->k.data[i].pos);
bch2_accounting_key_to_text(out, &acc_k);
u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
prt_str(out, ":");
for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++)
prt_printf(out, " %llu", v[j]);
prt_newline(out);
}
--out->atomic;
percpu_up_read(&c->mark_lock);
}
static void bch2_accounting_free_counters(struct bch_accounting_mem *acc, bool gc)
{
darray_for_each(acc->k, e) {
@ -931,10 +914,13 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
bpos_to_disk_accounting_pos(&acc_k, k.k->p);
if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
continue;
break;
if (acc_k.type == BCH_DISK_ACCOUNTING_inum)
if (!bch2_accounting_is_mem(acc_k)) {
struct disk_accounting_pos next = { .type = acc_k.type + 1 };
bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
continue;
}
bch2_accounting_mem_read(c, k.k->p, v, nr);


@ -138,7 +138,8 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
bpos_to_disk_accounting_pos(&acc_k, a.k->p);
bool gc = mode == BCH_ACCOUNTING_gc;
EBUG_ON(gc && !acc->gc_running);
if (gc && !acc->gc_running)
return 0;
if (!bch2_accounting_is_mem(acc_k))
return 0;
@ -255,7 +256,6 @@ static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans
int bch2_fs_replicas_usage_read(struct bch_fs *, darray_char *);
int bch2_fs_accounting_read(struct bch_fs *, darray_char *, unsigned);
void bch2_fs_accounting_to_text(struct printbuf *, struct bch_fs *);
int bch2_gc_accounting_start(struct bch_fs *);
int bch2_gc_accounting_done(struct bch_fs *);


@ -305,13 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans,
}
if (flags & BTREE_TRIGGER_gc) {
percpu_down_read(&c->mark_lock);
struct bucket *g = gc_bucket(ca, bucket.offset);
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
ptr->dev,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
ret = -BCH_ERR_mark_stripe;
goto err_unlock;
goto err;
}
bucket_lock(g);
@ -319,8 +318,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags);
alloc_to_bucket(g, new);
bucket_unlock(g);
err_unlock:
percpu_up_read(&c->mark_lock);
if (!ret)
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
}
@ -1058,6 +1056,11 @@ static inline void ec_stripes_heap_swap(void *l, void *r, void *h)
ec_stripes_heap_set_backpointer(_h, j);
}
static const struct min_heap_callbacks callbacks = {
.less = ec_stripes_heap_cmp,
.swp = ec_stripes_heap_swap,
};
static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
@ -1070,11 +1073,6 @@ static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
void bch2_stripes_heap_del(struct bch_fs *c,
struct stripe *m, size_t idx)
{
const struct min_heap_callbacks callbacks = {
.less = ec_stripes_heap_cmp,
.swp = ec_stripes_heap_swap,
};
mutex_lock(&c->ec_stripes_heap_lock);
heap_verify_backpointer(c, idx);
@ -1085,11 +1083,6 @@ void bch2_stripes_heap_del(struct bch_fs *c,
void bch2_stripes_heap_insert(struct bch_fs *c,
struct stripe *m, size_t idx)
{
const struct min_heap_callbacks callbacks = {
.less = ec_stripes_heap_cmp,
.swp = ec_stripes_heap_swap,
};
mutex_lock(&c->ec_stripes_heap_lock);
BUG_ON(min_heap_full(&c->ec_stripes_heap));
@ -1108,10 +1101,6 @@ void bch2_stripes_heap_insert(struct bch_fs *c,
void bch2_stripes_heap_update(struct bch_fs *c,
struct stripe *m, size_t idx)
{
const struct min_heap_callbacks callbacks = {
.less = ec_stripes_heap_cmp,
.swp = ec_stripes_heap_swap,
};
ec_stripes_heap *h = &c->ec_stripes_heap;
bool do_deletes;
size_t i;


@ -118,6 +118,7 @@
x(ENOENT, ENOENT_dev_not_found) \
x(ENOENT, ENOENT_dev_idx_not_found) \
x(ENOENT, ENOENT_inode_no_backpointer) \
x(ENOENT, ENOENT_no_snapshot_tree_subvol) \
x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \
x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \
x(EEXIST, EEXIST_str_hash_set) \
@ -196,6 +197,9 @@
x(EINVAL, opt_parse_error) \
x(EINVAL, remove_with_metadata_missing_unimplemented)\
x(EINVAL, remove_would_lose_data) \
x(EINVAL, no_resize_with_buckets_nouse) \
x(EINVAL, inode_unpack_error) \
x(EINVAL, varint_decode_error) \
x(EROFS, erofs_trans_commit) \
x(EROFS, erofs_no_writes) \
x(EROFS, erofs_journal_err) \
@ -313,6 +317,7 @@ static inline long bch2_err_class(long err)
#define BLK_STS_REMOVED ((__force blk_status_t)128)
#include <linux/blk_types.h>
const char *bch2_blk_status_to_str(blk_status_t);
#endif /* _BCACHFES_ERRCODE_H */


@ -574,6 +574,11 @@ static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsig
printbuf_nul_terminate(out);
}
static inline void prt_str_reversed(struct printbuf *out, const char *s)
{
prt_bytes_reversed(out, s, strlen(s));
}
static inline void reverse_bytes(void *b, size_t n)
{
char *e = b + n, *s = b;
@ -596,17 +601,20 @@ int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printb
struct bch_inode_unpacked inode;
ret = bch2_inode_find_by_inum_trans(trans, inum, &inode);
if (ret)
goto err;
goto disconnected;
if (!inode.bi_dir && !inode.bi_dir_offset) {
ret = -BCH_ERR_ENOENT_inode_no_backpointer;
goto err;
goto disconnected;
}
inum.subvol = inode.bi_parent_subvol ?: inum.subvol;
inum.inum = inode.bi_dir;
u32 snapshot;
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
goto disconnected;
struct btree_iter d_iter;
struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter,
@ -614,23 +622,19 @@ int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printb
0, dirent);
ret = bkey_err(d.s_c);
if (ret)
goto err;
goto disconnected;
struct qstr dirent_name = bch2_dirent_get_name(d);
prt_bytes_reversed(path, dirent_name.name, dirent_name.len);
prt_char(path, '/');
if (d.v->d_type == DT_SUBVOL)
inum.subvol = le32_to_cpu(d.v->d_parent_subvol);
inum.inum = d.k->p.inode;
bch2_trans_iter_exit(trans, &d_iter);
}
if (orig_pos == path->pos)
prt_char(path, '/');
out:
ret = path->allocation_failure ? -ENOMEM : 0;
if (ret)
goto err;
@ -639,4 +643,10 @@ int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printb
return 0;
err:
return ret;
disconnected:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto err;
prt_str_reversed(path, "(disconnected)");
goto out;
}
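The hunk above builds the path from the leaf inode upward, appending each dirent name reversed and reversing the whole printbuf once at the end. A minimal standalone sketch of that trick in plain C (not the bcachefs printbuf API; all names here are illustrative):

#include <stdio.h>
#include <string.h>

static void append_reversed(char *buf, size_t *pos, const char *s, size_t len)
{
	while (len--)
		buf[(*pos)++] = s[len];
}

static void reverse_whole(char *buf, size_t len)
{
	for (size_t i = 0, j = len - 1; len && i < j; i++, j--) {
		char tmp = buf[i];
		buf[i] = buf[j];
		buf[j] = tmp;
	}
}

int main(void)
{
	/* dirent names discovered while walking leaf -> root */
	const char *leaf_to_root[] = { "file", "dir", "home" };
	char buf[64];
	size_t pos = 0;

	for (int i = 0; i < 3; i++) {
		append_reversed(buf, &pos, leaf_to_root[i], strlen(leaf_to_root[i]));
		buf[pos++] = '/';
	}
	reverse_whole(buf, pos);	/* "elif/rid/emoh/" becomes "/home/dir/file" */
	printf("%.*s\n", (int) pos, buf);
	return 0;
}

Appending each component reversed costs one pass over that component's bytes, instead of memmove()ing everything already in the buffer at every level of the walk.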


@ -625,15 +625,6 @@ do_io:
BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio,
sectors << 9, offset << 9));
/* Check for writing past i_size: */
WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) >
round_up(i_size, block_bytes(c)) &&
!test_bit(BCH_FS_emergency_ro, &c->flags),
"writing past i_size: %llu > %llu (unrounded %llu)\n",
bio_end_sector(&w->io->op.wbio.bio) << 9,
round_up(i_size, block_bytes(c)),
i_size);
w->io->op.res.sectors += reserved_sectors;
w->io->op.i_sectors_delta -= dirty_sectors;
w->io->op.new_i_size = i_size;


@ -205,6 +205,36 @@ err:
return ret;
}
/*
* Find any subvolume associated with a tree of snapshots
* We can't rely on master_subvol - it might have been deleted.
*/
static int find_snapshot_tree_subvol(struct btree_trans *trans,
u32 tree_id, u32 *subvol)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret;
for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, ret) {
if (k.k->type != KEY_TYPE_snapshot)
continue;
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
if (le32_to_cpu(s.v->tree) != tree_id)
continue;
if (s.v->subvol) {
*subvol = le32_to_cpu(s.v->subvol);
goto found;
}
}
ret = -BCH_ERR_ENOENT_no_snapshot_tree_subvol;
found:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
/* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
struct bch_inode_unpacked *lostfound,
@ -223,19 +253,24 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
if (ret)
return ret;
subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };
u32 subvolid;
ret = find_snapshot_tree_subvol(trans,
bch2_snapshot_tree(c, snapshot), &subvolid);
bch_err_msg(c, ret, "finding subvol associated with snapshot tree %u",
bch2_snapshot_tree(c, snapshot));
if (ret)
return ret;
struct bch_subvolume subvol;
ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), false, &subvol);
bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
le32_to_cpu(st.master_subvol), snapshot);
ret = bch2_subvolume_get(trans, subvolid, false, &subvol);
bch_err_msg(c, ret, "looking up subvol %u for snapshot %u", subvolid, snapshot);
if (ret)
return ret;
if (!subvol.inode) {
struct btree_iter iter;
struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)),
BTREE_ID_subvolumes, POS(0, subvolid),
0, subvolume);
ret = PTR_ERR_OR_ZERO(subvol);
if (ret)
@ -245,13 +280,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
bch2_trans_iter_exit(trans, &iter);
}
root_inum.inum = le64_to_cpu(subvol.inode);
subvol_inum root_inum = {
.subvol = subvolid,
.inum = le64_to_cpu(subvol.inode)
};
struct bch_inode_unpacked root_inode;
struct bch_hash_info root_hash_info;
ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode);
bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
root_inum.inum, le32_to_cpu(st.master_subvol));
root_inum.inum, subvolid);
if (ret)
return ret;
@ -458,7 +496,9 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
continue;
struct bch_inode_unpacked child_inode;
bch2_inode_unpack(k, &child_inode);
ret = bch2_inode_unpack(k, &child_inode);
if (ret)
break;
if (!inode_should_reattach(&child_inode)) {
ret = maybe_delete_dirent(trans,
@ -809,9 +849,8 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w,
{
struct bch_inode_unpacked u;
BUG_ON(bch2_inode_unpack(inode, &u));
return darray_push(&w->inodes, ((struct inode_walker_entry) {
return bch2_inode_unpack(inode, &u) ?:
darray_push(&w->inodes, ((struct inode_walker_entry) {
.inode = u,
.snapshot = inode.k->p.snapshot,
}));
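The add_inode() change above folds the unpack into bcachefs's usual `?:` error chain. A hedged, self-contained illustration of that GNU extension (step_one/step_two are hypothetical helpers):

#include <stdio.h>

static int step_one(void) { return 0; }		/* succeeds */
static int step_two(void) { return -22; }	/* fails    */

static int do_both(void)
{
	/* x ?: y yields x when x is nonzero, otherwise y, and only evaluates y
	 * in that case -- so the first nonzero (error) return stops the chain. */
	return step_one() ?: step_two();
}

int main(void)
{
	printf("%d\n", do_both());	/* prints -22: step_one() returned 0, so step_two() ran */
	return 0;
}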
@ -1065,7 +1104,7 @@ static int get_snapshot_root_inode(struct btree_trans *trans,
goto err;
BUG();
found_root:
BUG_ON(bch2_inode_unpack(k, root));
ret = bch2_inode_unpack(k, root);
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
@ -1096,7 +1135,9 @@ static int check_inode(struct btree_trans *trans,
if (!bkey_is_inode(k.k))
return 0;
BUG_ON(bch2_inode_unpack(k, &u));
ret = bch2_inode_unpack(k, &u);
if (ret)
goto err;
if (snapshot_root->bi_inum != u.bi_inum) {
ret = get_snapshot_root_inode(trans, snapshot_root, u.bi_inum);
@ -1107,7 +1148,7 @@ static int check_inode(struct btree_trans *trans,
if (fsck_err_on(u.bi_hash_seed != snapshot_root->bi_hash_seed ||
INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root),
trans, inode_snapshot_mismatch,
"inodes in different snapshots don't match")) {
"inode hash info in different snapshots don't match")) {
u.bi_hash_seed = snapshot_root->bi_hash_seed;
SET_INODE_STR_HASH(&u, INODE_STR_HASH(snapshot_root));
do_update = true;
@ -1318,7 +1359,9 @@ static int find_oldest_inode_needs_reattach(struct btree_trans *trans,
break;
struct bch_inode_unpacked parent_inode;
bch2_inode_unpack(k, &parent_inode);
ret = bch2_inode_unpack(k, &parent_inode);
if (ret)
break;
if (!inode_should_reattach(&parent_inode))
break;
@ -1341,7 +1384,9 @@ static int check_unreachable_inode(struct btree_trans *trans,
return 0;
struct bch_inode_unpacked inode;
BUG_ON(bch2_inode_unpack(k, &inode));
ret = bch2_inode_unpack(k, &inode);
if (ret)
return ret;
if (!inode_should_reattach(&inode))
return 0;
@ -2296,7 +2341,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
dir->first_this_inode = false;
ret = bch2_str_hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k);
ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k);
if (ret < 0)
goto err;
if (ret) {
@ -2410,7 +2455,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
inode->first_this_inode = false;
ret = bch2_str_hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k);
ret = bch2_str_hash_check_key(trans, NULL, &bch2_xattr_hash_desc, hash_info, iter, k);
bch_err_fn(c, ret);
return ret;
}
@ -2653,7 +2698,9 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
int ret = 0;
struct bch_inode_unpacked inode;
BUG_ON(bch2_inode_unpack(inode_k, &inode));
ret = bch2_inode_unpack(inode_k, &inode);
if (ret)
return ret;
while (!inode.bi_subvol) {
struct btree_iter dirent_iter;
@ -2864,7 +2911,9 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
/* Should never fail, checked by bch2_inode_invalid: */
struct bch_inode_unpacked u;
BUG_ON(bch2_inode_unpack(k, &u));
_ret3 = bch2_inode_unpack(k, &u);
if (_ret3)
break;
/*
* Backpointer and directory structure checks are sufficient for
@ -2942,7 +2991,9 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite
if (!bkey_is_inode(k.k))
return 0;
BUG_ON(bch2_inode_unpack(k, &u));
ret = bch2_inode_unpack(k, &u);
if (ret)
return ret;
if (S_ISDIR(u.bi_mode))
return 0;


@ -48,10 +48,10 @@ static int inode_decode_field(const u8 *in, const u8 *end,
u8 *p;
if (in >= end)
return -1;
return -BCH_ERR_inode_unpack_error;
if (!*in)
return -1;
return -BCH_ERR_inode_unpack_error;
/*
* position of highest set bit indicates number of bytes:
@ -61,7 +61,7 @@ static int inode_decode_field(const u8 *in, const u8 *end,
bytes = byte_table[shift - 1];
if (in + bytes > end)
return -1;
return -BCH_ERR_inode_unpack_error;
p = (u8 *) be + 16 - bytes;
memcpy(p, in, bytes);
@ -177,7 +177,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
return ret; \
\
if (field_bits > sizeof(unpacked->_name) * 8) \
return -1; \
return -BCH_ERR_inode_unpack_error; \
\
unpacked->_name = field[1]; \
in += ret;
@ -218,7 +218,7 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
\
unpacked->_name = v[0]; \
if (v[1] || v[0] != unpacked->_name) \
return -1; \
return -BCH_ERR_inode_unpack_error; \
fieldnr++;
BCH_INODE_FIELDS_v2()
@ -269,7 +269,7 @@ static int bch2_inode_unpack_v3(struct bkey_s_c k,
\
unpacked->_name = v[0]; \
if (v[1] || v[0] != unpacked->_name) \
return -1; \
return -BCH_ERR_inode_unpack_error; \
fieldnr++;
BCH_INODE_FIELDS_v3()
@ -886,7 +886,7 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m
{
struct bch_fs *c = trans->c;
u64 cursor_idx = c->opts.shard_inode_numbers ? cpu : 0;
u64 cursor_idx = c->opts.inodes_32bit ? 0 : cpu + 1;
cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits);
@ -907,19 +907,16 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m
if (ret)
goto err;
cursor->v.bits = c->opts.shard_inode_numbers_bits;
unsigned bits = (c->opts.inodes_32bit ? 31 : 63);
if (c->opts.shard_inode_numbers) {
bits -= cursor->v.bits;
*min = (cpu << bits);
*max = (cpu << bits) | ~(ULLONG_MAX << bits);
*min = max_t(u64, *min, BLOCKDEV_INODE_MAX);
} else {
if (c->opts.inodes_32bit) {
*min = BLOCKDEV_INODE_MAX;
*max = ~(ULLONG_MAX << bits);
*max = INT_MAX;
} else {
cursor->v.bits = c->opts.shard_inode_numbers_bits;
unsigned bits = 63 - c->opts.shard_inode_numbers_bits;
*min = max(cpu << bits, (u64) INT_MAX + 1);
*max = (cpu << bits) | ~(ULLONG_MAX << bits);
}
if (le64_to_cpu(cursor->v.idx) < *min)
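A worked instance of the 64-bit branch above, assuming shard_inode_numbers_bits = 3 and cpu = 5, to make the per-CPU windows concrete:

/*
 * bits = 63 - 3                          = 60
 * cpu << bits                            = 5 << 60
 * *min = max(5 << 60, (u64) INT_MAX + 1) = 5 << 60
 * *max = (5 << 60) | ~(~0ULL << 60)      = (6 << 60) - 1
 *
 * Each CPU therefore hands out inode numbers from its own window of up to
 * 2^60 values, and every window starts above INT_MAX, so it can never
 * collide with the [BLOCKDEV_INODE_MAX, INT_MAX] range used when
 * inodes_32bit is set.
 */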


@ -102,7 +102,8 @@ struct bch_inode_generation {
x(bi_subvol, 32) \
x(bi_parent_subvol, 32) \
x(bi_nocow, 8) \
x(bi_depth, 32)
x(bi_depth, 32) \
x(bi_inodes_32bit, 8)
/* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \
@ -115,7 +116,8 @@ struct bch_inode_generation {
x(foreground_target, 16) \
x(background_target, 16) \
x(erasure_code, 16) \
x(nocow, 8)
x(nocow, 8) \
x(inodes_32bit, 8)
enum inode_opt_id {
#define x(name, ...) \


@ -1356,6 +1356,9 @@ err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_put(trans);
darray_exit(&buckets);
if (ret) {
struct printbuf buf = PRINTBUF;
bch2_write_op_error(&buf, op);
@ -1366,9 +1369,6 @@ err:
op->flags |= BCH_WRITE_SUBMITTED;
}
bch2_trans_put(trans);
darray_exit(&buckets);
/* fallback to cow write path? */
if (!(op->flags & BCH_WRITE_SUBMITTED)) {
closure_sync(&op->cl);


@ -1114,8 +1114,10 @@ reread:
(printbuf_reset(&err),
prt_str(&err, "journal "),
bch2_csum_err_msg(&err, csum_type, j->csum, csum),
err.buf)))
err.buf))) {
saw_bad = true;
bch2_fatal_error(c);
}
ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
j->encrypted_start,


@ -414,7 +414,9 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
continue;
struct bch_inode_unpacked inode;
BUG_ON(bch2_inode_unpack(k, &inode));
_ret3 = bch2_inode_unpack(k, &inode);
if (_ret3)
break;
struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
bch2_inode_opts_get(&e.io_opts, trans->c, &inode);


@ -222,15 +222,10 @@ enum fsck_err_opts {
BCH_SB_ERASURE_CODE, false, \
NULL, "Enable erasure coding (DO NOT USE YET)") \
x(inodes_32bit, u8, \
OPT_FS|OPT_FORMAT, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH_SB_INODE_32BIT, true, \
NULL, "Constrain inode numbers to 32 bits") \
x(shard_inode_numbers, u8, \
OPT_FS|OPT_FORMAT, \
OPT_BOOL(), \
BCH_SB_SHARD_INUMS, true, \
NULL, "Shard new inode numbers by CPU id") \
x(shard_inode_numbers_bits, u8, \
OPT_FS|OPT_FORMAT, \
OPT_UINT(0, 8), \


@ -107,6 +107,12 @@ out:
return ret;
}
static void kill_btree(struct bch_fs *c, enum btree_id btree)
{
bch2_btree_id_root(c, btree)->alive = false;
bch2_shoot_down_journal_keys(c, btree, 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
}
/* for -o reconstruct_alloc: */
static void bch2_reconstruct_alloc(struct bch_fs *c)
{
@ -157,16 +163,9 @@ static void bch2_reconstruct_alloc(struct bch_fs *c)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
bch2_shoot_down_journal_keys(c, BTREE_ID_alloc,
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers,
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard,
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
bch2_shoot_down_journal_keys(c, BTREE_ID_freespace,
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens,
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
for (unsigned i = 0; i < btree_id_nr_alive(c); i++)
if (btree_id_is_alloc(i))
kill_btree(c, i);
}
/*
@ -573,9 +572,6 @@ static int read_btree_roots(struct bch_fs *c)
if (!r->alive)
continue;
if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc)
continue;
printbuf_reset(&buf);
bch2_btree_id_level_to_text(&buf, i, r->level);
@ -785,6 +781,11 @@ int bch2_fs_recovery(struct bch_fs *c)
c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) {
SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe);
write_sb = true;
}
if (write_sb)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
@ -882,15 +883,15 @@ use_clean:
c->journal_replay_seq_start = last_seq;
c->journal_replay_seq_end = blacklist_seq - 1;
if (c->opts.reconstruct_alloc)
bch2_reconstruct_alloc(c);
zero_out_btree_mem_ptr(&c->journal_keys);
ret = journal_replay_early(c, clean);
if (ret)
goto err;
if (c->opts.reconstruct_alloc)
bch2_reconstruct_alloc(c);
/*
* After an unclean shutdown, skip then next few journal sequence
* numbers as they may have been referenced by btree writes that


@ -49,7 +49,7 @@
x(fs_upgrade_for_subvolumes, 22, 0) \
x(check_inodes, 24, PASS_FSCK) \
x(check_extents, 25, PASS_FSCK) \
x(check_indirect_extents, 26, PASS_FSCK) \
x(check_indirect_extents, 26, PASS_ONLINE|PASS_FSCK) \
x(check_dirents, 27, PASS_FSCK) \
x(check_xattrs, 28, PASS_FSCK) \
x(check_root, 29, PASS_ONLINE|PASS_FSCK) \


@ -2,86 +2,91 @@
#ifndef _BCACHEFS_SB_COUNTERS_FORMAT_H
#define _BCACHEFS_SB_COUNTERS_FORMAT_H
#define BCH_PERSISTENT_COUNTERS() \
x(io_read, 0) \
x(io_write, 1) \
x(io_move, 2) \
x(bucket_invalidate, 3) \
x(bucket_discard, 4) \
x(bucket_alloc, 5) \
x(bucket_alloc_fail, 6) \
x(btree_cache_scan, 7) \
x(btree_cache_reap, 8) \
x(btree_cache_cannibalize, 9) \
x(btree_cache_cannibalize_lock, 10) \
x(btree_cache_cannibalize_lock_fail, 11) \
x(btree_cache_cannibalize_unlock, 12) \
x(btree_node_write, 13) \
x(btree_node_read, 14) \
x(btree_node_compact, 15) \
x(btree_node_merge, 16) \
x(btree_node_split, 17) \
x(btree_node_rewrite, 18) \
x(btree_node_alloc, 19) \
x(btree_node_free, 20) \
x(btree_node_set_root, 21) \
x(btree_path_relock_fail, 22) \
x(btree_path_upgrade_fail, 23) \
x(btree_reserve_get_fail, 24) \
x(journal_entry_full, 25) \
x(journal_full, 26) \
x(journal_reclaim_finish, 27) \
x(journal_reclaim_start, 28) \
x(journal_write, 29) \
x(read_promote, 30) \
x(read_bounce, 31) \
x(read_split, 33) \
x(read_retry, 32) \
x(read_reuse_race, 34) \
x(move_extent_read, 35) \
x(move_extent_write, 36) \
x(move_extent_finish, 37) \
x(move_extent_fail, 38) \
x(move_extent_start_fail, 39) \
x(copygc, 40) \
x(copygc_wait, 41) \
x(gc_gens_end, 42) \
x(gc_gens_start, 43) \
x(trans_blocked_journal_reclaim, 44) \
x(trans_restart_btree_node_reused, 45) \
x(trans_restart_btree_node_split, 46) \
x(trans_restart_fault_inject, 47) \
x(trans_restart_iter_upgrade, 48) \
x(trans_restart_journal_preres_get, 49) \
x(trans_restart_journal_reclaim, 50) \
x(trans_restart_journal_res_get, 51) \
x(trans_restart_key_cache_key_realloced, 52) \
x(trans_restart_key_cache_raced, 53) \
x(trans_restart_mark_replicas, 54) \
x(trans_restart_mem_realloced, 55) \
x(trans_restart_memory_allocation_failure, 56) \
x(trans_restart_relock, 57) \
x(trans_restart_relock_after_fill, 58) \
x(trans_restart_relock_key_cache_fill, 59) \
x(trans_restart_relock_next_node, 60) \
x(trans_restart_relock_parent_for_fill, 61) \
x(trans_restart_relock_path, 62) \
x(trans_restart_relock_path_intent, 63) \
x(trans_restart_too_many_iters, 64) \
x(trans_restart_traverse, 65) \
x(trans_restart_upgrade, 66) \
x(trans_restart_would_deadlock, 67) \
x(trans_restart_would_deadlock_write, 68) \
x(trans_restart_injected, 69) \
x(trans_restart_key_cache_upgrade, 70) \
x(trans_traverse_all, 71) \
x(transaction_commit, 72) \
x(write_super, 73) \
x(trans_restart_would_deadlock_recursion_limit, 74) \
x(trans_restart_write_buffer_flush, 75) \
x(trans_restart_split_race, 76) \
x(write_buffer_flush_slowpath, 77) \
x(write_buffer_flush_sync, 78)
enum counters_flags {
TYPE_COUNTER = BIT(0), /* event counters */
TYPE_SECTORS = BIT(1), /* amount counters, the unit is sectors */
};
#define BCH_PERSISTENT_COUNTERS() \
x(io_read, 0, TYPE_SECTORS) \
x(io_write, 1, TYPE_SECTORS) \
x(io_move, 2, TYPE_SECTORS) \
x(bucket_invalidate, 3, TYPE_COUNTER) \
x(bucket_discard, 4, TYPE_COUNTER) \
x(bucket_alloc, 5, TYPE_COUNTER) \
x(bucket_alloc_fail, 6, TYPE_COUNTER) \
x(btree_cache_scan, 7, TYPE_COUNTER) \
x(btree_cache_reap, 8, TYPE_COUNTER) \
x(btree_cache_cannibalize, 9, TYPE_COUNTER) \
x(btree_cache_cannibalize_lock, 10, TYPE_COUNTER) \
x(btree_cache_cannibalize_lock_fail, 11, TYPE_COUNTER) \
x(btree_cache_cannibalize_unlock, 12, TYPE_COUNTER) \
x(btree_node_write, 13, TYPE_COUNTER) \
x(btree_node_read, 14, TYPE_COUNTER) \
x(btree_node_compact, 15, TYPE_COUNTER) \
x(btree_node_merge, 16, TYPE_COUNTER) \
x(btree_node_split, 17, TYPE_COUNTER) \
x(btree_node_rewrite, 18, TYPE_COUNTER) \
x(btree_node_alloc, 19, TYPE_COUNTER) \
x(btree_node_free, 20, TYPE_COUNTER) \
x(btree_node_set_root, 21, TYPE_COUNTER) \
x(btree_path_relock_fail, 22, TYPE_COUNTER) \
x(btree_path_upgrade_fail, 23, TYPE_COUNTER) \
x(btree_reserve_get_fail, 24, TYPE_COUNTER) \
x(journal_entry_full, 25, TYPE_COUNTER) \
x(journal_full, 26, TYPE_COUNTER) \
x(journal_reclaim_finish, 27, TYPE_COUNTER) \
x(journal_reclaim_start, 28, TYPE_COUNTER) \
x(journal_write, 29, TYPE_COUNTER) \
x(read_promote, 30, TYPE_COUNTER) \
x(read_bounce, 31, TYPE_COUNTER) \
x(read_split, 33, TYPE_COUNTER) \
x(read_retry, 32, TYPE_COUNTER) \
x(read_reuse_race, 34, TYPE_COUNTER) \
x(move_extent_read, 35, TYPE_SECTORS) \
x(move_extent_write, 36, TYPE_SECTORS) \
x(move_extent_finish, 37, TYPE_SECTORS) \
x(move_extent_fail, 38, TYPE_COUNTER) \
x(move_extent_start_fail, 39, TYPE_COUNTER) \
x(copygc, 40, TYPE_COUNTER) \
x(copygc_wait, 41, TYPE_COUNTER) \
x(gc_gens_end, 42, TYPE_COUNTER) \
x(gc_gens_start, 43, TYPE_COUNTER) \
x(trans_blocked_journal_reclaim, 44, TYPE_COUNTER) \
x(trans_restart_btree_node_reused, 45, TYPE_COUNTER) \
x(trans_restart_btree_node_split, 46, TYPE_COUNTER) \
x(trans_restart_fault_inject, 47, TYPE_COUNTER) \
x(trans_restart_iter_upgrade, 48, TYPE_COUNTER) \
x(trans_restart_journal_preres_get, 49, TYPE_COUNTER) \
x(trans_restart_journal_reclaim, 50, TYPE_COUNTER) \
x(trans_restart_journal_res_get, 51, TYPE_COUNTER) \
x(trans_restart_key_cache_key_realloced, 52, TYPE_COUNTER) \
x(trans_restart_key_cache_raced, 53, TYPE_COUNTER) \
x(trans_restart_mark_replicas, 54, TYPE_COUNTER) \
x(trans_restart_mem_realloced, 55, TYPE_COUNTER) \
x(trans_restart_memory_allocation_failure, 56, TYPE_COUNTER) \
x(trans_restart_relock, 57, TYPE_COUNTER) \
x(trans_restart_relock_after_fill, 58, TYPE_COUNTER) \
x(trans_restart_relock_key_cache_fill, 59, TYPE_COUNTER) \
x(trans_restart_relock_next_node, 60, TYPE_COUNTER) \
x(trans_restart_relock_parent_for_fill, 61, TYPE_COUNTER) \
x(trans_restart_relock_path, 62, TYPE_COUNTER) \
x(trans_restart_relock_path_intent, 63, TYPE_COUNTER) \
x(trans_restart_too_many_iters, 64, TYPE_COUNTER) \
x(trans_restart_traverse, 65, TYPE_COUNTER) \
x(trans_restart_upgrade, 66, TYPE_COUNTER) \
x(trans_restart_would_deadlock, 67, TYPE_COUNTER) \
x(trans_restart_would_deadlock_write, 68, TYPE_COUNTER) \
x(trans_restart_injected, 69, TYPE_COUNTER) \
x(trans_restart_key_cache_upgrade, 70, TYPE_COUNTER) \
x(trans_traverse_all, 71, TYPE_COUNTER) \
x(transaction_commit, 72, TYPE_COUNTER) \
x(write_super, 73, TYPE_COUNTER) \
x(trans_restart_would_deadlock_recursion_limit, 74, TYPE_COUNTER) \
x(trans_restart_write_buffer_flush, 75, TYPE_COUNTER) \
x(trans_restart_split_race, 76, TYPE_COUNTER) \
x(write_buffer_flush_slowpath, 77, TYPE_COUNTER) \
x(write_buffer_flush_sync, 78, TYPE_COUNTER)
enum bch_persistent_counters {
#define x(t, n, ...) BCH_COUNTER_##t,
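The enum expansion above ignores the new third argument via `...`; a hedged illustration (not part of this commit, array name hypothetical) of how another expansion of the same x-macro could consume the flags, e.g. to build a per-counter type table:

static const unsigned bch2_counter_type_flags[] = {
#define x(t, n, flags)	[BCH_COUNTER_##t] = flags,
	BCH_PERSISTENT_COUNTERS()
#undef x
};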


@ -83,7 +83,6 @@
BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
BCH_FSCK_ERR_inode_has_child_snapshots_wrong) \
x(backpointer_bucket_gen, \
BIT_ULL(BCH_RECOVERY_PASS_check_backpointers_to_extents)|\
BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
BCH_FSCK_ERR_backpointer_to_missing_ptr, \
BCH_FSCK_ERR_ptr_to_missing_backpointer) \


@ -616,8 +616,6 @@ void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long
if (type != SIX_LOCK_write)
six_release(&lock->dep_map, ip);
else
lock->seq++;
if (type == SIX_LOCK_intent &&
lock->intent_lock_recurse) {
@ -625,6 +623,15 @@ void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long
return;
}
if (type == SIX_LOCK_write &&
lock->write_lock_recurse) {
--lock->write_lock_recurse;
return;
}
if (type == SIX_LOCK_write)
lock->seq++;
do_six_unlock_type(lock, type);
}
EXPORT_SYMBOL_GPL(six_unlock_ip);
@ -735,13 +742,13 @@ void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
atomic_add(l[type].lock_val, &lock->state);
}
break;
case SIX_LOCK_write:
lock->write_lock_recurse++;
fallthrough;
case SIX_LOCK_intent:
EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
lock->intent_lock_recurse++;
break;
case SIX_LOCK_write:
BUG();
break;
}
}
EXPORT_SYMBOL_GPL(six_lock_increment);


@ -137,6 +137,7 @@ struct six_lock {
atomic_t state;
u32 seq;
unsigned intent_lock_recurse;
unsigned write_lock_recurse;
struct task_struct *owner;
unsigned __percpu *readers;
raw_spinlock_t wait_lock;


@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "buckets.h"
@ -279,23 +280,6 @@ fsck_err:
return ret;
}
static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
{
struct snapshot_t *t = snapshot_t_mut(c, id);
u32 parent = id;
while ((parent = bch2_snapshot_parent_early(c, parent)) &&
parent - id - 1 < IS_ANCESTOR_BITMAP)
__set_bit(parent - id - 1, t->is_ancestor);
}
static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
{
mutex_lock(&c->snapshot_table_lock);
__set_is_ancestor_bitmap(c, id);
mutex_unlock(&c->snapshot_table_lock);
}
static int __bch2_mark_snapshot(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
@ -317,6 +301,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
if (new.k->type == KEY_TYPE_snapshot) {
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
t->live = true;
t->parent = le32_to_cpu(s.v->parent);
t->children[0] = le32_to_cpu(s.v->children[0]);
t->children[1] = le32_to_cpu(s.v->children[1]);
@ -335,7 +320,11 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
t->skip[2] = 0;
}
__set_is_ancestor_bitmap(c, id);
u32 parent = id;
while ((parent = bch2_snapshot_parent_early(c, parent)) &&
parent - id - 1 < IS_ANCESTOR_BITMAP)
__set_bit(parent - id - 1, t->is_ancestor);
if (BCH_SNAPSHOT_DELETED(s.v)) {
set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
@ -365,70 +354,6 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
BTREE_ITER_with_updates, snapshot, s);
}
static int bch2_snapshot_live(struct btree_trans *trans, u32 id)
{
struct bch_snapshot v;
int ret;
if (!id)
return 0;
ret = bch2_snapshot_lookup(trans, id, &v);
if (bch2_err_matches(ret, ENOENT))
bch_err(trans->c, "snapshot node %u not found", id);
if (ret)
return ret;
return !BCH_SNAPSHOT_DELETED(&v);
}
/*
* If @k is a snapshot with just one live child, it's part of a linear chain,
* which we consider to be an equivalence class: and then after snapshot
* deletion cleanup, there should only be a single key at a given position in
* this equivalence class.
*
* This sets the equivalence class of @k to be the child's equivalence class, if
* it's part of such a linear chain: this correctly sets equivalence classes on
* startup if we run leaf to root (i.e. in natural key order).
*/
static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
unsigned i, nr_live = 0, live_idx = 0;
struct bkey_s_c_snapshot snap;
u32 id = k.k->p.offset, child[2];
if (k.k->type != KEY_TYPE_snapshot)
return 0;
snap = bkey_s_c_to_snapshot(k);
child[0] = le32_to_cpu(snap.v->children[0]);
child[1] = le32_to_cpu(snap.v->children[1]);
for (i = 0; i < 2; i++) {
int ret = bch2_snapshot_live(trans, child[i]);
if (ret < 0)
return ret;
if (ret)
live_idx = i;
nr_live += ret;
}
mutex_lock(&c->snapshot_table_lock);
snapshot_t_mut(c, id)->equiv = nr_live == 1
? snapshot_t_mut(c, child[live_idx])->equiv
: id;
mutex_unlock(&c->snapshot_table_lock);
return 0;
}
/* fsck: */
static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
@ -570,6 +495,9 @@ static int check_snapshot_tree(struct btree_trans *trans,
goto err;
}
if (!st.v->master_subvol)
goto out;
ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
@ -613,6 +541,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
u->v.master_subvol = cpu_to_le32(subvol_id);
st = snapshot_tree_i_to_s_c(u);
}
out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &snapshot_iter);
@ -913,7 +842,7 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
{
struct bch_fs *c = trans->c;
if (bch2_snapshot_equiv(c, id))
if (bch2_snapshot_exists(c, id))
return 0;
/* Do we need to reconstruct the snapshot_tree entry as well? */
@ -962,8 +891,7 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?:
bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i));
bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0);
}
/* Figure out which snapshot nodes belong in the same tree: */
@ -1061,7 +989,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c)
snapshot_id_list_to_text(&buf, t);
darray_for_each(*t, id) {
if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
if (fsck_err_on(!bch2_snapshot_exists(c, *id),
trans, snapshot_node_missing,
"snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) {
if (t->nr > 1) {
@ -1094,10 +1022,12 @@ int bch2_check_key_has_snapshot(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
int ret = 0;
if (fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot),
if (fsck_err_on(!bch2_snapshot_exists(c, k.k->p.snapshot),
trans, bkey_in_missing_snapshot,
"key in missing snapshot %s, delete?",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
(bch2_btree_id_to_text(&buf, iter->btree_id),
prt_char(&buf, ' '),
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node) ?: 1;
fsck_err:
@ -1111,13 +1041,11 @@ fsck_err:
int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
{
struct btree_iter iter;
struct bkey_i_snapshot *s;
int ret = 0;
s = bch2_bkey_get_mut_typed(trans, &iter,
struct bkey_i_snapshot *s =
bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_snapshots, POS(0, id),
0, snapshot);
ret = PTR_ERR_OR_ZERO(s);
int ret = PTR_ERR_OR_ZERO(s);
if (unlikely(ret)) {
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
trans->c, "missing snapshot %u", id);
@ -1305,10 +1233,6 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
goto err;
new_snapids[i] = iter.pos.offset;
mutex_lock(&c->snapshot_table_lock);
snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i];
mutex_unlock(&c->snapshot_table_lock);
}
err:
bch2_trans_iter_exit(trans, &iter);
@ -1414,102 +1338,95 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
* that key to snapshot leaf nodes, where we can mutate it
*/
static int delete_dead_snapshots_process_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
snapshot_id_list *deleted,
snapshot_id_list *equiv_seen,
struct bpos *last_pos)
{
int ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret)
return ret < 0 ? ret : 0;
struct snapshot_interior_delete {
u32 id;
u32 live_child;
};
typedef DARRAY(struct snapshot_interior_delete) interior_delete_list;
struct bch_fs *c = trans->c;
u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
if (!equiv) /* key for invalid snapshot node, but we chose not to delete */
static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
{
darray_for_each(*l, i)
if (i->id == id)
return i->live_child;
return 0;
}
static unsigned __live_child(struct snapshot_table *t, u32 id,
snapshot_id_list *delete_leaves,
interior_delete_list *delete_interior)
{
struct snapshot_t *s = __snapshot_t(t, id);
if (!s)
return 0;
if (!bkey_eq(k.k->p, *last_pos))
equiv_seen->nr = 0;
for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++)
if (s->children[i] &&
!snapshot_list_has_id(delete_leaves, s->children[i]) &&
!interior_delete_has_id(delete_interior, s->children[i]))
return s->children[i];
if (snapshot_list_has_id(deleted, k.k->p.snapshot))
return bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);
if (!bpos_eq(*last_pos, k.k->p) &&
snapshot_list_has_id(equiv_seen, equiv))
return bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);
*last_pos = k.k->p;
ret = snapshot_list_add_nodup(c, equiv_seen, equiv);
if (ret)
return ret;
/*
* When we have a linear chain of snapshot nodes, we consider
* those to form an equivalence class: we're going to collapse
* them all down to a single node, and keep the leaf-most node -
* which has the same id as the equivalence class id.
*
* If there are multiple keys in different snapshots at the same
* position, we're only going to keep the one in the newest
* snapshot (we delete the others above) - the rest have been
* overwritten and are redundant, and for the key we're going to keep we
* need to move it to the equivalance class ID if it's not there
* already.
*/
if (equiv != k.k->p.snapshot) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
int ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;
new->k.p.snapshot = equiv;
struct btree_iter new_iter;
bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
BTREE_ITER_all_snapshots|
BTREE_ITER_cached|
BTREE_ITER_intent);
ret = bch2_btree_iter_traverse(&new_iter) ?:
bch2_trans_update(trans, &new_iter, new,
BTREE_UPDATE_internal_snapshot_node) ?:
bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);
bch2_trans_iter_exit(trans, &new_iter);
if (ret)
return ret;
for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) {
u32 live_child = s->children[i]
? __live_child(t, s->children[i], delete_leaves, delete_interior)
: 0;
if (live_child)
return live_child;
}
return 0;
}
static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k)
static unsigned live_child(struct bch_fs *c, u32 id,
snapshot_id_list *delete_leaves,
interior_delete_list *delete_interior)
{
struct bkey_s_c_snapshot snap;
u32 children[2];
int ret;
rcu_read_lock();
u32 ret = __live_child(rcu_dereference(c->snapshots), id,
delete_leaves, delete_interior);
rcu_read_unlock();
return ret;
}
if (k.k->type != KEY_TYPE_snapshot)
return 0;
static int delete_dead_snapshots_process_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
snapshot_id_list *delete_leaves,
interior_delete_list *delete_interior)
{
if (snapshot_list_has_id(delete_leaves, k.k->p.snapshot))
return bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);
snap = bkey_s_c_to_snapshot(k);
if (BCH_SNAPSHOT_DELETED(snap.v) ||
BCH_SNAPSHOT_SUBVOL(snap.v))
return 0;
u32 live_child = interior_delete_has_id(delete_interior, k.k->p.snapshot);
if (live_child) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
int ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;
children[0] = le32_to_cpu(snap.v->children[0]);
children[1] = le32_to_cpu(snap.v->children[1]);
new->k.p.snapshot = live_child;
ret = bch2_snapshot_live(trans, children[0]) ?:
bch2_snapshot_live(trans, children[1]);
if (ret < 0)
struct btree_iter dst_iter;
struct bkey_s_c dst_k = bch2_bkey_get_iter(trans, &dst_iter,
iter->btree_id, new->k.p,
BTREE_ITER_all_snapshots|
BTREE_ITER_intent);
ret = bkey_err(dst_k);
if (ret)
return ret;
ret = (bkey_deleted(dst_k.k)
? bch2_trans_update(trans, &dst_iter, new,
BTREE_UPDATE_internal_snapshot_node)
: 0) ?:
bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);
bch2_trans_iter_exit(trans, &dst_iter);
return ret;
return !ret;
}
return 0;
}
/*
@ -1517,26 +1434,57 @@ static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c
* it doesn't have child snapshot nodes - it's now redundant and we can mark it
* as deleted.
*/
static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k)
static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k,
snapshot_id_list *delete_leaves,
interior_delete_list *delete_interior)
{
int ret = bch2_snapshot_needs_delete(trans, k);
if (k.k->type != KEY_TYPE_snapshot)
return 0;
return ret <= 0
? ret
: bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
struct bch_fs *c = trans->c;
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
unsigned live_children = 0;
if (BCH_SNAPSHOT_SUBVOL(s.v))
return 0;
for (unsigned i = 0; i < 2; i++) {
u32 child = le32_to_cpu(s.v->children[i]);
live_children += child &&
!snapshot_list_has_id(delete_leaves, child);
}
if (live_children == 0) {
return snapshot_list_add(c, delete_leaves, s.k->p.offset);
} else if (live_children == 1) {
struct snapshot_interior_delete d = {
.id = s.k->p.offset,
.live_child = live_child(c, s.k->p.offset, delete_leaves, delete_interior),
};
if (!d.live_child) {
bch_err(c, "error finding live child of snapshot %u", d.id);
return -EINVAL;
}
return darray_push(delete_interior, d);
} else {
return 0;
}
}
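A small worked example of the classification above (illustration only, not from the commit; child nodes normally carry lower ids than their parent, so the forward iteration in bch2_delete_dead_snapshots() sees them first):

/*
 *          100 (interior)
 *         /   \
 *       98     99         98: leaf, no subvol     -> delete_leaves
 *                          99: leaf, SUBVOL set    -> kept
 *                         100: one live child (99) -> delete_interior
 *                              { .id = 100, .live_child = 99 }
 *
 * delete_dead_snapshots_process_key() then deletes keys in snapshot 98 and
 * rewrites keys in snapshot 100 to snapshot 99, unless snapshot 99 already
 * has its own key at that position.
 */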
static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
snapshot_id_list *skip)
interior_delete_list *skip)
{
rcu_read_lock();
while (snapshot_list_has_id(skip, id))
while (interior_delete_has_id(skip, id))
id = __bch2_snapshot_parent(c, id);
while (n--) {
do {
id = __bch2_snapshot_parent(c, id);
} while (snapshot_list_has_id(skip, id));
} while (interior_delete_has_id(skip, id));
}
rcu_read_unlock();
@ -1545,7 +1493,7 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
struct btree_iter *iter, struct bkey_s_c k,
snapshot_id_list *deleted)
interior_delete_list *deleted)
{
struct bch_fs *c = trans->c;
u32 nr_deleted_ancestors = 0;
@ -1555,7 +1503,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
if (k.k->type != KEY_TYPE_snapshot)
return 0;
if (snapshot_list_has_id(deleted, k.k->p.offset))
if (interior_delete_has_id(deleted, k.k->p.offset))
return 0;
s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot);
@ -1564,7 +1512,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
return ret;
darray_for_each(*deleted, i)
nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i);
nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id);
if (!nr_deleted_ancestors)
return 0;
@ -1582,7 +1530,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) {
u32 id = le32_to_cpu(s->v.skip[j]);
if (snapshot_list_has_id(deleted, id)) {
if (interior_delete_has_id(deleted, id)) {
id = bch2_snapshot_nth_parent_skip(c,
parent,
depth > 1
@ -1601,51 +1549,44 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
int bch2_delete_dead_snapshots(struct bch_fs *c)
{
struct btree_trans *trans;
snapshot_id_list deleted = { 0 };
snapshot_id_list deleted_interior = { 0 };
int ret = 0;
if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
return 0;
trans = bch2_trans_get(c);
struct btree_trans *trans = bch2_trans_get(c);
snapshot_id_list delete_leaves = {};
interior_delete_list delete_interior = {};
int ret = 0;
/*
* For every snapshot node: If we have no live children and it's not
* pointed to by a subvolume, delete it:
*/
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
NULL, NULL, 0,
bch2_delete_redundant_snapshot(trans, k));
bch_err_msg(c, ret, "deleting redundant snapshots");
if (ret)
goto err;
ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
bch2_snapshot_set_equiv(trans, k));
bch_err_msg(c, ret, "in bch2_snapshots_set_equiv");
if (ret)
goto err;
ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k, ({
if (k.k->type != KEY_TYPE_snapshot)
continue;
BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v)
? snapshot_list_add(c, &deleted, k.k->p.offset)
: 0;
}));
ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
check_should_delete_snapshot(trans, k, &delete_leaves, &delete_interior));
bch_err_msg(c, ret, "walking snapshots");
if (ret)
goto err;
if (!delete_leaves.nr && !delete_interior.nr)
goto err;
{
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "deleting leaves");
darray_for_each(delete_leaves, i)
prt_printf(&buf, " %u", *i);
prt_printf(&buf, " interior");
darray_for_each(delete_interior, i)
prt_printf(&buf, " %u->%u", i->id, i->live_child);
ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf));
printbuf_exit(&buf);
if (ret)
goto err;
}
for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
struct bpos last_pos = POS_MIN;
snapshot_id_list equiv_seen = { 0 };
struct disk_reservation res = { 0 };
if (!btree_type_has_snapshots(btree))
@ -1655,33 +1596,24 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
btree, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
&res, NULL, BCH_TRANS_COMMIT_no_enospc,
delete_dead_snapshots_process_key(trans, &iter, k, &deleted,
&equiv_seen, &last_pos));
delete_dead_snapshots_process_key(trans, &iter, k,
&delete_leaves,
&delete_interior));
bch2_disk_reservation_put(c, &res);
darray_exit(&equiv_seen);
bch_err_msg(c, ret, "deleting keys from dying snapshots");
if (ret)
goto err;
}
bch2_trans_unlock(trans);
down_write(&c->snapshot_create_lock);
ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k, ({
u32 snapshot = k.k->p.offset;
u32 equiv = bch2_snapshot_equiv(c, snapshot);
equiv != snapshot
? snapshot_list_add(c, &deleted_interior, snapshot)
: 0;
}));
bch_err_msg(c, ret, "walking snapshots");
if (ret)
goto err_create_lock;
darray_for_each(delete_leaves, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i));
bch_err_msg(c, ret, "deleting snapshot %u", *i);
if (ret)
goto err;
}
/*
* Fixing children of deleted snapshots can't be done completely
@ -1691,30 +1623,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
BTREE_ITER_intent, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &delete_interior));
if (ret)
goto err_create_lock;
goto err;
darray_for_each(deleted, i) {
darray_for_each(delete_interior, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i));
bch_err_msg(c, ret, "deleting snapshot %u", *i);
bch2_snapshot_node_delete(trans, i->id));
bch_err_msg(c, ret, "deleting snapshot %u", i->id);
if (ret)
goto err_create_lock;
goto err;
}
darray_for_each(deleted_interior, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i));
bch_err_msg(c, ret, "deleting snapshot %u", *i);
if (ret)
goto err_create_lock;
}
err_create_lock:
up_write(&c->snapshot_create_lock);
err:
darray_exit(&deleted_interior);
darray_exit(&deleted);
darray_exit(&delete_interior);
darray_exit(&delete_leaves);
bch2_trans_put(trans);
bch_err_fn(c, ret);
return ret;
@ -1767,37 +1689,36 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
return ret;
}
static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap)
{
/* If there's one child, it's redundant and keys will be moved to the child */
return !!snap.v->children[0] + !!snap.v->children[1] == 1;
}
static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct bkey_s_c_snapshot snap;
int ret = 0;
if (k.k->type != KEY_TYPE_snapshot)
return 0;
snap = bkey_s_c_to_snapshot(k);
struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
if (BCH_SNAPSHOT_DELETED(snap.v) ||
bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset ||
(ret = bch2_snapshot_needs_delete(trans, k)) > 0) {
set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
return 0;
}
interior_snapshot_needs_delete(snap))
set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags);
return ret;
return 0;
}
int bch2_snapshots_read(struct bch_fs *c)
{
/*
* Initializing the is_ancestor bitmaps requires ancestors to already be
* initialized - so mark in reverse:
*/
int ret = bch2_trans_run(c,
for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots,
POS_MAX, 0, k,
__bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
bch2_snapshot_set_equiv(trans, k) ?:
bch2_check_snapshot_needs_deletion(trans, k)) ?:
for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
(set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
bch2_check_snapshot_needs_deletion(trans, k)));
bch_err_fn(c, ret);
/*


@ -119,19 +119,19 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
return id;
}
static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
static inline bool __bch2_snapshot_exists(struct bch_fs *c, u32 id)
{
const struct snapshot_t *s = snapshot_t(c, id);
return s ? s->equiv : 0;
return s ? s->live : 0;
}
static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id)
{
rcu_read_lock();
id = __bch2_snapshot_equiv(c, id);
bool ret = __bch2_snapshot_exists(c, id);
rcu_read_unlock();
return id;
return ret;
}
static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)


@ -101,38 +101,108 @@ static int hash_pick_winner(struct btree_trans *trans,
}
}
int bch2_str_hash_check_key(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c hash_k)
static int repair_inode_hash_info(struct btree_trans *trans,
struct bch_inode_unpacked *snapshot_root)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes,
SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot - 1),
BTREE_ITER_all_snapshots, k, ret) {
if (k.k->p.offset != snapshot_root->bi_inum)
break;
if (!bkey_is_inode(k.k))
continue;
struct bch_inode_unpacked inode;
ret = bch2_inode_unpack(k, &inode);
if (ret)
break;
if (fsck_err_on(inode.bi_hash_seed != snapshot_root->bi_hash_seed ||
INODE_STR_HASH(&inode) != INODE_STR_HASH(snapshot_root),
trans, inode_snapshot_mismatch,
"inode hash info in different snapshots don't match")) {
inode.bi_hash_seed = snapshot_root->bi_hash_seed;
SET_INODE_STR_HASH(&inode, INODE_STR_HASH(snapshot_root));
ret = __bch2_fsck_write_inode(trans, &inode) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
-BCH_ERR_transaction_restart_nested;
break;
}
}
fsck_err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
/*
* All versions of the same inode in different snapshots must have the same hash
* seed/type: verify that the hash info we're using matches the root
*/
static int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum,
struct bch_hash_info *hash_info)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, U32_MAX),
BTREE_ITER_all_snapshots, k, ret) {
if (k.k->p.offset != inum)
break;
if (bkey_is_inode(k.k))
goto found;
}
bch_err(c, "%s(): inum %llu not found", __func__, inum);
ret = -BCH_ERR_fsck_repair_unimplemented;
goto err;
found:;
struct bch_inode_unpacked inode;
ret = bch2_inode_unpack(k, &inode);
if (ret)
goto err;
struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode);
if (memcmp(hash_info, &hash2, sizeof(hash2))) {
ret = repair_inode_hash_info(trans, &inode);
if (!ret) {
bch_err(c, "inode hash info mismatch with root, but mismatch not found");
ret = -BCH_ERR_fsck_repair_unimplemented;
}
}
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
int __bch2_str_hash_check_key(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc *desc,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c hash_k)
{
struct bch_fs *c = trans->c;
struct btree_iter iter = { NULL };
struct printbuf buf = PRINTBUF;
struct bkey_s_c k;
u64 hash;
int ret = 0;
if (hash_k.k->type != desc.key_type)
return 0;
hash = desc.hash_bkey(hash_info, hash_k);
if (likely(hash == hash_k.k->p.offset))
return 0;
u64 hash = desc->hash_bkey(hash_info, hash_k);
if (hash_k.k->p.offset < hash)
goto bad_hash;
for_each_btree_key_norestart(trans, iter, desc.btree_id,
for_each_btree_key_norestart(trans, iter, desc->btree_id,
SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
BTREE_ITER_slots, k, ret) {
if (bkey_eq(k.k->p, hash_k.k->p))
break;
if (k.k->type == desc.key_type &&
!desc.cmp_bkey(k, hash_k))
if (k.k->type == desc->key_type &&
!desc->cmp_bkey(k, hash_k))
goto duplicate_entries;
if (bkey_deleted(k.k)) {
@ -145,16 +215,23 @@ out:
printbuf_exit(&buf);
return ret;
bad_hash:
/*
* Before doing any repair, check hash_info itself:
*/
ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info);
if (ret)
goto out;
if (fsck_err(trans, hash_table_key_wrong_offset,
"hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s",
bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
bch2_btree_id_str(desc->btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k);
if (IS_ERR(new))
return PTR_ERR(new);
k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info,
k = bch2_hash_set_or_get_in_snapshot(trans, &iter, *desc, hash_info,
(subvol_inum) { 0, hash_k.k->p.inode },
hash_k.k->p.snapshot, new,
STR_HASH_must_create|
@ -166,9 +243,9 @@ bad_hash:
if (k.k)
goto duplicate_entries;
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter,
ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
BTREE_UPDATE_internal_snapshot_node) ?:
bch2_fsck_update_backpointers(trans, s, desc, hash_info, new) ?:
bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
-BCH_ERR_transaction_restart_nested;
goto out;
@ -176,7 +253,7 @@ bad_hash:
fsck_err:
goto out;
duplicate_entries:
ret = hash_pick_winner(trans, desc, hash_info, hash_k, k);
ret = hash_pick_winner(trans, *desc, hash_info, hash_k, k);
if (ret < 0)
goto out;
@ -192,14 +269,14 @@ duplicate_entries:
switch (ret) {
case 0:
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
break;
case 1:
ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0);
ret = bch2_hash_delete_at(trans, *desc, hash_info, &iter, 0);
break;
case 2:
ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?:
bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
ret = fsck_rename_dirent(trans, s, *desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?:
bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
goto out;
}

View File

@ -394,10 +394,25 @@ int bch2_hash_delete(struct btree_trans *trans,
}
struct snapshots_seen;
int bch2_str_hash_check_key(struct btree_trans *,
struct snapshots_seen *,
const struct bch_hash_desc,
struct bch_hash_info *,
struct btree_iter *, struct bkey_s_c);
int __bch2_str_hash_check_key(struct btree_trans *,
struct snapshots_seen *,
const struct bch_hash_desc *,
struct bch_hash_info *,
struct btree_iter *, struct bkey_s_c);
static inline int bch2_str_hash_check_key(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc *desc,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c hash_k)
{
if (hash_k.k->type != desc->key_type)
return 0;
if (likely(desc->hash_bkey(hash_info, hash_k) == hash_k.k->p.offset))
return 0;
return __bch2_str_hash_check_key(trans, s, desc, hash_info, k_iter, hash_k);
}
#endif /* _BCACHEFS_STR_HASH_H */
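
The header change hoists the cheap checks (key type, hash matches offset) into a static inline wrapper so fsck only pays for an out-of-line call when something actually looks wrong. A small standalone sketch of that split — the names (check_entry, __check_entry_slowpath) are made up for the example:

#include <stdint.h>
#include <stdio.h>

/* Out-of-line slow path: only reached when the cheap check fails. */
static int __check_entry_slowpath(uint64_t slot, uint64_t expected)
{
	printf("entry in slot %llu should be at %llu: repairing\n",
	       (unsigned long long) slot, (unsigned long long) expected);
	return 0;
}

/* Inline fast path: the common case can return without an out-of-line call. */
static inline int check_entry(uint64_t slot, uint64_t expected)
{
	if (slot == expected)
		return 0;
	return __check_entry_slowpath(slot, expected);
}

int main(void)
{
	check_entry(7, 7);	/* hot path: nothing to do */
	check_entry(7, 9);	/* cold path: falls through to the slow path */
	return 0;
}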

View File

@ -409,26 +409,56 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d
*/
static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
{
struct btree_iter iter;
struct bkey_s_c_subvolume subvol;
u32 snapid;
int ret = 0;
struct btree_iter subvol_iter = {}, snapshot_iter = {}, snapshot_tree_iter = {};
subvol = bch2_bkey_get_iter_typed(trans, &iter,
struct bkey_s_c_subvolume subvol =
bch2_bkey_get_iter_typed(trans, &subvol_iter,
BTREE_ID_subvolumes, POS(0, subvolid),
BTREE_ITER_cached|BTREE_ITER_intent,
subvolume);
ret = bkey_err(subvol);
int ret = bkey_err(subvol);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
"missing subvolume %u", subvolid);
if (ret)
return ret;
goto err;
snapid = le32_to_cpu(subvol.v->snapshot);
u32 snapid = le32_to_cpu(subvol.v->snapshot);
ret = bch2_btree_delete_at(trans, &iter, 0) ?:
struct bkey_s_c_snapshot snapshot =
bch2_bkey_get_iter_typed(trans, &snapshot_iter,
BTREE_ID_snapshots, POS(0, snapid),
0, snapshot);
	ret = bkey_err(snapshot);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
"missing snapshot %u", snapid);
if (ret)
goto err;
u32 treeid = le32_to_cpu(snapshot.v->tree);
struct bkey_s_c_snapshot_tree snapshot_tree =
bch2_bkey_get_iter_typed(trans, &snapshot_tree_iter,
BTREE_ID_snapshot_trees, POS(0, treeid),
0, snapshot_tree);
if (le32_to_cpu(snapshot_tree.v->master_subvol) == subvolid) {
struct bkey_i_snapshot_tree *snapshot_tree_mut =
bch2_bkey_make_mut_typed(trans, &snapshot_tree_iter,
&snapshot_tree.s_c,
0, snapshot_tree);
ret = PTR_ERR_OR_ZERO(snapshot_tree_mut);
if (ret)
goto err;
snapshot_tree_mut->v.master_subvol = 0;
}
ret = bch2_btree_delete_at(trans, &subvol_iter, 0) ?:
bch2_snapshot_node_set_deleted(trans, snapid);
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_iter_exit(trans, &snapshot_tree_iter);
bch2_trans_iter_exit(trans, &snapshot_iter);
bch2_trans_iter_exit(trans, &subvol_iter);
return ret;
}
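
Worth spelling out what the extra lookups buy: if the subvolume being deleted is recorded as its snapshot tree's master_subvol, that back-reference is zeroed so the tree never points at a dead subvolume. A toy sketch of the same "clear the owner's pointer before freeing the referenced object" step, with invented types (struct tree, struct subvol), not the on-disk format:

#include <stdio.h>
#include <stdlib.h>

struct subvol { unsigned id; };

struct tree {
	unsigned master_subvol;	/* 0 means "no master" */
};

/* Deleting a subvolume must not leave the tree pointing at it. */
static void delete_subvol(struct tree *t, struct subvol *s)
{
	if (t->master_subvol == s->id)
		t->master_subvol = 0;
	free(s);
}

int main(void)
{
	struct tree t = { .master_subvol = 5 };
	struct subvol *s = malloc(sizeof(*s));

	if (!s)
		return 1;
	s->id = 5;
	delete_subvol(&t, s);
	printf("master_subvol is now %u\n", t.master_subvol);
	return 0;
}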

View File

@ -9,13 +9,13 @@ typedef DARRAY(u32) snapshot_id_list;
#define IS_ANCESTOR_BITMAP 128
struct snapshot_t {
bool live;
u32 parent;
u32 skip[3];
u32 depth;
u32 children[2];
u32 subvol; /* Nonzero only if a subvolume points to this node: */
u32 tree;
u32 equiv;
unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)];
};

View File

@ -1084,9 +1084,16 @@ int bch2_write_super(struct bch_fs *c)
": Superblock write was silently dropped! (seq %llu expected %llu)",
le64_to_cpu(ca->sb_read_scratch->seq),
ca->disk_sb.seq);
bch2_fs_fatal_error(c, "%s", buf.buf);
if (c->opts.errors != BCH_ON_ERROR_continue &&
c->opts.errors != BCH_ON_ERROR_fix_safe) {
ret = -BCH_ERR_erofs_sb_err;
bch2_fs_fatal_error(c, "%s", buf.buf);
} else {
bch_err(c, "%s", buf.buf);
}
printbuf_exit(&buf);
ret = -BCH_ERR_erofs_sb_err;
}
if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {

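Per the commit subject, a silently dropped superblock write is no longer unconditionally fatal: with errors=continue or errors=fix_safe it is only logged, while the stricter settings still force the filesystem read-only via -BCH_ERR_erofs_sb_err. A tiny sketch of that decision, with a made-up enum standing in for the mount option values:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the errors= mount option; not the real bcachefs enum values. */
enum error_policy { ERRORS_CONTINUE, ERRORS_FIX_SAFE, ERRORS_RO, ERRORS_PANIC };

/* A dropped superblock write is fatal only for the stricter policies. */
static bool dropped_sb_write_is_fatal(enum error_policy p)
{
	return p != ERRORS_CONTINUE && p != ERRORS_FIX_SAFE;
}

int main(void)
{
	printf("errors=continue -> %s\n",
	       dropped_sb_write_is_fatal(ERRORS_CONTINUE) ? "go read-only" : "log and carry on");
	printf("errors=ro       -> %s\n",
	       dropped_sb_write_is_fatal(ERRORS_RO) ? "go read-only" : "log and carry on");
	return 0;
}
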
View File

@ -563,6 +563,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[WRITE]);
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
bch2_fs_btree_gc_exit(c);
bch2_journal_keys_put_initial(c);
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
BUG_ON(atomic_read(&c->journal_keys.ref));
@ -770,13 +771,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
spin_lock_init(&c->recovery_pass_lock);
sema_init(&c->online_fsck_mutex, 1);
init_rwsem(&c->gc_lock);
mutex_init(&c->gc_gens_lock);
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_init(&c->times[i]);
bch2_fs_gc_init(c);
bch2_fs_copygc_init(c);
bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
bch2_fs_btree_iter_init_early(c);
@ -911,6 +908,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_btree_cache_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
bch2_fs_btree_interior_update_init(c) ?:
bch2_fs_btree_gc_init(c) ?:
bch2_fs_buckets_waiting_for_journal_init(c) ?:
bch2_fs_btree_write_buffer_init(c) ?:
bch2_fs_subvolumes_init(c) ?:
@ -1306,8 +1304,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
init_completion(&ca->ref_completion);
init_completion(&ca->io_ref_completion);
init_rwsem(&ca->bucket_lock);
INIT_WORK(&ca->io_error_work, bch2_io_error_work);
bch2_time_stats_quantiles_init(&ca->io_latency[READ]);

View File

@ -203,7 +203,6 @@ read_attribute(disk_groups);
read_attribute(has_data);
read_attribute(alloc_debug);
read_attribute(accounting);
read_attribute(usage_base);
#define x(t, n, ...) read_attribute(t);
@ -397,9 +396,6 @@ SHOW(bch2_fs)
if (attr == &sysfs_alloc_debug)
bch2_fs_alloc_debug_to_text(out, c);
if (attr == &sysfs_accounting)
bch2_fs_accounting_to_text(out, c);
if (attr == &sysfs_usage_base)
bch2_fs_usage_base_to_text(out, c);
@ -509,15 +505,22 @@ SHOW(bch2_fs_counters)
printbuf_tabstop_push(out, 32);
#define x(t, ...) \
#define x(t, n, f, ...) \
if (attr == &sysfs_##t) { \
counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\
counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\
if (f & TYPE_SECTORS) { \
counter <<= 9; \
counter_since_mount <<= 9; \
} \
\
prt_printf(out, "since mount:\t"); \
(f & TYPE_COUNTER) ? prt_u64(out, counter_since_mount) :\
prt_human_readable_u64(out, counter_since_mount); \
prt_newline(out); \
\
prt_printf(out, "since filesystem creation:\t"); \
(f & TYPE_COUNTER) ? prt_u64(out, counter) : \
prt_human_readable_u64(out, counter); \
prt_newline(out); \
}
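
The reworked macro now looks at per-counter flags: counters tagged TYPE_SECTORS are stored in 512-byte sectors and shifted left by 9 to display bytes, while TYPE_COUNTER values are printed raw instead of human-readable. A self-contained sketch of that formatting decision — the flag names here are invented for the example:

#include <stdint.h>
#include <stdio.h>

#define FLAG_SECTORS (1u << 0)	/* value counts 512-byte sectors */
#define FLAG_COUNTER (1u << 1)	/* plain event count, print raw */

static void show_counter(const char *name, uint64_t v, unsigned flags)
{
	if (flags & FLAG_SECTORS)
		v <<= 9;	/* sectors -> bytes */

	if (flags & FLAG_COUNTER)
		printf("%s: %llu\n", name, (unsigned long long) v);
	else
		printf("%s: %.1f MiB\n", name, (double) v / (1 << 20));
}

int main(void)
{
	show_counter("io_read",            2048, FLAG_SECTORS);	/* 2048 sectors = 1.0 MiB */
	show_counter("transaction_commit", 1234, FLAG_COUNTER);
	return 0;
}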
@ -595,7 +598,6 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_disk_groups,
&sysfs_alloc_debug,
&sysfs_accounting,
&sysfs_usage_base,
NULL
};

View File

@ -1338,6 +1338,12 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced,
__entry->new_u64s)
);
DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip),
TP_ARGS(trans, caller_ip)
);
TRACE_EVENT(path_downgrade,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip,
@ -1374,10 +1380,21 @@ TRACE_EVENT(path_downgrade,
__entry->pos_snapshot)
);
DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip),
TP_ARGS(trans, caller_ip)
TRACE_EVENT(key_cache_fill,
TP_PROTO(struct btree_trans *trans, const char *key),
TP_ARGS(trans, key),
TP_STRUCT__entry(
__array(char, trans_fn, 32 )
__string(key, key )
),
TP_fast_assign(
strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
__assign_str(key);
),
TP_printk("%s %s", __entry->trans_fn, __get_str(key))
);
TRACE_EVENT(write_buffer_flush,
@ -1436,6 +1453,24 @@ TRACE_EVENT(write_buffer_flush_slowpath,
TP_printk("%zu/%zu", __entry->slowpath, __entry->total)
);
TRACE_EVENT(write_buffer_maybe_flush,
TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *key),
TP_ARGS(trans, caller_ip, key),
TP_STRUCT__entry(
__array(char, trans_fn, 32 )
__field(unsigned long, caller_ip )
__string(key, key )
),
TP_fast_assign(
strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
__assign_str(key);
),
TP_printk("%s %pS %s", __entry->trans_fn, (void *) __entry->caller_ip, __get_str(key))
);
DEFINE_EVENT(fs_str, rebalance_extent,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)

View File

@ -55,6 +55,16 @@ static inline size_t buf_pages(void *p, size_t len)
PAGE_SIZE);
}
static inline void *bch2_kvmalloc(size_t n, gfp_t flags)
{
void *p = unlikely(n >= INT_MAX)
? vmalloc(n)
: kvmalloc(n, flags & ~__GFP_ZERO);
if (p && (flags & __GFP_ZERO))
memset(p, 0, n);
return p;
}
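
The helper exists because requests of INT_MAX or more can't go through kvmalloc(), so they are sent straight to vmalloc(); and since no flags can be passed on that path, __GFP_ZERO is masked off and the zeroing is done by hand so both paths behave identically. A userspace analogue of the same fallback-plus-manual-zeroing shape (threshold, flag, and function names invented for the sketch):

#include <limits.h>
#include <stdlib.h>
#include <string.h>

#define WANT_ZEROED 1u

/* Pretend malloc() is the "normal" allocator and big_alloc() the fallback. */
static void *big_alloc(size_t n)
{
	return malloc(n);	/* stand-in for vmalloc() */
}

static void *alloc_maybe_huge(size_t n, unsigned flags)
{
	void *p = n >= (size_t) INT_MAX
		? big_alloc(n)		/* fallback path takes no flags */
		: malloc(n);

	/* Zero manually so both paths behave the same. */
	if (p && (flags & WANT_ZEROED))
		memset(p, 0, n);
	return p;
}

int main(void)
{
	char *p = alloc_maybe_huge(4096, WANT_ZEROED);

	if (!p)
		return 1;
	/* p[0..4095] are all zero here. */
	free(p);
	return 0;
}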
#define init_heap(heap, _size, gfp) \
({ \
(heap)->nr = 0; \

View File

@ -9,6 +9,7 @@
#include <valgrind/memcheck.h>
#endif
#include "errcode.h"
#include "varint.h"
/**
@ -53,7 +54,7 @@ int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
u64 v;
if (unlikely(in + bytes > end))
return -1;
return -BCH_ERR_varint_decode_error;
if (likely(bytes < 9)) {
__le64 v_le = 0;
@ -115,7 +116,7 @@ int bch2_varint_decode_fast(const u8 *in, const u8 *end, u64 *out)
unsigned bytes = ffz(*in) + 1;
if (unlikely(in + bytes > end))
return -1;
return -BCH_ERR_varint_decode_error;
if (likely(bytes < 9)) {
v >>= bytes;