Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-01-22 00:04:31 +03:00)

commit 1055935ffe (parent 634c812a1e)

Update bcachefs sources to 864591728963 bcachefs: Dropped superblock write is no longer a fatal error

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
@@ -1 +1 @@
-55a65a994ed5fba038fda00f78416faf6f308bb8
+864591728963d416c49e502bfee56a283eda31a5
@@ -13,7 +13,7 @@ cc-cross-prefix = $(firstword $(foreach c, $(1), \
 $(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c))))
 
 # output directory for tests below
-TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$
+TMPOUT = .tmp_$$$$
 
 # try-run
 # Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
@@ -6,6 +6,7 @@
#define __LINUX_BLK_TYPES_H

#include <linux/atomic.h>
#include <linux/backing-dev.h>
#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/kobject.h>
@@ -26,6 +26,7 @@ extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
 extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
 extern void kmemleak_update_trace(const void *ptr) __ref;
 extern void kmemleak_not_leak(const void *ptr) __ref;
+extern void kmemleak_transient_leak(const void *ptr) __ref;
 extern void kmemleak_ignore(const void *ptr) __ref;
 extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
 extern void kmemleak_no_scan(const void *ptr) __ref;
@@ -93,6 +94,9 @@ static inline void kmemleak_update_trace(const void *ptr)
 static inline void kmemleak_not_leak(const void *ptr)
 {
 }
+static inline void kmemleak_transient_leak(const void *ptr)
+{
+}
 static inline void kmemleak_ignore(const void *ptr)
 {
 }
@@ -20,6 +20,7 @@
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/workqueue.h>
@@ -933,8 +933,6 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
u64 transaction_seq = trans->journal_res.seq;
BUG_ON(!transaction_seq);
BUG_ON(transaction_seq < new_a->journal_seq_nonempty);
BUG_ON(transaction_seq < new_a->journal_seq_empty);

if (log_fsck_err_on(transaction_seq && new_a->journal_seq_nonempty > transaction_seq,
trans, alloc_key_journal_seq_in_future,
@@ -107,14 +107,10 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
return;
}

percpu_down_read(&c->mark_lock);
spin_lock(&ob->lock);

ob->valid = false;
ob->data_type = 0;

spin_unlock(&ob->lock);
percpu_up_read(&c->mark_lock);

spin_lock(&c->freelist_lock);
bch2_open_bucket_hash_remove(c, ob);
@@ -178,7 +178,7 @@ static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos)
 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
 }
 
-static int bch2_backpointers_maybe_flush(struct btree_trans *trans,
+static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans,
 struct bkey_s_c visiting_k,
 struct bkey_buf *last_flushed)
 {
@ -201,17 +201,30 @@ static int backpointer_target_not_found(struct btree_trans *trans,
|
||||
* looking at may have already been deleted - failure to find what it
|
||||
* pointed to is not an error:
|
||||
*/
|
||||
ret = bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed);
|
||||
ret = last_flushed
|
||||
? bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed)
|
||||
: 0;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
|
||||
bp.v->level ? "btree node" : "extent");
|
||||
bch2_bkey_val_to_text(&buf, c, bp.s_c);
|
||||
prt_printf(&buf, "\n ");
|
||||
|
||||
prt_printf(&buf, "\n ");
|
||||
bch2_bkey_val_to_text(&buf, c, target_k);
|
||||
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry)
|
||||
if (p.ptr.dev == bp.k->p.inode) {
|
||||
prt_printf(&buf, "\n ");
|
||||
struct bkey_i_backpointer bp2;
|
||||
bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2);
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i));
|
||||
}
|
||||
|
||||
if (fsck_err(trans, backpointer_to_missing_ptr,
|
||||
"%s", buf.buf))
|
||||
ret = bch2_backpointer_del(trans, bp.k->p);
|
||||
@ -491,7 +504,7 @@ check_existing_bp:
|
||||
struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k);
|
||||
|
||||
struct bkey_s_c other_extent =
|
||||
bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, &s->last_flushed);
|
||||
bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL);
|
||||
ret = bkey_err(other_extent);
|
||||
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
|
||||
ret = 0;
|
||||
@ -553,11 +566,11 @@ check_existing_bp:
|
||||
goto err;
|
||||
missing:
|
||||
printbuf_reset(&buf);
|
||||
prt_str(&buf, "missing backpointer ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i));
|
||||
prt_newline(&buf);
|
||||
prt_str(&buf, "missing backpointer\n for: ");
|
||||
bch2_bkey_val_to_text(&buf, c, orig_k);
|
||||
prt_printf(&buf, "\n got: ");
|
||||
prt_printf(&buf, "\n want: ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i));
|
||||
prt_printf(&buf, "\n got: ");
|
||||
bch2_bkey_val_to_text(&buf, c, bp_k);
|
||||
|
||||
if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
|
||||
@ -586,12 +599,16 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
|
||||
rcu_read_lock();
|
||||
struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
|
||||
bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches);
|
||||
bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (check) {
|
||||
if (check || empty) {
|
||||
struct bkey_i_backpointer bp;
|
||||
bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
|
||||
int ret = check_bp_exists(trans, s, &bp, k);
|
||||
|
||||
int ret = check
|
||||
? check_bp_exists(trans, s, &bp, k)
|
||||
: bch2_bucket_backpointer_mod(trans, k, &bp, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -825,12 +842,15 @@ static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t
|
||||
}
|
||||
}
|
||||
|
||||
static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, struct bkey_s_c alloc_k,
|
||||
struct bkey_buf *last_flushed)
|
||||
static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);
|
||||
|
||||
static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
int ret = 0;
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_alloc_v4 a_convert;
|
||||
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
|
||||
bool need_commit = false;
|
||||
|
||||
if (a->data_type == BCH_DATA_sb ||
|
||||
a->data_type == BCH_DATA_journal ||
|
||||
@ -846,6 +866,7 @@ static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, stru
|
||||
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c bp_k;
|
||||
int ret = 0;
|
||||
for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
|
||||
bucket_pos_to_bp_start(ca, alloc_k.k->p),
|
||||
bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
|
||||
@ -854,6 +875,17 @@ static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, stru
|
||||
|
||||
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
|
||||
|
||||
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen &&
|
||||
(bp.v->bucket_gen != a->gen ||
|
||||
bp.v->pad)) {
|
||||
ret = bch2_backpointer_del(trans, bp_k.k->p);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
need_commit = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (bp.v->bucket_gen != a->gen)
|
||||
continue;
|
||||
|
||||
@ -863,30 +895,40 @@ static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, stru
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/* Cached pointers don't have backpointers: */
|
||||
|
||||
if (sectors[ALLOC_dirty] != a->dirty_sectors ||
|
||||
sectors[ALLOC_stripe] != a->stripe_sectors) {
|
||||
ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
|
||||
if (need_commit) {
|
||||
ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
|
||||
/* Cached pointers don't have backpointers: */
|
||||
|
||||
if (sectors[ALLOC_dirty] != a->dirty_sectors ||
|
||||
sectors[ALLOC_stripe] != a->stripe_sectors) {
|
||||
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) {
|
||||
ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (sectors[ALLOC_dirty] > a->dirty_sectors ||
|
||||
sectors[ALLOC_stripe] > a->stripe_sectors) {
|
||||
ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
|
||||
-BCH_ERR_transaction_restart_nested;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!sectors[ALLOC_dirty] &&
|
||||
!sectors[ALLOC_stripe])
|
||||
__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty);
|
||||
else
|
||||
__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
|
||||
}
|
||||
err:
|
||||
bch2_dev_put(ca);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_bucket_backpointer_mismatches(struct btree_trans *trans,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
return for_each_btree_key(trans, iter, BTREE_ID_alloc,
|
||||
POS_MIN, BTREE_ITER_prefetch, k, ({
|
||||
check_bucket_backpointer_mismatch_one(trans, k, last_flushed);
|
||||
}));
|
||||
}
|
||||
|
||||
static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
switch (k.k->type) {
|
||||
@ -896,6 +938,9 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
|
||||
rcu_read_lock();
|
||||
struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key;
|
||||
while (pos.inode <= k.k->p.inode) {
|
||||
if (pos.inode >= c->sb.nr_devices)
|
||||
break;
|
||||
|
||||
struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode);
|
||||
if (!ca)
|
||||
goto next;
|
||||
@ -941,7 +986,7 @@ err:
|
||||
}
|
||||
|
||||
static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans,
|
||||
struct bpos start, struct bpos *end)
|
||||
struct bpos start, struct bpos *end)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
int ret = 0;
|
||||
@ -1022,7 +1067,11 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
|
||||
ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
|
||||
sizeof(unsigned long),
|
||||
GFP_KERNEL);
|
||||
if (!ca->bucket_backpointer_mismatches) {
|
||||
ca->bucket_backpointer_empty = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
|
||||
sizeof(unsigned long),
|
||||
GFP_KERNEL);
|
||||
if (!ca->bucket_backpointer_mismatches ||
|
||||
!ca->bucket_backpointer_empty) {
|
||||
bch2_dev_put(ca);
|
||||
ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
|
||||
goto err_free_bitmaps;
|
||||
@ -1035,21 +1084,25 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
|
||||
bch2_bkey_buf_init(&s.last_flushed);
|
||||
bkey_init(&s.last_flushed.k->k);
|
||||
|
||||
ret = check_bucket_backpointer_mismatches(trans, &s.last_flushed);
|
||||
ret = for_each_btree_key(trans, iter, BTREE_ID_alloc,
|
||||
POS_MIN, BTREE_ITER_prefetch, k, ({
|
||||
check_bucket_backpointer_mismatch(trans, k, &s.last_flushed);
|
||||
}));
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
u64 nr_buckets = 0, nr_mismatches = 0;
|
||||
u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0;
|
||||
for_each_member_device(c, ca) {
|
||||
nr_buckets += ca->mi.nbuckets;
|
||||
nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets);
|
||||
nr_buckets += ca->mi.nbuckets;
|
||||
nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets);
|
||||
nr_empty += bitmap_weight(ca->bucket_backpointer_empty, ca->mi.nbuckets);
|
||||
}
|
||||
|
||||
if (!nr_mismatches)
|
||||
if (!nr_mismatches && !nr_empty)
|
||||
goto err;
|
||||
|
||||
bch_info(c, "scanning for missing backpointers in %llu/%llu buckets",
|
||||
nr_mismatches, nr_buckets);
|
||||
nr_mismatches + nr_empty, nr_buckets);
|
||||
|
||||
while (1) {
|
||||
ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end);
|
||||
@ -1086,6 +1139,8 @@ err:
|
||||
bch2_btree_cache_unpin(c);
|
||||
err_free_bitmaps:
|
||||
for_each_member_device(c, ca) {
|
||||
kvfree(ca->bucket_backpointer_empty);
|
||||
ca->bucket_backpointer_empty = NULL;
|
||||
kvfree(ca->bucket_backpointer_mismatches);
|
||||
ca->bucket_backpointer_mismatches = NULL;
|
||||
}
|
||||
@ -1122,6 +1177,25 @@ static int check_one_backpointer(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_bucket_backpointers_to_extents(struct btree_trans *trans,
|
||||
struct bch_dev *ca, struct bpos bucket)
|
||||
{
|
||||
u32 restart_count = trans->restart_count;
|
||||
struct bkey_buf last_flushed;
|
||||
bch2_bkey_buf_init(&last_flushed);
|
||||
bkey_init(&last_flushed.k->k);
|
||||
|
||||
int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers,
|
||||
bucket_pos_to_bp_start(ca, bucket),
|
||||
bucket_pos_to_bp_end(ca, bucket),
|
||||
0, k,
|
||||
check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed)
|
||||
);
|
||||
|
||||
bch2_bkey_buf_exit(&last_flushed, trans->c);
|
||||
return ret ?: trans_was_restarted(trans, restart_count);
|
||||
}
|
||||
|
||||
static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
|
||||
struct bbpos start,
|
||||
struct bbpos end)
|
||||
|
@ -547,17 +547,16 @@ struct bch_dev {
|
||||
|
||||
/*
|
||||
* Buckets:
|
||||
* Per-bucket arrays are protected by c->mark_lock, bucket_lock and
|
||||
* gc_gens_lock, for device resize - holding any is sufficient for
|
||||
* access: Or rcu_read_lock(), but only for dev_ptr_stale():
|
||||
* Per-bucket arrays are protected by either rcu_read_lock or
|
||||
* state_lock, for device resize.
|
||||
*/
|
||||
GENRADIX(struct bucket) buckets_gc;
|
||||
struct bucket_gens __rcu *bucket_gens;
|
||||
u8 *oldest_gen;
|
||||
unsigned long *buckets_nouse;
|
||||
struct rw_semaphore bucket_lock;
|
||||
|
||||
unsigned long *bucket_backpointer_mismatches;
|
||||
unsigned long *bucket_backpointer_empty;
|
||||
|
||||
struct bch_dev_usage __percpu *usage;
|
||||
|
||||
|
@@ -684,7 +684,8 @@ struct bch_sb_field_ext {
 x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \
 x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \
 x(inode_depth, BCH_VERSION(1, 17)) \
-x(persistent_inode_cursors, BCH_VERSION(1, 18))
+x(persistent_inode_cursors, BCH_VERSION(1, 18)) \
+x(autofix_errors, BCH_VERSION(1, 19))
 
 enum bcachefs_metadata_version {
 bcachefs_metadata_version_min = 9,
@ -1287,14 +1288,18 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6);
|
||||
/* Btree: */
|
||||
|
||||
enum btree_id_flags {
|
||||
BTREE_ID_EXTENTS = BIT(0),
|
||||
BTREE_ID_SNAPSHOTS = BIT(1),
|
||||
BTREE_ID_SNAPSHOT_FIELD = BIT(2),
|
||||
BTREE_ID_DATA = BIT(3),
|
||||
BTREE_IS_extents = BIT(0),
|
||||
BTREE_IS_snapshots = BIT(1),
|
||||
BTREE_IS_snapshot_field = BIT(2),
|
||||
BTREE_IS_data = BIT(3),
|
||||
BTREE_IS_write_buffer = BIT(4),
|
||||
};
|
||||
|
||||
#define BCH_BTREE_IDS() \
|
||||
x(extents, 0, BTREE_ID_EXTENTS|BTREE_ID_SNAPSHOTS|BTREE_ID_DATA,\
|
||||
x(extents, 0, \
|
||||
BTREE_IS_extents| \
|
||||
BTREE_IS_snapshots| \
|
||||
BTREE_IS_data, \
|
||||
BIT_ULL(KEY_TYPE_whiteout)| \
|
||||
BIT_ULL(KEY_TYPE_error)| \
|
||||
BIT_ULL(KEY_TYPE_cookie)| \
|
||||
@ -1302,17 +1307,20 @@ enum btree_id_flags {
|
||||
BIT_ULL(KEY_TYPE_reservation)| \
|
||||
BIT_ULL(KEY_TYPE_reflink_p)| \
|
||||
BIT_ULL(KEY_TYPE_inline_data)) \
|
||||
x(inodes, 1, BTREE_ID_SNAPSHOTS, \
|
||||
x(inodes, 1, \
|
||||
BTREE_IS_snapshots, \
|
||||
BIT_ULL(KEY_TYPE_whiteout)| \
|
||||
BIT_ULL(KEY_TYPE_inode)| \
|
||||
BIT_ULL(KEY_TYPE_inode_v2)| \
|
||||
BIT_ULL(KEY_TYPE_inode_v3)| \
|
||||
BIT_ULL(KEY_TYPE_inode_generation)) \
|
||||
x(dirents, 2, BTREE_ID_SNAPSHOTS, \
|
||||
x(dirents, 2, \
|
||||
BTREE_IS_snapshots, \
|
||||
BIT_ULL(KEY_TYPE_whiteout)| \
|
||||
BIT_ULL(KEY_TYPE_hash_whiteout)| \
|
||||
BIT_ULL(KEY_TYPE_dirent)) \
|
||||
x(xattrs, 3, BTREE_ID_SNAPSHOTS, \
|
||||
x(xattrs, 3, \
|
||||
BTREE_IS_snapshots, \
|
||||
BIT_ULL(KEY_TYPE_whiteout)| \
|
||||
BIT_ULL(KEY_TYPE_cookie)| \
|
||||
BIT_ULL(KEY_TYPE_hash_whiteout)| \
|
||||
@ -1326,7 +1334,9 @@ enum btree_id_flags {
|
||||
BIT_ULL(KEY_TYPE_quota)) \
|
||||
x(stripes, 6, 0, \
|
||||
BIT_ULL(KEY_TYPE_stripe)) \
|
||||
x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \
|
||||
x(reflink, 7, \
|
||||
BTREE_IS_extents| \
|
||||
BTREE_IS_data, \
|
||||
BIT_ULL(KEY_TYPE_reflink_v)| \
|
||||
BIT_ULL(KEY_TYPE_indirect_inline_data)| \
|
||||
BIT_ULL(KEY_TYPE_error)) \
|
||||
@ -1334,29 +1344,38 @@ enum btree_id_flags {
|
||||
BIT_ULL(KEY_TYPE_subvolume)) \
|
||||
x(snapshots, 9, 0, \
|
||||
BIT_ULL(KEY_TYPE_snapshot)) \
|
||||
x(lru, 10, 0, \
|
||||
x(lru, 10, \
|
||||
BTREE_IS_write_buffer, \
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(freespace, 11, BTREE_ID_EXTENTS, \
|
||||
x(freespace, 11, \
|
||||
BTREE_IS_extents, \
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(need_discard, 12, 0, \
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(backpointers, 13, 0, \
|
||||
x(backpointers, 13, \
|
||||
BTREE_IS_write_buffer, \
|
||||
BIT_ULL(KEY_TYPE_backpointer)) \
|
||||
x(bucket_gens, 14, 0, \
|
||||
BIT_ULL(KEY_TYPE_bucket_gens)) \
|
||||
x(snapshot_trees, 15, 0, \
|
||||
BIT_ULL(KEY_TYPE_snapshot_tree)) \
|
||||
x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \
|
||||
x(deleted_inodes, 16, \
|
||||
BTREE_IS_snapshot_field| \
|
||||
BTREE_IS_write_buffer, \
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(logged_ops, 17, 0, \
|
||||
BIT_ULL(KEY_TYPE_logged_op_truncate)| \
|
||||
BIT_ULL(KEY_TYPE_logged_op_finsert)| \
|
||||
BIT_ULL(KEY_TYPE_inode_alloc_cursor)) \
|
||||
x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
|
||||
x(rebalance_work, 18, \
|
||||
BTREE_IS_snapshot_field| \
|
||||
BTREE_IS_write_buffer, \
|
||||
BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \
|
||||
x(subvolume_children, 19, 0, \
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(accounting, 20, BTREE_ID_SNAPSHOT_FIELD, \
|
||||
x(accounting, 20, \
|
||||
BTREE_IS_snapshot_field| \
|
||||
BTREE_IS_write_buffer, \
|
||||
BIT_ULL(KEY_TYPE_accounting)) \
|
||||
|
||||
enum btree_id {
|
||||
|
@ -733,16 +733,8 @@ static int bch2_gc_btrees(struct bch_fs *c)
|
||||
continue;
|
||||
|
||||
ret = bch2_gc_btree(trans, btree, true);
|
||||
|
||||
if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
|
||||
trans, btree_node_read_error,
|
||||
"btree node read error for %s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_btree_id_to_text(&buf, btree),
|
||||
buf.buf)))
|
||||
ret = bch2_btree_lost_data(c, btree);
|
||||
}
|
||||
fsck_err:
|
||||
|
||||
printbuf_exit(&buf);
|
||||
bch2_trans_put(trans);
|
||||
bch_err_fn(c, ret);
|
||||
@ -811,7 +803,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
old = bch2_alloc_to_v4(k, &old_convert);
|
||||
gc = new = *old;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
__bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset));
|
||||
|
||||
old_gc = gc;
|
||||
@ -822,7 +813,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
gc.data_type = old->data_type;
|
||||
gc.dirty_sectors = old->dirty_sectors;
|
||||
}
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
/*
|
||||
* gc.data_type doesn't yet include need_discard & need_gc_gen states -
|
||||
@ -840,11 +830,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
* safe w.r.t. transaction restarts, so fixup the gc_bucket so
|
||||
* we don't run it twice:
|
||||
*/
|
||||
percpu_down_read(&c->mark_lock);
|
||||
struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
|
||||
gc_m->data_type = gc.data_type;
|
||||
gc_m->dirty_sectors = gc.dirty_sectors;
|
||||
percpu_up_read(&c->mark_lock);
|
||||
}
|
||||
|
||||
if (fsck_err_on(new.data_type != gc.data_type,
|
||||
@ -1088,7 +1076,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
|
||||
if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
|
||||
return -EROFS;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
rcu_read_lock();
|
||||
bkey_for_each_ptr(ptrs, ptr) {
|
||||
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
|
||||
@ -1097,7 +1084,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
|
||||
|
||||
if (dev_ptr_stale(ca, ptr) > 16) {
|
||||
rcu_read_unlock();
|
||||
percpu_up_read(&c->mark_lock);
|
||||
goto update;
|
||||
}
|
||||
}
|
||||
@ -1112,7 +1098,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
|
||||
*gen = ptr->gen;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
percpu_up_read(&c->mark_lock);
|
||||
return 0;
|
||||
update:
|
||||
u = bch2_bkey_make_mut(trans, iter, &k, 0);
|
||||
@ -1141,7 +1126,6 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev
|
||||
return ret;
|
||||
|
||||
a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
|
||||
alloc_data_type_set(&a_mut->v, a_mut->v.data_type);
|
||||
|
||||
return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
|
||||
}
|
||||
@ -1254,9 +1238,16 @@ void bch2_gc_gens_async(struct bch_fs *c)
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
|
||||
}
|
||||
|
||||
void bch2_fs_gc_init(struct bch_fs *c)
|
||||
void bch2_fs_btree_gc_exit(struct bch_fs *c)
|
||||
{
|
||||
}
|
||||
|
||||
int bch2_fs_btree_gc_init(struct bch_fs *c)
|
||||
{
|
||||
seqcount_init(&c->gc_pos_lock);
|
||||
|
||||
INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);
|
||||
|
||||
init_rwsem(&c->gc_lock);
|
||||
mutex_init(&c->gc_gens_lock);
|
||||
return 0;
|
||||
}
|
||||
|
@@ -82,6 +82,8 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *);
 
 int bch2_gc_gens(struct bch_fs *);
 void bch2_gc_gens_async(struct bch_fs *);
-void bch2_fs_gc_init(struct bch_fs *);
+
+void bch2_fs_btree_gc_exit(struct bch_fs *);
+int bch2_fs_btree_gc_init(struct bch_fs *);
 
 #endif /* _BCACHEFS_BTREE_GC_H */
@ -489,8 +489,8 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
|
||||
if (b->nsets == MAX_BSETS &&
|
||||
!btree_node_write_in_flight(b) &&
|
||||
should_compact_all(c, b)) {
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_write,
|
||||
BTREE_WRITE_init_next_bset);
|
||||
bch2_btree_node_write_trans(trans, b, SIX_LOCK_write,
|
||||
BTREE_WRITE_init_next_bset);
|
||||
reinit_iter = true;
|
||||
}
|
||||
|
||||
@ -2345,6 +2345,34 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_btree_node_write_trans(struct btree_trans *trans, struct btree *b,
|
||||
enum six_lock_type lock_type_held,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
if (lock_type_held == SIX_LOCK_intent ||
|
||||
(lock_type_held == SIX_LOCK_read &&
|
||||
six_lock_tryupgrade(&b->c.lock))) {
|
||||
__bch2_btree_node_write(c, b, flags);
|
||||
|
||||
/* don't cycle lock unnecessarily: */
|
||||
if (btree_node_just_written(b) &&
|
||||
six_trylock_write(&b->c.lock)) {
|
||||
bch2_btree_post_write_cleanup(c, b);
|
||||
__bch2_btree_node_unlock_write(trans, b);
|
||||
}
|
||||
|
||||
if (lock_type_held == SIX_LOCK_read)
|
||||
six_lock_downgrade(&b->c.lock);
|
||||
} else {
|
||||
__bch2_btree_node_write(c, b, flags);
|
||||
if (lock_type_held == SIX_LOCK_write &&
|
||||
btree_node_just_written(b))
|
||||
bch2_btree_post_write_cleanup(c, b);
|
||||
}
|
||||
}
|
||||
|
||||
static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
|
||||
{
|
||||
struct bucket_table *tbl;
|
||||
|
@ -144,11 +144,13 @@ enum btree_write_flags {
|
||||
void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
|
||||
void bch2_btree_node_write(struct bch_fs *, struct btree *,
|
||||
enum six_lock_type, unsigned);
|
||||
void bch2_btree_node_write_trans(struct btree_trans *, struct btree *,
|
||||
enum six_lock_type, unsigned);
|
||||
|
||||
static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
|
||||
static inline void btree_node_write_if_need(struct btree_trans *trans, struct btree *b,
|
||||
enum six_lock_type lock_held)
|
||||
{
|
||||
bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
|
||||
bch2_btree_node_write_trans(trans, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
|
||||
}
|
||||
|
||||
bool bch2_btree_flush_all_reads(struct bch_fs *);
|
||||
|
@ -699,6 +699,19 @@ void bch2_trans_node_add(struct btree_trans *trans,
|
||||
bch2_trans_revalidate_updates_in_node(trans, b);
|
||||
}
|
||||
|
||||
void bch2_trans_node_drop(struct btree_trans *trans,
|
||||
struct btree *b)
|
||||
{
|
||||
struct btree_path *path;
|
||||
unsigned i, level = b->c.level;
|
||||
|
||||
trans_for_each_path(trans, path, i)
|
||||
if (path->l[level].b == b) {
|
||||
btree_node_unlock(trans, path, level);
|
||||
path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* A btree node has been modified in such a way as to invalidate iterators - fix
|
||||
* them:
|
||||
@ -1854,7 +1867,7 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *
|
||||
!bkey_eq(path->pos, ck->key.pos));
|
||||
|
||||
*u = ck->k->k;
|
||||
k = bkey_i_to_s_c(ck->k);
|
||||
k = (struct bkey_s_c) { u, &ck->k->v };
|
||||
}
|
||||
|
||||
return k;
|
||||
@ -2144,21 +2157,18 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
static noinline
|
||||
struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k)
|
||||
void btree_trans_peek_journal(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c *k)
|
||||
{
|
||||
struct btree_path *path = btree_iter_path(trans, iter);
|
||||
struct bkey_i *next_journal =
|
||||
bch2_btree_journal_peek(trans, iter,
|
||||
k.k ? k.k->p : path_l(path)->b->key.k.p);
|
||||
|
||||
k->k ? k->k->p : path_l(path)->b->key.k.p);
|
||||
if (next_journal) {
|
||||
iter->k = next_journal->k;
|
||||
k = bkey_i_to_s_c(next_journal);
|
||||
*k = bkey_i_to_s_c(next_journal);
|
||||
}
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans,
|
||||
@ -2175,21 +2185,19 @@ static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
static noinline
|
||||
struct bkey_s_c btree_trans_peek_prev_journal(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k)
|
||||
void btree_trans_peek_prev_journal(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c *k)
|
||||
{
|
||||
struct btree_path *path = btree_iter_path(trans, iter);
|
||||
struct bkey_i *next_journal =
|
||||
bch2_btree_journal_peek_prev(trans, iter,
|
||||
k.k ? k.k->p : path_l(path)->b->key.k.p);
|
||||
k->k ? k->k->p : path_l(path)->b->key.k.p);
|
||||
|
||||
if (next_journal) {
|
||||
iter->k = next_journal->k;
|
||||
k = bkey_i_to_s_c(next_journal);
|
||||
*k = bkey_i_to_s_c(next_journal);
|
||||
}
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2234,10 +2242,15 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
|
||||
btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path);
|
||||
|
||||
k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u);
|
||||
if (k.k && !bkey_err(k)) {
|
||||
iter->k = u;
|
||||
k.k = &iter->k;
|
||||
}
|
||||
if (!k.k)
|
||||
return k;
|
||||
|
||||
if ((iter->flags & BTREE_ITER_all_snapshots) &&
|
||||
!bpos_eq(pos, k.k->p))
|
||||
return bkey_s_c_null;
|
||||
|
||||
iter->k = u;
|
||||
k.k = &iter->k;
|
||||
return k;
|
||||
}
|
||||
|
||||
@ -2260,7 +2273,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
|
||||
/* ensure that iter->k is consistent with iter->pos: */
|
||||
bch2_btree_iter_set_pos(iter, iter->pos);
|
||||
k = bkey_s_c_err(ret);
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
|
||||
struct btree_path *path = btree_iter_path(trans, iter);
|
||||
@ -2270,7 +2283,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
|
||||
/* No btree nodes at requested level: */
|
||||
bch2_btree_iter_set_pos(iter, SPOS_MAX);
|
||||
k = bkey_s_c_null;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
|
||||
btree_path_set_should_be_locked(trans, path);
|
||||
@ -2281,15 +2294,14 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
|
||||
k.k &&
|
||||
(k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
|
||||
k = k2;
|
||||
ret = bkey_err(k);
|
||||
if (ret) {
|
||||
if (bkey_err(k)) {
|
||||
bch2_btree_iter_set_pos(iter, iter->pos);
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(iter->flags & BTREE_ITER_with_journal))
|
||||
k = btree_trans_peek_journal(trans, iter, k);
|
||||
btree_trans_peek_journal(trans, iter, &k);
|
||||
|
||||
if (unlikely((iter->flags & BTREE_ITER_with_updates) &&
|
||||
trans->nr_updates))
|
||||
@ -2318,12 +2330,11 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
|
||||
/* End of btree: */
|
||||
bch2_btree_iter_set_pos(iter, SPOS_MAX);
|
||||
k = bkey_s_c_null;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
}
|
||||
out:
|
||||
bch2_btree_iter_verify(iter);
|
||||
|
||||
bch2_btree_iter_verify(iter);
|
||||
return k;
|
||||
}
|
||||
|
||||
@ -2424,7 +2435,8 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
|
||||
continue;
|
||||
}
|
||||
|
||||
if (bkey_whiteout(k.k)) {
|
||||
if (bkey_whiteout(k.k) &&
|
||||
!(iter->flags & BTREE_ITER_key_cache_fill)) {
|
||||
search_key = bkey_successor(iter, k.k->p);
|
||||
continue;
|
||||
}
|
||||
@ -2547,7 +2559,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
|
||||
}
|
||||
|
||||
if (unlikely(iter->flags & BTREE_ITER_with_journal))
|
||||
k = btree_trans_peek_prev_journal(trans, iter, k);
|
||||
btree_trans_peek_prev_journal(trans, iter, &k);
|
||||
|
||||
if (unlikely((iter->flags & BTREE_ITER_with_updates) &&
|
||||
trans->nr_updates))
|
||||
@ -2784,6 +2796,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k);
|
||||
if (unlikely(!k.k))
|
||||
goto out_no_locked;
|
||||
|
||||
if (unlikely(k.k->type == KEY_TYPE_whiteout &&
|
||||
(iter->flags & BTREE_ITER_filter_snapshots) &&
|
||||
!(iter->flags & BTREE_ITER_key_cache_fill)))
|
||||
iter->k.type = KEY_TYPE_deleted;
|
||||
} else {
|
||||
struct bpos next;
|
||||
struct bpos end = iter->pos;
|
||||
@ -3028,7 +3045,7 @@ void bch2_trans_iter_init_outlined(struct btree_trans *trans,
|
||||
unsigned flags)
|
||||
{
|
||||
bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
|
||||
bch2_btree_iter_flags(trans, btree_id, flags),
|
||||
bch2_btree_iter_flags(trans, btree_id, 0, flags),
|
||||
_RET_IP_);
|
||||
}
|
||||
|
||||
@ -3044,8 +3061,11 @@ void bch2_trans_node_iter_init(struct btree_trans *trans,
|
||||
flags |= BTREE_ITER_snapshot_field;
|
||||
flags |= BTREE_ITER_all_snapshots;
|
||||
|
||||
if (!depth && btree_id_cached(trans->c, btree_id))
|
||||
flags |= BTREE_ITER_with_key_cache;
|
||||
|
||||
bch2_trans_iter_init_common(trans, iter, btree_id, pos, locks_want, depth,
|
||||
__bch2_btree_iter_flags(trans, btree_id, flags),
|
||||
bch2_btree_iter_flags(trans, btree_id, depth, flags),
|
||||
_RET_IP_);
|
||||
|
||||
iter->min_depth = depth;
|
||||
|
@ -372,6 +372,7 @@ static inline void bch2_btree_path_downgrade(struct btree_trans *trans,
|
||||
void bch2_trans_downgrade(struct btree_trans *);
|
||||
|
||||
void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct btree *);
|
||||
void bch2_trans_node_drop(struct btree_trans *trans, struct btree *);
|
||||
void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *);
|
||||
|
||||
int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
|
||||
@ -446,10 +447,17 @@ static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 sna
|
||||
|
||||
void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
|
||||
|
||||
static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
|
||||
unsigned btree_id,
|
||||
unsigned flags)
|
||||
static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans,
|
||||
unsigned btree_id,
|
||||
unsigned level,
|
||||
unsigned flags)
|
||||
{
|
||||
if (level || !btree_id_cached(trans->c, btree_id)) {
|
||||
flags &= ~BTREE_ITER_cached;
|
||||
flags &= ~BTREE_ITER_with_key_cache;
|
||||
} else if (!(flags & BTREE_ITER_cached))
|
||||
flags |= BTREE_ITER_with_key_cache;
|
||||
|
||||
if (!(flags & (BTREE_ITER_all_snapshots|BTREE_ITER_not_extents)) &&
|
||||
btree_id_is_extents(btree_id))
|
||||
flags |= BTREE_ITER_is_extents;
|
||||
@ -468,19 +476,6 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
|
||||
return flags;
|
||||
}
|
||||
|
||||
static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans,
|
||||
unsigned btree_id,
|
||||
unsigned flags)
|
||||
{
|
||||
if (!btree_id_cached(trans->c, btree_id)) {
|
||||
flags &= ~BTREE_ITER_cached;
|
||||
flags &= ~BTREE_ITER_with_key_cache;
|
||||
} else if (!(flags & BTREE_ITER_cached))
|
||||
flags |= BTREE_ITER_with_key_cache;
|
||||
|
||||
return __bch2_btree_iter_flags(trans, btree_id, flags);
|
||||
}
|
||||
|
||||
static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
unsigned btree_id, struct bpos pos,
|
||||
@ -517,7 +512,7 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans,
|
||||
if (__builtin_constant_p(btree_id) &&
|
||||
__builtin_constant_p(flags))
|
||||
bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
|
||||
bch2_btree_iter_flags(trans, btree_id, flags),
|
||||
bch2_btree_iter_flags(trans, btree_id, 0, flags),
|
||||
_THIS_IP_);
|
||||
else
|
||||
bch2_trans_iter_init_outlined(trans, iter, btree_id, pos, flags);
|
||||
|
@ -197,7 +197,9 @@ out:
|
||||
return ck;
|
||||
}
|
||||
|
||||
static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *path,
|
||||
static int btree_key_cache_create(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
struct btree_path *ck_path,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
@ -217,7 +219,7 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
|
||||
key_u64s = min(256U, (key_u64s * 3) / 2);
|
||||
key_u64s = roundup_pow_of_two(key_u64s);
|
||||
|
||||
struct bkey_cached *ck = bkey_cached_alloc(trans, path, key_u64s);
|
||||
struct bkey_cached *ck = bkey_cached_alloc(trans, ck_path, key_u64s);
|
||||
int ret = PTR_ERR_OR_ZERO(ck);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -226,19 +228,19 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
|
||||
ck = bkey_cached_reuse(bc);
|
||||
if (unlikely(!ck)) {
|
||||
bch_err(c, "error allocating memory for key cache item, btree %s",
|
||||
bch2_btree_id_str(path->btree_id));
|
||||
bch2_btree_id_str(ck_path->btree_id));
|
||||
return -BCH_ERR_ENOMEM_btree_key_cache_create;
|
||||
}
|
||||
}
|
||||
|
||||
ck->c.level = 0;
|
||||
ck->c.btree_id = path->btree_id;
|
||||
ck->key.btree_id = path->btree_id;
|
||||
ck->key.pos = path->pos;
|
||||
ck->c.btree_id = ck_path->btree_id;
|
||||
ck->key.btree_id = ck_path->btree_id;
|
||||
ck->key.pos = ck_path->pos;
|
||||
ck->flags = 1U << BKEY_CACHED_ACCESSED;
|
||||
|
||||
if (unlikely(key_u64s > ck->u64s)) {
|
||||
mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
|
||||
mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED);
|
||||
|
||||
struct bkey_i *new_k = allocate_dropping_locks(trans, ret,
|
||||
kmalloc(key_u64s * sizeof(u64), _gfp));
|
||||
@ -258,22 +260,29 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
|
||||
|
||||
bkey_reassemble(ck->k, k);
|
||||
|
||||
ret = bch2_btree_node_lock_write(trans, path, &path_l(path)->b->c);
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
|
||||
ret = rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params);
|
||||
|
||||
bch2_btree_node_unlock_write(trans, path, path_l(path)->b);
|
||||
|
||||
if (unlikely(ret)) /* raced with another fill? */
|
||||
goto err;
|
||||
|
||||
atomic_long_inc(&bc->nr_keys);
|
||||
six_unlock_write(&ck->c.lock);
|
||||
|
||||
enum six_lock_type lock_want = __btree_lock_want(path, 0);
|
||||
enum six_lock_type lock_want = __btree_lock_want(ck_path, 0);
|
||||
if (lock_want == SIX_LOCK_read)
|
||||
six_lock_downgrade(&ck->c.lock);
|
||||
btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
|
||||
path->uptodate = BTREE_ITER_UPTODATE;
|
||||
btree_path_cached_set(trans, ck_path, ck, (enum btree_node_locked_type) lock_want);
|
||||
ck_path->uptodate = BTREE_ITER_UPTODATE;
|
||||
return 0;
|
||||
err:
|
||||
bkey_cached_free(bc, ck);
|
||||
mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
|
||||
mark_btree_node_locked_noreset(ck_path, 0, BTREE_NODE_UNLOCKED);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -293,6 +302,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
|
||||
int ret;
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos,
|
||||
BTREE_ITER_intent|
|
||||
BTREE_ITER_key_cache_fill|
|
||||
BTREE_ITER_cached_nofill);
|
||||
iter.flags &= ~BTREE_ITER_with_journal;
|
||||
@ -306,9 +316,19 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
|
||||
if (unlikely(ret))
|
||||
goto out;
|
||||
|
||||
ret = btree_key_cache_create(trans, ck_path, k);
|
||||
ret = btree_key_cache_create(trans, btree_iter_path(trans, &iter), ck_path, k);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (trace_key_cache_fill_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bpos_to_text(&buf, ck_path->pos);
|
||||
prt_char(&buf, ' ');
|
||||
bch2_bkey_val_to_text(&buf, trans->c, k);
|
||||
trace_key_cache_fill(trans, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
out:
|
||||
/* We're not likely to need this iterator again: */
|
||||
bch2_set_btree_iter_dontneed(&iter);
|
||||
@ -593,8 +613,18 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
|
||||
bkey_cached_free(bc, ck);
|
||||
|
||||
mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
|
||||
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
|
||||
path->should_be_locked = false;
|
||||
|
||||
struct btree_path *path2;
|
||||
unsigned i;
|
||||
trans_for_each_path(trans, path2, i)
|
||||
if (path2->l[0].b == (void *) ck) {
|
||||
__bch2_btree_path_unlock(trans, path2);
|
||||
path2->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_drop);
|
||||
path2->should_be_locked = false;
|
||||
btree_path_set_dirty(path2, BTREE_ITER_NEED_TRAVERSE);
|
||||
}
|
||||
|
||||
bch2_trans_verify_locks(trans);
|
||||
}
|
||||
|
||||
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
|
@ -818,6 +818,17 @@ void bch2_trans_unlock_long(struct btree_trans *trans)
|
||||
bch2_trans_srcu_unlock(trans);
|
||||
}
|
||||
|
||||
void bch2_trans_unlock_write(struct btree_trans *trans)
|
||||
{
|
||||
struct btree_path *path;
|
||||
unsigned i;
|
||||
|
||||
trans_for_each_path(trans, path, i)
|
||||
for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++)
|
||||
if (btree_node_write_locked(path, l))
|
||||
bch2_btree_node_unlock_write(trans, path, path->l[l].b);
|
||||
}
|
||||
|
||||
int __bch2_trans_mutex_lock(struct btree_trans *trans,
|
||||
struct mutex *lock)
|
||||
{
|
||||
@ -856,6 +867,9 @@ void bch2_btree_path_verify_locks(struct btree_path *path)
|
||||
(want == BTREE_NODE_UNLOCKED ||
|
||||
have != BTREE_NODE_WRITE_LOCKED) &&
|
||||
want != have);
|
||||
|
||||
BUG_ON(btree_node_locked(path, l) &&
|
||||
path->l[l].lock_seq != six_lock_seq(&path->l[l].b->c.lock));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags);
|
||||
|
||||
void bch2_trans_unlock_noassert(struct btree_trans *);
|
||||
void bch2_trans_unlock_write(struct btree_trans *);
|
||||
|
||||
static inline bool is_btree_node(struct btree_path *path, unsigned l)
|
||||
{
|
||||
@ -75,13 +76,6 @@ static inline void mark_btree_node_locked_noreset(struct btree_path *path,
|
||||
path->nodes_locked |= (type + 1) << (level << 1);
|
||||
}
|
||||
|
||||
static inline void mark_btree_node_unlocked(struct btree_path *path,
|
||||
unsigned level)
|
||||
{
|
||||
EBUG_ON(btree_node_write_locked(path, level));
|
||||
mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED);
|
||||
}
|
||||
|
||||
static inline void mark_btree_node_locked(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned level,
|
||||
@ -124,19 +118,25 @@ static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
|
||||
|
||||
/* unlock: */
|
||||
|
||||
void bch2_btree_node_unlock_write(struct btree_trans *,
|
||||
struct btree_path *, struct btree *);
|
||||
|
||||
static inline void btree_node_unlock(struct btree_trans *trans,
|
||||
struct btree_path *path, unsigned level)
|
||||
{
|
||||
int lock_type = btree_node_locked_type(path, level);
|
||||
|
||||
EBUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
EBUG_ON(lock_type == BTREE_NODE_WRITE_LOCKED);
|
||||
|
||||
if (lock_type != BTREE_NODE_UNLOCKED) {
|
||||
if (unlikely(lock_type == BTREE_NODE_WRITE_LOCKED)) {
|
||||
bch2_btree_node_unlock_write(trans, path, path->l[level].b);
|
||||
lock_type = BTREE_NODE_INTENT_LOCKED;
|
||||
}
|
||||
six_unlock_type(&path->l[level].b->c.lock, lock_type);
|
||||
btree_trans_lock_hold_time_update(trans, path, level);
|
||||
mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED);
|
||||
}
|
||||
mark_btree_node_unlocked(path, level);
|
||||
}
|
||||
|
||||
static inline int btree_path_lowest_level_locked(struct btree_path *path)
|
||||
@ -162,28 +162,32 @@ static inline void __bch2_btree_path_unlock(struct btree_trans *trans,
|
||||
* Updates the saved lock sequence number, so that bch2_btree_node_relock() will
|
||||
* succeed:
|
||||
*/
|
||||
static inline void
|
||||
__bch2_btree_node_unlock_write(struct btree_trans *trans, struct btree *b)
|
||||
{
|
||||
if (!b->c.lock.write_lock_recurse) {
|
||||
struct btree_path *linked;
|
||||
unsigned i;
|
||||
|
||||
trans_for_each_path_with_node(trans, b, linked, i)
|
||||
linked->l[b->c.level].lock_seq++;
|
||||
}
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
}
|
||||
|
||||
static inline void
|
||||
bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_path *path,
|
||||
struct btree *b)
|
||||
{
|
||||
struct btree_path *linked;
|
||||
unsigned i;
|
||||
|
||||
EBUG_ON(path->l[b->c.level].b != b);
|
||||
EBUG_ON(path->l[b->c.level].lock_seq != six_lock_seq(&b->c.lock));
|
||||
EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write);
|
||||
|
||||
mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
|
||||
|
||||
trans_for_each_path_with_node(trans, b, linked, i)
|
||||
linked->l[b->c.level].lock_seq++;
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
__bch2_btree_node_unlock_write(trans, b);
|
||||
}
|
||||
|
||||
void bch2_btree_node_unlock_write(struct btree_trans *,
|
||||
struct btree_path *, struct btree *);
|
||||
|
||||
int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);
|
||||
|
||||
/* lock: */
|
||||
|
@ -152,7 +152,7 @@ static inline void found_btree_node_swap(void *_l, void *_r, void *arg)
|
||||
swap(*l, *r);
|
||||
}
|
||||
|
||||
const struct min_heap_callbacks found_btree_node_heap_cbs = {
|
||||
static const struct min_heap_callbacks found_btree_node_heap_cbs = {
|
||||
.less = found_btree_node_cmp_pos_less,
|
||||
.swp = found_btree_node_swap,
|
||||
};
|
||||
|
@ -133,7 +133,7 @@ static inline int bch2_trans_lock_write(struct btree_trans *trans)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void bch2_trans_unlock_write(struct btree_trans *trans)
|
||||
static inline void bch2_trans_unlock_updates_write(struct btree_trans *trans)
|
||||
{
|
||||
if (likely(trans->write_locked)) {
|
||||
trans_for_each_update(trans, i)
|
||||
@ -249,7 +249,7 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
||||
new |= 1 << BTREE_NODE_need_write;
|
||||
} while (!try_cmpxchg(&b->flags, &old, new));
|
||||
|
||||
btree_node_write_if_need(c, b, SIX_LOCK_read);
|
||||
btree_node_write_if_need(trans, b, SIX_LOCK_read);
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
bch2_trans_put(trans);
|
||||
@ -384,7 +384,7 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
|
||||
struct bkey_i *new_k;
|
||||
int ret;
|
||||
|
||||
bch2_trans_unlock_write(trans);
|
||||
bch2_trans_unlock_updates_write(trans);
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
|
||||
@ -479,8 +479,7 @@ static int run_one_mem_trigger(struct btree_trans *trans,
|
||||
old, flags);
|
||||
}
|
||||
|
||||
static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i,
|
||||
bool overwrite)
|
||||
static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i)
|
||||
{
|
||||
verify_update_old_key(trans, i);
|
||||
|
||||
@ -507,10 +506,10 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
|
||||
return bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k),
|
||||
BTREE_TRIGGER_insert|
|
||||
BTREE_TRIGGER_overwrite|flags) ?: 1;
|
||||
} else if (overwrite && !i->overwrite_trigger_run) {
|
||||
} else if (!i->overwrite_trigger_run) {
|
||||
i->overwrite_trigger_run = true;
|
||||
return bch2_key_trigger_old(trans, i->btree_id, i->level, old, flags) ?: 1;
|
||||
} else if (!overwrite && !i->insert_trigger_run) {
|
||||
} else if (!i->insert_trigger_run) {
|
||||
i->insert_trigger_run = true;
|
||||
return bch2_key_trigger_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), flags) ?: 1;
|
||||
} else {
|
||||
@ -519,39 +518,45 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
|
||||
}
|
||||
|
||||
static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
|
||||
unsigned btree_id_start)
|
||||
unsigned *btree_id_updates_start)
|
||||
{
|
||||
for (int overwrite = 1; overwrite >= 0; --overwrite) {
|
||||
bool trans_trigger_run;
|
||||
bool trans_trigger_run;
|
||||
|
||||
/*
|
||||
* Running triggers will append more updates to the list of updates as
|
||||
* we're walking it:
|
||||
*/
|
||||
do {
|
||||
trans_trigger_run = false;
|
||||
/*
|
||||
* Running triggers will append more updates to the list of updates as
|
||||
* we're walking it:
|
||||
*/
|
||||
do {
|
||||
trans_trigger_run = false;
|
||||
|
||||
for (unsigned i = btree_id_start;
|
||||
i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
|
||||
i++) {
|
||||
if (trans->updates[i].btree_id != btree_id)
|
||||
continue;
|
||||
|
||||
int ret = run_one_trans_trigger(trans, trans->updates + i, overwrite);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret)
|
||||
trans_trigger_run = true;
|
||||
for (unsigned i = *btree_id_updates_start;
|
||||
i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
|
||||
i++) {
|
||||
if (trans->updates[i].btree_id < btree_id) {
|
||||
*btree_id_updates_start = i;
|
||||
continue;
|
||||
}
|
||||
} while (trans_trigger_run);
|
||||
}
|
||||
|
||||
int ret = run_one_trans_trigger(trans, trans->updates + i);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret)
|
||||
trans_trigger_run = true;
|
||||
}
|
||||
} while (trans_trigger_run);
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
BUG_ON(!(i->flags & BTREE_TRIGGER_norun) &&
|
||||
i->btree_id == btree_id &&
|
||||
btree_node_type_has_trans_triggers(i->bkey_type) &&
|
||||
(!i->insert_trigger_run || !i->overwrite_trigger_run));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
|
||||
{
|
||||
unsigned btree_id = 0, btree_id_start = 0;
|
||||
unsigned btree_id = 0, btree_id_updates_start = 0;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
@ -565,27 +570,15 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
|
||||
if (btree_id == BTREE_ID_alloc)
|
||||
continue;
|
||||
|
||||
while (btree_id_start < trans->nr_updates &&
|
||||
trans->updates[btree_id_start].btree_id < btree_id)
|
||||
btree_id_start++;
|
||||
|
||||
ret = run_btree_triggers(trans, btree_id, btree_id_start);
|
||||
ret = run_btree_triggers(trans, btree_id, &btree_id_updates_start);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
|
||||
struct btree_insert_entry *i = trans->updates + idx;
|
||||
|
||||
if (i->btree_id > BTREE_ID_alloc)
|
||||
break;
|
||||
if (i->btree_id == BTREE_ID_alloc) {
|
||||
ret = run_btree_triggers(trans, BTREE_ID_alloc, idx);
|
||||
if (ret)
|
||||
return ret;
|
||||
break;
|
||||
}
|
||||
}
|
||||
btree_id_updates_start = 0;
|
||||
ret = run_btree_triggers(trans, BTREE_ID_alloc, &btree_id_updates_start);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
trans_for_each_update(trans, i)
|
||||
@ -875,7 +868,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
|
||||
if (!ret && unlikely(trans->journal_replay_not_finished))
|
||||
bch2_drop_overwrites_from_journal(trans);
|
||||
|
||||
bch2_trans_unlock_write(trans);
|
||||
bch2_trans_unlock_updates_write(trans);
|
||||
|
||||
if (!ret && trans->journal_pin)
|
||||
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
|
||||
|
@ -790,53 +790,64 @@ static inline bool btree_node_type_has_triggers(enum btree_node_type type)
|
||||
return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS;
|
||||
}
|
||||
|
||||
static inline bool btree_node_type_is_extents(enum btree_node_type type)
|
||||
{
|
||||
const u64 mask = 0
|
||||
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
|
||||
BCH_BTREE_IDS()
|
||||
#undef x
|
||||
;
|
||||
|
||||
return BIT_ULL(type) & mask;
|
||||
}
|
||||
|
||||
static inline bool btree_id_is_extents(enum btree_id btree)
|
||||
{
|
||||
return btree_node_type_is_extents(__btree_node_type(0, btree));
|
||||
}
|
||||
|
||||
static inline bool btree_type_has_snapshots(enum btree_id id)
|
||||
{
|
||||
const u64 mask = 0
|
||||
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
|
||||
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_extents)) << nr)
|
||||
BCH_BTREE_IDS()
|
||||
#undef x
|
||||
;
|
||||
|
||||
return BIT_ULL(id) & mask;
|
||||
return BIT_ULL(btree) & mask;
|
||||
}
|
||||
|
||||
static inline bool btree_type_has_snapshot_field(enum btree_id id)
|
||||
static inline bool btree_node_type_is_extents(enum btree_node_type type)
|
||||
{
|
||||
return type != BKEY_TYPE_btree && btree_id_is_extents(type - 1);
|
||||
}
|
||||
|
||||
static inline bool btree_type_has_snapshots(enum btree_id btree)
|
||||
{
|
||||
const u64 mask = 0
|
||||
#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
|
||||
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_snapshots)) << nr)
|
||||
BCH_BTREE_IDS()
|
||||
#undef x
|
||||
;
|
||||
|
||||
return BIT_ULL(id) & mask;
|
||||
return BIT_ULL(btree) & mask;
|
||||
}
|
||||
|
||||
static inline bool btree_type_has_ptrs(enum btree_id id)
|
||||
static inline bool btree_type_has_snapshot_field(enum btree_id btree)
|
||||
{
|
||||
const u64 mask = 0
|
||||
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
|
||||
#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_IS_snapshot_field|BTREE_IS_snapshots))) << nr)
|
||||
BCH_BTREE_IDS()
|
||||
#undef x
|
||||
;
|
||||
|
||||
return BIT_ULL(id) & mask;
|
||||
return BIT_ULL(btree) & mask;
|
||||
}
|
||||
|
||||
static inline bool btree_type_has_ptrs(enum btree_id btree)
|
||||
{
|
||||
const u64 mask = 0
|
||||
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_data)) << nr)
|
||||
BCH_BTREE_IDS()
|
||||
#undef x
|
||||
;
|
||||
|
||||
return BIT_ULL(btree) & mask;
|
||||
}
|
||||
|
||||
static inline bool btree_type_uses_write_buffer(enum btree_id btree)
|
||||
{
|
||||
const u64 mask = 0
|
||||
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_write_buffer)) << nr)
|
||||
BCH_BTREE_IDS()
|
||||
#undef x
|
||||
;
|
||||
|
||||
return BIT_ULL(btree) & mask;
|
||||
}
|
||||
|
||||
struct btree_root {
|
||||
|
@ -823,10 +823,17 @@ int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
|
||||
return bch2_trans_update_buffered(trans, btree, &k);
|
||||
}
|
||||
|
||||
static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf, unsigned u64s)
|
||||
int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)
|
||||
{
|
||||
unsigned u64s = DIV_ROUND_UP(buf->pos, sizeof(u64));
|
||||
prt_chars(buf, '\0', u64s * sizeof(u64) - buf->pos);
|
||||
|
||||
int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s));
|
||||
int ret = PTR_ERR_OR_ZERO(e);
|
||||
ret = PTR_ERR_OR_ZERO(e);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -862,7 +869,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
|
||||
c->journal.early_journal_entries.nr += jset_u64s(u64s);
|
||||
} else {
|
||||
ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags,
|
||||
__bch2_trans_log_msg(trans, &buf, u64s));
|
||||
bch2_trans_log_msg(trans, &buf));
|
||||
}
|
||||
err:
|
||||
printbuf_exit(&buf);
|
||||
|
@ -159,6 +159,7 @@ void bch2_trans_commit_hook(struct btree_trans *,
|
||||
struct btree_trans_commit_hook *);
|
||||
int __bch2_trans_commit(struct btree_trans *, unsigned);
|
||||
|
||||
int bch2_trans_log_msg(struct btree_trans *, struct printbuf *);
|
||||
__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
|
||||
__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);
|
||||
|
||||
|
@ -238,7 +238,6 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
|
||||
struct btree *b)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
unsigned i, level = b->c.level;
|
||||
|
||||
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
|
||||
|
||||
@ -249,13 +248,9 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
|
||||
mutex_unlock(&c->btree_cache.lock);
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
|
||||
mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
|
||||
|
||||
trans_for_each_path(trans, path, i)
|
||||
if (path->l[level].b == b) {
|
||||
btree_node_unlock(trans, path, level);
|
||||
path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
|
||||
}
|
||||
bch2_trans_node_drop(trans, b);
|
||||
}
|
||||
|
||||
static void bch2_btree_node_free_never_used(struct btree_update *as,
|
||||
@ -264,8 +259,6 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL];
|
||||
struct btree_path *path;
|
||||
unsigned i, level = b->c.level;
|
||||
|
||||
BUG_ON(!list_empty(&b->write_blocked));
|
||||
BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as));
|
||||
@ -287,11 +280,7 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
|
||||
|
||||
six_unlock_intent(&b->c.lock);
|
||||
|
||||
trans_for_each_path(trans, path, i)
|
||||
if (path->l[level].b == b) {
|
||||
btree_node_unlock(trans, path, level);
|
||||
path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
|
||||
}
|
||||
bch2_trans_node_drop(trans, b);
|
||||
}
|
||||
|
||||
static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
|
||||
@ -803,7 +792,7 @@ err:
|
||||
mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
|
||||
six_unlock_write(&b->c.lock);
|
||||
|
||||
btree_node_write_if_need(c, b, SIX_LOCK_intent);
|
||||
btree_node_write_if_need(trans, b, SIX_LOCK_intent);
|
||||
btree_node_unlock(trans, path, b->c.level);
|
||||
bch2_path_put(trans, path_idx, true);
|
||||
}
|
||||
@ -824,7 +813,7 @@ err:
|
||||
b = as->new_nodes[i];
|
||||
|
||||
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
|
||||
btree_node_write_if_need(c, b, SIX_LOCK_read);
|
||||
btree_node_write_if_need(trans, b, SIX_LOCK_read);
|
||||
six_unlock_read(&b->c.lock);
|
||||
}
|
||||
|
||||
@ -1709,14 +1698,14 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
|
||||
if (n3) {
|
||||
bch2_btree_update_get_open_buckets(as, n3);
|
||||
bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
|
||||
bch2_btree_node_write_trans(trans, n3, SIX_LOCK_intent, 0);
|
||||
}
|
||||
if (n2) {
|
||||
bch2_btree_update_get_open_buckets(as, n2);
|
||||
bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
|
||||
bch2_btree_node_write_trans(trans, n2, SIX_LOCK_intent, 0);
|
||||
}
|
||||
bch2_btree_update_get_open_buckets(as, n1);
|
||||
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
|
||||
bch2_btree_node_write_trans(trans, n1, SIX_LOCK_intent, 0);
|
||||
|
||||
/*
|
||||
* The old node must be freed (in memory) _before_ unlocking the new
|
||||
@ -1911,7 +1900,7 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans *
|
||||
BUG_ON(ret);
|
||||
|
||||
bch2_btree_update_get_open_buckets(as, n);
|
||||
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
|
||||
bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0);
|
||||
bch2_trans_node_add(trans, path, n);
|
||||
six_unlock_intent(&n->c.lock);
|
||||
|
||||
@ -2104,7 +2093,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
bch2_trans_verify_paths(trans);
|
||||
|
||||
bch2_btree_update_get_open_buckets(as, n);
|
||||
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
|
||||
bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0);
|
||||
|
||||
bch2_btree_node_free_inmem(trans, trans->paths + path, b);
|
||||
bch2_btree_node_free_inmem(trans, trans->paths + sib_path, m);
|
||||
@ -2181,7 +2170,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
|
||||
bch2_btree_interior_update_will_free_node(as, b);
|
||||
|
||||
bch2_btree_update_get_open_buckets(as, n);
|
||||
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
|
||||
bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0);
|
||||
|
||||
bch2_btree_node_free_inmem(trans, btree_iter_path(trans, iter), b);
|
||||
|
||||
@ -2291,7 +2280,8 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
|
||||
bool now = false, pending = false;
|
||||
|
||||
spin_lock(&c->btree_node_rewrites_lock);
|
||||
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
|
||||
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_journal_replay &&
|
||||
bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
|
||||
list_add(&a->list, &c->btree_node_rewrites);
|
||||
now = true;
|
||||
} else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) {
|
||||
|
@ -312,6 +312,8 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
||||
darray_for_each(wb->sorted, i) {
|
||||
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
|
||||
|
||||
BUG_ON(!btree_type_uses_write_buffer(k->btree));
|
||||
|
||||
for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
|
||||
prefetch(&wb->flushing.keys.data[n->idx]);
|
||||
|
||||
@ -632,6 +634,14 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
|
||||
if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) {
|
||||
if (trace_write_buffer_maybe_flush_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_bkey_val_to_text(&buf, c, referring_k);
|
||||
trace_write_buffer_maybe_flush(trans, _RET_IP_, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
bch2_bkey_buf_reassemble(&tmp, c, referring_k);
|
||||
|
||||
if (bkey_is_btree_ptr(referring_k.k)) {
|
||||
|
@ -262,8 +262,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
|
||||
bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) {
|
||||
ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update);
|
||||
if (ret)
|
||||
@ -364,7 +362,6 @@ found:
|
||||
bch_info(c, "new key %s", buf.buf);
|
||||
}
|
||||
|
||||
percpu_up_read(&c->mark_lock);
|
||||
struct btree_iter iter;
|
||||
bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level,
|
||||
BTREE_ITER_intent|BTREE_ITER_all_snapshots);
|
||||
@ -373,8 +370,6 @@ found:
|
||||
BTREE_UPDATE_internal_snapshot_node|
|
||||
BTREE_TRIGGER_norun);
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
percpu_down_read(&c->mark_lock);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -382,7 +377,6 @@ found:
|
||||
bch2_btree_node_update_key_early(trans, btree, level - 1, k, new);
|
||||
}
|
||||
err:
|
||||
percpu_up_read(&c->mark_lock);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
}
|
||||
@ -547,7 +541,8 @@ static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca,
|
||||
struct bkey_s_c k,
|
||||
const struct extent_ptr_decoded *p,
|
||||
s64 sectors, enum bch_data_type ptr_data_type,
|
||||
struct bch_alloc_v4 *a)
|
||||
struct bch_alloc_v4 *a,
|
||||
bool insert)
|
||||
{
|
||||
u32 *dst_sectors = p->has_ec ? &a->stripe_sectors :
|
||||
!p->ptr.cached ? &a->dirty_sectors :
|
||||
@ -557,8 +552,8 @@ static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca,
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
alloc_data_type_set(a, ptr_data_type);
|
||||
if (insert)
|
||||
alloc_data_type_set(a, ptr_data_type);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -591,7 +586,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
|
||||
if (flags & BTREE_TRIGGER_transactional) {
|
||||
struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);
|
||||
ret = PTR_ERR_OR_ZERO(a) ?:
|
||||
__mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v);
|
||||
__mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -603,22 +598,19 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
if (flags & BTREE_TRIGGER_gc) {
|
||||
percpu_down_read(&c->mark_lock);
|
||||
struct bucket *g = gc_bucket(ca, bucket.offset);
|
||||
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
|
||||
p.ptr.dev,
|
||||
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
|
||||
ret = -BCH_ERR_trigger_pointer;
|
||||
goto err_unlock;
|
||||
goto err;
|
||||
}
|
||||
|
||||
bucket_lock(g);
|
||||
struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old;
|
||||
ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new);
|
||||
ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert);
|
||||
alloc_to_bucket(g, new);
|
||||
bucket_unlock(g);
|
||||
err_unlock:
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
if (!ret)
|
||||
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
|
||||
@ -996,11 +988,10 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
|
||||
struct bch_fs *c = trans->c;
|
||||
int ret = 0;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
struct bucket *g = gc_bucket(ca, b);
|
||||
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s",
|
||||
ca->dev_idx, bch2_data_type_str(data_type)))
|
||||
goto err_unlock;
|
||||
goto err;
|
||||
|
||||
bucket_lock(g);
|
||||
struct bch_alloc_v4 old = bucket_m_to_alloc(*g);
|
||||
@ -1010,26 +1001,24 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
|
||||
"different types of data in same bucket: %s, %s",
|
||||
bch2_data_type_str(g->data_type),
|
||||
bch2_data_type_str(data_type)))
|
||||
goto err;
|
||||
goto err_unlock;
|
||||
|
||||
if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
|
||||
"bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size",
|
||||
ca->dev_idx, b, g->gen,
|
||||
bch2_data_type_str(g->data_type ?: data_type),
|
||||
g->dirty_sectors, sectors))
|
||||
goto err;
|
||||
goto err_unlock;
|
||||
|
||||
g->data_type = data_type;
|
||||
g->dirty_sectors += sectors;
|
||||
struct bch_alloc_v4 new = bucket_m_to_alloc(*g);
|
||||
bucket_unlock(g);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
|
||||
return ret;
|
||||
err:
|
||||
bucket_unlock(g);
|
||||
err_unlock:
|
||||
percpu_up_read(&c->mark_lock);
|
||||
bucket_unlock(g);
|
||||
err:
|
||||
return -BCH_ERR_metadata_bucket_inconsistency;
|
||||
}
|
||||
|
||||
@ -1269,7 +1258,7 @@ int bch2_buckets_nouse_alloc(struct bch_fs *c)
|
||||
for_each_member_device(c, ca) {
|
||||
BUG_ON(ca->buckets_nouse);
|
||||
|
||||
ca->buckets_nouse = kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) *
|
||||
ca->buckets_nouse = bch2_kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) *
|
||||
sizeof(unsigned long),
|
||||
GFP_KERNEL|__GFP_ZERO);
|
||||
if (!ca->buckets_nouse) {
|
||||
@ -1295,10 +1284,14 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
bool resize = ca->bucket_gens != NULL;
|
||||
int ret;
|
||||
|
||||
BUG_ON(resize && ca->buckets_nouse);
|
||||
if (resize)
|
||||
lockdep_assert_held(&c->state_lock);
|
||||
|
||||
bucket_gens = kvmalloc(struct_size(bucket_gens, b, nbuckets),
|
||||
GFP_KERNEL|__GFP_ZERO);
|
||||
if (resize && ca->buckets_nouse)
|
||||
return -BCH_ERR_no_resize_with_buckets_nouse;
|
||||
|
||||
bucket_gens = bch2_kvmalloc(struct_size(bucket_gens, b, nbuckets),
|
||||
GFP_KERNEL|__GFP_ZERO);
|
||||
if (!bucket_gens) {
|
||||
ret = -BCH_ERR_ENOMEM_bucket_gens;
|
||||
goto err;
|
||||
@ -1309,11 +1302,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
bucket_gens->nbuckets_minus_first =
|
||||
bucket_gens->nbuckets - bucket_gens->first_bucket;
|
||||
|
||||
if (resize) {
|
||||
down_write(&ca->bucket_lock);
|
||||
percpu_down_write(&c->mark_lock);
|
||||
}
|
||||
|
||||
old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
|
||||
|
||||
if (resize) {
|
||||
@ -1331,11 +1319,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
|
||||
nbuckets = ca->mi.nbuckets;
|
||||
|
||||
if (resize) {
|
||||
percpu_up_write(&c->mark_lock);
|
||||
up_write(&ca->bucket_lock);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
err:
|
||||
if (bucket_gens)
|
||||
|
@ -82,16 +82,15 @@ static inline void bucket_lock(struct bucket *b)
|
||||
|
||||
static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
|
||||
{
|
||||
return genradix_ptr(&ca->buckets_gc, b);
|
||||
return bucket_valid(ca, b)
|
||||
? genradix_ptr(&ca->buckets_gc, b)
|
||||
: NULL;
|
||||
}
|
||||
|
||||
static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
|
||||
{
|
||||
return rcu_dereference_check(ca->bucket_gens,
|
||||
!ca->fs ||
|
||||
percpu_rwsem_is_held(&ca->fs->mark_lock) ||
|
||||
lockdep_is_held(&ca->fs->state_lock) ||
|
||||
lockdep_is_held(&ca->bucket_lock));
|
||||
lockdep_is_held(&ca->fs->state_lock));
|
||||
}
|
||||
|
||||
static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
|
||||
|
@ -14,21 +14,13 @@ static inline bool io_timer_cmp(const void *l, const void *r, void __always_unus
return (*_l)->expire < (*_r)->expire;
}

static inline void io_timer_swp(void *l, void *r, void __always_unused *args)
{
struct io_timer **_l = (struct io_timer **)l;
struct io_timer **_r = (struct io_timer **)r;

swap(*_l, *_r);
}
static const struct min_heap_callbacks callbacks = {
.less = io_timer_cmp,
.swp = NULL,
};

void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
{
const struct min_heap_callbacks callbacks = {
.less = io_timer_cmp,
.swp = io_timer_swp,
};

spin_lock(&clock->timer_lock);

if (time_after_eq64((u64) atomic64_read(&clock->now), timer->expire)) {
@ -48,11 +40,6 @@ out:

void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
{
const struct min_heap_callbacks callbacks = {
.less = io_timer_cmp,
.swp = io_timer_swp,
};

spin_lock(&clock->timer_lock);

for (size_t i = 0; i < clock->timers.nr; i++)
@ -142,10 +129,6 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
static struct io_timer *get_expired_timer(struct io_clock *clock, u64 now)
{
struct io_timer *ret = NULL;
const struct min_heap_callbacks callbacks = {
.less = io_timer_cmp,
.swp = io_timer_swp,
};

if (clock->timers.nr &&
time_after_eq64(now, clock->timers.data[0]->expire)) {
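Note: the io_clock hunks replace three per-function min_heap_callbacks tables (each with a hand-written io_timer_swp) by one static table whose .swp is NULL, so the heap falls back to its generic element swap. A standalone sketch of why a generic swap is equivalent for pointer-sized elements; the names below are invented for the example and this is not the kernel min_heap API:

/*
 * When heap elements are plain pointers, a generic memcpy-based swap does
 * exactly what the dedicated io_timer_swp() callback did, so the custom
 * callback can be dropped.
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

struct timer_ex { uint64_t expire; };

static void generic_swp(void *l, void *r, size_t size)
{
	unsigned char tmp[16];

	assert(size <= sizeof(tmp));
	memcpy(tmp, l, size);
	memcpy(l, r, size);
	memcpy(r, tmp, size);
}

int main(void)
{
	struct timer_ex a = { 1 }, b = { 2 };
	struct timer_ex *pa = &a, *pb = &b;

	generic_swp(&pa, &pb, sizeof(pa));	/* same effect as swap(pa, pb) */
	assert(pa->expire == 2 && pb->expire == 1);
	return 0;
}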
@ -620,7 +620,7 @@ int bch2_data_update_init(struct btree_trans *trans,
* and we have to check for this because we go rw before repairing the
* snapshots table - just skip it, we can move it later.
*/
if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot)))
if (unlikely(k.k->p.snapshot && !bch2_snapshot_exists(c, k.k->p.snapshot)))
return -BCH_ERR_data_update_done;

if (!bkey_get_dev_refs(c, k))
@ -79,6 +79,8 @@ static inline void accounting_key_init(struct bkey_i *k, struct disk_accounting_
|
||||
memcpy_u64s_small(acc->v.d, d, nr);
|
||||
}
|
||||
|
||||
static int bch2_accounting_update_sb_one(struct bch_fs *, struct bpos);
|
||||
|
||||
int bch2_disk_accounting_mod(struct btree_trans *trans,
|
||||
struct disk_accounting_pos *k,
|
||||
s64 *d, unsigned nr, bool gc)
|
||||
@ -96,9 +98,16 @@ int bch2_disk_accounting_mod(struct btree_trans *trans,
|
||||
|
||||
accounting_key_init(&k_i.k, k, d, nr);
|
||||
|
||||
return likely(!gc)
|
||||
? bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k)
|
||||
: bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
|
||||
if (unlikely(gc)) {
|
||||
int ret = bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
|
||||
if (ret == -BCH_ERR_btree_insert_need_mark_replicas)
|
||||
ret = drop_locks_do(trans,
|
||||
bch2_accounting_update_sb_one(trans->c, disk_accounting_pos_to_bpos(k))) ?:
|
||||
bch2_accounting_mem_add(trans, bkey_i_to_s_c_accounting(&k_i.k), true);
|
||||
return ret;
|
||||
} else {
|
||||
return bch2_trans_update_buffered(trans, BTREE_ID_accounting, &k_i.k);
|
||||
}
|
||||
}
|
||||
|
||||
int bch2_mod_dev_cached_sectors(struct btree_trans *trans,
|
||||
@ -471,32 +480,6 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_fs_accounting_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
struct bch_accounting_mem *acc = &c->accounting;
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
out->atomic++;
|
||||
|
||||
eytzinger0_for_each(i, acc->k.nr) {
|
||||
struct disk_accounting_pos acc_k;
|
||||
bpos_to_disk_accounting_pos(&acc_k, acc->k.data[i].pos);
|
||||
|
||||
bch2_accounting_key_to_text(out, &acc_k);
|
||||
|
||||
u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
|
||||
bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
|
||||
|
||||
prt_str(out, ":");
|
||||
for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++)
|
||||
prt_printf(out, " %llu", v[j]);
|
||||
prt_newline(out);
|
||||
}
|
||||
|
||||
--out->atomic;
|
||||
percpu_up_read(&c->mark_lock);
|
||||
}
|
||||
|
||||
static void bch2_accounting_free_counters(struct bch_accounting_mem *acc, bool gc)
|
||||
{
|
||||
darray_for_each(acc->k, e) {
|
||||
@ -931,10 +914,13 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
|
||||
bpos_to_disk_accounting_pos(&acc_k, k.k->p);
|
||||
|
||||
if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
|
||||
continue;
|
||||
break;
|
||||
|
||||
if (acc_k.type == BCH_DISK_ACCOUNTING_inum)
|
||||
if (!bch2_accounting_is_mem(acc_k)) {
|
||||
struct disk_accounting_pos next = { .type = acc_k.type + 1 };
|
||||
bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
|
||||
continue;
|
||||
}
|
||||
|
||||
bch2_accounting_mem_read(c, k.k->p, v, nr);
|
||||
|
||||
|
@ -138,7 +138,8 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
|
||||
bpos_to_disk_accounting_pos(&acc_k, a.k->p);
|
||||
bool gc = mode == BCH_ACCOUNTING_gc;
|
||||
|
||||
EBUG_ON(gc && !acc->gc_running);
|
||||
if (gc && !acc->gc_running)
|
||||
return 0;
|
||||
|
||||
if (!bch2_accounting_is_mem(acc_k))
|
||||
return 0;
|
||||
@ -255,7 +256,6 @@ static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans
|
||||
|
||||
int bch2_fs_replicas_usage_read(struct bch_fs *, darray_char *);
|
||||
int bch2_fs_accounting_read(struct bch_fs *, darray_char *, unsigned);
|
||||
void bch2_fs_accounting_to_text(struct printbuf *, struct bch_fs *);
|
||||
|
||||
int bch2_gc_accounting_start(struct bch_fs *);
|
||||
int bch2_gc_accounting_done(struct bch_fs *);
|
||||
|
@ -305,13 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
if (flags & BTREE_TRIGGER_gc) {
|
||||
percpu_down_read(&c->mark_lock);
|
||||
struct bucket *g = gc_bucket(ca, bucket.offset);
|
||||
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
|
||||
ptr->dev,
|
||||
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
|
||||
ret = -BCH_ERR_mark_stripe;
|
||||
goto err_unlock;
|
||||
goto err;
|
||||
}
|
||||
|
||||
bucket_lock(g);
|
||||
@ -319,8 +318,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
||||
ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags);
|
||||
alloc_to_bucket(g, new);
|
||||
bucket_unlock(g);
|
||||
err_unlock:
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
if (!ret)
|
||||
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
|
||||
}
|
||||
@ -1058,6 +1056,11 @@ static inline void ec_stripes_heap_swap(void *l, void *r, void *h)
|
||||
ec_stripes_heap_set_backpointer(_h, j);
|
||||
}
|
||||
|
||||
static const struct min_heap_callbacks callbacks = {
|
||||
.less = ec_stripes_heap_cmp,
|
||||
.swp = ec_stripes_heap_swap,
|
||||
};
|
||||
|
||||
static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
|
||||
{
|
||||
ec_stripes_heap *h = &c->ec_stripes_heap;
|
||||
@ -1070,11 +1073,6 @@ static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
|
||||
void bch2_stripes_heap_del(struct bch_fs *c,
|
||||
struct stripe *m, size_t idx)
|
||||
{
|
||||
const struct min_heap_callbacks callbacks = {
|
||||
.less = ec_stripes_heap_cmp,
|
||||
.swp = ec_stripes_heap_swap,
|
||||
};
|
||||
|
||||
mutex_lock(&c->ec_stripes_heap_lock);
|
||||
heap_verify_backpointer(c, idx);
|
||||
|
||||
@ -1085,11 +1083,6 @@ void bch2_stripes_heap_del(struct bch_fs *c,
|
||||
void bch2_stripes_heap_insert(struct bch_fs *c,
|
||||
struct stripe *m, size_t idx)
|
||||
{
|
||||
const struct min_heap_callbacks callbacks = {
|
||||
.less = ec_stripes_heap_cmp,
|
||||
.swp = ec_stripes_heap_swap,
|
||||
};
|
||||
|
||||
mutex_lock(&c->ec_stripes_heap_lock);
|
||||
BUG_ON(min_heap_full(&c->ec_stripes_heap));
|
||||
|
||||
@ -1108,10 +1101,6 @@ void bch2_stripes_heap_insert(struct bch_fs *c,
|
||||
void bch2_stripes_heap_update(struct bch_fs *c,
|
||||
struct stripe *m, size_t idx)
|
||||
{
|
||||
const struct min_heap_callbacks callbacks = {
|
||||
.less = ec_stripes_heap_cmp,
|
||||
.swp = ec_stripes_heap_swap,
|
||||
};
|
||||
ec_stripes_heap *h = &c->ec_stripes_heap;
|
||||
bool do_deletes;
|
||||
size_t i;
|
||||
|
@ -118,6 +118,7 @@
|
||||
x(ENOENT, ENOENT_dev_not_found) \
|
||||
x(ENOENT, ENOENT_dev_idx_not_found) \
|
||||
x(ENOENT, ENOENT_inode_no_backpointer) \
|
||||
x(ENOENT, ENOENT_no_snapshot_tree_subvol) \
|
||||
x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \
|
||||
x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \
|
||||
x(EEXIST, EEXIST_str_hash_set) \
|
||||
@ -196,6 +197,9 @@
|
||||
x(EINVAL, opt_parse_error) \
|
||||
x(EINVAL, remove_with_metadata_missing_unimplemented)\
|
||||
x(EINVAL, remove_would_lose_data) \
|
||||
x(EINVAL, no_resize_with_buckets_nouse) \
|
||||
x(EINVAL, inode_unpack_error) \
|
||||
x(EINVAL, varint_decode_error) \
|
||||
x(EROFS, erofs_trans_commit) \
|
||||
x(EROFS, erofs_no_writes) \
|
||||
x(EROFS, erofs_journal_err) \
|
||||
@ -313,6 +317,7 @@ static inline long bch2_err_class(long err)
|
||||
|
||||
#define BLK_STS_REMOVED ((__force blk_status_t)128)
|
||||
|
||||
#include <linux/blk_types.h>
|
||||
const char *bch2_blk_status_to_str(blk_status_t);
|
||||
|
||||
#endif /* _BCACHFES_ERRCODE_H */
|
||||
|
@ -574,6 +574,11 @@ static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsig
|
||||
printbuf_nul_terminate(out);
|
||||
}
|
||||
|
||||
static inline void prt_str_reversed(struct printbuf *out, const char *s)
|
||||
{
|
||||
prt_bytes_reversed(out, s, strlen(s));
|
||||
}
|
||||
|
||||
static inline void reverse_bytes(void *b, size_t n)
|
||||
{
|
||||
char *e = b + n, *s = b;
|
||||
@ -596,17 +601,20 @@ int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printb
|
||||
struct bch_inode_unpacked inode;
|
||||
ret = bch2_inode_find_by_inum_trans(trans, inum, &inode);
|
||||
if (ret)
|
||||
goto err;
|
||||
goto disconnected;
|
||||
|
||||
if (!inode.bi_dir && !inode.bi_dir_offset) {
|
||||
ret = -BCH_ERR_ENOENT_inode_no_backpointer;
|
||||
goto err;
|
||||
goto disconnected;
|
||||
}
|
||||
|
||||
inum.subvol = inode.bi_parent_subvol ?: inum.subvol;
|
||||
inum.inum = inode.bi_dir;
|
||||
|
||||
u32 snapshot;
|
||||
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
|
||||
if (ret)
|
||||
goto err;
|
||||
goto disconnected;
|
||||
|
||||
struct btree_iter d_iter;
|
||||
struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter,
|
||||
@ -614,23 +622,19 @@ int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printb
|
||||
0, dirent);
|
||||
ret = bkey_err(d.s_c);
|
||||
if (ret)
|
||||
goto err;
|
||||
goto disconnected;
|
||||
|
||||
struct qstr dirent_name = bch2_dirent_get_name(d);
|
||||
prt_bytes_reversed(path, dirent_name.name, dirent_name.len);
|
||||
|
||||
prt_char(path, '/');
|
||||
|
||||
if (d.v->d_type == DT_SUBVOL)
|
||||
inum.subvol = le32_to_cpu(d.v->d_parent_subvol);
|
||||
inum.inum = d.k->p.inode;
|
||||
|
||||
bch2_trans_iter_exit(trans, &d_iter);
|
||||
}
|
||||
|
||||
if (orig_pos == path->pos)
|
||||
prt_char(path, '/');
|
||||
|
||||
out:
|
||||
ret = path->allocation_failure ? -ENOMEM : 0;
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -639,4 +643,10 @@ int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printb
|
||||
return 0;
|
||||
err:
|
||||
return ret;
|
||||
disconnected:
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
goto err;
|
||||
|
||||
prt_str_reversed(path, "(disconnected)");
|
||||
goto out;
|
||||
}
|
||||
|
@ -625,15 +625,6 @@ do_io:
|
||||
BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio,
|
||||
sectors << 9, offset << 9));
|
||||
|
||||
/* Check for writing past i_size: */
|
||||
WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) >
|
||||
round_up(i_size, block_bytes(c)) &&
|
||||
!test_bit(BCH_FS_emergency_ro, &c->flags),
|
||||
"writing past i_size: %llu > %llu (unrounded %llu)\n",
|
||||
bio_end_sector(&w->io->op.wbio.bio) << 9,
|
||||
round_up(i_size, block_bytes(c)),
|
||||
i_size);
|
||||
|
||||
w->io->op.res.sectors += reserved_sectors;
|
||||
w->io->op.i_sectors_delta -= dirty_sectors;
|
||||
w->io->op.new_i_size = i_size;
|
||||
|
@ -205,6 +205,36 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find any subvolume associated with a tree of snapshots
|
||||
* We can't rely on master_subvol - it might have been deleted.
|
||||
*/
|
||||
static int find_snapshot_tree_subvol(struct btree_trans *trans,
|
||||
u32 tree_id, u32 *subvol)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, ret) {
|
||||
if (k.k->type != KEY_TYPE_snapshot)
|
||||
continue;
|
||||
|
||||
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
|
||||
if (le32_to_cpu(s.v->tree) != tree_id)
|
||||
continue;
|
||||
|
||||
if (s.v->subvol) {
|
||||
*subvol = le32_to_cpu(s.v->subvol);
|
||||
goto found;
|
||||
}
|
||||
}
|
||||
ret = -BCH_ERR_ENOENT_no_snapshot_tree_subvol;
|
||||
found:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Get lost+found, create if it doesn't exist: */
|
||||
static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
|
||||
struct bch_inode_unpacked *lostfound,
|
||||
@ -223,19 +253,24 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };
|
||||
u32 subvolid;
|
||||
ret = find_snapshot_tree_subvol(trans,
|
||||
bch2_snapshot_tree(c, snapshot), &subvolid);
|
||||
bch_err_msg(c, ret, "finding subvol associated with snapshot tree %u",
|
||||
bch2_snapshot_tree(c, snapshot));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
struct bch_subvolume subvol;
|
||||
ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), false, &subvol);
|
||||
bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
|
||||
le32_to_cpu(st.master_subvol), snapshot);
|
||||
ret = bch2_subvolume_get(trans, subvolid, false, &subvol);
|
||||
bch_err_msg(c, ret, "looking up subvol %u for snapshot %u", subvolid, snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!subvol.inode) {
|
||||
struct btree_iter iter;
|
||||
struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
|
||||
BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)),
|
||||
BTREE_ID_subvolumes, POS(0, subvolid),
|
||||
0, subvolume);
|
||||
ret = PTR_ERR_OR_ZERO(subvol);
|
||||
if (ret)
|
||||
@ -245,13 +280,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
}
|
||||
|
||||
root_inum.inum = le64_to_cpu(subvol.inode);
|
||||
subvol_inum root_inum = {
|
||||
.subvol = subvolid,
|
||||
.inum = le64_to_cpu(subvol.inode)
|
||||
};
|
||||
|
||||
struct bch_inode_unpacked root_inode;
|
||||
struct bch_hash_info root_hash_info;
|
||||
ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode);
|
||||
bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
|
||||
root_inum.inum, le32_to_cpu(st.master_subvol));
|
||||
root_inum.inum, subvolid);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -458,7 +496,9 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
|
||||
continue;
|
||||
|
||||
struct bch_inode_unpacked child_inode;
|
||||
bch2_inode_unpack(k, &child_inode);
|
||||
ret = bch2_inode_unpack(k, &child_inode);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
if (!inode_should_reattach(&child_inode)) {
|
||||
ret = maybe_delete_dirent(trans,
|
||||
@ -809,9 +849,8 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w,
|
||||
{
|
||||
struct bch_inode_unpacked u;
|
||||
|
||||
BUG_ON(bch2_inode_unpack(inode, &u));
|
||||
|
||||
return darray_push(&w->inodes, ((struct inode_walker_entry) {
|
||||
return bch2_inode_unpack(inode, &u) ?:
|
||||
darray_push(&w->inodes, ((struct inode_walker_entry) {
|
||||
.inode = u,
|
||||
.snapshot = inode.k->p.snapshot,
|
||||
}));
|
||||
@ -1065,7 +1104,7 @@ static int get_snapshot_root_inode(struct btree_trans *trans,
|
||||
goto err;
|
||||
BUG();
|
||||
found_root:
|
||||
BUG_ON(bch2_inode_unpack(k, root));
|
||||
ret = bch2_inode_unpack(k, root);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret;
|
||||
@ -1096,7 +1135,9 @@ static int check_inode(struct btree_trans *trans,
|
||||
if (!bkey_is_inode(k.k))
|
||||
return 0;
|
||||
|
||||
BUG_ON(bch2_inode_unpack(k, &u));
|
||||
ret = bch2_inode_unpack(k, &u);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (snapshot_root->bi_inum != u.bi_inum) {
|
||||
ret = get_snapshot_root_inode(trans, snapshot_root, u.bi_inum);
|
||||
@ -1107,7 +1148,7 @@ static int check_inode(struct btree_trans *trans,
|
||||
if (fsck_err_on(u.bi_hash_seed != snapshot_root->bi_hash_seed ||
|
||||
INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root),
|
||||
trans, inode_snapshot_mismatch,
|
||||
"inodes in different snapshots don't match")) {
|
||||
"inode hash info in different snapshots don't match")) {
|
||||
u.bi_hash_seed = snapshot_root->bi_hash_seed;
|
||||
SET_INODE_STR_HASH(&u, INODE_STR_HASH(snapshot_root));
|
||||
do_update = true;
|
||||
@ -1318,7 +1359,9 @@ static int find_oldest_inode_needs_reattach(struct btree_trans *trans,
|
||||
break;
|
||||
|
||||
struct bch_inode_unpacked parent_inode;
|
||||
bch2_inode_unpack(k, &parent_inode);
|
||||
ret = bch2_inode_unpack(k, &parent_inode);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
if (!inode_should_reattach(&parent_inode))
|
||||
break;
|
||||
@ -1341,7 +1384,9 @@ static int check_unreachable_inode(struct btree_trans *trans,
|
||||
return 0;
|
||||
|
||||
struct bch_inode_unpacked inode;
|
||||
BUG_ON(bch2_inode_unpack(k, &inode));
|
||||
ret = bch2_inode_unpack(k, &inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!inode_should_reattach(&inode))
|
||||
return 0;
|
||||
@ -2296,7 +2341,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
|
||||
*hash_info = bch2_hash_info_init(c, &i->inode);
|
||||
dir->first_this_inode = false;
|
||||
|
||||
ret = bch2_str_hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k);
|
||||
ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
if (ret) {
|
||||
@ -2410,7 +2455,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
|
||||
*hash_info = bch2_hash_info_init(c, &i->inode);
|
||||
inode->first_this_inode = false;
|
||||
|
||||
ret = bch2_str_hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k);
|
||||
ret = bch2_str_hash_check_key(trans, NULL, &bch2_xattr_hash_desc, hash_info, iter, k);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
@ -2653,7 +2698,9 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
|
||||
int ret = 0;
|
||||
|
||||
struct bch_inode_unpacked inode;
|
||||
BUG_ON(bch2_inode_unpack(inode_k, &inode));
|
||||
ret = bch2_inode_unpack(inode_k, &inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
while (!inode.bi_subvol) {
|
||||
struct btree_iter dirent_iter;
|
||||
@ -2864,7 +2911,9 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
|
||||
|
||||
/* Should never fail, checked by bch2_inode_invalid: */
|
||||
struct bch_inode_unpacked u;
|
||||
BUG_ON(bch2_inode_unpack(k, &u));
|
||||
_ret3 = bch2_inode_unpack(k, &u);
|
||||
if (_ret3)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Backpointer and directory structure checks are sufficient for
|
||||
@ -2942,7 +2991,9 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite
|
||||
if (!bkey_is_inode(k.k))
|
||||
return 0;
|
||||
|
||||
BUG_ON(bch2_inode_unpack(k, &u));
|
||||
ret = bch2_inode_unpack(k, &u);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (S_ISDIR(u.bi_mode))
|
||||
return 0;
|
||||
|
@ -48,10 +48,10 @@ static int inode_decode_field(const u8 *in, const u8 *end,
|
||||
u8 *p;
|
||||
|
||||
if (in >= end)
|
||||
return -1;
|
||||
return -BCH_ERR_inode_unpack_error;
|
||||
|
||||
if (!*in)
|
||||
return -1;
|
||||
return -BCH_ERR_inode_unpack_error;
|
||||
|
||||
/*
|
||||
* position of highest set bit indicates number of bytes:
|
||||
@ -61,7 +61,7 @@ static int inode_decode_field(const u8 *in, const u8 *end,
|
||||
bytes = byte_table[shift - 1];
|
||||
|
||||
if (in + bytes > end)
|
||||
return -1;
|
||||
return -BCH_ERR_inode_unpack_error;
|
||||
|
||||
p = (u8 *) be + 16 - bytes;
|
||||
memcpy(p, in, bytes);
|
||||
@ -177,7 +177,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
|
||||
return ret; \
|
||||
\
|
||||
if (field_bits > sizeof(unpacked->_name) * 8) \
|
||||
return -1; \
|
||||
return -BCH_ERR_inode_unpack_error; \
|
||||
\
|
||||
unpacked->_name = field[1]; \
|
||||
in += ret;
|
||||
@ -218,7 +218,7 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
|
||||
\
|
||||
unpacked->_name = v[0]; \
|
||||
if (v[1] || v[0] != unpacked->_name) \
|
||||
return -1; \
|
||||
return -BCH_ERR_inode_unpack_error; \
|
||||
fieldnr++;
|
||||
|
||||
BCH_INODE_FIELDS_v2()
|
||||
@ -269,7 +269,7 @@ static int bch2_inode_unpack_v3(struct bkey_s_c k,
|
||||
\
|
||||
unpacked->_name = v[0]; \
|
||||
if (v[1] || v[0] != unpacked->_name) \
|
||||
return -1; \
|
||||
return -BCH_ERR_inode_unpack_error; \
|
||||
fieldnr++;
|
||||
|
||||
BCH_INODE_FIELDS_v3()
|
||||
@ -886,7 +886,7 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m
{
struct bch_fs *c = trans->c;

u64 cursor_idx = c->opts.shard_inode_numbers ? cpu : 0;
u64 cursor_idx = c->opts.inodes_32bit ? 0 : cpu + 1;

cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits);

@ -907,19 +907,16 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m
if (ret)
goto err;

cursor->v.bits = c->opts.shard_inode_numbers_bits;

unsigned bits = (c->opts.inodes_32bit ? 31 : 63);
if (c->opts.shard_inode_numbers) {
bits -= cursor->v.bits;

*min = (cpu << bits);
*max = (cpu << bits) | ~(ULLONG_MAX << bits);

*min = max_t(u64, *min, BLOCKDEV_INODE_MAX);
} else {
if (c->opts.inodes_32bit) {
*min = BLOCKDEV_INODE_MAX;
*max = ~(ULLONG_MAX << bits);
*max = INT_MAX;
} else {
cursor->v.bits = c->opts.shard_inode_numbers_bits;

unsigned bits = 63 - c->opts.shard_inode_numbers_bits;

*min = max(cpu << bits, (u64) INT_MAX + 1);
*max = (cpu << bits) | ~(ULLONG_MAX << bits);
}

if (le64_to_cpu(cursor->v.idx) < *min)
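Note: the cursor hunk splits the inode number space per CPU when 64-bit inode numbers are in use: the top shard bits select the CPU, the remaining bits are a per-shard counter. A small standalone sketch of that range arithmetic (constants and names are local to the example; the real code also clamps *min above INT_MAX):

/*
 * Per-CPU inode number sharding: with shard_bits = 4 and a 63-bit number
 * space, each CPU gets a contiguous 59-bit range of inode numbers.
 */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	unsigned shard_bits = 4;		/* e.g. shard_inode_numbers_bits */
	unsigned bits = 63 - shard_bits;	/* counter width per shard */
	uint64_t cpu = 3;

	uint64_t min = cpu << bits;
	uint64_t max = (cpu << bits) | ~(~UINT64_C(0) << bits);

	printf("cpu %" PRIu64 ": inodes %" PRIu64 "..%" PRIu64 "\n", cpu, min, max);
	return 0;
}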
@ -102,7 +102,8 @@ struct bch_inode_generation {
|
||||
x(bi_subvol, 32) \
|
||||
x(bi_parent_subvol, 32) \
|
||||
x(bi_nocow, 8) \
|
||||
x(bi_depth, 32)
|
||||
x(bi_depth, 32) \
|
||||
x(bi_inodes_32bit, 8)
|
||||
|
||||
/* subset of BCH_INODE_FIELDS */
|
||||
#define BCH_INODE_OPTS() \
|
||||
@ -115,7 +116,8 @@ struct bch_inode_generation {
|
||||
x(foreground_target, 16) \
|
||||
x(background_target, 16) \
|
||||
x(erasure_code, 16) \
|
||||
x(nocow, 8)
|
||||
x(nocow, 8) \
|
||||
x(inodes_32bit, 8)
|
||||
|
||||
enum inode_opt_id {
|
||||
#define x(name, ...) \
|
||||
|
@ -1356,6 +1356,9 @@ err:
|
||||
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
||||
goto retry;
|
||||
|
||||
bch2_trans_put(trans);
|
||||
darray_exit(&buckets);
|
||||
|
||||
if (ret) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
@ -1366,9 +1369,6 @@ err:
|
||||
op->flags |= BCH_WRITE_SUBMITTED;
|
||||
}
|
||||
|
||||
bch2_trans_put(trans);
|
||||
darray_exit(&buckets);
|
||||
|
||||
/* fallback to cow write path? */
|
||||
if (!(op->flags & BCH_WRITE_SUBMITTED)) {
|
||||
closure_sync(&op->cl);
|
||||
|
@ -1114,8 +1114,10 @@ reread:
(printbuf_reset(&err),
prt_str(&err, "journal "),
bch2_csum_err_msg(&err, csum_type, j->csum, csum),
err.buf)))
err.buf))) {
saw_bad = true;
bch2_fatal_error(c);
}

ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
j->encrypted_start,
@ -414,7 +414,9 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
|
||||
continue;
|
||||
|
||||
struct bch_inode_unpacked inode;
|
||||
BUG_ON(bch2_inode_unpack(k, &inode));
|
||||
_ret3 = bch2_inode_unpack(k, &inode);
|
||||
if (_ret3)
|
||||
break;
|
||||
|
||||
struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
|
||||
bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
|
||||
|
@ -222,15 +222,10 @@ enum fsck_err_opts {
|
||||
BCH_SB_ERASURE_CODE, false, \
|
||||
NULL, "Enable erasure coding (DO NOT USE YET)") \
|
||||
x(inodes_32bit, u8, \
|
||||
OPT_FS|OPT_FORMAT, \
|
||||
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_INODE_32BIT, true, \
|
||||
NULL, "Constrain inode numbers to 32 bits") \
|
||||
x(shard_inode_numbers, u8, \
|
||||
OPT_FS|OPT_FORMAT, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_SHARD_INUMS, true, \
|
||||
NULL, "Shard new inode numbers by CPU id") \
|
||||
x(shard_inode_numbers_bits, u8, \
|
||||
OPT_FS|OPT_FORMAT, \
|
||||
OPT_UINT(0, 8), \
|
||||
|
@ -107,6 +107,12 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kill_btree(struct bch_fs *c, enum btree_id btree)
|
||||
{
|
||||
bch2_btree_id_root(c, btree)->alive = false;
|
||||
bch2_shoot_down_journal_keys(c, btree, 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
|
||||
}
|
||||
|
||||
/* for -o reconstruct_alloc: */
|
||||
static void bch2_reconstruct_alloc(struct bch_fs *c)
|
||||
{
|
||||
@ -157,16 +163,9 @@ static void bch2_reconstruct_alloc(struct bch_fs *c)
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
bch2_shoot_down_journal_keys(c, BTREE_ID_alloc,
|
||||
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
|
||||
bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers,
|
||||
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
|
||||
bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard,
|
||||
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
|
||||
bch2_shoot_down_journal_keys(c, BTREE_ID_freespace,
|
||||
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
|
||||
bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens,
|
||||
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
|
||||
for (unsigned i = 0; i < btree_id_nr_alive(c); i++)
|
||||
if (btree_id_is_alloc(i))
|
||||
kill_btree(c, i);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -573,9 +572,6 @@ static int read_btree_roots(struct bch_fs *c)
|
||||
if (!r->alive)
|
||||
continue;
|
||||
|
||||
if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc)
|
||||
continue;
|
||||
|
||||
printbuf_reset(&buf);
|
||||
bch2_btree_id_level_to_text(&buf, i, r->level);
|
||||
|
||||
@ -785,6 +781,11 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
|
||||
c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
|
||||
|
||||
if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) {
|
||||
SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe);
|
||||
write_sb = true;
|
||||
}
|
||||
|
||||
if (write_sb)
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
@ -882,15 +883,15 @@ use_clean:
|
||||
c->journal_replay_seq_start = last_seq;
|
||||
c->journal_replay_seq_end = blacklist_seq - 1;
|
||||
|
||||
if (c->opts.reconstruct_alloc)
|
||||
bch2_reconstruct_alloc(c);
|
||||
|
||||
zero_out_btree_mem_ptr(&c->journal_keys);
|
||||
|
||||
ret = journal_replay_early(c, clean);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (c->opts.reconstruct_alloc)
|
||||
bch2_reconstruct_alloc(c);
|
||||
|
||||
/*
|
||||
* After an unclean shutdown, skip then next few journal sequence
|
||||
* numbers as they may have been referenced by btree writes that
|
||||
|
@ -49,7 +49,7 @@
|
||||
x(fs_upgrade_for_subvolumes, 22, 0) \
|
||||
x(check_inodes, 24, PASS_FSCK) \
|
||||
x(check_extents, 25, PASS_FSCK) \
|
||||
x(check_indirect_extents, 26, PASS_FSCK) \
|
||||
x(check_indirect_extents, 26, PASS_ONLINE|PASS_FSCK) \
|
||||
x(check_dirents, 27, PASS_FSCK) \
|
||||
x(check_xattrs, 28, PASS_FSCK) \
|
||||
x(check_root, 29, PASS_ONLINE|PASS_FSCK) \
|
||||
|
@ -2,86 +2,91 @@
|
||||
#ifndef _BCACHEFS_SB_COUNTERS_FORMAT_H
|
||||
#define _BCACHEFS_SB_COUNTERS_FORMAT_H
|
||||
|
||||
#define BCH_PERSISTENT_COUNTERS() \
|
||||
x(io_read, 0) \
|
||||
x(io_write, 1) \
|
||||
x(io_move, 2) \
|
||||
x(bucket_invalidate, 3) \
|
||||
x(bucket_discard, 4) \
|
||||
x(bucket_alloc, 5) \
|
||||
x(bucket_alloc_fail, 6) \
|
||||
x(btree_cache_scan, 7) \
|
||||
x(btree_cache_reap, 8) \
|
||||
x(btree_cache_cannibalize, 9) \
|
||||
x(btree_cache_cannibalize_lock, 10) \
|
||||
x(btree_cache_cannibalize_lock_fail, 11) \
|
||||
x(btree_cache_cannibalize_unlock, 12) \
|
||||
x(btree_node_write, 13) \
|
||||
x(btree_node_read, 14) \
|
||||
x(btree_node_compact, 15) \
|
||||
x(btree_node_merge, 16) \
|
||||
x(btree_node_split, 17) \
|
||||
x(btree_node_rewrite, 18) \
|
||||
x(btree_node_alloc, 19) \
|
||||
x(btree_node_free, 20) \
|
||||
x(btree_node_set_root, 21) \
|
||||
x(btree_path_relock_fail, 22) \
|
||||
x(btree_path_upgrade_fail, 23) \
|
||||
x(btree_reserve_get_fail, 24) \
|
||||
x(journal_entry_full, 25) \
|
||||
x(journal_full, 26) \
|
||||
x(journal_reclaim_finish, 27) \
|
||||
x(journal_reclaim_start, 28) \
|
||||
x(journal_write, 29) \
|
||||
x(read_promote, 30) \
|
||||
x(read_bounce, 31) \
|
||||
x(read_split, 33) \
|
||||
x(read_retry, 32) \
|
||||
x(read_reuse_race, 34) \
|
||||
x(move_extent_read, 35) \
|
||||
x(move_extent_write, 36) \
|
||||
x(move_extent_finish, 37) \
|
||||
x(move_extent_fail, 38) \
|
||||
x(move_extent_start_fail, 39) \
|
||||
x(copygc, 40) \
|
||||
x(copygc_wait, 41) \
|
||||
x(gc_gens_end, 42) \
|
||||
x(gc_gens_start, 43) \
|
||||
x(trans_blocked_journal_reclaim, 44) \
|
||||
x(trans_restart_btree_node_reused, 45) \
|
||||
x(trans_restart_btree_node_split, 46) \
|
||||
x(trans_restart_fault_inject, 47) \
|
||||
x(trans_restart_iter_upgrade, 48) \
|
||||
x(trans_restart_journal_preres_get, 49) \
|
||||
x(trans_restart_journal_reclaim, 50) \
|
||||
x(trans_restart_journal_res_get, 51) \
|
||||
x(trans_restart_key_cache_key_realloced, 52) \
|
||||
x(trans_restart_key_cache_raced, 53) \
|
||||
x(trans_restart_mark_replicas, 54) \
|
||||
x(trans_restart_mem_realloced, 55) \
|
||||
x(trans_restart_memory_allocation_failure, 56) \
|
||||
x(trans_restart_relock, 57) \
|
||||
x(trans_restart_relock_after_fill, 58) \
|
||||
x(trans_restart_relock_key_cache_fill, 59) \
|
||||
x(trans_restart_relock_next_node, 60) \
|
||||
x(trans_restart_relock_parent_for_fill, 61) \
|
||||
x(trans_restart_relock_path, 62) \
|
||||
x(trans_restart_relock_path_intent, 63) \
|
||||
x(trans_restart_too_many_iters, 64) \
|
||||
x(trans_restart_traverse, 65) \
|
||||
x(trans_restart_upgrade, 66) \
|
||||
x(trans_restart_would_deadlock, 67) \
|
||||
x(trans_restart_would_deadlock_write, 68) \
|
||||
x(trans_restart_injected, 69) \
|
||||
x(trans_restart_key_cache_upgrade, 70) \
|
||||
x(trans_traverse_all, 71) \
|
||||
x(transaction_commit, 72) \
|
||||
x(write_super, 73) \
|
||||
x(trans_restart_would_deadlock_recursion_limit, 74) \
|
||||
x(trans_restart_write_buffer_flush, 75) \
|
||||
x(trans_restart_split_race, 76) \
|
||||
x(write_buffer_flush_slowpath, 77) \
|
||||
x(write_buffer_flush_sync, 78)
|
||||
enum counters_flags {
|
||||
TYPE_COUNTER = BIT(0), /* event counters */
|
||||
TYPE_SECTORS = BIT(1), /* amount counters, the unit is sectors */
|
||||
};
|
||||
|
||||
#define BCH_PERSISTENT_COUNTERS() \
|
||||
x(io_read, 0, TYPE_SECTORS) \
|
||||
x(io_write, 1, TYPE_SECTORS) \
|
||||
x(io_move, 2, TYPE_SECTORS) \
|
||||
x(bucket_invalidate, 3, TYPE_COUNTER) \
|
||||
x(bucket_discard, 4, TYPE_COUNTER) \
|
||||
x(bucket_alloc, 5, TYPE_COUNTER) \
|
||||
x(bucket_alloc_fail, 6, TYPE_COUNTER) \
|
||||
x(btree_cache_scan, 7, TYPE_COUNTER) \
|
||||
x(btree_cache_reap, 8, TYPE_COUNTER) \
|
||||
x(btree_cache_cannibalize, 9, TYPE_COUNTER) \
|
||||
x(btree_cache_cannibalize_lock, 10, TYPE_COUNTER) \
|
||||
x(btree_cache_cannibalize_lock_fail, 11, TYPE_COUNTER) \
|
||||
x(btree_cache_cannibalize_unlock, 12, TYPE_COUNTER) \
|
||||
x(btree_node_write, 13, TYPE_COUNTER) \
|
||||
x(btree_node_read, 14, TYPE_COUNTER) \
|
||||
x(btree_node_compact, 15, TYPE_COUNTER) \
|
||||
x(btree_node_merge, 16, TYPE_COUNTER) \
|
||||
x(btree_node_split, 17, TYPE_COUNTER) \
|
||||
x(btree_node_rewrite, 18, TYPE_COUNTER) \
|
||||
x(btree_node_alloc, 19, TYPE_COUNTER) \
|
||||
x(btree_node_free, 20, TYPE_COUNTER) \
|
||||
x(btree_node_set_root, 21, TYPE_COUNTER) \
|
||||
x(btree_path_relock_fail, 22, TYPE_COUNTER) \
|
||||
x(btree_path_upgrade_fail, 23, TYPE_COUNTER) \
|
||||
x(btree_reserve_get_fail, 24, TYPE_COUNTER) \
|
||||
x(journal_entry_full, 25, TYPE_COUNTER) \
|
||||
x(journal_full, 26, TYPE_COUNTER) \
|
||||
x(journal_reclaim_finish, 27, TYPE_COUNTER) \
|
||||
x(journal_reclaim_start, 28, TYPE_COUNTER) \
|
||||
x(journal_write, 29, TYPE_COUNTER) \
|
||||
x(read_promote, 30, TYPE_COUNTER) \
|
||||
x(read_bounce, 31, TYPE_COUNTER) \
|
||||
x(read_split, 33, TYPE_COUNTER) \
|
||||
x(read_retry, 32, TYPE_COUNTER) \
|
||||
x(read_reuse_race, 34, TYPE_COUNTER) \
|
||||
x(move_extent_read, 35, TYPE_SECTORS) \
|
||||
x(move_extent_write, 36, TYPE_SECTORS) \
|
||||
x(move_extent_finish, 37, TYPE_SECTORS) \
|
||||
x(move_extent_fail, 38, TYPE_COUNTER) \
|
||||
x(move_extent_start_fail, 39, TYPE_COUNTER) \
|
||||
x(copygc, 40, TYPE_COUNTER) \
|
||||
x(copygc_wait, 41, TYPE_COUNTER) \
|
||||
x(gc_gens_end, 42, TYPE_COUNTER) \
|
||||
x(gc_gens_start, 43, TYPE_COUNTER) \
|
||||
x(trans_blocked_journal_reclaim, 44, TYPE_COUNTER) \
|
||||
x(trans_restart_btree_node_reused, 45, TYPE_COUNTER) \
|
||||
x(trans_restart_btree_node_split, 46, TYPE_COUNTER) \
|
||||
x(trans_restart_fault_inject, 47, TYPE_COUNTER) \
|
||||
x(trans_restart_iter_upgrade, 48, TYPE_COUNTER) \
|
||||
x(trans_restart_journal_preres_get, 49, TYPE_COUNTER) \
|
||||
x(trans_restart_journal_reclaim, 50, TYPE_COUNTER) \
|
||||
x(trans_restart_journal_res_get, 51, TYPE_COUNTER) \
|
||||
x(trans_restart_key_cache_key_realloced, 52, TYPE_COUNTER) \
|
||||
x(trans_restart_key_cache_raced, 53, TYPE_COUNTER) \
|
||||
x(trans_restart_mark_replicas, 54, TYPE_COUNTER) \
|
||||
x(trans_restart_mem_realloced, 55, TYPE_COUNTER) \
|
||||
x(trans_restart_memory_allocation_failure, 56, TYPE_COUNTER) \
|
||||
x(trans_restart_relock, 57, TYPE_COUNTER) \
|
||||
x(trans_restart_relock_after_fill, 58, TYPE_COUNTER) \
|
||||
x(trans_restart_relock_key_cache_fill, 59, TYPE_COUNTER) \
|
||||
x(trans_restart_relock_next_node, 60, TYPE_COUNTER) \
|
||||
x(trans_restart_relock_parent_for_fill, 61, TYPE_COUNTER) \
|
||||
x(trans_restart_relock_path, 62, TYPE_COUNTER) \
|
||||
x(trans_restart_relock_path_intent, 63, TYPE_COUNTER) \
|
||||
x(trans_restart_too_many_iters, 64, TYPE_COUNTER) \
|
||||
x(trans_restart_traverse, 65, TYPE_COUNTER) \
|
||||
x(trans_restart_upgrade, 66, TYPE_COUNTER) \
|
||||
x(trans_restart_would_deadlock, 67, TYPE_COUNTER) \
|
||||
x(trans_restart_would_deadlock_write, 68, TYPE_COUNTER) \
|
||||
x(trans_restart_injected, 69, TYPE_COUNTER) \
|
||||
x(trans_restart_key_cache_upgrade, 70, TYPE_COUNTER) \
|
||||
x(trans_traverse_all, 71, TYPE_COUNTER) \
|
||||
x(transaction_commit, 72, TYPE_COUNTER) \
|
||||
x(write_super, 73, TYPE_COUNTER) \
|
||||
x(trans_restart_would_deadlock_recursion_limit, 74, TYPE_COUNTER) \
|
||||
x(trans_restart_write_buffer_flush, 75, TYPE_COUNTER) \
|
||||
x(trans_restart_split_race, 76, TYPE_COUNTER) \
|
||||
x(write_buffer_flush_slowpath, 77, TYPE_COUNTER) \
|
||||
x(write_buffer_flush_sync, 78, TYPE_COUNTER)
|
||||
|
||||
enum bch_persistent_counters {
|
||||
#define x(t, n, ...) BCH_COUNTER_##t,
|
||||
|
@ -83,7 +83,6 @@
|
||||
BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
|
||||
BCH_FSCK_ERR_inode_has_child_snapshots_wrong) \
|
||||
x(backpointer_bucket_gen, \
|
||||
BIT_ULL(BCH_RECOVERY_PASS_check_backpointers_to_extents)|\
|
||||
BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
|
||||
BCH_FSCK_ERR_backpointer_to_missing_ptr, \
|
||||
BCH_FSCK_ERR_ptr_to_missing_backpointer) \
|
||||
|
@ -616,8 +616,6 @@ void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long

if (type != SIX_LOCK_write)
six_release(&lock->dep_map, ip);
else
lock->seq++;

if (type == SIX_LOCK_intent &&
lock->intent_lock_recurse) {
@ -625,6 +623,15 @@ void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long
return;
}

if (type == SIX_LOCK_write &&
lock->write_lock_recurse) {
--lock->write_lock_recurse;
return;
}

if (type == SIX_LOCK_write)
lock->seq++;

do_six_unlock_type(lock, type);
}
EXPORT_SYMBOL_GPL(six_unlock_ip);
@ -735,13 +742,13 @@ void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
atomic_add(l[type].lock_val, &lock->state);
}
break;
case SIX_LOCK_write:
lock->write_lock_recurse++;
fallthrough;
case SIX_LOCK_intent:
EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
lock->intent_lock_recurse++;
break;
case SIX_LOCK_write:
BUG();
break;
}
}
EXPORT_SYMBOL_GPL(six_lock_increment);
@ -137,6 +137,7 @@ struct six_lock {
atomic_t state;
u32 seq;
unsigned intent_lock_recurse;
unsigned write_lock_recurse;
struct task_struct *owner;
unsigned __percpu *readers;
raw_spinlock_t wait_lock;
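Note: the six-lock hunks let SIX_LOCK_write be taken recursively via six_lock_increment(), counted in the new write_lock_recurse field, with six_unlock_ip() only really releasing on the outermost unlock. A rough standalone sketch of that recursion-count pattern (invented names; not the six lock implementation):

/*
 * Recursion-counted lock sketch: re-acquiring while already held just
 * bumps a counter, and only the final unlock really releases.
 */
#include <pthread.h>

struct relock {
	pthread_mutex_t	mu;
	unsigned	recurse;	/* extra acquisitions beyond the first */
};

static void relock_acquire(struct relock *l, int already_held)
{
	if (already_held)
		l->recurse++;		/* like lock->write_lock_recurse++ */
	else
		pthread_mutex_lock(&l->mu);
}

static void relock_release(struct relock *l)
{
	if (l->recurse) {
		--l->recurse;		/* nested unlock: nothing else to do */
		return;
	}
	pthread_mutex_unlock(&l->mu);
}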
@ -2,6 +2,7 @@
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "bkey_buf.h"
|
||||
#include "btree_cache.h"
|
||||
#include "btree_key_cache.h"
|
||||
#include "btree_update.h"
|
||||
#include "buckets.h"
|
||||
@ -279,23 +280,6 @@ fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
|
||||
{
|
||||
struct snapshot_t *t = snapshot_t_mut(c, id);
|
||||
u32 parent = id;
|
||||
|
||||
while ((parent = bch2_snapshot_parent_early(c, parent)) &&
|
||||
parent - id - 1 < IS_ANCESTOR_BITMAP)
|
||||
__set_bit(parent - id - 1, t->is_ancestor);
|
||||
}
|
||||
|
||||
static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
|
||||
{
|
||||
mutex_lock(&c->snapshot_table_lock);
|
||||
__set_is_ancestor_bitmap(c, id);
|
||||
mutex_unlock(&c->snapshot_table_lock);
|
||||
}
|
||||
|
||||
static int __bch2_mark_snapshot(struct btree_trans *trans,
|
||||
enum btree_id btree, unsigned level,
|
||||
struct bkey_s_c old, struct bkey_s_c new,
|
||||
@ -317,6 +301,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
|
||||
if (new.k->type == KEY_TYPE_snapshot) {
|
||||
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
|
||||
|
||||
t->live = true;
|
||||
t->parent = le32_to_cpu(s.v->parent);
|
||||
t->children[0] = le32_to_cpu(s.v->children[0]);
|
||||
t->children[1] = le32_to_cpu(s.v->children[1]);
|
||||
@ -335,7 +320,11 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
|
||||
t->skip[2] = 0;
|
||||
}
|
||||
|
||||
__set_is_ancestor_bitmap(c, id);
|
||||
u32 parent = id;
|
||||
|
||||
while ((parent = bch2_snapshot_parent_early(c, parent)) &&
|
||||
parent - id - 1 < IS_ANCESTOR_BITMAP)
|
||||
__set_bit(parent - id - 1, t->is_ancestor);
|
||||
|
||||
if (BCH_SNAPSHOT_DELETED(s.v)) {
|
||||
set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
|
||||
@ -365,70 +354,6 @@ int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
|
||||
BTREE_ITER_with_updates, snapshot, s);
|
||||
}
|
||||
|
||||
static int bch2_snapshot_live(struct btree_trans *trans, u32 id)
|
||||
{
|
||||
struct bch_snapshot v;
|
||||
int ret;
|
||||
|
||||
if (!id)
|
||||
return 0;
|
||||
|
||||
ret = bch2_snapshot_lookup(trans, id, &v);
|
||||
if (bch2_err_matches(ret, ENOENT))
|
||||
bch_err(trans->c, "snapshot node %u not found", id);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return !BCH_SNAPSHOT_DELETED(&v);
|
||||
}
|
||||
|
||||
/*
|
||||
* If @k is a snapshot with just one live child, it's part of a linear chain,
|
||||
* which we consider to be an equivalence class: and then after snapshot
|
||||
* deletion cleanup, there should only be a single key at a given position in
|
||||
* this equivalence class.
|
||||
*
|
||||
* This sets the equivalence class of @k to be the child's equivalence class, if
|
||||
* it's part of such a linear chain: this correctly sets equivalence classes on
|
||||
* startup if we run leaf to root (i.e. in natural key order).
|
||||
*/
|
||||
static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
unsigned i, nr_live = 0, live_idx = 0;
|
||||
struct bkey_s_c_snapshot snap;
|
||||
u32 id = k.k->p.offset, child[2];
|
||||
|
||||
if (k.k->type != KEY_TYPE_snapshot)
|
||||
return 0;
|
||||
|
||||
snap = bkey_s_c_to_snapshot(k);
|
||||
|
||||
child[0] = le32_to_cpu(snap.v->children[0]);
|
||||
child[1] = le32_to_cpu(snap.v->children[1]);
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
int ret = bch2_snapshot_live(trans, child[i]);
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (ret)
|
||||
live_idx = i;
|
||||
nr_live += ret;
|
||||
}
|
||||
|
||||
mutex_lock(&c->snapshot_table_lock);
|
||||
|
||||
snapshot_t_mut(c, id)->equiv = nr_live == 1
|
||||
? snapshot_t_mut(c, child[live_idx])->equiv
|
||||
: id;
|
||||
|
||||
mutex_unlock(&c->snapshot_table_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* fsck: */

static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
@ -570,6 +495,9 @@ static int check_snapshot_tree(struct btree_trans *trans,
goto err;
}

if (!st.v->master_subvol)
goto out;

ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
@ -613,6 +541,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
u->v.master_subvol = cpu_to_le32(subvol_id);
st = snapshot_tree_i_to_s_c(u);
}
out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &snapshot_iter);
@ -913,7 +842,7 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
{
struct bch_fs *c = trans->c;

if (bch2_snapshot_equiv(c, id))
if (bch2_snapshot_exists(c, id))
return 0;

/* Do we need to reconstruct the snapshot_tree entry as well? */
@ -962,8 +891,7 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)

return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?:
bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i));
bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0);
}

/* Figure out which snapshot nodes belong in the same tree: */
@ -1061,7 +989,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c)
snapshot_id_list_to_text(&buf, t);

darray_for_each(*t, id) {
if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
if (fsck_err_on(!bch2_snapshot_exists(c, *id),
trans, snapshot_node_missing,
"snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) {
if (t->nr > 1) {
@ -1094,10 +1022,12 @@ int bch2_check_key_has_snapshot(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
int ret = 0;

if (fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot),
if (fsck_err_on(!bch2_snapshot_exists(c, k.k->p.snapshot),
trans, bkey_in_missing_snapshot,
"key in missing snapshot %s, delete?",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
(bch2_btree_id_to_text(&buf, iter->btree_id),
prt_char(&buf, ' '),
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node) ?: 1;
fsck_err:
@ -1111,13 +1041,11 @@ fsck_err:
int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
{
struct btree_iter iter;
struct bkey_i_snapshot *s;
int ret = 0;

s = bch2_bkey_get_mut_typed(trans, &iter,
struct bkey_i_snapshot *s =
bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_snapshots, POS(0, id),
0, snapshot);
ret = PTR_ERR_OR_ZERO(s);
int ret = PTR_ERR_OR_ZERO(s);
if (unlikely(ret)) {
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
trans->c, "missing snapshot %u", id);
@ -1305,10 +1233,6 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
goto err;

new_snapids[i] = iter.pos.offset;

mutex_lock(&c->snapshot_table_lock);
snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i];
mutex_unlock(&c->snapshot_table_lock);
}
err:
bch2_trans_iter_exit(trans, &iter);
@ -1414,102 +1338,95 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
* that key to snapshot leaf nodes, where we can mutate it
*/

static int delete_dead_snapshots_process_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
snapshot_id_list *deleted,
snapshot_id_list *equiv_seen,
struct bpos *last_pos)
{
int ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret)
return ret < 0 ? ret : 0;
struct snapshot_interior_delete {
u32 id;
u32 live_child;
};
typedef DARRAY(struct snapshot_interior_delete) interior_delete_list;

struct bch_fs *c = trans->c;
u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
if (!equiv) /* key for invalid snapshot node, but we chose not to delete */
static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
{
darray_for_each(*l, i)
if (i->id == id)
return i->live_child;
return 0;
}

static unsigned __live_child(struct snapshot_table *t, u32 id,
snapshot_id_list *delete_leaves,
interior_delete_list *delete_interior)
{
struct snapshot_t *s = __snapshot_t(t, id);
if (!s)
return 0;

if (!bkey_eq(k.k->p, *last_pos))
equiv_seen->nr = 0;
for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++)
if (s->children[i] &&
!snapshot_list_has_id(delete_leaves, s->children[i]) &&
!interior_delete_has_id(delete_interior, s->children[i]))
return s->children[i];

if (snapshot_list_has_id(deleted, k.k->p.snapshot))
return bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);

if (!bpos_eq(*last_pos, k.k->p) &&
snapshot_list_has_id(equiv_seen, equiv))
return bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);

*last_pos = k.k->p;

ret = snapshot_list_add_nodup(c, equiv_seen, equiv);
if (ret)
return ret;

/*
* When we have a linear chain of snapshot nodes, we consider
* those to form an equivalence class: we're going to collapse
* them all down to a single node, and keep the leaf-most node -
* which has the same id as the equivalence class id.
*
* If there are multiple keys in different snapshots at the same
* position, we're only going to keep the one in the newest
* snapshot (we delete the others above) - the rest have been
* overwritten and are redundant, and for the key we're going to keep we
* need to move it to the equivalance class ID if it's not there
* already.
*/
if (equiv != k.k->p.snapshot) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
int ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;

new->k.p.snapshot = equiv;

struct btree_iter new_iter;
bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
BTREE_ITER_all_snapshots|
BTREE_ITER_cached|
BTREE_ITER_intent);

ret = bch2_btree_iter_traverse(&new_iter) ?:
bch2_trans_update(trans, &new_iter, new,
BTREE_UPDATE_internal_snapshot_node) ?:
bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);
bch2_trans_iter_exit(trans, &new_iter);
if (ret)
return ret;
for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) {
u32 live_child = s->children[i]
? __live_child(t, s->children[i], delete_leaves, delete_interior)
: 0;
if (live_child)
return live_child;
}

return 0;
}

static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k)
static unsigned live_child(struct bch_fs *c, u32 id,
snapshot_id_list *delete_leaves,
interior_delete_list *delete_interior)
{
struct bkey_s_c_snapshot snap;
u32 children[2];
int ret;
rcu_read_lock();
u32 ret = __live_child(rcu_dereference(c->snapshots), id,
delete_leaves, delete_interior);
rcu_read_unlock();
return ret;
}

if (k.k->type != KEY_TYPE_snapshot)
return 0;
static int delete_dead_snapshots_process_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
snapshot_id_list *delete_leaves,
interior_delete_list *delete_interior)
{
if (snapshot_list_has_id(delete_leaves, k.k->p.snapshot))
return bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);

snap = bkey_s_c_to_snapshot(k);
if (BCH_SNAPSHOT_DELETED(snap.v) ||
BCH_SNAPSHOT_SUBVOL(snap.v))
return 0;
u32 live_child = interior_delete_has_id(delete_interior, k.k->p.snapshot);
if (live_child) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
int ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;

children[0] = le32_to_cpu(snap.v->children[0]);
children[1] = le32_to_cpu(snap.v->children[1]);
new->k.p.snapshot = live_child;

ret = bch2_snapshot_live(trans, children[0]) ?:
bch2_snapshot_live(trans, children[1]);
if (ret < 0)
struct btree_iter dst_iter;
struct bkey_s_c dst_k = bch2_bkey_get_iter(trans, &dst_iter,
iter->btree_id, new->k.p,
BTREE_ITER_all_snapshots|
BTREE_ITER_intent);
ret = bkey_err(dst_k);
if (ret)
return ret;

ret = (bkey_deleted(dst_k.k)
? bch2_trans_update(trans, &dst_iter, new,
BTREE_UPDATE_internal_snapshot_node)
: 0) ?:
bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);
bch2_trans_iter_exit(trans, &dst_iter);
return ret;
return !ret;
}

return 0;
}

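The rewritten delete_dead_snapshots_process_key() above applies a simple per-key rule: keys whose snapshot is being deleted as a leaf are dropped outright, and keys in an interior node being deleted are moved to that node's chosen live child, unless the destination slot is already occupied by a newer version. A hedged, userspace-only sketch of that decision (the types and parameter names are stand-ins, not the bcachefs API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum key_action { KEY_KEEP, KEY_DELETE, KEY_MOVE };

struct decision {
        enum key_action action;
        uint32_t move_to;       /* destination snapshot when action == KEY_MOVE */
};

/* Decide what to do with one key, given precomputed facts about its snapshot. */
static struct decision process_key(bool snapshot_is_dead_leaf,
                                   uint32_t interior_live_child,
                                   bool dest_slot_occupied)
{
        if (snapshot_is_dead_leaf)
                return (struct decision) { KEY_DELETE, 0 };

        if (interior_live_child)
                return dest_slot_occupied
                        ? (struct decision) { KEY_DELETE, 0 }   /* child already has a newer version */
                        : (struct decision) { KEY_MOVE, interior_live_child };

        return (struct decision) { KEY_KEEP, 0 };
}

int main(void)
{
        struct decision d = process_key(false, 7, false);
        printf("action %d, move_to %u\n", d.action, d.move_to); /* KEY_MOVE to 7 */
        return 0;
}
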
/*
@ -1517,26 +1434,57 @@ static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c
* it doesn't have child snapshot nodes - it's now redundant and we can mark it
* as deleted.
*/
static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k)
static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k,
snapshot_id_list *delete_leaves,
interior_delete_list *delete_interior)
{
int ret = bch2_snapshot_needs_delete(trans, k);
if (k.k->type != KEY_TYPE_snapshot)
return 0;

return ret <= 0
? ret
: bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
struct bch_fs *c = trans->c;
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
unsigned live_children = 0;

if (BCH_SNAPSHOT_SUBVOL(s.v))
return 0;

for (unsigned i = 0; i < 2; i++) {
u32 child = le32_to_cpu(s.v->children[i]);

live_children += child &&
!snapshot_list_has_id(delete_leaves, child);
}

if (live_children == 0) {
return snapshot_list_add(c, delete_leaves, s.k->p.offset);
} else if (live_children == 1) {
struct snapshot_interior_delete d = {
.id = s.k->p.offset,
.live_child = live_child(c, s.k->p.offset, delete_leaves, delete_interior),
};

if (!d.live_child) {
bch_err(c, "error finding live child of snapshot %u", d.id);
return -EINVAL;
}

return darray_push(delete_interior, d);
} else {
return 0;
}
}

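check_should_delete_snapshot() above sorts each snapshot node into one of three buckets. A small standalone sketch of the same classification, with hypothetical stand-in types: nodes still referenced by a subvolume are kept, nodes with no live children are queued as leaf deletions, nodes with exactly one live child are queued as interior deletions (their keys migrate to that child), and nodes with two live children stay.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical, simplified snapshot node. */
struct snap_node {
        bool has_subvol;        /* a subvolume still points here */
        uint32_t children[2];   /* 0 = no child */
        bool child_live[2];     /* child not already queued for deletion */
};

enum verdict { KEEP, DELETE_LEAF, DELETE_INTERIOR };

static enum verdict classify(const struct snap_node *n)
{
        if (n->has_subvol)
                return KEEP;

        unsigned live = 0;
        for (unsigned i = 0; i < 2; i++)
                live += n->children[i] && n->child_live[i];

        if (live == 0)
                return DELETE_LEAF;             /* nothing below it is needed */
        if (live == 1)
                return DELETE_INTERIOR;         /* keys migrate to the one live child */
        return KEEP;                            /* still an interior branch point */
}

int main(void)
{
        struct snap_node leaf     = { false, { 0, 0 }, { false, false } };
        struct snap_node interior = { false, { 7, 9 }, { true, false } };
        struct snap_node branch   = { false, { 7, 9 }, { true, true } };

        printf("%d %d %d\n", classify(&leaf), classify(&interior), classify(&branch));
        return 0;
}
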
static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
snapshot_id_list *skip)
interior_delete_list *skip)
{
rcu_read_lock();
while (snapshot_list_has_id(skip, id))
while (interior_delete_has_id(skip, id))
id = __bch2_snapshot_parent(c, id);

while (n--) {
do {
id = __bch2_snapshot_parent(c, id);
} while (snapshot_list_has_id(skip, id));
} while (interior_delete_has_id(skip, id));
}
rcu_read_unlock();

@ -1545,7 +1493,7 @@ static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,

static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
struct btree_iter *iter, struct bkey_s_c k,
snapshot_id_list *deleted)
interior_delete_list *deleted)
{
struct bch_fs *c = trans->c;
u32 nr_deleted_ancestors = 0;
@ -1555,7 +1503,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
if (k.k->type != KEY_TYPE_snapshot)
return 0;

if (snapshot_list_has_id(deleted, k.k->p.offset))
if (interior_delete_has_id(deleted, k.k->p.offset))
return 0;

s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot);
@ -1564,7 +1512,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
return ret;

darray_for_each(*deleted, i)
nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i);
nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id);

if (!nr_deleted_ancestors)
return 0;
@ -1582,7 +1530,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) {
u32 id = le32_to_cpu(s->v.skip[j]);

if (snapshot_list_has_id(deleted, id)) {
if (interior_delete_has_id(deleted, id)) {
id = bch2_snapshot_nth_parent_skip(c,
parent,
depth > 1
@ -1601,51 +1549,44 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,

int bch2_delete_dead_snapshots(struct bch_fs *c)
{
struct btree_trans *trans;
snapshot_id_list deleted = { 0 };
snapshot_id_list deleted_interior = { 0 };
int ret = 0;

if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
return 0;

trans = bch2_trans_get(c);
struct btree_trans *trans = bch2_trans_get(c);
snapshot_id_list delete_leaves = {};
interior_delete_list delete_interior = {};
int ret = 0;

/*
* For every snapshot node: If we have no live children and it's not
* pointed to by a subvolume, delete it:
*/
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
NULL, NULL, 0,
bch2_delete_redundant_snapshot(trans, k));
bch_err_msg(c, ret, "deleting redundant snapshots");
if (ret)
goto err;

ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
bch2_snapshot_set_equiv(trans, k));
bch_err_msg(c, ret, "in bch2_snapshots_set_equiv");
if (ret)
goto err;

ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k, ({
if (k.k->type != KEY_TYPE_snapshot)
continue;

BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v)
? snapshot_list_add(c, &deleted, k.k->p.offset)
: 0;
}));
ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
check_should_delete_snapshot(trans, k, &delete_leaves, &delete_interior));
bch_err_msg(c, ret, "walking snapshots");
if (ret)
goto err;

if (!delete_leaves.nr && !delete_interior.nr)
goto err;

{
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "deleting leaves");
darray_for_each(delete_leaves, i)
prt_printf(&buf, " %u", *i);

prt_printf(&buf, " interior");
darray_for_each(delete_interior, i)
prt_printf(&buf, " %u->%u", i->id, i->live_child);

ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf));
printbuf_exit(&buf);
if (ret)
goto err;
}

for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
struct bpos last_pos = POS_MIN;
snapshot_id_list equiv_seen = { 0 };
struct disk_reservation res = { 0 };

if (!btree_type_has_snapshots(btree))
@ -1655,33 +1596,24 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
btree, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
&res, NULL, BCH_TRANS_COMMIT_no_enospc,
delete_dead_snapshots_process_key(trans, &iter, k, &deleted,
&equiv_seen, &last_pos));
delete_dead_snapshots_process_key(trans, &iter, k,
&delete_leaves,
&delete_interior));

bch2_disk_reservation_put(c, &res);
darray_exit(&equiv_seen);

bch_err_msg(c, ret, "deleting keys from dying snapshots");
if (ret)
goto err;
}

bch2_trans_unlock(trans);
down_write(&c->snapshot_create_lock);

ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k, ({
u32 snapshot = k.k->p.offset;
u32 equiv = bch2_snapshot_equiv(c, snapshot);

equiv != snapshot
? snapshot_list_add(c, &deleted_interior, snapshot)
: 0;
}));

bch_err_msg(c, ret, "walking snapshots");
if (ret)
goto err_create_lock;
darray_for_each(delete_leaves, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i));
bch_err_msg(c, ret, "deleting snapshot %u", *i);
if (ret)
goto err;
}

/*
* Fixing children of deleted snapshots can't be done completely
@ -1691,30 +1623,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
BTREE_ITER_intent, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &delete_interior));
if (ret)
goto err_create_lock;
goto err;

darray_for_each(deleted, i) {
darray_for_each(delete_interior, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i));
bch_err_msg(c, ret, "deleting snapshot %u", *i);
bch2_snapshot_node_delete(trans, i->id));
bch_err_msg(c, ret, "deleting snapshot %u", i->id);
if (ret)
goto err_create_lock;
goto err;
}

darray_for_each(deleted_interior, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i));
bch_err_msg(c, ret, "deleting snapshot %u", *i);
if (ret)
goto err_create_lock;
}
err_create_lock:
up_write(&c->snapshot_create_lock);
err:
darray_exit(&deleted_interior);
darray_exit(&deleted);
darray_exit(&delete_interior);
darray_exit(&delete_leaves);
bch2_trans_put(trans);
bch_err_fn(c, ret);
return ret;
@ -1767,37 +1689,36 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
return ret;
}

static bool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap)
{
/* If there's one child, it's redundant and keys will be moved to the child */
return !!snap.v->children[0] + !!snap.v->children[1] == 1;
}

static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct bkey_s_c_snapshot snap;
int ret = 0;

if (k.k->type != KEY_TYPE_snapshot)
return 0;

snap = bkey_s_c_to_snapshot(k);
struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
if (BCH_SNAPSHOT_DELETED(snap.v) ||
bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset ||
(ret = bch2_snapshot_needs_delete(trans, k)) > 0) {
set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
return 0;
}
interior_snapshot_needs_delete(snap))
set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags);

return ret;
return 0;
}

int bch2_snapshots_read(struct bch_fs *c)
{
/*
* Initializing the is_ancestor bitmaps requires ancestors to already be
* initialized - so mark in reverse:
*/
int ret = bch2_trans_run(c,
for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots,
POS_MAX, 0, k,
__bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
bch2_snapshot_set_equiv(trans, k) ?:
bch2_check_snapshot_needs_deletion(trans, k)) ?:
for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
(set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
bch2_check_snapshot_needs_deletion(trans, k)));
bch_err_fn(c, ret);

/*

@ -119,19 +119,19 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
return id;
}

static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
static inline bool __bch2_snapshot_exists(struct bch_fs *c, u32 id)
{
const struct snapshot_t *s = snapshot_t(c, id);
return s ? s->equiv : 0;
return s ? s->live : 0;
}

static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id)
{
rcu_read_lock();
id = __bch2_snapshot_equiv(c, id);
bool ret = __bch2_snapshot_exists(c, id);
rcu_read_unlock();

return id;
return ret;
}

static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)

@ -101,38 +101,108 @@ static int hash_pick_winner(struct btree_trans *trans,
}
}

int bch2_str_hash_check_key(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c hash_k)
static int repair_inode_hash_info(struct btree_trans *trans,
struct bch_inode_unpacked *snapshot_root)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;

for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes,
SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot - 1),
BTREE_ITER_all_snapshots, k, ret) {
if (k.k->p.offset != snapshot_root->bi_inum)
break;
if (!bkey_is_inode(k.k))
continue;

struct bch_inode_unpacked inode;
ret = bch2_inode_unpack(k, &inode);
if (ret)
break;

if (fsck_err_on(inode.bi_hash_seed != snapshot_root->bi_hash_seed ||
INODE_STR_HASH(&inode) != INODE_STR_HASH(snapshot_root),
trans, inode_snapshot_mismatch,
"inode hash info in different snapshots don't match")) {
inode.bi_hash_seed = snapshot_root->bi_hash_seed;
SET_INODE_STR_HASH(&inode, INODE_STR_HASH(snapshot_root));
ret = __bch2_fsck_write_inode(trans, &inode) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
-BCH_ERR_transaction_restart_nested;
break;
}
}
fsck_err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}

/*
* All versions of the same inode in different snapshots must have the same hash
* seed/type: verify that the hash info we're using matches the root
*/
static int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum,
struct bch_hash_info *hash_info)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;

for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, U32_MAX),
BTREE_ITER_all_snapshots, k, ret) {
if (k.k->p.offset != inum)
break;
if (bkey_is_inode(k.k))
goto found;
}
bch_err(c, "%s(): inum %llu not found", __func__, inum);
ret = -BCH_ERR_fsck_repair_unimplemented;
goto err;
found:;
struct bch_inode_unpacked inode;
ret = bch2_inode_unpack(k, &inode);
if (ret)
goto err;

struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode);
if (memcmp(hash_info, &hash2, sizeof(hash2))) {
ret = repair_inode_hash_info(trans, &inode);
if (!ret) {
bch_err(c, "inode hash info mismatch with root, but mismatch not found");
ret = -BCH_ERR_fsck_repair_unimplemented;
}
}
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}

int __bch2_str_hash_check_key(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc *desc,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c hash_k)
{
struct bch_fs *c = trans->c;
struct btree_iter iter = { NULL };
struct printbuf buf = PRINTBUF;
struct bkey_s_c k;
u64 hash;
int ret = 0;

if (hash_k.k->type != desc.key_type)
return 0;

hash = desc.hash_bkey(hash_info, hash_k);

if (likely(hash == hash_k.k->p.offset))
return 0;

u64 hash = desc->hash_bkey(hash_info, hash_k);
if (hash_k.k->p.offset < hash)
goto bad_hash;

for_each_btree_key_norestart(trans, iter, desc.btree_id,
for_each_btree_key_norestart(trans, iter, desc->btree_id,
SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
BTREE_ITER_slots, k, ret) {
if (bkey_eq(k.k->p, hash_k.k->p))
break;

if (k.k->type == desc.key_type &&
!desc.cmp_bkey(k, hash_k))
if (k.k->type == desc->key_type &&
!desc->cmp_bkey(k, hash_k))
goto duplicate_entries;

if (bkey_deleted(k.k)) {
@ -145,16 +215,23 @@ out:
printbuf_exit(&buf);
return ret;
bad_hash:
/*
* Before doing any repair, check hash_info itself:
*/
ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info);
if (ret)
goto out;

if (fsck_err(trans, hash_table_key_wrong_offset,
"hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s",
bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
bch2_btree_id_str(desc->btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k);
if (IS_ERR(new))
return PTR_ERR(new);

k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info,
k = bch2_hash_set_or_get_in_snapshot(trans, &iter, *desc, hash_info,
(subvol_inum) { 0, hash_k.k->p.inode },
hash_k.k->p.snapshot, new,
STR_HASH_must_create|
@ -166,9 +243,9 @@ bad_hash:
if (k.k)
goto duplicate_entries;

ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter,
ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
BTREE_UPDATE_internal_snapshot_node) ?:
bch2_fsck_update_backpointers(trans, s, desc, hash_info, new) ?:
bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
-BCH_ERR_transaction_restart_nested;
goto out;
@ -176,7 +253,7 @@ bad_hash:
fsck_err:
goto out;
duplicate_entries:
ret = hash_pick_winner(trans, desc, hash_info, hash_k, k);
ret = hash_pick_winner(trans, *desc, hash_info, hash_k, k);
if (ret < 0)
goto out;

@ -192,14 +269,14 @@ duplicate_entries:

switch (ret) {
case 0:
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
break;
case 1:
ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0);
ret = bch2_hash_delete_at(trans, *desc, hash_info, &iter, 0);
break;
case 2:
ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?:
bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
ret = fsck_rename_dirent(trans, s, *desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?:
bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
goto out;
}

@ -394,10 +394,25 @@ int bch2_hash_delete(struct btree_trans *trans,
}

struct snapshots_seen;
int bch2_str_hash_check_key(struct btree_trans *,
struct snapshots_seen *,
const struct bch_hash_desc,
struct bch_hash_info *,
struct btree_iter *, struct bkey_s_c);
int __bch2_str_hash_check_key(struct btree_trans *,
struct snapshots_seen *,
const struct bch_hash_desc *,
struct bch_hash_info *,
struct btree_iter *, struct bkey_s_c);

static inline int bch2_str_hash_check_key(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc *desc,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c hash_k)
{
if (hash_k.k->type != desc->key_type)
return 0;

if (likely(desc->hash_bkey(hash_info, hash_k) == hash_k.k->p.offset))
return 0;

return __bch2_str_hash_check_key(trans, s, desc, hash_info, k_iter, hash_k);
}

#endif /* _BCACHEFS_STR_HASH_H */

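The header above now keeps only the cheap check inline - hash the key and compare against its offset - and takes the out-of-line __bch2_str_hash_check_key() path only when something looks wrong. A generic sketch of that fast-path/slow-path split (toy hash and names, not the bcachefs API):

#include <stdint.h>
#include <stdio.h>

/* Toy hash for the sketch. */
static uint64_t toy_hash(uint64_t key) { return (key * 0x9e3779b97f4a7c15ull) >> 40; }

/* Out-of-line slow path: only reached when a key isn't at its hashed offset. */
static int check_key_slowpath(uint64_t key, uint64_t offset)
{
        printf("repair: key %llu found at offset %llu, expected %llu\n",
               (unsigned long long) key, (unsigned long long) offset,
               (unsigned long long) toy_hash(key));
        return 1;
}

/* Inline fast path: the common case costs one hash and one compare, no call. */
static inline int check_key(uint64_t key, uint64_t offset)
{
        return toy_hash(key) == offset ? 0 : check_key_slowpath(key, offset);
}

int main(void)
{
        uint64_t k = 42;
        check_key(k, toy_hash(k));      /* fast path, nothing printed */
        check_key(k, toy_hash(k) + 1);  /* slow path */
        return 0;
}
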
@ -409,26 +409,56 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d
*/
static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
{
struct btree_iter iter;
struct bkey_s_c_subvolume subvol;
u32 snapid;
int ret = 0;
struct btree_iter subvol_iter = {}, snapshot_iter = {}, snapshot_tree_iter = {};

subvol = bch2_bkey_get_iter_typed(trans, &iter,
struct bkey_s_c_subvolume subvol =
bch2_bkey_get_iter_typed(trans, &subvol_iter,
BTREE_ID_subvolumes, POS(0, subvolid),
BTREE_ITER_cached|BTREE_ITER_intent,
subvolume);
ret = bkey_err(subvol);
int ret = bkey_err(subvol);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
"missing subvolume %u", subvolid);
if (ret)
return ret;
goto err;

snapid = le32_to_cpu(subvol.v->snapshot);
u32 snapid = le32_to_cpu(subvol.v->snapshot);

ret = bch2_btree_delete_at(trans, &iter, 0) ?:
struct bkey_s_c_snapshot snapshot =
bch2_bkey_get_iter_typed(trans, &snapshot_iter,
BTREE_ID_snapshots, POS(0, snapid),
0, snapshot);
ret = bkey_err(subvol);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
"missing snapshot %u", snapid);
if (ret)
goto err;

u32 treeid = le32_to_cpu(snapshot.v->tree);

struct bkey_s_c_snapshot_tree snapshot_tree =
bch2_bkey_get_iter_typed(trans, &snapshot_tree_iter,
BTREE_ID_snapshot_trees, POS(0, treeid),
0, snapshot_tree);

if (le32_to_cpu(snapshot_tree.v->master_subvol) == subvolid) {
struct bkey_i_snapshot_tree *snapshot_tree_mut =
bch2_bkey_make_mut_typed(trans, &snapshot_tree_iter,
&snapshot_tree.s_c,
0, snapshot_tree);
ret = PTR_ERR_OR_ZERO(snapshot_tree_mut);
if (ret)
goto err;

snapshot_tree_mut->v.master_subvol = 0;
}

ret = bch2_btree_delete_at(trans, &subvol_iter, 0) ?:
bch2_snapshot_node_set_deleted(trans, snapid);
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_iter_exit(trans, &snapshot_tree_iter);
bch2_trans_iter_exit(trans, &snapshot_iter);
bch2_trans_iter_exit(trans, &subvol_iter);
return ret;
}

|
@ -9,13 +9,13 @@ typedef DARRAY(u32) snapshot_id_list;
|
||||
#define IS_ANCESTOR_BITMAP 128
|
||||
|
||||
struct snapshot_t {
|
||||
bool live;
|
||||
u32 parent;
|
||||
u32 skip[3];
|
||||
u32 depth;
|
||||
u32 children[2];
|
||||
u32 subvol; /* Nonzero only if a subvolume points to this node: */
|
||||
u32 tree;
|
||||
u32 equiv;
|
||||
unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)];
|
||||
};
|
||||
|
||||
|
@ -1084,9 +1084,16 @@ int bch2_write_super(struct bch_fs *c)
": Superblock write was silently dropped! (seq %llu expected %llu)",
le64_to_cpu(ca->sb_read_scratch->seq),
ca->disk_sb.seq);
bch2_fs_fatal_error(c, "%s", buf.buf);

if (c->opts.errors != BCH_ON_ERROR_continue &&
c->opts.errors != BCH_ON_ERROR_fix_safe) {
ret = -BCH_ERR_erofs_sb_err;
bch2_fs_fatal_error(c, "%s", buf.buf);
} else {
bch_err(c, "%s", buf.buf);
}

printbuf_exit(&buf);
ret = -BCH_ERR_erofs_sb_err;
}

if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {

@ -563,6 +563,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[WRITE]);
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
bch2_fs_btree_gc_exit(c);
bch2_journal_keys_put_initial(c);
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
BUG_ON(atomic_read(&c->journal_keys.ref));
@ -770,13 +771,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
spin_lock_init(&c->recovery_pass_lock);
sema_init(&c->online_fsck_mutex, 1);

init_rwsem(&c->gc_lock);
mutex_init(&c->gc_gens_lock);

for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_init(&c->times[i]);

bch2_fs_gc_init(c);
bch2_fs_copygc_init(c);
bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
bch2_fs_btree_iter_init_early(c);
@ -911,6 +908,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_btree_cache_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
bch2_fs_btree_interior_update_init(c) ?:
bch2_fs_btree_gc_init(c) ?:
bch2_fs_buckets_waiting_for_journal_init(c) ?:
bch2_fs_btree_write_buffer_init(c) ?:
bch2_fs_subvolumes_init(c) ?:
@ -1306,8 +1304,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
init_completion(&ca->ref_completion);
init_completion(&ca->io_ref_completion);

init_rwsem(&ca->bucket_lock);

INIT_WORK(&ca->io_error_work, bch2_io_error_work);

bch2_time_stats_quantiles_init(&ca->io_latency[READ]);

@ -203,7 +203,6 @@ read_attribute(disk_groups);

read_attribute(has_data);
read_attribute(alloc_debug);
read_attribute(accounting);
read_attribute(usage_base);

#define x(t, n, ...) read_attribute(t);
@ -397,9 +396,6 @@ SHOW(bch2_fs)
if (attr == &sysfs_alloc_debug)
bch2_fs_alloc_debug_to_text(out, c);

if (attr == &sysfs_accounting)
bch2_fs_accounting_to_text(out, c);

if (attr == &sysfs_usage_base)
bch2_fs_usage_base_to_text(out, c);

@ -509,15 +505,22 @@ SHOW(bch2_fs_counters)

printbuf_tabstop_push(out, 32);

#define x(t, ...) \
#define x(t, n, f, ...) \
if (attr == &sysfs_##t) { \
counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\
counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\
if (f & TYPE_SECTORS) { \
counter <<= 9; \
counter_since_mount <<= 9; \
} \
\
prt_printf(out, "since mount:\t"); \
(f & TYPE_COUNTER) ? prt_u64(out, counter_since_mount) :\
prt_human_readable_u64(out, counter_since_mount); \
prt_newline(out); \
\
prt_printf(out, "since filesystem creation:\t"); \
(f & TYPE_COUNTER) ? prt_u64(out, counter) : \
prt_human_readable_u64(out, counter); \
prt_newline(out); \
}
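The new f & TYPE_SECTORS branch above converts sector counts to bytes before printing: shifting left by 9 multiplies by the 512-byte sector size, so for example 2048 sectors << 9 = 1048576 bytes (1 MiB). A trivial standalone check of that conversion, assuming the usual 512-byte sector:

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9  /* 1 sector = 512 bytes */

int main(void)
{
        uint64_t sectors = 2048;
        uint64_t bytes = sectors << SECTOR_SHIFT;

        printf("%llu sectors = %llu bytes\n",
               (unsigned long long) sectors, (unsigned long long) bytes); /* 1048576 */
        return 0;
}
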
@ -595,7 +598,6 @@ struct attribute *bch2_fs_internal_files[] = {

&sysfs_disk_groups,
&sysfs_alloc_debug,
&sysfs_accounting,
&sysfs_usage_base,
NULL
};

@ -1338,6 +1338,12 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced,
__entry->new_u64s)
);

DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip),
TP_ARGS(trans, caller_ip)
);

TRACE_EVENT(path_downgrade,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip,
@ -1374,10 +1380,21 @@ TRACE_EVENT(path_downgrade,
__entry->pos_snapshot)
);

DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip),
TP_ARGS(trans, caller_ip)
TRACE_EVENT(key_cache_fill,
TP_PROTO(struct btree_trans *trans, const char *key),
TP_ARGS(trans, key),

TP_STRUCT__entry(
__array(char, trans_fn, 32 )
__string(key, key )
),

TP_fast_assign(
strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
__assign_str(key);
),

TP_printk("%s %s", __entry->trans_fn, __get_str(key))
);

TRACE_EVENT(write_buffer_flush,
@ -1436,6 +1453,24 @@ TRACE_EVENT(write_buffer_flush_slowpath,
TP_printk("%zu/%zu", __entry->slowpath, __entry->total)
);

TRACE_EVENT(write_buffer_maybe_flush,
TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *key),
TP_ARGS(trans, caller_ip, key),

TP_STRUCT__entry(
__array(char, trans_fn, 32 )
__field(unsigned long, caller_ip )
__string(key, key )
),

TP_fast_assign(
strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
__assign_str(key);
),

TP_printk("%s %pS %s", __entry->trans_fn, (void *) __entry->caller_ip, __get_str(key))
);

DEFINE_EVENT(fs_str, rebalance_extent,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)

@ -55,6 +55,16 @@ static inline size_t buf_pages(void *p, size_t len)
PAGE_SIZE);
}

static inline void *bch2_kvmalloc(size_t n, gfp_t flags)
{
void *p = unlikely(n >= INT_MAX)
? vmalloc(n)
: kvmalloc(n, flags & ~__GFP_ZERO);
if (p && (flags & __GFP_ZERO))
memset(p, 0, n);
return p;
}

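bch2_kvmalloc() above routes very large requests (n >= INT_MAX) straight to vmalloc(), presumably because kvmalloc() won't service allocations that big, and it strips __GFP_ZERO and zeroes by hand so both paths behave the same. A hedged usage sketch - the table type and the struct_size()/kvfree() cleanup are illustrative, not taken from this commit:

/* Illustrative only: allocating and freeing a large, zeroed table with the
 * helper above. kvfree() handles both the kvmalloc() and vmalloc() cases. */
struct big_table {
        u64 nr;
        u64 entries[];
};

static struct big_table *big_table_alloc(u64 nr)
{
        struct big_table *t = bch2_kvmalloc(struct_size(t, entries, nr),
                                            GFP_KERNEL|__GFP_ZERO);
        if (!t)
                return NULL;

        t->nr = nr;
        return t;
}

static void big_table_free(struct big_table *t)
{
        kvfree(t);
}
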
#define init_heap(heap, _size, gfp) \
({ \
(heap)->nr = 0; \

@ -9,6 +9,7 @@
#include <valgrind/memcheck.h>
#endif

#include "errcode.h"
#include "varint.h"

/**
@ -53,7 +54,7 @@ int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
u64 v;

if (unlikely(in + bytes > end))
return -1;
return -BCH_ERR_varint_decode_error;

if (likely(bytes < 9)) {
__le64 v_le = 0;
@ -115,7 +116,7 @@ int bch2_varint_decode_fast(const u8 *in, const u8 *end, u64 *out)
unsigned bytes = ffz(*in) + 1;

if (unlikely(in + bytes > end))
return -1;
return -BCH_ERR_varint_decode_error;

if (likely(bytes < 9)) {
v >>= bytes;