mirror of https://github.com/koverstreet/bcachefs-tools.git
Update bcachefs sources to 55a65a994ed5 bcachefs: bcachefs_metadata_version_persistent_inode_cursors
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
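Note: per the hunks below, this update makes inode allocation cursors persistent — they become KEY_TYPE_inode_alloc_cursor keys in the logged_ops btree, sharded via the new BCH_SB_SHARD_INUMS_NBITS superblock field, replacing the in-memory unused_inode_hints — and splits struct bch_alloc_v4's journal_seq into journal_seq_nonempty and journal_seq_empty. The sketch below illustrates only the cursor-shard selection performed by the new bch2_inode_alloc_cursor_get() at the end of this diff; it is a standalone approximation with made-up names, not the in-tree code:

	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical helper mirroring the shard selection in
	 * bch2_inode_alloc_cursor_get(): all CPUs share cursor 0 unless
	 * inode number sharding is enabled, in which case the cpu number
	 * is folded into 2^shard_bits cursors. */
	static uint64_t cursor_shard(uint64_t cpu, int shard_inode_numbers,
				     unsigned shard_bits)
	{
		uint64_t cursor_idx = shard_inode_numbers ? cpu : 0;

		cursor_idx &= ~(~0ULL << shard_bits);
		return cursor_idx;
	}

	int main(void)
	{
		/* with 2 shard bits, cpu 5 maps to cursor 5 & 3 == 1 */
		printf("%llu\n", (unsigned long long) cursor_shard(5, 1, 2));
		return 0;
	}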
parent 2aeeac7785
commit 8b31dfb350
Changed paths:
.bcachefs_revision
bch_bindgen/src
include/linux
libbcachefs: alloc_background.c, alloc_background_format.h, alloc_foreground.c,
alloc_foreground.h, backpointers.c, bcachefs.h, bcachefs_format.h, bkey_types.h,
btree_gc.c, btree_io.c, btree_node_scan.c, btree_node_scan_types.h,
btree_trans_commit.c, btree_update.c, btree_update_interior.c, darray.h,
dirent.c, disk_accounting.c, disk_accounting.h, errcode.h, error.c, extents.c,
fs-common.c, fs-io-direct.c, fs.c, fsck.c, fsck.h, inode.c, inode.h,
inode_format.h, journal.c, journal.h, journal_io.c, journal_io.h,
journal_reclaim.c, logged_ops.c, logged_ops_format.h, move.c, opts.h,
recovery_passes.c, sb-clean.c, sb-errors_format.h, snapshot.c, str_hash.c,
str_hash.h, subvolume.c, subvolume.h, super-io.c, super.c, trace.h
@@ -1 +1 @@
-4a32728376a81dd9e75851a49159ff33602840f4
+55a65a994ed5fba038fda00f78416faf6f308bb8
@@ -50,6 +50,7 @@ pub enum BkeyValC<'a> {
     logged_op_truncate(&'a c::bch_logged_op_truncate),
     logged_op_finsert(&'a c::bch_logged_op_finsert),
     accounting(&'a c::bch_accounting),
+    inode_alloc_cursor(&'a c::bch_inode_alloc_cursor),
 }
 
 impl<'a, 'b> BkeySC<'a> {
@@ -106,6 +107,7 @@ impl<'a, 'b> BkeySC<'a> {
             KEY_TYPE_logged_op_truncate => logged_op_truncate(transmute(self.v)),
             KEY_TYPE_logged_op_finsert => logged_op_finsert(transmute(self.v)),
             KEY_TYPE_accounting => accounting(transmute(self.v)),
+            KEY_TYPE_inode_alloc_cursor => inode_alloc_cursor(transmute(self.v)),
             KEY_TYPE_MAX => unreachable!(),
         }
     }
@@ -15,8 +15,8 @@
  */
 #define MIN_HEAP_PREALLOCATED(_type, _name, _nr)	\
 struct _name {						\
-	int nr;						\
-	int size;					\
+	size_t nr;					\
+	size_t size;					\
 	_type *data;					\
 	_type preallocated[_nr];			\
 }
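The hunk above widens the heap's nr/size fields from int to size_t, which keeps MIN_HEAP_PREALLOCATED layout-compatible with min_heap_char — something the btree_node_scan changes later in this diff rely on. A minimal standalone sketch of what the fixed macro expands to (demo_heap is an illustrative name, not the kernel header):

	#include <stddef.h>

	#define MIN_HEAP_PREALLOCATED(_type, _name, _nr)	\
	struct _name {						\
		size_t nr;					\
		size_t size;					\
		_type *data;					\
		_type preallocated[_nr];			\
	}

	MIN_HEAP_PREALLOCATED(int, demo_heap, 4);

	int main(void)
	{
		struct demo_heap h = { .nr = 0, .size = 4 };

		h.data = h.preallocated;	/* heap backed by inline storage */
		return (int) h.nr;
	}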
@@ -323,7 +323,8 @@ void bch2_alloc_v4_swab(struct bkey_s k)
 {
 	struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;
 
-	a->journal_seq		= swab64(a->journal_seq);
+	a->journal_seq_nonempty	= swab64(a->journal_seq_nonempty);
+	a->journal_seq_empty	= swab64(a->journal_seq_empty);
 	a->flags		= swab32(a->flags);
 	a->dirty_sectors	= swab32(a->dirty_sectors);
 	a->cached_sectors	= swab32(a->cached_sectors);
@@ -346,16 +347,17 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
 	prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
 	bch2_prt_data_type(out, a->data_type);
 	prt_newline(out);
-	prt_printf(out, "journal_seq %llu\n", a->journal_seq);
-	prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a));
-	prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a));
-	prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors);
-	prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors);
-	prt_printf(out, "cached_sectors %u\n", a->cached_sectors);
-	prt_printf(out, "stripe %u\n", a->stripe);
-	prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy);
-	prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]);
-	prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]);
+	prt_printf(out, "journal_seq_nonempty %llu\n", a->journal_seq_nonempty);
+	prt_printf(out, "journal_seq_empty %llu\n", a->journal_seq_empty);
+	prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a));
+	prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a));
+	prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors);
+	prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors);
+	prt_printf(out, "cached_sectors %u\n", a->cached_sectors);
+	prt_printf(out, "stripe %u\n", a->stripe);
+	prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy);
+	prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]);
+	prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]);
 
 	if (ca)
 		prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca));
@@ -384,7 +386,7 @@ void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
 	struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
 
 	*out = (struct bch_alloc_v4) {
-		.journal_seq		= u.journal_seq,
+		.journal_seq_nonempty	= u.journal_seq,
 		.flags			= u.need_discard,
 		.gen			= u.gen,
 		.oldest_gen		= u.oldest_gen,
@@ -856,7 +858,10 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 	if (flags & BTREE_TRIGGER_transactional) {
 		alloc_data_type_set(new_a, new_a->data_type);
 
-		if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) {
+		int is_empty_delta = (int) data_type_is_empty(new_a->data_type) -
+				     (int) data_type_is_empty(old_a->data_type);
+
+		if (is_empty_delta < 0) {
 			new_a->io_time[READ] = bch2_current_io_time(c, READ);
 			new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE);
 			SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
@@ -927,20 +932,31 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 
 	if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
 		u64 transaction_seq = trans->journal_res.seq;
+		BUG_ON(!transaction_seq);
+		BUG_ON(transaction_seq < new_a->journal_seq_nonempty);
+		BUG_ON(transaction_seq < new_a->journal_seq_empty);
 
-		if (log_fsck_err_on(transaction_seq && new_a->journal_seq > transaction_seq,
+		if (log_fsck_err_on(transaction_seq && new_a->journal_seq_nonempty > transaction_seq,
 				    trans, alloc_key_journal_seq_in_future,
 				    "bucket journal seq in future (currently at %llu)\n%s",
 				    journal_cur_seq(&c->journal),
 				    (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf)))
-			new_a->journal_seq = transaction_seq;
+			new_a->journal_seq_nonempty = transaction_seq;
 
 		int is_empty_delta = (int) data_type_is_empty(new_a->data_type) -
 				     (int) data_type_is_empty(old_a->data_type);
 
-		/* Record journal sequence number of empty -> nonempty transition: */
-		if (is_empty_delta < 0)
-			new_a->journal_seq = max(new_a->journal_seq, transaction_seq);
+		/*
+		 * Record journal sequence number of empty -> nonempty transition:
+		 * Note that there may be multiple empty -> nonempty
+		 * transitions, data in a bucket may be overwritten while we're
+		 * still writing to it - so be careful to only record the first:
+		 */
+		if (is_empty_delta < 0 &&
+		    new_a->journal_seq_empty <= c->journal.flushed_seq_ondisk) {
+			new_a->journal_seq_nonempty	= transaction_seq;
+			new_a->journal_seq_empty	= 0;
+		}
 
 		/*
 		 * Bucket becomes empty: mark it as waiting for a journal flush,
@@ -949,18 +965,21 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 		 * intermediate sequence numbers:
 		 */
 		if (is_empty_delta > 0) {
-			if (new_a->journal_seq == transaction_seq ||
-			    bch2_journal_noflush_seq(&c->journal, new_a->journal_seq))
-				new_a->journal_seq = 0;
-			else {
-				new_a->journal_seq = transaction_seq;
+			if (new_a->journal_seq_nonempty == transaction_seq ||
+			    bch2_journal_noflush_seq(&c->journal,
+						     new_a->journal_seq_nonempty,
+						     transaction_seq)) {
+				new_a->journal_seq_nonempty = new_a->journal_seq_empty = 0;
+			} else {
+				new_a->journal_seq_empty = transaction_seq;
 
 				ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-									   c->journal.flushed_seq_ondisk,
-									   new.k->p.inode, new.k->p.offset,
-									   transaction_seq);
+						c->journal.flushed_seq_ondisk,
+						new.k->p.inode, new.k->p.offset,
+						transaction_seq);
 				if (bch2_fs_fatal_err_on(ret, c,
-							 "setting bucket_needs_journal_commit: %s", bch2_err_str(ret)))
+							 "setting bucket_needs_journal_commit: %s",
+							 bch2_err_str(ret)))
 					goto err;
 			}
 		}
@@ -978,7 +997,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 
 #define eval_state(_a, expr)	({ const struct bch_alloc_v4 *a = _a; expr; })
 #define statechange(expr)	!eval_state(old_a, expr) && eval_state(new_a, expr)
-#define bucket_flushed(a)	(!a->journal_seq || a->journal_seq <= c->journal.flushed_seq_ondisk)
+#define bucket_flushed(a)	(a->journal_seq_empty <= c->journal.flushed_seq_ondisk)
 
 		if (statechange(a->data_type == BCH_DATA_free) &&
 		    bucket_flushed(new_a))
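The bucket_flushed() rewrite above is the heart of the journal_seq split: a bucket that became empty may be treated as free only once the journal entry recording that empty transition is on disk, with journal_seq_empty == 0 meaning no flush is pending. A standalone sketch of the rule — field names mirror the diff, but this is not the in-tree implementation:

	#include <stdbool.h>
	#include <stdint.h>

	/* Illustrative struct; the real fields live in struct bch_alloc_v4. */
	struct alloc_sketch {
		uint64_t journal_seq_nonempty;	/* seq of last empty -> nonempty */
		uint64_t journal_seq_empty;	/* seq of last nonempty -> empty */
	};

	/* journal_seq_empty == 0 means "no empty transition awaiting a flush",
	 * so an untouched bucket is trivially flushed. */
	static bool bucket_flushed(const struct alloc_sketch *a,
				   uint64_t flushed_seq_ondisk)
	{
		return a->journal_seq_empty <= flushed_seq_ondisk;
	}

	int main(void)
	{
		struct alloc_sketch a = { .journal_seq_nonempty = 10,
					  .journal_seq_empty   = 12 };

		return bucket_flushed(&a, 11);	/* 0: flush hasn't hit disk yet */
	}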
@@ -1840,16 +1859,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
 		goto out;
 	}
 
-	if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
-		if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
-					       trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s",
-					       a->v.journal_seq,
-					       c->journal.flushed_seq_ondisk,
-					       (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
-			ret = -EIO;
-		goto out;
-	}
-
 	if (!fastpath) {
 		if (discard_in_flight_add(ca, iter.pos.offset, true))
 			goto out;
@@ -58,7 +58,7 @@ LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
 
 struct bch_alloc_v4 {
 	struct bch_val		v;
-	__u64			journal_seq;
+	__u64			journal_seq_nonempty;
 	__u32			flags;
 	__u8			gen;
 	__u8			oldest_gen;
@@ -70,7 +70,7 @@ struct bch_alloc_v4 {
 	__u32			stripe;
 	__u32			nr_external_backpointers;
 	/* end of fields in original version of alloc_v4 */
-	__u64			_fragmentation_lru; /* obsolete */
+	__u64			journal_seq_empty;
 	__u32			stripe_sectors;
 	__u32			pad;
 } __packed __aligned(8);
|
@ -200,14 +200,35 @@ static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool may_alloc_bucket(struct bch_fs *c,
|
||||
struct bpos bucket,
|
||||
struct bucket_alloc_state *s)
|
||||
{
|
||||
if (bch2_bucket_is_open(c, bucket.inode, bucket.offset)) {
|
||||
s->skipped_open++;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
|
||||
c->journal.flushed_seq_ondisk, bucket.inode, bucket.offset)) {
|
||||
s->skipped_need_journal_commit++;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (bch2_bucket_nocow_is_locked(&c->nocow_locks, bucket)) {
|
||||
s->skipped_nocow++;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
u64 bucket, u8 gen,
|
||||
enum bch_watermark watermark,
|
||||
struct bucket_alloc_state *s,
|
||||
struct closure *cl)
|
||||
{
|
||||
struct open_bucket *ob;
|
||||
|
||||
if (unlikely(is_superblock_bucket(c, ca, bucket)))
|
||||
return NULL;
|
||||
|
||||
@@ -216,22 +237,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
 		return NULL;
 	}
 
-	if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
-		s->skipped_open++;
-		return NULL;
-	}
-
-	if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-			c->journal.flushed_seq_ondisk, ca->dev_idx, bucket)) {
-		s->skipped_need_journal_commit++;
-		return NULL;
-	}
-
-	if (bch2_bucket_nocow_is_locked(&c->nocow_locks, POS(ca->dev_idx, bucket))) {
-		s->skipped_nocow++;
-		return NULL;
-	}
-
 	spin_lock(&c->freelist_lock);
 
 	if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(watermark))) {
@@ -250,10 +255,9 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
 		return NULL;
 	}
 
-	ob = bch2_open_bucket_alloc(c);
+	struct open_bucket *ob = bch2_open_bucket_alloc(c);
 
 	spin_lock(&ob->lock);
-
 	ob->valid = true;
 	ob->sectors_free = ca->mi.bucket_size;
 	ob->dev		= ca->dev_idx;
@@ -279,8 +283,11 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
 {
 	struct bch_fs *c = trans->c;
 	u64 b = freespace_iter->pos.offset & ~(~0ULL << 56);
-	u8 gen;
 
+	if (!may_alloc_bucket(c, POS(ca->dev_idx, b), s))
+		return NULL;
+
+	u8 gen;
 	int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true);
 	if (ret < 0)
 		return ERR_PTR(ret);
@@ -300,6 +307,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
 			struct bucket_alloc_state *s,
 			struct closure *cl)
 {
+	struct bch_fs *c = trans->c;
 	struct btree_iter iter, citer;
 	struct bkey_s_c k, ck;
 	struct open_bucket *ob = NULL;
@@ -359,7 +367,10 @@ again:
 
 		s->buckets_seen++;
 
-		ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, a->gen, watermark, s, cl);
+		ob = may_alloc_bucket(c, k.k->p, s)
+			? __try_alloc_bucket(c, ca, k.k->p.offset, a->gen,
+					     watermark, s, cl)
+			: NULL;
 next:
 		bch2_set_btree_iter_dontneed(&citer);
 		bch2_trans_iter_exit(trans, &citer);
@@ -626,9 +637,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
 	unsigned i;
 
 	for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
-		ret.devs[ret.nr++] = i;
+		ret.data[ret.nr++] = i;
 
-	bubble_sort(ret.devs, ret.nr, dev_stripe_cmp);
+	bubble_sort(ret.data, ret.nr, dev_stripe_cmp);
 	return ret;
 }
 
||||
@ -700,18 +711,13 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
|
||||
struct closure *cl)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct dev_alloc_list devs_sorted =
|
||||
bch2_dev_alloc_list(c, stripe, devs_may_alloc);
|
||||
int ret = -BCH_ERR_insufficient_devices;
|
||||
|
||||
BUG_ON(*nr_effective >= nr_replicas);
|
||||
|
||||
for (unsigned i = 0; i < devs_sorted.nr; i++) {
|
||||
struct bch_dev_usage usage;
|
||||
struct open_bucket *ob;
|
||||
|
||||
unsigned dev = devs_sorted.devs[i];
|
||||
struct bch_dev *ca = bch2_dev_tryget_noerror(c, dev);
|
||||
struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc);
|
||||
darray_for_each(devs_sorted, i) {
|
||||
struct bch_dev *ca = bch2_dev_tryget_noerror(c, *i);
|
||||
if (!ca)
|
||||
continue;
|
||||
|
||||
@@ -720,8 +726,9 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
 			continue;
 		}
 
-		ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type,
-					     cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage);
+		struct bch_dev_usage usage;
+		struct open_bucket *ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type,
+								 cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage);
 		if (!IS_ERR(ob))
 			bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
 		bch2_dev_put(ca);
@@ -765,10 +772,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
 			 struct closure *cl)
 {
 	struct bch_fs *c = trans->c;
-	struct dev_alloc_list devs_sorted;
-	struct ec_stripe_head *h;
-	struct open_bucket *ob;
-	unsigned i, ec_idx;
 	int ret = 0;
 
 	if (nr_replicas < 2)
@@ -777,34 +780,32 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
 	if (ec_open_bucket(c, ptrs))
 		return 0;
 
-	h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl);
+	struct ec_stripe_head *h =
+		bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl);
 	if (IS_ERR(h))
 		return PTR_ERR(h);
 	if (!h)
 		return 0;
 
-	devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
-
-	for (i = 0; i < devs_sorted.nr; i++)
-		for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
+	struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
+	darray_for_each(devs_sorted, i)
+		for (unsigned ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
 			if (!h->s->blocks[ec_idx])
 				continue;
 
-			ob = c->open_buckets + h->s->blocks[ec_idx];
-			if (ob->dev == devs_sorted.devs[i] &&
-			    !test_and_set_bit(ec_idx, h->s->blocks_allocated))
-				goto got_bucket;
-		}
-	goto out_put_head;
-got_bucket:
-	ob->ec_idx	= ec_idx;
-	ob->ec		= h->s;
-	ec_stripe_new_get(h->s, STRIPE_REF_io);
-
-	ret = add_new_bucket(c, ptrs, devs_may_alloc,
-			     nr_replicas, nr_effective,
-			     have_cache, ob);
-out_put_head:
+			struct open_bucket *ob = c->open_buckets + h->s->blocks[ec_idx];
+			if (ob->dev == *i && !test_and_set_bit(ec_idx, h->s->blocks_allocated)) {
+				ob->ec_idx	= ec_idx;
+				ob->ec		= h->s;
+				ec_stripe_new_get(h->s, STRIPE_REF_io);
+
+				ret = add_new_bucket(c, ptrs, devs_may_alloc,
+						     nr_replicas, nr_effective,
+						     have_cache, ob);
+				goto out;
+			}
+		}
+out:
 	bch2_ec_stripe_head_put(c, h);
 	return ret;
 }
|
@ -20,7 +20,7 @@ void bch2_reset_alloc_cursors(struct bch_fs *);
|
||||
|
||||
struct dev_alloc_list {
|
||||
unsigned nr;
|
||||
u8 devs[BCH_SB_MEMBERS_MAX];
|
||||
u8 data[BCH_SB_MEMBERS_MAX];
|
||||
};
|
||||
|
||||
struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
|
||||
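Renaming dev_alloc_list's array from devs to data (alongside its existing nr member) is what lets the allocator loops above switch to darray_for_each(): the darray iteration macros only require members named data and nr. A standalone sketch with simplified types (GNU C typeof; see the darray.h hunk further down for the real macro):

	#include <stdio.h>

	#define BCH_SB_MEMBERS_MAX 64

	/* Simplified stand-in for the real struct in alloc_foreground.h. */
	struct dev_alloc_list {
		unsigned	nr;
		unsigned char	data[BCH_SB_MEMBERS_MAX];
	};

	/* Same shape as the bcachefs macro: works on any struct with .data/.nr. */
	#define darray_for_each(_d, _i)						\
		for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++)

	int main(void)
	{
		struct dev_alloc_list devs_sorted = { .nr = 3, .data = { 2, 0, 1 } };

		darray_for_each(devs_sorted, i)
			printf("dev %d\n", *i);
		return 0;
	}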
|
@ -42,16 +42,17 @@ void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bke
|
||||
u32 bucket_offset;
|
||||
struct bpos bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset);
|
||||
rcu_read_unlock();
|
||||
prt_printf(out, "bucket=%llu:%llu:%u", bucket.inode, bucket.offset, bucket_offset);
|
||||
prt_printf(out, "bucket=%llu:%llu:%u ", bucket.inode, bucket.offset, bucket_offset);
|
||||
} else {
|
||||
rcu_read_unlock();
|
||||
prt_printf(out, "sector=%llu:%llu", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT);
|
||||
prt_printf(out, "sector=%llu:%llu ", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT);
|
||||
}
|
||||
|
||||
bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level);
|
||||
prt_printf(out, " suboffset=%u len=%u pos=",
|
||||
prt_printf(out, " suboffset=%u len=%u gen=%u pos=",
|
||||
(u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
|
||||
bp.v->bucket_len);
|
||||
bp.v->bucket_len,
|
||||
bp.v->bucket_gen);
|
||||
bch2_bpos_to_text(out, bp.v->pos);
|
||||
}
|
||||
|
||||
|
@@ -1048,6 +1048,7 @@ struct bch_fs {
 	 * for signaling to the toplevel code which pass we want to run now.
 	 */
 	enum bch_recovery_pass	curr_recovery_pass;
+	enum bch_recovery_pass	next_recovery_pass;
 	/* bitmask of recovery passes that we actually ran */
 	u64			recovery_passes_complete;
 	/* never rewinds version of curr_recovery_pass */
@@ -1063,9 +1064,6 @@ struct bch_fs {
 	struct btree_node	*verify_ondisk;
 	struct mutex		verify_lock;
 
-	u64			*unused_inode_hints;
-	unsigned		inode_shard_bits;
-
 	/*
 	 * A btree node on disk could have too many bsets for an iterator to fit
 	 * on the stack - have to dynamically allocate them
@@ -418,7 +418,8 @@ static inline void bkey_init(struct bkey *k)
 	x(snapshot_tree,	31)		\
 	x(logged_op_truncate,	32)		\
 	x(logged_op_finsert,	33)		\
-	x(accounting,		34)
+	x(accounting,		34)		\
+	x(inode_alloc_cursor,	35)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name = nr,
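The bkey-type hunk above extends an x-macro list; each x(name, nr) entry expands into a KEY_TYPE_* enum constant via the #define x(...) right below it. A standalone sketch of the pattern with a truncated, illustrative list:

	#include <stdio.h>

	/* Truncated stand-in for BCH_BKEY_TYPES(); only the tail entries shown. */
	#define DEMO_BKEY_TYPES()		\
		x(logged_op_truncate,	32)	\
		x(logged_op_finsert,	33)	\
		x(accounting,		34)	\
		x(inode_alloc_cursor,	35)

	enum demo_bkey_type {
	#define x(name, nr) KEY_TYPE_##name = nr,
		DEMO_BKEY_TYPES()
	#undef x
	};

	int main(void)
	{
		printf("%d\n", KEY_TYPE_inode_alloc_cursor);	/* 35 */
		return 0;
	}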
@@ -682,7 +683,8 @@ struct bch_sb_field_ext {
 	x(backpointer_bucket_gen,	BCH_VERSION(1, 14))	\
 	x(disk_accounting_big_endian,	BCH_VERSION(1, 15))	\
 	x(reflink_p_may_update_opts,	BCH_VERSION(1, 16))	\
-	x(inode_depth,			BCH_VERSION(1, 17))
+	x(inode_depth,			BCH_VERSION(1, 17))	\
+	x(persistent_inode_cursors,	BCH_VERSION(1, 18))
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
@@ -850,6 +852,7 @@ LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
 LE64_BITMASK(BCH_SB_VERSION_INCOMPAT,	struct bch_sb, flags[5], 32, 48);
 LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
 					struct bch_sb, flags[5], 48, 64);
+LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS,	struct bch_sb, flags[6], 0, 4);
 
 static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
 {
@@ -1347,7 +1350,8 @@ enum btree_id_flags {
 	  BIT_ULL(KEY_TYPE_set))				\
 	x(logged_ops,		17,	0,			\
 	  BIT_ULL(KEY_TYPE_logged_op_truncate)|			\
-	  BIT_ULL(KEY_TYPE_logged_op_finsert))			\
+	  BIT_ULL(KEY_TYPE_logged_op_finsert)|			\
+	  BIT_ULL(KEY_TYPE_inode_alloc_cursor))			\
 	x(rebalance_work,	18,	BTREE_ID_SNAPSHOT_FIELD,	\
 	  BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))	\
 	x(subvolume_children,	19,	0,			\
@@ -213,16 +213,16 @@ BCH_BKEY_TYPES();
 enum bch_validate_flags {
 	BCH_VALIDATE_write		= BIT(0),
 	BCH_VALIDATE_commit		= BIT(1),
-	BCH_VALIDATE_journal		= BIT(2),
-	BCH_VALIDATE_silent		= BIT(3),
+	BCH_VALIDATE_silent		= BIT(2),
 };
 
 #define BKEY_VALIDATE_CONTEXTS()	\
 	x(unknown)			\
-	x(commit)			\
 	x(superblock)			\
 	x(journal)			\
 	x(btree_root)			\
-	x(btree_node)
+	x(btree_node)			\
+	x(commit)
 
 struct bkey_validate_context {
 	enum {
@@ -230,10 +230,12 @@ struct bkey_validate_context {
 	BKEY_VALIDATE_CONTEXTS()
 #undef x
 	} from:8;
-	enum bch_validate_flags flags:8;
 	u8			level;
 	enum btree_id		btree;
+	bool			root:1;
+	enum bch_validate_flags	flags:8;
+	unsigned		journal_offset;
+	u64			journal_seq;
 };
 
 #endif /* _BCACHEFS_BKEY_TYPES_H */
@@ -29,6 +29,7 @@
 #include "move.h"
 #include "recovery_passes.h"
 #include "reflink.h"
+#include "recovery.h"
 #include "replicas.h"
 #include "super-io.h"
 #include "trace.h"
@@ -359,11 +360,9 @@ again:
 		if (ret)
 			break;
 
-		if (!btree_id_is_alloc(b->c.btree_id)) {
-			ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
-			if (ret)
-				break;
-		}
+		ret = bch2_btree_lost_data(c, b->c.btree_id);
+		if (ret)
+			break;
 		continue;
 	}
 
@@ -525,7 +524,7 @@ int bch2_check_topology(struct bch_fs *c)
 		bch2_btree_id_to_text(&buf, i);
 
 		if (r->error) {
-			ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+			ret = bch2_btree_lost_data(c, i);
 			if (ret)
 				break;
 reconstruct_root:
@@ -741,7 +740,7 @@ static int bch2_gc_btrees(struct bch_fs *c)
 				(printbuf_reset(&buf),
 				 bch2_btree_id_to_text(&buf, btree),
 				 buf.buf)))
-			ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
+			ret = bch2_btree_lost_data(c, btree);
 	}
 fsck_err:
 	printbuf_exit(&buf);
@@ -26,7 +26,7 @@
 static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn)
 {
 	bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn));
-	prt_printf(out, " seq %llux\n", bn->keys.seq);
+	prt_printf(out, " seq %llx %llu\n", bn->keys.seq, BTREE_NODE_SEQ(bn));
 	prt_str(out, "min: ");
 	bch2_bpos_to_text(out, bn->min_key);
 	prt_newline(out);
@@ -12,6 +12,7 @@
 #include "recovery_passes.h"
 
 #include <linux/kthread.h>
+#include <linux/min_heap.h>
 #include <linux/sort.h>
 
 struct find_btree_nodes_worker {
@@ -31,8 +32,6 @@ static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, con
 
 	if (n->range_updated)
 		prt_str(out, " range updated");
-	if (n->overwritten)
-		prt_str(out, " overwritten");
 
 	for (unsigned i = 0; i < n->nr_ptrs; i++) {
 		prt_char(out, ' ');
|
||||
-found_btree_node_cmp_time(l, r);
|
||||
}
|
||||
|
||||
static inline bool found_btree_node_cmp_pos_less(const void *l, const void *r, void *arg)
|
||||
{
|
||||
return found_btree_node_cmp_pos(l, r) < 0;
|
||||
}
|
||||
|
||||
static inline void found_btree_node_swap(void *_l, void *_r, void *arg)
|
||||
{
|
||||
struct found_btree_node *l = _l;
|
||||
struct found_btree_node *r = _r;
|
||||
|
||||
swap(*l, *r);
|
||||
}
|
||||
|
||||
const struct min_heap_callbacks found_btree_node_heap_cbs = {
|
||||
.less = found_btree_node_cmp_pos_less,
|
||||
.swp = found_btree_node_swap,
|
||||
};
|
||||
|
||||
static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
|
||||
struct bio *bio, struct btree_node *bn, u64 offset)
|
||||
{
|
||||
@@ -295,55 +312,48 @@ err:
 	return f->ret ?: ret;
 }
 
-static void bubble_up(struct found_btree_node *n, struct found_btree_node *end)
+static bool nodes_overlap(const struct found_btree_node *l,
+			  const struct found_btree_node *r)
 {
-	while (n + 1 < end &&
-	       found_btree_node_cmp_pos(n, n + 1) > 0) {
-		swap(n[0], n[1]);
-		n++;
-	}
+	return (l->btree_id	== r->btree_id &&
+		l->level	== r->level &&
+		bpos_gt(l->max_key, r->min_key));
 }
 
 static int handle_overwrites(struct bch_fs *c,
-			     struct found_btree_node *start,
-			     struct found_btree_node *end)
+			     struct found_btree_node *l,
+			     found_btree_nodes *nodes_heap)
 {
-	struct found_btree_node *n;
-again:
-	for (n = start + 1;
-	     n < end &&
-	     n->btree_id == start->btree_id &&
-	     n->level == start->level &&
-	     bpos_lt(n->min_key, start->max_key);
-	     n++) {
-		int cmp = found_btree_node_cmp_time(start, n);
+	struct found_btree_node *r;
+
+	while ((r = min_heap_peek(nodes_heap)) &&
+	       nodes_overlap(l, r)) {
+		int cmp = found_btree_node_cmp_time(l, r);
 
 		if (cmp > 0) {
-			if (bpos_cmp(start->max_key, n->max_key) >= 0)
-				n->overwritten = true;
+			if (bpos_cmp(l->max_key, r->max_key) >= 0)
+				min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
 			else {
-				n->range_updated = true;
-				n->min_key = bpos_successor(start->max_key);
-				n->range_updated = true;
-				bubble_up(n, end);
-				goto again;
+				r->range_updated = true;
+				r->min_key = bpos_successor(l->max_key);
+				r->range_updated = true;
+				min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
 			}
 		} else if (cmp < 0) {
-			BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0);
+			BUG_ON(bpos_eq(l->min_key, r->min_key));
 
-			start->max_key = bpos_predecessor(n->min_key);
-			start->range_updated = true;
-		} else if (n->level) {
-			n->overwritten = true;
+			l->max_key = bpos_predecessor(r->min_key);
+			l->range_updated = true;
+		} else if (r->level) {
+			min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
 		} else {
-			if (bpos_cmp(start->max_key, n->max_key) >= 0)
-				n->overwritten = true;
+			if (bpos_cmp(l->max_key, r->max_key) >= 0)
+				min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
 			else {
-				n->range_updated = true;
-				n->min_key = bpos_successor(start->max_key);
-				n->range_updated = true;
-				bubble_up(n, end);
-				goto again;
+				r->range_updated = true;
+				r->min_key = bpos_successor(l->max_key);
+				r->range_updated = true;
+				min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
 			}
 		}
 	}
@@ -355,6 +365,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
 {
 	struct find_btree_nodes *f = &c->found_btree_nodes;
 	struct printbuf buf = PRINTBUF;
+	found_btree_nodes nodes_heap = {};
 	size_t dst;
 	int ret = 0;
 
@@ -409,29 +420,57 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
 		bch2_print_string_as_lines(KERN_INFO, buf.buf);
 	}
 
-	dst = 0;
-	darray_for_each(f->nodes, i) {
-		if (i->overwritten)
-			continue;
+	swap(nodes_heap, f->nodes);
+
+	{
+		/* darray must have same layout as a heap */
+		min_heap_char real_heap;
+		BUILD_BUG_ON(sizeof(nodes_heap.nr)	!= sizeof(real_heap.nr));
+		BUILD_BUG_ON(sizeof(nodes_heap.size)	!= sizeof(real_heap.size));
+		BUILD_BUG_ON(offsetof(found_btree_nodes, nr)	!= offsetof(min_heap_char, nr));
+		BUILD_BUG_ON(offsetof(found_btree_nodes, size)	!= offsetof(min_heap_char, size));
+	}
+
+	min_heapify_all(&nodes_heap, &found_btree_node_heap_cbs, NULL);
+
+	if (nodes_heap.nr) {
+		ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap));
+		if (ret)
+			goto err;
+
+		min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL);
+	}
 
-		ret = handle_overwrites(c, i, &darray_top(f->nodes));
+	while (true) {
+		ret = handle_overwrites(c, &darray_last(f->nodes), &nodes_heap);
 		if (ret)
 			goto err;
 
-		BUG_ON(i->overwritten);
-		f->nodes.data[dst++] = *i;
+		if (!nodes_heap.nr)
+			break;
+
+		ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap));
+		if (ret)
+			goto err;
+
+		min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL);
 	}
-	f->nodes.nr = dst;
 
-	if (c->opts.verbose) {
+	for (struct found_btree_node *n = f->nodes.data; n < &darray_last(f->nodes); n++)
+		BUG_ON(nodes_overlap(n, n + 1));
+
+	if (0 && c->opts.verbose) {
 		printbuf_reset(&buf);
 		prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
 		found_btree_nodes_to_text(&buf, c, f->nodes);
 		bch2_print_string_as_lines(KERN_INFO, buf.buf);
+	} else {
+		bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr);
 	}
 
 	eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
 err:
+	darray_exit(&nodes_heap);
 	printbuf_exit(&buf);
 	return ret;
 }
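The BUILD_BUG_ONs in the hunk above pin down the trick used here: a darray and a min_heap both lead with { nr, size, data }, so the found-nodes darray can be heapified and popped in place. A standalone approximation of the layout check (these are not the kernel's definitions):

	#include <stddef.h>

	/* Approximations of the two layouts; the real types are darray
	 * (bcachefs) and min_heap_char (linux/min_heap.h). */
	struct darray_demo   { size_t nr, size; char *data; };
	struct min_heap_demo { size_t nr, size; char *data; };

	_Static_assert(offsetof(struct darray_demo, nr) ==
		       offsetof(struct min_heap_demo, nr), "nr offset");
	_Static_assert(offsetof(struct darray_demo, size) ==
		       offsetof(struct min_heap_demo, size), "size offset");
	_Static_assert(offsetof(struct darray_demo, data) ==
		       offsetof(struct min_heap_demo, data), "data offset");

	int main(void)
	{
		return 0;
	}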
@@ -6,7 +6,6 @@
 
 struct found_btree_node {
 	bool			range_updated:1;
-	bool			overwritten:1;
 	u8			btree_id;
 	u8			level;
 	unsigned		sectors_written;
@@ -719,19 +719,29 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 		goto fatal_err;
 	}
 
+	struct bkey_validate_context validate_context = { .from = BKEY_VALIDATE_commit };
+
+	if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
+		validate_context.flags = BCH_VALIDATE_write|BCH_VALIDATE_commit;
+
+	for (struct jset_entry *i = trans->journal_entries;
+	     i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+	     i = vstruct_next(i)) {
+		ret = bch2_journal_entry_validate(c, NULL, i,
+						  bcachefs_metadata_version_current,
+						  CPU_BIG_ENDIAN, validate_context);
+		if (unlikely(ret)) {
+			bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
+						trans->fn);
+			goto fatal_err;
+		}
+	}
+
 	trans_for_each_update(trans, i) {
-		enum bch_validate_flags invalid_flags = 0;
+		validate_context.level	= i->level;
+		validate_context.btree	= i->btree_id;
 
-		if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
-			invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
-
-		ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k),
-					 (struct bkey_validate_context) {
-						.from	= BKEY_VALIDATE_commit,
-						.level	= i->level,
-						.btree	= i->btree_id,
-						.flags	= invalid_flags,
-					 });
+		ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), validate_context);
 		if (unlikely(ret)){
 			bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n",
 						trans->fn, (void *) i->ip_allocated);
@@ -740,24 +750,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 		btree_insert_entry_checks(trans, i);
 	}
 
-	for (struct jset_entry *i = trans->journal_entries;
-	     i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
-	     i = vstruct_next(i)) {
-		enum bch_validate_flags invalid_flags = 0;
-
-		if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
-			invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
-
-		ret = bch2_journal_entry_validate(c, NULL, i,
-						  bcachefs_metadata_version_current,
-						  CPU_BIG_ENDIAN, invalid_flags);
-		if (unlikely(ret)) {
-			bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
-						trans->fn);
-			goto fatal_err;
-		}
-	}
-
 	if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) {
 		struct journal *j = &c->journal;
 		struct jset_entry *entry;
@@ -588,7 +588,7 @@ struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsi
 int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
 			     enum btree_id btree, struct bpos end)
 {
-	bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_intent);
+	bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent);
 	struct bkey_s_c k = bch2_btree_iter_peek_prev(iter);
 	int ret = bkey_err(k);
 	if (ret)
|
@ -1607,8 +1607,6 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_btree_interior_update_will_free_node(as, b);
|
||||
|
||||
if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
|
||||
struct btree *n[2];
|
||||
|
||||
@ -1707,6 +1705,8 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_btree_interior_update_will_free_node(as, b);
|
||||
|
||||
if (n3) {
|
||||
bch2_btree_update_get_open_buckets(as, n3);
|
||||
bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
|
||||
@@ -2063,9 +2063,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 
 	trace_and_count(c, btree_node_merge, trans, b);
 
-	bch2_btree_interior_update_will_free_node(as, b);
-	bch2_btree_interior_update_will_free_node(as, m);
-
 	n = bch2_btree_node_alloc(as, trans, b->c.level);
 
 	SET_BTREE_NODE_SEQ(n->data,
@@ -2101,6 +2098,9 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 	if (ret)
 		goto err_free_update;
 
+	bch2_btree_interior_update_will_free_node(as, b);
+	bch2_btree_interior_update_will_free_node(as, m);
+
 	bch2_trans_verify_paths(trans);
 
 	bch2_btree_update_get_open_buckets(as, n);
@@ -2155,8 +2155,6 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 	if (ret)
 		goto out;
 
-	bch2_btree_interior_update_will_free_node(as, b);
-
 	n = bch2_btree_node_alloc_replacement(as, trans, b);
 
 	bch2_btree_build_aux_trees(n);
@@ -2180,6 +2178,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 	if (ret)
 		goto err;
 
+	bch2_btree_interior_update_will_free_node(as, b);
+
 	bch2_btree_update_get_open_buckets(as, n);
 	bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
@@ -83,7 +83,7 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t);
 	for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++)
 
 #define darray_for_each_reverse(_d, _i)					\
-	for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i)
+	for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i)
 
 #define darray_init(_d)							\
 do {									\
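The darray_for_each_reverse fix above guards the empty case: with nr == 0 the start pointer is data - 1, and without the added "&& (_d).nr" test the body would run on that out-of-bounds element. A minimal demo of the fixed macro (GNU C typeof, as in the original header):

	#include <stddef.h>
	#include <stdio.h>

	/* The "&& (_d).nr" clause keeps the body from executing for an
	 * empty darray, where the start pointer is already out of bounds. */
	#define darray_for_each_reverse(_d, _i)					\
		for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1;	\
		     _i >= (_d).data && (_d).nr; --_i)

	static struct { size_t nr; int data[4]; } d = { .nr = 0 };

	int main(void)
	{
		darray_for_each_reverse(d, i)	/* never entered: d.nr == 0 */
			printf("%d\n", *i);
		return 0;
	}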
@@ -266,7 +266,7 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
 	} else {
 		target->subvol	= le32_to_cpu(d.v->d_child_subvol);
 
-		ret = bch2_subvolume_get(trans, target->subvol, true, BTREE_ITER_cached, &s);
+		ret = bch2_subvolume_get(trans, target->subvol, true, &s);
 
 		target->inum	= le64_to_cpu(s.inode);
 	}
@@ -324,6 +324,14 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun
 
 	eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
 			accounting_pos_cmp, NULL);
+
+	if (trace_accounting_mem_insert_enabled()) {
+		struct printbuf buf = PRINTBUF;
+
+		bch2_accounting_to_text(&buf, c, a.s_c);
+		trace_accounting_mem_insert(c, buf.buf);
+		printbuf_exit(&buf);
+	}
 	return 0;
 err:
 	free_percpu(n.v[1]);
@@ -722,11 +730,18 @@ int bch2_accounting_read(struct bch_fs *c)
 		iter.flags &= ~BTREE_ITER_with_journal;
 		int ret = for_each_btree_key_continue(trans, iter,
 				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
+			struct bkey u;
+			struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u);
+
+			if (k.k->type != KEY_TYPE_accounting)
+				continue;
+
 			struct disk_accounting_pos acc_k;
 			bpos_to_disk_accounting_pos(&acc_k, k.k->p);
 
 			if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
 				break;
 
 			if (!bch2_accounting_is_mem(acc_k)) {
 				struct disk_accounting_pos next = { .type = acc_k.type + 1 };
 				bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
@@ -746,6 +761,7 @@ int bch2_accounting_read(struct bch_fs *c)
 		if (i->k->k.type == KEY_TYPE_accounting) {
 			struct disk_accounting_pos acc_k;
 			bpos_to_disk_accounting_pos(&acc_k, i->k->k.p);
+
 			if (!bch2_accounting_is_mem(acc_k))
 				continue;
 
||||
@ -782,15 +798,16 @@ int bch2_accounting_read(struct bch_fs *c)
|
||||
keys->gap = keys->nr = dst - keys->data;
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
unsigned i = 0;
|
||||
while (i < acc->k.nr) {
|
||||
unsigned idx = inorder_to_eytzinger0(i, acc->k.nr);
|
||||
|
||||
darray_for_each_reverse(acc->k, i) {
|
||||
struct disk_accounting_pos acc_k;
|
||||
bpos_to_disk_accounting_pos(&acc_k, acc->k.data[idx].pos);
|
||||
bpos_to_disk_accounting_pos(&acc_k, i->pos);
|
||||
|
||||
u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
|
||||
bch2_accounting_mem_read_counters(acc, idx, v, ARRAY_SIZE(v), false);
|
||||
memset(v, 0, sizeof(v));
|
||||
|
||||
for (unsigned j = 0; j < i->nr_counters; j++)
|
||||
v[j] = percpu_u64_get(i->v[0] + j);
|
||||
|
||||
/*
|
||||
* If the entry counters are zeroed, it should be treated as
|
||||
@@ -799,26 +816,25 @@ int bch2_accounting_read(struct bch_fs *c)
 		 * Remove it, so that if it's re-added it gets re-marked in the
 		 * superblock:
 		 */
-		ret = bch2_is_zero(v, sizeof(v[0]) * acc->k.data[idx].nr_counters)
+		ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
 			? -BCH_ERR_remove_disk_accounting_entry
-			: bch2_disk_accounting_validate_late(trans, acc_k,
-							     v, acc->k.data[idx].nr_counters);
+			: bch2_disk_accounting_validate_late(trans, acc_k, v, i->nr_counters);
 
 		if (ret == -BCH_ERR_remove_disk_accounting_entry) {
-			free_percpu(acc->k.data[idx].v[0]);
-			free_percpu(acc->k.data[idx].v[1]);
-			darray_remove_item(&acc->k, &acc->k.data[idx]);
-			eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
-					accounting_pos_cmp, NULL);
+			free_percpu(i->v[0]);
+			free_percpu(i->v[1]);
+			darray_remove_item(&acc->k, i);
 			ret = 0;
 			continue;
 		}
 
 		if (ret)
 			goto fsck_err;
-		i++;
 	}
 
+	eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
+			accounting_pos_cmp, NULL);
+
 	preempt_disable();
 	struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
 
@@ -120,7 +120,8 @@ void bch2_accounting_mem_gc(struct bch_fs *);
 
 static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc)
 {
-	return acc.type != BCH_DISK_ACCOUNTING_inum;
+	return acc.type < BCH_DISK_ACCOUNTING_TYPE_NR &&
+		acc.type != BCH_DISK_ACCOUNTING_inum;
 }
 
 /*
@@ -264,6 +264,7 @@
 	x(EIO,				missing_indirect_extent)	\
 	x(EIO,				invalidate_stripe_to_dev)	\
 	x(EIO,				no_encryption_key)		\
+	x(EIO,				insufficient_journal_devices)	\
 	x(BCH_ERR_btree_node_read_err,	btree_node_read_err_fixable)	\
 	x(BCH_ERR_btree_node_read_err,	btree_node_read_err_want_retry)	\
 	x(BCH_ERR_btree_node_read_err,	btree_node_read_err_must_retry)	\
@@ -477,8 +477,8 @@ int __bch2_bkey_fsck_err(struct bch_fs *c,
 
 	unsigned fsck_flags = 0;
 	if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) {
-		//if (test_bit(err, c->sb.errors_silent))
-		//	return -BCH_ERR_fsck_delete_bkey;
+		if (test_bit(err, c->sb.errors_silent))
+			return -BCH_ERR_fsck_delete_bkey;
 
 		fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX;
 	}
@@ -486,9 +486,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c,
 		fsck_flags |= fsck_flags_extra[err];
 
 	struct printbuf buf = PRINTBUF;
 
-	prt_printf(&buf, "invalid bkey in %s btree=",
+	prt_printf(&buf, "invalid bkey in %s",
 		   bch2_bkey_validate_contexts[from.from]);
+
+	if (from.from == BKEY_VALIDATE_journal)
+		prt_printf(&buf, " journal seq=%llu offset=%u",
+			   from.journal_seq, from.journal_offset);
+
+	prt_str(&buf, " btree=");
 	bch2_btree_id_to_text(&buf, from.btree);
 	prt_printf(&buf, " level=%u: ", from.level);
 
@@ -1238,6 +1238,12 @@ static int extent_ptr_validate(struct bch_fs *c,
 {
 	int ret = 0;
 
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	bkey_for_each_ptr(ptrs, ptr2)
+		bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev,
+				 c, ptr_to_duplicate_device,
+				 "multiple pointers to same device (%u)", ptr->dev);
+
 	/* bad pointers are repaired by check_fix_ptrs(): */
 	rcu_read_lock();
 	struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
@@ -1252,13 +1258,6 @@ static int extent_ptr_validate(struct bch_fs *c,
 	unsigned bucket_size	= ca->mi.bucket_size;
 	rcu_read_unlock();
 
-	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	bkey_for_each_ptr(ptrs, ptr2)
-		bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev,
-				 c, ptr_to_duplicate_device,
-				 "multiple pointers to same device (%u)", ptr->dev);
-
-
 	bkey_fsck_err_on(bucket >= nbuckets,
 			 c, ptr_after_last_bucket,
 			 "pointer past last bucket (%llu > %llu)", bucket, nbuckets);
@@ -69,9 +69,7 @@ int bch2_create_trans(struct btree_trans *trans,
 		if (!snapshot_src.inum) {
 			/* Inode wasn't specified, just snapshot: */
 			struct bch_subvolume s;
-
-			ret = bch2_subvolume_get(trans, snapshot_src.subvol, true,
-						 BTREE_ITER_cached, &s);
+			ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, &s);
 			if (ret)
 				goto err;
 
@@ -226,7 +226,6 @@ struct dio_write {
 	struct mm_struct		*mm;
 	const struct iovec		*iov;
 	unsigned			loop:1,
-					have_mm_ref:1,
 					extending:1,
 					sync:1,
 					flush:1;
@@ -391,9 +390,6 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
 
 	kfree(dio->iov);
 
-	if (dio->have_mm_ref)
-		mmdrop(dio->mm);
-
 	ret = dio->op.error ?: ((long) dio->written << 9);
 	bio_put(&dio->op.wbio.bio);
 
@@ -533,24 +529,9 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
 
 		if (unlikely(dio->iter.count) &&
 		    !dio->sync &&
-		    !dio->loop) {
-			/*
-			 * Rest of write will be submitted asynchronously -
-			 * unless copying the iov fails:
-			 */
-			if (likely(!bch2_dio_write_copy_iov(dio))) {
-				/*
-				 * aio guarantees that mm_struct outlives the
-				 * request, but io_uring does not
-				 */
-				if (dio->mm) {
-					mmgrab(dio->mm);
-					dio->have_mm_ref = true;
-				}
-			} else {
-				dio->sync = sync = true;
-			}
-		}
+		    !dio->loop &&
+		    bch2_dio_write_copy_iov(dio))
+			dio->sync = sync = true;
 
 		dio->loop = true;
 		closure_call(&dio->op.cl, bch2_write, NULL, NULL);
@@ -578,25 +559,15 @@ err:
 
 static noinline __cold void bch2_dio_write_continue(struct dio_write *dio)
 {
-	struct mm_struct *mm = dio->have_mm_ref ? dio->mm: NULL;
+	struct mm_struct *mm = dio->mm;
 
 	bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE);
 
-	if (mm) {
-		if (unlikely(!mmget_not_zero(mm))) {
-			/* process exited */
-			dio->op.error = -ESRCH;
-			bch2_dio_write_done(dio);
-			return;
-		}
-
+	if (mm)
 		kthread_use_mm(mm);
-	}
 	bch2_dio_write_loop(dio);
-	if (mm) {
+	if (mm)
 		kthread_unuse_mm(mm);
-		mmput(mm);
-	}
 }
 
 static void bch2_dio_write_loop_async(struct bch_write_op *op)
@@ -670,7 +641,6 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
 	dio->mm			= current->mm;
 	dio->iov		= NULL;
 	dio->loop		= false;
-	dio->have_mm_ref	= false;
 	dio->extending		= extending;
 	dio->sync		= is_sync_kiocb(req) || extending;
 	dio->flush		= iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
@@ -499,7 +499,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
 	struct bch_inode_unpacked inode_u;
 	struct bch_subvolume subvol;
 	int ret = lockrestart_do(trans,
-		bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
+		bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
 		bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?:
 		PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));
 	bch2_trans_put(trans);
@@ -569,8 +569,7 @@ retry:
 	inum.subvol = inode_u.bi_subvol ?: dir->ei_inum.subvol;
 	inum.inum = inode_u.bi_inum;
 
-	ret   = bch2_subvolume_get(trans, inum.subvol, true,
-				   BTREE_ITER_with_updates, &subvol) ?:
+	ret   = bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
 		bch2_trans_commit(trans, NULL, &journal_seq, 0);
 	if (unlikely(ret)) {
 		bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
|
||||
|
||||
struct bch_subvolume subvol;
|
||||
struct bch_inode_unpacked inode_u;
|
||||
ret = bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
|
||||
ret = bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?:
|
||||
bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?:
|
||||
PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));
|
||||
|
||||
|
@@ -109,7 +109,7 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol,
 			 u32 *snapshot, u64 *inum)
 {
 	struct bch_subvolume s;
-	int ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
+	int ret = bch2_subvolume_get(trans, subvol, false, &s);
 
 	*snapshot = le32_to_cpu(s.snapshot);
 	*inum = le64_to_cpu(s.inode);
@@ -226,8 +226,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
 	subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };
 
 	struct bch_subvolume subvol;
-	ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol),
-				 false, 0, &subvol);
+	ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), false, &subvol);
 	bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
 		    le32_to_cpu(st.master_subvol), snapshot);
 	if (ret)
@@ -942,69 +941,16 @@ static int get_visible_inodes(struct btree_trans *trans,
 	return ret;
 }
 
-static int dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d)
-{
-	if (d.v->d_type == DT_SUBVOL) {
-		u32 snap;
-		u64 inum;
-		int ret = subvol_lookup(trans, le32_to_cpu(d.v->d_child_subvol), &snap, &inum);
-		if (ret && !bch2_err_matches(ret, ENOENT))
-			return ret;
-		return !ret;
-	} else {
-		struct btree_iter iter;
-		struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
-				SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0);
-		int ret = bkey_err(k);
-		if (ret)
-			return ret;
-
-		ret = bkey_is_inode(k.k);
-		bch2_trans_iter_exit(trans, &iter);
-		return ret;
-	}
-}
-
-/*
- * Prefer to delete the first one, since that will be the one at the wrong
- * offset:
- * return value: 0 -> delete k1, 1 -> delete k2
- */
-static int hash_pick_winner(struct btree_trans *trans,
-			    const struct bch_hash_desc desc,
-			    struct bch_hash_info *hash_info,
-			    struct bkey_s_c k1,
-			    struct bkey_s_c k2)
-{
-	if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) &&
-	    !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k)))
-		return 0;
-
-	switch (desc.btree_id) {
-	case BTREE_ID_dirents: {
-		int ret = dirent_has_target(trans, bkey_s_c_to_dirent(k1));
-		if (ret < 0)
-			return ret;
-		if (!ret)
-			return 0;
-
-		ret = dirent_has_target(trans, bkey_s_c_to_dirent(k2));
-		if (ret < 0)
-			return ret;
-		if (!ret)
-			return 1;
-		return 2;
-	}
-	default:
-		return 0;
-	}
-}
-
-static int fsck_update_backpointers(struct btree_trans *trans,
-				    struct snapshots_seen *s,
-				    const struct bch_hash_desc desc,
-				    struct bch_hash_info *hash_info,
-				    struct bkey_i *new)
+int bch2_fsck_update_backpointers(struct btree_trans *trans,
+				  struct snapshots_seen *s,
+				  const struct bch_hash_desc desc,
+				  struct bch_hash_info *hash_info,
+				  struct bkey_i *new)
 {
 	if (new->k.type != KEY_TYPE_dirent)
 		return 0;
@@ -1032,153 +978,6 @@ err:
 	return ret;
 }
 
-static int fsck_rename_dirent(struct btree_trans *trans,
-			      struct snapshots_seen *s,
-			      const struct bch_hash_desc desc,
-			      struct bch_hash_info *hash_info,
-			      struct bkey_s_c_dirent old)
-{
-	struct qstr old_name = bch2_dirent_get_name(old);
-	struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32);
-	int ret = PTR_ERR_OR_ZERO(new);
-	if (ret)
-		return ret;
-
-	bkey_dirent_init(&new->k_i);
-	dirent_copy_target(new, old);
-	new->k.p = old.k->p;
-
-	for (unsigned i = 0; i < 1000; i++) {
-		unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u",
-				       old_name.len, old_name.name, i);
-		unsigned u64s = BKEY_U64s + dirent_val_u64s(len);
-
-		if (u64s > U8_MAX)
-			return -EINVAL;
-
-		new->k.u64s = u64s;
-
-		ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
-						(subvol_inum) { 0, old.k->p.inode },
-						old.k->p.snapshot, &new->k_i,
-						BTREE_UPDATE_internal_snapshot_node);
-		if (!bch2_err_matches(ret, EEXIST))
-			break;
-	}
-
-	if (ret)
-		return ret;
-
-	return fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
-}
-
-static int hash_check_key(struct btree_trans *trans,
-			  struct snapshots_seen *s,
-			  const struct bch_hash_desc desc,
-			  struct bch_hash_info *hash_info,
-			  struct btree_iter *k_iter, struct bkey_s_c hash_k)
-{
-	struct bch_fs *c = trans->c;
-	struct btree_iter iter = { NULL };
-	struct printbuf buf = PRINTBUF;
-	struct bkey_s_c k;
-	u64 hash;
-	int ret = 0;
-
-	if (hash_k.k->type != desc.key_type)
-		return 0;
-
-	hash = desc.hash_bkey(hash_info, hash_k);
-
-	if (likely(hash == hash_k.k->p.offset))
-		return 0;
-
-	if (hash_k.k->p.offset < hash)
-		goto bad_hash;
-
-	for_each_btree_key_norestart(trans, iter, desc.btree_id,
-				     SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
-				     BTREE_ITER_slots, k, ret) {
-		if (bkey_eq(k.k->p, hash_k.k->p))
-			break;
-
-		if (k.k->type == desc.key_type &&
-		    !desc.cmp_bkey(k, hash_k))
-			goto duplicate_entries;
-
-		if (bkey_deleted(k.k)) {
-			bch2_trans_iter_exit(trans, &iter);
-			goto bad_hash;
-		}
-	}
-out:
-	bch2_trans_iter_exit(trans, &iter);
-	printbuf_exit(&buf);
-	return ret;
-bad_hash:
-	if (fsck_err(trans, hash_table_key_wrong_offset,
-		     "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s",
-		     bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
-		     (printbuf_reset(&buf),
-		      bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
-		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k);
-		if (IS_ERR(new))
-			return PTR_ERR(new);
-
-		k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info,
-						     (subvol_inum) { 0, hash_k.k->p.inode },
-						     hash_k.k->p.snapshot, new,
-						     STR_HASH_must_create|
-						     BTREE_ITER_with_updates|
-						     BTREE_UPDATE_internal_snapshot_node);
-		ret = bkey_err(k);
-		if (ret)
-			goto out;
-		if (k.k)
-			goto duplicate_entries;
-
-		ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter,
-					  BTREE_UPDATE_internal_snapshot_node) ?:
-			fsck_update_backpointers(trans, s, desc, hash_info, new) ?:
-			bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
-			-BCH_ERR_transaction_restart_nested;
-		goto out;
-	}
-fsck_err:
-	goto out;
-duplicate_entries:
-	ret = hash_pick_winner(trans, desc, hash_info, hash_k, k);
-	if (ret < 0)
-		goto out;
-
-	if (!fsck_err(trans, hash_table_key_duplicate,
-		      "duplicate hash table keys%s:\n%s",
-		      ret != 2 ? "" : ", both point to valid inodes",
-		      (printbuf_reset(&buf),
-		       bch2_bkey_val_to_text(&buf, c, hash_k),
-		       prt_newline(&buf),
-		       bch2_bkey_val_to_text(&buf, c, k),
-		       buf.buf)))
-		goto out;
-
-	switch (ret) {
-	case 0:
-		ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
-		break;
-	case 1:
-		ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0);
-		break;
-	case 2:
-		ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?:
-			bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
-		goto out;
-	}
-
-	ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
-		-BCH_ERR_transaction_restart_nested;
-	goto out;
-}
-
 static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
 					       struct btree_iter *iter,
 					       struct bch_inode_unpacked *inode,
@@ -1421,7 +1220,7 @@ static int check_inode(struct btree_trans *trans,
 	if (u.bi_subvol) {
 		struct bch_subvolume s;
 
-		ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s);
+		ret = bch2_subvolume_get(trans, u.bi_subvol, false, &s);
 		if (ret && !bch2_err_matches(ret, ENOENT))
 			goto err;
 
@@ -2497,7 +2296,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 		*hash_info = bch2_hash_info_init(c, &i->inode);
 	dir->first_this_inode = false;
 
-	ret = hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k);
+	ret = bch2_str_hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k);
 	if (ret < 0)
 		goto err;
 	if (ret) {
@@ -2611,7 +2410,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
 		*hash_info = bch2_hash_info_init(c, &i->inode);
 	inode->first_this_inode = false;
 
-	ret = hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k);
+	ret = bch2_str_hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k);
 	bch_err_fn(c, ret);
 	return ret;
 }
@ -2864,7 +2663,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
|
||||
d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
|
||||
ret = bkey_err(d.s_c);
|
||||
if (ret && !bch2_err_matches(ret, ENOENT))
|
||||
break;
|
||||
goto out;
|
||||
|
||||
if (!ret && (ret = dirent_points_to_inode(c, d, &inode)))
|
||||
bch2_trans_iter_exit(trans, &dirent_iter);
|
||||
@ -2899,7 +2698,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
|
||||
if (ret) {
|
||||
/* Should have been caught in dirents pass */
|
||||
bch_err_msg(c, ret, "error looking up parent directory");
|
||||
break;
|
||||
goto out;
|
||||
}
|
||||
|
||||
min_bi_depth = parent_inode.bi_depth;
|
||||
@ -2930,8 +2729,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
|
||||
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
|
||||
}
|
||||
|
||||
redo_bi_depth = false;
|
||||
break;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,14 @@
#ifndef _BCACHEFS_FSCK_H
#define _BCACHEFS_FSCK_H

#include "str_hash.h"

int bch2_fsck_update_backpointers(struct btree_trans *,
				  struct snapshots_seen *,
				  const struct bch_hash_desc,
				  struct bch_hash_info *,
				  struct bkey_i *);

int bch2_check_inodes(struct bch_fs *);
int bch2_check_extents(struct bch_fs *);
int bch2_check_indirect_extents(struct bch_fs *);

@ -799,6 +799,28 @@ void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
	prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation));
}

int bch2_inode_alloc_cursor_validate(struct bch_fs *c, struct bkey_s_c k,
				     struct bkey_validate_context from)
{
	int ret = 0;

	bkey_fsck_err_on(k.k->p.inode != LOGGED_OPS_INUM_inode_cursors,
			 c, inode_alloc_cursor_inode_bad,
			 "k.p.inode bad");
fsck_err:
	return ret;
}

void bch2_inode_alloc_cursor_to_text(struct printbuf *out, struct bch_fs *c,
				     struct bkey_s_c k)
{
	struct bkey_s_c_inode_alloc_cursor i = bkey_s_c_to_inode_alloc_cursor(k);

	prt_printf(out, "idx %llu generation %llu",
		   le64_to_cpu(i.v->idx),
		   le64_to_cpu(i.v->gen));
}

void bch2_inode_init_early(struct bch_fs *c,
			   struct bch_inode_unpacked *inode_u)
{
@ -859,6 +881,59 @@ static inline u32 bkey_generation(struct bkey_s_c k)
	}
}

static struct bkey_i_inode_alloc_cursor *
bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max)
{
	struct bch_fs *c = trans->c;

	u64 cursor_idx = c->opts.shard_inode_numbers ? cpu : 0;

	cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits);

	struct btree_iter iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter,
					BTREE_ID_logged_ops,
					POS(LOGGED_OPS_INUM_inode_cursors, cursor_idx),
					BTREE_ITER_cached);
	int ret = bkey_err(k);
	if (ret)
		return ERR_PTR(ret);

	struct bkey_i_inode_alloc_cursor *cursor =
		k.k->type == KEY_TYPE_inode_alloc_cursor
		? bch2_bkey_make_mut_typed(trans, &iter, &k, 0, inode_alloc_cursor)
		: bch2_bkey_alloc(trans, &iter, 0, inode_alloc_cursor);
	ret = PTR_ERR_OR_ZERO(cursor);
	if (ret)
		goto err;

	cursor->v.bits = c->opts.shard_inode_numbers_bits;

	unsigned bits = (c->opts.inodes_32bit ? 31 : 63);
	if (c->opts.shard_inode_numbers) {
		bits -= cursor->v.bits;

		*min = (cpu << bits);
		*max = (cpu << bits) | ~(ULLONG_MAX << bits);

		*min = max_t(u64, *min, BLOCKDEV_INODE_MAX);
	} else {
		*min = BLOCKDEV_INODE_MAX;
		*max = ~(ULLONG_MAX << bits);
	}

	if (le64_to_cpu(cursor->v.idx) < *min)
		cursor->v.idx = cpu_to_le64(*min);

	if (le64_to_cpu(cursor->v.idx) >= *max) {
		cursor->v.idx = cpu_to_le64(*min);
		le32_add_cpu(&cursor->v.gen, 1);
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret ? ERR_PTR(ret) : cursor;
}
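
A minimal standalone sketch of the shard-range arithmetic above (not part of the commit; shard_bits and the 63-bit budget are illustrative — inodes_32bit would start from 31, and the real code also clamps *min to BLOCKDEV_INODE_MAX):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned shard_bits = 3;		/* c->opts.shard_inode_numbers_bits */
	unsigned bits = 63 - shard_bits;	/* per-shard inode-number width */

	for (uint64_t cpu = 0; cpu < (1ULL << shard_bits); cpu++) {
		/* same expressions as bch2_inode_alloc_cursor_get() */
		uint64_t min = cpu << bits;
		uint64_t max = (cpu << bits) | ~(~0ULL << bits);
		printf("cpu %llu: inode numbers [%#llx, %#llx]\n",
		       (unsigned long long) cpu,
		       (unsigned long long) min,
		       (unsigned long long) max);
	}
	return 0;
}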

/*
 * This just finds an empty slot:
 */
@ -867,35 +942,20 @@ int bch2_inode_create(struct btree_trans *trans,
		      struct bch_inode_unpacked *inode_u,
		      u32 snapshot, u64 cpu)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	u64 min, max, start, pos, *hint;
	int ret = 0;
	unsigned bits = (c->opts.inodes_32bit ? 31 : 63);
	u64 min, max;
	struct bkey_i_inode_alloc_cursor *cursor =
		bch2_inode_alloc_cursor_get(trans, cpu, &min, &max);
	int ret = PTR_ERR_OR_ZERO(cursor);
	if (ret)
		return ret;

	if (c->opts.shard_inode_numbers) {
		bits -= c->inode_shard_bits;
	u64 start = le64_to_cpu(cursor->v.idx);
	u64 pos = start;

		min = (cpu << bits);
		max = (cpu << bits) | ~(ULLONG_MAX << bits);

		min = max_t(u64, min, BLOCKDEV_INODE_MAX);
		hint = c->unused_inode_hints + cpu;
	} else {
		min = BLOCKDEV_INODE_MAX;
		max = ~(ULLONG_MAX << bits);
		hint = c->unused_inode_hints;
	}

	start = READ_ONCE(*hint);

	if (start >= max || start < min)
		start = min;

	pos = start;
	bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos),
			     BTREE_ITER_all_snapshots|
			     BTREE_ITER_intent);
	struct bkey_s_c k;
again:
	while ((k = bch2_btree_iter_peek(iter)).k &&
	       !(ret = bkey_err(k)) &&
@ -925,6 +985,7 @@ again:
	/* Retry from start */
	pos = start = min;
	bch2_btree_iter_set_pos(iter, POS(0, pos));
	le32_add_cpu(&cursor->v.gen, 1);
	goto again;
found_slot:
	bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
@ -935,9 +996,9 @@ found_slot:
		return ret;
	}

	*hint = k.k->p.offset;
	inode_u->bi_inum = k.k->p.offset;
	inode_u->bi_generation = bkey_generation(k);
	inode_u->bi_generation = le64_to_cpu(cursor->v.gen);
	cursor->v.idx = cpu_to_le64(k.k->p.offset + 1);
	return 0;
}

@ -999,8 +1060,6 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter = { NULL };
	struct bkey_i_inode_generation delete;
	struct bch_inode_unpacked inode_u;
	struct bkey_s_c k;
	u32 snapshot;
	int ret;
@ -1040,13 +1099,7 @@ retry:
		goto err;
	}

	bch2_inode_unpack(k, &inode_u);

	bkey_inode_generation_init(&delete.k_i);
	delete.k.p = iter.pos;
	delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);

	ret   = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
	ret   = bch2_btree_delete_at(trans, &iter, 0) ?:
		bch2_trans_commit(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc);
err:

@ -68,6 +68,16 @@ void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bk
	.min_val_size	= 8,					\
})

int bch2_inode_alloc_cursor_validate(struct bch_fs *, struct bkey_s_c,
				     struct bkey_validate_context);
void bch2_inode_alloc_cursor_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

#define bch2_bkey_ops_inode_alloc_cursor ((struct bkey_ops) {	\
	.key_validate	= bch2_inode_alloc_cursor_validate,	\
	.val_to_text	= bch2_inode_alloc_cursor_to_text,	\
	.min_val_size	= 16,					\
})

#if 0
typedef struct {
	u64			lo;

@ -165,4 +165,12 @@ LE64_BITMASK(INODEv3_FIELDS_START,
				struct bch_inode_v3, bi_flags, 31, 36);
LE64_BITMASK(INODEv3_MODE,	struct bch_inode_v3, bi_flags, 36, 52);

struct bch_inode_alloc_cursor {
	struct bch_val		v;
	__u8			bits;
	__u8			pad;
	__le32			gen;
	__le64			idx;
};

#endif /* _BCACHEFS_INODE_FORMAT_H */
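
With natural alignment the value above is 16 bytes (gen at offset 4, idx at offset 8), which is where the .min_val_size = 16 in the inode.h hunk comes from. A hedged userspace mirror of the layout — the struct and field names below are illustrative, not the kernel's:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct cursor_val {		/* mirrors the fields after the empty bch_val header */
	uint8_t  bits;		/* shard_inode_numbers_bits at allocation time */
	uint8_t  pad;
	uint32_t gen;		/* __le32 on disk; bumped when idx wraps to min */
	uint64_t idx;		/* __le64 on disk; next inode number to try */
};

int main(void)
{
	printf("gen at %zu, idx at %zu, size %zu\n",
	       offsetof(struct cursor_val, gen),
	       offsetof(struct cursor_val, idx),
	       sizeof(struct cursor_val));	/* 4, 8, 16 on common ABIs */
	return 0;
}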
@ -807,10 +807,11 @@ int bch2_journal_flush(struct journal *j)
}

/*
 * bch2_journal_noflush_seq - tell the journal not to issue any flushes before
 * bch2_journal_noflush_seq - ask the journal not to issue any flushes in the
 * range [start, end)
 * @seq
 */
bool bch2_journal_noflush_seq(struct journal *j, u64 seq)
bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end)
{
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	u64 unwritten_seq;
@ -819,15 +820,15 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 seq)
	if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush)))
		return false;

	if (seq <= c->journal.flushed_seq_ondisk)
	if (c->journal.flushed_seq_ondisk >= start)
		return false;

	spin_lock(&j->lock);
	if (seq <= c->journal.flushed_seq_ondisk)
	if (c->journal.flushed_seq_ondisk >= start)
		goto out;

	for (unwritten_seq = journal_last_unwritten_seq(j);
	     unwritten_seq < seq;
	     unwritten_seq < end;
	     unwritten_seq++) {
		struct journal_buf *buf = journal_seq_to_buf(j, unwritten_seq);

@ -1564,6 +1565,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
	printbuf_indent_sub(out, 2);

	for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
		if (!ca->mi.durability)
			continue;

		struct journal_device *ja = &ca->journal;

		if (!test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_journal].d))
@ -1573,6 +1577,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
			continue;

		prt_printf(out, "dev %u:\n", ca->dev_idx);
		prt_printf(out, "durability %u:\n", ca->mi.durability);
		printbuf_indent_add(out, 2);
		prt_printf(out, "nr\t%u\n", ja->nr);
		prt_printf(out, "bucket size\t%u\n", ca->mi.bucket_size);
@ -1584,6 +1589,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
		printbuf_indent_sub(out, 2);
	}

	prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required);

	rcu_read_unlock();

	--out->atomic;

@ -404,7 +404,7 @@ void bch2_journal_flush_async(struct journal *, struct closure *);

int bch2_journal_flush_seq(struct journal *, u64, unsigned);
int bch2_journal_flush(struct journal *);
bool bch2_journal_noflush_seq(struct journal *, u64);
bool bch2_journal_noflush_seq(struct journal *, u64, u64);
int bch2_journal_meta(struct journal *);

void bch2_journal_halt(struct journal *);
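
A hedged sketch of a caller migrating to the new contract (the wrapper and its argument names are hypothetical, not from this commit): instead of "no flushes before seq", the caller now names the exact half-open window of sequence numbers it cares about.

/* hypothetical caller, illustration only */
static bool noflush_window(struct journal *j, u64 start, u64 end)
{
	/* previously: bch2_journal_noflush_seq(j, end) */
	return bch2_journal_noflush_seq(j, start, end);	/* [start, end) */
}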

@ -301,7 +301,7 @@ static void journal_entry_err_msg(struct printbuf *out,
		journal_entry_err_msg(&_buf, version, jset, entry);	\
		prt_printf(&_buf, msg, ##__VA_ARGS__);			\
									\
		switch (flags & BCH_VALIDATE_write) {			\
		switch (from.flags & BCH_VALIDATE_write) {		\
		case READ:						\
			mustfix_fsck_err(c, _err, "%s", _buf.buf);	\
			break;						\
@ -390,15 +390,12 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	struct bkey_i *k = entry->start;
	struct bkey_validate_context from = {
		.from	= BKEY_VALIDATE_journal,
		.level	= entry->level,
		.btree	= entry->btree_id,
		.flags	= flags|BCH_VALIDATE_journal,
	};

	from.level	= entry->level;
	from.btree	= entry->btree_id;

	while (k != vstruct_last(entry)) {
		int ret = journal_validate_key(c, jset, entry, k, from, version, big_endian);
@ -435,11 +432,15 @@ static int journal_entry_btree_root_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	struct bkey_i *k = entry->start;
	int ret = 0;

	from.root	= true;
	from.level	= entry->level + 1;
	from.btree	= entry->btree_id;

	if (journal_entry_err_on(!entry->u64s ||
				 le16_to_cpu(entry->u64s) != k->k.u64s,
				 c, version, jset, entry,
@ -456,13 +457,6 @@ static int journal_entry_btree_root_validate(struct bch_fs *c,
		return 0;
	}

	struct bkey_validate_context from = {
		.from	= BKEY_VALIDATE_journal,
		.level	= entry->level + 1,
		.btree	= entry->btree_id,
		.root	= true,
		.flags	= flags,
	};
	ret = journal_validate_key(c, jset, entry, k, from, version, big_endian);
	if (ret == FSCK_DELETED_KEY)
		ret = 0;
@ -480,7 +474,7 @@ static int journal_entry_prio_ptrs_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	/* obsolete, don't care: */
	return 0;
@ -495,7 +489,7 @@ static int journal_entry_blacklist_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	int ret = 0;

@ -522,7 +516,7 @@ static int journal_entry_blacklist_v2_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	struct jset_entry_blacklist_v2 *bl_entry;
	int ret = 0;
@ -564,7 +558,7 @@ static int journal_entry_usage_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	struct jset_entry_usage *u =
		container_of(entry, struct jset_entry_usage, entry);
@ -598,7 +592,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	struct jset_entry_data_usage *u =
		container_of(entry, struct jset_entry_data_usage, entry);
@ -642,7 +636,7 @@ static int journal_entry_clock_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	struct jset_entry_clock *clock =
		container_of(entry, struct jset_entry_clock, entry);
@ -682,7 +676,7 @@ static int journal_entry_dev_usage_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	struct jset_entry_dev_usage *u =
		container_of(entry, struct jset_entry_dev_usage, entry);
@ -739,7 +733,7 @@ static int journal_entry_log_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	return 0;
}
@ -756,10 +750,11 @@ static int journal_entry_overwrite_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	from.flags = 0;
	return journal_entry_btree_keys_validate(c, jset, entry,
				version, big_endian, READ);
				version, big_endian, from);
}

static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs *c,
@ -772,10 +767,10 @@ static int journal_entry_write_buffer_keys_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	return journal_entry_btree_keys_validate(c, jset, entry,
				version, big_endian, READ);
				version, big_endian, from);
}

static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct bch_fs *c,
@ -788,7 +783,7 @@ static int journal_entry_datetime_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	unsigned bytes = vstruct_bytes(entry);
	unsigned expected = 16;
@ -818,7 +813,7 @@ static void journal_entry_datetime_to_text(struct printbuf *out, struct bch_fs *
struct jset_entry_ops {
	int (*validate)(struct bch_fs *, struct jset *,
			struct jset_entry *, unsigned, int,
			enum bch_validate_flags);
			struct bkey_validate_context);
	void (*to_text)(struct printbuf *, struct bch_fs *, struct jset_entry *);
};

@ -836,11 +831,11 @@ int bch2_journal_entry_validate(struct bch_fs *c,
				struct jset *jset,
				struct jset_entry *entry,
				unsigned version, int big_endian,
				enum bch_validate_flags flags)
				struct bkey_validate_context from)
{
	return entry->type < BCH_JSET_ENTRY_NR
		? bch2_jset_entry_ops[entry->type].validate(c, jset, entry,
				version, big_endian, flags)
				version, big_endian, from)
		: 0;
}

@ -858,10 +853,18 @@ void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c,
static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
				 enum bch_validate_flags flags)
{
	struct bkey_validate_context from = {
		.flags		= flags,
		.from		= BKEY_VALIDATE_journal,
		.journal_seq	= le64_to_cpu(jset->seq),
	};

	unsigned version = le32_to_cpu(jset->version);
	int ret = 0;

	vstruct_for_each(jset, entry) {
		from.journal_offset = (u64 *) entry - jset->_data;

		if (journal_entry_err_on(vstruct_next(entry) > vstruct_last(jset),
				c, version, jset, entry,
				journal_entry_past_jset_end,
@ -870,8 +873,8 @@ static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
			break;
		}

		ret = bch2_journal_entry_validate(c, jset, entry,
				version, JSET_BIG_ENDIAN(jset), flags);
		ret = bch2_journal_entry_validate(c, jset, entry, version,
						  JSET_BIG_ENDIAN(jset), from);
		if (ret)
			break;
	}
@ -884,13 +887,17 @@ static int jset_validate(struct bch_fs *c,
			 struct jset *jset, u64 sector,
			 enum bch_validate_flags flags)
{
	unsigned version;
	struct bkey_validate_context from = {
		.flags		= flags,
		.from		= BKEY_VALIDATE_journal,
		.journal_seq	= le64_to_cpu(jset->seq),
	};
	int ret = 0;

	if (le64_to_cpu(jset->magic) != jset_magic(c))
		return JOURNAL_ENTRY_NONE;

	version = le32_to_cpu(jset->version);
	unsigned version = le32_to_cpu(jset->version);
	if (journal_entry_err_on(!bch2_version_compatible(version),
			c, version, jset, NULL,
			jset_unsupported_version,
@ -935,15 +942,16 @@ static int jset_validate_early(struct bch_fs *c,
			 unsigned bucket_sectors_left,
			 unsigned sectors_read)
{
	size_t bytes = vstruct_bytes(jset);
	unsigned version;
	enum bch_validate_flags flags = BCH_VALIDATE_journal;
	struct bkey_validate_context from = {
		.from		= BKEY_VALIDATE_journal,
		.journal_seq	= le64_to_cpu(jset->seq),
	};
	int ret = 0;

	if (le64_to_cpu(jset->magic) != jset_magic(c))
		return JOURNAL_ENTRY_NONE;

	version = le32_to_cpu(jset->version);
	unsigned version = le32_to_cpu(jset->version);
	if (journal_entry_err_on(!bch2_version_compatible(version),
			c, version, jset, NULL,
			jset_unsupported_version,
@ -956,6 +964,7 @@ static int jset_validate_early(struct bch_fs *c,
		return -EINVAL;
	}

	size_t bytes = vstruct_bytes(jset);
	if (bytes > (sectors_read << 9) &&
	    sectors_read < bucket_sectors_left)
		return JOURNAL_ENTRY_REREAD;
@ -1240,8 +1249,6 @@ int bch2_journal_read(struct bch_fs *c,
	 * those entries will be blacklisted:
	 */
	genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) {
		enum bch_validate_flags flags = BCH_VALIDATE_journal;

		i = *_i;

		if (journal_replay_ignore(i))
@ -1261,6 +1268,10 @@ int bch2_journal_read(struct bch_fs *c,
			continue;
		}

		struct bkey_validate_context from = {
			.from		= BKEY_VALIDATE_journal,
			.journal_seq	= le64_to_cpu(i->j.seq),
		};
		if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq),
					 c, le32_to_cpu(i->j.version), &i->j, NULL,
					 jset_last_seq_newer_than_seq,
@ -1420,27 +1431,50 @@ fsck_err:

/* journal write: */

static void journal_advance_devs_to_next_bucket(struct journal *j,
						struct dev_alloc_list *devs,
						unsigned sectors, u64 seq)
{
	struct bch_fs *c = container_of(j, struct bch_fs, journal);

	darray_for_each(*devs, i) {
		struct bch_dev *ca = rcu_dereference(c->devs[*i]);
		if (!ca)
			continue;

		struct journal_device *ja = &ca->journal;

		if (sectors > ja->sectors_free &&
		    sectors <= ca->mi.bucket_size &&
		    bch2_journal_dev_buckets_available(j, ja,
					journal_space_discarded)) {
			ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
			ja->sectors_free = ca->mi.bucket_size;

			/*
			 * ja->bucket_seq[ja->cur_idx] must always have
			 * something sensible:
			 */
			ja->bucket_seq[ja->cur_idx] = le64_to_cpu(seq);
		}
	}
}

static void __journal_write_alloc(struct journal *j,
				  struct journal_buf *w,
				  struct dev_alloc_list *devs_sorted,
				  struct dev_alloc_list *devs,
				  unsigned sectors,
				  unsigned *replicas,
				  unsigned replicas_want)
{
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	struct journal_device *ja;
	struct bch_dev *ca;
	unsigned i;

	if (*replicas >= replicas_want)
		return;

	for (i = 0; i < devs_sorted->nr; i++) {
		ca = rcu_dereference(c->devs[devs_sorted->devs[i]]);
	darray_for_each(*devs, i) {
		struct bch_dev *ca = rcu_dereference(c->devs[*i]);
		if (!ca)
			continue;

		ja = &ca->journal;
		struct journal_device *ja = &ca->journal;

		/*
		 * Check that we can use this device, and aren't already using
@ -1486,65 +1520,53 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w)
{
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	struct bch_devs_mask devs;
	struct journal_device *ja;
	struct bch_dev *ca;
	struct dev_alloc_list devs_sorted;
	unsigned sectors = vstruct_sectors(w->data, c->block_bits);
	unsigned target = c->opts.metadata_target ?:
		c->opts.foreground_target;
	unsigned i, replicas = 0, replicas_want =
	unsigned replicas = 0, replicas_want =
		READ_ONCE(c->opts.metadata_replicas);
	unsigned replicas_need = min_t(unsigned, replicas_want,
				       READ_ONCE(c->opts.metadata_replicas_required));
	bool advance_done = false;

	rcu_read_lock();
retry:
	devs = target_rw_devs(c, BCH_DATA_journal, target);

	devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs);

	__journal_write_alloc(j, w, &devs_sorted,
			      sectors, &replicas, replicas_want);

	if (replicas >= replicas_want)
		goto done;

	for (i = 0; i < devs_sorted.nr; i++) {
		ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
		if (!ca)
			continue;

		ja = &ca->journal;

		if (sectors > ja->sectors_free &&
		    sectors <= ca->mi.bucket_size &&
		    bch2_journal_dev_buckets_available(j, ja,
					journal_space_discarded)) {
			ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
			ja->sectors_free = ca->mi.bucket_size;

			/*
			 * ja->bucket_seq[ja->cur_idx] must always have
			 * something sensible:
			 */
			ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
		}
	/* We might run more than once if we have to stop and do discards: */
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&w->key));
	bkey_for_each_ptr(ptrs, p) {
		struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->dev);
		if (ca)
			replicas += ca->mi.durability;
	}

	__journal_write_alloc(j, w, &devs_sorted,
			      sectors, &replicas, replicas_want);
retry_target:
	devs = target_rw_devs(c, BCH_DATA_journal, target);
	devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs);
retry_alloc:
	__journal_write_alloc(j, w, &devs_sorted, sectors, &replicas, replicas_want);

	if (likely(replicas >= replicas_want))
		goto done;

	if (!advance_done) {
		journal_advance_devs_to_next_bucket(j, &devs_sorted, sectors, w->data->seq);
		advance_done = true;
		goto retry_alloc;
	}

	if (replicas < replicas_want && target) {
		/* Retry from all devices: */
		target = 0;
		goto retry;
		advance_done = false;
		goto retry_target;
	}
done:
	rcu_read_unlock();

	BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);

	return replicas >= replicas_need ? 0 : -EROFS;
	return replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
}

static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
@ -2032,7 +2054,7 @@ CLOSURE_CALLBACK(bch2_journal_write)
		bch2_journal_do_discards(j);
	}

	if (ret) {
	if (ret && !bch2_journal_error(j)) {
		struct printbuf buf = PRINTBUF;
		buf.atomic++;

@ -2044,8 +2066,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
		spin_unlock(&j->lock);
		bch2_print_string_as_lines(KERN_ERR, buf.buf);
		printbuf_exit(&buf);
		goto err;
	}
	if (ret)
		goto err;

	/*
	 * write is allocated, no longer need to account for it in
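
A condensed, self-contained model of the retry ladder that journal_write_alloc() now implements — toy code, not the kernel's: try_alloc() stands in for __journal_write_alloc(), and the bucket advance is folded into a single retry label:

#include <stdbool.h>
#include <stdio.h>

static void try_alloc(bool targeted, bool advanced, unsigned *replicas)
{
	/* pretend only the untargeted, post-advance phase finds enough devices */
	if (!targeted && advanced)
		*replicas = 2;
}

static bool write_alloc(unsigned want)
{
	unsigned replicas = 0;
	bool targeted = true, advance_done = false;
retry:
	try_alloc(targeted, advance_done, &replicas);
	if (replicas >= want)
		return true;

	if (!advance_done) {
		advance_done = true;	/* journal_advance_devs_to_next_bucket() */
		goto retry;
	}
	if (targeted) {
		targeted = false;	/* fall back to all devices */
		advance_done = false;
		goto retry;
	}
	return false;
}

int main(void)
{
	printf("allocated: %s\n", write_alloc(2) ? "yes" : "no");
	return 0;
}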

@ -63,7 +63,7 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,

int bch2_journal_entry_validate(struct bch_fs *, struct jset *,
				struct jset_entry *, unsigned, int,
				enum bch_validate_flags);
				struct bkey_validate_context);
void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *,
				struct jset_entry *);

@ -146,7 +146,8 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne

	rcu_read_lock();
	for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
		if (!ca->journal.nr)
		if (!ca->journal.nr ||
		    !ca->mi.durability)
			continue;

		min_bucket_size = min(min_bucket_size, ca->mi.bucket_size);

@ -65,7 +65,8 @@ int bch2_resume_logged_ops(struct bch_fs *c)
	int ret = bch2_trans_run(c,
		for_each_btree_key_max(trans, iter,
				   BTREE_ID_logged_ops,
				   POS(LOGGED_OPS_INUM, 0), POS(LOGGED_OPS_INUM, U64_MAX),
				   POS(LOGGED_OPS_INUM_logged_ops, 0),
				   POS(LOGGED_OPS_INUM_logged_ops, U64_MAX),
				   BTREE_ITER_prefetch, k,
			resume_logged_op(trans, &iter, k)));
	bch_err_fn(c, ret);
@ -76,7 +77,7 @@ static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
{
	struct btree_iter iter;
	int ret = bch2_bkey_get_empty_slot(trans, &iter,
			BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM, U64_MAX));
			BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM_logged_ops, U64_MAX));
	if (ret)
		return ret;

@ -2,7 +2,10 @@
#ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H
#define _BCACHEFS_LOGGED_OPS_FORMAT_H

#define LOGGED_OPS_INUM		0
enum logged_ops_inums {
	LOGGED_OPS_INUM_logged_ops,
	LOGGED_OPS_INUM_inode_cursors,
};

struct bch_logged_op_truncate {
	struct bch_val		v;
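
The enum above turns the single logged_ops keyspace into two namespaces selected by the key's inode field; a hypothetical helper (not in the commit) makes the addressing explicit:

/* illustrative only: resumable ops live at inode 0, inode allocator
 * cursors at inode 1, per the hunks in logged_ops.c and inode.c above */
static inline struct bpos logged_ops_pos(enum logged_ops_inums ns, u64 offset)
{
	return POS(ns, offset);
}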

@ -825,7 +825,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
			b = bch2_backpointer_get_node(trans, bp, &iter, &last_flushed);
			ret = PTR_ERR_OR_ZERO(b);
			if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
				continue;
				goto next;
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				continue;
			if (ret)

@ -222,15 +222,20 @@ enum fsck_err_opts {
	  BCH_SB_ERASURE_CODE,		false,				\
	  NULL,		"Enable erasure coding (DO NOT USE YET)")	\
	x(inodes_32bit,			u8,				\
	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,			\
	  OPT_FS|OPT_FORMAT,						\
	  OPT_BOOL(),							\
	  BCH_SB_INODE_32BIT,		true,				\
	  NULL,		"Constrain inode numbers to 32 bits")		\
	x(shard_inode_numbers,		u8,				\
	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,			\
	  OPT_FS|OPT_FORMAT,						\
	  OPT_BOOL(),							\
	  BCH_SB_SHARD_INUMS,		true,				\
	  NULL,		"Shard new inode numbers by CPU id")		\
	x(shard_inode_numbers_bits,	u8,				\
	  OPT_FS|OPT_FORMAT,						\
	  OPT_UINT(0, 8),						\
	  BCH_SB_SHARD_INUMS_NBITS,	0,				\
	  NULL,		"Shard new inode numbers by CPU id")		\
	x(inodes_use_key_cache,	u8,					\
	  OPT_FS|OPT_FORMAT|OPT_MOUNT,					\
	  OPT_BOOL(),							\
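
shard_inode_numbers_bits is capped at 8, and bch2_inode_alloc_cursor_get() folds any CPU id onto one of 1 << bits cursors with cursor_idx &= ~(~0ULL << bits). A standalone demo of that mask (values illustrative):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned bits = 3;		/* 8 cursor shards */

	for (uint64_t cpu = 0; cpu < 12; cpu++)
		printf("cpu %2llu -> cursor %llu\n",
		       (unsigned long long) cpu,
		       (unsigned long long) (cpu & ~(~0ULL << bits)));
	return 0;
}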

@ -103,27 +103,31 @@ u64 bch2_recovery_passes_from_stable(u64 v)
static int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
					     enum bch_recovery_pass pass)
{
	if (c->opts.recovery_passes & BIT_ULL(pass))
		return 0;

	if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns))
		return -BCH_ERR_not_in_recovery;

	if (c->recovery_passes_complete & BIT_ULL(pass))
		return 0;

	bool print = !(c->opts.recovery_passes & BIT_ULL(pass));

	if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
	    c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
		bch_info(c, "need recovery pass %s (%u), but already rw",
			 bch2_recovery_passes[pass], pass);
		if (print)
			bch_info(c, "need recovery pass %s (%u), but already rw",
				 bch2_recovery_passes[pass], pass);
		return -BCH_ERR_cannot_rewind_recovery;
	}

	bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
		 bch2_recovery_passes[pass], pass,
		 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
	if (print)
		bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
			 bch2_recovery_passes[pass], pass,
			 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);

	c->opts.recovery_passes |= BIT_ULL(pass);

	if (c->curr_recovery_pass >= pass) {
		c->curr_recovery_pass = pass;
	if (c->curr_recovery_pass > pass) {
		c->next_recovery_pass = pass;
		c->recovery_passes_complete &= (1ULL << pass) >> 1;
		return -BCH_ERR_restart_recovery;
	} else {
@ -264,7 +268,9 @@ int bch2_run_recovery_passes(struct bch_fs *c)
	 */
	c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;

	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
		c->next_recovery_pass = c->curr_recovery_pass + 1;

		spin_lock_irq(&c->recovery_pass_lock);
		unsigned pass = c->curr_recovery_pass;

@ -285,31 +291,25 @@ int bch2_run_recovery_passes(struct bch_fs *c)
			ret = bch2_run_recovery_pass(c, pass) ?:
				bch2_journal_flush(&c->journal);

			if (!ret && !test_bit(BCH_FS_error, &c->flags))
				bch2_clear_recovery_pass_required(c, pass);

			spin_lock_irq(&c->recovery_pass_lock);
			if (c->curr_recovery_pass < pass) {
			if (c->next_recovery_pass < c->curr_recovery_pass) {
				/*
				 * bch2_run_explicit_recovery_pass() was called: we
				 * can't always catch -BCH_ERR_restart_recovery because
				 * it may have been called from another thread (btree
				 * node read completion)
				 */
				spin_unlock_irq(&c->recovery_pass_lock);
				continue;
			} else if (c->curr_recovery_pass == pass) {
				c->curr_recovery_pass++;
				ret = 0;
				c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
			} else {
				BUG();
				c->recovery_passes_complete |= BIT_ULL(pass);
				c->recovery_pass_done = max(c->recovery_pass_done, pass);
			}
			c->curr_recovery_pass = c->next_recovery_pass;
			spin_unlock_irq(&c->recovery_pass_lock);

		if (ret)
			break;

		c->recovery_passes_complete |= BIT_ULL(pass);
		c->recovery_pass_done = max(c->recovery_pass_done, pass);

		if (!test_bit(BCH_FS_error, &c->flags))
			bch2_clear_recovery_pass_required(c, pass);
	}

	return ret;
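
A condensed model (illustrative, not the kernel code) of the loop restructuring above: the running pass publishes next = curr + 1 before dropping the lock; a concurrent bch2_run_explicit_recovery_pass() may lower next to request a rewind, and the loop simply adopts next as the new curr:

struct recovery {
	unsigned curr, next;
};

static void run_passes(struct recovery *r, unsigned nr_passes)
{
	while (r->curr < nr_passes) {
		r->next = r->curr + 1;

		/* run pass r->curr; an explicit request may lower r->next
		 * to an earlier pass while it runs */

		r->curr = r->next;	/* advance -- or rewind */
	}
}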

@ -23,6 +23,10 @@
int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *clean,
				int write)
{
	struct bkey_validate_context from = {
		.flags		= write,
		.from		= BKEY_VALIDATE_superblock,
	};
	struct jset_entry *entry;
	int ret;

@ -40,7 +44,7 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle
		ret = bch2_journal_entry_validate(c, NULL, entry,
				le16_to_cpu(c->disk_sb.sb->version),
				BCH_SB_BIG_ENDIAN(c->disk_sb.sb),
				write);
				from);
		if (ret)
			return ret;
	}

@ -58,7 +58,7 @@ enum bch_fsck_flags {
	x(bset_empty,					 45,	0)		\
	x(bset_bad_seq,					 46,	0)		\
	x(bset_blacklisted_journal_seq,			 47,	0)		\
	x(first_bset_blacklisted_journal_seq,		 48,	0)		\
	x(first_bset_blacklisted_journal_seq,		 48,	FSCK_AUTOFIX)	\
	x(btree_node_bad_btree,				 49,	0)		\
	x(btree_node_bad_level,				 50,	0)		\
	x(btree_node_bad_min_key,			 51,	0)		\
@ -168,7 +168,7 @@ enum bch_fsck_flags {
	x(ptr_to_incorrect_stripe,			151,	0)		\
	x(ptr_gen_newer_than_bucket_gen,		152,	0)		\
	x(ptr_too_stale,				153,	0)		\
	x(stale_dirty_ptr,				154,	0)		\
	x(stale_dirty_ptr,				154,	FSCK_AUTOFIX)	\
	x(ptr_bucket_data_type_mismatch,		155,	0)		\
	x(ptr_cached_and_erasure_coded,			156,	0)		\
	x(ptr_crc_uncompressed_size_too_small,		157,	0)		\
@ -211,6 +211,7 @@ enum bch_fsck_flags {
	x(bkey_in_missing_snapshot,			190,	0)		\
	x(inode_pos_inode_nonzero,			191,	0)		\
	x(inode_pos_blockdev_range,			192,	0)		\
	x(inode_alloc_cursor_inode_bad,			301,	0)		\
	x(inode_unpack_error,				193,	0)		\
	x(inode_str_hash_invalid,			194,	0)		\
	x(inode_v3_fields_start_bad,			195,	0)		\
@ -311,7 +312,7 @@ enum bch_fsck_flags {
	x(logged_op_but_clean,				283,	FSCK_AUTOFIX)	\
	x(compression_opt_not_marked_in_sb,		295,	FSCK_AUTOFIX)	\
	x(compression_type_not_marked_in_sb,		296,	FSCK_AUTOFIX)	\
	x(MAX,						301,	0)
	x(MAX,						302,	0)

enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,

@ -570,8 +570,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
			goto err;
	}

	ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
				 false, 0, &subvol);
	ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol);
	if (ret && !bch2_err_matches(ret, ENOENT))
		goto err;

@ -811,7 +810,7 @@ static int check_snapshot(struct btree_trans *trans,

	if (should_have_subvol) {
		id = le32_to_cpu(s.subvol);
		ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
		ret = bch2_subvolume_get(trans, id, false, &subvol);
		if (bch2_err_matches(ret, ENOENT))
			bch_err(c, "snapshot points to nonexistent subvolume:\n  %s",
				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));

libbcachefs/str_hash.c (new file)
@ -0,0 +1,209 @@
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "dirent.h"
#include "fsck.h"
#include "str_hash.h"
#include "subvolume.h"

static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d)
{
	if (d.v->d_type == DT_SUBVOL) {
		struct bch_subvolume subvol;
		int ret = bch2_subvolume_get(trans, le32_to_cpu(d.v->d_child_subvol),
					     false, &subvol);
		if (ret && !bch2_err_matches(ret, ENOENT))
			return ret;
		return !ret;
	} else {
		struct btree_iter iter;
		struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
				SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0);
		int ret = bkey_err(k);
		if (ret)
			return ret;

		ret = bkey_is_inode(k.k);
		bch2_trans_iter_exit(trans, &iter);
		return ret;
	}
}

static int fsck_rename_dirent(struct btree_trans *trans,
			      struct snapshots_seen *s,
			      const struct bch_hash_desc desc,
			      struct bch_hash_info *hash_info,
			      struct bkey_s_c_dirent old)
{
	struct qstr old_name = bch2_dirent_get_name(old);
	struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32);
	int ret = PTR_ERR_OR_ZERO(new);
	if (ret)
		return ret;

	bkey_dirent_init(&new->k_i);
	dirent_copy_target(new, old);
	new->k.p = old.k->p;

	for (unsigned i = 0; i < 1000; i++) {
		unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u",
				       old_name.len, old_name.name, i);
		unsigned u64s = BKEY_U64s + dirent_val_u64s(len);

		if (u64s > U8_MAX)
			return -EINVAL;

		new->k.u64s = u64s;

		ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
						(subvol_inum) { 0, old.k->p.inode },
						old.k->p.snapshot, &new->k_i,
						BTREE_UPDATE_internal_snapshot_node);
		if (!bch2_err_matches(ret, EEXIST))
			break;
	}

	if (ret)
		return ret;

	return bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
}

static int hash_pick_winner(struct btree_trans *trans,
			    const struct bch_hash_desc desc,
			    struct bch_hash_info *hash_info,
			    struct bkey_s_c k1,
			    struct bkey_s_c k2)
{
	if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) &&
	    !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k)))
		return 0;

	switch (desc.btree_id) {
	case BTREE_ID_dirents: {
		int ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k1));
		if (ret < 0)
			return ret;
		if (!ret)
			return 0;

		ret = bch2_dirent_has_target(trans, bkey_s_c_to_dirent(k2));
		if (ret < 0)
			return ret;
		if (!ret)
			return 1;
		return 2;
	}
	default:
		return 0;
	}
}

int bch2_str_hash_check_key(struct btree_trans *trans,
			    struct snapshots_seen *s,
			    const struct bch_hash_desc desc,
			    struct bch_hash_info *hash_info,
			    struct btree_iter *k_iter, struct bkey_s_c hash_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter = { NULL };
	struct printbuf buf = PRINTBUF;
	struct bkey_s_c k;
	u64 hash;
	int ret = 0;

	if (hash_k.k->type != desc.key_type)
		return 0;

	hash = desc.hash_bkey(hash_info, hash_k);

	if (likely(hash == hash_k.k->p.offset))
		return 0;

	if (hash_k.k->p.offset < hash)
		goto bad_hash;

	for_each_btree_key_norestart(trans, iter, desc.btree_id,
				     SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
				     BTREE_ITER_slots, k, ret) {
		if (bkey_eq(k.k->p, hash_k.k->p))
			break;

		if (k.k->type == desc.key_type &&
		    !desc.cmp_bkey(k, hash_k))
			goto duplicate_entries;

		if (bkey_deleted(k.k)) {
			bch2_trans_iter_exit(trans, &iter);
			goto bad_hash;
		}
	}
out:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
bad_hash:
	if (fsck_err(trans, hash_table_key_wrong_offset,
		     "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n  %s",
		     bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
		     (printbuf_reset(&buf),
		      bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k);
		if (IS_ERR(new))
			return PTR_ERR(new);

		k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info,
				       (subvol_inum) { 0, hash_k.k->p.inode },
				       hash_k.k->p.snapshot, new,
				       STR_HASH_must_create|
				       BTREE_ITER_with_updates|
				       BTREE_UPDATE_internal_snapshot_node);
		ret = bkey_err(k);
		if (ret)
			goto out;
		if (k.k)
			goto duplicate_entries;

		ret =   bch2_hash_delete_at(trans, desc, hash_info, k_iter,
					    BTREE_UPDATE_internal_snapshot_node) ?:
			bch2_fsck_update_backpointers(trans, s, desc, hash_info, new) ?:
			bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
			-BCH_ERR_transaction_restart_nested;
		goto out;
	}
fsck_err:
	goto out;
duplicate_entries:
	ret = hash_pick_winner(trans, desc, hash_info, hash_k, k);
	if (ret < 0)
		goto out;

	if (!fsck_err(trans, hash_table_key_duplicate,
		      "duplicate hash table keys%s:\n%s",
		      ret != 2 ? "" : ", both point to valid inodes",
		      (printbuf_reset(&buf),
		       bch2_bkey_val_to_text(&buf, c, hash_k),
		       prt_newline(&buf),
		       bch2_bkey_val_to_text(&buf, c, k),
		       buf.buf)))
		goto out;

	switch (ret) {
	case 0:
		ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
		break;
	case 1:
		ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0);
		break;
	case 2:
		ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?:
		      bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
		goto out;
	}

	ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
	      -BCH_ERR_transaction_restart_nested;
	goto out;
}
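
For reference, hash_pick_winner()'s return value drives the switch at the end of bch2_str_hash_check_key() above; the enum below is a hypothetical cheat-sheet, not code from the commit:

enum hash_winner {			/* illustrative names */
	HASH_DROP_FIRST		= 0,	/* values identical, or first dirent dangling: delete hash_k */
	HASH_DROP_SECOND	= 1,	/* second dirent dangling: delete k */
	HASH_BOTH_VALID		= 2,	/* both targets valid: rename hash_k, then delete the original */
};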

@ -393,4 +393,11 @@ int bch2_hash_delete(struct btree_trans *trans,
	return ret;
}

struct snapshots_seen;
int bch2_str_hash_check_key(struct btree_trans *,
			    struct snapshots_seen *,
			    const struct bch_hash_desc,
			    struct bch_hash_info *,
			    struct btree_iter *, struct bkey_s_c);

#endif /* _BCACHEFS_STR_HASH_H */

@ -286,11 +286,11 @@ int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol)

static __always_inline int
bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
			   bool inconsistent_if_not_found,
			   int iter_flags,
			   struct bch_subvolume *s)
{
	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
					  iter_flags, subvolume, s);
					  BTREE_ITER_cached|
					  BTREE_ITER_with_updates, subvolume, s);
	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) &&
				inconsistent_if_not_found,
				trans->c, "missing subvolume %u", subvol);
@ -299,16 +299,15 @@ bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,

int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
		       bool inconsistent_if_not_found,
		       int iter_flags,
		       struct bch_subvolume *s)
{
	return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
	return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, s);
}

int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol)
{
	struct bch_subvolume s;
	int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s);
	int ret = bch2_subvolume_get_inlined(trans, subvol, true, &s);
	if (ret)
		return ret;

@ -328,7 +327,7 @@ int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
	struct bch_snapshot snap;

	return  bch2_snapshot_lookup(trans, snapshot, &snap) ?:
		bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
		bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, subvol);
}

int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
@ -396,8 +395,7 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d
	struct bch_subvolume s;

	return lockrestart_do(trans,
			bch2_subvolume_get(trans, subvolid_to_delete, true,
					   BTREE_ITER_cached, &s)) ?:
			bch2_subvolume_get(trans, subvolid_to_delete, true, &s)) ?:
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,

@ -24,7 +24,7 @@ int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned,

int bch2_subvol_has_children(struct btree_trans *, u32);
int bch2_subvolume_get(struct btree_trans *, unsigned,
		       bool, int, struct bch_subvolume *);
		       bool, struct bch_subvolume *);
int __bch2_subvolume_get_snapshot(struct btree_trans *, u32,
				  u32 *, bool);
int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);

@ -432,7 +432,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
		bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb));
		prt_str(out, " > incompat_allowed ");
		bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb));
		return -BCH_ERR_invalid_sb_version;
		if (flags & BCH_VALIDATE_write)
			return -BCH_ERR_invalid_sb_version;
		else
			SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, BCH_SB_VERSION_INCOMPAT(sb));
	}

	if (!flags) {
@ -457,6 +460,11 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
		SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true);
	}

#ifdef __KERNEL__
	if (!BCH_SB_SHARD_INUMS_NBITS(sb))
		SET_BCH_SB_SHARD_INUMS_NBITS(sb, ilog2(roundup_pow_of_two(num_online_cpus())));
#endif

	for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
		const struct bch_option *opt = bch2_opt_table + opt_id;

@ -586,7 +586,6 @@ static void __bch2_fs_free(struct bch_fs *c)
#endif
	kfree(rcu_dereference_protected(c->disk_groups, 1));
	kfree(c->journal_seq_blacklist_table);
	kfree(c->unused_inode_hints);

	if (c->write_ref_wq)
		destroy_workqueue(c->write_ref_wq);
@ -872,8 +871,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
			(btree_blocks(c) + 1) * 2 *
			sizeof(struct sort_iter_set);

	c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus()));

	if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
				WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
	    !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io",
@ -900,9 +897,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
	    !(c->online_reserved = alloc_percpu(u64)) ||
	    mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1,
				       c->opts.btree_node_size) ||
	    mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
	    !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits,
					      sizeof(u64), GFP_KERNEL))) {
	    mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048)) {
		ret = -BCH_ERR_ENOMEM_fs_other_alloc;
		goto err;
	}

@ -199,6 +199,30 @@ DECLARE_EVENT_CLASS(bio,
		  (unsigned long long)__entry->sector, __entry->nr_sector)
);

/* disk_accounting.c */

TRACE_EVENT(accounting_mem_insert,
	TP_PROTO(struct bch_fs *c, const char *acc),
	TP_ARGS(c, acc),

	TP_STRUCT__entry(
		__field(dev_t,		dev			)
		__field(unsigned,	new_nr			)
		__string(acc,		acc			)
	),

	TP_fast_assign(
		__entry->dev		= c->dev;
		__entry->new_nr		= c->accounting.k.nr;
		__assign_str(acc);
	),

	TP_printk("%d,%d entries %u added %s",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->new_nr,
		  __get_str(acc))
);

/* fs.c: */
TRACE_EVENT(bch2_sync_fs,
	TP_PROTO(struct super_block *sb, int wait),