From 01413354a54de46edbbd8e53b3767459f8979626 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 9 Oct 2024 21:58:03 -0400 Subject: [PATCH] Update bcachefs sources to 8aa83b2beeb3 bcachefs: Improve check_snapshot_exists() --- .bcachefs_revision | 2 +- libbcachefs/acl.c | 11 +- libbcachefs/bcachefs.h | 9 ++ libbcachefs/bcachefs_format.h | 13 +- libbcachefs/btree_iter.c | 35 +++++- libbcachefs/btree_iter.h | 49 +++++--- libbcachefs/btree_trans_commit.c | 41 ++---- libbcachefs/btree_types.h | 3 + libbcachefs/btree_update.c | 2 +- libbcachefs/disk_accounting.h | 38 ++++++ libbcachefs/ec.c | 206 ++++++++++++++++--------------- libbcachefs/errcode.h | 5 +- libbcachefs/fs.c | 4 +- libbcachefs/fsck.c | 30 +++-- libbcachefs/inode.c | 4 +- libbcachefs/inode.h | 2 +- libbcachefs/journal_io.c | 3 +- libbcachefs/journal_reclaim.c | 6 +- libbcachefs/opts.c | 3 +- libbcachefs/rcu_pending.c | 2 + libbcachefs/recovery.c | 63 ++++++---- libbcachefs/recovery.h | 2 +- libbcachefs/recovery_passes.c | 101 +++++++++++---- libbcachefs/recovery_passes.h | 1 + libbcachefs/snapshot.c | 66 +++++++--- libbcachefs/super.c | 5 +- libbcachefs/xattr.c | 2 +- libbcachefs/xattr.h | 2 +- 28 files changed, 456 insertions(+), 254 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 9f6715bd..eb105111 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -3b80552e70573764bbf38b89c58749aef9dd8753 +8aa83b2beeb30185242600116e24d2e6c0c2fce5 diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index 87f1be9d..99487727 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -184,11 +184,6 @@ invalid: return ERR_PTR(-EINVAL); } -#define acl_for_each_entry(acl, acl_e) \ - for (acl_e = acl->a_entries; \ - acl_e < acl->a_entries + acl->a_count; \ - acl_e++) - /* * Convert from in-memory to filesystem representation. */ @@ -199,11 +194,11 @@ bch2_acl_to_xattr(struct btree_trans *trans, { struct bkey_i_xattr *xattr; bch_acl_header *acl_header; - const struct posix_acl_entry *acl_e; + const struct posix_acl_entry *acl_e, *pe; void *outptr; unsigned nr_short = 0, nr_long = 0, acl_len, u64s; - acl_for_each_entry(acl, acl_e) { + FOREACH_ACL_ENTRY(acl_e, acl, pe) { switch (acl_e->e_tag) { case ACL_USER: case ACL_GROUP: @@ -241,7 +236,7 @@ bch2_acl_to_xattr(struct btree_trans *trans, outptr = (void *) acl_header + sizeof(*acl_header); - acl_for_each_entry(acl, acl_e) { + FOREACH_ACL_ENTRY(acl_e, acl, pe) { bch_acl_entry *entry = outptr; entry->e_tag = cpu_to_le16(acl_e->e_tag); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index f4151ee5..3f88715a 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -293,6 +293,8 @@ do { \ #define bch_info(c, fmt, ...) \ bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) +#define bch_info_ratelimited(c, fmt, ...) \ + bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_notice(c, fmt, ...) \ bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_warn(c, fmt, ...) \ @@ -352,6 +354,12 @@ do { \ bch_info(c, fmt, ##__VA_ARGS__); \ } while (0) +#define bch_verbose_ratelimited(c, fmt, ...) \ +do { \ + if ((c)->opts.verbose) \ + bch_info_ratelimited(c, fmt, ##__VA_ARGS__); \ +} while (0) + #define pr_verbose_init(opts, fmt, ...) \ do { \ if (opt_get(opts, verbose)) \ @@ -1051,6 +1059,7 @@ struct bch_fs { u64 recovery_passes_complete; /* never rewinds version of curr_recovery_pass */ enum bch_recovery_pass recovery_pass_done; + spinlock_t recovery_pass_lock; struct semaphore online_fsck_mutex; /* DEBUG JUNK */ diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 5004f6ba..c5e3824d 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -499,8 +499,6 @@ struct bch_sb_field { #include "disk_groups_format.h" #include "extents_format.h" #include "ec_format.h" -#include "dirent_format.h" -#include "disk_groups_format.h" #include "inode_format.h" #include "journal_seq_blacklist_format.h" #include "logged_ops_format.h" @@ -1221,6 +1219,15 @@ struct jset_entry_log { u8 d[]; } __packed __aligned(8); +static inline unsigned jset_entry_log_msg_bytes(struct jset_entry_log *l) +{ + unsigned b = vstruct_bytes(&l->entry) - offsetof(struct jset_entry_log, d); + + while (b && !l->d[b - 1]) + --b; + return b; +} + struct jset_entry_datetime { struct jset_entry entry; __le64 seconds; @@ -1361,6 +1368,8 @@ static inline bool btree_id_is_alloc(enum btree_id id) case BTREE_ID_need_discard: case BTREE_ID_freespace: case BTREE_ID_bucket_gens: + case BTREE_ID_lru: + case BTREE_ID_accounting: return true; default: return false; diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 0883cf6e..eb707bb5 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -748,8 +748,6 @@ static inline int btree_path_lock_root(struct btree_trans *trans, ret = btree_node_lock(trans, path, &b->c, path->level, lock_type, trace_ip); if (unlikely(ret)) { - if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed)) - continue; if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret; BUG(); @@ -2293,6 +2291,12 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e bch2_btree_iter_verify_entry_exit(iter); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + while (1) { k = __bch2_btree_iter_peek(iter, search_key); if (unlikely(!k.k)) @@ -2462,6 +2466,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) if (iter->flags & BTREE_ITER_with_journal) return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); @@ -2599,6 +2609,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) bch2_btree_iter_verify_entry_exit(iter); EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache)); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + /* extents can't span inode numbers: */ if ((iter->flags & BTREE_ITER_is_extents) && unlikely(iter->pos.offset == KEY_OFFSET_MAX)) { @@ -2944,6 +2960,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (ret) + return ERR_PTR(ret); + struct btree_transaction_stats *s = btree_trans_stats(trans); s->max_mem = max(s->max_mem, new_bytes); @@ -3001,7 +3021,8 @@ out_new_mem: if (old_bytes) { trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); - return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced)); + return ERR_PTR(btree_trans_restart_ip(trans, + BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); } out_change_top: p = trans->mem + trans->mem_top; @@ -3109,6 +3130,14 @@ u32 bch2_trans_begin(struct btree_trans *trans) trans->last_begin_ip = _RET_IP_; +#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS + if (trans->restarted) { + trans->restart_count_this_trans++; + } else { + trans->restart_count_this_trans = 0; + } +#endif + trans_set_locked(trans); if (trans->restarted) { diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 31a58bf4..cbcd64d1 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -23,6 +23,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path { unsigned idx = path - trans->paths; + EBUG_ON(idx >= trans->nr_paths); EBUG_ON(!test_bit(idx, trans->paths_allocated)); if (unlikely(path->ref == U8_MAX)) { bch2_dump_trans_paths_updates(trans); @@ -36,6 +37,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path static inline bool __btree_path_put(struct btree_trans *trans, struct btree_path *path, bool intent) { + EBUG_ON(path - trans->paths >= trans->nr_paths); EBUG_ON(!test_bit(path - trans->paths, trans->paths_allocated)); EBUG_ON(!path->ref); EBUG_ON(!path->intent_ref && intent); @@ -341,21 +343,32 @@ static inline void bch2_trans_verify_not_unlocked(struct btree_trans *trans) } __always_inline -static int btree_trans_restart_nounlock(struct btree_trans *trans, int err) +static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip) { BUG_ON(err <= 0); BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart)); trans->restarted = err; - trans->last_restarted_ip = _THIS_IP_; + trans->last_restarted_ip = ip; return -err; } __always_inline static int btree_trans_restart(struct btree_trans *trans, int err) { - btree_trans_restart_nounlock(trans, err); - return -err; + return btree_trans_restart_ip(trans, err, _THIS_IP_); +} + +static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip) +{ +#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS + if (!(ktime_get_ns() & ~(~0UL << min(63, (10 + trans->restart_count_this_trans))))) { + trace_and_count(trans->c, trans_restart_injected, trans, ip); + return btree_trans_restart_ip(trans, + BCH_ERR_transaction_restart_fault_inject, ip); + } +#endif + return 0; } bool bch2_btree_node_upgrade(struct btree_trans *, @@ -593,13 +606,18 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans, bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ _btree_id, _pos, _flags, KEY_TYPE_##_type)) +static inline void __bkey_val_copy(void *dst_v, unsigned dst_size, struct bkey_s_c src_k) +{ + unsigned b = min_t(unsigned, dst_size, bkey_val_bytes(src_k.k)); + memcpy(dst_v, src_k.v, b); + if (unlikely(b < dst_size)) + memset(dst_v + b, 0, dst_size - b); +} + #define bkey_val_copy(_dst_v, _src_k) \ do { \ - unsigned b = min_t(unsigned, sizeof(*_dst_v), \ - bkey_val_bytes(_src_k.k)); \ - memcpy(_dst_v, _src_k.v, b); \ - if (b < sizeof(*_dst_v)) \ - memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \ + BUILD_BUG_ON(!__typecheck(*_dst_v, *_src_k.v)); \ + __bkey_val_copy(_dst_v, sizeof(*_dst_v), _src_k.s_c); \ } while (0) static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, @@ -608,17 +626,10 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, unsigned val_size, void *val) { struct btree_iter iter; - struct bkey_s_c k; - int ret; - - k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); - ret = bkey_err(k); + struct bkey_s_c k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); + int ret = bkey_err(k); if (!ret) { - unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size); - - memcpy(val, k.v, b); - if (unlikely(b < sizeof(*val))) - memset((void *) val + b, 0, sizeof(*val) - b); + __bkey_val_copy(val, val_size, k); bch2_trans_iter_exit(trans, &iter); } diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c index 9bf471fa..766c56cf 100644 --- a/libbcachefs/btree_trans_commit.c +++ b/libbcachefs/btree_trans_commit.c @@ -609,14 +609,6 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) return 0; } -static struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) -{ - return (struct bversion) { - .hi = res->seq >> 32, - .lo = (res->seq << 32) | (res->offset + offset), - }; -} - static inline int bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, struct btree_insert_entry **stopped_at, @@ -632,7 +624,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, if (race_fault()) { trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); - return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject); } /* @@ -701,25 +693,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, struct jset_entry *entry = trans->journal_entries; percpu_down_read(&c->mark_lock); - for (entry = trans->journal_entries; entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); entry = vstruct_next(entry)) if (entry->type == BCH_JSET_ENTRY_write_buffer_keys && entry->start->k.type == KEY_TYPE_accounting) { - BUG_ON(!trans->journal_res.ref); - - struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start); - - a->k.bversion = journal_pos_to_bversion(&trans->journal_res, - (u64 *) entry - (u64 *) trans->journal_entries); - BUG_ON(bversion_zero(a->k.bversion)); - - if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { - ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal); - if (ret) - goto revert_fs_usage; - } + ret = bch2_accounting_trans_commit_hook(trans, bkey_i_to_accounting(entry->start), flags); + if (ret) + goto revert_fs_usage; } percpu_up_read(&c->mark_lock); @@ -833,13 +814,9 @@ revert_fs_usage: entry2 != entry; entry2 = vstruct_next(entry2)) if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys && - entry2->start->k.type == KEY_TYPE_accounting) { - struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); - - bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); - bch2_accounting_neg(a); - } + entry2->start->k.type == KEY_TYPE_accounting) + bch2_accounting_trans_commit_revert(trans, + bkey_i_to_accounting(entry2->start), flags); percpu_up_read(&c->mark_lock); return ret; } @@ -1050,6 +1027,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) bch2_trans_verify_not_unlocked(trans); bch2_trans_verify_not_in_restart(trans); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) + goto out_reset; + if (!trans->nr_updates && !trans->journal_entries_u64s) goto out_reset; diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 4568a41f..ad911bb0 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -509,6 +509,9 @@ struct btree_trans { bool notrace_relock_fail:1; enum bch_errcode restarted:16; u32 restart_count; +#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS + u32 restart_count_this_trans; +#endif u64 last_begin_time; unsigned long last_begin_ip; diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c index 514df618..39fc7778 100644 --- a/libbcachefs/btree_update.c +++ b/libbcachefs/btree_update.c @@ -144,7 +144,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, !(ret = bkey_err(old_k)) && bkey_eq(old_pos, old_k.k->p)) { struct bpos whiteout_pos = - SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);; + SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot); if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) || snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot)) diff --git a/libbcachefs/disk_accounting.h b/libbcachefs/disk_accounting.h index 4ea6c8a0..6639535d 100644 --- a/libbcachefs/disk_accounting.h +++ b/libbcachefs/disk_accounting.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_DISK_ACCOUNTING_H #define _BCACHEFS_DISK_ACCOUNTING_H +#include "btree_update.h" #include "eytzinger.h" #include "sb-members.h" @@ -204,6 +205,43 @@ static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p, bch2_accounting_mem_read_counters(acc, idx, v, nr, false); } +static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) +{ + EBUG_ON(!res->ref); + + return (struct bversion) { + .hi = res->seq >> 32, + .lo = (res->seq << 32) | (res->offset + offset), + }; +} + +static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans, + struct bkey_i_accounting *a, + unsigned commit_flags) +{ + a->k.bversion = journal_pos_to_bversion(&trans->journal_res, + (u64 *) a - (u64 *) trans->journal_entries); + + EBUG_ON(bversion_zero(a->k.bversion)); + + return likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply)) + ? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal) + : 0; +} + +static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans, + struct bkey_i_accounting *a_i, + unsigned commit_flags) +{ + if (likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { + struct bkey_s_accounting a = accounting_i_to_s(a_i); + + bch2_accounting_neg(a); + bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); + bch2_accounting_neg(a); + } +} + int bch2_fs_replicas_usage_read(struct bch_fs *, darray_char *); int bch2_fs_accounting_read(struct bch_fs *, darray_char *, unsigned); void bch2_fs_accounting_to_text(struct printbuf *, struct bch_fs *); diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 1587c6e1..564841e5 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -900,7 +900,7 @@ err: bch2_bkey_val_to_text(&msgbuf, c, orig_k); bch_err_ratelimited(c, "error doing reconstruct read: %s\n %s", msg, msgbuf.buf); - printbuf_exit(&msgbuf);; + printbuf_exit(&msgbuf); ret = -BCH_ERR_stripe_reconstruct; goto out; } @@ -1690,7 +1690,7 @@ static void ec_stripe_key_init(struct bch_fs *c, set_bkey_val_u64s(&s->k, u64s); } -static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) +static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) { struct ec_stripe_new *s; @@ -1698,7 +1698,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) - return -BCH_ERR_ENOMEM_ec_new_stripe_alloc; + return NULL; mutex_init(&s->lock); closure_init(&s->iodone, NULL); @@ -1713,10 +1713,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data, s->nr_parity, h->blocksize, h->disk_label); - - h->s = s; - h->nr_created++; - return 0; + return s; } static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h) @@ -1857,41 +1854,42 @@ err: return h; } -static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h, +static int new_stripe_alloc_buckets(struct btree_trans *trans, + struct ec_stripe_head *h, struct ec_stripe_new *s, enum bch_watermark watermark, struct closure *cl) { struct bch_fs *c = trans->c; struct bch_devs_mask devs = h->devs; struct open_bucket *ob; struct open_buckets buckets; - struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; + struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; unsigned i, j, nr_have_parity = 0, nr_have_data = 0; bool have_cache = true; int ret = 0; - BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); - BUG_ON(v->nr_redundant != h->s->nr_parity); + BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity); + BUG_ON(v->nr_redundant != s->nr_parity); /* * We bypass the sector allocator which normally does this: */ bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); - for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { + for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) { __clear_bit(v->ptrs[i].dev, devs.d); - if (i < h->s->nr_data) + if (i < s->nr_data) nr_have_data++; else nr_have_parity++; } - BUG_ON(nr_have_data > h->s->nr_data); - BUG_ON(nr_have_parity > h->s->nr_parity); + BUG_ON(nr_have_data > s->nr_data); + BUG_ON(nr_have_parity > s->nr_parity); buckets.nr = 0; - if (nr_have_parity < h->s->nr_parity) { + if (nr_have_parity < s->nr_parity) { ret = bch2_bucket_alloc_set_trans(trans, &buckets, &h->parity_stripe, &devs, - h->s->nr_parity, + s->nr_parity, &nr_have_parity, &have_cache, 0, BCH_DATA_parity, @@ -1899,14 +1897,14 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ cl); open_bucket_for_each(c, &buckets, ob, i) { - j = find_next_zero_bit(h->s->blocks_gotten, - h->s->nr_data + h->s->nr_parity, - h->s->nr_data); - BUG_ON(j >= h->s->nr_data + h->s->nr_parity); + j = find_next_zero_bit(s->blocks_gotten, + s->nr_data + s->nr_parity, + s->nr_data); + BUG_ON(j >= s->nr_data + s->nr_parity); - h->s->blocks[j] = buckets.v[i]; + s->blocks[j] = buckets.v[i]; v->ptrs[j] = bch2_ob_ptr(c, ob); - __set_bit(j, h->s->blocks_gotten); + __set_bit(j, s->blocks_gotten); } if (ret) @@ -1914,11 +1912,11 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ } buckets.nr = 0; - if (nr_have_data < h->s->nr_data) { + if (nr_have_data < s->nr_data) { ret = bch2_bucket_alloc_set_trans(trans, &buckets, &h->block_stripe, &devs, - h->s->nr_data, + s->nr_data, &nr_have_data, &have_cache, 0, BCH_DATA_user, @@ -1926,13 +1924,13 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ cl); open_bucket_for_each(c, &buckets, ob, i) { - j = find_next_zero_bit(h->s->blocks_gotten, - h->s->nr_data, 0); - BUG_ON(j >= h->s->nr_data); + j = find_next_zero_bit(s->blocks_gotten, + s->nr_data, 0); + BUG_ON(j >= s->nr_data); - h->s->blocks[j] = buckets.v[i]; + s->blocks[j] = buckets.v[i]; v->ptrs[j] = bch2_ob_ptr(c, ob); - __set_bit(j, h->s->blocks_gotten); + __set_bit(j, s->blocks_gotten); } if (ret) @@ -1978,12 +1976,54 @@ static s64 get_existing_stripe(struct bch_fs *c, return ret; } -static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h) +static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new *s) +{ + struct bch_stripe *new_v = &bkey_i_to_stripe(&s->new_stripe.key)->v; + struct bch_stripe *existing_v = &bkey_i_to_stripe(&s->existing_stripe.key)->v; + unsigned i; + + BUG_ON(existing_v->nr_redundant != s->nr_parity); + s->nr_data = existing_v->nr_blocks - + existing_v->nr_redundant; + + int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors)); + if (ret) { + bch2_stripe_close(c, s); + return ret; + } + + BUG_ON(s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); + + /* + * Free buckets we initially allocated - they might conflict with + * blocks from the stripe we're reusing: + */ + for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) { + bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]); + s->blocks[i] = 0; + } + memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten)); + memset(s->blocks_allocated, 0, sizeof(s->blocks_allocated)); + + for (unsigned i = 0; i < existing_v->nr_blocks; i++) { + if (stripe_blockcount_get(existing_v, i)) { + __set_bit(i, s->blocks_gotten); + __set_bit(i, s->blocks_allocated); + } + + ec_block_io(c, &s->existing_stripe, READ, i, &s->iodone); + } + + bkey_copy(&s->new_stripe.key, &s->existing_stripe.key); + s->have_existing_stripe = true; + + return 0; +} + +static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h, + struct ec_stripe_new *s) { struct bch_fs *c = trans->c; - struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; - struct bch_stripe *existing_v; - unsigned i; s64 idx; int ret; @@ -1995,56 +2035,19 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri if (idx < 0) return -BCH_ERR_stripe_alloc_blocked; - ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe); + ret = get_stripe_key_trans(trans, idx, &s->existing_stripe); bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, "reading stripe key: %s", bch2_err_str(ret)); if (ret) { - bch2_stripe_close(c, h->s); + bch2_stripe_close(c, s); return ret; } - existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v; - - BUG_ON(existing_v->nr_redundant != h->s->nr_parity); - h->s->nr_data = existing_v->nr_blocks - - existing_v->nr_redundant; - - ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize); - if (ret) { - bch2_stripe_close(c, h->s); - return ret; - } - - BUG_ON(h->s->existing_stripe.size != h->blocksize); - BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); - - /* - * Free buckets we initially allocated - they might conflict with - * blocks from the stripe we're reusing: - */ - for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) { - bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]); - h->s->blocks[i] = 0; - } - memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten)); - memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated)); - - for (i = 0; i < existing_v->nr_blocks; i++) { - if (stripe_blockcount_get(existing_v, i)) { - __set_bit(i, h->s->blocks_gotten); - __set_bit(i, h->s->blocks_allocated); - } - - ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); - } - - bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key); - h->s->have_existing_stripe = true; - - return 0; + return init_new_stripe_from_existing(c, s); } -static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h) +static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h, + struct ec_stripe_new *s) { struct bch_fs *c = trans->c; struct btree_iter iter; @@ -2053,15 +2056,19 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); int ret; - if (!h->s->res.sectors) { - ret = bch2_disk_reservation_get(c, &h->s->res, + if (!s->res.sectors) { + ret = bch2_disk_reservation_get(c, &s->res, h->blocksize, - h->s->nr_parity, + s->nr_parity, BCH_DISK_RESERVATION_NOFAIL); if (ret) return ret; } + /* + * Allocate stripe slot + * XXX: we're going to need a bitrange btree of free stripes + */ for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos, BTREE_ITER_slots|BTREE_ITER_intent, k, ret) { if (bkey_gt(k.k->p, POS(0, U32_MAX))) { @@ -2076,7 +2083,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st } if (bkey_deleted(k.k) && - bch2_try_open_stripe(c, h->s, k.k->p.offset)) + bch2_try_open_stripe(c, s, k.k->p.offset)) break; } @@ -2087,16 +2094,16 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st ret = ec_stripe_mem_alloc(trans, &iter); if (ret) { - bch2_stripe_close(c, h->s); + bch2_stripe_close(c, s); goto err; } - h->s->new_stripe.key.k.p = iter.pos; + s->new_stripe.key.k.p = iter.pos; out: bch2_trans_iter_exit(trans, &iter); return ret; err: - bch2_disk_reservation_put(c, &h->s->res); + bch2_disk_reservation_put(c, &s->res); goto out; } @@ -2127,22 +2134,27 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, return h; if (!h->s) { - ret = ec_new_stripe_alloc(c, h); - if (ret) { + h->s = ec_new_stripe_alloc(c, h); + if (!h->s) { + ret = -BCH_ERR_ENOMEM_ec_new_stripe_alloc; bch_err(c, "failed to allocate new stripe"); goto err; } + + h->nr_created++; } - if (h->s->allocated) + struct ec_stripe_new *s = h->s; + + if (s->allocated) goto allocated; - if (h->s->have_existing_stripe) + if (s->have_existing_stripe) goto alloc_existing; /* First, try to allocate a full stripe: */ - ret = new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?: - __bch2_ec_stripe_head_reserve(trans, h); + ret = new_stripe_alloc_buckets(trans, h, s, BCH_WATERMARK_stripe, NULL) ?: + __bch2_ec_stripe_head_reserve(trans, h, s); if (!ret) goto allocate_buf; if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || @@ -2154,15 +2166,15 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, * existing stripe: */ while (1) { - ret = __bch2_ec_stripe_head_reuse(trans, h); + ret = __bch2_ec_stripe_head_reuse(trans, h, s); if (!ret) break; if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked) goto err; if (watermark == BCH_WATERMARK_copygc) { - ret = new_stripe_alloc_buckets(trans, h, watermark, NULL) ?: - __bch2_ec_stripe_head_reserve(trans, h); + ret = new_stripe_alloc_buckets(trans, h, s, watermark, NULL) ?: + __bch2_ec_stripe_head_reserve(trans, h, s); if (ret) goto err; goto allocate_buf; @@ -2180,19 +2192,19 @@ alloc_existing: * Retry allocating buckets, with the watermark for this * particular write: */ - ret = new_stripe_alloc_buckets(trans, h, watermark, cl); + ret = new_stripe_alloc_buckets(trans, h, s, watermark, cl); if (ret) goto err; allocate_buf: - ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize); + ret = ec_stripe_buf_init(&s->new_stripe, 0, h->blocksize); if (ret) goto err; - h->s->allocated = true; + s->allocated = true; allocated: - BUG_ON(!h->s->idx); - BUG_ON(!h->s->new_stripe.data[0]); + BUG_ON(!s->idx); + BUG_ON(!s->new_stripe.data[0]); BUG_ON(trans->restarted); return h; err: diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 64926351..26990ad5 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -162,7 +162,6 @@ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \ x(0, backpointer_to_overwritten_btree_node) \ - x(0, lock_fail_root_changed) \ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ x(BCH_ERR_fsck, fsck_fix) \ @@ -171,7 +170,9 @@ x(BCH_ERR_fsck, fsck_errors_not_fixed) \ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ x(BCH_ERR_fsck, fsck_repair_impossible) \ - x(0, restart_recovery) \ + x(EINVAL, restart_recovery) \ + x(EINVAL, not_in_recovery) \ + x(EINVAL, cannot_rewind_recovery) \ x(0, data_update_done) \ x(EINVAL, device_state_not_allowed) \ x(EINVAL, member_info_missing) \ diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 857175f4..2d3e134e 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -630,7 +630,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, goto err; /* regular files may have hardlinks: */ - if (bch2_fs_inconsistent_on(bch2_inode_should_have_bp(&inode_u) && + if (bch2_fs_inconsistent_on(bch2_inode_should_have_single_bp(&inode_u) && !bkey_eq(k.k->p, POS(inode_u.bi_dir, inode_u.bi_dir_offset)), c, "dirent points to inode that does not point back:\n %s", @@ -2042,7 +2042,7 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb, OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE); printbuf_nul_terminate(&buf); - seq_puts(seq, buf.buf); + seq_printf(seq, ",%s", buf.buf); int ret = buf.allocation_failure ? -ENOMEM : 0; printbuf_exit(&buf); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index a1087fd2..b63ed5f1 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -170,7 +170,7 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans, if (ret) return ret; - struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter)); + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); *target = le64_to_cpu(d.v->d_inum); *type = d.v->d_type; bch2_trans_iter_exit(trans, &iter); @@ -482,6 +482,13 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * return ret; } +static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos pos) +{ + return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); +} + static int remove_backpointer(struct btree_trans *trans, struct bch_inode_unpacked *inode) { @@ -490,13 +497,11 @@ static int remove_backpointer(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; - struct bkey_s_c_dirent d = - bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents, - SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot), 0, - dirent); - int ret = bkey_err(d) ?: - dirent_points_to_inode(c, d, inode) ?: - __remove_dirent(trans, d.k->p); + struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter, + SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot)); + int ret = bkey_err(d) ?: + dirent_points_to_inode(c, d, inode) ?: + __remove_dirent(trans, d.k->p); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1022,13 +1027,6 @@ fsck_err: goto out; } -static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos pos) -{ - return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); -} - static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *inode, @@ -1983,7 +1981,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, return __bch2_fsck_write_inode(trans, target); } - if (bch2_inode_should_have_bp(target) && + if (bch2_inode_should_have_single_bp(target) && !fsck_err(trans, inode_wrong_backpointer, "dirent points to inode that does not point back:\n %s", (bch2_bkey_val_to_text(&buf, c, d.s_c), diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 2c037e84..ce6ed81c 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -533,6 +533,7 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out, prt_printf(out, "(%x)\n", inode->bi_flags); prt_printf(out, "journal_seq=%llu\n", inode->bi_journal_seq); + prt_printf(out, "hash_seed=%llx\n", inode->bi_hash_seed); prt_printf(out, "bi_size=%llu\n", inode->bi_size); prt_printf(out, "bi_sectors=%llu\n", inode->bi_sectors); prt_printf(out, "bi_version=%llu\n", inode->bi_version); @@ -1377,7 +1378,8 @@ again: NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass); if (ret > 0) { - bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); + bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", + k.k->p.offset, k.k->p.snapshot); ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); /* diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index c8e98443..e579bd13 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -248,7 +248,7 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, int bch2_inode_nlink_inc(struct bch_inode_unpacked *); void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *); -static inline bool bch2_inode_should_have_bp(struct bch_inode_unpacked *inode) +static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *inode) { bool inode_has_bp = inode->bi_dir || inode->bi_dir_offset; diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 954f6a96..c113c99b 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -735,9 +735,8 @@ static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c, struct jset_entry *entry) { struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry); - unsigned bytes = vstruct_bytes(entry) - offsetof(struct jset_entry_log, d); - prt_printf(out, "%.*s", bytes, l->d); + prt_printf(out, "%.*s", jset_entry_log_msg_bytes(l), l->d); } static int journal_entry_overwrite_validate(struct bch_fs *c, diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index ace291f1..3d8fc264 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -758,10 +758,12 @@ static int bch2_journal_reclaim_thread(void *arg) journal_empty = fifo_empty(&j->pin); spin_unlock(&j->lock); + long timeout = j->next_reclaim - jiffies; + if (journal_empty) schedule(); - else if (time_after(j->next_reclaim, jiffies)) - schedule_timeout(j->next_reclaim - jiffies); + else if (timeout > 0) + schedule_timeout(timeout); else break; } diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index 84097235..f853f2f3 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -522,9 +522,8 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts, val = "0"; } - /* Unknown options are ignored: */ if (id < 0) - return 0; + goto bad_opt; if (!(bch2_opt_table[id].flags & OPT_MOUNT)) goto bad_opt; diff --git a/libbcachefs/rcu_pending.c b/libbcachefs/rcu_pending.c index 40a20192..67522aa3 100644 --- a/libbcachefs/rcu_pending.c +++ b/libbcachefs/rcu_pending.c @@ -478,7 +478,9 @@ start_gp: */ if (!p->cb_armed) { p->cb_armed = true; + spin_unlock_irqrestore(&p->lock, flags); __call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb); + goto free_node; } else { __start_poll_synchronize_rcu(pending->srcu); } diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 6db72d3b..1cc10528 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -34,21 +34,52 @@ #define QSTR(n) { { { .len = strlen(n) } }, .name = n } -void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) +int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) { - if (btree >= BTREE_ID_NR_MAX) - return; - u64 b = BIT_ULL(btree); + int ret = 0; + + mutex_lock(&c->sb_lock); if (!(c->sb.btrees_lost_data & b)) { bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); - - mutex_lock(&c->sb_lock); bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); } + + switch (btree) { + case BTREE_ID_alloc: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_backpointers: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + goto out; + case BTREE_ID_need_discard: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_freespace: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_bucket_gens: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_lru: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_accounting: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + goto out; + default: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; + goto out; + } +out: + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + + return ret; } /* for -o reconstruct_alloc: */ @@ -524,22 +555,10 @@ static int read_btree_roots(struct bch_fs *c) c, btree_root_read_error, "error reading btree root %s l=%u: %s", bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { - if (btree_id_is_alloc(i)) { - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs); - c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); + if (btree_id_is_alloc(i)) r->error = 0; - } else if (!(c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) { - bch_info(c, "will run btree node scan"); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); - } - ret = 0; - bch2_btree_lost_data(c, i); + ret = bch2_btree_lost_data(c, i); } } diff --git a/libbcachefs/recovery.h b/libbcachefs/recovery.h index 4bf818de..b0d55754 100644 --- a/libbcachefs/recovery.h +++ b/libbcachefs/recovery.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_RECOVERY_H #define _BCACHEFS_RECOVERY_H -void bch2_btree_lost_data(struct bch_fs *, enum btree_id); +int bch2_btree_lost_data(struct bch_fs *, enum btree_id); int bch2_journal_replay(struct bch_fs *); diff --git a/libbcachefs/recovery_passes.c b/libbcachefs/recovery_passes.c index 735b8adc..e6676a1b 100644 --- a/libbcachefs/recovery_passes.c +++ b/libbcachefs/recovery_passes.c @@ -94,12 +94,22 @@ u64 bch2_recovery_passes_from_stable(u64 v) /* * For when we need to rewind recovery passes and run a pass we skipped: */ -int bch2_run_explicit_recovery_pass(struct bch_fs *c, - enum bch_recovery_pass pass) +static int __bch2_run_explicit_recovery_pass(struct bch_fs *c, + enum bch_recovery_pass pass) { if (c->opts.recovery_passes & BIT_ULL(pass)) return 0; + if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns)) + return -BCH_ERR_not_in_recovery; + + if (pass < BCH_RECOVERY_PASS_set_may_go_rw && + c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { + bch_info(c, "need recovery pass %s (%u), but already rw", + bch2_recovery_passes[pass], pass); + return -BCH_ERR_cannot_rewind_recovery; + } + bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", bch2_recovery_passes[pass], pass, bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); @@ -115,6 +125,27 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, } } +int bch2_run_explicit_recovery_pass(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + unsigned long flags; + spin_lock_irqsave(&c->recovery_pass_lock, flags); + int ret = __bch2_run_explicit_recovery_pass(c, pass); + spin_unlock_irqrestore(&c->recovery_pass_lock, flags); + return ret; +} + +int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + lockdep_assert_held(&c->sb_lock); + + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); + + return bch2_run_explicit_recovery_pass(c, pass); +} + int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, enum bch_recovery_pass pass) { @@ -204,7 +235,14 @@ int bch2_run_online_recovery_passes(struct bch_fs *c) continue; ret = bch2_run_recovery_pass(c, i); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) { + + if (c->curr_recovery_pass < i) { + /* + * bch2_run_explicit_recovery_pass() was called: we + * can't always catch -BCH_ERR_restart_recovery because + * it may have been called from another thread (btree + * node read completion) + */ i = c->curr_recovery_pass; continue; } @@ -222,30 +260,51 @@ int bch2_run_recovery_passes(struct bch_fs *c) int ret = 0; while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { + spin_lock_irq(&c->recovery_pass_lock); + unsigned pass = c->curr_recovery_pass; + if (c->opts.recovery_pass_last && - c->curr_recovery_pass > c->opts.recovery_pass_last) + c->curr_recovery_pass > c->opts.recovery_pass_last) { + spin_unlock_irq(&c->recovery_pass_lock); break; - - if (should_run_recovery_pass(c, c->curr_recovery_pass)) { - unsigned pass = c->curr_recovery_pass; - - ret = bch2_run_recovery_pass(c, c->curr_recovery_pass) ?: - bch2_journal_flush(&c->journal); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || - (ret && c->curr_recovery_pass < pass)) - continue; - if (ret) - break; - - c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); } - c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); + if (!should_run_recovery_pass(c, pass)) { + c->curr_recovery_pass++; + c->recovery_pass_done = max(c->recovery_pass_done, pass); + spin_unlock_irq(&c->recovery_pass_lock); + continue; + } + spin_unlock_irq(&c->recovery_pass_lock); + + ret = bch2_run_recovery_pass(c, pass) ?: + bch2_journal_flush(&c->journal); + + spin_lock_irq(&c->recovery_pass_lock); + if (c->curr_recovery_pass < pass) { + /* + * bch2_run_explicit_recovery_pass() was called: we + * can't always catch -BCH_ERR_restart_recovery because + * it may have been called from another thread (btree + * node read completion) + */ + spin_unlock_irq(&c->recovery_pass_lock); + continue; + } else if (c->curr_recovery_pass == pass) { + c->curr_recovery_pass++; + } else { + BUG(); + } + spin_unlock_irq(&c->recovery_pass_lock); + + if (ret) + break; + + c->recovery_passes_complete |= BIT_ULL(pass); + c->recovery_pass_done = max(c->recovery_pass_done, pass); if (!test_bit(BCH_FS_error, &c->flags)) - bch2_clear_recovery_pass_required(c, c->curr_recovery_pass); - - c->curr_recovery_pass++; + bch2_clear_recovery_pass_required(c, pass); } return ret; diff --git a/libbcachefs/recovery_passes.h b/libbcachefs/recovery_passes.h index 99b464e1..7d7339c8 100644 --- a/libbcachefs/recovery_passes.h +++ b/libbcachefs/recovery_passes.h @@ -9,6 +9,7 @@ u64 bch2_recovery_passes_from_stable(u64 v); u64 bch2_fsck_recovery_passes(void); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); +int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); int bch2_run_online_recovery_passes(struct bch_fs *); diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c index 9f4d13fa..34e01bd8 100644 --- a/libbcachefs/snapshot.c +++ b/libbcachefs/snapshot.c @@ -506,7 +506,6 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, break; } } - bch2_trans_iter_exit(trans, &iter); if (!ret && !found) { @@ -536,6 +535,7 @@ static int check_snapshot_tree(struct btree_trans *trans, struct bch_snapshot s; struct bch_subvolume subvol; struct printbuf buf = PRINTBUF; + struct btree_iter snapshot_iter = {}; u32 root_id; int ret; @@ -545,16 +545,27 @@ static int check_snapshot_tree(struct btree_trans *trans, st = bkey_s_c_to_snapshot_tree(k); root_id = le32_to_cpu(st.v->root_snapshot); - ret = bch2_snapshot_lookup(trans, root_id, &s); + struct bkey_s_c_snapshot snapshot_k = + bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots, + POS(0, root_id), 0, snapshot); + ret = bkey_err(snapshot_k); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; + if (!ret) + bkey_val_copy(&s, snapshot_k); + if (fsck_err_on(ret || root_id != bch2_snapshot_root(c, root_id) || st.k->p.offset != le32_to_cpu(s.tree), trans, snapshot_tree_to_missing_snapshot, "snapshot tree points to missing/incorrect snapshot:\n %s", - (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { + (bch2_bkey_val_to_text(&buf, c, st.s_c), + prt_newline(&buf), + ret + ? prt_printf(&buf, "(%s)", bch2_err_str(ret)) + : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c), + buf.buf))) { ret = bch2_btree_delete_at(trans, iter, 0); goto err; } @@ -605,6 +616,7 @@ static int check_snapshot_tree(struct btree_trans *trans, } err: fsck_err: + bch2_trans_iter_exit(trans, &snapshot_iter); printbuf_exit(&buf); return ret; } @@ -905,12 +917,30 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) if (bch2_snapshot_equiv(c, id)) return 0; - /* 0 is an invalid tree ID */ + /* Do we need to reconstruct the snapshot_tree entry as well? */ + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; u32 tree_id = 0; - int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id); + + for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, + 0, k, ret) { + if (le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) { + tree_id = k.k->p.offset; + break; + } + } + bch2_trans_iter_exit(trans, &iter); + if (ret) return ret; + if (!tree_id) { + ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id); + if (ret) + return ret; + } + struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot)); ret = PTR_ERR_OR_ZERO(snapshot); if (ret) @@ -921,6 +951,16 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) snapshot->v.tree = cpu_to_le32(tree_id); snapshot->v.btime.lo = cpu_to_le64(bch2_current_time(c)); + for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, + 0, k, ret) { + if (le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) { + snapshot->v.subvol = cpu_to_le32(k.k->p.offset); + SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true); + break; + } + } + bch2_trans_iter_exit(trans, &iter); + return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?: bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?: @@ -1707,18 +1747,10 @@ int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, struct bkey_s_c k; int ret; - bch2_trans_iter_init(trans, &iter, id, pos, - BTREE_ITER_not_extents| - BTREE_ITER_all_snapshots); - while (1) { - k = bch2_btree_iter_prev(&iter); - ret = bkey_err(k); - if (ret) - break; - - if (!k.k) - break; - + for_each_btree_key_reverse_norestart(trans, iter, id, bpos_predecessor(pos), + BTREE_ITER_not_extents| + BTREE_ITER_all_snapshots, + k, ret) { if (!bkey_eq(pos, k.k->p)) break; diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 873e4be7..843431e5 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -766,6 +766,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) refcount_set(&c->ro_ref, 1); init_waitqueue_head(&c->ro_ref_wait); + spin_lock_init(&c->recovery_pass_lock); sema_init(&c->online_fsck_mutex, 1); init_rwsem(&c->gc_lock); @@ -1120,12 +1121,12 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, prt_bdevname(&buf, fs->bdev); prt_char(&buf, ' '); - bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));; + bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time)); prt_newline(&buf); prt_bdevname(&buf, sb->bdev); prt_char(&buf, ' '); - bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));; + bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time)); prt_newline(&buf); if (!opts->no_splitbrain_check) diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 56c8d3fe..9986bc7f 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -609,7 +609,7 @@ static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { #endif /* NO_BCACHEFS_FS */ -const struct xattr_handler *bch2_xattr_handlers[] = { +const struct xattr_handler * const bch2_xattr_handlers[] = { &bch_xattr_user_handler, &bch_xattr_trusted_handler, &bch_xattr_security_handler, diff --git a/libbcachefs/xattr.h b/libbcachefs/xattr.h index c188a5ad..2c96de05 100644 --- a/libbcachefs/xattr.h +++ b/libbcachefs/xattr.h @@ -44,6 +44,6 @@ int bch2_xattr_set(struct btree_trans *, subvol_inum, ssize_t bch2_xattr_list(struct dentry *, char *, size_t); -extern const struct xattr_handler *bch2_xattr_handlers[]; +extern const struct xattr_handler * const bch2_xattr_handlers[]; #endif /* _BCACHEFS_XATTR_H */