From 8d6138baac3b4fcd715c34cf325ae11b01a4ca67 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sun, 9 Oct 2022 02:32:57 -0400
Subject: [PATCH] Update bcachefs sources to cbccc6d869 bcachefs: Ratelimit ec error message

---
 .bcachefs_revision                  |   2 +-
 Makefile.compiler                   |   2 +-
 include/linux/kmemleak.h            |   8 +-
 include/linux/shrinker.h            |   2 +-
 libbcachefs/alloc_foreground.c      |  15 ++-
 libbcachefs/backpointers.c          |  28 ++++--
 libbcachefs/bcachefs_format.h       |   2 +-
 libbcachefs/btree_cache.c           |   7 +-
 libbcachefs/btree_gc.c              |   2 +-
 libbcachefs/btree_iter.c            |  10 +-
 libbcachefs/btree_iter.h            |   1 +
 libbcachefs/btree_key_cache.c       |  21 ++--
 libbcachefs/btree_locking.c         |  68 +++++++++----
 libbcachefs/btree_locking.h         |   1 +
 libbcachefs/btree_update_interior.c |  12 +++
 libbcachefs/buckets.c               |   4 +
 libbcachefs/data_update.c           |   4 -
 libbcachefs/debug.c                 |  17 +++-
 libbcachefs/disk_groups.c           |   5 +-
 libbcachefs/ec.c                    | 121 +++++++++++++----
 libbcachefs/ec.h                    |   6 --
 libbcachefs/error.c                 |   7 +-
 libbcachefs/fs-io.c                 | 148 +++++++---------------------
 libbcachefs/fs-io.h                 |   2 -
 libbcachefs/fs.c                    |   2 +-
 libbcachefs/io.c                    |   4 -
 libbcachefs/super-io.c              |   6 +-
 libbcachefs/super.c                 |   8 +-
 libbcachefs/sysfs.c                 |   2 +-
 linux/shrinker.c                    |   2 +-
 30 files changed, 254 insertions(+), 265 deletions(-)

diff --git a/.bcachefs_revision b/.bcachefs_revision
index d5115b93..83d5a7db 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-24c6361e202cc09de0159505eb3ab3ca265520d8
+cbccc6d8692fdd3af7d5db97a065af5a47bc733c

diff --git a/Makefile.compiler b/Makefile.compiler
index 86ecd2ac..94d0d40c 100644
--- a/Makefile.compiler
+++ b/Makefile.compiler
@@ -21,8 +21,8 @@ TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$
 # automatically cleaned up.
 try-run = $(shell set -e;		\
 	TMP=$(TMPOUT)/tmp;		\
-	mkdir -p $(TMPOUT);		\
 	trap "rm -rf $(TMPOUT)" EXIT;	\
+	mkdir -p $(TMPOUT);		\
 	if ($(1)) >/dev/null 2>&1;	\
 	then echo "$(2)";		\
 	else echo "$(3)";		\

diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 34684b20..6a3cd1bf 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -29,10 +29,9 @@ extern void kmemleak_not_leak(const void *ptr) __ref;
 extern void kmemleak_ignore(const void *ptr) __ref;
 extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
 extern void kmemleak_no_scan(const void *ptr) __ref;
-extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
+extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
 				gfp_t gfp) __ref;
 extern void kmemleak_free_part_phys(phys_addr_t phys, size_t size) __ref;
-extern void kmemleak_not_leak_phys(phys_addr_t phys) __ref;
 extern void kmemleak_ignore_phys(phys_addr_t phys) __ref;
 
 static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
@@ -107,15 +106,12 @@ static inline void kmemleak_no_scan(const void *ptr)
 {
 }
 static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
-					int min_count, gfp_t gfp)
+					gfp_t gfp)
 {
 }
 static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size)
 {
 }
-static inline void kmemleak_not_leak_phys(phys_addr_t phys)
-{
-}
 static inline void kmemleak_ignore_phys(phys_addr_t phys)
 {
 }

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index c7d32d80..ebbab7a6 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -24,7 +24,7 @@ struct shrinker {
 	struct list_head list;
 };
 
-int register_shrinker(struct shrinker *);
+int register_shrinker(struct shrinker *, const char *, ...);
 void unregister_shrinker(struct shrinker *);
void run_shrinkers(gfp_t gfp_mask, bool); diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index ce365fec..e89999cf 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -688,7 +688,7 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans, bch2_dev_alloc_list(c, stripe, devs_may_alloc); unsigned dev; struct bch_dev *ca; - int ret = 0; + int ret = -BCH_ERR_insufficient_devices; unsigned i; BUG_ON(*nr_effective >= nr_replicas); @@ -718,8 +718,8 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans, bch2_dev_stripe_increment(ca, stripe); percpu_ref_put(&ca->ref); - ret = PTR_ERR_OR_ZERO(ob); - if (ret) { + if (IS_ERR(ob)) { + ret = PTR_ERR(ob); if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl) break; continue; @@ -728,15 +728,12 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans, add_new_bucket(c, ptrs, devs_may_alloc, nr_effective, have_cache, flags, ob); - if (*nr_effective >= nr_replicas) + if (*nr_effective >= nr_replicas) { + ret = 0; break; + } } - if (*nr_effective >= nr_replicas) - ret = 0; - else if (!ret) - ret = -BCH_ERR_insufficient_devices; - return ret; } diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 955f3ee9..7e8b1301 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -29,10 +29,15 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c, u64 bucket_offset) { struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode); + struct bpos ret; - return POS(bucket.inode, - (bucket_to_sector(ca, bucket.offset) << - MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); + ret = POS(bucket.inode, + (bucket_to_sector(ca, bucket.offset) << + MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); + + BUG_ON(bpos_cmp(bucket, bp_pos_to_bucket(c, ret))); + + return ret; } void bch2_extent_ptr_to_bp(struct bch_fs *c, @@ -409,17 +414,20 @@ int bch2_get_next_backpointer(struct btree_trans *trans, struct bch_backpointer *dst) { struct bch_fs *c = trans->c; - struct bpos bp_pos = - bucket_pos_to_bp(c, bucket, - max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX); - struct bpos bp_end_pos = - bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0); + struct bpos bp_pos, bp_end_pos; struct btree_iter alloc_iter, bp_iter = { NULL }; struct bkey_s_c k; struct bkey_s_c_alloc_v4 a; size_t i; int ret; + if (*bp_offset == U64_MAX) + return 0; + + bp_pos = bucket_pos_to_bp(c, bucket, + max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX); + bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0); + bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, bucket, BTREE_ITER_CACHED); k = bch2_btree_iter_peek_slot(&alloc_iter); @@ -803,8 +811,10 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) bch2_trans_init(&trans, c, 0, 0); for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) { + unsigned depth = btree_type_has_ptrs(btree_id) ? 
0 : 1; + bch2_trans_node_iter_init(&trans, &iter, btree_id, POS_MIN, 0, - 0, + depth, BTREE_ITER_ALL_LEVELS| BTREE_ITER_PREFETCH); diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 7b5fd726..9e10fc83 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1412,7 +1412,7 @@ struct bch_sb_field_disk_groups { x(trans_traverse_all, 71) \ x(transaction_commit, 72) \ x(write_super, 73) \ - x(trans_restart_would_deadlock_recursion_limit, 74) \ + x(trans_restart_would_deadlock_recursion_limit, 74) enum bch_persistent_counters { #define x(t, n, ...) BCH_COUNTER_##t, diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 4d1fc39c..f84b5086 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -341,7 +341,7 @@ restart: six_unlock_intent(&b->c.lock); if (freed == nr) - goto out; + goto out_rotate; } else if (trigger_writes && btree_node_dirty(b) && !btree_node_will_make_reachable(b) && @@ -360,6 +360,9 @@ restart: if (touched >= nr) break; } +out_rotate: + if (&t->list != &bc->live) + list_move_tail(&bc->live, &t->list); out: mutex_unlock(&bc->lock); out_nounlock: @@ -475,7 +478,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) bc->shrink.scan_objects = bch2_btree_cache_scan; bc->shrink.to_text = bch2_btree_cache_shrinker_to_text; bc->shrink.seeks = 4; - ret = register_shrinker(&bc->shrink); + ret = register_shrinker(&bc->shrink, "%s/btree_cache", c->name); out: pr_verbose_init(c->opts, "ret %i", ret); return ret; diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 5b7f7cd3..fd89165e 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1966,7 +1966,7 @@ int bch2_gc_gens(struct bch_fs *c) } for (i = 0; i < BTREE_ID_NR; i++) - if ((1 << i) & BTREE_ID_HAS_PTRS) { + if (btree_type_has_ptrs(i)) { struct btree_iter iter; struct bkey_s_c k; diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index a7ff5df4..925ffb31 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -2834,16 +2834,18 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char * s = btree_trans_stats(trans); if (s) { - unsigned expected_mem_bytes = s->max_mem; + unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem); - trans->mem_bytes = roundup_pow_of_two(expected_mem_bytes); - trans->mem = kmalloc(trans->mem_bytes, GFP_KERNEL|__GFP_NOFAIL); - trans->nr_max_paths = s->nr_max_paths; + trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL); if (!unlikely(trans->mem)) { trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); trans->mem_bytes = BTREE_TRANS_MEM_MAX; + } else { + trans->mem_bytes = expected_mem_bytes; } + + trans->nr_max_paths = s->nr_max_paths; } trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 4ec873aa..910f6d7b 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -171,6 +171,7 @@ void bch2_path_put(struct btree_trans *, struct btree_path *, bool); int bch2_trans_relock(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *); +bool bch2_trans_locked(struct btree_trans *); static inline bool trans_was_restarted(struct btree_trans *trans, u32 restart_count) { diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index 918dde31..35e94194 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -938,25 +938,26 @@ static void bch2_btree_key_cache_shrinker_to_text(struct printbuf 
*out, struct s bch2_btree_key_cache_to_text(out, bc); } -int bch2_fs_btree_key_cache_init(struct btree_key_cache *c) +int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) { + struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); int ret; - c->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist); - if (!c->pcpu_freed) + bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist); + if (!bc->pcpu_freed) return -ENOMEM; - ret = rhashtable_init(&c->table, &bch2_btree_key_cache_params); + ret = rhashtable_init(&bc->table, &bch2_btree_key_cache_params); if (ret) return ret; - c->table_init_done = true; + bc->table_init_done = true; - c->shrink.seeks = 1; - c->shrink.count_objects = bch2_btree_key_cache_count; - c->shrink.scan_objects = bch2_btree_key_cache_scan; - c->shrink.to_text = bch2_btree_key_cache_shrinker_to_text; - return register_shrinker(&c->shrink); + bc->shrink.seeks = 1; + bc->shrink.count_objects = bch2_btree_key_cache_count; + bc->shrink.scan_objects = bch2_btree_key_cache_scan; + bc->shrink.to_text = bch2_btree_key_cache_shrinker_to_text; + return register_shrinker(&bc->shrink, "%s/btree_key_cache", c->name); } void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c) diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c index 339d44ce..f4340086 100644 --- a/libbcachefs/btree_locking.c +++ b/libbcachefs/btree_locking.c @@ -71,11 +71,6 @@ struct lock_graph { unsigned nr; }; -static void lock_graph_pop(struct lock_graph *g) -{ - closure_put(&g->g[--g->nr].trans->ref); -} - static noinline void print_cycle(struct printbuf *out, struct lock_graph *g) { struct trans_waiting_for_lock *i; @@ -87,6 +82,18 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g) bch2_btree_trans_to_text(out, i->trans); } +static noinline void print_chain(struct printbuf *out, struct lock_graph *g) +{ + struct trans_waiting_for_lock *i; + + for (i = g->g; i != g->g + g->nr; i++) { + if (i != g->g) + prt_str(out, "<- "); + prt_printf(out, "%u ", i->trans->locking_wait.task->pid); + } + prt_newline(out); +} + static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i) { int ret; @@ -134,6 +141,21 @@ static noinline int break_cycle(struct lock_graph *g) BUG(); } +static void lock_graph_pop(struct lock_graph *g) +{ + closure_put(&g->g[--g->nr].trans->ref); +} + +static void lock_graph_pop_above(struct lock_graph *g, struct trans_waiting_for_lock *above, + struct printbuf *cycle) +{ + if (g->nr > 1 && cycle) + print_chain(cycle, g); + + while (g->g + g->nr > above) + lock_graph_pop(g); +} + static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, struct printbuf *cycle) { @@ -142,11 +164,10 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, int ret = 0; for (i = g->g; i < g->g + g->nr; i++) { - if (i->trans->locking != i->node_want) - while (g->g + g->nr >= i) { - lock_graph_pop(g); - return 0; - } + if (i->trans->locking != i->node_want) { + lock_graph_pop_above(g, i - 1, cycle); + return 0; + } if (i->trans == trans) { if (cycle) { @@ -185,20 +206,19 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, return 0; deadlock: - while (g->nr) - lock_graph_pop(g); + lock_graph_pop_above(g, g->g, cycle); return ret; } -static noinline void lock_graph_remove_non_waiters(struct lock_graph *g) +static noinline void lock_graph_remove_non_waiters(struct lock_graph *g, + struct printbuf *cycle) { struct trans_waiting_for_lock 
*i; for (i = g->g + 1; i < g->g + g->nr; i++) if (i->trans->locking != i->node_want || i->trans->locking_wait.start_time != i[-1].lock_start_time) { - while (g->g + g->nr >= i) - lock_graph_pop(g); + lock_graph_pop_above(g, i - 1, cycle); return; } BUG(); @@ -252,7 +272,7 @@ next: b = &READ_ONCE(path->l[top->level].b)->c; if (unlikely(IS_ERR_OR_NULL(b))) { - lock_graph_remove_non_waiters(&g); + lock_graph_remove_non_waiters(&g, cycle); goto next; } @@ -286,6 +306,8 @@ next: } } + if (g.nr > 1 && cycle) + print_chain(cycle, &g); lock_graph_pop(&g); goto next; } @@ -602,8 +624,18 @@ void bch2_trans_unlock(struct btree_trans *trans) * bch2_gc_btree_init_recurse() doesn't use btree iterators for walking * btree nodes, it implements its own walking: */ - BUG_ON(!trans->is_initial_gc && - lock_class_is_held(&bch2_btree_node_lock_key)); + EBUG_ON(!trans->is_initial_gc && + lock_class_is_held(&bch2_btree_node_lock_key)); +} + +bool bch2_trans_locked(struct btree_trans *trans) +{ + struct btree_path *path; + + trans_for_each_path(trans, path) + if (path->nodes_locked) + return true; + return false; } /* Debug */ diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index 61d5038a..d91b42bf 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -194,6 +194,7 @@ static inline int __btree_node_lock_nopath(struct btree_trans *trans, bool lock_may_not_fail) { int ret; + trans->lock_may_not_fail = lock_may_not_fail; trans->lock_must_abort = false; trans->locking = b; diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index cf9b9ec4..578ba747 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -36,6 +36,7 @@ static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans, struct btree_path *path; path = bch2_path_get(trans, btree_id, pos, level + 1, level, + BTREE_ITER_NOPRESERVE| BTREE_ITER_INTENT, _THIS_IP_); path = bch2_btree_path_make_mut(trans, path, true, _THIS_IP_); bch2_btree_path_downgrade(trans, path); @@ -649,6 +650,17 @@ err: * we're in journal error state: */ + /* + * Ensure transaction is unlocked before using + * btree_node_lock_nopath() (the use of which is always suspect, + * we need to work on removing this in the future) + * + * It should be, but get_unlocked_mut_path() -> bch2_path_get() + * calls bch2_path_upgrade(), before we call path_make_mut(), so + * we may rarely end up with a locked path besides the one we + * have here: + */ + bch2_trans_unlock(&trans); btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent); mark_btree_node_locked(&trans, path, b->c.level, SIX_LOCK_intent); bch2_btree_path_level_init(&trans, path, b); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 8af0dd02..de57e625 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -674,6 +674,10 @@ static int check_bucket_ref(struct bch_fs *c, if (bucket_data_type == BCH_DATA_cached) bucket_data_type = BCH_DATA_user; + if ((bucket_data_type == BCH_DATA_stripe && ptr_data_type == BCH_DATA_user) || + (bucket_data_type == BCH_DATA_user && ptr_data_type == BCH_DATA_stripe)) + bucket_data_type = ptr_data_type = BCH_DATA_stripe; + if (gen_after(ptr->gen, b_gen)) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index cb25efb6..3102166d 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -104,7 +104,6 @@ 
static int bch2_data_update_index_update(struct bch_write_op *op) struct btree_iter iter; struct data_update *m = container_of(op, struct data_update, op); - struct open_bucket *ec_ob = ec_open_bucket(c, &op->open_buckets); struct keylist *keys = &op->insert_keys; struct bkey_buf _new, _insert; int ret = 0; @@ -232,9 +231,6 @@ static int bch2_data_update_index_update(struct bch_write_op *op) if (!ret) { bch2_btree_iter_set_pos(&iter, next_pos); - if (ec_ob) - bch2_ob_add_backpointer(c, ec_ob, &insert->k); - this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size); trace_move_extent_finish(&new->k); } diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index bff5e9b6..1d2a1615 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -724,11 +724,18 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, goto out; mutex_lock(&c->btree_trans_lock); - list_for_each_entry(trans, &c->btree_trans_list, list) - if (bch2_check_for_deadlock(trans, &i->buf)) { - i->iter = 1; - break; - } + list_for_each_entry(trans, &c->btree_trans_list, list) { + if (trans->locking_wait.task->pid <= i->iter) + continue; + + ret = flush_buf(i); + if (ret) + return ret; + + bch2_check_for_deadlock(trans, &i->buf); + + i->iter = trans->locking_wait.task->pid; + } mutex_unlock(&c->btree_trans_lock); out: if (i->buf.allocation_failure) diff --git a/libbcachefs/disk_groups.c b/libbcachefs/disk_groups.c index 19b44408..6b81f358 100644 --- a/libbcachefs/disk_groups.c +++ b/libbcachefs/disk_groups.c @@ -468,10 +468,7 @@ void bch2_opt_target_to_text(struct printbuf *out, : NULL; if (ca && percpu_ref_tryget(&ca->io_ref)) { - char b[BDEVNAME_SIZE]; - - prt_printf(out, "/dev/%s", - bdevname(ca->disk_sb.bdev, b)); + prt_printf(out, "/dev/%pg", ca->disk_sb.bdev); percpu_ref_put(&ca->io_ref); } else if (ca) { prt_printf(out, "offline device %u", t.dev); diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index aa830114..f902da01 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -4,6 +4,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "backpointers.h" #include "bkey_buf.h" #include "bset.h" #include "btree_gc.h" @@ -820,17 +821,13 @@ static void extent_stripe_ptr_add(struct bkey_s_extent e, static int ec_stripe_update_extent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, - struct ec_stripe_buf *s, - struct bpos end) + struct ec_stripe_buf *s) { const struct bch_extent_ptr *ptr_c; struct bch_extent_ptr *ptr, *ec_ptr = NULL; struct bkey_i *n; int ret, dev, block; - if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) - return 1; - if (extent_has_stripe_ptr(k, s->key.k.p.offset)) return 0; @@ -860,19 +857,72 @@ static int ec_stripe_update_extent(struct btree_trans *trans, return bch2_trans_update(trans, iter, n, 0); } -static int ec_stripe_update_extents(struct bch_fs *c, - struct ec_stripe_buf *s, - struct bkey *pos) +static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_buf *s, + unsigned block) { + struct bch_fs *c = trans->c; + struct bch_extent_ptr bucket = s->key.v.ptrs[block]; + struct bpos bucket_pos = PTR_BUCKET_POS(c, &bucket); + struct bch_backpointer bp; struct btree_iter iter; struct bkey_s_c k; + u64 bp_offset = 0; + int ret = 0; +retry: + while (1) { + bch2_trans_begin(trans); - return bch2_trans_run(c, - for_each_btree_key_commit(&trans, iter, - BTREE_ID_extents, bkey_start_pos(pos), - BTREE_ITER_NOT_EXTENTS|BTREE_ITER_INTENT, k, - NULL, NULL, BTREE_INSERT_NOFAIL, - ec_stripe_update_extent(&trans, &iter, k, s, 
pos->p))); + ret = bch2_get_next_backpointer(trans, bucket_pos, bucket.gen, &bp_offset, &bp); + if (ret) + break; + if (bp_offset == U64_MAX) + break; + + if (bch2_fs_inconsistent_on(bp.level, c, "found btree node in erasure coded bucket!?")) { + ret = -EIO; + break; + } + + k = bch2_backpointer_get_key(trans, &iter, bucket_pos, bp_offset, bp); + ret = bkey_err(k); + if (ret) + break; + if (!k.k) + continue; + + ret = ec_stripe_update_extent(trans, &iter, k, s); + bch2_trans_iter_exit(trans, &iter); + if (ret) + break; + + bp_offset++; + } + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto retry; + + return ret; +} + +static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s) +{ + struct btree_trans trans; + struct bch_stripe *v = &s->key.v; + unsigned i, nr_data = v->nr_blocks - v->nr_redundant; + int ret = 0; + + bch2_trans_init(&trans, c, 0, 0); + + for (i = 0; i < nr_data; i++) { + ret = ec_stripe_update_bucket(&trans, s, i); + if (ret) + break; + } + + + bch2_trans_exit(&trans); + + return ret; } /* @@ -882,7 +932,6 @@ static void ec_stripe_create(struct ec_stripe_new *s) { struct bch_fs *c = s->c; struct open_bucket *ob; - struct bkey_i *k; struct stripe *m; struct bch_stripe *v = &s->new_stripe.key.v; unsigned i, nr_data = v->nr_blocks - v->nr_redundant; @@ -942,14 +991,10 @@ static void ec_stripe_create(struct ec_stripe_new *s) goto err_put_writes; } - for_each_keylist_key(&s->keys, k) { - ret = ec_stripe_update_extents(c, &s->new_stripe, &k->k); - if (ret) { - bch_err(c, "error creating stripe: error updating pointers: %s", - bch2_err_str(ret)); - break; - } - } + ret = ec_stripe_update_extents(c, &s->new_stripe); + if (ret) + bch_err(c, "error creating stripe: error updating pointers: %s", + bch2_err_str(ret)); spin_lock(&c->ec_stripes_heap_lock); m = genradix_ptr(&c->stripes, s->new_stripe.key.k.p.offset); @@ -974,8 +1019,6 @@ err: } } - bch2_keylist_free(&s->keys, s->inline_keys); - ec_stripe_buf_exit(&s->existing_stripe); ec_stripe_buf_exit(&s->new_stripe); closure_debug_destroy(&s->iodone); @@ -1058,30 +1101,6 @@ void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp) return ob->ec->new_stripe.data[ob->ec_idx] + (offset << 9); } -void bch2_ob_add_backpointer(struct bch_fs *c, struct open_bucket *ob, - struct bkey *k) -{ - struct ec_stripe_new *ec = ob->ec; - - if (!ec) - return; - - mutex_lock(&ec->lock); - - if (bch2_keylist_realloc(&ec->keys, ec->inline_keys, - ARRAY_SIZE(ec->inline_keys), - BKEY_U64s)) { - BUG(); - } - - bkey_init(&ec->keys.top->k); - ec->keys.top->k.p = k->p; - ec->keys.top->k.size = k->size; - bch2_keylist_push(&ec->keys); - - mutex_unlock(&ec->lock); -} - static int unsigned_cmp(const void *_l, const void *_r) { unsigned l = *((const unsigned *) _l); @@ -1174,8 +1193,6 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) BCH_BKEY_PTRS_MAX) - h->redundancy; s->nr_parity = h->redundancy; - bch2_keylist_init(&s->keys, s->inline_keys); - ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data, s->nr_parity, h->blocksize); @@ -1438,7 +1455,7 @@ static int __bch2_ec_stripe_head_reserve(struct bch_fs *c, * This means we need to wait for copygc to * empty out buckets from existing stripes: */ - bch_err(c, "failed to reserve stripe"); + bch_err_ratelimited(c, "failed to reserve stripe: %s", bch2_err_str(ret)); } return ret; diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h index a4c13d61..3e2b22c0 100644 --- a/libbcachefs/ec.h +++ b/libbcachefs/ec.h @@ -4,7 +4,6 @@ #include "ec_types.h" 
#include "buckets_types.h" -#include "keylist_types.h" int bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c, int rw, struct printbuf *); @@ -166,9 +165,6 @@ struct ec_stripe_new { open_bucket_idx_t blocks[BCH_BKEY_PTRS_MAX]; struct disk_reservation res; - struct keylist keys; - u64 inline_keys[BKEY_U64s * 8]; - struct ec_stripe_buf new_stripe; struct ec_stripe_buf existing_stripe; }; @@ -196,8 +192,6 @@ struct ec_stripe_head { int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *); void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *); -void bch2_ob_add_backpointer(struct bch_fs *, struct open_bucket *, - struct bkey *); void bch2_ec_bucket_written(struct bch_fs *, struct open_bucket *); void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *); diff --git a/libbcachefs/error.c b/libbcachefs/error.c index 762abdf2..2fb5102e 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -104,7 +104,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) { struct fsck_err_state *s = NULL; va_list args; - bool print = true, suppressing = false; + bool print = true, suppressing = false, inconsistent = false; struct printbuf buf = PRINTBUF, *out = &buf; int ret = -BCH_ERR_fsck_ignore; @@ -136,7 +136,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { prt_str(out, ", shutting down"); - bch2_inconsistent_error(c); + inconsistent = true; ret = -BCH_ERR_fsck_errors_not_fixed; } else if (flags & FSCK_CAN_FIX) { prt_str(out, ", fixing"); @@ -189,6 +189,9 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) printbuf_exit(&buf); + if (inconsistent) + bch2_inconsistent_error(c); + if (ret == -BCH_ERR_fsck_fix) { set_bit(BCH_FS_ERRORS_FIXED, &c->flags); } else { diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 95b84c3c..7d45f486 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -434,22 +434,20 @@ static void mark_pagecache_unallocated(struct bch_inode_info *inode, { pgoff_t index = start >> PAGE_SECTORS_SHIFT; pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; - struct pagevec pvec; + struct folio_batch fbatch; + unsigned i, j; if (end <= start) return; - pagevec_init(&pvec); + folio_batch_init(&fbatch); - do { - unsigned nr_pages, i, j; - - nr_pages = pagevec_lookup_range(&pvec, inode->v.i_mapping, - &index, end_index); - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - u64 pg_start = page->index << PAGE_SECTORS_SHIFT; - u64 pg_end = (page->index + 1) << PAGE_SECTORS_SHIFT; + while (filemap_get_folios(inode->v.i_mapping, + &index, end_index, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; + u64 pg_start = folio->index << PAGE_SECTORS_SHIFT; + u64 pg_end = (folio->index + 1) << PAGE_SECTORS_SHIFT; unsigned pg_offset = max(start, pg_start) - pg_start; unsigned pg_len = min(end, pg_end) - pg_offset - pg_start; struct bch_page_state *s; @@ -458,8 +456,8 @@ static void mark_pagecache_unallocated(struct bch_inode_info *inode, BUG_ON(pg_offset >= PAGE_SECTORS); BUG_ON(pg_offset + pg_len > PAGE_SECTORS); - lock_page(page); - s = bch2_page_state(page); + folio_lock(folio); + s = bch2_page_state(&folio->page); if (s) { spin_lock(&s->lock); @@ -468,10 +466,11 @@ static void mark_pagecache_unallocated(struct bch_inode_info *inode, spin_unlock(&s->lock); } - unlock_page(page); + folio_unlock(folio); } - 
pagevec_release(&pvec); - } while (index <= end_index); + folio_batch_release(&fbatch); + cond_resched(); + } } static void mark_pagecache_reserved(struct bch_inode_info *inode, @@ -480,23 +479,21 @@ static void mark_pagecache_reserved(struct bch_inode_info *inode, struct bch_fs *c = inode->v.i_sb->s_fs_info; pgoff_t index = start >> PAGE_SECTORS_SHIFT; pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; - struct pagevec pvec; + struct folio_batch fbatch; s64 i_sectors_delta = 0; + unsigned i, j; if (end <= start) return; - pagevec_init(&pvec); + folio_batch_init(&fbatch); - do { - unsigned nr_pages, i, j; - - nr_pages = pagevec_lookup_range(&pvec, inode->v.i_mapping, - &index, end_index); - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - u64 pg_start = page->index << PAGE_SECTORS_SHIFT; - u64 pg_end = (page->index + 1) << PAGE_SECTORS_SHIFT; + while (filemap_get_folios(inode->v.i_mapping, + &index, end_index, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; + u64 pg_start = folio->index << PAGE_SECTORS_SHIFT; + u64 pg_end = (folio->index + 1) << PAGE_SECTORS_SHIFT; unsigned pg_offset = max(start, pg_start) - pg_start; unsigned pg_len = min(end, pg_end) - pg_offset - pg_start; struct bch_page_state *s; @@ -505,8 +502,8 @@ static void mark_pagecache_reserved(struct bch_inode_info *inode, BUG_ON(pg_offset >= PAGE_SECTORS); BUG_ON(pg_offset + pg_len > PAGE_SECTORS); - lock_page(page); - s = bch2_page_state(page); + folio_lock(folio); + s = bch2_page_state(&folio->page); if (s) { spin_lock(&s->lock); @@ -525,10 +522,11 @@ static void mark_pagecache_reserved(struct bch_inode_info *inode, spin_unlock(&s->lock); } - unlock_page(page); + folio_unlock(folio); } - pagevec_release(&pvec); - } while (index <= end_index); + folio_batch_release(&fbatch); + cond_resched(); + } i_sectors_acct(c, inode, NULL, i_sectors_delta); } @@ -859,30 +857,6 @@ bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask) return true; } -#ifdef CONFIG_MIGRATION -int bch2_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode) -{ - int ret; - - EBUG_ON(!PageLocked(page)); - EBUG_ON(!PageLocked(newpage)); - - ret = migrate_page_move_mapping(mapping, newpage, page, 0); - if (ret != MIGRATEPAGE_SUCCESS) - return ret; - - if (PagePrivate(page)) - attach_page_private(newpage, detach_page_private(page)); - - if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); - else - migrate_page_states(newpage, page); - return MIGRATEPAGE_SUCCESS; -} -#endif - /* readpage(s): */ static void bch2_readpages_end_io(struct bio *bio) @@ -3224,58 +3198,6 @@ err: /* fseek: */ -static int page_data_offset(struct page *page, unsigned offset) -{ - struct bch_page_state *s = bch2_page_state(page); - unsigned i; - - if (s) - for (i = offset >> 9; i < PAGE_SECTORS; i++) - if (s->s[i].state >= SECTOR_DIRTY) - return i << 9; - - return -1; -} - -static loff_t bch2_seek_pagecache_data(struct inode *vinode, - loff_t start_offset, - loff_t end_offset) -{ - struct address_space *mapping = vinode->i_mapping; - struct page *page; - pgoff_t start_index = start_offset >> PAGE_SHIFT; - pgoff_t end_index = end_offset >> PAGE_SHIFT; - pgoff_t index = start_index; - loff_t ret; - int offset; - - while (index <= end_index) { - if (find_get_pages_range(mapping, &index, end_index, 1, &page)) { - lock_page(page); - - offset = page_data_offset(page, - page->index == start_index - ? 
start_offset & (PAGE_SIZE - 1) - : 0); - if (offset >= 0) { - ret = clamp(((loff_t) page->index << PAGE_SHIFT) + - offset, - start_offset, end_offset); - unlock_page(page); - put_page(page); - return ret; - } - - unlock_page(page); - put_page(page); - } else { - break; - } - } - - return end_offset; -} - static loff_t bch2_seek_data(struct file *file, u64 offset) { struct bch_inode_info *inode = file_bch_inode(file); @@ -3319,9 +3241,13 @@ err: if (ret) return ret; - if (next_data > offset) - next_data = bch2_seek_pagecache_data(&inode->v, - offset, next_data); + if (next_data > offset) { + loff_t pagecache_next_data = + mapping_seek_hole_data(inode->v.i_mapping, offset, + next_data, SEEK_DATA); + if (pagecache_next_data >= 0) + next_data = min_t(u64, next_data, pagecache_next_data); + } if (next_data >= isize) return -ENXIO; diff --git a/libbcachefs/fs-io.h b/libbcachefs/fs-io.h index a22a4e95..a8835298 100644 --- a/libbcachefs/fs-io.h +++ b/libbcachefs/fs-io.h @@ -43,8 +43,6 @@ vm_fault_t bch2_page_fault(struct vm_fault *); vm_fault_t bch2_page_mkwrite(struct vm_fault *); void bch2_invalidate_folio(struct folio *, size_t, size_t); bool bch2_release_folio(struct folio *, gfp_t); -int bch2_migrate_page(struct address_space *, struct page *, - struct page *, enum migrate_mode); void bch2_fs_fsio_exit(struct bch_fs *); int bch2_fs_fsio_init(struct bch_fs *); diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index b5977c46..57e6e218 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -1123,7 +1123,7 @@ static const struct address_space_operations bch_address_space_operations = { .release_folio = bch2_release_folio, .direct_IO = noop_direct_IO, #ifdef CONFIG_MIGRATION - .migratepage = bch2_migrate_page, + .migrate_folio = filemap_migrate_folio, #endif .error_remove_page = generic_error_remove_page, }; diff --git a/libbcachefs/io.c b/libbcachefs/io.c index e047ef28..558d0c23 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -395,7 +395,6 @@ int bch2_write_index_default(struct bch_write_op *op) { struct bch_fs *c = op->c; struct bkey_buf sk; - struct open_bucket *ec_ob = ec_open_bucket(c, &op->open_buckets); struct keylist *keys = &op->insert_keys; struct bkey_i *k = bch2_keylist_front(keys); struct btree_trans trans; @@ -439,9 +438,6 @@ int bch2_write_index_default(struct bch_write_op *op) if (ret) break; - if (ec_ob) - bch2_ob_add_backpointer(c, ec_ob, &sk.k->k); - if (bkey_cmp(iter.pos, k->k.p) >= 0) bch2_keylist_pop_front(&op->insert_keys); else diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index d34aa6b6..cbc5979a 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -128,10 +128,8 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; if (new_bytes > max_bytes) { - char buf[BDEVNAME_SIZE]; - - pr_err("%s: superblock too big: want %zu but have %llu", - bdevname(sb->bdev, buf), new_bytes, max_bytes); + pr_err("%pg: superblock too big: want %zu but have %llu", + sb->bdev, new_bytes, max_bytes); return -BCH_ERR_ENOSPC_sb; } } diff --git a/libbcachefs/super.c b/libbcachefs/super.c index b1809f8c..a824e160 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -1224,8 +1224,8 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) bch2_dev_sysfs_online(c, ca); if (c->sb.nr_devices == 1) - bdevname(ca->disk_sb.bdev, c->name); - bdevname(ca->disk_sb.bdev, ca->name); + snprintf(c->name, sizeof(c->name), "%pg", ca->disk_sb.bdev); + snprintf(ca->name, sizeof(ca->name), 
"%pg", ca->disk_sb.bdev); rebalance_wakeup(c); return 0; @@ -1867,9 +1867,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, while (i < nr_devices) { if (i != best_sb && !bch2_dev_exists(sb[best_sb].sb, mi, sb[i].sb->dev_idx)) { - char buf[BDEVNAME_SIZE]; - pr_info("%s has been removed, skipping", - bdevname(sb[i].bdev, buf)); + pr_info("%pg has been removed, skipping", sb[i].bdev); bch2_free_super(&sb[i]); array_remove_item(sb, nr_devices, i); continue; diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index f1b0f001..103fde97 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -298,7 +298,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c bch2_trans_init(&trans, c, 0, 0); for (id = 0; id < BTREE_ID_NR; id++) { - if (!((1U << id) & BTREE_ID_HAS_PTRS)) + if (!btree_type_has_ptrs(id)) continue; for_each_btree_key(&trans, iter, id, POS_MIN, diff --git a/linux/shrinker.c b/linux/shrinker.c index 876c1bae..13f0c4b9 100644 --- a/linux/shrinker.c +++ b/linux/shrinker.c @@ -10,7 +10,7 @@ static LIST_HEAD(shrinker_list); static DEFINE_MUTEX(shrinker_lock); -int register_shrinker(struct shrinker *shrinker) +int register_shrinker(struct shrinker *shrinker, const char *fmt, ...) { mutex_lock(&shrinker_lock); list_add_tail(&shrinker->list, &shrinker_list);