From 72add8822c47e5801d4ac6d42af8c5d9d7b4d3c9 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Sun, 9 Oct 2022 23:27:41 -0400
Subject: [PATCH] Update bcachefs sources to 47ffed9fad bcachefs:
 bch2_btree_delete_range_trans() now uses peek_upto()

---
 .bcachefs_revision                  |   2 +-
 include/linux/mm.h                  |  25 ++++
 include/linux/rwsem.h               |   1 +
 include/linux/sched.h               |  12 ++
 libbcachefs/backpointers.c          | 155 +++++++++++++++++++--
 libbcachefs/btree_io.c              |  15 +-
 libbcachefs/btree_iter.c            |  21 ++-
 libbcachefs/btree_locking.c         |  43 ++++--
 libbcachefs/btree_update.h          |   4 +-
 libbcachefs/btree_update_interior.c | 204 ++++++++++++++++++++--------
 libbcachefs/btree_update_interior.h |   1 +
 libbcachefs/btree_update_leaf.c     |  19 +--
 libbcachefs/data_update.c           |  23 +++-
 libbcachefs/data_update.h           |   2 +
 libbcachefs/debug.c                 |  22 +--
 libbcachefs/ec.c                    |  26 +---
 libbcachefs/errcode.h               |   1 +
 libbcachefs/fs-io.c                 |   3 +
 libbcachefs/move.c                  |  67 ++++++++-
 libbcachefs/super.c                 |   8 --
 libbcachefs/util.c                  |  20 +++
 libbcachefs/util.h                  |   1 +
 linux/kthread.c                     |   2 +
 linux/shrinker.c                    |  22 +--
 24 files changed, 518 insertions(+), 181 deletions(-)
 create mode 100644 include/linux/mm.h

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 83d5a7db..1c9c4ec1 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-cbccc6d8692fdd3af7d5db97a065af5a47bc733c
+47ffed9fad891300a610191602a10ecd1e857cce
diff --git a/include/linux/mm.h b/include/linux/mm.h
new file mode 100644
index 00000000..4bf80ba3
--- /dev/null
+++ b/include/linux/mm.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_MM_H
+#define _TOOLS_LINUX_MM_H
+
+#include <linux/types.h>
+
+struct sysinfo {
+	long uptime;		/* Seconds since boot */
+	unsigned long loads[3];	/* 1, 5, and 15 minute load averages */
+	unsigned long totalram;	/* Total usable main memory size */
+	unsigned long freeram;	/* Available memory size */
+	unsigned long sharedram;/* Amount of shared memory */
+	unsigned long bufferram;/* Memory used by buffers */
+	unsigned long totalswap;/* Total swap space size */
+	unsigned long freeswap;	/* swap space still available */
+	__u16 procs;		/* Number of current processes */
+	__u16 pad;		/* Explicit padding for m68k */
+	unsigned long totalhigh;/* Total high memory size */
+	unsigned long freehigh;	/* Available high memory size */
+	__u32 mem_unit;		/* Memory unit size in bytes */
+};
+
+extern void si_meminfo(struct sysinfo * val);
+
+#endif /* _TOOLS_LINUX_MM_H */
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 9d70e6e2..f851d6a2 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -19,6 +19,7 @@ static inline void init_rwsem(struct rw_semaphore *lock)
 }
 
 #define down_read(l)		pthread_rwlock_rdlock(&(l)->lock)
+#define down_read_killable(l)	(pthread_rwlock_rdlock(&(l)->lock), 0)
 #define down_read_trylock(l)	(!pthread_rwlock_tryrdlock(&(l)->lock))
 #define up_read(l)		pthread_rwlock_unlock(&(l)->lock)
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 48d20e29..ac6d27bb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include <linux/rwsem.h>
 #include
 
 #define TASK_RUNNING		0
@@ -88,6 +89,10 @@ struct task_struct {
 	pid_t			pid;
 
 	struct bio_list		*bio_list;
+
+	struct signal_struct {
+		struct rw_semaphore exec_update_lock;
+	} *signal, _signal;
 };
 
 extern __thread struct task_struct *current;
@@ -157,4 +162,11 @@ static inline void ktime_get_coarse_real_ts64(struct timespec64 *ts)
 #define current_kernel_time64()	current_kernel_time()
 #define CURRENT_TIME		(current_kernel_time())
 
+static inline unsigned int stack_trace_save_tsk(struct task_struct *task,
+						unsigned long *store, unsigned int size,
+						unsigned int skipnr)
+{
+	return 0;
+}
+
 #endif /* __TOOLS_LINUX_SCHED_H */
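A note on the shim headers above: bcachefs-tools builds kernel code in userspace, so kernel primitives get mapped onto POSIX ones. In this mapping down_read_killable() cannot actually be interrupted; the comma expression simply takes the rwlock and evaluates to 0 (success). A minimal standalone sketch of that pattern (my own demo, not part of the patch):

/* Demo of the userspace rwsem shim style: pthread_rwlock_rdlock() cannot
 * be interrupted by a signal the way a killable kernel rwsem acquire can,
 * so the shim always "succeeds" and returns 0. */
#include <pthread.h>
#include <stdio.h>

struct rw_semaphore {
	pthread_rwlock_t lock;
};

#define down_read_killable(l)	(pthread_rwlock_rdlock(&(l)->lock), 0)
#define up_read(l)		pthread_rwlock_unlock(&(l)->lock)

int main(void)
{
	struct rw_semaphore sem = { .lock = PTHREAD_RWLOCK_INITIALIZER };

	if (!down_read_killable(&sem)) {	/* always 0 in the shim */
		printf("read lock taken\n");
		up_read(&sem);
	}
	return 0;
}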
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index 7e8b1301..ee7e610f 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -6,6 +6,8 @@
 #include "btree_update.h"
 #include "error.h"
 
+#include <linux/mm.h>
+
 #define MAX_EXTENT_COMPRESS_RATIO_SHIFT	10
 
 /*
@@ -802,6 +804,103 @@ err:
 	return ret;
 }
 
+struct bbpos {
+	enum btree_id		btree;
+	struct bpos		pos;
+};
+
+static inline int bbpos_cmp(struct bbpos l, struct bbpos r)
+{
+	return cmp_int(l.btree, r.btree) ?: bpos_cmp(l.pos, r.pos);
+}
+
+static inline struct bbpos bbpos_successor(struct bbpos pos)
+{
+	if (bpos_cmp(pos.pos, SPOS_MAX)) {
+		pos.pos = bpos_successor(pos.pos);
+		return pos;
+	}
+
+	if (pos.btree != BTREE_ID_NR) {
+		pos.btree++;
+		pos.pos = POS_MIN;
+		return pos;
+	}
+
+	BUG();
+}
+
+#if 0
+static void bbpos_to_text(struct printbuf *out, struct bbpos pos)
+{
+	prt_str(out, bch2_btree_ids[pos.btree]);
+	prt_char(out, ':');
+	bch2_bpos_to_text(out, pos.pos);
+}
+#endif
+
+static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
+{
+	return (struct bbpos) {
+		.btree	= bp.btree_id,
+		.pos	= bp.pos,
+	};
+}
+
+int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
+				 unsigned btree_leaf_mask,
+				 unsigned btree_interior_mask,
+				 struct bbpos start, struct bbpos *end)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	struct sysinfo i;
+	size_t btree_nodes;
+	enum btree_id btree;
+	int ret = 0;
+
+	si_meminfo(&i);
+
+	btree_nodes = (i.totalram >> 1) / btree_bytes(trans->c);
+
+	for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
+		unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
+
+		if (!((1U << btree) & btree_leaf_mask) &&
+		    !((1U << btree) & btree_interior_mask))
+			continue;
+
+		bch2_trans_node_iter_init(trans, &iter, btree,
+					  btree == start.btree ? start.pos : POS_MIN,
+					  0, depth, 0);
+		/*
+		 * for_each_btree_key_continue() doesn't check the return value
+		 * from bch2_btree_iter_advance(), which is needed when
+		 * iterating over interior nodes where we'll see keys at
+		 * SPOS_MAX:
+		 */
+		do {
+			k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
+			ret = bkey_err(k);
+			if (!k.k || ret)
+				break;
+
+			--btree_nodes;
+			if (!btree_nodes) {
+				end->btree = btree;
+				end->pos = k.k->p;
+				bch2_trans_iter_exit(trans, &iter);
+				return 0;
+			}
+		} while (bch2_btree_iter_advance(&iter));
+		bch2_trans_iter_exit(trans, &iter);
+	}
+
+	end->btree	= BTREE_ID_NR;
+	end->pos	= POS_MIN;
+	return ret;
+}
+
 int bch2_check_extents_to_backpointers(struct bch_fs *c)
 {
 	struct btree_trans trans;
@@ -845,19 +944,26 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 
 static int check_one_backpointer(struct btree_trans *trans,
 				 struct bpos bucket,
-				 u64 *bp_offset)
+				 u64 *bp_offset,
+				 struct bbpos start,
+				 struct bbpos end)
 {
 	struct btree_iter iter;
 	struct bch_backpointer bp;
+	struct bbpos pos;
 	struct bkey_s_c k;
 	struct printbuf buf = PRINTBUF;
 	int ret;
 
-	ret = bch2_get_next_backpointer(trans, bucket, -1,
-					bp_offset, &bp);
+	ret = bch2_get_next_backpointer(trans, bucket, -1, bp_offset, &bp);
 	if (ret || *bp_offset == U64_MAX)
 		return ret;
 
+	pos = bp_to_bbpos(bp);
+	if (bbpos_cmp(pos, start) < 0 ||
+	    bbpos_cmp(pos, end) > 0)
+		return 0;
+
 	k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
 	ret = bkey_err(k);
 	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
@@ -880,29 +986,52 @@ fsck_err:
 	return ret;
 }
 
-int bch2_check_backpointers_to_extents(struct bch_fs *c)
+static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
+						   struct bbpos start,
+						   struct bbpos end)
 {
-	struct btree_trans trans;
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	int ret = 0;
 
-	bch2_trans_init(&trans, c, 0, 0);
-	for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+	for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
 			   BTREE_ITER_PREFETCH, k, ret) {
 		u64 bp_offset = 0;
 
-		while (!(ret = commit_do(&trans, NULL, NULL,
-					 BTREE_INSERT_LAZY_RW|
-					 BTREE_INSERT_NOFAIL,
-					 check_one_backpointer(&trans, iter.pos, &bp_offset))) &&
+		while (!(ret = commit_do(trans, NULL, NULL,
+					 BTREE_INSERT_LAZY_RW|
+					 BTREE_INSERT_NOFAIL,
+					 check_one_backpointer(trans, iter.pos, &bp_offset, start, end))) &&
 		       bp_offset < U64_MAX)
 			bp_offset++;
 
 		if (ret)
 			break;
 	}
-	bch2_trans_iter_exit(&trans, &iter);
-	bch2_trans_exit(&trans);
+	bch2_trans_iter_exit(trans, &iter);
 
 	return ret < 0 ? ret : 0;
 }
+
+int bch2_check_backpointers_to_extents(struct bch_fs *c)
+{
+	struct btree_trans trans;
+	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
+	int ret;
+
+	bch2_trans_init(&trans, c, 0, 0);
+	while (1) {
+		ret = bch2_get_btree_in_memory_pos(&trans,
+						   (1U << BTREE_ID_extents)|
+						   (1U << BTREE_ID_reflink),
+						   ~0,
+						   start, &end) ?:
+			bch2_check_backpointers_to_extents_pass(&trans, start, end);
+		if (ret || end.btree == BTREE_ID_NR)
+			break;
+
+		start = bbpos_successor(end);
+	}
+	bch2_trans_exit(&trans);
+
+	return ret;
+}
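The bbpos helpers above define a total order over (btree id, position) pairs so the backpointers check can be chunked by what fits in memory and resumed where the previous pass ended. A standalone sketch of the compare/successor contract, with plain integers standing in for the bcachefs types:

/* Simplified stand-ins, not the bcachefs types: cmp_int() is the usual
 * (a > b) - (a < b) idiom, and the successor of the last position in one
 * tree is the first position of the next tree. */
#include <assert.h>
#include <stdint.h>

#define NR_TREES	3
#define POS_MAX		UINT64_MAX

struct bbpos { unsigned btree; uint64_t pos; };

static int cmp_int(uint64_t l, uint64_t r) { return (l > r) - (l < r); }

static int bbpos_cmp(struct bbpos l, struct bbpos r)
{
	int c = cmp_int(l.btree, r.btree);
	return c ? c : cmp_int(l.pos, r.pos);
}

static struct bbpos bbpos_successor(struct bbpos p)
{
	if (p.pos != POS_MAX) {		/* next position in the same tree */
		p.pos++;
		return p;
	}
	assert(p.btree != NR_TREES);	/* else: first position of next tree */
	p.btree++;
	p.pos = 0;
	return p;
}

int main(void)
{
	struct bbpos a = { 0, POS_MAX }, b = bbpos_successor(a);

	assert(bbpos_cmp(a, b) < 0 && b.btree == 1 && b.pos == 0);
	return 0;
}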
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 13ce2975..dd6b536c 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1913,6 +1913,8 @@ do_write:
 	u64s = bch2_sort_keys(i->start, &sort_iter, false);
 	le16_add_cpu(&i->u64s, u64s);
 
+	BUG_ON(!b->written && i->u64s != b->data->keys.u64s);
+
 	set_needs_whiteout(i, false);
 
 	/* do we have data to write? */
@@ -1922,6 +1924,10 @@ do_write:
 	bytes_to_write = vstruct_end(i) - data;
 	sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
 
+	if (!b->written &&
+	    b->key.k.type == KEY_TYPE_btree_ptr_v2)
+		BUG_ON(btree_ptr_sectors_written(&b->key) != sectors_to_write);
+
 	memset(data + bytes_to_write, 0,
 	       (sectors_to_write << 9) - bytes_to_write);
 
@@ -2010,11 +2016,6 @@ do_write:
 
 	b->written += sectors_to_write;
 
-	if (wbio->wbio.first_btree_write &&
-	    b->key.k.type == KEY_TYPE_btree_ptr_v2)
-		bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
-			cpu_to_le16(b->written);
-
 	if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2)
 		bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
 			cpu_to_le16(b->written);
@@ -2027,10 +2028,6 @@ do_write:
 	return;
 err:
 	set_btree_node_noevict(b);
-	if (!b->written &&
-	    b->key.k.type == KEY_TYPE_btree_ptr_v2)
-		bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
-			cpu_to_le16(sectors_to_write);
 	b->written += sectors_to_write;
 nowrite:
 	btree_bounce_free(c, bytes, used_mempool, data);
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 925ffb31..dffb0170 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -1850,10 +1850,12 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
 	struct bkey_s_c k, k2;
 	int ret;
 
-	EBUG_ON(iter->path->cached || iter->path->level);
+	EBUG_ON(iter->path->cached);
 	bch2_btree_iter_verify(iter);
 
 	while (1) {
+		struct btree_path_level *l;
+
 		iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
 					iter->flags & BTREE_ITER_INTENT,
 					btree_iter_ip_allocated(iter));
@@ -1866,9 +1868,18 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
 			goto out;
 		}
 
+		l = path_l(iter->path);
+
+		if (unlikely(!l->b)) {
+			/* No btree nodes at requested level: */
+			bch2_btree_iter_set_pos(iter, SPOS_MAX);
+			k = bkey_s_c_null;
+			goto out;
+		}
+
 		btree_path_set_should_be_locked(iter->path);
 
-		k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
+		k = btree_path_level_peek_all(trans->c, l, &iter->k);
 
 		if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
 		    k.k &&
@@ -1889,7 +1900,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
 			: NULL;
 		if (next_update &&
 		    bpos_cmp(next_update->k.p,
-			     k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
+			     k.k ? k.k->p : l->b->key.k.p) <= 0) {
 			iter->k = next_update->k;
 			k = bkey_i_to_s_c(next_update);
 		}
@@ -1910,9 +1921,9 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
 
 		if (likely(k.k)) {
 			break;
-		} else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) {
+		} else if (likely(bpos_cmp(l->b->key.k.p, SPOS_MAX))) {
 			/* Advance to next leaf node: */
-			search_key = bpos_successor(iter->path->l[0].b->key.k.p);
+			search_key = bpos_successor(l->b->key.k.p);
 		} else {
 			/* End of btree: */
 			bch2_btree_iter_set_pos(iter, SPOS_MAX);
diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c
index f4340086..9a525d34 100644
--- a/libbcachefs/btree_locking.c
+++ b/libbcachefs/btree_locking.c
@@ -96,25 +96,26 @@ static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
 
 static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
 {
-	int ret;
-
 	if (i == g->g) {
 		trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_);
-		ret = btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
+		return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
 	} else {
 		i->trans->lock_must_abort = true;
-		ret = 0;
-	}
-
-	for (i = g->g + 1; i < g->g + g->nr; i++)
 		wake_up_process(i->trans->locking_wait.task);
-
-	return ret;
+		return 0;
+	}
 }
 
 static noinline int break_cycle(struct lock_graph *g)
 {
 	struct trans_waiting_for_lock *i;
 
+	/*
+	 * We'd like to prioritize aborting transactions that have done less
+	 * work - but it appears breaking cycles by telling other transactions
+	 * to abort may still be buggy:
+	 */
+#if 0
 	for (i = g->g; i < g->g + g->nr; i++) {
 		if (i->trans->lock_may_not_fail ||
 		    i->trans->locking_wait.lock_want == SIX_LOCK_write)
@@ -130,7 +131,7 @@ static noinline int break_cycle(struct lock_graph *g)
 
 		return abort_lock(g, i);
 	}
-
+#endif
 	for (i = g->g; i < g->g + g->nr; i++) {
 		if (i->trans->lock_may_not_fail)
 			continue;
@@ -138,7 +139,29 @@ static noinline int break_cycle(struct lock_graph *g)
 		return abort_lock(g, i);
 	}
 
-	BUG();
+	{
+		struct bch_fs *c = g->g->trans->c;
+		struct printbuf buf = PRINTBUF;
+
+		bch_err(c, "cycle of nofail locks");
+
+		for (i = g->g; i < g->g + g->nr; i++) {
+			struct btree_trans *trans = i->trans;
+
+			bch2_btree_trans_to_text(&buf, trans);
+
+			prt_printf(&buf, "backtrace:");
+			prt_newline(&buf);
+			printbuf_indent_add(&buf, 2);
+			bch2_prt_backtrace(&buf, trans->locking_wait.task);
+			printbuf_indent_sub(&buf, 2);
+			prt_newline(&buf);
+		}
+
+		bch2_print_string_as_lines(KERN_ERR, buf.buf);
+		printbuf_exit(&buf);
+		BUG();
+	}
 }
 
 static void lock_graph_pop(struct lock_graph *g)
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index 89941fb8..1c2e7b2b 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -8,8 +8,8 @@
 struct bch_fs;
 struct btree;
 
-void bch2_btree_node_lock_for_insert(struct btree_trans *, struct btree_path *,
-				     struct btree *);
+void bch2_btree_node_prep_for_write(struct btree_trans *,
+				    struct btree_path *, struct btree *);
 bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
 				struct btree *, struct btree_node_iter *,
 				struct bkey_i *);
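break_cycle() above now only aborts a transaction that is allowed to fail, and treats a cycle made entirely of nofail lock acquisitions as a fatal bug worth a full report. A simplified standalone sketch of that victim-selection scan (stand-in types, not the bcachefs ones):

/* Demo of the deadlock-cycle victim selection: scan the waiters in the
 * cycle for one that may fail; if every waiter is nofail, the cycle is
 * unrecoverable (the real code logs all transactions and BUG()s). */
#include <stdbool.h>
#include <stdio.h>

struct waiter {
	int	id;
	bool	lock_may_not_fail;
};

static int break_cycle(struct waiter *g, int nr)
{
	for (int i = 0; i < nr; i++) {
		if (g[i].lock_may_not_fail)
			continue;
		return g[i].id;		/* abort this transaction */
	}
	return -1;			/* cycle of nofail locks */
}

int main(void)
{
	struct waiter cycle[] = { { 1, true }, { 2, false }, { 3, false } };

	printf("abort transaction %d\n", break_cycle(cycle, 3)); /* -> 2 */
	return 0;
}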
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 578ba747..b9661407 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -23,9 +23,9 @@
 #include
 #include
 
-static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
-				   struct btree_path *, struct btree *,
-				   struct keylist *, unsigned);
+static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
+				  struct btree_path *, struct btree *,
+				  struct keylist *, unsigned);
 static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
 
 static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
@@ -37,8 +37,8 @@ static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
 
 	path = bch2_path_get(trans, btree_id, pos, level + 1, level,
 			     BTREE_ITER_NOPRESERVE|
-			     BTREE_ITER_INTENT, _THIS_IP_);
-	path = bch2_btree_path_make_mut(trans, path, true, _THIS_IP_);
+			     BTREE_ITER_INTENT, _RET_IP_);
+	path = bch2_btree_path_make_mut(trans, path, true, _RET_IP_);
 	bch2_btree_path_downgrade(trans, path);
 	__bch2_btree_path_unlock(trans, path);
 	return path;
@@ -195,6 +195,43 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
 	}
 }
 
+static void bch2_btree_node_free_never_used(struct btree_update *as,
+					    struct btree_trans *trans,
+					    struct btree *b)
+{
+	struct bch_fs *c = as->c;
+	struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL];
+	struct btree_path *path;
+	unsigned level = b->c.level;
+
+	BUG_ON(!list_empty(&b->write_blocked));
+	BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as));
+
+	b->will_make_reachable = 0;
+	closure_put(&as->cl);
+
+	clear_btree_node_will_make_reachable(b);
+	clear_btree_node_accessed(b);
+	clear_btree_node_dirty_acct(c, b);
+	clear_btree_node_need_write(b);
+
+	mutex_lock(&c->btree_cache.lock);
+	list_del_init(&b->list);
+	bch2_btree_node_hash_remove(&c->btree_cache, b);
+	mutex_unlock(&c->btree_cache.lock);
+
+	BUG_ON(p->nr >= ARRAY_SIZE(p->b));
+	p->b[p->nr++] = b;
+
+	six_unlock_intent(&b->c.lock);
+
+	trans_for_each_path(trans, path)
+		if (path->l[level].b == b) {
+			btree_node_unlock(trans, path, level);
+			path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
+		}
+}
+
 static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
 					     struct disk_reservation *res,
 					     struct closure *cl,
@@ -392,8 +429,6 @@ static struct btree *__btree_root_alloc(struct btree_update *as,
 	btree_node_set_format(b, b->data->format);
 	bch2_btree_build_aux_trees(b);
 
-	bch2_btree_update_add_new_node(as, b);
 	six_unlock_write(&b->c.lock);
 
 	return b;
@@ -859,6 +894,14 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
 	mutex_unlock(&c->btree_interior_update_lock);
 
 	btree_update_add_key(as, &as->new_keys, b);
+
+	if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+		unsigned bytes = vstruct_end(&b->data->keys) - (void *) b->data;
+		unsigned sectors = round_up(bytes, block_bytes(c)) >> 9;
+
+		bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
+			cpu_to_le16(sectors);
+	}
 }
 
 /*
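bch2_btree_update_add_new_node() now stamps sectors_written once, up front: the bytes currently in the node, rounded up to the filesystem block size and converted to 512-byte sectors. A standalone sketch of that arithmetic, with made-up example values:

/* Demo of the sectors_written computation. round_up() here is the usual
 * power-of-two rounding; block_bytes and the byte count are examples. */
#include <stdio.h>

#define round_up(n, sz)	(((n) + (sz) - 1) & ~((sz) - 1))

int main(void)
{
	unsigned block_bytes = 4096;	/* filesystem block size */
	unsigned bytes = 5000;		/* bytes of btree node data */
	unsigned sectors = round_up(bytes, block_bytes) >> 9;

	printf("%u bytes -> %u 512-byte sectors\n", bytes, sectors); /* 16 */
	return 0;
}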
@@ -1026,24 +1069,24 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 		nr_nodes[!!update_level] += 1 + split;
 		update_level++;
 
-		if (!btree_path_node(path, update_level))
+		ret = bch2_btree_path_upgrade(trans, path, update_level + 1);
+		if (ret)
+			return ERR_PTR(ret);
+
+		if (!btree_path_node(path, update_level)) {
+			/* Allocating new root? */
+			nr_nodes[1] += split;
+			update_level = BTREE_MAX_DEPTH;
+			break;
+		}
+
+		if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
+					BKEY_BTREE_PTR_U64s_MAX * (1 + split)))
 			break;
 
-		/*
-		 * XXX: figure out how far we might need to split,
-		 * instead of locking/reserving all the way to the root:
-		 */
-		split = update_level + 1 < BTREE_MAX_DEPTH;
+		split = true;
 	}
 
-	/* Might have to allocate a new root: */
-	if (update_level < BTREE_MAX_DEPTH)
-		nr_nodes[1] += 1;
-
-	ret = bch2_btree_path_upgrade(trans, path, U8_MAX);
-	if (ret)
-		return ERR_PTR(ret);
-
 	if (flags & BTREE_INSERT_GC_LOCK_HELD)
 		lockdep_assert_held(&c->gc_lock);
 	else if (!down_read_trylock(&c->gc_lock)) {
@@ -1064,6 +1107,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 	as->mode	= BTREE_INTERIOR_NO_UPDATE;
 	as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD);
 	as->btree_id	= path->btree_id;
+	as->update_level = update_level;
 	INIT_LIST_HEAD(&as->list);
 	INIT_LIST_HEAD(&as->unwritten_list);
 	INIT_LIST_HEAD(&as->write_blocked_list);
@@ -1191,7 +1235,6 @@ static void bch2_btree_set_root(struct btree_update *as,
 	struct btree *old;
 
 	trace_and_count(c, btree_node_set_root, c, b);
-	BUG_ON(!b->written);
 
 	old = btree_node_root(c, b);
@@ -1315,8 +1358,6 @@ static struct btree *__btree_split_node(struct btree_update *as,
 	SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data));
 	n2->key.k.p = n1->key.k.p;
 
-	bch2_btree_update_add_new_node(as, n2);
-
 	set1 = btree_bset_first(n1);
 	set2 = btree_bset_first(n2);
@@ -1458,18 +1499,19 @@ static void btree_split_insert_keys(struct btree_update *as,
 	btree_node_interior_verify(as->c, b);
 }
 
-static void btree_split(struct btree_update *as, struct btree_trans *trans,
-			struct btree_path *path, struct btree *b,
-			struct keylist *keys, unsigned flags)
+static int btree_split(struct btree_update *as, struct btree_trans *trans,
+		       struct btree_path *path, struct btree *b,
+		       struct keylist *keys, unsigned flags)
 {
 	struct bch_fs *c = as->c;
 	struct btree *parent = btree_node_parent(path, b);
 	struct btree *n1, *n2 = NULL, *n3 = NULL;
 	struct btree_path *path1 = NULL, *path2 = NULL;
 	u64 start_time = local_clock();
+	int ret = 0;
 
 	BUG_ON(!parent && (b != btree_node_root(c, b)));
-	BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level));
+	BUG_ON(parent && !btree_node_intent_locked(path, b->c.level + 1));
 
 	bch2_btree_interior_update_will_free_node(as, b);
 
@@ -1499,9 +1541,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 		bch2_btree_path_level_init(trans, path2, n2);
 
 		bch2_btree_update_add_new_node(as, n1);
-
-		bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
-		bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+		bch2_btree_update_add_new_node(as, n2);
 
 		/*
 		 * Note that on recursive parent_keys == keys, so we
@@ -1524,9 +1564,9 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 			n3->sib_u64s[0] = U16_MAX;
 			n3->sib_u64s[1] = U16_MAX;
 
-			btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
+			bch2_btree_update_add_new_node(as, n3);
 
-			bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+			btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
 		}
 	} else {
 		trace_and_count(c, btree_node_compact, c, b);
@@ -1541,8 +1581,6 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 
 		bch2_btree_update_add_new_node(as, n1);
 
-		bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
-
 		if (parent)
 			bch2_keylist_add(&as->parent_keys, &n1->key);
 	}
@@ -1551,7 +1589,9 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 
 	if (parent) {
 		/* Split a non root node */
-		bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+		ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+		if (ret)
+			goto err;
 	} else if (n3) {
 		bch2_btree_set_root(as, trans, path, n3);
 	} else {
@@ -1559,11 +1599,16 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 		bch2_btree_set_root(as, trans, path, n1);
 	}
 
-	bch2_btree_update_get_open_buckets(as, n1);
-	if (n2)
-		bch2_btree_update_get_open_buckets(as, n2);
-	if (n3)
+	if (n3) {
 		bch2_btree_update_get_open_buckets(as, n3);
+		bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+	}
+	if (n2) {
+		bch2_btree_update_get_open_buckets(as, n2);
+		bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+	}
+	bch2_btree_update_get_open_buckets(as, n1);
+	bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
 
 	/*
 	 * The old node must be freed (in memory) _before_ unlocking the new
@@ -1584,7 +1629,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 	if (n2)
 		six_unlock_intent(&n2->c.lock);
 	six_unlock_intent(&n1->c.lock);
-
+out:
 	if (path2) {
 		__bch2_btree_path_unlock(trans, path2);
 		bch2_path_put(trans, path2, true);
@@ -1600,6 +1645,14 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 			       ? BCH_TIME_btree_node_split
 			       : BCH_TIME_btree_node_compact],
 			       start_time);
+	return ret;
+err:
+	if (n3)
+		bch2_btree_node_free_never_used(as, trans, n3);
+	if (n2)
+		bch2_btree_node_free_never_used(as, trans, n2);
+	bch2_btree_node_free_never_used(as, trans, n1);
+	goto out;
 }
 
 static void
@@ -1634,22 +1687,30 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
  * If a split occurred, this function will return early. This can only happen
  * for leaf nodes -- inserts into interior nodes have to be atomic.
 
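The local_clock() & 63 check added to bch2_btree_insert_node() below forces a transaction_restart_split_race roughly once per 64 calls, which (as I read it) keeps the new restart path exercised in ordinary operation. A standalone sketch of this style of probabilistic fault injection, using clock_gettime() as a stand-in for the kernel's local_clock():

/* Demo: fail roughly 1 in 64 calls based on the clock's low bits, so the
 * rare error path runs routinely. EAGAIN_FAKE is a made-up errno stand-in
 * for the transaction-restart error code. */
#include <stdio.h>
#include <time.h>

#define EAGAIN_FAKE	11

static unsigned long local_clock_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000UL + ts.tv_nsec;
}

static int do_insert(void)
{
	if (!(local_clock_ns() & 63))	/* ~1/64 of calls: pretend we raced */
		return -EAGAIN_FAKE;
	return 0;			/* normal insert path */
}

int main(void)
{
	int injected = 0;

	for (int i = 0; i < 10000; i++)
		injected += do_insert() != 0;
	printf("injected %d restarts out of 10000\n", injected);
	return 0;
}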
 */
-static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
-				   struct btree_path *path, struct btree *b,
-				   struct keylist *keys, unsigned flags)
+static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
+				  struct btree_path *path, struct btree *b,
+				  struct keylist *keys, unsigned flags)
 {
 	struct bch_fs *c = as->c;
 	int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
 	int old_live_u64s = b->nr.live_u64s;
 	int live_u64s_added, u64s_added;
+	int ret;
 
 	lockdep_assert_held(&c->gc_lock);
-	BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level));
+	BUG_ON(!btree_node_intent_locked(path, b->c.level));
 	BUG_ON(!b->c.level);
 	BUG_ON(!as || as->b);
 	bch2_verify_keylist_sorted(keys);
 
-	bch2_btree_node_lock_for_insert(trans, path, b);
+	if (!(local_clock() & 63))
+		return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
+
+	ret = bch2_btree_node_lock_write(trans, path, &b->c);
+	if (ret)
+		return ret;
+
+	bch2_btree_node_prep_for_write(trans, path, b);
 
 	if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
 		bch2_btree_node_unlock_write(trans, path, b);
@@ -1675,9 +1736,16 @@ static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *
 	bch2_btree_node_unlock_write(trans, path, b);
 
 	btree_node_interior_verify(c, b);
-	return;
+	return 0;
 split:
-	btree_split(as, trans, path, b, keys, flags);
+	/*
+	 * We could attempt to avoid the transaction restart, by calling
+	 * bch2_btree_path_upgrade() and allocating more nodes:
+	 */
+	if (b->c.level >= as->update_level)
+		return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
+
+	return btree_split(as, trans, path, b, keys, flags);
 }
 
 int bch2_btree_split_leaf(struct btree_trans *trans,
@@ -1694,10 +1762,15 @@
 	if (IS_ERR(as))
 		return PTR_ERR(as);
 
-	btree_split(as, trans, path, b, NULL, flags);
+	ret = btree_split(as, trans, path, b, NULL, flags);
+	if (ret) {
+		bch2_btree_update_free(as, trans);
+		return ret;
+	}
+
 	bch2_btree_update_done(as, trans);
 
-	for (l = path->level + 1; btree_path_node(path, l) && !ret; l++)
+	for (l = path->level + 1; btree_node_intent_locked(path, l) && !ret; l++)
 		ret = bch2_foreground_maybe_merge(trans, path, l, flags);
 
 	return ret;
@@ -1823,8 +1896,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 	btree_set_min(n, prev->data->min_key);
 	btree_set_max(n, next->data->max_key);
 
-	bch2_btree_update_add_new_node(as, n);
-
 	n->data->format	 = new_f;
 	btree_node_set_format(n, new_f);
 
@@ -1834,13 +1905,13 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 	bch2_btree_build_aux_trees(n);
 	six_unlock_write(&n->c.lock);
 
+	bch2_btree_update_add_new_node(as, n);
+
 	new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
 	six_lock_increment(&n->c.lock, SIX_LOCK_intent);
 	mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
 	bch2_btree_path_level_init(trans, new_path, n);
 
-	bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-
 	bkey_init(&delete.k);
 	delete.k.p = prev->key.k.p;
 	bch2_keylist_add(&as->parent_keys, &delete);
@@ -1848,11 +1919,14 @@
 
 	bch2_trans_verify_paths(trans);
 
-	bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+	ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+	if (ret)
+		goto err_free_update;
 
 	bch2_trans_verify_paths(trans);
 
 	bch2_btree_update_get_open_buckets(as, n);
+	bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
 	bch2_btree_node_free_inmem(trans, path, b);
 	bch2_btree_node_free_inmem(trans, sib_path, m);
@@ -1873,6 +1947,10 @@ err:
 	bch2_path_put(trans, sib_path, true);
 	bch2_trans_verify_locks(trans);
 	return ret;
+err_free_update:
+	bch2_btree_node_free_never_used(as, trans, n);
+	bch2_btree_update_free(as, trans);
+	goto out;
 }
 
 /**
@@ -1913,17 +1991,18 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 
 	trace_and_count(c, btree_node_rewrite, c, b);
 
-	bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-
 	if (parent) {
 		bch2_keylist_add(&as->parent_keys, &n->key);
-		bch2_btree_insert_node(as, trans, iter->path, parent,
-				       &as->parent_keys, flags);
+		ret = bch2_btree_insert_node(as, trans, iter->path, parent,
+					     &as->parent_keys, flags);
+		if (ret)
+			goto err;
 	} else {
 		bch2_btree_set_root(as, trans, iter->path, n);
 	}
 
 	bch2_btree_update_get_open_buckets(as, n);
+	bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
 	bch2_btree_node_free_inmem(trans, iter->path, b);
 
@@ -1931,10 +2010,15 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 	six_unlock_intent(&n->c.lock);
 
 	bch2_btree_update_done(as, trans);
-	bch2_path_put(trans, new_path, true);
 out:
+	if (new_path)
+		bch2_path_put(trans, new_path, true);
 	bch2_btree_path_downgrade(trans, iter->path);
 	return ret;
+err:
+	bch2_btree_node_free_never_used(as, trans, n);
+	bch2_btree_update_free(as, trans);
+	goto out;
 }
 
 struct async_btree_rewrite {
diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h
index 7af810df..dabe8159 100644
--- a/libbcachefs/btree_update_interior.h
+++ b/libbcachefs/btree_update_interior.h
@@ -52,6 +52,7 @@ struct btree_update {
 	unsigned			took_gc_lock:1;
 
 	enum btree_id			btree_id;
+	unsigned			update_level;
 
 	struct disk_reservation		disk_res;
 	struct journal_preres		journal_preres;
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 08d7001f..af3fbfcc 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -56,9 +56,9 @@ static inline bool same_leaf_as_next(struct btree_trans *trans,
 		insert_l(&i[0])->b == insert_l(&i[1])->b;
 }
 
-static inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
-						  struct btree_path *path,
-						  struct btree *b)
+inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
+					   struct btree_path *path,
+					   struct btree *b)
 {
 	struct bch_fs *c = trans->c;
 
@@ -77,14 +77,6 @@ static inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
 	bch2_btree_init_next(trans, b);
 }
 
-void bch2_btree_node_lock_for_insert(struct btree_trans *trans,
-				     struct btree_path *path,
-				     struct btree *b)
-{
-	bch2_btree_node_lock_write_nofail(trans, path, &b->c);
-	bch2_btree_node_prep_for_write(trans, path, b);
-}
-
 /* Inserting into a given leaf node (last stage of insert): */
 
 /* Handle overwrites and do insert, for non extents: */
@@ -1631,7 +1623,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
 	int ret = 0;
 
 	bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
-	while ((k = bch2_btree_iter_peek(&iter)).k) {
+	while ((k = bch2_btree_iter_peek_upto(&iter, bpos_predecessor(end))).k) {
 		struct disk_reservation disk_res =
 			bch2_disk_reservation_init(trans->c, 0);
 		struct bkey_i delete;
@@ -1640,9 +1632,6 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
 		if (ret)
 			goto err;
 
-		if (bkey_cmp(iter.pos, end) >= 0)
-			break;
-
 		bkey_init(&delete.k);
 
 		/*
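bch2_btree_delete_range_trans() now bounds the iterator with bpos_predecessor(end) because bch2_btree_iter_peek_upto() takes an inclusive upper bound while the range's end is exclusive; the old explicit bkey_cmp() check becomes unnecessary. A standalone sketch of that off-by-one contract, with plain integers standing in for struct bpos:

/* Demo: an inclusive "peek up to" bound for an exclusive range end is
 * end - 1. keys[] is a toy stand-in for the btree. */
#include <assert.h>
#include <stdint.h>

static uint64_t keys[] = { 10, 20, 30, 40 };

/* index of first key in [from, upto] inclusive, or -1 */
static int peek_upto(uint64_t from, uint64_t upto)
{
	for (unsigned i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
		if (keys[i] >= from && keys[i] <= upto)
			return i;
	return -1;
}

int main(void)
{
	uint64_t start = 15, end = 30;		/* half-open range [15, 30) */

	/* inclusive bound = predecessor of the exclusive end */
	assert(peek_upto(start, end - 1) == 1);	/* finds 20 */
	assert(peek_upto(30, end - 1) == -1);	/* 30 itself is excluded */
	return 0;
}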
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 3102166d..5ef35e3b 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -328,8 +328,9 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
 
 	i = 0;
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-		if (p.ptr.cached)
-			m->data_opts.rewrite_ptrs &= ~(1U << i);
+		if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+		    p.ptr.cached)
+			BUG();
 
 		if (!((1U << i) & m->data_opts.rewrite_ptrs))
 			bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
@@ -365,5 +366,23 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
 	m->op.nr_replicas = m->op.nr_replicas_required =
 		hweight32(m->data_opts.rewrite_ptrs) + m->data_opts.extra_replicas;
+
+	BUG_ON(!m->op.nr_replicas);
 	return 0;
 }
+
+void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const struct bch_extent_ptr *ptr;
+	unsigned i = 0;
+
+	bkey_for_each_ptr(ptrs, ptr) {
+		if ((opts->rewrite_ptrs & (1U << i)) && ptr->cached) {
+			opts->kill_ptrs |= 1U << i;
+			opts->rewrite_ptrs ^= 1U << i;
+		}
+
+		i++;
+	}
+}
diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h
index e6450545..6793aa57 100644
--- a/libbcachefs/data_update.h
+++ b/libbcachefs/data_update.h
@@ -10,6 +10,7 @@ struct moving_context;
 
 struct data_update_opts {
 	unsigned	rewrite_ptrs;
+	unsigned	kill_ptrs;
 	u16		target;
 	u8		extra_replicas;
 	unsigned	btree_insert_flags;
@@ -34,5 +35,6 @@ int bch2_data_update_init(struct bch_fs *, struct data_update *,
 			  struct write_point_specifier, struct bch_io_opts,
 			  struct data_update_opts, enum btree_id,
 			  struct bkey_s_c);
+void bch2_data_update_opts_normalize(struct bkey_s_c, struct data_update_opts *);
 
 #endif /* _BCACHEFS_DATA_UPDATE_H */
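bch2_data_update_opts_normalize() moves cached pointers out of the rewrite set and into the new kill set, which is why bch2_data_update_init() can now BUG() if it still sees a cached pointer marked for rewrite. A standalone sketch of the mask manipulation (plain flags stand in for the decoded extent pointers):

/* Demo of the normalize step: a pointer both marked for rewrite and
 * cached gets dropped instead of rewritten. */
#include <assert.h>
#include <stdbool.h>

struct opts { unsigned rewrite_ptrs, kill_ptrs; };

static void normalize(struct opts *o, const bool *cached, unsigned nr)
{
	for (unsigned i = 0; i < nr; i++)
		if ((o->rewrite_ptrs & (1U << i)) && cached[i]) {
			o->kill_ptrs    |= 1U << i;	/* drop it instead */
			o->rewrite_ptrs ^= 1U << i;	/* ...of rewriting */
		}
}

int main(void)
{
	bool cached[3] = { false, true, false };
	struct opts o = { .rewrite_ptrs = 0x7, .kill_ptrs = 0 };

	normalize(&o, cached, 3);
	assert(o.rewrite_ptrs == 0x5 && o.kill_ptrs == 0x2);
	return 0;
}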
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index 1d2a1615..d87131f5 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -501,26 +501,6 @@ static const struct file_operations cached_btree_nodes_ops = {
 	.read		= bch2_cached_btree_nodes_read,
 };
 
-static int prt_backtrace(struct printbuf *out, struct task_struct *task)
-{
-	unsigned long entries[32];
-	unsigned i, nr_entries;
-	int ret;
-
-	ret = down_read_killable(&task->signal->exec_update_lock);
-	if (ret)
-		return ret;
-
-	nr_entries = stack_trace_save_tsk(task, entries, ARRAY_SIZE(entries), 0);
-	for (i = 0; i < nr_entries; i++) {
-		prt_printf(out, "[<0>] %pB", (void *)entries[i]);
-		prt_newline(out);
-	}
-
-	up_read(&task->signal->exec_update_lock);
-	return 0;
-}
-
 static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
 					    size_t size, loff_t *ppos)
 {
@@ -547,7 +527,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
 		prt_printf(&i->buf, "backtrace:");
 		prt_newline(&i->buf);
 		printbuf_indent_add(&i->buf, 2);
-		prt_backtrace(&i->buf, trans->locking_wait.task);
+		bch2_prt_backtrace(&i->buf, trans->locking_wait.task);
 		printbuf_indent_sub(&i->buf, 2);
 		prt_newline(&i->buf);
 
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index f902da01..d3fa2d7a 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -1403,10 +1403,8 @@ static int __bch2_ec_stripe_head_reuse(struct bch_fs *c,
 	int ret;
 
 	idx = get_existing_stripe(c, h);
-	if (idx < 0) {
-		bch_err(c, "failed to find an existing stripe");
+	if (idx < 0)
 		return -BCH_ERR_ENOSPC_stripe_reuse;
-	}
 
 	h->s->have_existing_stripe = true;
 	ret = get_stripe_key(c, idx, &h->s->existing_stripe);
@@ -1444,21 +1442,9 @@
 static int __bch2_ec_stripe_head_reserve(struct bch_fs *c,
 					 struct ec_stripe_head *h)
 {
-	int ret;
-
-	ret = bch2_disk_reservation_get(c, &h->s->res,
-					h->blocksize,
-					h->s->nr_parity, 0);
-
-	if (ret) {
-		/*
-		 * This means we need to wait for copygc to
-		 * empty out buckets from existing stripes:
-		 */
-		bch_err_ratelimited(c, "failed to reserve stripe: %s", bch2_err_str(ret));
-	}
-
-	return ret;
+	return bch2_disk_reservation_get(c, &h->s->res,
+					 h->blocksize,
+					 h->s->nr_parity, 0);
 }
 
 struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
@@ -1500,8 +1486,10 @@
 	ret = __bch2_ec_stripe_head_reserve(c, h);
 	if (ret && needs_stripe_new)
 		ret = __bch2_ec_stripe_head_reuse(c, h);
-	if (ret)
+	if (ret) {
+		bch_err_ratelimited(c, "failed to get stripe: %s", bch2_err_str(ret));
 		goto err;
+	}
 
 	if (!h->s->allocated) {
 		ret = new_stripe_alloc_buckets(c, h, cl);
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index fc0bb5f8..9f293040 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -42,6 +42,7 @@
 	x(BCH_ERR_transaction_restart,	transaction_restart_key_cache_raced)	\
 	x(BCH_ERR_transaction_restart,	transaction_restart_key_cache_realloced)\
 	x(BCH_ERR_transaction_restart,	transaction_restart_journal_preres_get)	\
+	x(BCH_ERR_transaction_restart,	transaction_restart_split_race)		\
 	x(BCH_ERR_transaction_restart,	transaction_restart_nested)		\
 	x(0,				no_btree_node)				\
 	x(BCH_ERR_no_btree_node,	no_btree_node_relock)			\
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 7d45f486..fdd43686 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -2208,6 +2208,9 @@ err:
 	/* inode->i_dio_count is our ref on inode and thus bch_fs */
 	inode_dio_end(&inode->v);
 
+	if (ret < 0)
+		ret = bch2_err_class(ret);
+
 	if (!sync) {
 		req->ki_complete(req, ret);
 		ret = -EIOCBQUEUED;
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index e85c3143..4f4dfaa7 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -191,7 +191,52 @@ void bch_move_stats_init(struct bch_move_stats *stats, char *name)
 	scnprintf(stats->name, sizeof(stats->name), "%s", name);
 }
 
+static int bch2_extent_drop_ptrs(struct btree_trans *trans,
+				 struct btree_iter *iter,
+				 struct bkey_s_c k,
+				 struct data_update_opts data_opts)
+{
+	struct bch_fs *c = trans->c;
+	struct bkey_i *n;
+	int ret;
+
+	n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+	ret = PTR_ERR_OR_ZERO(n);
+	if (ret)
+		return ret;
+
+	bkey_reassemble(n, k);
+
+	while (data_opts.kill_ptrs) {
+		unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+		struct bch_extent_ptr *ptr;
+
+		bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
+		data_opts.kill_ptrs ^= 1U << drop;
+	}
+
+	/*
+	 * If the new extent no longer has any pointers, bch2_extent_normalize()
+	 * will do the appropriate thing with it (turning it into a
+	 * KEY_TYPE_error key, or just a discard if it was a cached extent)
+	 */
+	bch2_extent_normalize(c, bkey_i_to_s(n));
+
+	/*
+	 * Since we're not inserting through an extent iterator
+	 * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+	 * we aren't using the extent overwrite path to delete, we're
+	 * just using the normal key deletion path:
+	 */
+	if (bkey_deleted(&n->k))
+		n->k.size = 0;
+
+	return bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+		bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
+}
+
 static int bch2_move_extent(struct btree_trans *trans,
+			    struct btree_iter *iter,
 			    struct moving_context *ctxt,
 			    struct bch_io_opts io_opts,
 			    enum btree_id btree_id,
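bch2_extent_drop_ptrs() above clears kill_ptrs highest bit first via __fls(), plausibly so the remaining (lower) indexes still name the right pointers as entries are removed. A standalone sketch of that loop using the GCC/Clang __builtin_clz() formulation of __fls():

/* Demo: drain a bitmask from the highest set bit down. A real caller
 * would drop pointer `drop` from the extent on each pass. */
#include <assert.h>

static unsigned fls_idx(unsigned v)	/* index of highest set bit */
{
	return 31 - __builtin_clz(v);
}

int main(void)
{
	unsigned kill = 0x15;	/* bits 0, 2 and 4 set */
	unsigned order[3], n = 0;

	while (kill) {
		unsigned drop = fls_idx(kill);

		order[n++] = drop;
		kill ^= 1U << drop;
	}
	assert(order[0] == 4 && order[1] == 2 && order[2] == 0);
	return 0;
}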
@@ -206,6 +251,15 @@ static int bch2_move_extent(struct btree_trans *trans,
 	unsigned sectors = k.k->size, pages;
 	int ret = -ENOMEM;
 
+	bch2_data_update_opts_normalize(k, &data_opts);
+
+	if (!data_opts.rewrite_ptrs &&
+	    !data_opts.extra_replicas) {
+		if (data_opts.kill_ptrs)
+			return bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+		return 0;
+	}
+
 	if (!percpu_ref_tryget_live(&c->writes))
 		return -EROFS;
 
@@ -447,7 +501,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
 		bch2_bkey_buf_reassemble(&sk, c, k);
 		k = bkey_i_to_s_c(sk.k);
 
-		ret2 = bch2_move_extent(&trans, ctxt, io_opts,
+		ret2 = bch2_move_extent(&trans, &iter, ctxt, io_opts,
 					btree_id, k, data_opts);
 		if (ret2) {
 			if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
@@ -544,7 +598,7 @@ again:
 
 			prt_str(&buf, "failed to evacuate bucket ");
 			bch2_bkey_val_to_text(&buf, c, k);
-			bch2_trans_inconsistent(trans, "%s", buf.buf);
+			bch_err(c, "%s", buf.buf);
 			printbuf_exit(&buf);
 		}
 	}
@@ -599,11 +653,12 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 		bch2_bkey_buf_reassemble(&sk, c, k);
 		k = bkey_i_to_s_c(sk.k);
 
-		bch2_trans_iter_exit(&trans, &iter);
 
 		ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
-		if (ret)
+		if (ret) {
+			bch2_trans_iter_exit(&trans, &iter);
 			continue;
+		}
 
 		data_opts = _data_opts;
 		data_opts.target	= io_opts.background_target;
@@ -615,8 +670,10 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 			i++;
 		}
 
-		ret = bch2_move_extent(&trans, ctxt, io_opts,
+		ret = bch2_move_extent(&trans, &iter, ctxt, io_opts,
 				       bp.btree_id, k, data_opts);
+		bch2_trans_iter_exit(&trans, &iter);
+
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 			continue;
 		if (ret == -ENOMEM) {
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index a824e160..9df08289 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -1325,19 +1325,11 @@ static bool bch2_fs_may_start(struct bch_fs *c)
 
 static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
 {
-	/*
-	 * Device going read only means the copygc reserve get smaller, so we
-	 * don't want that happening while copygc is in progress:
-	 */
-	bch2_copygc_stop(c);
-
 	/*
 	 * The allocator thread itself allocates btree nodes, so stop it first:
 	 */
 	bch2_dev_allocator_remove(c, ca);
 	bch2_dev_journal_stop(&c->journal, ca);
-
-	bch2_copygc_start(c);
 }
 
 static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index 81befc43..d1919350 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -296,6 +296,26 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
 	console_unlock();
 }
 
+int bch2_prt_backtrace(struct printbuf *out, struct task_struct *task)
+{
+	unsigned long entries[32];
+	unsigned i, nr_entries;
+	int ret;
+
+	ret = down_read_killable(&task->signal->exec_update_lock);
+	if (ret)
+		return ret;
+
+	nr_entries = stack_trace_save_tsk(task, entries, ARRAY_SIZE(entries), 0);
+	for (i = 0; i < nr_entries; i++) {
+		prt_printf(out, "[<0>] %pB", (void *)entries[i]);
+		prt_newline(out);
+	}
+
+	up_read(&task->signal->exec_update_lock);
+	return 0;
+}
+
 /* time stats: */
 
 static void bch2_time_stats_update_one(struct time_stats *stats,
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index aa8b416a..a7f68e17 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -356,6 +356,7 @@ u64 bch2_read_flag_list(char *, const char * const[]);
 void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
 
 void bch2_print_string_as_lines(const char *prefix, const char *lines);
+int bch2_prt_backtrace(struct printbuf *, struct task_struct *);
 
 #define NR_QUANTILES		15
 #define QUANTILE_IDX(i)		inorder_to_eytzinger0(i, NR_QUANTILES)
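bch2_prt_backtrace() relies on stack_trace_save_tsk(), which the userspace shim stubs out to return 0, so these backtraces come out empty in bcachefs-tools. For the current thread, the closest userspace analogue is glibc's backtrace(3); a standalone sketch (my own demo, not from the patch):

/* Demo: capture and print the calling thread's stack in userspace,
 * formatted like the kernel-style "[<0>] symbol" lines above. */
#include <execinfo.h>
#include <stdio.h>
#include <stdlib.h>

static void print_backtrace(void)
{
	void *entries[32];
	int nr = backtrace(entries, 32);
	char **syms = backtrace_symbols(entries, nr);

	if (!syms)
		return;
	for (int i = 0; i < nr; i++)
		printf("[<0>] %s\n", syms[i]);
	free(syms);
}

int main(void)
{
	print_backtrace();
	return 0;
}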
diff --git a/linux/kthread.c b/linux/kthread.c
index 41bfca2f..3c7bdb81 100644
--- a/linux/kthread.c
+++ b/linux/kthread.c
@@ -71,8 +71,10 @@ struct task_struct *kthread_create(int (*thread_fn)(void *data),
 	p->thread_fn	= thread_fn;
 	p->thread_data	= thread_data;
 	p->state	= TASK_UNINTERRUPTIBLE;
+	p->signal	= &p->_signal;
 	atomic_set(&p->usage, 1);
 	init_completion(&p->exited);
+	init_rwsem(&p->_signal.exec_update_lock);
 
 	pthread_attr_t attr;
 	pthread_attr_init(&attr);
diff --git a/linux/shrinker.c b/linux/shrinker.c
index 13f0c4b9..25cdfbb6 100644
--- a/linux/shrinker.c
+++ b/linux/shrinker.c
@@ -2,6 +2,7 @@
 #include
 #include
+#include <linux/mm.h>
 #include
 #include
 
@@ -39,30 +40,29 @@ static u64 parse_meminfo_line(const char *line)
 	return v << 10;
 }
 
-static struct meminfo read_meminfo(void)
+void si_meminfo(struct sysinfo *val)
 {
-	struct meminfo ret = { 0 };
 	size_t len, n = 0;
 	char *line = NULL;
 	const char *v;
 	FILE *f;
 
+	memset(val, 0, sizeof(*val));
+
 	f = fopen("/proc/meminfo", "r");
 	if (!f)
-		return ret;
+		return;
 
 	while ((len = getline(&line, &n, f)) != -1) {
 		if ((v = strcmp_prefix(line, "MemTotal:")))
-			ret.total = parse_meminfo_line(v);
+			val->totalram = parse_meminfo_line(v);
 
 		if ((v = strcmp_prefix(line, "MemAvailable:")))
-			ret.available = parse_meminfo_line(v);
+			val->freeram = parse_meminfo_line(v);
 	}
 
 	fclose(f);
 	free(line);
-
-	return ret;
 }
 
 static void run_shrinkers_allocation_failed(gfp_t gfp_mask)
@@ -85,7 +85,7 @@ static void run_shrinkers_allocation_failed(gfp_t gfp_mask)
 void run_shrinkers(gfp_t gfp_mask, bool allocation_failed)
 {
 	struct shrinker *shrinker;
-	struct meminfo info;
+	struct sysinfo info;
 	s64 want_shrink;
 
 	/* Fast out if there are no shrinkers to run. */
@@ -97,10 +97,10 @@ void run_shrinkers(gfp_t gfp_mask, bool allocation_failed)
 		return;
 	}
 
-	info = read_meminfo();
+	si_meminfo(&info);
 
-	if (info.total && info.available) {
-		want_shrink = (info.total >> 2) - info.available;
+	if (info.totalram && info.freeram) {
+		want_shrink = (info.totalram >> 2) - info.freeram;
 
 		if (want_shrink <= 0)
 			return;
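run_shrinkers() targets keeping roughly a quarter of total RAM free and shrinks caches by the shortfall. A standalone sketch of that heuristic, with made-up numbers:

/* Demo of the shrink target: want_shrink = total/4 - free. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t totalram = 16ULL << 30;	/* 16 GiB */
	uint64_t freeram  = 2ULL << 30;		/* 2 GiB available */
	int64_t want_shrink = (totalram >> 2) - freeram;

	if (want_shrink > 0)
		printf("shrink caches by %lld bytes\n", (long long)want_shrink);
	else
		printf("enough memory free, nothing to do\n");
	return 0;
}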