mirror of https://github.com/koverstreet/bcachefs-tools.git
synced 2025-03-31 00:00:03 +03:00

Update bcachefs sources to 794723fc10c4 bcachefs: Topology repair now uses nodes found by scanning to fill holes

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

This commit is contained in:
parent 89b322abb3
commit 8e02744a90

Changed paths:
.bcachefs_revision
c_src
include/linux
libbcachefs:
    alloc_background.c, alloc_foreground.c, backpointers.c, backpointers.h,
    bcachefs.h, bset.c, bset.h, btree_cache.c, btree_gc.c, btree_io.c,
    btree_iter.c, btree_journal_iter.c, btree_journal_iter.h, btree_key_cache.c,
    btree_node_scan.c, btree_node_scan.h, btree_node_scan_types.h,
    btree_trans_commit.c, btree_update.c, btree_update_interior.c,
    btree_update_interior.h, btree_write_buffer.c, buckets.c, chardev.c,
    data_update.c, debug.c, ec.c, errcode.h, error.c, error.h, extents.c,
    extents.h, eytzinger.c, eytzinger.h, fs-io-direct.c, fs.c, fsck.c, inode.c,
    io_misc.c, journal.c, journal_io.c, journal_seq_blacklist.c, logged_ops.c,
    movinggc.c, opts.h, recovery.c, recovery.h, recovery_passes.c,
    recovery_passes.h, recovery_passes_types.h, reflink.c, replicas.c,
    sb-downgrade.c, sb-errors_types.h, snapshot.c, snapshot.h, subvolume.c,
    subvolume.h, subvolume_types.h, super-io.c, super.c, util.c, util.h
linux
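
Overview (editor's note, condensed from the hunks below): this sync pulls in the new btree node scan pass. An unreadable btree root or a hole between interior-node children is no longer fatal: bch2_check_topology() can reconstruct a root from nodes found by scanning devices (bch2_btree_has_scanned_nodes() / bch2_get_scanned_nodes()), and boundary repair at level 1 first tries to fill a gap from scanned nodes before falling back to rewriting node boundaries. The following is an illustrative sketch only; check_gap() is a hypothetical helper name invented for this note, and the real logic lives inline in btree_check_node_boundaries() in the btree_gc.c hunks below:

/*
 * Hypothetical helper, condensed from btree_check_node_boundaries():
 * expected_start is where the next child node should begin; cur is the
 * child that actually follows, leaving a hole in
 * [expected_start, cur->data->min_key).
 */
static int check_gap(struct bch_fs *c, struct btree *b, struct btree *cur,
                     struct bpos expected_start, struct bpos *pulled_from_scan)
{
    int ret = 0;

    if (b->c.level == 1 &&
        bpos_lt(*pulled_from_scan, cur->data->min_key)) {
        /* fill the hole with leaf nodes found by the scan pass */
        ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
                                     expected_start,
                                     bpos_predecessor(cur->data->min_key));
        if (!ret) {
            *pulled_from_scan = cur->data->min_key;
            ret = DID_FILL_FROM_SCAN;   /* caller restarts the repair pass */
        }
    } else if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
                                "btree node with incorrect min_key")) {
        /* old-style repair: stretch cur's min_key back over the hole */
        ret = set_node_min(c, cur, expected_start);
    }
fsck_err:
    return ret;
}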
.bcachefs_revision
@@ -1 +1 @@
-83338f5b2cb8406cda8bf7be3f566ab97c696917
+794723fc10c4a1ff28d4b11c436277ba783f47e6

c_src/cmd_list_journal.c
@@ -223,7 +223,7 @@ int cmd_list_journal(int argc, char *argv[])
     opt_set(opts, degraded, true);
     opt_set(opts, errors, BCH_ON_ERROR_continue);
     opt_set(opts, fix_errors, FSCK_FIX_yes);
-    opt_set(opts, keep_journal, true);
+    opt_set(opts, retain_recovery_info, true);
     opt_set(opts, read_journal_only, true);

     while ((opt = getopt_long(argc, argv, "an:t:k:vh",
include/linux/bio.h
@@ -235,6 +235,7 @@ enum {

 struct bio *bio_alloc_bioset(struct block_device *, unsigned,
                              blk_opf_t, gfp_t, struct bio_set *);
 extern void bio_put(struct bio *);

 int bio_add_page(struct bio *, struct page *, unsigned, unsigned);
@@ -242,6 +243,9 @@ int bio_add_page(struct bio *, struct page *, unsigned, unsigned);
 struct bio *bio_alloc_clone(struct block_device *, struct bio *,
                             gfp_t, struct bio_set *);

+struct bio *bio_alloc(struct block_device *, unsigned,
+                      blk_opf_t, gfp_t);
+
 struct bio *bio_kmalloc(unsigned int, gfp_t);

 extern void bio_endio(struct bio *);
include/linux/bitmap.h
@@ -3,6 +3,7 @@

 #include <string.h>
 #include <linux/bitops.h>
+#include <linux/kernel.h>
 #include <stdlib.h>

 #define DECLARE_BITMAP(name,bits) \
include/linux/bitops.h
@@ -2,7 +2,6 @@
 #define _TOOLS_LINUX_BITOPS_H_

 #include <asm/types.h>
-#include <linux/kernel.h>
 #include <linux/compiler.h>
 #include <linux/page.h>

include/linux/kernel.h
@@ -111,9 +111,6 @@
 #define struct_group(NAME, MEMBERS...) \
     __struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS)

-#define swap(a, b) \
-    do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
-
 /* This counts to 12. Any more, it will return 13th argument. */
 #define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
 #define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
include/linux/rcupdate.h
@@ -13,6 +13,7 @@

 #define kfree_rcu(ptr, rcu_head)        kfree(ptr) /* XXX */
 #define kfree_rcu_mightsleep(ptr)       kfree(ptr) /* XXX */
 #define kvfree_rcu(ptr, rcu_head)       kfree(ptr) /* XXX */
+#define kvfree_rcu_mightsleep(ptr)      kfree(ptr) /* XXX */

 #define RCU_INIT_POINTER(p, v)          WRITE_ONCE(p, v)
include/linux/slab.h
@@ -20,6 +20,11 @@
 #define ARCH_KMALLOC_MINALIGN   16
 #define KMALLOC_MAX_SIZE        SIZE_MAX

+static inline size_t kmalloc_size_roundup(size_t s)
+{
+    return roundup_pow_of_two(s);
+}
+
 static inline void *kmalloc_noprof(size_t size, gfp_t flags)
 {
     unsigned i;
libbcachefs/alloc_background.c
@@ -532,13 +532,13 @@ int bch2_bucket_gens_init(struct bch_fs *c)
         u8 gen = bch2_alloc_to_v4(k, &a)->gen;
         unsigned offset;
         struct bpos pos = alloc_gens_pos(iter.pos, &offset);
+        int ret2 = 0;

         if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
-            ret = commit_do(trans, NULL, NULL,
-                            BCH_TRANS_COMMIT_no_enospc,
-                bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
-            if (ret)
-                break;
+            ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
+                bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+            if (ret2)
+                goto iter_err;
             have_bucket_gens_key = false;
         }

@@ -549,7 +549,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
         }

         g.v.gens[offset] = gen;
         0;
+iter_err:
+        ret2;
     }));

     if (have_bucket_gens_key && !ret)
@@ -852,7 +853,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
                        bucket_journal_seq);
         if (ret) {
             bch2_fs_fatal_error(c,
-                "error setting bucket_needs_journal_commit: %i", ret);
+                "setting bucket_needs_journal_commit: %s", bch2_err_str(ret));
             return ret;
         }
     }
libbcachefs/alloc_foreground.c
@@ -1356,15 +1356,17 @@ retry:

     /* Don't retry from all devices if we're out of open buckets: */
     if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
-        int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
+        int ret2 = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
                                           target, erasure_code,
                                           nr_replicas, &nr_effective,
                                           &have_cache, watermark,
                                           flags, cl);
-        if (!ret ||
-            bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-            bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
+        if (!ret2 ||
+            bch2_err_matches(ret2, BCH_ERR_transaction_restart) ||
+            bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) {
+            ret = ret2;
             goto alloc_done;
+        }
     }

     /*
libbcachefs/backpointers.c
@@ -29,8 +29,7 @@ static bool extent_matches_bp(struct bch_fs *c,
         if (p.ptr.cached)
             continue;

-        bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
-                              &bucket2, &bp2);
+        bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2);
         if (bpos_eq(bucket, bucket2) &&
             !memcmp(&bp, &bp2, sizeof(bp)))
             return true;
@@ -44,6 +43,11 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
                              struct printbuf *err)
 {
     struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
+
+    /* these will be caught by fsck */
+    if (!bch2_dev_exists2(c, bp.k->p.inode))
+        return 0;
+
     struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
     int ret = 0;

@@ -378,7 +382,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_
             backpointer_to_missing_alloc,
             "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
             alloc_iter.pos.inode, alloc_iter.pos.offset,
-            (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+            (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
         ret = bch2_btree_delete_at(trans, bp_iter, 0);
         goto out;
     }
@@ -502,8 +506,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
         if (p.ptr.cached)
             continue;

-        bch2_extent_ptr_to_bp(c, btree, level,
-                              k, p, &bucket_pos, &bp);
+        bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp);

         ret = check_bp_exists(trans, s, bucket_pos, bp, k);
         if (ret)
libbcachefs/backpointers.h
@@ -90,20 +90,40 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
     return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i);
 }

-static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
-                                                    struct bkey_s_c k, struct extent_ptr_decoded p)
+static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
+                                                         struct extent_ptr_decoded p,
+                                                         const union bch_extent_entry *entry)
 {
-    return level    ? BCH_DATA_btree :
-        p.has_ec    ? BCH_DATA_stripe :
-                      BCH_DATA_user;
+    switch (k.k->type) {
+    case KEY_TYPE_btree_ptr:
+    case KEY_TYPE_btree_ptr_v2:
+        return BCH_DATA_btree;
+    case KEY_TYPE_extent:
+    case KEY_TYPE_reflink_v:
+        return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user;
+    case KEY_TYPE_stripe: {
+        const struct bch_extent_ptr *ptr = &entry->ptr;
+        struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+
+        BUG_ON(ptr < s.v->ptrs ||
+               ptr >= s.v->ptrs + s.v->nr_blocks);
+
+        return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
+            ? BCH_DATA_parity
+            : BCH_DATA_user;
+    }
+    default:
+        BUG();
+    }
 }

 static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
                enum btree_id btree_id, unsigned level,
                struct bkey_s_c k, struct extent_ptr_decoded p,
+               const union bch_extent_entry *entry,
                struct bpos *bucket_pos, struct bch_backpointer *bp)
 {
-    enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+    enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);
     s64 sectors = level ? btree_sectors(c) : k.k->size;
     u32 bucket_offset;

libbcachefs/bcachefs.h
@@ -209,7 +209,7 @@
 #include "fifo.h"
 #include "nocow_locking_types.h"
 #include "opts.h"
-#include "recovery_types.h"
+#include "recovery_passes_types.h"
 #include "sb-errors_types.h"
 #include "seqmutex.h"
 #include "time_stats.h"
@@ -456,6 +456,7 @@ enum bch_time_stats {

 #include "alloc_types.h"
 #include "btree_types.h"
+#include "btree_node_scan_types.h"
 #include "btree_write_buffer_types.h"
 #include "buckets_types.h"
 #include "buckets_waiting_for_journal_types.h"
@@ -810,7 +811,6 @@ struct bch_fs {

     /* snapshot.c: */
     struct snapshot_table __rcu *snapshots;
-    size_t                  snapshot_table_size;
     struct mutex            snapshot_table_lock;
     struct rw_semaphore     snapshot_create_lock;

@@ -826,6 +826,7 @@ struct bch_fs {
     struct btree_root       btree_roots_known[BTREE_ID_NR];
     DARRAY(struct btree_root) btree_roots_extra;
     struct mutex            btree_root_lock;
+    unsigned long           btrees_lost_data; /* bitmask */

     struct btree_cache      btree_cache;

@@ -849,6 +850,8 @@ struct bch_fs {
     struct workqueue_struct *btree_interior_update_worker;
     struct work_struct      btree_interior_update_work;

+    struct workqueue_struct *btree_node_rewrite_worker;
+
     struct list_head        pending_node_rewrites;
     struct mutex            pending_node_rewrites_lock;

@@ -1102,6 +1105,8 @@ struct bch_fs {
     struct journal_keys     journal_keys;
     struct list_head        journal_iters;

+    struct find_btree_nodes found_btree_nodes;
+
     u64                     last_bucket_seq_cleanup;

     u64                     counters_on_mount[BCH_COUNTER_NR];
libbcachefs/bset.c
@@ -134,18 +134,24 @@ void bch2_dump_btree_node_iter(struct btree *b,
     printbuf_exit(&buf);
 }

-#ifdef CONFIG_BCACHEFS_DEBUG
-
-void __bch2_verify_btree_nr_keys(struct btree *b)
+struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b)
 {
     struct bset_tree *t;
     struct bkey_packed *k;
-    struct btree_nr_keys nr = { 0 };
+    struct btree_nr_keys nr = {};

     for_each_bset(b, t)
         bset_tree_for_each_key(b, t, k)
             if (!bkey_deleted(k))
                 btree_keys_account_key_add(&nr, t - b->set, k);
+    return nr;
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+void __bch2_verify_btree_nr_keys(struct btree *b)
+{
+    struct btree_nr_keys nr = bch2_btree_node_count_keys(b);

     BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
 }

libbcachefs/bset.h
@@ -458,6 +458,8 @@ struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,

 /* Accounting: */

+struct btree_nr_keys bch2_btree_node_count_keys(struct btree *);
+
 static inline void btree_keys_account_key(struct btree_nr_keys *n,
                                           unsigned bset,
                                           struct bkey_packed *k,
libbcachefs/btree_cache.c
@@ -808,7 +808,8 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
     prt_printf(&buf, "\nmax ");
     bch2_bpos_to_text(&buf, b->data->max_key);

-    bch2_fs_inconsistent(c, "%s", buf.buf);
+    bch2_fs_topology_error(c, "%s", buf.buf);
+
     printbuf_exit(&buf);
 }

@@ -1134,6 +1135,8 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
     b = btree_cache_find(bc, k);
     if (!b)
         return;
+
+    BUG_ON(b == btree_node_root(trans->c, b));
 wait_on_io:
     /* not allowed to wait on io with btree locks held: */

libbcachefs/btree_gc.c
@@ -7,11 +7,13 @@
 #include "bcachefs.h"
 #include "alloc_background.h"
 #include "alloc_foreground.h"
+#include "backpointers.h"
 #include "bkey_methods.h"
 #include "bkey_buf.h"
 #include "btree_journal_iter.h"
 #include "btree_key_cache.h"
 #include "btree_locking.h"
+#include "btree_node_scan.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
 #include "btree_gc.h"
@@ -24,7 +26,7 @@
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
-#include "recovery.h"
+#include "recovery_passes.h"
 #include "reflink.h"
 #include "replicas.h"
 #include "super-io.h"
@@ -40,6 +42,7 @@

 #define DROP_THIS_NODE          10
 #define DROP_PREV_NODE          11
+#define DID_FILL_FROM_SCAN      12

 static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
 {
@@ -70,90 +73,6 @@ static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
     __gc_pos_set(c, new_pos);
 }

-/*
- * Missing: if an interior btree node is empty, we need to do something -
- * perhaps just kill it
- */
-static int bch2_gc_check_topology(struct bch_fs *c,
-                                  struct btree *b,
-                                  struct bkey_buf *prev,
-                                  struct bkey_buf cur,
-                                  bool is_last)
-{
-    struct bpos node_start = b->data->min_key;
-    struct bpos node_end   = b->data->max_key;
-    struct bpos expected_start = bkey_deleted(&prev->k->k)
-        ? node_start
-        : bpos_successor(prev->k->k.p);
-    struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-    int ret = 0;
-
-    if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
-        struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);
-
-        if (!bpos_eq(expected_start, bp->v.min_key)) {
-            bch2_topology_error(c);
-
-            if (bkey_deleted(&prev->k->k)) {
-                prt_printf(&buf1, "start of node: ");
-                bch2_bpos_to_text(&buf1, node_start);
-            } else {
-                bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k));
-            }
-            bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k));
-
-            if (__fsck_err(c,
-                           FSCK_CAN_FIX|
-                           FSCK_CAN_IGNORE|
-                           FSCK_NO_RATELIMIT,
-                           btree_node_topology_bad_min_key,
-                           "btree node with incorrect min_key at btree %s level %u:\n"
-                           "  prev %s\n"
-                           "  cur %s",
-                           bch2_btree_id_str(b->c.btree_id), b->c.level,
-                           buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) {
-                bch_info(c, "Halting mark and sweep to start topology repair pass");
-                ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
-                goto err;
-            } else {
-                set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
-            }
-        }
-    }
-
-    if (is_last && !bpos_eq(cur.k->k.p, node_end)) {
-        bch2_topology_error(c);
-
-        printbuf_reset(&buf1);
-        printbuf_reset(&buf2);
-
-        bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k));
-        bch2_bpos_to_text(&buf2, node_end);
-
-        if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT,
-                       btree_node_topology_bad_max_key,
-                       "btree node with incorrect max_key at btree %s level %u:\n"
-                       "  %s\n"
-                       "  expected %s",
-                       bch2_btree_id_str(b->c.btree_id), b->c.level,
-                       buf1.buf, buf2.buf) &&
-            should_restart_for_topology_repair(c)) {
-            bch_info(c, "Halting mark and sweep to start topology repair pass");
-            ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
-            goto err;
-        } else {
-            set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
-        }
-    }
-
-    bch2_bkey_buf_copy(prev, c, cur.k);
-err:
-fsck_err:
-    printbuf_exit(&buf2);
-    printbuf_exit(&buf1);
-    return ret;
-}
-
 static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
 {
     switch (b->key.k.type) {
@@ -212,6 +131,17 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
     struct bkey_i_btree_ptr_v2 *new;
     int ret;

+    if (c->opts.verbose) {
+        struct printbuf buf = PRINTBUF;
+
+        bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+        prt_str(&buf, " -> ");
+        bch2_bpos_to_text(&buf, new_min);
+
+        bch_info(c, "%s(): %s", __func__, buf.buf);
+        printbuf_exit(&buf);
+    }
+
     new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
     if (!new)
         return -BCH_ERR_ENOMEM_gc_repair_key;
@@ -237,6 +167,17 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
     struct bkey_i_btree_ptr_v2 *new;
     int ret;

+    if (c->opts.verbose) {
+        struct printbuf buf = PRINTBUF;
+
+        bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+        prt_str(&buf, " -> ");
+        bch2_bpos_to_text(&buf, new_max);
+
+        bch_info(c, "%s(): %s", __func__, buf.buf);
+        printbuf_exit(&buf);
+    }
+
     ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
     if (ret)
         return ret;
@@ -268,127 +209,138 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
     return 0;
 }

-static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
-                                        struct btree *prev, struct btree *cur)
+static int btree_check_node_boundaries(struct bch_fs *c, struct btree *b,
+                                       struct btree *prev, struct btree *cur,
+                                       struct bpos *pulled_from_scan)
 {
     struct bpos expected_start = !prev
         ? b->data->min_key
         : bpos_successor(prev->key.k.p);
-    struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+    struct printbuf buf = PRINTBUF;
     int ret = 0;

-    if (!prev) {
-        prt_printf(&buf1, "start of node: ");
-        bch2_bpos_to_text(&buf1, b->data->min_key);
-    } else {
-        bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key));
-    }
-
-    bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key));
-
-    if (prev &&
-        bpos_gt(expected_start, cur->data->min_key) &&
-        BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {
-        /* cur overwrites prev: */
-
-        if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key,
-                                        cur->data->min_key), c,
-                btree_node_topology_overwritten_by_next_node,
-                "btree node overwritten by next node at btree %s level %u:\n"
-                "  node %s\n"
-                "  next %s",
-                bch2_btree_id_str(b->c.btree_id), b->c.level,
-                buf1.buf, buf2.buf)) {
-            ret = DROP_PREV_NODE;
-            goto out;
-        }
-
-        if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p,
-                                         bpos_predecessor(cur->data->min_key)), c,
-                btree_node_topology_bad_max_key,
-                "btree node with incorrect max_key at btree %s level %u:\n"
-                "  node %s\n"
-                "  next %s",
-                bch2_btree_id_str(b->c.btree_id), b->c.level,
-                buf1.buf, buf2.buf))
-            ret = set_node_max(c, prev,
-                               bpos_predecessor(cur->data->min_key));
-    } else {
-        /* prev overwrites cur: */
-
-        if (mustfix_fsck_err_on(bpos_ge(expected_start,
-                                        cur->data->max_key), c,
-                btree_node_topology_overwritten_by_prev_node,
-                "btree node overwritten by prev node at btree %s level %u:\n"
-                "  prev %s\n"
-                "  node %s",
-                bch2_btree_id_str(b->c.btree_id), b->c.level,
-                buf1.buf, buf2.buf)) {
-            ret = DROP_THIS_NODE;
-            goto out;
-        }
-
-        if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c,
-                btree_node_topology_bad_min_key,
-                "btree node with incorrect min_key at btree %s level %u:\n"
-                "  prev %s\n"
-                "  node %s",
-                bch2_btree_id_str(b->c.btree_id), b->c.level,
-                buf1.buf, buf2.buf))
-            ret = set_node_min(c, cur, expected_start);
-    }
-out:
+    BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+           !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
+                    b->data->min_key));
+
+    if (bpos_eq(expected_start, cur->data->min_key))
+        return 0;
+
+    prt_printf(&buf, "  at btree %s level %u:\n  parent: ",
+               bch2_btree_id_str(b->c.btree_id), b->c.level);
+    bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+    if (prev) {
+        prt_printf(&buf, "\n  prev: ");
+        bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key));
+    }
+
+    prt_str(&buf, "\n  next: ");
+    bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key));
+
+    if (bpos_lt(expected_start, cur->data->min_key)) {                /* gap */
+        if (b->c.level == 1 &&
+            bpos_lt(*pulled_from_scan, cur->data->min_key)) {
+            ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
+                                         expected_start,
+                                         bpos_predecessor(cur->data->min_key));
+            if (ret)
+                goto err;
+
+            *pulled_from_scan = cur->data->min_key;
+            ret = DID_FILL_FROM_SCAN;
+        } else {
+            if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
+                                 "btree node with incorrect min_key%s", buf.buf))
+                ret = set_node_min(c, cur, expected_start);
+        }
+    } else {                                                          /* overlap */
+        if (prev && BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) { /* cur overwrites prev */
+            if (bpos_ge(prev->data->min_key, cur->data->min_key)) {   /* fully? */
+                if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_next_node,
+                                     "btree node overwritten by next node%s", buf.buf))
+                    ret = DROP_PREV_NODE;
+            } else {
+                if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
+                                     "btree node with incorrect max_key%s", buf.buf))
+                    ret = set_node_max(c, prev,
+                                       bpos_predecessor(cur->data->min_key));
+            }
+        } else {
+            if (bpos_ge(expected_start, cur->data->max_key)) {        /* fully? */
+                if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_prev_node,
+                                     "btree node overwritten by prev node%s", buf.buf))
+                    ret = DROP_THIS_NODE;
+            } else {
+                if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
+                                     "btree node with incorrect min_key%s", buf.buf))
+                    ret = set_node_min(c, cur, expected_start);
+            }
+        }
+    }
+err:
 fsck_err:
-    printbuf_exit(&buf2);
-    printbuf_exit(&buf1);
+    printbuf_exit(&buf);
     return ret;
 }
 static int btree_repair_node_end(struct bch_fs *c, struct btree *b,
-                                 struct btree *child)
+                                 struct btree *child, struct bpos *pulled_from_scan)
 {
-    struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+    struct printbuf buf = PRINTBUF;
     int ret = 0;

-    bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key));
-    bch2_bpos_to_text(&buf2, b->key.k.p);
+    if (bpos_eq(child->key.k.p, b->key.k.p))
+        return 0;

-    if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c,
-            btree_node_topology_bad_max_key,
-            "btree node with incorrect max_key at btree %s level %u:\n"
-            "  %s\n"
-            "  expected %s",
-            bch2_btree_id_str(b->c.btree_id), b->c.level,
-            buf1.buf, buf2.buf)) {
-        ret = set_node_max(c, child, b->key.k.p);
-        if (ret)
-            goto err;
+    prt_printf(&buf, "at btree %s level %u:\n  parent: ",
+               bch2_btree_id_str(b->c.btree_id), b->c.level);
+    bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+    prt_str(&buf, "\n  child: ");
+    bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key));
+
+    if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
+                         "btree node with incorrect max_key%s", buf.buf)) {
+        if (b->c.level == 1 &&
+            bpos_lt(*pulled_from_scan, b->key.k.p)) {
+            ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
+                        bpos_successor(child->key.k.p), b->key.k.p);
+            if (ret)
+                goto err;
+
+            *pulled_from_scan = b->key.k.p;
+            ret = DID_FILL_FROM_SCAN;
+        } else {
+            ret = set_node_max(c, child, b->key.k.p);
+        }
     }
 err:
 fsck_err:
-    printbuf_exit(&buf2);
-    printbuf_exit(&buf1);
+    printbuf_exit(&buf);
     return ret;
 }
-static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
+static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b,
+                                              struct bpos *pulled_from_scan)
 {
     struct bch_fs *c = trans->c;
     struct btree_and_journal_iter iter;
     struct bkey_s_c k;
     struct bkey_buf prev_k, cur_k;
     struct btree *prev = NULL, *cur = NULL;
-    bool have_child, dropped_children = false;
+    bool have_child, new_pass = false;
     struct printbuf buf = PRINTBUF;
     int ret = 0;

     if (!b->c.level)
         return 0;
-again:
-    prev = NULL;
-    have_child = dropped_children = false;

     bch2_bkey_buf_init(&prev_k);
     bch2_bkey_buf_init(&cur_k);
+again:
+    cur = prev = NULL;
+    have_child = new_pass = false;
+
     bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
     iter.prefetch = true;

@@ -415,9 +367,10 @@ again:
                 b->c.level - 1,
                 buf.buf)) {
             bch2_btree_node_evict(trans, cur_k.k);
-            ret = bch2_journal_key_delete(c, b->c.btree_id,
-                                          b->c.level, cur_k.k->k.p);
             cur = NULL;
+            ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?:
+                bch2_journal_key_delete(c, b->c.btree_id,
+                                        b->c.level, cur_k.k->k.p);
             if (ret)
                 break;
             continue;
@@ -427,7 +380,23 @@ again:
         if (ret)
             break;

-        ret = btree_repair_node_boundaries(c, b, prev, cur);
+        if (bch2_btree_node_is_stale(c, cur)) {
+            bch_info(c, "btree node %s older than nodes found by scanning", buf.buf);
+            six_unlock_read(&cur->c.lock);
+            bch2_btree_node_evict(trans, cur_k.k);
+            ret = bch2_journal_key_delete(c, b->c.btree_id,
+                                          b->c.level, cur_k.k->k.p);
+            cur = NULL;
+            if (ret)
+                break;
+            continue;
+        }
+
+        ret = btree_check_node_boundaries(c, b, prev, cur, pulled_from_scan);
+        if (ret == DID_FILL_FROM_SCAN) {
+            new_pass = true;
+            ret = 0;
+        }

         if (ret == DROP_THIS_NODE) {
             six_unlock_read(&cur->c.lock);
@@ -445,6 +414,7 @@ again:
         prev = NULL;

         if (ret == DROP_PREV_NODE) {
+            bch_info(c, "dropped prev node");
             bch2_btree_node_evict(trans, prev_k.k);
             ret = bch2_journal_key_delete(c, b->c.btree_id,
                                           b->c.level, prev_k.k->k.p);
@@ -452,8 +422,6 @@ again:
                 break;

             bch2_btree_and_journal_iter_exit(&iter);
-            bch2_bkey_buf_exit(&prev_k, c);
-            bch2_bkey_buf_exit(&cur_k, c);
             goto again;
         } else if (ret)
             break;
@@ -465,7 +433,11 @@ again:

     if (!ret && !IS_ERR_OR_NULL(prev)) {
         BUG_ON(cur);
-        ret = btree_repair_node_end(c, b, prev);
+        ret = btree_repair_node_end(c, b, prev, pulled_from_scan);
+        if (ret == DID_FILL_FROM_SCAN) {
+            new_pass = true;
+            ret = 0;
+        }
     }

     if (!IS_ERR_OR_NULL(prev))
@@ -479,6 +451,10 @@ again:
         goto err;

     bch2_btree_and_journal_iter_exit(&iter);
+
+    if (new_pass)
+        goto again;
+
     bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
     iter.prefetch = true;

@@ -495,7 +471,7 @@ again:
         if (ret)
             goto err;

-        ret = bch2_btree_repair_topology_recurse(trans, cur);
+        ret = bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan);
         six_unlock_read(&cur->c.lock);
         cur = NULL;

@@ -503,7 +479,7 @@ again:
             bch2_btree_node_evict(trans, cur_k.k);
             ret = bch2_journal_key_delete(c, b->c.btree_id,
                                           b->c.level, cur_k.k->k.p);
-            dropped_children = true;
+            new_pass = true;
         }

         if (ret)
@@ -530,12 +506,14 @@ fsck_err:
         six_unlock_read(&cur->c.lock);

     bch2_btree_and_journal_iter_exit(&iter);
-    bch2_bkey_buf_exit(&prev_k, c);
-    bch2_bkey_buf_exit(&cur_k, c);

-    if (!ret && dropped_children)
+    if (!ret && new_pass)
         goto again;

+    BUG_ON(!ret && bch2_btree_node_check_topology(trans, b));
+
+    bch2_bkey_buf_exit(&prev_k, c);
+    bch2_bkey_buf_exit(&cur_k, c);
     printbuf_exit(&buf);
     return ret;
 }
@@ -543,32 +521,63 @@
 int bch2_check_topology(struct bch_fs *c)
 {
     struct btree_trans *trans = bch2_trans_get(c);
-    struct btree *b;
-    unsigned i;
+    struct bpos pulled_from_scan = POS_MIN;
     int ret = 0;

-    for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
+    for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
         struct btree_root *r = bch2_btree_id_root(c, i);
+        bool reconstructed_root = false;

-        if (!r->alive)
-            continue;
-
-        b = r->b;
-        if (btree_node_fake(b))
-            continue;
+        if (r->error) {
+            ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+            if (ret)
+                break;
+reconstruct_root:
+            bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i));
+
+            r->alive = false;
+            r->error = 0;
+
+            if (!bch2_btree_has_scanned_nodes(c, i)) {
+                mustfix_fsck_err(c, btree_root_unreadable_and_scan_found_nothing,
+                                 "no nodes found for btree %s, continue?", bch2_btree_id_str(i));
+                bch2_btree_root_alloc_fake(c, i, 0);
+            } else {
+                bch2_btree_root_alloc_fake(c, i, 1);
+                ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
+                if (ret)
+                    break;
+            }
+
+            bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+            reconstructed_root = true;
+        }
+
+        struct btree *b = r->b;

         btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
-        ret = bch2_btree_repair_topology_recurse(trans, b);
+        ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan);
         six_unlock_read(&b->c.lock);

         if (ret == DROP_THIS_NODE) {
-            bch_err(c, "empty btree root - repair unimplemented");
-            ret = -BCH_ERR_fsck_repair_unimplemented;
+            bch2_btree_node_hash_remove(&c->btree_cache, b);
+            mutex_lock(&c->btree_cache.lock);
+            list_move(&b->list, &c->btree_cache.freeable);
+            mutex_unlock(&c->btree_cache.lock);
+
+            r->b = NULL;
+
+            if (!reconstructed_root)
+                goto reconstruct_root;
+
+            bch_err(c, "empty btree root %s", bch2_btree_id_str(i));
+            bch2_btree_root_alloc_fake(c, i, 0);
+            r->alive = false;
+            ret = 0;
         }
     }
+fsck_err:
     bch2_trans_put(trans);

     return ret;
 }
@@ -591,7 +600,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
     bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) {
         struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
         struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
-        enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr);
+        enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, p, entry_c);

         if (fsck_err_on(!g->gen_valid,
                 c, ptr_to_missing_alloc_key,
@@ -657,7 +666,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
             continue;

         if (fsck_err_on(bucket_data_type(g->data_type) &&
-                bucket_data_type(g->data_type) != data_type, c,
+                bucket_data_type(g->data_type) !=
+                bucket_data_type(data_type), c,
                 ptr_bucket_data_type_mismatch,
                 "bucket %u:%zu different types of data in same bucket: %s, %s\n"
                 "while marking %s",
@@ -698,18 +708,13 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
     }

     if (do_update) {
-        struct bkey_ptrs ptrs;
-        union bch_extent_entry *entry;
-        struct bch_extent_ptr *ptr;
-        struct bkey_i *new;
-
         if (is_root) {
             bch_err(c, "cannot update btree roots yet");
             ret = -EINVAL;
             goto err;
         }

-        new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
+        struct bkey_i *new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
         if (!new) {
             ret = -BCH_ERR_ENOMEM_gc_repair_key;
             bch_err_msg(c, ret, "allocating new key");
@@ -724,7 +729,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
          * btree node isn't there anymore, the read path will
          * sort it out:
          */
-        ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+        struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
         bkey_for_each_ptr(ptrs, ptr) {
             struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
             struct bucket *g = PTR_GC_BUCKET(ca, ptr);
@@ -732,19 +737,26 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
                 ptr->gen = g->gen;
             }
         } else {
-            bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
-                struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-                struct bucket *g = PTR_GC_BUCKET(ca, ptr);
-                enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);
-
-                (ptr->cached &&
-                 (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
-                (!ptr->cached &&
-                 gen_cmp(ptr->gen, g->gen) < 0) ||
-                gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
-                (g->data_type &&
-                 g->data_type != data_type);
-            }));
+            struct bkey_ptrs ptrs;
+            union bch_extent_entry *entry;
+restart_drop_ptrs:
+            ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+            bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) {
+                struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+                struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
+                enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry);
+
+                if ((p.ptr.cached &&
+                     (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) ||
+                    (!p.ptr.cached &&
+                     gen_cmp(p.ptr.gen, g->gen) < 0) ||
+                    gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX ||
+                    (g->data_type &&
+                     g->data_type != data_type)) {
+                    bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr);
+                    goto restart_drop_ptrs;
+                }
+            }
         }
again:
         ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
         bkey_extent_entry_for_each(ptrs, entry) {
@@ -774,12 +786,6 @@ found:
             }
         }

-        ret = bch2_journal_key_insert_take(c, btree_id, level, new);
-        if (ret) {
-            kfree(new);
-            goto err;
-        }
-
         if (level)
             bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);

@@ -793,6 +799,12 @@ found:
             bch_info(c, "new key %s", buf.buf);
         }

+        ret = bch2_journal_key_insert_take(c, btree_id, level, new);
+        if (ret) {
+            kfree(new);
+            goto err;
+        }
+
         *k = bkey_i_to_s_c(new);
     }
 err:
@@ -819,10 +831,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
     BUG_ON(bch2_journal_seq_verify &&
            k->k->version.lo > atomic64_read(&c->journal.seq));

-    ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
-    if (ret)
-        goto err;
-
     if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c,
             bkey_version_in_future,
             "key version number higher than recorded: %llu > %llu",
@@ -831,8 +839,13 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
         atomic64_set(&c->key_version, k->k->version.lo);
     }

+    ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
+    if (ret)
+        goto err;
+
     ret = commit_do(trans, NULL, NULL, 0,
-            bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
+            bch2_key_trigger(trans, btree_id, level, old,
+                             unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
 fsck_err:
 err:
     bch_err_fn(c, ret);
@@ -841,42 +854,30 @@ err:

 static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial)
 {
     struct bch_fs *c = trans->c;
     struct btree_node_iter iter;
     struct bkey unpacked;
     struct bkey_s_c k;
-    struct bkey_buf prev, cur;
     int ret = 0;

+    ret = bch2_btree_node_check_topology(trans, b);
+    if (ret)
+        return ret;
+
     if (!btree_node_type_needs_gc(btree_node_type(b)))
         return 0;

     bch2_btree_node_iter_init_from_start(&iter, b);
-    bch2_bkey_buf_init(&prev);
-    bch2_bkey_buf_init(&cur);
-    bkey_init(&prev.k->k);

     while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
         ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
                                &k, initial);
         if (ret)
-            break;
+            return ret;

         bch2_btree_node_iter_advance(&iter, b);
-
-        if (b->c.level) {
-            bch2_bkey_buf_reassemble(&cur, c, k);
-
-            ret = bch2_gc_check_topology(c, b, &prev, cur,
-                                         bch2_btree_node_iter_end(&iter));
-            if (ret)
-                break;
-        }
     }

-    bch2_bkey_buf_exit(&cur, c);
-    bch2_bkey_buf_exit(&prev, c);
-    return ret;
+    return 0;
 }

 static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id,
@@ -925,14 +926,16 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
     struct bch_fs *c = trans->c;
     struct btree_and_journal_iter iter;
     struct bkey_s_c k;
-    struct bkey_buf cur, prev;
+    struct bkey_buf cur;
     struct printbuf buf = PRINTBUF;
     int ret = 0;

+    ret = bch2_btree_node_check_topology(trans, b);
+    if (ret)
+        return ret;
+
     bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
-    bch2_bkey_buf_init(&prev);
     bch2_bkey_buf_init(&cur);
-    bkey_init(&prev.k->k);

     while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
         BUG_ON(bpos_lt(k.k->p, b->data->min_key));
@@ -943,20 +946,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
         if (ret)
             goto fsck_err;

-        if (b->c.level) {
-            bch2_bkey_buf_reassemble(&cur, c, k);
-            k = bkey_i_to_s_c(cur.k);
-
-            bch2_btree_and_journal_iter_advance(&iter);
-
-            ret = bch2_gc_check_topology(c, b,
-                    &prev, cur,
-                    !bch2_btree_and_journal_iter_peek(&iter).k);
-            if (ret)
-                goto fsck_err;
-        } else {
-            bch2_btree_and_journal_iter_advance(&iter);
-        }
+        bch2_btree_and_journal_iter_advance(&iter);
     }

     if (b->c.level > target_depth) {
@@ -1015,7 +1005,6 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
     }
 fsck_err:
     bch2_bkey_buf_exit(&cur, c);
-    bch2_bkey_buf_exit(&prev, c);
     bch2_btree_and_journal_iter_exit(&iter);
     printbuf_exit(&buf);
     return ret;
@@ -1033,9 +1022,6 @@ static int bch2_gc_btree_init(struct btree_trans *trans,

     b = bch2_btree_id_root(c, btree_id)->b;

-    if (btree_node_fake(b))
-        return 0;
-
     six_lock_read(&b->c.lock, NULL, NULL);
     printbuf_reset(&buf);
     bch2_bpos_to_text(&buf, b->data->min_key);
@@ -1392,11 +1378,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
                        *old,
                        b->data_type);
     gc = *b;
-    percpu_up_read(&c->mark_lock);

     if (gc.data_type != old_gc.data_type ||
         gc.dirty_sectors != old_gc.dirty_sectors)
         bch2_dev_usage_update_m(c, ca, &old_gc, &gc);
+    percpu_up_read(&c->mark_lock);

     if (metadata_only &&
         gc.data_type != BCH_DATA_sb &&
libbcachefs/btree_io.c
@@ -654,6 +654,7 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
      */
     bch2_bset_set_no_aux_tree(b, b->set);
     bch2_btree_build_aux_trees(b);
+    b->nr = bch2_btree_node_count_keys(b);

     struct bkey_s_c k;
     struct bkey unpacked;
@@ -1066,7 +1067,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,

         ret = bset_encrypt(c, i, b->written << 9);
         if (bch2_fs_fatal_err_on(ret, c,
-                "error decrypting btree node: %i", ret))
+                "decrypting btree node: %s", bch2_err_str(ret)))
             goto fsck_err;

         btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
@@ -1107,7 +1108,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,

         ret = bset_encrypt(c, i, b->written << 9);
         if (bch2_fs_fatal_err_on(ret, c,
-                "error decrypting btree node: %i\n", ret))
+                "decrypting btree node: %s", bch2_err_str(ret)))
             goto fsck_err;

         sectors = vstruct_sectors(bne, c->block_bits);
@@ -1265,8 +1266,10 @@ fsck_err:
     if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
         ret == -BCH_ERR_btree_node_read_err_must_retry)
         retry_read = 1;
-    else
+    else {
         set_btree_node_read_error(b);
+        set_bit(b->c.btree_id, &c->btrees_lost_data);
+    }
     goto out;
 }

@@ -1327,6 +1330,7 @@ start:

         if (!can_retry) {
             set_btree_node_read_error(b);
+            set_bit(b->c.btree_id, &c->btrees_lost_data);
             break;
         }
     }
@@ -1338,7 +1342,7 @@ start:
     if (saw_error && !btree_node_read_error(b)) {
         printbuf_reset(&buf);
         bch2_bpos_to_text(&buf, b->key.k.p);
-        bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
+        bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
                  __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);

         bch2_btree_node_rewrite_async(c, b);
@@ -1526,9 +1530,10 @@ fsck_err:
         ret = -1;
     }

-    if (ret)
+    if (ret) {
         set_btree_node_read_error(b);
-    else if (*saw_error)
+        set_bit(b->c.btree_id, &c->btrees_lost_data);
+    } else if (*saw_error)
         bch2_btree_node_rewrite_async(c, b);

     for (i = 0; i < ra->nr; i++) {
@@ -1657,13 +1662,14 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,

         prt_str(&buf, "btree node read error: no device to read from\n at ");
         bch2_btree_pos_to_text(&buf, c, b);
-        bch_err(c, "%s", buf.buf);
+        bch_err_ratelimited(c, "%s", buf.buf);

         if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
             c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
             bch2_fatal_error(c);

         set_btree_node_read_error(b);
+        set_bit(b->c.btree_id, &c->btrees_lost_data);
         clear_btree_node_read_in_flight(b);
         wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
         printbuf_exit(&buf);
@@ -1874,8 +1880,8 @@ out:
     return;
 err:
     set_btree_node_noevict(b);
-    if (!bch2_err_matches(ret, EROFS))
-        bch2_fs_fatal_error(c, "fatal error writing btree node: %s", bch2_err_str(ret));
+    bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
+                         "writing btree node: %s", bch2_err_str(ret));
     goto out;
 }

@@ -2131,7 +2137,7 @@ do_write:

     ret = bset_encrypt(c, i, b->written << 9);
     if (bch2_fs_fatal_err_on(ret, c,
-            "error encrypting btree node: %i\n", ret))
+            "encrypting btree node: %s", bch2_err_str(ret)))
         goto err;

     nonce = btree_nonce(i, b->written << 9);
libbcachefs/btree_iter.c
@@ -927,8 +927,22 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
         if (ret)
             goto err;
     } else {
-        bch2_bkey_buf_unpack(&tmp, c, l->b,
-                             bch2_btree_node_iter_peek(&l->iter, l->b));
+        struct bkey_packed *k = bch2_btree_node_iter_peek(&l->iter, l->b);
+        if (!k) {
+            struct printbuf buf = PRINTBUF;
+
+            prt_str(&buf, "node not found at pos ");
+            bch2_bpos_to_text(&buf, path->pos);
+            prt_str(&buf, " within parent node ");
+            bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&l->b->key));
+
+            bch2_fs_fatal_error(c, "%s", buf.buf);
+            printbuf_exit(&buf);
+            ret = -BCH_ERR_btree_need_topology_repair;
+            goto err;
+        }
+
+        bch2_bkey_buf_unpack(&tmp, c, l->b, k);

         if ((flags & BTREE_ITER_PREFETCH) &&
             c->opts.btree_node_prefetch) {
@@ -962,7 +976,6 @@ err:
     return ret;
 }

-
 static int bch2_btree_path_traverse_all(struct btree_trans *trans)
 {
     struct bch_fs *c = trans->c;
@@ -2790,6 +2803,31 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
     struct btree_transaction_stats *s = btree_trans_stats(trans);
     s->max_mem = max(s->max_mem, new_bytes);

+    if (trans->used_mempool) {
+        if (trans->mem_bytes >= new_bytes)
+            goto out_change_top;
+
+        /* No more space from mempool item, need malloc new one */
+        new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN);
+        if (unlikely(!new_mem)) {
+            bch2_trans_unlock(trans);
+
+            new_mem = kmalloc(new_bytes, GFP_KERNEL);
+            if (!new_mem)
+                return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+
+            ret = bch2_trans_relock(trans);
+            if (ret) {
+                kfree(new_mem);
+                return ERR_PTR(ret);
+            }
+        }
+        memcpy(new_mem, trans->mem, trans->mem_top);
+        trans->used_mempool = false;
+        mempool_free(trans->mem, &c->btree_trans_mem_pool);
+        goto out_new_mem;
+    }
+
     new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
     if (unlikely(!new_mem)) {
         bch2_trans_unlock(trans);
@@ -2798,6 +2836,8 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
         if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
             new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
             new_bytes = BTREE_TRANS_MEM_MAX;
+            memcpy(new_mem, trans->mem, trans->mem_top);
             trans->used_mempool = true;
+            kfree(trans->mem);
         }

@@ -2811,7 +2851,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
         if (ret)
             return ERR_PTR(ret);
     }
-
+out_new_mem:
     trans->mem = new_mem;
     trans->mem_bytes = new_bytes;

@@ -2819,7 +2859,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
         trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
         return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
     }
-
+out_change_top:
     p = trans->mem + trans->mem_top;
     trans->mem_top += size;
     memset(p, 0, size);
@@ -3093,7 +3133,7 @@ void bch2_trans_put(struct btree_trans *trans)
     if (paths_allocated != trans->_paths_allocated)
         kvfree_rcu_mightsleep(paths_allocated);

-    if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
+    if (trans->used_mempool)
         mempool_free(trans->mem, &c->btree_trans_mem_pool);
     else
         kfree(trans->mem);
libbcachefs/btree_journal_iter.c
@@ -261,6 +261,22 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
     return bch2_journal_key_insert(c, id, level, &whiteout);
 }

+bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree,
+                                 unsigned level, struct bpos pos)
+{
+    struct journal_keys *keys = &trans->c->journal_keys;
+    size_t idx = bch2_journal_key_search(keys, btree, level, pos);
+
+    if (!trans->journal_replay_not_finished)
+        return false;
+
+    return (idx < keys->size &&
+            keys->data[idx].btree_id == btree &&
+            keys->data[idx].level == level &&
+            bpos_eq(keys->data[idx].k->k.p, pos) &&
+            bkey_deleted(&keys->data[idx].k->k));
+}
+
 void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
                                   unsigned level, struct bpos pos)
 {
@@ -363,7 +379,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter

 struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
 {
-    struct bkey_s_c btree_k, journal_k, ret;
+    struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret;

     if (iter->prefetch && iter->journal.level)
         btree_and_journal_iter_prefetch(iter);
@@ -375,9 +391,10 @@ again:
            bpos_lt(btree_k.k->p, iter->pos))
         bch2_journal_iter_advance_btree(iter);

-    while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
-           bpos_lt(journal_k.k->p, iter->pos))
-        bch2_journal_iter_advance(&iter->journal);
+    if (iter->trans->journal_replay_not_finished)
+        while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
+               bpos_lt(journal_k.k->p, iter->pos))
+            bch2_journal_iter_advance(&iter->journal);

     ret = journal_k.k &&
         (!btree_k.k || bpos_le(journal_k.k->p, btree_k.k->p))
@@ -435,7 +452,9 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,

     bch2_btree_node_iter_init_from_start(&node_iter, b);
     __bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
-    list_add(&iter->journal.list, &trans->c->journal_iters);
+    if (trans->journal_replay_not_finished &&
+        !test_bit(BCH_FS_may_go_rw, &trans->c->flags))
+        list_add(&iter->journal.list, &trans->c->journal_iters);
 }

 /* sort and dedup all keys in the journal: */
@@ -548,3 +567,22 @@ int bch2_journal_keys_sort(struct bch_fs *c)
     bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_read, keys->nr);
     return 0;
 }
+
+void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree,
+                                  unsigned level_min, unsigned level_max,
+                                  struct bpos start, struct bpos end)
+{
+    struct journal_keys *keys = &c->journal_keys;
+    size_t dst = 0;
+
+    move_gap(keys, keys->nr);
+
+    darray_for_each(*keys, i)
+        if (!(i->btree_id == btree &&
+              i->level >= level_min &&
+              i->level <= level_max &&
+              bpos_ge(i->k->k.p, start) &&
+              bpos_le(i->k->k.p, end)))
+            keys->data[dst++] = *i;
+
+    keys->nr = keys->gap = dst;
+}
libbcachefs/btree_journal_iter.h
@@ -40,8 +40,8 @@ int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
                             unsigned, struct bkey_i *);
 int bch2_journal_key_delete(struct bch_fs *, enum btree_id,
                             unsigned, struct bpos);
-void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id,
-                                  unsigned, struct bpos);
+bool bch2_key_deleted_in_journal(struct btree_trans *, enum btree_id, unsigned, struct bpos);
+void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, unsigned, struct bpos);

 void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
 struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
@@ -66,4 +66,8 @@ void bch2_journal_entries_free(struct bch_fs *);

 int bch2_journal_keys_sort(struct bch_fs *);

+void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id,
+                                  unsigned, unsigned,
+                                  struct bpos, struct bpos);
+
 #endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */
libbcachefs/btree_key_cache.c
@@ -676,7 +676,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
         !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
         !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
         !bch2_journal_error(j), c,
-        "error flushing key cache: %s", bch2_err_str(ret));
+        "flushing key cache: %s", bch2_err_str(ret));
     if (ret)
         goto out;

libbcachefs/btree_node_scan.c (new file, 495 lines)
@@ -0,0 +1,495 @@
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_journal_iter.h"
#include "btree_node_scan.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "error.h"
#include "journal_io.h"
#include "recovery_passes.h"

#include <linux/kthread.h>
#include <linux/sort.h>

struct find_btree_nodes_worker {
	struct closure		*cl;
	struct find_btree_nodes	*f;
	struct bch_dev		*ca;
};

static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n)
{
	prt_printf(out, "%s l=%u seq=%u cookie=%llx ", bch2_btree_id_str(n->btree_id), n->level, n->seq, n->cookie);
	bch2_bpos_to_text(out, n->min_key);
	prt_str(out, "-");
	bch2_bpos_to_text(out, n->max_key);

	if (n->range_updated)
		prt_str(out, " range updated");
	if (n->overwritten)
		prt_str(out, " overwritten");

	for (unsigned i = 0; i < n->nr_ptrs; i++) {
		prt_char(out, ' ');
		bch2_extent_ptr_to_text(out, c, n->ptrs + i);
	}
}

static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes)
{
	printbuf_indent_add(out, 2);
	darray_for_each(nodes, i) {
		found_btree_node_to_text(out, c, i);
		prt_newline(out);
	}
	printbuf_indent_sub(out, 2);
}

static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f)
{
	struct bkey_i_btree_ptr_v2 *bp = bkey_btree_ptr_v2_init(k);

	set_bkey_val_u64s(&bp->k, sizeof(struct bch_btree_ptr_v2) / sizeof(u64) + f->nr_ptrs);
	bp->k.p			= f->max_key;
	bp->v.seq		= cpu_to_le64(f->cookie);
	bp->v.sectors_written	= 0;
	bp->v.flags		= 0;
	bp->v.min_key		= f->min_key;
	SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated);
	memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs);
}

static bool found_btree_node_is_readable(struct btree_trans *trans,
					 const struct found_btree_node *f)
{
	struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k;

	found_btree_node_to_key(&k.k, f);

	struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false);
	bool ret = !IS_ERR_OR_NULL(b);
	if (ret)
		six_unlock_read(&b->c.lock);

	/*
	 * We might update this node's range; if that happens, we need the node
	 * to be re-read so the read path can trim keys that are no longer in
	 * this node
	 */
	if (b != btree_node_root(trans->c, b))
		bch2_btree_node_evict(trans, &k.k);
	return ret;
}

static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
{
	const struct found_btree_node *l = _l;
	const struct found_btree_node *r = _r;

	return cmp_int(l->btree_id,	r->btree_id) ?:
	       cmp_int(l->level,	r->level) ?:
	       cmp_int(l->cookie,	r->cookie);
}

/*
 * Given two found btree nodes, if their sequence numbers are equal, take the
 * one that's readable:
 */
static int found_btree_node_cmp_time(const struct found_btree_node *l,
				     const struct found_btree_node *r)
{
	return cmp_int(l->seq, r->seq);
}

static int found_btree_node_cmp_pos(const void *_l, const void *_r)
{
	const struct found_btree_node *l = _l;
	const struct found_btree_node *r = _r;

	return  cmp_int(l->btree_id,	r->btree_id) ?:
	       -cmp_int(l->level,	r->level) ?:
		bpos_cmp(l->min_key,	r->min_key) ?:
	       -found_btree_node_cmp_time(l, r);
}
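The comparators above rely on the GNU a ?: b extension: cmp_int() returns 0 on a tie, so evaluation falls through to the next field, and a negated term reverses that field's sort order. A minimal standalone sketch of the idiom (cmp_int is redefined locally for illustration; in the tree it comes from the utility headers):

#include <stdio.h>

#define cmp_int(l, r)	((l) > (r) ? 1 : (l) < (r) ? -1 : 0)

struct node_pos { int btree_id, level, min_key; };

static int node_pos_cmp(const struct node_pos *l, const struct node_pos *r)
{
	/* sort by btree_id, then level descending, then min_key */
	return cmp_int(l->btree_id, r->btree_id) ?:
	       -cmp_int(l->level, r->level) ?:
	       cmp_int(l->min_key, r->min_key);
}

int main(void)
{
	struct node_pos a = { 1, 2, 10 }, b = { 1, 1, 5 };

	/* prints -1: same btree, so the higher level sorts first */
	printf("%d\n", node_pos_cmp(&a, &b));
	return 0;
}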
static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
				struct bio *bio, struct btree_node *bn, u64 offset)
{
	struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);

	bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
	bio->bi_iter.bi_sector	= offset;
	bch2_bio_map(bio, bn, PAGE_SIZE);

	submit_bio_wait(bio);
	if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
			       "IO error in try_read_btree_node() at %llu: %s",
			       offset, bch2_blk_status_to_str(bio->bi_status)))
		return;

	if (le64_to_cpu(bn->magic) != bset_magic(c))
		return;

	rcu_read_lock();
	struct found_btree_node n = {
		.btree_id	= BTREE_NODE_ID(bn),
		.level		= BTREE_NODE_LEVEL(bn),
		.seq		= BTREE_NODE_SEQ(bn),
		.cookie		= le64_to_cpu(bn->keys.seq),
		.min_key	= bn->min_key,
		.max_key	= bn->max_key,
		.nr_ptrs	= 1,
		.ptrs[0].type	= 1 << BCH_EXTENT_ENTRY_ptr,
		.ptrs[0].offset	= offset,
		.ptrs[0].dev	= ca->dev_idx,
		.ptrs[0].gen	= *bucket_gen(ca, sector_to_bucket(ca, offset)),
	};
	rcu_read_unlock();

	if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
		mutex_lock(&f->lock);
		if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
			bch_err(c, "try_read_btree_node() can't handle endian conversion");
			f->ret = -EINVAL;
			goto unlock;
		}

		if (darray_push(&f->nodes, n))
			f->ret = -ENOMEM;
unlock:
		mutex_unlock(&f->lock);
	}
}

static int read_btree_nodes_worker(void *p)
{
	struct find_btree_nodes_worker *w = p;
	struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
	struct bch_dev *ca = w->ca;
	void *buf = (void *) __get_free_page(GFP_KERNEL);
	struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
	unsigned long last_print = jiffies;

	if (!buf || !bio) {
		bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
		w->f->ret = -ENOMEM;
		goto err;
	}

	for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++)
		for (unsigned bucket_offset = 0;
		     bucket_offset + btree_sectors(c) <= ca->mi.bucket_size;
		     bucket_offset += btree_sectors(c)) {
			if (time_after(jiffies, last_print + HZ * 30)) {
				u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset;
				u64 end_sector = ca->mi.nbuckets * ca->mi.bucket_size;

				bch_info(ca, "%s: %2u%% done", __func__,
					 (unsigned) div64_u64(cur_sector * 100, end_sector));
				last_print = jiffies;
			}

			try_read_btree_node(w->f, ca, bio, buf,
					    bucket * ca->mi.bucket_size + bucket_offset);
		}
err:
	bio_put(bio);
	free_page((unsigned long) buf);
	percpu_ref_put(&ca->io_ref);
	closure_put(w->cl);
	kfree(w);
	return 0;
}

static int read_btree_nodes(struct find_btree_nodes *f)
{
	struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
	struct closure cl;
	int ret = 0;

	closure_init_stack(&cl);

	for_each_online_member(c, ca) {
		struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
		struct task_struct *t;

		if (!w) {
			percpu_ref_put(&ca->io_ref);
			ret = -ENOMEM;
			goto err;
		}

		percpu_ref_get(&ca->io_ref);
		closure_get(&cl);
		w->cl		= &cl;
		w->f		= f;
		w->ca		= ca;

		t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
		ret = IS_ERR_OR_NULL(t);
		if (ret) {
			percpu_ref_put(&ca->io_ref);
			closure_put(&cl);
			f->ret = ret;
			bch_err(c, "error starting kthread: %i", ret);
			break;
		}
	}
err:
	closure_sync(&cl);
	return f->ret ?: ret;
}

static void bubble_up(struct found_btree_node *n, struct found_btree_node *end)
{
	while (n + 1 < end &&
	       found_btree_node_cmp_pos(n, n + 1) > 0) {
		swap(n[0], n[1]);
		n++;
	}
}
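bubble_up() is a single insertion-sort pass: when handle_overwrites() below grows a node's min_key, that node may now belong further right in the position-sorted array, so it is walked forward until order is restored. A standalone model of the idea, with plain ints standing in for struct found_btree_node:

#include <stdio.h>

/* after n's sort key grows, walk it forward until the array is ordered */
static void bubble_up(int *n, int *end)
{
	while (n + 1 < end && n[0] > n[1]) {
		int t = n[0]; n[0] = n[1]; n[1] = t;
		n++;
	}
}

int main(void)
{
	int a[] = { 1, 9, 3, 5, 7 };	/* a[1] was just bumped to 9 */

	bubble_up(a + 1, a + 5);
	for (int i = 0; i < 5; i++)
		printf("%d ", a[i]);	/* prints: 1 3 5 7 9 */
	printf("\n");
	return 0;
}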
static int handle_overwrites(struct bch_fs *c,
			     struct found_btree_node *start,
			     struct found_btree_node *end)
{
	struct found_btree_node *n;
again:
	for (n = start + 1;
	     n < end &&
	     n->btree_id	== start->btree_id &&
	     n->level		== start->level &&
	     bpos_lt(n->min_key, start->max_key);
	     n++) {
		int cmp = found_btree_node_cmp_time(start, n);

		if (cmp > 0) {
			if (bpos_cmp(start->max_key, n->max_key) >= 0)
				n->overwritten = true;
			else {
				n->range_updated = true;
				n->min_key = bpos_successor(start->max_key);
				n->range_updated = true;
				bubble_up(n, end);
				goto again;
			}
		} else if (cmp < 0) {
			BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0);

			start->max_key = bpos_predecessor(n->min_key);
			start->range_updated = true;
		} else {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "overlapping btree nodes with same seq! halting\n  ");
			found_btree_node_to_text(&buf, c, start);
			prt_str(&buf, "\n  ");
			found_btree_node_to_text(&buf, c, n);
			bch_err(c, "%s", buf.buf);
			printbuf_exit(&buf);
			return -1;
		}
	}

	return 0;
}
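The resolution rule: the node with the newer sequence number keeps its range, and the older node is either marked overwritten (fully covered) or trimmed until the two no longer intersect. A standalone sketch with integer ranges standing in for bpos ranges (values are illustrative):

#include <stdbool.h>
#include <stdio.h>

struct range { unsigned seq; int min, max; bool overwritten; };

/* newer node (higher seq) wins; the older node is dropped or trimmed */
static void resolve(struct range *start, struct range *n)
{
	if (start->seq > n->seq) {
		if (start->max >= n->max)
			n->overwritten = true;		/* fully covered */
		else
			n->min = start->max + 1;	/* trim front of n */
	} else if (start->seq < n->seq) {
		start->max = n->min - 1;		/* trim back of start */
	}
}

int main(void)
{
	struct range a = { .seq = 2, .min = 0,  .max = 50 };
	struct range b = { .seq = 1, .min = 20, .max = 80 };

	resolve(&a, &b);
	printf("b: [%d,%d]\n", b.min, b.max);	/* b: [51,80] */
	return 0;
}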
int bch2_scan_for_btree_nodes(struct bch_fs *c)
{
	struct find_btree_nodes *f = &c->found_btree_nodes;
	struct printbuf buf = PRINTBUF;
	size_t dst;
	int ret = 0;

	if (f->nodes.nr)
		return 0;

	mutex_init(&f->lock);

	ret = read_btree_nodes(f);
	if (ret)
		return ret;

	if (!f->nodes.nr) {
		bch_err(c, "%s: no btree nodes found", __func__);
		ret = -EINVAL;
		goto err;
	}

	if (0 && c->opts.verbose) {
		printbuf_reset(&buf);
		prt_printf(&buf, "%s: nodes found:\n", __func__);
		found_btree_nodes_to_text(&buf, c, f->nodes);
		bch2_print_string_as_lines(KERN_INFO, buf.buf);
	}

	sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);

	dst = 0;
	darray_for_each(f->nodes, i) {
		struct found_btree_node *prev = dst ? f->nodes.data + dst - 1 : NULL;

		if (prev &&
		    prev->cookie == i->cookie) {
			if (prev->nr_ptrs == ARRAY_SIZE(prev->ptrs)) {
				bch_err(c, "%s: found too many replicas for btree node", __func__);
				ret = -EINVAL;
				goto err;
			}
			prev->ptrs[prev->nr_ptrs++] = i->ptrs[0];
		} else {
			f->nodes.data[dst++] = *i;
		}
	}
	f->nodes.nr = dst;

	sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);

	if (0 && c->opts.verbose) {
		printbuf_reset(&buf);
		prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__);
		found_btree_nodes_to_text(&buf, c, f->nodes);
		bch2_print_string_as_lines(KERN_INFO, buf.buf);
	}

	dst = 0;
	darray_for_each(f->nodes, i) {
		if (i->overwritten)
			continue;

		ret = handle_overwrites(c, i, &darray_top(f->nodes));
		if (ret)
			goto err;

		BUG_ON(i->overwritten);
		f->nodes.data[dst++] = *i;
	}
	f->nodes.nr = dst;

	if (c->opts.verbose) {
		printbuf_reset(&buf);
		prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
		found_btree_nodes_to_text(&buf, c, f->nodes);
		bch2_print_string_as_lines(KERN_INFO, buf.buf);
	}

	eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
err:
	printbuf_exit(&buf);
	return ret;
}

static int found_btree_node_range_start_cmp(const void *_l, const void *_r)
{
	const struct found_btree_node *l = _l;
	const struct found_btree_node *r = _r;

	return  cmp_int(l->btree_id,	r->btree_id) ?:
	       -cmp_int(l->level,	r->level) ?:
		bpos_cmp(l->max_key,	r->min_key);
}

#define for_each_found_btree_node_in_range(_f, _search, _idx)			\
	for (size_t _idx = eytzinger0_find_gt((_f)->nodes.data, (_f)->nodes.nr,	\
					sizeof((_f)->nodes.data[0]),		\
					found_btree_node_range_start_cmp, &search); \
	     _idx < (_f)->nodes.nr &&						\
	     (_f)->nodes.data[_idx].btree_id == _search.btree_id &&		\
	     (_f)->nodes.data[_idx].level == _search.level &&			\
	     bpos_lt((_f)->nodes.data[_idx].min_key, _search.max_key);		\
	     _idx = eytzinger0_next(_idx, (_f)->nodes.nr))

bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b)
{
	struct find_btree_nodes *f = &c->found_btree_nodes;

	struct found_btree_node search = {
		.btree_id	= b->c.btree_id,
		.level		= b->c.level,
		.min_key	= b->data->min_key,
		.max_key	= b->key.k.p,
	};

	for_each_found_btree_node_in_range(f, search, idx)
		if (f->nodes.data[idx].seq > BTREE_NODE_SEQ(b->data))
			return true;
	return false;
}

bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
{
	struct found_btree_node search = {
		.btree_id	= btree,
		.level		= 0,
		.min_key	= POS_MIN,
		.max_key	= SPOS_MAX,
	};

	for_each_found_btree_node_in_range(&c->found_btree_nodes, search, idx)
		return true;
	return false;
}

int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
			   unsigned level, struct bpos node_min, struct bpos node_max)
{
	struct find_btree_nodes *f = &c->found_btree_nodes;

	int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
	if (ret)
		return ret;

	if (c->opts.verbose) {
		struct printbuf buf = PRINTBUF;

		prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level);
		bch2_bpos_to_text(&buf, node_min);
		prt_str(&buf, " - ");
		bch2_bpos_to_text(&buf, node_max);

		bch_info(c, "%s(): %s", __func__, buf.buf);
		printbuf_exit(&buf);
	}

	struct found_btree_node search = {
		.btree_id	= btree,
		.level		= level,
		.min_key	= node_min,
		.max_key	= node_max,
	};

	for_each_found_btree_node_in_range(f, search, idx) {
		struct found_btree_node n = f->nodes.data[idx];

		n.range_updated |= bpos_lt(n.min_key, node_min);
		n.min_key	= bpos_max(n.min_key, node_min);

		n.range_updated |= bpos_gt(n.max_key, node_max);
		n.max_key	= bpos_min(n.max_key, node_max);

		struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;

		found_btree_node_to_key(&tmp.k, &n);

		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
		bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
		printbuf_exit(&buf);

		BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL));

		ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k);
		if (ret)
			return ret;
	}

	return 0;
}

void bch2_find_btree_nodes_exit(struct find_btree_nodes *f)
{
	darray_exit(&f->nodes);
}
11	libbcachefs/btree_node_scan.h	Normal file
@ -0,0 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_NODE_SCAN_H
#define _BCACHEFS_BTREE_NODE_SCAN_H

int bch2_scan_for_btree_nodes(struct bch_fs *);
bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *);
bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id);
int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos);
void bch2_find_btree_nodes_exit(struct find_btree_nodes *);

#endif /* _BCACHEFS_BTREE_NODE_SCAN_H */
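A sketch of how a repair path might consume this interface, assuming in-tree compilation; example_fill_hole is a hypothetical caller, the real call sites are in the topology repair pass:

#include "bcachefs.h"
#include "btree_node_scan.h"

/* hypothetical helper: fill a missing range of a btree at the given
 * level from nodes discovered by the device scan */
static int example_fill_hole(struct bch_fs *c, enum btree_id btree, unsigned level,
			     struct bpos node_min, struct bpos node_max)
{
	/* nothing was recovered for this btree: caller must reconstruct */
	if (!bch2_btree_has_scanned_nodes(c, btree))
		return 0;

	/* inserts clamped btree_ptr_v2 keys at level + 1 via the journal */
	return bch2_get_scanned_nodes(c, btree, level, node_min, node_max);
}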
30	libbcachefs/btree_node_scan_types.h	Normal file
@ -0,0 +1,30 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_NODE_SCAN_TYPES_H
#define _BCACHEFS_BTREE_NODE_SCAN_TYPES_H

#include "darray.h"

struct found_btree_node {
	bool			range_updated:1;
	bool			overwritten:1;
	u8			btree_id;
	u8			level;
	u32			seq;
	u64			cookie;

	struct bpos		min_key;
	struct bpos		max_key;

	unsigned		nr_ptrs;
	struct bch_extent_ptr	ptrs[BCH_REPLICAS_MAX];
};

typedef DARRAY(struct found_btree_node) found_btree_nodes;

struct find_btree_nodes {
	int			ret;
	struct mutex		lock;
	found_btree_nodes	nodes;
};

#endif /* _BCACHEFS_BTREE_NODE_SCAN_TYPES_H */
@ -318,7 +318,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
	       !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
	       test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
	       i->k->k.p.snapshot &&
	       bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
	       bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot) > 0);
}

static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
@ -38,6 +38,9 @@ static noinline int extent_front_merge(struct btree_trans *trans,
	struct bkey_i *update;
	int ret;

	if (unlikely(trans->journal_replay_not_finished))
		return 0;

	update = bch2_bkey_make_mut_noupdate(trans, k);
	ret = PTR_ERR_OR_ZERO(update);
	if (ret)
@ -69,6 +72,9 @@ static noinline int extent_back_merge(struct btree_trans *trans,
	struct bch_fs *c = trans->c;
	int ret;

	if (unlikely(trans->journal_replay_not_finished))
		return 0;

	ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
	      bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
	if (ret < 0)
@ -2,6 +2,7 @@

#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_buf.h"
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_gc.h"
@ -18,6 +19,7 @@
#include "journal.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "recovery_passes.h"
#include "replicas.h"
#include "super-io.h"
#include "trace.h"
@ -44,56 +46,103 @@ static btree_path_idx_t get_unlocked_mut_path(struct btree_trans *trans,
	return path_idx;
}

/* Debug code: */

/*
 * Verify that child nodes correctly span parent node's range:
 */
static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	struct bpos next_node = b->data->min_key;
	struct btree_node_iter iter;
	struct bch_fs *c = trans->c;
	struct bpos node_min = b->key.k.type == KEY_TYPE_btree_ptr_v2
		? bkey_i_to_btree_ptr_v2(&b->key)->v.min_key
		: b->data->min_key;
	struct btree_and_journal_iter iter;
	struct bkey_s_c k;
	struct bkey_s_c_btree_ptr_v2 bp;
	struct bkey unpacked;
	struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
	struct printbuf buf = PRINTBUF;
	struct bkey_buf prev;
	int ret = 0;

	BUG_ON(!b->c.level);
	BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
	       !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
			b->data->min_key));

	if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
		return;
	if (!b->c.level)
		return 0;

	bch2_btree_node_iter_init_from_start(&iter, b);
	bch2_bkey_buf_init(&prev);
	bkey_init(&prev.k->k);
	bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);

	while (1) {
		k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked);
	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		if (k.k->type != KEY_TYPE_btree_ptr_v2)
			break;
		bp = bkey_s_c_to_btree_ptr_v2(k);
			goto out;

		if (!bpos_eq(next_node, bp.v->min_key)) {
			bch2_dump_btree_node(c, b);
			bch2_bpos_to_text(&buf1, next_node);
			bch2_bpos_to_text(&buf2, bp.v->min_key);
			panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf);
		struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);

		struct bpos expected_min = bkey_deleted(&prev.k->k)
			? node_min
			: bpos_successor(prev.k->k.p);

		if (!bpos_eq(expected_min, bp.v->min_key)) {
			bch2_topology_error(c);

			printbuf_reset(&buf);
			prt_str(&buf, "end of prev node doesn't match start of next node\n");
			prt_printf(&buf, "  in btree %s level %u node ",
				   bch2_btree_id_str(b->c.btree_id), b->c.level);
			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
			prt_str(&buf, "\n  prev ");
			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
			prt_str(&buf, "\n  next ");
			bch2_bkey_val_to_text(&buf, c, k);

			need_fsck_err(c, btree_node_topology_bad_min_key, "%s", buf.buf);
			goto topology_repair;
		}

		bch2_btree_node_iter_advance(&iter, b);

		if (bch2_btree_node_iter_end(&iter)) {
			if (!bpos_eq(k.k->p, b->key.k.p)) {
				bch2_dump_btree_node(c, b);
				bch2_bpos_to_text(&buf1, b->key.k.p);
				bch2_bpos_to_text(&buf2, k.k->p);
				panic("expected end %s got %s\n", buf1.buf, buf2.buf);
			}
			break;
		}

		next_node = bpos_successor(k.k->p);
		bch2_bkey_buf_reassemble(&prev, c, k);
		bch2_btree_and_journal_iter_advance(&iter);
	}
#endif

	if (bkey_deleted(&prev.k->k)) {
		bch2_topology_error(c);

		printbuf_reset(&buf);
		prt_str(&buf, "empty interior node\n");
		prt_printf(&buf, "  in btree %s level %u node ",
			   bch2_btree_id_str(b->c.btree_id), b->c.level);
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

		need_fsck_err(c, btree_node_topology_empty_interior_node, "%s", buf.buf);
		goto topology_repair;
	} else if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
		bch2_topology_error(c);

		printbuf_reset(&buf);
		prt_str(&buf, "last child node doesn't end at end of parent node\n");
		prt_printf(&buf, "  in btree %s level %u node ",
			   bch2_btree_id_str(b->c.btree_id), b->c.level);
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
		prt_str(&buf, "\n  last key ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));

		need_fsck_err(c, btree_node_topology_bad_max_key, "%s", buf.buf);
		goto topology_repair;
	}
out:
fsck_err:
	bch2_btree_and_journal_iter_exit(&iter);
	bch2_bkey_buf_exit(&prev, c);
	printbuf_exit(&buf);
	return ret;
topology_repair:
	if ((c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) &&
	    c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) {
		bch2_inconsistent_error(c);
		ret = -BCH_ERR_btree_need_topology_repair;
	} else {
		ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
	}
	goto out;
}

/* Calculate ideal packed bkey format for new btree nodes: */
@ -646,7 +695,7 @@ static void btree_update_nodes_written(struct btree_update *as)
	bch2_trans_unlock(trans);

	bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
			     "%s(): error %s", __func__, bch2_err_str(ret));
			     "%s", bch2_err_str(ret));
err:
	if (as->b) {

@ -1067,13 +1116,18 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
	flags &= ~BCH_WATERMARK_MASK;
	flags |= watermark;

	if (!(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
	    watermark < c->journal.watermark) {
	if (watermark < c->journal.watermark) {
		struct journal_res res = { 0 };
		unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;

		if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
		    watermark != BCH_WATERMARK_reclaim)
			journal_flags |= JOURNAL_RES_GET_NONBLOCK;

		ret = drop_locks_do(trans,
			bch2_journal_res_get(&c->journal, &res, 1,
					     watermark|JOURNAL_RES_GET_CHECK));
			bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
		if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
			ret = -BCH_ERR_journal_reclaim_would_deadlock;
		if (ret)
			return ERR_PTR(ret);
	}
@ -1117,6 +1171,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
	closure_init(&as->cl, NULL);
	as->c		= c;
	as->start_time	= start_time;
	as->ip_started	= _RET_IP_;
	as->mode	= BTREE_INTERIOR_NO_UPDATE;
	as->took_gc_lock = true;
	as->btree_id	= path->btree_id;
@ -1192,7 +1247,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
err:
	bch2_btree_update_free(as, trans);
	if (!bch2_err_matches(ret, ENOSPC) &&
	    !bch2_err_matches(ret, EROFS))
	    !bch2_err_matches(ret, EROFS) &&
	    ret != -BCH_ERR_journal_reclaim_would_deadlock)
		bch_err_fn_ratelimited(c, ret);
	return ERR_PTR(ret);
}
@ -1373,9 +1429,16 @@ static void __btree_split_node(struct btree_update *as,
		if (bkey_deleted(k))
			continue;

		uk = bkey_unpack_key(b, k);

		if (b->c.level &&
		    u64s < n1_u64s &&
		    u64s + k->u64s >= n1_u64s &&
		    bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p))
			n1_u64s += k->u64s;

		i = u64s >= n1_u64s;
		u64s += k->u64s;
		uk = bkey_unpack_key(b, k);
		if (!i)
			n1_pos = uk.p;
		bch2_bkey_format_add_key(&format[i], &uk);
@ -1434,8 +1497,7 @@ static void __btree_split_node(struct btree_update *as,

		bch2_verify_btree_nr_keys(n[i]);

		if (b->c.level)
			btree_node_interior_verify(as->c, n[i]);
		BUG_ON(bch2_btree_node_check_topology(trans, n[i]));
	}
}

@ -1466,7 +1528,7 @@ static void btree_split_insert_keys(struct btree_update *as,

		__bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);

		btree_node_interior_verify(as->c, b);
		BUG_ON(bch2_btree_node_check_topology(trans, b));
	}
}

@ -1481,9 +1543,14 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
	u64 start_time = local_clock();
	int ret = 0;

	bch2_verify_btree_nr_keys(b);
	BUG_ON(!parent && (b != btree_node_root(c, b)));
	BUG_ON(parent && !btree_node_intent_locked(trans->paths + path, b->c.level + 1));

	ret = bch2_btree_node_check_topology(trans, b);
	if (ret)
		return ret;

	bch2_btree_interior_update_will_free_node(as, b);

	if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
@ -1703,7 +1770,11 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
		goto split;
	}

	btree_node_interior_verify(c, b);
	ret = bch2_btree_node_check_topology(trans, b);
	if (ret) {
		bch2_btree_node_unlock_write(trans, path, b);
		return ret;
	}

	bch2_btree_insert_keys_interior(as, trans, path, b, keys);

@ -1721,7 +1792,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t

	bch2_btree_node_unlock_write(trans, path, b);

	btree_node_interior_verify(c, b);
	BUG_ON(bch2_btree_node_check_topology(trans, b));
	return 0;
split:
	/*
@ -1811,9 +1882,12 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path,
{
	struct bch_fs *c = trans->c;
	struct btree *b = bch2_btree_id_root(c, trans->paths[path].btree_id)->b;

	if (btree_node_fake(b))
		return bch2_btree_split_leaf(trans, path, flags);

	struct btree_update *as =
		bch2_btree_update_start(trans, trans->paths + path,
					b->c.level, true, flags);
		bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags);
	if (IS_ERR(as))
		return PTR_ERR(as);

@ -2114,7 +2188,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work)

	ret = bch2_trans_do(c, NULL, NULL, 0,
		      async_btree_node_rewrite_trans(trans, a));
	bch_err_fn(c, ret);
	bch_err_fn_ratelimited(c, ret);
	bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
	kfree(a);
}
@ -2161,7 +2235,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
		bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
	}

	queue_work(c->btree_interior_update_worker, &a->work);
	queue_work(c->btree_node_rewrite_worker, &a->work);
}

void bch2_do_pending_node_rewrites(struct bch_fs *c)
@ -2173,7 +2247,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
		list_del(&a->list);

		bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
		queue_work(c->btree_interior_update_worker, &a->work);
		queue_work(c->btree_node_rewrite_worker, &a->work);
	}
	mutex_unlock(&c->pending_node_rewrites_lock);
}
|
||||
bch2_btree_set_root_inmem(c, b);
|
||||
}
|
||||
|
||||
static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
|
||||
static int __bch2_btree_root_alloc_fake(struct btree_trans *trans, enum btree_id id, unsigned level)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct closure cl;
|
||||
@ -2403,7 +2477,7 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
|
||||
|
||||
set_btree_node_fake(b);
|
||||
set_btree_node_need_rewrite(b);
|
||||
b->c.level = 0;
|
||||
b->c.level = level;
|
||||
b->c.btree_id = id;
|
||||
|
||||
bkey_btree_ptr_init(&b->key);
|
||||
@ -2430,9 +2504,9 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
|
||||
void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level)
|
||||
{
|
||||
bch2_trans_run(c, __bch2_btree_root_alloc(trans, id));
|
||||
bch2_trans_run(c, __bch2_btree_root_alloc_fake(trans, id, level));
|
||||
}
|
||||
|
||||
void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
@ -2441,12 +2515,12 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
list_for_each_entry(as, &c->btree_interior_update_list, list)
|
||||
prt_printf(out, "%p m %u w %u r %u j %llu\n",
|
||||
as,
|
||||
as->mode,
|
||||
as->nodes_written,
|
||||
closure_nr_remaining(&as->cl),
|
||||
as->journal.seq);
|
||||
prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
|
||||
(void *) as->ip_started,
|
||||
as->mode,
|
||||
as->nodes_written,
|
||||
closure_nr_remaining(&as->cl),
|
||||
as->journal.seq);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
}
|
||||
|
||||
@ -2510,6 +2584,8 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,
|
||||
|
||||
void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
|
||||
{
|
||||
if (c->btree_node_rewrite_worker)
|
||||
destroy_workqueue(c->btree_node_rewrite_worker);
|
||||
if (c->btree_interior_update_worker)
|
||||
destroy_workqueue(c->btree_interior_update_worker);
|
||||
mempool_exit(&c->btree_interior_update_pool);
|
||||
@ -2534,6 +2610,11 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c)
|
||||
if (!c->btree_interior_update_worker)
|
||||
return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
|
||||
|
||||
c->btree_node_rewrite_worker =
|
||||
alloc_ordered_workqueue("btree_node_rewrite", WQ_UNBOUND);
|
||||
if (!c->btree_node_rewrite_worker)
|
||||
return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
|
||||
|
||||
if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
|
||||
sizeof(struct btree_update)))
|
||||
return -BCH_ERR_ENOMEM_btree_interior_update_pool_init;
|
||||
|
@ -10,6 +10,8 @@

#define BTREE_UPDATE_JOURNAL_RES	(BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))

int bch2_btree_node_check_topology(struct btree_trans *, struct btree *);

/*
 * Tracks an in progress split/rewrite of a btree node and the update to the
 * parent node:
@ -32,6 +34,7 @@ struct btree_update {
	struct closure			cl;
	struct bch_fs			*c;
	u64				start_time;
	unsigned long			ip_started;

	struct list_head		list;
	struct list_head		unwritten_list;
@ -162,7 +165,7 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *,
				struct bkey_i *, unsigned, bool);

void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
void bch2_btree_root_alloc_fake(struct bch_fs *, enum btree_id, unsigned);

static inline unsigned btree_update_reserve_required(struct bch_fs *c,
						     struct btree *b)
@ -11,6 +11,7 @@
#include "journal_reclaim.h"

#include <linux/prefetch.h>
#include <linux/sort.h>

static int bch2_btree_write_buffer_journal_flush(struct journal *,
				struct journal_entry_pin *, u64);
@ -46,6 +47,14 @@ static inline bool wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_ke
#endif
}

static int wb_key_seq_cmp(const void *_l, const void *_r)
{
	const struct btree_write_buffered_key *l = _l;
	const struct btree_write_buffered_key *r = _r;

	return cmp_int(l->journal_seq, r->journal_seq);
}

/* Compare excluding idx, the low 24 bits: */
static inline bool wb_key_eq(const void *_l, const void *_r)
{
@ -357,6 +366,11 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
		 */
		trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr);

		sort(wb->flushing.keys.data,
		     wb->flushing.keys.nr,
		     sizeof(wb->flushing.keys.data[0]),
		     wb_key_seq_cmp, NULL);

		darray_for_each(wb->flushing.keys, i) {
			if (!i->journal_seq)
				continue;
@ -378,7 +392,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
		}
	}
err:
	bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
	bch2_fs_fatal_err_on(ret, c, "%s", bch2_err_str(ret));
	trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
	bch2_journal_pin_drop(j, &wb->flushing.pin);
	wb->flushing.keys.nr = 0;
@ -525,6 +525,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
			"different types of data in same bucket: %s, %s",
			bch2_data_type_str(g->data_type),
			bch2_data_type_str(data_type))) {
		BUG();
		ret = -EIO;
		goto err;
	}
@ -628,6 +629,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
			bch2_data_type_str(ptr_data_type),
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, k), buf.buf));
		BUG();
		ret = -EIO;
		goto err;
	}
@ -815,14 +817,14 @@ static int __mark_pointer(struct btree_trans *trans,
static int bch2_trigger_pointer(struct btree_trans *trans,
			enum btree_id btree_id, unsigned level,
			struct bkey_s_c k, struct extent_ptr_decoded p,
			s64 *sectors,
			unsigned flags)
			const union bch_extent_entry *entry,
			s64 *sectors, unsigned flags)
{
	bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
	struct bpos bucket;
	struct bch_backpointer bp;

	bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp);
	bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, entry, &bucket, &bp);
	*sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);

	if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
@ -851,7 +853,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
	if (flags & BTREE_TRIGGER_GC) {
		struct bch_fs *c = trans->c;
		struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
		enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
		enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);

		percpu_down_read(&c->mark_lock);
		struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
@ -979,7 +981,7 @@ static int __trigger_extent(struct btree_trans *trans,

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		s64 disk_sectors;
		ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags);
		ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags);
		if (ret < 0)
			return ret;

@ -990,8 +992,8 @@ static int __trigger_extent(struct btree_trans *trans,
			ret = !gc
				? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors)
				: update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true);
			bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors",
					     __func__);
			bch2_fs_fatal_err_on(ret && gc, c, "%s: no replicas entry while updating cached sectors",
					     bch2_err_str(ret));
			if (ret)
				return ret;
		}
@ -1020,7 +1022,7 @@ static int __trigger_extent(struct btree_trans *trans,
			struct printbuf buf = PRINTBUF;

			bch2_bkey_val_to_text(&buf, c, k);
			bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
			bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
			printbuf_exit(&buf);
		}
		if (ret)
@ -7,7 +7,7 @@
#include "chardev.h"
#include "journal.h"
#include "move.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "replicas.h"
#include "super.h"
#include "super-io.h"
@ -14,6 +14,7 @@
#include "move.h"
#include "nocow_locking.h"
#include "rebalance.h"
#include "snapshot.h"
#include "subvolume.h"
#include "trace.h"

@ -509,6 +510,14 @@ int bch2_data_update_init(struct btree_trans *trans,
	unsigned ptrs_locked = 0;
	int ret = 0;

	/*
	 * If the fs is corrupt we may have a key for a snapshot node that
	 * doesn't exist, and we have to check for this because we go rw
	 * before repairing the snapshots table - just skip it, we can move
	 * it later.
	 */
	if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot)))
		return -BCH_ERR_data_update_done;

	bch2_bkey_buf_init(&m->k);
	bch2_bkey_buf_reassemble(&m->k, c, k);
	m->btree_id	= btree_id;
@ -170,7 +170,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
		bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf.buf);
		bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf);
		printbuf_exit(&buf);
	}
out:
@ -448,7 +448,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, new);
		bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
		bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
		printbuf_exit(&buf);
		return ret;
	}
@ -1868,10 +1868,10 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
		return -BCH_ERR_stripe_alloc_blocked;

	ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
	bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c,
			     "reading stripe key: %s", bch2_err_str(ret));
	if (ret) {
		bch2_stripe_close(c, h->s);
		if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
			bch2_fs_fatal_error(c, "error reading stripe key: %s", bch2_err_str(ret));
		return ret;
	}
@ -252,7 +252,8 @@
	x(BCH_ERR_nopromote,		nopromote_in_flight)			\
	x(BCH_ERR_nopromote,		nopromote_no_writes)			\
	x(BCH_ERR_nopromote,		nopromote_enomem)			\
	x(0,				need_inode_lock)
	x(0,				need_inode_lock)			\
	x(0,				invalid_snapshot_node)

enum bch_errcode {
	BCH_ERR_START			= 2048,
@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "error.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "super.h"
#include "thread_with_file.h"
@ -32,6 +32,12 @@ bool bch2_inconsistent_error(struct bch_fs *);

int bch2_topology_error(struct bch_fs *);

#define bch2_fs_topology_error(c, ...)					\
({									\
	bch_err(c, "btree topology error: " __VA_ARGS__);		\
	bch2_topology_error(c);						\
})

#define bch2_fs_inconsistent(c, ...)					\
({									\
	bch_err(c, __VA_ARGS__);					\
@ -191,9 +197,9 @@ do {									\

void bch2_fatal_error(struct bch_fs *);

#define bch2_fs_fatal_error(c, ...)					\
#define bch2_fs_fatal_error(c, _msg, ...)				\
do {									\
	bch_err(c, __VA_ARGS__);					\
	bch_err(c, "%s(): fatal error " _msg, __func__, ##__VA_ARGS__);	\
	bch2_fatal_error(c);						\
} while (0)
@ -189,13 +189,18 @@ int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
			      enum bkey_invalid_flags flags,
			      struct printbuf *err)
{
	struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
	int ret = 0;

	bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err,
			 btree_ptr_v2_val_too_big,
	bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX,
			 c, err, btree_ptr_v2_val_too_big,
			 "value too big (%zu > %zu)",
			 bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);

	bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p),
			 c, err, btree_ptr_v2_min_key_bad,
			 "min_key > key");

	ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
fsck_err:
	return ret;
@ -973,6 +978,31 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
	return bkey_deleted(k.k);
}
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
{
	struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
		? bch_dev_bkey_exists(c, ptr->dev)
		: NULL;

	if (!ca) {
		prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
			   (u64) ptr->offset, ptr->gen,
			   ptr->cached ? " cached" : "");
	} else {
		u32 offset;
		u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);

		prt_printf(out, "ptr: %u:%llu:%u gen %u",
			   ptr->dev, b, offset, ptr->gen);
		if (ptr->cached)
			prt_str(out, " cached");
		if (ptr->unwritten)
			prt_str(out, " unwritten");
		if (ca && ptr_stale(ca, ptr))
			prt_printf(out, " stale");
	}
}
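Following the format strings above, pointer output renders roughly as follows (illustrative values, not captured from a real run):

/*
 * Known device (dev 0, bucket 12, offset-in-bucket 34, gen 5, cached):
 *
 *	ptr: 0:12:34 gen 5 cached
 *
 * Unknown device: the raw sector offset is printed instead of
 * bucket:offset:
 *
 *	ptr: 3:4096 gen 2
 */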
void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
			    struct bkey_s_c k)
{
@ -988,31 +1018,10 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
			prt_printf(out, " ");

		switch (__extent_entry_type(entry)) {
		case BCH_EXTENT_ENTRY_ptr: {
			const struct bch_extent_ptr *ptr = entry_to_ptr(entry);
			struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
				? bch_dev_bkey_exists(c, ptr->dev)
				: NULL;

			if (!ca) {
				prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
					   (u64) ptr->offset, ptr->gen,
					   ptr->cached ? " cached" : "");
			} else {
				u32 offset;
				u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);

				prt_printf(out, "ptr: %u:%llu:%u gen %u",
					   ptr->dev, b, offset, ptr->gen);
				if (ptr->cached)
					prt_str(out, " cached");
				if (ptr->unwritten)
					prt_str(out, " unwritten");
				if (ca && ptr_stale(ca, ptr))
					prt_printf(out, " stale");
			}
		case BCH_EXTENT_ENTRY_ptr:
			bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry));
			break;
		}

		case BCH_EXTENT_ENTRY_crc32:
		case BCH_EXTENT_ENTRY_crc64:
		case BCH_EXTENT_ENTRY_crc128: {
@ -108,17 +108,17 @@ static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *en

static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
{
	return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
	return __extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
}

static inline bool extent_entry_is_stripe_ptr(const union bch_extent_entry *e)
{
	return extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
	return __extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
}

static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
{
	switch (extent_entry_type(e)) {
	switch (__extent_entry_type(e)) {
	case BCH_EXTENT_ENTRY_crc32:
	case BCH_EXTENT_ENTRY_crc64:
	case BCH_EXTENT_ENTRY_crc128:
@ -596,30 +596,6 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
	return ret;
}

static inline unsigned bch2_bkey_ptr_data_type(struct bkey_s_c k, const struct bch_extent_ptr *ptr)
{
	switch (k.k->type) {
	case KEY_TYPE_btree_ptr:
	case KEY_TYPE_btree_ptr_v2:
		return BCH_DATA_btree;
	case KEY_TYPE_extent:
	case KEY_TYPE_reflink_v:
		return BCH_DATA_user;
	case KEY_TYPE_stripe: {
		struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);

		BUG_ON(ptr < s.v->ptrs ||
		       ptr >= s.v->ptrs + s.v->nr_blocks);

		return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
			? BCH_DATA_parity
			: BCH_DATA_user;
	}
	default:
		BUG();
	}
}

unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
@ -700,6 +676,7 @@ bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);

bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
			    struct bkey_s_c);
int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c,
234	libbcachefs/eytzinger.c	Normal file
@ -0,0 +1,234 @@
// SPDX-License-Identifier: GPL-2.0

#include "eytzinger.h"

/**
 * is_aligned - is this pointer & size okay for word-wide copying?
 * @base: pointer to data
 * @size: size of each element
 * @align: required alignment (typically 4 or 8)
 *
 * Returns true if elements can be copied using word loads and stores.
 * The size must be a multiple of the alignment, and the base address must
 * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
 *
 * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
 * to "if ((a | b) & mask)", so we do that by hand.
 */
__attribute_const__ __always_inline
static bool is_aligned(const void *base, size_t size, unsigned char align)
{
	unsigned char lsbits = (unsigned char)size;

	(void)base;
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	lsbits |= (unsigned char)(uintptr_t)base;
#endif
	return (lsbits & (align - 1)) == 0;
}

/**
 * swap_words_32 - swap two elements in 32-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 4)
 *
 * Exchange the two objects in memory. This exploits base+index addressing,
 * which basically all CPUs have, to minimize loop overhead computations.
 *
 * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
 * bottom of the loop, even though the zero flag is still valid from the
 * subtract (since the intervening mov instructions don't alter the flags).
 * Gcc 8.1.0 doesn't have that problem.
 */
static void swap_words_32(void *a, void *b, size_t n)
{
	do {
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
	} while (n);
}

/**
 * swap_words_64 - swap two elements in 64-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 8)
 *
 * Exchange the two objects in memory. This exploits base+index
 * addressing, which basically all CPUs have, to minimize loop overhead
 * computations.
 *
 * We'd like to use 64-bit loads if possible. If they're not, emulating
 * one requires base+index+4 addressing which x86 has but most other
 * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
 * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
 * x32 ABI). Are there any cases the kernel needs to worry about?
 */
static void swap_words_64(void *a, void *b, size_t n)
{
	do {
#ifdef CONFIG_64BIT
		u64 t = *(u64 *)(a + (n -= 8));
		*(u64 *)(a + n) = *(u64 *)(b + n);
		*(u64 *)(b + n) = t;
#else
		/* Use two 32-bit transfers to avoid base+index+4 addressing */
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;

		t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
#endif
	} while (n);
}

/**
 * swap_bytes - swap two elements a byte at a time
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size
 *
 * This is the fallback if alignment doesn't allow using larger chunks.
 */
static void swap_bytes(void *a, void *b, size_t n)
{
	do {
		char t = ((char *)a)[--n];
		((char *)a)[n] = ((char *)b)[n];
		((char *)b)[n] = t;
	} while (n);
}

/*
 * The values are arbitrary as long as they can't be confused with
 * a pointer, but small integers make for the smallest compare
 * instructions.
 */
#define SWAP_WORDS_64 (swap_r_func_t)0
#define SWAP_WORDS_32 (swap_r_func_t)1
#define SWAP_BYTES    (swap_r_func_t)2
#define SWAP_WRAPPER  (swap_r_func_t)3

struct wrapper {
	cmp_func_t cmp;
	swap_func_t swap_f;
};

/*
 * The function pointer is last to make tail calls most efficient if the
 * compiler decides not to inline this function.
 */
static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
{
	if (swap_func == SWAP_WRAPPER) {
		((const struct wrapper *)priv)->swap_f(a, b, (int)size);
		return;
	}

	if (swap_func == SWAP_WORDS_64)
		swap_words_64(a, b, size);
	else if (swap_func == SWAP_WORDS_32)
		swap_words_32(a, b, size);
	else if (swap_func == SWAP_BYTES)
		swap_bytes(a, b, size);
	else
		swap_func(a, b, (int)size, priv);
}

#define _CMP_WRAPPER ((cmp_r_func_t)0L)

static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
{
	if (cmp == _CMP_WRAPPER)
		return ((const struct wrapper *)priv)->cmp(a, b);
	return cmp(a, b, priv);
}

static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
			 cmp_r_func_t cmp_func, const void *priv,
			 size_t l, size_t r)
{
	return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
		      base + inorder_to_eytzinger0(r, n) * size,
		      cmp_func, priv);
}

static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
			   swap_r_func_t swap_func, const void *priv,
			   size_t l, size_t r)
{
	do_swap(base + inorder_to_eytzinger0(l, n) * size,
		base + inorder_to_eytzinger0(r, n) * size,
		size, swap_func, priv);
}

void eytzinger0_sort_r(void *base, size_t n, size_t size,
		       cmp_r_func_t cmp_func,
		       swap_r_func_t swap_func,
		       const void *priv)
{
	int i, c, r;

	/* called from 'sort' without swap function, let's pick the default */
	if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_f)
		swap_func = NULL;

	if (!swap_func) {
		if (is_aligned(base, size, 8))
			swap_func = SWAP_WORDS_64;
		else if (is_aligned(base, size, 4))
			swap_func = SWAP_WORDS_32;
		else
			swap_func = SWAP_BYTES;
	}

	/* heapify */
	for (i = n / 2 - 1; i >= 0; --i) {
		for (r = i; r * 2 + 1 < n; r = c) {
			c = r * 2 + 1;

			if (c + 1 < n &&
			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
				c++;

			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
				break;

			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
		}
	}

	/* sort */
	for (i = n - 1; i > 0; --i) {
		eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);

		for (r = 0; r * 2 + 1 < i; r = c) {
			c = r * 2 + 1;

			if (c + 1 < i &&
			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
				c++;

			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
				break;

			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
		}
	}
}

void eytzinger0_sort(void *base, size_t n, size_t size,
		     cmp_func_t cmp_func,
		     swap_func_t swap_func)
{
	struct wrapper w = {
		.cmp	= cmp_func,
		.swap_f	= swap_func,
	};

	return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
}
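A minimal usage sketch, assuming in-tree compilation where eytzinger.h is available; names and values are illustrative:

#include "eytzinger.h"

static int cmp_u32(const void *_l, const void *_r)
{
	u32 l = *(const u32 *)_l, r = *(const u32 *)_r;

	return l < r ? -1 : l > r;
}

static void eytzinger_example(void)
{
	u32 a[] = { 5, 1, 4, 2, 3 };
	u32 search = 3;

	/* heapsorts into eytzinger (BFS) order, not ascending array order */
	eytzinger0_sort(a, ARRAY_SIZE(a), sizeof(a[0]), cmp_u32, NULL);

	/* index of the greatest element <= search, or -1 if none */
	ssize_t idx = eytzinger0_find_le(a, ARRAY_SIZE(a), sizeof(a[0]),
					 cmp_u32, &search);
	(void) idx;
}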
@ -5,23 +5,33 @@
#include <linux/bitops.h>
#include <linux/log2.h>

#include "util.h"
#ifdef EYTZINGER_DEBUG
#define EYTZINGER_BUG_ON(cond)		BUG_ON(cond)
#else
#define EYTZINGER_BUG_ON(cond)
#endif

/*
 * Traversal for trees in eytzinger layout - a full binary tree laid out in an
 * array
 */

/*
 * One based indexing version:
 * array.
 *
 * With one based indexing each level of the tree starts at a power of two -
 * good for cacheline alignment:
 * Consider using an eytzinger tree any time you would otherwise be doing binary
 * search over an array. Binary search is a worst case scenario for branch
 * prediction and prefetching, but in an eytzinger tree every node's children
 * are adjacent in memory, thus we can prefetch children before knowing the
 * result of the comparison, assuming multiple nodes fit on a cacheline.
 *
 * Two variants are provided, for one based indexing and zero based indexing.
 *
 * Zero based indexing is more convenient, but one based indexing has better
 * alignment and thus better performance because each new level of the tree
 * starts at a power of two, and thus if element 0 was cacheline aligned, each
 * new level will be as well.
 */
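A worked example of the one-based layout:

/*
 * The sorted sequence 1..7 stored in one-based eytzinger layout:
 *
 *	index:  1  2  3  4  5  6  7
 *	value:  4  2  6  1  3  5  7
 *
 * The children of index i live at 2i and 2i + 1, so each tree level
 * occupies indices [2^k, 2^(k+1)): 4 at index 1, then 2 6, then 1 3 5 7.
 */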
static inline unsigned eytzinger1_child(unsigned i, unsigned child)
{
	EBUG_ON(child > 1);
	EYTZINGER_BUG_ON(child > 1);

	return (i << 1) + child;
}
@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size)

static inline unsigned eytzinger1_next(unsigned i, unsigned size)
{
	EBUG_ON(i > size);
	EYTZINGER_BUG_ON(i > size);

	if (eytzinger1_right_child(i) <= size) {
		i = eytzinger1_right_child(i);
@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)

static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
{
	EBUG_ON(i > size);
	EYTZINGER_BUG_ON(i > size);

	if (eytzinger1_left_child(i) <= size) {
		i = eytzinger1_left_child(i) + 1;
@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
	unsigned shift = __fls(size) - b;
	int s;

	EBUG_ON(!i || i > size);
	EYTZINGER_BUG_ON(!i || i > size);

	i ^= 1U << b;
	i <<= 1;
@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
	unsigned shift;
	int s;

	EBUG_ON(!i || i > size);
	EYTZINGER_BUG_ON(!i || i > size);

	/*
	 * sign bit trick:
@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)

static inline unsigned eytzinger0_child(unsigned i, unsigned child)
{
	EBUG_ON(child > 1);
	EYTZINGER_BUG_ON(child > 1);

	return (i << 1) + 1 + child;
}
@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
	     (_i) != -1;						\
	     (_i) = eytzinger0_next((_i), (_size)))

typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);

/* return greatest node <= @search, or -1 if not found */
static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
					 eytzinger_cmp_fn cmp, const void *search)
					 cmp_func_t cmp, const void *search)
{
	unsigned i, n = 0;

@ -244,21 +252,24 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,

	do {
		i = n;
		n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
		n = eytzinger0_child(i, cmp(base + i * size, search) <= 0);
	} while (n < nr);

	if (n & 1) {
		/* @i was greater than @search, return previous node: */

		if (i == eytzinger0_first(nr))
			return -1;

		return eytzinger0_prev(i, nr);
	} else {
		return i;
	}
}

static inline ssize_t eytzinger0_find_gt(void *base, size_t nr, size_t size,
					 cmp_func_t cmp, const void *search)
{
	ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
	return eytzinger0_next(idx, size);
}
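To make the new comparator convention concrete: cmp is now a two-argument cmp_func_t (the same shape the kernel's sort() uses), and the descent compares the element against the key rather than the other way around. A hedged usage sketch, assuming an array already arranged in eytzinger order:

/* Illustrative sketch only: calling eytzinger0_find_le() with a
 * two-argument comparator over u32s laid out in eytzinger order.
 * Returns the index of the greatest element <= key, or -1.
 */
static int cmp_u32(const void *l, const void *r)
{
	u32 a = *(const u32 *) l, b = *(const u32 *) r;

	return a < b ? -1 : a > b ? 1 : 0;
}

static ssize_t lookup_le(u32 *tree, size_t nr, u32 key)
{
	return eytzinger0_find_le(tree, nr, sizeof(*tree), cmp_u32, &key);
}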
#define eytzinger0_find(base, nr, size, _cmp, search)			\
({									\
	void *_base		= (base);				\
@ -269,13 +280,13 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
	int _res;							\
									\
	while (_i < _nr &&						\
	       (_res = _cmp(_search, _base + _i * _size, _size)))	\
	       (_res = _cmp(_search, _base + _i * _size)))		\
		_i = eytzinger0_child(_i, _res > 0);			\
	_i;								\
})

void eytzinger0_sort(void *, size_t, size_t,
		     int (*cmp_func)(const void *, const void *, size_t),
		     void (*swap_func)(void *, void *, size_t));
void eytzinger0_sort_r(void *, size_t, size_t,
		       cmp_r_func_t, swap_r_func_t, const void *);
void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);

#endif /* _EYTZINGER_H */
@ -536,7 +536,7 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
		if (likely(!dio->iter.count) || dio->op.error)
			break;

		bio_reset(bio, NULL, REQ_OP_WRITE);
		bio_reset(bio, NULL, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
	}
out:
	return bch2_dio_write_done(dio);
@ -618,7 +618,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)

	bio = bio_alloc_bioset(NULL,
			       bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
			       REQ_OP_WRITE,
			       REQ_OP_WRITE | REQ_SYNC | REQ_IDLE,
			       GFP_KERNEL,
			       &c->dio_write_bioset);
	dio = container_of(bio, struct dio_write, op.wbio.bio);
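For context on the flag change (an aside, not from the diff): REQ_SYNC marks the write as synchronous so the block layer prioritizes it, and REQ_IDLE hints that the submitter does not expect to issue more I/O immediately afterwards; both are simply OR'd into the request opf alongside the operation, which is what the two hunks above now do:

/* Sketch: composing a blk_opf_t for a synchronous direct write, as
 * above. REQ_SYNC = treat as synchronous; REQ_IDLE = no further I/O
 * expected from this submitter right away.
 */
static inline blk_opf_t dio_write_opf(void)
{
	return REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
}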
@ -108,7 +108,8 @@ retry:
		goto retry;

	bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
			     "inode %u:%llu not found when updating",
			     "%s: inode %u:%llu not found when updating",
			     bch2_err_str(ret),
			     inode_inum(inode).subvol,
			     inode_inum(inode).inum);

@ -1996,6 +1997,7 @@ out:
	return dget(sb->s_root);

err_put_super:
	__bch2_fs_stop(c);
	deactivate_locked_super(sb);
	return ERR_PTR(bch2_err_class(ret));
}
@ -12,7 +12,7 @@
#include "fsck.h"
#include "inode.h"
#include "keylist.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include "super.h"
#include "xattr.h"
@ -1114,10 +1114,9 @@ int bch2_check_inodes(struct bch_fs *c)
	return ret;
}

static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w)
{
	struct bch_fs *c = trans->c;
	u32 restart_count = trans->restart_count;
	int ret = 0;
	s64 count2;

@ -1149,7 +1148,14 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
	}
fsck_err:
	bch_err_fn(c, ret);
	return ret ?: trans_was_restarted(trans, restart_count);
	return ret;
}

static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
{
	u32 restart_count = trans->restart_count;
	return check_i_sectors_notnested(trans, w) ?:
		trans_was_restarted(trans, restart_count);
}
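The split above follows a pattern repeated later in this file for check_subdir_count(): the _notnested worker returns only its own error, while a thin wrapper samples trans->restart_count and reports transaction restarts, which is only meaningful when the caller owns the whole transaction. A hedged sketch of the shape, with a hypothetical do_check() standing in for the real body:

/* Sketch of the notnested/wrapper split; do_check() is hypothetical. */
static int check_thing_notnested(struct btree_trans *trans, struct inode_walker *w)
{
	return do_check(trans, w);
}

static int check_thing(struct btree_trans *trans, struct inode_walker *w)
{
	u32 restart_count = trans->restart_count;

	return check_thing_notnested(trans, w) ?:
		trans_was_restarted(trans, restart_count);
}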
struct extent_end {
@ -1533,7 +1539,7 @@ int bch2_check_extents(struct bch_fs *c)
			check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
			check_extent_overbig(trans, &iter, k);
		})) ?:
		check_i_sectors(trans, &w));
		check_i_sectors_notnested(trans, &w));

	bch2_disk_reservation_put(c, &res);
	extent_ends_exit(&extent_ends);
@ -1563,10 +1569,9 @@ int bch2_check_indirect_extents(struct bch_fs *c)
	return ret;
}

static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w)
{
	struct bch_fs *c = trans->c;
	u32 restart_count = trans->restart_count;
	int ret = 0;
	s64 count2;

@ -1598,7 +1603,14 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
	}
fsck_err:
	bch_err_fn(c, ret);
	return ret ?: trans_was_restarted(trans, restart_count);
	return ret;
}

static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
{
	u32 restart_count = trans->restart_count;
	return check_subdir_count_notnested(trans, w) ?:
		trans_was_restarted(trans, restart_count);
}

static int check_dirent_inode_dirent(struct btree_trans *trans,
@ -2003,7 +2015,8 @@ int bch2_check_dirents(struct bch_fs *c)
			k,
			NULL, NULL,
			BCH_TRANS_COMMIT_no_enospc,
		check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)));
		check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
	       check_subdir_count_notnested(trans, &dir));

	snapshots_seen_exit(&s);
	inode_walker_exit(&dir);
@ -2022,8 +2035,10 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
	int ret;

	ret = check_key_has_snapshot(trans, iter, k);
	if (ret)
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	i = walk_inode(trans, inode, k);
	ret = PTR_ERR_OR_ZERO(i);
@ -2083,17 +2098,21 @@ static int check_root_trans(struct btree_trans *trans)

	if (mustfix_fsck_err_on(ret, c, root_subvol_missing,
				"root subvol missing")) {
		struct bkey_i_subvolume root_subvol;
		struct bkey_i_subvolume *root_subvol =
			bch2_trans_kmalloc(trans, sizeof(*root_subvol));
		ret = PTR_ERR_OR_ZERO(root_subvol);
		if (ret)
			goto err;

		snapshot = U32_MAX;
		inum = BCACHEFS_ROOT_INO;

		bkey_subvolume_init(&root_subvol.k_i);
		root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
		root_subvol.v.flags = 0;
		root_subvol.v.snapshot = cpu_to_le32(snapshot);
		root_subvol.v.inode = cpu_to_le64(inum);
		ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0);
		bkey_subvolume_init(&root_subvol->k_i);
		root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL;
		root_subvol->v.flags = 0;
		root_subvol->v.snapshot = cpu_to_le32(snapshot);
		root_subvol->v.inode = cpu_to_le64(inum);
		ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0);
		bch_err_msg(c, ret, "writing root subvol");
		if (ret)
			goto err;
@ -552,8 +552,8 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
	prt_printf(out, "bi_sectors=%llu", inode->bi_sectors);
	prt_newline(out);

	prt_newline(out);
	prt_printf(out, "bi_version=%llu", inode->bi_version);
	prt_newline(out);

#define x(_name, _bits)						\
	prt_printf(out, #_name "=%llu", (u64) inode->_name);	\
@ -264,6 +264,7 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
	ret = 0;
err:
	bch2_logged_op_finish(trans, op_k);
	bch_err_fn(c, ret);
	return ret;
}

@ -476,6 +477,7 @@ case LOGGED_OP_FINSERT_finish:
		break;
	}
err:
	bch_err_fn(c, ret);
	bch2_logged_op_finish(trans, op_k);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
@ -511,18 +511,18 @@ retry:
	if (journal_res_get_fast(j, res, flags))
		return 0;

	if (bch2_journal_error(j))
		return -BCH_ERR_erofs_journal_err;

	if (j->blocked)
		return -BCH_ERR_journal_res_get_blocked;

	if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
		ret = JOURNAL_ERR_journal_full;
		can_discard = j->can_discard;
		goto out;
	}

	if (j->blocked)
		return -BCH_ERR_journal_res_get_blocked;

	if (bch2_journal_error(j))
		return -BCH_ERR_erofs_journal_err;

	if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) {
		ret = JOURNAL_ERR_max_in_flight;
		goto out;
@ -1082,9 +1082,7 @@ reread:
		ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
			     j->encrypted_start,
			     vstruct_end(j) - (void *) j->encrypted_start);
		bch2_fs_fatal_err_on(ret, c,
				"error decrypting journal entry: %s",
				bch2_err_str(ret));
		bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret));

		mutex_lock(&jlist->lock);
		ret = journal_entry_add(c, ca, (struct journal_ptr) {
@ -1820,7 +1818,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
		jset_entry_for_each_key(i, k) {
			ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k);
			if (ret) {
				bch2_fs_fatal_error(c, "-ENOMEM flushing journal keys to btree write buffer");
				bch2_fs_fatal_error(c, "flushing journal keys to btree write buffer: %s",
						    bch2_err_str(ret));
				bch2_journal_keys_to_write_buffer_end(c, &wb);
				return ret;
			}
@ -1848,7 +1847,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)

	bch2_journal_super_entries_add_common(c, &end, seq);
	u64s = (u64 *) end - (u64 *) start;
	BUG_ON(u64s > j->entry_u64s_reserved);

	WARN_ON(u64s > j->entry_u64s_reserved);

	le32_add_cpu(&jset->u64s, u64s);

@ -1856,7 +1856,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
	bytes = vstruct_bytes(jset);

	if (sectors > w->sectors) {
		bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
		bch2_fs_fatal_error(c, ": journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
				    vstruct_bytes(jset), w->sectors << 9,
				    u64s, w->u64s_reserved, j->entry_u64s_reserved);
		return -EINVAL;
@ -1884,8 +1884,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
	ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
		    jset->encrypted_start,
		    vstruct_end(jset) - (void *) jset->encrypted_start);
	if (bch2_fs_fatal_err_on(ret, c,
			"error decrypting journal entry: %i", ret))
	if (bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret)))
		return ret;

	jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
@ -95,8 +95,7 @@ out:
	return ret ?: bch2_blacklist_table_initialize(c);
}

static int journal_seq_blacklist_table_cmp(const void *_l,
					   const void *_r, size_t size)
static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
{
	const struct journal_seq_blacklist_table_entry *l = _l;
	const struct journal_seq_blacklist_table_entry *r = _r;
@ -101,8 +101,8 @@ void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
		bch2_fs_fatal_error(c, "%s: error deleting logged operation %s: %s",
				    __func__, buf.buf, bch2_err_str(ret));
		bch2_fs_fatal_error(c, "deleting logged operation %s: %s",
				    buf.buf, bch2_err_str(ret));
		printbuf_exit(&buf);
	}
}
@ -155,8 +155,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
	if (bch2_err_matches(ret, EROFS))
		return ret;

	if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_tryflush()",
				 __func__, bch2_err_str(ret)))
	if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret)))
		return ret;

	ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
@ -363,11 +363,11 @@ enum fsck_err_opts {
	  OPT_BOOL(),							\
	  BCH2_NO_SB_OPT,		false,				\
	  NULL,		"Don't replay the journal")			\
	x(keep_journal,			u8,				\
	x(retain_recovery_info,		u8,				\
	  0,								\
	  OPT_BOOL(),							\
	  BCH2_NO_SB_OPT,		false,				\
	  NULL,		"Don't free journal entries/keys after startup")\
	  NULL,		"Don't free journal entries/keys, scanned btree nodes after startup")\
	x(read_entire_journal,		u8,				\
	  0,								\
	  OPT_BOOL(),							\
@ -1,35 +1,31 @@
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "alloc_background.h"
#include "btree_gc.h"
#include "bkey_buf.h"
#include "btree_journal_iter.h"
#include "btree_node_scan.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "buckets.h"
#include "dirent.h"
#include "ec.h"
#include "errcode.h"
#include "error.h"
#include "fs-common.h"
#include "fsck.h"
#include "journal_io.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "lru.h"
#include "logged_ops.h"
#include "move.h"
#include "quota.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "replicas.h"
#include "sb-clean.h"
#include "sb-downgrade.h"
#include "snapshot.h"
#include "subvolume.h"
#include "super-io.h"

#include <linux/sort.h>
@ -52,7 +48,7 @@ static bool btree_id_is_alloc(enum btree_id id)
}

/* for -o reconstruct_alloc: */
static void do_reconstruct_alloc(struct bch_fs *c)
static void bch2_reconstruct_alloc(struct bch_fs *c)
{
	bch2_journal_log_msg(c, "dropping alloc info");
	bch_info(c, "dropping and reconstructing all alloc info");
@ -87,13 +83,17 @@ static void do_reconstruct_alloc(struct bch_fs *c)

	c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));

	struct journal_keys *keys = &c->journal_keys;
	size_t src, dst;

	for (src = 0, dst = 0; src < keys->nr; src++)
		if (!btree_id_is_alloc(keys->data[src].btree_id))
			keys->data[dst++] = keys->data[src];
	keys->nr = dst;
	bch2_shoot_down_journal_keys(c, BTREE_ID_alloc,
				     0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
	bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers,
				     0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
	bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard,
				     0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
	bch2_shoot_down_journal_keys(c, BTREE_ID_freespace,
				     0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
	bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens,
				     0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
}

/*
@ -184,7 +184,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
	return cmp_int(l->journal_seq, r->journal_seq);
}

static int bch2_journal_replay(struct bch_fs *c)
int bch2_journal_replay(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;
	DARRAY(struct journal_key *) keys_sorted = { 0 };
@ -192,6 +192,7 @@ static int bch2_journal_replay(struct bch_fs *c)
	u64 start_seq = c->journal_replay_seq_start;
	u64 end_seq = c->journal_replay_seq_start;
	struct btree_trans *trans = bch2_trans_get(c);
	bool immediate_flush = false;
	int ret = 0;

	if (keys->nr) {
@ -203,6 +204,8 @@ static int bch2_journal_replay(struct bch_fs *c)

	BUG_ON(!atomic_read(&keys->ref));

	move_gap(keys, keys->nr);

	/*
	 * First, attempt to replay keys in sorted order. This is more
	 * efficient - better locality of btree access - but some might fail if
@ -211,6 +214,13 @@ static int bch2_journal_replay(struct bch_fs *c)
	darray_for_each(*keys, k) {
		cond_resched();

		/*
		 * k->allocated means the key wasn't read in from the journal,
		 * rather it was from early repair code
		 */
		if (k->allocated)
			immediate_flush = true;

		/* Skip fastpath if we're low on space in the journal */
		ret = c->journal.watermark ? -1 :
			commit_do(trans, NULL, NULL,
@ -262,7 +272,7 @@ static int bch2_journal_replay(struct bch_fs *c)
	bch2_trans_put(trans);
	trans = NULL;

	if (!c->opts.keep_journal)
	if (!c->opts.retain_recovery_info)
		bch2_journal_keys_put_initial(c);

	replay_now_at(j, j->replay_journal_seq_end);
@ -270,6 +280,12 @@ static int bch2_journal_replay(struct bch_fs *c)

	bch2_journal_set_replay_done(j);

	/* if we did any repair, flush it immediately */
	if (immediate_flush) {
		bch2_journal_flush_all_pins(&c->journal);
		ret = bch2_journal_meta(&c->journal);
	}

	if (keys->nr)
		bch2_journal_log_msg(c, "journal replay finished");
err:
@ -419,10 +435,9 @@ static int journal_replay_early(struct bch_fs *c,

static int read_btree_roots(struct bch_fs *c)
{
	unsigned i;
	int ret = 0;

	for (i = 0; i < btree_id_nr_alive(c); i++) {
	for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
		struct btree_root *r = bch2_btree_id_root(c, i);

		if (!r->alive)
@ -431,186 +446,46 @@ static int read_btree_roots(struct bch_fs *c)
		if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc)
			continue;

		if (r->error) {
			__fsck_err(c,
				   btree_id_is_alloc(i)
				   ? FSCK_CAN_IGNORE : 0,
				   btree_root_bkey_invalid,
				   "invalid btree root %s",
				   bch2_btree_id_str(i));
			if (i == BTREE_ID_alloc)
		if (mustfix_fsck_err_on((ret = r->error),
					c, btree_root_bkey_invalid,
					"invalid btree root %s",
					bch2_btree_id_str(i)) ||
		    mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)),
					c, btree_root_read_error,
					"error reading btree root %s l=%u: %s",
					bch2_btree_id_str(i), r->level, bch2_err_str(ret))) {
			if (btree_id_is_alloc(i)) {
				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations);
				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info);
				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus);
				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
				c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs);
				c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
			}
			r->error = 0;
		} else if (!(c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) {
			bch_info(c, "will run btree node scan");
			c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes);
			c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
		}

		ret = bch2_btree_root_read(c, i, &r->key, r->level);
		if (ret) {
			fsck_err(c,
				 btree_root_read_error,
				 "error reading btree root %s",
				 bch2_btree_id_str(i));
			if (btree_id_is_alloc(i))
				c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
			set_bit(i, &c->btrees_lost_data);
			ret = 0;
		}
	}

	for (i = 0; i < BTREE_ID_NR; i++) {
	for (unsigned i = 0; i < BTREE_ID_NR; i++) {
		struct btree_root *r = bch2_btree_id_root(c, i);

		if (!r->b) {
		if (!r->b && !r->error) {
			r->alive = false;
			r->level = 0;
			bch2_btree_root_alloc(c, i);
			bch2_btree_root_alloc_fake(c, i, 0);
		}
	}
fsck_err:
	return ret;
}
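The reworked read_btree_roots() above changes the failure policy: a bad or unreadable root no longer merely degrades alloc info; for non-alloc btrees it now schedules the btree node scan and topology repair passes, and only roots that are both missing and error-free get a fake empty root. A hedged pseudocode-style C sketch of the resulting per-root decision flow (handle_root() and its shape are illustrative, not the actual function):

/* Sketch of the new per-root decision flow in read_btree_roots();
 * illustrative only, fsck prompting and error plumbing elided.
 */
static void handle_root(struct bch_fs *c, enum btree_id i, struct btree_root *r)
{
	if (r->error || bch2_btree_root_read(c, i, &r->key, r->level)) {
		if (btree_id_is_alloc(i)) {
			/* alloc info is reconstructible: schedule the
			 * alloc repair passes and clear the compat bit */
			c->recovery_passes_explicit |=
				BIT_ULL(BCH_RECOVERY_PASS_check_allocations);
			r->error = 0;
		} else {
			/* otherwise fill the hole from scanned nodes */
			c->recovery_passes_explicit |=
				BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes) |
				BIT_ULL(BCH_RECOVERY_PASS_check_topology);
		}
	}
}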
static int bch2_initialize_subvolumes(struct bch_fs *c)
{
	struct bkey_i_snapshot_tree	root_tree;
	struct bkey_i_snapshot		root_snapshot;
	struct bkey_i_subvolume		root_volume;
	int ret;

	bkey_snapshot_tree_init(&root_tree.k_i);
	root_tree.k.p.offset		= 1;
	root_tree.v.master_subvol	= cpu_to_le32(1);
	root_tree.v.root_snapshot	= cpu_to_le32(U32_MAX);

	bkey_snapshot_init(&root_snapshot.k_i);
	root_snapshot.k.p.offset = U32_MAX;
	root_snapshot.v.flags	= 0;
	root_snapshot.v.parent	= 0;
	root_snapshot.v.subvol	= cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
	root_snapshot.v.tree	= cpu_to_le32(1);
	SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);

	bkey_subvolume_init(&root_volume.k_i);
	root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
	root_volume.v.flags	= 0;
	root_volume.v.snapshot	= cpu_to_le32(U32_MAX);
	root_volume.v.inode	= cpu_to_le64(BCACHEFS_ROOT_INO);

	ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
	      bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
	      bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
	bch_err_fn(c, ret);
	return ret;
}

static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_inode_unpacked inode;
	int ret;

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
			       SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
	ret = bkey_err(k);
	if (ret)
		return ret;

	if (!bkey_is_inode(k.k)) {
		bch_err(trans->c, "root inode not found");
		ret = -BCH_ERR_ENOENT_inode;
		goto err;
	}

	ret = bch2_inode_unpack(k, &inode);
	BUG_ON(ret);

	inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;

	ret = bch2_inode_write(trans, &iter, &inode);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/* set bi_subvol on root inode */
noinline_for_stack
static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
{
	int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
				__bch2_fs_upgrade_for_subvolumes(trans));
	bch_err_fn(c, ret);
	return ret;
}

const char * const bch2_recovery_passes[] = {
#define x(_fn, ...)	#_fn,
	BCH_RECOVERY_PASSES()
#undef x
	NULL
};

static int bch2_check_allocations(struct bch_fs *c)
{
	return bch2_gc(c, true, c->opts.norecovery);
}

static int bch2_set_may_go_rw(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;

	/*
	 * After we go RW, the journal keys buffer can't be modified (except for
	 * setting journal_key->overwritten: it will be accessed by multiple
	 * threads
	 */
	move_gap(keys, keys->nr);

	set_bit(BCH_FS_may_go_rw, &c->flags);

	if (keys->nr || c->opts.fsck || !c->sb.clean)
		return bch2_fs_read_write_early(c);
	return 0;
}

struct recovery_pass_fn {
	int		(*fn)(struct bch_fs *);
	unsigned	when;
};

static struct recovery_pass_fn recovery_pass_fns[] = {
#define x(_fn, _id, _when)	{ .fn = bch2_##_fn, .when = _when },
	BCH_RECOVERY_PASSES()
#undef x
};

u64 bch2_recovery_passes_to_stable(u64 v)
{
	static const u8 map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
	BCH_RECOVERY_PASSES()
#undef x
	};

	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(map[i]);
	return ret;
}

u64 bch2_recovery_passes_from_stable(u64 v)
{
	static const u8 map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
	BCH_RECOVERY_PASSES()
#undef x
	};

	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(map[i]);
	return ret;
}

static bool check_version_upgrade(struct bch_fs *c)
{
	unsigned latest_version = bcachefs_metadata_version_current;
@ -683,96 +558,6 @@ static bool check_version_upgrade(struct bch_fs *c)
	return false;
}

u64 bch2_fsck_recovery_passes(void)
{
	u64 ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
		if (recovery_pass_fns[i].when & PASS_FSCK)
			ret |= BIT_ULL(i);
	return ret;
}

static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;

	if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
		return false;
	if (c->recovery_passes_explicit & BIT_ULL(pass))
		return true;
	if ((p->when & PASS_FSCK) && c->opts.fsck)
		return true;
	if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
		return true;
	if (p->when & PASS_ALWAYS)
		return true;
	return false;
}

static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;
	int ret;

	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
			   bch2_recovery_passes[pass]);
	ret = p->fn(c);
	if (ret)
		return ret;
	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_CONT " done\n");

	return 0;
}

static int bch2_run_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
		if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
			unsigned pass = c->curr_recovery_pass;

			ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
			if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
			    (ret && c->curr_recovery_pass < pass))
				continue;
			if (ret)
				break;

			c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
		}
		c->curr_recovery_pass++;
		c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
	}

	return ret;
}

int bch2_run_online_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
		struct recovery_pass_fn *p = recovery_pass_fns + i;

		if (!(p->when & PASS_ONLINE))
			continue;

		ret = bch2_run_recovery_pass(c, i);
		if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
			i = c->curr_recovery_pass;
			continue;
		}
		if (ret)
			break;
	}

	return ret;
}

int bch2_fs_recovery(struct bch_fs *c)
{
	struct bch_sb_field_clean *clean = NULL;
@ -811,6 +596,9 @@ int bch2_fs_recovery(struct bch_fs *c)
		goto err;
	}

	c->opts.retain_recovery_info |= c->opts.norecovery;
	c->opts.nochanges |= c->opts.norecovery;

	if (!c->opts.nochanges) {
		mutex_lock(&c->sb_lock);
		bool write_sb = false;
@ -881,7 +669,7 @@ int bch2_fs_recovery(struct bch_fs *c)
		goto err;
	}

	if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
	if (!c->sb.clean || c->opts.fsck || c->opts.retain_recovery_info) {
		struct genradix_iter iter;
		struct journal_replay **i;

@ -961,7 +749,7 @@ use_clean:
	c->journal_replay_seq_end = blacklist_seq - 1;

	if (c->opts.reconstruct_alloc)
		do_reconstruct_alloc(c);
		bch2_reconstruct_alloc(c);

	zero_out_btree_mem_ptr(&c->journal_keys);

@ -1013,6 +801,12 @@ use_clean:

	clear_bit(BCH_FS_fsck_running, &c->flags);

	/* fsync if we fixed errors */
	if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
		bch2_journal_flush_all_pins(&c->journal);
		bch2_journal_meta(&c->journal);
	}

	/* If we fixed errors, verify that fs is actually clean now: */
	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
	    test_bit(BCH_FS_errors_fixed, &c->flags) &&
@ -1109,9 +903,10 @@ use_clean:
out:
	bch2_flush_fsck_errs(c);

	if (!c->opts.keep_journal &&
	    test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
	if (!c->opts.retain_recovery_info) {
		bch2_journal_keys_put_initial(c);
		bch2_find_btree_nodes_exit(&c->found_btree_nodes);
	}
	kfree(clean);

	if (!ret &&
@ -1151,11 +946,11 @@ int bch2_fs_initialize(struct bch_fs *c)
	}
	mutex_unlock(&c->sb_lock);

	c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns);
	c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
	set_bit(BCH_FS_may_go_rw, &c->flags);

	for (unsigned i = 0; i < BTREE_ID_NR; i++)
		bch2_btree_root_alloc(c, i);
		bch2_btree_root_alloc_fake(c, i, 0);

	for_each_member_device(c, ca)
		bch2_dev_usage_init(ca);
@ -1226,7 +1021,7 @@ int bch2_fs_initialize(struct bch_fs *c)
	if (ret)
		goto err;

	c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1;
	c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;

	if (enabled_qtypes(c)) {
		ret = bch2_fs_quota_read(c);
@ -2,37 +2,7 @@
#ifndef _BCACHEFS_RECOVERY_H
#define _BCACHEFS_RECOVERY_H

extern const char * const bch2_recovery_passes[];

u64 bch2_recovery_passes_to_stable(u64 v);
u64 bch2_recovery_passes_from_stable(u64 v);

/*
 * For when we need to rewind recovery passes and run a pass we skipped:
 */
static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c,
						  enum bch_recovery_pass pass)
{
	if (c->recovery_passes_explicit & BIT_ULL(pass))
		return 0;

	bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
		 bch2_recovery_passes[pass], pass,
		 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);

	c->recovery_passes_explicit |= BIT_ULL(pass);

	if (c->curr_recovery_pass >= pass) {
		c->curr_recovery_pass = pass;
		c->recovery_passes_complete &= (1ULL << pass) >> 1;
		return -BCH_ERR_restart_recovery;
	} else {
		return 0;
	}
}

int bch2_run_online_recovery_passes(struct bch_fs *);
u64 bch2_fsck_recovery_passes(void);
int bch2_journal_replay(struct bch_fs *);

int bch2_fs_recovery(struct bch_fs *);
int bch2_fs_initialize(struct bch_fs *);

libbcachefs/recovery_passes.c (new file, 204 lines)
@ -0,0 +1,204 @@
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "btree_gc.h"
#include "btree_node_scan.h"
#include "ec.h"
#include "fsck.h"
#include "inode.h"
#include "journal.h"
#include "lru.h"
#include "logged_ops.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include "subvolume.h"
#include "super.h"

const char * const bch2_recovery_passes[] = {
#define x(_fn, ...)	#_fn,
	BCH_RECOVERY_PASSES()
#undef x
	NULL
};

static int bch2_check_allocations(struct bch_fs *c)
{
	return bch2_gc(c, true, c->opts.norecovery);
}

static int bch2_set_may_go_rw(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;

	/*
	 * After we go RW, the journal keys buffer can't be modified (except for
	 * setting journal_key->overwritten: it will be accessed by multiple
	 * threads
	 */
	move_gap(keys, keys->nr);

	set_bit(BCH_FS_may_go_rw, &c->flags);

	if (keys->nr || c->opts.fsck || !c->sb.clean)
		return bch2_fs_read_write_early(c);
	return 0;
}

struct recovery_pass_fn {
	int		(*fn)(struct bch_fs *);
	unsigned	when;
};

static struct recovery_pass_fn recovery_pass_fns[] = {
#define x(_fn, _id, _when)	{ .fn = bch2_##_fn, .when = _when },
	BCH_RECOVERY_PASSES()
#undef x
};

u64 bch2_recovery_passes_to_stable(u64 v)
{
	static const u8 map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
	BCH_RECOVERY_PASSES()
#undef x
	};

	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(map[i]);
	return ret;
}

u64 bch2_recovery_passes_from_stable(u64 v)
{
	static const u8 map[] = {
#define x(n, id, ...)	[BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
	BCH_RECOVERY_PASSES()
#undef x
	};

	u64 ret = 0;
	for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
		if (v & BIT_ULL(i))
			ret |= BIT_ULL(map[i]);
	return ret;
}
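Because the in-memory pass order can change across versions (this commit itself moves check_topology and resume_logged_ops), only the stable IDs are persisted; the two helpers above remap bitmasks between the two numbering spaces bit by bit. A self-contained toy model of the same remapping (the table values here are made up for illustration):

/* Toy model of the bitmask remap in bch2_recovery_passes_to_stable()
 * and ..._from_stable(): move each set bit through a lookup table.
 */
#include <stdint.h>

static uint64_t remap_mask(uint64_t v, const uint8_t *map, unsigned n)
{
	uint64_t ret = 0;

	for (unsigned i = 0; i < n; i++)
		if (v & (1ULL << i))
			ret |= 1ULL << map[i];
	return ret;
}

/* e.g. with map[] = { 2, 0, 1 }, remap_mask(0b011, map, 3) == 0b101 */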
/*
 * For when we need to rewind recovery passes and run a pass we skipped:
 */
int bch2_run_explicit_recovery_pass(struct bch_fs *c,
				    enum bch_recovery_pass pass)
{
	if (c->recovery_passes_explicit & BIT_ULL(pass))
		return 0;

	bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
		 bch2_recovery_passes[pass], pass,
		 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);

	c->recovery_passes_explicit |= BIT_ULL(pass);

	if (c->curr_recovery_pass >= pass) {
		c->curr_recovery_pass = pass;
		c->recovery_passes_complete &= (1ULL << pass) >> 1;
		return -BCH_ERR_restart_recovery;
	} else {
		return 0;
	}
}
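The rewind protocol above deserves a note: when a later pass discovers it needs an earlier one, the function flags that pass explicit, moves curr_recovery_pass back, and returns -BCH_ERR_restart_recovery so the driver loop in bch2_run_recovery_passes() resumes from the rewind point. A toy model of the control flow, with hypothetical names:

/* Toy model of the rewind protocol; names are hypothetical stand-ins. */
#include <stdint.h>

struct pass_state {
	unsigned	curr;		/* models c->curr_recovery_pass */
	uint64_t	explicit_mask;	/* models c->recovery_passes_explicit */
};

static int request_pass(struct pass_state *s, unsigned pass)
{
	if (s->explicit_mask & (1ULL << pass))
		return 0;		/* already scheduled */

	s->explicit_mask |= 1ULL << pass;

	if (s->curr >= pass) {
		s->curr = pass;		/* rewind: driver re-runs from here */
		return -1;		/* models -BCH_ERR_restart_recovery */
	}
	return 0;			/* pass will run when we reach it */
}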
u64 bch2_fsck_recovery_passes(void)
{
	u64 ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
		if (recovery_pass_fns[i].when & PASS_FSCK)
			ret |= BIT_ULL(i);
	return ret;
}

static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;

	if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
		return false;
	if (c->recovery_passes_explicit & BIT_ULL(pass))
		return true;
	if ((p->when & PASS_FSCK) && c->opts.fsck)
		return true;
	if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
		return true;
	if (p->when & PASS_ALWAYS)
		return true;
	return false;
}

static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
	struct recovery_pass_fn *p = recovery_pass_fns + pass;
	int ret;

	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
			   bch2_recovery_passes[pass]);
	ret = p->fn(c);
	if (ret)
		return ret;
	if (!(p->when & PASS_SILENT))
		bch2_print(c, KERN_CONT " done\n");

	return 0;
}

int bch2_run_online_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
		struct recovery_pass_fn *p = recovery_pass_fns + i;

		if (!(p->when & PASS_ONLINE))
			continue;

		ret = bch2_run_recovery_pass(c, i);
		if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
			i = c->curr_recovery_pass;
			continue;
		}
		if (ret)
			break;
	}

	return ret;
}

int bch2_run_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
		if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
			unsigned pass = c->curr_recovery_pass;

			ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
			if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
			    (ret && c->curr_recovery_pass < pass))
				continue;
			if (ret)
				break;

			c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
		}
		c->curr_recovery_pass++;
		c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
	}

	return ret;
}
libbcachefs/recovery_passes.h (new file, 16 lines)
@ -0,0 +1,16 @@
#ifndef _BCACHEFS_RECOVERY_PASSES_H
#define _BCACHEFS_RECOVERY_PASSES_H

extern const char * const bch2_recovery_passes[];

u64 bch2_recovery_passes_to_stable(u64 v);
u64 bch2_recovery_passes_from_stable(u64 v);

u64 bch2_fsck_recovery_passes(void);

int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);

int bch2_run_online_recovery_passes(struct bch_fs *);
int bch2_run_recovery_passes(struct bch_fs *);

#endif /* _BCACHEFS_RECOVERY_PASSES_H */
@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_RECOVERY_TYPES_H
#define _BCACHEFS_RECOVERY_TYPES_H
#ifndef _BCACHEFS_RECOVERY_PASSES_TYPES_H
#define _BCACHEFS_RECOVERY_PASSES_TYPES_H

#define PASS_SILENT		BIT(0)
#define PASS_FSCK		BIT(1)
@ -13,11 +13,12 @@
 * must never change:
 */
#define BCH_RECOVERY_PASSES()						\
	x(scan_for_btree_nodes,		37, 0)				\
	x(check_topology,		 4, 0)				\
	x(alloc_read,			 0, PASS_ALWAYS)		\
	x(stripes_read,			 1, PASS_ALWAYS)		\
	x(initialize_subvolumes,	 2, 0)				\
	x(snapshots_read,		 3, PASS_ALWAYS)		\
	x(check_topology,		 4, 0)				\
	x(check_allocations,		 5, PASS_FSCK)			\
	x(trans_mark_dev_sbs,		 6, PASS_ALWAYS|PASS_SILENT)	\
	x(fs_journal_alloc,		 7, PASS_ALWAYS|PASS_SILENT)	\
@ -37,7 +38,6 @@
	x(check_subvol_children,	35, PASS_ONLINE|PASS_FSCK)	\
	x(delete_dead_snapshots,	21, PASS_ONLINE|PASS_FSCK)	\
	x(fs_upgrade_for_subvolumes,	22, 0)				\
	x(resume_logged_ops,		23, PASS_ALWAYS)		\
	x(check_inodes,			24, PASS_FSCK)			\
	x(check_extents,		25, PASS_FSCK)			\
	x(check_indirect_extents,	26, PASS_FSCK)			\
@ -47,6 +47,7 @@
	x(check_subvolume_structure,	36, PASS_ONLINE|PASS_FSCK)	\
	x(check_directory_structure,	30, PASS_ONLINE|PASS_FSCK)	\
	x(check_nlinks,			31, PASS_FSCK)			\
	x(resume_logged_ops,		23, PASS_ALWAYS)		\
	x(delete_dead_inodes,		32, PASS_FSCK|PASS_UNCLEAN)	\
	x(fix_reflink_p,		33, 0)				\
	x(set_fs_needs_rebalance,	34, 0)				\
@ -56,6 +57,7 @@ enum bch_recovery_pass {
#define x(n, id, when)	BCH_RECOVERY_PASS_##n,
	BCH_RECOVERY_PASSES()
#undef x
	BCH_RECOVERY_PASS_NR
};

/* But we also need stable identifiers that can be used in the superblock */
@ -65,4 +67,4 @@ enum bch_recovery_pass_stable {
#undef x
};

#endif /* _BCACHEFS_RECOVERY_TYPES_H */
#endif /* _BCACHEFS_RECOVERY_PASSES_TYPES_H */
@ -185,8 +185,7 @@ not_found:
	} else {
		bkey_error_init(update);
		update->k.p		= p.k->p;
		update->k.p.offset	= next_idx;
		update->k.size		= next_idx - *idx;
		update->k.size		= p.k->size;
		set_bkey_val_u64s(&update->k, 0);
	}
@ -6,12 +6,15 @@
#include "replicas.h"
#include "super-io.h"

#include <linux/sort.h>

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
					    struct bch_replicas_cpu *);

/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, size_t size)
static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
	size_t size = (size_t) priv;
	return memcmp(l, r, size);
}

@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
	eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
	eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
			  bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}
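The comparator change above is worth spelling out: sort_r()-style comparators take only the two elements plus an opaque priv pointer, so the element size that bch2_memcmp() used to receive as its third argument is now smuggled through priv as a casted integer. A minimal sketch of the same trick:

/* Sketch: passing the element size through the priv argument, as
 * bch2_memcmp() now does for sort_r()/eytzinger0_sort_r().
 */
#include <string.h>

static int cmp_mem_priv(const void *l, const void *r, const void *priv)
{
	size_t size = (size_t) priv;

	return memcmp(l, r, size);
}

/* usage: sort_r(entries, nr, entry_size, cmp_mem_priv, NULL,
 *               (void *)(size_t) entry_size);
 */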
static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
@ -228,7 +232,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,

	verify_replicas_entry(search);

#define entry_cmp(_l, _r, size)	memcmp(_l, _r, entry_size)
#define entry_cmp(_l, _r)	memcmp(_l, _r, entry_size)
	idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
			      entry_cmp, search);
#undef entry_cmp
@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
{
	unsigned i;

	sort_cmp_size(cpu_r->entries,
		      cpu_r->nr,
		      cpu_r->entry_size,
		      bch2_memcmp, NULL);
	sort_r(cpu_r->entries,
	       cpu_r->nr,
	       cpu_r->entry_size,
	       bch2_memcmp, NULL,
	       (void *)(size_t)cpu_r->entry_size);

	for (i = 0; i < cpu_r->nr; i++) {
		struct bch_replicas_entry_v1 *e =
@ -7,7 +7,7 @@

#include "bcachefs.h"
#include "darray.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "super-io.h"
@ -265,7 +265,10 @@
	x(subvol_children_bad,					257)	\
	x(subvol_loop,						258)	\
	x(subvol_unreachable,					259)	\
	x(btree_node_bkey_bad_u64s,				260)
	x(btree_node_bkey_bad_u64s,				260)	\
	x(btree_node_topology_empty_interior_node,		261)	\
	x(btree_ptr_v2_min_key_bad,				262)	\
	x(btree_root_unreadable_and_scan_found_nothing,		263)

enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
@ -91,23 +91,29 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans,

/* Snapshot nodes: */

static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
{
	while (id && id < ancestor) {
		const struct snapshot_t *s = __snapshot_t(t, id);
		id = s ? s->parent : 0;
	}
	return id == ancestor;
}

static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
{
	struct snapshot_table *t;

	rcu_read_lock();
	t = rcu_dereference(c->snapshots);

	while (id && id < ancestor)
		id = __snapshot_t(t, id)->parent;
	bool ret = __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
	rcu_read_unlock();

	return id == ancestor;
	return ret;
}

static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
{
	const struct snapshot_t *s = __snapshot_t(t, id);
	if (!s)
		return 0;

	if (s->skip[2] <= ancestor)
		return s->skip[2];
@ -120,13 +126,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances

bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
	struct snapshot_table *t;
	bool ret;

	EBUG_ON(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots);

	rcu_read_lock();
	t = rcu_dereference(c->snapshots);
	struct snapshot_table *t = rcu_dereference(c->snapshots);

	if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
		ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
		goto out;
	}

	while (id && id < ancestor - IS_ANCESTOR_BITMAP)
		id = get_ancestor_below(t, id, ancestor);
@ -134,11 +142,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
	if (id && id < ancestor) {
		ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);

		EBUG_ON(ret != bch2_snapshot_is_ancestor_early(c, id, ancestor));
		EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
	} else {
		ret = id == ancestor;
	}
out:
	rcu_read_unlock();

	return ret;
@ -147,36 +155,39 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
{
	size_t idx = U32_MAX - id;
	size_t new_size;
	struct snapshot_table *new, *old;

	new_size = max(16UL, roundup_pow_of_two(idx + 1));
	size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
	size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);

	new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
	new = kvzalloc(new_bytes, GFP_KERNEL);
	if (!new)
		return NULL;

	new->nr = new_size;

	old = rcu_dereference_protected(c->snapshots, true);
	if (old)
		memcpy(new->s,
		       rcu_dereference_protected(c->snapshots, true)->s,
		       sizeof(new->s[0]) * c->snapshot_table_size);
		memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);

	rcu_assign_pointer(c->snapshots, new);
	c->snapshot_table_size = new_size;
	kvfree_rcu_mightsleep(old);
	kvfree_rcu(old, rcu);

	return &rcu_dereference_protected(c->snapshots, true)->s[idx];
	return &rcu_dereference_protected(c->snapshots,
				lockdep_is_held(&c->snapshot_table_lock))->s[idx];
}

static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
{
	size_t idx = U32_MAX - id;
	struct snapshot_table *table =
		rcu_dereference_protected(c->snapshots,
				lockdep_is_held(&c->snapshot_table_lock));

	lockdep_assert_held(&c->snapshot_table_lock);

	if (likely(idx < c->snapshot_table_size))
		return &rcu_dereference_protected(c->snapshots, true)->s[idx];
	if (likely(table && idx < table->nr))
		return &table->s[idx];

	return __snapshot_t_mut(c, id);
}
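Two details of the reallocation above are worth calling out: the table now records its own capacity in nr (so readers can bounds check), and the new size is derived from kmalloc_size_roundup(), which rounds the byte count up to what the allocator would hand back anyway, so the slack becomes usable capacity instead of waste. A hedged sketch of that sizing idiom for a generic flex-array table (struct table and table_grow() are illustrative names):

/* Sketch of the kmalloc_size_roundup() growth idiom used above:
 * round the allocation up to the allocator bucket size, then recompute
 * how many elements actually fit in it.
 */
struct table {
	struct rcu_head	rcu;
	size_t		nr;
	u64		vals[];
};

static struct table *table_grow(size_t min_nr)
{
	struct table *new;
	size_t bytes = kmalloc_size_roundup(struct_size(new, vals, min_nr));
	size_t nr    = (bytes - sizeof(*new)) / sizeof(new->vals[0]);

	new = kvzalloc(bytes, GFP_KERNEL);
	if (new)
		new->nr = nr;	/* capacity, not element count in use */
	return new;
}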
@ -547,7 +558,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
			"snapshot tree points to missing subvolume:\n  %s",
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
	    fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
	    fsck_err_on(!bch2_snapshot_is_ancestor(c,
						le32_to_cpu(subvol.snapshot),
						root_id),
			c, snapshot_tree_to_wrong_subvol,
@ -33,7 +33,11 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,

static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
{
	return &t->s[U32_MAX - id];
	u32 idx = U32_MAX - id;

	return likely(t && idx < t->nr)
		? &t->s[idx]
		: NULL;
}

static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
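__snapshot_t() turning NULL-tolerant is the keystone for the rest of this header: every accessor below now has to cope with a missing entry instead of dereferencing garbage for an unknown ID. A hedged caller-side sketch of the resulting convention:

/* Sketch: callers of the NULL-returning accessor pick an explicit
 * fallback instead of blindly dereferencing.
 */
static u32 snapshot_parent_or_zero(struct snapshot_table *t, u32 id)
{
	const struct snapshot_t *s = __snapshot_t(t, id);

	return s ? s->parent : 0;	/* 0 = no such snapshot */
}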
@ -44,7 +48,8 @@ static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
{
	rcu_read_lock();
	id = snapshot_t(c, id)->tree;
	const struct snapshot_t *s = snapshot_t(c, id);
	id = s ? s->tree : 0;
	rcu_read_unlock();

	return id;
@ -52,7 +57,8 @@ static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)

static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{
	return snapshot_t(c, id)->parent;
	const struct snapshot_t *s = snapshot_t(c, id);
	return s ? s->parent : 0;
}

static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
@ -66,19 +72,19 @@ static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)

static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	u32 parent = snapshot_t(c, id)->parent;
	const struct snapshot_t *s = snapshot_t(c, id);
	if (!s)
		return 0;

	if (parent &&
	    snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1)
	u32 parent = s->parent;
	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
	    parent &&
	    s->depth != snapshot_t(c, parent)->depth + 1)
		panic("id %u depth=%u parent %u depth=%u\n",
		      id, snapshot_t(c, id)->depth,
		      parent, snapshot_t(c, parent)->depth);

	return parent;
#else
	return snapshot_t(c, id)->parent;
#endif
}

static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
@ -116,7 +122,8 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)

static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
{
	return snapshot_t(c, id)->equiv;
	const struct snapshot_t *s = snapshot_t(c, id);
	return s ? s->equiv : 0;
}

static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
@ -133,38 +140,22 @@ static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
	return id == bch2_snapshot_equiv(c, id);
}

static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
{
	const struct snapshot_t *s;
	bool ret;

	rcu_read_lock();
	s = snapshot_t(c, id);
	ret = s->children[0];
	const struct snapshot_t *s = snapshot_t(c, id);
	int ret = s ? s->children[0] : -BCH_ERR_invalid_snapshot_node;
	rcu_read_unlock();

	return ret;
}

static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
{
	return !bch2_snapshot_is_internal_node(c, id);
}

static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
{
	const struct snapshot_t *s;
	u32 parent = __bch2_snapshot_parent(c, id);

	if (!parent)
		return 0;

	s = snapshot_t(c, __bch2_snapshot_parent(c, id));
	if (id == s->children[0])
		return s->children[1];
	if (id == s->children[1])
		return s->children[0];
	return 0;
	int ret = bch2_snapshot_is_internal_node(c, id);
	if (ret < 0)
		return ret;
	return !ret;
}

static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
@ -249,7 +240,7 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
						   struct bpos pos)
{
	if (!btree_type_has_snapshots(id) ||
	    bch2_snapshot_is_leaf(trans->c, pos.snapshot))
	    bch2_snapshot_is_leaf(trans->c, pos.snapshot) > 0)
		return 0;

	return __bch2_key_has_snapshot_overwrites(trans, id, pos);
@ -595,6 +595,78 @@ err:
	return ret;
}

int bch2_initialize_subvolumes(struct bch_fs *c)
{
	struct bkey_i_snapshot_tree	root_tree;
	struct bkey_i_snapshot		root_snapshot;
	struct bkey_i_subvolume		root_volume;
	int ret;

	bkey_snapshot_tree_init(&root_tree.k_i);
	root_tree.k.p.offset		= 1;
	root_tree.v.master_subvol	= cpu_to_le32(1);
	root_tree.v.root_snapshot	= cpu_to_le32(U32_MAX);

	bkey_snapshot_init(&root_snapshot.k_i);
	root_snapshot.k.p.offset = U32_MAX;
	root_snapshot.v.flags	= 0;
	root_snapshot.v.parent	= 0;
	root_snapshot.v.subvol	= cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
	root_snapshot.v.tree	= cpu_to_le32(1);
	SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);

	bkey_subvolume_init(&root_volume.k_i);
	root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
	root_volume.v.flags	= 0;
	root_volume.v.snapshot	= cpu_to_le32(U32_MAX);
	root_volume.v.inode	= cpu_to_le64(BCACHEFS_ROOT_INO);

	ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
	      bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
	      bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
	bch_err_fn(c, ret);
	return ret;
}

static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_inode_unpacked inode;
	int ret;

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
			       SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
	ret = bkey_err(k);
	if (ret)
		return ret;

	if (!bkey_is_inode(k.k)) {
		bch_err(trans->c, "root inode not found");
		ret = -BCH_ERR_ENOENT_inode;
		goto err;
	}

	ret = bch2_inode_unpack(k, &inode);
	BUG_ON(ret);

	inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;

	ret = bch2_inode_write(trans, &iter, &inode);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/* set bi_subvol on root inode */
int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
{
	int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
				__bch2_fs_upgrade_for_subvolumes(trans));
	bch_err_fn(c, ret);
	return ret;
}

int bch2_fs_subvolumes_init(struct bch_fs *c)
{
	INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
@ -37,6 +37,9 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *);
int bch2_subvolume_unlink(struct btree_trans *, u32);
int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);

int bch2_initialize_subvolumes(struct bch_fs *);
int bch2_fs_upgrade_for_subvolumes(struct bch_fs *);

int bch2_fs_subvolumes_init(struct bch_fs *);

#endif /* _BCACHEFS_SUBVOLUME_H */
@ -20,6 +20,8 @@ struct snapshot_t {
};

struct snapshot_table {
	struct rcu_head	rcu;
	size_t		nr;
#ifndef RUST_BINDGEN
	DECLARE_FLEX_ARRAY(struct snapshot_t, s);
#else
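The struct gains the rcu head used by kvfree_rcu() in __snapshot_t_mut() and the nr capacity field the new bounds checks rely on; DECLARE_FLEX_ARRAY keeps the trailing array bindgen-safe. For reference, a hedged sketch of how such an RCU-managed flex-array table is published and retired (illustrative names, mirrors the shape of struct snapshot_table):

/* Sketch: swapping in a new flex-array table under RCU. */
struct flex_table {
	struct rcu_head	rcu;
	size_t		nr;
	struct snapshot_t s[];
};

static void replace_table(struct flex_table __rcu **slot,
			  struct flex_table *new, struct flex_table *old)
{
	rcu_assign_pointer(*slot, new);	/* publish to readers */
	if (old)
		kvfree_rcu(old, rcu);	/* free after a grace period */
}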
@ -8,7 +8,7 @@
|
||||
#include "journal.h"
|
||||
#include "journal_sb.h"
|
||||
#include "journal_seq_blacklist.h"
|
||||
#include "recovery.h"
|
||||
#include "recovery_passes.h"
|
||||
#include "replicas.h"
|
||||
#include "quota.h"
|
||||
#include "sb-clean.h"
|
||||
@@ -985,7 +985,7 @@ int bch2_write_super(struct bch_fs *c)
		prt_str(&buf, " > ");
		bch2_version_to_text(&buf, bcachefs_metadata_version_current);
		prt_str(&buf, ")");
		bch2_fs_fatal_error(c, "%s", buf.buf);
		bch2_fs_fatal_error(c, ": %s", buf.buf);
		printbuf_exit(&buf);
		return -BCH_ERR_sb_not_downgraded;
	}
@@ -1005,7 +1005,7 @@ int bch2_write_super(struct bch_fs *c)

		if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
			bch2_fs_fatal_error(c,
				"Superblock write was silently dropped! (seq %llu expected %llu)",
				": Superblock write was silently dropped! (seq %llu expected %llu)",
				le64_to_cpu(ca->sb_read_scratch->seq),
				ca->disk_sb.seq);
			percpu_ref_put(&ca->io_ref);
@@ -1015,7 +1015,7 @@ int bch2_write_super(struct bch_fs *c)

		if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
			bch2_fs_fatal_error(c,
				"Superblock modified by another process (seq %llu expected %llu)",
				": Superblock modified by another process (seq %llu expected %llu)",
				le64_to_cpu(ca->sb_read_scratch->seq),
				ca->disk_sb.seq);
			percpu_ref_put(&ca->io_ref);
@@ -1066,7 +1066,7 @@ int bch2_write_super(struct bch_fs *c)
			!can_mount_with_written ||
			(can_mount_without_written &&
			 !can_mount_with_written), c,
			"Unable to write superblock to sufficient devices (from %ps)",
			": Unable to write superblock to sufficient devices (from %ps)",
			(void *) _RET_IP_))
		ret = -1;
out:
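The seq comparisons above catch both failure modes by reading each device's superblock back after writing it: a read-back seq below what was just written means the device silently dropped the write, while a higher seq means another process updated the superblock underneath us. A hedged sketch of just the comparison logic (hypothetical helper, not the real I/O path):

	/* Hypothetical: classify the seq read back from disk against the
	 * seq we believe we just wrote. */
	static int check_sb_seq(unsigned long long on_disk, unsigned long long written)
	{
		if (on_disk < written)
			return -1;	/* write silently dropped by the device */
		if (on_disk > written)
			return -2;	/* superblock modified by another process */
		return 0;		/* read-back matches; the write landed */
	}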
@@ -15,6 +15,7 @@
#include "btree_gc.h"
#include "btree_journal_iter.h"
#include "btree_key_cache.h"
#include "btree_node_scan.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_write_buffer.h"
@@ -87,6 +88,7 @@ const char * const bch2_fs_flag_strs[] = {
	NULL
};

__printf(2, 0)
static void bch2_print_maybe_redirect(struct stdio_redirect *stdio, const char *fmt, va_list args)
{
#ifdef __KERNEL__
@@ -534,6 +536,7 @@ static void __bch2_fs_free(struct bch_fs *c)
	for (i = 0; i < BCH_TIME_STAT_NR; i++)
		bch2_time_stats_exit(&c->times[i]);

	bch2_find_btree_nodes_exit(&c->found_btree_nodes);
	bch2_free_pending_node_rewrites(c);
	bch2_fs_sb_errors_exit(c);
	bch2_fs_counters_exit(c);
@@ -558,6 +561,7 @@ static void __bch2_fs_free(struct bch_fs *c)
	bch2_io_clock_exit(&c->io_clock[READ]);
	bch2_fs_compress_exit(c);
	bch2_journal_keys_put_initial(c);
	bch2_find_btree_nodes_exit(&c->found_btree_nodes);
	BUG_ON(atomic_read(&c->journal_keys.ref));
	bch2_fs_btree_write_buffer_exit(c);
	percpu_free_rwsem(&c->mark_lock);
@@ -707,149 +707,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
	}
}

static int alignment_ok(const void *base, size_t align)
{
	return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
		((unsigned long)base & (align - 1)) == 0;
}

static void u32_swap(void *a, void *b, size_t size)
{
	u32 t = *(u32 *)a;
	*(u32 *)a = *(u32 *)b;
	*(u32 *)b = t;
}

static void u64_swap(void *a, void *b, size_t size)
{
	u64 t = *(u64 *)a;
	*(u64 *)a = *(u64 *)b;
	*(u64 *)b = t;
}

static void generic_swap(void *a, void *b, size_t size)
{
	char t;

	do {
		t = *(char *)a;
		*(char *)a++ = *(char *)b;
		*(char *)b++ = t;
	} while (--size > 0);
}

static inline int do_cmp(void *base, size_t n, size_t size,
			 int (*cmp_func)(const void *, const void *, size_t),
			 size_t l, size_t r)
{
	return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
			base + inorder_to_eytzinger0(r, n) * size,
			size);
}

static inline void do_swap(void *base, size_t n, size_t size,
			   void (*swap_func)(void *, void *, size_t),
			   size_t l, size_t r)
{
	swap_func(base + inorder_to_eytzinger0(l, n) * size,
		  base + inorder_to_eytzinger0(r, n) * size,
		  size);
}

void eytzinger0_sort(void *base, size_t n, size_t size,
		     int (*cmp_func)(const void *, const void *, size_t),
		     void (*swap_func)(void *, void *, size_t))
{
	int i, c, r;

	if (!swap_func) {
		if (size == 4 && alignment_ok(base, 4))
			swap_func = u32_swap;
		else if (size == 8 && alignment_ok(base, 8))
			swap_func = u64_swap;
		else
			swap_func = generic_swap;
	}

	/* heapify */
	for (i = n / 2 - 1; i >= 0; --i) {
		for (r = i; r * 2 + 1 < n; r = c) {
			c = r * 2 + 1;

			if (c + 1 < n &&
			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
				c++;

			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
				break;

			do_swap(base, n, size, swap_func, r, c);
		}
	}

	/* sort */
	for (i = n - 1; i > 0; --i) {
		do_swap(base, n, size, swap_func, 0, i);

		for (r = 0; r * 2 + 1 < i; r = c) {
			c = r * 2 + 1;

			if (c + 1 < i &&
			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
				c++;

			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
				break;

			do_swap(base, n, size, swap_func, r, c);
		}
	}
}
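eytzinger0_sort(), removed from util.c in this update, is an ordinary heapsort except that every element access is remapped through inorder_to_eytzinger0(), so the array ends up sorted with respect to the 0-based eytzinger (breadth-first) layout rather than plain index order. A hedged usage sketch, assuming the iteration helpers from eytzinger.h are in scope:

	static int cmp_u32(const void *a, const void *b, size_t size)
	{
		u32 l = *(const u32 *) a, r = *(const u32 *) b;

		return l < r ? -1 : l > r ? 1 : 0;
	}

	void eytzinger_sort_example(void)
	{
		u32 a[] = { 5, 1, 4, 2, 3 };

		/* NULL swap_func: the sorter picks u32_swap() for aligned 4-byte elements. */
		eytzinger0_sort(a, ARRAY_SIZE(a), sizeof(a[0]), cmp_u32, NULL);

		/* a[] is now a binary search tree in breadth-first order; walking it
		 * with eytzinger0_first()/eytzinger0_next() visits 1, 2, 3, 4, 5. */
	}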
void sort_cmp_size(void *base, size_t num, size_t size,
		   int (*cmp_func)(const void *, const void *, size_t),
		   void (*swap_func)(void *, void *, size_t size))
{
	/* pre-scale counters for performance */
	int i = (num/2 - 1) * size, n = num * size, c, r;

	if (!swap_func) {
		if (size == 4 && alignment_ok(base, 4))
			swap_func = u32_swap;
		else if (size == 8 && alignment_ok(base, 8))
			swap_func = u64_swap;
		else
			swap_func = generic_swap;
	}

	/* heapify */
	for ( ; i >= 0; i -= size) {
		for (r = i; r * 2 + size < n; r = c) {
			c = r * 2 + size;
			if (c < n - size &&
			    cmp_func(base + c, base + c + size, size) < 0)
				c += size;
			if (cmp_func(base + r, base + c, size) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}

	/* sort */
	for (i = n - size; i > 0; i -= size) {
		swap_func(base, base + i, size);
		for (r = 0; r * 2 + size < i; r = c) {
			c = r * 2 + size;
			if (c < i - size &&
			    cmp_func(base + c, base + c + size, size) < 0)
				c += size;
			if (cmp_func(base + r, base + c, size) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}
}

#if 0
void eytzinger1_test(void)
{
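sort_cmp_size(), also removed, is the classic bottom-up heapsort from the kernel's lib/sort.c with all loop counters pre-scaled by the element size, and a comparator that takes the element size as a third argument. A hedged usage sketch:

	static int cmp_u64(const void *a, const void *b, size_t size)
	{
		u64 l = *(const u64 *) a, r = *(const u64 *) b;

		return l < r ? -1 : l > r ? 1 : 0;
	}

	void sort_example(u64 *v, size_t nr)
	{
		/* NULL swap_func: u64_swap() is chosen for aligned 8-byte elements. */
		sort_cmp_size(v, nr, sizeof(*v), cmp_u64, NULL);
	}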
@@ -631,10 +631,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
	memset(s + bytes, c, rem);
}

void sort_cmp_size(void *base, size_t num, size_t size,
		   int (*cmp_func)(const void *, const void *, size_t),
		   void (*swap_func)(void *, void *, size_t));

/* just the memmove, doesn't update @_nr */
#define __array_insert_item(_array, _nr, _pos)				\
	memmove(&(_array)[(_pos) + 1],					\
@@ -683,6 +679,9 @@ static inline void __move_gap(void *array, size_t element_size,
/* Move the gap in a gap buffer: */
#define move_gap(_d, _new_gap)						\
do {									\
	BUG_ON(_new_gap > (_d)->nr);					\
	BUG_ON((_d)->gap > (_d)->nr);					\
									\
	__move_gap((_d)->data, sizeof((_d)->data[0]),			\
		   (_d)->nr, (_d)->size, (_d)->gap, _new_gap);		\
	(_d)->gap = _new_gap;						\
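move_gap() repositions the hole in a gap buffer: logical elements [0, gap) are stored in place, elements [gap, nr) are stored shifted up by the hole size, so moving the gap only memmoves the elements between the old and new position, and an insertion at the gap is then a plain store. The added BUG_ON()s assert the requested position and the current gap stay within the element count. A hypothetical sketch of an insert built on this convention (the struct fields mirror what the macro expects; not a bcachefs type):

	struct u64_gap_buf {
		u64	*data;
		size_t	nr;	/* live elements */
		size_t	size;	/* allocated slots; size - nr slots form the hole */
		size_t	gap;	/* index where the hole begins */
	};

	/* Assumes the caller already ensured nr < size. */
	static void gap_buf_insert(struct u64_gap_buf *d, size_t pos, u64 v)
	{
		move_gap(d, pos);		/* slide elements so the hole starts at pos */
		d->data[d->gap++] = v;		/* store into the hole */
		d->nr++;
	}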
linux/bio.c
@@ -306,6 +306,20 @@ struct bio *bio_kmalloc(unsigned int nr_iovecs, gfp_t gfp_mask)
	return bio;
}

struct bio *bio_alloc(struct block_device *bdev, unsigned nr_iovecs,
		      blk_opf_t opf, gfp_t gfp_mask)
{
	struct bio *bio;

	bio = kmalloc(sizeof(struct bio) +
		      sizeof(struct bio_vec) * nr_iovecs, gfp_mask);
	if (unlikely(!bio))
		return NULL;
	bio_init(bio, bdev, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs, opf);
	bio->bi_pool = NULL;
	return bio;
}

static struct bio_vec *bvec_alloc(mempool_t *pool, int *nr_vecs,
				  gfp_t gfp_mask)
{
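The new bio_alloc() shim mirrors the kernel's API for the userspace build: it allocates the bio together with nr_iovecs inline bio_vecs in one kmalloc() and initializes it with bio_init(). A hedged usage sketch against the shim (assumes submit_bio() and REQ_OP_WRITE from the surrounding compat headers; error handling elided):

	/* Write one page at sector 0 of bdev. */
	static void write_one_page(struct block_device *bdev, struct page *page)
	{
		struct bio *bio = bio_alloc(bdev, 1, REQ_OP_WRITE, GFP_KERNEL);

		if (!bio)
			return;

		bio->bi_iter.bi_sector = 0;
		bio_add_page(bio, page, PAGE_SIZE, 0);
		submit_bio(bio);
	}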