Update bcachefs sources to a0d7001b0f bcachefs: Change bch2_dev_lookup() to not use lookup_bdev()

Kent Overstreet 2022-02-16 06:31:28 -05:00
parent a1d66a2a4e
commit de4e778a6a
24 changed files with 243 additions and 254 deletions
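For orientation, here is a condensed sketch of the change named in the commit title, reconstructed from the bch2_dev_lookup() hunk in super.c near the end of this page: the old code resolved the supplied path to a dev_t with lookup_bdev() and matched member devices on ca->dev, while the new code compares the caller-supplied name directly against each device's ca->name. The function name below is hypothetical, and the ref the real function takes on ca->ref before returning is omitted.

/* Condensed sketch, not part of this commit's diff: lookup by name instead of lookup_bdev(). */
struct bch_dev *dev_lookup_by_name_sketch(struct bch_fs *c, const char *name)
{
	struct bch_dev *ca;
	unsigned i;

	rcu_read_lock();
	for_each_member_device_rcu(ca, c, i, NULL)
		if (!strcmp(name, ca->name))	/* match on the device name string */
			goto found;
	ca = ERR_PTR(-ENOENT);
found:
	rcu_read_unlock();	/* the real function also takes a ref on ca->ref before returning */
	return ca;
}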


@ -1 +1 @@
ba398d29060ecc2e2c9d6292a94ddc181761de1a
a0d7001b0f35580ec941acc553cf5fe28d6efea9


@ -702,6 +702,7 @@ static int migrate_fs(const char *fs_path,
opt_set(opts, sb, sb_offset);
opt_set(opts, nostart, true);
opt_set(opts, noexcl, true);
opt_set(opts, buckets_nouse, true);
c = bch2_fs_open(path, 1, opts);
if (IS_ERR(c))


@ -358,7 +358,7 @@ TRACE_EVENT(btree_node_relock_fail,
TP_STRUCT__entry(
__array(char, trans_fn, 24 )
__array(char, caller, 32 )
__field(unsigned long, caller_ip )
__field(u8, btree_id )
__field(u64, pos_inode )
__field(u64, pos_offset )
@ -370,7 +370,7 @@ TRACE_EVENT(btree_node_relock_fail,
TP_fast_assign(
strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
snprintf(__entry->caller, sizeof(__entry->caller), "%pS", (void *) caller_ip);
__entry->caller_ip = caller_ip;
__entry->btree_id = btree_id;
__entry->pos_inode = pos->inode;
__entry->pos_offset = pos->offset;
@ -380,9 +380,9 @@ TRACE_EVENT(btree_node_relock_fail,
__entry->node_lock_seq = node_lock_seq;
),
TP_printk("%s %s btree %u pos %llu:%llu:%u, node %lu iter seq %u lock seq %u",
TP_printk("%s %pS btree %u pos %llu:%llu:%u, node %lu iter seq %u lock seq %u",
__entry->trans_fn,
__entry->caller,
(void *) __entry->caller_ip,
__entry->btree_id,
__entry->pos_inode,
__entry->pos_offset,
@ -673,7 +673,7 @@ DECLARE_EVENT_CLASS(transaction_restart_iter,
TP_STRUCT__entry(
__array(char, trans_fn, 24 )
__array(char, caller, 32 )
__field(unsigned long, caller_ip )
__field(u8, btree_id )
__field(u64, pos_inode )
__field(u64, pos_offset )
@ -682,16 +682,16 @@ DECLARE_EVENT_CLASS(transaction_restart_iter,
TP_fast_assign(
strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
snprintf(__entry->caller, sizeof(__entry->caller), "%pS", (void *) caller_ip);
__entry->caller_ip = caller_ip;
__entry->btree_id = btree_id;
__entry->pos_inode = pos->inode;
__entry->pos_offset = pos->offset;
__entry->pos_snapshot = pos->snapshot;
),
TP_printk("%s %s btree %u pos %llu:%llu:%u",
TP_printk("%s %pS btree %u pos %llu:%llu:%u",
__entry->trans_fn,
__entry->caller,
(void *) __entry->caller_ip,
__entry->btree_id,
__entry->pos_inode,
__entry->pos_offset,


@ -151,22 +151,6 @@ static void open_bucket_free_unused(struct bch_fs *c,
}
}
static void verify_not_stale(struct bch_fs *c, const struct open_buckets *obs)
{
#ifdef CONFIG_BCACHEFS_DEBUG
struct open_bucket *ob;
unsigned i;
rcu_read_lock();
open_bucket_for_each(c, obs, ob, i) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
BUG_ON(*bucket_gen(ca, ob->bucket) != ob->gen);
}
rcu_read_unlock();
#endif
}
/* _only_ for allocating the journal on a new device: */
long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
{
@ -857,8 +841,6 @@ alloc_done:
BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
verify_not_stale(c, &wp->ptrs);
return wp;
err:
open_bucket_for_each(c, &wp->ptrs, ob, i)


@ -281,9 +281,6 @@ do { \
"significantly affect performance") \
BCH_DEBUG_PARAM(debug_check_iterators, \
"Enables extra verification for btree iterators") \
BCH_DEBUG_PARAM(debug_check_bkeys, \
"Run bkey_debugcheck (primarily checking GC/allocation "\
"information) when iterating over keys") \
BCH_DEBUG_PARAM(debug_check_btree_accounting, \
"Verify btree accounting for keys within a node") \
BCH_DEBUG_PARAM(journal_seq_verify, \
@ -807,6 +804,7 @@ struct bch_fs {
* it's not while a gc is in progress.
*/
struct rw_semaphore gc_lock;
struct mutex gc_gens_lock;
/* IO PATH */
struct semaphore io_in_flight;


@ -212,22 +212,6 @@ const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k)
return NULL;
}
void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
{
const char *invalid;
BUG_ON(!k.k->u64s);
invalid = bch2_bkey_invalid(c, k, btree_node_type(b)) ?:
bch2_bkey_in_btree_node(b, k);
if (invalid) {
char buf[160];
bch2_bkey_val_to_text(&PBUF(buf), c, k);
bch2_fs_inconsistent(c, "invalid bkey %s: %s", buf, invalid);
}
}
void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
{
if (!bpos_cmp(pos, POS_MIN))


@ -34,8 +34,6 @@ const char *bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c,
enum btree_node_type);
const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c);
void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
void bch2_bpos_to_text(struct printbuf *, struct bpos);
void bch2_bkey_to_text(struct printbuf *, const struct bkey *);
void bch2_val_to_text(struct printbuf *, struct bch_fs *,


@ -726,11 +726,9 @@ fsck_err:
static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
unsigned level, bool is_root,
struct bkey_s_c *k,
u8 *max_stale, bool initial)
bool initial)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs;
const struct bch_extent_ptr *ptr;
struct bkey deleted = KEY(0, 0, 0);
struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
unsigned flags =
@ -755,17 +753,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
atomic64_set(&c->key_version, k->k->version.lo);
}
ptrs = bch2_bkey_ptrs_c(*k);
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bucket *g = PTR_GC_BUCKET(ca, ptr);
if (gen_after(g->oldest_gen, ptr->gen))
g->oldest_gen = ptr->gen;
*max_stale = max(*max_stale, ptr_stale(ca, ptr));
}
ret = bch2_mark_key(trans, old, *k, flags);
fsck_err:
err:
@ -774,8 +761,7 @@ err:
return ret;
}
static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, u8 *max_stale,
bool initial)
static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial)
{
struct bch_fs *c = trans->c;
struct btree_node_iter iter;
@ -784,8 +770,6 @@ static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, u8 *ma
struct bkey_buf prev, cur;
int ret = 0;
*max_stale = 0;
if (!btree_node_type_needs_gc(btree_node_type(b)))
return 0;
@ -796,7 +780,7 @@ static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, u8 *ma
while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
&k, max_stale, initial);
&k, initial);
if (ret)
break;
@ -827,7 +811,6 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id,
: bch2_expensive_debug_checks ? 0
: !btree_node_type_needs_gc(btree_id) ? 1
: 0;
u8 max_stale = 0;
int ret = 0;
gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
@ -838,21 +821,9 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id,
gc_pos_set(c, gc_pos_btree_node(b));
ret = btree_gc_mark_node(trans, b, &max_stale, initial);
ret = btree_gc_mark_node(trans, b, initial);
if (ret)
break;
if (!initial) {
if (max_stale > 64)
bch2_btree_node_rewrite(trans, &iter, b,
BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
else if (!bch2_btree_gc_rewrite_disabled &&
(bch2_btree_gc_always_rewrite || max_stale > 16))
bch2_btree_node_rewrite(trans, &iter,
b, BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
}
}
bch2_trans_iter_exit(trans, &iter);
@ -864,8 +835,8 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id,
if (!btree_node_fake(b)) {
struct bkey_s_c k = bkey_i_to_s_c(&b->key);
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, true,
&k, &max_stale, initial);
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
true, &k, initial);
}
gc_pos_set(c, gc_pos_btree_root(b->c.btree_id));
mutex_unlock(&c->btree_root_lock);
@ -880,7 +851,6 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bkey_buf cur, prev;
u8 max_stale = 0;
char buf[200];
int ret = 0;
@ -893,8 +863,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
BUG_ON(bpos_cmp(k.k->p, b->data->min_key) < 0);
BUG_ON(bpos_cmp(k.k->p, b->data->max_key) > 0);
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
&k, &max_stale, true);
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
false, &k, true);
if (ret) {
bch_err(c, "%s: error %i from bch2_gc_mark_key", __func__, ret);
goto fsck_err;
@ -985,7 +955,6 @@ static int bch2_gc_btree_init(struct btree_trans *trans,
: bch2_expensive_debug_checks ? 0
: !btree_node_type_needs_gc(btree_id) ? 1
: 0;
u8 max_stale = 0;
char buf[100];
int ret = 0;
@ -1018,7 +987,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans,
struct bkey_s_c k = bkey_i_to_s_c(&b->key);
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, true,
&k, &max_stale, true);
&k, true);
}
fsck_err:
six_unlock_read(&b->c.lock);
@ -1313,7 +1282,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
.dev = iter->pos.inode,
.bucket = iter->pos.offset,
.gen = g->mark.gen,
.oldest_gen = g->oldest_gen,
.data_type = g->mark.data_type,
.dirty_sectors = g->mark.dirty_sectors,
.cached_sectors = g->mark.cached_sectors,
@ -1330,8 +1298,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
gc_u.data_type != BCH_DATA_btree)
return 0;
if (!bkey_alloc_unpacked_cmp(old_u, gc_u) ||
gen_after(old_u.gen, gc_u.gen))
if (gen_after(old_u.gen, gc_u.gen))
return 0;
#define copy_bucket_field(_f) \
@ -1353,8 +1320,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
copy_bucket_field(stripe);
#undef copy_bucket_field
new_u.oldest_gen = gc_u.oldest_gen;
if (!bkey_alloc_unpacked_cmp(old_u, new_u))
return 0;
@ -1905,6 +1870,9 @@ int bch2_gc_gens(struct bch_fs *c)
* introduces a deadlock in the RO path - we currently take the state
* lock at the start of going RO, thus the gc thread may get stuck:
*/
if (!mutex_trylock(&c->gc_gens_lock))
return 0;
down_read(&c->gc_lock);
bch2_trans_init(&trans, c, 0, 0);
@ -1964,6 +1932,7 @@ err:
bch2_trans_exit(&trans);
up_read(&c->gc_lock);
mutex_unlock(&c->gc_gens_lock);
return ret;
}


@ -58,6 +58,9 @@ static inline int __btree_path_cmp(const struct btree_path *l,
struct bpos r_pos,
unsigned r_level)
{
/*
* Must match lock ordering as defined by __bch2_btree_node_lock:
*/
return cmp_int(l->btree_id, r_btree_id) ?:
cmp_int((int) l->cached, (int) r_cached) ?:
bpos_cmp(l->pos, r_pos) ?:
@ -162,7 +165,7 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
else
this_cpu_sub(*b->c.lock.readers, readers);
btree_node_lock_type(trans->c, b, SIX_LOCK_write);
six_lock_write(&b->c.lock, NULL, NULL);
if (!b->c.lock.readers)
atomic64_add(__SIX_VAL(read_lock, readers),
@ -300,10 +303,8 @@ bool __bch2_btree_node_lock(struct btree_trans *trans,
six_lock_should_sleep_fn should_sleep_fn, void *p,
unsigned long ip)
{
struct btree_path *linked, *deadlock_path = NULL;
u64 start_time = local_clock();
unsigned reason = 9;
bool ret;
struct btree_path *linked;
unsigned reason;
/* Check if it's safe to block: */
trans_for_each_path(trans, linked) {
@ -324,28 +325,28 @@ bool __bch2_btree_node_lock(struct btree_trans *trans,
*/
if (type == SIX_LOCK_intent &&
linked->nodes_locked != linked->nodes_intent_locked) {
deadlock_path = linked;
reason = 1;
goto deadlock;
}
if (linked->btree_id != path->btree_id) {
if (linked->btree_id > path->btree_id) {
deadlock_path = linked;
reason = 3;
}
continue;
if (linked->btree_id < path->btree_id)
continue;
reason = 3;
goto deadlock;
}
/*
* Within the same btree, cached paths come before non
* cached paths:
* Within the same btree, non-cached paths come before cached
* paths:
*/
if (linked->cached != path->cached) {
if (path->cached) {
deadlock_path = linked;
reason = 4;
}
continue;
if (!linked->cached)
continue;
reason = 4;
goto deadlock;
}
/*
@ -354,50 +355,33 @@ bool __bch2_btree_node_lock(struct btree_trans *trans,
* we're about to lock, it must have the ancestors locked too:
*/
if (level > __fls(linked->nodes_locked)) {
deadlock_path = linked;
reason = 5;
goto deadlock;
}
/* Must lock btree nodes in key order: */
if (btree_node_locked(linked, level) &&
bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
linked->cached)) <= 0) {
deadlock_path = linked;
reason = 7;
BUG_ON(trans->in_traverse_all);
reason = 7;
goto deadlock;
}
}
if (unlikely(deadlock_path)) {
trace_trans_restart_would_deadlock(trans->fn, ip,
trans->in_traverse_all, reason,
deadlock_path->btree_id,
deadlock_path->cached,
&deadlock_path->pos,
path->btree_id,
path->cached,
&pos);
btree_trans_restart(trans);
return false;
}
if (six_trylock_type(&b->c.lock, type))
return true;
trans->locking_path_idx = path->idx;
trans->locking_pos = pos;
trans->locking_btree_id = path->btree_id;
trans->locking_level = level;
trans->locking = b;
ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0;
trans->locking = NULL;
if (ret)
bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
start_time);
return ret;
return btree_node_lock_type(trans, path, b, pos, level,
type, should_sleep_fn, p);
deadlock:
trace_trans_restart_would_deadlock(trans->fn, ip,
trans->in_traverse_all, reason,
linked->btree_id,
linked->cached,
&linked->pos,
path->btree_id,
path->cached,
&pos);
btree_trans_restart(trans);
return false;
}
/* Btree iterator locking: */
@ -1005,8 +989,6 @@ static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
struct bkey *u,
struct bkey_packed *k)
{
struct bkey_s_c ret;
if (unlikely(!k)) {
/*
* signal to bch2_btree_iter_peek_slot() that we're currently at
@ -1016,19 +998,7 @@ static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
return bkey_s_c_null;
}
ret = bkey_disassemble(l->b, k, u);
/*
* XXX: bch2_btree_bset_insert_key() generates invalid keys when we
* overwrite extents - it sets k->type = KEY_TYPE_deleted on the key
* being overwritten but doesn't change k->size. But this is ok, because
* those keys are never written out, we just have to avoid a spurious
* assertion here:
*/
if (bch2_debug_check_bkeys && !bkey_deleted(ret.k))
bch2_bkey_debugcheck(c, l->b, ret);
return ret;
return bkey_disassemble(l->b, k, u);
}
static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
@ -1504,17 +1474,17 @@ retry_all:
while (i < trans->nr_sorted) {
path = trans->paths + trans->sorted[i];
EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
if (ret)
goto retry_all;
EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
if (path->nodes_locked ||
!btree_path_node(path, path->level))
/*
* Traversing a path can cause another path to be added at about
* the same position:
*/
if (path->uptodate) {
ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
if (ret)
goto retry_all;
} else {
i++;
}
}
/*
@ -3092,6 +3062,8 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
const char *fn)
__acquires(&c->btree_trans_barrier)
{
BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
memset(trans, 0, sizeof(*trans));
trans->c = c;
trans->fn = fn;
@ -3213,6 +3185,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
struct btree_trans *trans;
struct btree_path *path;
struct btree *b;
static char lock_types[] = { 'r', 'i', 'w' };
unsigned l;
mutex_lock(&c->btree_trans_lock);
@ -3249,10 +3222,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
b = READ_ONCE(trans->locking);
if (b) {
path = &trans->paths[trans->locking_path_idx];
pr_buf(out, " locking path %u %c l=%u %s:",
pr_buf(out, " locking path %u %c l=%u %c %s:",
trans->locking_path_idx,
path->cached ? 'c' : 'b',
trans->locking_level,
lock_types[trans->locking_lock_type],
bch2_btree_ids[trans->locking_btree_id]);
bch2_bpos_to_text(out, trans->locking_pos);


@ -320,7 +320,6 @@ retry:
if (!trans->restarted)
goto retry;
trace_transaction_restart_ip(trans->fn, _THIS_IP_);
ret = -EINTR;
goto err;
}


@ -128,23 +128,35 @@ static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type)
}
}
/*
* wrapper around six locks that just traces lock contended time
*/
static inline void __btree_node_lock_type(struct bch_fs *c, struct btree *b,
enum six_lock_type type)
static inline bool btree_node_lock_type(struct btree_trans *trans,
struct btree_path *path,
struct btree *b,
struct bpos pos, unsigned level,
enum six_lock_type type,
six_lock_should_sleep_fn should_sleep_fn, void *p)
{
u64 start_time = local_clock();
struct bch_fs *c = trans->c;
u64 start_time;
bool ret;
six_lock_type(&b->c.lock, type, NULL, NULL);
bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time);
}
if (six_trylock_type(&b->c.lock, type))
return true;
static inline void btree_node_lock_type(struct bch_fs *c, struct btree *b,
enum six_lock_type type)
{
if (!six_trylock_type(&b->c.lock, type))
__btree_node_lock_type(c, b, type);
start_time = local_clock();
trans->locking_path_idx = path->idx;
trans->locking_pos = pos;
trans->locking_btree_id = path->btree_id;
trans->locking_level = level;
trans->locking_lock_type = type;
trans->locking = b;
ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0;
trans->locking = NULL;
if (ret)
bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time);
return ret;
}
/*


@ -377,6 +377,7 @@ struct btree_trans {
struct bpos locking_pos;
u8 locking_btree_id;
u8 locking_level;
u8 locking_lock_type;
pid_t pid;
int srcu_idx;


@ -620,8 +620,8 @@ err:
* we're in journal error state:
*/
btree_node_lock_type(c, b, SIX_LOCK_intent);
btree_node_lock_type(c, b, SIX_LOCK_write);
six_lock_intent(&b->c.lock, NULL, NULL);
six_lock_write(&b->c.lock, NULL, NULL);
mutex_lock(&c->btree_interior_update_lock);
list_del(&as->write_blocked_list);
@ -675,7 +675,7 @@ err:
for (i = 0; i < as->nr_new_nodes; i++) {
b = as->new_nodes[i];
btree_node_lock_type(c, b, SIX_LOCK_read);
six_lock_read(&b->c.lock, NULL, NULL);
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
}


@ -168,7 +168,7 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
struct btree_write *w = container_of(pin, struct btree_write, journal);
struct btree *b = container_of(w, struct btree, writes[i]);
btree_node_lock_type(c, b, SIX_LOCK_read);
six_lock_read(&b->c.lock, NULL, NULL);
bch2_btree_node_write_cond(c, b,
(btree_current_write(b) == w && w->journal.seq == seq));
six_unlock_read(&b->c.lock);
@ -619,8 +619,10 @@ static inline int trans_lock_write(struct btree_trans *trans)
if (have_conflicting_read_lock(trans, i->path))
goto fail;
__btree_node_lock_type(trans->c, insert_l(i)->b,
SIX_LOCK_write);
btree_node_lock_type(trans, i->path,
insert_l(i)->b,
i->path->pos, i->level,
SIX_LOCK_write, NULL, NULL);
}
bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);


@ -666,49 +666,50 @@ static int check_bucket_ref(struct bch_fs *c,
struct bkey_s_c k,
const struct bch_extent_ptr *ptr,
s64 sectors, enum bch_data_type ptr_data_type,
u8 bucket_gen, u8 bucket_data_type,
u8 b_gen, u8 bucket_data_type,
u16 dirty_sectors, u16 cached_sectors)
{
size_t bucket_nr = PTR_BUCKET_NR(bch_dev_bkey_exists(c, ptr->dev), ptr);
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
u16 bucket_sectors = !ptr->cached
? dirty_sectors
: cached_sectors;
char buf[200];
if (gen_after(ptr->gen, bucket_gen)) {
if (gen_after(ptr->gen, b_gen)) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
"while marking %s",
ptr->dev, bucket_nr, bucket_gen,
ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type ?: ptr_data_type],
ptr->gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
return -EIO;
}
if (gen_cmp(bucket_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
"while marking %s",
ptr->dev, bucket_nr, bucket_gen,
ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type ?: ptr_data_type],
ptr->gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
return -EIO;
}
if (bucket_gen != ptr->gen && !ptr->cached) {
if (b_gen != ptr->gen && !ptr->cached) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n"
"while marking %s",
ptr->dev, bucket_nr, bucket_gen,
ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type ?: ptr_data_type],
ptr->gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
return -EIO;
}
if (bucket_gen != ptr->gen)
if (b_gen != ptr->gen)
return 1;
if (bucket_data_type && ptr_data_type &&
@ -716,7 +717,7 @@ static int check_bucket_ref(struct bch_fs *c,
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
"while marking %s",
ptr->dev, bucket_nr, bucket_gen,
ptr->dev, bucket_nr, b_gen,
bch2_data_types[bucket_data_type],
bch2_data_types[ptr_data_type],
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
@ -725,9 +726,10 @@ static int check_bucket_ref(struct bch_fs *c,
if ((unsigned) (bucket_sectors + sectors) > U16_MAX) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
"bucket %u:%zu gen %u (mem gen %u) data type %s sector count overflow: %u + %lli > U16_MAX\n"
"while marking %s",
ptr->dev, bucket_nr, bucket_gen,
ptr->dev, bucket_nr, b_gen,
*bucket_gen(ca, bucket_nr),
bch2_data_types[bucket_data_type ?: ptr_data_type],
bucket_sectors, sectors,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
@ -2141,9 +2143,10 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
GFP_KERNEL|__GFP_ZERO)) ||
!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
GFP_KERNEL|__GFP_ZERO)) ||
!(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
(c->opts.buckets_nouse &&
!(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
sizeof(unsigned long),
GFP_KERNEL|__GFP_ZERO)) ||
GFP_KERNEL|__GFP_ZERO))) ||
!init_fifo(&free[RESERVE_MOVINGGC],
copygc_reserve, GFP_KERNEL) ||
!init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
@ -2176,9 +2179,10 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
memcpy(bucket_gens->b,
old_bucket_gens->b,
n);
memcpy(buckets_nouse,
ca->buckets_nouse,
BITS_TO_LONGS(n) * sizeof(unsigned long));
if (buckets_nouse)
memcpy(buckets_nouse,
ca->buckets_nouse,
BITS_TO_LONGS(n) * sizeof(unsigned long));
}
rcu_assign_pointer(ca->buckets[0], buckets);


@ -1062,8 +1062,6 @@ retry:
sectors = min(sectors, k.k->size - offset_into_extent);
bch2_trans_unlock(trans);
if (readpages_iter)
readpage_bio_extend(readpages_iter, &rbio->bio, sectors,
extent_partial_reads_expensive(k));


@ -1316,8 +1316,9 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
if (i->inode.bi_nlink == i->count)
continue;
count2 = lockrestart_do(trans,
bch2_count_subdirs(trans, w->cur_inum, i->snapshot));
count2 = bch2_count_subdirs(trans, w->cur_inum, i->snapshot);
if (count2 < 0)
return count2;
if (i->count != count2) {
bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",


@ -1953,6 +1953,33 @@ err:
return ret;
}
static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
struct bkey_s_c k,
struct bch_extent_ptr ptr)
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
struct btree_iter iter;
char buf[200];
int ret;
bch2_bkey_val_to_text(&PBUF(buf), c, k);
bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf);
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
POS(ptr.dev, PTR_BUCKET_NR(ca, &ptr)),
BTREE_ITER_CACHED);
ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
if (ret)
return;
bch2_bkey_val_to_text(&PBUF(buf), c, k);
bch_err(c, "%s", buf);
bch_err(c, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
bch2_trans_iter_exit(trans, &iter);
}
int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
struct bvec_iter iter, struct bpos read_pos,
enum btree_id data_btree, struct bkey_s_c k,
@ -1962,7 +1989,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
struct bch_fs *c = trans->c;
struct extent_ptr_decoded pick;
struct bch_read_bio *rbio = NULL;
struct bch_dev *ca;
struct bch_dev *ca = NULL;
struct promote_op *promote = NULL;
bool bounce = false, read_full = false, narrow_crcs = false;
struct bpos data_pos = bkey_start_pos(k.k);
@ -1979,7 +2006,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
zero_fill_bio_iter(&orig->bio, iter);
goto out_read_done;
}
retry_pick:
pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
/* hole or reservation - just zero fill: */
@ -1992,8 +2019,20 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
goto err;
}
if (pick_ret > 0)
ca = bch_dev_bkey_exists(c, pick.ptr.dev);
ca = bch_dev_bkey_exists(c, pick.ptr.dev);
if (!pick.ptr.cached &&
unlikely(ptr_stale(ca, &pick.ptr))) {
read_from_stale_dirty_pointer(trans, k, pick.ptr);
bch2_mark_io_failure(failed, &pick);
goto retry_pick;
}
/*
* Unlock the iterator while the btree node's lock is still in
* cache, before doing the IO:
*/
bch2_trans_unlock(trans);
if (flags & BCH_READ_NODECODE) {
/*
@ -2281,12 +2320,6 @@ retry:
*/
sectors = min(sectors, k.k->size - offset_into_extent);
/*
* Unlock the iterator while the btree node's lock is still in
* cache, before doing the IO:
*/
bch2_trans_unlock(&trans);
bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
swap(bvec_iter.bi_size, bytes);


@ -299,11 +299,17 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs
struct jset_entry *entry)
{
struct bkey_i *k;
bool first = true;
pr_buf(out, "btree=%s l=%u ", bch2_btree_ids[entry->btree_id], entry->level);
vstruct_for_each(entry, k)
vstruct_for_each(entry, k) {
if (!first) {
printbuf_newline(out);
pr_buf(out, "%s: ", bch2_jset_entry_types[entry->type]);
}
pr_buf(out, "btree=%s l=%u ", bch2_btree_ids[entry->btree_id], entry->level);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k));
first = false;
}
}
static int journal_entry_btree_root_validate(struct bch_fs *c,


@ -487,19 +487,22 @@ static void move_read_endio(struct bio *bio)
closure_put(&ctxt->cl);
}
static void do_pending_writes(struct moving_context *ctxt)
static void do_pending_writes(struct moving_context *ctxt, struct btree_trans *trans)
{
struct moving_io *io;
if (trans)
bch2_trans_unlock(trans);
while ((io = next_pending_write(ctxt))) {
list_del(&io->list);
closure_call(&io->cl, move_write, NULL, &ctxt->cl);
}
}
#define move_ctxt_wait_event(_ctxt, _cond) \
#define move_ctxt_wait_event(_ctxt, _trans, _cond) \
do { \
do_pending_writes(_ctxt); \
do_pending_writes(_ctxt, _trans); \
\
if (_cond) \
break; \
@ -507,11 +510,12 @@ do { \
next_pending_write(_ctxt) || (_cond)); \
} while (1)
static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt,
struct btree_trans *trans)
{
unsigned sectors_pending = atomic_read(&ctxt->write_sectors);
move_ctxt_wait_event(ctxt,
move_ctxt_wait_event(ctxt, trans,
!atomic_read(&ctxt->write_sectors) ||
atomic_read(&ctxt->write_sectors) != sectors_pending);
}
@ -533,14 +537,6 @@ static int bch2_move_extent(struct btree_trans *trans,
unsigned sectors = k.k->size, pages;
int ret = -ENOMEM;
move_ctxt_wait_event(ctxt,
atomic_read(&ctxt->write_sectors) <
SECTORS_IN_FLIGHT_PER_DEVICE);
move_ctxt_wait_event(ctxt,
atomic_read(&ctxt->read_sectors) <
SECTORS_IN_FLIGHT_PER_DEVICE);
/* write path might have to decompress data: */
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
@ -691,12 +687,19 @@ static int __bch2_move_data(struct bch_fs *c,
schedule_timeout(delay);
if (unlikely(freezing(current))) {
bch2_trans_unlock(&trans);
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
move_ctxt_wait_event(ctxt, &trans, list_empty(&ctxt->reads));
try_to_freeze();
}
} while (delay);
move_ctxt_wait_event(ctxt, &trans,
atomic_read(&ctxt->write_sectors) <
SECTORS_IN_FLIGHT_PER_DEVICE);
move_ctxt_wait_event(ctxt, &trans,
atomic_read(&ctxt->read_sectors) <
SECTORS_IN_FLIGHT_PER_DEVICE);
bch2_trans_begin(&trans);
k = bch2_btree_iter_peek(&iter);
@ -748,10 +751,12 @@ static int __bch2_move_data(struct bch_fs *c,
BUG();
}
/* unlock before doing IO: */
/*
* The iterator gets unlocked by __bch2_read_extent - need to
* save a copy of @k elsewhere:
*/
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);
ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
data_cmd, data_opts);
@ -761,7 +766,7 @@ static int __bch2_move_data(struct bch_fs *c,
if (ret2 == -ENOMEM) {
/* memory allocation failure, wait for some IO to finish */
bch2_move_ctxt_wait_for_io(ctxt);
bch2_move_ctxt_wait_for_io(ctxt, &trans);
continue;
}
@ -846,7 +851,7 @@ int bch2_move_data(struct bch_fs *c,
}
move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
move_ctxt_wait_event(&ctxt, NULL, list_empty(&ctxt.reads));
closure_sync(&ctxt.cl);
EBUG_ON(atomic_read(&ctxt.write_sectors));


@ -365,6 +365,11 @@ enum opt_type {
NO_SB_OPT, false, \
NULL, "Set superblock to latest version,\n" \
"allowing any new features to be used") \
x(buckets_nouse, u8, \
0, \
OPT_BOOL(), \
NO_SB_OPT, false, \
NULL, "Allocate the buckets_nouse bitmap") \
x(project, u8, \
OPT_INODE, \
OPT_BOOL(), \


@ -1126,12 +1126,12 @@ use_clean:
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
bool metadata_only = c->opts.norecovery;
bch_info(c, "starting mark and sweep");
bch_info(c, "checking allocations");
err = "error in mark and sweep";
ret = bch2_gc(c, true, metadata_only);
if (ret)
goto err;
bch_verbose(c, "mark and sweep done");
bch_verbose(c, "done checking allocations");
}
bch2_stripes_heap_start(c);


@ -674,6 +674,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);
init_rwsem(&c->gc_lock);
mutex_init(&c->gc_gens_lock);
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_init(&c->times[i]);
@ -1879,20 +1880,14 @@ err:
}
/* return with ref on ca->ref: */
struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path)
struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name)
{
struct bch_dev *ca;
dev_t dev;
unsigned i;
int ret;
ret = lookup_bdev(path, &dev);
if (ret)
return ERR_PTR(ret);
rcu_read_lock();
for_each_member_device_rcu(ca, c, i, NULL)
if (ca->dev == dev)
if (!strcmp(name, ca->name))
goto found;
ca = ERR_PTR(-ENOENT);
found:


@ -238,6 +238,7 @@ do { \
struct printbuf {
char *pos;
char *end;
unsigned indent;
};
static inline size_t printbuf_remaining(struct printbuf *buf)
@ -259,6 +260,27 @@ do { \
__VA_ARGS__); \
} while (0)
static inline void printbuf_indent_push(struct printbuf *buf, unsigned spaces)
{
buf->indent += spaces;
while (spaces--)
pr_buf(buf, " ");
}
static inline void printbuf_indent_pop(struct printbuf *buf, unsigned spaces)
{
buf->indent -= spaces;
}
static inline void printbuf_newline(struct printbuf *buf)
{
unsigned i;
pr_buf(buf, "\n");
for (i = 0; i < buf->indent; i++)
pr_buf(buf, " ");
}
void bch_scnmemcpy(struct printbuf *, const char *, size_t);
int bch2_strtoint_h(const char *, int *);
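As a closing illustration (not part of the commit), here is a hypothetical usage sketch of the printbuf indentation helpers added in the hunk above, in the style of the updated journal_entry_btree_keys_to_text(): an indent pushed onto the printbuf is emitted immediately and again after every printbuf_newline() until it is popped. The function name is made up for the example.

static void example_to_text(struct printbuf *out)
{
	pr_buf(out, "keys:");
	printbuf_indent_push(out, 2);	/* bumps out->indent and emits two spaces right away */
	printbuf_newline(out);		/* emits '\n' followed by the current indent */
	pr_buf(out, "first entry");
	printbuf_newline(out);
	pr_buf(out, "second entry");
	printbuf_indent_pop(out, 2);	/* later newlines go back to the outer indent */
}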