Update bcachefs sources to ffe09df106 bcachefs: Verify fs hasn't been modified before going rw

Kent Overstreet 2019-03-24 21:06:58 -04:00
parent 0894d54750
commit ddb58076ef
38 changed files with 1373 additions and 1110 deletions
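
Alongside the read-write state rework visible here (enum bch_fs_state and bch2_fs_running() are dropped in favour of a BCH_FS_RW flag, and a new BTREE_INSERT_LAZY_RW flag appears in the journal-replay and fsck paths), most of the hunks are mechanical conversions from the old bch2_btree_insert_at() call pattern to the new btree_trans interface. A rough sketch of that recurring conversion, not part of the diff itself — c, id, pos and k are placeholders, and the flags and error handling vary per call site:

    /* old interface (removed by this commit): */
    struct btree_iter iter;

    bch2_btree_iter_init(&iter, c, id, pos, BTREE_ITER_INTENT);
    ret = bch2_btree_insert_at(c, NULL, NULL,
                               BTREE_INSERT_NOFAIL,
                               BTREE_INSERT_ENTRY(&iter, k));
    bch2_btree_iter_unlock(&iter);

    /* new transactional interface: */
    struct btree_trans trans;
    struct btree_iter *iter;

    bch2_trans_init(&trans, c);
    iter = bch2_trans_get_iter(&trans, id, pos, BTREE_ITER_INTENT);
    bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));
    ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);
    bch2_trans_exit(&trans);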


@ -1 +1 @@
986543d24e08a0c0308472403b230d546e7ecbbb
ffe09df1065dd1b326913b21381ed1ad35ab8ef9


@ -266,7 +266,7 @@ int bch2_set_acl_trans(struct btree_trans *trans,
if (IS_ERR(xattr))
return PTR_ERR(xattr);
ret = __bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
inode_u->bi_inum, &xattr->k_i, 0);
} else {
struct xattr_search_key search =


@ -309,10 +309,54 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
return 0;
}
static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bch_dev *ca;
int ret;
if (k->k.p.inode >= c->sb.nr_devices ||
!c->devs[k->k.p.inode])
return 0;
ca = bch_dev_bkey_exists(c, k->k.p.inode);
if (k->k.p.offset >= ca->mi.nbuckets)
return 0;
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p,
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(iter);
if (ret)
goto err;
/* check buckets_written with btree node locked: */
if (test_bit(k->k.p.offset, ca->buckets_written)) {
ret = 0;
goto err;
}
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_JOURNAL_REPLAY|
BTREE_INSERT_NOMARK);
err:
bch2_trans_exit(&trans);
return ret;
}
static int __bch2_alloc_write_key(struct btree_trans *trans, struct bch_dev *ca,
size_t b, struct btree_iter *iter,
u64 *journal_seq, unsigned flags)
{
struct bch_fs *c = trans->c;
#if 0
__BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
#else
@ -348,14 +392,15 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
bch2_btree_iter_cond_resched(iter);
ret = bch2_btree_insert_at(c, NULL, journal_seq,
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
ret = bch2_trans_commit(trans, NULL, journal_seq,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
BTREE_INSERT_NOMARK|
flags,
BTREE_INSERT_ENTRY(iter, &a->k_i));
flags);
if (ret)
return ret;
@ -369,42 +414,6 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
return 0;
}
int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
{
struct bch_dev *ca;
struct btree_iter iter;
int ret;
if (k->k.p.inode >= c->sb.nr_devices ||
!c->devs[k->k.p.inode])
return 0;
ca = bch_dev_bkey_exists(c, k->k.p.inode);
if (k->k.p.offset >= ca->mi.nbuckets)
return 0;
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, k->k.p,
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&iter);
if (ret)
goto err;
/* check buckets_written with btree node locked: */
ret = test_bit(k->k.p.offset, ca->buckets_written)
? 0
: bch2_btree_insert_at(c, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_REPLAY|
BTREE_INSERT_NOMARK,
BTREE_INSERT_ENTRY(&iter, k));
err:
bch2_btree_iter_unlock(&iter);
return ret;
}
int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
{
struct bch_dev *ca;
@ -414,11 +423,14 @@ int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
*wrote = false;
for_each_rw_member(ca, c, i) {
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bucket_array *buckets;
size_t b;
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS_MIN,
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
down_read(&ca->bucket_lock);
@ -430,7 +442,7 @@ int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
if (!buckets->b[b].mark.dirty)
continue;
ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL,
ret = __bch2_alloc_write_key(&trans, ca, b, iter, NULL,
nowait
? BTREE_INSERT_NOWAIT
: 0);
@ -440,7 +452,8 @@ int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
*wrote = true;
}
up_read(&ca->bucket_lock);
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
if (ret) {
percpu_ref_put(&ca->io_ref);
@ -886,7 +899,8 @@ static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
}
}
static int bch2_invalidate_one_bucket2(struct bch_fs *c, struct bch_dev *ca,
static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
struct bch_dev *ca,
struct btree_iter *iter,
u64 *journal_seq, unsigned flags)
{
@ -896,6 +910,7 @@ static int bch2_invalidate_one_bucket2(struct bch_fs *c, struct bch_dev *ca,
/* hack: */
__BKEY_PADDED(k, 8) alloc_key;
#endif
struct bch_fs *c = trans->c;
struct bkey_i_alloc *a;
struct bkey_alloc_unpacked u;
struct bucket_mark m;
@ -958,6 +973,8 @@ retry:
a->k.p = iter->pos;
bch2_alloc_pack(a, u);
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
/*
* XXX:
* when using deferred btree updates, we have journal reclaim doing
@ -965,7 +982,7 @@ retry:
* progress, and here the allocator is requiring space in the journal -
* so we need a journal pre-reservation:
*/
ret = bch2_btree_insert_at(c, NULL,
ret = bch2_trans_commit(trans, NULL,
invalidating_cached_data ? journal_seq : NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOUNLOCK|
@ -973,8 +990,7 @@ retry:
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
flags,
BTREE_INSERT_ENTRY(iter, &a->k_i));
flags);
if (ret == -EINTR)
goto retry;
@ -1048,23 +1064,27 @@ static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
*/
static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
u64 journal_seq = 0;
int ret = 0;
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC,
POS(ca->dev_idx, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
/* Only use nowait if we've already invalidated at least one bucket: */
while (!ret &&
!fifo_full(&ca->free_inc) &&
ca->alloc_heap.used)
ret = bch2_invalidate_one_bucket2(c, ca, &iter, &journal_seq,
ret = bch2_invalidate_one_bucket2(&trans, ca, iter, &journal_seq,
BTREE_INSERT_GC_LOCK_HELD|
(!fifo_empty(&ca->free_inc)
? BTREE_INSERT_NOWAIT : 0));
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
/* If we used NOWAIT, don't return the error: */
if (!fifo_empty(&ca->free_inc))
@ -1606,7 +1626,7 @@ static bool bch2_fs_allocator_start_fast(struct bch_fs *c)
return ret;
}
static int __bch2_fs_allocator_start(struct bch_fs *c)
int bch2_fs_allocator_start(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned dev_iter;
@ -1615,6 +1635,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
long bu;
int ret = 0;
if (!test_alloc_startup(c) &&
bch2_fs_allocator_start_fast(c))
return 0;
pr_debug("not enough empty buckets; scanning for reclaimable buckets");
/*
@ -1689,31 +1713,6 @@ err:
return ret;
}
int bch2_fs_allocator_start(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned i;
int ret;
ret = bch2_fs_allocator_start_fast(c) ? 0 :
__bch2_fs_allocator_start(c);
if (ret)
return ret;
set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
for_each_rw_member(ca, c, i) {
ret = bch2_dev_allocator_start(ca);
if (ret) {
percpu_ref_put(&ca->io_ref);
return ret;
}
}
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
return 0;
}
void bch2_fs_allocator_background_init(struct bch_fs *c)
{
spin_lock_init(&c->freelist_lock);


@ -245,6 +245,10 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
if (cl)
closure_wait(&c->open_buckets_wait, cl);
if (!c->blocked_allocate_open_bucket)
c->blocked_allocate_open_bucket = local_clock();
spin_unlock(&c->freelist_lock);
trace_open_bucket_alloc_fail(ca, reserve);
return ERR_PTR(-OPEN_BUCKETS_EMPTY);
@ -275,6 +279,9 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
if (cl)
closure_wait(&c->freelist_wait, cl);
if (!c->blocked_allocate)
c->blocked_allocate = local_clock();
spin_unlock(&c->freelist_lock);
trace_bucket_alloc_fail(ca, reserve);
@ -300,6 +307,20 @@ out:
bucket_io_clock_reset(c, ca, bucket, WRITE);
spin_unlock(&ob->lock);
if (c->blocked_allocate_open_bucket) {
bch2_time_stats_update(
&c->times[BCH_TIME_blocked_allocate_open_bucket],
c->blocked_allocate_open_bucket);
c->blocked_allocate_open_bucket = 0;
}
if (c->blocked_allocate) {
bch2_time_stats_update(
&c->times[BCH_TIME_blocked_allocate],
c->blocked_allocate);
c->blocked_allocate = 0;
}
spin_unlock(&c->freelist_lock);
bch2_wake_allocator(ca);


@ -275,7 +275,11 @@ do { \
"cached data") \
BCH_DEBUG_PARAM(force_reconstruct_read, \
"Force reads to use the reconstruct path, when reading" \
"from erasure coded extents")
"from erasure coded extents") \
BCH_DEBUG_PARAM(test_restart_gc, \
"Test restarting mark and sweep gc when bucket gens change")\
BCH_DEBUG_PARAM(test_reconstruct_alloc, \
"Test reconstructing the alloc btree")
#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG()
@ -287,10 +291,11 @@ do { \
#define BCH_TIME_STATS() \
x(btree_node_mem_alloc) \
x(btree_node_split) \
x(btree_node_sort) \
x(btree_node_read) \
x(btree_gc) \
x(btree_split) \
x(btree_sort) \
x(btree_read) \
x(btree_update) \
x(btree_lock_contended_read) \
x(btree_lock_contended_intent) \
x(btree_lock_contended_write) \
@ -299,8 +304,10 @@ do { \
x(data_promote) \
x(journal_write) \
x(journal_delay) \
x(journal_blocked) \
x(journal_flush_seq)
x(journal_flush_seq) \
x(blocked_journal) \
x(blocked_allocate) \
x(blocked_allocate_open_bucket)
enum bch_time_stats {
#define x(name) BCH_TIME_##name,
@ -380,6 +387,7 @@ struct bch_dev {
char name[BDEVNAME_SIZE];
struct bch_sb_handle disk_sb;
struct bch_sb *sb_read_scratch;
int sb_write_error;
struct bch_devs_mask self;
@ -476,6 +484,7 @@ enum {
BCH_FS_INITIAL_GC_DONE,
BCH_FS_FSCK_DONE,
BCH_FS_STARTED,
BCH_FS_RW,
/* shutdown: */
BCH_FS_EMERGENCY_RO,
@ -500,13 +509,6 @@ struct btree_debug {
struct dentry *failed;
};
enum bch_fs_state {
BCH_FS_STARTING = 0,
BCH_FS_STOPPING,
BCH_FS_RO,
BCH_FS_RW,
};
struct bch_fs_pcpu {
u64 sectors_available;
};
@ -528,7 +530,6 @@ struct bch_fs {
/* ro/rw, add/remove devices: */
struct mutex state_lock;
enum bch_fs_state state;
/* Counts outstanding writes, for clean transition to read-only */
struct percpu_ref writes;
@ -632,7 +633,10 @@ struct bch_fs {
struct percpu_rw_semaphore mark_lock;
struct bch_fs_usage __percpu *usage[2];
struct bch_fs_usage __percpu *usage_scratch;
/* single element mempool: */
struct mutex usage_scratch_lock;
struct bch_fs_usage *usage_scratch;
/*
* When we invalidate buckets, we use both the priority and the amount
@ -647,6 +651,8 @@ struct bch_fs {
/* ALLOCATOR */
spinlock_t freelist_lock;
struct closure_waitlist freelist_wait;
u64 blocked_allocate;
u64 blocked_allocate_open_bucket;
u8 open_buckets_freelist;
u8 open_buckets_nr_free;
struct closure_waitlist open_buckets_wait;
@ -785,11 +791,6 @@ static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
#endif
}
static inline bool bch2_fs_running(struct bch_fs *c)
{
return c->state == BCH_FS_RO || c->state == BCH_FS_RW;
}
static inline unsigned bucket_bytes(const struct bch_dev *ca)
{
return ca->mi.bucket_size << 9;


@ -258,15 +258,14 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
return ret;
mutex_lock(&c->btree_root_lock);
b = c->btree_roots[btree_id].b;
if (!btree_node_fake(b))
bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
&max_stale, initial);
gc_pos_set(c, gc_pos_btree_root(b->btree_id));
mutex_unlock(&c->btree_root_lock);
return 0;
return ret;
}
static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
@ -747,7 +746,9 @@ again:
c->gc_count++;
out:
if (!ret && test_bit(BCH_FS_FIXED_GENS, &c->flags)) {
if (!ret &&
(test_bit(BCH_FS_FIXED_GENS, &c->flags) ||
(!iter && test_restart_gc(c)))) {
/*
* XXX: make sure gens we fixed got saved
*/


@ -327,7 +327,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
BUG_ON(vstruct_end(&out->keys) > (void *) out + (PAGE_SIZE << order));
if (sorting_entire_node)
bch2_time_stats_update(&c->times[BCH_TIME_btree_sort],
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
start_time);
/* Make sure we preserve bset journal_seq: */
@ -403,7 +403,8 @@ void bch2_btree_sort_into(struct bch_fs *c,
&dst->format,
true);
bch2_time_stats_update(&c->times[BCH_TIME_btree_sort], start_time);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
start_time);
set_btree_bset_end(dst, dst->set);
@ -989,7 +990,8 @@ start:
}
}
bch2_time_stats_update(&c->times[BCH_TIME_btree_read], rb->start_time);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
rb->start_time);
bio_put(&rb->bio);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);


@ -273,6 +273,7 @@ struct btree_insert_entry {
struct btree_trans {
struct bch_fs *c;
size_t nr_restarts;
u64 commit_start;
u64 iters_live;
u64 iters_linked;
@ -289,6 +290,13 @@ struct btree_trans {
struct btree_iter *iters;
struct btree_insert_entry *updates;
/* update path: */
struct journal_res journal_res;
struct journal_preres journal_preres;
u64 *journal_seq;
struct disk_reservation *disk_res;
unsigned flags;
struct btree_iter iters_onstack[2];
struct btree_insert_entry updates_onstack[6];
};
@ -489,12 +497,11 @@ struct btree_root {
enum btree_insert_ret {
BTREE_INSERT_OK,
/* extent spanned multiple leaf nodes: have to traverse to next node: */
BTREE_INSERT_NEED_TRAVERSE,
/* leaf node needs to be split */
BTREE_INSERT_BTREE_NODE_FULL,
BTREE_INSERT_ENOSPC,
BTREE_INSERT_NEED_MARK_REPLICAS,
BTREE_INSERT_NEED_JOURNAL_RES,
};
enum btree_gc_coalesce_fail_reason {


@ -6,13 +6,12 @@
struct bch_fs;
struct btree;
struct btree_insert;
void bch2_btree_node_lock_for_insert(struct bch_fs *, struct btree *,
struct btree_iter *);
bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *,
struct btree_node_iter *, struct bkey_i *);
void bch2_btree_journal_key(struct btree_insert *trans, struct btree_iter *,
void bch2_btree_journal_key(struct btree_trans *, struct btree_iter *,
struct bkey_i *);
void bch2_deferred_update_free(struct bch_fs *,
@ -20,23 +19,6 @@ void bch2_deferred_update_free(struct bch_fs *,
struct deferred_update *
bch2_deferred_update_alloc(struct bch_fs *, enum btree_id, unsigned);
/* Normal update interface: */
struct btree_insert {
struct bch_fs *c;
struct disk_reservation *disk_res;
struct journal_res journal_res;
struct journal_preres journal_preres;
u64 *journal_seq;
unsigned flags;
bool did_work;
unsigned short nr;
struct btree_insert_entry *entries;
};
int __bch2_btree_insert_at(struct btree_insert *);
#define BTREE_INSERT_ENTRY(_iter, _k) \
((struct btree_insert_entry) { \
.iter = (_iter), \
@ -50,35 +32,12 @@ int __bch2_btree_insert_at(struct btree_insert *);
.deferred = true, \
})
/**
* bch_btree_insert_at - insert one or more keys at iterator positions
* @iter: btree iterator
* @insert_key: key to insert
* @disk_res: disk reservation
* @hook: extent insert callback
*
* Return values:
* -EINTR: locking changed, this function should be called again. Only returned
* if passed BTREE_INSERT_ATOMIC.
* -EROFS: filesystem read only
* -EIO: journal or btree node IO error
*/
#define bch2_btree_insert_at(_c, _disk_res, _journal_seq, _flags, ...) \
__bch2_btree_insert_at(&(struct btree_insert) { \
.c = (_c), \
.disk_res = (_disk_res), \
.journal_seq = (_journal_seq), \
.flags = (_flags), \
.nr = COUNT_ARGS(__VA_ARGS__), \
.entries = (struct btree_insert_entry[]) { \
__VA_ARGS__ \
}})
enum {
__BTREE_INSERT_ATOMIC,
__BTREE_INSERT_NOUNLOCK,
__BTREE_INSERT_NOFAIL,
__BTREE_INSERT_NOCHECK_RW,
__BTREE_INSERT_LAZY_RW,
__BTREE_INSERT_USE_RESERVE,
__BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
@ -105,6 +64,7 @@ enum {
#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL)
#define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW)
#define BTREE_INSERT_LAZY_RW (1 << __BTREE_INSERT_LAZY_RW)
/* for copygc, or when merging btree nodes */
#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE)
@ -125,10 +85,7 @@ enum {
#define BCH_HASH_SET_MUST_CREATE (1 << __BCH_HASH_SET_MUST_CREATE)
#define BCH_HASH_SET_MUST_REPLACE (1 << __BCH_HASH_SET_MUST_REPLACE)
int bch2_btree_delete_at(struct btree_iter *, unsigned);
int bch2_btree_insert_list_at(struct btree_iter *, struct keylist *,
struct disk_reservation *, u64 *, unsigned);
int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
struct disk_reservation *, u64 *, int flags);
@ -141,8 +98,6 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
struct btree *, struct bkey_i_btree_ptr *);
/* new transactional interface: */
static inline void
bch2_trans_update(struct btree_trans *trans,
struct btree_insert_entry entry)
@ -174,4 +129,39 @@ int bch2_trans_commit(struct btree_trans *,
_ret; \
})
/*
* We sort transaction entries so that if multiple iterators point to the same
* leaf node they'll be adjacent:
*/
static inline bool same_leaf_as_prev(struct btree_trans *trans,
struct btree_insert_entry *i)
{
return i != trans->updates &&
!i->deferred &&
i[0].iter->l[0].b == i[-1].iter->l[0].b;
}
#define __trans_next_update(_trans, _i, _filter) \
({ \
while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\
(_i)++; \
\
(_i) < (_trans)->updates + (_trans->nr_updates); \
})
#define __trans_for_each_update(_trans, _i, _filter) \
for ((_i) = (_trans)->updates; \
__trans_next_update(_trans, _i, _filter); \
(_i)++)
#define trans_for_each_update(trans, i) \
__trans_for_each_update(trans, i, true)
#define trans_for_each_update_iter(trans, i) \
__trans_for_each_update(trans, i, !(i)->deferred)
#define trans_for_each_update_leaf(trans, i) \
__trans_for_each_update(trans, i, !(i)->deferred && \
!same_leaf_as_prev(trans, i))
#endif /* _BCACHEFS_BTREE_UPDATE_H */
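
For orientation (not part of the diff): the iteration macros introduced above are used by the update path elsewhere in this commit — bch2_trans_fs_usage_apply() further down walks non-deferred entries with trans_for_each_update_iter(). A minimal sketch of the marking loop, with fs_usage assumed to come from bch2_fs_usage_scratch_get():

    struct btree_insert_entry *i;

    /* visit each update entry that has a real iterator (skips deferred ones) */
    trans_for_each_update_iter(trans, i)
            bch2_mark_update(trans, i, fs_usage);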


@ -1074,8 +1074,8 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
__bch2_btree_set_root_inmem(c, b);
mutex_lock(&c->btree_interior_update_lock);
percpu_down_read_preempt_disable(&c->mark_lock);
fs_usage = bch2_fs_usage_get_scratch(c);
percpu_down_read(&c->mark_lock);
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
true, 0,
@ -1088,7 +1088,8 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
fs_usage);
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
percpu_up_read_preempt_enable(&c->mark_lock);
bch2_fs_usage_scratch_put(c, fs_usage);
percpu_up_read(&c->mark_lock);
mutex_unlock(&c->btree_interior_update_lock);
}
@ -1167,8 +1168,8 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, b));
mutex_lock(&c->btree_interior_update_lock);
percpu_down_read_preempt_disable(&c->mark_lock);
fs_usage = bch2_fs_usage_get_scratch(c);
percpu_down_read(&c->mark_lock);
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
true, 0,
@ -1189,7 +1190,8 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
percpu_up_read_preempt_enable(&c->mark_lock);
bch2_fs_usage_scratch_put(c, fs_usage);
percpu_up_read(&c->mark_lock);
mutex_unlock(&c->btree_interior_update_lock);
bch2_btree_bset_insert_key(iter, b, node_iter, insert);
@ -1437,7 +1439,8 @@ static void btree_split(struct btree_update *as, struct btree *b,
bch2_btree_iter_verify_locks(iter);
bch2_time_stats_update(&c->times[BCH_TIME_btree_split], start_time);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split],
start_time);
}
static void
@ -1981,8 +1984,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
bch2_btree_node_lock_write(b, iter);
mutex_lock(&c->btree_interior_update_lock);
percpu_down_read_preempt_disable(&c->mark_lock);
fs_usage = bch2_fs_usage_get_scratch(c);
percpu_down_read(&c->mark_lock);
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
true, 0,
@ -1993,7 +1996,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
fs_usage);
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
percpu_up_read_preempt_enable(&c->mark_lock);
bch2_fs_usage_scratch_put(c, fs_usage);
percpu_up_read(&c->mark_lock);
mutex_unlock(&c->btree_interior_update_lock);
if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {

File diff suppressed because it is too large


@ -143,6 +143,37 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
percpu_up_write(&c->mark_lock);
}
void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage)
{
if (fs_usage == c->usage_scratch)
mutex_unlock(&c->usage_scratch_lock);
else
kfree(fs_usage);
}
struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c)
{
struct bch_fs_usage *ret;
unsigned bytes = fs_usage_u64s(c) * sizeof(u64);
ret = kzalloc(bytes, GFP_NOWAIT);
if (ret)
return ret;
if (mutex_trylock(&c->usage_scratch_lock))
goto out_pool;
ret = kzalloc(bytes, GFP_NOFS);
if (ret)
return ret;
mutex_lock(&c->usage_scratch_lock);
out_pool:
ret = c->usage_scratch;
memset(ret, 0, bytes);
return ret;
}
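
Callers bracket this single-element scratch pool under mark_lock, following the pattern in the bch2_btree_set_root_inmem() and __bch2_btree_node_update_key() hunks above. A minimal sketch (disk_res stands in for whatever reservation the caller holds):

    struct bch_fs_usage *fs_usage;

    percpu_down_read(&c->mark_lock);
    fs_usage = bch2_fs_usage_scratch_get(c);

    /* ... mark keys into fs_usage, e.g. with bch2_mark_key_locked() ... */

    bch2_fs_usage_apply(c, fs_usage, disk_res);
    bch2_fs_usage_scratch_put(c, fs_usage);
    percpu_up_read(&c->mark_lock);
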
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
{
struct bch_dev_usage ret;
@ -290,8 +321,10 @@ int bch2_fs_usage_apply(struct bch_fs *c,
fs_usage->online_reserved -= added;
}
preempt_disable();
acc_u64s((u64 *) this_cpu_ptr(c->usage[0]),
(u64 *) fs_usage, fs_usage_u64s(c));
preempt_enable();
return ret;
}
@ -549,7 +582,6 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, enum bch_data_type type,
unsigned sectors, bool gc)
{
struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark old, new;
bool overflow;
@ -568,7 +600,8 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
old.dirty_sectors, sectors);
if (c)
bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
bch2_dev_usage_update(c, ca, this_cpu_ptr(c->usage[gc]),
old, new, gc);
return 0;
}
@ -897,31 +930,39 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
unsigned journal_seq, unsigned flags,
bool gc)
{
int ret = 0;
preempt_disable();
if (!fs_usage || gc)
fs_usage = this_cpu_ptr(c->usage[gc]);
switch (k.k->type) {
case KEY_TYPE_alloc:
return bch2_mark_alloc(c, k, inserting,
ret = bch2_mark_alloc(c, k, inserting,
fs_usage, journal_seq, flags, gc);
break;
case KEY_TYPE_btree_ptr:
return bch2_mark_extent(c, k, inserting
ret = bch2_mark_extent(c, k, inserting
? c->opts.btree_node_size
: -c->opts.btree_node_size,
BCH_DATA_BTREE,
fs_usage, journal_seq, flags, gc);
break;
case KEY_TYPE_extent:
return bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
fs_usage, journal_seq, flags, gc);
break;
case KEY_TYPE_stripe:
return bch2_mark_stripe(c, k, inserting,
ret = bch2_mark_stripe(c, k, inserting,
fs_usage, journal_seq, flags, gc);
break;
case KEY_TYPE_inode:
if (inserting)
fs_usage->nr_inodes++;
else
fs_usage->nr_inodes--;
return 0;
break;
case KEY_TYPE_reservation: {
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@ -931,11 +972,13 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
fs_usage->reserved += sectors;
fs_usage->persistent_reserved[replicas - 1] += sectors;
return 0;
break;
}
default:
return 0;
}
preempt_enable();
return ret;
}
int bch2_mark_key_locked(struct bch_fs *c,
@ -966,25 +1009,20 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
return ret;
}
void bch2_mark_update(struct btree_insert *trans,
struct btree_insert_entry *insert)
void bch2_mark_update(struct btree_trans *trans,
struct btree_insert_entry *insert,
struct bch_fs_usage *fs_usage)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter = insert->iter;
struct btree *b = iter->l[0].b;
struct btree_node_iter node_iter = iter->l[0].iter;
struct bch_fs_usage *fs_usage;
struct gc_pos pos = gc_pos_btree_node(b);
struct bkey_packed *_k;
u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
static int warned_disk_usage = 0;
if (!btree_node_type_needs_gc(iter->btree_id))
return;
percpu_down_read_preempt_disable(&c->mark_lock);
fs_usage = bch2_fs_usage_get_scratch(c);
if (!(trans->flags & BTREE_INSERT_NOMARK))
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
bpos_min(insert->k->k.p, b->key.k.p).offset -
@ -1037,16 +1075,32 @@ void bch2_mark_update(struct btree_insert *trans,
bch2_btree_node_iter_advance(&node_iter, b);
}
}
if (bch2_fs_usage_apply(c, fs_usage, trans->disk_res) &&
!warned_disk_usage &&
!xchg(&warned_disk_usage, 1)) {
void bch2_trans_fs_usage_apply(struct btree_trans *trans,
struct bch_fs_usage *fs_usage)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
static int warned_disk_usage = 0;
u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
char buf[200];
if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res) ||
warned_disk_usage ||
xchg(&warned_disk_usage, 1))
return;
pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
trans_for_each_update_iter(trans, i) {
struct btree_iter *iter = i->iter;
struct btree *b = iter->l[0].b;
struct btree_node_iter node_iter = iter->l[0].iter;
struct bkey_packed *_k;
pr_err("while inserting");
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert->k));
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
pr_err("%s", buf);
pr_err("overlapping with");
@ -1059,8 +1113,8 @@ void bch2_mark_update(struct btree_insert *trans,
k = bkey_disassemble(b, _k, &unpacked);
if (btree_node_is_extents(b)
? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
: bkey_cmp(insert->k->k.p, k.k->p))
? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
: bkey_cmp(i->k->k.p, k.k->p))
break;
bch2_bkey_val_to_text(&PBUF(buf), c, k);
@ -1069,8 +1123,6 @@ void bch2_mark_update(struct btree_insert *trans,
bch2_btree_node_iter_advance(&node_iter, b);
}
}
percpu_up_read_preempt_enable(&c->mark_lock);
}
/* Disk reservations: */


@ -218,13 +218,8 @@ static inline unsigned fs_usage_u64s(struct bch_fs *c)
READ_ONCE(c->replicas.nr);
}
static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
{
struct bch_fs_usage *ret = this_cpu_ptr(c->usage_scratch);
memset(ret, 0, fs_usage_u64s(c) * sizeof(u64));
return ret;
}
void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *);
struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *);
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
@ -255,10 +250,13 @@ int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *);
void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
struct bch_fs_usage *);
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
/* disk reservations: */
void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);


@ -150,7 +150,7 @@ int __bch2_dirent_create(struct btree_trans *trans,
if (ret)
return ret;
return __bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
return bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
dir_inum, &dirent->k_i, flags);
}


@ -628,36 +628,12 @@ void bch2_stripes_heap_insert(struct bch_fs *c,
/* stripe deletion */
static void ec_stripe_delete(struct bch_fs *c, size_t idx)
static int ec_stripe_delete(struct bch_fs *c, size_t idx)
{
struct btree_iter iter;
struct bch_stripe *v = NULL;
struct bkey_s_c k;
struct bkey_i delete;
u64 journal_seq = 0;
bch2_btree_iter_init(&iter, c, BTREE_ID_EC,
return bch2_btree_delete_range(c, BTREE_ID_EC,
POS(0, idx),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(&iter);
if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe)
goto out;
v = kmalloc(bkey_val_bytes(k.k), GFP_KERNEL);
BUG_ON(!v);
memcpy(v, bkey_s_c_to_stripe(k).v, bkey_val_bytes(k.k));
bkey_init(&delete.k);
delete.k.p = iter.pos;
bch2_btree_insert_at(c, NULL, &journal_seq,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_NOUNLOCK,
BTREE_INSERT_ENTRY(&iter, &delete));
out:
bch2_btree_iter_unlock(&iter);
kfree(v);
POS(0, idx + 1),
NULL);
}
static void ec_stripe_delete_work(struct work_struct *work)
@ -689,39 +665,46 @@ static void ec_stripe_delete_work(struct work_struct *work)
static int ec_stripe_bkey_insert(struct bch_fs *c,
struct bkey_i_stripe *stripe)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
int ret;
/* XXX: start pos hint */
bch2_trans_init(&trans, c);
retry:
for_each_btree_key(&iter, c, BTREE_ID_EC, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) {
if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) {
bch2_btree_iter_unlock(&iter);
return -ENOSPC;
}
bch2_trans_begin(&trans);
/* XXX: start pos hint */
iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) {
if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0)
break;
if (bkey_deleted(k.k))
goto found_slot;
}
return bch2_btree_iter_unlock(&iter) ?: -ENOSPC;
ret = -ENOSPC;
goto out;
found_slot:
ret = ec_stripe_mem_alloc(c, &iter);
ret = ec_stripe_mem_alloc(c, iter);
if (ret == -EINTR)
goto retry;
if (ret)
return ret;
stripe->k.p = iter.pos;
stripe->k.p = iter->pos;
ret = bch2_btree_insert_at(c, NULL, NULL,
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &stripe->k_i));
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE,
BTREE_INSERT_ENTRY(&iter, &stripe->k_i));
bch2_btree_iter_unlock(&iter);
BTREE_INSERT_USE_RESERVE);
out:
bch2_trans_exit(&trans);
return ret;
}
@ -748,23 +731,26 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
struct ec_stripe_buf *s,
struct bkey *pos)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
BKEY_PADDED(k) tmp;
int ret = 0, dev, idx;
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(pos),
BTREE_ITER_INTENT);
while ((k = bch2_btree_iter_peek(&iter)).k &&
!btree_iter_err(k) &&
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = btree_iter_err(k)) &&
bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) {
idx = extent_matches_stripe(c, &s->key.v, k);
if (idx < 0) {
bch2_btree_iter_next(&iter);
bch2_btree_iter_next(iter);
continue;
}
@ -782,18 +768,21 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
extent_stripe_ptr_add(e, s, ptr, idx);
ret = bch2_btree_insert_at(c, NULL, NULL,
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &tmp.k));
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE,
BTREE_INSERT_ENTRY(&iter, &tmp.k));
BTREE_INSERT_USE_RESERVE);
if (ret == -EINTR)
ret = 0;
if (ret)
break;
}
return bch2_btree_iter_unlock(&iter) ?: ret;
bch2_trans_exit(&trans);
return ret;
}
/*
@ -1162,13 +1151,14 @@ unlock:
mutex_unlock(&c->ec_new_stripe_lock);
}
static int __bch2_stripe_write_key(struct bch_fs *c,
static int __bch2_stripe_write_key(struct btree_trans *trans,
struct btree_iter *iter,
struct stripe *m,
size_t idx,
struct bkey_i_stripe *new_key,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_s_c k;
unsigned i;
int ret;
@ -1194,14 +1184,16 @@ static int __bch2_stripe_write_key(struct bch_fs *c,
spin_unlock(&c->ec_stripes_heap_lock);
return bch2_btree_insert_at(c, NULL, NULL,
BTREE_INSERT_NOFAIL|flags,
BTREE_INSERT_ENTRY(iter, &new_key->k_i));
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &new_key->k_i));
return bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|flags);
}
int bch2_stripes_write(struct bch_fs *c, bool *wrote)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct genradix_iter giter;
struct bkey_i_stripe *new_key;
struct stripe *m;
@ -1210,14 +1202,16 @@ int bch2_stripes_write(struct bch_fs *c, bool *wrote)
new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL);
BUG_ON(!new_key);
bch2_btree_iter_init(&iter, c, BTREE_ID_EC, POS_MIN,
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
genradix_for_each(&c->stripes[0], giter, m) {
if (!m->dirty)
continue;
ret = __bch2_stripe_write_key(c, &iter, m, giter.pos,
ret = __bch2_stripe_write_key(&trans, iter, m, giter.pos,
new_key, BTREE_INSERT_NOCHECK_RW);
if (ret)
break;
@ -1225,7 +1219,7 @@ int bch2_stripes_write(struct bch_fs *c, bool *wrote)
*wrote = true;
}
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
kfree(new_key);


@ -782,18 +782,6 @@ static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
return true;
}
struct extent_insert_state {
struct btree_insert *trans;
struct btree_insert_entry *insert;
struct bpos committed;
/* for deleting: */
struct bkey_i whiteout;
bool update_journal;
bool update_btree;
bool deleting;
};
static bool bch2_extent_merge_inline(struct bch_fs *,
struct btree_iter *,
struct bkey_packed *,
@ -880,67 +868,29 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_btree_iter_verify(iter, l->b);
}
static void extent_insert_committed(struct extent_insert_state *s)
{
struct bch_fs *c = s->trans->c;
struct btree_iter *iter = s->insert->iter;
struct bkey_i *insert = s->insert->k;
BKEY_PADDED(k) split;
EBUG_ON(bkey_cmp(insert->k.p, s->committed) < 0);
EBUG_ON(bkey_cmp(s->committed, bkey_start_pos(&insert->k)) < 0);
bkey_copy(&split.k, insert);
if (s->deleting)
split.k.k.type = KEY_TYPE_discard;
bch2_cut_back(s->committed, &split.k.k);
if (!bkey_cmp(s->committed, iter->pos))
return;
bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
if (s->update_btree) {
if (debug_check_bkeys(c))
bch2_bkey_debugcheck(c, iter->l[0].b,
bkey_i_to_s_c(&split.k));
EBUG_ON(bkey_deleted(&split.k.k) || !split.k.k.size);
extent_bset_insert(c, iter, &split.k);
}
if (s->update_journal) {
bkey_copy(&split.k, !s->deleting ? insert : &s->whiteout);
if (s->deleting)
split.k.k.type = KEY_TYPE_discard;
bch2_cut_back(s->committed, &split.k.k);
EBUG_ON(bkey_deleted(&split.k.k) || !split.k.k.size);
bch2_btree_journal_key(s->trans, iter, &split.k);
}
bch2_cut_front(s->committed, insert);
insert->k.needs_whiteout = false;
}
void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
static inline struct bpos
bch2_extent_atomic_end(struct bkey_i *k, struct btree_iter *iter)
{
struct btree *b = iter->l[0].b;
BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
bch2_cut_back(b->key.k.p, &k->k);
BUG_ON(bkey_cmp(bkey_start_pos(&k->k), b->data->min_key) < 0);
return bpos_min(k->k.p, b->key.k.p);
}
void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
{
bch2_cut_back(bch2_extent_atomic_end(k, iter), &k->k);
}
bool bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
{
return !bkey_cmp(bch2_extent_atomic_end(k, iter), k->k.p);
}
enum btree_insert_ret
bch2_extent_can_insert(struct btree_insert *trans,
bch2_extent_can_insert(struct btree_trans *trans,
struct btree_insert_entry *insert,
unsigned *u64s)
{
@ -952,9 +902,6 @@ bch2_extent_can_insert(struct btree_insert *trans,
struct bkey_s_c k;
int sectors;
BUG_ON(trans->flags & BTREE_INSERT_ATOMIC &&
!bch2_extent_is_atomic(&insert->k->k, insert->iter));
/*
* We avoid creating whiteouts whenever possible when deleting, but
* those optimizations mean we may potentially insert two whiteouts
@ -998,12 +945,11 @@ bch2_extent_can_insert(struct btree_insert *trans,
}
static void
extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
extent_squash(struct bch_fs *c, struct btree_iter *iter,
struct bkey_i *insert,
struct bkey_packed *_k, struct bkey_s k,
enum bch_extent_overlap overlap)
{
struct bch_fs *c = s->trans->c;
struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0];
switch (overlap) {
@ -1089,34 +1035,39 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
}
}
static void __bch2_insert_fixup_extent(struct extent_insert_state *s)
struct extent_insert_state {
struct bkey_i whiteout;
bool update_journal;
bool update_btree;
bool deleting;
};
static void __bch2_insert_fixup_extent(struct bch_fs *c,
struct btree_iter *iter,
struct bkey_i *insert,
struct extent_insert_state *s)
{
struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0];
struct bkey_packed *_k;
struct bkey unpacked;
struct bkey_i *insert = s->insert->k;
while (bkey_cmp(s->committed, insert->k.p) < 0 &&
(_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
KEY_TYPE_discard))) {
struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
enum bch_extent_overlap overlap = bch2_extent_overlap(&insert->k, k.k);
EBUG_ON(bkey_cmp(iter->pos, k.k->p) >= 0);
struct bpos cur_end = bpos_min(insert->k.p, k.k->p);
enum bch_extent_overlap overlap =
bch2_extent_overlap(&insert->k, k.k);
if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
break;
s->committed = bpos_min(s->insert->k->k.p, k.k->p);
if (!bkey_whiteout(k.k))
s->update_journal = true;
if (!s->update_journal) {
bch2_cut_front(s->committed, insert);
bch2_cut_front(s->committed, &s->whiteout);
bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
bch2_cut_front(cur_end, insert);
bch2_cut_front(cur_end, &s->whiteout);
bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
goto next;
}
@ -1150,19 +1101,16 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s)
_k->needs_whiteout = false;
}
extent_squash(s, insert, _k, k, overlap);
extent_squash(c, iter, insert, _k, k, overlap);
if (!s->update_btree)
bch2_cut_front(s->committed, insert);
bch2_cut_front(cur_end, insert);
next:
if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
overlap == BCH_EXTENT_OVERLAP_MIDDLE)
break;
}
if (bkey_cmp(s->committed, insert->k.p) < 0)
s->committed = bpos_min(s->insert->k->k.p, l->b->key.k.p);
/*
* may have skipped past some deleted extents greater than the insert
* key, before we got to a non deleted extent and knew we could bail out
@ -1172,7 +1120,7 @@ next:
struct btree_node_iter node_iter = l->iter;
while ((_k = bch2_btree_node_iter_prev_all(&node_iter, l->b)) &&
bkey_cmp_left_packed(l->b, _k, &s->committed) > 0)
bkey_cmp_left_packed(l->b, _k, &insert->k.p) > 0)
l->iter = node_iter;
}
}
@ -1216,48 +1164,55 @@ next:
* If the end of iter->pos is not the same as the end of insert, then
* key insertion needs to continue/be retried.
*/
enum btree_insert_ret
bch2_insert_fixup_extent(struct btree_insert *trans,
void bch2_insert_fixup_extent(struct btree_trans *trans,
struct btree_insert_entry *insert)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter = insert->iter;
struct btree *b = iter->l[0].b;
struct extent_insert_state s = {
.trans = trans,
.insert = insert,
.committed = iter->pos,
.whiteout = *insert->k,
.update_journal = !bkey_whiteout(&insert->k->k),
.update_btree = !bkey_whiteout(&insert->k->k),
.deleting = bkey_whiteout(&insert->k->k),
};
BKEY_PADDED(k) tmp;
EBUG_ON(iter->level);
EBUG_ON(!insert->k->k.size);
/*
* As we process overlapping extents, we advance @iter->pos both to
* signal to our caller (btree_insert_key()) how much of @insert->k has
* been inserted, and also to keep @iter->pos consistent with
* @insert->k and the node iterator that we're advancing:
*/
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
__bch2_insert_fixup_extent(&s);
__bch2_insert_fixup_extent(c, iter, insert->k, &s);
extent_insert_committed(&s);
bch2_btree_iter_set_pos_same_leaf(iter, insert->k->k.p);
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
EBUG_ON(bkey_cmp(iter->pos, s.committed));
if (s.update_btree) {
bkey_copy(&tmp.k, insert->k);
if (insert->k->k.size) {
/* got to the end of this leaf node */
BUG_ON(bkey_cmp(iter->pos, b->key.k.p));
return BTREE_INSERT_NEED_TRAVERSE;
if (s.deleting)
tmp.k.k.type = KEY_TYPE_discard;
#if 0
/* disabled due to lock recursion - mark_lock: */
if (debug_check_bkeys(c))
bch2_bkey_debugcheck(c, iter->l[0].b,
bkey_i_to_s_c(&tmp.k));
#endif
EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
extent_bset_insert(c, iter, &tmp.k);
}
return BTREE_INSERT_OK;
if (s.update_journal) {
bkey_copy(&tmp.k, !s.deleting ? insert->k : &s.whiteout);
if (s.deleting)
tmp.k.k.type = KEY_TYPE_discard;
EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
bch2_btree_journal_key(trans, iter, &tmp.k);
}
bch2_cut_front(insert->k->k.p, insert->k);
}
const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)


@ -6,7 +6,7 @@
#include "extents_types.h"
struct bch_fs;
struct btree_insert;
struct btree_trans;
struct btree_insert_entry;
/* extent entries: */
@ -406,21 +406,13 @@ enum merge_result bch2_reservation_merge(struct bch_fs *,
}
void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
static inline bool bch2_extent_is_atomic(struct bkey *k,
struct btree_iter *iter)
{
struct btree *b = iter->l[0].b;
return bkey_cmp(k->p, b->key.k.p) <= 0 &&
bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0;
}
bool bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);
enum btree_insert_ret
bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *,
bch2_extent_can_insert(struct btree_trans *, struct btree_insert_entry *,
unsigned *);
enum btree_insert_ret
bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *);
void bch2_insert_fixup_extent(struct btree_trans *,
struct btree_insert_entry *);
void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
unsigned, unsigned);


@ -1530,7 +1530,7 @@ static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * cons
mutex_lock(&c->state_lock);
if (!bch2_fs_running(c)) {
if (!test_bit(BCH_FS_STARTED, &c->flags)) {
mutex_unlock(&c->state_lock);
closure_put(&c->cl);
pr_err("err mounting %s: incomplete filesystem", dev_name);
@ -1586,8 +1586,6 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
return ret;
if (opts.read_only != c->opts.read_only) {
const char *err = NULL;
mutex_lock(&c->state_lock);
if (opts.read_only) {
@ -1595,9 +1593,10 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags |= MS_RDONLY;
} else {
err = bch2_fs_read_write(c);
if (err) {
bch_err(c, "error going rw: %s", err);
ret = bch2_fs_read_write(c);
if (ret) {
bch_err(c, "error going rw: %i", ret);
mutex_unlock(&c->state_lock);
return -EINVAL;
}


@ -151,7 +151,7 @@ static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
}
static int hash_redo_key(const struct bch_hash_desc desc,
struct hash_check *h, struct bch_fs *c,
struct btree_trans *trans, struct hash_check *h,
struct btree_iter *k_iter, struct bkey_s_c k,
u64 hashed)
{
@ -164,15 +164,17 @@ static int hash_redo_key(const struct bch_hash_desc desc,
bkey_reassemble(tmp, k);
ret = bch2_btree_delete_at(k_iter, 0);
ret = bch2_btree_delete_at(trans, k_iter, 0);
if (ret)
goto err;
bch2_btree_iter_unlock(k_iter);
bch2_hash_set(desc, &h->info, c, k_iter->pos.inode, NULL, tmp,
bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
tmp, BCH_HASH_SET_MUST_CREATE);
ret = bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BCH_HASH_SET_MUST_CREATE);
BTREE_INSERT_LAZY_RW);
err:
kfree(tmp);
return ret;
@ -202,7 +204,8 @@ retry:
ret = bch2_hash_delete_at(&trans, desc, info, iter) ?:
bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL);
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
err:
if (ret == -EINTR)
goto retry;
@ -271,9 +274,10 @@ static bool key_has_correct_hash(const struct bch_hash_desc desc,
}
static int hash_check_key(const struct bch_hash_desc desc,
struct hash_check *h, struct bch_fs *c,
struct btree_trans *trans, struct hash_check *h,
struct btree_iter *k_iter, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
char buf[200];
u64 hashed;
int ret = 0;
@ -299,7 +303,7 @@ static int hash_check_key(const struct bch_hash_desc desc,
hashed, h->chain->pos.offset,
(bch2_bkey_val_to_text(&PBUF(buf), c,
k), buf))) {
ret = hash_redo_key(desc, h, c, k_iter, k, hashed);
ret = hash_redo_key(desc, trans, h, k_iter, k, hashed);
if (ret) {
bch_err(c, "hash_redo_key err %i", ret);
return ret;
@ -312,9 +316,10 @@ fsck_err:
return ret;
}
static int check_dirent_hash(struct hash_check *h, struct bch_fs *c,
static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h,
struct btree_iter *iter, struct bkey_s_c *k)
{
struct bch_fs *c = trans->c;
struct bkey_i_dirent *d = NULL;
int ret = -EINVAL;
char buf[200];
@ -359,9 +364,11 @@ static int check_dirent_hash(struct hash_check *h, struct bch_fs *c,
if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)",
buf, strlen(buf), d->v.d_name, len)) {
ret = bch2_btree_insert_at(c, NULL, NULL,
BTREE_INSERT_NOFAIL,
BTREE_INSERT_ENTRY(iter, &d->k_i));
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &d->k_i));
ret = bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
if (ret)
goto err;
@ -383,8 +390,8 @@ err_redo:
k->k->p.offset, hash, h->chain->pos.offset,
(bch2_bkey_val_to_text(&PBUF(buf), c,
*k), buf))) {
ret = hash_redo_key(bch2_dirent_hash_desc,
h, c, iter, *k, hash);
ret = hash_redo_key(bch2_dirent_hash_desc, trans,
h, iter, *k, hash);
if (ret)
bch_err(c, "hash_redo_key err %i", ret);
else
@ -531,7 +538,7 @@ static int check_dirents(struct bch_fs *c)
mode_to_type(w.inode.bi_mode),
(bch2_bkey_val_to_text(&PBUF(buf), c,
k), buf))) {
ret = bch2_btree_delete_at(iter, 0);
ret = bch2_btree_delete_at(&trans, iter, 0);
if (ret)
goto err;
continue;
@ -540,7 +547,7 @@ static int check_dirents(struct bch_fs *c)
if (w.first_this_inode && w.have_inode)
hash_check_set_inode(&h, c, &w.inode);
ret = check_dirent_hash(&h, c, iter, &k);
ret = check_dirent_hash(&trans, &h, iter, &k);
if (ret > 0) {
ret = 0;
continue;
@ -622,9 +629,12 @@ static int check_dirents(struct bch_fs *c)
bkey_reassemble(&n->k_i, d.s_c);
n->v.d_type = mode_to_type(target.bi_mode);
ret = bch2_btree_insert_at(c, NULL, NULL,
BTREE_INSERT_NOFAIL,
bch2_trans_update(&trans,
BTREE_INSERT_ENTRY(iter, &n->k_i));
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
kfree(n);
if (ret)
goto err;
@ -668,7 +678,7 @@ static int check_xattrs(struct bch_fs *c)
if (fsck_err_on(!w.have_inode, c,
"xattr for missing inode %llu",
k.k->p.inode)) {
ret = bch2_btree_delete_at(iter, 0);
ret = bch2_btree_delete_at(&trans, iter, 0);
if (ret)
goto err;
continue;
@ -677,7 +687,7 @@ static int check_xattrs(struct bch_fs *c)
if (w.first_this_inode && w.have_inode)
hash_check_set_inode(&h, c, &w.inode);
ret = hash_check_key(bch2_xattr_hash_desc, &h, c, iter, k);
ret = hash_check_key(bch2_xattr_hash_desc, &trans, &h, iter, k);
if (ret)
goto fsck_err;
}
@ -1162,12 +1172,13 @@ fsck_err:
return ret;
}
static int check_inode(struct bch_fs *c,
static int check_inode(struct btree_trans *trans,
struct bch_inode_unpacked *lostfound_inode,
struct btree_iter *iter,
struct bkey_s_c_inode inode,
struct nlink *link)
{
struct bch_fs *c = trans->c;
struct bch_inode_unpacked u;
bool do_update = false;
int ret = 0;
@ -1258,10 +1269,11 @@ static int check_inode(struct bch_fs *c,
struct bkey_inode_buf p;
bch2_inode_pack(&p, &u);
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
ret = bch2_btree_insert_at(c, NULL, NULL,
BTREE_INSERT_NOFAIL,
BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
ret = bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW);
if (ret && ret != -EINTR)
bch_err(c, "error in fs gc: error %i "
"updating inode", ret);
@ -1276,25 +1288,29 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
nlink_table *links,
u64 range_start, u64 range_end)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct nlink *link, zero_links = { 0, 0 };
struct genradix_iter nlinks_iter;
int ret = 0, ret2 = 0;
u64 nlinks_pos;
bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0), 0);
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
POS(range_start, 0), 0);
nlinks_iter = genradix_iter_init(links, 0);
while ((k = bch2_btree_iter_peek(&iter)).k &&
!btree_iter_err(k)) {
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret2 = btree_iter_err(k))) {
peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
if (!link && (!k.k || iter.pos.inode >= range_end))
if (!link && (!k.k || iter->pos.inode >= range_end))
break;
nlinks_pos = range_start + nlinks_iter.pos;
if (iter.pos.inode > nlinks_pos) {
if (iter->pos.inode > nlinks_pos) {
/* Should have been caught by dirents pass: */
need_fsck_err_on(link && link->count, c,
"missing inode %llu (nlink %u)",
@ -1303,7 +1319,7 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
goto peek_nlinks;
}
if (iter.pos.inode < nlinks_pos || !link)
if (iter->pos.inode < nlinks_pos || !link)
link = &zero_links;
if (k.k && k.k->type == KEY_TYPE_inode) {
@ -1311,9 +1327,9 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
* Avoid potential deadlocks with iter for
* truncate/rm/etc.:
*/
bch2_btree_iter_unlock(&iter);
bch2_btree_iter_unlock(iter);
ret = check_inode(c, lostfound_inode, &iter,
ret = check_inode(&trans, lostfound_inode, iter,
bkey_s_c_to_inode(k), link);
BUG_ON(ret == -EINTR);
if (ret)
@ -1325,14 +1341,15 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
nlinks_pos, link->count);
}
if (nlinks_pos == iter.pos.inode)
if (nlinks_pos == iter->pos.inode)
genradix_iter_advance(&nlinks_iter, links);
bch2_btree_iter_next(&iter);
bch2_btree_iter_cond_resched(&iter);
bch2_btree_iter_next(iter);
bch2_btree_iter_cond_resched(iter);
}
fsck_err:
ret2 = bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
if (ret2)
bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2);
@ -1378,12 +1395,18 @@ static int check_inode_nlinks(struct bch_fs *c,
noinline_for_stack
static int check_inodes_fast(struct bch_fs *c)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_s_c_inode inode;
int ret = 0;
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
POS_MIN, 0);
for_each_btree_key_continue(iter, 0, k) {
if (k.k->type != KEY_TYPE_inode)
continue;
@ -1393,14 +1416,19 @@ static int check_inodes_fast(struct bch_fs *c)
(BCH_INODE_I_SIZE_DIRTY|
BCH_INODE_I_SECTORS_DIRTY|
BCH_INODE_UNLINKED)) {
ret = check_inode(c, NULL, &iter, inode, NULL);
ret = check_inode(&trans, NULL, iter, inode, NULL);
BUG_ON(ret == -EINTR);
if (ret)
break;
}
}
return bch2_btree_iter_unlock(&iter) ?: ret;
if (!ret)
ret = bch2_btree_iter_unlock(iter);
bch2_trans_exit(&trans);
return ret;
}
/*


@ -368,7 +368,8 @@ int bch2_inode_create(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_i_inode_generation delete;
struct bpos start = POS(inode_nr, 0);
struct bpos end = POS(inode_nr + 1, 0);
@ -391,17 +392,17 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
if (ret)
return ret;
bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inode_nr, 0),
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(inode_nr, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
do {
struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
u32 bi_generation = 0;
ret = btree_iter_err(k);
if (ret) {
bch2_btree_iter_unlock(&iter);
return ret;
}
if (ret)
break;
bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c,
"inode %llu not found when deleting",
@ -432,13 +433,15 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
delete.v.bi_generation = cpu_to_le32(bi_generation);
}
ret = bch2_btree_insert_at(c, NULL, NULL,
bch2_trans_update(&trans,
BTREE_INSERT_ENTRY(iter, &delete.k_i));
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL,
BTREE_INSERT_ENTRY(&iter, &delete.k_i));
BTREE_INSERT_NOFAIL);
} while (ret == -EINTR);
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
return ret;
}


@ -276,19 +276,44 @@ static void bch2_write_done(struct closure *cl)
int bch2_write_index_default(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
struct btree_trans trans;
struct btree_iter *iter;
struct keylist *keys = &op->insert_keys;
struct btree_iter iter;
int ret;
bch2_btree_iter_init(&iter, op->c, BTREE_ID_EXTENTS,
BUG_ON(bch2_keylist_empty(keys));
bch2_verify_keylist_sorted(keys);
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
BTREE_ITER_INTENT);
ret = bch2_btree_insert_list_at(&iter, keys, &op->res,
op_journal_seq(op),
do {
BKEY_PADDED(k) split;
bkey_copy(&split.k, bch2_keylist_front(keys));
bch2_extent_trim_atomic(&split.k, iter);
bch2_trans_update(&trans,
BTREE_INSERT_ENTRY(iter, &split.k));
ret = bch2_trans_commit(&trans, &op->res, op_journal_seq(op),
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE);
bch2_btree_iter_unlock(&iter);
if (ret)
break;
if (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) < 0)
bch2_cut_front(iter->pos, bch2_keylist_front(keys));
else
bch2_keylist_pop_front(keys);
} while (!bch2_keylist_empty(keys));
bch2_trans_exit(&trans);
return ret;
}
@ -1367,7 +1392,8 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
{
struct bch_fs *c = rbio->c;
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_i_extent *e;
BKEY_PADDED(k) new;
@ -1378,10 +1404,13 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
if (rbio->pick.crc.compression_type)
return;
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, rbio->pos,
BTREE_ITER_INTENT);
bch2_trans_init(&trans, c);
retry:
k = bch2_btree_iter_peek(&iter);
bch2_trans_begin(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, rbio->pos,
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek(iter);
if (IS_ERR_OR_NULL(k.k))
goto out;
@ -1417,15 +1446,15 @@ retry:
if (!bch2_extent_narrow_crcs(e, new_crc))
goto out;
ret = bch2_btree_insert_at(c, NULL, NULL,
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &e->k_i));
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_NOWAIT,
BTREE_INSERT_ENTRY(&iter, &e->k_i));
BTREE_INSERT_NOWAIT);
if (ret == -EINTR)
goto retry;
out:
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
}
static bool should_narrow_crcs(struct bkey_s_c k,


@ -1027,8 +1027,6 @@ void bch2_fs_journal_start(struct journal *j)
* only have to go down with the next journal entry we write:
*/
bch2_journal_seq_blacklist_write(j);
queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0);
}
/* init/exit: */


@ -825,6 +825,8 @@ fsck_err:
static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
{
struct btree_trans trans;
struct btree_iter *iter;
/*
* We might cause compressed extents to be
* split, so we need to pass in a
@ -833,20 +835,21 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
BKEY_PADDED(k) split;
struct btree_iter iter;
int ret;
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(&k->k),
BTREE_ITER_INTENT);
do {
ret = bch2_btree_iter_traverse(&iter);
ret = bch2_btree_iter_traverse(iter);
if (ret)
break;
bkey_copy(&split.k, k);
bch2_cut_front(iter.pos, &split.k);
bch2_extent_trim_atomic(&split.k, &iter);
bch2_cut_front(iter->pos, &split.k);
bch2_extent_trim_atomic(&split.k, iter);
ret = bch2_disk_reservation_add(c, &disk_res,
split.k.k.size *
@ -854,13 +857,14 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
BCH_DISK_RESERVATION_NOFAIL);
BUG_ON(ret);
ret = bch2_btree_insert_at(c, &disk_res, NULL,
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &split.k));
ret = bch2_trans_commit(&trans, &disk_res, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_JOURNAL_REPLAY,
BTREE_INSERT_ENTRY(&iter, &split.k));
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_JOURNAL_REPLAY);
} while ((!ret || ret == -EINTR) &&
bkey_cmp(k->k.p, iter.pos));
bkey_cmp(k->k.p, iter->pos));
bch2_disk_reservation_put(c, &disk_res);
@@ -873,9 +877,9 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
* before journal replay finishes
*/
bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
gc_pos_btree_node(iter.l[0].b),
gc_pos_btree_node(iter->l[0].b),
NULL, 0, 0);
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
return ret;
}
@@ -903,6 +907,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
ret = bch2_btree_insert(c, entry->btree_id, k,
NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW|
BTREE_INSERT_JOURNAL_REPLAY|
BTREE_INSERT_NOMARK);
break;


@@ -35,25 +35,29 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
BKEY_PADDED(key) tmp;
struct btree_iter iter;
int ret = 0;
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS_MIN, BTREE_ITER_PREFETCH);
mutex_lock(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
POS_MIN, BTREE_ITER_PREFETCH);
while ((k = bch2_btree_iter_peek(&iter)).k &&
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = btree_iter_err(k))) {
if (!bkey_extent_is_data(k.k) ||
!bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
ret = bch2_mark_bkey_replicas(c, k);
if (ret)
break;
bch2_btree_iter_next(&iter);
bch2_btree_iter_next(iter);
continue;
}
@@ -71,12 +75,14 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
*/
bch2_extent_normalize(c, bkey_i_to_s(&tmp.key));
iter.pos = bkey_start_pos(&tmp.key.k);
/* XXX not sketchy at all */
iter->pos = bkey_start_pos(&tmp.key.k);
ret = bch2_btree_insert_at(c, NULL, NULL,
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &tmp.key));
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL,
BTREE_INSERT_ENTRY(&iter, &tmp.key));
BTREE_INSERT_NOFAIL);
/*
* don't want to leave ret == -EINTR, since if we raced and
@@ -89,7 +95,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
break;
}
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
bch2_replicas_gc_end(c, ret);
mutex_unlock(&c->replicas_gc_lock);


@@ -54,18 +54,21 @@ struct moving_context {
static int bch2_migrate_index_update(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
struct btree_trans trans;
struct btree_iter *iter;
struct migrate_write *m =
container_of(op, struct migrate_write, op);
struct keylist *keys = &op->insert_keys;
struct btree_iter iter;
int ret = 0;
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
while (1) {
struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
struct bkey_i_extent *insert, *new =
bkey_i_to_extent(bch2_keylist_front(keys));
BKEY_PADDED(k) _new, _insert;
@@ -74,10 +77,9 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
bool did_work = false;
int nr;
if (btree_iter_err(k)) {
ret = bch2_btree_iter_unlock(&iter);
ret = btree_iter_err(k);
if (ret)
break;
}
if (bversion_cmp(k.k->version, new->k.version) ||
!bkey_extent_is_data(k.k) ||
@@ -96,7 +98,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
bkey_copy(&_new.k, bch2_keylist_front(keys));
new = bkey_i_to_extent(&_new.k);
bch2_cut_front(iter.pos, &insert->k_i);
bch2_cut_front(iter->pos, &insert->k_i);
bch2_cut_back(new->k.p, &insert->k);
bch2_cut_back(insert->k.p, &new->k);
@@ -138,12 +140,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
if (insert->k.size < k.k->size &&
bch2_extent_is_compressed(k) &&
nr > 0) {
/*
* can't call bch2_disk_reservation_add() with btree
* locks held, at least not without a song and dance
*/
bch2_btree_iter_unlock(&iter);
ret = bch2_disk_reservation_add(c, &op->res,
keylist_sectors(keys) * nr, 0);
if (ret)
@@ -153,13 +149,15 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
goto next;
}
ret = bch2_btree_insert_at(c, &op->res,
bch2_trans_update(&trans,
BTREE_INSERT_ENTRY(iter, &insert->k_i));
ret = bch2_trans_commit(&trans, &op->res,
op_journal_seq(op),
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
m->data_opts.btree_insert_flags,
BTREE_INSERT_ENTRY(&iter, &insert->k_i));
m->data_opts.btree_insert_flags);
if (!ret)
atomic_long_inc(&c->extent_migrate_done);
if (ret == -EINTR)
@@ -167,25 +165,25 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
if (ret)
break;
next:
while (bkey_cmp(iter.pos, bch2_keylist_front(keys)->k.p) >= 0) {
while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) {
bch2_keylist_pop_front(keys);
if (bch2_keylist_empty(keys))
goto out;
}
bch2_cut_front(iter.pos, bch2_keylist_front(keys));
bch2_cut_front(iter->pos, bch2_keylist_front(keys));
continue;
nomatch:
if (m->ctxt)
atomic64_add(k.k->p.offset - iter.pos.offset,
atomic64_add(k.k->p.offset - iter->pos.offset,
&m->ctxt->stats->sectors_raced);
atomic_long_inc(&c->extent_migrate_raced);
trace_move_race(&new->k);
bch2_btree_iter_next_slot(&iter);
bch2_btree_iter_next_slot(iter);
goto next;
}
out:
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
return ret;
}


@@ -707,7 +707,8 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
struct qc_dqblk *qdq)
{
struct bch_fs *c = sb->s_fs_info;
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_i_quota new_quota;
int ret;
@@ -718,9 +719,11 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
bkey_quota_init(&new_quota.k_i);
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
bch2_btree_iter_init(&iter, c, BTREE_ID_QUOTAS, new_quota.k.p,
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_QUOTAS, new_quota.k.p,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(&iter);
k = bch2_btree_iter_peek_slot(iter);
ret = btree_iter_err(k);
if (unlikely(ret))
@@ -742,9 +745,11 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
if (qdq->d_fieldmask & QC_INO_HARD)
new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
BTREE_INSERT_ENTRY(&iter, &new_quota.k_i));
bch2_btree_iter_unlock(&iter);
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &new_quota.k_i));
ret = bch2_trans_commit(&trans, NULL, NULL, 0);
bch2_trans_exit(&trans);
if (ret)
return ret;


@@ -106,10 +106,11 @@ static int journal_replay_entry_early(struct bch_fs *c,
}
static int verify_superblock_clean(struct bch_fs *c,
struct bch_sb_field_clean *clean,
struct bch_sb_field_clean **cleanp,
struct jset *j)
{
unsigned i;
struct bch_sb_field_clean *clean = *cleanp;
int ret = 0;
if (!clean || !j)
@@ -118,8 +119,11 @@ static int verify_superblock_clean(struct bch_fs *c,
if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
"superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
le64_to_cpu(clean->journal_seq),
le64_to_cpu(j->seq)))
bch2_fs_mark_clean(c, false);
le64_to_cpu(j->seq))) {
kfree(clean);
*cleanp = NULL;
return 0;
}
mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
"superblock read clock doesn't match journal after clean shutdown");
@@ -186,6 +190,8 @@ int bch2_fs_recovery(struct bch_fs *c)
LIST_HEAD(journal);
struct jset *j = NULL;
unsigned i;
bool run_gc = c->opts.fsck ||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO));
int ret;
mutex_lock(&c->sb_lock);
@@ -228,7 +234,7 @@ int bch2_fs_recovery(struct bch_fs *c)
BUG_ON(ret);
}
ret = verify_superblock_clean(c, clean, j);
ret = verify_superblock_clean(c, &clean, j);
if (ret)
goto err;
@@ -270,15 +276,22 @@ int bch2_fs_recovery(struct bch_fs *c)
continue;
err = "invalid btree root pointer";
ret = -1;
if (r->error)
goto err;
if (i == BTREE_ID_ALLOC &&
test_reconstruct_alloc(c))
continue;
err = "error reading btree root";
if (bch2_btree_root_read(c, i, &r->key, r->level)) {
ret = bch2_btree_root_read(c, i, &r->key, r->level);
if (ret) {
if (i != BTREE_ID_ALLOC)
goto err;
mustfix_fsck_err(c, "error reading btree root");
run_gc = true;
}
}
@@ -299,8 +312,7 @@ int bch2_fs_recovery(struct bch_fs *c)
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
if (!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
c->opts.fsck) {
if (run_gc) {
bch_verbose(c, "starting mark and sweep:");
err = "error in recovery";
ret = bch2_gc(c, &journal, true);
@@ -322,13 +334,6 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.noreplay)
goto out;
/*
* Mark dirty before journal replay, fsck:
* XXX: after a clean shutdown, this could be done lazily only when fsck
* finds an error
*/
bch2_fs_mark_clean(c, false);
/*
* bch2_fs_journal_start() can't happen sooner, or btree_gc_finish()
* will give spurious errors about oldest_gen > bucket_gen -
@@ -336,11 +341,6 @@ int bch2_fs_recovery(struct bch_fs *c)
*/
bch2_fs_journal_start(&c->journal);
err = "error starting allocator";
ret = bch2_fs_allocator_start(c);
if (ret)
goto err;
bch_verbose(c, "starting journal replay:");
err = "journal replay failed";
ret = bch2_journal_replay(c, &journal);
@@ -427,8 +427,8 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_fs_journal_start(&c->journal);
bch2_journal_set_replay_done(&c->journal);
err = "error starting allocator";
ret = bch2_fs_allocator_start(c);
err = "error going read write";
ret = __bch2_fs_read_write(c, true);
if (ret)
goto err;


@@ -206,22 +206,29 @@ static bool __replicas_has_entry(struct bch_replicas_cpu *r,
return __replicas_entry_idx(r, search) >= 0;
}
static bool bch2_replicas_marked_locked(struct bch_fs *c,
struct bch_replicas_entry *search,
bool check_gc_replicas)
{
if (!search->nr_devs)
return true;
verify_replicas_entry_sorted(search);
return __replicas_has_entry(&c->replicas, search) &&
(!check_gc_replicas ||
likely((!c->replicas_gc.entries)) ||
__replicas_has_entry(&c->replicas_gc, search));
}
bool bch2_replicas_marked(struct bch_fs *c,
struct bch_replicas_entry *search,
bool check_gc_replicas)
{
bool marked;
if (!search->nr_devs)
return true;
verify_replicas_entry_sorted(search);
percpu_down_read_preempt_disable(&c->mark_lock);
marked = __replicas_has_entry(&c->replicas, search) &&
(!check_gc_replicas ||
likely((!c->replicas_gc.entries)) ||
__replicas_has_entry(&c->replicas_gc, search));
marked = bch2_replicas_marked_locked(c, search, check_gc_replicas);
percpu_up_read_preempt_enable(&c->mark_lock);
return marked;
@@ -262,7 +269,7 @@ static int replicas_table_update(struct bch_fs *c,
struct bch_replicas_cpu *new_r)
{
struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL };
struct bch_fs_usage __percpu *new_scratch = NULL;
struct bch_fs_usage *new_scratch = NULL;
unsigned bytes = sizeof(struct bch_fs_usage) +
sizeof(u64) * new_r->nr;
int ret = -ENOMEM;
@@ -272,8 +279,7 @@ static int replicas_table_update(struct bch_fs *c,
(c->usage[1] &&
!(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64),
GFP_NOIO))) ||
!(new_scratch = __alloc_percpu_gfp(bytes, sizeof(u64),
GFP_NOIO)))
!(new_scratch = kmalloc(bytes, GFP_NOIO)))
goto err;
if (c->usage[0])
@@ -289,7 +295,7 @@ static int replicas_table_update(struct bch_fs *c,
swap(c->replicas, *new_r);
ret = 0;
err:
free_percpu(new_scratch);
kfree(new_scratch);
free_percpu(new_usage[1]);
free_percpu(new_usage[0]);
return ret;
@@ -389,7 +395,7 @@ int bch2_mark_replicas(struct bch_fs *c,
: bch2_mark_replicas_slowpath(c, r);
}
bool bch2_bkey_replicas_marked(struct bch_fs *c,
bool bch2_bkey_replicas_marked_locked(struct bch_fs *c,
struct bkey_s_c k,
bool check_gc_replicas)
{
@@ -400,13 +406,27 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c,
for (i = 0; i < cached.nr; i++) {
bch2_replicas_entry_cached(&search.e, cached.devs[i]);
if (!bch2_replicas_marked(c, &search.e, check_gc_replicas))
if (!bch2_replicas_marked_locked(c, &search.e,
check_gc_replicas))
return false;
}
bkey_to_replicas(&search.e, k);
return bch2_replicas_marked(c, &search.e, check_gc_replicas);
return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas);
}
bool bch2_bkey_replicas_marked(struct bch_fs *c,
struct bkey_s_c k,
bool check_gc_replicas)
{
bool marked;
percpu_down_read_preempt_disable(&c->mark_lock);
marked = bch2_bkey_replicas_marked_locked(c, k, check_gc_replicas);
percpu_up_read_preempt_enable(&c->mark_lock);
return marked;
}
int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)


@@ -25,6 +25,8 @@ bool bch2_replicas_marked(struct bch_fs *,
int bch2_mark_replicas(struct bch_fs *,
struct bch_replicas_entry *);
bool bch2_bkey_replicas_marked_locked(struct bch_fs *,
struct bkey_s_c, bool);
bool bch2_bkey_replicas_marked(struct bch_fs *,
struct bkey_s_c, bool);
int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);


@@ -213,7 +213,7 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
}
static __always_inline
int __bch2_hash_set(struct btree_trans *trans,
int bch2_hash_set(struct btree_trans *trans,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
u64 inode, struct bkey_i *insert, int flags)
@@ -267,17 +267,6 @@ found:
return 0;
}
static inline int bch2_hash_set(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
struct bch_fs *c, u64 inode,
u64 *journal_seq,
struct bkey_i *insert, int flags)
{
return bch2_trans_do(c, journal_seq, flags|BTREE_INSERT_ATOMIC,
__bch2_hash_set(&trans, desc, info,
inode, insert, flags));
}
static __always_inline
int bch2_hash_delete_at(struct btree_trans *trans,
const struct bch_hash_desc desc,


@@ -502,6 +502,8 @@ reread:
if (bch2_crc_cmp(csum, sb->sb->csum))
return "bad checksum reading superblock";
sb->seq = le64_to_cpu(sb->sb->seq);
return NULL;
}
@@ -637,6 +639,27 @@ static void write_super_endio(struct bio *bio)
percpu_ref_put(&ca->io_ref);
}
static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
{
struct bch_sb *sb = ca->disk_sb.sb;
struct bio *bio = ca->disk_sb.bio;
bio_reset(bio);
bio_set_dev(bio, ca->disk_sb.bdev);
bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]);
bio->bi_iter.bi_size = 4096;
bio->bi_end_io = write_super_endio;
bio->bi_private = ca;
bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC|REQ_META);
bch2_bio_map(bio, ca->sb_read_scratch);
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_SB],
bio_sectors(bio));
percpu_ref_get(&ca->io_ref);
closure_bio_submit(bio, &c->sb_write);
}
static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
{
struct bch_sb *sb = ca->disk_sb.sb;
@@ -666,7 +689,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
closure_bio_submit(bio, &c->sb_write);
}
void bch2_write_super(struct bch_fs *c)
int bch2_write_super(struct bch_fs *c)
{
struct closure *cl = &c->sb_write;
struct bch_dev *ca;
@@ -674,6 +697,7 @@ void bch2_write_super(struct bch_fs *c)
const char *err;
struct bch_devs_mask sb_written;
bool wrote, can_mount_without_written, can_mount_with_written;
int ret = 0;
lockdep_assert_held(&c->sb_lock);
@@ -689,6 +713,7 @@ void bch2_write_super(struct bch_fs *c)
err = bch2_sb_validate(&ca->disk_sb);
if (err) {
bch2_fs_inconsistent(c, "sb invalid before write: %s", err);
ret = -1;
goto out;
}
}
@@ -702,10 +727,27 @@ void bch2_write_super(struct bch_fs *c)
ca->sb_write_error = 0;
}
for_each_online_member(ca, c, i)
read_back_super(c, ca);
closure_sync(cl);
for_each_online_member(ca, c, i) {
if (!ca->sb_write_error &&
ca->disk_sb.seq !=
le64_to_cpu(ca->sb_read_scratch->seq)) {
bch2_fs_fatal_error(c,
"Superblock modified by another process");
percpu_ref_put(&ca->io_ref);
ret = -EROFS;
goto out;
}
}
do {
wrote = false;
for_each_online_member(ca, c, i)
if (sb < ca->disk_sb.sb->layout.nr_superblocks) {
if (!ca->sb_write_error &&
sb < ca->disk_sb.sb->layout.nr_superblocks) {
write_one_super(c, ca, sb);
wrote = true;
}
@@ -713,9 +755,12 @@ void bch2_write_super(struct bch_fs *c)
sb++;
} while (wrote);
for_each_online_member(ca, c, i)
for_each_online_member(ca, c, i) {
if (ca->sb_write_error)
__clear_bit(ca->dev_idx, sb_written.d);
else
ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq);
}
nr_wrote = dev_mask_nr(&sb_written);
@@ -738,13 +783,15 @@ void bch2_write_super(struct bch_fs *c)
* written anything (new filesystem), we continue if we'd be able to
* mount with the devices we did successfully write to:
*/
bch2_fs_fatal_err_on(!nr_wrote ||
if (bch2_fs_fatal_err_on(!nr_wrote ||
(can_mount_without_written &&
!can_mount_with_written), c,
"Unable to write superblock to sufficient devices");
"Unable to write superblock to sufficient devices"))
ret = -1;
out:
/* Make new options visible after they're persistent: */
bch2_sb_update(c);
return ret;
}
/* BCH_SB_FIELD_journal: */
@@ -883,16 +930,22 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write);
}
static void bch2_fs_mark_dirty(struct bch_fs *c)
int bch2_fs_mark_dirty(struct bch_fs *c)
{
int ret;
/*
* Unconditionally write superblock, to verify it hasn't changed before
* we go rw:
*/
mutex_lock(&c->sb_lock);
if (BCH_SB_CLEAN(c->disk_sb.sb) ||
(c->disk_sb.sb->compat[0] & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) {
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
bch2_write_super(c);
}
ret = bch2_write_super(c);
mutex_unlock(&c->sb_lock);
return ret;
}
struct jset_entry *
@@ -989,17 +1042,12 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
return entry;
}
void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
void bch2_fs_mark_clean(struct bch_fs *c)
{
struct bch_sb_field_clean *sb_clean;
struct jset_entry *entry;
unsigned u64s;
if (!clean) {
bch2_fs_mark_dirty(c);
return;
}
mutex_lock(&c->sb_lock);
if (BCH_SB_CLEAN(c->disk_sb.sb))
goto out;


@@ -88,7 +88,7 @@ int bch2_sb_realloc(struct bch_sb_handle *, unsigned);
const char *bch2_sb_validate(struct bch_sb_handle *);
int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *);
void bch2_write_super(struct bch_fs *);
int bch2_write_super(struct bch_fs *);
/* BCH_SB_FIELD_journal: */
@@ -140,7 +140,8 @@ bch2_journal_super_entries_add_common(struct bch_fs *,
void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
void bch2_fs_mark_clean(struct bch_fs *, bool);
int bch2_fs_mark_dirty(struct bch_fs *);
void bch2_fs_mark_clean(struct bch_fs *);
void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
struct bch_sb_field *);


@@ -289,8 +289,10 @@ static void bch2_writes_disabled(struct percpu_ref *writes)
void bch2_fs_read_only(struct bch_fs *c)
{
if (c->state == BCH_FS_RO)
if (!test_bit(BCH_FS_RW, &c->flags)) {
cancel_delayed_work_sync(&c->journal.reclaim_work);
return;
}
BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
@@ -332,10 +334,9 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_ERROR, &c->flags) &&
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
test_bit(BCH_FS_STARTED, &c->flags))
bch2_fs_mark_clean(c, true);
bch2_fs_mark_clean(c);
if (c->state != BCH_FS_STOPPING)
c->state = BCH_FS_RO;
clear_bit(BCH_FS_RW, &c->flags);
}
static void bch2_fs_read_only_work(struct work_struct *work)
@@ -364,55 +365,106 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c)
return ret;
}
const char *bch2_fs_read_write(struct bch_fs *c)
static int bch2_fs_read_write_late(struct bch_fs *c)
{
struct bch_dev *ca;
const char *err = NULL;
unsigned i;
int ret;
if (c->state == BCH_FS_RW)
return NULL;
ret = bch2_gc_thread_start(c);
if (ret) {
bch_err(c, "error starting gc thread");
return ret;
}
bch2_fs_mark_clean(c, false);
for_each_rw_member(ca, c, i) {
ret = bch2_copygc_start(c, ca);
if (ret) {
bch_err(c, "error starting copygc threads");
percpu_ref_put(&ca->io_ref);
return ret;
}
}
ret = bch2_rebalance_start(c);
if (ret) {
bch_err(c, "error starting rebalance thread");
return ret;
}
schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
return 0;
}
int __bch2_fs_read_write(struct bch_fs *c, bool early)
{
struct bch_dev *ca;
unsigned i;
int ret;
if (test_bit(BCH_FS_RW, &c->flags))
return 0;
ret = bch2_fs_mark_dirty(c);
if (ret)
goto err;
for_each_rw_member(ca, c, i)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
err = "error starting allocator thread";
for_each_rw_member(ca, c, i)
if (bch2_dev_allocator_start(ca)) {
if (!test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags)) {
ret = bch2_fs_allocator_start(c);
if (ret) {
bch_err(c, "error initializing allocator");
goto err;
}
set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
}
for_each_rw_member(ca, c, i) {
ret = bch2_dev_allocator_start(ca);
if (ret) {
bch_err(c, "error starting allocator threads");
percpu_ref_put(&ca->io_ref);
goto err;
}
}
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
err = "error starting btree GC thread";
if (bch2_gc_thread_start(c))
goto err;
err = "error starting copygc thread";
for_each_rw_member(ca, c, i)
if (bch2_copygc_start(c, ca)) {
percpu_ref_put(&ca->io_ref);
if (!early) {
ret = bch2_fs_read_write_late(c);
if (ret)
goto err;
}
err = "error starting rebalance thread";
if (bch2_rebalance_start(c))
goto err;
schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
if (c->state != BCH_FS_STARTING)
percpu_ref_reinit(&c->writes);
set_bit(BCH_FS_RW, &c->flags);
c->state = BCH_FS_RW;
return NULL;
queue_delayed_work(c->journal_reclaim_wq,
&c->journal.reclaim_work, 0);
return 0;
err:
__bch2_fs_read_only(c);
return err;
return ret;
}
int bch2_fs_read_write(struct bch_fs *c)
{
return __bch2_fs_read_write(c, false);
}
int bch2_fs_read_write_early(struct bch_fs *c)
{
lockdep_assert_held(&c->state_lock);
if (c->opts.read_only)
return -EROFS;
return __bch2_fs_read_write(c, true);
}
/* Filesystem startup/shutdown: */
@@ -435,7 +487,7 @@ static void bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
percpu_free_rwsem(&c->mark_lock);
free_percpu(c->usage_scratch);
kfree(c->usage_scratch);
free_percpu(c->usage[0]);
free_percpu(c->pcpu);
mempool_exit(&c->btree_iters_pool);
@@ -604,6 +656,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mutex_init(&c->btree_reserve_cache_lock);
mutex_init(&c->btree_interior_update_lock);
mutex_init(&c->usage_scratch_lock);
mutex_init(&c->bio_bounce_pages_lock);
bio_list_init(&c->btree_write_error_list);
@@ -626,7 +680,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->journal.write_time = &c->times[BCH_TIME_journal_write];
c->journal.delay_time = &c->times[BCH_TIME_journal_delay];
c->journal.blocked_time = &c->times[BCH_TIME_journal_blocked];
c->journal.blocked_time = &c->times[BCH_TIME_blocked_journal];
c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq];
bch2_fs_btree_cache_init_early(&c->btree_cache);
@@ -668,7 +722,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
!(c->journal_reclaim_wq = alloc_workqueue("bcache_journal",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
percpu_ref_init(&c->writes, bch2_writes_disabled, 0, GFP_KERNEL) ||
percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1,
sizeof(struct btree_reserve)) ||
mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
@@ -742,7 +797,7 @@ const char *bch2_fs_start(struct bch_fs *c)
mutex_lock(&c->state_lock);
BUG_ON(c->state != BCH_FS_STARTING);
BUG_ON(test_bit(BCH_FS_STARTED, &c->flags));
mutex_lock(&c->sb_lock);
@@ -776,10 +831,13 @@ const char *bch2_fs_start(struct bch_fs *c)
if (c->opts.read_only) {
bch2_fs_read_only(c);
} else {
err = bch2_fs_read_write(c);
if (err)
if (!test_bit(BCH_FS_RW, &c->flags)
? bch2_fs_read_write(c)
: bch2_fs_read_write_late(c)) {
err = "error going read write";
goto err;
}
}
set_bit(BCH_FS_STARTED, &c->flags);
@@ -882,6 +940,7 @@ static void bch2_dev_free(struct bch_dev *ca)
free_percpu(ca->io_done);
bioset_exit(&ca->replica_set);
bch2_dev_buckets_free(ca);
kfree(ca->sb_read_scratch);
bch2_time_stats_exit(&ca->io_latency[WRITE]);
bch2_time_stats_exit(&ca->io_latency[READ]);
@@ -995,6 +1054,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
0, GFP_KERNEL) ||
percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
!(ca->sb_read_scratch = kmalloc(4096, GFP_KERNEL)) ||
bch2_dev_buckets_alloc(c, ca) ||
bioset_init(&ca->replica_set, 4,
offsetof(struct bch_write_bio, bio), 0) ||


@@ -217,7 +217,10 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
bool bch2_fs_emergency_read_only(struct bch_fs *);
void bch2_fs_read_only(struct bch_fs *);
const char *bch2_fs_read_write(struct bch_fs *);
int __bch2_fs_read_write(struct bch_fs *, bool);
int bch2_fs_read_write(struct bch_fs *);
int bch2_fs_read_write_early(struct bch_fs *);
void bch2_fs_stop(struct bch_fs *);


@@ -10,6 +10,7 @@ struct bch_sb_handle {
unsigned have_layout:1;
unsigned have_bio:1;
unsigned fs_sb:1;
u64 seq;
};
struct bch_devs_mask {


@@ -288,7 +288,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
compressed_sectors_compressed = 0,
compressed_sectors_uncompressed = 0;
if (!bch2_fs_running(c))
if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
@@ -481,7 +481,7 @@ STORE(__bch2_fs)
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
if (!bch2_fs_running(c))
if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;
/* Debugging: */


@@ -27,57 +27,63 @@ static void delete_test_keys(struct bch_fs *c)
static void test_delete(struct bch_fs *c, u64 nr)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_i_cookie k;
int ret;
bkey_cookie_init(&k.k_i);
bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, k.k.p,
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, k.k.p,
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&iter);
ret = bch2_btree_iter_traverse(iter);
BUG_ON(ret);
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
BTREE_INSERT_ENTRY(&iter, &k.k_i));
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &k.k_i));
ret = bch2_trans_commit(&trans, NULL, NULL, 0);
BUG_ON(ret);
pr_info("deleting once");
ret = bch2_btree_delete_at(&iter, 0);
ret = bch2_btree_delete_at(&trans, iter, 0);
BUG_ON(ret);
pr_info("deleting twice");
ret = bch2_btree_delete_at(&iter, 0);
ret = bch2_btree_delete_at(&trans, iter, 0);
BUG_ON(ret);
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
}
static void test_delete_written(struct bch_fs *c, u64 nr)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_i_cookie k;
int ret;
bkey_cookie_init(&k.k_i);
bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, k.k.p,
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, k.k.p,
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(&iter);
ret = bch2_btree_iter_traverse(iter);
BUG_ON(ret);
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
BTREE_INSERT_ENTRY(&iter, &k.k_i));
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &k.k_i));
ret = bch2_trans_commit(&trans, NULL, NULL, 0);
BUG_ON(ret);
bch2_journal_flush_all_pins(&c->journal);
ret = bch2_btree_delete_at(&iter, 0);
ret = bch2_btree_delete_at(&trans, iter, 0);
BUG_ON(ret);
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
}
static void test_iterate(struct bch_fs *c, u64 nr)
@@ -414,26 +420,29 @@ static void rand_mixed(struct bch_fs *c, u64 nr)
u64 i;
for (i = 0; i < nr; i++) {
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS,
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
POS(0, test_rand()), 0);
k = bch2_btree_iter_peek(&iter);
k = bch2_btree_iter_peek(iter);
if (!(i & 3) && k.k) {
struct bkey_i_cookie k;
bkey_cookie_init(&k.k_i);
k.k.p = iter.pos;
k.k.p = iter->pos;
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
BTREE_INSERT_ENTRY(&iter, &k.k_i));
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &k.k_i));
ret = bch2_trans_commit(&trans, NULL, NULL, 0);
BUG_ON(ret);
}
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
}
}
@@ -456,7 +465,8 @@ static void rand_delete(struct bch_fs *c, u64 nr)
static void seq_insert(struct bch_fs *c, u64 nr)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_i_cookie insert;
int ret;
@@ -464,18 +474,22 @@ static void seq_insert(struct bch_fs *c, u64 nr)
bkey_cookie_init(&insert.k_i);
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) {
insert.k.p = iter.pos;
bch2_trans_init(&trans, c);
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
BTREE_INSERT_ENTRY(&iter, &insert.k_i));
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
insert.k.p = iter->pos;
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &insert.k_i));
ret = bch2_trans_commit(&trans, NULL, NULL, 0);
BUG_ON(ret);
if (++i == nr)
break;
}
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
}
static void seq_lookup(struct bch_fs *c, u64 nr)
@@ -490,21 +504,26 @@ static void seq_overwrite(struct bch_fs *c, u64 nr)
static void seq_overwrite(struct bch_fs *c, u64 nr)
{
struct btree_iter iter;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
int ret;
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN,
BTREE_ITER_INTENT, k) {
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN,
BTREE_ITER_INTENT);
for_each_btree_key_continue(iter, 0, k) {
struct bkey_i_cookie u;
bkey_reassemble(&u.k_i, k);
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
BTREE_INSERT_ENTRY(&iter, &u.k_i));
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &u.k_i));
ret = bch2_trans_commit(&trans, NULL, NULL, 0);
BUG_ON(ret);
}
bch2_btree_iter_unlock(&iter);
bch2_trans_exit(&trans);
}
static void seq_delete(struct bch_fs *c, u64 nr)


@@ -179,7 +179,7 @@ int bch2_xattr_set(struct btree_trans *trans, u64 inum,
memcpy(xattr->v.x_name, name, namelen);
memcpy(xattr_val(&xattr->v), value, size);
ret = __bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
inum, &xattr->k_i,
(flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
(flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));