Update bcachefs sources to d83b992f65 bcachefs: Rewrite journal_seq_blacklist machinery

Kent Overstreet 2019-04-04 22:15:36 -04:00
parent be02db130b
commit d13bbb2955
43 changed files with 976 additions and 887 deletions

View File

@ -1 +1 @@
1712318522fdaa533f8622f4c7da05e44a4828b0
d83b992f653d9f742f3f8567dbcfd1f4f72e858f

View File

@ -8,8 +8,8 @@
#include <linux/types.h>
#include <linux/crypto.h>
#define CHACHA20_IV_SIZE 16
#define CHACHA20_KEY_SIZE 32
#define CHACHA20_BLOCK_SIZE 64
#define CHACHA_IV_SIZE 16
#define CHACHA_KEY_SIZE 32
#define CHACHA_BLOCK_SIZE 64
#endif

View File

@ -147,12 +147,9 @@ static inline u64 ktime_get_real_seconds(void)
return ts.tv_sec;
}
static inline struct timespec current_kernel_time(void)
static inline void ktime_get_real_ts64(struct timespec64 *ts)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ts;
clock_gettime(CLOCK_MONOTONIC, ts);
}
#define current_kernel_time64() current_kernel_time()

View File

@ -619,6 +619,11 @@ static void bch2_sb_print_clean(struct bch_sb *sb, struct bch_sb_field *f,
{
}
static void bch2_sb_print_journal_seq_blacklist(struct bch_sb *sb, struct bch_sb_field *f,
enum units units)
{
}
typedef void (*sb_field_print_fn)(struct bch_sb *, struct bch_sb_field *, enum units);
struct bch_sb_field_toolops {

View File

@ -290,8 +290,9 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
bch2_alloc_read_key(c, bkey_i_to_s_c(k));
}
for_each_member_device(ca, c, i)
bch2_dev_usage_from_buckets(c, ca);
percpu_down_write(&c->mark_lock);
bch2_dev_usage_from_buckets(c);
percpu_up_write(&c->mark_lock);
mutex_lock(&c->bucket_clock[READ].lock);
for_each_member_device(ca, c, i) {

View File

@ -183,6 +183,7 @@
#include <linux/closure.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/mutex.h>
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
@ -220,6 +221,8 @@
printk(KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_warn(c, fmt, ...) \
printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_warn_ratelimited(c, fmt, ...) \
printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err(c, fmt, ...) \
printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
#define bch_err_ratelimited(c, fmt, ...) \
@ -481,6 +484,7 @@ enum {
BCH_FS_RW,
/* shutdown: */
BCH_FS_STOPPING,
BCH_FS_EMERGENCY_RO,
BCH_FS_WRITE_DISABLE_COMPLETE,
@ -506,6 +510,15 @@ struct bch_fs_pcpu {
u64 sectors_available;
};
struct journal_seq_blacklist_table {
size_t nr;
struct journal_seq_blacklist_table_entry {
u64 start;
u64 end;
bool dirty;
} entries[0];
};
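
The in-memory table above ends in a zero-length array — the older kernel idiom for a flexible trailing array — so the whole table can be allocated in one shot once the entry count is known. A standalone sketch of that allocation pattern (hypothetical names, C99 flexible array member instead of [0]):

#include <stdint.h>
#include <stdlib.h>

struct seq_range_table {
	size_t nr;
	struct {
		uint64_t start;	/* inclusive */
		uint64_t end;	/* exclusive */
	} entries[];		/* trailing storage, sized at alloc time */
};

static struct seq_range_table *table_alloc(size_t nr)
{
	struct seq_range_table *t =
		calloc(1, sizeof(*t) + nr * sizeof(t->entries[0]));

	if (t)
		t->nr = nr;
	return t;
}

int main(void)
{
	struct seq_range_table *t = table_alloc(4);

	free(t);
	return 0;
}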
struct bch_fs {
struct closure cl;
@ -641,6 +654,11 @@ struct bch_fs {
struct io_clock io_clock[2];
/* JOURNAL SEQ BLACKLIST */
struct journal_seq_blacklist_table *
journal_seq_blacklist_table;
struct work_struct journal_seq_blacklist_gc_work;
/* ALLOCATOR */
spinlock_t freelist_lock;
struct closure_waitlist freelist_wait;
@ -794,4 +812,27 @@ static inline unsigned block_bytes(const struct bch_fs *c)
return c->opts.block_size << 9;
}
static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time)
{
return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo);
}
static inline s64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts)
{
s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo;
if (c->sb.time_precision == 1)
return ns;
return div_s64(ns, c->sb.time_precision);
}
static inline s64 bch2_current_time(struct bch_fs *c)
{
struct timespec64 now;
ktime_get_real_ts64(&now);
return timespec_to_bch2_time(c, now);
}
#endif /* _BCACHEFS_H */
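
The helpers above convert between nanosecond timespecs and the on-disk time encoding: subtract the superblock's time base, then divide by the time precision (with a fast path when precision is 1 ns). A worked standalone version — values hypothetical, plain host arithmetic in place of div_s64():

#include <stdint.h>
#include <stdio.h>

static const int64_t time_precision = 1000;	/* 1 unit = 1000 ns */
static const int64_t time_base_lo   = 0;

static int64_t ns_to_fs_time(int64_t ns)
{
	return (ns - time_base_lo) / time_precision;
}

static int64_t fs_time_to_ns(int64_t t)
{
	return t * time_precision + time_base_lo;
}

int main(void)
{
	int64_t ns = 1234567890;		/* ~1.23 seconds */
	int64_t t  = ns_to_fs_time(ns);		/* 1234567 units */

	/* The round trip loses the sub-precision remainder (890 ns here): */
	printf("%lld ns -> %lld units -> %lld ns\n",
	       (long long) ns, (long long) t, (long long) fs_time_to_ns(t));
	return 0;
}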

View File

@ -904,7 +904,8 @@ struct bch_sb_field {
x(quota, 4) \
x(disk_groups, 5) \
x(clean, 6) \
x(replicas, 7)
x(replicas, 7) \
x(journal_seq_blacklist, 8)
enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
@ -1119,6 +1120,20 @@ struct bch_sb_field_clean {
};
};
struct journal_seq_blacklist_entry {
__le64 start;
__le64 end;
};
struct bch_sb_field_journal_seq_blacklist {
struct bch_sb_field field;
union {
struct journal_seq_blacklist_entry start[0];
__u64 _data[0];
};
};
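
The new field type is wired in through the x() list shown earlier in this file: each x(name, nr) entry expands into an enum constant (and, elsewhere, into per-field ops). A trimmed, illustrative expansion of the technique — not the full list from the source:

#define BCH_SB_FIELDS_EXAMPLE()			\
	x(replicas,		  7)		\
	x(journal_seq_blacklist, 8)

enum bch_sb_field_type_example {
#define x(f, nr)	BCH_SB_FIELD_EX_##f = nr,
	BCH_SB_FIELDS_EXAMPLE()
#undef x
	BCH_SB_FIELD_EX_NR	/* == 9 here */
};

int main(void)
{
	return BCH_SB_FIELD_EX_journal_seq_blacklist == 8 ? 0 : 1;
}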
/* Superblock: */
/*
@ -1274,6 +1289,7 @@ enum bch_sb_features {
BCH_FEATURE_ZSTD = 2,
BCH_FEATURE_ATOMIC_NLINK = 3, /* should have gone under compat */
BCH_FEATURE_EC = 4,
BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3 = 5,
BCH_FEATURE_NR,
};

View File

@ -114,7 +114,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
struct gc_pos pos = { 0 };
unsigned flags =
BCH_BUCKET_MARK_GC|
(initial ? BCH_BUCKET_MARK_NOATOMIC : 0);
@ -171,7 +170,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
*max_stale = max(*max_stale, ptr_stale(ca, ptr));
}
bch2_mark_key(c, k, true, k.k->size, pos, NULL, 0, flags);
bch2_mark_key(c, k, true, k.k->size, NULL, 0, flags);
fsck_err:
return ret;
}
@ -202,7 +201,7 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
}
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
bool initial)
bool initial, bool metadata_only)
{
struct btree_trans trans;
struct btree_iter *iter;
@ -222,7 +221,9 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
* and on startup, we have to read every btree node (XXX: only if it was
* an unclean shutdown)
*/
if (initial || expensive_debug_checks(c))
if (metadata_only)
depth = 1;
else if (initial || expensive_debug_checks(c))
depth = 0;
btree_node_range_checks_init(&r, depth);
@ -278,7 +279,7 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
}
static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
bool initial)
bool initial, bool metadata_only)
{
enum btree_id ids[BTREE_ID_NR];
u8 max_stale;
@ -292,11 +293,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
enum btree_id id = ids[i];
enum btree_node_type type = __btree_node_type(0, id);
int ret = bch2_gc_btree(c, id, initial);
int ret = bch2_gc_btree(c, id, initial, metadata_only);
if (ret)
return ret;
if (journal && btree_node_type_needs_gc(type)) {
if (journal && !metadata_only &&
btree_node_type_needs_gc(type)) {
struct bkey_i *k, *n;
struct jset_entry *j;
struct journal_replay *r;
@ -397,7 +399,6 @@ static void bch2_mark_superblocks(struct bch_fs *c)
/* Also see bch2_pending_btree_node_free_insert_done() */
static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
{
struct gc_pos pos = { 0 };
struct btree_update *as;
struct pending_btree_node_free *d;
@ -407,8 +408,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
for_each_pending_btree_node_free(c, as, d)
if (d->index_update_done)
bch2_mark_key(c, bkey_i_to_s_c(&d->key),
true, 0,
pos, NULL, 0,
true, 0, NULL, 0,
BCH_BUCKET_MARK_GC);
mutex_unlock(&c->btree_interior_update_lock);
@ -481,25 +481,28 @@ static void bch2_gc_free(struct bch_fs *c)
c->usage[1] = NULL;
}
static void bch2_gc_done(struct bch_fs *c, bool initial)
static int bch2_gc_done(struct bch_fs *c,
bool initial, bool metadata_only)
{
struct bch_dev *ca;
bool verify = !initial ||
(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO));
bool verify = !metadata_only &&
(!initial ||
(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)));
unsigned i;
int ret = 0;
#define copy_field(_f, _msg, ...) \
if (dst->_f != src->_f) { \
if (verify) \
bch_err(c, _msg ": got %llu, should be %llu, fixing"\
fsck_err(c, _msg ": got %llu, should be %llu" \
, ##__VA_ARGS__, dst->_f, src->_f); \
dst->_f = src->_f; \
}
#define copy_stripe_field(_f, _msg, ...) \
if (dst->_f != src->_f) { \
if (verify) \
bch_err_ratelimited(c, "stripe %zu has wrong "_msg\
": got %u, should be %u, fixing", \
fsck_err(c, "stripe %zu has wrong "_msg \
": got %u, should be %u", \
dst_iter.pos, ##__VA_ARGS__, \
dst->_f, src->_f); \
dst->_f = src->_f; \
@ -508,8 +511,8 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
#define copy_bucket_field(_f) \
if (dst->b[b].mark._f != src->b[b].mark._f) { \
if (verify) \
bch_err_ratelimited(c, "dev %u bucket %zu has wrong " #_f\
": got %u, should be %u, fixing", i, b, \
fsck_err(c, "dev %u bucket %zu has wrong " #_f \
": got %u, should be %u", i, b, \
dst->b[b].mark._f, src->b[b].mark._f); \
dst->b[b]._mark._f = src->b[b].mark._f; \
dst->b[b]._mark.dirty = true; \
@ -519,7 +522,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
#define copy_fs_field(_f, _msg, ...) \
copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)
{
if (!metadata_only) {
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
struct stripe *dst, *src;
@ -571,26 +574,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
}
};
for_each_member_device(ca, c, i) {
unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
struct bch_dev_usage *dst = (void *)
bch2_acc_percpu_u64s((void *) ca->usage[0], nr);
struct bch_dev_usage *src = (void *)
bch2_acc_percpu_u64s((void *) ca->usage[1], nr);
unsigned b;
for (b = 0; b < BCH_DATA_NR; b++)
copy_dev_field(buckets[b], "buckets[%s]",
bch2_data_types[b]);
copy_dev_field(buckets_alloc, "buckets_alloc");
copy_dev_field(buckets_ec, "buckets_ec");
copy_dev_field(buckets_unavailable, "buckets_unavailable");
for (b = 0; b < BCH_DATA_NR; b++)
copy_dev_field(sectors[b], "sectors[%s]",
bch2_data_types[b]);
copy_dev_field(sectors_fragmented, "sectors_fragmented");
}
bch2_dev_usage_from_buckets(c);
{
unsigned nr = fs_usage_u64s(c);
@ -600,20 +584,29 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
bch2_acc_percpu_u64s((void *) c->usage[1], nr);
copy_fs_field(hidden, "hidden");
copy_fs_field(data, "data");
copy_fs_field(cached, "cached");
copy_fs_field(reserved, "reserved");
copy_fs_field(nr_inodes, "nr_inodes");
copy_fs_field(btree, "btree");
for (i = 0; i < BCH_REPLICAS_MAX; i++)
copy_fs_field(persistent_reserved[i],
"persistent_reserved[%i]", i);
if (!metadata_only) {
copy_fs_field(data, "data");
copy_fs_field(cached, "cached");
copy_fs_field(reserved, "reserved");
copy_fs_field(nr_inodes,"nr_inodes");
for (i = 0; i < BCH_REPLICAS_MAX; i++)
copy_fs_field(persistent_reserved[i],
"persistent_reserved[%i]", i);
}
for (i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);
char buf[80];
if (metadata_only &&
(e->data_type == BCH_DATA_USER ||
e->data_type == BCH_DATA_CACHED))
continue;
bch2_replicas_entry_to_text(&PBUF(buf), e);
copy_fs_field(replicas[i], "%s", buf);
@ -625,9 +618,12 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
#undef copy_bucket_field
#undef copy_stripe_field
#undef copy_field
fsck_err:
return ret;
}
static int bch2_gc_start(struct bch_fs *c)
static int bch2_gc_start(struct bch_fs *c,
bool metadata_only)
{
struct bch_dev *ca;
unsigned i;
@ -673,10 +669,18 @@ static int bch2_gc_start(struct bch_fs *c)
dst->nbuckets = src->nbuckets;
for (b = 0; b < src->nbuckets; b++) {
dst->b[b]._mark.gen =
dst->b[b].oldest_gen =
src->b[b].mark.gen;
dst->b[b].gen_valid = src->b[b].gen_valid;
struct bucket *d = &dst->b[b];
struct bucket *s = &src->b[b];
d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
d->gen_valid = s->gen_valid;
if (metadata_only &&
(s->mark.data_type == BCH_DATA_USER ||
s->mark.data_type == BCH_DATA_CACHED)) {
d->_mark = s->mark;
d->_mark.owned_by_allocator = 0;
}
}
};
@ -701,7 +705,8 @@ static int bch2_gc_start(struct bch_fs *c)
* move around - if references move backwards in the ordering GC
* uses, GC could skip past them
*/
int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
int bch2_gc(struct bch_fs *c, struct list_head *journal,
bool initial, bool metadata_only)
{
struct bch_dev *ca;
u64 start_time = local_clock();
@ -713,7 +718,7 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
down_write(&c->gc_lock);
again:
percpu_down_write(&c->mark_lock);
ret = bch2_gc_start(c);
ret = bch2_gc_start(c, metadata_only);
percpu_up_write(&c->mark_lock);
if (ret)
@ -721,7 +726,7 @@ again:
bch2_mark_superblocks(c);
ret = bch2_gc_btrees(c, journal, initial);
ret = bch2_gc_btrees(c, journal, initial, metadata_only);
if (ret)
goto out;
@ -755,7 +760,7 @@ out:
percpu_down_write(&c->mark_lock);
if (!ret)
bch2_gc_done(c, initial);
ret = bch2_gc_done(c, initial, metadata_only);
/* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
@ -1157,7 +1162,7 @@ static int bch2_gc_thread(void *arg)
last = atomic_long_read(&clock->now);
last_kick = atomic_read(&c->kick_gc);
ret = bch2_gc(c, NULL, false);
ret = bch2_gc(c, NULL, false, false);
if (ret)
bch_err(c, "btree gc failed: %i", ret);

View File

@ -4,7 +4,7 @@
#include "btree_types.h"
void bch2_coalesce(struct bch_fs *);
int bch2_gc(struct bch_fs *, struct list_head *, bool);
int bch2_gc(struct bch_fs *, struct list_head *, bool, bool);
void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *);
void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);

View File

@ -509,7 +509,7 @@ static void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags,
bytes);
nonce = nonce_add(nonce, round_up(bytes, CHACHA20_BLOCK_SIZE));
nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
}
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
@ -770,7 +770,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
struct btree_node *sorted;
struct bkey_packed *k;
struct bset *i;
bool used_mempool;
bool used_mempool, blacklisted;
unsigned u64s;
int ret, retry_read = 0, write = READ;
@ -844,20 +844,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
b->written += sectors;
ret = bch2_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b);
if (ret < 0) {
btree_err(BTREE_ERR_FATAL, c, b, i,
"insufficient memory");
goto err;
}
blacklisted = bch2_journal_seq_is_blacklisted(c,
le64_to_cpu(i->journal_seq),
true);
if (ret) {
btree_err_on(first,
BTREE_ERR_FIXABLE, c, b, i,
"first btree node bset has blacklisted journal seq");
if (!first)
continue;
}
btree_err_on(blacklisted && first,
BTREE_ERR_FIXABLE, c, b, i,
"first btree node bset has blacklisted journal seq");
if (blacklisted && !first)
continue;
bch2_btree_node_iter_large_push(iter, b,
i->start,
@ -930,7 +925,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
out:
mempool_free(iter, &c->fill_iter);
return retry_read;
err:
fsck_err:
if (ret == BTREE_RETRY_READ) {
retry_read = 1;

View File

@ -818,14 +818,6 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
struct btree_iter *linked;
unsigned level = b->level;
/* caller now responsible for unlocking @b */
BUG_ON(iter->l[level].b != b);
BUG_ON(!btree_node_intent_locked(iter, level));
iter->l[level].b = BTREE_ITER_NOT_END;
mark_btree_node_unlocked(iter, level);
trans_for_each_iter(iter->trans, linked)
if (linked->l[level].b == b) {
__btree_node_unlock(linked, level);
@ -990,6 +982,7 @@ retry_all:
}
if (unlikely(ret == -EIO)) {
trans->error = true;
iter->flags |= BTREE_ITER_ERROR;
iter->l[iter->level].b = BTREE_ITER_NOT_END;
goto out;
@ -1162,6 +1155,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
if (!btree_iter_node(iter, iter->level))
return NULL;
bch2_trans_cond_resched(iter->trans);
btree_iter_up(iter);
if (!bch2_btree_node_relock(iter, iter->level))
@ -1712,7 +1707,7 @@ void bch2_trans_preload_iters(struct btree_trans *trans)
static int btree_trans_iter_alloc(struct btree_trans *trans)
{
unsigned idx = ffz(trans->iters_linked);
unsigned idx = __ffs64(~trans->iters_linked);
if (idx < trans->nr_iters)
goto got_slot;
@ -1877,17 +1872,17 @@ void *bch2_trans_kmalloc(struct btree_trans *trans,
int bch2_trans_unlock(struct btree_trans *trans)
{
unsigned iters = trans->iters_linked;
u64 iters = trans->iters_linked;
int ret = 0;
while (iters) {
unsigned idx = __ffs(iters);
unsigned idx = __ffs64(iters);
struct btree_iter *iter = &trans->iters[idx];
ret = ret ?: btree_iter_err(iter);
__bch2_btree_iter_unlock(iter);
iters ^= 1 << idx;
iters ^= 1ULL << idx;
}
return ret;
@ -1949,7 +1944,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c)
int bch2_trans_exit(struct btree_trans *trans)
{
int ret = bch2_trans_unlock(trans);
bch2_trans_unlock(trans);
kfree(trans->mem);
if (trans->used_mempool)
@ -1958,5 +1953,6 @@ int bch2_trans_exit(struct btree_trans *trans)
kfree(trans->iters);
trans->mem = (void *) 0x1;
trans->iters = (void *) 0x1;
return ret;
return trans->error ? -EIO : 0;
}
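
Several changes above widen the iters_linked handling from unsigned to u64: ffz() becomes __ffs64(~mask), __ffs() becomes __ffs64(), and 1 << idx becomes 1ULL << idx, since a 32-bit shift is undefined once more than 32 iterators exist. A minimal sketch of the corrected walk, using a GCC/Clang builtin in place of the kernel's __ffs64():

#include <stdint.h>
#include <stdio.h>

static unsigned ffs64_ex(uint64_t v)
{
	return (unsigned) __builtin_ctzll(v);	/* undefined for v == 0 */
}

int main(void)
{
	uint64_t linked = (1ULL << 3) | (1ULL << 40);	/* slot 40 needs 1ULL */

	while (linked) {
		unsigned idx = ffs64_ex(linked);

		printf("visiting iter slot %u\n", idx);
		linked ^= 1ULL << idx;	/* 1 << idx would be UB for idx >= 32 */
	}
	return 0;
}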

View File

@ -279,6 +279,7 @@ struct btree_trans {
u8 nr_updates;
u8 size;
unsigned used_mempool:1;
unsigned error:1;
unsigned mem_top;
unsigned mem_bytes;

View File

@ -161,7 +161,6 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
{
struct bch_fs *c = as->c;
struct pending_btree_node_free *d;
struct gc_pos pos = { 0 };
for (d = as->pending; d < as->pending + as->nr_pending; d++)
if (!bkey_cmp(k.k->p, d->key.k.p) &&
@ -189,18 +188,12 @@ found:
* to cancel out one of mark and sweep's markings if necessary:
*/
/*
* bch2_mark_key() compares the current gc pos to the pos we're
* moving this reference from, hence one comparison here:
*/
if (gc_pos_cmp(c->gc_pos, b
? gc_pos_btree_node(b)
: gc_pos_btree_root(as->btree_id)) >= 0 &&
gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0)
bch2_mark_key_locked(c,
bkey_i_to_s_c(&d->key),
false, 0, pos,
NULL, 0, BCH_BUCKET_MARK_GC);
bch2_mark_key_locked(c, bkey_i_to_s_c(&d->key),
false, 0, NULL, 0, BCH_BUCKET_MARK_GC);
}
static void __btree_node_free(struct bch_fs *c, struct btree *b)
@ -272,8 +265,11 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
false, 0,
gc_phase(GC_PHASE_PENDING_DELETE),
NULL, 0, 0);
if (gc_visited(c, gc_phase(GC_PHASE_PENDING_DELETE)))
bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
false, 0, NULL, 0, BCH_BUCKET_MARK_GC);
}
static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
@ -1078,9 +1074,11 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
true, 0,
gc_pos_btree_root(b->btree_id),
fs_usage, 0, 0);
true, 0, fs_usage, 0, 0);
if (gc_visited(c, gc_pos_btree_root(b->btree_id)))
bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
true, 0, NULL, 0,
BCH_BUCKET_MARK_GC);
if (old && !btree_node_fake(old))
bch2_btree_node_free_index(as, NULL,
@ -1172,8 +1170,11 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
true, 0,
gc_pos_btree_node(b), fs_usage, 0, 0);
true, 0, fs_usage, 0, 0);
if (gc_visited(c, gc_pos_btree_node(b)))
bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
true, 0, NULL, 0, BCH_BUCKET_MARK_GC);
while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)
@ -1428,6 +1429,7 @@ static void btree_split(struct btree_update *as, struct btree *b,
/* Successful split, update the iterator to point to the new nodes: */
six_lock_increment(&b->lock, SIX_LOCK_intent);
bch2_btree_iter_node_drop(iter, b);
if (n3)
bch2_btree_iter_node_replace(iter, n3);
@ -1739,7 +1741,10 @@ retry:
bch2_open_buckets_put(c, &n->ob);
six_lock_increment(&b->lock, SIX_LOCK_intent);
bch2_btree_iter_node_drop(iter, b);
bch2_btree_iter_node_drop(iter, m);
bch2_btree_iter_node_replace(iter, n);
bch2_btree_iter_verify(iter, n);
@ -1837,6 +1842,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
bch2_open_buckets_put(c, &n->ob);
six_lock_increment(&b->lock, SIX_LOCK_intent);
bch2_btree_iter_node_drop(iter, b);
bch2_btree_iter_node_replace(iter, n);
bch2_btree_node_free_inmem(c, b, iter);
@ -1988,9 +1994,12 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
true, 0,
gc_pos_btree_root(b->btree_id),
fs_usage, 0, 0);
true, 0, fs_usage, 0, 0);
if (gc_visited(c, gc_pos_btree_root(b->btree_id)))
bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
true, 0, NULL, 0,
BCH_BUCKET_MARK_GC);
bch2_btree_node_free_index(as, NULL,
bkey_i_to_s_c(&b->key),
fs_usage);

View File

@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_gc.h"
#include "btree_io.h"
#include "btree_iter.h"
#include "btree_locking.h"
@ -601,10 +602,17 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
}
trans_for_each_update_iter(trans, i)
bch2_mark_update(trans, i, fs_usage);
bch2_mark_update(trans, i, fs_usage, 0);
if (fs_usage)
bch2_trans_fs_usage_apply(trans, fs_usage);
if (unlikely(c->gc_pos.phase)) {
trans_for_each_update_iter(trans, i)
if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
bch2_mark_update(trans, i, NULL,
BCH_BUCKET_MARK_GC);
}
trans_for_each_update(trans, i)
do_btree_insert_one(trans, i);
out:
@ -852,12 +860,15 @@ out_noupdates:
BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
bch2_trans_unlink_iters(trans, trans->iters_unlink_on_commit);
if (!ret) {
bch2_trans_unlink_iters(trans, ~trans->iters_touched);
bch2_trans_unlink_iters(trans, ~trans->iters_touched|
trans->iters_unlink_on_commit);
trans->iters_touched = 0;
} else {
bch2_trans_unlink_iters(trans, trans->iters_unlink_on_commit);
}
trans->nr_updates = 0;
trans->nr_updates = 0;
trans->mem_top = 0;
return ret;
err:

View File

@ -131,6 +131,8 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
switch (e->data_type) {
case BCH_DATA_BTREE:
usage->btree += usage->replicas[i];
break;
case BCH_DATA_USER:
usage->data += usage->replicas[i];
break;
@ -225,6 +227,7 @@ static u64 avail_factor(u64 r)
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
{
return min(fs_usage->hidden +
fs_usage->btree +
fs_usage->data +
reserve_factor(fs_usage->reserved +
fs_usage->online_reserved),
@ -240,7 +243,8 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
ret.capacity = c->capacity -
percpu_u64_get(&c->usage[0]->hidden);
data = percpu_u64_get(&c->usage[0]->data);
data = percpu_u64_get(&c->usage[0]->data) +
percpu_u64_get(&c->usage[0]->btree);
reserved = percpu_u64_get(&c->usage[0]->reserved) +
percpu_u64_get(&c->usage[0]->online_reserved);
@ -383,21 +387,32 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_wake_allocator(ca);
}
void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
void bch2_dev_usage_from_buckets(struct bch_fs *c)
{
struct bch_dev *ca;
struct bucket_mark old = { .v.counter = 0 };
struct bch_fs_usage *fs_usage;
struct bucket_array *buckets;
struct bucket *g;
unsigned i;
int cpu;
percpu_down_read_preempt_disable(&c->mark_lock);
fs_usage = this_cpu_ptr(c->usage[0]);
buckets = bucket_array(ca);
percpu_u64_set(&c->usage[0]->hidden, 0);
for_each_bucket(g, buckets)
if (g->mark.data_type)
bch2_dev_usage_update(c, ca, fs_usage, old, g->mark, false);
percpu_up_read_preempt_enable(&c->mark_lock);
for_each_member_device(ca, c, i) {
for_each_possible_cpu(cpu)
memset(per_cpu_ptr(ca->usage[0], cpu), 0,
sizeof(*ca->usage[0]));
preempt_disable();
fs_usage = this_cpu_ptr(c->usage[0]);
buckets = bucket_array(ca);
for_each_bucket(g, buckets)
bch2_dev_usage_update(c, ca, fs_usage,
old, g->mark, false);
preempt_enable();
}
}
#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr) \
@ -418,10 +433,17 @@ static inline void update_replicas(struct bch_fs *c,
BUG_ON(idx < 0);
BUG_ON(!sectors);
if (r->data_type == BCH_DATA_CACHED)
fs_usage->cached += sectors;
else
switch (r->data_type) {
case BCH_DATA_BTREE:
fs_usage->btree += sectors;
break;
case BCH_DATA_USER:
fs_usage->data += sectors;
break;
case BCH_DATA_CACHED:
fs_usage->cached += sectors;
break;
}
fs_usage->replicas[idx] += sectors;
}
@ -924,12 +946,13 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
return 0;
}
static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
bool inserting, s64 sectors,
struct bch_fs_usage *fs_usage,
unsigned journal_seq, unsigned flags,
bool gc)
int bch2_mark_key_locked(struct bch_fs *c,
struct bkey_s_c k,
bool inserting, s64 sectors,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
{
bool gc = flags & BCH_BUCKET_MARK_GC;
int ret = 0;
preempt_disable();
@ -981,21 +1004,8 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
return ret;
}
int bch2_mark_key_locked(struct bch_fs *c,
struct bkey_s_c k,
bool inserting, s64 sectors,
struct gc_pos pos,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
{
return do_mark_fn(__bch2_mark_key, c, pos, flags,
k, inserting, sectors, fs_usage,
journal_seq, flags);
}
int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
bool inserting, s64 sectors,
struct gc_pos pos,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
{
@ -1003,7 +1013,7 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
percpu_down_read_preempt_disable(&c->mark_lock);
ret = bch2_mark_key_locked(c, k, inserting, sectors,
pos, fs_usage, journal_seq, flags);
fs_usage, journal_seq, flags);
percpu_up_read_preempt_enable(&c->mark_lock);
return ret;
@ -1011,13 +1021,13 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
void bch2_mark_update(struct btree_trans *trans,
struct btree_insert_entry *insert,
struct bch_fs_usage *fs_usage)
struct bch_fs_usage *fs_usage,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter = insert->iter;
struct btree *b = iter->l[0].b;
struct btree_node_iter node_iter = iter->l[0].iter;
struct gc_pos pos = gc_pos_btree_node(b);
struct bkey_packed *_k;
if (!btree_node_type_needs_gc(iter->btree_id))
@ -1027,7 +1037,7 @@ void bch2_mark_update(struct btree_trans *trans,
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
bpos_min(insert->k->k.p, b->key.k.p).offset -
bkey_start_offset(&insert->k->k),
pos, fs_usage, trans->journal_res.seq, 0);
fs_usage, trans->journal_res.seq, flags);
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
KEY_TYPE_discard))) {
@ -1060,7 +1070,8 @@ void bch2_mark_update(struct btree_trans *trans,
BUG_ON(sectors <= 0);
bch2_mark_key_locked(c, k, true, sectors,
pos, fs_usage, trans->journal_res.seq, 0);
fs_usage, trans->journal_res.seq,
flags);
sectors = bkey_start_offset(&insert->k->k) -
k.k->p.offset;
@ -1071,7 +1082,7 @@ void bch2_mark_update(struct btree_trans *trans,
}
bch2_mark_key_locked(c, k, false, sectors,
pos, fs_usage, trans->journal_res.seq, 0);
fs_usage, trans->journal_res.seq, flags);
bch2_btree_node_iter_advance(&node_iter, b);
}

View File

@ -173,7 +173,7 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *);
void bch2_dev_usage_from_buckets(struct bch_fs *);
static inline u64 __dev_buckets_available(struct bch_dev *ca,
struct bch_dev_usage stats)
@ -245,16 +245,16 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
#define BCH_BUCKET_MARK_NOATOMIC (1 << 1)
int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
bool, s64, struct bch_fs_usage *,
u64, unsigned);
int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
bool, s64, struct bch_fs_usage *,
u64, unsigned);
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *);
void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
struct bch_fs_usage *);
struct bch_fs_usage *, unsigned);
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
/* disk reservations: */

View File

@ -69,6 +69,7 @@ struct bch_fs_usage {
u64 gc_start[0];
u64 hidden;
u64 btree;
u64 data;
u64 cached;
u64 reserved;

View File

@ -9,7 +9,7 @@
#include <linux/random.h>
#include <linux/scatterlist.h>
#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <crypto/chacha.h>
#include <crypto/hash.h>
#include <crypto/poly1305.h>
#include <keys/user-type.h>

View File

@ -6,7 +6,7 @@
#include "super-io.h"
#include <linux/crc64.h>
#include <crypto/chacha20.h>
#include <crypto/chacha.h>
static inline u64 bch2_crc64_update(u64 crc, const void *p, size_t len)
{
@ -126,9 +126,9 @@ static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r)
/* for skipping ahead and encrypting/decrypting at an offset: */
static inline struct nonce nonce_add(struct nonce nonce, unsigned offset)
{
EBUG_ON(offset & (CHACHA20_BLOCK_SIZE - 1));
EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1));
le32_add_cpu(&nonce.d[0], offset / CHACHA20_BLOCK_SIZE);
le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE);
return nonce;
}
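
nonce_add() advances the first 32-bit word of the nonce — ChaCha's block counter — by the number of 64-byte blocks covered by offset, which is why offset must be block aligned (the EBUG_ON above). A host-order sketch of the arithmetic, omitting the little-endian handling the real le32_add_cpu() does:

#include <stdint.h>
#include <stdio.h>

#define CHACHA_BLOCK_SIZE 64

struct nonce_ex { uint32_t d[4]; };

static struct nonce_ex nonce_add_ex(struct nonce_ex nonce, unsigned offset)
{
	/* callers guarantee offset is a multiple of CHACHA_BLOCK_SIZE */
	nonce.d[0] += offset / CHACHA_BLOCK_SIZE;
	return nonce;
}

int main(void)
{
	struct nonce_ex n = {{ 0, 1, 2, 3 }};

	n = nonce_add_ex(n, 4096);		/* skip 4096 / 64 = 64 blocks */
	printf("block counter: %u\n", n.d[0]);	/* prints 64 */
	return 0;
}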

View File

@ -328,17 +328,18 @@ out:
return inum;
}
int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
int ret = 0;
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(trans, BTREE_ID_DIRENTS,
POS(dir_inum, 0), 0);
if (IS_ERR(iter))
return PTR_ERR(iter);
for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
POS(dir_inum, 0), 0, k) {
for_each_btree_key_continue(iter, 0, k) {
if (k.k->p.inode > dir_inum)
break;
@ -347,11 +348,17 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
break;
}
}
bch2_trans_exit(&trans);
bch2_trans_iter_put(trans, iter);
return ret;
}
int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
{
return bch2_trans_do(c, NULL, 0,
bch2_empty_dir_trans(&trans, dir_inum));
}
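
bch2_empty_dir() is now a thin wrapper: bch2_trans_do() evidently declares a trans in scope (the call site passes &trans), evaluates the expression, and handles transaction setup and teardown. The macro body is not part of this diff; a rough function-pointer analogue of the pattern, purely illustrative:

/* Hypothetical analogue of the bch2_trans_do() shape; not the real macro. */
static int trans_do_ex(struct bch_fs *c,
		       int (*fn)(struct btree_trans *, uint64_t),
		       uint64_t arg)
{
	struct btree_trans trans;
	int ret;

	bch2_trans_init(&trans, c);
	do {
		ret = fn(&trans, arg);	/* e.g. bch2_empty_dir_trans */
	} while (ret == -EINTR);	/* assumed: restart on lock restarts */

	return bch2_trans_exit(&trans) ?: ret;
}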
int bch2_readdir(struct bch_fs *c, struct file *file,
struct dir_context *ctx)
{

View File

@ -54,6 +54,7 @@ int bch2_dirent_rename(struct btree_trans *,
u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *,
const struct qstr *);
int bch2_empty_dir_trans(struct btree_trans *, u64);
int bch2_empty_dir(struct bch_fs *, u64);
int bch2_readdir(struct bch_fs *, struct file *, struct dir_context *);

View File

@ -1231,10 +1231,7 @@ int bch2_stripes_write(struct bch_fs *c, bool *wrote)
static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k)
{
struct gc_pos pos = { 0 };
bch2_mark_key(c, k, true, 0, pos, NULL, 0, 0);
bch2_mark_key(c, k, true, 0, NULL, 0, 0);
}
int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list)

View File

@ -757,7 +757,7 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage,
EBUG_ON(!PageLocked(page));
EBUG_ON(!PageLocked(newpage));
ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
ret = migrate_page_move_mapping(mapping, newpage, page, mode, 0);
if (ret != MIGRATEPAGE_SUCCESS)
return ret;

View File

@ -265,7 +265,7 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return -EPERM;
down_write(&sb->s_umount);
sb->s_flags |= MS_RDONLY;
sb->s_flags |= SB_RDONLY;
bch2_fs_emergency_read_only(c);
up_write(&sb->s_umount);
return 0;

View File

@ -1582,7 +1582,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
struct bch_opts opts = bch2_opts_empty();
int ret;
opt_set(opts, read_only, (*flags & MS_RDONLY) != 0);
opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
ret = bch2_parse_mount_opts(&opts, data);
if (ret)
@ -1594,7 +1594,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
if (opts.read_only) {
bch2_fs_read_only(c);
sb->s_flags |= MS_RDONLY;
sb->s_flags |= SB_RDONLY;
} else {
ret = bch2_fs_read_write(c);
if (ret) {
@ -1603,7 +1603,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
return -EINVAL;
}
sb->s_flags &= ~MS_RDONLY;
sb->s_flags &= ~SB_RDONLY;
}
c->opts.read_only = opts.read_only;
@ -1681,7 +1681,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
unsigned i;
int ret;
opt_set(opts, read_only, (flags & MS_RDONLY) != 0);
opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
ret = bch2_parse_mount_opts(&opts, data);
if (ret)
@ -1691,7 +1691,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
if (IS_ERR(c))
return ERR_CAST(c);
sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|MS_NOSEC, c);
sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|SB_NOSEC, c);
if (IS_ERR(sb)) {
closure_put(&c->cl);
return ERR_CAST(sb);
@ -1702,7 +1702,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
if (sb->s_root) {
closure_put(&c->cl);
if ((flags ^ sb->s_flags) & MS_RDONLY) {
if ((flags ^ sb->s_flags) & SB_RDONLY) {
ret = -EBUSY;
goto err_put_super;
}
@ -1745,7 +1745,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
if (c->opts.acl)
sb->s_flags |= MS_POSIXACL;
sb->s_flags |= SB_POSIXACL;
#endif
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO);
@ -1760,7 +1760,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
goto err_put_super;
}
sb->s_flags |= MS_ACTIVE;
sb->s_flags |= SB_ACTIVE;
out:
return dget(sb->s_root);

View File

@ -69,11 +69,6 @@ static inline unsigned nlink_bias(umode_t mode)
return S_ISDIR(mode) ? 2 : 1;
}
static inline u64 bch2_current_time(struct bch_fs *c)
{
return timespec_to_bch2_time(c, current_kernel_time64());
}
static inline bool inode_attr_changing(struct bch_inode_info *dir,
struct bch_inode_info *inode,
enum inode_opt_id id)

View File

@ -127,18 +127,21 @@ static struct inode_walker inode_walker_init(void)
};
}
static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
static int walk_inode(struct btree_trans *trans,
struct inode_walker *w, u64 inum)
{
w->first_this_inode = inum != w->cur_inum;
w->cur_inum = inum;
if (w->first_this_inode) {
int ret = bch2_inode_find_by_inum(c, inum, &w->inode);
if (inum != w->cur_inum) {
int ret = bch2_inode_find_by_inum_trans(trans, inum,
&w->inode);
if (ret && ret != -ENOENT)
return ret;
w->have_inode = !ret;
w->have_inode = !ret;
w->cur_inum = inum;
w->first_this_inode = true;
} else {
w->first_this_inode = false;
}
return 0;
@ -444,12 +447,15 @@ static int check_extents(struct bch_fs *c)
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch_verbose(c, "checking extents");
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(BCACHEFS_ROOT_INO, 0), 0, k) {
ret = walk_inode(c, &w, k.k->p.inode);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
POS(BCACHEFS_ROOT_INO, 0), 0);
retry:
for_each_btree_key_continue(iter, 0, k) {
ret = walk_inode(&trans, &w, k.k->p.inode);
if (ret)
break;
@ -514,6 +520,8 @@ static int check_extents(struct bch_fs *c)
}
err:
fsck_err:
if (ret == -EINTR)
goto retry;
return bch2_trans_exit(&trans) ?: ret;
}
@ -536,21 +544,20 @@ static int check_dirents(struct bch_fs *c)
bch_verbose(c, "checking dirents");
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
POS(BCACHEFS_ROOT_INO, 0), 0);
hash_check_init(&h);
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
POS(BCACHEFS_ROOT_INO, 0), 0);
retry:
for_each_btree_key_continue(iter, 0, k) {
struct bkey_s_c_dirent d;
struct bch_inode_unpacked target;
bool have_target;
u64 d_inum;
ret = walk_inode(c, &w, k.k->p.inode);
ret = walk_inode(&trans, &w, k.k->p.inode);
if (ret)
break;
@ -619,7 +626,7 @@ static int check_dirents(struct bch_fs *c)
continue;
}
ret = bch2_inode_find_by_inum(c, d_inum, &target);
ret = bch2_inode_find_by_inum_trans(&trans, d_inum, &target);
if (ret && ret != -ENOENT)
break;
@ -670,6 +677,9 @@ static int check_dirents(struct bch_fs *c)
hash_stop_chain(&trans, &h);
err:
fsck_err:
if (ret == -EINTR)
goto retry;
return bch2_trans_exit(&trans) ?: ret;
}
@ -688,17 +698,16 @@ static int check_xattrs(struct bch_fs *c)
bch_verbose(c, "checking xattrs");
bch2_trans_init(&trans, c);
hash_check_init(&h);
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
POS(BCACHEFS_ROOT_INO, 0), 0);
hash_check_init(&h);
retry:
for_each_btree_key_continue(iter, 0, k) {
ret = walk_inode(c, &w, k.k->p.inode);
ret = walk_inode(&trans, &w, k.k->p.inode);
if (ret)
break;
@ -721,6 +730,8 @@ static int check_xattrs(struct bch_fs *c)
}
err:
fsck_err:
if (ret == -EINTR)
goto retry;
return bch2_trans_exit(&trans) ?: ret;
}
@ -904,6 +915,7 @@ static int check_directory_structure(struct bch_fs *c,
int ret = 0;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
bch_verbose(c, "checking directory structure");
@ -918,9 +930,8 @@ restart_dfs:
}
ret = path_down(&path, BCACHEFS_ROOT_INO);
if (ret) {
return ret;
}
if (ret)
goto err;
while (path.nr) {
next:
@ -982,14 +993,19 @@ up:
path.nr--;
}
for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k) {
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS_MIN, 0);
retry:
for_each_btree_key_continue(iter, 0, k) {
if (k.k->type != KEY_TYPE_inode)
continue;
if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
continue;
if (!bch2_empty_dir(c, k.k->p.inode))
ret = bch2_empty_dir_trans(&trans, k.k->p.inode);
if (ret == -EINTR)
goto retry;
if (!ret)
continue;
if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
@ -1017,15 +1033,12 @@ up:
memset(&path, 0, sizeof(path));
goto restart_dfs;
}
out:
kfree(dirs_done.bits);
kfree(path.entries);
return ret;
err:
fsck_err:
ret = bch2_trans_exit(&trans) ?: ret;
goto out;
kfree(dirs_done.bits);
kfree(path.entries);
return ret;
}
struct nlink {
@ -1069,6 +1082,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
int ret;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
@ -1225,12 +1239,10 @@ static int check_inode(struct btree_trans *trans,
return ret;
}
if (u.bi_flags & BCH_INODE_UNLINKED) {
fsck_err_on(c->sb.clean, c,
"filesystem marked clean, "
"but inode %llu unlinked",
u.bi_inum);
if (u.bi_flags & BCH_INODE_UNLINKED &&
(!c->sb.clean ||
fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
u.bi_inum))) {
bch_verbose(c, "deleting inode %llu", u.bi_inum);
ret = bch2_inode_rm(c, u.bi_inum);
@ -1240,12 +1252,10 @@ static int check_inode(struct btree_trans *trans,
return ret;
}
if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY) {
fsck_err_on(c->sb.clean, c,
"filesystem marked clean, "
"but inode %llu has i_size dirty",
u.bi_inum);
if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
(!c->sb.clean ||
fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
u.bi_inum))) {
bch_verbose(c, "truncating inode %llu", u.bi_inum);
/*
@ -1270,14 +1280,12 @@ static int check_inode(struct btree_trans *trans,
do_update = true;
}
if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY) {
if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
(!c->sb.clean ||
fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
u.bi_inum))) {
s64 sectors;
fsck_err_on(c->sb.clean, c,
"filesystem marked clean, "
"but inode %llu has i_sectors dirty",
u.bi_inum);
bch_verbose(c, "recounting sectors for inode %llu",
u.bi_inum);
@ -1326,6 +1334,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
u64 nlinks_pos;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
POS(range_start, 0), 0);
@ -1425,6 +1434,7 @@ static int check_inodes_fast(struct bch_fs *c)
int ret = 0, ret2;
bch2_trans_init(&trans, c);
bch2_trans_preload_iters(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
POS_MIN, 0);

View File

@ -251,9 +251,7 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
struct bch_inode_unpacked *parent)
{
s64 now = timespec_to_bch2_time(c,
timespec64_trunc(current_kernel_time64(),
c->sb.time_precision));
s64 now = bch2_current_time(c);
memset(inode_u, 0, sizeof(*inode_u));
@ -445,31 +443,32 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
return ret;
}
int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
struct bch_inode_unpacked *inode)
int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
struct bch_inode_unpacked *inode)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
int ret = -ENOENT;
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
POS(inode_nr, 0), BTREE_ITER_SLOTS);
if (IS_ERR(iter))
return PTR_ERR(iter);
for_each_btree_key(&trans, iter, BTREE_ID_INODES,
POS(inode_nr, 0), BTREE_ITER_SLOTS, k) {
switch (k.k->type) {
case KEY_TYPE_inode:
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
break;
default:
/* hole, not found */
break;
}
k = bch2_btree_iter_peek_slot(iter);
if (k.k->type == KEY_TYPE_inode)
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
break;
}
bch2_trans_iter_put(trans, iter);
return bch2_trans_exit(&trans) ?: ret;
return ret;
}
int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
struct bch_inode_unpacked *inode)
{
return bch2_trans_do(c, NULL, 0,
bch2_inode_find_by_inum_trans(&trans, inode_nr, inode));
}
#ifdef CONFIG_BCACHEFS_DEBUG

View File

@ -3,8 +3,6 @@
#include "opts.h"
#include <linux/math64.h>
extern const char * const bch2_inode_opts[];
const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
@ -59,23 +57,9 @@ int bch2_inode_create(struct bch_fs *, struct bch_inode_unpacked *,
int bch2_inode_rm(struct bch_fs *, u64);
int bch2_inode_find_by_inum(struct bch_fs *, u64,
struct bch_inode_unpacked *);
static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time)
{
return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo);
}
static inline u64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts)
{
s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo;
if (c->sb.time_precision == 1)
return ns;
return div_s64(ns, c->sb.time_precision);
}
int bch2_inode_find_by_inum_trans(struct btree_trans *, u64,
struct bch_inode_unpacked *);
int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *);
static inline struct bch_io_opts bch2_inode_opts_get(struct bch_inode_unpacked *inode)
{

View File

@ -992,27 +992,57 @@ void bch2_fs_journal_stop(struct journal *j)
cancel_delayed_work_sync(&j->reclaim_work);
}
void bch2_fs_journal_start(struct journal *j)
int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
struct list_head *journal_entries)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_seq_blacklist *bl;
u64 blacklist = 0;
struct journal_entry_pin_list *p;
struct journal_replay *i;
u64 last_seq = cur_seq, nr, seq;
list_for_each_entry(bl, &j->seq_blacklist, list)
blacklist = max(blacklist, bl->end);
if (!list_empty(journal_entries))
last_seq = le64_to_cpu(list_last_entry(journal_entries,
struct journal_replay,
list)->j.last_seq);
nr = cur_seq - last_seq;
if (nr + 1 > j->pin.size) {
free_fifo(&j->pin);
init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
if (!j->pin.data) {
bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
return -ENOMEM;
}
}
j->last_seq_ondisk = last_seq;
j->pin.front = last_seq;
j->pin.back = cur_seq;
atomic64_set(&j->seq, cur_seq - 1);
fifo_for_each_entry_ptr(p, &j->pin, seq) {
INIT_LIST_HEAD(&p->list);
INIT_LIST_HEAD(&p->flushed);
atomic_set(&p->count, 0);
p->devs.nr = 0;
}
list_for_each_entry(i, journal_entries, list) {
seq = le64_to_cpu(i->j.seq);
BUG_ON(seq < last_seq || seq >= cur_seq);
p = journal_seq_pin(j, seq);
atomic_set(&p->count, 1);
p->devs = i->devs;
}
spin_lock(&j->lock);
set_bit(JOURNAL_STARTED, &j->flags);
while (journal_cur_seq(j) < blacklist)
journal_pin_new_entry(j, 0);
/*
* __journal_entry_close() only inits the next journal entry when it
* closes an open journal entry - the very first journal entry gets
* initialized here:
*/
journal_pin_new_entry(j, 1);
bch2_journal_buf_init(j);
@ -1021,12 +1051,7 @@ void bch2_fs_journal_start(struct journal *j)
bch2_journal_space_available(j);
spin_unlock(&j->lock);
/*
* Adding entries to the next journal entry before allocating space on
* disk for the next journal entry - this is ok, because these entries
* only have to go down with the next journal entry we write:
*/
bch2_journal_seq_blacklist_write(j);
return 0;
}
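
The new startup path sizes the pin FIFO from the replay window: nr = cur_seq - last_seq sequence numbers are still open, the FIFO must hold nr + 1 entries, and the allocation is rounded up to a power of two. Worked numbers, standalone (values invented):

#include <stdint.h>
#include <stdio.h>

static uint64_t roundup_pow_of_two_ex(uint64_t n)
{
	uint64_t r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

int main(void)
{
	uint64_t last_seq = 90, cur_seq = 101;	/* from the newest entry's last_seq */
	uint64_t nr = cur_seq - last_seq;	/* 11 sequence numbers to pin */

	printf("fifo size: %llu\n",	/* 16: first power of two >= nr + 1 */
	       (unsigned long long) roundup_pow_of_two_ex(nr + 1));
	printf("pin window: [%llu, %llu)\n",	/* front = last_seq, back = cur_seq */
	       (unsigned long long) last_seq,
	       (unsigned long long) cur_seq);
	return 0;
}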
/* init/exit: */
@ -1091,8 +1116,6 @@ int bch2_fs_journal_init(struct journal *j)
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
init_waitqueue_head(&j->pin_flush_wait);
mutex_init(&j->blacklist_lock);
INIT_LIST_HEAD(&j->seq_blacklist);
mutex_init(&j->reclaim_lock);
mutex_init(&j->discard_lock);

View File

@ -469,8 +469,10 @@ int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
int bch2_dev_journal_alloc(struct bch_dev *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
void bch2_fs_journal_stop(struct journal *);
void bch2_fs_journal_start(struct journal *);
int bch2_fs_journal_start(struct journal *, u64, struct list_head *);
void bch2_dev_journal_exit(struct bch_dev *);
int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
void bch2_fs_journal_exit(struct journal *);

View File

@ -9,7 +9,6 @@
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "replicas.h"
#include <trace/events/bcachefs.h>
@ -655,45 +654,11 @@ void bch2_journal_entries_free(struct list_head *list)
}
}
int bch2_journal_set_seq(struct bch_fs *c, u64 last_seq, u64 end_seq)
{
struct journal *j = &c->journal;
struct journal_entry_pin_list *p;
u64 seq, nr = end_seq - last_seq + 1;
if (nr > j->pin.size) {
free_fifo(&j->pin);
init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL);
if (!j->pin.data) {
bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
return -ENOMEM;
}
}
atomic64_set(&j->seq, end_seq);
j->last_seq_ondisk = last_seq;
j->pin.front = last_seq;
j->pin.back = end_seq + 1;
fifo_for_each_entry_ptr(p, &j->pin, seq) {
INIT_LIST_HEAD(&p->list);
INIT_LIST_HEAD(&p->flushed);
atomic_set(&p->count, 0);
p->devs.nr = 0;
}
return 0;
}
int bch2_journal_read(struct bch_fs *c, struct list_head *list)
{
struct journal *j = &c->journal;
struct journal_list jlist;
struct journal_replay *i;
struct journal_entry_pin_list *p;
struct bch_dev *ca;
u64 cur_seq, end_seq;
unsigned iter;
size_t keys = 0, entries = 0;
bool degraded = false;
@ -725,17 +690,12 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
if (jlist.ret)
return jlist.ret;
if (list_empty(list)) {
bch_err(c, "no journal entries found");
return BCH_FSCK_REPAIR_IMPOSSIBLE;
}
list_for_each_entry(i, list, list) {
struct jset_entry *entry;
struct bkey_i *k, *_n;
struct bch_replicas_padded replicas;
char buf[80];
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
ret = jset_validate_entries(c, &i->j, READ);
if (ret)
goto fsck_err;
@ -745,6 +705,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
* the devices - this is wrong:
*/
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
if (!degraded &&
(test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
fsck_err_on(!bch2_replicas_marked(c, &replicas.e, false), c,
@ -755,68 +717,18 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
if (ret)
return ret;
}
}
i = list_last_entry(list, struct journal_replay, list);
ret = bch2_journal_set_seq(c,
le64_to_cpu(i->j.last_seq),
le64_to_cpu(i->j.seq));
if (ret)
return ret;
mutex_lock(&j->blacklist_lock);
list_for_each_entry(i, list, list) {
p = journal_seq_pin(j, le64_to_cpu(i->j.seq));
atomic_set(&p->count, 1);
p->devs = i->devs;
if (bch2_journal_seq_blacklist_read(j, i)) {
mutex_unlock(&j->blacklist_lock);
return -ENOMEM;
}
}
mutex_unlock(&j->blacklist_lock);
cur_seq = journal_last_seq(j);
end_seq = le64_to_cpu(list_last_entry(list,
struct journal_replay, list)->j.seq);
list_for_each_entry(i, list, list) {
struct jset_entry *entry;
struct bkey_i *k, *_n;
bool blacklisted;
mutex_lock(&j->blacklist_lock);
while (cur_seq < le64_to_cpu(i->j.seq) &&
bch2_journal_seq_blacklist_find(j, cur_seq))
cur_seq++;
blacklisted = bch2_journal_seq_blacklist_find(j,
le64_to_cpu(i->j.seq));
mutex_unlock(&j->blacklist_lock);
fsck_err_on(blacklisted, c,
"found blacklisted journal entry %llu",
le64_to_cpu(i->j.seq));
fsck_err_on(le64_to_cpu(i->j.seq) != cur_seq, c,
"journal entries %llu-%llu missing! (replaying %llu-%llu)",
cur_seq, le64_to_cpu(i->j.seq) - 1,
journal_last_seq(j), end_seq);
cur_seq = le64_to_cpu(i->j.seq) + 1;
for_each_jset_key(k, _n, entry, &i->j)
keys++;
entries++;
}
bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
keys, entries, journal_cur_seq(j));
if (!list_empty(list)) {
i = list_last_entry(list, struct journal_replay, list);
bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
keys, entries, le64_to_cpu(i->j.seq));
}
fsck_err:
return ret;
}
@ -876,8 +788,9 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
* but - there are other correctness issues if btree gc were to run
* before journal replay finishes
*/
BUG_ON(c->gc_pos.phase);
bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
gc_pos_btree_node(iter->l[0].b),
NULL, 0, 0);
bch2_trans_exit(&trans);

View File

@ -34,7 +34,6 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys) \
vstruct_for_each_safe(entry, k, _n)
int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
int bch2_journal_read(struct bch_fs *, struct list_head *);
void bch2_journal_entries_free(struct list_head *);
int bch2_journal_replay(struct bch_fs *, struct list_head *);

View File

@ -1,12 +1,9 @@
#include "bcachefs.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "error.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
#include "btree_iter.h"
#include "eytzinger.h"
#include "journal_seq_blacklist.h"
#include "super-io.h"
/*
* journal_seq_blacklist machinery:
@ -36,327 +33,285 @@
* record that it was blacklisted so that a) on recovery we don't think we have
* missing journal entries and b) so that the btree code continues to ignore
* that bset, until that btree node is rewritten.
*
* Blacklisted journal sequence numbers are themselves recorded as entries in
* the journal.
*/
/*
* Called when journal needs to evict a blacklist entry to reclaim space: find
* any btree nodes that refer to the blacklist journal sequence numbers, and
* rewrite them:
*/
static void journal_seq_blacklist_flush(struct journal *j,
struct journal_entry_pin *pin, u64 seq)
static unsigned
blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
{
struct bch_fs *c =
container_of(j, struct bch_fs, journal);
struct journal_seq_blacklist *bl =
container_of(pin, struct journal_seq_blacklist, pin);
struct blacklisted_node n;
struct closure cl;
unsigned i;
int ret;
closure_init_stack(&cl);
for (i = 0;; i++) {
struct btree_trans trans;
struct btree_iter *iter;
struct btree *b;
bch2_trans_init(&trans, c);
mutex_lock(&j->blacklist_lock);
if (i >= bl->nr_entries) {
mutex_unlock(&j->blacklist_lock);
break;
}
n = bl->entries[i];
mutex_unlock(&j->blacklist_lock);
iter = bch2_trans_get_node_iter(&trans, n.btree_id, n.pos,
0, 0, 0);
b = bch2_btree_iter_peek_node(iter);
/* The node might have already been rewritten: */
if (b->data->keys.seq == n.seq) {
ret = bch2_btree_node_rewrite(c, iter, n.seq, 0);
if (ret) {
bch2_trans_exit(&trans);
bch2_fs_fatal_error(c,
"error %i rewriting btree node with blacklisted journal seq",
ret);
bch2_journal_halt(j);
return;
}
}
bch2_trans_exit(&trans);
}
for (i = 0;; i++) {
struct btree_update *as;
struct pending_btree_node_free *d;
mutex_lock(&j->blacklist_lock);
if (i >= bl->nr_entries) {
mutex_unlock(&j->blacklist_lock);
break;
}
n = bl->entries[i];
mutex_unlock(&j->blacklist_lock);
redo_wait:
mutex_lock(&c->btree_interior_update_lock);
/*
* Is the node on the list of pending interior node updates -
* being freed? If so, wait for that to finish:
*/
for_each_pending_btree_node_free(c, as, d)
if (n.seq == d->seq &&
n.btree_id == d->btree_id &&
!d->level &&
!bkey_cmp(n.pos, d->key.k.p)) {
closure_wait(&as->wait, &cl);
mutex_unlock(&c->btree_interior_update_lock);
closure_sync(&cl);
goto redo_wait;
}
mutex_unlock(&c->btree_interior_update_lock);
}
mutex_lock(&j->blacklist_lock);
bch2_journal_pin_drop(j, &bl->pin);
list_del(&bl->list);
kfree(bl->entries);
kfree(bl);
mutex_unlock(&j->blacklist_lock);
return bl
? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
sizeof(struct journal_seq_blacklist_entry))
: 0;
}
/*
* Determine if a particular sequence number is blacklisted - if so, return
* blacklist entry:
*/
struct journal_seq_blacklist *
bch2_journal_seq_blacklist_find(struct journal *j, u64 seq)
static unsigned sb_blacklist_u64s(unsigned nr)
{
struct journal_seq_blacklist *bl;
struct bch_sb_field_journal_seq_blacklist *bl;
lockdep_assert_held(&j->blacklist_lock);
return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
}
list_for_each_entry(bl, &j->seq_blacklist, list)
if (seq >= bl->start && seq <= bl->end)
return bl;
static struct bch_sb_field_journal_seq_blacklist *
blacklist_entry_try_merge(struct bch_fs *c,
struct bch_sb_field_journal_seq_blacklist *bl,
unsigned i)
{
unsigned nr = blacklist_nr_entries(bl);
if (le64_to_cpu(bl->start[i].end) >=
le64_to_cpu(bl->start[i + 1].start)) {
bl->start[i].end = bl->start[i + 1].end;
--nr;
memmove(&bl->start[i],
&bl->start[i + 1],
sizeof(bl->start[0]) * (nr - i));
bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
sb_blacklist_u64s(nr));
BUG_ON(!bl);
}
return bl;
}
int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
{
struct bch_sb_field_journal_seq_blacklist *bl;
unsigned i, nr;
int ret = 0;
mutex_lock(&c->sb_lock);
bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
nr = blacklist_nr_entries(bl);
if (bl) {
for (i = 0; i < nr; i++) {
struct journal_seq_blacklist_entry *e =
bl->start + i;
if (start == le64_to_cpu(e->start) &&
end == le64_to_cpu(e->end))
goto out;
if (start <= le64_to_cpu(e->start) &&
end >= le64_to_cpu(e->end)) {
e->start = cpu_to_le64(start);
e->end = cpu_to_le64(end);
if (i + 1 < nr)
bl = blacklist_entry_try_merge(c,
bl, i);
if (i)
bl = blacklist_entry_try_merge(c,
bl, i - 1);
goto out_write_sb;
}
}
}
bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
sb_blacklist_u64s(nr + 1));
if (!bl) {
ret = -ENOMEM;
goto out;
}
bl->start[nr].start = cpu_to_le64(start);
bl->start[nr].end = cpu_to_le64(end);
out_write_sb:
c->disk_sb.sb->features[0] |=
1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3;
ret = bch2_write_super(c);
out:
mutex_unlock(&c->sb_lock);
return ret;
}
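
bch2_journal_seq_blacklist_add() keeps the superblock list sorted and non-overlapping: an exact duplicate is a no-op, a range covering an existing entry widens it in place, and widened entries are coalesced with their neighbours when they touch. A compact standalone sketch of that coalescing rule (array-based, hypothetical names; the real code also resizes the superblock field):

#include <stdint.h>
#include <string.h>

struct bl_range { uint64_t start, end; };

/*
 * If ranges i and i + 1 touch or overlap, fold i + 1 into i and
 * close the gap; returns the new entry count.
 */
static unsigned bl_try_merge(struct bl_range *r, unsigned nr, unsigned i)
{
	if (i + 1 < nr && r[i].end >= r[i + 1].start) {
		r[i].end = r[i + 1].end > r[i].end ? r[i + 1].end : r[i].end;
		memmove(&r[i + 1], &r[i + 2],
			sizeof(r[0]) * (nr - i - 2));
		nr--;
	}
	return nr;
}

int main(void)
{
	struct bl_range r[] = { { 10, 20 }, { 20, 30 }, { 40, 50 } };
	unsigned nr = bl_try_merge(r, 3, 0);	/* -> {10,30}, {40,50} */

	return nr == 2 ? 0 : 1;
}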
static int journal_seq_blacklist_table_cmp(const void *_l,
const void *_r, size_t size)
{
const struct journal_seq_blacklist_table_entry *l = _l;
const struct journal_seq_blacklist_table_entry *r = _r;
return (l->start > r->start) - (l->start < r->start);
}
bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
bool dirty)
{
struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
struct journal_seq_blacklist_table_entry search = { .start = seq };
int idx;
if (!t)
return false;
idx = eytzinger0_find_le(t->entries, t->nr,
sizeof(t->entries[0]),
journal_seq_blacklist_table_cmp,
&search);
if (idx < 0)
return false;
BUG_ON(t->entries[idx].start > seq);
if (seq >= t->entries[idx].end)
return false;
if (dirty)
t->entries[idx].dirty = true;
return true;
}
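
The lookup above uses eytzinger0_find_le() to find the last entry whose start is <= seq, then checks whether seq falls before that entry's end (ends are exclusive: seq >= end means not blacklisted). The same semantics with an ordinary sorted array and binary search, as a sketch:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct bl_entry { uint64_t start, end; };	/* end exclusive */

/* Index of the last entry with start <= seq, or -1 if none. */
static int find_le(const struct bl_entry *e, size_t nr, uint64_t seq)
{
	int l = 0, r = (int) nr - 1, ret = -1;

	while (l <= r) {
		int m = l + (r - l) / 2;

		if (e[m].start <= seq) {
			ret = m;
			l = m + 1;
		} else {
			r = m - 1;
		}
	}
	return ret;
}

static bool seq_is_blacklisted(const struct bl_entry *e, size_t nr,
			       uint64_t seq)
{
	int idx = find_le(e, nr, seq);

	return idx >= 0 && seq < e[idx].end;
}

int main(void)
{
	const struct bl_entry bl[] = { { 5, 10 }, { 20, 25 } };

	/* seq 7 falls in [5,10): blacklisted; seq 10 does not. */
	return seq_is_blacklisted(bl, 2, 7) &&
	       !seq_is_blacklisted(bl, 2, 10) ? 0 : 1;
}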
int bch2_blacklist_table_initialize(struct bch_fs *c)
{
struct bch_sb_field_journal_seq_blacklist *bl =
bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
struct journal_seq_blacklist_table *t;
unsigned i, nr = blacklist_nr_entries(bl);
BUG_ON(c->journal_seq_blacklist_table);
if (!bl)
return 0;
t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
GFP_KERNEL);
if (!t)
return -ENOMEM;
t->nr = nr;
for (i = 0; i < nr; i++) {
t->entries[i].start = le64_to_cpu(bl->start[i].start);
t->entries[i].end = le64_to_cpu(bl->start[i].end);
}
eytzinger0_sort(t->entries,
t->nr,
sizeof(t->entries[0]),
journal_seq_blacklist_table_cmp,
NULL);
c->journal_seq_blacklist_table = t;
return 0;
}
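
eytzinger0_sort()/eytzinger0_find_le() operate on an array laid out in BFS order of an implicit balanced tree — element i's children live at 2i+1 and 2i+2 — which keeps the binary search cache-friendly. A sketch of producing that layout from sorted input via an in-order walk of the implicit tree (illustrative; the kernel helpers may build it differently):

#include <stdio.h>

/* In-order walk of the implicit tree assigns sorted values to BFS slots. */
static void eytzinger_fill(const int *sorted, int *out, int n,
			   int *next, int i)
{
	if (i >= n)
		return;
	eytzinger_fill(sorted, out, n, next, 2 * i + 1);	/* left */
	out[i] = sorted[(*next)++];				/* node */
	eytzinger_fill(sorted, out, n, next, 2 * i + 2);	/* right */
}

int main(void)
{
	int sorted[] = { 1, 2, 3, 4, 5, 6, 7 }, out[7], next = 0, i;

	eytzinger_fill(sorted, out, 7, &next, 0);
	for (i = 0; i < 7; i++)
		printf("%d ", out[i]);	/* 4 2 6 1 3 5 7 */
	printf("\n");
	return 0;
}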
static const char *
bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
struct bch_sb_field *f)
{
struct bch_sb_field_journal_seq_blacklist *bl =
field_to_type(f, journal_seq_blacklist);
struct journal_seq_blacklist_entry *i;
unsigned nr = blacklist_nr_entries(bl);
for (i = bl->start; i < bl->start + nr; i++) {
if (le64_to_cpu(i->start) >=
le64_to_cpu(i->end))
return "entry start >= end";
if (i + 1 < bl->start + nr &&
le64_to_cpu(i[0].end) >
le64_to_cpu(i[1].start))
return "entries out of order";
}
return NULL;
}
/*
 * Allocate a new, in memory blacklist entry:
 */
static struct journal_seq_blacklist *
bch2_journal_seq_blacklisted_new(struct journal *j, u64 start, u64 end)
{
	struct journal_seq_blacklist *bl;

	lockdep_assert_held(&j->blacklist_lock);

	/*
	 * When we start the journal, bch2_journal_start() will skip over @seq:
	 */
	bl = kzalloc(sizeof(*bl), GFP_KERNEL);
	if (!bl)
		return NULL;

	bl->start	= start;
	bl->end		= end;

	list_add_tail(&bl->list, &j->seq_blacklist);
	return bl;
}
static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
						  struct bch_sb *sb,
						  struct bch_sb_field *f)
{
	struct bch_sb_field_journal_seq_blacklist *bl =
		field_to_type(f, journal_seq_blacklist);
	struct journal_seq_blacklist_entry *i;
	unsigned nr = blacklist_nr_entries(bl);

	for (i = bl->start; i < bl->start + nr; i++) {
		if (i != bl->start)
			pr_buf(out, " ");

		pr_buf(out, "%llu-%llu",
		       le64_to_cpu(i->start),
		       le64_to_cpu(i->end));
	}
}
/*
 * Returns true if @seq is newer than the most recent journal entry that got
 * written, and data corresponding to @seq should be ignored - also marks @seq
 * as blacklisted so that on future restarts the corresponding data will still
 * be ignored:
 */
int bch2_journal_seq_should_ignore(struct bch_fs *c, u64 seq, struct btree *b)
{
	struct journal *j = &c->journal;
	struct journal_seq_blacklist *bl = NULL;
	struct blacklisted_node *n;
	u64 journal_seq;
	int ret = 0;

	if (!seq)
		return 0;

	spin_lock(&j->lock);
	journal_seq = journal_cur_seq(j);
	spin_unlock(&j->lock);

	/* Interior updates aren't journalled: */
	BUG_ON(b->level);
	BUG_ON(seq > journal_seq && test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags));

	/*
	 * Decrease this back to j->seq + 2 when we next rev the on disk format:
	 * increasing it temporarily to work around bug in old kernels
	 */
	fsck_err_on(seq > journal_seq + 4, c,
		    "bset journal seq too far in the future: %llu > %llu",
		    seq, journal_seq);

	if (seq <= journal_seq &&
	    list_empty_careful(&j->seq_blacklist))
		return 0;

	mutex_lock(&j->blacklist_lock);

	if (seq <= journal_seq) {
		bl = bch2_journal_seq_blacklist_find(j, seq);
		if (!bl)
			goto out;
	} else {
		bch_verbose(c, "btree node %u:%llu:%llu has future journal sequence number %llu, blacklisting",
			    b->btree_id, b->key.k.p.inode, b->key.k.p.offset, seq);

		if (!j->new_blacklist) {
			j->new_blacklist = bch2_journal_seq_blacklisted_new(j,
						journal_seq + 1,
						journal_seq + 1);
			if (!j->new_blacklist) {
				ret = -ENOMEM;
				goto out;
			}
		}

		bl = j->new_blacklist;
		bl->end = max(bl->end, seq);
	}

	for (n = bl->entries; n < bl->entries + bl->nr_entries; n++)
		if (b->data->keys.seq	== n->seq &&
		    b->btree_id		== n->btree_id &&
		    !bkey_cmp(b->key.k.p, n->pos))
			goto found_entry;

	if (!bl->nr_entries ||
	    is_power_of_2(bl->nr_entries)) {
		n = krealloc(bl->entries,
			     max_t(size_t, bl->nr_entries * 2, 8) * sizeof(*n),
			     GFP_KERNEL);
		if (!n) {
			ret = -ENOMEM;
			goto out;
		}
		bl->entries = n;
	}

	bl->entries[bl->nr_entries++] = (struct blacklisted_node) {
		.seq		= b->data->keys.seq,
		.btree_id	= b->btree_id,
		.pos		= b->key.k.p,
	};
found_entry:
	ret = 1;
out:
fsck_err:
	mutex_unlock(&j->blacklist_lock);
	return ret;
}
static int __bch2_journal_seq_blacklist_read(struct journal *j,
					     struct journal_replay *i,
					     u64 start, u64 end)
{
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	struct journal_seq_blacklist *bl;

	bch_verbose(c, "blacklisting existing journal seq %llu-%llu",
		    start, end);

	bl = bch2_journal_seq_blacklisted_new(j, start, end);
	if (!bl)
		return -ENOMEM;

	bch2_journal_pin_add(j, le64_to_cpu(i->j.seq), &bl->pin,
			     journal_seq_blacklist_flush);
	return 0;
}

/*
 * After reading the journal, find existing journal seq blacklist entries and
 * read them into memory:
 */
int bch2_journal_seq_blacklist_read(struct journal *j,
				    struct journal_replay *i)
{
	struct jset_entry *entry;
	int ret = 0;

	vstruct_for_each(&i->j, entry) {
		switch (entry->type) {
		case BCH_JSET_ENTRY_blacklist: {
			struct jset_entry_blacklist *bl_entry =
				container_of(entry, struct jset_entry_blacklist, entry);

			ret = __bch2_journal_seq_blacklist_read(j, i,
					le64_to_cpu(bl_entry->seq),
					le64_to_cpu(bl_entry->seq));
			break;
		}
		case BCH_JSET_ENTRY_blacklist_v2: {
			struct jset_entry_blacklist_v2 *bl_entry =
				container_of(entry, struct jset_entry_blacklist_v2, entry);

			ret = __bch2_journal_seq_blacklist_read(j, i,
					le64_to_cpu(bl_entry->start),
					le64_to_cpu(bl_entry->end));
			break;
		}
		}

		if (ret)
			break;
	}

	return ret;
}
/*
 * After reading the journal and walking the btree, we might have new journal
 * sequence numbers to blacklist - add entries to the next journal entry to be
 * written:
 */
void bch2_journal_seq_blacklist_write(struct journal *j)
{
	struct journal_seq_blacklist *bl = j->new_blacklist;
	struct jset_entry_blacklist_v2 *bl_entry;
	struct jset_entry *entry;

	if (!bl)
		return;

	entry = bch2_journal_add_entry_noreservation(journal_cur_buf(j),
			(sizeof(*bl_entry) - sizeof(*entry)) / sizeof(u64));

	bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry);
	bl_entry->entry.type	= BCH_JSET_ENTRY_blacklist_v2;
	bl_entry->start		= cpu_to_le64(bl->start);
	bl_entry->end		= cpu_to_le64(bl->end);

	bch2_journal_pin_add(j,
			     journal_cur_seq(j),
			     &bl->pin,
			     journal_seq_blacklist_flush);

	j->new_blacklist = NULL;
}
const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
	.validate	= bch2_sb_journal_seq_blacklist_validate,
	.to_text	= bch2_sb_journal_seq_blacklist_to_text
};

void bch2_blacklist_entries_gc(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs,
					journal_seq_blacklist_gc_work);
	struct journal_seq_blacklist_table *t;
	struct bch_sb_field_journal_seq_blacklist *bl;
	struct journal_seq_blacklist_entry *src, *dst;
	struct btree_trans trans;
	unsigned i, nr, new_nr;
	int ret;

	bch2_trans_init(&trans, c);

	for (i = 0; i < BTREE_ID_NR; i++) {
		struct btree_iter *iter;
		struct btree *b;

		for_each_btree_node(&trans, iter, i, POS_MIN,
				    BTREE_ITER_PREFETCH, b)
			if (test_bit(BCH_FS_STOPPING, &c->flags)) {
				bch2_trans_exit(&trans);
				return;
			}
		bch2_trans_iter_free(&trans, iter);
	}

	ret = bch2_trans_exit(&trans);
	if (ret)
		return;

	mutex_lock(&c->sb_lock);
	bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
	if (!bl)
		goto out;

	nr = blacklist_nr_entries(bl);
	dst = bl->start;

	t = c->journal_seq_blacklist_table;
	BUG_ON(nr != t->nr);

	for (src = bl->start, i = eytzinger0_first(t->nr);
	     src < bl->start + nr;
	     src++, i = eytzinger0_next(i, nr)) {
		BUG_ON(t->entries[i].start	!= le64_to_cpu(src->start));
		BUG_ON(t->entries[i].end	!= le64_to_cpu(src->end));

		if (t->entries[i].dirty)
			*dst++ = *src;
	}

	new_nr = dst - bl->start;

	bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);

	if (new_nr != nr) {
		bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
				new_nr ? sb_blacklist_u64s(new_nr) : 0);
		BUG_ON(new_nr && !bl);

		if (!new_nr)
			c->disk_sb.sb->features[0] &=
				~(1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3);

		bch2_write_super(c);
	}
out:
	mutex_unlock(&c->sb_lock);
}
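The heart of the GC pass is the compaction loop: bch2_journal_seq_is_blacklisted() sets the dirty flag whenever a bset in some btree node still falls in a blacklisted range, so after walking every btree, any entry never marked dirty can be dropped from the superblock. A standalone sketch of that compaction on plain arrays (not the bcachefs structures):

#include <stdbool.h>
#include <stdio.h>

struct bl_entry {
	unsigned long long start, end;
	bool dirty;
};

/* keep only entries still referenced by a btree node; returns new_nr */
static unsigned compact_blacklist(struct bl_entry *e, unsigned nr)
{
	struct bl_entry *dst = e, *src;

	for (src = e; src < e + nr; src++)
		if (src->dirty)
			*dst++ = *src;

	return dst - e;
}

int main(void)
{
	struct bl_entry e[] = {
		{ 1, 2, false }, { 10, 15, true }, { 30, 31, false },
	};
	unsigned new_nr = compact_blacklist(e, 3);

	printf("nr blacklist entries was 3, now %u (%llu-%llu)\n",
	       new_nr, e[0].start, e[0].end);
	return 0;
}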

View File

@@ -1,13 +1,12 @@
#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
struct journal_replay;
bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
int bch2_blacklist_table_initialize(struct bch_fs *);
struct journal_seq_blacklist *
bch2_journal_seq_blacklist_find(struct journal *, u64);
int bch2_journal_seq_should_ignore(struct bch_fs *, u64, struct btree *);
int bch2_journal_seq_blacklist_read(struct journal *,
struct journal_replay *);
void bch2_journal_seq_blacklist_write(struct journal *);
extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
void bch2_blacklist_entries_gc(struct work_struct *);
#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */

View File

@@ -53,24 +53,6 @@ struct journal_entry_pin {
u64 seq;
};
/* corresponds to a btree node with a blacklisted bset: */
struct blacklisted_node {
__le64 seq;
enum btree_id btree_id;
struct bpos pos;
};
struct journal_seq_blacklist {
struct list_head list;
u64 start;
u64 end;
struct journal_entry_pin pin;
struct blacklisted_node *entries;
size_t nr_entries;
};
struct journal_res {
bool ref;
u8 idx;
@@ -221,10 +203,6 @@ struct journal {
u64 replay_journal_seq;
struct mutex blacklist_lock;
struct list_head seq_blacklist;
struct journal_seq_blacklist *new_blacklist;
struct write_point wp;
spinlock_t err_lock;

View File

@@ -208,7 +208,8 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
up_read(&ca->bucket_lock);
if (sectors_not_moved && !ret)
bch_warn(c, "copygc finished but %llu/%llu sectors, %llu/%llu buckets not moved",
bch_warn_ratelimited(c,
"copygc finished but %llu/%llu sectors, %llu/%llu buckets not moved",
sectors_not_moved, sectors_to_move,
buckets_not_moved, buckets_to_move);

View File

@@ -457,7 +457,7 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
{
struct bch_fs *c = sb->s_fs_info;
if (sb->s_flags & MS_RDONLY)
if (sb->s_flags & SB_RDONLY)
return -EROFS;
/* Accounting must be enabled at mount time: */
@@ -494,7 +494,7 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags)
{
struct bch_fs *c = sb->s_fs_info;
if (sb->s_flags & MS_RDONLY)
if (sb->s_flags & SB_RDONLY)
return -EROFS;
mutex_lock(&c->sb_lock);
@@ -518,7 +518,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
struct bch_fs *c = sb->s_fs_info;
int ret;
if (sb->s_flags & MS_RDONLY)
if (sb->s_flags & SB_RDONLY)
return -EROFS;
if (uflags & FS_USER_QUOTA) {
@@ -600,7 +600,7 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
struct bch_sb_field_quota *sb_quota;
struct bch_memquota_type *q;
if (sb->s_flags & MS_RDONLY)
if (sb->s_flags & SB_RDONLY)
return -EROFS;
if (type >= QTYP_NR)
@@ -719,7 +719,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
struct bkey_i_quota new_quota;
int ret;
if (sb->s_flags & MS_RDONLY)
if (sb->s_flags & SB_RDONLY)
return -EROFS;
bkey_quota_init(&new_quota.k_i);

View File

@@ -11,6 +11,7 @@
#include "error.h"
#include "fsck.h"
#include "journal_io.h"
#include "journal_seq_blacklist.h"
#include "quota.h"
#include "recovery.h"
#include "replicas.h"
@@ -51,6 +52,118 @@ found:
return k;
}
static int verify_superblock_clean(struct bch_fs *c,
				   struct bch_sb_field_clean **cleanp,
				   struct jset *j)
{
	unsigned i;
	struct bch_sb_field_clean *clean = *cleanp;
	int ret = 0;

	if (!clean || !j)
		return 0;

	if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
			"superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
			le64_to_cpu(clean->journal_seq),
			le64_to_cpu(j->seq))) {
		kfree(clean);
		*cleanp = NULL;
		return 0;
	}

	mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
			"superblock read clock doesn't match journal after clean shutdown");
	mustfix_fsck_err_on(j->write_clock != clean->write_clock, c,
			"superblock read clock doesn't match journal after clean shutdown");

	for (i = 0; i < BTREE_ID_NR; i++) {
		struct bkey_i *k1, *k2;
		unsigned l1 = 0, l2 = 0;

		k1 = btree_root_find(c, clean, NULL, i, &l1);
		k2 = btree_root_find(c, NULL, j, i, &l2);

		if (!k1 && !k2)
			continue;

		mustfix_fsck_err_on(!k1 || !k2 ||
				    IS_ERR(k1) ||
				    IS_ERR(k2) ||
				    k1->k.u64s != k2->k.u64s ||
				    memcmp(k1, k2, bkey_bytes(k1)) ||
				    l1 != l2, c,
			"superblock btree root doesn't match journal after clean shutdown");
	}
fsck_err:
	return ret;
}
static int
verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c,
						  struct list_head *journal)
{
	struct journal_replay *i =
		list_last_entry(journal, struct journal_replay, list);
	u64 start_seq	= le64_to_cpu(i->j.last_seq);
	u64 end_seq	= le64_to_cpu(i->j.seq);
	u64 seq		= start_seq;
	int ret = 0;

	list_for_each_entry(i, journal, list) {
		fsck_err_on(seq != le64_to_cpu(i->j.seq), c,
			"journal entries %llu-%llu missing! (replaying %llu-%llu)",
			seq, le64_to_cpu(i->j.seq) - 1,
			start_seq, end_seq);

		seq = le64_to_cpu(i->j.seq);

		fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c,
			    "found blacklisted journal entry %llu", seq);

		do {
			seq++;
		} while (bch2_journal_seq_is_blacklisted(c, seq, false));
	}
fsck_err:
	return ret;
}
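The walk above depends on every hole in the sequence numbers being covered by a blacklist range: after each entry it advances seq past any blacklisted numbers, so an unexplained gap surfaces as a missing-entries error. A self-contained sketch of the same check, with hypothetical sequence numbers and a hypothetical blacklist:

#include <stdbool.h>
#include <stdio.h>

static bool blacklisted(unsigned long long seq)
{
	return seq >= 4 && seq < 6;	/* pretend seqs 4-5 are blacklisted */
}

int main(void)
{
	/* journal entries actually read, in order; 4 and 5 are absent */
	const unsigned long long entries[] = { 2, 3, 6, 7 };
	unsigned long long expect = entries[0];
	unsigned i;

	for (i = 0; i < 4; i++) {
		if (entries[i] != expect)
			printf("journal entries %llu-%llu missing!\n",
			       expect, entries[i] - 1);
		if (blacklisted(entries[i]))
			printf("found blacklisted journal entry %llu\n",
			       entries[i]);

		expect = entries[i];
		do {
			expect++;
		} while (blacklisted(expect));
	}
	return 0;	/* prints nothing: the 4-5 gap is blacklisted */
}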
static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
{
	struct bch_sb_field_clean *clean, *sb_clean;
	int ret;

	mutex_lock(&c->sb_lock);
	sb_clean = bch2_sb_get_clean(c->disk_sb.sb);

	if (fsck_err_on(!sb_clean, c,
			"superblock marked clean but clean section not present")) {
		SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
		c->sb.clean = false;
		mutex_unlock(&c->sb_lock);
		return NULL;
	}

	clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
			GFP_KERNEL);
	if (!clean) {
		mutex_unlock(&c->sb_lock);
		return ERR_PTR(-ENOMEM);
	}

	if (le16_to_cpu(c->disk_sb.sb->version) <
	    bcachefs_metadata_version_bkey_renumber)
		bch2_sb_clean_renumber(clean, READ);

	mutex_unlock(&c->sb_lock);

	return clean;
fsck_err:
	mutex_unlock(&c->sb_lock);
	return ERR_PTR(ret);
}
static int journal_replay_entry_early(struct bch_fs *c,
struct jset_entry *entry)
{
@@ -100,54 +213,108 @@ static int journal_replay_entry_early(struct bch_fs *c,
			      le64_to_cpu(u->v));
		break;
	}
	case BCH_JSET_ENTRY_blacklist: {
		struct jset_entry_blacklist *bl_entry =
			container_of(entry, struct jset_entry_blacklist, entry);

		ret = bch2_journal_seq_blacklist_add(c,
				le64_to_cpu(bl_entry->seq),
				le64_to_cpu(bl_entry->seq) + 1);
		break;
	}
	case BCH_JSET_ENTRY_blacklist_v2: {
		struct jset_entry_blacklist_v2 *bl_entry =
			container_of(entry, struct jset_entry_blacklist_v2, entry);

		ret = bch2_journal_seq_blacklist_add(c,
				le64_to_cpu(bl_entry->start),
				le64_to_cpu(bl_entry->end) + 1);
		break;
	}
	}

	return ret;
}
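Note the "+ 1" in both cases: the on-disk journal entries are inclusive (v1 stores a single seq, v2 an inclusive start..end), while bch2_journal_seq_blacklist_add() takes a half-open [start, end) range. A tiny sketch of the conversion:

#include <assert.h>

struct range { unsigned long long start, end; };	/* half-open [start, end) */

static struct range from_v1(unsigned long long seq)
{
	return (struct range){ seq, seq + 1 };		/* just @seq */
}

static struct range from_v2(unsigned long long start, unsigned long long end)
{
	return (struct range){ start, end + 1 };	/* inclusive -> half-open */
}

int main(void)
{
	assert(from_v1(7).end == 8);	/* covers exactly seq 7 */
	assert(from_v2(4, 5).end == 6);	/* covers seqs 4 and 5 */
	return 0;
}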
static int verify_superblock_clean(struct bch_fs *c,
				   struct bch_sb_field_clean **cleanp,
				   struct jset *j)
{
	unsigned i;
	struct bch_sb_field_clean *clean = *cleanp;
	int ret = 0;

	if (!clean || !j)
		return 0;

	if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
			"superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
			le64_to_cpu(clean->journal_seq),
			le64_to_cpu(j->seq))) {
		kfree(clean);
		*cleanp = NULL;
		return 0;
	}

	mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
			"superblock read clock doesn't match journal after clean shutdown");
	mustfix_fsck_err_on(j->write_clock != clean->write_clock, c,
			"superblock read clock doesn't match journal after clean shutdown");

	for (i = 0; i < BTREE_ID_NR; i++) {
		struct bkey_i *k1, *k2;
		unsigned l1 = 0, l2 = 0;

		k1 = btree_root_find(c, clean, NULL, i, &l1);
		k2 = btree_root_find(c, NULL, j, i, &l2);

		if (!k1 && !k2)
			continue;

		mustfix_fsck_err_on(!k1 || !k2 ||
				    IS_ERR(k1) ||
				    IS_ERR(k2) ||
				    k1->k.u64s != k2->k.u64s ||
				    memcmp(k1, k2, bkey_bytes(k1)) ||
				    l1 != l2, c,
			"superblock btree root doesn't match journal after clean shutdown");
	}
fsck_err:
	return ret;
}
static int journal_replay_early(struct bch_fs *c,
				struct bch_sb_field_clean *clean,
				struct list_head *journal)
{
	struct jset_entry *entry;
	int ret;

	if (clean) {
		c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock);
		c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock);

		for (entry = clean->start;
		     entry != vstruct_end(&clean->field);
		     entry = vstruct_next(entry)) {
			ret = journal_replay_entry_early(c, entry);
			if (ret)
				return ret;
		}
	} else {
		struct journal_replay *i =
			list_last_entry(journal, struct journal_replay, list);

		c->bucket_clock[READ].hand = le16_to_cpu(i->j.read_clock);
		c->bucket_clock[WRITE].hand = le16_to_cpu(i->j.write_clock);

		list_for_each_entry(i, journal, list)
			vstruct_for_each(&i->j, entry) {
				ret = journal_replay_entry_early(c, entry);
				if (ret)
					return ret;
			}
	}

	bch2_fs_usage_initialize(c);

	return 0;
}

static int read_btree_roots(struct bch_fs *c)
{
	unsigned i;
	int ret = 0;

	for (i = 0; i < BTREE_ID_NR; i++) {
		struct btree_root *r = &c->btree_roots[i];

		if (!r->alive)
			continue;

		if (i == BTREE_ID_ALLOC &&
		    test_reconstruct_alloc(c)) {
			c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
			continue;
		}

		if (r->error) {
			__fsck_err(c, i == BTREE_ID_ALLOC
				   ? FSCK_CAN_IGNORE : 0,
				   "invalid btree root %s",
				   bch2_btree_ids[i]);
			if (i == BTREE_ID_ALLOC)
				c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
		}

		ret = bch2_btree_root_read(c, i, &r->key, r->level);
		if (ret) {
			__fsck_err(c, i == BTREE_ID_ALLOC
				   ? FSCK_CAN_IGNORE : 0,
				   "error reading btree root %s",
				   bch2_btree_ids[i]);
			if (i == BTREE_ID_ALLOC)
				c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
		}
	}

	for (i = 0; i < BTREE_ID_NR; i++)
		if (!c->btree_roots[i].b)
			bch2_btree_root_alloc(c, i);
fsck_err:
	return ret;
}
@@ -185,119 +352,82 @@ static bool journal_empty(struct list_head *journal)
int bch2_fs_recovery(struct bch_fs *c)
{
const char *err = "cannot allocate memory";
	struct bch_sb_field_clean *clean = NULL, *sb_clean = NULL;
	struct jset_entry *entry;
	LIST_HEAD(journal);
	struct jset *j = NULL;
	unsigned i;
	bool run_gc = c->opts.fsck ||
		!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO));
	int ret;

	mutex_lock(&c->sb_lock);

	if (!c->replicas.entries) {
		bch_info(c, "building replicas info");
		set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
	}

	if (c->sb.clean)
		sb_clean = bch2_sb_get_clean(c->disk_sb.sb);
	if (sb_clean) {
		clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
				GFP_KERNEL);
		if (!clean) {
			ret = -ENOMEM;
			mutex_unlock(&c->sb_lock);
			goto err;
		}

		if (le16_to_cpu(c->disk_sb.sb->version) <
		    bcachefs_metadata_version_bkey_renumber)
			bch2_sb_clean_renumber(clean, READ);
	}
	mutex_unlock(&c->sb_lock);

	if (clean)
		bch_info(c, "recovering from clean shutdown, journal seq %llu",
			 le64_to_cpu(clean->journal_seq));

	if (!clean || c->opts.fsck) {
		ret = bch2_journal_read(c, &journal);
		if (ret)
			goto err;

		j = &list_entry(journal.prev, struct journal_replay, list)->j;
	} else {
		ret = bch2_journal_set_seq(c,
					   le64_to_cpu(clean->journal_seq),
					   le64_to_cpu(clean->journal_seq));
		BUG_ON(ret);
	}

	ret = verify_superblock_clean(c, &clean, j);
	if (ret)
		goto err;

	fsck_err_on(clean && !journal_empty(&journal), c,
		    "filesystem marked clean but journal not empty");

	err = "insufficient memory";
	if (clean) {
		c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock);
		c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock);

		for (entry = clean->start;
		     entry != vstruct_end(&clean->field);
		     entry = vstruct_next(entry)) {
			ret = journal_replay_entry_early(c, entry);
			if (ret)
				goto err;
		}
	} else {
		struct journal_replay *i;

		c->bucket_clock[READ].hand = le16_to_cpu(j->read_clock);
		c->bucket_clock[WRITE].hand = le16_to_cpu(j->write_clock);

		list_for_each_entry(i, &journal, list)
			vstruct_for_each(&i->j, entry) {
				ret = journal_replay_entry_early(c, entry);
				if (ret)
					goto err;
			}
	}

	bch2_fs_usage_initialize(c);

	for (i = 0; i < BTREE_ID_NR; i++) {
		struct btree_root *r = &c->btree_roots[i];

		if (!r->alive)
			continue;

		err = "invalid btree root pointer";
		ret = -1;
		if (r->error)
			goto err;

		if (i == BTREE_ID_ALLOC &&
		    test_reconstruct_alloc(c))
			continue;

		err = "error reading btree root";
		ret = bch2_btree_root_read(c, i, &r->key, r->level);
		if (ret) {
			if (i != BTREE_ID_ALLOC)
				goto err;

			mustfix_fsck_err(c, "error reading btree root");
			run_gc = true;
		}
	}

	for (i = 0; i < BTREE_ID_NR; i++)
		if (!c->btree_roots[i].b)
			bch2_btree_root_alloc(c, i);
	struct bch_sb_field_clean *clean = NULL;
	u64 journal_seq;
	LIST_HEAD(journal);
	int ret;

	if (c->sb.clean)
		clean = read_superblock_clean(c);
	ret = PTR_ERR_OR_ZERO(clean);
	if (ret)
		goto err;

	if (c->sb.clean)
		bch_info(c, "recovering from clean shutdown, journal seq %llu",
			 le64_to_cpu(clean->journal_seq));

	if (!c->replicas.entries) {
		bch_info(c, "building replicas info");
		set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
	}

	if (!c->sb.clean || c->opts.fsck) {
		struct jset *j;

		ret = bch2_journal_read(c, &journal);
		if (ret)
			goto err;

		fsck_err_on(c->sb.clean && !journal_empty(&journal), c,
			    "filesystem marked clean but journal not empty");

		if (!c->sb.clean && list_empty(&journal)) {
			bch_err(c, "no journal entries found");
			ret = BCH_FSCK_REPAIR_IMPOSSIBLE;
			goto err;
		}

		j = &list_last_entry(&journal, struct journal_replay, list)->j;

		ret = verify_superblock_clean(c, &clean, j);
		if (ret)
			goto err;

		journal_seq = le64_to_cpu(j->seq) + 1;
	} else {
		journal_seq = le64_to_cpu(clean->journal_seq) + 1;
	}

	ret = journal_replay_early(c, clean, &journal);
	if (ret)
		goto err;

	if (!c->sb.clean) {
		ret = bch2_journal_seq_blacklist_add(c,
						     journal_seq,
						     journal_seq + 4);
		if (ret) {
			bch_err(c, "error creating new journal seq blacklist entry");
			goto err;
		}

		journal_seq += 4;
	}

	ret = bch2_blacklist_table_initialize(c);

	ret = verify_journal_entries_not_blacklisted_or_missing(c, &journal);
	if (ret)
		goto err;

	ret = bch2_fs_journal_start(&c->journal, journal_seq, &journal);
	if (ret)
		goto err;

	ret = read_btree_roots(c);
	if (ret)
		goto err;
err = "error reading allocation information";
ret = bch2_alloc_read(c, &journal);
@@ -312,10 +442,12 @@ int bch2_fs_recovery(struct bch_fs *c)
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
if (run_gc) {
if (c->opts.fsck ||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
bch_verbose(c, "starting mark and sweep:");
err = "error in recovery";
ret = bch2_gc(c, &journal, true);
ret = bch2_gc(c, &journal, true, false);
if (ret)
goto err;
bch_verbose(c, "mark and sweep done");
@@ -334,13 +466,6 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.noreplay)
goto out;
/*
* bch2_fs_journal_start() can't happen sooner, or btree_gc_finish()
* will give spurious errors about oldest_gen > bucket_gen -
* this is a hack but oh well.
*/
bch2_fs_journal_start(&c->journal);
bch_verbose(c, "starting journal replay:");
err = "journal replay failed";
ret = bch2_journal_replay(c, &journal);
@@ -356,6 +481,14 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret)
goto err;
if (enabled_qtypes(c)) {
bch_verbose(c, "reading quotas:");
ret = bch2_fs_quota_read(c);
if (ret)
goto err;
bch_verbose(c, "quotas done");
}
mutex_lock(&c->sb_lock);
if (c->opts.version_upgrade) {
if (c->sb.version < bcachefs_metadata_version_new_versioning)
@@ -371,14 +504,9 @@ int bch2_fs_recovery(struct bch_fs *c)
}
mutex_unlock(&c->sb_lock);
if (enabled_qtypes(c)) {
bch_verbose(c, "reading quotas:");
ret = bch2_fs_quota_read(c);
if (ret)
goto err;
bch_verbose(c, "quotas done");
}
if (c->journal_seq_blacklist_table &&
c->journal_seq_blacklist_table->nr > 128)
queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
out:
bch2_journal_entries_free(&journal);
kfree(clean);
@@ -427,7 +555,7 @@ int bch2_fs_initialize(struct bch_fs *c)
* journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer:
*/
bch2_fs_journal_start(&c->journal);
bch2_fs_journal_start(&c->journal, 1, &journal);
bch2_journal_set_replay_done(&c->journal);
err = "error going read write";

View File

@@ -6,6 +6,7 @@
#include "error.h"
#include "io.h"
#include "journal.h"
#include "journal_seq_blacklist.h"
#include "replicas.h"
#include "quota.h"
#include "super-io.h"

View File

@@ -29,6 +29,7 @@
#include "io.h"
#include "journal.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "move.h"
#include "migrate.h"
#include "movinggc.h"
@@ -499,6 +500,7 @@ static void bch2_fs_free(struct bch_fs *c)
kfree(c->replicas.entries);
kfree(c->replicas_gc.entries);
kfree(rcu_dereference_protected(c->disk_groups, 1));
kfree(c->journal_seq_blacklist_table);
if (c->journal_reclaim_wq)
destroy_workqueue(c->journal_reclaim_wq);
@@ -527,6 +529,10 @@ void bch2_fs_stop(struct bch_fs *c)
bch_verbose(c, "shutting down");
set_bit(BCH_FS_STOPPING, &c->flags);
cancel_work_sync(&c->journal_seq_blacklist_gc_work);
for_each_member_device(ca, c, i)
if (ca->kobj.state_in_sysfs &&
ca->disk_sb.bdev)
@@ -663,6 +669,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
spin_lock_init(&c->btree_write_error_lock);
INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work);
INIT_WORK(&c->journal_seq_blacklist_gc_work,
bch2_blacklist_entries_gc);
INIT_LIST_HEAD(&c->fsck_errors);
mutex_init(&c->fsck_error_lock);

View File

@@ -496,7 +496,7 @@ STORE(__bch2_fs)
bch2_coalesce(c);
if (attr == &sysfs_trigger_gc)
bch2_gc(c, NULL, false);
bch2_gc(c, NULL, false, false);
if (attr == &sysfs_trigger_alloc_write) {
bool wrote;

View File

@@ -17,7 +17,7 @@
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <crypto/chacha.h>
#include <crypto/skcipher.h>
#include <sodium/crypto_stream_chacha20.h>
@@ -36,7 +36,7 @@ static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
container_of(tfm, struct chacha20_tfm, tfm);
int i;
if (keysize != CHACHA20_KEY_SIZE)
if (keysize != CHACHA_KEY_SIZE)
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
@@ -72,8 +72,8 @@ static int crypto_chacha20_crypt(struct skcipher_request *req)
if (sg_is_last(sg))
break;
BUG_ON(sg->length % CHACHA20_BLOCK_SIZE);
iv[0] += sg->length / CHACHA20_BLOCK_SIZE;
BUG_ON(sg->length % CHACHA_BLOCK_SIZE);
iv[0] += sg->length / CHACHA_BLOCK_SIZE;
sg = sg_next(sg);
};
@@ -93,8 +93,8 @@ static void *crypto_chacha20_alloc_tfm(void)
tfm->tfm.setkey = crypto_chacha20_setkey;
tfm->tfm.encrypt = crypto_chacha20_crypt;
tfm->tfm.decrypt = crypto_chacha20_crypt;
tfm->tfm.ivsize = CHACHA20_IV_SIZE;
tfm->tfm.keysize = CHACHA20_KEY_SIZE;
tfm->tfm.ivsize = CHACHA_IV_SIZE;
tfm->tfm.keysize = CHACHA_KEY_SIZE;
return tfm;
}
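For reference, the block-counter bookkeeping the renamed constants are used for above: ChaCha consumes one 64-byte block per counter increment, and this driver keeps the counter in iv[0], so each scatterlist segment advances it by length / CHACHA_BLOCK_SIZE. A standalone sketch of that arithmetic:

#include <assert.h>
#include <stdint.h>

#define CHACHA_BLOCK_SIZE 64

int main(void)
{
	uint64_t iv0 = 0;	/* block counter, as in iv[0] above */
	unsigned seg_lengths[] = { 128, 192, 64 };
	unsigned i;

	for (i = 0; i < 3; i++) {
		assert(seg_lengths[i] % CHACHA_BLOCK_SIZE == 0);
		iv0 += seg_lengths[i] / CHACHA_BLOCK_SIZE;
	}

	assert(iv0 == 6);	/* (128 + 192 + 64) / 64 blocks consumed */
	return 0;
}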