Update bcachefs sources to d83b992f65 bcachefs: Rewrite journal_seq_blacklist machinery

commit d13bbb2955
parent be02db130b
@@ -1 +1 @@
-1712318522fdaa533f8622f4c7da05e44a4828b0
+d83b992f653d9f742f3f8567dbcfd1f4f72e858f

@@ -8,8 +8,8 @@
 #include <linux/types.h>
 #include <linux/crypto.h>
 
-#define CHACHA20_IV_SIZE	16
-#define CHACHA20_KEY_SIZE	32
-#define CHACHA20_BLOCK_SIZE	64
+#define CHACHA_IV_SIZE		16
+#define CHACHA_KEY_SIZE		32
+#define CHACHA_BLOCK_SIZE	64
 
 #endif

@@ -147,12 +147,9 @@ static inline u64 ktime_get_real_seconds(void)
 	return ts.tv_sec;
 }
 
-static inline struct timespec current_kernel_time(void)
+static inline void ktime_get_real_ts64(struct timespec64 *ts)
 {
-	struct timespec ts;
-
-	clock_gettime(CLOCK_MONOTONIC, &ts);
-	return ts;
+	clock_gettime(CLOCK_MONOTONIC, ts);
 }
 
 #define current_kernel_time64()	current_kernel_time()

@@ -619,6 +619,11 @@ static void bch2_sb_print_clean(struct bch_sb *sb, struct bch_sb_field *f,
 {
 }
 
+static void bch2_sb_print_journal_seq_blacklist(struct bch_sb *sb, struct bch_sb_field *f,
+						enum units units)
+{
+}
+
 typedef void (*sb_field_print_fn)(struct bch_sb *, struct bch_sb_field *, enum units);
 
 struct bch_sb_field_toolops {

@@ -290,8 +290,9 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
 		bch2_alloc_read_key(c, bkey_i_to_s_c(k));
 	}
 
-	for_each_member_device(ca, c, i)
-		bch2_dev_usage_from_buckets(c, ca);
+	percpu_down_write(&c->mark_lock);
+	bch2_dev_usage_from_buckets(c);
+	percpu_up_write(&c->mark_lock);
 
 	mutex_lock(&c->bucket_clock[READ].lock);
 	for_each_member_device(ca, c, i) {

@@ -183,6 +183,7 @@
 #include <linux/closure.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
+#include <linux/math64.h>
 #include <linux/mutex.h>
 #include <linux/percpu-refcount.h>
 #include <linux/percpu-rwsem.h>

@@ -220,6 +221,8 @@
 	printk(KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_warn(c, fmt, ...) \
 	printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_warn_ratelimited(c, fmt, ...) \
+	printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_err(c, fmt, ...) \
 	printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_err_ratelimited(c, fmt, ...) \

@@ -481,6 +484,7 @@ enum {
 	BCH_FS_RW,
 
 	/* shutdown: */
+	BCH_FS_STOPPING,
 	BCH_FS_EMERGENCY_RO,
 	BCH_FS_WRITE_DISABLE_COMPLETE,
 

@@ -506,6 +510,15 @@ struct bch_fs_pcpu {
 	u64			sectors_available;
 };
 
+struct journal_seq_blacklist_table {
+	size_t			nr;
+	struct journal_seq_blacklist_table_entry {
+		u64		start;
+		u64		end;
+		bool		dirty;
+	}			entries[0];
+};
+
 struct bch_fs {
 	struct closure		cl;
 
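The flattened journal_seq_blacklist_table above is the runtime half of the rewritten machinery: the ranges stored in the superblock get baked into this one sorted array, so the bch2_journal_seq_is_blacklisted() check that btree_io.c performs further down can be a simple lock-free search. A minimal sketch of such a lookup — the helper name, the [start, end) range convention, and the dirty-marking detail are illustrative assumptions, not the committed code:

	/* Sketch: binary search over the flattened blacklist table.
	 * Assumes entries[] is sorted by start and non-overlapping,
	 * with end exclusive. */
	static bool seq_is_blacklisted(struct journal_seq_blacklist_table *t,
				       u64 seq, bool set_dirty)
	{
		size_t l = 0, r = t->nr;

		while (l < r) {
			size_t m = l + (r - l) / 2;

			if (seq >= t->entries[m].end)
				l = m + 1;
			else if (seq < t->entries[m].start)
				r = m;
			else {
				/* seq falls inside a blacklisted range */
				if (set_dirty)
					t->entries[m].dirty = true;
				return true;
			}
		}
		return false;
	}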
@@ -641,6 +654,11 @@ struct bch_fs {
 
 	struct io_clock		io_clock[2];
 
+	/* JOURNAL SEQ BLACKLIST */
+	struct journal_seq_blacklist_table *
+				journal_seq_blacklist_table;
+	struct work_struct	journal_seq_blacklist_gc_work;
+
 	/* ALLOCATOR */
 	spinlock_t		freelist_lock;
 	struct closure_waitlist	freelist_wait;

@@ -794,4 +812,27 @@ static inline unsigned block_bytes(const struct bch_fs *c)
 	return c->opts.block_size << 9;
 }
 
+static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time)
+{
+	return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo);
+}
+
+static inline s64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts)
+{
+	s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo;
+
+	if (c->sb.time_precision == 1)
+		return ns;
+
+	return div_s64(ns, c->sb.time_precision);
+}
+
+static inline s64 bch2_current_time(struct bch_fs *c)
+{
+	struct timespec64 now;
+
+	ktime_get_real_ts64(&now);
+	return timespec_to_bch2_time(c, now);
+}
+
 #endif /* _BCACHEFS_H */
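The three helpers just added centralize conversion between on-disk timestamps (units of c->sb.time_precision nanoseconds since c->sb.time_base_lo) and struct timespec64; the bch2_current_time() that fs.h loses further down moves here. A small worked example of the round trip, assuming time_precision = 1000 (microsecond granularity) and time_base_lo = 0 — the values are illustrative only:

	struct timespec64 ts = { .tv_sec = 1, .tv_nsec = 500 };
	s64 t = timespec_to_bch2_time(c, ts);
	/* ns = 1000000500, t = div_s64(1000000500, 1000) = 1000000 */
	struct timespec64 back = bch2_time_to_timespec(c, t);
	/* back = { .tv_sec = 1, .tv_nsec = 0 }: the 500ns remainder is
	 * below the configured precision and is truncated away. */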
@@ -904,7 +904,8 @@ struct bch_sb_field {
 	x(quota,			4)	\
 	x(disk_groups,			5)	\
 	x(clean,			6)	\
-	x(replicas,			7)
+	x(replicas,			7)	\
+	x(journal_seq_blacklist,	8)
 
 enum bch_sb_field_type {
 #define x(f, nr)	BCH_SB_FIELD_##f = nr,

@@ -1119,6 +1120,20 @@ struct bch_sb_field_clean {
 	};
 };
 
+struct journal_seq_blacklist_entry {
+	__le64			start;
+	__le64			end;
+};
+
+struct bch_sb_field_journal_seq_blacklist {
+	struct bch_sb_field	field;
+
+	union {
+		struct journal_seq_blacklist_entry start[0];
+		__u64		_data[0];
+	};
+};
+
 /* Superblock: */
 
 /*
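The new superblock field is a variable-length array of little-endian range pairs; its length is implied by the field's size rather than stored separately. Under the usual bcachefs vstruct conventions (field->u64s covering the whole field, vstruct_end() from vstructs.h), the entry count would be recovered roughly like this — a sketch, not necessarily the exact helper the new journal_seq_blacklist code ships with:

	static size_t blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
	{
		return bl
			? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
			   sizeof(struct journal_seq_blacklist_entry))
			: 0;
	}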
@@ -1274,6 +1289,7 @@ enum bch_sb_features {
 	BCH_FEATURE_ZSTD		= 2,
 	BCH_FEATURE_ATOMIC_NLINK	= 3, /* should have gone under compat */
 	BCH_FEATURE_EC			= 4,
+	BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3 = 5,
 	BCH_FEATURE_NR,
 };
 

@@ -114,7 +114,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 	const struct bch_extent_ptr *ptr;
-	struct gc_pos pos = { 0 };
 	unsigned flags =
 		BCH_BUCKET_MARK_GC|
 		(initial ? BCH_BUCKET_MARK_NOATOMIC : 0);

@@ -171,7 +170,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
 		*max_stale = max(*max_stale, ptr_stale(ca, ptr));
 	}
 
-	bch2_mark_key(c, k, true, k.k->size, pos, NULL, 0, flags);
+	bch2_mark_key(c, k, true, k.k->size, NULL, 0, flags);
 fsck_err:
 	return ret;
 }

@@ -202,7 +201,7 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
 }
 
 static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
-			 bool initial)
+			 bool initial, bool metadata_only)
 {
 	struct btree_trans trans;
 	struct btree_iter *iter;

@@ -222,7 +221,9 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 	 * and on startup, we have to read every btree node (XXX: only if it was
 	 * an unclean shutdown)
 	 */
-	if (initial || expensive_debug_checks(c))
+	if (metadata_only)
+		depth = 1;
+	else if (initial || expensive_debug_checks(c))
 		depth = 0;
 
 	btree_node_range_checks_init(&r, depth);

@@ -278,7 +279,7 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
 }
 
 static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
-			  bool initial)
+			  bool initial, bool metadata_only)
 {
 	enum btree_id ids[BTREE_ID_NR];
 	u8 max_stale;

@@ -292,11 +293,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
 		enum btree_id id = ids[i];
 		enum btree_node_type type = __btree_node_type(0, id);
 
-		int ret = bch2_gc_btree(c, id, initial);
+		int ret = bch2_gc_btree(c, id, initial, metadata_only);
 		if (ret)
 			return ret;
 
-		if (journal && btree_node_type_needs_gc(type)) {
+		if (journal && !metadata_only &&
+		    btree_node_type_needs_gc(type)) {
 			struct bkey_i *k, *n;
 			struct jset_entry *j;
 			struct journal_replay *r;

@@ -397,7 +399,6 @@ static void bch2_mark_superblocks(struct bch_fs *c)
 /* Also see bch2_pending_btree_node_free_insert_done() */
 static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
 {
-	struct gc_pos pos = { 0 };
 	struct btree_update *as;
 	struct pending_btree_node_free *d;
 

@@ -407,8 +408,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
 	for_each_pending_btree_node_free(c, as, d)
 		if (d->index_update_done)
 			bch2_mark_key(c, bkey_i_to_s_c(&d->key),
-				      true, 0,
-				      pos, NULL, 0,
+				      true, 0, NULL, 0,
 				      BCH_BUCKET_MARK_GC);
 
 	mutex_unlock(&c->btree_interior_update_lock);

@@ -481,25 +481,28 @@ static void bch2_gc_free(struct bch_fs *c)
 	c->usage[1] = NULL;
 }
 
-static void bch2_gc_done(struct bch_fs *c, bool initial)
+static int bch2_gc_done(struct bch_fs *c,
+			bool initial, bool metadata_only)
 {
 	struct bch_dev *ca;
-	bool verify = !initial ||
-		(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO));
+	bool verify = !metadata_only &&
+		(!initial ||
+		 (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)));
 	unsigned i;
+	int ret = 0;
 
 #define copy_field(_f, _msg, ...)					\
 	if (dst->_f != src->_f) {					\
 		if (verify)						\
-			bch_err(c, _msg ": got %llu, should be %llu, fixing"\
+			fsck_err(c, _msg ": got %llu, should be %llu"	\
 				, ##__VA_ARGS__, dst->_f, src->_f);	\
 		dst->_f = src->_f;					\
 	}
 #define copy_stripe_field(_f, _msg, ...)				\
 	if (dst->_f != src->_f) {					\
 		if (verify)						\
-			bch_err_ratelimited(c, "stripe %zu has wrong "_msg\
-				": got %u, should be %u, fixing",	\
+			fsck_err(c, "stripe %zu has wrong "_msg		\
+				": got %u, should be %u",		\
 				dst_iter.pos, ##__VA_ARGS__,		\
 				dst->_f, src->_f);			\
 		dst->_f = src->_f;					\

@@ -508,8 +511,8 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 #define copy_bucket_field(_f)						\
 	if (dst->b[b].mark._f != src->b[b].mark._f) {			\
 		if (verify)						\
-			bch_err_ratelimited(c, "dev %u bucket %zu has wrong " #_f\
-				": got %u, should be %u, fixing", i, b,	\
+			fsck_err(c, "dev %u bucket %zu has wrong " #_f	\
+				": got %u, should be %u", i, b,		\
 				dst->b[b].mark._f, src->b[b].mark._f);	\
 		dst->b[b]._mark._f = src->b[b].mark._f;			\
 		dst->b[b]._mark.dirty = true;				\

@@ -519,7 +522,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 #define copy_fs_field(_f, _msg, ...)					\
 	copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)
 
-	{
+	if (!metadata_only) {
 		struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
 		struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
 		struct stripe *dst, *src;

@@ -571,26 +574,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 		}
 	};
 
-	for_each_member_device(ca, c, i) {
-		unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
-		struct bch_dev_usage *dst = (void *)
-			bch2_acc_percpu_u64s((void *) ca->usage[0], nr);
-		struct bch_dev_usage *src = (void *)
-			bch2_acc_percpu_u64s((void *) ca->usage[1], nr);
-		unsigned b;
-
-		for (b = 0; b < BCH_DATA_NR; b++)
-			copy_dev_field(buckets[b], "buckets[%s]",
-				       bch2_data_types[b]);
-		copy_dev_field(buckets_alloc, "buckets_alloc");
-		copy_dev_field(buckets_ec, "buckets_ec");
-		copy_dev_field(buckets_unavailable, "buckets_unavailable");
-
-		for (b = 0; b < BCH_DATA_NR; b++)
-			copy_dev_field(sectors[b], "sectors[%s]",
-				       bch2_data_types[b]);
-		copy_dev_field(sectors_fragmented, "sectors_fragmented");
-	}
+	bch2_dev_usage_from_buckets(c);
 
 	{
 		unsigned nr = fs_usage_u64s(c);

@@ -600,6 +584,9 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 			bch2_acc_percpu_u64s((void *) c->usage[1], nr);
 
 		copy_fs_field(hidden, "hidden");
+		copy_fs_field(btree, "btree");
+
+		if (!metadata_only) {
 		copy_fs_field(data, "data");
 		copy_fs_field(cached, "cached");
 		copy_fs_field(reserved, "reserved");

@@ -608,12 +595,18 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 		for (i = 0; i < BCH_REPLICAS_MAX; i++)
 			copy_fs_field(persistent_reserved[i],
 				      "persistent_reserved[%i]", i);
+		}
 
 		for (i = 0; i < c->replicas.nr; i++) {
 			struct bch_replicas_entry *e =
 				cpu_replicas_entry(&c->replicas, i);
 			char buf[80];
+
+			if (metadata_only &&
+			    (e->data_type == BCH_DATA_USER ||
+			     e->data_type == BCH_DATA_CACHED))
+				continue;
 
 			bch2_replicas_entry_to_text(&PBUF(buf), e);
 
 			copy_fs_field(replicas[i], "%s", buf);

@@ -625,9 +618,12 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 #undef copy_bucket_field
 #undef copy_stripe_field
 #undef copy_field
+fsck_err:
+	return ret;
 }
 
-static int bch2_gc_start(struct bch_fs *c)
+static int bch2_gc_start(struct bch_fs *c,
+			 bool metadata_only)
 {
 	struct bch_dev *ca;
 	unsigned i;

@@ -673,10 +669,18 @@ static int bch2_gc_start(struct bch_fs *c)
 		dst->nbuckets		= src->nbuckets;
 
 		for (b = 0; b < src->nbuckets; b++) {
-			dst->b[b]._mark.gen =
-				dst->b[b].oldest_gen =
-				src->b[b].mark.gen;
-			dst->b[b].gen_valid = src->b[b].gen_valid;
+			struct bucket *d = &dst->b[b];
+			struct bucket *s = &src->b[b];
+
+			d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
+			d->gen_valid = s->gen_valid;
+
+			if (metadata_only &&
+			    (s->mark.data_type == BCH_DATA_USER ||
+			     s->mark.data_type == BCH_DATA_CACHED)) {
+				d->_mark = s->mark;
+				d->_mark.owned_by_allocator = 0;
+			}
 		}
 	};
 

@@ -701,7 +705,8 @@ static int bch2_gc_start(struct bch_fs *c)
  * move around - if references move backwards in the ordering GC
  * uses, GC could skip past them
  */
-int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
+int bch2_gc(struct bch_fs *c, struct list_head *journal,
+	    bool initial, bool metadata_only)
 {
 	struct bch_dev *ca;
 	u64 start_time = local_clock();

@@ -713,7 +718,7 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
 	down_write(&c->gc_lock);
 again:
 	percpu_down_write(&c->mark_lock);
-	ret = bch2_gc_start(c);
+	ret = bch2_gc_start(c, metadata_only);
 	percpu_up_write(&c->mark_lock);
 
 	if (ret)

@@ -721,7 +726,7 @@ again:
 
 	bch2_mark_superblocks(c);
 
-	ret = bch2_gc_btrees(c, journal, initial);
+	ret = bch2_gc_btrees(c, journal, initial, metadata_only);
 	if (ret)
 		goto out;
 

@@ -755,7 +760,7 @@ out:
 	percpu_down_write(&c->mark_lock);
 
 	if (!ret)
-		bch2_gc_done(c, initial);
+		ret = bch2_gc_done(c, initial, metadata_only);
 
 	/* Indicates that gc is no longer in progress: */
 	__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));

@@ -1157,7 +1162,7 @@ static int bch2_gc_thread(void *arg)
 		last = atomic_long_read(&clock->now);
 		last_kick = atomic_read(&c->kick_gc);
 
-		ret = bch2_gc(c, NULL, false);
+		ret = bch2_gc(c, NULL, false, false);
 		if (ret)
 			bch_err(c, "btree gc failed: %i", ret);
 

@@ -4,7 +4,7 @@
 #include "btree_types.h"
 
 void bch2_coalesce(struct bch_fs *);
-int bch2_gc(struct bch_fs *, struct list_head *, bool);
+int bch2_gc(struct bch_fs *, struct list_head *, bool, bool);
 void bch2_gc_thread_stop(struct bch_fs *);
 int bch2_gc_thread_start(struct bch_fs *);
 void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);

@@ -509,7 +509,7 @@ static void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
 		bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags,
 			     bytes);
 
-		nonce = nonce_add(nonce, round_up(bytes, CHACHA20_BLOCK_SIZE));
+		nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
 	}
 
 	bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,

@@ -770,7 +770,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 	struct btree_node *sorted;
 	struct bkey_packed *k;
 	struct bset *i;
-	bool used_mempool;
+	bool used_mempool, blacklisted;
 	unsigned u64s;
 	int ret, retry_read = 0, write = READ;
 

@@ -844,20 +844,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 
 		b->written += sectors;
 
-		ret = bch2_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b);
-		if (ret < 0) {
-			btree_err(BTREE_ERR_FATAL, c, b, i,
-				  "insufficient memory");
-			goto err;
-		}
+		blacklisted = bch2_journal_seq_is_blacklisted(c,
+					le64_to_cpu(i->journal_seq),
+					true);
 
-		if (ret) {
-			btree_err_on(first,
-				     BTREE_ERR_FIXABLE, c, b, i,
-				     "first btree node bset has blacklisted journal seq");
-			if (!first)
-				continue;
-		}
+		btree_err_on(blacklisted && first,
+			     BTREE_ERR_FIXABLE, c, b, i,
+			     "first btree node bset has blacklisted journal seq");
+		if (blacklisted && !first)
+			continue;
 
 		bch2_btree_node_iter_large_push(iter, b,
 						i->start,

@@ -930,7 +925,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 out:
 	mempool_free(iter, &c->fill_iter);
 	return retry_read;
-err:
 fsck_err:
 	if (ret == BTREE_RETRY_READ) {
 		retry_read = 1;

@@ -818,14 +818,6 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
 	struct btree_iter *linked;
 	unsigned level = b->level;
 
-	/* caller now responsible for unlocking @b */
-
-	BUG_ON(iter->l[level].b != b);
-	BUG_ON(!btree_node_intent_locked(iter, level));
-
-	iter->l[level].b = BTREE_ITER_NOT_END;
-	mark_btree_node_unlocked(iter, level);
-
 	trans_for_each_iter(iter->trans, linked)
 		if (linked->l[level].b == b) {
 			__btree_node_unlock(linked, level);

@@ -990,6 +982,7 @@ retry_all:
 	}
 
 	if (unlikely(ret == -EIO)) {
+		trans->error = true;
 		iter->flags |= BTREE_ITER_ERROR;
 		iter->l[iter->level].b = BTREE_ITER_NOT_END;
 		goto out;

@@ -1162,6 +1155,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
 	if (!btree_iter_node(iter, iter->level))
 		return NULL;
 
+	bch2_trans_cond_resched(iter->trans);
+
 	btree_iter_up(iter);
 
 	if (!bch2_btree_node_relock(iter, iter->level))

@@ -1712,7 +1707,7 @@ void bch2_trans_preload_iters(struct btree_trans *trans)
 
 static int btree_trans_iter_alloc(struct btree_trans *trans)
 {
-	unsigned idx = ffz(trans->iters_linked);
+	unsigned idx = __ffs64(~trans->iters_linked);
 
 	if (idx < trans->nr_iters)
 		goto got_slot;

@@ -1877,17 +1872,17 @@ void *bch2_trans_kmalloc(struct btree_trans *trans,
 
 int bch2_trans_unlock(struct btree_trans *trans)
 {
-	unsigned iters = trans->iters_linked;
+	u64 iters = trans->iters_linked;
 	int ret = 0;
 
 	while (iters) {
-		unsigned idx = __ffs(iters);
+		unsigned idx = __ffs64(iters);
 		struct btree_iter *iter = &trans->iters[idx];
 
 		ret = ret ?: btree_iter_err(iter);
 
 		__bch2_btree_iter_unlock(iter);
-		iters ^= 1 << idx;
+		iters ^= 1ULL << idx;
 	}
 
 	return ret;
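The type changes in bch2_trans_unlock() matter because iters_linked is a 64-bit bitmask: with the old unsigned/__ffs()/1 << idx combination, any iterator slot at index 32 or above was unreachable, and 1 << idx is undefined behaviour there. A condensed sketch of the corrected walk, under the assumption that the mask can populate its upper half:

	u64 iters = trans->iters_linked;

	while (iters) {
		unsigned idx = __ffs64(iters);	/* finds bits 32..63 too */

		/* ... visit trans->iters[idx] ... */
		iters ^= 1ULL << idx;		/* "1 << idx" would be UB here */
	}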
@@ -1949,7 +1944,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c)
 
 int bch2_trans_exit(struct btree_trans *trans)
 {
-	int ret = bch2_trans_unlock(trans);
+	bch2_trans_unlock(trans);
 
 	kfree(trans->mem);
 	if (trans->used_mempool)

@@ -1958,5 +1953,6 @@ int bch2_trans_exit(struct btree_trans *trans)
 	kfree(trans->iters);
 	trans->mem = (void *) 0x1;
 	trans->iters = (void *) 0x1;
-	return ret;
+
+	return trans->error ? -EIO : 0;
 }

@@ -279,6 +279,7 @@ struct btree_trans {
 	u8			nr_updates;
 	u8			size;
 	unsigned		used_mempool:1;
+	unsigned		error:1;
 
 	unsigned		mem_top;
 	unsigned		mem_bytes;

@@ -161,7 +161,6 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
 {
 	struct bch_fs *c = as->c;
 	struct pending_btree_node_free *d;
-	struct gc_pos pos = { 0 };
 
 	for (d = as->pending; d < as->pending + as->nr_pending; d++)
 		if (!bkey_cmp(k.k->p, d->key.k.p) &&

@@ -189,18 +188,12 @@ found:
 	 * to cancel out one of mark and sweep's markings if necessary:
 	 */
 
-	/*
-	 * bch2_mark_key() compares the current gc pos to the pos we're
-	 * moving this reference from, hence one comparison here:
-	 */
 	if (gc_pos_cmp(c->gc_pos, b
 		       ? gc_pos_btree_node(b)
 		       : gc_pos_btree_root(as->btree_id)) >= 0 &&
 	    gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0)
-		bch2_mark_key_locked(c,
-			     bkey_i_to_s_c(&d->key),
-			     false, 0, pos,
-			     NULL, 0, BCH_BUCKET_MARK_GC);
+		bch2_mark_key_locked(c, bkey_i_to_s_c(&d->key),
+				     false, 0, NULL, 0, BCH_BUCKET_MARK_GC);
 }
 
 static void __btree_node_free(struct bch_fs *c, struct btree *b)

@@ -272,8 +265,11 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
 
 	bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
 		      false, 0,
-		      gc_phase(GC_PHASE_PENDING_DELETE),
 		      NULL, 0, 0);
+
+	if (gc_visited(c, gc_phase(GC_PHASE_PENDING_DELETE)))
+		bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
+			      false, 0, NULL, 0, BCH_BUCKET_MARK_GC);
 }
 
 static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,

@@ -1078,9 +1074,11 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
 		fs_usage = bch2_fs_usage_scratch_get(c);
 
 		bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
-			      true, 0,
-			      gc_pos_btree_root(b->btree_id),
-			      fs_usage, 0, 0);
+				     true, 0, fs_usage, 0, 0);
+		if (gc_visited(c, gc_pos_btree_root(b->btree_id)))
+			bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
+					     true, 0, NULL, 0,
+					     BCH_BUCKET_MARK_GC);
 
 		if (old && !btree_node_fake(old))
 			bch2_btree_node_free_index(as, NULL,

@@ -1172,8 +1170,11 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 	fs_usage = bch2_fs_usage_scratch_get(c);
 
 	bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
-			     true, 0,
-			     gc_pos_btree_node(b), fs_usage, 0, 0);
+			     true, 0, fs_usage, 0, 0);
+
+	if (gc_visited(c, gc_pos_btree_node(b)))
+		bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
+				     true, 0, NULL, 0, BCH_BUCKET_MARK_GC);
 
 	while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
 	       bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)

@@ -1428,6 +1429,7 @@ static void btree_split(struct btree_update *as, struct btree *b,
 
 	/* Successful split, update the iterator to point to the new nodes: */
 
+	six_lock_increment(&b->lock, SIX_LOCK_intent);
 	bch2_btree_iter_node_drop(iter, b);
 	if (n3)
 		bch2_btree_iter_node_replace(iter, n3);

@@ -1739,7 +1741,10 @@ retry:
 
 	bch2_open_buckets_put(c, &n->ob);
 
+	six_lock_increment(&b->lock, SIX_LOCK_intent);
 	bch2_btree_iter_node_drop(iter, b);
+	bch2_btree_iter_node_drop(iter, m);
+
 	bch2_btree_iter_node_replace(iter, n);
 
 	bch2_btree_iter_verify(iter, n);

@@ -1837,6 +1842,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
 
 	bch2_open_buckets_put(c, &n->ob);
 
+	six_lock_increment(&b->lock, SIX_LOCK_intent);
 	bch2_btree_iter_node_drop(iter, b);
 	bch2_btree_iter_node_replace(iter, n);
 	bch2_btree_node_free_inmem(c, b, iter);

@@ -1988,9 +1994,12 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 		fs_usage = bch2_fs_usage_scratch_get(c);
 
 		bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
-			      true, 0,
-			      gc_pos_btree_root(b->btree_id),
-			      fs_usage, 0, 0);
+				     true, 0, fs_usage, 0, 0);
+		if (gc_visited(c, gc_pos_btree_root(b->btree_id)))
+			bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
+					     true, 0, NULL, 0,
+					     BCH_BUCKET_MARK_GC);
 
 		bch2_btree_node_free_index(as, NULL,
 					   bkey_i_to_s_c(&b->key),
 					   fs_usage);

@@ -2,6 +2,7 @@
 #include "bcachefs.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
+#include "btree_gc.h"
 #include "btree_io.h"
 #include "btree_iter.h"
 #include "btree_locking.h"

@@ -601,10 +602,17 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 	}
 
 	trans_for_each_update_iter(trans, i)
-		bch2_mark_update(trans, i, fs_usage);
+		bch2_mark_update(trans, i, fs_usage, 0);
 	if (fs_usage)
 		bch2_trans_fs_usage_apply(trans, fs_usage);
 
+	if (unlikely(c->gc_pos.phase)) {
+		trans_for_each_update_iter(trans, i)
+			if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
+				bch2_mark_update(trans, i, NULL,
+						 BCH_BUCKET_MARK_GC);
+	}
+
 	trans_for_each_update(trans, i)
 		do_btree_insert_one(trans, i);
 out:
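The do_btree_insert_at() hunk above shows the marking discipline the whole commit converges on, now that bch2_mark_key() no longer takes a gc_pos: every key is marked once against the live usage counters, and marked a second time with BCH_BUCKET_MARK_GC only if mark-and-sweep has already swept past the position being updated. Condensed from the call sites above (not a verbatim copy of any one of them):

	bch2_mark_key_locked(c, k, true, sectors, fs_usage, journal_seq, 0);

	if (gc_visited(c, gc_pos_btree_node(b)))
		bch2_mark_key_locked(c, k, true, sectors, NULL, 0,
				     BCH_BUCKET_MARK_GC);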
@ -852,12 +860,15 @@ out_noupdates:
|
|||||||
|
|
||||||
BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
|
BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
|
||||||
|
|
||||||
bch2_trans_unlink_iters(trans, trans->iters_unlink_on_commit);
|
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
bch2_trans_unlink_iters(trans, ~trans->iters_touched);
|
bch2_trans_unlink_iters(trans, ~trans->iters_touched|
|
||||||
|
trans->iters_unlink_on_commit);
|
||||||
trans->iters_touched = 0;
|
trans->iters_touched = 0;
|
||||||
|
} else {
|
||||||
|
bch2_trans_unlink_iters(trans, trans->iters_unlink_on_commit);
|
||||||
}
|
}
|
||||||
trans->nr_updates = 0;
|
trans->nr_updates = 0;
|
||||||
|
trans->mem_top = 0;
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
err:
|
err:
|
||||||
|
@ -131,6 +131,8 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
|
|||||||
|
|
||||||
switch (e->data_type) {
|
switch (e->data_type) {
|
||||||
case BCH_DATA_BTREE:
|
case BCH_DATA_BTREE:
|
||||||
|
usage->btree += usage->replicas[i];
|
||||||
|
break;
|
||||||
case BCH_DATA_USER:
|
case BCH_DATA_USER:
|
||||||
usage->data += usage->replicas[i];
|
usage->data += usage->replicas[i];
|
||||||
break;
|
break;
|
||||||
@ -225,6 +227,7 @@ static u64 avail_factor(u64 r)
|
|||||||
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
|
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
|
||||||
{
|
{
|
||||||
return min(fs_usage->hidden +
|
return min(fs_usage->hidden +
|
||||||
|
fs_usage->btree +
|
||||||
fs_usage->data +
|
fs_usage->data +
|
||||||
reserve_factor(fs_usage->reserved +
|
reserve_factor(fs_usage->reserved +
|
||||||
fs_usage->online_reserved),
|
fs_usage->online_reserved),
|
||||||
@ -240,7 +243,8 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
|
|||||||
ret.capacity = c->capacity -
|
ret.capacity = c->capacity -
|
||||||
percpu_u64_get(&c->usage[0]->hidden);
|
percpu_u64_get(&c->usage[0]->hidden);
|
||||||
|
|
||||||
data = percpu_u64_get(&c->usage[0]->data);
|
data = percpu_u64_get(&c->usage[0]->data) +
|
||||||
|
percpu_u64_get(&c->usage[0]->btree);
|
||||||
reserved = percpu_u64_get(&c->usage[0]->reserved) +
|
reserved = percpu_u64_get(&c->usage[0]->reserved) +
|
||||||
percpu_u64_get(&c->usage[0]->online_reserved);
|
percpu_u64_get(&c->usage[0]->online_reserved);
|
||||||
|
|
||||||
@ -383,21 +387,32 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
|||||||
bch2_wake_allocator(ca);
|
bch2_wake_allocator(ca);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
|
void bch2_dev_usage_from_buckets(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
|
struct bch_dev *ca;
|
||||||
struct bucket_mark old = { .v.counter = 0 };
|
struct bucket_mark old = { .v.counter = 0 };
|
||||||
struct bch_fs_usage *fs_usage;
|
struct bch_fs_usage *fs_usage;
|
||||||
struct bucket_array *buckets;
|
struct bucket_array *buckets;
|
||||||
struct bucket *g;
|
struct bucket *g;
|
||||||
|
unsigned i;
|
||||||
|
int cpu;
|
||||||
|
|
||||||
percpu_down_read_preempt_disable(&c->mark_lock);
|
percpu_u64_set(&c->usage[0]->hidden, 0);
|
||||||
|
|
||||||
|
for_each_member_device(ca, c, i) {
|
||||||
|
for_each_possible_cpu(cpu)
|
||||||
|
memset(per_cpu_ptr(ca->usage[0], cpu), 0,
|
||||||
|
sizeof(*ca->usage[0]));
|
||||||
|
|
||||||
|
preempt_disable();
|
||||||
fs_usage = this_cpu_ptr(c->usage[0]);
|
fs_usage = this_cpu_ptr(c->usage[0]);
|
||||||
buckets = bucket_array(ca);
|
buckets = bucket_array(ca);
|
||||||
|
|
||||||
for_each_bucket(g, buckets)
|
for_each_bucket(g, buckets)
|
||||||
if (g->mark.data_type)
|
bch2_dev_usage_update(c, ca, fs_usage,
|
||||||
bch2_dev_usage_update(c, ca, fs_usage, old, g->mark, false);
|
old, g->mark, false);
|
||||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
preempt_enable();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr) \
|
#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr) \
|
||||||
@ -418,10 +433,17 @@ static inline void update_replicas(struct bch_fs *c,
|
|||||||
BUG_ON(idx < 0);
|
BUG_ON(idx < 0);
|
||||||
BUG_ON(!sectors);
|
BUG_ON(!sectors);
|
||||||
|
|
||||||
if (r->data_type == BCH_DATA_CACHED)
|
switch (r->data_type) {
|
||||||
fs_usage->cached += sectors;
|
case BCH_DATA_BTREE:
|
||||||
else
|
fs_usage->btree += sectors;
|
||||||
|
break;
|
||||||
|
case BCH_DATA_USER:
|
||||||
fs_usage->data += sectors;
|
fs_usage->data += sectors;
|
||||||
|
break;
|
||||||
|
case BCH_DATA_CACHED:
|
||||||
|
fs_usage->cached += sectors;
|
||||||
|
break;
|
||||||
|
}
|
||||||
fs_usage->replicas[idx] += sectors;
|
fs_usage->replicas[idx] += sectors;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -924,12 +946,13 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
int bch2_mark_key_locked(struct bch_fs *c,
|
||||||
|
struct bkey_s_c k,
|
||||||
bool inserting, s64 sectors,
|
bool inserting, s64 sectors,
|
||||||
struct bch_fs_usage *fs_usage,
|
struct bch_fs_usage *fs_usage,
|
||||||
unsigned journal_seq, unsigned flags,
|
u64 journal_seq, unsigned flags)
|
||||||
bool gc)
|
|
||||||
{
|
{
|
||||||
|
bool gc = flags & BCH_BUCKET_MARK_GC;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
@ -981,21 +1004,8 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bch2_mark_key_locked(struct bch_fs *c,
|
|
||||||
struct bkey_s_c k,
|
|
||||||
bool inserting, s64 sectors,
|
|
||||||
struct gc_pos pos,
|
|
||||||
struct bch_fs_usage *fs_usage,
|
|
||||||
u64 journal_seq, unsigned flags)
|
|
||||||
{
|
|
||||||
return do_mark_fn(__bch2_mark_key, c, pos, flags,
|
|
||||||
k, inserting, sectors, fs_usage,
|
|
||||||
journal_seq, flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||||
bool inserting, s64 sectors,
|
bool inserting, s64 sectors,
|
||||||
struct gc_pos pos,
|
|
||||||
struct bch_fs_usage *fs_usage,
|
struct bch_fs_usage *fs_usage,
|
||||||
u64 journal_seq, unsigned flags)
|
u64 journal_seq, unsigned flags)
|
||||||
{
|
{
|
||||||
@ -1003,7 +1013,7 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
|||||||
|
|
||||||
percpu_down_read_preempt_disable(&c->mark_lock);
|
percpu_down_read_preempt_disable(&c->mark_lock);
|
||||||
ret = bch2_mark_key_locked(c, k, inserting, sectors,
|
ret = bch2_mark_key_locked(c, k, inserting, sectors,
|
||||||
pos, fs_usage, journal_seq, flags);
|
fs_usage, journal_seq, flags);
|
||||||
percpu_up_read_preempt_enable(&c->mark_lock);
|
percpu_up_read_preempt_enable(&c->mark_lock);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -1011,13 +1021,13 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
|||||||
|
|
||||||
void bch2_mark_update(struct btree_trans *trans,
|
void bch2_mark_update(struct btree_trans *trans,
|
||||||
struct btree_insert_entry *insert,
|
struct btree_insert_entry *insert,
|
||||||
struct bch_fs_usage *fs_usage)
|
struct bch_fs_usage *fs_usage,
|
||||||
|
unsigned flags)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
struct btree_iter *iter = insert->iter;
|
struct btree_iter *iter = insert->iter;
|
||||||
struct btree *b = iter->l[0].b;
|
struct btree *b = iter->l[0].b;
|
||||||
struct btree_node_iter node_iter = iter->l[0].iter;
|
struct btree_node_iter node_iter = iter->l[0].iter;
|
||||||
struct gc_pos pos = gc_pos_btree_node(b);
|
|
||||||
struct bkey_packed *_k;
|
struct bkey_packed *_k;
|
||||||
|
|
||||||
if (!btree_node_type_needs_gc(iter->btree_id))
|
if (!btree_node_type_needs_gc(iter->btree_id))
|
||||||
@ -1027,7 +1037,7 @@ void bch2_mark_update(struct btree_trans *trans,
|
|||||||
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
|
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
|
||||||
bpos_min(insert->k->k.p, b->key.k.p).offset -
|
bpos_min(insert->k->k.p, b->key.k.p).offset -
|
||||||
bkey_start_offset(&insert->k->k),
|
bkey_start_offset(&insert->k->k),
|
||||||
pos, fs_usage, trans->journal_res.seq, 0);
|
fs_usage, trans->journal_res.seq, flags);
|
||||||
|
|
||||||
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
|
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
|
||||||
KEY_TYPE_discard))) {
|
KEY_TYPE_discard))) {
|
||||||
@ -1060,7 +1070,8 @@ void bch2_mark_update(struct btree_trans *trans,
|
|||||||
BUG_ON(sectors <= 0);
|
BUG_ON(sectors <= 0);
|
||||||
|
|
||||||
bch2_mark_key_locked(c, k, true, sectors,
|
bch2_mark_key_locked(c, k, true, sectors,
|
||||||
pos, fs_usage, trans->journal_res.seq, 0);
|
fs_usage, trans->journal_res.seq,
|
||||||
|
flags);
|
||||||
|
|
||||||
sectors = bkey_start_offset(&insert->k->k) -
|
sectors = bkey_start_offset(&insert->k->k) -
|
||||||
k.k->p.offset;
|
k.k->p.offset;
|
||||||
@ -1071,7 +1082,7 @@ void bch2_mark_update(struct btree_trans *trans,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bch2_mark_key_locked(c, k, false, sectors,
|
bch2_mark_key_locked(c, k, false, sectors,
|
||||||
pos, fs_usage, trans->journal_res.seq, 0);
|
fs_usage, trans->journal_res.seq, flags);
|
||||||
|
|
||||||
bch2_btree_node_iter_advance(&node_iter, b);
|
bch2_btree_node_iter_advance(&node_iter, b);
|
||||||
}
|
}
|
||||||
|
@ -173,7 +173,7 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
|
|||||||
|
|
||||||
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
|
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
|
||||||
|
|
||||||
void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *);
|
void bch2_dev_usage_from_buckets(struct bch_fs *);
|
||||||
|
|
||||||
static inline u64 __dev_buckets_available(struct bch_dev *ca,
|
static inline u64 __dev_buckets_available(struct bch_dev *ca,
|
||||||
struct bch_dev_usage stats)
|
struct bch_dev_usage stats)
|
||||||
@ -245,16 +245,16 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
|
|||||||
#define BCH_BUCKET_MARK_NOATOMIC (1 << 1)
|
#define BCH_BUCKET_MARK_NOATOMIC (1 << 1)
|
||||||
|
|
||||||
int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
|
int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
|
||||||
bool, s64, struct gc_pos,
|
bool, s64, struct bch_fs_usage *,
|
||||||
struct bch_fs_usage *, u64, unsigned);
|
u64, unsigned);
|
||||||
int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
|
int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
|
||||||
bool, s64, struct gc_pos,
|
bool, s64, struct bch_fs_usage *,
|
||||||
struct bch_fs_usage *, u64, unsigned);
|
u64, unsigned);
|
||||||
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
|
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
|
||||||
struct disk_reservation *);
|
struct disk_reservation *);
|
||||||
|
|
||||||
void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
|
void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
|
||||||
struct bch_fs_usage *);
|
struct bch_fs_usage *, unsigned);
|
||||||
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
|
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
|
||||||
|
|
||||||
/* disk reservations: */
|
/* disk reservations: */
|
||||||
|
@ -69,6 +69,7 @@ struct bch_fs_usage {
|
|||||||
u64 gc_start[0];
|
u64 gc_start[0];
|
||||||
|
|
||||||
u64 hidden;
|
u64 hidden;
|
||||||
|
u64 btree;
|
||||||
u64 data;
|
u64 data;
|
||||||
u64 cached;
|
u64 cached;
|
||||||
u64 reserved;
|
u64 reserved;
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
#include <linux/random.h>
|
#include <linux/random.h>
|
||||||
#include <linux/scatterlist.h>
|
#include <linux/scatterlist.h>
|
||||||
#include <crypto/algapi.h>
|
#include <crypto/algapi.h>
|
||||||
#include <crypto/chacha20.h>
|
#include <crypto/chacha.h>
|
||||||
#include <crypto/hash.h>
|
#include <crypto/hash.h>
|
||||||
#include <crypto/poly1305.h>
|
#include <crypto/poly1305.h>
|
||||||
#include <keys/user-type.h>
|
#include <keys/user-type.h>
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
#include "super-io.h"
|
#include "super-io.h"
|
||||||
|
|
||||||
#include <linux/crc64.h>
|
#include <linux/crc64.h>
|
||||||
#include <crypto/chacha20.h>
|
#include <crypto/chacha.h>
|
||||||
|
|
||||||
static inline u64 bch2_crc64_update(u64 crc, const void *p, size_t len)
|
static inline u64 bch2_crc64_update(u64 crc, const void *p, size_t len)
|
||||||
{
|
{
|
||||||
@ -126,9 +126,9 @@ static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r)
|
|||||||
/* for skipping ahead and encrypting/decrypting at an offset: */
|
/* for skipping ahead and encrypting/decrypting at an offset: */
|
||||||
static inline struct nonce nonce_add(struct nonce nonce, unsigned offset)
|
static inline struct nonce nonce_add(struct nonce nonce, unsigned offset)
|
||||||
{
|
{
|
||||||
EBUG_ON(offset & (CHACHA20_BLOCK_SIZE - 1));
|
EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1));
|
||||||
|
|
||||||
le32_add_cpu(&nonce.d[0], offset / CHACHA20_BLOCK_SIZE);
|
le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE);
|
||||||
return nonce;
|
return nonce;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -328,17 +328,18 @@ out:
|
|||||||
return inum;
|
return inum;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
|
int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum)
|
||||||
{
|
{
|
||||||
struct btree_trans trans;
|
|
||||||
struct btree_iter *iter;
|
struct btree_iter *iter;
|
||||||
struct bkey_s_c k;
|
struct bkey_s_c k;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
bch2_trans_init(&trans, c);
|
iter = bch2_trans_get_iter(trans, BTREE_ID_DIRENTS,
|
||||||
|
POS(dir_inum, 0), 0);
|
||||||
|
if (IS_ERR(iter))
|
||||||
|
return PTR_ERR(iter);
|
||||||
|
|
||||||
for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
|
for_each_btree_key_continue(iter, 0, k) {
|
||||||
POS(dir_inum, 0), 0, k) {
|
|
||||||
if (k.k->p.inode > dir_inum)
|
if (k.k->p.inode > dir_inum)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -347,11 +348,17 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bch2_trans_exit(&trans);
|
bch2_trans_iter_put(trans, iter);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int bch2_empty_dir(struct bch_fs *c, u64 dir_inum)
|
||||||
|
{
|
||||||
|
return bch2_trans_do(c, NULL, 0,
|
||||||
|
bch2_empty_dir_trans(&trans, dir_inum));
|
||||||
|
}
|
||||||
|
|
||||||
int bch2_readdir(struct bch_fs *c, struct file *file,
|
int bch2_readdir(struct bch_fs *c, struct file *file,
|
||||||
struct dir_context *ctx)
|
struct dir_context *ctx)
|
||||||
{
|
{
|
||||||
|
@ -54,6 +54,7 @@ int bch2_dirent_rename(struct btree_trans *,
|
|||||||
u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *,
|
u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *,
|
||||||
const struct qstr *);
|
const struct qstr *);
|
||||||
|
|
||||||
|
int bch2_empty_dir_trans(struct btree_trans *, u64);
|
||||||
int bch2_empty_dir(struct bch_fs *, u64);
|
int bch2_empty_dir(struct bch_fs *, u64);
|
||||||
int bch2_readdir(struct bch_fs *, struct file *, struct dir_context *);
|
int bch2_readdir(struct bch_fs *, struct file *, struct dir_context *);
|
||||||
|
|
||||||
|
@ -1231,10 +1231,7 @@ int bch2_stripes_write(struct bch_fs *c, bool *wrote)
|
|||||||
|
|
||||||
static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k)
|
static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k)
|
||||||
{
|
{
|
||||||
|
bch2_mark_key(c, k, true, 0, NULL, 0, 0);
|
||||||
struct gc_pos pos = { 0 };
|
|
||||||
|
|
||||||
bch2_mark_key(c, k, true, 0, pos, NULL, 0, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list)
|
int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list)
|
||||||
|
@ -757,7 +757,7 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage,
|
|||||||
EBUG_ON(!PageLocked(page));
|
EBUG_ON(!PageLocked(page));
|
||||||
EBUG_ON(!PageLocked(newpage));
|
EBUG_ON(!PageLocked(newpage));
|
||||||
|
|
||||||
ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
|
ret = migrate_page_move_mapping(mapping, newpage, page, mode, 0);
|
||||||
if (ret != MIGRATEPAGE_SUCCESS)
|
if (ret != MIGRATEPAGE_SUCCESS)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
@ -265,7 +265,7 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
|||||||
return -EPERM;
|
return -EPERM;
|
||||||
|
|
||||||
down_write(&sb->s_umount);
|
down_write(&sb->s_umount);
|
||||||
-	sb->s_flags |= MS_RDONLY;
+	sb->s_flags |= SB_RDONLY;
 	bch2_fs_emergency_read_only(c);
 	up_write(&sb->s_umount);
 	return 0;
@@ -1582,7 +1582,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
 	struct bch_opts opts = bch2_opts_empty();
 	int ret;
 
-	opt_set(opts, read_only, (*flags & MS_RDONLY) != 0);
+	opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
 
 	ret = bch2_parse_mount_opts(&opts, data);
 	if (ret)
@@ -1594,7 +1594,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
 	if (opts.read_only) {
 		bch2_fs_read_only(c);
 
-		sb->s_flags |= MS_RDONLY;
+		sb->s_flags |= SB_RDONLY;
 	} else {
 		ret = bch2_fs_read_write(c);
 		if (ret) {
@@ -1603,7 +1603,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
 			return -EINVAL;
 		}
 
-		sb->s_flags &= ~MS_RDONLY;
+		sb->s_flags &= ~SB_RDONLY;
 	}
 
 	c->opts.read_only = opts.read_only;
@@ -1681,7 +1681,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
 	unsigned i;
 	int ret;
 
-	opt_set(opts, read_only, (flags & MS_RDONLY) != 0);
+	opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
 
 	ret = bch2_parse_mount_opts(&opts, data);
 	if (ret)
@@ -1691,7 +1691,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
 	if (IS_ERR(c))
 		return ERR_CAST(c);
 
-	sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|MS_NOSEC, c);
+	sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|SB_NOSEC, c);
 	if (IS_ERR(sb)) {
 		closure_put(&c->cl);
 		return ERR_CAST(sb);
@@ -1702,7 +1702,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
 	if (sb->s_root) {
 		closure_put(&c->cl);
 
-		if ((flags ^ sb->s_flags) & MS_RDONLY) {
+		if ((flags ^ sb->s_flags) & SB_RDONLY) {
 			ret = -EBUSY;
 			goto err_put_super;
 		}
@@ -1745,7 +1745,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
 
 #ifdef CONFIG_BCACHEFS_POSIX_ACL
 	if (c->opts.acl)
-		sb->s_flags |= MS_POSIXACL;
+		sb->s_flags |= SB_POSIXACL;
 #endif
 
 	vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO);
@@ -1760,7 +1760,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
 		goto err_put_super;
 	}
 
-	sb->s_flags |= MS_ACTIVE;
+	sb->s_flags |= SB_ACTIVE;
 out:
 	return dget(sb->s_root);
 
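Note on the hunks above: the kernel renamed the mount-time MS_* flag macros to superblock-scoped SB_* names with unchanged bit values, so this is a mechanical respelling. As a sketch only, assuming pre-rename headers where the SB_* names do not exist, a compatibility shim could look like this (the #ifndef guard is an assumption of the sketch, not part of this commit):

#ifndef SB_RDONLY
#define SB_RDONLY	MS_RDONLY	/* read-only mount */
#define SB_NOSEC	MS_NOSEC	/* skip security checks on write */
#define SB_POSIXACL	MS_POSIXACL	/* POSIX ACLs supported */
#define SB_ACTIVE	MS_ACTIVE	/* superblock fully set up */
#endif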
@@ -69,11 +69,6 @@ static inline unsigned nlink_bias(umode_t mode)
 	return S_ISDIR(mode) ? 2 : 1;
 }
 
-static inline u64 bch2_current_time(struct bch_fs *c)
-{
-	return timespec_to_bch2_time(c, current_kernel_time64());
-}
-
 static inline bool inode_attr_changing(struct bch_inode_info *dir,
 				       struct bch_inode_info *inode,
 				       enum inode_opt_id id)
@@ -127,18 +127,21 @@ static struct inode_walker inode_walker_init(void)
 	};
 }
 
-static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
+static int walk_inode(struct btree_trans *trans,
+		      struct inode_walker *w, u64 inum)
 {
-	w->first_this_inode = inum != w->cur_inum;
-	w->cur_inum = inum;
-
-	if (w->first_this_inode) {
-		int ret = bch2_inode_find_by_inum(c, inum, &w->inode);
+	if (inum != w->cur_inum) {
+		int ret = bch2_inode_find_by_inum_trans(trans, inum,
+							&w->inode);
 
 		if (ret && ret != -ENOENT)
 			return ret;
 
 		w->have_inode = !ret;
+		w->cur_inum = inum;
+		w->first_this_inode = true;
+	} else {
+		w->first_this_inode = false;
 	}
 
 	return 0;
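walk_inode() now takes the btree_trans and only refetches when the inode number changes, updating its cache after a successful lookup instead of before. A minimal standalone sketch of that caching pattern, with illustrative names only:

#include <stdbool.h>

struct cached_lookup {
	unsigned long	cur_key;
	int		value;
	bool		first_this_key;
};

/* Refetch state only when the key changes; report whether this call
 * was the first for the current key, like w->first_this_inode above. */
static int cached_lookup_update(struct cached_lookup *c, unsigned long key,
				int (*fetch)(unsigned long key, int *value))
{
	if (key != c->cur_key) {
		int ret = fetch(key, &c->value);
		if (ret)
			return ret;	/* cache left pointing at old key */
		c->cur_key = key;
		c->first_this_key = true;
	} else {
		c->first_this_key = false;
	}
	return 0;
}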
@@ -444,12 +447,15 @@ static int check_extents(struct bch_fs *c)
 	int ret = 0;
 
 	bch2_trans_init(&trans, c);
+	bch2_trans_preload_iters(&trans);
 
 	bch_verbose(c, "checking extents");
 
-	for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
-			   POS(BCACHEFS_ROOT_INO, 0), 0, k) {
-		ret = walk_inode(c, &w, k.k->p.inode);
+	iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+				   POS(BCACHEFS_ROOT_INO, 0), 0);
+retry:
+	for_each_btree_key_continue(iter, 0, k) {
+		ret = walk_inode(&trans, &w, k.k->p.inode);
 		if (ret)
 			break;
 
@@ -514,6 +520,8 @@ static int check_extents(struct bch_fs *c)
 	}
err:
fsck_err:
+	if (ret == -EINTR)
+		goto retry;
 	return bch2_trans_exit(&trans) ?: ret;
 }
 
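The retry: label plus the -EINTR check at fsck_err: is the transaction-restart idiom these fsck passes adopt: btree transaction operations can return -EINTR when they had to drop locks, and the pass re-enters from the saved iterator position instead of failing. A hedged sketch of the control flow, with do_pass() standing in for the body between retry: and fsck_err::

#include <errno.h>

static int run_with_restart(int (*do_pass)(void))
{
	int ret;

	do {
		ret = do_pass();	/* may return -EINTR on a lock restart */
	} while (ret == -EINTR);

	return ret;
}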
@@ -536,21 +544,20 @@ static int check_dirents(struct bch_fs *c)
 	bch_verbose(c, "checking dirents");
 
 	bch2_trans_init(&trans, c);
 
 	bch2_trans_preload_iters(&trans);
 
-	iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
-				   POS(BCACHEFS_ROOT_INO, 0), 0);
-
 	hash_check_init(&h);
 
+	iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
+				   POS(BCACHEFS_ROOT_INO, 0), 0);
+retry:
 	for_each_btree_key_continue(iter, 0, k) {
 		struct bkey_s_c_dirent d;
 		struct bch_inode_unpacked target;
 		bool have_target;
 		u64 d_inum;
 
-		ret = walk_inode(c, &w, k.k->p.inode);
+		ret = walk_inode(&trans, &w, k.k->p.inode);
 		if (ret)
 			break;
 
@@ -619,7 +626,7 @@ static int check_dirents(struct bch_fs *c)
 			continue;
 		}
 
-		ret = bch2_inode_find_by_inum(c, d_inum, &target);
+		ret = bch2_inode_find_by_inum_trans(&trans, d_inum, &target);
 		if (ret && ret != -ENOENT)
 			break;
 
@@ -670,6 +677,9 @@ static int check_dirents(struct bch_fs *c)
 	hash_stop_chain(&trans, &h);
err:
fsck_err:
+	if (ret == -EINTR)
+		goto retry;
+
 	return bch2_trans_exit(&trans) ?: ret;
 }
 
@@ -688,17 +698,16 @@ static int check_xattrs(struct bch_fs *c)
 
 	bch_verbose(c, "checking xattrs");
 
-	bch2_trans_init(&trans, c);
+	hash_check_init(&h);
 
+	bch2_trans_init(&trans, c);
 	bch2_trans_preload_iters(&trans);
 
 	iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
 				   POS(BCACHEFS_ROOT_INO, 0), 0);
-
-	hash_check_init(&h);
-
+retry:
 	for_each_btree_key_continue(iter, 0, k) {
-		ret = walk_inode(c, &w, k.k->p.inode);
+		ret = walk_inode(&trans, &w, k.k->p.inode);
 		if (ret)
 			break;
 
@@ -721,6 +730,8 @@ static int check_xattrs(struct bch_fs *c)
 	}
err:
fsck_err:
+	if (ret == -EINTR)
+		goto retry;
 	return bch2_trans_exit(&trans) ?: ret;
 }
 
@@ -904,6 +915,7 @@ static int check_directory_structure(struct bch_fs *c,
 	int ret = 0;
 
 	bch2_trans_init(&trans, c);
+	bch2_trans_preload_iters(&trans);
 
 	bch_verbose(c, "checking directory structure");
 
@@ -918,9 +930,8 @@ restart_dfs:
 	}
 
 	ret = path_down(&path, BCACHEFS_ROOT_INO);
-	if (ret) {
-		return ret;
-	}
+	if (ret)
+		goto err;
 
 	while (path.nr) {
next:
@@ -982,14 +993,19 @@ up:
 		path.nr--;
 	}
 
-	for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k) {
+	iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS_MIN, 0);
+retry:
+	for_each_btree_key_continue(iter, 0, k) {
 		if (k.k->type != KEY_TYPE_inode)
 			continue;
 
 		if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
 			continue;
 
-		if (!bch2_empty_dir(c, k.k->p.inode))
+		ret = bch2_empty_dir_trans(&trans, k.k->p.inode);
+		if (ret == -EINTR)
+			goto retry;
+		if (!ret)
 			continue;
 
 		if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
@@ -1017,15 +1033,12 @@ up:
 		memset(&path, 0, sizeof(path));
 		goto restart_dfs;
 	}
 
-out:
-	kfree(dirs_done.bits);
-	kfree(path.entries);
-	return ret;
-
err:
fsck_err:
 	ret = bch2_trans_exit(&trans) ?: ret;
-	goto out;
+	kfree(dirs_done.bits);
+	kfree(path.entries);
+	return ret;
 }
 
 struct nlink {
@@ -1069,6 +1082,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
 	int ret;
 
 	bch2_trans_init(&trans, c);
+	bch2_trans_preload_iters(&trans);
 
 	inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
 
@@ -1225,12 +1239,10 @@ static int check_inode(struct btree_trans *trans,
 		return ret;
 	}
 
-	if (u.bi_flags & BCH_INODE_UNLINKED) {
-		fsck_err_on(c->sb.clean, c,
-			    "filesystem marked clean, "
-			    "but inode %llu unlinked",
-			    u.bi_inum);
-
+	if (u.bi_flags & BCH_INODE_UNLINKED &&
+	    (!c->sb.clean ||
+	     fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
+		      u.bi_inum))) {
 		bch_verbose(c, "deleting inode %llu", u.bi_inum);
 
 		ret = bch2_inode_rm(c, u.bi_inum);
@@ -1240,12 +1252,10 @@ static int check_inode(struct btree_trans *trans,
 		return ret;
 	}
 
-	if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY) {
-		fsck_err_on(c->sb.clean, c,
-			    "filesystem marked clean, "
-			    "but inode %llu has i_size dirty",
-			    u.bi_inum);
-
+	if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
+	    (!c->sb.clean ||
+	     fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
+		      u.bi_inum))) {
 		bch_verbose(c, "truncating inode %llu", u.bi_inum);
 
 		/*
@@ -1270,14 +1280,12 @@ static int check_inode(struct btree_trans *trans,
 		do_update = true;
 	}
 
-	if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY) {
+	if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
+	    (!c->sb.clean ||
+	     fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
+		      u.bi_inum))) {
 		s64 sectors;
 
-		fsck_err_on(c->sb.clean, c,
-			    "filesystem marked clean, "
-			    "but inode %llu has i_sectors dirty",
-			    u.bi_inum);
-
 		bch_verbose(c, "recounting sectors for inode %llu",
 			    u.bi_inum);
 
@@ -1326,6 +1334,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
 	u64 nlinks_pos;
 
 	bch2_trans_init(&trans, c);
+	bch2_trans_preload_iters(&trans);
 
 	iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
 				   POS(range_start, 0), 0);
@@ -1425,6 +1434,7 @@ static int check_inodes_fast(struct bch_fs *c)
 	int ret = 0, ret2;
 
 	bch2_trans_init(&trans, c);
+	bch2_trans_preload_iters(&trans);
 
 	iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
 				   POS_MIN, 0);
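The three check_inode() hunks above all make the same transformation: previously the repair branch ran unconditionally and fsck_err_on() only logged when the filesystem claimed to be clean; now the `flag && (!c->sb.clean || fsck_err(...))` condition makes entering the branch depend on the fsck decision whenever the superblock says clean. A small illustrative sketch of that short-circuit shape, with stand-in names:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for fsck_err(): logs and returns whether to repair. */
static bool ask_fix(const char *msg)
{
	fprintf(stderr, "fsck: %s\n", msg);
	return true;
}

static void maybe_repair(bool flag_set, bool sb_clean)
{
	/* On a dirty fs the flag alone triggers repair; on a clean fs
	 * the repair only runs if ask_fix() agrees. */
	if (flag_set &&
	    (!sb_clean ||
	     ask_fix("filesystem marked clean, but dirty flag set")))
		/* ...do the repair... */;
}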
@@ -251,9 +251,7 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
 		     uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
 		     struct bch_inode_unpacked *parent)
 {
-	s64 now = timespec_to_bch2_time(c,
-			timespec64_trunc(current_kernel_time64(),
-					 c->sb.time_precision));
+	s64 now = bch2_current_time(c);
 
 	memset(inode_u, 0, sizeof(*inode_u));
 
@@ -445,31 +443,32 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
 	return ret;
 }
 
-int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
-			    struct bch_inode_unpacked *inode)
+int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
+				  struct bch_inode_unpacked *inode)
 {
-	struct btree_trans trans;
 	struct btree_iter *iter;
 	struct bkey_s_c k;
 	int ret = -ENOENT;
 
-	bch2_trans_init(&trans, c);
+	iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
+				   POS(inode_nr, 0), BTREE_ITER_SLOTS);
+	if (IS_ERR(iter))
+		return PTR_ERR(iter);
 
-	for_each_btree_key(&trans, iter, BTREE_ID_INODES,
-			   POS(inode_nr, 0), BTREE_ITER_SLOTS, k) {
-		switch (k.k->type) {
-		case KEY_TYPE_inode:
-			ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
-			break;
-		default:
-			/* hole, not found */
-			break;
-		}
-
-		break;
-	}
+	k = bch2_btree_iter_peek_slot(iter);
+	if (k.k->type == KEY_TYPE_inode)
+		ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
 
-	return bch2_trans_exit(&trans) ?: ret;
+	bch2_trans_iter_put(trans, iter);
+
+	return ret;
+}
+
+int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
+			    struct bch_inode_unpacked *inode)
+{
+	return bch2_trans_do(c, NULL, 0,
+		bch2_inode_find_by_inum_trans(&trans, inode_nr, inode));
 }
 
 #ifdef CONFIG_BCACHEFS_DEBUG
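bch2_inode_find_by_inum() is split above into a transactional core plus a thin wrapper, so callers that already hold a btree_trans (such as the fsck passes) can reuse the core inside their own transaction. A sketch of the factoring with stand-in types, not the bcachefs API:

struct trans { int dummy; };

/* Transactional core: uses the caller's transaction. */
static int find_thing_trans(struct trans *trans, unsigned long nr, int *out)
{
	*out = (int)nr;		/* stand-in for the iterator lookup */
	return 0;
}

/* Thin wrapper: owns the transaction lifetime for one-shot callers. */
static int find_thing(unsigned long nr, int *out)
{
	struct trans trans = { 0 };	/* init/exit would bracket this */

	return find_thing_trans(&trans, nr, out);
}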
@@ -3,8 +3,6 @@
 
 #include "opts.h"
 
-#include <linux/math64.h>
-
 extern const char * const bch2_inode_opts[];
 
 const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -59,23 +57,9 @@ int bch2_inode_create(struct bch_fs *, struct bch_inode_unpacked *,
 
 int bch2_inode_rm(struct bch_fs *, u64);
 
-int bch2_inode_find_by_inum(struct bch_fs *, u64,
+int bch2_inode_find_by_inum_trans(struct btree_trans *, u64,
 				  struct bch_inode_unpacked *);
-
-static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time)
-{
-	return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo);
-}
-
-static inline u64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts)
-{
-	s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo;
-
-	if (c->sb.time_precision == 1)
-		return ns;
-
-	return div_s64(ns, c->sb.time_precision);
-}
+int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *);
 
 static inline struct bch_io_opts bch2_inode_opts_get(struct bch_inode_unpacked *inode)
 {
@@ -992,27 +992,57 @@ void bch2_fs_journal_stop(struct journal *j)
 	cancel_delayed_work_sync(&j->reclaim_work);
 }
 
-void bch2_fs_journal_start(struct journal *j)
+int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
+			  struct list_head *journal_entries)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct journal_seq_blacklist *bl;
-	u64 blacklist = 0;
+	struct journal_entry_pin_list *p;
+	struct journal_replay *i;
+	u64 last_seq = cur_seq, nr, seq;
 
-	list_for_each_entry(bl, &j->seq_blacklist, list)
-		blacklist = max(blacklist, bl->end);
+	if (!list_empty(journal_entries))
+		last_seq = le64_to_cpu(list_last_entry(journal_entries,
+						       struct journal_replay,
+						       list)->j.last_seq);
+
+	nr = cur_seq - last_seq;
+
+	if (nr + 1 > j->pin.size) {
+		free_fifo(&j->pin);
+		init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
+		if (!j->pin.data) {
+			bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
+			return -ENOMEM;
+		}
+	}
+
+	j->last_seq_ondisk	= last_seq;
+	j->pin.front		= last_seq;
+	j->pin.back		= cur_seq;
+	atomic64_set(&j->seq, cur_seq - 1);
+
+	fifo_for_each_entry_ptr(p, &j->pin, seq) {
+		INIT_LIST_HEAD(&p->list);
+		INIT_LIST_HEAD(&p->flushed);
+		atomic_set(&p->count, 0);
+		p->devs.nr = 0;
+	}
+
+	list_for_each_entry(i, journal_entries, list) {
+		seq = le64_to_cpu(i->j.seq);
+
+		BUG_ON(seq < last_seq || seq >= cur_seq);
+
+		p = journal_seq_pin(j, seq);
+
+		atomic_set(&p->count, 1);
+		p->devs = i->devs;
+	}
 
 	spin_lock(&j->lock);
 
 	set_bit(JOURNAL_STARTED, &j->flags);
 
-	while (journal_cur_seq(j) < blacklist)
-		journal_pin_new_entry(j, 0);
-
-	/*
-	 * __journal_entry_close() only inits the next journal entry when it
-	 * closes an open journal entry - the very first journal entry gets
-	 * initialized here:
-	 */
 	journal_pin_new_entry(j, 1);
 	bch2_journal_buf_init(j);
 
@@ -1021,12 +1051,7 @@ void bch2_fs_journal_start(struct journal *j)
 	bch2_journal_space_available(j);
 	spin_unlock(&j->lock);
 
-	/*
-	 * Adding entries to the next journal entry before allocating space on
-	 * disk for the next journal entry - this is ok, because these entries
-	 * only have to go down with the next journal entry we write:
-	 */
-	bch2_journal_seq_blacklist_write(j);
+	return 0;
 }
 
 /* init/exit: */
@@ -1091,8 +1116,6 @@ int bch2_fs_journal_init(struct journal *j)
 	INIT_DELAYED_WORK(&j->write_work, journal_write_work);
 	INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
 	init_waitqueue_head(&j->pin_flush_wait);
-	mutex_init(&j->blacklist_lock);
-	INIT_LIST_HEAD(&j->seq_blacklist);
 	mutex_init(&j->reclaim_lock);
 	mutex_init(&j->discard_lock);
 
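On the pin-FIFO sizing in bch2_fs_journal_start() above: sequence numbers last_seq through cur_seq - 1 are open entries, plus one slot for the entry about to be created, and the size is rounded to a power of two so the fifo's index masking works. Worked example with a local stand-in for the kernel's roundup_pow_of_two():

#include <stdint.h>

/* cur_seq = 100, last_seq = 90: nr = 10, so the fifo needs
 * roundup_pow_of_two64(11) = 16 slots. */
static uint64_t roundup_pow_of_two64(uint64_t n)
{
	uint64_t r = 1;

	while (r < n)
		r <<= 1;
	return r;
}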
@@ -469,8 +469,10 @@ int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
 int bch2_dev_journal_alloc(struct bch_dev *);
 
 void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
+
 void bch2_fs_journal_stop(struct journal *);
-void bch2_fs_journal_start(struct journal *);
+int bch2_fs_journal_start(struct journal *, u64, struct list_head *);
+
 void bch2_dev_journal_exit(struct bch_dev *);
 int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
 void bch2_fs_journal_exit(struct journal *);
@@ -9,7 +9,6 @@
 #include "journal.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
-#include "journal_seq_blacklist.h"
 #include "replicas.h"
 
 #include <trace/events/bcachefs.h>
@@ -655,45 +654,11 @@ void bch2_journal_entries_free(struct list_head *list)
 	}
 }
 
-int bch2_journal_set_seq(struct bch_fs *c, u64 last_seq, u64 end_seq)
-{
-	struct journal *j = &c->journal;
-	struct journal_entry_pin_list *p;
-	u64 seq, nr = end_seq - last_seq + 1;
-
-	if (nr > j->pin.size) {
-		free_fifo(&j->pin);
-		init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL);
-		if (!j->pin.data) {
-			bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
-			return -ENOMEM;
-		}
-	}
-
-	atomic64_set(&j->seq, end_seq);
-	j->last_seq_ondisk = last_seq;
-
-	j->pin.front	= last_seq;
-	j->pin.back	= end_seq + 1;
-
-	fifo_for_each_entry_ptr(p, &j->pin, seq) {
-		INIT_LIST_HEAD(&p->list);
-		INIT_LIST_HEAD(&p->flushed);
-		atomic_set(&p->count, 0);
-		p->devs.nr = 0;
-	}
-
-	return 0;
-}
-
 int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 {
-	struct journal *j = &c->journal;
 	struct journal_list jlist;
 	struct journal_replay *i;
-	struct journal_entry_pin_list *p;
 	struct bch_dev *ca;
-	u64 cur_seq, end_seq;
 	unsigned iter;
 	size_t keys = 0, entries = 0;
 	bool degraded = false;
@@ -725,17 +690,12 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 	if (jlist.ret)
 		return jlist.ret;
 
-	if (list_empty(list)){
-		bch_err(c, "no journal entries found");
-		return BCH_FSCK_REPAIR_IMPOSSIBLE;
-	}
-
 	list_for_each_entry(i, list, list) {
+		struct jset_entry *entry;
+		struct bkey_i *k, *_n;
 		struct bch_replicas_padded replicas;
 		char buf[80];
 
-		bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
-
 		ret = jset_validate_entries(c, &i->j, READ);
 		if (ret)
 			goto fsck_err;
@@ -745,6 +705,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 	 * the devices - this is wrong:
 	 */
 
+	bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
+
 	if (!degraded &&
 	    (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
 	     fsck_err_on(!bch2_replicas_marked(c, &replicas.e, false), c,
@@ -755,68 +717,18 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 		if (ret)
 			return ret;
 	}
-	}
-
-	i = list_last_entry(list, struct journal_replay, list);
-
-	ret = bch2_journal_set_seq(c,
-				   le64_to_cpu(i->j.last_seq),
-				   le64_to_cpu(i->j.seq));
-	if (ret)
-		return ret;
-
-	mutex_lock(&j->blacklist_lock);
-
-	list_for_each_entry(i, list, list) {
-		p = journal_seq_pin(j, le64_to_cpu(i->j.seq));
-
-		atomic_set(&p->count, 1);
-		p->devs = i->devs;
-
-		if (bch2_journal_seq_blacklist_read(j, i)) {
-			mutex_unlock(&j->blacklist_lock);
-			return -ENOMEM;
-		}
-	}
-
-	mutex_unlock(&j->blacklist_lock);
-
-	cur_seq = journal_last_seq(j);
-	end_seq = le64_to_cpu(list_last_entry(list,
-				struct journal_replay, list)->j.seq);
-
-	list_for_each_entry(i, list, list) {
-		struct jset_entry *entry;
-		struct bkey_i *k, *_n;
-		bool blacklisted;
-
-		mutex_lock(&j->blacklist_lock);
-		while (cur_seq < le64_to_cpu(i->j.seq) &&
-		       bch2_journal_seq_blacklist_find(j, cur_seq))
-			cur_seq++;
-
-		blacklisted = bch2_journal_seq_blacklist_find(j,
-							      le64_to_cpu(i->j.seq));
-		mutex_unlock(&j->blacklist_lock);
-
-		fsck_err_on(blacklisted, c,
-			    "found blacklisted journal entry %llu",
-			    le64_to_cpu(i->j.seq));
-
-		fsck_err_on(le64_to_cpu(i->j.seq) != cur_seq, c,
-			    "journal entries %llu-%llu missing! (replaying %llu-%llu)",
-			    cur_seq, le64_to_cpu(i->j.seq) - 1,
-			    journal_last_seq(j), end_seq);
-
-		cur_seq = le64_to_cpu(i->j.seq) + 1;
 
 		for_each_jset_key(k, _n, entry, &i->j)
 			keys++;
 		entries++;
 	}
 
+	if (!list_empty(list)) {
+		i = list_last_entry(list, struct journal_replay, list);
+
 		bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
-			 keys, entries, journal_cur_seq(j));
+			 keys, entries, le64_to_cpu(i->j.seq));
+	}
fsck_err:
 	return ret;
 }
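The per-entry gap and blacklist checking deleted from bch2_journal_read() above moves to recovery.c (verify_journal_entries_not_blacklisted_or_missing(), later in this commit). The core of it is a walk over expected sequence numbers, where blacklisted sequences count as expected gaps. A minimal sketch with a stand-in blacklist predicate:

#include <stdint.h>
#include <stdio.h>

static int seq_blacklisted(uint64_t seq)
{
	return 0;	/* stand-in for bch2_journal_seq_is_blacklisted() */
}

static void check_entries(const uint64_t *seqs, int nr)
{
	uint64_t expect = nr ? seqs[0] : 0;

	for (int i = 0; i < nr; i++) {
		if (seqs[i] != expect)
			fprintf(stderr, "journal entries %llu-%llu missing!\n",
				(unsigned long long) expect,
				(unsigned long long) (seqs[i] - 1));

		expect = seqs[i] + 1;
		while (seq_blacklisted(expect))	/* blacklisted seqs are expected gaps */
			expect++;
	}
}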
@@ -876,8 +788,9 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
 	 * but - there are other correctness issues if btree gc were to run
 	 * before journal replay finishes
 	 */
+	BUG_ON(c->gc_pos.phase);
+
 	bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
-		      gc_pos_btree_node(iter->l[0].b),
 		      NULL, 0, 0);
 	bch2_trans_exit(&trans);
 
@@ -34,7 +34,6 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
 	for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys)	\
 		vstruct_for_each_safe(entry, k, _n)
 
-int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
 int bch2_journal_read(struct bch_fs *, struct list_head *);
 void bch2_journal_entries_free(struct list_head *);
 int bch2_journal_replay(struct bch_fs *, struct list_head *);
@@ -1,12 +1,9 @@
 
 #include "bcachefs.h"
-#include "btree_update.h"
-#include "btree_update_interior.h"
-#include "error.h"
-#include "journal.h"
-#include "journal_io.h"
-#include "journal_reclaim.h"
+#include "btree_iter.h"
+#include "eytzinger.h"
 #include "journal_seq_blacklist.h"
+#include "super-io.h"
 
 /*
  * journal_seq_blacklist machinery:
@@ -36,327 +33,285 @@
  * record that it was blacklisted so that a) on recovery we don't think we have
  * missing journal entries and b) so that the btree code continues to ignore
  * that bset, until that btree node is rewritten.
- *
- * Blacklisted journal sequence numbers are themselves recorded as entries in
- * the journal.
  */
 
-/*
- * Called when journal needs to evict a blacklist entry to reclaim space: find
- * any btree nodes that refer to the blacklist journal sequence numbers, and
- * rewrite them:
- */
-static void journal_seq_blacklist_flush(struct journal *j,
-					struct journal_entry_pin *pin, u64 seq)
-{
-	struct bch_fs *c =
-		container_of(j, struct bch_fs, journal);
-	struct journal_seq_blacklist *bl =
-		container_of(pin, struct journal_seq_blacklist, pin);
-	struct blacklisted_node n;
-	struct closure cl;
-	unsigned i;
-	int ret;
-
-	closure_init_stack(&cl);
-
-	for (i = 0;; i++) {
-		struct btree_trans trans;
-		struct btree_iter *iter;
-		struct btree *b;
-
-		bch2_trans_init(&trans, c);
-
-		mutex_lock(&j->blacklist_lock);
-		if (i >= bl->nr_entries) {
-			mutex_unlock(&j->blacklist_lock);
-			break;
-		}
-		n = bl->entries[i];
-		mutex_unlock(&j->blacklist_lock);
-
-		iter = bch2_trans_get_node_iter(&trans, n.btree_id, n.pos,
-						0, 0, 0);
-
-		b = bch2_btree_iter_peek_node(iter);
-
-		/* The node might have already been rewritten: */
-
-		if (b->data->keys.seq == n.seq) {
-			ret = bch2_btree_node_rewrite(c, iter, n.seq, 0);
-			if (ret) {
-				bch2_trans_exit(&trans);
-				bch2_fs_fatal_error(c,
-					"error %i rewriting btree node with blacklisted journal seq",
-					ret);
-				bch2_journal_halt(j);
-				return;
-			}
-		}
-
-		bch2_trans_exit(&trans);
-	}
-
-	for (i = 0;; i++) {
-		struct btree_update *as;
-		struct pending_btree_node_free *d;
-
-		mutex_lock(&j->blacklist_lock);
-		if (i >= bl->nr_entries) {
-			mutex_unlock(&j->blacklist_lock);
-			break;
-		}
-		n = bl->entries[i];
-		mutex_unlock(&j->blacklist_lock);
-redo_wait:
-		mutex_lock(&c->btree_interior_update_lock);
-
-		/*
-		 * Is the node on the list of pending interior node updates -
-		 * being freed? If so, wait for that to finish:
-		 */
-		for_each_pending_btree_node_free(c, as, d)
-			if (n.seq == d->seq &&
-			    n.btree_id == d->btree_id &&
-			    !d->level &&
-			    !bkey_cmp(n.pos, d->key.k.p)) {
-				closure_wait(&as->wait, &cl);
-				mutex_unlock(&c->btree_interior_update_lock);
-				closure_sync(&cl);
-				goto redo_wait;
-			}
-
-		mutex_unlock(&c->btree_interior_update_lock);
-	}
-
-	mutex_lock(&j->blacklist_lock);
-
-	bch2_journal_pin_drop(j, &bl->pin);
-	list_del(&bl->list);
-	kfree(bl->entries);
-	kfree(bl);
-
-	mutex_unlock(&j->blacklist_lock);
-}
-
-/*
- * Determine if a particular sequence number is blacklisted - if so, return
- * blacklist entry:
- */
-struct journal_seq_blacklist *
-bch2_journal_seq_blacklist_find(struct journal *j, u64 seq)
-{
-	struct journal_seq_blacklist *bl;
-
-	lockdep_assert_held(&j->blacklist_lock);
-
-	list_for_each_entry(bl, &j->seq_blacklist, list)
-		if (seq >= bl->start && seq <= bl->end)
-			return bl;
-
-	return NULL;
-}
-
-/*
- * Allocate a new, in memory blacklist entry:
- */
-static struct journal_seq_blacklist *
-bch2_journal_seq_blacklisted_new(struct journal *j, u64 start, u64 end)
-{
-	struct journal_seq_blacklist *bl;
-
-	lockdep_assert_held(&j->blacklist_lock);
-
-	/*
-	 * When we start the journal, bch2_journal_start() will skip over @seq:
-	 */
-
-	bl = kzalloc(sizeof(*bl), GFP_KERNEL);
-	if (!bl)
-		return NULL;
-
-	bl->start	= start;
-	bl->end		= end;
-
-	list_add_tail(&bl->list, &j->seq_blacklist);
-	return bl;
-}
-
-/*
- * Returns true if @seq is newer than the most recent journal entry that got
- * written, and data corresponding to @seq should be ignored - also marks @seq
- * as blacklisted so that on future restarts the corresponding data will still
- * be ignored:
- */
-int bch2_journal_seq_should_ignore(struct bch_fs *c, u64 seq, struct btree *b)
-{
-	struct journal *j = &c->journal;
-	struct journal_seq_blacklist *bl = NULL;
-	struct blacklisted_node *n;
-	u64 journal_seq;
-	int ret = 0;
-
-	if (!seq)
-		return 0;
-
-	spin_lock(&j->lock);
-	journal_seq = journal_cur_seq(j);
-	spin_unlock(&j->lock);
-
-	/* Interier updates aren't journalled: */
-	BUG_ON(b->level);
-	BUG_ON(seq > journal_seq && test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags));
-
-	/*
-	 * Decrease this back to j->seq + 2 when we next rev the on disk format:
-	 * increasing it temporarily to work around bug in old kernels
-	 */
-	fsck_err_on(seq > journal_seq + 4, c,
-		    "bset journal seq too far in the future: %llu > %llu",
-		    seq, journal_seq);
-
-	if (seq <= journal_seq &&
-	    list_empty_careful(&j->seq_blacklist))
-		return 0;
-
-	mutex_lock(&j->blacklist_lock);
-
-	if (seq <= journal_seq) {
-		bl = bch2_journal_seq_blacklist_find(j, seq);
-		if (!bl)
-			goto out;
-	} else {
-		bch_verbose(c, "btree node %u:%llu:%llu has future journal sequence number %llu, blacklisting",
-			    b->btree_id, b->key.k.p.inode, b->key.k.p.offset, seq);
-
-		if (!j->new_blacklist) {
-			j->new_blacklist = bch2_journal_seq_blacklisted_new(j,
-						journal_seq + 1,
-						journal_seq + 1);
-			if (!j->new_blacklist) {
-				ret = -ENOMEM;
-				goto out;
-			}
-		}
-		bl = j->new_blacklist;
-		bl->end = max(bl->end, seq);
-	}
-
-	for (n = bl->entries; n < bl->entries + bl->nr_entries; n++)
-		if (b->data->keys.seq == n->seq &&
-		    b->btree_id == n->btree_id &&
-		    !bkey_cmp(b->key.k.p, n->pos))
-			goto found_entry;
-
-	if (!bl->nr_entries ||
-	    is_power_of_2(bl->nr_entries)) {
-		n = krealloc(bl->entries,
-			     max_t(size_t, bl->nr_entries * 2, 8) * sizeof(*n),
-			     GFP_KERNEL);
-		if (!n) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		bl->entries = n;
-	}
-
-	bl->entries[bl->nr_entries++] = (struct blacklisted_node) {
-		.seq		= b->data->keys.seq,
-		.btree_id	= b->btree_id,
-		.pos		= b->key.k.p,
-	};
-found_entry:
-	ret = 1;
-out:
-fsck_err:
-	mutex_unlock(&j->blacklist_lock);
-	return ret;
-}
-
-static int __bch2_journal_seq_blacklist_read(struct journal *j,
-					     struct journal_replay *i,
-					     u64 start, u64 end)
-{
-	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct journal_seq_blacklist *bl;
-
-	bch_verbose(c, "blacklisting existing journal seq %llu-%llu",
-		    start, end);
-
-	bl = bch2_journal_seq_blacklisted_new(j, start, end);
-	if (!bl)
-		return -ENOMEM;
-
-	bch2_journal_pin_add(j, le64_to_cpu(i->j.seq), &bl->pin,
-			     journal_seq_blacklist_flush);
-	return 0;
-}
-
-/*
- * After reading the journal, find existing journal seq blacklist entries and
- * read them into memory:
- */
-int bch2_journal_seq_blacklist_read(struct journal *j,
-				    struct journal_replay *i)
-{
-	struct jset_entry *entry;
-	int ret = 0;
-
-	vstruct_for_each(&i->j, entry) {
-		switch (entry->type) {
-		case BCH_JSET_ENTRY_blacklist: {
-			struct jset_entry_blacklist *bl_entry =
-				container_of(entry, struct jset_entry_blacklist, entry);
-
-			ret = __bch2_journal_seq_blacklist_read(j, i,
-					le64_to_cpu(bl_entry->seq),
-					le64_to_cpu(bl_entry->seq));
-			break;
-		}
-		case BCH_JSET_ENTRY_blacklist_v2: {
-			struct jset_entry_blacklist_v2 *bl_entry =
-				container_of(entry, struct jset_entry_blacklist_v2, entry);
-
-			ret = __bch2_journal_seq_blacklist_read(j, i,
-					le64_to_cpu(bl_entry->start),
-					le64_to_cpu(bl_entry->end));
-			break;
-		}
-		}
-
-		if (ret)
-			break;
-	}
-
-	return ret;
-}
-
-/*
- * After reading the journal and walking the btree, we might have new journal
- * sequence numbers to blacklist - add entries to the next journal entry to be
- * written:
- */
-void bch2_journal_seq_blacklist_write(struct journal *j)
-{
-	struct journal_seq_blacklist *bl = j->new_blacklist;
-	struct jset_entry_blacklist_v2 *bl_entry;
-	struct jset_entry *entry;
-
-	if (!bl)
-		return;
-
-	entry = bch2_journal_add_entry_noreservation(journal_cur_buf(j),
-			(sizeof(*bl_entry) - sizeof(*entry)) / sizeof(u64));
-
-	bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry);
-	bl_entry->entry.type	= BCH_JSET_ENTRY_blacklist_v2;
-	bl_entry->start		= cpu_to_le64(bl->start);
-	bl_entry->end		= cpu_to_le64(bl->end);
-
-	bch2_journal_pin_add(j,
-			     journal_cur_seq(j),
-			     &bl->pin,
-			     journal_seq_blacklist_flush);
-
-	j->new_blacklist = NULL;
-}
+static unsigned
+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
+{
+	return bl
+		? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
+		   sizeof(struct journal_seq_blacklist_entry))
+		: 0;
+}
+
+static unsigned sb_blacklist_u64s(unsigned nr)
+{
+	struct bch_sb_field_journal_seq_blacklist *bl;
+
+	return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
+}
+
+static struct bch_sb_field_journal_seq_blacklist *
+blacklist_entry_try_merge(struct bch_fs *c,
+			  struct bch_sb_field_journal_seq_blacklist *bl,
+			  unsigned i)
+{
+	unsigned nr = blacklist_nr_entries(bl);
+
+	if (le64_to_cpu(bl->start[i].end) >=
+	    le64_to_cpu(bl->start[i + 1].start)) {
+		bl->start[i].end = bl->start[i + 1].end;
+		--nr;
+		memmove(&bl->start[i],
+			&bl->start[i + 1],
+			sizeof(bl->start[0]) * (nr - i));
+
+		bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+							  sb_blacklist_u64s(nr));
+		BUG_ON(!bl);
+	}
+
+	return bl;
+}
+
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
+{
+	struct bch_sb_field_journal_seq_blacklist *bl;
+	unsigned i, nr;
+	int ret = 0;
+
+	mutex_lock(&c->sb_lock);
+	bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+	nr = blacklist_nr_entries(bl);
+
+	if (bl) {
+		for (i = 0; i < nr; i++) {
+			struct journal_seq_blacklist_entry *e =
+				bl->start + i;
+
+			if (start == le64_to_cpu(e->start) &&
+			    end == le64_to_cpu(e->end))
+				goto out;
+
+			if (start <= le64_to_cpu(e->start) &&
+			    end >= le64_to_cpu(e->end)) {
+				e->start = cpu_to_le64(start);
+				e->end = cpu_to_le64(end);
+
+				if (i + 1 < nr)
+					bl = blacklist_entry_try_merge(c,
+								bl, i);
+				if (i)
+					bl = blacklist_entry_try_merge(c,
+								bl, i - 1);
+				goto out_write_sb;
+			}
+		}
+	}
+
+	bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+					sb_blacklist_u64s(nr + 1));
+	if (!bl) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	bl->start[nr].start	= cpu_to_le64(start);
+	bl->start[nr].end	= cpu_to_le64(end);
+out_write_sb:
+	c->disk_sb.sb->features[0] |=
+		1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3;
+
+	ret = bch2_write_super(c);
+out:
+	mutex_unlock(&c->sb_lock);
+
+	return ret;
+}
+
+static int journal_seq_blacklist_table_cmp(const void *_l,
+					   const void *_r, size_t size)
+{
+	const struct journal_seq_blacklist_table_entry *l = _l;
+	const struct journal_seq_blacklist_table_entry *r = _r;
+
+	return (l->start > r->start) - (l->start < r->start);
+}
+
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
+				     bool dirty)
+{
+	struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
+	struct journal_seq_blacklist_table_entry search = { .start = seq };
+	int idx;
+
+	if (!t)
+		return false;
+
+	idx = eytzinger0_find_le(t->entries, t->nr,
+				 sizeof(t->entries[0]),
+				 journal_seq_blacklist_table_cmp,
+				 &search);
+	if (idx < 0)
+		return false;
+
+	BUG_ON(t->entries[idx].start > seq);
+
+	if (seq >= t->entries[idx].end)
+		return false;
+
+	if (dirty)
+		t->entries[idx].dirty = true;
+	return true;
+}
+
+int bch2_blacklist_table_initialize(struct bch_fs *c)
+{
+	struct bch_sb_field_journal_seq_blacklist *bl =
+		bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+	struct journal_seq_blacklist_table *t;
+	unsigned i, nr = blacklist_nr_entries(bl);
+
+	BUG_ON(c->journal_seq_blacklist_table);
+
+	if (!bl)
+		return 0;
+
+	t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
+		    GFP_KERNEL);
+	if (!t)
+		return -ENOMEM;
+
+	t->nr = nr;
+
+	for (i = 0; i < nr; i++) {
+		t->entries[i].start	= le64_to_cpu(bl->start[i].start);
+		t->entries[i].end	= le64_to_cpu(bl->start[i].end);
+	}
+
+	eytzinger0_sort(t->entries,
+			t->nr,
+			sizeof(t->entries[0]),
+			journal_seq_blacklist_table_cmp,
+			NULL);
+
+	c->journal_seq_blacklist_table = t;
+	return 0;
+}
+
+static const char *
+bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
+				       struct bch_sb_field *f)
+{
+	struct bch_sb_field_journal_seq_blacklist *bl =
+		field_to_type(f, journal_seq_blacklist);
+	struct journal_seq_blacklist_entry *i;
+	unsigned nr = blacklist_nr_entries(bl);
+
+	for (i = bl->start; i < bl->start + nr; i++) {
+		if (le64_to_cpu(i->start) >=
+		    le64_to_cpu(i->end))
+			return "entry start >= end";
+
+		if (i + 1 < bl->start + nr &&
+		    le64_to_cpu(i[0].end) >
+		    le64_to_cpu(i[1].start))
+			return "entries out of order";
+	}
+
+	return NULL;
+}
+
+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
+						  struct bch_sb *sb,
+						  struct bch_sb_field *f)
+{
+	struct bch_sb_field_journal_seq_blacklist *bl =
+		field_to_type(f, journal_seq_blacklist);
+	struct journal_seq_blacklist_entry *i;
+	unsigned nr = blacklist_nr_entries(bl);
+
+	for (i = bl->start; i < bl->start + nr; i++) {
+		if (i != bl->start)
+			pr_buf(out, " ");
+
+		pr_buf(out, "%llu-%llu",
+		       le64_to_cpu(i->start),
+		       le64_to_cpu(i->end));
+	}
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
+	.validate	= bch2_sb_journal_seq_blacklist_validate,
+	.to_text	= bch2_sb_journal_seq_blacklist_to_text
+};
+
+void bch2_blacklist_entries_gc(struct work_struct *work)
+{
+	struct bch_fs *c = container_of(work, struct bch_fs,
+					journal_seq_blacklist_gc_work);
+	struct journal_seq_blacklist_table *t;
+	struct bch_sb_field_journal_seq_blacklist *bl;
+	struct journal_seq_blacklist_entry *src, *dst;
+	struct btree_trans trans;
+	unsigned i, nr, new_nr;
+	int ret;
+
+	bch2_trans_init(&trans, c);
+
+	for (i = 0; i < BTREE_ID_NR; i++) {
+		struct btree_iter *iter;
+		struct btree *b;
+
+		for_each_btree_node(&trans, iter, i, POS_MIN,
+				    BTREE_ITER_PREFETCH, b)
+			if (test_bit(BCH_FS_STOPPING, &c->flags)) {
+				bch2_trans_exit(&trans);
+				return;
+			}
+		bch2_trans_iter_free(&trans, iter);
+	}
+
+	ret = bch2_trans_exit(&trans);
+	if (ret)
+		return;
+
+	mutex_lock(&c->sb_lock);
+	bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+	if (!bl)
+		goto out;
+
+	nr = blacklist_nr_entries(bl);
+	dst = bl->start;
+
+	t = c->journal_seq_blacklist_table;
+	BUG_ON(nr != t->nr);
+
+	for (src = bl->start, i = eytzinger0_first(t->nr);
+	     src < bl->start + nr;
+	     src++, i = eytzinger0_next(i, nr)) {
+		BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
+		BUG_ON(t->entries[i].end != le64_to_cpu(src->end));
+
+		if (t->entries[i].dirty)
+			*dst++ = *src;
+	}
+
+	new_nr = dst - bl->start;
+
+	bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
+
+	if (new_nr != nr) {
+		bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+				new_nr ? sb_blacklist_u64s(new_nr) : 0);
+		BUG_ON(new_nr && !bl);
+
+		if (!new_nr)
+			c->disk_sb.sb->features[0] &=
+				~(1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3);
+
+		bch2_write_super(c);
+	}
+out:
+	mutex_unlock(&c->sb_lock);
+}
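A worked example of the sizing arithmetic in sb_blacklist_u64s() above: superblock fields are measured in u64s, and each blacklist entry is a start/end pair of __le64s. The 8-byte field header size below is an assumption of this sketch (the real struct bch_sb_field layout lives in bcachefs_format.h):

/* nr entries need (8 + 16 * nr) / 8 = 1 + 2 * nr u64s; nr = 3 gives 7. */
static inline unsigned blacklist_u64s_example(unsigned nr)
{
	return (8 + 16 * nr) / 8;
}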
@@ -1,13 +1,12 @@
 #ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
 #define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
 
-struct journal_replay;
-
-struct journal_seq_blacklist *
-bch2_journal_seq_blacklist_find(struct journal *, u64);
-int bch2_journal_seq_should_ignore(struct bch_fs *, u64, struct btree *);
-int bch2_journal_seq_blacklist_read(struct journal *,
-				    struct journal_replay *);
-void bch2_journal_seq_blacklist_write(struct journal *);
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
+int bch2_blacklist_table_initialize(struct bch_fs *);
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
+
+void bch2_blacklist_entries_gc(struct work_struct *);
 
 #endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */
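The blacklist table introduced above is searched with eytzinger0_find_le(), a cache-friendly array layout local to bcachefs. Functionally it is a predecessor search: find the last entry whose start is <= seq, after which bch2_journal_seq_is_blacklisted() checks seq against that entry's end. A plain sorted-array equivalent, for illustration only:

#include <stdint.h>

struct range_entry { uint64_t start, end; };

/* Returns index of the last entry with start <= seq, or -1 if none. */
static int find_le(const struct range_entry *e, int nr, uint64_t seq)
{
	int l = 0, r = nr, m;

	while (l < r) {
		m = l + (r - l) / 2;
		if (e[m].start <= seq)
			l = m + 1;
		else
			r = m;
	}
	return l - 1;
}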
@@ -53,24 +53,6 @@ struct journal_entry_pin {
 	u64			seq;
 };
 
-/* corresponds to a btree node with a blacklisted bset: */
-struct blacklisted_node {
-	__le64			seq;
-	enum btree_id		btree_id;
-	struct bpos		pos;
-};
-
-struct journal_seq_blacklist {
-	struct list_head	list;
-	u64			start;
-	u64			end;
-
-	struct journal_entry_pin pin;
-
-	struct blacklisted_node	*entries;
-	size_t			nr_entries;
-};
-
 struct journal_res {
 	bool			ref;
 	u8			idx;
@@ -221,10 +203,6 @@ struct journal {
 
 	u64			replay_journal_seq;
 
-	struct mutex		blacklist_lock;
-	struct list_head	seq_blacklist;
-	struct journal_seq_blacklist *new_blacklist;
-
 	struct write_point	wp;
 	spinlock_t		err_lock;
 
@@ -208,7 +208,8 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
 	up_read(&ca->bucket_lock);
 
 	if (sectors_not_moved && !ret)
-		bch_warn(c, "copygc finished but %llu/%llu sectors, %llu/%llu buckets not moved",
+		bch_warn_ratelimited(c,
+			"copygc finished but %llu/%llu sectors, %llu/%llu buckets not moved",
 			 sectors_not_moved, sectors_to_move,
 			 buckets_not_moved, buckets_to_move);
 
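Copygc can finish with unmoved sectors on every pass while a device stays full, so an unthrottled warning would flood the log; bch_warn_ratelimited() wraps printk_ratelimited() the way bch_warn() wraps printk(). A minimal userspace sketch of the ratelimiting idea, illustrative only and not the kernel implementation:

#include <time.h>

/* Suppress repeats inside a fixed interval; 5s mirrors the kernel's
 * default ratelimit interval. */
static int ratelimit_ok(time_t *last, int interval_secs)
{
	time_t now = time(NULL);

	if (now - *last < interval_secs)
		return 0;	/* suppressed */
	*last = now;
	return 1;		/* allowed */
}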
@@ -457,7 +457,7 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
 {
 	struct bch_fs *c = sb->s_fs_info;
 
-	if (sb->s_flags & MS_RDONLY)
+	if (sb->s_flags & SB_RDONLY)
 		return -EROFS;
 
 	/* Accounting must be enabled at mount time: */
@@ -494,7 +494,7 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags)
 {
 	struct bch_fs *c = sb->s_fs_info;
 
-	if (sb->s_flags & MS_RDONLY)
+	if (sb->s_flags & SB_RDONLY)
 		return -EROFS;
 
 	mutex_lock(&c->sb_lock);
@@ -518,7 +518,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
 	struct bch_fs *c = sb->s_fs_info;
 	int ret;
 
-	if (sb->s_flags & MS_RDONLY)
+	if (sb->s_flags & SB_RDONLY)
 		return -EROFS;
 
 	if (uflags & FS_USER_QUOTA) {
@@ -600,7 +600,7 @@ static int bch2_quota_set_info(struct super_block *sb, int type,
 	struct bch_sb_field_quota *sb_quota;
 	struct bch_memquota_type *q;
 
-	if (sb->s_flags & MS_RDONLY)
+	if (sb->s_flags & SB_RDONLY)
 		return -EROFS;
 
 	if (type >= QTYP_NR)
@@ -719,7 +719,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
 	struct bkey_i_quota new_quota;
 	int ret;
 
-	if (sb->s_flags & MS_RDONLY)
+	if (sb->s_flags & SB_RDONLY)
 		return -EROFS;
 
 	bkey_quota_init(&new_quota.k_i);
|
@ -11,6 +11,7 @@
|
|||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "fsck.h"
|
#include "fsck.h"
|
||||||
#include "journal_io.h"
|
#include "journal_io.h"
|
||||||
|
#include "journal_seq_blacklist.h"
|
||||||
#include "quota.h"
|
#include "quota.h"
|
||||||
#include "recovery.h"
|
#include "recovery.h"
|
||||||
#include "replicas.h"
|
#include "replicas.h"
|
||||||
@@ -51,6 +52,118 @@ found:
 	return k;
 }
 
+static int verify_superblock_clean(struct bch_fs *c,
+				   struct bch_sb_field_clean **cleanp,
+				   struct jset *j)
+{
+	unsigned i;
+	struct bch_sb_field_clean *clean = *cleanp;
+	int ret = 0;
+
+	if (!clean || !j)
+		return 0;
+
+	if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
+			"superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
+			le64_to_cpu(clean->journal_seq),
+			le64_to_cpu(j->seq))) {
+		kfree(clean);
+		*cleanp = NULL;
+		return 0;
+	}
+
+	mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
+			"superblock read clock doesn't match journal after clean shutdown");
+	mustfix_fsck_err_on(j->write_clock != clean->write_clock, c,
+			"superblock write clock doesn't match journal after clean shutdown");
+
+	for (i = 0; i < BTREE_ID_NR; i++) {
+		struct bkey_i *k1, *k2;
+		unsigned l1 = 0, l2 = 0;
+
+		k1 = btree_root_find(c, clean, NULL, i, &l1);
+		k2 = btree_root_find(c, NULL, j, i, &l2);
+
+		if (!k1 && !k2)
+			continue;
+
+		mustfix_fsck_err_on(!k1 || !k2 ||
+				    IS_ERR(k1) ||
+				    IS_ERR(k2) ||
+				    k1->k.u64s != k2->k.u64s ||
+				    memcmp(k1, k2, bkey_bytes(k1)) ||
+				    l1 != l2, c,
+			"superblock btree root doesn't match journal after clean shutdown");
+	}
+fsck_err:
+	return ret;
+}
+
+static int
+verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c,
+						  struct list_head *journal)
+{
+	struct journal_replay *i =
+		list_last_entry(journal, struct journal_replay, list);
+	u64 start_seq = le64_to_cpu(i->j.last_seq);
+	u64 end_seq = le64_to_cpu(i->j.seq);
+	u64 seq = start_seq;
+	int ret = 0;
+
+	list_for_each_entry(i, journal, list) {
+		fsck_err_on(seq != le64_to_cpu(i->j.seq), c,
+			"journal entries %llu-%llu missing! (replaying %llu-%llu)",
+			seq, le64_to_cpu(i->j.seq) - 1,
+			start_seq, end_seq);
+
+		seq = le64_to_cpu(i->j.seq);
+
+		fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c,
+			"found blacklisted journal entry %llu", seq);
+
+		do {
+			seq++;
+		} while (bch2_journal_seq_is_blacklisted(c, seq, false));
+	}
+fsck_err:
+	return ret;
+}
+
+static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
+{
+	struct bch_sb_field_clean *clean, *sb_clean;
+	int ret;
+
+	mutex_lock(&c->sb_lock);
+	sb_clean = bch2_sb_get_clean(c->disk_sb.sb);
+
+	if (fsck_err_on(!sb_clean, c,
+			"superblock marked clean but clean section not present")) {
+		SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+		c->sb.clean = false;
+		mutex_unlock(&c->sb_lock);
+		return NULL;
+	}
+
+	clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
+			GFP_KERNEL);
+	if (!clean) {
+		mutex_unlock(&c->sb_lock);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (le16_to_cpu(c->disk_sb.sb->version) <
+	    bcachefs_metadata_version_bkey_renumber)
+		bch2_sb_clean_renumber(clean, READ);
+
+	mutex_unlock(&c->sb_lock);
+
+	return clean;
+fsck_err:
+	mutex_unlock(&c->sb_lock);
+	return ERR_PTR(ret);
+}
+
 static int journal_replay_entry_early(struct bch_fs *c,
 				      struct jset_entry *entry)
 {
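
The fsck pass added above walks every journal entry read from disk and requires the sequence numbers to be contiguous, except where a seq was deliberately blacklisted. Below is a minimal standalone sketch (not bcachefs code; the table layout and numbers are invented for illustration) of the skip logic in that do/while loop, assuming blacklist ranges are half-open [start, end):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct blacklist_range {
	uint64_t start, end;	/* half-open: seqs in [start, end) are dead */
};

static bool seq_is_blacklisted(const struct blacklist_range *tbl, size_t nr,
			       uint64_t seq)
{
	for (size_t i = 0; i < nr; i++)
		if (seq >= tbl[i].start && seq < tbl[i].end)
			return true;
	return false;
}

int main(void)
{
	/* pretend seqs 7 and 8 were blacklisted by an earlier crash */
	struct blacklist_range tbl[] = { { 7, 9 } };
	uint64_t seq = 6;	/* seq of the entry just verified */

	/* advance to the next seq fsck should expect, skipping the run */
	do {
		seq++;
	} while (seq_is_blacklisted(tbl, 1, seq));

	printf("next expected seq: %llu\n", (unsigned long long) seq); /* 9 */
	return 0;
}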
@@ -100,54 +213,108 @@ static int journal_replay_entry_early(struct bch_fs *c,
 			  le64_to_cpu(u->v));
 		break;
 	}
+	case BCH_JSET_ENTRY_blacklist: {
+		struct jset_entry_blacklist *bl_entry =
+			container_of(entry, struct jset_entry_blacklist, entry);
+
+		ret = bch2_journal_seq_blacklist_add(c,
+				le64_to_cpu(bl_entry->seq),
+				le64_to_cpu(bl_entry->seq) + 1);
+		break;
+	}
+	case BCH_JSET_ENTRY_blacklist_v2: {
+		struct jset_entry_blacklist_v2 *bl_entry =
+			container_of(entry, struct jset_entry_blacklist_v2, entry);
+
+		ret = bch2_journal_seq_blacklist_add(c,
+				le64_to_cpu(bl_entry->start),
+				le64_to_cpu(bl_entry->end) + 1);
+		break;
+	}
 	}
 
 	return ret;
 }
 
-static int verify_superblock_clean(struct bch_fs *c,
-				   struct bch_sb_field_clean **cleanp,
-				   struct jset *j)
+static int journal_replay_early(struct bch_fs *c,
+				struct bch_sb_field_clean *clean,
+				struct list_head *journal)
+{
+	struct jset_entry *entry;
+	int ret;
+
+	if (clean) {
+		c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock);
+		c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock);
+
+		for (entry = clean->start;
+		     entry != vstruct_end(&clean->field);
+		     entry = vstruct_next(entry)) {
+			ret = journal_replay_entry_early(c, entry);
+			if (ret)
+				return ret;
+		}
+	} else {
+		struct journal_replay *i =
+			list_last_entry(journal, struct journal_replay, list);
+
+		c->bucket_clock[READ].hand = le16_to_cpu(i->j.read_clock);
+		c->bucket_clock[WRITE].hand = le16_to_cpu(i->j.write_clock);
+
+		list_for_each_entry(i, journal, list)
+			vstruct_for_each(&i->j, entry) {
+				ret = journal_replay_entry_early(c, entry);
+				if (ret)
+					return ret;
+			}
+	}
+
+	bch2_fs_usage_initialize(c);
+
+	return 0;
+}
+
+static int read_btree_roots(struct bch_fs *c)
 {
 	unsigned i;
-	struct bch_sb_field_clean *clean = *cleanp;
 	int ret = 0;
 
-	if (!clean || !j)
-		return 0;
-
-	if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
-			"superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
-			le64_to_cpu(clean->journal_seq),
-			le64_to_cpu(j->seq))) {
-		kfree(clean);
-		*cleanp = NULL;
-		return 0;
-	}
-
-	mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
-			"superblock read clock doesn't match journal after clean shutdown");
-	mustfix_fsck_err_on(j->write_clock != clean->write_clock, c,
-			"superblock read clock doesn't match journal after clean shutdown");
-
 	for (i = 0; i < BTREE_ID_NR; i++) {
-		struct bkey_i *k1, *k2;
-		unsigned l1 = 0, l2 = 0;
-
-		k1 = btree_root_find(c, clean, NULL, i, &l1);
-		k2 = btree_root_find(c, NULL, j, i, &l2);
+		struct btree_root *r = &c->btree_roots[i];
 
-		if (!k1 && !k2)
+		if (!r->alive)
 			continue;
 
-		mustfix_fsck_err_on(!k1 || !k2 ||
-				    IS_ERR(k1) ||
-				    IS_ERR(k2) ||
-				    k1->k.u64s != k2->k.u64s ||
-				    memcmp(k1, k2, bkey_bytes(k1)) ||
-				    l1 != l2, c,
-			"superblock btree root doesn't match journal after clean shutdown");
+		if (i == BTREE_ID_ALLOC &&
+		    test_reconstruct_alloc(c)) {
+			c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+			continue;
+		}
+
+		if (r->error) {
+			__fsck_err(c, i == BTREE_ID_ALLOC
+				   ? FSCK_CAN_IGNORE : 0,
+				   "invalid btree root %s",
+				   bch2_btree_ids[i]);
+			if (i == BTREE_ID_ALLOC)
+				c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+		}
+
+		ret = bch2_btree_root_read(c, i, &r->key, r->level);
+		if (ret) {
+			__fsck_err(c, i == BTREE_ID_ALLOC
+				   ? FSCK_CAN_IGNORE : 0,
+				   "error reading btree root %s",
+				   bch2_btree_ids[i]);
+			if (i == BTREE_ID_ALLOC)
+				c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+		}
 	}
+
+	for (i = 0; i < BTREE_ID_NR; i++)
+		if (!c->btree_roots[i].b)
+			bch2_btree_root_alloc(c, i);
 fsck_err:
 	return ret;
 }
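
A note on the two blacklist cases added to journal_replay_entry_early() above: a v1 entry records a single dead sequence number, a v2 entry an inclusive start/end pair, and both call sites widen the upper bound by one — which suggests bch2_journal_seq_blacklist_add() takes a half-open [start, end) interval. A toy conversion (hypothetical values, not bcachefs code) makes the arithmetic concrete:

#include <stdint.h>
#include <stdio.h>

/* stand-in for bch2_journal_seq_blacklist_add(); takes [start, end) */
static void blacklist_add(uint64_t start, uint64_t end)
{
	printf("blacklisting seqs %llu..%llu inclusive\n",
	       (unsigned long long) start,
	       (unsigned long long) (end - 1));
}

int main(void)
{
	uint64_t v1_seq = 10;			/* v1: one seq */
	uint64_t v2_start = 20, v2_end = 25;	/* v2: inclusive range */

	blacklist_add(v1_seq, v1_seq + 1);	/* -> 10..10 */
	blacklist_add(v2_start, v2_end + 1);	/* -> 20..25 */
	return 0;
}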
@@ -185,119 +352,82 @@ static bool journal_empty(struct list_head *journal)
 int bch2_fs_recovery(struct bch_fs *c)
 {
 	const char *err = "cannot allocate memory";
-	struct bch_sb_field_clean *clean = NULL, *sb_clean = NULL;
-	struct jset_entry *entry;
+	struct bch_sb_field_clean *clean = NULL;
+	u64 journal_seq;
 	LIST_HEAD(journal);
-	struct jset *j = NULL;
-	unsigned i;
-	bool run_gc = c->opts.fsck ||
-		!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO));
 	int ret;
 
-	mutex_lock(&c->sb_lock);
+	if (c->sb.clean)
+		clean = read_superblock_clean(c);
+	ret = PTR_ERR_OR_ZERO(clean);
+	if (ret)
+		goto err;
+
+	if (c->sb.clean)
+		bch_info(c, "recovering from clean shutdown, journal seq %llu",
+			 le64_to_cpu(clean->journal_seq));
+
 	if (!c->replicas.entries) {
 		bch_info(c, "building replicas info");
 		set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
 	}
 
-	if (c->sb.clean)
-		sb_clean = bch2_sb_get_clean(c->disk_sb.sb);
-	if (sb_clean) {
-		clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
-				GFP_KERNEL);
-		if (!clean) {
-			ret = -ENOMEM;
-			mutex_unlock(&c->sb_lock);
-			goto err;
-		}
-
-		if (le16_to_cpu(c->disk_sb.sb->version) <
-		    bcachefs_metadata_version_bkey_renumber)
-			bch2_sb_clean_renumber(clean, READ);
-	}
-	mutex_unlock(&c->sb_lock);
-
-	if (clean)
-		bch_info(c, "recovering from clean shutdown, journal seq %llu",
-			 le64_to_cpu(clean->journal_seq));
-
-	if (!clean || c->opts.fsck) {
+	if (!c->sb.clean || c->opts.fsck) {
+		struct jset *j;
+
 		ret = bch2_journal_read(c, &journal);
 		if (ret)
 			goto err;
 
-		j = &list_entry(journal.prev, struct journal_replay, list)->j;
-	} else {
-		ret = bch2_journal_set_seq(c,
-					   le64_to_cpu(clean->journal_seq),
-					   le64_to_cpu(clean->journal_seq));
-		BUG_ON(ret);
-	}
-
-	ret = verify_superblock_clean(c, &clean, j);
-	if (ret)
-		goto err;
-
-	fsck_err_on(clean && !journal_empty(&journal), c,
-		    "filesystem marked clean but journal not empty");
-
-	err = "insufficient memory";
-	if (clean) {
-		c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock);
-		c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock);
-
-		for (entry = clean->start;
-		     entry != vstruct_end(&clean->field);
-		     entry = vstruct_next(entry)) {
-			ret = journal_replay_entry_early(c, entry);
-			if (ret)
-				goto err;
-		}
+		fsck_err_on(c->sb.clean && !journal_empty(&journal), c,
+			    "filesystem marked clean but journal not empty");
+
+		if (!c->sb.clean && list_empty(&journal)) {
+			bch_err(c, "no journal entries found");
+			ret = BCH_FSCK_REPAIR_IMPOSSIBLE;
+			goto err;
+		}
+
+		j = &list_last_entry(&journal, struct journal_replay, list)->j;
+
+		ret = verify_superblock_clean(c, &clean, j);
+		if (ret)
+			goto err;
+
+		journal_seq = le64_to_cpu(j->seq) + 1;
 	} else {
-		struct journal_replay *i;
-
-		c->bucket_clock[READ].hand = le16_to_cpu(j->read_clock);
-		c->bucket_clock[WRITE].hand = le16_to_cpu(j->write_clock);
-
-		list_for_each_entry(i, &journal, list)
-			vstruct_for_each(&i->j, entry) {
-				ret = journal_replay_entry_early(c, entry);
-				if (ret)
-					goto err;
-			}
-	}
-
-	bch2_fs_usage_initialize(c);
-
-	for (i = 0; i < BTREE_ID_NR; i++) {
-		struct btree_root *r = &c->btree_roots[i];
-
-		if (!r->alive)
-			continue;
-
-		err = "invalid btree root pointer";
-		ret = -1;
-		if (r->error)
-			goto err;
-
-		if (i == BTREE_ID_ALLOC &&
-		    test_reconstruct_alloc(c))
-			continue;
-
-		err = "error reading btree root";
-		ret = bch2_btree_root_read(c, i, &r->key, r->level);
-		if (ret) {
-			if (i != BTREE_ID_ALLOC)
-				goto err;
-
-			mustfix_fsck_err(c, "error reading btree root");
-			run_gc = true;
-		}
-	}
-
-	for (i = 0; i < BTREE_ID_NR; i++)
-		if (!c->btree_roots[i].b)
-			bch2_btree_root_alloc(c, i);
+		journal_seq = le64_to_cpu(clean->journal_seq) + 1;
+	}
+
+	ret = journal_replay_early(c, clean, &journal);
+	if (ret)
+		goto err;
+
+	if (!c->sb.clean) {
+		ret = bch2_journal_seq_blacklist_add(c,
+						     journal_seq,
+						     journal_seq + 4);
+		if (ret) {
+			bch_err(c, "error creating new journal seq blacklist entry");
+			goto err;
+		}
+
+		journal_seq += 4;
+	}
+
+	ret = bch2_blacklist_table_initialize(c);
+
+	ret = verify_journal_entries_not_blacklisted_or_missing(c, &journal);
+	if (ret)
+		goto err;
+
+	ret = bch2_fs_journal_start(&c->journal, journal_seq, &journal);
+	if (ret)
+		goto err;
+
+	ret = read_btree_roots(c);
+	if (ret)
+		goto err;
 
 	err = "error reading allocation information";
 	ret = bch2_alloc_read(c, &journal);
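
The unclean-shutdown branch above is the heart of the rewritten machinery: recovery picks journal_seq as one past the newest entry it read, blacklists the next four sequence numbers (any of them may have been partially written before the crash), and restarts the journal beyond the blacklisted window. A rough walkthrough of the arithmetic, with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t last_seq_read = 100;	/* newest entry found on disk */
	uint64_t journal_seq = last_seq_read + 1;

	/* seqs [journal_seq, journal_seq + 4) may hold torn writes */
	printf("blacklist seqs %llu..%llu\n",
	       (unsigned long long) journal_seq,
	       (unsigned long long) (journal_seq + 3));	/* 101..104 */

	journal_seq += 4;
	printf("journal restarts at seq %llu\n",
	       (unsigned long long) journal_seq);	/* 105 */
	return 0;
}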
@@ -312,10 +442,12 @@ int bch2_fs_recovery(struct bch_fs *c)
 
 	set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
 
-	if (run_gc) {
+	if (c->opts.fsck ||
+	    !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
+	    test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
 		bch_verbose(c, "starting mark and sweep:");
 		err = "error in recovery";
-		ret = bch2_gc(c, &journal, true);
+		ret = bch2_gc(c, &journal, true, false);
 		if (ret)
 			goto err;
 		bch_verbose(c, "mark and sweep done");
@@ -334,13 +466,6 @@ int bch2_fs_recovery(struct bch_fs *c)
 	if (c->opts.noreplay)
 		goto out;
 
-	/*
-	 * bch2_fs_journal_start() can't happen sooner, or btree_gc_finish()
-	 * will give spurious errors about oldest_gen > bucket_gen -
-	 * this is a hack but oh well.
-	 */
-	bch2_fs_journal_start(&c->journal);
-
 	bch_verbose(c, "starting journal replay:");
 	err = "journal replay failed";
 	ret = bch2_journal_replay(c, &journal);
@@ -356,6 +481,14 @@ int bch2_fs_recovery(struct bch_fs *c)
 	if (ret)
 		goto err;
 
+	if (enabled_qtypes(c)) {
+		bch_verbose(c, "reading quotas:");
+		ret = bch2_fs_quota_read(c);
+		if (ret)
+			goto err;
+		bch_verbose(c, "quotas done");
+	}
+
 	mutex_lock(&c->sb_lock);
 	if (c->opts.version_upgrade) {
 		if (c->sb.version < bcachefs_metadata_version_new_versioning)
@@ -371,14 +504,9 @@ int bch2_fs_recovery(struct bch_fs *c)
 	}
 	mutex_unlock(&c->sb_lock);
 
-	if (enabled_qtypes(c)) {
-		bch_verbose(c, "reading quotas:");
-		ret = bch2_fs_quota_read(c);
-		if (ret)
-			goto err;
-		bch_verbose(c, "quotas done");
-	}
-
+	if (c->journal_seq_blacklist_table &&
+	    c->journal_seq_blacklist_table->nr > 128)
+		queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
 out:
 	bch2_journal_entries_free(&journal);
 	kfree(clean);
@@ -427,7 +555,7 @@ int bch2_fs_initialize(struct bch_fs *c)
 	 * journal_res_get() will crash if called before this has
 	 * set up the journal.pin FIFO and journal.cur pointer:
 	 */
-	bch2_fs_journal_start(&c->journal);
+	bch2_fs_journal_start(&c->journal, 1, &journal);
 	bch2_journal_set_replay_done(&c->journal);
 
 	err = "error going read write";
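
For context on the signature change just above: bch2_fs_journal_start() now takes the first sequence number the journal will write, plus the list of replayed entries. The two call sites in this commit, side by side (surrounding code trimmed):

/* bch2_fs_initialize(): a fresh filesystem numbers entries from 1 */
bch2_fs_journal_start(&c->journal, 1, &journal);

/* bch2_fs_recovery(): resume past the newest (possibly blacklisted) seq */
ret = bch2_fs_journal_start(&c->journal, journal_seq, &journal);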
@@ -6,6 +6,7 @@
 #include "error.h"
 #include "io.h"
 #include "journal.h"
+#include "journal_seq_blacklist.h"
 #include "replicas.h"
 #include "quota.h"
 #include "super-io.h"
@@ -29,6 +29,7 @@
 #include "io.h"
 #include "journal.h"
 #include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
 #include "move.h"
 #include "migrate.h"
 #include "movinggc.h"
@@ -499,6 +500,7 @@ static void bch2_fs_free(struct bch_fs *c)
 	kfree(c->replicas.entries);
 	kfree(c->replicas_gc.entries);
 	kfree(rcu_dereference_protected(c->disk_groups, 1));
+	kfree(c->journal_seq_blacklist_table);
 
 	if (c->journal_reclaim_wq)
 		destroy_workqueue(c->journal_reclaim_wq);
@@ -527,6 +529,10 @@ void bch2_fs_stop(struct bch_fs *c)
 
 	bch_verbose(c, "shutting down");
 
+	set_bit(BCH_FS_STOPPING, &c->flags);
+
+	cancel_work_sync(&c->journal_seq_blacklist_gc_work);
+
 	for_each_member_device(ca, c, i)
 		if (ca->kobj.state_in_sysfs &&
 		    ca->disk_sb.bdev)
@@ -663,6 +669,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	spin_lock_init(&c->btree_write_error_lock);
 	INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work);
 
+	INIT_WORK(&c->journal_seq_blacklist_gc_work,
+		  bch2_blacklist_entries_gc);
+
 	INIT_LIST_HEAD(&c->fsck_errors);
 	mutex_init(&c->fsck_error_lock);
 
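
Taken together, the hunks touching struct bch_fs above give the blacklist GC work item a conventional kernel workqueue lifecycle; condensed here with the lines from this commit (error handling and surrounding code omitted):

/* bch2_fs_alloc(): bind the work item to its handler */
INIT_WORK(&c->journal_seq_blacklist_gc_work,
	  bch2_blacklist_entries_gc);

/* end of recovery: GC the table only once it grows past 128 entries */
if (c->journal_seq_blacklist_table &&
    c->journal_seq_blacklist_table->nr > 128)
	queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);

/* bch2_fs_stop(): make sure the work item is idle before teardown */
cancel_work_sync(&c->journal_seq_blacklist_gc_work);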
@@ -496,7 +496,7 @@ STORE(__bch2_fs)
 		bch2_coalesce(c);
 
 	if (attr == &sysfs_trigger_gc)
-		bch2_gc(c, NULL, false);
+		bch2_gc(c, NULL, false, false);
 
 	if (attr == &sysfs_trigger_alloc_write) {
 		bool wrote;
@@ -17,7 +17,7 @@
 
 #include <linux/crypto.h>
 #include <crypto/algapi.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
 #include <crypto/skcipher.h>
 
 #include <sodium/crypto_stream_chacha20.h>
@@ -36,7 +36,7 @@ static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		container_of(tfm, struct chacha20_tfm, tfm);
 	int i;
 
-	if (keysize != CHACHA20_KEY_SIZE)
+	if (keysize != CHACHA_KEY_SIZE)
 		return -EINVAL;
 
 	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
@@ -72,8 +72,8 @@ static int crypto_chacha20_crypt(struct skcipher_request *req)
 		if (sg_is_last(sg))
 			break;
 
-		BUG_ON(sg->length % CHACHA20_BLOCK_SIZE);
-		iv[0] += sg->length / CHACHA20_BLOCK_SIZE;
+		BUG_ON(sg->length % CHACHA_BLOCK_SIZE);
+		iv[0] += sg->length / CHACHA_BLOCK_SIZE;
 		sg = sg_next(sg);
 	};
 
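
The CHACHA20_* to CHACHA_* renames in these hunks track the upstream kernel's crypto/chacha.h; the values themselves are unchanged (16-byte IV, 32-byte key, 64-byte block). The iv[0] adjustment being edited above is just the ChaCha block counter advancing once per full block consumed — a toy check:

#include <stdint.h>
#include <stdio.h>

#define CHACHA_BLOCK_SIZE 64	/* same value as the renamed constant */

int main(void)
{
	uint32_t iv0 = 0;		/* ChaCha block counter word */
	unsigned int sg_len = 4096;	/* hypothetical scatterlist segment */

	iv0 += sg_len / CHACHA_BLOCK_SIZE;
	printf("block counter after segment: %u\n", iv0);	/* 64 */
	return 0;
}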
@@ -93,8 +93,8 @@ static void *crypto_chacha20_alloc_tfm(void)
 	tfm->tfm.setkey = crypto_chacha20_setkey;
 	tfm->tfm.encrypt = crypto_chacha20_crypt;
 	tfm->tfm.decrypt = crypto_chacha20_crypt;
-	tfm->tfm.ivsize = CHACHA20_IV_SIZE;
-	tfm->tfm.keysize = CHACHA20_KEY_SIZE;
+	tfm->tfm.ivsize = CHACHA_IV_SIZE;
+	tfm->tfm.keysize = CHACHA_KEY_SIZE;
 
 	return tfm;
 }