mirror of https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-23 00:00:02 +03:00

Update bcachefs sources to 9404a01d3dc5 bcachefs: Make read_only a mount option again, but hidden

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

This commit is contained in:
parent b0eb3c2930
commit 34b5654d9e
@@ -1 +1 @@
-792ca5ba3c9a07d762d9c1a440e31c0520f37de0
+9404a01d3dc5553b106fa590602f4771b8e0b8ae
@@ -26,6 +26,7 @@ typedef struct {

 #define __ATOMIC_READ(p)		uatomic_read(p)
 #define __ATOMIC_SET(p, v)		uatomic_set(p, v)
+#define __ATOMIC_SET_RELEASE(p, v)	uatomic_set(p, v)
 #define __ATOMIC_ADD_RETURN(v, p)	uatomic_add_return(p, v)
 #define __ATOMIC_SUB_RETURN(v, p)	uatomic_sub_return(p, v)
 #define __ATOMIC_ADD(v, p)		uatomic_add(p, v)
@@ -64,6 +65,7 @@ typedef struct {

 #define __ATOMIC_READ(p)		__atomic_load_n(p, __ATOMIC_RELAXED)
 #define __ATOMIC_SET(p, v)		__atomic_store_n(p, v, __ATOMIC_RELAXED)
+#define __ATOMIC_SET_RELEASE(p, v)	__atomic_store_n(p, v, __ATOMIC_RELEASE)
 #define __ATOMIC_ADD_RETURN(v, p)	__atomic_add_fetch(p, v, __ATOMIC_RELAXED)
 #define __ATOMIC_ADD_RETURN_RELEASE(v, p)				\
 	__atomic_add_fetch(p, v, __ATOMIC_RELEASE)
@@ -189,6 +191,11 @@ static inline void a_type##_set(a_type##_t *v, i_type i)	\
 	return __ATOMIC_SET(&v->counter, i);				\
 }									\
 									\
+static inline void a_type##_set_release(a_type##_t *v, i_type i)	\
+{									\
+	return __ATOMIC_SET_RELEASE(&v->counter, i);			\
+}									\
+									\
 static inline i_type a_type##_add_return(i_type i, a_type##_t *v)	\
 {									\
 	return __ATOMIC_ADD_RETURN(i, &v->counter);			\
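For readers unfamiliar with the pattern: an x-macro block like the one above stamps out a whole family of typed atomic wrappers at once. A minimal standalone sketch of the idea (ours, not from the patch), using GCC's __atomic builtins so it compiles in plain userspace C:

	#include <assert.h>

	#define DEF_ATOMIC_OPS(a_type, i_type)					\
	typedef struct { i_type counter; } a_type##_t;				\
										\
	static inline void a_type##_set(a_type##_t *v, i_type i)		\
	{									\
		__atomic_store_n(&v->counter, i, __ATOMIC_RELAXED);		\
	}									\
										\
	static inline void a_type##_set_release(a_type##_t *v, i_type i)	\
	{									\
		__atomic_store_n(&v->counter, i, __ATOMIC_RELEASE);		\
	}									\
										\
	static inline i_type a_type##_read(a_type##_t *v)			\
	{									\
		return __atomic_load_n(&v->counter, __ATOMIC_RELAXED);		\
	}

	DEF_ATOMIC_OPS(atomic, int)	/* defines atomic_t, atomic_set(), ... */

	int main(void)
	{
		atomic_t a;
		atomic_set_release(&a, 42);	/* store with release ordering */
		assert(atomic_read(&a) == 42);
		return 0;
	}

One invocation per counter width gives consistent naming without hand-writing each wrapper, which is why adding __ATOMIC_SET_RELEASE in both backends above is enough to grow the whole API.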
@@ -284,6 +284,21 @@ static inline void closure_get(struct closure *cl)
 #endif
 }

+/**
+ * closure_get_not_zero
+ */
+static inline bool closure_get_not_zero(struct closure *cl)
+{
+	unsigned old = atomic_read(&cl->remaining);
+	do {
+		if (!(old & CLOSURE_REMAINING_MASK))
+			return false;
+
+	} while (!atomic_try_cmpxchg_acquire(&cl->remaining, &old, old + 1));
+
+	return true;
+}
+
 /**
  * closure_init - Initialize a closure, setting the refcount to 1
  * @cl: closure to initialize
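closure_get_not_zero() is the usual try-get idiom: take a reference only if the count has not already hit zero, retrying the compare-and-swap until it sticks. The debug.c changes later in this commit rely on it to safely grab a btree_trans whose last reference may already be gone. The same loop in portable C11, as a standalone sketch — REMAINING_MASK here is a made-up stand-in for CLOSURE_REMAINING_MASK, which masks off the flag bits packed above the count:

	#include <assert.h>
	#include <stdatomic.h>
	#include <stdbool.h>

	#define REMAINING_MASK ((1U << 16) - 1)	/* hypothetical flag/count split */

	struct obj { atomic_uint remaining; };

	/* take a reference only if the count is still nonzero, i.e. the
	 * object is not already on its way to being freed */
	static bool obj_get_not_zero(struct obj *o)
	{
		unsigned old = atomic_load(&o->remaining);
		do {
			if (!(old & REMAINING_MASK))
				return false;	/* lost the race: count hit zero */
		} while (!atomic_compare_exchange_weak_explicit(&o->remaining,
								&old, old + 1,
								memory_order_acquire,
								memory_order_relaxed));
		return true;
	}

	int main(void)
	{
		struct obj o = { .remaining = 1 };
		assert(obj_get_not_zero(&o));	/* 1 -> 2 */
		o.remaining = 0;
		assert(!obj_get_not_zero(&o));	/* refused once the count is zero */
		return 0;
	}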
@@ -310,6 +325,12 @@ static inline void closure_init_stack(struct closure *cl)
 	atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
 }

+static inline void closure_init_stack_release(struct closure *cl)
+{
+	memset(cl, 0, sizeof(struct closure));
+	atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+}
+
 /**
  * closure_wake_up - wake up all closures on a wait list,
  * with memory barrier
@@ -355,6 +376,8 @@ do {									\
  */
 #define closure_return(_cl)	continue_at((_cl), NULL, NULL)

+void closure_return_sync(struct closure *cl);
+
 /**
  * continue_at_nobarrier - jump to another function without barrier
  *
@@ -151,7 +151,7 @@ extern void workqueue_set_max_active(struct workqueue_struct *wq,
 extern bool current_is_workqueue_rescuer(void);
 extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
 extern unsigned int work_busy(struct work_struct *work);
-extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
+static inline __printf(1, 2) void set_worker_desc(const char *fmt, ...) {}
 extern void print_worker_info(const char *log_lvl, struct task_struct *task);
 extern void show_workqueue_state(void);
@@ -30,7 +30,7 @@
 #include <linux/sched/task.h>
 #include <linux/sort.h>

-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket);
+static void bch2_discard_one_bucket_fast(struct bch_dev *, u64);

 /* Persistent alloc info: */
@@ -476,7 +476,8 @@ err:
 }

 __flatten
-struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos)
+struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos,
+						      enum btree_iter_update_trigger_flags flags)
 {
 	struct btree_iter iter;
 	struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update_noupdate(trans, &iter, pos);
@@ -484,7 +485,7 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans,
 	if (ret)
 		return ERR_PTR(ret);

-	ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+	ret = bch2_trans_update(trans, &iter, &a->k_i, flags);
 	bch2_trans_iter_exit(trans, &iter);
 	return unlikely(ret) ? ERR_PTR(ret) : a;
 }
@@ -595,8 +596,6 @@ int bch2_alloc_read(struct bch_fs *c)
 	struct bch_dev *ca = NULL;
 	int ret;

-	down_read(&c->gc_lock);
-
 	if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
 		ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
 					 BTREE_ITER_prefetch, k, ({
@@ -645,7 +644,6 @@ int bch2_alloc_read(struct bch_fs *c)

 	bch2_dev_put(ca);
 	bch2_trans_put(trans);
-	up_read(&c->gc_lock);

 	bch_err_fn(c, ret);
 	return ret;
@@ -847,6 +845,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 	    !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
 		new_a->gen++;
 		SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
+		alloc_data_type_set(new_a, new_a->data_type);
 	}

 	if (old_a->data_type != new_a->data_type ||
@@ -958,12 +957,12 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 		if (statechange(a->data_type == BCH_DATA_need_discard) &&
 		    !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
 		    bucket_flushed(new_a))
-			bch2_discard_one_bucket_fast(c, new.k->p);
+			bch2_discard_one_bucket_fast(ca, new.k->p.offset);

 		if (statechange(a->data_type == BCH_DATA_cached) &&
 		    !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
 		    should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
-			bch2_do_invalidates(c);
+			bch2_dev_do_invalidates(ca);

 		if (statechange(a->data_type == BCH_DATA_need_gc_gens))
 			bch2_gc_gens_async(c);
@@ -1684,34 +1683,38 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
 	return ret;
 }

-static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket)
+static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
 {
 	int ret;

-	mutex_lock(&c->discard_buckets_in_flight_lock);
-	darray_for_each(c->discard_buckets_in_flight, i)
-		if (bkey_eq(*i, bucket)) {
+	mutex_lock(&ca->discard_buckets_in_flight_lock);
+	darray_for_each(ca->discard_buckets_in_flight, i)
+		if (i->bucket == bucket) {
 			ret = -BCH_ERR_EEXIST_discard_in_flight_add;
 			goto out;
 		}

-	ret = darray_push(&c->discard_buckets_in_flight, bucket);
+	ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
+		.in_progress	= in_progress,
+		.bucket		= bucket,
+	}));
 out:
-	mutex_unlock(&c->discard_buckets_in_flight_lock);
+	mutex_unlock(&ca->discard_buckets_in_flight_lock);
 	return ret;
 }

-static void discard_in_flight_remove(struct bch_fs *c, struct bpos bucket)
+static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
 {
-	mutex_lock(&c->discard_buckets_in_flight_lock);
-	darray_for_each(c->discard_buckets_in_flight, i)
-		if (bkey_eq(*i, bucket)) {
-			darray_remove_item(&c->discard_buckets_in_flight, i);
+	mutex_lock(&ca->discard_buckets_in_flight_lock);
+	darray_for_each(ca->discard_buckets_in_flight, i)
+		if (i->bucket == bucket) {
+			BUG_ON(!i->in_progress);
+			darray_remove_item(&ca->discard_buckets_in_flight, i);
 			goto found;
 		}
 	BUG();
 found:
-	mutex_unlock(&c->discard_buckets_in_flight_lock);
+	mutex_unlock(&ca->discard_buckets_in_flight_lock);
 }

 struct discard_buckets_state {
@@ -1719,26 +1722,11 @@ struct discard_buckets_state {
 	u64		open;
 	u64		need_journal_commit;
 	u64		discarded;
-	struct bch_dev	*ca;
-	u64		need_journal_commit_this_dev;
 };

-static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca)
-{
-	if (s->ca == ca)
-		return;
-
-	if (s->ca && s->need_journal_commit_this_dev >
-	    bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets)
-		bch2_journal_flush_async(&c->journal, NULL);
-
-	if (s->ca)
-		percpu_ref_put(&s->ca->io_ref);
-	s->ca = ca;
-	s->need_journal_commit_this_dev = 0;
-}
-
 static int bch2_discard_one_bucket(struct btree_trans *trans,
+				   struct bch_dev *ca,
 				   struct btree_iter *need_discard_iter,
 				   struct bpos *discard_pos_done,
 				   struct discard_buckets_state *s)
@@ -1752,16 +1740,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
 	bool discard_locked = false;
 	int ret = 0;

-	struct bch_dev *ca = s->ca && s->ca->dev_idx == pos.inode
-		? s->ca
-		: bch2_dev_get_ioref(c, pos.inode, WRITE);
-	if (!ca) {
-		bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
-		return 0;
-	}
-
-	discard_buckets_next_dev(c, s, ca);
-
 	if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
 		s->open++;
 		goto out;
@@ -1821,7 +1799,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
 		goto out;
 	}

-	if (discard_in_flight_add(c, SPOS(iter.pos.inode, iter.pos.offset, true)))
+	if (discard_in_flight_add(ca, iter.pos.offset, true))
 		goto out;

 	discard_locked = true;
@@ -1845,8 +1823,9 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
 	}

 	SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
-	alloc_data_type_set(&a->v, a->v.data_type);
 write:
+	alloc_data_type_set(&a->v, a->v.data_type);
+
 	ret =   bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
 		bch2_trans_commit(trans, NULL, NULL,
 				  BCH_WATERMARK_btree|
@@ -1858,7 +1837,7 @@ write:
 	s->discarded++;
 out:
 	if (discard_locked)
-		discard_in_flight_remove(c, iter.pos);
+		discard_in_flight_remove(ca, iter.pos.offset);
 	s->seen++;
 	bch2_trans_iter_exit(trans, &iter);
 	printbuf_exit(&buf);
@@ -1867,7 +1846,8 @@ out:

 static void bch2_do_discards_work(struct work_struct *work)
 {
-	struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
+	struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
+	struct bch_fs *c = ca->fs;
 	struct discard_buckets_state s = {};
 	struct bpos discard_pos_done = POS_MAX;
 	int ret;
@@ -1878,23 +1858,41 @@ static void bch2_do_discards_work(struct work_struct *work)
 	 * successful commit:
 	 */
 	ret = bch2_trans_run(c,
-		for_each_btree_key(trans, iter,
-				   BTREE_ID_need_discard, POS_MIN, 0, k,
-			bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s)));
-
-	discard_buckets_next_dev(c, &s, NULL);
+		for_each_btree_key_upto(trans, iter,
+				   BTREE_ID_need_discard,
+				   POS(ca->dev_idx, 0),
+				   POS(ca->dev_idx, U64_MAX), 0, k,
+			bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s)));

 	trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
 			      bch2_err_str(ret));

 	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+	percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_dev_do_discards(struct bch_dev *ca)
+{
+	struct bch_fs *c = ca->fs;
+
+	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+		return;
+
+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
+		goto put_ioref;
+
+	if (queue_work(c->write_ref_wq, &ca->discard_work))
+		return;
+
+	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+put_ioref:
+	percpu_ref_put(&ca->io_ref);
 }

 void bch2_do_discards(struct bch_fs *c)
 {
-	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
-	    !queue_work(c->write_ref_wq, &c->discard_work))
-		bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+	for_each_member_device(c, ca)
+		bch2_dev_do_discards(ca);
 }

 static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket)
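bch2_dev_do_discards() shows the acquire-in-order, release-in-reverse idiom used for all three per-device workers this commit introduces: take the device io ref, then the filesystem write ref, then hand both to the worker via queue_work(); if queue_work() returns false the work was already pending, so both refs are unwound. A schematic, compilable sketch of the idiom — every type and helper below is illustrative scaffolding, not bcachefs API:

	#include <stdatomic.h>
	#include <stdbool.h>

	struct fs  { atomic_int write_ref; };
	struct dev { atomic_int io_ref; struct fs *fs; bool work_queued; };

	static bool ref_tryget(atomic_int *r)
	{
		int v = atomic_load(r);
		do {
			if (!v)
				return false;	/* already shutting down */
		} while (!atomic_compare_exchange_weak(r, &v, v + 1));
		return true;
	}

	static void ref_put(atomic_int *r) { atomic_fetch_sub(r, 1); }

	/* stand-in for queue_work(): false means work was already pending */
	static bool queue_work_stub(struct dev *d)
	{
		bool was_queued = d->work_queued;
		d->work_queued = true;
		return !was_queued;
	}

	static void dev_kick_worker(struct dev *d)
	{
		if (!ref_tryget(&d->io_ref))		/* 1: device may be going away */
			return;
		if (!ref_tryget(&d->fs->write_ref))	/* 2: fs may be going read-only */
			goto put_ioref;
		if (queue_work_stub(d))			/* 3: worker now owns both refs */
			return;
		ref_put(&d->fs->write_ref);		/* already queued: undo 2... */
	put_ioref:
		ref_put(&d->io_ref);			/* ...and undo 1 */
	}

	int main(void)
	{
		struct fs f = { .write_ref = 1 };
		struct dev d = { .io_ref = 1, .fs = &f, .work_queued = false };
		dev_kick_worker(&d);	/* the (real) worker drops both refs when done */
		return 0;
	}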
@@ -1923,68 +1921,69 @@ err:

 static void bch2_do_discards_fast_work(struct work_struct *work)
 {
-	struct bch_fs *c = container_of(work, struct bch_fs, discard_fast_work);
+	struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work);
+	struct bch_fs *c = ca->fs;

 	while (1) {
 		bool got_bucket = false;
-		struct bpos bucket;
-		struct bch_dev *ca;
+		u64 bucket;

-		mutex_lock(&c->discard_buckets_in_flight_lock);
-		darray_for_each(c->discard_buckets_in_flight, i) {
-			if (i->snapshot)
+		mutex_lock(&ca->discard_buckets_in_flight_lock);
+		darray_for_each(ca->discard_buckets_in_flight, i) {
+			if (i->in_progress)
 				continue;

-			ca = bch2_dev_get_ioref(c, i->inode, WRITE);
-			if (!ca) {
-				darray_remove_item(&c->discard_buckets_in_flight, i);
-				continue;
-			}
-
 			got_bucket = true;
-			bucket = *i;
-			i->snapshot = true;
+			bucket = i->bucket;
+			i->in_progress = true;
 			break;
 		}
-		mutex_unlock(&c->discard_buckets_in_flight_lock);
+		mutex_unlock(&ca->discard_buckets_in_flight_lock);

 		if (!got_bucket)
 			break;

 		if (ca->mi.discard && !c->opts.nochanges)
 			blkdev_issue_discard(ca->disk_sb.bdev,
-					     bucket.offset * ca->mi.bucket_size,
+					     bucket_to_sector(ca, bucket),
 					     ca->mi.bucket_size,
 					     GFP_KERNEL);

 		int ret = bch2_trans_do(c, NULL, NULL,
-					BCH_WATERMARK_btree|
-					BCH_TRANS_COMMIT_no_enospc,
-					bch2_clear_bucket_needs_discard(trans, bucket));
+					BCH_WATERMARK_btree|
+					BCH_TRANS_COMMIT_no_enospc,
+					bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket)));
 		bch_err_fn(c, ret);

-		percpu_ref_put(&ca->io_ref);
-		discard_in_flight_remove(c, bucket);
+		discard_in_flight_remove(ca, bucket);

 		if (ret)
 			break;
 	}

 	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+	percpu_ref_put(&ca->io_ref);
 }

-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket)
+static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
 {
-	rcu_read_lock();
-	struct bch_dev *ca = bch2_dev_rcu(c, bucket.inode);
-	bool dead = !ca || percpu_ref_is_dying(&ca->io_ref);
-	rcu_read_unlock();
+	struct bch_fs *c = ca->fs;

-	if (!dead &&
-	    !discard_in_flight_add(c, bucket) &&
-	    bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) &&
-	    !queue_work(c->write_ref_wq, &c->discard_fast_work))
-		bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+	if (discard_in_flight_add(ca, bucket, false))
+		return;
+
+	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+		return;
+
+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
+		goto put_ioref;
+
+	if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
+		return;
+
+	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+put_ioref:
+	percpu_ref_put(&ca->io_ref);
 }

 static int invalidate_one_bucket(struct btree_trans *trans,
@@ -2010,7 +2009,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
 	if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
 		return 0;

-	a = bch2_trans_start_alloc_update(trans, bucket);
+	a = bch2_trans_start_alloc_update(trans, bucket, BTREE_TRIGGER_bucket_invalidate);
 	ret = PTR_ERR_OR_ZERO(a);
 	if (ret)
 		goto out;
@@ -2086,7 +2085,8 @@ again:

 static void bch2_do_invalidates_work(struct work_struct *work)
 {
-	struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
+	struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
+	struct bch_fs *c = ca->fs;
 	struct btree_trans *trans = bch2_trans_get(c);
 	int ret = 0;

@@ -2094,50 +2094,63 @@ static void bch2_do_invalidates_work(struct work_struct *work)
 	if (ret)
 		goto err;

-	for_each_member_device(c, ca) {
-		s64 nr_to_invalidate =
-			should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
-		struct btree_iter iter;
-		bool wrapped = false;
+	s64 nr_to_invalidate =
+		should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
+	struct btree_iter iter;
+	bool wrapped = false;

-		bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
-				     lru_pos(ca->dev_idx, 0,
-					     ((bch2_current_io_time(c, READ) + U32_MAX) &
-					      LRU_TIME_MAX)), 0);
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
+			     lru_pos(ca->dev_idx, 0,
+				     ((bch2_current_io_time(c, READ) + U32_MAX) &
+				      LRU_TIME_MAX)), 0);

-		while (true) {
-			bch2_trans_begin(trans);
+	while (true) {
+		bch2_trans_begin(trans);

-			struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
-			ret = bkey_err(k);
-			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-				continue;
-			if (ret)
-				break;
-			if (!k.k)
-				break;
-
-			ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
-			if (ret)
-				break;
-		}
-		bch2_trans_iter_exit(trans, &iter);
-
-		if (ret < 0) {
-			bch2_dev_put(ca);
+		struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
+		ret = bkey_err(k);
+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+			continue;
+		if (ret)
 			break;
-		}
+		if (!k.k)
+			break;
+
+		ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
+		if (ret)
+			break;
+
+		bch2_btree_iter_advance(&iter);
 	}
+	bch2_trans_iter_exit(trans, &iter);
 err:
 	bch2_trans_put(trans);
+	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+	percpu_ref_put(&ca->io_ref);
 }

+void bch2_dev_do_invalidates(struct bch_dev *ca)
+{
+	struct bch_fs *c = ca->fs;
+
+	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+		return;
+
+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
+		goto put_ioref;
+
+	if (queue_work(c->write_ref_wq, &ca->invalidate_work))
+		return;
+
+	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+put_ioref:
+	percpu_ref_put(&ca->io_ref);
+}
+
 void bch2_do_invalidates(struct bch_fs *c)
 {
-	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
-	    !queue_work(c->write_ref_wq, &c->invalidate_work))
-		bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+	for_each_member_device(c, ca)
+		bch2_dev_do_invalidates(ca);
 }

 int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
@@ -2453,16 +2466,20 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
 			set_bit(ca->dev_idx, c->rw_devs[i].d);
 }

-void bch2_fs_allocator_background_exit(struct bch_fs *c)
+void bch2_dev_allocator_background_exit(struct bch_dev *ca)
 {
-	darray_exit(&c->discard_buckets_in_flight);
+	darray_exit(&ca->discard_buckets_in_flight);
+}
+
+void bch2_dev_allocator_background_init(struct bch_dev *ca)
+{
+	mutex_init(&ca->discard_buckets_in_flight_lock);
+	INIT_WORK(&ca->discard_work, bch2_do_discards_work);
+	INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work);
+	INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work);
 }

 void bch2_fs_allocator_background_init(struct bch_fs *c)
 {
 	spin_lock_init(&c->freelist_lock);
-	mutex_init(&c->discard_buckets_in_flight_lock);
-	INIT_WORK(&c->discard_work, bch2_do_discards_work);
-	INIT_WORK(&c->discard_fast_work, bch2_do_discards_fast_work);
-	INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work);
 }
@@ -206,7 +206,8 @@ static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a)
 struct bkey_i_alloc_v4 *
 bch2_trans_start_alloc_update_noupdate(struct btree_trans *, struct btree_iter *, struct bpos);
 struct bkey_i_alloc_v4 *
-bch2_trans_start_alloc_update(struct btree_trans *, struct bpos);
+bch2_trans_start_alloc_update(struct btree_trans *, struct bpos,
+			      enum btree_iter_update_trigger_flags);

 void __bch2_alloc_to_v4(struct bkey_s_c, struct bch_alloc_v4 *);

@@ -299,6 +300,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
 		       enum btree_iter_update_trigger_flags);
 int bch2_check_alloc_info(struct bch_fs *);
 int bch2_check_alloc_to_lru_refs(struct bch_fs *);
+void bch2_dev_do_discards(struct bch_dev *);
 void bch2_do_discards(struct bch_fs *);

 static inline u64 should_invalidate_buckets(struct bch_dev *ca,
@@ -313,6 +315,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
 	return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
 }

+void bch2_dev_do_invalidates(struct bch_dev *);
 void bch2_do_invalidates(struct bch_fs *);

 static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
@@ -336,7 +339,9 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *);
 void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);

-void bch2_fs_allocator_background_exit(struct bch_fs *);
+void bch2_dev_allocator_background_exit(struct bch_dev *);
+void bch2_dev_allocator_background_init(struct bch_dev *);
+
 void bch2_fs_allocator_background_init(struct bch_fs *);

 #endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
@@ -621,13 +621,13 @@ again:
 			avail = dev_buckets_free(ca, *usage, watermark);

 			if (usage->d[BCH_DATA_need_discard].buckets > avail)
-				bch2_do_discards(c);
+				bch2_dev_do_discards(ca);

 			if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
 				bch2_gc_gens_async(c);

 			if (should_invalidate_buckets(ca, *usage))
-				bch2_do_invalidates(c);
+				bch2_dev_do_invalidates(ca);

 			if (!avail) {
 				if (cl && !waiting) {
@@ -496,6 +496,11 @@ struct io_count {
 	u64			sectors[2][BCH_DATA_NR];
 };

+struct discard_in_flight {
+	bool			in_progress:1;
+	u64			bucket:63;
+};
+
 struct bch_dev {
 	struct kobject		kobj;
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -533,8 +538,8 @@ struct bch_dev {
 	/*
 	 * Buckets:
 	 * Per-bucket arrays are protected by c->mark_lock, bucket_lock and
-	 * gc_lock, for device resize - holding any is sufficient for access:
-	 * Or rcu_read_lock(), but only for dev_ptr_stale():
+	 * gc_gens_lock, for device resize - holding any is sufficient for
+	 * access: Or rcu_read_lock(), but only for dev_ptr_stale():
 	 */
 	struct bucket_array __rcu *buckets_gc;
 	struct bucket_gens __rcu *bucket_gens;
@@ -555,6 +560,12 @@ struct bch_dev {
 	size_t			inc_gen_really_needs_gc;
 	size_t			buckets_waiting_on_journal;

+	struct work_struct	invalidate_work;
+	struct work_struct	discard_work;
+	struct mutex		discard_buckets_in_flight_lock;
+	DARRAY(struct discard_in_flight)	discard_buckets_in_flight;
+	struct work_struct	discard_fast_work;
+
 	atomic64_t		rebalance_work;

 	struct journal_device	journal;
@@ -909,11 +920,6 @@ struct bch_fs {
 	unsigned		write_points_nr;

 	struct buckets_waiting_for_journal buckets_waiting_for_journal;
-	struct work_struct	invalidate_work;
-	struct work_struct	discard_work;
-	struct mutex		discard_buckets_in_flight_lock;
-	DARRAY(struct bpos)	discard_buckets_in_flight;
-	struct work_struct	discard_fast_work;

 	/* GARBAGE COLLECTION */
 	struct work_struct	gc_gens_work;
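struct discard_in_flight packs the in-progress flag and the bucket index into one 64-bit word, so each pending discard costs 8 bytes in the per-device darray (versus the 16+ bytes of the struct bpos it replaces). A standalone check of the packing — the size claim assumes a typical LP64 ABI such as x86-64 SysV, where a 1-bit bool field and a 63-bit field share a single 64-bit storage unit:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	struct discard_in_flight {
		bool		in_progress:1;
		uint64_t	bucket:63;
	};

	int main(void)
	{
		/* both fields fit in one 64-bit unit on this ABI */
		assert(sizeof(struct discard_in_flight) == 8);

		struct discard_in_flight d = {
			.in_progress	= true,
			.bucket		= (1ULL << 62) + 12345,
		};
		assert(d.bucket == (1ULL << 62) + 12345);	/* 63 bits survive */
		assert(d.in_progress);
		return 0;
	}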
@@ -468,18 +468,6 @@ struct bch_backpointer {
 	struct bpos		pos;
 } __packed __aligned(8);

-/* LRU btree: */
-
-struct bch_lru {
-	struct bch_val		v;
-	__le64			idx;
-} __packed __aligned(8);
-
-#define LRU_ID_STRIPES		(1U << 16)
-
-#define LRU_TIME_BITS	48
-#define LRU_TIME_MAX	((1ULL << LRU_TIME_BITS) - 1)
-
 /* Optional/variable size superblock sections: */

 struct bch_sb_field {
@@ -516,6 +504,7 @@ struct bch_sb_field {
 #include "inode_format.h"
 #include "journal_seq_blacklist_format.h"
 #include "logged_ops_format.h"
+#include "lru_format.h"
 #include "quota_format.h"
 #include "reflink_format.h"
 #include "replicas_format.h"
@@ -954,8 +943,9 @@ enum bch_version_upgrade_opts {

 #define BCH_ERROR_ACTIONS()		\
 	x(continue,		0)	\
-	x(ro,			1)	\
-	x(panic,		2)
+	x(fix_safe,		1)	\
+	x(panic,		2)	\
+	x(ro,			3)

 enum bch_error_actions {
 #define x(t, n) BCH_ON_ERROR_##t = n,
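The BCH_ERROR_ACTIONS() change is an x-macro edit: fix_safe takes value 1, panic keeps 2, and ro moves to 3. A standalone sketch of how such a list expands into both the enum and a matching name table (the name table below illustrates the common companion pattern; it is not copied from the tree):

	#include <stdio.h>

	#define BCH_ERROR_ACTIONS()	\
		x(continue,	0)	\
		x(fix_safe,	1)	\
		x(panic,	2)	\
		x(ro,		3)

	enum bch_error_actions {
	#define x(t, n) BCH_ON_ERROR_##t = n,
		BCH_ERROR_ACTIONS()
	#undef x
		BCH_ON_ERROR_NR		/* = 4, one past the last value */
	};

	static const char * const error_action_names[] = {
	#define x(t, n) [n] = #t,
		BCH_ERROR_ACTIONS()
	#undef x
	};

	int main(void)
	{
		printf("%d = %s\n", BCH_ON_ERROR_fix_safe,
		       error_action_names[BCH_ON_ERROR_fix_safe]);	/* 1 = fix_safe */
		return 0;
	}

Because enum values and names come from the same list, reordering entries here automatically keeps every consumer consistent.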
@@ -1229,7 +1229,7 @@ int bch2_gc_gens(struct bch_fs *c)
 	int ret;

 	/*
-	 * Ideally we would be using state_lock and not gc_lock here, but that
+	 * Ideally we would be using state_lock and not gc_gens_lock here, but that
 	 * introduces a deadlock in the RO path - we currently take the state
 	 * lock at the start of going RO, thus the gc thread may get stuck:
 	 */
@@ -1237,7 +1237,8 @@ int bch2_gc_gens(struct bch_fs *c)
 		return 0;

 	trace_and_count(c, gc_gens_start, c);
-	down_read(&c->gc_lock);
+
+	down_read(&c->state_lock);

 	for_each_member_device(c, ca) {
 		struct bucket_gens *gens = bucket_gens(ca);
@@ -1306,7 +1307,7 @@ err:
 		ca->oldest_gen = NULL;
 	}

-	up_read(&c->gc_lock);
+	up_read(&c->state_lock);
 	mutex_unlock(&c->gc_gens_lock);
 	if (!bch2_err_matches(ret, EROFS))
 		bch_err_fn(c, ret);
@@ -1801,13 +1801,12 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
 			goto hole;
 	} else {
 		struct bkey_cached *ck = (void *) path->l[0].b;
-
-		EBUG_ON(ck &&
-			(path->btree_id != ck->key.btree_id ||
-			 !bkey_eq(path->pos, ck->key.pos)));
-		if (!ck || !ck->valid)
+		if (!ck)
 			return bkey_s_c_null;
+
+		EBUG_ON(path->btree_id != ck->key.btree_id ||
+			!bkey_eq(path->pos, ck->key.pos));
+
 		*u = ck->k->k;
 		k = bkey_i_to_s_c(ck->k);
 	}
@@ -3131,7 +3130,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)

 	trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
 	memset(trans, 0, sizeof(*trans));
-	closure_init_stack(&trans->ref);

 	seqmutex_lock(&c->btree_trans_lock);
 	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
@@ -3151,15 +3149,10 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
 			BUG_ON(pos_task &&
 			       pid == pos_task->pid &&
 			       pos->locked);
-
-			if (pos_task && pid < pos_task->pid) {
-				list_add_tail(&trans->list, &pos->list);
-				goto list_add_done;
-			}
 		}
 	}
-	list_add_tail(&trans->list, &c->btree_trans_list);
-list_add_done:
+
+	list_add(&trans->list, &c->btree_trans_list);
 	seqmutex_unlock(&c->btree_trans_lock);
 got_trans:
 	trans->c = c;
@@ -3200,6 +3193,8 @@ got_trans:
 	trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
 	trans->srcu_lock_time	= jiffies;
 	trans->srcu_held	= true;
+
+	closure_init_stack_release(&trans->ref);
 	return trans;
 }
@@ -3236,7 +3231,6 @@ void bch2_trans_put(struct btree_trans *trans)
 	trans_for_each_update(trans, i)
 		__btree_path_put(trans->paths + i->path, true);
 	trans->nr_updates	= 0;
-	trans->locking_wait.task = NULL;

 	check_btree_paths_leaked(trans);

@@ -3248,6 +3242,13 @@ void bch2_trans_put(struct btree_trans *trans)
 	if (unlikely(trans->journal_replay_not_finished))
 		bch2_journal_keys_put(c);

+	/*
+	 * trans->ref protects trans->locking_wait.task, btree_paths array; used
+	 * by cycle detector
+	 */
+	closure_return_sync(&trans->ref);
+	trans->locking_wait.task = NULL;
+
 	unsigned long *paths_allocated = trans->paths_allocated;
 	trans->paths_allocated	= NULL;
 	trans->paths		= NULL;
@@ -3265,8 +3266,6 @@ void bch2_trans_put(struct btree_trans *trans)
 	trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);

 	if (trans) {
-		closure_sync(&trans->ref);
-
 		seqmutex_lock(&c->btree_trans_lock);
 		list_del(&trans->list);
 		seqmutex_unlock(&c->btree_trans_lock);
@@ -3386,8 +3385,6 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
 			per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;

 		if (trans) {
-			closure_sync(&trans->ref);
-
 			seqmutex_lock(&c->btree_trans_lock);
 			list_del(&trans->list);
 			seqmutex_unlock(&c->btree_trans_lock);
@@ -205,9 +205,22 @@ static void bkey_cached_free_fast(struct btree_key_cache *bc,
 	six_unlock_intent(&ck->c.lock);
 }

+static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s, gfp_t gfp)
+{
+	struct bkey_cached *ck = kmem_cache_zalloc(bch2_key_cache, gfp);
+	if (unlikely(!ck))
+		return NULL;
+	ck->k = kmalloc(key_u64s * sizeof(u64), gfp);
+	if (unlikely(!ck->k)) {
+		kmem_cache_free(bch2_key_cache, ck);
+		return NULL;
+	}
+	ck->u64s = key_u64s;
+	return ck;
+}
+
 static struct bkey_cached *
-bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
-		  bool *was_new)
+bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned key_u64s)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_key_cache *bc = &c->btree_key_cache;
@@ -281,8 +294,10 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 	}

 	ck = allocate_dropping_locks(trans, ret,
-			kmem_cache_zalloc(bch2_key_cache, _gfp));
+			__bkey_cached_alloc(key_u64s, _gfp));
 	if (ret) {
+		if (ck)
+			kfree(ck->k);
 		kmem_cache_free(bch2_key_cache, ck);
 		return ERR_PTR(ret);
 	}
@@ -296,7 +311,6 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 	ck->c.cached = true;
 	BUG_ON(!six_trylock_intent(&ck->c.lock));
 	BUG_ON(!six_trylock_write(&ck->c.lock));
-	*was_new = true;
 	return ck;
 }
@@ -326,71 +340,102 @@ out:
 	return ck;
 }

-static struct bkey_cached *
-btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
+static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *path,
+				  struct bkey_s_c k)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_key_cache *bc = &c->btree_key_cache;
-	struct bkey_cached *ck;
-	bool was_new = false;

-	ck = bkey_cached_alloc(trans, path, &was_new);
-	if (IS_ERR(ck))
-		return ck;
+	/*
+	 * bch2_varint_decode can read past the end of the buffer by at
+	 * most 7 bytes (it won't be used):
+	 */
+	unsigned key_u64s = k.k->u64s + 1;
+
+	/*
+	 * Allocate some extra space so that the transaction commit path is less
+	 * likely to have to reallocate, since that requires a transaction
+	 * restart:
+	 */
+	key_u64s = min(256U, (key_u64s * 3) / 2);
+	key_u64s = roundup_pow_of_two(key_u64s);
+
+	struct bkey_cached *ck = bkey_cached_alloc(trans, path, key_u64s);
+	int ret = PTR_ERR_OR_ZERO(ck);
+	if (ret)
+		return ret;

 	if (unlikely(!ck)) {
 		ck = bkey_cached_reuse(bc);
 		if (unlikely(!ck)) {
 			bch_err(c, "error allocating memory for key cache item, btree %s",
 				bch2_btree_id_str(path->btree_id));
-			return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create);
+			return -BCH_ERR_ENOMEM_btree_key_cache_create;
 		}

 		mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
 	}

 	ck->c.level		= 0;
 	ck->c.btree_id		= path->btree_id;
 	ck->key.btree_id	= path->btree_id;
 	ck->key.pos		= path->pos;
-	ck->valid		= false;
 	ck->flags		= 1U << BKEY_CACHED_ACCESSED;

-	if (unlikely(rhashtable_lookup_insert_fast(&bc->table,
-					  &ck->hash,
-					  bch2_btree_key_cache_params))) {
-		/* We raced with another fill: */
+	if (unlikely(key_u64s > ck->u64s)) {
+		mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);

-		if (likely(was_new)) {
-			six_unlock_write(&ck->c.lock);
-			six_unlock_intent(&ck->c.lock);
-			kfree(ck);
-		} else {
-			bkey_cached_free_fast(bc, ck);
+		struct bkey_i *new_k = allocate_dropping_locks(trans, ret,
+				kmalloc(key_u64s * sizeof(u64), _gfp));
+		if (unlikely(!new_k)) {
+			bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
+				bch2_btree_id_str(ck->key.btree_id), key_u64s);
+			ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
+		} else if (ret) {
+			kfree(new_k);
+			goto err;
 		}

-		mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
-		return NULL;
+		kfree(ck->k);
+		ck->k = new_k;
+		ck->u64s = key_u64s;
 	}

-	atomic_long_inc(&bc->nr_keys);
+	bkey_reassemble(ck->k, k);
+
+	ret = rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params);
+	if (unlikely(ret)) /* raced with another fill? */
+		goto err;

+	atomic_long_inc(&bc->nr_keys);
 	six_unlock_write(&ck->c.lock);

-	return ck;
+	enum six_lock_type lock_want = __btree_lock_want(path, 0);
+	if (lock_want == SIX_LOCK_read)
+		six_lock_downgrade(&ck->c.lock);
+	btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
+	path->uptodate = BTREE_ITER_UPTODATE;
+	return 0;
+err:
+	bkey_cached_free_fast(bc, ck);
+	mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
+
+	return ret;
 }

-static int btree_key_cache_fill(struct btree_trans *trans,
-				struct btree_path *ck_path,
-				struct bkey_cached *ck)
+static noinline int btree_key_cache_fill(struct btree_trans *trans,
+					 struct btree_path *ck_path,
+					 unsigned flags)
 {
+	if (flags & BTREE_ITER_cached_nofill) {
+		ck_path->uptodate = BTREE_ITER_UPTODATE;
+		return 0;
+	}
+
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
 	struct bkey_s_c k;
-	unsigned new_u64s = 0;
-	struct bkey_i *new_k = NULL;
 	int ret;

-	bch2_trans_iter_init(trans, &iter, ck->key.btree_id, ck->key.pos,
+	bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos,
 			     BTREE_ITER_key_cache_fill|
 			     BTREE_ITER_cached_nofill);
 	iter.flags &= ~BTREE_ITER_with_journal;
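The buffer sizing in btree_key_cache_create() — one extra u64 of varint-decode slop, 1.5x headroom capped at 256 u64s, then round up to a power of two — can be checked in isolation. A worked example; the helper name is ours, the arithmetic is the patch's:

	#include <assert.h>

	static unsigned key_cache_alloc_u64s(unsigned k_u64s)
	{
		unsigned key_u64s = k_u64s + 1;	/* varint over-read slop */

		/* headroom so commit-path updates rarely need a realloc */
		key_u64s = key_u64s * 3 / 2;
		if (key_u64s > 256)
			key_u64s = 256;

		/* roundup_pow_of_two() equivalent */
		unsigned p = 1;
		while (p < key_u64s)
			p <<= 1;
		return p;
	}

	int main(void)
	{
		assert(key_cache_alloc_u64s(6) == 16);	/* 6 -> 7 -> 10 -> 16 */
		assert(key_cache_alloc_u64s(1) == 4);	/* 1 -> 2 -> 3 -> 4 */
		return 0;
	}

Power-of-two sizes also keep the kmalloc buckets happy, so the headroom costs little real memory.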
@@ -399,70 +444,15 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 	if (ret)
 		goto err;

-	if (!bch2_btree_node_relock(trans, ck_path, 0)) {
-		trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
-		ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
+	/* Recheck after btree lookup, before allocating: */
+	ret = bch2_btree_key_cache_find(c, ck_path->btree_id, ck_path->pos) ? -EEXIST : 0;
+	if (unlikely(ret))
+		goto out;
+
+	ret = btree_key_cache_create(trans, ck_path, k);
+	if (ret)
 		goto err;
-	}
-
-	/*
-	 * bch2_varint_decode can read past the end of the buffer by at
-	 * most 7 bytes (it won't be used):
-	 */
-	new_u64s = k.k->u64s + 1;
-
-	/*
-	 * Allocate some extra space so that the transaction commit path is less
-	 * likely to have to reallocate, since that requires a transaction
-	 * restart:
-	 */
-	new_u64s = min(256U, (new_u64s * 3) / 2);
-
-	if (new_u64s > ck->u64s) {
-		new_u64s = roundup_pow_of_two(new_u64s);
-		new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN);
-		if (!new_k) {
-			bch2_trans_unlock(trans);
-
-			new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
-			if (!new_k) {
-				bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
-					bch2_btree_id_str(ck->key.btree_id), new_u64s);
-				ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
-				goto err;
-			}
-
-			ret = bch2_trans_relock(trans);
-			if (ret) {
-				kfree(new_k);
-				goto err;
-			}
-
-			if (!bch2_btree_node_relock(trans, ck_path, 0)) {
-				kfree(new_k);
-				trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
-				ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
-				goto err;
-			}
-		}
-	}
-
-	ret = bch2_btree_node_lock_write(trans, ck_path, &ck_path->l[0].b->c);
-	if (ret) {
-		kfree(new_k);
-		goto err;
-	}
-
-	if (new_k) {
-		kfree(ck->k);
-		ck->u64s = new_u64s;
-		ck->k = new_k;
-	}
-
-	bkey_reassemble(ck->k, k);
-	ck->valid = true;
-	bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
-
 out:
 	/* We're not likely to need this iterator again: */
 	bch2_set_btree_iter_dontneed(&iter);
 err:
@@ -470,107 +460,19 @@ err:
 	return ret;
 }

-static noinline int
-bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
-					 unsigned flags)
+static inline int btree_path_traverse_cached_fast(struct btree_trans *trans,
+						  struct btree_path *path)
 {
 	struct bch_fs *c = trans->c;
 	struct bkey_cached *ck;
-	int ret = 0;
-
-	BUG_ON(path->level);
-
-	path->l[1].b = NULL;
-
-	if (bch2_btree_node_relock_notrace(trans, path, 0)) {
-		ck = (void *) path->l[0].b;
-		goto fill;
-	}
 retry:
 	ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
-	if (!ck) {
-		ck = btree_key_cache_create(trans, path);
-		ret = PTR_ERR_OR_ZERO(ck);
-		if (ret)
-			goto err;
-		if (!ck)
-			goto retry;
-
-		btree_path_cached_set(trans, path, ck, BTREE_NODE_INTENT_LOCKED);
-		path->locks_want = 1;
-	} else {
-		enum six_lock_type lock_want = __btree_lock_want(path, 0);
-
-		ret = btree_node_lock(trans, path, (void *) ck, 0,
-				      lock_want, _THIS_IP_);
-		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-			goto err;
-
-		BUG_ON(ret);
-
-		if (ck->key.btree_id != path->btree_id ||
-		    !bpos_eq(ck->key.pos, path->pos)) {
-			six_unlock_type(&ck->c.lock, lock_want);
-			goto retry;
-		}
-
-		btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
-	}
-fill:
-	path->uptodate = BTREE_ITER_UPTODATE;
-
-	if (!ck->valid && !(flags & BTREE_ITER_cached_nofill)) {
-		ret = bch2_btree_path_upgrade(trans, path, 1) ?:
-			btree_key_cache_fill(trans, path, ck) ?:
-			bch2_btree_path_relock(trans, path, _THIS_IP_);
-		if (ret)
-			goto err;
-
-		path->uptodate = BTREE_ITER_UPTODATE;
-	}
-
-	if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
-		set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
-
-	BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
-	BUG_ON(path->uptodate);
-
-	return ret;
-err:
-	path->uptodate = BTREE_ITER_NEED_TRAVERSE;
-	if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
-		btree_node_unlock(trans, path, 0);
-		path->l[0].b = ERR_PTR(ret);
-	}
-	return ret;
-}
-
-int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
-				    unsigned flags)
-{
-	struct bch_fs *c = trans->c;
-	struct bkey_cached *ck;
-	int ret = 0;
-
-	EBUG_ON(path->level);
-
-	path->l[1].b = NULL;
-
-	if (bch2_btree_node_relock_notrace(trans, path, 0)) {
-		ck = (void *) path->l[0].b;
-		goto fill;
-	}
-retry:
-	ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
 	if (!ck)
-		return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
+		return -ENOENT;

 	enum six_lock_type lock_want = __btree_lock_want(path, 0);

-	ret = btree_node_lock(trans, path, (void *) ck, 0,
-			      lock_want, _THIS_IP_);
-	EBUG_ON(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart));
-
+	int ret = btree_node_lock(trans, path, (void *) ck, 0, lock_want, _THIS_IP_);
 	if (ret)
 		return ret;

@@ -580,18 +482,40 @@ retry:
 		goto retry;
 	}

-	btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
-fill:
-	if (!ck->valid)
-		return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
-
 	if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
 		set_bit(BKEY_CACHED_ACCESSED, &ck->flags);

+	btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
 	path->uptodate = BTREE_ITER_UPTODATE;
-	EBUG_ON(!ck->valid);
-	EBUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
 	return 0;
 }

+int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
+				    unsigned flags)
+{
+	EBUG_ON(path->level);
+
+	path->l[1].b = NULL;
+
+	if (bch2_btree_node_relock_notrace(trans, path, 0)) {
+		path->uptodate = BTREE_ITER_UPTODATE;
+		return 0;
+	}
+
+	int ret;
+	do {
+		ret = btree_path_traverse_cached_fast(trans, path);
+		if (unlikely(ret == -ENOENT))
+			ret = btree_key_cache_fill(trans, path, flags);
+	} while (ret == -EEXIST);
+
+	if (unlikely(ret)) {
+		path->uptodate = BTREE_ITER_NEED_TRAVERSE;
+		if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+			btree_node_unlock(trans, path, 0);
+			path->l[0].b = ERR_PTR(ret);
+		}
+	}
+	return ret;
+}
@@ -630,8 +554,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
 		goto out;
 	}

-	BUG_ON(!ck->valid);
-
 	if (journal_seq && ck->journal.seq != journal_seq)
 		goto out;
@@ -753,7 +675,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 	BUG_ON(insert->k.u64s > ck->u64s);

 	bkey_copy(ck->k, insert);
-	ck->valid = true;

 	if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
 		EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
|
||||
struct btree_path *path)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_key_cache *bc = &c->btree_key_cache;
|
||||
struct bkey_cached *ck = (void *) path->l[0].b;
|
||||
|
||||
BUG_ON(!ck->valid);
|
||||
|
||||
/*
|
||||
* We just did an update to the btree, bypassing the key cache: the key
|
||||
* cache key is now stale and must be dropped, even if dirty:
|
||||
@ -806,7 +726,11 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
|
||||
bch2_journal_pin_drop(&c->journal, &ck->journal);
|
||||
}
|
||||
|
||||
ck->valid = false;
|
||||
bkey_cached_evict(bc, ck);
|
||||
bkey_cached_free_fast(bc, ck);
|
||||
|
||||
mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
|
||||
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
|
||||
}
|
||||
|
||||
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
|
@@ -137,7 +137,8 @@ static inline void bch2_trans_unlock_write(struct btree_trans *trans)
 {
 	if (likely(trans->write_locked)) {
 		trans_for_each_update(trans, i)
-			if (!same_leaf_as_prev(trans, i))
+			if (btree_node_locked_type(trans->paths + i->path, i->level) ==
+			    BTREE_NODE_WRITE_LOCKED)
 				bch2_btree_node_unlock_write_inlined(trans,
 						trans->paths + i->path, insert_l(trans, i)->b);
 		trans->write_locked = false;
@@ -777,14 +778,12 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 	trans_for_each_update(trans, i) {
 		struct btree_path *path = trans->paths + i->path;

-		if (!i->cached) {
+		if (!i->cached)
 			bch2_btree_insert_key_leaf(trans, path, i->k, trans->journal_res.seq);
-		} else if (!i->key_cache_already_flushed)
+		else if (!i->key_cache_already_flushed)
 			bch2_btree_insert_key_cached(trans, flags, i);
-		else {
+		else
 			bch2_btree_key_cache_drop(trans, path);
-			btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-		}
 	}

 	return 0;
@@ -388,7 +388,6 @@ struct bkey_cached {
 	unsigned long		flags;
 	unsigned long		btree_trans_barrier_seq;
 	u16			u64s;
-	bool			valid;
 	struct bkey_cached_key	key;

 	struct rhash_head	hash;
@@ -569,7 +569,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
 	*sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);

 	if (flags & BTREE_TRIGGER_transactional) {
-		struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket);
+		struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);
 		ret = PTR_ERR_OR_ZERO(a) ?:
 			__mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &a->v);
 		if (ret)
@@ -1217,7 +1217,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 		bucket_gens->nbuckets - bucket_gens->first_bucket;

 	if (resize) {
-		down_write(&c->gc_lock);
 		down_write(&ca->bucket_lock);
 		percpu_down_write(&c->mark_lock);
 	}
@@ -1240,7 +1239,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	if (resize) {
 		percpu_up_write(&c->mark_lock);
 		up_write(&ca->bucket_lock);
-		up_write(&c->gc_lock);
 	}

 	ret = 0;
@@ -85,7 +85,7 @@ static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
 	return rcu_dereference_check(ca->buckets_gc,
 				     !ca->fs ||
 				     percpu_rwsem_is_held(&ca->fs->mark_lock) ||
-				     lockdep_is_held(&ca->fs->gc_lock) ||
+				     lockdep_is_held(&ca->fs->state_lock) ||
 				     lockdep_is_held(&ca->bucket_lock));
 }

@@ -103,7 +103,7 @@ static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
 	return rcu_dereference_check(ca->bucket_gens,
 				     !ca->fs ||
 				     percpu_rwsem_is_held(&ca->fs->mark_lock) ||
-				     lockdep_is_held(&ca->fs->gc_lock) ||
+				     lockdep_is_held(&ca->fs->state_lock) ||
 				     lockdep_is_held(&ca->bucket_lock));
 }
@@ -214,22 +214,10 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a

 	if (arg.opts) {
 		char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
-		char *ro, *rest;
-
-		/*
-		 * If passed a "read_only" mount option, remove it because it is
-		 * no longer a valid mount option, and the filesystem will be
-		 * set "read_only" regardless.
-		 */
-		ro = strstr(optstr, "read_only");
-		if (ro) {
-			rest = ro + strlen("read_only");
-			memmove(ro, rest, strlen(rest) + 1);
-		}

 		ret =   PTR_ERR_OR_ZERO(optstr) ?:
 			bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr);
-		kfree(optstr);
+		if (!IS_ERR(optstr))
+			kfree(optstr);

 		if (ret)
 			goto err;
@@ -333,7 +321,8 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
 		return ret;

 	ret = bch2_dev_add(c, path);
-	kfree(path);
+	if (!IS_ERR(path))
+		kfree(path);

 	return ret;
 }
@@ -579,7 +568,6 @@ static long bch2_ioctl_query_accounting(struct bch_fs *c,

 	ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
 err:
-	bch_err_fn(c, ret);
 	darray_exit(&accounting);
 	return ret;
 }
@@ -861,7 +849,8 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,

 	ret =   PTR_ERR_OR_ZERO(optstr) ?:
 		bch2_parse_mount_opts(c, &thr->opts, NULL, optstr);
-	kfree(optstr);
+	if (!IS_ERR(optstr))
+		kfree(optstr);

 	if (ret)
 		goto err;
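The chardev.c hunks fix one bug class in several places: strndup_user() returns an ERR_PTR() on failure rather than NULL, and kfree() must never be handed such a value. A userspace sketch of the ERR_PTR/IS_ERR convention being guarded against — the constants and stand-ins below are illustrative, not kernel headers:

	#include <stdlib.h>

	#define MAX_ERRNO	4095

	/* kernel-style error pointers: the top 4095 addresses encode -errno */
	static inline void *ERR_PTR(long err) { return (void *) err; }
	static inline int IS_ERR(const void *p)
	{
		return (unsigned long) p >= (unsigned long) -MAX_ERRNO;
	}

	static void safe_free(void *p)
	{
		if (!IS_ERR(p))		/* the fix: never free an error "pointer" */
			free(p);	/* free(NULL) is fine, so NULL needs no check */
	}

	int main(void)
	{
		safe_free(ERR_PTR(-12 /* -ENOMEM */));	/* no-op instead of a crash */
		safe_free(malloc(16));			/* real pointers still freed */
		return 0;
	}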
@@ -10,6 +10,7 @@
 #include <linux/xxhash.h>
 #include <linux/key.h>
 #include <linux/random.h>
+#include <linux/ratelimit.h>
 #include <linux/scatterlist.h>
 #include <crypto/algapi.h>
 #include <crypto/chacha.h>
@@ -436,7 +437,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
 		if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) {
 			struct printbuf buf = PRINTBUF;
 			prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n"
-				   "expected %0llx:%0llx got %0llx:%0llx (old type ",
+				   " expected %0llx:%0llx got %0llx:%0llx (old type ",
 				   __func__,
 				   crc_old.csum.hi,
 				   crc_old.csum.lo,
@@ -446,7 +447,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
 			prt_str(&buf, " new type ");
 			bch2_prt_csum_type(&buf, new_csum_type);
 			prt_str(&buf, ")");
-			bch_err(c, "%s", buf.buf);
+			WARN_RATELIMIT(1, "%s", buf.buf);
 			printbuf_exit(&buf);
 			return -EIO;
 		}
@@ -568,6 +568,32 @@ static const struct file_operations cached_btree_nodes_ops = {
 	.read		= bch2_cached_btree_nodes_read,
 };

+typedef int (*list_cmp_fn)(const struct list_head *l, const struct list_head *r);
+
+static void list_sort(struct list_head *head, list_cmp_fn cmp)
+{
+	struct list_head *pos;
+
+	list_for_each(pos, head)
+		while (!list_is_last(pos, head) &&
+		       cmp(pos, pos->next) > 0) {
+			struct list_head *pos2, *next = pos->next;
+
+			list_del(next);
+			list_for_each(pos2, head)
+				if (cmp(next, pos2) < 0)
+					goto pos_found;
+			BUG();
+pos_found:
+			list_add_tail(next, pos2);
+		}
+}
+
+static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r)
+{
+	return cmp_int(l, r);
+}
+
 static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
 					    size_t size, loff_t *ppos)
 {
@@ -575,41 +601,39 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
 	struct bch_fs *c = i->c;
 	struct btree_trans *trans;
 	ssize_t ret = 0;
-	u32 seq;

 	i->ubuf = buf;
 	i->size = size;
 	i->ret	= 0;
 restart:
 	seqmutex_lock(&c->btree_trans_lock);
-	list_for_each_entry(trans, &c->btree_trans_list, list) {
-		struct task_struct *task = READ_ONCE(trans->locking_wait.task);
+	list_sort(&c->btree_trans_list, list_ptr_order_cmp);

-		if (!task || task->pid <= i->iter)
+	list_for_each_entry(trans, &c->btree_trans_list, list) {
+		if ((ulong) trans < i->iter)
 			continue;

-		closure_get(&trans->ref);
-		seq = seqmutex_seq(&c->btree_trans_lock);
-		seqmutex_unlock(&c->btree_trans_lock);
+		i->iter = (ulong) trans;

-		ret = flush_buf(i);
-		if (ret) {
-			closure_put(&trans->ref);
-			goto unlocked;
-		}
+		if (!closure_get_not_zero(&trans->ref))
+			continue;
+
+		u32 seq = seqmutex_unlock(&c->btree_trans_lock);

 		bch2_btree_trans_to_text(&i->buf, trans);

 		prt_printf(&i->buf, "backtrace:\n");
 		printbuf_indent_add(&i->buf, 2);
-		bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL);
+		bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
 		printbuf_indent_sub(&i->buf, 2);
 		prt_newline(&i->buf);

-		i->iter = task->pid;
-
 		closure_put(&trans->ref);

+		ret = flush_buf(i);
+		if (ret)
+			goto unlocked;
+
 		if (!seqmutex_relock(&c->btree_trans_lock, seq))
 			goto restart;
 	}
@@ -804,50 +828,55 @@ static const struct file_operations btree_transaction_stats_op = {
 	.read		= btree_transaction_stats_read,
 };

-static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
-					size_t size, loff_t *ppos)
+/* walk btree transactions until we find a deadlock and print it */
+static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
 {
-	struct dump_iter *i = file->private_data;
-	struct bch_fs *c = i->c;
 	struct btree_trans *trans;
-	ssize_t ret = 0;
-	u32 seq;
-
-	i->ubuf = buf;
-	i->size = size;
-	i->ret	= 0;
-
-	if (i->iter)
-		goto out;
+	pid_t iter = 0;
 restart:
 	seqmutex_lock(&c->btree_trans_lock);
 	list_for_each_entry(trans, &c->btree_trans_list, list) {
 		struct task_struct *task = READ_ONCE(trans->locking_wait.task);

-		if (!task || task->pid <= i->iter)
+		if (!task || task->pid <= iter)
 			continue;

-		closure_get(&trans->ref);
-		seq = seqmutex_seq(&c->btree_trans_lock);
-		seqmutex_unlock(&c->btree_trans_lock);
+		iter = task->pid;

-		ret = flush_buf(i);
-		if (ret) {
-			closure_put(&trans->ref);
-			goto out;
-		}
+		if (!closure_get_not_zero(&trans->ref))
+			continue;

-		bch2_check_for_deadlock(trans, &i->buf);
+		u32 seq = seqmutex_unlock(&c->btree_trans_lock);

-		i->iter = task->pid;
+		bool found = bch2_check_for_deadlock(trans, out) != 0;

 		closure_put(&trans->ref);

+		if (found)
+			return;
+
 		if (!seqmutex_relock(&c->btree_trans_lock, seq))
 			goto restart;
 	}
 	seqmutex_unlock(&c->btree_trans_lock);
-out:
+}
+
+static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
+					size_t size, loff_t *ppos)
+{
+	struct dump_iter *i = file->private_data;
+	struct bch_fs *c = i->c;
+	ssize_t ret = 0;
+
+	i->ubuf = buf;
+	i->size = size;
+	i->ret	= 0;
+
+	if (!i->iter) {
+		btree_deadlock_to_text(&i->buf, c);
+		i->iter++;
+	}

 	if (i->buf.allocation_failure)
 		ret = -ENOMEM;
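The new list_sort() is a deliberately simple O(n^2) insertion sort over an intrusive list — fine for the short btree_trans_list — and list_ptr_order_cmp() orders by node address so the debugfs reader can use the pointer itself as a stable resume cursor across seqmutex drops. The same routine made standalone with a minimal list_head implementation; everything except the sort body is our scaffolding, and the by-address compare is kernel-style (strict ISO C leaves relational compares of unrelated pointers undefined):

	#include <assert.h>

	struct list_head { struct list_head *next, *prev; };

	static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
	static void list_del(struct list_head *n)
	{
		n->prev->next = n->next;
		n->next->prev = n->prev;
	}
	static void list_add_tail(struct list_head *n, struct list_head *at)
	{
		n->prev = at->prev;
		n->next = at;
		at->prev->next = n;
		at->prev = n;
	}
	#define list_is_last(n, h)	((n)->next == (h))
	#define list_for_each(pos, h)	for (pos = (h)->next; pos != (h); pos = pos->next)
	#define cmp_int(l, r)		((l) > (r) ? 1 : (l) < (r) ? -1 : 0)

	typedef int (*list_cmp_fn)(const struct list_head *, const struct list_head *);

	/* the sort from the patch: move each out-of-order successor back
	 * into the sorted prefix; since next compares below pos, the inner
	 * scan always finds an insertion point */
	static void list_sort(struct list_head *head, list_cmp_fn cmp)
	{
		struct list_head *pos;

		list_for_each(pos, head)
			while (!list_is_last(pos, head) &&
			       cmp(pos, pos->next) > 0) {
				struct list_head *pos2, *next = pos->next;

				list_del(next);
				list_for_each(pos2, head)
					if (cmp(next, pos2) < 0)
						goto pos_found;
				assert(0);	/* unreachable, the kernel BUG()s */
	pos_found:
				list_add_tail(next, pos2);
			}
	}

	static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r)
	{
		return cmp_int(l, r);
	}

	int main(void)
	{
		struct list_head head, n[3];
		INIT_LIST_HEAD(&head);
		list_add_tail(&n[2], &head);	/* insert out of address order */
		list_add_tail(&n[0], &head);
		list_add_tail(&n[1], &head);

		list_sort(&head, list_ptr_order_cmp);

		assert(head.next == &n[0] && n[0].next == &n[1] && n[1].next == &n[2]);
		return 0;
	}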
@@ -521,8 +521,9 @@ fsck_err:
 	return ret;
 }

-static int accounting_read_key(struct bch_fs *c, struct btree_trans *trans, struct bkey_s_c k)
+static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
 {
+	struct bch_fs *c = trans->c;
 	struct printbuf buf = PRINTBUF;

 	if (k.k->type != KEY_TYPE_accounting)
@@ -557,15 +558,15 @@ fsck_err:
 int bch2_accounting_read(struct bch_fs *c)
 {
 	struct bch_accounting_mem *acc = &c->accounting;
+	struct btree_trans *trans = bch2_trans_get(c);

-	int ret = bch2_trans_run(c,
-		for_each_btree_key(trans, iter,
+	int ret = for_each_btree_key(trans, iter,
 				BTREE_ID_accounting, POS_MIN,
 				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
 			struct bkey u;
 			struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u);
-			accounting_read_key(c, trans, k);
-		})));
+			accounting_read_key(trans, k);
+		}));
 	if (ret)
 		goto err;
@@ -598,7 +599,7 @@ int bch2_accounting_read(struct bch_fs *c)
 			continue;
 		}

-		ret = accounting_read_key(c, NULL, k);
+		ret = accounting_read_key(trans, k);
 		if (ret)
 			goto err;
 	}
@@ -645,6 +646,7 @@ int bch2_accounting_read(struct bch_fs *c)
 	preempt_enable();
 	percpu_up_read(&c->mark_lock);
 err:
+	bch2_trans_put(trans);
 	bch_err_fn(c, ret);
 	return ret;
 }
@ -283,7 +283,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
||||
|
||||
if (flags & BTREE_TRIGGER_transactional) {
|
||||
struct bkey_i_alloc_v4 *a =
|
||||
bch2_trans_start_alloc_update(trans, bucket);
|
||||
bch2_trans_start_alloc_update(trans, bucket, 0);
|
||||
ret = PTR_ERR_OR_ZERO(a) ?:
|
||||
__mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags);
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ bool bch2_inconsistent_error(struct bch_fs *c)
|
||||
switch (c->opts.errors) {
|
||||
case BCH_ON_ERROR_continue:
|
||||
return false;
|
||||
case BCH_ON_ERROR_fix_safe:
|
||||
case BCH_ON_ERROR_ro:
|
||||
if (bch2_fs_emergency_read_only(c))
|
||||
bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
|
||||
@ -211,6 +212,12 @@ static void prt_actioning(struct printbuf *out, const char *action)
|
||||
prt_str(out, "ing");
|
||||
}
|
||||
|
||||
static const u8 fsck_flags_extra[] = {
|
||||
#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags,
|
||||
BCH_SB_ERRS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
int __bch2_fsck_err(struct bch_fs *c,
|
||||
struct btree_trans *trans,
|
||||
enum bch_fsck_flags flags,
|
||||
@ -226,6 +233,9 @@ int __bch2_fsck_err(struct bch_fs *c,
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
|
||||
flags |= fsck_flags_extra[err];
|
||||
|
||||
if (!c)
|
||||
c = trans->c;
|
||||
|
||||
@ -293,7 +303,14 @@ int __bch2_fsck_err(struct bch_fs *c,
|
||||
prt_printf(out, bch2_log_msg(c, ""));
|
||||
#endif
|
||||
|
||||
if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
|
||||
if ((flags & FSCK_CAN_FIX) &&
|
||||
(flags & FSCK_AUTOFIX) &&
|
||||
(c->opts.errors == BCH_ON_ERROR_continue ||
|
||||
c->opts.errors == BCH_ON_ERROR_fix_safe)) {
|
||||
prt_str(out, ", ");
|
||||
prt_actioning(out, action);
|
||||
ret = -BCH_ERR_fsck_fix;
|
||||
} else if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
|
||||
if (c->opts.errors != BCH_ON_ERROR_continue ||
|
||||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
|
||||
prt_str(out, ", shutting down");
@@ -108,13 +108,6 @@ struct fsck_err_state {
	char *last_msg;
};

enum bch_fsck_flags {
	FSCK_CAN_FIX		= 1 << 0,
	FSCK_CAN_IGNORE		= 1 << 1,
	FSCK_NEED_FSCK		= 1 << 2,
	FSCK_NO_RATELIMIT	= 1 << 3,
};

#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)

__printf(5, 6) __cold

@@ -1034,6 +1034,18 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
	--out->atomic;
}

void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc)
{
	prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
		   crc->compressed_size,
		   crc->uncompressed_size,
		   crc->offset, crc->nonce);
	bch2_prt_csum_type(out, crc->csum_type);
	prt_printf(out, " %0llx:%0llx ", crc->csum.hi, crc->csum.lo);
	prt_str(out, " compress ");
	bch2_prt_compression_type(out, crc->compression_type);
}

void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
			    struct bkey_s_c k)
{
@@ -1059,13 +1071,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
			struct bch_extent_crc_unpacked crc =
				bch2_extent_crc_unpack(k.k, entry_to_crc(entry));

			prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
				   crc.compressed_size,
				   crc.uncompressed_size,
				   crc.offset, crc.nonce);
			bch2_prt_csum_type(out, crc.csum_type);
			prt_str(out, " compress ");
			bch2_prt_compression_type(out, crc.compression_type);
			bch2_extent_crc_unpacked_to_text(out, &crc);
			break;
		}
		case BCH_EXTENT_ENTRY_stripe_ptr: {
@@ -1096,6 +1102,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
	}
}

static int extent_ptr_invalid(struct bch_fs *c,
			      struct bkey_s_c k,
			      enum bch_validate_flags flags,

@@ -212,6 +212,8 @@ static inline bool crc_is_encoded(struct bch_extent_crc_unpacked crc)
	return crc.csum_type != BCH_CSUM_none || crc_is_compressed(crc);
}

void bch2_extent_crc_unpacked_to_text(struct printbuf *, struct bch_extent_crc_unpacked *);

/* bkey_ptrs: generically over any key type that has ptrs */

struct bkey_ptrs_c {

@@ -678,8 +678,8 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
	bch2_pagecache_add_get(inode);

	folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT,
				FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE,
				mapping_gfp_mask(mapping));
				FGP_WRITEBEGIN | fgf_set_order(len),
				mapping_gfp_mask(mapping));
	if (IS_ERR_OR_NULL(folio))
		goto err_unlock;

@@ -820,9 +820,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
	darray_init(&fs);

	ret = bch2_filemap_get_contig_folios_d(mapping, pos, end,
				FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT,
				mapping_gfp_mask(mapping),
				&fs);
				FGP_WRITEBEGIN | fgf_set_order(len),
				mapping_gfp_mask(mapping), &fs);
	if (ret)
		goto out;

@@ -864,24 +863,26 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
	f_pos = pos;
	f_offset = pos - folio_pos(darray_first(fs));
	darray_for_each(fs, fi) {
		ssize_t f_reserved;

		f = *fi;
		f_len = min(end, folio_end_pos(f)) - f_pos;
		f_reserved = bch2_folio_reservation_get_partial(c, inode, f, &res, f_offset, f_len);

		/*
		 * XXX: per POSIX and fstests generic/275, on -ENOSPC we're
		 * supposed to write as much as we have disk space for.
		 *
		 * On failure here we should still write out a partial page if
		 * we aren't completely out of disk space - we don't do that
		 * yet:
		 */
		ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len);
		if (unlikely(ret)) {
			folios_trunc(&fs, fi);
			if (!fs.nr)
				goto out;
		if (unlikely(f_reserved != f_len)) {
			if (f_reserved < 0) {
				if (f == darray_first(fs)) {
					ret = f_reserved;
					goto out;
				}

				folios_trunc(&fs, fi);
				end = min(end, folio_end_pos(darray_last(fs)));
			} else {
				folios_trunc(&fs, fi + 1);
				end = f_pos + f_reserved;
			}

			end = min(end, folio_end_pos(darray_last(fs)));
			break;
		}

@@ -179,7 +179,7 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
	struct bch_inode_info *inode = file_bch_inode(file);
	struct address_space *mapping = file->f_mapping;
	size_t count = iov_iter_count(iter);
	ssize_t ret;
	ssize_t ret = 0;

	if (!count)
		return 0; /* skip atime */
@@ -205,7 +205,7 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
		iocb->ki_pos += ret;
	} else {
		bch2_pagecache_add_get(inode);
		ret = generic_file_read_iter(iocb, iter);
		ret = filemap_read(iocb, iter, ret);
		bch2_pagecache_add_put(inode);
	}
out:

@@ -423,7 +423,7 @@ int bch2_folio_reservation_get(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			unsigned offset, unsigned len)
			size_t offset, size_t len)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned i, disk_sectors = 0, quota_sectors = 0;
@@ -437,8 +437,7 @@ int bch2_folio_reservation_get(struct bch_fs *c,
	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
		disk_sectors += sectors_to_reserve(&s->s[i],
						res->disk.nr_replicas);
		disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
		quota_sectors += s->s[i].state == SECTOR_unallocated;
	}

@@ -449,12 +448,9 @@ int bch2_folio_reservation_get(struct bch_fs *c,
	}

	if (quota_sectors) {
		ret = bch2_quota_reservation_add(c, inode, &res->quota,
						 quota_sectors, true);
		ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true);
		if (unlikely(ret)) {
			struct disk_reservation tmp = {
				.sectors = disk_sectors
			};
			struct disk_reservation tmp = { .sectors = disk_sectors };

			bch2_disk_reservation_put(c, &tmp);
			res->disk.sectors -= disk_sectors;
@@ -465,6 +461,31 @@ int bch2_folio_reservation_get(struct bch_fs *c,
	return 0;
}

ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			size_t offset, size_t len)
{
	size_t l, reserved = 0;
	int ret;

	while ((l = len - reserved)) {
		while ((ret = bch2_folio_reservation_get(c, inode, folio, res, offset, l))) {
			if ((offset & (block_bytes(c) - 1)) + l <= block_bytes(c))
				return reserved ?: ret;

			len = reserved + l;
			l /= 2;
		}

		offset += l;
		reserved += l;
	}

	return reserved;
}
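The partial variant retries with a binary backoff: each time a reservation fails it halves the attempt size, gives up once the failing range fits within a single block, and otherwise accumulates what it did manage to reserve. The same loop in isolation, with the filesystem details abstracted behind a callback (hypothetical try_reserve, illustration only; returns bytes reserved, 0 meaning nothing could be reserved):

static size_t reserve_partial(size_t offset, size_t len, size_t block,
			      bool (*try_reserve)(size_t offset, size_t len))
{
	size_t l, reserved = 0;

	while ((l = len - reserved)) {
		while (!try_reserve(offset, l)) {
			/* stop once the failing range fits in one block */
			if ((offset & (block - 1)) + l <= block)
				return reserved;
			/* shrink the overall target, then halve the attempt */
			len = reserved + l;
			l /= 2;
		}
		offset += l;
		reserved += l;
	}
	return reserved;
}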

static void bch2_clear_folio_bits(struct folio *folio)
{
	struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);

@@ -153,7 +153,12 @@ int bch2_folio_reservation_get(struct bch_fs *,
			struct bch_inode_info *,
			struct folio *,
			struct bch2_folio_reservation *,
			unsigned, unsigned);
			size_t, size_t);
ssize_t bch2_folio_reservation_get_partial(struct bch_fs *,
			struct bch_inode_info *,
			struct folio *,
			struct bch2_folio_reservation *,
			size_t, size_t);

void bch2_set_folio_dirty(struct bch_fs *,
			struct bch_inode_info *,

@@ -188,6 +188,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
	BUG_ON(!old);

	if (unlikely(old != inode)) {
		/*
		 * bcachefs doesn't use I_NEW; we have no use for it since we
		 * only insert fully created inodes in the inode hash table. But
		 * discard_new_inode() expects it to be set...
		 */
		inode->v.i_flags |= I_NEW;
		discard_new_inode(&inode->v);
		inode = old;
	} else {
@@ -195,8 +201,10 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
		list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
		mutex_unlock(&c->vfs_inodes_lock);
		/*
		 * we really don't want insert_inode_locked2() to be setting
		 * I_NEW...
		 * Again, I_NEW makes no sense for bcachefs. This is only needed
		 * for clearing I_NEW, but since the inode was already fully
		 * created and initialized we didn't actually want
		 * inode_insert5() to set it for us.
		 */
		unlock_new_inode(&inode->v);
	}
@@ -880,6 +888,16 @@ static int bch2_getattr(struct mnt_idmap *idmap,
	stat->subvol = inode->ei_subvol;
	stat->result_mask |= STATX_SUBVOL;

	if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->v.i_mode)) {
		stat->result_mask |= STATX_DIOALIGN;
		/*
		 * this is incorrect; we should be tracking this in superblock,
		 * and checking the alignment of open devices
		 */
		stat->dio_mem_align = SECTOR_SIZE;
		stat->dio_offset_align = block_bytes(c);
	}

	if (request_mask & STATX_BTIME) {
		stat->result_mask |= STATX_BTIME;
		stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
@@ -1157,6 +1175,7 @@ static const struct file_operations bch_file_operations = {
	.read_iter	= bch2_read_iter,
	.write_iter	= bch2_write_iter,
	.mmap		= bch2_mmap,
	.get_unmapped_area = thp_get_unmapped_area,
	.fsync		= bch2_fsync,
	.splice_read	= filemap_splice_read,
	.splice_write	= iter_file_splice_write,
@@ -1488,11 +1507,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
	bch2_iget5_set(&inode->v, &inum);
	bch2_inode_update_after_write(trans, inode, bi, ~0);

	if (BCH_SUBVOLUME_SNAP(subvol))
		set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
	else
		clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);

	inode->v.i_blocks = bi->bi_sectors;
	inode->v.i_ino = bi->bi_inum;
	inode->v.i_rdev = bi->bi_dev;
@@ -1504,6 +1518,9 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
	inode->ei_qid = bch_qid(bi);
	inode->ei_subvol = inum.subvol;

	if (BCH_SUBVOLUME_SNAP(subvol))
		set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);

	inode->v.i_mapping->a_ops = &bch_address_space_operations;

	switch (inode->v.i_mode & S_IFMT) {
@@ -1776,7 +1793,8 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root)
		const struct bch_option *opt = &bch2_opt_table[i];
		u64 v = bch2_opt_get_by_id(&c->opts, i);

		if (!(opt->flags & OPT_MOUNT))
		if ((opt->flags & OPT_HIDDEN) ||
		    !(opt->flags & OPT_MOUNT))
			continue;

		if (v == bch2_opt_get_by_id(&bch2_opts_default, i))

@@ -535,12 +535,13 @@ fsck_err:
static void __bch2_inode_unpacked_to_text(struct printbuf *out,
					  struct bch_inode_unpacked *inode)
{
	prt_printf(out, "\n");
	printbuf_indent_add(out, 2);
	prt_printf(out, "mode=%o\n", inode->bi_mode);

	prt_str(out, "flags=");
	prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1));
	prt_printf(out, " (%x)\n", inode->bi_flags);
	prt_printf(out, "(%x)\n", inode->bi_flags);

	prt_printf(out, "journal_seq=%llu\n", inode->bi_journal_seq);
	prt_printf(out, "bi_size=%llu\n", inode->bi_size);

@@ -389,7 +389,6 @@ retry:

		bch2_bkey_buf_reassemble(&sk, c, k);
		k = bkey_i_to_s_c(sk.k);
		bch2_trans_unlock(trans);

		if (!bch2_bkey_matches_ptr(c, k,
					   rbio->pick.ptr,

@@ -1080,7 +1080,10 @@ do_write:
	*_dst = dst;
	return more;
csum_err:
	bch_err(c, "%s write error: error verifying existing checksum while rewriting existing data (memory corruption?)",
	bch_err_inum_offset_ratelimited(c,
		op->pos.inode,
		op->pos.offset << 9,
		"%s write error: error verifying existing checksum while rewriting existing data (memory corruption?)",
		op->flags & BCH_WRITE_MOVE ? "move" : "user");
	ret = -EIO;
err:

@@ -1520,6 +1520,11 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
	struct journal_entry_pin *pin;

	spin_lock(&j->lock);
	if (!test_bit(JOURNAL_running, &j->flags)) {
		spin_unlock(&j->lock);
		return true;
	}

	*seq = max(*seq, j->pin.front);

	if (*seq >= j->pin.back) {

@@ -722,13 +722,16 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs

	prt_printf(out, "dev=%u", le32_to_cpu(u->dev));

	printbuf_indent_add(out, 2);
	for (i = 0; i < nr_types; i++) {
		prt_newline(out);
		bch2_prt_data_type(out, i);
		prt_printf(out, ": buckets=%llu sectors=%llu fragmented=%llu",
			   le64_to_cpu(u->d[i].buckets),
			   le64_to_cpu(u->d[i].sectors),
			   le64_to_cpu(u->d[i].fragmented));
	}
	printbuf_indent_sub(out, 2);
}

static int journal_entry_log_validate(struct bch_fs *c,
@@ -1678,6 +1681,13 @@ static CLOSURE_CALLBACK(journal_write_done)
		mod_delayed_work(j->wq, &j->write_work, max(0L, delta));
	}

	/*
	 * We don't typically trigger journal writes from here - the next
	 * journal write will be triggered immediately after the previous one
	 * is allocated, in bch2_journal_write() - but the journal write error
	 * path is special:
	 */
	bch2_journal_do_writes(j);
	spin_unlock(&j->lock);
}

@@ -1974,7 +1984,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
	struct journal *j = container_of(w, struct journal, buf[w->idx]);
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	struct bch_replicas_padded replicas;
	struct printbuf journal_debug_buf = PRINTBUF;
	unsigned nr_rw_members = 0;
	int ret;

@@ -2018,11 +2027,16 @@ CLOSURE_CALLBACK(bch2_journal_write)
	}

	if (ret) {
		__bch2_journal_debug_to_text(&journal_debug_buf, j);
		struct printbuf buf = PRINTBUF;
		buf.atomic++;

		prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu: %s"),
			   le64_to_cpu(w->data->seq),
			   bch2_err_str(ret));
		__bch2_journal_debug_to_text(&buf, j);
		spin_unlock(&j->lock);
		bch_err(c, "Unable to allocate journal write:\n%s",
			journal_debug_buf.buf);
		printbuf_exit(&journal_debug_buf);
		bch2_print_string_as_lines(KERN_ERR, buf.buf);
		printbuf_exit(&buf);
		goto err;
	}

@@ -232,7 +232,7 @@ bool bch2_blacklist_entries_gc(struct bch_fs *c)
	BUG_ON(nr != t->nr);

	unsigned i;
	for (src = bl->start, i = eytzinger0_first(t->nr);
	for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr);
	     src < bl->start + nr;
	     src++, i = eytzinger0_next(i, nr)) {
		BUG_ON(t->entries[i].start != le64_to_cpu(src->start));

@@ -24,18 +24,6 @@ static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time)
	return pos;
}

#define BCH_LRU_TYPES()		\
	x(read)			\
	x(fragmentation)

enum bch_lru_type {
#define x(n) BCH_LRU_##n,
	BCH_LRU_TYPES()
#undef x
};

#define BCH_LRU_FRAGMENTATION_START	((1U << 16) - 1)

static inline enum bch_lru_type lru_type(struct bkey_s_c l)
{
	u16 lru_id = l.k->p.inode >> 48;

libbcachefs/lru_format.h (new file, 25 lines)
@@ -0,0 +1,25 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_LRU_FORMAT_H
#define _BCACHEFS_LRU_FORMAT_H

struct bch_lru {
	struct bch_val		v;
	__le64			idx;
} __packed __aligned(8);

#define BCH_LRU_TYPES()		\
	x(read)			\
	x(fragmentation)

enum bch_lru_type {
#define x(n) BCH_LRU_##n,
	BCH_LRU_TYPES()
#undef x
};

#define BCH_LRU_FRAGMENTATION_START	((1U << 16) - 1)

#define LRU_TIME_BITS	48
#define LRU_TIME_MAX	((1ULL << LRU_TIME_BITS) - 1)

#endif /* _BCACHEFS_LRU_FORMAT_H */
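For readers unfamiliar with the x-macro idiom used throughout this file: BCH_LRU_TYPES() is expanded with x() mapping each name to an enumerator, so the enum above is equivalent to writing out:

enum bch_lru_type {
	BCH_LRU_read,
	BCH_LRU_fragmentation,
};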
@@ -63,6 +63,7 @@ enum opt_flags {
	OPT_MUST_BE_POW_2	= (1 << 7),	/* Must be power of 2 */
	OPT_SB_FIELD_SECTORS	= (1 << 8),	/* Superblock field is >> 9 of actual value */
	OPT_SB_FIELD_ILOG2	= (1 << 9),	/* Superblock field is ilog2 of actual value */
	OPT_HIDDEN		= (1 << 10),
};

enum opt_type {
@@ -137,7 +138,7 @@ enum fsck_err_opts {
	x(errors, u8, \
	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
	  OPT_STR(bch2_error_actions), \
	  BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \
	  BCH_SB_ERROR_ACTION, BCH_ON_ERROR_fix_safe, \
	  NULL, "Action to take on filesystem error") \
	x(metadata_replicas, u8, \
	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
@@ -406,7 +407,7 @@ enum fsck_err_opts {
	  BCH2_NO_SB_OPT, BCH_SB_SECTOR, \
	  "offset", "Sector offset of superblock") \
	x(read_only, u8, \
	  OPT_FS, \
	  OPT_FS|OPT_MOUNT|OPT_HIDDEN, \
	  OPT_BOOL(), \
	  BCH2_NO_SB_OPT, false, \
	  NULL, NULL) \

@@ -699,10 +699,10 @@ int bch2_fs_recovery(struct bch_fs *c)
	if (check_version_upgrade(c))
		write_sb = true;

	c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));

	if (write_sb)
		bch2_write_super(c);

	c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
	mutex_unlock(&c->sb_lock);

	if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))

@@ -193,6 +193,8 @@ int bch2_run_online_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	down_read(&c->state_lock);

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
		struct recovery_pass_fn *p = recovery_pass_fns + i;

@@ -208,6 +210,8 @@ int bch2_run_online_recovery_passes(struct bch_fs *c)
			break;
	}

	up_read(&c->state_lock);

	return ret;
}

@@ -77,6 +77,7 @@
	BCH_FSCK_ERR_fs_usage_cached_wrong, \
	BCH_FSCK_ERR_fs_usage_reserved_wrong, \
	BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \
	BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \
	BCH_FSCK_ERR_fs_usage_replicas_wrong, \
	BCH_FSCK_ERR_bkey_version_in_future)

@@ -110,19 +110,25 @@ out:
void bch2_sb_errors_from_cpu(struct bch_fs *c)
{
	bch_sb_errors_cpu *src = &c->fsck_error_counts;
	struct bch_sb_field_errors *dst =
		bch2_sb_field_resize(&c->disk_sb, errors,
				     bch2_sb_field_errors_u64s(src->nr));
	struct bch_sb_field_errors *dst;
	unsigned i;

	mutex_lock(&c->fsck_error_counts_lock);

	dst = bch2_sb_field_resize(&c->disk_sb, errors,
				   bch2_sb_field_errors_u64s(src->nr));

	if (!dst)
		return;
		goto err;

	for (i = 0; i < src->nr; i++) {
		SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id);
		SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr);
		dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time);
	}

err:
	mutex_unlock(&c->fsck_error_counts_lock);
}

static int bch2_sb_errors_to_cpu(struct bch_fs *c)

@@ -2,286 +2,294 @@
#ifndef _BCACHEFS_SB_ERRORS_FORMAT_H
#define _BCACHEFS_SB_ERRORS_FORMAT_H

#define BCH_SB_ERRS() \
	x(clean_but_journal_not_empty, 0) \
	x(dirty_but_no_journal_entries, 1) \
	x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
	x(sb_clean_journal_seq_mismatch, 3) \
	x(sb_clean_btree_root_mismatch, 4) \
	x(sb_clean_missing, 5) \
	x(jset_unsupported_version, 6) \
	x(jset_unknown_csum, 7) \
	x(jset_last_seq_newer_than_seq, 8) \
	x(jset_past_bucket_end, 9) \
	x(jset_seq_blacklisted, 10) \
	x(journal_entries_missing, 11) \
	x(journal_entry_replicas_not_marked, 12) \
	x(journal_entry_past_jset_end, 13) \
	x(journal_entry_replicas_data_mismatch, 14) \
	x(journal_entry_bkey_u64s_0, 15) \
	x(journal_entry_bkey_past_end, 16) \
	x(journal_entry_bkey_bad_format, 17) \
	x(journal_entry_bkey_invalid, 18) \
	x(journal_entry_btree_root_bad_size, 19) \
	x(journal_entry_blacklist_bad_size, 20) \
	x(journal_entry_blacklist_v2_bad_size, 21) \
	x(journal_entry_blacklist_v2_start_past_end, 22) \
	x(journal_entry_usage_bad_size, 23) \
	x(journal_entry_data_usage_bad_size, 24) \
	x(journal_entry_clock_bad_size, 25) \
	x(journal_entry_clock_bad_rw, 26) \
	x(journal_entry_dev_usage_bad_size, 27) \
	x(journal_entry_dev_usage_bad_dev, 28) \
	x(journal_entry_dev_usage_bad_pad, 29) \
	x(btree_node_unreadable, 30) \
	x(btree_node_fault_injected, 31) \
	x(btree_node_bad_magic, 32) \
	x(btree_node_bad_seq, 33) \
	x(btree_node_unsupported_version, 34) \
	x(btree_node_bset_older_than_sb_min, 35) \
	x(btree_node_bset_newer_than_sb, 36) \
	x(btree_node_data_missing, 37) \
	x(btree_node_bset_after_end, 38) \
	x(btree_node_replicas_sectors_written_mismatch, 39) \
	x(btree_node_replicas_data_mismatch, 40) \
	x(bset_unknown_csum, 41) \
	x(bset_bad_csum, 42) \
	x(bset_past_end_of_btree_node, 43) \
	x(bset_wrong_sector_offset, 44) \
	x(bset_empty, 45) \
	x(bset_bad_seq, 46) \
	x(bset_blacklisted_journal_seq, 47) \
	x(first_bset_blacklisted_journal_seq, 48) \
	x(btree_node_bad_btree, 49) \
	x(btree_node_bad_level, 50) \
	x(btree_node_bad_min_key, 51) \
	x(btree_node_bad_max_key, 52) \
	x(btree_node_bad_format, 53) \
	x(btree_node_bkey_past_bset_end, 54) \
	x(btree_node_bkey_bad_format, 55) \
	x(btree_node_bad_bkey, 56) \
	x(btree_node_bkey_out_of_order, 57) \
	x(btree_root_bkey_invalid, 58) \
	x(btree_root_read_error, 59) \
	x(btree_root_bad_min_key, 60) \
	x(btree_root_bad_max_key, 61) \
	x(btree_node_read_error, 62) \
	x(btree_node_topology_bad_min_key, 63) \
	x(btree_node_topology_bad_max_key, 64) \
	x(btree_node_topology_overwritten_by_prev_node, 65) \
	x(btree_node_topology_overwritten_by_next_node, 66) \
	x(btree_node_topology_interior_node_empty, 67) \
	x(fs_usage_hidden_wrong, 68) \
	x(fs_usage_btree_wrong, 69) \
	x(fs_usage_data_wrong, 70) \
	x(fs_usage_cached_wrong, 71) \
	x(fs_usage_reserved_wrong, 72) \
	x(fs_usage_persistent_reserved_wrong, 73) \
	x(fs_usage_nr_inodes_wrong, 74) \
	x(fs_usage_replicas_wrong, 75) \
	x(dev_usage_buckets_wrong, 76) \
	x(dev_usage_sectors_wrong, 77) \
	x(dev_usage_fragmented_wrong, 78) \
	x(dev_usage_buckets_ec_wrong, 79) \
	x(bkey_version_in_future, 80) \
	x(bkey_u64s_too_small, 81) \
	x(bkey_invalid_type_for_btree, 82) \
	x(bkey_extent_size_zero, 83) \
	x(bkey_extent_size_greater_than_offset, 84) \
	x(bkey_size_nonzero, 85) \
	x(bkey_snapshot_nonzero, 86) \
	x(bkey_snapshot_zero, 87) \
	x(bkey_at_pos_max, 88) \
	x(bkey_before_start_of_btree_node, 89) \
	x(bkey_after_end_of_btree_node, 90) \
	x(bkey_val_size_nonzero, 91) \
	x(bkey_val_size_too_small, 92) \
	x(alloc_v1_val_size_bad, 93) \
	x(alloc_v2_unpack_error, 94) \
	x(alloc_v3_unpack_error, 95) \
	x(alloc_v4_val_size_bad, 96) \
	x(alloc_v4_backpointers_start_bad, 97) \
	x(alloc_key_data_type_bad, 98) \
	x(alloc_key_empty_but_have_data, 99) \
	x(alloc_key_dirty_sectors_0, 100) \
	x(alloc_key_data_type_inconsistency, 101) \
	x(alloc_key_to_missing_dev_bucket, 102) \
	x(alloc_key_cached_inconsistency, 103) \
	x(alloc_key_cached_but_read_time_zero, 104) \
	x(alloc_key_to_missing_lru_entry, 105) \
	x(alloc_key_data_type_wrong, 106) \
	x(alloc_key_gen_wrong, 107) \
	x(alloc_key_dirty_sectors_wrong, 108) \
	x(alloc_key_cached_sectors_wrong, 109) \
	x(alloc_key_stripe_wrong, 110) \
	x(alloc_key_stripe_redundancy_wrong, 111) \
	x(bucket_sector_count_overflow, 112) \
	x(bucket_metadata_type_mismatch, 113) \
	x(need_discard_key_wrong, 114) \
	x(freespace_key_wrong, 115) \
	x(freespace_hole_missing, 116) \
	x(bucket_gens_val_size_bad, 117) \
	x(bucket_gens_key_wrong, 118) \
	x(bucket_gens_hole_wrong, 119) \
	x(bucket_gens_to_invalid_dev, 120) \
	x(bucket_gens_to_invalid_buckets, 121) \
	x(bucket_gens_nonzero_for_invalid_buckets, 122) \
	x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
	x(need_discard_freespace_key_bad, 124) \
	x(backpointer_bucket_offset_wrong, 125) \
	x(backpointer_to_missing_device, 126) \
	x(backpointer_to_missing_alloc, 127) \
	x(backpointer_to_missing_ptr, 128) \
	x(lru_entry_at_time_0, 129) \
	x(lru_entry_to_invalid_bucket, 130) \
	x(lru_entry_bad, 131) \
	x(btree_ptr_val_too_big, 132) \
	x(btree_ptr_v2_val_too_big, 133) \
	x(btree_ptr_has_non_ptr, 134) \
	x(extent_ptrs_invalid_entry, 135) \
	x(extent_ptrs_no_ptrs, 136) \
	x(extent_ptrs_too_many_ptrs, 137) \
	x(extent_ptrs_redundant_crc, 138) \
	x(extent_ptrs_redundant_stripe, 139) \
	x(extent_ptrs_unwritten, 140) \
	x(extent_ptrs_written_and_unwritten, 141) \
	x(ptr_to_invalid_device, 142) \
	x(ptr_to_duplicate_device, 143) \
	x(ptr_after_last_bucket, 144) \
	x(ptr_before_first_bucket, 145) \
	x(ptr_spans_multiple_buckets, 146) \
	x(ptr_to_missing_backpointer, 147) \
	x(ptr_to_missing_alloc_key, 148) \
	x(ptr_to_missing_replicas_entry, 149) \
	x(ptr_to_missing_stripe, 150) \
	x(ptr_to_incorrect_stripe, 151) \
	x(ptr_gen_newer_than_bucket_gen, 152) \
	x(ptr_too_stale, 153) \
	x(stale_dirty_ptr, 154) \
	x(ptr_bucket_data_type_mismatch, 155) \
	x(ptr_cached_and_erasure_coded, 156) \
	x(ptr_crc_uncompressed_size_too_small, 157) \
	x(ptr_crc_csum_type_unknown, 158) \
	x(ptr_crc_compression_type_unknown, 159) \
	x(ptr_crc_redundant, 160) \
	x(ptr_crc_uncompressed_size_too_big, 161) \
	x(ptr_crc_nonce_mismatch, 162) \
	x(ptr_stripe_redundant, 163) \
	x(reservation_key_nr_replicas_invalid, 164) \
	x(reflink_v_refcount_wrong, 165) \
	x(reflink_p_to_missing_reflink_v, 166) \
	x(stripe_pos_bad, 167) \
	x(stripe_val_size_bad, 168) \
	x(stripe_sector_count_wrong, 169) \
	x(snapshot_tree_pos_bad, 170) \
	x(snapshot_tree_to_missing_snapshot, 171) \
	x(snapshot_tree_to_missing_subvol, 172) \
	x(snapshot_tree_to_wrong_subvol, 173) \
	x(snapshot_tree_to_snapshot_subvol, 174) \
	x(snapshot_pos_bad, 175) \
	x(snapshot_parent_bad, 176) \
	x(snapshot_children_not_normalized, 177) \
	x(snapshot_child_duplicate, 178) \
	x(snapshot_child_bad, 179) \
	x(snapshot_skiplist_not_normalized, 180) \
	x(snapshot_skiplist_bad, 181) \
	x(snapshot_should_not_have_subvol, 182) \
	x(snapshot_to_bad_snapshot_tree, 183) \
	x(snapshot_bad_depth, 184) \
	x(snapshot_bad_skiplist, 185) \
	x(subvol_pos_bad, 186) \
	x(subvol_not_master_and_not_snapshot, 187) \
	x(subvol_to_missing_root, 188) \
	x(subvol_root_wrong_bi_subvol, 189) \
	x(bkey_in_missing_snapshot, 190) \
	x(inode_pos_inode_nonzero, 191) \
	x(inode_pos_blockdev_range, 192) \
	x(inode_unpack_error, 193) \
	x(inode_str_hash_invalid, 194) \
	x(inode_v3_fields_start_bad, 195) \
	x(inode_snapshot_mismatch, 196) \
	x(inode_unlinked_but_clean, 197) \
	x(inode_unlinked_but_nlink_nonzero, 198) \
	x(inode_checksum_type_invalid, 199) \
	x(inode_compression_type_invalid, 200) \
	x(inode_subvol_root_but_not_dir, 201) \
	x(inode_i_size_dirty_but_clean, 202) \
	x(inode_i_sectors_dirty_but_clean, 203) \
	x(inode_i_sectors_wrong, 204) \
	x(inode_dir_wrong_nlink, 205) \
	x(inode_dir_multiple_links, 206) \
	x(inode_multiple_links_but_nlink_0, 207) \
	x(inode_wrong_backpointer, 208) \
	x(inode_wrong_nlink, 209) \
	x(inode_unreachable, 210) \
	x(deleted_inode_but_clean, 211) \
	x(deleted_inode_missing, 212) \
	x(deleted_inode_is_dir, 213) \
	x(deleted_inode_not_unlinked, 214) \
	x(extent_overlapping, 215) \
	x(key_in_missing_inode, 216) \
	x(key_in_wrong_inode_type, 217) \
	x(extent_past_end_of_inode, 218) \
	x(dirent_empty_name, 219) \
	x(dirent_val_too_big, 220) \
	x(dirent_name_too_long, 221) \
	x(dirent_name_embedded_nul, 222) \
	x(dirent_name_dot_or_dotdot, 223) \
	x(dirent_name_has_slash, 224) \
	x(dirent_d_type_wrong, 225) \
	x(inode_bi_parent_wrong, 226) \
	x(dirent_in_missing_dir_inode, 227) \
	x(dirent_in_non_dir_inode, 228) \
	x(dirent_to_missing_inode, 229) \
	x(dirent_to_missing_subvol, 230) \
	x(dirent_to_itself, 231) \
	x(quota_type_invalid, 232) \
	x(xattr_val_size_too_small, 233) \
	x(xattr_val_size_too_big, 234) \
	x(xattr_invalid_type, 235) \
	x(xattr_name_invalid_chars, 236) \
	x(xattr_in_missing_inode, 237) \
	x(root_subvol_missing, 238) \
	x(root_dir_missing, 239) \
	x(root_inode_not_dir, 240) \
	x(dir_loop, 241) \
	x(hash_table_key_duplicate, 242) \
	x(hash_table_key_wrong_offset, 243) \
	x(unlinked_inode_not_on_deleted_list, 244) \
	x(reflink_p_front_pad_bad, 245) \
	x(journal_entry_dup_same_device, 246) \
	x(inode_bi_subvol_missing, 247) \
	x(inode_bi_subvol_wrong, 248) \
	x(inode_points_to_missing_dirent, 249) \
	x(inode_points_to_wrong_dirent, 250) \
	x(inode_bi_parent_nonzero, 251) \
	x(dirent_to_missing_parent_subvol, 252) \
	x(dirent_not_visible_in_parent_subvol, 253) \
	x(subvol_fs_path_parent_wrong, 254) \
	x(subvol_root_fs_path_parent_nonzero, 255) \
	x(subvol_children_not_set, 256) \
	x(subvol_children_bad, 257) \
	x(subvol_loop, 258) \
	x(subvol_unreachable, 259) \
	x(btree_node_bkey_bad_u64s, 260) \
	x(btree_node_topology_empty_interior_node, 261) \
	x(btree_ptr_v2_min_key_bad, 262) \
	x(btree_root_unreadable_and_scan_found_nothing, 263) \
	x(snapshot_node_missing, 264) \
	x(dup_backpointer_to_bad_csum_extent, 265) \
	x(btree_bitmap_not_marked, 266) \
	x(sb_clean_entry_overrun, 267) \
	x(btree_ptr_v2_written_0, 268) \
	x(subvol_snapshot_bad, 269) \
	x(subvol_inode_bad, 270) \
	x(alloc_key_stripe_sectors_wrong, 271) \
	x(accounting_mismatch, 272) \
	x(accounting_replicas_not_marked, 273) \
	x(invalid_btree_id, 274) \
	x(alloc_key_io_time_bad, 275)
enum bch_fsck_flags {
	FSCK_CAN_FIX		= 1 << 0,
	FSCK_CAN_IGNORE		= 1 << 1,
	FSCK_NEED_FSCK		= 1 << 2,
	FSCK_NO_RATELIMIT	= 1 << 3,
	FSCK_AUTOFIX		= 1 << 4,
};

#define BCH_SB_ERRS() \
	x(clean_but_journal_not_empty, 0, 0) \
	x(dirty_but_no_journal_entries, 1, 0) \
	x(dirty_but_no_journal_entries_post_drop_nonflushes, 2, 0) \
	x(sb_clean_journal_seq_mismatch, 3, 0) \
	x(sb_clean_btree_root_mismatch, 4, 0) \
	x(sb_clean_missing, 5, 0) \
	x(jset_unsupported_version, 6, 0) \
	x(jset_unknown_csum, 7, 0) \
	x(jset_last_seq_newer_than_seq, 8, 0) \
	x(jset_past_bucket_end, 9, 0) \
	x(jset_seq_blacklisted, 10, 0) \
	x(journal_entries_missing, 11, 0) \
	x(journal_entry_replicas_not_marked, 12, 0) \
	x(journal_entry_past_jset_end, 13, 0) \
	x(journal_entry_replicas_data_mismatch, 14, 0) \
	x(journal_entry_bkey_u64s_0, 15, 0) \
	x(journal_entry_bkey_past_end, 16, 0) \
	x(journal_entry_bkey_bad_format, 17, 0) \
	x(journal_entry_bkey_invalid, 18, 0) \
	x(journal_entry_btree_root_bad_size, 19, 0) \
	x(journal_entry_blacklist_bad_size, 20, 0) \
	x(journal_entry_blacklist_v2_bad_size, 21, 0) \
	x(journal_entry_blacklist_v2_start_past_end, 22, 0) \
	x(journal_entry_usage_bad_size, 23, 0) \
	x(journal_entry_data_usage_bad_size, 24, 0) \
	x(journal_entry_clock_bad_size, 25, 0) \
	x(journal_entry_clock_bad_rw, 26, 0) \
	x(journal_entry_dev_usage_bad_size, 27, 0) \
	x(journal_entry_dev_usage_bad_dev, 28, 0) \
	x(journal_entry_dev_usage_bad_pad, 29, 0) \
	x(btree_node_unreadable, 30, 0) \
	x(btree_node_fault_injected, 31, 0) \
	x(btree_node_bad_magic, 32, 0) \
	x(btree_node_bad_seq, 33, 0) \
	x(btree_node_unsupported_version, 34, 0) \
	x(btree_node_bset_older_than_sb_min, 35, 0) \
	x(btree_node_bset_newer_than_sb, 36, 0) \
	x(btree_node_data_missing, 37, 0) \
	x(btree_node_bset_after_end, 38, 0) \
	x(btree_node_replicas_sectors_written_mismatch, 39, 0) \
	x(btree_node_replicas_data_mismatch, 40, 0) \
	x(bset_unknown_csum, 41, 0) \
	x(bset_bad_csum, 42, 0) \
	x(bset_past_end_of_btree_node, 43, 0) \
	x(bset_wrong_sector_offset, 44, 0) \
	x(bset_empty, 45, 0) \
	x(bset_bad_seq, 46, 0) \
	x(bset_blacklisted_journal_seq, 47, 0) \
	x(first_bset_blacklisted_journal_seq, 48, 0) \
	x(btree_node_bad_btree, 49, 0) \
	x(btree_node_bad_level, 50, 0) \
	x(btree_node_bad_min_key, 51, 0) \
	x(btree_node_bad_max_key, 52, 0) \
	x(btree_node_bad_format, 53, 0) \
	x(btree_node_bkey_past_bset_end, 54, 0) \
	x(btree_node_bkey_bad_format, 55, 0) \
	x(btree_node_bad_bkey, 56, 0) \
	x(btree_node_bkey_out_of_order, 57, 0) \
	x(btree_root_bkey_invalid, 58, 0) \
	x(btree_root_read_error, 59, 0) \
	x(btree_root_bad_min_key, 60, 0) \
	x(btree_root_bad_max_key, 61, 0) \
	x(btree_node_read_error, 62, 0) \
	x(btree_node_topology_bad_min_key, 63, 0) \
	x(btree_node_topology_bad_max_key, 64, 0) \
	x(btree_node_topology_overwritten_by_prev_node, 65, 0) \
	x(btree_node_topology_overwritten_by_next_node, 66, 0) \
	x(btree_node_topology_interior_node_empty, 67, 0) \
	x(fs_usage_hidden_wrong, 68, FSCK_AUTOFIX) \
	x(fs_usage_btree_wrong, 69, FSCK_AUTOFIX) \
	x(fs_usage_data_wrong, 70, FSCK_AUTOFIX) \
	x(fs_usage_cached_wrong, 71, FSCK_AUTOFIX) \
	x(fs_usage_reserved_wrong, 72, FSCK_AUTOFIX) \
	x(fs_usage_persistent_reserved_wrong, 73, FSCK_AUTOFIX) \
	x(fs_usage_nr_inodes_wrong, 74, FSCK_AUTOFIX) \
	x(fs_usage_replicas_wrong, 75, FSCK_AUTOFIX) \
	x(dev_usage_buckets_wrong, 76, FSCK_AUTOFIX) \
	x(dev_usage_sectors_wrong, 77, FSCK_AUTOFIX) \
	x(dev_usage_fragmented_wrong, 78, FSCK_AUTOFIX) \
	x(dev_usage_buckets_ec_wrong, 79, FSCK_AUTOFIX) \
	x(bkey_version_in_future, 80, 0) \
	x(bkey_u64s_too_small, 81, 0) \
	x(bkey_invalid_type_for_btree, 82, 0) \
	x(bkey_extent_size_zero, 83, 0) \
	x(bkey_extent_size_greater_than_offset, 84, 0) \
	x(bkey_size_nonzero, 85, 0) \
	x(bkey_snapshot_nonzero, 86, 0) \
	x(bkey_snapshot_zero, 87, 0) \
	x(bkey_at_pos_max, 88, 0) \
	x(bkey_before_start_of_btree_node, 89, 0) \
	x(bkey_after_end_of_btree_node, 90, 0) \
	x(bkey_val_size_nonzero, 91, 0) \
	x(bkey_val_size_too_small, 92, 0) \
	x(alloc_v1_val_size_bad, 93, 0) \
	x(alloc_v2_unpack_error, 94, 0) \
	x(alloc_v3_unpack_error, 95, 0) \
	x(alloc_v4_val_size_bad, 96, 0) \
	x(alloc_v4_backpointers_start_bad, 97, 0) \
	x(alloc_key_data_type_bad, 98, 0) \
	x(alloc_key_empty_but_have_data, 99, 0) \
	x(alloc_key_dirty_sectors_0, 100, 0) \
	x(alloc_key_data_type_inconsistency, 101, 0) \
	x(alloc_key_to_missing_dev_bucket, 102, 0) \
	x(alloc_key_cached_inconsistency, 103, 0) \
	x(alloc_key_cached_but_read_time_zero, 104, 0) \
	x(alloc_key_to_missing_lru_entry, 105, 0) \
	x(alloc_key_data_type_wrong, 106, FSCK_AUTOFIX) \
	x(alloc_key_gen_wrong, 107, FSCK_AUTOFIX) \
	x(alloc_key_dirty_sectors_wrong, 108, FSCK_AUTOFIX) \
	x(alloc_key_cached_sectors_wrong, 109, FSCK_AUTOFIX) \
	x(alloc_key_stripe_wrong, 110, FSCK_AUTOFIX) \
	x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \
	x(bucket_sector_count_overflow, 112, 0) \
	x(bucket_metadata_type_mismatch, 113, 0) \
	x(need_discard_key_wrong, 114, 0) \
	x(freespace_key_wrong, 115, 0) \
	x(freespace_hole_missing, 116, 0) \
	x(bucket_gens_val_size_bad, 117, 0) \
	x(bucket_gens_key_wrong, 118, 0) \
	x(bucket_gens_hole_wrong, 119, 0) \
	x(bucket_gens_to_invalid_dev, 120, 0) \
	x(bucket_gens_to_invalid_buckets, 121, 0) \
	x(bucket_gens_nonzero_for_invalid_buckets, 122, 0) \
	x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \
	x(need_discard_freespace_key_bad, 124, 0) \
	x(backpointer_bucket_offset_wrong, 125, 0) \
	x(backpointer_to_missing_device, 126, 0) \
	x(backpointer_to_missing_alloc, 127, 0) \
	x(backpointer_to_missing_ptr, 128, 0) \
	x(lru_entry_at_time_0, 129, 0) \
	x(lru_entry_to_invalid_bucket, 130, 0) \
	x(lru_entry_bad, 131, 0) \
	x(btree_ptr_val_too_big, 132, 0) \
	x(btree_ptr_v2_val_too_big, 133, 0) \
	x(btree_ptr_has_non_ptr, 134, 0) \
	x(extent_ptrs_invalid_entry, 135, 0) \
	x(extent_ptrs_no_ptrs, 136, 0) \
	x(extent_ptrs_too_many_ptrs, 137, 0) \
	x(extent_ptrs_redundant_crc, 138, 0) \
	x(extent_ptrs_redundant_stripe, 139, 0) \
	x(extent_ptrs_unwritten, 140, 0) \
	x(extent_ptrs_written_and_unwritten, 141, 0) \
	x(ptr_to_invalid_device, 142, 0) \
	x(ptr_to_duplicate_device, 143, 0) \
	x(ptr_after_last_bucket, 144, 0) \
	x(ptr_before_first_bucket, 145, 0) \
	x(ptr_spans_multiple_buckets, 146, 0) \
	x(ptr_to_missing_backpointer, 147, 0) \
	x(ptr_to_missing_alloc_key, 148, 0) \
	x(ptr_to_missing_replicas_entry, 149, 0) \
	x(ptr_to_missing_stripe, 150, 0) \
	x(ptr_to_incorrect_stripe, 151, 0) \
	x(ptr_gen_newer_than_bucket_gen, 152, 0) \
	x(ptr_too_stale, 153, 0) \
	x(stale_dirty_ptr, 154, 0) \
	x(ptr_bucket_data_type_mismatch, 155, 0) \
	x(ptr_cached_and_erasure_coded, 156, 0) \
	x(ptr_crc_uncompressed_size_too_small, 157, 0) \
	x(ptr_crc_csum_type_unknown, 158, 0) \
	x(ptr_crc_compression_type_unknown, 159, 0) \
	x(ptr_crc_redundant, 160, 0) \
	x(ptr_crc_uncompressed_size_too_big, 161, 0) \
	x(ptr_crc_nonce_mismatch, 162, 0) \
	x(ptr_stripe_redundant, 163, 0) \
	x(reservation_key_nr_replicas_invalid, 164, 0) \
	x(reflink_v_refcount_wrong, 165, 0) \
	x(reflink_p_to_missing_reflink_v, 166, 0) \
	x(stripe_pos_bad, 167, 0) \
	x(stripe_val_size_bad, 168, 0) \
	x(stripe_sector_count_wrong, 169, 0) \
	x(snapshot_tree_pos_bad, 170, 0) \
	x(snapshot_tree_to_missing_snapshot, 171, 0) \
	x(snapshot_tree_to_missing_subvol, 172, 0) \
	x(snapshot_tree_to_wrong_subvol, 173, 0) \
	x(snapshot_tree_to_snapshot_subvol, 174, 0) \
	x(snapshot_pos_bad, 175, 0) \
	x(snapshot_parent_bad, 176, 0) \
	x(snapshot_children_not_normalized, 177, 0) \
	x(snapshot_child_duplicate, 178, 0) \
	x(snapshot_child_bad, 179, 0) \
	x(snapshot_skiplist_not_normalized, 180, 0) \
	x(snapshot_skiplist_bad, 181, 0) \
	x(snapshot_should_not_have_subvol, 182, 0) \
	x(snapshot_to_bad_snapshot_tree, 183, 0) \
	x(snapshot_bad_depth, 184, 0) \
	x(snapshot_bad_skiplist, 185, 0) \
	x(subvol_pos_bad, 186, 0) \
	x(subvol_not_master_and_not_snapshot, 187, 0) \
	x(subvol_to_missing_root, 188, 0) \
	x(subvol_root_wrong_bi_subvol, 189, 0) \
	x(bkey_in_missing_snapshot, 190, 0) \
	x(inode_pos_inode_nonzero, 191, 0) \
	x(inode_pos_blockdev_range, 192, 0) \
	x(inode_unpack_error, 193, 0) \
	x(inode_str_hash_invalid, 194, 0) \
	x(inode_v3_fields_start_bad, 195, 0) \
	x(inode_snapshot_mismatch, 196, 0) \
	x(inode_unlinked_but_clean, 197, 0) \
	x(inode_unlinked_but_nlink_nonzero, 198, 0) \
	x(inode_checksum_type_invalid, 199, 0) \
	x(inode_compression_type_invalid, 200, 0) \
	x(inode_subvol_root_but_not_dir, 201, 0) \
	x(inode_i_size_dirty_but_clean, 202, 0) \
	x(inode_i_sectors_dirty_but_clean, 203, 0) \
	x(inode_i_sectors_wrong, 204, 0) \
	x(inode_dir_wrong_nlink, 205, 0) \
	x(inode_dir_multiple_links, 206, 0) \
	x(inode_multiple_links_but_nlink_0, 207, 0) \
	x(inode_wrong_backpointer, 208, 0) \
	x(inode_wrong_nlink, 209, 0) \
	x(inode_unreachable, 210, 0) \
	x(deleted_inode_but_clean, 211, 0) \
	x(deleted_inode_missing, 212, 0) \
	x(deleted_inode_is_dir, 213, 0) \
	x(deleted_inode_not_unlinked, 214, 0) \
	x(extent_overlapping, 215, 0) \
	x(key_in_missing_inode, 216, 0) \
	x(key_in_wrong_inode_type, 217, 0) \
	x(extent_past_end_of_inode, 218, 0) \
	x(dirent_empty_name, 219, 0) \
	x(dirent_val_too_big, 220, 0) \
	x(dirent_name_too_long, 221, 0) \
	x(dirent_name_embedded_nul, 222, 0) \
	x(dirent_name_dot_or_dotdot, 223, 0) \
	x(dirent_name_has_slash, 224, 0) \
	x(dirent_d_type_wrong, 225, 0) \
	x(inode_bi_parent_wrong, 226, 0) \
	x(dirent_in_missing_dir_inode, 227, 0) \
	x(dirent_in_non_dir_inode, 228, 0) \
	x(dirent_to_missing_inode, 229, 0) \
	x(dirent_to_missing_subvol, 230, 0) \
	x(dirent_to_itself, 231, 0) \
	x(quota_type_invalid, 232, 0) \
	x(xattr_val_size_too_small, 233, 0) \
	x(xattr_val_size_too_big, 234, 0) \
	x(xattr_invalid_type, 235, 0) \
	x(xattr_name_invalid_chars, 236, 0) \
	x(xattr_in_missing_inode, 237, 0) \
	x(root_subvol_missing, 238, 0) \
	x(root_dir_missing, 239, 0) \
	x(root_inode_not_dir, 240, 0) \
	x(dir_loop, 241, 0) \
	x(hash_table_key_duplicate, 242, 0) \
	x(hash_table_key_wrong_offset, 243, 0) \
	x(unlinked_inode_not_on_deleted_list, 244, 0) \
	x(reflink_p_front_pad_bad, 245, 0) \
	x(journal_entry_dup_same_device, 246, 0) \
	x(inode_bi_subvol_missing, 247, 0) \
	x(inode_bi_subvol_wrong, 248, 0) \
	x(inode_points_to_missing_dirent, 249, 0) \
	x(inode_points_to_wrong_dirent, 250, 0) \
	x(inode_bi_parent_nonzero, 251, 0) \
	x(dirent_to_missing_parent_subvol, 252, 0) \
	x(dirent_not_visible_in_parent_subvol, 253, 0) \
	x(subvol_fs_path_parent_wrong, 254, 0) \
	x(subvol_root_fs_path_parent_nonzero, 255, 0) \
	x(subvol_children_not_set, 256, 0) \
	x(subvol_children_bad, 257, 0) \
	x(subvol_loop, 258, 0) \
	x(subvol_unreachable, 259, 0) \
	x(btree_node_bkey_bad_u64s, 260, 0) \
	x(btree_node_topology_empty_interior_node, 261, 0) \
	x(btree_ptr_v2_min_key_bad, 262, 0) \
	x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \
	x(snapshot_node_missing, 264, 0) \
	x(dup_backpointer_to_bad_csum_extent, 265, 0) \
	x(btree_bitmap_not_marked, 266, 0) \
	x(sb_clean_entry_overrun, 267, 0) \
	x(btree_ptr_v2_written_0, 268, 0) \
	x(subvol_snapshot_bad, 269, 0) \
	x(subvol_inode_bad, 270, 0) \
	x(alloc_key_stripe_sectors_wrong, 271, 0) \
	x(accounting_mismatch, 272, 0) \
	x(accounting_replicas_not_marked, 273, 0) \
	x(invalid_btree_id, 274, 0) \
	x(alloc_key_io_time_bad, 275, 0)

enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
	BCH_SB_ERRS()
#undef x
	BCH_SB_ERR_MAX

@@ -19,17 +19,14 @@ static inline bool seqmutex_trylock(struct seqmutex *lock)
static inline void seqmutex_lock(struct seqmutex *lock)
{
	mutex_lock(&lock->lock);
}

static inline void seqmutex_unlock(struct seqmutex *lock)
{
	lock->seq++;
	mutex_unlock(&lock->lock);
}

static inline u32 seqmutex_seq(struct seqmutex *lock)
static inline u32 seqmutex_unlock(struct seqmutex *lock)
{
	return lock->seq;
	u32 seq = lock->seq;
	mutex_unlock(&lock->lock);
	return seq;
}

static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq)
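With unlock now returning the sequence number, reading the sequence and dropping the mutex is a single operation, which closes the race the old separate seqmutex_seq() call allowed. A sketch of the intended usage (fragment, not from the patch):

	u32 seq;

	seqmutex_lock(&lock);
	/* ... examine the protected structure ... */
	seq = seqmutex_unlock(&lock);	/* sequence captured atomically with unlock */

	/* ... work that must not hold the lock ... */

	if (!seqmutex_relock(&lock, seq)) {
		/*
		 * the lock was taken in the interim: any cached pointers
		 * into the protected structure may be stale, so restart
		 * the walk from scratch
		 */
	}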
@@ -168,6 +168,9 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
	size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
	size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);

	if (unlikely(new_bytes > INT_MAX))
		return NULL;

	new = kvzalloc(new_bytes, GFP_KERNEL);
	if (!new)
		return NULL;
@@ -1682,6 +1685,8 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);

	set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);

	bch2_delete_dead_snapshots(c);
	bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
}

@@ -535,7 +535,6 @@ static void __bch2_fs_free(struct bch_fs *c)

	bch2_find_btree_nodes_exit(&c->found_btree_nodes);
	bch2_free_pending_node_rewrites(c);
	bch2_fs_allocator_background_exit(c);
	bch2_fs_accounting_exit(c);
	bch2_fs_sb_errors_exit(c);
	bch2_fs_counters_exit(c);
@@ -564,8 +563,11 @@ static void __bch2_fs_free(struct bch_fs *c)
	BUG_ON(atomic_read(&c->journal_keys.ref));
	bch2_fs_btree_write_buffer_exit(c);
	percpu_free_rwsem(&c->mark_lock);
	EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
	free_percpu(c->online_reserved);
	if (c->online_reserved) {
		u64 v = percpu_u64_get(c->online_reserved);
		WARN(v, "online_reserved not 0 at shutdown: %lli", v);
		free_percpu(c->online_reserved);
	}

	darray_exit(&c->btree_roots_extra);
	free_percpu(c->pcpu);
@@ -1193,6 +1195,7 @@ static void bch2_dev_free(struct bch_dev *ca)

	kfree(ca->buckets_nouse);
	bch2_free_super(&ca->disk_sb);
	bch2_dev_allocator_background_exit(ca);
	bch2_dev_journal_exit(ca);

	free_percpu(ca->io_done);
@@ -1315,6 +1318,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
	atomic_long_set(&ca->ref, 1);
#endif

	bch2_dev_allocator_background_init(ca);

	if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
			    PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
	    !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) ||
@@ -1527,6 +1532,7 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
	 * The allocator thread itself allocates btree nodes, so stop it first:
	 */
	bch2_dev_allocator_remove(c, ca);
	bch2_recalc_capacity(c);
	bch2_dev_journal_stop(&c->journal, ca);
}

@@ -1538,6 +1544,7 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)

	bch2_dev_allocator_add(c, ca);
	bch2_recalc_capacity(c);
	bch2_dev_do_discards(ca);
}

int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,

@@ -697,14 +697,19 @@ do {									\
	}								\
} while (0)

#define per_cpu_sum(_p)							\
({									\
	typeof(*_p) _ret = 0;						\
									\
	int cpu;							\
	for_each_possible_cpu(cpu)					\
		_ret += *per_cpu_ptr(_p, cpu);				\
	_ret;								\
})

static inline u64 percpu_u64_get(u64 __percpu *src)
{
	u64 ret = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		ret += *per_cpu_ptr(src, cpu);
	return ret;
	return per_cpu_sum(src);
}
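The new macro generalizes the old u64-only helper to any percpu scalar type via typeof(). A usage sketch (hypothetical counter, assuming the usual dynamic-percpu API):

	unsigned __percpu *hits = alloc_percpu(unsigned);

	this_cpu_inc(*hits);
	/* ... */
	unsigned total = per_cpu_sum(hits);	/* folds across all possible CPUs */
	free_percpu(hits);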

static inline void percpu_u64_set(u64 __percpu *dst, u64 src)

@@ -13,14 +13,25 @@
#include <linux/seq_file.h>
#include <linux/sched/debug.h>

static inline void closure_put_after_sub(struct closure *cl, int flags)
static inline void closure_put_after_sub_checks(int flags)
{
	int r = flags & CLOSURE_REMAINING_MASK;

	BUG_ON(flags & CLOSURE_GUARD_MASK);
	BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
	if (WARN(flags & CLOSURE_GUARD_MASK,
		 "closure has guard bits set: %x (%u)",
		 flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r)))
		r &= ~CLOSURE_GUARD_MASK;

	if (!r) {
		WARN(!r && (flags & ~CLOSURE_DESTRUCTOR),
		     "closure ref hit 0 with incorrect flags set: %x (%u)",
		     flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags));
	}

static inline void closure_put_after_sub(struct closure *cl, int flags)
{
	closure_put_after_sub_checks(flags);

	if (!(flags & CLOSURE_REMAINING_MASK)) {
		smp_acquire__after_ctrl_dep();

		cl->closure_get_happened = false;
@@ -139,6 +150,41 @@ void __sched __closure_sync(struct closure *cl)
}
EXPORT_SYMBOL(__closure_sync);

/*
 * closure_return_sync - finish running a closure, synchronously (i.e. waiting
 * for outstanding get()s to finish) and returning once closure refcount is 0.
 *
 * Unlike closure_sync() this doesn't reinit the ref to 1; subsequent
 * closure_get_not_zero() calls will fail.
 */
void __sched closure_return_sync(struct closure *cl)
{
	struct closure_syncer s = { .task = current };

	cl->s = &s;
	set_closure_fn(cl, closure_sync_fn, NULL);

	unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR,
						   &cl->remaining);

	closure_put_after_sub_checks(flags);

	if (unlikely(flags & CLOSURE_REMAINING_MASK)) {
		while (1) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (s.done)
				break;
			schedule();
		}

		__set_current_state(TASK_RUNNING);
	}

	if (cl->parent)
		closure_put(cl->parent);
}
EXPORT_SYMBOL(closure_return_sync);
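A sketch of the intended caller pattern, per the comment above: a stack closure that must not be grabbed again once it has finished (hypothetical usage, not from the patch):

	struct closure cl;

	closure_init_stack(&cl);
	/* ... submit async work that takes refs via closure_get()/closure_put() ... */
	closure_return_sync(&cl);
	/*
	 * all outstanding refs have been dropped and the ref stays at 0;
	 * from this point closure_get_not_zero(&cl) returns false
	 */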

int __sched __closure_sync_timeout(struct closure *cl, unsigned long timeout)
{
	struct closure_syncer s = { .task = current };