Update bcachefs sources to 9404a01d3dc5 bcachefs: Make read_only a mount option again, but hidden

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2024-06-28 18:11:46 -04:00
parent b0eb3c2930
commit 34b5654d9e
49 changed files with 1004 additions and 838 deletions

View File

@ -1 +1 @@
792ca5ba3c9a07d762d9c1a440e31c0520f37de0
9404a01d3dc5553b106fa590602f4771b8e0b8ae

View File

@ -26,6 +26,7 @@ typedef struct {
#define __ATOMIC_READ(p) uatomic_read(p)
#define __ATOMIC_SET(p, v) uatomic_set(p, v)
#define __ATOMIC_SET_RELEASE(p, v) uatomic_set(p, v)
#define __ATOMIC_ADD_RETURN(v, p) uatomic_add_return(p, v)
#define __ATOMIC_SUB_RETURN(v, p) uatomic_sub_return(p, v)
#define __ATOMIC_ADD(v, p) uatomic_add(p, v)
@ -64,6 +65,7 @@ typedef struct {
#define __ATOMIC_READ(p) __atomic_load_n(p, __ATOMIC_RELAXED)
#define __ATOMIC_SET(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
#define __ATOMIC_SET_RELEASE(p, v) __atomic_store_n(p, v, __ATOMIC_RELEASE)
#define __ATOMIC_ADD_RETURN(v, p) __atomic_add_fetch(p, v, __ATOMIC_RELAXED)
#define __ATOMIC_ADD_RETURN_RELEASE(v, p) \
__atomic_add_fetch(p, v, __ATOMIC_RELEASE)
@ -189,6 +191,11 @@ static inline void a_type##_set(a_type##_t *v, i_type i) \
return __ATOMIC_SET(&v->counter, i); \
} \
\
static inline void a_type##_set_release(a_type##_t *v, i_type i) \
{ \
return __ATOMIC_SET_RELEASE(&v->counter, i); \
} \
\
static inline i_type a_type##_add_return(i_type i, a_type##_t *v) \
{ \
return __ATOMIC_ADD_RETURN(i, &v->counter); \
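
(Aside: for the plain atomic_t case, the a_type##_set_release wrapper generated above expands to the following — an illustrative expansion, not a line from this diff. closure_init_stack_release(), added later in this commit, relies on it to publish a freshly zeroed closure.)

static inline void atomic_set_release(atomic_t *v, int i)
{
	return __ATOMIC_SET_RELEASE(&v->counter, i);
	/* C11 backend: __atomic_store_n(&v->counter, i, __ATOMIC_RELEASE);
	 * urcu backend: uatomic_set(&v->counter, i) */
}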

View File

@ -284,6 +284,21 @@ static inline void closure_get(struct closure *cl)
#endif
}
/**
* closure_get_not_zero - increment a closure's refcount, unless it has already hit zero
*/
static inline bool closure_get_not_zero(struct closure *cl)
{
unsigned old = atomic_read(&cl->remaining);
do {
if (!(old & CLOSURE_REMAINING_MASK))
return false;
} while (!atomic_try_cmpxchg_acquire(&cl->remaining, &old, old + 1));
return true;
}
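
(Aside: debug.c later in this commit uses this helper when walking c->btree_trans_list — take a reference only if the closure hasn't already hit zero, otherwise skip the dying btree_trans. A minimal sketch of that pattern, with the surrounding list walk implied; the acquire cmpxchg here pairs with the release store in closure_init_stack_release() below:)

	/* inside list_for_each_entry(trans, &c->btree_trans_list, list) */
	if (!closure_get_not_zero(&trans->ref))
		continue;	/* refcount already zero: trans is being torn down */

	bch2_btree_trans_to_text(&i->buf, trans);	/* now safe to inspect */
	closure_put(&trans->ref);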
/**
* closure_init - Initialize a closure, setting the refcount to 1
* @cl: closure to initialize
@ -310,6 +325,12 @@ static inline void closure_init_stack(struct closure *cl)
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
}
static inline void closure_init_stack_release(struct closure *cl)
{
memset(cl, 0, sizeof(struct closure));
atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
}
/**
* closure_wake_up - wake up all closures on a wait list,
* with memory barrier
@ -355,6 +376,8 @@ do { \
*/
#define closure_return(_cl) continue_at((_cl), NULL, NULL)
void closure_return_sync(struct closure *cl);
/**
* continue_at_nobarrier - jump to another function without barrier
*

View File

@ -151,7 +151,7 @@ extern void workqueue_set_max_active(struct workqueue_struct *wq,
extern bool current_is_workqueue_rescuer(void);
extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
extern unsigned int work_busy(struct work_struct *work);
extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
static inline __printf(1, 2) void set_worker_desc(const char *fmt, ...) {}
extern void print_worker_info(const char *log_lvl, struct task_struct *task);
extern void show_workqueue_state(void);

View File

@ -30,7 +30,7 @@
#include <linux/sched/task.h>
#include <linux/sort.h>
static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket);
static void bch2_discard_one_bucket_fast(struct bch_dev *, u64);
/* Persistent alloc info: */
@ -476,7 +476,8 @@ err:
}
__flatten
struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos)
struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos,
enum btree_iter_update_trigger_flags flags)
{
struct btree_iter iter;
struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update_noupdate(trans, &iter, pos);
@ -484,7 +485,7 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans,
if (ret)
return ERR_PTR(ret);
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
ret = bch2_trans_update(trans, &iter, &a->k_i, flags);
bch2_trans_iter_exit(trans, &iter);
return unlikely(ret) ? ERR_PTR(ret) : a;
}
@ -595,8 +596,6 @@ int bch2_alloc_read(struct bch_fs *c)
struct bch_dev *ca = NULL;
int ret;
down_read(&c->gc_lock);
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
BTREE_ITER_prefetch, k, ({
@ -645,7 +644,6 @@ int bch2_alloc_read(struct bch_fs *c)
bch2_dev_put(ca);
bch2_trans_put(trans);
up_read(&c->gc_lock);
bch_err_fn(c, ret);
return ret;
@ -847,6 +845,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
new_a->gen++;
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
alloc_data_type_set(new_a, new_a->data_type);
}
if (old_a->data_type != new_a->data_type ||
@ -958,12 +957,12 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (statechange(a->data_type == BCH_DATA_need_discard) &&
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
bucket_flushed(new_a))
bch2_discard_one_bucket_fast(c, new.k->p);
bch2_discard_one_bucket_fast(ca, new.k->p.offset);
if (statechange(a->data_type == BCH_DATA_cached) &&
!bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
bch2_do_invalidates(c);
bch2_dev_do_invalidates(ca);
if (statechange(a->data_type == BCH_DATA_need_gc_gens))
bch2_gc_gens_async(c);
@ -1684,34 +1683,38 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
return ret;
}
static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket)
static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
{
int ret;
mutex_lock(&c->discard_buckets_in_flight_lock);
darray_for_each(c->discard_buckets_in_flight, i)
if (bkey_eq(*i, bucket)) {
mutex_lock(&ca->discard_buckets_in_flight_lock);
darray_for_each(ca->discard_buckets_in_flight, i)
if (i->bucket == bucket) {
ret = -BCH_ERR_EEXIST_discard_in_flight_add;
goto out;
}
ret = darray_push(&c->discard_buckets_in_flight, bucket);
ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
.in_progress = in_progress,
.bucket = bucket,
}));
out:
mutex_unlock(&c->discard_buckets_in_flight_lock);
mutex_unlock(&ca->discard_buckets_in_flight_lock);
return ret;
}
static void discard_in_flight_remove(struct bch_fs *c, struct bpos bucket)
static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
{
mutex_lock(&c->discard_buckets_in_flight_lock);
darray_for_each(c->discard_buckets_in_flight, i)
if (bkey_eq(*i, bucket)) {
darray_remove_item(&c->discard_buckets_in_flight, i);
mutex_lock(&ca->discard_buckets_in_flight_lock);
darray_for_each(ca->discard_buckets_in_flight, i)
if (i->bucket == bucket) {
BUG_ON(!i->in_progress);
darray_remove_item(&ca->discard_buckets_in_flight, i);
goto found;
}
BUG();
found:
mutex_unlock(&c->discard_buckets_in_flight_lock);
mutex_unlock(&ca->discard_buckets_in_flight_lock);
}
struct discard_buckets_state {
@ -1719,26 +1722,11 @@ struct discard_buckets_state {
u64 open;
u64 need_journal_commit;
u64 discarded;
struct bch_dev *ca;
u64 need_journal_commit_this_dev;
};
static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca)
{
if (s->ca == ca)
return;
if (s->ca && s->need_journal_commit_this_dev >
bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets)
bch2_journal_flush_async(&c->journal, NULL);
if (s->ca)
percpu_ref_put(&s->ca->io_ref);
s->ca = ca;
s->need_journal_commit_this_dev = 0;
}
static int bch2_discard_one_bucket(struct btree_trans *trans,
struct bch_dev *ca,
struct btree_iter *need_discard_iter,
struct bpos *discard_pos_done,
struct discard_buckets_state *s)
@ -1752,16 +1740,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
bool discard_locked = false;
int ret = 0;
struct bch_dev *ca = s->ca && s->ca->dev_idx == pos.inode
? s->ca
: bch2_dev_get_ioref(c, pos.inode, WRITE);
if (!ca) {
bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
return 0;
}
discard_buckets_next_dev(c, s, ca);
if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
s->open++;
goto out;
@ -1821,7 +1799,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
goto out;
}
if (discard_in_flight_add(c, SPOS(iter.pos.inode, iter.pos.offset, true)))
if (discard_in_flight_add(ca, iter.pos.offset, true))
goto out;
discard_locked = true;
@ -1845,8 +1823,9 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
}
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
alloc_data_type_set(&a->v, a->v.data_type);
write:
alloc_data_type_set(&a->v, a->v.data_type);
ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
bch2_trans_commit(trans, NULL, NULL,
BCH_WATERMARK_btree|
@ -1858,7 +1837,7 @@ write:
s->discarded++;
out:
if (discard_locked)
discard_in_flight_remove(c, iter.pos);
discard_in_flight_remove(ca, iter.pos.offset);
s->seen++;
bch2_trans_iter_exit(trans, &iter);
printbuf_exit(&buf);
@ -1867,7 +1846,8 @@ out:
static void bch2_do_discards_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
struct bch_fs *c = ca->fs;
struct discard_buckets_state s = {};
struct bpos discard_pos_done = POS_MAX;
int ret;
@ -1878,23 +1858,41 @@ static void bch2_do_discards_work(struct work_struct *work)
* successful commit:
*/
ret = bch2_trans_run(c,
for_each_btree_key(trans, iter,
BTREE_ID_need_discard, POS_MIN, 0, k,
bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s)));
discard_buckets_next_dev(c, &s, NULL);
for_each_btree_key_upto(trans, iter,
BTREE_ID_need_discard,
POS(ca->dev_idx, 0),
POS(ca->dev_idx, U64_MAX), 0, k,
bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s)));
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
percpu_ref_put(&ca->io_ref);
}
void bch2_dev_do_discards(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
return;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
goto put_ioref;
if (queue_work(c->write_ref_wq, &ca->discard_work))
return;
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
put_ioref:
percpu_ref_put(&ca->io_ref);
}
void bch2_do_discards(struct bch_fs *c)
{
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
!queue_work(c->write_ref_wq, &c->discard_work))
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
for_each_member_device(c, ca)
bch2_dev_do_discards(ca);
}
static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket)
@ -1923,68 +1921,69 @@ err:
static void bch2_do_discards_fast_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, discard_fast_work);
struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work);
struct bch_fs *c = ca->fs;
while (1) {
bool got_bucket = false;
struct bpos bucket;
struct bch_dev *ca;
u64 bucket;
mutex_lock(&c->discard_buckets_in_flight_lock);
darray_for_each(c->discard_buckets_in_flight, i) {
if (i->snapshot)
mutex_lock(&ca->discard_buckets_in_flight_lock);
darray_for_each(ca->discard_buckets_in_flight, i) {
if (i->in_progress)
continue;
ca = bch2_dev_get_ioref(c, i->inode, WRITE);
if (!ca) {
darray_remove_item(&c->discard_buckets_in_flight, i);
continue;
}
got_bucket = true;
bucket = *i;
i->snapshot = true;
bucket = i->bucket;
i->in_progress = true;
break;
}
mutex_unlock(&c->discard_buckets_in_flight_lock);
mutex_unlock(&ca->discard_buckets_in_flight_lock);
if (!got_bucket)
break;
if (ca->mi.discard && !c->opts.nochanges)
blkdev_issue_discard(ca->disk_sb.bdev,
bucket.offset * ca->mi.bucket_size,
bucket_to_sector(ca, bucket),
ca->mi.bucket_size,
GFP_KERNEL);
int ret = bch2_trans_do(c, NULL, NULL,
BCH_WATERMARK_btree|
BCH_TRANS_COMMIT_no_enospc,
bch2_clear_bucket_needs_discard(trans, bucket));
BCH_WATERMARK_btree|
BCH_TRANS_COMMIT_no_enospc,
bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket)));
bch_err_fn(c, ret);
percpu_ref_put(&ca->io_ref);
discard_in_flight_remove(c, bucket);
discard_in_flight_remove(ca, bucket);
if (ret)
break;
}
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
percpu_ref_put(&ca->io_ref);
}
static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket)
static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
{
rcu_read_lock();
struct bch_dev *ca = bch2_dev_rcu(c, bucket.inode);
bool dead = !ca || percpu_ref_is_dying(&ca->io_ref);
rcu_read_unlock();
struct bch_fs *c = ca->fs;
if (!dead &&
!discard_in_flight_add(c, bucket) &&
bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) &&
!queue_work(c->write_ref_wq, &c->discard_fast_work))
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
if (discard_in_flight_add(ca, bucket, false))
return;
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
return;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
goto put_ioref;
if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
return;
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
put_ioref:
percpu_ref_put(&ca->io_ref);
}
static int invalidate_one_bucket(struct btree_trans *trans,
@ -2010,7 +2009,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
return 0;
a = bch2_trans_start_alloc_update(trans, bucket);
a = bch2_trans_start_alloc_update(trans, bucket, BTREE_TRIGGER_bucket_invalidate);
ret = PTR_ERR_OR_ZERO(a);
if (ret)
goto out;
@ -2086,7 +2085,8 @@ again:
static void bch2_do_invalidates_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
struct bch_fs *c = ca->fs;
struct btree_trans *trans = bch2_trans_get(c);
int ret = 0;
@ -2094,50 +2094,63 @@ static void bch2_do_invalidates_work(struct work_struct *work)
if (ret)
goto err;
for_each_member_device(c, ca) {
s64 nr_to_invalidate =
should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
struct btree_iter iter;
bool wrapped = false;
s64 nr_to_invalidate =
should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
struct btree_iter iter;
bool wrapped = false;
bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
lru_pos(ca->dev_idx, 0,
((bch2_current_io_time(c, READ) + U32_MAX) &
LRU_TIME_MAX)), 0);
bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
lru_pos(ca->dev_idx, 0,
((bch2_current_io_time(c, READ) + U32_MAX) &
LRU_TIME_MAX)), 0);
while (true) {
bch2_trans_begin(trans);
while (true) {
bch2_trans_begin(trans);
struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
if (!k.k)
break;
ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
if (ret)
break;
}
bch2_trans_iter_exit(trans, &iter);
if (ret < 0) {
bch2_dev_put(ca);
struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
}
if (!k.k)
break;
ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
if (ret)
break;
bch2_btree_iter_advance(&iter);
}
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
percpu_ref_put(&ca->io_ref);
}
void bch2_dev_do_invalidates(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
return;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
goto put_ioref;
if (queue_work(c->write_ref_wq, &ca->invalidate_work))
return;
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
put_ioref:
percpu_ref_put(&ca->io_ref);
}
void bch2_do_invalidates(struct bch_fs *c)
{
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
!queue_work(c->write_ref_wq, &c->invalidate_work))
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
for_each_member_device(c, ca)
bch2_dev_do_invalidates(ca);
}
int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
@ -2453,16 +2466,20 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
set_bit(ca->dev_idx, c->rw_devs[i].d);
}
void bch2_fs_allocator_background_exit(struct bch_fs *c)
void bch2_dev_allocator_background_exit(struct bch_dev *ca)
{
darray_exit(&c->discard_buckets_in_flight);
darray_exit(&ca->discard_buckets_in_flight);
}
void bch2_dev_allocator_background_init(struct bch_dev *ca)
{
mutex_init(&ca->discard_buckets_in_flight_lock);
INIT_WORK(&ca->discard_work, bch2_do_discards_work);
INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work);
INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work);
}
void bch2_fs_allocator_background_init(struct bch_fs *c)
{
spin_lock_init(&c->freelist_lock);
mutex_init(&c->discard_buckets_in_flight_lock);
INIT_WORK(&c->discard_work, bch2_do_discards_work);
INIT_WORK(&c->discard_fast_work, bch2_do_discards_fast_work);
INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work);
}

View File

@ -206,7 +206,8 @@ static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a)
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update_noupdate(struct btree_trans *, struct btree_iter *, struct bpos);
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update(struct btree_trans *, struct bpos);
bch2_trans_start_alloc_update(struct btree_trans *, struct bpos,
enum btree_iter_update_trigger_flags);
void __bch2_alloc_to_v4(struct bkey_s_c, struct bch_alloc_v4 *);
@ -299,6 +300,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
enum btree_iter_update_trigger_flags);
int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
void bch2_dev_do_discards(struct bch_dev *);
void bch2_do_discards(struct bch_fs *);
static inline u64 should_invalidate_buckets(struct bch_dev *ca,
@ -313,6 +315,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
}
void bch2_dev_do_invalidates(struct bch_dev *);
void bch2_do_invalidates(struct bch_fs *);
static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
@ -336,7 +339,9 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *);
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
void bch2_fs_allocator_background_exit(struct bch_fs *);
void bch2_dev_allocator_background_exit(struct bch_dev *);
void bch2_dev_allocator_background_init(struct bch_dev *);
void bch2_fs_allocator_background_init(struct bch_fs *);
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */

View File

@ -621,13 +621,13 @@ again:
avail = dev_buckets_free(ca, *usage, watermark);
if (usage->d[BCH_DATA_need_discard].buckets > avail)
bch2_do_discards(c);
bch2_dev_do_discards(ca);
if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
bch2_gc_gens_async(c);
if (should_invalidate_buckets(ca, *usage))
bch2_do_invalidates(c);
bch2_dev_do_invalidates(ca);
if (!avail) {
if (cl && !waiting) {

View File

@ -496,6 +496,11 @@ struct io_count {
u64 sectors[2][BCH_DATA_NR];
};
struct discard_in_flight {
bool in_progress:1;
u64 bucket:63;
};
struct bch_dev {
struct kobject kobj;
#ifdef CONFIG_BCACHEFS_DEBUG
@ -533,8 +538,8 @@ struct bch_dev {
/*
* Buckets:
* Per-bucket arrays are protected by c->mark_lock, bucket_lock and
* gc_lock, for device resize - holding any is sufficient for access:
* Or rcu_read_lock(), but only for dev_ptr_stale():
* gc_gens_lock, for device resize - holding any is sufficient for
* access: Or rcu_read_lock(), but only for dev_ptr_stale():
*/
struct bucket_array __rcu *buckets_gc;
struct bucket_gens __rcu *bucket_gens;
@ -555,6 +560,12 @@ struct bch_dev {
size_t inc_gen_really_needs_gc;
size_t buckets_waiting_on_journal;
struct work_struct invalidate_work;
struct work_struct discard_work;
struct mutex discard_buckets_in_flight_lock;
DARRAY(struct discard_in_flight) discard_buckets_in_flight;
struct work_struct discard_fast_work;
atomic64_t rebalance_work;
struct journal_device journal;
@ -909,11 +920,6 @@ struct bch_fs {
unsigned write_points_nr;
struct buckets_waiting_for_journal buckets_waiting_for_journal;
struct work_struct invalidate_work;
struct work_struct discard_work;
struct mutex discard_buckets_in_flight_lock;
DARRAY(struct bpos) discard_buckets_in_flight;
struct work_struct discard_fast_work;
/* GARBAGE COLLECTION */
struct work_struct gc_gens_work;

View File

@ -468,18 +468,6 @@ struct bch_backpointer {
struct bpos pos;
} __packed __aligned(8);
/* LRU btree: */
struct bch_lru {
struct bch_val v;
__le64 idx;
} __packed __aligned(8);
#define LRU_ID_STRIPES (1U << 16)
#define LRU_TIME_BITS 48
#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
/* Optional/variable size superblock sections: */
struct bch_sb_field {
@ -516,6 +504,7 @@ struct bch_sb_field {
#include "inode_format.h"
#include "journal_seq_blacklist_format.h"
#include "logged_ops_format.h"
#include "lru_format.h"
#include "quota_format.h"
#include "reflink_format.h"
#include "replicas_format.h"
@ -954,8 +943,9 @@ enum bch_version_upgrade_opts {
#define BCH_ERROR_ACTIONS() \
x(continue, 0) \
x(ro, 1) \
x(panic, 2)
x(fix_safe, 1) \
x(panic, 2) \
x(ro, 3)
enum bch_error_actions {
#define x(t, n) BCH_ON_ERROR_##t = n,

View File

@ -1229,7 +1229,7 @@ int bch2_gc_gens(struct bch_fs *c)
int ret;
/*
* Ideally we would be using state_lock and not gc_lock here, but that
* Ideally we would be using state_lock and not gc_gens_lock here, but that
* introduces a deadlock in the RO path - we currently take the state
* lock at the start of going RO, thus the gc thread may get stuck:
*/
@ -1237,7 +1237,8 @@ int bch2_gc_gens(struct bch_fs *c)
return 0;
trace_and_count(c, gc_gens_start, c);
down_read(&c->gc_lock);
down_read(&c->state_lock);
for_each_member_device(c, ca) {
struct bucket_gens *gens = bucket_gens(ca);
@ -1306,7 +1307,7 @@ err:
ca->oldest_gen = NULL;
}
up_read(&c->gc_lock);
up_read(&c->state_lock);
mutex_unlock(&c->gc_gens_lock);
if (!bch2_err_matches(ret, EROFS))
bch_err_fn(c, ret);

View File

@ -1801,13 +1801,12 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *
goto hole;
} else {
struct bkey_cached *ck = (void *) path->l[0].b;
EBUG_ON(ck &&
(path->btree_id != ck->key.btree_id ||
!bkey_eq(path->pos, ck->key.pos)));
if (!ck || !ck->valid)
if (!ck)
return bkey_s_c_null;
EBUG_ON(path->btree_id != ck->key.btree_id ||
!bkey_eq(path->pos, ck->key.pos));
*u = ck->k->k;
k = bkey_i_to_s_c(ck->k);
}
@ -3131,7 +3130,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
memset(trans, 0, sizeof(*trans));
closure_init_stack(&trans->ref);
seqmutex_lock(&c->btree_trans_lock);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
@ -3151,15 +3149,10 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
BUG_ON(pos_task &&
pid == pos_task->pid &&
pos->locked);
if (pos_task && pid < pos_task->pid) {
list_add_tail(&trans->list, &pos->list);
goto list_add_done;
}
}
}
list_add_tail(&trans->list, &c->btree_trans_list);
list_add_done:
list_add(&trans->list, &c->btree_trans_list);
seqmutex_unlock(&c->btree_trans_lock);
got_trans:
trans->c = c;
@ -3200,6 +3193,8 @@ got_trans:
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
closure_init_stack_release(&trans->ref);
return trans;
}
@ -3236,7 +3231,6 @@ void bch2_trans_put(struct btree_trans *trans)
trans_for_each_update(trans, i)
__btree_path_put(trans->paths + i->path, true);
trans->nr_updates = 0;
trans->locking_wait.task = NULL;
check_btree_paths_leaked(trans);
@ -3248,6 +3242,13 @@ void bch2_trans_put(struct btree_trans *trans)
if (unlikely(trans->journal_replay_not_finished))
bch2_journal_keys_put(c);
/*
* trans->ref protects trans->locking_wait.task, btree_paths array; used
* by cycle detector
*/
closure_return_sync(&trans->ref);
trans->locking_wait.task = NULL;
unsigned long *paths_allocated = trans->paths_allocated;
trans->paths_allocated = NULL;
trans->paths = NULL;
@ -3265,8 +3266,6 @@ void bch2_trans_put(struct btree_trans *trans)
trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
if (trans) {
closure_sync(&trans->ref);
seqmutex_lock(&c->btree_trans_lock);
list_del(&trans->list);
seqmutex_unlock(&c->btree_trans_lock);
@ -3386,8 +3385,6 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;
if (trans) {
closure_sync(&trans->ref);
seqmutex_lock(&c->btree_trans_lock);
list_del(&trans->list);
seqmutex_unlock(&c->btree_trans_lock);

View File

@ -205,9 +205,22 @@ static void bkey_cached_free_fast(struct btree_key_cache *bc,
six_unlock_intent(&ck->c.lock);
}
static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s, gfp_t gfp)
{
struct bkey_cached *ck = kmem_cache_zalloc(bch2_key_cache, gfp);
if (unlikely(!ck))
return NULL;
ck->k = kmalloc(key_u64s * sizeof(u64), gfp);
if (unlikely(!ck->k)) {
kmem_cache_free(bch2_key_cache, ck);
return NULL;
}
ck->u64s = key_u64s;
return ck;
}
static struct bkey_cached *
bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
bool *was_new)
bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned key_u64s)
{
struct bch_fs *c = trans->c;
struct btree_key_cache *bc = &c->btree_key_cache;
@ -281,8 +294,10 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
}
ck = allocate_dropping_locks(trans, ret,
kmem_cache_zalloc(bch2_key_cache, _gfp));
__bkey_cached_alloc(key_u64s, _gfp));
if (ret) {
if (ck)
kfree(ck->k);
kmem_cache_free(bch2_key_cache, ck);
return ERR_PTR(ret);
}
@ -296,7 +311,6 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
ck->c.cached = true;
BUG_ON(!six_trylock_intent(&ck->c.lock));
BUG_ON(!six_trylock_write(&ck->c.lock));
*was_new = true;
return ck;
}
@ -326,71 +340,102 @@ out:
return ck;
}
static struct bkey_cached *
btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *path,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct btree_key_cache *bc = &c->btree_key_cache;
struct bkey_cached *ck;
bool was_new = false;
ck = bkey_cached_alloc(trans, path, &was_new);
if (IS_ERR(ck))
return ck;
/*
* bch2_varint_decode can read past the end of the buffer by at
* most 7 bytes (it won't be used):
*/
unsigned key_u64s = k.k->u64s + 1;
/*
* Allocate some extra space so that the transaction commit path is less
* likely to have to reallocate, since that requires a transaction
* restart:
*/
key_u64s = min(256U, (key_u64s * 3) / 2);
key_u64s = roundup_pow_of_two(key_u64s);
struct bkey_cached *ck = bkey_cached_alloc(trans, path, key_u64s);
int ret = PTR_ERR_OR_ZERO(ck);
if (ret)
return ret;
if (unlikely(!ck)) {
ck = bkey_cached_reuse(bc);
if (unlikely(!ck)) {
bch_err(c, "error allocating memory for key cache item, btree %s",
bch2_btree_id_str(path->btree_id));
return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create);
return -BCH_ERR_ENOMEM_btree_key_cache_create;
}
mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
}
ck->c.level = 0;
ck->c.btree_id = path->btree_id;
ck->key.btree_id = path->btree_id;
ck->key.pos = path->pos;
ck->valid = false;
ck->flags = 1U << BKEY_CACHED_ACCESSED;
if (unlikely(rhashtable_lookup_insert_fast(&bc->table,
&ck->hash,
bch2_btree_key_cache_params))) {
/* We raced with another fill: */
if (unlikely(key_u64s > ck->u64s)) {
mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
if (likely(was_new)) {
six_unlock_write(&ck->c.lock);
six_unlock_intent(&ck->c.lock);
kfree(ck);
} else {
bkey_cached_free_fast(bc, ck);
struct bkey_i *new_k = allocate_dropping_locks(trans, ret,
kmalloc(key_u64s * sizeof(u64), _gfp));
if (unlikely(!new_k)) {
bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
bch2_btree_id_str(ck->key.btree_id), key_u64s);
ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
} else if (ret) {
kfree(new_k);
goto err;
}
mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
return NULL;
kfree(ck->k);
ck->k = new_k;
ck->u64s = key_u64s;
}
atomic_long_inc(&bc->nr_keys);
bkey_reassemble(ck->k, k);
ret = rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params);
if (unlikely(ret)) /* raced with another fill? */
goto err;
atomic_long_inc(&bc->nr_keys);
six_unlock_write(&ck->c.lock);
return ck;
enum six_lock_type lock_want = __btree_lock_want(path, 0);
if (lock_want == SIX_LOCK_read)
six_lock_downgrade(&ck->c.lock);
btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
path->uptodate = BTREE_ITER_UPTODATE;
return 0;
err:
bkey_cached_free_fast(bc, ck);
mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
return ret;
}
static int btree_key_cache_fill(struct btree_trans *trans,
struct btree_path *ck_path,
struct bkey_cached *ck)
static noinline int btree_key_cache_fill(struct btree_trans *trans,
struct btree_path *ck_path,
unsigned flags)
{
if (flags & BTREE_ITER_cached_nofill) {
ck_path->uptodate = BTREE_ITER_UPTODATE;
return 0;
}
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
unsigned new_u64s = 0;
struct bkey_i *new_k = NULL;
int ret;
bch2_trans_iter_init(trans, &iter, ck->key.btree_id, ck->key.pos,
bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos,
BTREE_ITER_key_cache_fill|
BTREE_ITER_cached_nofill);
iter.flags &= ~BTREE_ITER_with_journal;
@ -399,70 +444,15 @@ static int btree_key_cache_fill(struct btree_trans *trans,
if (ret)
goto err;
if (!bch2_btree_node_relock(trans, ck_path, 0)) {
trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
/* Recheck after btree lookup, before allocating: */
ret = bch2_btree_key_cache_find(c, ck_path->btree_id, ck_path->pos) ? -EEXIST : 0;
if (unlikely(ret))
goto out;
ret = btree_key_cache_create(trans, ck_path, k);
if (ret)
goto err;
}
/*
* bch2_varint_decode can read past the end of the buffer by at
* most 7 bytes (it won't be used):
*/
new_u64s = k.k->u64s + 1;
/*
* Allocate some extra space so that the transaction commit path is less
* likely to have to reallocate, since that requires a transaction
* restart:
*/
new_u64s = min(256U, (new_u64s * 3) / 2);
if (new_u64s > ck->u64s) {
new_u64s = roundup_pow_of_two(new_u64s);
new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN);
if (!new_k) {
bch2_trans_unlock(trans);
new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
if (!new_k) {
bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
bch2_btree_id_str(ck->key.btree_id), new_u64s);
ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
goto err;
}
ret = bch2_trans_relock(trans);
if (ret) {
kfree(new_k);
goto err;
}
if (!bch2_btree_node_relock(trans, ck_path, 0)) {
kfree(new_k);
trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
goto err;
}
}
}
ret = bch2_btree_node_lock_write(trans, ck_path, &ck_path->l[0].b->c);
if (ret) {
kfree(new_k);
goto err;
}
if (new_k) {
kfree(ck->k);
ck->u64s = new_u64s;
ck->k = new_k;
}
bkey_reassemble(ck->k, k);
ck->valid = true;
bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
out:
/* We're not likely to need this iterator again: */
bch2_set_btree_iter_dontneed(&iter);
err:
@ -470,107 +460,19 @@ err:
return ret;
}
static noinline int
bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
unsigned flags)
static inline int btree_path_traverse_cached_fast(struct btree_trans *trans,
struct btree_path *path)
{
struct bch_fs *c = trans->c;
struct bkey_cached *ck;
int ret = 0;
BUG_ON(path->level);
path->l[1].b = NULL;
if (bch2_btree_node_relock_notrace(trans, path, 0)) {
ck = (void *) path->l[0].b;
goto fill;
}
retry:
ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
if (!ck) {
ck = btree_key_cache_create(trans, path);
ret = PTR_ERR_OR_ZERO(ck);
if (ret)
goto err;
if (!ck)
goto retry;
btree_path_cached_set(trans, path, ck, BTREE_NODE_INTENT_LOCKED);
path->locks_want = 1;
} else {
enum six_lock_type lock_want = __btree_lock_want(path, 0);
ret = btree_node_lock(trans, path, (void *) ck, 0,
lock_want, _THIS_IP_);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto err;
BUG_ON(ret);
if (ck->key.btree_id != path->btree_id ||
!bpos_eq(ck->key.pos, path->pos)) {
six_unlock_type(&ck->c.lock, lock_want);
goto retry;
}
btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
}
fill:
path->uptodate = BTREE_ITER_UPTODATE;
if (!ck->valid && !(flags & BTREE_ITER_cached_nofill)) {
ret = bch2_btree_path_upgrade(trans, path, 1) ?:
btree_key_cache_fill(trans, path, ck) ?:
bch2_btree_path_relock(trans, path, _THIS_IP_);
if (ret)
goto err;
path->uptodate = BTREE_ITER_UPTODATE;
}
if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
BUG_ON(path->uptodate);
return ret;
err:
path->uptodate = BTREE_ITER_NEED_TRAVERSE;
if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
btree_node_unlock(trans, path, 0);
path->l[0].b = ERR_PTR(ret);
}
return ret;
}
int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_cached *ck;
int ret = 0;
EBUG_ON(path->level);
path->l[1].b = NULL;
if (bch2_btree_node_relock_notrace(trans, path, 0)) {
ck = (void *) path->l[0].b;
goto fill;
}
retry:
ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
if (!ck)
return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
return -ENOENT;
enum six_lock_type lock_want = __btree_lock_want(path, 0);
ret = btree_node_lock(trans, path, (void *) ck, 0,
lock_want, _THIS_IP_);
EBUG_ON(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart));
int ret = btree_node_lock(trans, path, (void *) ck, 0, lock_want, _THIS_IP_);
if (ret)
return ret;
@ -580,18 +482,40 @@ retry:
goto retry;
}
btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
fill:
if (!ck->valid)
return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
path->uptodate = BTREE_ITER_UPTODATE;
EBUG_ON(!ck->valid);
EBUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
return 0;
}
int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
unsigned flags)
{
EBUG_ON(path->level);
path->l[1].b = NULL;
if (bch2_btree_node_relock_notrace(trans, path, 0)) {
path->uptodate = BTREE_ITER_UPTODATE;
return 0;
}
int ret;
do {
ret = btree_path_traverse_cached_fast(trans, path);
if (unlikely(ret == -ENOENT))
ret = btree_key_cache_fill(trans, path, flags);
} while (ret == -EEXIST);
if (unlikely(ret)) {
path->uptodate = BTREE_ITER_NEED_TRAVERSE;
if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
btree_node_unlock(trans, path, 0);
path->l[0].b = ERR_PTR(ret);
}
}
return ret;
}
@ -630,8 +554,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
goto out;
}
BUG_ON(!ck->valid);
if (journal_seq && ck->journal.seq != journal_seq)
goto out;
@ -753,7 +675,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
BUG_ON(insert->k.u64s > ck->u64s);
bkey_copy(ck->k, insert);
ck->valid = true;
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
@ -792,10 +713,9 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
struct btree_path *path)
{
struct bch_fs *c = trans->c;
struct btree_key_cache *bc = &c->btree_key_cache;
struct bkey_cached *ck = (void *) path->l[0].b;
BUG_ON(!ck->valid);
/*
* We just did an update to the btree, bypassing the key cache: the key
* cache key is now stale and must be dropped, even if dirty:
@ -806,7 +726,11 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
bch2_journal_pin_drop(&c->journal, &ck->journal);
}
ck->valid = false;
bkey_cached_evict(bc, ck);
bkey_cached_free_fast(bc, ck);
mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
}
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,

View File

@ -137,7 +137,8 @@ static inline void bch2_trans_unlock_write(struct btree_trans *trans)
{
if (likely(trans->write_locked)) {
trans_for_each_update(trans, i)
if (!same_leaf_as_prev(trans, i))
if (btree_node_locked_type(trans->paths + i->path, i->level) ==
BTREE_NODE_WRITE_LOCKED)
bch2_btree_node_unlock_write_inlined(trans,
trans->paths + i->path, insert_l(trans, i)->b);
trans->write_locked = false;
@ -777,14 +778,12 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
trans_for_each_update(trans, i) {
struct btree_path *path = trans->paths + i->path;
if (!i->cached) {
if (!i->cached)
bch2_btree_insert_key_leaf(trans, path, i->k, trans->journal_res.seq);
} else if (!i->key_cache_already_flushed)
else if (!i->key_cache_already_flushed)
bch2_btree_insert_key_cached(trans, flags, i);
else {
else
bch2_btree_key_cache_drop(trans, path);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
}
}
return 0;

View File

@ -388,7 +388,6 @@ struct bkey_cached {
unsigned long flags;
unsigned long btree_trans_barrier_seq;
u16 u64s;
bool valid;
struct bkey_cached_key key;
struct rhash_head hash;

View File

@ -569,7 +569,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
*sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);
if (flags & BTREE_TRIGGER_transactional) {
struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket);
struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);
ret = PTR_ERR_OR_ZERO(a) ?:
__mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &a->v);
if (ret)
@ -1217,7 +1217,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
bucket_gens->nbuckets - bucket_gens->first_bucket;
if (resize) {
down_write(&c->gc_lock);
down_write(&ca->bucket_lock);
percpu_down_write(&c->mark_lock);
}
@ -1240,7 +1239,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
if (resize) {
percpu_up_write(&c->mark_lock);
up_write(&ca->bucket_lock);
up_write(&c->gc_lock);
}
ret = 0;

View File

@ -85,7 +85,7 @@ static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
return rcu_dereference_check(ca->buckets_gc,
!ca->fs ||
percpu_rwsem_is_held(&ca->fs->mark_lock) ||
lockdep_is_held(&ca->fs->gc_lock) ||
lockdep_is_held(&ca->fs->state_lock) ||
lockdep_is_held(&ca->bucket_lock));
}
@ -103,7 +103,7 @@ static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
return rcu_dereference_check(ca->bucket_gens,
!ca->fs ||
percpu_rwsem_is_held(&ca->fs->mark_lock) ||
lockdep_is_held(&ca->fs->gc_lock) ||
lockdep_is_held(&ca->fs->state_lock) ||
lockdep_is_held(&ca->bucket_lock));
}

View File

@ -214,22 +214,10 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
if (arg.opts) {
char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
char *ro, *rest;
/*
* If passed a "read_only" mount option, remove it because it is
* no longer a valid mount option, and the filesystem will be
* set "read_only" regardless.
*/
ro = strstr(optstr, "read_only");
if (ro) {
rest = ro + strlen("read_only");
memmove(ro, rest, strlen(rest) + 1);
}
ret = PTR_ERR_OR_ZERO(optstr) ?:
bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr);
kfree(optstr);
if (!IS_ERR(optstr))
kfree(optstr);
if (ret)
goto err;
@ -333,7 +321,8 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
return ret;
ret = bch2_dev_add(c, path);
kfree(path);
if (!IS_ERR(path))
kfree(path);
return ret;
}
@ -579,7 +568,6 @@ static long bch2_ioctl_query_accounting(struct bch_fs *c,
ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
err:
bch_err_fn(c, ret);
darray_exit(&accounting);
return ret;
}
@ -861,7 +849,8 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,
ret = PTR_ERR_OR_ZERO(optstr) ?:
bch2_parse_mount_opts(c, &thr->opts, NULL, optstr);
kfree(optstr);
if (!IS_ERR(optstr))
kfree(optstr);
if (ret)
goto err;

View File

@ -10,6 +10,7 @@
#include <linux/xxhash.h>
#include <linux/key.h>
#include <linux/random.h>
#include <linux/ratelimit.h>
#include <linux/scatterlist.h>
#include <crypto/algapi.h>
#include <crypto/chacha.h>
@ -436,7 +437,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) {
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n"
"expected %0llx:%0llx got %0llx:%0llx (old type ",
" expected %0llx:%0llx got %0llx:%0llx (old type ",
__func__,
crc_old.csum.hi,
crc_old.csum.lo,
@ -446,7 +447,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
prt_str(&buf, " new type ");
bch2_prt_csum_type(&buf, new_csum_type);
prt_str(&buf, ")");
bch_err(c, "%s", buf.buf);
WARN_RATELIMIT(1, "%s", buf.buf);
printbuf_exit(&buf);
return -EIO;
}

View File

@ -568,6 +568,32 @@ static const struct file_operations cached_btree_nodes_ops = {
.read = bch2_cached_btree_nodes_read,
};
typedef int (*list_cmp_fn)(const struct list_head *l, const struct list_head *r);
static void list_sort(struct list_head *head, list_cmp_fn cmp)
{
struct list_head *pos;
list_for_each(pos, head)
while (!list_is_last(pos, head) &&
cmp(pos, pos->next) > 0) {
struct list_head *pos2, *next = pos->next;
list_del(next);
list_for_each(pos2, head)
if (cmp(next, pos2) < 0)
goto pos_found;
BUG();
pos_found:
list_add_tail(next, pos2);
}
}
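
(Aside: a hedged usage sketch for this local helper — hypothetical struct and comparator, purely to show the comparator contract. Note this is the O(n^2) insertion sort defined just above, not the kernel's lib/list_sort:)

struct trace_entry {
	struct list_head	list;
	u64			seq;
};

static int trace_entry_cmp(const struct list_head *l, const struct list_head *r)
{
	return cmp_int(container_of(l, struct trace_entry, list)->seq,
		       container_of(r, struct trace_entry, list)->seq);
}

/* list_sort(&some_list, trace_entry_cmp) leaves entries in ascending seq
 * order; fine for short debugfs lists despite being O(n^2) */

In the diff itself the comparator is just pointer order (list_ptr_order_cmp), which gives the transaction list a stable iteration order so the walk below can drop and retake btree_trans_lock and resume from i->iter.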
static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r)
{
return cmp_int(l, r);
}
static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
@ -575,41 +601,39 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
struct bch_fs *c = i->c;
struct btree_trans *trans;
ssize_t ret = 0;
u32 seq;
i->ubuf = buf;
i->size = size;
i->ret = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
list_for_each_entry(trans, &c->btree_trans_list, list) {
struct task_struct *task = READ_ONCE(trans->locking_wait.task);
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
if (!task || task->pid <= i->iter)
list_for_each_entry(trans, &c->btree_trans_list, list) {
if ((ulong) trans < i->iter)
continue;
closure_get(&trans->ref);
seq = seqmutex_seq(&c->btree_trans_lock);
seqmutex_unlock(&c->btree_trans_lock);
i->iter = (ulong) trans;
ret = flush_buf(i);
if (ret) {
closure_put(&trans->ref);
goto unlocked;
}
if (!closure_get_not_zero(&trans->ref))
continue;
u32 seq = seqmutex_unlock(&c->btree_trans_lock);
bch2_btree_trans_to_text(&i->buf, trans);
prt_printf(&i->buf, "backtrace:\n");
printbuf_indent_add(&i->buf, 2);
bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL);
bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
i->iter = task->pid;
closure_put(&trans->ref);
ret = flush_buf(i);
if (ret)
goto unlocked;
if (!seqmutex_relock(&c->btree_trans_lock, seq))
goto restart;
}
@ -804,50 +828,55 @@ static const struct file_operations btree_transaction_stats_op = {
.read = btree_transaction_stats_read,
};
static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
/* walk btree transactions until we find a deadlock and print it */
static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
{
struct dump_iter *i = file->private_data;
struct bch_fs *c = i->c;
struct btree_trans *trans;
ssize_t ret = 0;
u32 seq;
i->ubuf = buf;
i->size = size;
i->ret = 0;
if (i->iter)
goto out;
pid_t iter = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
list_for_each_entry(trans, &c->btree_trans_list, list) {
struct task_struct *task = READ_ONCE(trans->locking_wait.task);
if (!task || task->pid <= i->iter)
if (!task || task->pid <= iter)
continue;
closure_get(&trans->ref);
seq = seqmutex_seq(&c->btree_trans_lock);
seqmutex_unlock(&c->btree_trans_lock);
iter = task->pid;
ret = flush_buf(i);
if (ret) {
closure_put(&trans->ref);
goto out;
}
if (!closure_get_not_zero(&trans->ref))
continue;
bch2_check_for_deadlock(trans, &i->buf);
u32 seq = seqmutex_unlock(&c->btree_trans_lock);
i->iter = task->pid;
bool found = bch2_check_for_deadlock(trans, out) != 0;
closure_put(&trans->ref);
if (found)
return;
if (!seqmutex_relock(&c->btree_trans_lock, seq))
goto restart;
}
seqmutex_unlock(&c->btree_trans_lock);
out:
}
static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
struct bch_fs *c = i->c;
ssize_t ret = 0;
i->ubuf = buf;
i->size = size;
i->ret = 0;
if (!i->iter) {
btree_deadlock_to_text(&i->buf, c);
i->iter++;
}
if (i->buf.allocation_failure)
ret = -ENOMEM;

View File

@ -521,8 +521,9 @@ fsck_err:
return ret;
}
static int accounting_read_key(struct bch_fs *c, struct btree_trans *trans, struct bkey_s_c k)
static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
if (k.k->type != KEY_TYPE_accounting)
@ -557,15 +558,15 @@ fsck_err:
int bch2_accounting_read(struct bch_fs *c)
{
struct bch_accounting_mem *acc = &c->accounting;
struct btree_trans *trans = bch2_trans_get(c);
int ret = bch2_trans_run(c,
for_each_btree_key(trans, iter,
int ret = for_each_btree_key(trans, iter,
BTREE_ID_accounting, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
struct bkey u;
struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, &iter), &u);
accounting_read_key(c, trans, k);
})));
accounting_read_key(trans, k);
}));
if (ret)
goto err;
@ -598,7 +599,7 @@ int bch2_accounting_read(struct bch_fs *c)
continue;
}
ret = accounting_read_key(c, NULL, k);
ret = accounting_read_key(trans, k);
if (ret)
goto err;
}
@ -645,6 +646,7 @@ int bch2_accounting_read(struct bch_fs *c)
preempt_enable();
percpu_up_read(&c->mark_lock);
err:
bch2_trans_put(trans);
bch_err_fn(c, ret);
return ret;
}

View File

@ -283,7 +283,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
if (flags & BTREE_TRIGGER_transactional) {
struct bkey_i_alloc_v4 *a =
bch2_trans_start_alloc_update(trans, bucket);
bch2_trans_start_alloc_update(trans, bucket, 0);
ret = PTR_ERR_OR_ZERO(a) ?:
__mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags);
}

View File

@ -16,6 +16,7 @@ bool bch2_inconsistent_error(struct bch_fs *c)
switch (c->opts.errors) {
case BCH_ON_ERROR_continue:
return false;
case BCH_ON_ERROR_fix_safe:
case BCH_ON_ERROR_ro:
if (bch2_fs_emergency_read_only(c))
bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
@ -211,6 +212,12 @@ static void prt_actioning(struct printbuf *out, const char *action)
prt_str(out, "ing");
}
static const u8 fsck_flags_extra[] = {
#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags,
BCH_SB_ERRS()
#undef x
};
int __bch2_fsck_err(struct bch_fs *c,
struct btree_trans *trans,
enum bch_fsck_flags flags,
@ -226,6 +233,9 @@ int __bch2_fsck_err(struct bch_fs *c,
might_sleep();
if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
flags |= fsck_flags_extra[err];
if (!c)
c = trans->c;
@ -293,7 +303,14 @@ int __bch2_fsck_err(struct bch_fs *c,
prt_printf(out, bch2_log_msg(c, ""));
#endif
if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
if ((flags & FSCK_CAN_FIX) &&
(flags & FSCK_AUTOFIX) &&
(c->opts.errors == BCH_ON_ERROR_continue ||
c->opts.errors == BCH_ON_ERROR_fix_safe)) {
prt_str(out, ", ");
prt_actioning(out, action);
ret = -BCH_ERR_fsck_fix;
} else if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
if (c->opts.errors != BCH_ON_ERROR_continue ||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
prt_str(out, ", shutting down");

View File

@ -108,13 +108,6 @@ struct fsck_err_state {
char *last_msg;
};
enum bch_fsck_flags {
FSCK_CAN_FIX = 1 << 0,
FSCK_CAN_IGNORE = 1 << 1,
FSCK_NEED_FSCK = 1 << 2,
FSCK_NO_RATELIMIT = 1 << 3,
};
#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)
__printf(5, 6) __cold

View File

@ -1034,6 +1034,18 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
--out->atomic;
}
void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc)
{
prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
crc->compressed_size,
crc->uncompressed_size,
crc->offset, crc->nonce);
bch2_prt_csum_type(out, crc->csum_type);
prt_printf(out, " %0llx:%0llx ", crc->csum.hi, crc->csum.lo);
prt_str(out, " compress ");
bch2_prt_compression_type(out, crc->compression_type);
}
void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
@ -1059,13 +1071,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bch_extent_crc_unpacked crc =
bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
crc.compressed_size,
crc.uncompressed_size,
crc.offset, crc.nonce);
bch2_prt_csum_type(out, crc.csum_type);
prt_str(out, " compress ");
bch2_prt_compression_type(out, crc.compression_type);
bch2_extent_crc_unpacked_to_text(out, &crc);
break;
}
case BCH_EXTENT_ENTRY_stripe_ptr: {
@ -1096,6 +1102,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
}
}
static int extent_ptr_invalid(struct bch_fs *c,
struct bkey_s_c k,
enum bch_validate_flags flags,

View File

@ -212,6 +212,8 @@ static inline bool crc_is_encoded(struct bch_extent_crc_unpacked crc)
return crc.csum_type != BCH_CSUM_none || crc_is_compressed(crc);
}
void bch2_extent_crc_unpacked_to_text(struct printbuf *, struct bch_extent_crc_unpacked *);
/* bkey_ptrs: generically over any key type that has ptrs */
struct bkey_ptrs_c {

View File

@ -678,8 +678,8 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
bch2_pagecache_add_get(inode);
folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT,
FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE,
mapping_gfp_mask(mapping));
FGP_WRITEBEGIN | fgf_set_order(len),
mapping_gfp_mask(mapping));
if (IS_ERR_OR_NULL(folio))
goto err_unlock;
@ -820,9 +820,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
darray_init(&fs);
ret = bch2_filemap_get_contig_folios_d(mapping, pos, end,
FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT,
mapping_gfp_mask(mapping),
&fs);
FGP_WRITEBEGIN | fgf_set_order(len),
mapping_gfp_mask(mapping), &fs);
if (ret)
goto out;
@ -864,24 +863,26 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
f_pos = pos;
f_offset = pos - folio_pos(darray_first(fs));
darray_for_each(fs, fi) {
ssize_t f_reserved;
f = *fi;
f_len = min(end, folio_end_pos(f)) - f_pos;
f_reserved = bch2_folio_reservation_get_partial(c, inode, f, &res, f_offset, f_len);
/*
* XXX: per POSIX and fstests generic/275, on -ENOSPC we're
* supposed to write as much as we have disk space for.
*
* On failure here we should still write out a partial page if
* we aren't completely out of disk space - we don't do that
* yet:
*/
ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len);
if (unlikely(ret)) {
folios_trunc(&fs, fi);
if (!fs.nr)
goto out;
if (unlikely(f_reserved != f_len)) {
if (f_reserved < 0) {
if (f == darray_first(fs)) {
ret = f_reserved;
goto out;
}
folios_trunc(&fs, fi);
end = min(end, folio_end_pos(darray_last(fs)));
} else {
folios_trunc(&fs, fi + 1);
end = f_pos + f_reserved;
}
end = min(end, folio_end_pos(darray_last(fs)));
break;
}

View File

@ -179,7 +179,7 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
struct bch_inode_info *inode = file_bch_inode(file);
struct address_space *mapping = file->f_mapping;
size_t count = iov_iter_count(iter);
ssize_t ret;
ssize_t ret = 0;
if (!count)
return 0; /* skip atime */
@ -205,7 +205,7 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
iocb->ki_pos += ret;
} else {
bch2_pagecache_add_get(inode);
ret = generic_file_read_iter(iocb, iter);
ret = filemap_read(iocb, iter, ret);
bch2_pagecache_add_put(inode);
}
out:

View File

@ -423,7 +423,7 @@ int bch2_folio_reservation_get(struct bch_fs *c,
struct bch_inode_info *inode,
struct folio *folio,
struct bch2_folio_reservation *res,
unsigned offset, unsigned len)
size_t offset, size_t len)
{
struct bch_folio *s = bch2_folio_create(folio, 0);
unsigned i, disk_sectors = 0, quota_sectors = 0;
@ -437,8 +437,7 @@ int bch2_folio_reservation_get(struct bch_fs *c,
for (i = round_down(offset, block_bytes(c)) >> 9;
i < round_up(offset + len, block_bytes(c)) >> 9;
i++) {
disk_sectors += sectors_to_reserve(&s->s[i],
res->disk.nr_replicas);
disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
quota_sectors += s->s[i].state == SECTOR_unallocated;
}
@ -449,12 +448,9 @@ int bch2_folio_reservation_get(struct bch_fs *c,
}
if (quota_sectors) {
ret = bch2_quota_reservation_add(c, inode, &res->quota,
quota_sectors, true);
ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true);
if (unlikely(ret)) {
struct disk_reservation tmp = {
.sectors = disk_sectors
};
struct disk_reservation tmp = { .sectors = disk_sectors };
bch2_disk_reservation_put(c, &tmp);
res->disk.sectors -= disk_sectors;
@ -465,6 +461,31 @@ int bch2_folio_reservation_get(struct bch_fs *c,
return 0;
}
ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
struct bch_inode_info *inode,
struct folio *folio,
struct bch2_folio_reservation *res,
size_t offset, size_t len)
{
size_t l, reserved = 0;
int ret;
while ((l = len - reserved)) {
while ((ret = bch2_folio_reservation_get(c, inode, folio, res, offset, l))) {
if ((offset & (block_bytes(c) - 1)) + l <= block_bytes(c))
return reserved ?: ret;
len = reserved + l;
l /= 2;
}
offset += l;
reserved += l;
}
return reserved;
}
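
(Aside: a simplified sketch of how the buffered-write loop earlier in this commit consumes the partial result — a fragment only; the real code in __bch2_buffered_write() also truncates the folio array when a later folio can't be fully reserved:)

	ssize_t f_reserved = bch2_folio_reservation_get_partial(c, inode, f, &res,
								f_offset, f_len);
	if (f_reserved < 0) {
		ret = f_reserved;		/* not even one block could be reserved */
		goto out;
	}
	if (f_reserved < f_len)
		end = f_pos + f_reserved;	/* -ENOSPC partway: shrink the write */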
static void bch2_clear_folio_bits(struct folio *folio)
{
struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);

View File

@ -153,7 +153,12 @@ int bch2_folio_reservation_get(struct bch_fs *,
struct bch_inode_info *,
struct folio *,
struct bch2_folio_reservation *,
unsigned, unsigned);
size_t, size_t);
ssize_t bch2_folio_reservation_get_partial(struct bch_fs *,
struct bch_inode_info *,
struct folio *,
struct bch2_folio_reservation *,
size_t, size_t);
void bch2_set_folio_dirty(struct bch_fs *,
struct bch_inode_info *,

View File

@ -188,6 +188,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
BUG_ON(!old);
if (unlikely(old != inode)) {
/*
* bcachefs doesn't use I_NEW; we have no use for it since we
* only insert fully created inodes in the inode hash table. But
* discard_new_inode() expects it to be set...
*/
inode->v.i_flags |= I_NEW;
discard_new_inode(&inode->v);
inode = old;
} else {
@ -195,8 +201,10 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock);
/*
* we really don't want insert_inode_locked2() to be setting
* I_NEW...
* Again, I_NEW makes no sense for bcachefs. This is only needed
* for clearing I_NEW, but since the inode was already fully
* created and initialized we didn't actually want
* inode_insert5() to set it for us.
*/
unlock_new_inode(&inode->v);
}
@ -880,6 +888,16 @@ static int bch2_getattr(struct mnt_idmap *idmap,
stat->subvol = inode->ei_subvol;
stat->result_mask |= STATX_SUBVOL;
if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->v.i_mode)) {
stat->result_mask |= STATX_DIOALIGN;
/*
* this is incorrect; we should be tracking this in superblock,
* and checking the alignment of open devices
*/
stat->dio_mem_align = SECTOR_SIZE;
stat->dio_offset_align = block_bytes(c);
}
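
(Aside: from userspace the new fields surface through statx(2) with the STATX_DIOALIGN mask, available since Linux 6.1. A hedged example — the path is a placeholder, and it assumes libc/kernel headers new enough to declare statx() and define STATX_DIOALIGN and the stx_dio_* fields:)

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/stat.h>
#include <stdio.h>

int main(void)
{
	struct statx stx;

	if (statx(AT_FDCWD, "/mnt/bcachefs/file", 0, STATX_DIOALIGN, &stx))
		return 1;
	if (stx.stx_mask & STATX_DIOALIGN)
		printf("dio mem align %u, dio offset align %u\n",
		       stx.stx_dio_mem_align, stx.stx_dio_offset_align);
	return 0;
}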
if (request_mask & STATX_BTIME) {
stat->result_mask |= STATX_BTIME;
stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
@ -1157,6 +1175,7 @@ static const struct file_operations bch_file_operations = {
.read_iter = bch2_read_iter,
.write_iter = bch2_write_iter,
.mmap = bch2_mmap,
.get_unmapped_area = thp_get_unmapped_area,
.fsync = bch2_fsync,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
@ -1488,11 +1507,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
bch2_iget5_set(&inode->v, &inum);
bch2_inode_update_after_write(trans, inode, bi, ~0);
if (BCH_SUBVOLUME_SNAP(subvol))
set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
else
clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
inode->v.i_blocks = bi->bi_sectors;
inode->v.i_ino = bi->bi_inum;
inode->v.i_rdev = bi->bi_dev;
@ -1504,6 +1518,9 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
inode->ei_qid = bch_qid(bi);
inode->ei_subvol = inum.subvol;
if (BCH_SUBVOLUME_SNAP(subvol))
set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
inode->v.i_mapping->a_ops = &bch_address_space_operations;
switch (inode->v.i_mode & S_IFMT) {
@ -1776,7 +1793,8 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root)
const struct bch_option *opt = &bch2_opt_table[i];
u64 v = bch2_opt_get_by_id(&c->opts, i);
if (!(opt->flags & OPT_MOUNT))
if ((opt->flags & OPT_HIDDEN) ||
!(opt->flags & OPT_MOUNT))
continue;
if (v == bch2_opt_get_by_id(&bch2_opts_default, i))

View File

@ -535,12 +535,13 @@ fsck_err:
static void __bch2_inode_unpacked_to_text(struct printbuf *out,
struct bch_inode_unpacked *inode)
{
prt_printf(out, "\n");
printbuf_indent_add(out, 2);
prt_printf(out, "mode=%o\n", inode->bi_mode);
prt_str(out, "flags=");
prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1));
prt_printf(out, " (%x)\n", inode->bi_flags);
prt_printf(out, "(%x)\n", inode->bi_flags);
prt_printf(out, "journal_seq=%llu\n", inode->bi_journal_seq);
prt_printf(out, "bi_size=%llu\n", inode->bi_size);

View File

@ -389,7 +389,6 @@ retry:
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(trans);
if (!bch2_bkey_matches_ptr(c, k,
rbio->pick.ptr,

View File

@ -1080,7 +1080,10 @@ do_write:
*_dst = dst;
return more;
csum_err:
bch_err(c, "%s write error: error verifying existing checksum while rewriting existing data (memory corruption?)",
bch_err_inum_offset_ratelimited(c,
op->pos.inode,
op->pos.offset << 9,
"%s write error: error verifying existing checksum while rewriting existing data (memory corruption?)",
op->flags & BCH_WRITE_MOVE ? "move" : "user");
ret = -EIO;
err:

View File

@ -1520,6 +1520,11 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
struct journal_entry_pin *pin;
spin_lock(&j->lock);
if (!test_bit(JOURNAL_running, &j->flags)) {
spin_unlock(&j->lock);
return true;
}
*seq = max(*seq, j->pin.front);
if (*seq >= j->pin.back) {

View File

@ -722,13 +722,16 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
prt_printf(out, "dev=%u", le32_to_cpu(u->dev));
printbuf_indent_add(out, 2);
for (i = 0; i < nr_types; i++) {
prt_newline(out);
bch2_prt_data_type(out, i);
prt_printf(out, ": buckets=%llu sectors=%llu fragmented=%llu",
le64_to_cpu(u->d[i].buckets),
le64_to_cpu(u->d[i].sectors),
le64_to_cpu(u->d[i].fragmented));
}
printbuf_indent_sub(out, 2);
}
static int journal_entry_log_validate(struct bch_fs *c,
@ -1678,6 +1681,13 @@ static CLOSURE_CALLBACK(journal_write_done)
mod_delayed_work(j->wq, &j->write_work, max(0L, delta));
}
/*
* We don't typically trigger journal writes from here - the next journal
* write will be triggered immediately after the previous one is
* allocated, in bch2_journal_write() - but the journal write error path
* is special:
*/
bch2_journal_do_writes(j);
spin_unlock(&j->lock);
}
@ -1974,7 +1984,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_replicas_padded replicas;
struct printbuf journal_debug_buf = PRINTBUF;
unsigned nr_rw_members = 0;
int ret;
@ -2018,11 +2027,16 @@ CLOSURE_CALLBACK(bch2_journal_write)
}
if (ret) {
__bch2_journal_debug_to_text(&journal_debug_buf, j);
struct printbuf buf = PRINTBUF;
buf.atomic++;
prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu: %s"),
le64_to_cpu(w->data->seq),
bch2_err_str(ret));
__bch2_journal_debug_to_text(&buf, j);
spin_unlock(&j->lock);
bch_err(c, "Unable to allocate journal write:\n%s",
journal_debug_buf.buf);
printbuf_exit(&journal_debug_buf);
bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_exit(&buf);
goto err;
}

View File

@ -232,7 +232,7 @@ bool bch2_blacklist_entries_gc(struct bch_fs *c)
BUG_ON(nr != t->nr);
unsigned i;
for (src = bl->start, i = eytzinger0_first(t->nr);
for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr);
src < bl->start + nr;
src++, i = eytzinger0_next(i, nr)) {
BUG_ON(t->entries[i].start != le64_to_cpu(src->start));

View File

@ -24,18 +24,6 @@ static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time)
return pos;
}
#define BCH_LRU_TYPES() \
x(read) \
x(fragmentation)
enum bch_lru_type {
#define x(n) BCH_LRU_##n,
BCH_LRU_TYPES()
#undef x
};
#define BCH_LRU_FRAGMENTATION_START ((1U << 16) - 1)
static inline enum bch_lru_type lru_type(struct bkey_s_c l)
{
u16 lru_id = l.k->p.inode >> 48;

25
libbcachefs/lru_format.h Normal file
View File

@ -0,0 +1,25 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_LRU_FORMAT_H
#define _BCACHEFS_LRU_FORMAT_H
struct bch_lru {
struct bch_val v;
__le64 idx;
} __packed __aligned(8);
#define BCH_LRU_TYPES() \
x(read) \
x(fragmentation)
enum bch_lru_type {
#define x(n) BCH_LRU_##n,
BCH_LRU_TYPES()
#undef x
};
#define BCH_LRU_FRAGMENTATION_START ((1U << 16) - 1)
#define LRU_TIME_BITS 48
#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
#endif /* _BCACHEFS_LRU_FORMAT_H */
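For reference, the BCH_LRU_TYPES() x-macro above expands to the following enum; this is just the preprocessor output, shown for readability:

enum bch_lru_type {
	BCH_LRU_read,
	BCH_LRU_fragmentation,
};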

View File

@ -63,6 +63,7 @@ enum opt_flags {
OPT_MUST_BE_POW_2 = (1 << 7), /* Must be power of 2 */
OPT_SB_FIELD_SECTORS = (1 << 8),/* Superblock field is >> 9 of actual value */
OPT_SB_FIELD_ILOG2 = (1 << 9), /* Superblock field is ilog2 of actual value */
OPT_HIDDEN = (1 << 10),
};
enum opt_type {
@ -137,7 +138,7 @@ enum fsck_err_opts {
x(errors, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_STR(bch2_error_actions), \
BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \
BCH_SB_ERROR_ACTION, BCH_ON_ERROR_fix_safe, \
NULL, "Action to take on filesystem error") \
x(metadata_replicas, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
@ -406,7 +407,7 @@ enum fsck_err_opts {
BCH2_NO_SB_OPT, BCH_SB_SECTOR, \
"offset", "Sector offset of superblock") \
x(read_only, u8, \
OPT_FS, \
OPT_FS|OPT_MOUNT|OPT_HIDDEN, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, NULL) \

View File

@ -699,10 +699,10 @@ int bch2_fs_recovery(struct bch_fs *c)
if (check_version_upgrade(c))
write_sb = true;
c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
if (write_sb)
bch2_write_super(c);
c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
mutex_unlock(&c->sb_lock);
if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))

View File

@ -193,6 +193,8 @@ int bch2_run_online_recovery_passes(struct bch_fs *c)
{
int ret = 0;
down_read(&c->state_lock);
for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
struct recovery_pass_fn *p = recovery_pass_fns + i;
@ -208,6 +210,8 @@ int bch2_run_online_recovery_passes(struct bch_fs *c)
break;
}
up_read(&c->state_lock);
return ret;
}

View File

@ -77,6 +77,7 @@
BCH_FSCK_ERR_fs_usage_cached_wrong, \
BCH_FSCK_ERR_fs_usage_reserved_wrong, \
BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \
BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \
BCH_FSCK_ERR_fs_usage_replicas_wrong, \
BCH_FSCK_ERR_bkey_version_in_future)

View File

@ -110,19 +110,25 @@ out:
void bch2_sb_errors_from_cpu(struct bch_fs *c)
{
bch_sb_errors_cpu *src = &c->fsck_error_counts;
struct bch_sb_field_errors *dst =
bch2_sb_field_resize(&c->disk_sb, errors,
bch2_sb_field_errors_u64s(src->nr));
struct bch_sb_field_errors *dst;
unsigned i;
mutex_lock(&c->fsck_error_counts_lock);
dst = bch2_sb_field_resize(&c->disk_sb, errors,
bch2_sb_field_errors_u64s(src->nr));
if (!dst)
return;
goto err;
for (i = 0; i < src->nr; i++) {
SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id);
SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr);
dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time);
}
err:
mutex_unlock(&c->fsck_error_counts_lock);
}
static int bch2_sb_errors_to_cpu(struct bch_fs *c)

View File

@ -2,286 +2,294 @@
#ifndef _BCACHEFS_SB_ERRORS_FORMAT_H
#define _BCACHEFS_SB_ERRORS_FORMAT_H
#define BCH_SB_ERRS() \
x(clean_but_journal_not_empty, 0) \
x(dirty_but_no_journal_entries, 1) \
x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
x(sb_clean_journal_seq_mismatch, 3) \
x(sb_clean_btree_root_mismatch, 4) \
x(sb_clean_missing, 5) \
x(jset_unsupported_version, 6) \
x(jset_unknown_csum, 7) \
x(jset_last_seq_newer_than_seq, 8) \
x(jset_past_bucket_end, 9) \
x(jset_seq_blacklisted, 10) \
x(journal_entries_missing, 11) \
x(journal_entry_replicas_not_marked, 12) \
x(journal_entry_past_jset_end, 13) \
x(journal_entry_replicas_data_mismatch, 14) \
x(journal_entry_bkey_u64s_0, 15) \
x(journal_entry_bkey_past_end, 16) \
x(journal_entry_bkey_bad_format, 17) \
x(journal_entry_bkey_invalid, 18) \
x(journal_entry_btree_root_bad_size, 19) \
x(journal_entry_blacklist_bad_size, 20) \
x(journal_entry_blacklist_v2_bad_size, 21) \
x(journal_entry_blacklist_v2_start_past_end, 22) \
x(journal_entry_usage_bad_size, 23) \
x(journal_entry_data_usage_bad_size, 24) \
x(journal_entry_clock_bad_size, 25) \
x(journal_entry_clock_bad_rw, 26) \
x(journal_entry_dev_usage_bad_size, 27) \
x(journal_entry_dev_usage_bad_dev, 28) \
x(journal_entry_dev_usage_bad_pad, 29) \
x(btree_node_unreadable, 30) \
x(btree_node_fault_injected, 31) \
x(btree_node_bad_magic, 32) \
x(btree_node_bad_seq, 33) \
x(btree_node_unsupported_version, 34) \
x(btree_node_bset_older_than_sb_min, 35) \
x(btree_node_bset_newer_than_sb, 36) \
x(btree_node_data_missing, 37) \
x(btree_node_bset_after_end, 38) \
x(btree_node_replicas_sectors_written_mismatch, 39) \
x(btree_node_replicas_data_mismatch, 40) \
x(bset_unknown_csum, 41) \
x(bset_bad_csum, 42) \
x(bset_past_end_of_btree_node, 43) \
x(bset_wrong_sector_offset, 44) \
x(bset_empty, 45) \
x(bset_bad_seq, 46) \
x(bset_blacklisted_journal_seq, 47) \
x(first_bset_blacklisted_journal_seq, 48) \
x(btree_node_bad_btree, 49) \
x(btree_node_bad_level, 50) \
x(btree_node_bad_min_key, 51) \
x(btree_node_bad_max_key, 52) \
x(btree_node_bad_format, 53) \
x(btree_node_bkey_past_bset_end, 54) \
x(btree_node_bkey_bad_format, 55) \
x(btree_node_bad_bkey, 56) \
x(btree_node_bkey_out_of_order, 57) \
x(btree_root_bkey_invalid, 58) \
x(btree_root_read_error, 59) \
x(btree_root_bad_min_key, 60) \
x(btree_root_bad_max_key, 61) \
x(btree_node_read_error, 62) \
x(btree_node_topology_bad_min_key, 63) \
x(btree_node_topology_bad_max_key, 64) \
x(btree_node_topology_overwritten_by_prev_node, 65) \
x(btree_node_topology_overwritten_by_next_node, 66) \
x(btree_node_topology_interior_node_empty, 67) \
x(fs_usage_hidden_wrong, 68) \
x(fs_usage_btree_wrong, 69) \
x(fs_usage_data_wrong, 70) \
x(fs_usage_cached_wrong, 71) \
x(fs_usage_reserved_wrong, 72) \
x(fs_usage_persistent_reserved_wrong, 73) \
x(fs_usage_nr_inodes_wrong, 74) \
x(fs_usage_replicas_wrong, 75) \
x(dev_usage_buckets_wrong, 76) \
x(dev_usage_sectors_wrong, 77) \
x(dev_usage_fragmented_wrong, 78) \
x(dev_usage_buckets_ec_wrong, 79) \
x(bkey_version_in_future, 80) \
x(bkey_u64s_too_small, 81) \
x(bkey_invalid_type_for_btree, 82) \
x(bkey_extent_size_zero, 83) \
x(bkey_extent_size_greater_than_offset, 84) \
x(bkey_size_nonzero, 85) \
x(bkey_snapshot_nonzero, 86) \
x(bkey_snapshot_zero, 87) \
x(bkey_at_pos_max, 88) \
x(bkey_before_start_of_btree_node, 89) \
x(bkey_after_end_of_btree_node, 90) \
x(bkey_val_size_nonzero, 91) \
x(bkey_val_size_too_small, 92) \
x(alloc_v1_val_size_bad, 93) \
x(alloc_v2_unpack_error, 94) \
x(alloc_v3_unpack_error, 95) \
x(alloc_v4_val_size_bad, 96) \
x(alloc_v4_backpointers_start_bad, 97) \
x(alloc_key_data_type_bad, 98) \
x(alloc_key_empty_but_have_data, 99) \
x(alloc_key_dirty_sectors_0, 100) \
x(alloc_key_data_type_inconsistency, 101) \
x(alloc_key_to_missing_dev_bucket, 102) \
x(alloc_key_cached_inconsistency, 103) \
x(alloc_key_cached_but_read_time_zero, 104) \
x(alloc_key_to_missing_lru_entry, 105) \
x(alloc_key_data_type_wrong, 106) \
x(alloc_key_gen_wrong, 107) \
x(alloc_key_dirty_sectors_wrong, 108) \
x(alloc_key_cached_sectors_wrong, 109) \
x(alloc_key_stripe_wrong, 110) \
x(alloc_key_stripe_redundancy_wrong, 111) \
x(bucket_sector_count_overflow, 112) \
x(bucket_metadata_type_mismatch, 113) \
x(need_discard_key_wrong, 114) \
x(freespace_key_wrong, 115) \
x(freespace_hole_missing, 116) \
x(bucket_gens_val_size_bad, 117) \
x(bucket_gens_key_wrong, 118) \
x(bucket_gens_hole_wrong, 119) \
x(bucket_gens_to_invalid_dev, 120) \
x(bucket_gens_to_invalid_buckets, 121) \
x(bucket_gens_nonzero_for_invalid_buckets, 122) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
x(need_discard_freespace_key_bad, 124) \
x(backpointer_bucket_offset_wrong, 125) \
x(backpointer_to_missing_device, 126) \
x(backpointer_to_missing_alloc, 127) \
x(backpointer_to_missing_ptr, 128) \
x(lru_entry_at_time_0, 129) \
x(lru_entry_to_invalid_bucket, 130) \
x(lru_entry_bad, 131) \
x(btree_ptr_val_too_big, 132) \
x(btree_ptr_v2_val_too_big, 133) \
x(btree_ptr_has_non_ptr, 134) \
x(extent_ptrs_invalid_entry, 135) \
x(extent_ptrs_no_ptrs, 136) \
x(extent_ptrs_too_many_ptrs, 137) \
x(extent_ptrs_redundant_crc, 138) \
x(extent_ptrs_redundant_stripe, 139) \
x(extent_ptrs_unwritten, 140) \
x(extent_ptrs_written_and_unwritten, 141) \
x(ptr_to_invalid_device, 142) \
x(ptr_to_duplicate_device, 143) \
x(ptr_after_last_bucket, 144) \
x(ptr_before_first_bucket, 145) \
x(ptr_spans_multiple_buckets, 146) \
x(ptr_to_missing_backpointer, 147) \
x(ptr_to_missing_alloc_key, 148) \
x(ptr_to_missing_replicas_entry, 149) \
x(ptr_to_missing_stripe, 150) \
x(ptr_to_incorrect_stripe, 151) \
x(ptr_gen_newer_than_bucket_gen, 152) \
x(ptr_too_stale, 153) \
x(stale_dirty_ptr, 154) \
x(ptr_bucket_data_type_mismatch, 155) \
x(ptr_cached_and_erasure_coded, 156) \
x(ptr_crc_uncompressed_size_too_small, 157) \
x(ptr_crc_csum_type_unknown, 158) \
x(ptr_crc_compression_type_unknown, 159) \
x(ptr_crc_redundant, 160) \
x(ptr_crc_uncompressed_size_too_big, 161) \
x(ptr_crc_nonce_mismatch, 162) \
x(ptr_stripe_redundant, 163) \
x(reservation_key_nr_replicas_invalid, 164) \
x(reflink_v_refcount_wrong, 165) \
x(reflink_p_to_missing_reflink_v, 166) \
x(stripe_pos_bad, 167) \
x(stripe_val_size_bad, 168) \
x(stripe_sector_count_wrong, 169) \
x(snapshot_tree_pos_bad, 170) \
x(snapshot_tree_to_missing_snapshot, 171) \
x(snapshot_tree_to_missing_subvol, 172) \
x(snapshot_tree_to_wrong_subvol, 173) \
x(snapshot_tree_to_snapshot_subvol, 174) \
x(snapshot_pos_bad, 175) \
x(snapshot_parent_bad, 176) \
x(snapshot_children_not_normalized, 177) \
x(snapshot_child_duplicate, 178) \
x(snapshot_child_bad, 179) \
x(snapshot_skiplist_not_normalized, 180) \
x(snapshot_skiplist_bad, 181) \
x(snapshot_should_not_have_subvol, 182) \
x(snapshot_to_bad_snapshot_tree, 183) \
x(snapshot_bad_depth, 184) \
x(snapshot_bad_skiplist, 185) \
x(subvol_pos_bad, 186) \
x(subvol_not_master_and_not_snapshot, 187) \
x(subvol_to_missing_root, 188) \
x(subvol_root_wrong_bi_subvol, 189) \
x(bkey_in_missing_snapshot, 190) \
x(inode_pos_inode_nonzero, 191) \
x(inode_pos_blockdev_range, 192) \
x(inode_unpack_error, 193) \
x(inode_str_hash_invalid, 194) \
x(inode_v3_fields_start_bad, 195) \
x(inode_snapshot_mismatch, 196) \
x(inode_unlinked_but_clean, 197) \
x(inode_unlinked_but_nlink_nonzero, 198) \
x(inode_checksum_type_invalid, 199) \
x(inode_compression_type_invalid, 200) \
x(inode_subvol_root_but_not_dir, 201) \
x(inode_i_size_dirty_but_clean, 202) \
x(inode_i_sectors_dirty_but_clean, 203) \
x(inode_i_sectors_wrong, 204) \
x(inode_dir_wrong_nlink, 205) \
x(inode_dir_multiple_links, 206) \
x(inode_multiple_links_but_nlink_0, 207) \
x(inode_wrong_backpointer, 208) \
x(inode_wrong_nlink, 209) \
x(inode_unreachable, 210) \
x(deleted_inode_but_clean, 211) \
x(deleted_inode_missing, 212) \
x(deleted_inode_is_dir, 213) \
x(deleted_inode_not_unlinked, 214) \
x(extent_overlapping, 215) \
x(key_in_missing_inode, 216) \
x(key_in_wrong_inode_type, 217) \
x(extent_past_end_of_inode, 218) \
x(dirent_empty_name, 219) \
x(dirent_val_too_big, 220) \
x(dirent_name_too_long, 221) \
x(dirent_name_embedded_nul, 222) \
x(dirent_name_dot_or_dotdot, 223) \
x(dirent_name_has_slash, 224) \
x(dirent_d_type_wrong, 225) \
x(inode_bi_parent_wrong, 226) \
x(dirent_in_missing_dir_inode, 227) \
x(dirent_in_non_dir_inode, 228) \
x(dirent_to_missing_inode, 229) \
x(dirent_to_missing_subvol, 230) \
x(dirent_to_itself, 231) \
x(quota_type_invalid, 232) \
x(xattr_val_size_too_small, 233) \
x(xattr_val_size_too_big, 234) \
x(xattr_invalid_type, 235) \
x(xattr_name_invalid_chars, 236) \
x(xattr_in_missing_inode, 237) \
x(root_subvol_missing, 238) \
x(root_dir_missing, 239) \
x(root_inode_not_dir, 240) \
x(dir_loop, 241) \
x(hash_table_key_duplicate, 242) \
x(hash_table_key_wrong_offset, 243) \
x(unlinked_inode_not_on_deleted_list, 244) \
x(reflink_p_front_pad_bad, 245) \
x(journal_entry_dup_same_device, 246) \
x(inode_bi_subvol_missing, 247) \
x(inode_bi_subvol_wrong, 248) \
x(inode_points_to_missing_dirent, 249) \
x(inode_points_to_wrong_dirent, 250) \
x(inode_bi_parent_nonzero, 251) \
x(dirent_to_missing_parent_subvol, 252) \
x(dirent_not_visible_in_parent_subvol, 253) \
x(subvol_fs_path_parent_wrong, 254) \
x(subvol_root_fs_path_parent_nonzero, 255) \
x(subvol_children_not_set, 256) \
x(subvol_children_bad, 257) \
x(subvol_loop, 258) \
x(subvol_unreachable, 259) \
x(btree_node_bkey_bad_u64s, 260) \
x(btree_node_topology_empty_interior_node, 261) \
x(btree_ptr_v2_min_key_bad, 262) \
x(btree_root_unreadable_and_scan_found_nothing, 263) \
x(snapshot_node_missing, 264) \
x(dup_backpointer_to_bad_csum_extent, 265) \
x(btree_bitmap_not_marked, 266) \
x(sb_clean_entry_overrun, 267) \
x(btree_ptr_v2_written_0, 268) \
x(subvol_snapshot_bad, 269) \
x(subvol_inode_bad, 270) \
x(alloc_key_stripe_sectors_wrong, 271) \
x(accounting_mismatch, 272) \
x(accounting_replicas_not_marked, 273) \
x(invalid_btree_id, 274) \
x(alloc_key_io_time_bad, 275)
enum bch_fsck_flags {
FSCK_CAN_FIX = 1 << 0,
FSCK_CAN_IGNORE = 1 << 1,
FSCK_NEED_FSCK = 1 << 2,
FSCK_NO_RATELIMIT = 1 << 3,
FSCK_AUTOFIX = 1 << 4,
};
#define BCH_SB_ERRS() \
x(clean_but_journal_not_empty, 0, 0) \
x(dirty_but_no_journal_entries, 1, 0) \
x(dirty_but_no_journal_entries_post_drop_nonflushes, 2, 0) \
x(sb_clean_journal_seq_mismatch, 3, 0) \
x(sb_clean_btree_root_mismatch, 4, 0) \
x(sb_clean_missing, 5, 0) \
x(jset_unsupported_version, 6, 0) \
x(jset_unknown_csum, 7, 0) \
x(jset_last_seq_newer_than_seq, 8, 0) \
x(jset_past_bucket_end, 9, 0) \
x(jset_seq_blacklisted, 10, 0) \
x(journal_entries_missing, 11, 0) \
x(journal_entry_replicas_not_marked, 12, 0) \
x(journal_entry_past_jset_end, 13, 0) \
x(journal_entry_replicas_data_mismatch, 14, 0) \
x(journal_entry_bkey_u64s_0, 15, 0) \
x(journal_entry_bkey_past_end, 16, 0) \
x(journal_entry_bkey_bad_format, 17, 0) \
x(journal_entry_bkey_invalid, 18, 0) \
x(journal_entry_btree_root_bad_size, 19, 0) \
x(journal_entry_blacklist_bad_size, 20, 0) \
x(journal_entry_blacklist_v2_bad_size, 21, 0) \
x(journal_entry_blacklist_v2_start_past_end, 22, 0) \
x(journal_entry_usage_bad_size, 23, 0) \
x(journal_entry_data_usage_bad_size, 24, 0) \
x(journal_entry_clock_bad_size, 25, 0) \
x(journal_entry_clock_bad_rw, 26, 0) \
x(journal_entry_dev_usage_bad_size, 27, 0) \
x(journal_entry_dev_usage_bad_dev, 28, 0) \
x(journal_entry_dev_usage_bad_pad, 29, 0) \
x(btree_node_unreadable, 30, 0) \
x(btree_node_fault_injected, 31, 0) \
x(btree_node_bad_magic, 32, 0) \
x(btree_node_bad_seq, 33, 0) \
x(btree_node_unsupported_version, 34, 0) \
x(btree_node_bset_older_than_sb_min, 35, 0) \
x(btree_node_bset_newer_than_sb, 36, 0) \
x(btree_node_data_missing, 37, 0) \
x(btree_node_bset_after_end, 38, 0) \
x(btree_node_replicas_sectors_written_mismatch, 39, 0) \
x(btree_node_replicas_data_mismatch, 40, 0) \
x(bset_unknown_csum, 41, 0) \
x(bset_bad_csum, 42, 0) \
x(bset_past_end_of_btree_node, 43, 0) \
x(bset_wrong_sector_offset, 44, 0) \
x(bset_empty, 45, 0) \
x(bset_bad_seq, 46, 0) \
x(bset_blacklisted_journal_seq, 47, 0) \
x(first_bset_blacklisted_journal_seq, 48, 0) \
x(btree_node_bad_btree, 49, 0) \
x(btree_node_bad_level, 50, 0) \
x(btree_node_bad_min_key, 51, 0) \
x(btree_node_bad_max_key, 52, 0) \
x(btree_node_bad_format, 53, 0) \
x(btree_node_bkey_past_bset_end, 54, 0) \
x(btree_node_bkey_bad_format, 55, 0) \
x(btree_node_bad_bkey, 56, 0) \
x(btree_node_bkey_out_of_order, 57, 0) \
x(btree_root_bkey_invalid, 58, 0) \
x(btree_root_read_error, 59, 0) \
x(btree_root_bad_min_key, 60, 0) \
x(btree_root_bad_max_key, 61, 0) \
x(btree_node_read_error, 62, 0) \
x(btree_node_topology_bad_min_key, 63, 0) \
x(btree_node_topology_bad_max_key, 64, 0) \
x(btree_node_topology_overwritten_by_prev_node, 65, 0) \
x(btree_node_topology_overwritten_by_next_node, 66, 0) \
x(btree_node_topology_interior_node_empty, 67, 0) \
x(fs_usage_hidden_wrong, 68, FSCK_AUTOFIX) \
x(fs_usage_btree_wrong, 69, FSCK_AUTOFIX) \
x(fs_usage_data_wrong, 70, FSCK_AUTOFIX) \
x(fs_usage_cached_wrong, 71, FSCK_AUTOFIX) \
x(fs_usage_reserved_wrong, 72, FSCK_AUTOFIX) \
x(fs_usage_persistent_reserved_wrong, 73, FSCK_AUTOFIX) \
x(fs_usage_nr_inodes_wrong, 74, FSCK_AUTOFIX) \
x(fs_usage_replicas_wrong, 75, FSCK_AUTOFIX) \
x(dev_usage_buckets_wrong, 76, FSCK_AUTOFIX) \
x(dev_usage_sectors_wrong, 77, FSCK_AUTOFIX) \
x(dev_usage_fragmented_wrong, 78, FSCK_AUTOFIX) \
x(dev_usage_buckets_ec_wrong, 79, FSCK_AUTOFIX) \
x(bkey_version_in_future, 80, 0) \
x(bkey_u64s_too_small, 81, 0) \
x(bkey_invalid_type_for_btree, 82, 0) \
x(bkey_extent_size_zero, 83, 0) \
x(bkey_extent_size_greater_than_offset, 84, 0) \
x(bkey_size_nonzero, 85, 0) \
x(bkey_snapshot_nonzero, 86, 0) \
x(bkey_snapshot_zero, 87, 0) \
x(bkey_at_pos_max, 88, 0) \
x(bkey_before_start_of_btree_node, 89, 0) \
x(bkey_after_end_of_btree_node, 90, 0) \
x(bkey_val_size_nonzero, 91, 0) \
x(bkey_val_size_too_small, 92, 0) \
x(alloc_v1_val_size_bad, 93, 0) \
x(alloc_v2_unpack_error, 94, 0) \
x(alloc_v3_unpack_error, 95, 0) \
x(alloc_v4_val_size_bad, 96, 0) \
x(alloc_v4_backpointers_start_bad, 97, 0) \
x(alloc_key_data_type_bad, 98, 0) \
x(alloc_key_empty_but_have_data, 99, 0) \
x(alloc_key_dirty_sectors_0, 100, 0) \
x(alloc_key_data_type_inconsistency, 101, 0) \
x(alloc_key_to_missing_dev_bucket, 102, 0) \
x(alloc_key_cached_inconsistency, 103, 0) \
x(alloc_key_cached_but_read_time_zero, 104, 0) \
x(alloc_key_to_missing_lru_entry, 105, 0) \
x(alloc_key_data_type_wrong, 106, FSCK_AUTOFIX) \
x(alloc_key_gen_wrong, 107, FSCK_AUTOFIX) \
x(alloc_key_dirty_sectors_wrong, 108, FSCK_AUTOFIX) \
x(alloc_key_cached_sectors_wrong, 109, FSCK_AUTOFIX) \
x(alloc_key_stripe_wrong, 110, FSCK_AUTOFIX) \
x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \
x(bucket_sector_count_overflow, 112, 0) \
x(bucket_metadata_type_mismatch, 113, 0) \
x(need_discard_key_wrong, 114, 0) \
x(freespace_key_wrong, 115, 0) \
x(freespace_hole_missing, 116, 0) \
x(bucket_gens_val_size_bad, 117, 0) \
x(bucket_gens_key_wrong, 118, 0) \
x(bucket_gens_hole_wrong, 119, 0) \
x(bucket_gens_to_invalid_dev, 120, 0) \
x(bucket_gens_to_invalid_buckets, 121, 0) \
x(bucket_gens_nonzero_for_invalid_buckets, 122, 0) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \
x(need_discard_freespace_key_bad, 124, 0) \
x(backpointer_bucket_offset_wrong, 125, 0) \
x(backpointer_to_missing_device, 126, 0) \
x(backpointer_to_missing_alloc, 127, 0) \
x(backpointer_to_missing_ptr, 128, 0) \
x(lru_entry_at_time_0, 129, 0) \
x(lru_entry_to_invalid_bucket, 130, 0) \
x(lru_entry_bad, 131, 0) \
x(btree_ptr_val_too_big, 132, 0) \
x(btree_ptr_v2_val_too_big, 133, 0) \
x(btree_ptr_has_non_ptr, 134, 0) \
x(extent_ptrs_invalid_entry, 135, 0) \
x(extent_ptrs_no_ptrs, 136, 0) \
x(extent_ptrs_too_many_ptrs, 137, 0) \
x(extent_ptrs_redundant_crc, 138, 0) \
x(extent_ptrs_redundant_stripe, 139, 0) \
x(extent_ptrs_unwritten, 140, 0) \
x(extent_ptrs_written_and_unwritten, 141, 0) \
x(ptr_to_invalid_device, 142, 0) \
x(ptr_to_duplicate_device, 143, 0) \
x(ptr_after_last_bucket, 144, 0) \
x(ptr_before_first_bucket, 145, 0) \
x(ptr_spans_multiple_buckets, 146, 0) \
x(ptr_to_missing_backpointer, 147, 0) \
x(ptr_to_missing_alloc_key, 148, 0) \
x(ptr_to_missing_replicas_entry, 149, 0) \
x(ptr_to_missing_stripe, 150, 0) \
x(ptr_to_incorrect_stripe, 151, 0) \
x(ptr_gen_newer_than_bucket_gen, 152, 0) \
x(ptr_too_stale, 153, 0) \
x(stale_dirty_ptr, 154, 0) \
x(ptr_bucket_data_type_mismatch, 155, 0) \
x(ptr_cached_and_erasure_coded, 156, 0) \
x(ptr_crc_uncompressed_size_too_small, 157, 0) \
x(ptr_crc_csum_type_unknown, 158, 0) \
x(ptr_crc_compression_type_unknown, 159, 0) \
x(ptr_crc_redundant, 160, 0) \
x(ptr_crc_uncompressed_size_too_big, 161, 0) \
x(ptr_crc_nonce_mismatch, 162, 0) \
x(ptr_stripe_redundant, 163, 0) \
x(reservation_key_nr_replicas_invalid, 164, 0) \
x(reflink_v_refcount_wrong, 165, 0) \
x(reflink_p_to_missing_reflink_v, 166, 0) \
x(stripe_pos_bad, 167, 0) \
x(stripe_val_size_bad, 168, 0) \
x(stripe_sector_count_wrong, 169, 0) \
x(snapshot_tree_pos_bad, 170, 0) \
x(snapshot_tree_to_missing_snapshot, 171, 0) \
x(snapshot_tree_to_missing_subvol, 172, 0) \
x(snapshot_tree_to_wrong_subvol, 173, 0) \
x(snapshot_tree_to_snapshot_subvol, 174, 0) \
x(snapshot_pos_bad, 175, 0) \
x(snapshot_parent_bad, 176, 0) \
x(snapshot_children_not_normalized, 177, 0) \
x(snapshot_child_duplicate, 178, 0) \
x(snapshot_child_bad, 179, 0) \
x(snapshot_skiplist_not_normalized, 180, 0) \
x(snapshot_skiplist_bad, 181, 0) \
x(snapshot_should_not_have_subvol, 182, 0) \
x(snapshot_to_bad_snapshot_tree, 183, 0) \
x(snapshot_bad_depth, 184, 0) \
x(snapshot_bad_skiplist, 185, 0) \
x(subvol_pos_bad, 186, 0) \
x(subvol_not_master_and_not_snapshot, 187, 0) \
x(subvol_to_missing_root, 188, 0) \
x(subvol_root_wrong_bi_subvol, 189, 0) \
x(bkey_in_missing_snapshot, 190, 0) \
x(inode_pos_inode_nonzero, 191, 0) \
x(inode_pos_blockdev_range, 192, 0) \
x(inode_unpack_error, 193, 0) \
x(inode_str_hash_invalid, 194, 0) \
x(inode_v3_fields_start_bad, 195, 0) \
x(inode_snapshot_mismatch, 196, 0) \
x(inode_unlinked_but_clean, 197, 0) \
x(inode_unlinked_but_nlink_nonzero, 198, 0) \
x(inode_checksum_type_invalid, 199, 0) \
x(inode_compression_type_invalid, 200, 0) \
x(inode_subvol_root_but_not_dir, 201, 0) \
x(inode_i_size_dirty_but_clean, 202, 0) \
x(inode_i_sectors_dirty_but_clean, 203, 0) \
x(inode_i_sectors_wrong, 204, 0) \
x(inode_dir_wrong_nlink, 205, 0) \
x(inode_dir_multiple_links, 206, 0) \
x(inode_multiple_links_but_nlink_0, 207, 0) \
x(inode_wrong_backpointer, 208, 0) \
x(inode_wrong_nlink, 209, 0) \
x(inode_unreachable, 210, 0) \
x(deleted_inode_but_clean, 211, 0) \
x(deleted_inode_missing, 212, 0) \
x(deleted_inode_is_dir, 213, 0) \
x(deleted_inode_not_unlinked, 214, 0) \
x(extent_overlapping, 215, 0) \
x(key_in_missing_inode, 216, 0) \
x(key_in_wrong_inode_type, 217, 0) \
x(extent_past_end_of_inode, 218, 0) \
x(dirent_empty_name, 219, 0) \
x(dirent_val_too_big, 220, 0) \
x(dirent_name_too_long, 221, 0) \
x(dirent_name_embedded_nul, 222, 0) \
x(dirent_name_dot_or_dotdot, 223, 0) \
x(dirent_name_has_slash, 224, 0) \
x(dirent_d_type_wrong, 225, 0) \
x(inode_bi_parent_wrong, 226, 0) \
x(dirent_in_missing_dir_inode, 227, 0) \
x(dirent_in_non_dir_inode, 228, 0) \
x(dirent_to_missing_inode, 229, 0) \
x(dirent_to_missing_subvol, 230, 0) \
x(dirent_to_itself, 231, 0) \
x(quota_type_invalid, 232, 0) \
x(xattr_val_size_too_small, 233, 0) \
x(xattr_val_size_too_big, 234, 0) \
x(xattr_invalid_type, 235, 0) \
x(xattr_name_invalid_chars, 236, 0) \
x(xattr_in_missing_inode, 237, 0) \
x(root_subvol_missing, 238, 0) \
x(root_dir_missing, 239, 0) \
x(root_inode_not_dir, 240, 0) \
x(dir_loop, 241, 0) \
x(hash_table_key_duplicate, 242, 0) \
x(hash_table_key_wrong_offset, 243, 0) \
x(unlinked_inode_not_on_deleted_list, 244, 0) \
x(reflink_p_front_pad_bad, 245, 0) \
x(journal_entry_dup_same_device, 246, 0) \
x(inode_bi_subvol_missing, 247, 0) \
x(inode_bi_subvol_wrong, 248, 0) \
x(inode_points_to_missing_dirent, 249, 0) \
x(inode_points_to_wrong_dirent, 250, 0) \
x(inode_bi_parent_nonzero, 251, 0) \
x(dirent_to_missing_parent_subvol, 252, 0) \
x(dirent_not_visible_in_parent_subvol, 253, 0) \
x(subvol_fs_path_parent_wrong, 254, 0) \
x(subvol_root_fs_path_parent_nonzero, 255, 0) \
x(subvol_children_not_set, 256, 0) \
x(subvol_children_bad, 257, 0) \
x(subvol_loop, 258, 0) \
x(subvol_unreachable, 259, 0) \
x(btree_node_bkey_bad_u64s, 260, 0) \
x(btree_node_topology_empty_interior_node, 261, 0) \
x(btree_ptr_v2_min_key_bad, 262, 0) \
x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \
x(snapshot_node_missing, 264, 0) \
x(dup_backpointer_to_bad_csum_extent, 265, 0) \
x(btree_bitmap_not_marked, 266, 0) \
x(sb_clean_entry_overrun, 267, 0) \
x(btree_ptr_v2_written_0, 268, 0) \
x(subvol_snapshot_bad, 269, 0) \
x(subvol_inode_bad, 270, 0) \
x(alloc_key_stripe_sectors_wrong, 271, 0) \
x(accounting_mismatch, 272, 0) \
x(accounting_replicas_not_marked, 273, 0) \
x(invalid_btree_id, 274, 0) \
x(alloc_key_io_time_bad, 275, 0)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
BCH_SB_ERRS()
#undef x
BCH_SB_ERR_MAX
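The new third column added to BCH_SB_ERRS() is swallowed by the variadic x() when building the id enum, and can be picked up by a separate expansion; the lookup table below is a hedged illustration of that pattern, not the tree's actual consumer of the flags:

/* hypothetical: map error ids to their default fsck flags */
#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags,
static const unsigned bch2_fsck_flags_by_id[] = {
	BCH_SB_ERRS()
};
#undef x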

View File

@ -19,17 +19,14 @@ static inline bool seqmutex_trylock(struct seqmutex *lock)
static inline void seqmutex_lock(struct seqmutex *lock)
{
mutex_lock(&lock->lock);
}
static inline void seqmutex_unlock(struct seqmutex *lock)
{
lock->seq++;
mutex_unlock(&lock->lock);
}
static inline u32 seqmutex_seq(struct seqmutex *lock)
static inline u32 seqmutex_unlock(struct seqmutex *lock)
{
return lock->seq;
u32 seq = lock->seq;
mutex_unlock(&lock->lock);
return seq;
}
static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq)
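seqmutex_unlock() now samples the sequence number while the mutex is still held and hands it back to the caller, so the relock check pairs directly with the unlock. A hedged sketch of the drop-and-relock walk this supports (the list, element type and do_slow_work() are hypothetical):

seqmutex_lock(&lock);
list_for_each_entry(p, &head, list) {
	u32 seq = seqmutex_unlock(&lock);

	do_slow_work(p);			/* hypothetical work done outside the lock */

	if (!seqmutex_relock(&lock, seq))
		goto restart;			/* lock was retaken; start over */
}
seqmutex_unlock(&lock);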

View File

@ -168,6 +168,9 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
if (unlikely(new_bytes > INT_MAX))
return NULL;
new = kvzalloc(new_bytes, GFP_KERNEL);
if (!new)
return NULL;
@ -1682,6 +1685,8 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
bch2_delete_dead_snapshots(c);
bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
}

View File

@ -535,7 +535,6 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
bch2_fs_allocator_background_exit(c);
bch2_fs_accounting_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
@ -564,8 +563,11 @@ static void __bch2_fs_free(struct bch_fs *c)
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
free_percpu(c->online_reserved);
if (c->online_reserved) {
u64 v = percpu_u64_get(c->online_reserved);
WARN(v, "online_reserved not 0 at shutdown: %lli", v);
free_percpu(c->online_reserved);
}
darray_exit(&c->btree_roots_extra);
free_percpu(c->pcpu);
@ -1193,6 +1195,7 @@ static void bch2_dev_free(struct bch_dev *ca)
kfree(ca->buckets_nouse);
bch2_free_super(&ca->disk_sb);
bch2_dev_allocator_background_exit(ca);
bch2_dev_journal_exit(ca);
free_percpu(ca->io_done);
@ -1315,6 +1318,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
atomic_long_set(&ca->ref, 1);
#endif
bch2_dev_allocator_background_init(ca);
if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
!(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) ||
@ -1527,6 +1532,7 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
* The allocator thread itself allocates btree nodes, so stop it first:
*/
bch2_dev_allocator_remove(c, ca);
bch2_recalc_capacity(c);
bch2_dev_journal_stop(&c->journal, ca);
}
@ -1538,6 +1544,7 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
bch2_dev_do_discards(ca);
}
int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,

View File

@ -697,14 +697,19 @@ do { \
} \
} while (0)
#define per_cpu_sum(_p) \
({ \
typeof(*_p) _ret = 0; \
\
int cpu; \
for_each_possible_cpu(cpu) \
_ret += *per_cpu_ptr(_p, cpu); \
_ret; \
})
static inline u64 percpu_u64_get(u64 __percpu *src)
{
u64 ret = 0;
int cpu;
for_each_possible_cpu(cpu)
ret += *per_cpu_ptr(src, cpu);
return ret;
return per_cpu_sum(src);
}
static inline void percpu_u64_set(u64 __percpu *dst, u64 src)
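Unlike percpu_u64_get(), the new per_cpu_sum() macro works for any scalar per-cpu type via typeof(); a hedged sketch with hypothetical counters:

u32 __percpu *nr_reads   = alloc_percpu(u32);
u64 __percpu *bytes_read = alloc_percpu(u64);

this_cpu_inc(*nr_reads);
this_cpu_add(*bytes_read, 4096);

pr_info("reads %u bytes %llu\n",
	per_cpu_sum(nr_reads), per_cpu_sum(bytes_read));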

View File

@ -13,14 +13,25 @@
#include <linux/seq_file.h>
#include <linux/sched/debug.h>
static inline void closure_put_after_sub(struct closure *cl, int flags)
static inline void closure_put_after_sub_checks(int flags)
{
int r = flags & CLOSURE_REMAINING_MASK;
BUG_ON(flags & CLOSURE_GUARD_MASK);
BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
if (WARN(flags & CLOSURE_GUARD_MASK,
"closure has guard bits set: %x (%u)",
flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r)))
r &= ~CLOSURE_GUARD_MASK;
if (!r) {
WARN(!r && (flags & ~CLOSURE_DESTRUCTOR),
"closure ref hit 0 with incorrect flags set: %x (%u)",
flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags));
}
static inline void closure_put_after_sub(struct closure *cl, int flags)
{
closure_put_after_sub_checks(flags);
if (!(flags & CLOSURE_REMAINING_MASK)) {
smp_acquire__after_ctrl_dep();
cl->closure_get_happened = false;
@ -139,6 +150,41 @@ void __sched __closure_sync(struct closure *cl)
}
EXPORT_SYMBOL(__closure_sync);
/*
* closure_return_sync - finish running a closure, synchronously (i.e. waiting
* for outstanding get()s to finish) and returning once closure refcount is 0.
*
* Unlike closure_sync() this doesn't reinit the ref to 1; subsequent
* closure_get_not_zero() calls will fail.
*/
void __sched closure_return_sync(struct closure *cl)
{
struct closure_syncer s = { .task = current };
cl->s = &s;
set_closure_fn(cl, closure_sync_fn, NULL);
unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR,
&cl->remaining);
closure_put_after_sub_checks(flags);
if (unlikely(flags & CLOSURE_REMAINING_MASK)) {
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (s.done)
break;
schedule();
}
__set_current_state(TASK_RUNNING);
}
if (cl->parent)
closure_put(cl->parent);
}
EXPORT_SYMBOL(closure_return_sync);
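A hedged usage sketch for closure_return_sync(): unlike __closure_sync(), the refcount is left at 0 afterwards, so any later closure_get_not_zero() on the same closure fails; start_background_work() here is hypothetical:

struct closure cl;

closure_init_stack(&cl);
start_background_work(&cl);	/* hypothetical: takes refs with closure_get() */
closure_return_sync(&cl);	/* waits for those refs, leaves the ref at 0 */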
int __sched __closure_sync_timeout(struct closure *cl, unsigned long timeout)
{
struct closure_syncer s = { .task = current };