Update bcachefs sources to b91a514413 bcachefs: Don't try to delete stripes when RO

Author: Kent Overstreet, 2019-07-10 16:12:15 -04:00
Commit: f96ba8e0aa (parent 07a0a5b5c4)
130 changed files with 932 additions and 1004 deletions

View File

@ -1 +1 @@
7e42539c80470cb655bbc46cd0f144de6c644523 b91a514413ecdd15e0f9d8290761d24663a93425

View File

@ -265,8 +265,7 @@ static void write_data(struct bch_fs *c,
closure_init_stack(&cl); closure_init_stack(&cl);
bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv)); bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
o.op.wbio.bio.bi_iter.bi_size = len; bch2_bio_map(&o.op.wbio.bio, buf, len);
bch2_bio_map(&o.op.wbio.bio, buf);
bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts)); bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
o.op.write_point = writepoint_hashed(0); o.op.write_point = writepoint_hashed(0);
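Note: bch2_bio_map() now takes the length to map and sets bi_iter.bi_size itself, so callers drop the manual size assignment. The change in calling convention, using the buf/len from write_data() above:

	/* before: */
	o.op.wbio.bio.bi_iter.bi_size = len;
	bch2_bio_map(&o.op.wbio.bio, buf);

	/* after: */
	bch2_bio_map(&o.op.wbio.bio, buf, len);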

View File

@ -113,8 +113,13 @@ static inline void *bio_data(struct bio *bio)
#define __bio_kunmap_atomic(addr) kunmap_atomic(addr) #define __bio_kunmap_atomic(addr) kunmap_atomic(addr)
-#define bio_for_each_segment_all(bvl, bio, i)				\
-	for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++)
+struct bvec_iter_all {
+	unsigned		done;
+};
+
+#define bio_for_each_segment_all(bvl, bio, i, iter)			\
+	for (i = 0, bvl = (bio)->bi_io_vec, iter = (struct bvec_iter_all) { 0 }; \
+	     i < (bio)->bi_vcnt; i++, bvl++)
static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
unsigned bytes) unsigned bytes)
@ -136,6 +141,9 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
#define bio_for_each_segment(bvl, bio, iter) \ #define bio_for_each_segment(bvl, bio, iter) \
__bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)
#define __bio_for_each_bvec(bvl, bio, iter, start) \
__bio_for_each_segment(bvl, bio, iter, start)
#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
static inline unsigned bio_segments(struct bio *bio) static inline unsigned bio_segments(struct bio *bio)
@ -228,6 +236,8 @@ enum {
extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
extern void bio_put(struct bio *); extern void bio_put(struct bio *);
int bio_add_page(struct bio *, struct page *, unsigned, unsigned);
extern void __bio_clone_fast(struct bio *, struct bio *); extern void __bio_clone_fast(struct bio *, struct bio *);
extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
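With the added struct bvec_iter_all, this compatibility header's bio_for_each_segment_all() now matches the four-argument form used by newer kernels (the extra iterator is initialized but otherwise unused here), and __bio_for_each_bvec() is provided as an alias for __bio_for_each_segment() for the callers in checksum.c and compress.c below. A minimal usage sketch, with an illustrative zero-fill body:

	struct bio_vec *bv;
	struct bvec_iter_all iter;
	unsigned i;

	bio_for_each_segment_all(bv, bio, i, iter)
		memset(page_address(bv->bv_page) + bv->bv_offset, 0, bv->bv_len);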

include/linux/sched/mm.h (new file, 18 lines)
View File

@ -0,0 +1,18 @@
#ifndef _LINUX_SCHED_MM_H
#define _LINUX_SCHED_MM_H
#define PF_MEMALLOC_NOFS 0
static inline unsigned int memalloc_nofs_save(void)
{
unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
current->flags |= PF_MEMALLOC_NOFS;
return flags;
}
static inline void memalloc_nofs_restore(unsigned int flags)
{
current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
}
#endif /* _LINUX_SCHED_MM_H */
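This userspace shim mirrors the kernel's memalloc_nofs_save()/memalloc_nofs_restore() API for marking a region whose allocations must not recurse into the filesystem; with PF_MEMALLOC_NOFS defined as 0 it is a no-op in bcachefs-tools, but it lets shared code compile unchanged. Typical use, as added to bch2_btree_node_mem_alloc() further down:

	unsigned flags = memalloc_nofs_save();

	/* allocations in this region behave as if GFP_NOFS were set */

	memalloc_nofs_restore(flags);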

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM #undef TRACE_SYSTEM
#define TRACE_SYSTEM bcachefs #define TRACE_SYSTEM bcachefs

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#ifdef CONFIG_BCACHEFS_POSIX_ACL #ifdef CONFIG_BCACHEFS_POSIX_ACL
#include "bcachefs.h" #include "bcachefs.h"

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ACL_H #ifndef _BCACHEFS_ACL_H
#define _BCACHEFS_ACL_H #define _BCACHEFS_ACL_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "alloc_background.h" #include "alloc_background.h"
#include "alloc_foreground.h" #include "alloc_foreground.h"
@ -355,11 +356,11 @@ restart:
old_u = bch2_alloc_unpack(k); old_u = bch2_alloc_unpack(k);
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
g = bucket(ca, b); g = bucket(ca, b);
m = READ_ONCE(g->mark); m = READ_ONCE(g->mark);
new_u = alloc_mem_to_key(g, m); new_u = alloc_mem_to_key(g, m);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
if (!m.dirty) if (!m.dirty)
continue; continue;
@ -889,7 +890,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
b = ca->alloc_heap.data[0].bucket; b = ca->alloc_heap.data[0].bucket;
/* first, put on free_inc and mark as owned by allocator: */ /* first, put on free_inc and mark as owned by allocator: */
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
spin_lock(&c->freelist_lock); spin_lock(&c->freelist_lock);
verify_not_on_freelist(c, ca, b); verify_not_on_freelist(c, ca, b);
@ -899,7 +900,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0); bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);
spin_unlock(&c->freelist_lock); spin_unlock(&c->freelist_lock);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
@ -915,11 +916,11 @@ retry:
* we have to trust the in memory bucket @m, not the version in the * we have to trust the in memory bucket @m, not the version in the
* btree: * btree:
*/ */
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
g = bucket(ca, b); g = bucket(ca, b);
m = READ_ONCE(g->mark); m = READ_ONCE(g->mark);
u = alloc_mem_to_key(g, m); u = alloc_mem_to_key(g, m);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
invalidating_cached_data = m.cached_sectors != 0; invalidating_cached_data = m.cached_sectors != 0;
@ -980,7 +981,7 @@ retry:
size_t b2; size_t b2;
/* remove from free_inc: */ /* remove from free_inc: */
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
spin_lock(&c->freelist_lock); spin_lock(&c->freelist_lock);
bch2_mark_alloc_bucket(c, ca, b, false, bch2_mark_alloc_bucket(c, ca, b, false,
@ -990,7 +991,7 @@ retry:
BUG_ON(b != b2); BUG_ON(b != b2);
spin_unlock(&c->freelist_lock); spin_unlock(&c->freelist_lock);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
} }
return ret; return ret;
@ -1001,7 +1002,7 @@ static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
{ {
struct bucket_mark m; struct bucket_mark m;
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
spin_lock(&c->freelist_lock); spin_lock(&c->freelist_lock);
bch2_invalidate_bucket(c, ca, bucket, &m); bch2_invalidate_bucket(c, ca, bucket, &m);
@ -1014,7 +1015,7 @@ static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
bucket_io_clock_reset(c, ca, bucket, READ); bucket_io_clock_reset(c, ca, bucket, READ);
bucket_io_clock_reset(c, ca, bucket, WRITE); bucket_io_clock_reset(c, ca, bucket, WRITE);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
*flush_seq = max(*flush_seq, bucket_journal_seq(c, m)); *flush_seq = max(*flush_seq, bucket_journal_seq(c, m));
@ -1563,10 +1564,10 @@ static bool bch2_fs_allocator_start_fast(struct bch_fs *c)
test_bit(bu, ca->buckets_nouse))) test_bit(bu, ca->buckets_nouse)))
continue; continue;
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
bch2_mark_alloc_bucket(c, ca, bu, true, bch2_mark_alloc_bucket(c, ca, bu, true,
gc_pos_alloc(c, NULL), 0); gc_pos_alloc(c, NULL), 0);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
fifo_push(&ca->free_inc, bu); fifo_push(&ca->free_inc, bu);
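From here on, percpu_down_read_preempt_disable()/percpu_up_read_preempt_enable() on c->mark_lock are replaced with plain percpu_down_read()/percpu_up_read(): taking the lock no longer disables preemption, and the few paths that touch per-CPU data while holding it add an explicit preempt_disable()/preempt_enable() pair instead (see the buckets.c hunks below). The resulting pattern, sketched from bch2_disk_reservation_add():

	percpu_down_read(&c->mark_lock);
	preempt_disable();              /* only needed around the this_cpu_ptr() access */
	pcpu = this_cpu_ptr(c->pcpu);
	/* ... fast path using pcpu ... */
	preempt_enable();
	percpu_up_read(&c->mark_lock);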

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ALLOC_BACKGROUND_H #ifndef _BCACHEFS_ALLOC_BACKGROUND_H
#define _BCACHEFS_ALLOC_BACKGROUND_H #define _BCACHEFS_ALLOC_BACKGROUND_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/* /*
* Primary bucket allocation code * Primary bucket allocation code
* *
@ -100,7 +101,7 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
return; return;
} }
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
spin_lock(&ob->lock); spin_lock(&ob->lock);
bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr),
@ -109,7 +110,7 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
ob->type = 0; ob->type = 0;
spin_unlock(&ob->lock); spin_unlock(&ob->lock);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
spin_lock(&c->freelist_lock); spin_lock(&c->freelist_lock);
ob->freelist = c->open_buckets_freelist; ob->freelist = c->open_buckets_freelist;
@ -376,6 +377,25 @@ void bch2_dev_stripe_increment(struct bch_fs *c, struct bch_dev *ca,
#define BUCKET_MAY_ALLOC_PARTIAL (1 << 0) #define BUCKET_MAY_ALLOC_PARTIAL (1 << 0)
#define BUCKET_ALLOC_USE_DURABILITY (1 << 1) #define BUCKET_ALLOC_USE_DURABILITY (1 << 1)
static void add_new_bucket(struct bch_fs *c,
struct open_buckets *ptrs,
struct bch_devs_mask *devs_may_alloc,
unsigned *nr_effective,
bool *have_cache,
unsigned flags,
struct open_bucket *ob)
{
unsigned durability =
bch_dev_bkey_exists(c, ob->ptr.dev)->mi.durability;
__clear_bit(ob->ptr.dev, devs_may_alloc->d);
*nr_effective += (flags & BUCKET_ALLOC_USE_DURABILITY)
? durability : 1;
*have_cache |= !durability;
ob_push(c, ptrs, ob);
}
static int bch2_bucket_alloc_set(struct bch_fs *c, static int bch2_bucket_alloc_set(struct bch_fs *c,
struct open_buckets *ptrs, struct open_buckets *ptrs,
struct dev_stripe_state *stripe, struct dev_stripe_state *stripe,
@ -391,7 +411,7 @@ static int bch2_bucket_alloc_set(struct bch_fs *c,
bch2_dev_alloc_list(c, stripe, devs_may_alloc); bch2_dev_alloc_list(c, stripe, devs_may_alloc);
struct bch_dev *ca; struct bch_dev *ca;
bool alloc_failure = false; bool alloc_failure = false;
unsigned i, durability; unsigned i;
BUG_ON(*nr_effective >= nr_replicas); BUG_ON(*nr_effective >= nr_replicas);
@ -421,14 +441,8 @@ static int bch2_bucket_alloc_set(struct bch_fs *c,
continue; continue;
} }
durability = (flags & BUCKET_ALLOC_USE_DURABILITY) add_new_bucket(c, ptrs, devs_may_alloc,
? ca->mi.durability : 1; nr_effective, have_cache, flags, ob);
__clear_bit(ca->dev_idx, devs_may_alloc->d);
*nr_effective += durability;
*have_cache |= !durability;
ob_push(c, ptrs, ob);
bch2_dev_stripe_increment(c, ca, stripe); bch2_dev_stripe_increment(c, ca, stripe);
@ -464,7 +478,7 @@ static int ec_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
open_bucket_for_each(c, &h->blocks, ob, i) open_bucket_for_each(c, &h->blocks, ob, i)
__clear_bit(ob->ptr.dev, devs.d); __clear_bit(ob->ptr.dev, devs.d);
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
rcu_read_lock(); rcu_read_lock();
if (h->parity.nr < h->redundancy) { if (h->parity.nr < h->redundancy) {
@ -500,12 +514,12 @@ static int ec_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
} }
rcu_read_unlock(); rcu_read_unlock();
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
return bch2_ec_stripe_new_alloc(c, h); return bch2_ec_stripe_new_alloc(c, h);
err: err:
rcu_read_unlock(); rcu_read_unlock();
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
return -1; return -1;
} }
@ -523,7 +537,8 @@ static void bucket_alloc_from_stripe(struct bch_fs *c,
unsigned erasure_code, unsigned erasure_code,
unsigned nr_replicas, unsigned nr_replicas,
unsigned *nr_effective, unsigned *nr_effective,
bool *have_cache) bool *have_cache,
unsigned flags)
{ {
struct dev_alloc_list devs_sorted; struct dev_alloc_list devs_sorted;
struct ec_stripe_head *h; struct ec_stripe_head *h;
@ -563,11 +578,8 @@ got_bucket:
ob->ec_idx = ec_idx; ob->ec_idx = ec_idx;
ob->ec = h->s; ob->ec = h->s;
__clear_bit(ob->ptr.dev, devs_may_alloc->d); add_new_bucket(c, ptrs, devs_may_alloc,
*nr_effective += ca->mi.durability; nr_effective, have_cache, flags, ob);
*have_cache |= !ca->mi.durability;
ob_push(c, ptrs, ob);
atomic_inc(&h->s->pin); atomic_inc(&h->s->pin);
out_put_head: out_put_head:
bch2_ec_stripe_head_put(h); bch2_ec_stripe_head_put(h);
@ -582,6 +594,7 @@ static void get_buckets_from_writepoint(struct bch_fs *c,
unsigned nr_replicas, unsigned nr_replicas,
unsigned *nr_effective, unsigned *nr_effective,
bool *have_cache, bool *have_cache,
unsigned flags,
bool need_ec) bool need_ec)
{ {
struct open_buckets ptrs_skip = { .nr = 0 }; struct open_buckets ptrs_skip = { .nr = 0 };
@ -596,11 +609,9 @@ static void get_buckets_from_writepoint(struct bch_fs *c,
(ca->mi.durability || (ca->mi.durability ||
(wp->type == BCH_DATA_USER && !*have_cache)) && (wp->type == BCH_DATA_USER && !*have_cache)) &&
(ob->ec || !need_ec)) { (ob->ec || !need_ec)) {
__clear_bit(ob->ptr.dev, devs_may_alloc->d); add_new_bucket(c, ptrs, devs_may_alloc,
*nr_effective += ca->mi.durability; nr_effective, have_cache,
*have_cache |= !ca->mi.durability; flags, ob);
ob_push(c, ptrs, ob);
} else { } else {
ob_push(c, &ptrs_skip, ob); ob_push(c, &ptrs_skip, ob);
} }
@ -618,17 +629,15 @@ static int open_bucket_add_buckets(struct bch_fs *c,
unsigned *nr_effective, unsigned *nr_effective,
bool *have_cache, bool *have_cache,
enum alloc_reserve reserve, enum alloc_reserve reserve,
unsigned flags,
struct closure *_cl) struct closure *_cl)
{ {
struct bch_devs_mask devs; struct bch_devs_mask devs;
struct open_bucket *ob; struct open_bucket *ob;
struct closure *cl = NULL; struct closure *cl = NULL;
unsigned i, flags = BUCKET_ALLOC_USE_DURABILITY; unsigned i;
int ret; int ret;
if (wp->type == BCH_DATA_USER)
flags |= BUCKET_MAY_ALLOC_PARTIAL;
rcu_read_lock(); rcu_read_lock();
devs = target_rw_devs(c, wp->type, target); devs = target_rw_devs(c, wp->type, target);
rcu_read_unlock(); rcu_read_unlock();
@ -643,25 +652,25 @@ static int open_bucket_add_buckets(struct bch_fs *c,
if (erasure_code) { if (erasure_code) {
get_buckets_from_writepoint(c, ptrs, wp, &devs, get_buckets_from_writepoint(c, ptrs, wp, &devs,
nr_replicas, nr_effective, nr_replicas, nr_effective,
have_cache, true); have_cache, flags, true);
if (*nr_effective >= nr_replicas) if (*nr_effective >= nr_replicas)
return 0; return 0;
bucket_alloc_from_stripe(c, ptrs, wp, &devs, bucket_alloc_from_stripe(c, ptrs, wp, &devs,
target, erasure_code, target, erasure_code,
nr_replicas, nr_effective, nr_replicas, nr_effective,
have_cache); have_cache, flags);
if (*nr_effective >= nr_replicas) if (*nr_effective >= nr_replicas)
return 0; return 0;
} }
get_buckets_from_writepoint(c, ptrs, wp, &devs, get_buckets_from_writepoint(c, ptrs, wp, &devs,
nr_replicas, nr_effective, nr_replicas, nr_effective,
have_cache, false); have_cache, flags, false);
if (*nr_effective >= nr_replicas) if (*nr_effective >= nr_replicas)
return 0; return 0;
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
rcu_read_lock(); rcu_read_lock();
retry_blocking: retry_blocking:
@ -678,7 +687,7 @@ retry_blocking:
} }
rcu_read_unlock(); rcu_read_unlock();
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
return ret; return ret;
} }
@ -862,9 +871,13 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
struct open_bucket *ob; struct open_bucket *ob;
struct open_buckets ptrs; struct open_buckets ptrs;
unsigned nr_effective, write_points_nr; unsigned nr_effective, write_points_nr;
unsigned ob_flags = 0;
bool have_cache; bool have_cache;
int ret, i; int ret, i;
if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS))
ob_flags |= BUCKET_ALLOC_USE_DURABILITY;
BUG_ON(!nr_replicas || !nr_replicas_required); BUG_ON(!nr_replicas || !nr_replicas_required);
retry: retry:
ptrs.nr = 0; ptrs.nr = 0;
@ -874,6 +887,9 @@ retry:
wp = writepoint_find(c, write_point.v); wp = writepoint_find(c, write_point.v);
if (wp->type == BCH_DATA_USER)
ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
/* metadata may not allocate on cache devices: */ /* metadata may not allocate on cache devices: */
if (wp->type != BCH_DATA_USER) if (wp->type != BCH_DATA_USER)
have_cache = true; have_cache = true;
@ -882,19 +898,22 @@ retry:
ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
target, erasure_code, target, erasure_code,
nr_replicas, &nr_effective, nr_replicas, &nr_effective,
&have_cache, reserve, cl); &have_cache, reserve,
ob_flags, cl);
} else { } else {
ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
target, erasure_code, target, erasure_code,
nr_replicas, &nr_effective, nr_replicas, &nr_effective,
&have_cache, reserve, NULL); &have_cache, reserve,
ob_flags, NULL);
if (!ret) if (!ret)
goto alloc_done; goto alloc_done;
ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
0, erasure_code, 0, erasure_code,
nr_replicas, &nr_effective, nr_replicas, &nr_effective,
&have_cache, reserve, cl); &have_cache, reserve,
ob_flags, cl);
} }
alloc_done: alloc_done:
BUG_ON(!ret && nr_effective < nr_replicas); BUG_ON(!ret && nr_effective < nr_replicas);
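The allocation flags are now chosen once in bch2_alloc_sectors_start() as ob_flags and threaded through open_bucket_add_buckets(), get_buckets_from_writepoint(), bucket_alloc_from_stripe() and bch2_bucket_alloc_set(), each of which records a chosen bucket via the new add_new_bucket() helper. Consolidated from the hunks above, the selection is:

	unsigned ob_flags = 0;

	if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS))
		ob_flags |= BUCKET_ALLOC_USE_DURABILITY;  /* count device durability toward nr_effective */

	if (wp->type == BCH_DATA_USER)
		ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;     /* user data may reuse partially filled buckets */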

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ALLOC_FOREGROUND_H #ifndef _BCACHEFS_ALLOC_FOREGROUND_H
#define _BCACHEFS_ALLOC_FOREGROUND_H #define _BCACHEFS_ALLOC_FOREGROUND_H

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ALLOC_TYPES_H #ifndef _BCACHEFS_ALLOC_TYPES_H
#define _BCACHEFS_ALLOC_TYPES_H #define _BCACHEFS_ALLOC_TYPES_H

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_H #ifndef _BCACHEFS_H
#define _BCACHEFS_H #define _BCACHEFS_H

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_FORMAT_H #ifndef _BCACHEFS_FORMAT_H
#define _BCACHEFS_FORMAT_H #define _BCACHEFS_FORMAT_H
@ -847,6 +848,8 @@ static const unsigned BKEY_ALLOC_VAL_U64s_MAX =
BCH_ALLOC_FIELDS(), sizeof(u64)); BCH_ALLOC_FIELDS(), sizeof(u64));
#undef x #undef x
#define BKEY_ALLOC_U64s_MAX (BKEY_U64s + BKEY_ALLOC_VAL_U64s_MAX)
/* Quotas: */ /* Quotas: */
enum quota_types { enum quota_types {

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_IOCTL_H #ifndef _BCACHEFS_IOCTL_H
#define _BCACHEFS_IOCTL_H #define _BCACHEFS_IOCTL_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "bkey.h" #include "bkey.h"

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BKEY_H #ifndef _BCACHEFS_BKEY_H
#define _BCACHEFS_BKEY_H #define _BCACHEFS_BKEY_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "bkey_methods.h" #include "bkey_methods.h"
@ -81,9 +82,17 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
if (k.k->u64s < BKEY_U64s) if (k.k->u64s < BKEY_U64s)
return "u64s too small"; return "u64s too small";
if ((btree_node_type_is_extents(type) ||
type == BKEY_TYPE_BTREE) &&
bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
return "value too big";
if (btree_node_type_is_extents(type)) { if (btree_node_type_is_extents(type)) {
if ((k.k->size == 0) != bkey_deleted(k.k)) if ((k.k->size == 0) != bkey_deleted(k.k))
return "bad size field"; return "bad size field";
if (k.k->size > k.k->p.offset)
return "size greater than offset";
} else { } else {
if (k.k->size) if (k.k->size)
return "nonzero size field"; return "nonzero size field";
@ -198,22 +207,22 @@ bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
} }
enum merge_result bch2_bkey_merge(struct bch_fs *c, enum merge_result bch2_bkey_merge(struct bch_fs *c,
struct bkey_i *l, struct bkey_i *r) struct bkey_s l, struct bkey_s r)
{ {
const struct bkey_ops *ops = &bch2_bkey_ops[l->k.type]; const struct bkey_ops *ops = &bch2_bkey_ops[l.k->type];
enum merge_result ret; enum merge_result ret;
if (key_merging_disabled(c) || if (key_merging_disabled(c) ||
!ops->key_merge || !ops->key_merge ||
l->k.type != r->k.type || l.k->type != r.k->type ||
bversion_cmp(l->k.version, r->k.version) || bversion_cmp(l.k->version, r.k->version) ||
bkey_cmp(l->k.p, bkey_start_pos(&r->k))) bkey_cmp(l.k->p, bkey_start_pos(r.k)))
return BCH_MERGE_NOMERGE; return BCH_MERGE_NOMERGE;
ret = ops->key_merge(c, l, r); ret = ops->key_merge(c, l, r);
if (ret != BCH_MERGE_NOMERGE) if (ret != BCH_MERGE_NOMERGE)
l->k.needs_whiteout |= r->k.needs_whiteout; l.k->needs_whiteout |= r.k->needs_whiteout;
return ret; return ret;
} }
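bch2_bkey_merge() and the per-type key_merge hook now take struct bkey_s, an unpacked key whose value may be modified in place, instead of struct bkey_i pointers, and callers compare the result against BCH_MERGE_MERGE explicitly. A minimal calling sketch, assuming two adjacent unpacked keys l and r:

	struct bkey_i *l, *r;   /* unpacked; l ends where r starts */

	if (bch2_bkey_merge(c, bkey_i_to_s(l), bkey_i_to_s(r)) == BCH_MERGE_MERGE) {
		/* r was folded into l; r can be dropped */
	}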

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BKEY_METHODS_H #ifndef _BCACHEFS_BKEY_METHODS_H
#define _BCACHEFS_BKEY_METHODS_H #define _BCACHEFS_BKEY_METHODS_H
@ -32,7 +33,7 @@ struct bkey_ops {
void (*swab)(const struct bkey_format *, struct bkey_packed *); void (*swab)(const struct bkey_format *, struct bkey_packed *);
bool (*key_normalize)(struct bch_fs *, struct bkey_s); bool (*key_normalize)(struct bch_fs *, struct bkey_s);
enum merge_result (*key_merge)(struct bch_fs *, enum merge_result (*key_merge)(struct bch_fs *,
struct bkey_i *, struct bkey_i *); struct bkey_s, struct bkey_s);
}; };
const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c); const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c);
@ -56,7 +57,7 @@ void bch2_bkey_swab(const struct bkey_format *, struct bkey_packed *);
bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s); bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
enum merge_result bch2_bkey_merge(struct bch_fs *, enum merge_result bch2_bkey_merge(struct bch_fs *,
struct bkey_i *, struct bkey_i *); struct bkey_s, struct bkey_s);
void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "bkey_sort.h" #include "bkey_sort.h"
#include "bset.h" #include "bset.h"
@ -240,25 +241,11 @@ static inline void extent_sort_next(struct btree_node_iter_large *iter,
heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL); heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL);
} }
static void extent_sort_append(struct bch_fs *c, static void extent_sort_advance_prev(struct bkey_format *f,
struct btree *b, struct btree_nr_keys *nr,
struct btree_nr_keys *nr, struct bkey_packed *start,
struct bkey_packed *start, struct bkey_packed **prev)
struct bkey_packed **prev,
struct bkey_packed *k)
{ {
struct bkey_format *f = &b->format;
BKEY_PADDED(k) tmp;
if (bkey_whiteout(k))
return;
bch2_bkey_unpack(b, &tmp.k, k);
if (*prev &&
bch2_bkey_merge(c, (void *) *prev, &tmp.k))
return;
if (*prev) { if (*prev) {
bch2_bkey_pack(*prev, (void *) *prev, f); bch2_bkey_pack(*prev, (void *) *prev, f);
@ -267,8 +254,31 @@ static void extent_sort_append(struct bch_fs *c,
} else { } else {
*prev = start; *prev = start;
} }
}
bkey_copy(*prev, &tmp.k); static void extent_sort_append(struct bch_fs *c,
struct bkey_format *f,
struct btree_nr_keys *nr,
struct bkey_packed *start,
struct bkey_packed **prev,
struct bkey_s k)
{
if (bkey_whiteout(k.k))
return;
/*
* prev is always unpacked, for key merging - until right before we
* advance it:
*/
if (*prev &&
bch2_bkey_merge(c, bkey_i_to_s((void *) *prev), k) ==
BCH_MERGE_MERGE)
return;
extent_sort_advance_prev(f, nr, start, prev);
bkey_reassemble((void *) *prev, k.s_c);
} }
struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
@ -278,7 +288,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
{ {
struct bkey_format *f = &b->format; struct bkey_format *f = &b->format;
struct btree_node_iter_set *_l = iter->data, *_r; struct btree_node_iter_set *_l = iter->data, *_r;
struct bkey_packed *prev = NULL, *out, *lk, *rk; struct bkey_packed *prev = NULL, *lk, *rk;
struct bkey l_unpacked, r_unpacked; struct bkey l_unpacked, r_unpacked;
struct bkey_s l, r; struct bkey_s l, r;
struct btree_nr_keys nr; struct btree_nr_keys nr;
@ -289,9 +299,10 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
while (!bch2_btree_node_iter_large_end(iter)) { while (!bch2_btree_node_iter_large_end(iter)) {
lk = __btree_node_offset_to_key(b, _l->k); lk = __btree_node_offset_to_key(b, _l->k);
l = __bkey_disassemble(b, lk, &l_unpacked);
if (iter->used == 1) { if (iter->used == 1) {
extent_sort_append(c, b, &nr, dst->start, &prev, lk); extent_sort_append(c, f, &nr, dst->start, &prev, l);
extent_sort_next(iter, b, _l); extent_sort_next(iter, b, _l);
continue; continue;
} }
@ -302,13 +313,11 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
_r++; _r++;
rk = __btree_node_offset_to_key(b, _r->k); rk = __btree_node_offset_to_key(b, _r->k);
l = __bkey_disassemble(b, lk, &l_unpacked);
r = __bkey_disassemble(b, rk, &r_unpacked); r = __bkey_disassemble(b, rk, &r_unpacked);
/* If current key and next key don't overlap, just append */ /* If current key and next key don't overlap, just append */
if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) { if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
extent_sort_append(c, b, &nr, dst->start, &prev, lk); extent_sort_append(c, f, &nr, dst->start, &prev, l);
extent_sort_next(iter, b, _l); extent_sort_next(iter, b, _l);
continue; continue;
} }
@ -353,23 +362,17 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
extent_sort_sift(iter, b, 0); extent_sort_sift(iter, b, 0);
extent_sort_append(c, b, &nr, dst->start, &prev, extent_sort_append(c, f, &nr, dst->start,
bkey_to_packed(&tmp.k)); &prev, bkey_i_to_s(&tmp.k));
} else { } else {
bch2_cut_back(bkey_start_pos(r.k), l.k); bch2_cut_back(bkey_start_pos(r.k), l.k);
extent_save(b, lk, l.k); extent_save(b, lk, l.k);
} }
} }
if (prev) { extent_sort_advance_prev(f, &nr, dst->start, &prev);
bch2_bkey_pack(prev, (void *) prev, f);
btree_keys_account_key_add(&nr, 0, prev);
out = bkey_next(prev);
} else {
out = dst->start;
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data); dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
return nr; return nr;
} }
@ -412,58 +415,36 @@ bch2_sort_repack_merge(struct bch_fs *c,
struct bkey_format *out_f, struct bkey_format *out_f,
bool filter_whiteouts) bool filter_whiteouts)
{ {
struct bkey_packed *k, *prev = NULL, *out; struct bkey_packed *prev = NULL, *k_packed, *next;
struct bkey k_unpacked;
struct bkey_s k;
struct btree_nr_keys nr; struct btree_nr_keys nr;
BKEY_PADDED(k) tmp;
memset(&nr, 0, sizeof(nr)); memset(&nr, 0, sizeof(nr));
while ((k = bch2_btree_node_iter_next_all(iter, src))) { next = bch2_btree_node_iter_next_all(iter, src);
if (filter_whiteouts && bkey_whiteout(k)) while ((k_packed = next)) {
/*
* The filter might modify the size of @k's value, so advance
* the iterator first:
*/
next = bch2_btree_node_iter_next_all(iter, src);
if (filter_whiteouts && bkey_whiteout(k_packed))
continue; continue;
/* k = __bkey_disassemble(src, k_packed, &k_unpacked);
* The filter might modify pointers, so we have to unpack the
* key and values to &tmp.k:
*/
bch2_bkey_unpack(src, &tmp.k, k);
if (filter_whiteouts && if (filter_whiteouts &&
bch2_bkey_normalize(c, bkey_i_to_s(&tmp.k))) bch2_bkey_normalize(c, k))
continue; continue;
/* prev is always unpacked, for key merging: */ extent_sort_append(c, out_f, &nr, vstruct_last(dst), &prev, k);
if (prev &&
bch2_bkey_merge(c, (void *) prev, &tmp.k) ==
BCH_MERGE_MERGE)
continue;
/*
* the current key becomes the new prev: advance prev, then
* copy the current key - but first pack prev (in place):
*/
if (prev) {
bch2_bkey_pack(prev, (void *) prev, out_f);
btree_keys_account_key_add(&nr, 0, prev);
prev = bkey_next(prev);
} else {
prev = vstruct_last(dst);
}
bkey_copy(prev, &tmp.k);
} }
if (prev) { extent_sort_advance_prev(out_f, &nr, vstruct_last(dst), &prev);
bch2_bkey_pack(prev, (void *) prev, out_f);
btree_keys_account_key_add(&nr, 0, prev);
out = bkey_next(prev);
} else {
out = vstruct_last(dst);
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data); dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
return nr; return nr;
} }
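Both extent sort paths now share extent_sort_advance_prev(), which packs the previous key in place, accounts it, and advances *prev; the key currently being appended stays unpacked so the next key can still be merged into it. The per-key logic, restated from extent_sort_append() above with comments:

	/* try to merge k into the previous, still-unpacked key first: */
	if (*prev &&
	    bch2_bkey_merge(c, bkey_i_to_s((void *) *prev), k) == BCH_MERGE_MERGE)
		return;

	/* otherwise pack the old prev in place, and k becomes the new prev: */
	extent_sort_advance_prev(f, nr, start, prev);
	bkey_reassemble((void *) *prev, k.s_c);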

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BKEY_SORT_H #ifndef _BCACHEFS_BKEY_SORT_H
#define _BCACHEFS_BKEY_SORT_H #define _BCACHEFS_BKEY_SORT_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/* /*
* Code for working with individual keys, and sorted sets of keys with in a * Code for working with individual keys, and sorted sets of keys with in a
* btree node * btree node
@ -12,7 +13,6 @@
#include "util.h" #include "util.h"
#include <asm/unaligned.h> #include <asm/unaligned.h>
#include <linux/dynamic_fault.h>
#include <linux/console.h> #include <linux/console.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/prefetch.h> #include <linux/prefetch.h>

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BSET_H #ifndef _BCACHEFS_BSET_H
#define _BCACHEFS_BSET_H #define _BCACHEFS_BSET_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "btree_cache.h" #include "btree_cache.h"
@ -7,6 +8,7 @@
#include "debug.h" #include "debug.h"
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <linux/sched/mm.h>
#include <trace/events/bcachefs.h> #include <trace/events/bcachefs.h>
const char * const bch2_btree_ids[] = { const char * const bch2_btree_ids[] = {
@ -507,7 +509,9 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
struct btree_cache *bc = &c->btree_cache; struct btree_cache *bc = &c->btree_cache;
struct btree *b; struct btree *b;
u64 start_time = local_clock(); u64 start_time = local_clock();
unsigned flags;
flags = memalloc_nofs_save();
mutex_lock(&bc->lock); mutex_lock(&bc->lock);
/* /*
@ -545,6 +549,7 @@ out_unlock:
list_del_init(&b->list); list_del_init(&b->list);
mutex_unlock(&bc->lock); mutex_unlock(&bc->lock);
memalloc_nofs_restore(flags);
out: out:
b->flags = 0; b->flags = 0;
b->written = 0; b->written = 0;

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_CACHE_H #ifndef _BCACHEFS_BTREE_CACHE_H
#define _BCACHEFS_BTREE_CACHE_H #define _BCACHEFS_BTREE_CACHE_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/* /*
* Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com> * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
* Copyright (C) 2014 Datera Inc. * Copyright (C) 2014 Datera Inc.
@ -287,11 +288,11 @@ static int mark_journal_key(struct bch_fs *c, enum btree_id id,
for_each_btree_key(&trans, iter, id, bkey_start_pos(&insert->k), for_each_btree_key(&trans, iter, id, bkey_start_pos(&insert->k),
BTREE_ITER_SLOTS, k, ret) { BTREE_ITER_SLOTS, k, ret) {
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
ret = bch2_mark_overwrite(&trans, iter, k, insert, NULL, ret = bch2_mark_overwrite(&trans, iter, k, insert, NULL,
BCH_BUCKET_MARK_GC| BCH_BUCKET_MARK_GC|
BCH_BUCKET_MARK_NOATOMIC); BCH_BUCKET_MARK_NOATOMIC);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
if (!ret) if (!ret)
break; break;
@ -367,9 +368,7 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
*/ */
if (c) { if (c) {
lockdep_assert_held(&c->sb_lock); lockdep_assert_held(&c->sb_lock);
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
} else {
preempt_disable();
} }
for (i = 0; i < layout->nr_superblocks; i++) { for (i = 0; i < layout->nr_superblocks; i++) {
@ -391,11 +390,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
gc_phase(GC_PHASE_SB), flags); gc_phase(GC_PHASE_SB), flags);
} }
if (c) { if (c)
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
} else {
preempt_enable();
}
} }
static void bch2_mark_superblocks(struct bch_fs *c) static void bch2_mark_superblocks(struct bch_fs *c)
@ -435,7 +431,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
size_t i, j, iter; size_t i, j, iter;
unsigned ci; unsigned ci;
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
spin_lock(&c->freelist_lock); spin_lock(&c->freelist_lock);
gc_pos_set(c, gc_pos_alloc(c, NULL)); gc_pos_set(c, gc_pos_alloc(c, NULL));
@ -471,7 +467,7 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
spin_unlock(&ob->lock); spin_unlock(&ob->lock);
} }
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
} }
static void bch2_gc_free(struct bch_fs *c) static void bch2_gc_free(struct bch_fs *c)

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_GC_H #ifndef _BCACHEFS_BTREE_GC_H
#define _BCACHEFS_BTREE_GC_H #define _BCACHEFS_BTREE_GC_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "bkey_methods.h" #include "bkey_methods.h"
@ -1037,10 +1038,9 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
INIT_WORK(&rb->work, btree_node_read_work); INIT_WORK(&rb->work, btree_node_read_work);
bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META; bio->bi_opf = REQ_OP_READ|REQ_SYNC|REQ_META;
bio->bi_iter.bi_sector = pick.ptr.offset; bio->bi_iter.bi_sector = pick.ptr.offset;
bio->bi_iter.bi_size = btree_bytes(c);
bio->bi_end_io = btree_node_read_endio; bio->bi_end_io = btree_node_read_endio;
bio->bi_private = b; bio->bi_private = b;
bch2_bio_map(bio, b->data); bch2_bio_map(bio, b->data, btree_bytes(c));
set_btree_node_read_in_flight(b); set_btree_node_read_in_flight(b);
@ -1501,11 +1501,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
wbio->wbio.order = order; wbio->wbio.order = order;
wbio->wbio.used_mempool = used_mempool; wbio->wbio.used_mempool = used_mempool;
wbio->wbio.bio.bi_opf = REQ_OP_WRITE|REQ_META|REQ_FUA; wbio->wbio.bio.bi_opf = REQ_OP_WRITE|REQ_META|REQ_FUA;
wbio->wbio.bio.bi_iter.bi_size = sectors_to_write << 9;
wbio->wbio.bio.bi_end_io = btree_node_write_endio; wbio->wbio.bio.bi_end_io = btree_node_write_endio;
wbio->wbio.bio.bi_private = b; wbio->wbio.bio.bi_private = b;
bch2_bio_map(&wbio->wbio.bio, data); bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9);
/* /*
* If we're appending to a leaf node, we don't technically need FUA - * If we're appending to a leaf node, we don't technically need FUA -

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_IO_H #ifndef _BCACHEFS_BTREE_IO_H
#define _BCACHEFS_BTREE_IO_H #define _BCACHEFS_BTREE_IO_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "bkey_methods.h" #include "bkey_methods.h"

View File

@ -1,8 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_ITER_H #ifndef _BCACHEFS_BTREE_ITER_H
#define _BCACHEFS_BTREE_ITER_H #define _BCACHEFS_BTREE_ITER_H
#include <linux/dynamic_fault.h>
#include "bset.h" #include "bset.h"
#include "btree_types.h" #include "btree_types.h"

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_LOCKING_H #ifndef _BCACHEFS_BTREE_LOCKING_H
#define _BCACHEFS_BTREE_LOCKING_H #define _BCACHEFS_BTREE_LOCKING_H

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_TYPES_H #ifndef _BCACHEFS_BTREE_TYPES_H
#define _BCACHEFS_BTREE_TYPES_H #define _BCACHEFS_BTREE_TYPES_H
@ -261,6 +262,7 @@ struct btree_insert_entry {
bool deferred; bool deferred;
bool triggered; bool triggered;
bool marked;
}; };
#define BTREE_ITER_MAX 64 #define BTREE_ITER_MAX 64

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_UPDATE_H #ifndef _BCACHEFS_BTREE_UPDATE_H
#define _BCACHEFS_BTREE_UPDATE_H #define _BCACHEFS_BTREE_UPDATE_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "alloc_foreground.h" #include "alloc_foreground.h"

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H #ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H
#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H #define _BCACHEFS_BTREE_UPDATE_INTERIOR_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "btree_update.h" #include "btree_update.h"
@ -541,6 +542,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bch_fs_usage *fs_usage = NULL; struct bch_fs_usage *fs_usage = NULL;
struct btree_insert_entry *i; struct btree_insert_entry *i;
bool saw_non_marked;
unsigned mark_flags = trans->flags & BTREE_INSERT_BUCKET_INVALIDATE unsigned mark_flags = trans->flags & BTREE_INSERT_BUCKET_INVALIDATE
? BCH_BUCKET_MARK_BUCKET_INVALIDATE ? BCH_BUCKET_MARK_BUCKET_INVALIDATE
: 0; : 0;
@ -550,14 +552,28 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK); BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
trans_for_each_update_iter(trans, i) trans_for_each_update_iter(trans, i)
if (update_has_triggers(trans, i) && i->marked = false;
update_triggers_transactional(trans, i)) {
ret = bch2_trans_mark_update(trans, i); do {
if (ret == -EINTR) saw_non_marked = false;
trace_trans_restart_mark(trans->ip);
if (ret) trans_for_each_update_iter(trans, i) {
goto out_clear_replicas; if (i->marked)
continue;
saw_non_marked = true;
i->marked = true;
if (update_has_triggers(trans, i) &&
update_triggers_transactional(trans, i)) {
ret = bch2_trans_mark_update(trans, i->iter, i->k);
if (ret == -EINTR)
trace_trans_restart_mark(trans->ip);
if (ret)
goto out_clear_replicas;
}
} }
} while (saw_non_marked);
btree_trans_lock_write(c, trans); btree_trans_lock_write(c, trans);
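Running a transactional trigger can append further updates to the same transaction (for example, bch2_trans_mark_pointer() below adds an alloc-btree update via trans_update_key()), so one pass over the update list is not enough; the new marked flag and the do/while loop keep rescanning until a full pass finds every entry already marked. An illustrative trace for a single extent insert:

	/* pass 1: the extent update is marked; its trigger appends an alloc-btree update */
	/* pass 2: the new alloc update is marked; nothing further is appended            */
	/* pass 3: no unmarked entries are seen, saw_non_marked stays false, loop ends    */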

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/* /*
* Code for manipulating bucket marks for garbage collection. * Code for manipulating bucket marks for garbage collection.
* *
@ -227,12 +228,12 @@ retry:
if (unlikely(!ret)) if (unlikely(!ret))
return NULL; return NULL;
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
v = fs_usage_u64s(c); v = fs_usage_u64s(c);
if (unlikely(u64s != v)) { if (unlikely(u64s != v)) {
u64s = v; u64s = v;
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
kfree(ret); kfree(ret);
goto retry; goto retry;
} }
@ -350,9 +351,9 @@ bch2_fs_usage_read_short(struct bch_fs *c)
{ {
struct bch_fs_usage_short ret; struct bch_fs_usage_short ret;
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
ret = __bch2_fs_usage_read_short(c); ret = __bch2_fs_usage_read_short(c);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
return ret; return ret;
} }
@ -449,6 +450,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_data_types[old.data_type], bch2_data_types[old.data_type],
bch2_data_types[new.data_type]); bch2_data_types[new.data_type]);
preempt_disable();
dev_usage = this_cpu_ptr(ca->usage[gc]); dev_usage = this_cpu_ptr(ca->usage[gc]);
if (bucket_type(old)) if (bucket_type(old))
@ -472,6 +474,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
(int) new.cached_sectors - (int) old.cached_sectors; (int) new.cached_sectors - (int) old.cached_sectors;
dev_usage->sectors_fragmented += dev_usage->sectors_fragmented +=
is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca); is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca);
preempt_enable();
if (!is_available_bucket(old) && is_available_bucket(new)) if (!is_available_bucket(old) && is_available_bucket(new))
bch2_wake_allocator(ca); bch2_wake_allocator(ca);
@ -495,11 +498,9 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c)
buckets = bucket_array(ca); buckets = bucket_array(ca);
preempt_disable();
for_each_bucket(g, buckets) for_each_bucket(g, buckets)
bch2_dev_usage_update(c, ca, c->usage_base, bch2_dev_usage_update(c, ca, c->usage_base,
old, g->mark, false); old, g->mark, false);
preempt_enable();
} }
} }
@ -681,8 +682,12 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, bool owned_by_allocator, size_t b, bool owned_by_allocator,
struct gc_pos pos, unsigned flags) struct gc_pos pos, unsigned flags)
{ {
preempt_disable();
do_mark_fn(__bch2_mark_alloc_bucket, c, pos, flags, do_mark_fn(__bch2_mark_alloc_bucket, c, pos, flags,
ca, b, owned_by_allocator); ca, b, owned_by_allocator);
preempt_enable();
} }
static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k, static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
@ -792,12 +797,16 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
BUG_ON(type != BCH_DATA_SB && BUG_ON(type != BCH_DATA_SB &&
type != BCH_DATA_JOURNAL); type != BCH_DATA_JOURNAL);
preempt_disable();
if (likely(c)) { if (likely(c)) {
do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags, do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags,
ca, b, type, sectors); ca, b, type, sectors);
} else { } else {
__bch2_mark_metadata_bucket(c, ca, b, type, sectors, 0); __bch2_mark_metadata_bucket(c, ca, b, type, sectors, 0);
} }
preempt_enable();
} }
static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p, static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
@ -1148,10 +1157,10 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
{ {
int ret; int ret;
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
ret = bch2_mark_key_locked(c, k, sectors, ret = bch2_mark_key_locked(c, k, sectors,
fs_usage, journal_seq, flags); fs_usage, journal_seq, flags);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
return ret; return ret;
} }
@ -1309,22 +1318,18 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
static int trans_get_key(struct btree_trans *trans, static int trans_get_key(struct btree_trans *trans,
enum btree_id btree_id, struct bpos pos, enum btree_id btree_id, struct bpos pos,
struct btree_insert_entry **insert,
struct btree_iter **iter, struct btree_iter **iter,
struct bkey_s_c *k) struct bkey_s_c *k)
{ {
unsigned i; unsigned i;
int ret; int ret;
*insert = NULL;
for (i = 0; i < trans->nr_updates; i++) for (i = 0; i < trans->nr_updates; i++)
if (!trans->updates[i].deferred && if (!trans->updates[i].deferred &&
trans->updates[i].iter->btree_id == btree_id && trans->updates[i].iter->btree_id == btree_id &&
!bkey_cmp(pos, trans->updates[i].iter->pos)) { !bkey_cmp(pos, trans->updates[i].iter->pos)) {
*insert = &trans->updates[i]; *iter = trans->updates[i].iter;
*iter = (*insert)->iter; *k = bkey_i_to_s_c(trans->updates[i].k);
*k = bkey_i_to_s_c((*insert)->k);
return 0; return 0;
} }
@ -1340,30 +1345,34 @@ static int trans_get_key(struct btree_trans *trans,
return ret; return ret;
} }
static int trans_update_key(struct btree_trans *trans, static void *trans_update_key(struct btree_trans *trans,
struct btree_insert_entry **insert, struct btree_iter *iter,
struct btree_iter *iter, unsigned u64s)
struct bkey_s_c k,
unsigned extra_u64s)
{ {
struct bkey_i *new_k; struct bkey_i *new_k;
unsigned i;
if (*insert) new_k = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
return 0;
new_k = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
extra_u64s * sizeof(u64));
if (IS_ERR(new_k)) if (IS_ERR(new_k))
return PTR_ERR(new_k); return new_k;
*insert = bch2_trans_update(trans, ((struct btree_insert_entry) { bkey_init(&new_k->k);
.iter = iter, new_k->k.p = iter->pos;
.k = new_k,
.triggered = true, for (i = 0; i < trans->nr_updates; i++)
if (!trans->updates[i].deferred &&
trans->updates[i].iter == iter) {
trans->updates[i].k = new_k;
return new_k;
}
bch2_trans_update(trans, ((struct btree_insert_entry) {
.iter = iter,
.k = new_k,
.triggered = true,
})); }));
bkey_reassemble((*insert)->k, k); return new_k;
return 0;
} }
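trans_update_key() now just allocates room for a key of the given size in u64s, initializes it at iter->pos, and attaches it to the transaction (replacing the update already queued on that iterator, if any); unlike before it does not copy the existing key, so callers rebuild or copy the value themselves. Calling sketch, following the two users below:

	new_k = trans_update_key(trans, iter, k.k->u64s);
	ret = PTR_ERR_OR_ZERO(new_k);
	if (ret)
		goto out;

	bkey_reassemble(new_k, k);   /* keep the old contents ... */
	/* ... or build a fresh value, as the alloc-key path does with bkey_alloc_init() */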
static int bch2_trans_mark_pointer(struct btree_trans *trans, static int bch2_trans_mark_pointer(struct btree_trans *trans,
@ -1372,7 +1381,6 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct btree_insert_entry *insert;
struct btree_iter *iter; struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_alloc_unpacked u; struct bkey_alloc_unpacked u;
@ -1382,7 +1390,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
ret = trans_get_key(trans, BTREE_ID_ALLOC, ret = trans_get_key(trans, BTREE_ID_ALLOC,
POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)), POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)),
&insert, &iter, &k); &iter, &k);
if (ret) if (ret)
return ret; return ret;
@ -1415,11 +1423,12 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
? u.dirty_sectors ? u.dirty_sectors
: u.cached_sectors, sectors); : u.cached_sectors, sectors);
ret = trans_update_key(trans, &insert, iter, k, 1); a = trans_update_key(trans, iter, BKEY_ALLOC_U64s_MAX);
ret = PTR_ERR_OR_ZERO(a);
if (ret) if (ret)
goto out; goto out;
a = bkey_alloc_init(insert->k); bkey_alloc_init(&a->k_i);
a->k.p = iter->pos; a->k.p = iter->pos;
bch2_alloc_pack(a, u); bch2_alloc_pack(a, u);
out: out:
@ -1432,8 +1441,8 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
s64 sectors, enum bch_data_type data_type) s64 sectors, enum bch_data_type data_type)
{ {
struct bch_replicas_padded r; struct bch_replicas_padded r;
struct btree_insert_entry *insert;
struct btree_iter *iter; struct btree_iter *iter;
struct bkey_i *new_k;
struct bkey_s_c k; struct bkey_s_c k;
struct bkey_s_stripe s; struct bkey_s_stripe s;
unsigned nr_data; unsigned nr_data;
@ -1442,8 +1451,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
BUG_ON(!sectors); BUG_ON(!sectors);
ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k);
&insert, &iter, &k);
if (ret) if (ret)
return ret; return ret;
@ -1455,11 +1463,13 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
goto out; goto out;
} }
ret = trans_update_key(trans, &insert, iter, k, 1); new_k = trans_update_key(trans, iter, k.k->u64s);
ret = PTR_ERR_OR_ZERO(new_k);
if (ret) if (ret)
goto out; goto out;
s = bkey_i_to_s_stripe(insert->k); bkey_reassemble(new_k, k);
s = bkey_i_to_s_stripe(new_k);
nr_data = s.v->nr_blocks - s.v->nr_redundant; nr_data = s.v->nr_blocks - s.v->nr_redundant;
@ -1580,9 +1590,9 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
} }
int bch2_trans_mark_update(struct btree_trans *trans, int bch2_trans_mark_update(struct btree_trans *trans,
struct btree_insert_entry *insert) struct btree_iter *iter,
struct bkey_i *insert)
{ {
struct btree_iter *iter = insert->iter;
struct btree *b = iter->l[0].b; struct btree *b = iter->l[0].b;
struct btree_node_iter node_iter = iter->l[0].iter; struct btree_node_iter node_iter = iter->l[0].iter;
struct bkey_packed *_k; struct bkey_packed *_k;
@ -1592,9 +1602,9 @@ int bch2_trans_mark_update(struct btree_trans *trans,
return 0; return 0;
ret = bch2_trans_mark_key(trans, ret = bch2_trans_mark_key(trans,
bkey_i_to_s_c(insert->k), bkey_i_to_s_c(insert),
bpos_min(insert->k->k.p, b->key.k.p).offset - bpos_min(insert->k.p, b->key.k.p).offset -
bkey_start_offset(&insert->k->k), bkey_start_offset(&insert->k),
BCH_BUCKET_MARK_INSERT); BCH_BUCKET_MARK_INSERT);
if (ret) if (ret)
return ret; return ret;
@ -1608,25 +1618,25 @@ int bch2_trans_mark_update(struct btree_trans *trans,
k = bkey_disassemble(b, _k, &unpacked); k = bkey_disassemble(b, _k, &unpacked);
if (btree_node_is_extents(b) if (btree_node_is_extents(b)
? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0 ? bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0
: bkey_cmp(insert->k->k.p, k.k->p)) : bkey_cmp(insert->k.p, k.k->p))
break; break;
if (btree_node_is_extents(b)) { if (btree_node_is_extents(b)) {
switch (bch2_extent_overlap(&insert->k->k, k.k)) { switch (bch2_extent_overlap(&insert->k, k.k)) {
case BCH_EXTENT_OVERLAP_ALL: case BCH_EXTENT_OVERLAP_ALL:
sectors = -((s64) k.k->size); sectors = -((s64) k.k->size);
break; break;
case BCH_EXTENT_OVERLAP_BACK: case BCH_EXTENT_OVERLAP_BACK:
sectors = bkey_start_offset(&insert->k->k) - sectors = bkey_start_offset(&insert->k) -
k.k->p.offset; k.k->p.offset;
break; break;
case BCH_EXTENT_OVERLAP_FRONT: case BCH_EXTENT_OVERLAP_FRONT:
sectors = bkey_start_offset(k.k) - sectors = bkey_start_offset(k.k) -
insert->k->k.p.offset; insert->k.p.offset;
break; break;
case BCH_EXTENT_OVERLAP_MIDDLE: case BCH_EXTENT_OVERLAP_MIDDLE:
sectors = k.k->p.offset - insert->k->k.p.offset; sectors = k.k->p.offset - insert->k.p.offset;
BUG_ON(sectors <= 0); BUG_ON(sectors <= 0);
ret = bch2_trans_mark_key(trans, k, sectors, ret = bch2_trans_mark_key(trans, k, sectors,
@ -1634,7 +1644,7 @@ int bch2_trans_mark_update(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
sectors = bkey_start_offset(&insert->k->k) - sectors = bkey_start_offset(&insert->k) -
k.k->p.offset; k.k->p.offset;
break; break;
} }
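In the overlap cases the delta for the overwritten key k follows from the extents' positions (an extent key's position is its end, so start = p.offset - size). A worked BCH_EXTENT_OVERLAP_BACK case, with offsets in sectors:

	/* existing k:  start 0, p.offset 8   ->  covers [0, 8)   */
	/* insert:      start 6, p.offset 16  ->  covers [6, 16)  */
	sectors = bkey_start_offset(&insert->k) - k.k->p.offset;   /* 6 - 8 = -2 */
	/* the two overwritten sectors of k are released */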
@ -1664,10 +1674,10 @@ static u64 bch2_recalc_sectors_available(struct bch_fs *c)
void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
{ {
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
this_cpu_sub(c->usage[0]->online_reserved, this_cpu_sub(c->usage[0]->online_reserved,
res->sectors); res->sectors);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
res->sectors = 0; res->sectors = 0;
} }
@ -1682,7 +1692,8 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
s64 sectors_available; s64 sectors_available;
int ret; int ret;
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
preempt_disable();
pcpu = this_cpu_ptr(c->pcpu); pcpu = this_cpu_ptr(c->pcpu);
if (sectors <= pcpu->sectors_available) if (sectors <= pcpu->sectors_available)
@ -1694,7 +1705,8 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
get = min((u64) sectors + SECTORS_CACHE, old); get = min((u64) sectors + SECTORS_CACHE, old);
if (get < sectors) { if (get < sectors) {
percpu_up_read_preempt_enable(&c->mark_lock); preempt_enable();
percpu_up_read(&c->mark_lock);
goto recalculate; goto recalculate;
} }
} while ((v = atomic64_cmpxchg(&c->sectors_available, } while ((v = atomic64_cmpxchg(&c->sectors_available,
@ -1707,7 +1719,8 @@ out:
this_cpu_add(c->usage[0]->online_reserved, sectors); this_cpu_add(c->usage[0]->online_reserved, sectors);
res->sectors += sectors; res->sectors += sectors;
percpu_up_read_preempt_enable(&c->mark_lock); preempt_enable();
percpu_up_read(&c->mark_lock);
return 0; return 0;
recalculate: recalculate:

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* /*
* Code for manipulating bucket marks for garbage collection. * Code for manipulating bucket marks for garbage collection.
* *
@ -273,7 +274,8 @@ void bch2_replicas_delta_list_apply(struct bch_fs *,
struct replicas_delta_list *); struct replicas_delta_list *);
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, s64, unsigned); int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, s64, unsigned);
int bch2_trans_mark_update(struct btree_trans *, int bch2_trans_mark_update(struct btree_trans *,
struct btree_insert_entry *); struct btree_iter *iter,
struct bkey_i *insert);
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *); void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
/* disk reservations: */ /* disk reservations: */

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BUCKETS_TYPES_H #ifndef _BUCKETS_TYPES_H
#define _BUCKETS_TYPES_H #define _BUCKETS_TYPES_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_CHARDEV #ifndef NO_BCACHEFS_CHARDEV
#include "bcachefs.h" #include "bcachefs.h"
@ -405,7 +406,7 @@ static long bch2_ioctl_usage(struct bch_fs *c,
dst.used = bch2_fs_sectors_used(c, src); dst.used = bch2_fs_sectors_used(c, src);
dst.online_reserved = src->online_reserved; dst.online_reserved = src->online_reserved;
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
for (i = 0; i < BCH_REPLICAS_MAX; i++) { for (i = 0; i < BCH_REPLICAS_MAX; i++) {
dst.persistent_reserved[i] = dst.persistent_reserved[i] =

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_CHARDEV_H #ifndef _BCACHEFS_CHARDEV_H
#define _BCACHEFS_CHARDEV_H #define _BCACHEFS_CHARDEV_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "checksum.h" #include "checksum.h"
#include "super.h" #include "super.h"
@ -60,7 +61,7 @@ static u64 bch2_checksum_update(unsigned type, u64 crc, const void *data, size_t
return crc32c(crc, data, len); return crc32c(crc, data, len);
case BCH_CSUM_CRC64_NONZERO: case BCH_CSUM_CRC64_NONZERO:
case BCH_CSUM_CRC64: case BCH_CSUM_CRC64:
return bch2_crc64_update(crc, data, len); return crc64_be(crc, data, len);
default: default:
BUG(); BUG();
} }
@ -199,7 +200,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
kunmap_atomic(p); kunmap_atomic(p);
} }
#else #else
__bio_for_each_contig_segment(bv, bio, *iter, *iter) __bio_for_each_bvec(bv, bio, *iter, *iter)
crc = bch2_checksum_update(type, crc, crc = bch2_checksum_update(type, crc,
page_address(bv.bv_page) + bv.bv_offset, page_address(bv.bv_page) + bv.bv_offset,
bv.bv_len); bv.bv_len);
@ -224,7 +225,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
kunmap_atomic(p); kunmap_atomic(p);
} }
#else #else
__bio_for_each_contig_segment(bv, bio, *iter, *iter) __bio_for_each_bvec(bv, bio, *iter, *iter)
crypto_shash_update(desc, crypto_shash_update(desc,
page_address(bv.bv_page) + bv.bv_offset, page_address(bv.bv_page) + bv.bv_offset,
bv.bv_len); bv.bv_len);

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_CHECKSUM_H #ifndef _BCACHEFS_CHECKSUM_H
#define _BCACHEFS_CHECKSUM_H #define _BCACHEFS_CHECKSUM_H
@ -24,11 +25,6 @@ static inline bool bch2_checksum_mergeable(unsigned type)
struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum, struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum,
struct bch_csum, size_t); struct bch_csum, size_t);
static inline u64 bch2_crc64_update(u64 crc, const void *p, size_t len)
{
return crc64_be(crc, p, len);
}
#define BCH_NONCE_EXTENT cpu_to_le32(1 << 28) #define BCH_NONCE_EXTENT cpu_to_le32(1 << 28)
#define BCH_NONCE_BTREE cpu_to_le32(2 << 28) #define BCH_NONCE_BTREE cpu_to_le32(2 << 28)
#define BCH_NONCE_JOURNAL cpu_to_le32(3 << 28) #define BCH_NONCE_JOURNAL cpu_to_le32(3 << 28)

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "clock.h" #include "clock.h"

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_CLOCK_H #ifndef _BCACHEFS_CLOCK_H
#define _BCACHEFS_CLOCK_H #define _BCACHEFS_CLOCK_H

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_CLOCK_TYPES_H #ifndef _BCACHEFS_CLOCK_TYPES_H
#define _BCACHEFS_CLOCK_TYPES_H #define _BCACHEFS_CLOCK_TYPES_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "checksum.h" #include "checksum.h"
#include "compress.h" #include "compress.h"
@ -5,7 +6,6 @@
#include "io.h" #include "io.h"
#include "super-io.h" #include "super-io.h"
#include "lz4.h"
#include <linux/lz4.h> #include <linux/lz4.h>
#include <linux/zlib.h> #include <linux/zlib.h>
#include <linux/zstd.h> #include <linux/zstd.h>
@ -66,7 +66,7 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max); BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);
#ifndef CONFIG_HIGHMEM #ifndef CONFIG_HIGHMEM
__bio_for_each_contig_segment(bv, bio, iter, start) { __bio_for_each_bvec(bv, bio, iter, start) {
if (bv.bv_len == start.bi_size) if (bv.bv_len == start.bi_size)
return (struct bbuf) { return (struct bbuf) {
.b = page_address(bv.bv_page) + bv.bv_offset, .b = page_address(bv.bv_page) + bv.bv_offset,
@ -159,11 +159,6 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
switch (crc.compression_type) { switch (crc.compression_type) {
case BCH_COMPRESSION_LZ4_OLD: case BCH_COMPRESSION_LZ4_OLD:
ret = bch2_lz4_decompress(src_data.b, &src_len,
dst_data, dst_len);
if (ret)
goto err;
break;
case BCH_COMPRESSION_LZ4: case BCH_COMPRESSION_LZ4:
ret = LZ4_decompress_safe_partial(src_data.b, dst_data, ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
src_len, dst_len, dst_len); src_len, dst_len, dst_len);
@ -246,10 +241,10 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
} }
/* /*
* might have to free existing pages and retry allocation from mempool - * XXX: don't have a good way to assert that the bio was allocated with
* do this _after_ decompressing: * enough space, we depend on bch2_move_extent doing the right thing
*/ */
bch2_bio_alloc_more_pages_pool(c, bio, crc->live_size << 9); bio->bi_iter.bi_size = crc->live_size << 9;
memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9)); memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_COMPRESS_H #ifndef _BCACHEFS_COMPRESS_H
#define _BCACHEFS_COMPRESS_H #define _BCACHEFS_COMPRESS_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/* /*
* Assorted bcachefs debug code * Assorted bcachefs debug code
* *
@ -69,8 +70,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
bio_set_dev(bio, ca->disk_sb.bdev); bio_set_dev(bio, ca->disk_sb.bdev);
bio->bi_opf = REQ_OP_READ|REQ_META; bio->bi_opf = REQ_OP_READ|REQ_META;
bio->bi_iter.bi_sector = pick.ptr.offset; bio->bi_iter.bi_sector = pick.ptr.offset;
bio->bi_iter.bi_size = btree_bytes(c); bch2_bio_map(bio, n_sorted, btree_bytes(c));
bch2_bio_map(bio, n_sorted);
submit_bio_wait(bio); submit_bio_wait(bio);
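A pattern repeated throughout this commit: bch2_bio_map() now takes the length and sets bi_iter.bi_size itself, so callers stop assigning bi_size by hand before mapping. Sketch of the before/after calling convention (buf and len are placeholders):

	/* old pattern, removed throughout this commit: */
	bio->bi_iter.bi_size = len;
	bch2_bio_map(bio, buf);

	/* new pattern: the helper sets bi_iter.bi_size from len */
	bch2_bio_map(bio, buf, len);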

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_DEBUG_H #ifndef _BCACHEFS_DEBUG_H
#define _BCACHEFS_DEBUG_H #define _BCACHEFS_DEBUG_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "bkey_methods.h" #include "bkey_methods.h"

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_DIRENT_H #ifndef _BCACHEFS_DIRENT_H
#define _BCACHEFS_DIRENT_H #define _BCACHEFS_DIRENT_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "disk_groups.h" #include "disk_groups.h"
#include "super-io.h" #include "super-io.h"

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_DISK_GROUPS_H #ifndef _BCACHEFS_DISK_GROUPS_H
#define _BCACHEFS_DISK_GROUPS_H #define _BCACHEFS_DISK_GROUPS_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/* erasure coding */ /* erasure coding */
@ -398,11 +399,10 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
bio_set_op_attrs(&ec_bio->bio, rw, 0); bio_set_op_attrs(&ec_bio->bio, rw, 0);
ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9); ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9);
ec_bio->bio.bi_iter.bi_size = b;
ec_bio->bio.bi_end_io = ec_block_endio; ec_bio->bio.bi_end_io = ec_block_endio;
ec_bio->bio.bi_private = cl; ec_bio->bio.bi_private = cl;
bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset); bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);
closure_get(cl); closure_get(cl);
percpu_ref_get(&ca->io_ref); percpu_ref_get(&ca->io_ref);
@ -626,7 +626,8 @@ void bch2_stripes_heap_update(struct bch_fs *c,
bch2_stripes_heap_insert(c, m, idx); bch2_stripes_heap_insert(c, m, idx);
} }
if (stripe_idx_to_delete(c) >= 0) if (stripe_idx_to_delete(c) >= 0 &&
!percpu_ref_is_dying(&c->writes))
schedule_work(&c->ec_stripe_delete_work); schedule_work(&c->ec_stripe_delete_work);
} }
@ -684,7 +685,8 @@ static void ec_stripe_delete_work(struct work_struct *work)
if (idx < 0) if (idx < 0)
break; break;
ec_stripe_delete(c, idx); if (ec_stripe_delete(c, idx))
break;
} }
mutex_unlock(&c->ec_stripe_create_lock); mutex_unlock(&c->ec_stripe_create_lock);
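This hunk is the change named in the commit message: stripe deletion is only scheduled while c->writes is still alive, and the delete worker stops on the first failure, so nothing tries to delete stripes once the filesystem has gone read-only. Condensed sketch of the two guards (locking and heap bookkeeping elided; ec_stripe_delete() is assumed to return nonzero on failure, as the new call site implies):

	/* bch2_stripes_heap_update(): don't queue work while going RO */
	if (stripe_idx_to_delete(c) >= 0 &&
	    !percpu_ref_is_dying(&c->writes))
		schedule_work(&c->ec_stripe_delete_work);

	/* ec_stripe_delete_work(): bail out as soon as a delete fails */
	while ((idx = stripe_idx_to_delete(c)) >= 0)
		if (ec_stripe_delete(c, idx))
			break;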

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_EC_H #ifndef _BCACHEFS_EC_H
#define _BCACHEFS_EC_H #define _BCACHEFS_EC_H

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_EC_TYPES_H #ifndef _BCACHEFS_EC_TYPES_H
#define _BCACHEFS_EC_TYPES_H #define _BCACHEFS_EC_TYPES_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "error.h" #include "error.h"
#include "io.h" #include "io.h"

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ERROR_H #ifndef _BCACHEFS_ERROR_H
#define _BCACHEFS_ERROR_H #define _BCACHEFS_ERROR_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/* /*
* Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com> * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
* *
@ -1291,9 +1292,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
{ {
if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
return "value too big";
return bch2_bkey_ptrs_invalid(c, k); return bch2_bkey_ptrs_invalid(c, k);
} }
@ -1521,21 +1519,21 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
} }
enum merge_result bch2_extent_merge(struct bch_fs *c, enum merge_result bch2_extent_merge(struct bch_fs *c,
struct bkey_i *l, struct bkey_i *r) struct bkey_s _l, struct bkey_s _r)
{ {
struct bkey_s_extent el = bkey_i_to_s_extent(l); struct bkey_s_extent l = bkey_s_to_extent(_l);
struct bkey_s_extent er = bkey_i_to_s_extent(r); struct bkey_s_extent r = bkey_s_to_extent(_r);
union bch_extent_entry *en_l = el.v->start; union bch_extent_entry *en_l = l.v->start;
union bch_extent_entry *en_r = er.v->start; union bch_extent_entry *en_r = r.v->start;
struct bch_extent_crc_unpacked crc_l, crc_r; struct bch_extent_crc_unpacked crc_l, crc_r;
if (bkey_val_u64s(&l->k) != bkey_val_u64s(&r->k)) if (bkey_val_u64s(l.k) != bkey_val_u64s(r.k))
return BCH_MERGE_NOMERGE; return BCH_MERGE_NOMERGE;
crc_l = bch2_extent_crc_unpack(el.k, NULL); crc_l = bch2_extent_crc_unpack(l.k, NULL);
extent_for_each_entry(el, en_l) { extent_for_each_entry(l, en_l) {
en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
if (extent_entry_type(en_l) != extent_entry_type(en_r)) if (extent_entry_type(en_l) != extent_entry_type(en_r))
return BCH_MERGE_NOMERGE; return BCH_MERGE_NOMERGE;
@ -1567,8 +1565,8 @@ enum merge_result bch2_extent_merge(struct bch_fs *c,
case BCH_EXTENT_ENTRY_crc32: case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64: case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128: case BCH_EXTENT_ENTRY_crc128:
crc_l = bch2_extent_crc_unpack(el.k, entry_to_crc(en_l)); crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
crc_r = bch2_extent_crc_unpack(er.k, entry_to_crc(en_r)); crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
if (crc_l.csum_type != crc_r.csum_type || if (crc_l.csum_type != crc_r.csum_type ||
crc_l.compression_type != crc_r.compression_type || crc_l.compression_type != crc_r.compression_type ||
@ -1600,16 +1598,16 @@ enum merge_result bch2_extent_merge(struct bch_fs *c,
} }
} }
extent_for_each_entry(el, en_l) { extent_for_each_entry(l, en_l) {
struct bch_extent_crc_unpacked crc_l, crc_r; struct bch_extent_crc_unpacked crc_l, crc_r;
en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
if (!extent_entry_is_crc(en_l)) if (!extent_entry_is_crc(en_l))
continue; continue;
crc_l = bch2_extent_crc_unpack(el.k, entry_to_crc(en_l)); crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
crc_r = bch2_extent_crc_unpack(er.k, entry_to_crc(en_r)); crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
crc_l.csum = bch2_checksum_merge(crc_l.csum_type, crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
crc_l.csum, crc_l.csum,
@ -1622,7 +1620,7 @@ enum merge_result bch2_extent_merge(struct bch_fs *c,
bch2_extent_crc_pack(entry_to_crc(en_l), crc_l); bch2_extent_crc_pack(entry_to_crc(en_l), crc_l);
} }
bch2_key_resize(&l->k, l->k.size + r->k.size); bch2_key_resize(l.k, l.k->size + r.k->size);
return BCH_MERGE_MERGE; return BCH_MERGE_MERGE;
} }
@ -1662,7 +1660,9 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
bch2_bkey_unpack(b, &li.k, l); bch2_bkey_unpack(b, &li.k, l);
bch2_bkey_unpack(b, &ri.k, r); bch2_bkey_unpack(b, &ri.k, r);
ret = bch2_bkey_merge(c, &li.k, &ri.k); ret = bch2_bkey_merge(c,
bkey_i_to_s(&li.k),
bkey_i_to_s(&ri.k));
if (ret == BCH_MERGE_NOMERGE) if (ret == BCH_MERGE_NOMERGE)
return false; return false;
@ -1785,22 +1785,22 @@ void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c,
} }
enum merge_result bch2_reservation_merge(struct bch_fs *c, enum merge_result bch2_reservation_merge(struct bch_fs *c,
struct bkey_i *l, struct bkey_i *r) struct bkey_s _l, struct bkey_s _r)
{ {
struct bkey_i_reservation *li = bkey_i_to_reservation(l); struct bkey_s_reservation l = bkey_s_to_reservation(_l);
struct bkey_i_reservation *ri = bkey_i_to_reservation(r); struct bkey_s_reservation r = bkey_s_to_reservation(_r);
if (li->v.generation != ri->v.generation || if (l.v->generation != r.v->generation ||
li->v.nr_replicas != ri->v.nr_replicas) l.v->nr_replicas != r.v->nr_replicas)
return BCH_MERGE_NOMERGE; return BCH_MERGE_NOMERGE;
if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) { if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) {
bch2_key_resize(&l->k, KEY_SIZE_MAX); bch2_key_resize(l.k, KEY_SIZE_MAX);
bch2_cut_front(l->k.p, r); __bch2_cut_front(l.k->p, r.s);
return BCH_MERGE_PARTIAL; return BCH_MERGE_PARTIAL;
} }
bch2_key_resize(&l->k, l->k.size + r->k.size); bch2_key_resize(l.k, l.k->size + r.k->size);
return BCH_MERGE_MERGE; return BCH_MERGE_MERGE;
} }
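The merge hooks now take struct bkey_s instead of struct bkey_i pointers, so callers convert with bkey_i_to_s() and the helpers work on the key/value view directly (bkey_s_to_extent(), bkey_s_to_reservation()). The call-site shape, as in bch2_extent_merge_inline() above:

	enum merge_result ret = bch2_bkey_merge(c,
						bkey_i_to_s(&li.k),
						bkey_i_to_s(&ri.k));

	if (ret == BCH_MERGE_NOMERGE)
		return false;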

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_EXTENTS_H #ifndef _BCACHEFS_EXTENTS_H
#define _BCACHEFS_EXTENTS_H #define _BCACHEFS_EXTENTS_H
@ -385,7 +386,7 @@ void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
enum merge_result bch2_extent_merge(struct bch_fs *, enum merge_result bch2_extent_merge(struct bch_fs *,
struct bkey_i *, struct bkey_i *); struct bkey_s, struct bkey_s);
#define bch2_bkey_ops_extent (struct bkey_ops) { \ #define bch2_bkey_ops_extent (struct bkey_ops) { \
.key_invalid = bch2_extent_invalid, \ .key_invalid = bch2_extent_invalid, \
@ -401,7 +402,7 @@ enum merge_result bch2_extent_merge(struct bch_fs *,
const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c); const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
enum merge_result bch2_reservation_merge(struct bch_fs *, enum merge_result bch2_reservation_merge(struct bch_fs *,
struct bkey_i *, struct bkey_i *); struct bkey_s, struct bkey_s);
#define bch2_bkey_ops_reservation (struct bkey_ops) { \ #define bch2_bkey_ops_reservation (struct bkey_ops) { \
.key_invalid = bch2_reservation_invalid, \ .key_invalid = bch2_reservation_invalid, \

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_EXTENTS_TYPES_H #ifndef _BCACHEFS_EXTENTS_TYPES_H
#define _BCACHEFS_EXTENTS_TYPES_H #define _BCACHEFS_EXTENTS_TYPES_H

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _EYTZINGER_H #ifndef _EYTZINGER_H
#define _EYTZINGER_H #define _EYTZINGER_H

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_FIFO_H #ifndef _BCACHEFS_FIFO_H
#define _BCACHEFS_FIFO_H #define _BCACHEFS_FIFO_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS #ifndef NO_BCACHEFS_FS
#include "bcachefs.h" #include "bcachefs.h"
@ -500,184 +501,263 @@ static inline struct bch_io_opts io_opts(struct bch_fs *c, struct bch_inode_info
/* stored in page->private: */ /* stored in page->private: */
/* struct bch_page_sector {
* bch_page_state has to (unfortunately) be manipulated with cmpxchg - we could
* almost protect it with the page lock, except that bch2_writepage_io_done has
* to update the sector counts (and from interrupt/bottom half context).
*/
struct bch_page_state {
union { struct {
/* existing data: */
unsigned sectors:PAGE_SECTOR_SHIFT + 1;
/* Uncompressed, fully allocated replicas: */ /* Uncompressed, fully allocated replicas: */
unsigned nr_replicas:4; unsigned nr_replicas:3;
/* Owns PAGE_SECTORS * replicas_reserved sized reservation: */ /* Owns PAGE_SECTORS * replicas_reserved sized reservation: */
unsigned replicas_reserved:4; unsigned replicas_reserved:3;
/* Owns PAGE_SECTORS sized quota reservation: */ /* i_sectors: */
unsigned quota_reserved:1; enum {
SECTOR_UNALLOCATED,
SECTOR_QUOTA_RESERVED,
SECTOR_DIRTY,
SECTOR_ALLOCATED,
} state:2;
};
struct bch_page_state {
struct bch_page_sector s[PAGE_SECTORS];
};
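Page state is now tracked per 512-byte sector instead of as one packed, cmpxchg-updated word per page: each sector carries its replica counts plus a small state machine (UNALLOCATED, QUOTA_RESERVED, DIRTY, ALLOCATED), and struct bch_page_state is just an array of those entries hung off page->private. A minimal sketch of walking that array, in the style of the helpers below (only the function name is invented):

/* sketch: count sectors of a page holding data (dirty or allocated) */
static unsigned sketch_page_data_sectors(struct page *page)
{
	struct bch_page_state *s = bch2_page_state(page);
	unsigned i, nr = 0;

	if (!s)
		return 0;

	for (i = 0; i < ARRAY_SIZE(s->s); i++)
		nr += s->s[i].state >= SECTOR_DIRTY;

	return nr;
}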
static inline struct bch_page_state *__bch2_page_state(struct page *page)
{
return page_has_private(page)
? (struct bch_page_state *) page_private(page)
: NULL;
}
static inline struct bch_page_state *bch2_page_state(struct page *page)
{
EBUG_ON(!PageLocked(page));
return __bch2_page_state(page);
}
/* for newly allocated pages: */
static void __bch2_page_state_release(struct page *page)
{
struct bch_page_state *s = __bch2_page_state(page);
if (!s)
return;
ClearPagePrivate(page);
set_page_private(page, 0);
put_page(page);
kfree(s);
}
static void bch2_page_state_release(struct page *page)
{
struct bch_page_state *s = bch2_page_state(page);
if (!s)
return;
ClearPagePrivate(page);
set_page_private(page, 0);
put_page(page);
kfree(s);
}
/* for newly allocated pages: */
static struct bch_page_state *__bch2_page_state_create(struct page *page,
gfp_t gfp)
{
struct bch_page_state *s;
s = kzalloc(sizeof(*s), GFP_NOFS|gfp);
if (!s)
return NULL;
/* /*
* Number of sectors on disk - for i_blocks * migrate_page_move_mapping() assumes that pages with private data
* Uncompressed size, not compressed size: * have their count elevated by 1.
*/ */
unsigned dirty_sectors:PAGE_SECTOR_SHIFT + 1; get_page(page);
}; set_page_private(page, (unsigned long) s);
/* for cmpxchg: */ SetPagePrivate(page);
unsigned long v;
};
};
#define page_state_cmpxchg(_ptr, _new, _expr) \
({ \
unsigned long _v = READ_ONCE((_ptr)->v); \
struct bch_page_state _old; \
\
do { \
_old.v = _new.v = _v; \
_expr; \
\
EBUG_ON(_new.sectors + _new.dirty_sectors > PAGE_SECTORS);\
} while (_old.v != _new.v && \
(_v = cmpxchg(&(_ptr)->v, _old.v, _new.v)) != _old.v); \
\
_old; \
})
static inline struct bch_page_state *page_state(struct page *page)
{
struct bch_page_state *s = (void *) &page->private;
BUILD_BUG_ON(sizeof(*s) > sizeof(page->private));
if (!PagePrivate(page))
SetPagePrivate(page);
return s; return s;
} }
static inline unsigned page_res_sectors(struct bch_page_state s) static struct bch_page_state *bch2_page_state_create(struct page *page,
gfp_t gfp)
{ {
return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp);
return s.replicas_reserved * PAGE_SECTORS;
}
static void __bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
struct bch_page_state s)
{
struct disk_reservation res = { .sectors = page_res_sectors(s) };
struct quota_res quota_res = { .sectors = s.quota_reserved ? PAGE_SECTORS : 0 };
bch2_quota_reservation_put(c, inode, &quota_res);
bch2_disk_reservation_put(c, &res);
} }
static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode, static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
struct page *page) struct page *page)
{ {
struct bch_page_state s; struct bch_page_state *s = bch2_page_state(page);
struct disk_reservation disk_res = { 0 };
struct quota_res quota_res = { 0 };
unsigned i;
EBUG_ON(!PageLocked(page)); if (!s)
return;
s = page_state_cmpxchg(page_state(page), s, { for (i = 0; i < ARRAY_SIZE(s->s); i++) {
s.replicas_reserved = 0; disk_res.sectors += s->s[i].replicas_reserved;
s.quota_reserved = 0; s->s[i].replicas_reserved = 0;
});
__bch2_put_page_reservation(c, inode, s); if (s->s[i].state == SECTOR_QUOTA_RESERVED) {
quota_res.sectors++;
s->s[i].state = SECTOR_UNALLOCATED;
}
}
bch2_quota_reservation_put(c, inode, &quota_res);
bch2_disk_reservation_put(c, &disk_res);
}
static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode)
{
/* XXX: this should not be open coded */
return inode->ei_inode.bi_data_replicas
? inode->ei_inode.bi_data_replicas - 1
: c->opts.data_replicas;
}
static inline unsigned sectors_to_reserve(struct bch_page_sector *s,
unsigned nr_replicas)
{
return max(0, (int) nr_replicas -
s->nr_replicas -
s->replicas_reserved);
}
static int bch2_get_page_disk_reservation(struct bch_fs *c,
struct bch_inode_info *inode,
struct page *page, bool check_enospc)
{
struct bch_page_state *s = bch2_page_state_create(page, 0);
unsigned nr_replicas = inode_nr_replicas(c, inode);
struct disk_reservation disk_res = { 0 };
unsigned i, disk_res_sectors = 0;
int ret;
if (!s)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(s->s); i++)
disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
if (!disk_res_sectors)
return 0;
ret = bch2_disk_reservation_get(c, &disk_res,
disk_res_sectors, 1,
!check_enospc
? BCH_DISK_RESERVATION_NOFAIL
: 0);
if (unlikely(ret))
return ret;
for (i = 0; i < ARRAY_SIZE(s->s); i++)
s->s[i].replicas_reserved +=
sectors_to_reserve(&s->s[i], nr_replicas);
return 0;
}
static int bch2_get_page_quota_reservation(struct bch_fs *c,
struct bch_inode_info *inode,
struct page *page, bool check_enospc)
{
struct bch_page_state *s = bch2_page_state_create(page, 0);
struct quota_res quota_res = { 0 };
unsigned i, quota_res_sectors = 0;
int ret;
if (!s)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(s->s); i++)
quota_res_sectors += s->s[i].state == SECTOR_UNALLOCATED;
if (!quota_res_sectors)
return 0;
ret = bch2_quota_reservation_add(c, inode, &quota_res,
quota_res_sectors,
check_enospc);
if (unlikely(ret))
return ret;
for (i = 0; i < ARRAY_SIZE(s->s); i++)
if (s->s[i].state == SECTOR_UNALLOCATED)
s->s[i].state = SECTOR_QUOTA_RESERVED;
return 0;
} }
static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode, static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
struct page *page, bool check_enospc) struct page *page, bool check_enospc)
{ {
struct bch_page_state *s = page_state(page), new; return bch2_get_page_disk_reservation(c, inode, page, check_enospc) ?:
bch2_get_page_quota_reservation(c, inode, page, check_enospc);
/* XXX: this should not be open coded */
unsigned nr_replicas = inode->ei_inode.bi_data_replicas
? inode->ei_inode.bi_data_replicas - 1
: c->opts.data_replicas;
struct disk_reservation disk_res;
struct quota_res quota_res = { 0 };
int ret;
EBUG_ON(!PageLocked(page));
if (s->replicas_reserved < nr_replicas) {
ret = bch2_disk_reservation_get(c, &disk_res, PAGE_SECTORS,
nr_replicas - s->replicas_reserved,
!check_enospc ? BCH_DISK_RESERVATION_NOFAIL : 0);
if (unlikely(ret))
return ret;
page_state_cmpxchg(s, new, ({
BUG_ON(new.replicas_reserved +
disk_res.nr_replicas != nr_replicas);
new.replicas_reserved += disk_res.nr_replicas;
}));
}
if (!s->quota_reserved &&
s->sectors + s->dirty_sectors < PAGE_SECTORS) {
ret = bch2_quota_reservation_add(c, inode, &quota_res,
PAGE_SECTORS,
check_enospc);
if (unlikely(ret))
return ret;
page_state_cmpxchg(s, new, ({
BUG_ON(new.quota_reserved);
new.quota_reserved = 1;
}));
}
return ret;
} }
static void bch2_clear_page_bits(struct page *page) static void bch2_clear_page_bits(struct page *page)
{ {
struct bch_inode_info *inode = to_bch_ei(page->mapping->host); struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_page_state s; struct bch_page_state *s = bch2_page_state(page);
int i, dirty_sectors = 0;
EBUG_ON(!PageLocked(page)); if (!s)
if (!PagePrivate(page))
return; return;
s.v = xchg(&page_state(page)->v, 0); for (i = 0; i < ARRAY_SIZE(s->s); i++) {
ClearPagePrivate(page); if (s->s[i].state == SECTOR_DIRTY) {
dirty_sectors++;
s->s[i].state = SECTOR_UNALLOCATED;
}
}
if (s.dirty_sectors) if (dirty_sectors)
i_sectors_acct(c, inode, NULL, -s.dirty_sectors); i_sectors_acct(c, inode, NULL, -dirty_sectors);
bch2_put_page_reservation(c, inode, page);
__bch2_put_page_reservation(c, inode, s); bch2_page_state_release(page);
} }
int bch2_set_page_dirty(struct page *page) static void __bch2_set_page_dirty(struct page *page)
{ {
struct bch_inode_info *inode = to_bch_ei(page->mapping->host); struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_page_state *s = bch2_page_state(page);
struct quota_res quota_res = { 0 }; struct quota_res quota_res = { 0 };
struct bch_page_state old, new; unsigned i, dirty_sectors = 0;
old = page_state_cmpxchg(page_state(page), new, BUG_ON(!s);
new.dirty_sectors = PAGE_SECTORS - new.sectors;
new.quota_reserved = 0;
);
quota_res.sectors += old.quota_reserved * PAGE_SECTORS; for (i = 0; i < ARRAY_SIZE(s->s); i++) {
if (s->s[i].state == SECTOR_QUOTA_RESERVED)
quota_res.sectors++;
if (old.dirty_sectors != new.dirty_sectors) if (s->s[i].state == SECTOR_UNALLOCATED ||
i_sectors_acct(c, inode, &quota_res, s->s[i].state == SECTOR_QUOTA_RESERVED) {
new.dirty_sectors - old.dirty_sectors); s->s[i].state = SECTOR_DIRTY;
dirty_sectors++;
}
}
if (dirty_sectors)
i_sectors_acct(c, inode, &quota_res, dirty_sectors);
bch2_quota_reservation_put(c, inode, &quota_res); bch2_quota_reservation_put(c, inode, &quota_res);
return __set_page_dirty_nobuffers(page);
} }
int bch2_page_mkwrite(struct vm_fault *vmf) static void bch2_set_page_dirty(struct page *page)
{
__bch2_set_page_dirty(page);
__set_page_dirty_nobuffers(page);
}
vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
{ {
struct page *page = vmf->page; struct page *page = vmf->page;
struct file *file = vmf->vma->vm_file; struct file *file = vmf->vma->vm_file;
@ -713,7 +793,7 @@ int bch2_page_mkwrite(struct vm_fault *vmf)
} }
if (!PageDirty(page)) if (!PageDirty(page))
set_page_dirty(page); bch2_set_page_dirty(page);
wait_for_stable_page(page); wait_for_stable_page(page);
out: out:
if (current->pagecache_lock != &mapping->add_lock) if (current->pagecache_lock != &mapping->add_lock)
@ -761,11 +841,18 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage,
return ret; return ret;
if (PagePrivate(page)) { if (PagePrivate(page)) {
*page_state(newpage) = *page_state(page);
ClearPagePrivate(page); ClearPagePrivate(page);
get_page(newpage);
set_page_private(newpage, page_private(page));
set_page_private(page, 0);
put_page(page);
SetPagePrivate(newpage);
} }
migrate_page_copy(newpage, page); if (mode != MIGRATE_SYNC_NO_COPY)
migrate_page_copy(newpage, page);
else
migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS; return MIGRATEPAGE_SUCCESS;
} }
#endif #endif
@ -791,7 +878,7 @@ static int bio_add_page_contig(struct bio *bio, struct page *page)
else if (!bio_can_add_page_contig(bio, page)) else if (!bio_can_add_page_contig(bio, page))
return -1; return -1;
__bio_add_page(bio, page, PAGE_SIZE, 0); BUG_ON(!bio_add_page(bio, page, PAGE_SIZE, 0));
return 0; return 0;
} }
@ -799,10 +886,11 @@ static int bio_add_page_contig(struct bio *bio, struct page *page)
static void bch2_readpages_end_io(struct bio *bio) static void bch2_readpages_end_io(struct bio *bio)
{ {
struct bvec_iter_all iter;
struct bio_vec *bv; struct bio_vec *bv;
int i; int i;
bio_for_each_segment_all(bv, bio, i) { bio_for_each_segment_all(bv, bio, i, iter) {
struct page *page = bv->bv_page; struct page *page = bv->bv_page;
if (!bio->bi_status) { if (!bio->bi_status) {
@ -848,7 +936,8 @@ static int readpages_iter_init(struct readpages_iter *iter,
while (!list_empty(pages)) { while (!list_empty(pages)) {
struct page *page = list_last_entry(pages, struct page, lru); struct page *page = list_last_entry(pages, struct page, lru);
prefetchw(&page->flags); __bch2_page_state_create(page, __GFP_NOFAIL);
iter->pages[iter->nr_pages++] = page; iter->pages[iter->nr_pages++] = page;
list_del(&page->lru); list_del(&page->lru);
} }
@ -884,6 +973,7 @@ static inline struct page *readpage_iter_next(struct readpages_iter *iter)
iter->idx++; iter->idx++;
iter->nr_added++; iter->nr_added++;
__bch2_page_state_release(page);
put_page(page); put_page(page);
} }
@ -894,7 +984,6 @@ static inline struct page *readpage_iter_next(struct readpages_iter *iter)
out: out:
EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx); EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx);
page_state_init_for_read(iter->pages[iter->idx]);
return iter->pages[iter->idx]; return iter->pages[iter->idx];
} }
@ -904,21 +993,20 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
struct bio_vec bv; struct bio_vec bv;
unsigned nr_ptrs = bch2_bkey_nr_ptrs_allocated(k); unsigned nr_ptrs = bch2_bkey_nr_ptrs_allocated(k);
BUG_ON(bio->bi_iter.bi_sector < bkey_start_offset(k.k));
BUG_ON(bio_end_sector(bio) > k.k->p.offset);
bio_for_each_segment(bv, bio, iter) { bio_for_each_segment(bv, bio, iter) {
/* brand new pages, don't need to be locked: */ struct bch_page_state *s = bch2_page_state(bv.bv_page);
unsigned i;
struct bch_page_state *s = page_state(bv.bv_page); for (i = bv.bv_offset >> 9;
i < (bv.bv_offset + bv.bv_len) >> 9;
/* sectors in @k from the start of this page: */ i++) {
unsigned k_sectors = k.k->size - (iter.bi_sector - k.k->p.offset); s->s[i].nr_replicas = nr_ptrs;
s->s[i].state = SECTOR_ALLOCATED;
unsigned page_sectors = min(bv.bv_len >> 9, k_sectors); }
s->nr_replicas = page_sectors == PAGE_SECTORS
? nr_ptrs : 0;
BUG_ON(s->sectors + page_sectors > PAGE_SECTORS);
s->sectors += page_sectors;
} }
} }
@ -949,12 +1037,15 @@ static void readpage_bio_extend(struct readpages_iter *iter,
if (!page) if (!page)
break; break;
page_state_init_for_read(page); if (!__bch2_page_state_create(page, 0)) {
put_page(page);
break;
}
ret = add_to_page_cache_lru(page, iter->mapping, ret = add_to_page_cache_lru(page, iter->mapping,
page_offset, GFP_NOFS); page_offset, GFP_NOFS);
if (ret) { if (ret) {
ClearPagePrivate(page); __bch2_page_state_release(page);
put_page(page); put_page(page);
break; break;
} }
@ -962,7 +1053,7 @@ static void readpage_bio_extend(struct readpages_iter *iter,
put_page(page); put_page(page);
} }
__bio_add_page(bio, page, PAGE_SIZE, 0); BUG_ON(!bio_add_page(bio, page, PAGE_SIZE, 0));
} }
} }
@ -1076,7 +1167,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
bio_set_op_attrs(&rbio->bio, REQ_OP_READ, 0); bio_set_op_attrs(&rbio->bio, REQ_OP_READ, 0);
rbio->bio.bi_iter.bi_sector = (sector_t) index << PAGE_SECTOR_SHIFT; rbio->bio.bi_iter.bi_sector = (sector_t) index << PAGE_SECTOR_SHIFT;
rbio->bio.bi_end_io = bch2_readpages_end_io; rbio->bio.bi_end_io = bch2_readpages_end_io;
__bio_add_page(&rbio->bio, page, PAGE_SIZE, 0); BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0));
bchfs_read(&trans, iter, rbio, inode->v.i_ino, bchfs_read(&trans, iter, rbio, inode->v.i_ino,
&readpages_iter); &readpages_iter);
@ -1097,7 +1188,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
struct btree_trans trans; struct btree_trans trans;
struct btree_iter *iter; struct btree_iter *iter;
page_state_init_for_read(page); bch2_page_state_create(page, __GFP_NOFAIL);
bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC); bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
bio_add_page_contig(&rbio->bio, page); bio_add_page_contig(&rbio->bio, page);
@ -1184,11 +1275,12 @@ static void bch2_writepage_io_done(struct closure *cl)
struct bch_writepage_io, cl); struct bch_writepage_io, cl);
struct bch_fs *c = io->op.op.c; struct bch_fs *c = io->op.op.c;
struct bio *bio = &io->op.op.wbio.bio; struct bio *bio = &io->op.op.wbio.bio;
struct bvec_iter_all iter;
struct bio_vec *bvec; struct bio_vec *bvec;
unsigned i; unsigned i;
if (io->op.op.error) { if (io->op.op.error) {
bio_for_each_segment_all(bvec, bio, i) { bio_for_each_segment_all(bvec, bio, i, iter) {
SetPageError(bvec->bv_page); SetPageError(bvec->bv_page);
mapping_set_error(bvec->bv_page->mapping, -EIO); mapping_set_error(bvec->bv_page->mapping, -EIO);
} }
@ -1215,7 +1307,7 @@ static void bch2_writepage_io_done(struct closure *cl)
i_sectors_acct(c, io->op.inode, NULL, i_sectors_acct(c, io->op.inode, NULL,
io->op.sectors_added - (s64) io->new_sectors); io->op.sectors_added - (s64) io->new_sectors);
bio_for_each_segment_all(bvec, bio, i) bio_for_each_segment_all(bvec, bio, i, iter)
end_page_writeback(bvec->bv_page); end_page_writeback(bvec->bv_page);
closure_return_with_destructor(&io->cl, bch2_writepage_io_free); closure_return_with_destructor(&io->cl, bch2_writepage_io_free);
@ -1266,10 +1358,13 @@ static int __bch2_writepage(struct page *page,
struct bch_inode_info *inode = to_bch_ei(page->mapping->host); struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_writepage_state *w = data; struct bch_writepage_state *w = data;
struct bch_page_state new, old; struct bch_page_state *s;
unsigned offset, nr_replicas_this_write; unsigned offset, nr_replicas_this_write = U32_MAX;
unsigned dirty_sectors = 0, reserved_sectors = 0;
loff_t i_size = i_size_read(&inode->v); loff_t i_size = i_size_read(&inode->v);
pgoff_t end_index = i_size >> PAGE_SHIFT; pgoff_t end_index = i_size >> PAGE_SHIFT;
unsigned i;
int ret;
EBUG_ON(!PageUptodate(page)); EBUG_ON(!PageUptodate(page));
@ -1293,33 +1388,34 @@ static int __bch2_writepage(struct page *page,
*/ */
zero_user_segment(page, offset, PAGE_SIZE); zero_user_segment(page, offset, PAGE_SIZE);
do_io: do_io:
EBUG_ON(!PageLocked(page)); s = bch2_page_state_create(page, __GFP_NOFAIL);
ret = bch2_get_page_disk_reservation(c, inode, page, true);
if (ret) {
SetPageError(page);
mapping_set_error(page->mapping, ret);
unlock_page(page);
return 0;
}
for (i = 0; i < PAGE_SECTORS; i++)
nr_replicas_this_write =
min_t(unsigned, nr_replicas_this_write,
s->s[i].nr_replicas +
s->s[i].replicas_reserved);
/* Before unlocking the page, transfer reservation to w->io: */ /* Before unlocking the page, transfer reservation to w->io: */
old = page_state_cmpxchg(page_state(page), new, {
/*
* If we didn't get a reservation, we can only write out the
* number of (fully allocated) replicas that currently exist,
* and only if the entire page has been written:
*/
nr_replicas_this_write =
max_t(unsigned,
new.replicas_reserved,
(new.sectors == PAGE_SECTORS
? new.nr_replicas : 0));
BUG_ON(!nr_replicas_this_write); for (i = 0; i < PAGE_SECTORS; i++) {
s->s[i].nr_replicas = w->opts.compression
? 0 : nr_replicas_this_write;
new.nr_replicas = w->opts.compression reserved_sectors += s->s[i].replicas_reserved;
? 0 s->s[i].replicas_reserved = 0;
: nr_replicas_this_write;
new.replicas_reserved = 0; dirty_sectors += s->s[i].state == SECTOR_DIRTY;
s->s[i].state = SECTOR_ALLOCATED;
new.sectors += new.dirty_sectors; }
BUG_ON(new.sectors != PAGE_SECTORS);
new.dirty_sectors = 0;
});
BUG_ON(PageWriteback(page)); BUG_ON(PageWriteback(page));
set_page_writeback(page); set_page_writeback(page);
@ -1334,12 +1430,12 @@ do_io:
bch2_writepage_io_alloc(c, w, inode, page, bch2_writepage_io_alloc(c, w, inode, page,
nr_replicas_this_write); nr_replicas_this_write);
w->io->new_sectors += new.sectors - old.sectors; w->io->new_sectors += dirty_sectors;
BUG_ON(inode != w->io->op.inode); BUG_ON(inode != w->io->op.inode);
BUG_ON(bio_add_page_contig(&w->io->op.op.wbio.bio, page)); BUG_ON(bio_add_page_contig(&w->io->op.op.wbio.bio, page));
w->io->op.op.res.sectors += old.replicas_reserved * PAGE_SECTORS; w->io->op.op.res.sectors += reserved_sectors;
w->io->op.new_i_size = i_size; w->io->op.new_i_size = i_size;
if (wbc->sync_mode == WB_SYNC_ALL) if (wbc->sync_mode == WB_SYNC_ALL)
@ -1478,7 +1574,7 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
if (!PageUptodate(page)) if (!PageUptodate(page))
SetPageUptodate(page); SetPageUptodate(page);
if (!PageDirty(page)) if (!PageDirty(page))
set_page_dirty(page); bch2_set_page_dirty(page);
inode->ei_last_dirtied = (unsigned long) current; inode->ei_last_dirtied = (unsigned long) current;
} else { } else {
@ -1596,7 +1692,7 @@ out:
if (!PageUptodate(pages[i])) if (!PageUptodate(pages[i]))
SetPageUptodate(pages[i]); SetPageUptodate(pages[i]);
if (!PageDirty(pages[i])) if (!PageDirty(pages[i]))
set_page_dirty(pages[i]); bch2_set_page_dirty(pages[i]);
unlock_page(pages[i]); unlock_page(pages[i]);
put_page(pages[i]); put_page(pages[i]);
} }
@ -1812,6 +1908,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
struct address_space *mapping = req->ki_filp->f_mapping; struct address_space *mapping = req->ki_filp->f_mapping;
struct bch_inode_info *inode = dio->iop.inode; struct bch_inode_info *inode = dio->iop.inode;
struct bio *bio = &dio->iop.op.wbio.bio; struct bio *bio = &dio->iop.op.wbio.bio;
struct bvec_iter_all iter;
struct bio_vec *bv; struct bio_vec *bv;
loff_t offset; loff_t offset;
bool sync; bool sync;
@ -1889,7 +1986,7 @@ err_wait_io:
closure_sync(&dio->cl); closure_sync(&dio->cl);
loop: loop:
bio_for_each_segment_all(bv, bio, i) bio_for_each_segment_all(bv, bio, i, iter)
put_page(bv->bv_page); put_page(bv->bv_page);
if (!dio->iter.count || dio->iop.op.error) if (!dio->iter.count || dio->iop.op.error)
break; break;
@ -2223,7 +2320,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
zero_user_segment(page, 0, end_offset); zero_user_segment(page, 0, end_offset);
if (!PageDirty(page)) if (!PageDirty(page))
set_page_dirty(page); bch2_set_page_dirty(page);
unlock: unlock:
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
@ -2677,12 +2774,17 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
static bool page_is_data(struct page *page) static bool page_is_data(struct page *page)
{ {
EBUG_ON(!PageLocked(page)); struct bch_page_state *s = bch2_page_state(page);
unsigned i;
/* XXX: should only have to check PageDirty */ if (!s)
return PagePrivate(page) && return false;
(page_state(page)->sectors ||
page_state(page)->dirty_sectors); for (i = 0; i < PAGE_SECTORS; i++)
if (s->s[i].state >= SECTOR_DIRTY)
return true;
return false;
} }
static loff_t bch2_next_pagecache_data(struct inode *vinode, static loff_t bch2_next_pagecache_data(struct inode *vinode,

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_FS_IO_H #ifndef _BCACHEFS_FS_IO_H
#define _BCACHEFS_FS_IO_H #define _BCACHEFS_FS_IO_H
@ -8,8 +9,6 @@
#include <linux/uio.h> #include <linux/uio.h>
int bch2_set_page_dirty(struct page *);
int bch2_writepage(struct page *, struct writeback_control *); int bch2_writepage(struct page *, struct writeback_control *);
int bch2_readpage(struct file *, struct page *); int bch2_readpage(struct file *, struct page *);
@ -33,7 +32,7 @@ long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
loff_t bch2_llseek(struct file *, loff_t, int); loff_t bch2_llseek(struct file *, loff_t, int);
int bch2_page_mkwrite(struct vm_fault *); vm_fault_t bch2_page_mkwrite(struct vm_fault *);
void bch2_invalidatepage(struct page *, unsigned int, unsigned int); void bch2_invalidatepage(struct page *, unsigned int, unsigned int);
int bch2_releasepage(struct page *, gfp_t); int bch2_releasepage(struct page *, gfp_t);
int bch2_migrate_page(struct address_space *, struct page *, int bch2_migrate_page(struct address_space *, struct page *,

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS #ifndef NO_BCACHEFS_FS
#include "bcachefs.h" #include "bcachefs.h"
@ -204,7 +205,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
if (ret) if (ret)
goto err2; goto err2;
bch2_lock_inodes(src, dst); bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst);
if (inode_attr_changing(src, dst, Inode_opt_project)) { if (inode_attr_changing(src, dst, Inode_opt_project)) {
ret = bch2_fs_quota_transfer(c, dst, ret = bch2_fs_quota_transfer(c, dst,
@ -217,7 +218,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0); ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0);
err3: err3:
bch2_unlock_inodes(src, dst); bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst);
/* return true if we did work */ /* return true if we did work */
if (ret >= 0) if (ret >= 0)

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_FS_IOCTL_H #ifndef _BCACHEFS_FS_IOCTL_H
#define _BCACHEFS_FS_IOCTL_H #define _BCACHEFS_FS_IOCTL_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS #ifndef NO_BCACHEFS_FS
#include "bcachefs.h" #include "bcachefs.h"
@ -593,7 +594,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
struct btree_trans trans; struct btree_trans trans;
int ret; int ret;
bch2_lock_inodes(dir, inode); bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
bch2_trans_init(&trans, c, 4, 1024); bch2_trans_init(&trans, c, 4, 1024);
retry: retry:
bch2_trans_begin(&trans); bch2_trans_begin(&trans);
@ -626,7 +627,7 @@ retry:
ATTR_MTIME); ATTR_MTIME);
err: err:
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
bch2_unlock_inodes(dir, inode); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
return ret; return ret;
} }
@ -803,7 +804,8 @@ static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry,
bch2_trans_init(&trans, c, 8, 2048); bch2_trans_init(&trans, c, 8, 2048);
bch2_lock_inodes(i.src_dir, bch2_lock_inodes(INODE_UPDATE_LOCK,
i.src_dir,
i.dst_dir, i.dst_dir,
i.src_inode, i.src_inode,
i.dst_inode); i.dst_inode);
@ -901,7 +903,8 @@ err:
1 << QTYP_PRJ, 1 << QTYP_PRJ,
KEY_TYPE_QUOTA_NOCHECK); KEY_TYPE_QUOTA_NOCHECK);
bch2_unlock_inodes(i.src_dir, bch2_unlock_inodes(INODE_UPDATE_LOCK,
i.src_dir,
i.dst_dir, i.dst_dir,
i.src_inode, i.src_inode,
i.dst_inode); i.dst_inode);
@ -1263,7 +1266,7 @@ static const struct address_space_operations bch_address_space_operations = {
.readpage = bch2_readpage, .readpage = bch2_readpage,
.writepages = bch2_writepages, .writepages = bch2_writepages,
.readpages = bch2_readpages, .readpages = bch2_readpages,
.set_page_dirty = bch2_set_page_dirty, .set_page_dirty = __set_page_dirty_nobuffers,
.write_begin = bch2_write_begin, .write_begin = bch2_write_begin,
.write_end = bch2_write_end, .write_end = bch2_write_end,
.invalidatepage = bch2_invalidatepage, .invalidatepage = bch2_invalidatepage,
@ -1731,7 +1734,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
sb->s_bdi->congested_fn = bch2_congested; sb->s_bdi->congested_fn = bch2_congested;
sb->s_bdi->congested_data = c; sb->s_bdi->congested_data = c;
sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
for_each_online_member(ca, c, i) { for_each_online_member(ca, c, i) {
struct block_device *bdev = ca->disk_sb.bdev; struct block_device *bdev = ca->disk_sb.bdev;

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_FS_H #ifndef _BCACHEFS_FS_H
#define _BCACHEFS_FS_H #define _BCACHEFS_FS_H
@ -35,24 +36,42 @@ static inline int ptrcmp(void *l, void *r)
return cmp_int(l, r); return cmp_int(l, r);
} }
#define __bch2_lock_inodes(_lock, ...) \ enum bch_inode_lock_op {
INODE_LOCK = (1U << 0),
INODE_UPDATE_LOCK = (1U << 1),
};
#define bch2_lock_inodes(_locks, ...) \
do { \ do { \
struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \
unsigned i; \ unsigned i; \
\ \
bubble_sort(&a[1], ARRAY_SIZE(a) - 1 , ptrcmp); \ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \
\ \
for (i = ARRAY_SIZE(a) - 1; a[i]; --i) \ for (i = 1; i < ARRAY_SIZE(a); i++) \
if (a[i] != a[i - 1]) { \ if (a[i] != a[i - 1]) { \
if (_lock) \ if (_locks & INODE_LOCK) \
down_write_nested(&a[i]->v.i_rwsem, i); \
if (_locks & INODE_UPDATE_LOCK) \
mutex_lock_nested(&a[i]->ei_update_lock, i);\ mutex_lock_nested(&a[i]->ei_update_lock, i);\
else \
mutex_unlock(&a[i]->ei_update_lock); \
} \ } \
} while (0) } while (0)
#define bch2_lock_inodes(...) __bch2_lock_inodes(true, __VA_ARGS__) #define bch2_unlock_inodes(_locks, ...) \
#define bch2_unlock_inodes(...) __bch2_lock_inodes(false, __VA_ARGS__) do { \
struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \
unsigned i; \
\
bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \
\
for (i = 1; i < ARRAY_SIZE(a); i++) \
if (a[i] != a[i - 1]) { \
if (_locks & INODE_LOCK) \
up_write(&a[i]->v.i_rwsem); \
if (_locks & INODE_UPDATE_LOCK) \
mutex_unlock(&a[i]->ei_update_lock); \
} \
} while (0)
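bch2_lock_inodes()/bch2_unlock_inodes() now take a mask of enum bch_inode_lock_op, so each call site says which locks it wants; the bubble sort plus nested lock classes keep lock ordering consistent when several inodes are passed. Usage matching the call sites converted in this commit (src and dst stand for whatever inodes the caller holds):

	bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst);

	/* ... update both inodes under their ei_update_lock ... */

	bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst);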
static inline struct bch_inode_info *file_bch_inode(struct file *file) static inline struct bch_inode_info *file_bch_inode(struct file *file)
{ {

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "btree_update.h" #include "btree_update.h"

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_FSCK_H #ifndef _BCACHEFS_FSCK_H
#define _BCACHEFS_FSCK_H #define _BCACHEFS_FSCK_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "bkey_methods.h" #include "bkey_methods.h"
@ -245,6 +246,9 @@ const char *bch2_inode_generation_invalid(const struct bch_fs *c,
void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k) struct bkey_s_c k)
{ {
struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k);
pr_buf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation));
} }
void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_INODE_H #ifndef _BCACHEFS_INODE_H
#define _BCACHEFS_INODE_H #define _BCACHEFS_INODE_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/* /*
* Some low level IO code, and hacks for various block layer limitations * Some low level IO code, and hacks for various block layer limitations
* *
@ -121,23 +122,23 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio) void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
{ {
struct bvec_iter_all iter;
struct bio_vec *bv; struct bio_vec *bv;
unsigned i; unsigned i;
bio_for_each_segment_all(bv, bio, i) bio_for_each_segment_all(bv, bio, i, iter)
if (bv->bv_page != ZERO_PAGE(0)) if (bv->bv_page != ZERO_PAGE(0))
mempool_free(bv->bv_page, &c->bio_bounce_pages); mempool_free(bv->bv_page, &c->bio_bounce_pages);
bio->bi_vcnt = 0; bio->bi_vcnt = 0;
} }
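bio_for_each_segment_all() now takes a struct bvec_iter_all as a fourth argument, matching the 5.1-era block layer, so every converted loop declares the iterator next to the bio_vec pointer. The shape all of these call sites follow (the loop body varies):

	struct bvec_iter_all iter;
	struct bio_vec *bv;
	unsigned i;

	bio_for_each_segment_all(bv, bio, i, iter)
		put_page(bv->bv_page);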
static void bch2_bio_alloc_page_pool(struct bch_fs *c, struct bio *bio, static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool)
bool *using_mempool)
{ {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt++]; struct page *page;
if (likely(!*using_mempool)) { if (likely(!*using_mempool)) {
bv->bv_page = alloc_page(GFP_NOIO); page = alloc_page(GFP_NOIO);
if (unlikely(!bv->bv_page)) { if (unlikely(!page)) {
mutex_lock(&c->bio_bounce_pages_lock); mutex_lock(&c->bio_bounce_pages_lock);
*using_mempool = true; *using_mempool = true;
goto pool_alloc; goto pool_alloc;
@ -145,57 +146,29 @@ static void bch2_bio_alloc_page_pool(struct bch_fs *c, struct bio *bio,
} }
} else { } else {
pool_alloc: pool_alloc:
bv->bv_page = mempool_alloc(&c->bio_bounce_pages, GFP_NOIO); page = mempool_alloc(&c->bio_bounce_pages, GFP_NOIO);
} }
bv->bv_len = PAGE_SIZE; return page;
bv->bv_offset = 0;
} }
void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
size_t bytes) size_t size)
{ {
bool using_mempool = false; bool using_mempool = false;
BUG_ON(DIV_ROUND_UP(bytes, PAGE_SIZE) > bio->bi_max_vecs); while (size) {
struct page *page = __bio_alloc_page_pool(c, &using_mempool);
unsigned len = min(PAGE_SIZE, size);
bio->bi_iter.bi_size = bytes; BUG_ON(!bio_add_page(bio, page, len, 0));
size -= len;
while (bio->bi_vcnt < DIV_ROUND_UP(bytes, PAGE_SIZE)) }
bch2_bio_alloc_page_pool(c, bio, &using_mempool);
if (using_mempool) if (using_mempool)
mutex_unlock(&c->bio_bounce_pages_lock); mutex_unlock(&c->bio_bounce_pages_lock);
} }
void bch2_bio_alloc_more_pages_pool(struct bch_fs *c, struct bio *bio,
size_t bytes)
{
while (bio->bi_vcnt < DIV_ROUND_UP(bytes, PAGE_SIZE)) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
BUG_ON(bio->bi_vcnt >= bio->bi_max_vecs);
bv->bv_page = alloc_page(GFP_NOIO);
if (!bv->bv_page) {
/*
* We already allocated from mempool, we can't allocate from it again
* without freeing the pages we already allocated or else we could
* deadlock:
*/
bch2_bio_free_pages_pool(c, bio);
bch2_bio_alloc_pages_pool(c, bio, bytes);
return;
}
bv->bv_len = PAGE_SIZE;
bv->bv_offset = 0;
bio->bi_vcnt++;
}
bio->bi_iter.bi_size = bytes;
}
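The bounce-page pool now builds bios with bio_add_page() instead of filling bv_page/bv_len/bi_vcnt by hand, which keeps bi_vcnt and bi_iter.bi_size in sync and makes bch2_bio_alloc_more_pages_pool() unnecessary. Sketch of the intended pairing (size is a placeholder; the bio must have been initialized with enough bio_vecs for it):

	/* append up to 'size' bytes of pool pages; sets bi_iter.bi_size */
	bch2_bio_alloc_pages_pool(c, bio, size);

	/* ... do the I/O ... */

	/* return the pages to the pool / mempool afterwards */
	bch2_bio_free_pages_pool(c, bio);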
/* Writes */ /* Writes */
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
@ -481,8 +454,7 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
wbio->bio.bi_opf = src->bi_opf; wbio->bio.bi_opf = src->bi_opf;
if (buf) { if (buf) {
bio->bi_iter.bi_size = output_available; bch2_bio_map(bio, buf, output_available);
bch2_bio_map(bio, buf);
return bio; return bio;
} }
@ -492,31 +464,17 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
* We can't use mempool for more than c->sb.encoded_extent_max * We can't use mempool for more than c->sb.encoded_extent_max
* worth of pages, but we'd like to allocate more if we can: * worth of pages, but we'd like to allocate more if we can:
*/ */
while (bio->bi_iter.bi_size < output_available) { bch2_bio_alloc_pages_pool(c, bio,
unsigned len = min_t(unsigned, PAGE_SIZE, min_t(unsigned, output_available,
output_available - bio->bi_iter.bi_size); c->sb.encoded_extent_max << 9));
struct page *p;
p = alloc_page(GFP_NOIO); if (bio->bi_iter.bi_size < output_available)
if (!p) { *page_alloc_failed =
unsigned pool_max = bch2_bio_alloc_pages(bio,
min_t(unsigned, output_available, output_available -
c->sb.encoded_extent_max << 9); bio->bi_iter.bi_size,
GFP_NOFS) != 0;
if (bio_sectors(bio) < pool_max)
bch2_bio_alloc_pages_pool(c, bio, pool_max);
break;
}
bio->bi_io_vec[bio->bi_vcnt++] = (struct bio_vec) {
.bv_page = p,
.bv_len = len,
.bv_offset = 0,
};
bio->bi_iter.bi_size += len;
}
*page_alloc_failed = bio->bi_vcnt < pages;
return bio; return bio;
} }
@ -820,12 +778,6 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp)
} }
dst->bi_iter.bi_size = total_output; dst->bi_iter.bi_size = total_output;
/* Free unneeded pages after compressing: */
if (to_wbio(dst)->bounce)
while (dst->bi_vcnt > DIV_ROUND_UP(dst->bi_iter.bi_size, PAGE_SIZE))
mempool_free(dst->bi_io_vec[--dst->bi_vcnt].bv_page,
&c->bio_bounce_pages);
do_write: do_write:
/* might have done a realloc... */ /* might have done a realloc... */
@ -956,7 +908,6 @@ void bch2_write(struct closure *cl)
BUG_ON(!op->nr_replicas); BUG_ON(!op->nr_replicas);
BUG_ON(!op->write_point.v); BUG_ON(!op->write_point.v);
BUG_ON(!bkey_cmp(op->pos, POS_MAX)); BUG_ON(!bkey_cmp(op->pos, POS_MAX));
BUG_ON(bio_sectors(&op->wbio.bio) > U16_MAX);
op->start_time = local_clock(); op->start_time = local_clock();
@ -1003,23 +954,23 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
struct bch_io_opts opts, struct bch_io_opts opts,
unsigned flags) unsigned flags)
{ {
if (!opts.promote_target) if (!bkey_extent_is_data(k.k))
return false; return false;
if (!(flags & BCH_READ_MAY_PROMOTE)) if (!(flags & BCH_READ_MAY_PROMOTE))
return false; return false;
if (percpu_ref_is_dying(&c->writes)) if (!opts.promote_target)
return false; return false;
if (!bkey_extent_is_data(k.k)) if (bch2_extent_has_target(c, bkey_s_c_to_extent(k),
opts.promote_target))
return false; return false;
if (bch2_extent_has_target(c, bkey_s_c_to_extent(k), opts.promote_target)) if (bch2_target_congested(c, opts.promote_target)) {
return false; /* XXX trace this */
if (bch2_target_congested(c, opts.promote_target))
return false; return false;
}
if (rhashtable_lookup_fast(&c->promote_table, &pos, if (rhashtable_lookup_fast(&c->promote_table, &pos,
bch_promote_params)) bch_promote_params))
@ -1080,22 +1031,18 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
struct bpos pos, struct bpos pos,
struct extent_ptr_decoded *pick, struct extent_ptr_decoded *pick,
struct bch_io_opts opts, struct bch_io_opts opts,
unsigned rbio_sectors, unsigned sectors,
struct bch_read_bio **rbio) struct bch_read_bio **rbio)
{ {
struct promote_op *op = NULL; struct promote_op *op = NULL;
struct bio *bio; struct bio *bio;
unsigned rbio_pages = DIV_ROUND_UP(rbio_sectors, PAGE_SECTORS); unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
/* data might have to be decompressed in the write path: */
unsigned wbio_pages = DIV_ROUND_UP(pick->crc.uncompressed_size,
PAGE_SECTORS);
int ret; int ret;
if (!percpu_ref_tryget(&c->writes)) if (!percpu_ref_tryget(&c->writes))
return NULL; return NULL;
op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * wbio_pages, op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO);
GFP_NOIO);
if (!op) if (!op)
goto err; goto err;
@ -1103,37 +1050,32 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
op->pos = pos; op->pos = pos;
/* /*
* promotes require bouncing, but if the extent isn't * We don't use the mempool here because extents that aren't
* checksummed/compressed it might be too big for the mempool: * checksummed or compressed can be too big for the mempool:
*/ */
if (rbio_sectors > c->sb.encoded_extent_max) { *rbio = kzalloc(sizeof(struct bch_read_bio) +
*rbio = kzalloc(sizeof(struct bch_read_bio) + sizeof(struct bio_vec) * pages,
sizeof(struct bio_vec) * rbio_pages, GFP_NOIO);
GFP_NOIO); if (!*rbio)
if (!*rbio) goto err;
goto err;
rbio_init(&(*rbio)->bio, opts); rbio_init(&(*rbio)->bio, opts);
bio_init(&(*rbio)->bio, (*rbio)->bio.bi_inline_vecs, bio_init(&(*rbio)->bio, (*rbio)->bio.bi_inline_vecs, pages);
rbio_pages);
(*rbio)->bio.bi_iter.bi_size = rbio_sectors << 9; if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9,
bch2_bio_map(&(*rbio)->bio, NULL); GFP_NOIO))
goto err;
if (bch2_bio_alloc_pages(&(*rbio)->bio, GFP_NOIO)) (*rbio)->bounce = true;
goto err; (*rbio)->split = true;
(*rbio)->kmalloc = true;
(*rbio)->bounce = true;
(*rbio)->split = true;
(*rbio)->kmalloc = true;
}
if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash, if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
bch_promote_params)) bch_promote_params))
goto err; goto err;
bio = &op->write.op.wbio.bio; bio = &op->write.op.wbio.bio;
bio_init(bio, bio->bi_inline_vecs, wbio_pages); bio_init(bio, bio->bi_inline_vecs, pages);
ret = bch2_migrate_write_init(c, &op->write, ret = bch2_migrate_write_init(c, &op->write,
writepoint_hashed((unsigned long) current), writepoint_hashed((unsigned long) current),
@ -1167,8 +1109,9 @@ static inline struct promote_op *promote_alloc(struct bch_fs *c,
bool *read_full) bool *read_full)
{ {
bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents); bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
/* data might have to be decompressed in the write path: */
unsigned sectors = promote_full unsigned sectors = promote_full
? pick->crc.compressed_size ? max(pick->crc.compressed_size, pick->crc.live_size)
: bvec_iter_sectors(iter); : bvec_iter_sectors(iter);
struct bpos pos = promote_full struct bpos pos = promote_full
? bkey_start_pos(k.k) ? bkey_start_pos(k.k)
@ -1703,7 +1646,16 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
} }
if (rbio) { if (rbio) {
/* promote already allocated bounce rbio */ /*
* promote already allocated bounce rbio:
* promote needs to allocate a bio big enough for uncompressing
* data in the write path, but we're not going to use it all
* here:
*/
BUG_ON(rbio->bio.bi_iter.bi_size <
pick.crc.compressed_size << 9);
rbio->bio.bi_iter.bi_size =
pick.crc.compressed_size << 9;
} else if (bounce) { } else if (bounce) {
unsigned sectors = pick.crc.compressed_size; unsigned sectors = pick.crc.compressed_size;
@ -1767,9 +1719,9 @@ noclone:
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ); bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
if (likely(!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT)))) { if (likely(!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT)))) {
bio_inc_remaining(&orig->bio); bio_inc_remaining(&orig->bio);

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_IO_H #ifndef _BCACHEFS_IO_H
#define _BCACHEFS_IO_H #define _BCACHEFS_IO_H
@ -12,7 +13,6 @@
void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *); void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t); void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
void bch2_bio_alloc_more_pages_pool(struct bch_fs *, struct bio *, size_t);
void bch2_latency_acct(struct bch_dev *, u64, int); void bch2_latency_acct(struct bch_dev *, u64, int);

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_IO_TYPES_H #ifndef _BCACHEFS_IO_TYPES_H
#define _BCACHEFS_IO_TYPES_H #define _BCACHEFS_IO_TYPES_H

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
/* /*
* bcachefs journalling code, for btree insertions * bcachefs journalling code, for btree insertions
* *
@ -820,10 +821,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
} }
if (c) { if (c) {
percpu_down_read_preempt_disable(&c->mark_lock); percpu_down_read(&c->mark_lock);
spin_lock(&c->journal.lock); spin_lock(&c->journal.lock);
} else {
preempt_disable();
} }
pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0; pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
@ -852,9 +851,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
if (c) { if (c) {
spin_unlock(&c->journal.lock); spin_unlock(&c->journal.lock);
percpu_up_read_preempt_enable(&c->mark_lock); percpu_up_read(&c->mark_lock);
} else {
preempt_enable();
} }
if (!new_fs) if (!new_fs)

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_JOURNAL_H #ifndef _BCACHEFS_JOURNAL_H
#define _BCACHEFS_JOURNAL_H #define _BCACHEFS_JOURNAL_H

View File

@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "alloc_foreground.h"
 #include "buckets.h"
@@ -494,9 +495,8 @@ reread:
 					sectors_read << 9));
 		bio_set_dev(bio, ca->disk_sb.bdev);
 		bio->bi_iter.bi_sector = offset;
-		bio->bi_iter.bi_size = sectors_read << 9;
 		bio_set_op_attrs(bio, REQ_OP_READ, 0);
-		bch2_bio_map(bio, buf->data);
+		bch2_bio_map(bio, buf->data, sectors_read << 9);
 		ret = submit_bio_wait(bio);
 		bio_put(bio);
@@ -1086,12 +1086,11 @@ void bch2_journal_write(struct closure *cl)
 		bio_reset(bio);
 		bio_set_dev(bio, ca->disk_sb.bdev);
 		bio->bi_iter.bi_sector = ptr->offset;
-		bio->bi_iter.bi_size = sectors << 9;
 		bio->bi_end_io = journal_write_endio;
 		bio->bi_private = ca;
 		bio_set_op_attrs(bio, REQ_OP_WRITE,
 				 REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
-		bch2_bio_map(bio, jset);
+		bch2_bio_map(bio, jset, sectors << 9);
 		trace_journal_write(bio);
 		closure_bio_submit(bio, cl);
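Both journal_io.c hunks above show the same interface change: bch2_bio_map() now takes the byte count, so callers stop assigning bio->bi_iter.bi_size themselves (that the helper now sets the size internally is implied by the hunks rather than shown). Side by side, for the write path:

	/* before */
	bio->bi_iter.bi_size = sectors << 9;
	bch2_bio_map(bio, jset);

	/* after: the length is passed to the mapping helper directly */
	bch2_bio_map(bio, jset, sectors << 9);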

View File

@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_JOURNAL_IO_H
 #define _BCACHEFS_JOURNAL_IO_H

View File

@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "journal.h"

View File

@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_JOURNAL_RECLAIM_H
 #define _BCACHEFS_JOURNAL_RECLAIM_H

View File

@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "btree_iter.h"

View File

@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
 #define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H

View File

@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_JOURNAL_TYPES_H
 #define _BCACHEFS_JOURNAL_TYPES_H

View File

@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "keylist.h"

View File

@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_KEYLIST_H
 #define _BCACHEFS_KEYLIST_H

View File

@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_KEYLIST_TYPES_H
 #define _BCACHEFS_KEYLIST_TYPES_H

View File

@@ -1,7 +0,0 @@
#ifndef __BCH_LZ4_H__
#define __BCH_LZ4_H__
int bch2_lz4_decompress(const unsigned char *src, size_t *src_len,
unsigned char *dest, size_t actual_dest_len);
#endif

View File

@@ -1,277 +0,0 @@
/*
* LZ4 Decompressor for Linux kernel
*
* Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
*
* Based on LZ4 implementation by Yann Collet.
*
* LZ4 - Fast LZ compression algorithm
* Copyright (C) 2011-2012, Yann Collet.
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at :
* - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
* - LZ4 source repository : http://code.google.com/p/lz4/
*/
#ifndef STATIC
#include <linux/module.h>
#include <linux/kernel.h>
#endif
#include "lz4.h"
/*
* Detects 64 bits mode
*/
#if defined(CONFIG_64BIT)
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif
#include <asm/unaligned.h>
#include <linux/log2.h>
#include <linux/string.h>
#define A32(_p) get_unaligned((u32 *) (_p))
#define A16(_p) get_unaligned((u16 *) (_p))
#define GET_LE16_ADVANCE(_src) \
({ \
u16 _r = get_unaligned_le16(_src); \
(_src) += 2; \
_r; \
})
#define PUT_LE16_ADVANCE(_dst, _v) \
do { \
put_unaligned_le16((_v), (_dst)); \
(_dst) += 2; \
} while (0)
#define LENGTH_LONG 15
#define COPYLENGTH 8
#define ML_BITS 4
#define ML_MASK ((1U << ML_BITS) - 1)
#define RUN_BITS (8 - ML_BITS)
#define RUN_MASK ((1U << RUN_BITS) - 1)
#define MEMORY_USAGE 14
#define MINMATCH 4
#define SKIPSTRENGTH 6
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH + MINMATCH)
#define MINLENGTH (MFLIMIT + 1)
#define MAXD_LOG 16
#define MAXD (1 << MAXD_LOG)
#define MAXD_MASK (u32)(MAXD - 1)
#define MAX_DISTANCE (MAXD - 1)
#define HASH_LOG (MAXD_LOG - 1)
#define HASHTABLESIZE (1 << HASH_LOG)
#define MAX_NB_ATTEMPTS 256
#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1))
#define __HASH_VALUE(p, bits) \
(((A32(p)) * 2654435761U) >> (32 - (bits)))
#define HASH_VALUE(p) __HASH_VALUE(p, HASH_LOG)
#define MEMCPY_ADVANCE(_dst, _src, length) \
do { \
typeof(length) _length = (length); \
memcpy(_dst, _src, _length); \
_src += _length; \
_dst += _length; \
} while (0)
#define MEMCPY_ADVANCE_BYTES(_dst, _src, _length) \
do { \
const u8 *_end = (_src) + (_length); \
while ((_src) < _end) \
*_dst++ = *_src++; \
} while (0)
#define STEPSIZE __SIZEOF_LONG__
#define LZ4_COPYPACKET(_src, _dst) \
do { \
MEMCPY_ADVANCE(_dst, _src, STEPSIZE); \
MEMCPY_ADVANCE(_dst, _src, COPYLENGTH - STEPSIZE);\
} while (0)
/*
* Equivalent to MEMCPY_ADVANCE - except may overrun @_dst and @_src by
* COPYLENGTH:
*
* Note: src and dst may overlap (with src < dst) - we must do the copy in
* STEPSIZE chunks for correctness
*
* Note also: length may be negative - we must not call memcpy if length is
* negative, but still adjust dst and src by length
*/
#define MEMCPY_ADVANCE_CHUNKED(_dst, _src, _length) \
do { \
u8 *_end = (_dst) + (_length); \
while ((_dst) < _end) \
LZ4_COPYPACKET(_src, _dst); \
_src -= (_dst) - _end; \
_dst = _end; \
} while (0)
#define MEMCPY_ADVANCE_CHUNKED_NOFIXUP(_dst, _src, _end)\
do { \
while ((_dst) < (_end)) \
LZ4_COPYPACKET((_src), (_dst)); \
} while (0)
static const int dec32table[8] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
static const int dec64table[8] = {0, 0, 0, -1, 0, 1, 2, 3};
#else
static const int dec64table[8] = {0, 0, 0, 0, 0, 0, 0, 0};
#endif
static inline size_t get_length(const u8 **ip, size_t length)
{
if (length == LENGTH_LONG) {
size_t len;
do {
length += (len = *(*ip)++);
} while (len == 255);
}
return length;
}
static int lz4_uncompress(const u8 *source, u8 *dest, int osize)
{
const u8 *ip = source;
const u8 *ref;
u8 *op = dest;
u8 * const oend = op + osize;
u8 *cpy;
unsigned token, offset;
ssize_t length;
while (1) {
/* get runlength */
token = *ip++;
length = get_length(&ip, token >> ML_BITS);
/* copy literals */
if (unlikely(op + length > oend - COPYLENGTH)) {
/*
* Error: not enough place for another match
* (min 4) + 5 literals
*/
if (op + length != oend)
goto _output_error;
MEMCPY_ADVANCE(op, ip, length);
break; /* EOF */
}
MEMCPY_ADVANCE_CHUNKED(op, ip, length);
/* get match offset */
offset = GET_LE16_ADVANCE(ip);
ref = op - offset;
/* Error: offset create reference outside destination buffer */
if (unlikely(ref < (u8 *const) dest))
goto _output_error;
/* get match length */
length = get_length(&ip, token & ML_MASK);
length += MINMATCH;
/* copy first STEPSIZE bytes of match: */
if (unlikely(offset < STEPSIZE)) {
MEMCPY_ADVANCE_BYTES(op, ref, 4);
ref -= dec32table[offset];
memcpy(op, ref, 4);
op += STEPSIZE - 4;
ref -= dec64table[offset];
} else {
MEMCPY_ADVANCE(op, ref, STEPSIZE);
}
length -= STEPSIZE;
/*
* Note - length could have been < STEPSIZE; that's ok, length
* will now be negative and we'll just end up rewinding op:
*/
/* copy rest of match: */
cpy = op + length;
if (cpy > oend - COPYLENGTH) {
/* Error: request to write beyond destination buffer */
if (cpy > oend ||
ref + COPYLENGTH > oend)
goto _output_error;
#if !LZ4_ARCH64
if (op + COPYLENGTH > oend)
goto _output_error;
#endif
MEMCPY_ADVANCE_CHUNKED_NOFIXUP(op, ref, oend - COPYLENGTH);
/* op could be > cpy here */
while (op < cpy)
*op++ = *ref++;
op = cpy;
/*
* Check EOF (should never happen, since last 5 bytes
* are supposed to be literals)
*/
if (op == oend)
goto _output_error;
} else {
MEMCPY_ADVANCE_CHUNKED(op, ref, length);
}
}
/* end of decoding */
return ip - source;
/* write overflow error detected */
_output_error:
return -1;
}
int bch2_lz4_decompress(const unsigned char *src, size_t *src_len,
unsigned char *dest, size_t actual_dest_len)
{
int ret = -1;
int input_len = 0;
input_len = lz4_uncompress(src, dest, actual_dest_len);
if (input_len < 0)
goto exit_0;
*src_len = input_len;
return 0;
exit_0:
return ret;
}
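The private LZ4 decompressor copy above is deleted outright; this diff does not show what callers use instead. Assuming they switch to the kernel's shared LZ4 code (an assumption, not something visible here), a replacement call could look like the following hypothetical helper:

	#include <linux/lz4.h>
	#include <linux/errno.h>

	/* Hypothetical, for illustration only: decompress via the kernel's
	 * shared LZ4 implementation instead of the removed private copy.
	 * LZ4_decompress_safe() returns bytes written to dst, or < 0 on error. */
	static int decompress_with_kernel_lz4(const char *src, size_t src_len,
					      char *dst, size_t dst_len)
	{
		int ret = LZ4_decompress_safe(src, dst, src_len, dst_len);

		return ret < 0 ? -EIO : 0;
	}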

View File

@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Code for moving data off a device.
  */

View File

@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_MIGRATE_H
 #define _BCACHEFS_MIGRATE_H

View File

@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "alloc_foreground.h"
@@ -300,12 +301,13 @@ static void move_free(struct closure *cl)
 {
 	struct moving_io *io = container_of(cl, struct moving_io, cl);
 	struct moving_context *ctxt = io->write.ctxt;
+	struct bvec_iter_all iter;
 	struct bio_vec *bv;
 	int i;
 	bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
-	bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i)
+	bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i, iter)
 		if (bv->bv_page)
 			__free_page(bv->bv_page);
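The extra argument in the hunk above comes from the newer bio_for_each_segment_all() interface, which keeps its iteration state in a struct bvec_iter_all supplied by the caller. The cleanup loop as it reads after the change (same identifiers as the hunk):

	struct bvec_iter_all iter;
	struct bio_vec *bv;
	int i;

	bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i, iter)
		if (bv->bv_page)
			__free_page(bv->bv_page);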
@@ -428,10 +430,9 @@ static int bch2_move_extent(struct bch_fs *c,
 	bio_init(&io->write.op.wbio.bio, io->bi_inline_vecs, pages);
 	bio_set_prio(&io->write.op.wbio.bio,
 		     IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
-	io->write.op.wbio.bio.bi_iter.bi_size = sectors << 9;
-	bch2_bio_map(&io->write.op.wbio.bio, NULL);
-	if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, GFP_KERNEL))
+	if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
+				 GFP_KERNEL))
 		goto err_free;
 	io->rbio.opts = io_opts;
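This mirrors the bch2_bio_map() change earlier in the diff: bch2_bio_alloc_pages() now receives the size, so the manual bi_iter.bi_size assignment and the bch2_bio_map(..., NULL) call are dropped. A before/after sketch using only names from the hunk (whether the helper also maps the pages itself is implied, not shown):

	/* before */
	io->write.op.wbio.bio.bi_iter.bi_size = sectors << 9;
	bch2_bio_map(&io->write.op.wbio.bio, NULL);
	if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, GFP_KERNEL))
		goto err_free;

	/* after */
	if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
				 GFP_KERNEL))
		goto err_free;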

View File

@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_MOVE_H
 #define _BCACHEFS_MOVE_H

View File

@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_MOVE_TYPES_H
 #define _BCACHEFS_MOVE_TYPES_H

View File

@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Moving/copying garbage collector
  *

View File

@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _BCACHEFS_MOVINGGC_H
 #define _BCACHEFS_MOVINGGC_H

View File

@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>

Some files were not shown because too many files have changed in this diff.