Update bcachefs sources to 3adea2c857dd bcachefs: vendorize bio_iov_iter_get_pages()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2025-11-03 16:34:04 -05:00
parent 39f7bed273
commit e9e05dd37f
30 changed files with 626 additions and 499 deletions
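
For orientation, a minimal sketch (not part of the diff) of the call-site shape this vendoring enables; the real hunks are in the direct-IO code at the end of this commit, and dio_get_pages() is a hypothetical wrapper used purely for illustration:

#include "vendor/bio_iov_iter.h"

/*
 * Before this commit, callers branched on LINUX_VERSION_CODE because upstream
 * bio_iov_iter_get_pages() gained a third argument (a length alignment mask)
 * in 6.18; the vendored helper keeps a single stable signature.
 */
static int dio_get_pages(struct bio *bio, struct iov_iter *iter)
{
	/* 0: no length-alignment requirement, matching the call sites in this commit */
	return bch2_bio_iov_iter_get_pages(bio, iter, 0);
}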


@ -1 +1 @@
d7354b97c0100568c5696b192e30335a3666062f
3adea2c857ddebd719c40731b113640d94984a9a


@ -107,6 +107,7 @@ bcachefs-y := \
util/two_state_shared_lock.o \
util/util.o \
util/varint.o \
vendor/bio_iov_iter.o \
vendor/closure.o \
vendor/min_heap.o \
vfs/fiemap.o \


@ -16,40 +16,25 @@ DEFINE_CLASS(bch_replicas_cpu, struct bch_replicas_cpu,
kfree(_T.entries),
(struct bch_replicas_cpu) {}, void)
static inline struct bch_replicas_entry_cpu *
static inline struct bch_replicas_entry_v1 *
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
{
return (void *) r->entries + r->entry_size * i;
}
static inline unsigned __cpu_replicas_entry_bytes(unsigned v1_bytes)
{
return offsetof(struct bch_replicas_entry_cpu, e) + v1_bytes;
}
static inline unsigned cpu_replicas_entry_bytes(struct bch_replicas_entry_cpu *e)
{
return __cpu_replicas_entry_bytes(replicas_entry_bytes(&e->e));
}
#define for_each_cpu_replicas_entry(_r, _i) \
for (struct bch_replicas_entry_cpu *_i = (_r)->entries; \
for (struct bch_replicas_entry_v1 *_i = (_r)->entries; \
(void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size; \
_i = (void *) (_i) + (_r)->entry_size)
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
struct bch_replicas_cpu *);
static int cpu_replicas_entry_cmp(const struct bch_replicas_entry_cpu *l,
const struct bch_replicas_entry_cpu *r,
size_t size)
/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
return memcmp(&l->e, &r->e, size - offsetof(struct bch_replicas_entry_cpu, e));
}
static int cpu_replicas_entry_cmp_r(const void *l, const void *r, const void *priv)
{
return cpu_replicas_entry_cmp(l, r, (size_t) priv);
size_t size = (size_t) priv;
return memcmp(l, r, size);
}
/* Replicas tracking - in memory: */
@ -75,8 +60,7 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
cpu_replicas_entry_cmp_r, NULL,
(void *)(size_t)r->entry_size);
bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}
static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
@ -101,13 +85,6 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
prt_printf(out, "]");
}
static void bch2_replicas_entry_cpu_to_text(struct printbuf *out,
struct bch_replicas_entry_cpu *e)
{
prt_printf(out, "ref=%u ", atomic_read(&e->ref));
bch2_replicas_entry_to_text(out, &e->e);
}
static int bch2_replicas_entry_sb_validate(struct bch_replicas_entry_v1 *r,
struct bch_sb *sb,
struct printbuf *err)
@ -174,7 +151,7 @@ void bch2_cpu_replicas_to_text(struct printbuf *out,
prt_printf(out, " ");
first = false;
bch2_replicas_entry_cpu_to_text(out, i);
bch2_replicas_entry_to_text(out, i);
}
}
@ -255,44 +232,6 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
bch2_replicas_entry_sort(e);
}
/* @l is bch_replicas_entry_v1, @r is bch_replicas_entry_cpu */
static int replicas_entry_search_cmp(const void *_l, const void *_r, const void *priv)
{
const struct bch_replicas_entry_v1 *l = _l;
const struct bch_replicas_entry_cpu *r = _r;
size_t size = (size_t) priv;
return memcmp(l, &r->e, size);
}
static inline struct bch_replicas_entry_cpu *
replicas_entry_search(struct bch_replicas_cpu *r,
struct bch_replicas_entry_v1 *search)
{
verify_replicas_entry(search);
size_t entry_size = replicas_entry_bytes(search);
int idx = likely(__cpu_replicas_entry_bytes(entry_size) <= r->entry_size)
? eytzinger0_find_r(r->entries, r->nr, r->entry_size,
replicas_entry_search_cmp,
(void *) entry_size, search)
: -1;
return idx >= 0 ? cpu_replicas_entry(r, idx) : NULL;
}
bool bch2_replicas_marked_locked(struct bch_fs *c,
struct bch_replicas_entry_v1 *search)
{
return !search->nr_devs || replicas_entry_search(&c->replicas, search);
}
bool bch2_replicas_marked(struct bch_fs *c,
struct bch_replicas_entry_v1 *search)
{
guard(percpu_read)(&c->mark_lock);
return bch2_replicas_marked_locked(c, search);
}
static struct bch_replicas_cpu
cpu_replicas_add_entry(struct bch_fs *c,
struct bch_replicas_cpu *old,
@ -301,12 +240,9 @@ cpu_replicas_add_entry(struct bch_fs *c,
struct bch_replicas_cpu new = {
.nr = old->nr + 1,
.entry_size = max_t(unsigned, old->entry_size,
__cpu_replicas_entry_bytes(replicas_entry_bytes(new_entry))),
replicas_entry_bytes(new_entry)),
};
/* alignment */
new.entry_size = round_up(new.entry_size, sizeof(atomic_t));
new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL);
if (!new.entries)
return new;
@ -316,7 +252,7 @@ cpu_replicas_add_entry(struct bch_fs *c,
cpu_replicas_entry(old, i),
old->entry_size);
memcpy(&cpu_replicas_entry(&new, old->nr)->e,
memcpy(cpu_replicas_entry(&new, old->nr),
new_entry,
replicas_entry_bytes(new_entry));
@ -324,56 +260,152 @@ cpu_replicas_add_entry(struct bch_fs *c,
return new;
}
static inline struct bch_replicas_entry_v1 *
replicas_entry_search(struct bch_replicas_cpu *r,
struct bch_replicas_entry_v1 *search)
{
verify_replicas_entry(search);
size_t entry_size = replicas_entry_bytes(search);
int idx = likely(entry_size <= r->entry_size)
? eytzinger0_find_r(r->entries, r->nr, r->entry_size,
bch2_memcmp, (void *) entry_size, search)
: -1;
return idx >= 0 ? cpu_replicas_entry(r, idx) : NULL;
}
bool bch2_replicas_marked_locked(struct bch_fs *c,
struct bch_replicas_entry_v1 *search)
{
return !search->nr_devs ||
(replicas_entry_search(&c->replicas, search) &&
(likely((!c->replicas_gc.entries)) ||
replicas_entry_search(&c->replicas_gc, search)));
}
bool bch2_replicas_marked(struct bch_fs *c,
struct bch_replicas_entry_v1 *search)
{
guard(percpu_read)(&c->mark_lock);
return bch2_replicas_marked_locked(c, search);
}
noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
struct bch_replicas_entry_v1 *new_entry,
unsigned ref)
struct bch_replicas_entry_v1 *new_entry)
{
verify_replicas_entry(new_entry);
CLASS(bch_replicas_cpu, new_r)();
CLASS(bch_replicas_cpu, new_gc)();
guard(mutex)(&c->sb_lock);
bool write_sb = false;
scoped_guard(percpu_write, &c->mark_lock) {
if (!replicas_entry_search(&c->replicas, new_entry)) {
CLASS(bch_replicas_cpu, new_r)();
new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
if (!new_r.entries)
return bch_err_throw(c, ENOMEM_cpu_replicas);
try(bch2_cpu_replicas_to_sb_replicas(c, &new_r));
swap(c->replicas, new_r);
write_sb = true;
}
atomic_add(ref, &replicas_entry_search(&c->replicas, new_entry)->ref);
if (c->replicas_gc.entries &&
!replicas_entry_search(&c->replicas_gc, new_entry)) {
new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
if (!new_gc.entries)
return bch_err_throw(c, ENOMEM_cpu_replicas);
}
/* After dropping mark_lock */
if (write_sb)
if (!replicas_entry_search(&c->replicas, new_entry)) {
new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
if (!new_r.entries)
return bch_err_throw(c, ENOMEM_cpu_replicas);
try(bch2_cpu_replicas_to_sb_replicas(c, &new_r));
}
if (!new_r.entries &&
!new_gc.entries)
return 0;
/* allocations done, now commit: */
if (new_r.entries)
bch2_write_super(c);
/* don't update in memory replicas until changes are persistent */
scoped_guard(percpu_write, &c->mark_lock) {
if (new_r.entries)
swap(c->replicas, new_r);
if (new_gc.entries)
swap(new_gc, c->replicas_gc);
}
return 0;
}
int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
return likely(bch2_replicas_marked(c, r))
? 0 : bch2_mark_replicas_slowpath(c, r, 0);
? 0 : bch2_mark_replicas_slowpath(c, r);
}
static void __replicas_entry_kill(struct bch_fs *c, struct bch_replicas_entry_cpu *e)
/*
* Old replicas_gc mechanism: only used for journal replicas entries now, should
* die at some point:
*/
int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
struct bch_replicas_cpu *r = &c->replicas;
lockdep_assert_held(&c->replicas_gc_lock);
memcpy(e, cpu_replicas_entry(r, --r->nr), r->entry_size);
bch2_cpu_replicas_sort(r);
guard(mutex)(&c->sb_lock);
scoped_guard(percpu_write, &c->mark_lock) {
ret = ret ?:
bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc);
if (!ret)
swap(c->replicas, c->replicas_gc);
int ret = bch2_cpu_replicas_to_sb_replicas(c, r);
if (WARN(ret, "bch2_cpu_replicas_to_sb_replicas() error: %s", bch2_err_str(ret)))
return;
kfree(c->replicas_gc.entries);
c->replicas_gc.entries = NULL;
}
if (!ret)
bch2_write_super(c);
return ret;
}
int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
lockdep_assert_held(&c->replicas_gc_lock);
guard(mutex)(&c->sb_lock);
BUG_ON(c->replicas_gc.entries);
c->replicas_gc.nr = 0;
c->replicas_gc.entry_size = 0;
for_each_cpu_replicas_entry(&c->replicas, e) {
/* Preserve unknown data types */
if (e->data_type >= BCH_DATA_NR ||
!(BIT(e->data_type) & typemask)) {
c->replicas_gc.nr++;
c->replicas_gc.entry_size =
max_t(unsigned, c->replicas_gc.entry_size,
replicas_entry_bytes(e));
}
}
c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
c->replicas_gc.entry_size,
GFP_KERNEL);
if (!c->replicas_gc.entries) {
bch_err(c, "error allocating c->replicas_gc");
return bch_err_throw(c, ENOMEM_replicas_gc);
}
unsigned i = 0;
for_each_cpu_replicas_entry(&c->replicas, e)
if (e->data_type >= BCH_DATA_NR ||
!(BIT(e->data_type) & typemask))
memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
e, c->replicas_gc.entry_size);
bch2_cpu_replicas_sort(&c->replicas_gc);
return 0;
}
void bch2_replicas_entry_kill(struct bch_fs *c, struct bch_replicas_entry_v1 *kill)
@ -381,95 +413,18 @@ void bch2_replicas_entry_kill(struct bch_fs *c, struct bch_replicas_entry_v1 *ki
lockdep_assert_held(&c->mark_lock);
lockdep_assert_held(&c->sb_lock);
struct bch_replicas_entry_cpu *e = replicas_entry_search(&c->replicas, kill);
struct bch_replicas_cpu *r = &c->replicas;
struct bch_replicas_entry_v1 *e = replicas_entry_search(&c->replicas, kill);
if (WARN(!e, "replicas entry not found in sb"))
return;
__replicas_entry_kill(c, e);
memcpy(e, cpu_replicas_entry(r, --r->nr), r->entry_size);
/* caller does write_super() after dropping mark_lock */
}
bch2_cpu_replicas_sort(r);
void bch2_replicas_entry_put_many(struct bch_fs *c, struct bch_replicas_entry_v1 *r, unsigned nr)
{
if (!r->nr_devs)
return;
BUG_ON(r->data_type != BCH_DATA_journal);
verify_replicas_entry(r);
scoped_guard(percpu_read, &c->mark_lock) {
struct bch_replicas_entry_cpu *e = replicas_entry_search(&c->replicas, r);
int v = atomic_sub_return(nr, &e->ref);
BUG_ON(v < 0);
if (v)
return;
}
guard(mutex)(&c->sb_lock);
scoped_guard(percpu_write, &c->mark_lock) {
struct bch_replicas_entry_cpu *e = replicas_entry_search(&c->replicas, r);
if (e && !atomic_read(&e->ref))
__replicas_entry_kill(c, e);
}
bch2_write_super(c);
}
static inline bool bch2_replicas_entry_get_inmem(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
guard(percpu_read)(&c->mark_lock);
struct bch_replicas_entry_cpu *e = replicas_entry_search(&c->replicas, r);
if (e)
atomic_inc(&e->ref);
return e != NULL;
}
int bch2_replicas_entry_get(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
if (!r->nr_devs)
return 0;
BUG_ON(r->data_type != BCH_DATA_journal);
verify_replicas_entry(r);
return bch2_replicas_entry_get_inmem(c, r)
? 0
: bch2_mark_replicas_slowpath(c, r, 1);
}
int bch2_replicas_gc_reffed(struct bch_fs *c)
{
bool write_sb = false;
guard(mutex)(&c->sb_lock);
scoped_guard(percpu_write, &c->mark_lock) {
unsigned dst = 0;
for (unsigned i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry_cpu *e =
cpu_replicas_entry(&c->replicas, i);
if (e->e.data_type != BCH_DATA_journal ||
atomic_read(&e->ref))
memcpy(cpu_replicas_entry(&c->replicas, dst++),
e,
c->replicas.entry_size);
}
if (c->replicas.nr != dst) {
c->replicas.nr = dst;
bch2_cpu_replicas_sort(&c->replicas);
try(bch2_cpu_replicas_to_sb_replicas(c, &c->replicas));
}
}
if (write_sb)
bch2_write_super(c);
return 0;
int ret = bch2_cpu_replicas_to_sb_replicas(c, r);
WARN(ret, "bch2_cpu_replicas_to_sb_replicas() error: %s", bch2_err_str(ret));
}
/* Replicas tracking - superblock: */
@ -486,9 +441,6 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
nr++;
}
entry_size = __cpu_replicas_entry_bytes(entry_size);
entry_size = round_up(entry_size, sizeof(atomic_t));
cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
if (!cpu_r->entries)
return -BCH_ERR_ENOMEM_cpu_replicas;
@ -496,10 +448,10 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
cpu_r->nr = nr;
cpu_r->entry_size = entry_size;
for_each_replicas_entry(sb_r, src) {
struct bch_replicas_entry_cpu *dst = cpu_replicas_entry(cpu_r, idx++);
memcpy(&dst->e, src, replicas_entry_bytes(src));
bch2_replicas_entry_sort(&dst->e);
for_each_replicas_entry(sb_r, e) {
struct bch_replicas_entry_v1 *dst = cpu_replicas_entry(cpu_r, idx++);
memcpy(dst, e, replicas_entry_bytes(e));
bch2_replicas_entry_sort(dst);
}
return 0;
@ -517,13 +469,9 @@ __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
nr++;
}
entry_size = __cpu_replicas_entry_bytes(entry_size);
entry_size += sizeof(struct bch_replicas_entry_v1) -
sizeof(struct bch_replicas_entry_v0);
entry_size = round_up(entry_size, sizeof(atomic_t));
cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
if (!cpu_r->entries)
return -BCH_ERR_ENOMEM_cpu_replicas;
@ -532,14 +480,14 @@ __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
cpu_r->entry_size = entry_size;
for_each_replicas_entry(sb_r, src) {
struct bch_replicas_entry_cpu *dst =
struct bch_replicas_entry_v1 *dst =
cpu_replicas_entry(cpu_r, idx++);
dst->e.data_type = src->data_type;
dst->e.nr_devs = src->nr_devs;
dst->e.nr_required = 1;
memcpy(dst->e.devs, src->devs, src->nr_devs);
bch2_replicas_entry_sort(&dst->e);
dst->data_type = src->data_type;
dst->nr_devs = src->nr_devs;
dst->nr_required = 1;
memcpy(dst->devs, src->devs, src->nr_devs);
bch2_replicas_entry_sort(dst);
}
return 0;
@ -547,12 +495,6 @@ __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
/*
* If called after fs is started (after journal read), we'll be blowing
* away refcounts
*/
BUG_ON(test_bit(BCH_FS_started, &c->flags));
struct bch_sb_field_replicas *sb_v1;
struct bch_sb_field_replicas_v0 *sb_v0;
CLASS(bch_replicas_cpu, new_r)();
@ -580,7 +522,7 @@ static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
bytes = sizeof(struct bch_sb_field_replicas);
for_each_cpu_replicas_entry(r, src)
bytes += replicas_entry_bytes(&src->e) - 1;
bytes += replicas_entry_bytes(src) - 1;
sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
DIV_ROUND_UP(bytes, sizeof(u64)));
@ -596,9 +538,9 @@ static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
dst = sb_r->entries;
for_each_cpu_replicas_entry(r, src) {
dst->data_type = src->e.data_type;
dst->nr_devs = src->e.nr_devs;
memcpy(dst->devs, src->e.devs, src->e.nr_devs);
dst->data_type = src->data_type;
dst->nr_devs = src->nr_devs;
memcpy(dst->devs, src->devs, src->nr_devs);
dst = replicas_entry_next(dst);
@ -619,8 +561,8 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
bytes = sizeof(struct bch_sb_field_replicas);
for_each_cpu_replicas_entry(r, src) {
bytes += replicas_entry_bytes(&src->e);
if (src->e.nr_required != 1)
bytes += replicas_entry_bytes(src);
if (src->nr_required != 1)
need_v1 = true;
}
@ -641,7 +583,7 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
dst = sb_r->entries;
for_each_cpu_replicas_entry(r, src) {
memcpy(dst, &src->e, replicas_entry_bytes(&src->e));
memcpy(dst, src, replicas_entry_bytes(src));
dst = replicas_entry_next(dst);
@ -660,26 +602,24 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
sort_r(cpu_r->entries,
cpu_r->nr,
cpu_r->entry_size,
cpu_replicas_entry_cmp_r, NULL,
bch2_memcmp, NULL,
(void *)(size_t)cpu_r->entry_size);
for (i = 0; i < cpu_r->nr; i++) {
struct bch_replicas_entry_cpu *e =
struct bch_replicas_entry_v1 *e =
cpu_replicas_entry(cpu_r, i);
try(bch2_replicas_entry_sb_validate(&e->e, sb, err));
try(bch2_replicas_entry_sb_validate(e, sb, err));
if (i + 1 < cpu_r->nr) {
struct bch_replicas_entry_cpu *n =
struct bch_replicas_entry_v1 *n =
cpu_replicas_entry(cpu_r, i + 1);
int cmp = cpu_replicas_entry_cmp(e, n, cpu_r->entry_size);
BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);
BUG_ON(cmp > 0);
if (!cmp) {
if (!memcmp(e, n, cpu_r->entry_size)) {
prt_printf(err, "duplicate replicas entry ");
bch2_replicas_entry_to_text(err, &e->e);
bch2_replicas_entry_to_text(err, e);
return -BCH_ERR_invalid_sb_replicas;
}
}
@ -762,9 +702,7 @@ bool bch2_can_read_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, struct printbuf *err)
{
guard(percpu_read)(&c->mark_lock);
for_each_cpu_replicas_entry(&c->replicas, i) {
struct bch_replicas_entry_v1 *e = &i->e;
for_each_cpu_replicas_entry(&c->replicas, e) {
unsigned nr_online = 0, nr_failed = 0, dflags = 0;
bool metadata = e->data_type < BCH_DATA_user;
@ -882,25 +820,6 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
return bch2_can_read_fs_with_devs(c, devs, flags, err);
}
bool bch2_sb_has_journal(struct bch_sb *sb)
{
struct bch_sb_field_replicas *replicas = bch2_sb_field_get(sb, replicas);
struct bch_sb_field_replicas_v0 *replicas_v0 = bch2_sb_field_get(sb, replicas_v0);
if (replicas) {
for_each_replicas_entry(replicas, r)
if (r->data_type == BCH_DATA_journal)
return true;
} else if (replicas_v0) {
for_each_replicas_entry(replicas_v0, r)
if (r->data_type == BCH_DATA_journal)
return true;
}
return false;
}
unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
{
struct bch_sb_field_replicas *replicas;
@ -944,4 +863,5 @@ unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
void bch2_fs_replicas_exit(struct bch_fs *c)
{
kfree(c->replicas.entries);
kfree(c->replicas_gc.entries);
}
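
As a reading aid, a minimal sketch (not part of the patch) of how the restored replicas_gc interface is driven, following the pattern bch2_journal_flush_device_pins() uses later in this commit; mark_live_journal_entries() is a hypothetical placeholder for the caller's re-marking work:

static int gc_journal_replicas(struct bch_fs *c)
{
	/* replicas_gc_lock serializes gc passes (gc_start/gc_end assert it is held) */
	guard(mutex)(&c->replicas_gc_lock);

	/* snapshot every non-journal entry into c->replicas_gc */
	int ret = bch2_replicas_gc_start(c, 1 << BCH_DATA_journal);
	if (ret)
		return ret;

	/* hypothetical: re-mark (bch2_mark_replicas) each journal entry still in use */
	ret = mark_live_journal_entries(c);

	/* on success, swap replicas_gc into c->replicas and persist the superblock */
	return bch2_replicas_gc_end(c, ret);
}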


@ -39,22 +39,13 @@ bool bch2_can_read_fs_with_devs(struct bch_fs *, struct bch_devs_mask,
bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask,
unsigned, struct printbuf *, bool);
bool bch2_sb_has_journal(struct bch_sb *);
unsigned bch2_sb_dev_has_data(struct bch_sb *, unsigned);
unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
void bch2_replicas_entry_put_many(struct bch_fs *, struct bch_replicas_entry_v1 *, unsigned);
static inline void bch2_replicas_entry_put(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
bch2_replicas_entry_put_many(c, r, 1);
}
int bch2_replicas_entry_get(struct bch_fs *, struct bch_replicas_entry_v1 *);
int bch2_replicas_gc_end(struct bch_fs *, int);
int bch2_replicas_gc_start(struct bch_fs *, unsigned);
void bch2_replicas_entry_kill(struct bch_fs *, struct bch_replicas_entry_v1 *);
int bch2_replicas_gc_reffed(struct bch_fs *);
static inline bool bch2_replicas_entry_has_dev(struct bch_replicas_entry_v1 *r, unsigned dev)
{
for (unsigned i = 0; i < r->nr_devs; i++)
@ -63,12 +54,6 @@ static inline bool bch2_replicas_entry_has_dev(struct bch_replicas_entry_v1 *r,
return false;
}
static inline bool bch2_replicas_entry_eq(struct bch_replicas_entry_v1 *l,
struct bch_replicas_entry_v1 *r)
{
return l->nr_devs == r->nr_devs && !memcmp(l, r, replicas_entry_bytes(l));
}
/* iterate over superblock replicas - used by userspace tools: */
#define replicas_entry_next(_i) \


@ -2,16 +2,10 @@
#ifndef _BCACHEFS_REPLICAS_TYPES_H
#define _BCACHEFS_REPLICAS_TYPES_H
/* unsized - bch_replicas_entry_v1 is variable length */
struct bch_replicas_entry_cpu {
atomic_t ref;
struct bch_replicas_entry_v1 e;
};
struct bch_replicas_cpu {
unsigned nr;
unsigned entry_size;
struct bch_replicas_entry_cpu *entries;
unsigned nr;
unsigned entry_size;
struct bch_replicas_entry_v1 *entries;
};
union bch_replicas_padded {


@ -808,6 +808,8 @@ struct bch_fs {
struct bch_accounting_mem accounting;
struct bch_replicas_cpu replicas;
struct bch_replicas_cpu replicas_gc;
struct mutex replicas_gc_lock;
struct journal_entry_res btree_root_journal_res;
struct journal_entry_res clock_journal_res;


@ -438,10 +438,10 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
* sequence number with a new btree node write, we want to re-journal
* the update
*/
if (ck->journal.seq == j->last_seq)
if (ck->journal.seq == journal_last_seq(j))
commit_flags |= BCH_WATERMARK_reclaim;
if (ck->journal.seq != j->last_seq ||
if (ck->journal.seq != journal_last_seq(j) ||
!journal_low_on_space(&c->journal))
commit_flags |= BCH_TRANS_COMMIT_no_journal_res;


@ -1030,11 +1030,24 @@ static int ec_stripe_key_update(struct btree_trans *trans,
return bch2_trans_update(trans, &iter, &new->k_i, 0);
}
struct stripe_update_bucket_stats {
u32 nr_bp_to_deleted;
u32 nr_no_match;
u32 nr_cached;
u32 nr_done;
u32 sectors_bp_to_deleted;
u32 sectors_no_match;
u32 sectors_cached;
u32 sectors_done;
};
static int ec_stripe_update_extent(struct btree_trans *trans,
struct bch_dev *ca,
struct bpos bucket, u8 gen,
struct ec_stripe_buf *s,
struct bkey_s_c_backpointer bp,
struct stripe_update_bucket_stats *stats,
struct wb_maybe_flush *last_flushed)
{
struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
@ -1063,6 +1076,9 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
* extent no longer exists - we could flush the btree
* write buffer and retry to verify, but no need:
*/
stats->nr_bp_to_deleted++;
stats->sectors_bp_to_deleted += bp.v->bucket_len;
count_event(c, ec_stripe_update_extent_fail);
return 0;
}
@ -1075,8 +1091,18 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
* It doesn't generally make sense to erasure code cached ptrs:
* XXX: should we be incrementing a counter?
*/
if (!ptr_c || ptr_c->cached)
if (!ptr_c) {
stats->nr_no_match++;
stats->sectors_no_match += bp.v->bucket_len;
count_event(c, ec_stripe_update_extent_fail);
return 0;
}
if (ptr_c->cached) {
stats->nr_cached++;
stats->sectors_cached += bp.v->bucket_len;
count_event(c, ec_stripe_update_extent_fail);
return 0;
}
unsigned dev = v->ptrs[block].dev;
@ -1106,6 +1132,14 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
try(bch2_bkey_set_needs_rebalance(trans->c, &opts, n,
SET_NEEDS_REBALANCE_other, 0));
try(bch2_trans_update(trans, &iter, n, 0));
try(bch2_trans_commit(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc));
stats->nr_done++;
stats->sectors_done += bp.v->bucket_len;
count_event(c, ec_stripe_update_extent);
return 0;
}
@ -1126,12 +1160,11 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
return for_each_btree_key_max_commit(trans, bp_iter, BTREE_ID_backpointers,
struct stripe_update_bucket_stats stats = {};
try(for_each_btree_key_max(trans, bp_iter, BTREE_ID_backpointers,
bucket_pos_to_bp_start(ca, bucket_pos),
bucket_pos_to_bp_end(ca, bucket_pos), 0, bp_k,
NULL, NULL,
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc, ({
bucket_pos_to_bp_end(ca, bucket_pos), 0, bp_k, ({
if (bkey_ge(bp_k.k->p, bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket_pos), 0)))
break;
@ -1143,8 +1176,26 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
continue;
wb_maybe_flush_inc(&last_flushed);
ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, bp, &last_flushed);
}));
ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, bp,
&stats, &last_flushed);
})));
if (trace_stripe_update_bucket_enabled()) {
CLASS(printbuf, buf)();
prt_printf(&buf, "bp_to_deleted:\t%u %u\n",
stats.nr_bp_to_deleted, stats.sectors_bp_to_deleted);
prt_printf(&buf, "no_match:\t%u %u\n",
stats.nr_no_match, stats.sectors_no_match);
prt_printf(&buf, "cached:\t%u %u\n",
stats.nr_cached, stats.sectors_cached);
prt_printf(&buf, "done:\t%u %u\n",
stats.nr_done, stats.sectors_done);
trace_stripe_update_bucket(c, buf.buf);
}
return 0;
}
static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)


@ -306,9 +306,11 @@ int bch2_bkey_get_io_opts(struct btree_trans *trans,
bch2_inode_opts_get_inode(c, &inode, opts);
}
} else {
if (snapshot_opts->fs_io_opts.change_cookie != atomic_read(&c->opt_change_cookie)) {
if (snapshot_opts->fs_io_opts.change_cookie != atomic_read(&c->opt_change_cookie) ||
snapshot_opts->metadata != metadata) {
bch2_inode_opts_get(c, &snapshot_opts->fs_io_opts, metadata);
snapshot_opts->metadata = metadata;
snapshot_opts->cur_inum = 0;
snapshot_opts->d.nr = 0;
}


@ -52,6 +52,8 @@ struct snapshot_io_opts_entry {
struct per_snapshot_io_opts {
u64 cur_inum;
bool metadata;
struct bch_inode_opts fs_io_opts;
DARRAY(struct snapshot_io_opts_entry) d;
};


@ -346,6 +346,11 @@ TRACE_EVENT(stripe_create,
__entry->ret)
);
DEFINE_EVENT(fs_str, stripe_update_bucket,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);
/* Journal */
DEFINE_EVENT(bch_fs, journal_full,


@ -375,6 +375,9 @@ void bch2_fs_read_only(struct bch_fs *c)
BUG_ON(c->btree_write_buffer.inc.keys.nr);
BUG_ON(c->btree_write_buffer.flushing.keys.nr);
bch2_verify_accounting_clean(c);
bch_verbose(c, "marking filesystem clean");
bch2_fs_mark_clean(c);
} else {
/* Make sure error counts/counters are persisted */
guard(mutex)(&c->sb_lock);
@ -470,6 +473,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
try(bch2_fs_init_rw(c));
try(bch2_sb_members_v2_init(c));
try(bch2_fs_mark_dirty(c));
clear_bit(BCH_FS_clean_shutdown, &c->flags);
@ -1048,6 +1052,7 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
init_rwsem(&c->state_lock);
mutex_init(&c->sb_lock);
mutex_init(&c->replicas_gc_lock);
mutex_init(&c->btree_root_lock);
INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);


@ -610,7 +610,8 @@ fsck_err:
int bch2_fs_recovery(struct bch_fs *c)
{
struct bch_sb_field_clean *clean = NULL;
struct journal_start_info journal_start = {};
struct jset *last_journal_entry = NULL;
u64 last_seq = 0, blacklist_seq, journal_seq;
int ret = 0;
if (c->sb.clean) {
@ -636,7 +637,7 @@ int bch2_fs_recovery(struct bch_fs *c)
struct journal_replay **i;
bch_verbose(c, "starting journal read");
ret = bch2_journal_read(c, &journal_start);
ret = bch2_journal_read(c, &last_seq, &blacklist_seq, &journal_seq);
if (ret)
goto err;
@ -647,21 +648,22 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.read_journal_only)
goto out;
if (mustfix_fsck_err_on(c->sb.clean && !journal_start.clean,
c, clean_but_journal_not_empty,
"filesystem marked clean but journal not empty")) {
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->sb.clean = false;
}
struct jset *last_journal_entry = NULL;
genradix_for_each_reverse(&c->journal_entries, iter, i)
if (!journal_replay_ignore(*i)) {
last_journal_entry = &(*i)->j;
break;
}
if (mustfix_fsck_err_on(c->sb.clean &&
last_journal_entry &&
!journal_entry_empty(last_journal_entry), c,
clean_but_journal_not_empty,
"filesystem marked clean but journal not empty")) {
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->sb.clean = false;
}
if (!last_journal_entry) {
fsck_err_on(!c->sb.clean, c,
dirty_but_no_journal_entries,
@ -703,12 +705,11 @@ use_clean:
goto err;
}
journal_start.start_seq = le64_to_cpu(clean->journal_seq) + 1;
blacklist_seq = journal_seq = le64_to_cpu(clean->journal_seq) + 1;
}
c->journal_replay_seq_start = journal_start.seq_read_start;
c->journal_replay_seq_end = journal_start.seq_read_end;
c->journal_replay_seq_start = last_seq;
c->journal_replay_seq_end = blacklist_seq - 1;
zero_out_btree_mem_ptr(&c->journal_keys);
@ -755,15 +756,13 @@ use_clean:
* journal sequence numbers:
*/
if (!c->sb.clean)
journal_start.start_seq += JOURNAL_BUF_NR * 4;
journal_seq += JOURNAL_BUF_NR * 4;
if (journal_start.seq_read_end &&
journal_start.seq_read_end + 1 != journal_start.start_seq) {
u64 blacklist_seq = journal_start.seq_read_end + 1;
if (blacklist_seq != journal_seq) {
ret = bch2_journal_log_msg(c, "blacklisting entries %llu-%llu",
blacklist_seq, journal_start.start_seq) ?:
blacklist_seq, journal_seq) ?:
bch2_journal_seq_blacklist_add(c,
blacklist_seq, journal_start.start_seq);
blacklist_seq, journal_seq);
if (ret) {
bch_err_msg(c, ret, "error creating new journal seq blacklist entry");
goto err;
@ -771,10 +770,8 @@ use_clean:
}
ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
journal_start.start_seq,
journal_start.seq_read_start,
journal_start.seq_read_end) ?:
bch2_fs_journal_start(&c->journal, journal_start);
journal_seq, last_seq, blacklist_seq - 1) ?:
bch2_fs_journal_start(&c->journal, last_seq, journal_seq);
if (ret)
goto err;
@ -1017,8 +1014,7 @@ int bch2_fs_initialize(struct bch_fs *c)
* journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer:
*/
struct journal_start_info journal_start = { .start_seq = 1 };
ret = bch2_fs_journal_start(&c->journal, journal_start);
ret = bch2_fs_journal_start(&c->journal, 1, 1);
if (ret)
goto err;


@ -11,7 +11,6 @@
#include "alloc/foreground.h"
#include "alloc/replicas.h"
#include "btree/update.h"
#include "init/error.h"
/* allocate journal on a device: */
@ -368,30 +367,29 @@ void bch2_fs_journal_stop(struct journal *j)
clear_bit(JOURNAL_running, &j->flags);
}
int bch2_fs_journal_start(struct journal *j, struct journal_start_info info)
int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_entry_pin_list *p;
struct journal_replay *i, **_i;
struct genradix_iter iter;
bool had_entries = false;
int ret = 0;
/*
*
* XXX pick most recent non blacklisted sequence number
*/
info.start_seq = max(info.start_seq, bch2_journal_last_blacklisted_seq(c));
cur_seq = max(cur_seq, bch2_journal_last_blacklisted_seq(c));
if (info.start_seq >= JOURNAL_SEQ_MAX) {
if (cur_seq >= JOURNAL_SEQ_MAX) {
bch_err(c, "cannot start: journal seq overflow");
return -EINVAL;
}
/* Clean filesystem? */
u64 cur_seq = info.start_seq;
u64 last_seq = info.seq_read_start ?: info.start_seq;
if (!last_seq)
last_seq = cur_seq;
u64 nr = cur_seq - last_seq;
if (nr * sizeof(struct journal_entry_pin_list) > 1U << 30) {
@ -421,7 +419,6 @@ int bch2_fs_journal_start(struct journal *j, struct journal_start_info info)
j->seq_write_started = cur_seq - 1;
j->seq_ondisk = cur_seq - 1;
j->pin.front = last_seq;
j->last_seq = last_seq;
j->pin.back = cur_seq;
atomic64_set(&j->seq, cur_seq - 1);
@ -444,26 +441,12 @@ int bch2_fs_journal_start(struct journal *j, struct journal_start_info info)
if (journal_entry_empty(&i->j))
j->last_empty_seq = le64_to_cpu(i->j.seq);
if (!info.clean) {
struct bch_devs_list seq_devs = {};
darray_for_each(i->ptrs, ptr)
seq_devs.data[seq_devs.nr++] = ptr->dev;
struct bch_devs_list seq_devs = {};
darray_for_each(i->ptrs, ptr)
seq_devs.data[seq_devs.nr++] = ptr->dev;
p = journal_seq_pin(j, seq);
bch2_devlist_to_replicas(&p->devs.e, BCH_DATA_journal, seq_devs);
CLASS(printbuf, buf)();
bch2_replicas_entry_to_text(&buf, &p->devs.e);
fsck_err_on(!test_bit(JOURNAL_degraded, &j->flags) &&
!bch2_replicas_marked(c, &p->devs.e),
c, journal_entry_replicas_not_marked,
"superblock not marked as containing replicas for journal entry %llu\n%s",
le64_to_cpu(i->j.seq), buf.buf);
if (bch2_replicas_entry_get(c, &p->devs.e))
p->devs.e.nr_devs = 0;
}
p = journal_seq_pin(j, seq);
bch2_devlist_to_replicas(&p->devs.e, BCH_DATA_journal, seq_devs);
had_entries = true;
}
@ -477,9 +460,7 @@ int bch2_fs_journal_start(struct journal *j, struct journal_start_info info)
c->last_bucket_seq_cleanup = journal_cur_seq(j);
}
try(bch2_replicas_gc_reffed(c));
fsck_err:
return ret;
return 0;
}
void bch2_journal_set_replay_done(struct journal *j)
@ -604,7 +585,6 @@ void bch2_fs_journal_init_early(struct journal *j)
init_waitqueue_head(&j->reclaim_wait);
init_waitqueue_head(&j->pin_flush_wait);
mutex_init(&j->reclaim_lock);
mutex_init(&j->last_seq_ondisk_lock);
mutex_init(&j->discard_lock);
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);


@ -11,7 +11,7 @@ int bch2_fs_journal_alloc(struct bch_fs *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
void bch2_fs_journal_stop(struct journal *);
int bch2_fs_journal_start(struct journal *, struct journal_start_info);
int bch2_fs_journal_start(struct journal *, u64, u64);
void bch2_journal_set_replay_done(struct journal *);
void bch2_dev_journal_exit(struct bch_dev *);


@ -187,7 +187,7 @@ void bch2_journal_buf_put_final(struct journal *j, u64 seq)
lockdep_assert_held(&j->lock);
if (__bch2_journal_pin_put(j, seq))
bch2_journal_update_last_seq(j);
bch2_journal_reclaim_fast(j);
bch2_journal_do_writes(j);
/*
@ -235,10 +235,10 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
/* Close out old buffer: */
buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
size_t bytes = roundup_pow_of_two(vstruct_bytes(buf->data));
journal_seq_pin(j, journal_cur_seq(j))->bytes = bytes;
j->dirty_entry_bytes += bytes;
struct journal_entry_pin_list *pin_list =
journal_seq_pin(j, journal_cur_seq(j));
pin_list->bytes = roundup_pow_of_two(vstruct_bytes(buf->data));
j->dirty_entry_bytes += pin_list->bytes;
if (trace_journal_entry_close_enabled() && trace) {
CLASS(printbuf, err)();
@ -280,7 +280,7 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
* contain either what the old pin protected or what the new pin
* protects.
*
* After the old pin is dropped j->last_seq won't include the old
* After the old pin is dropped journal_last_seq() won't include the old
* pin, so we can only write the updated last_seq on the entry that
* contains whatever the new pin protects.
*
@ -291,7 +291,7 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
* Hence, we want update/set last_seq on the current journal entry right
* before we open a new one:
*/
buf->last_seq = j->last_seq;
buf->last_seq = journal_last_seq(j);
buf->data->last_seq = cpu_to_le64(buf->last_seq);
BUG_ON(buf->last_seq > le64_to_cpu(buf->data->seq));
@ -358,6 +358,7 @@ static int journal_entry_open(struct journal *j)
lockdep_assert_held(&j->lock);
BUG_ON(journal_entry_is_open(j));
BUG_ON(c->sb.clean);
if (j->blocked)
return bch_err_throw(c, journal_blocked);
@ -415,7 +416,7 @@ static int journal_entry_open(struct journal *j)
/*
* The fifo_push() needs to happen at the same time as j->seq is
* incremented for j->last_seq to be calculated correctly
* incremented for journal_last_seq() to be calculated correctly
*/
atomic64_inc(&j->seq);
journal_pin_list_init(fifo_push_ref(&j->pin), 1);
@ -1091,7 +1092,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
prt_printf(out, "dirty journal entries:\t%llu/%llu\n", fifo_used(&j->pin), j->pin.size);
prt_printf(out, "seq:\t%llu\n", journal_cur_seq(j));
prt_printf(out, "seq_ondisk:\t%llu\n", j->seq_ondisk);
prt_printf(out, "last_seq:\t%llu\n", j->last_seq);
prt_printf(out, "last_seq:\t%llu\n", journal_last_seq(j));
prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
prt_printf(out, "watermark:\t%s\n", bch2_watermarks[j->watermark]);


@ -129,6 +129,11 @@ static inline bool journal_low_on_space(struct journal *j)
/* Sequence number of oldest dirty journal entry */
static inline u64 journal_last_seq(struct journal *j)
{
return j->pin.front;
}
static inline u64 journal_cur_seq(struct journal *j)
{
return atomic64_read(&j->seq);


@ -1346,17 +1346,18 @@ fsck_err:
return ret;
}
int bch2_journal_read(struct bch_fs *c, struct journal_start_info *info)
int bch2_journal_read(struct bch_fs *c,
u64 *last_seq,
u64 *blacklist_seq,
u64 *start_seq)
{
struct journal_list jlist;
struct journal_replay *i, **_i;
struct genradix_iter radix_iter;
bool last_write_torn = false;
bool degraded = false, last_write_torn = false;
u64 seq;
int ret = 0;
memset(info, 0, sizeof(*info));
closure_init_stack(&jlist.cl);
mutex_init(&jlist.lock);
jlist.last_seq = 0;
@ -1376,7 +1377,7 @@ int bch2_journal_read(struct bch_fs *c, struct journal_start_info *info)
system_unbound_wq,
&jlist.cl);
else
set_bit(JOURNAL_degraded, &c->journal.flags);
degraded = true;
}
while (closure_sync_timeout(&jlist.cl, sysctl_hung_task_timeout_secs * HZ / 2))
@ -1385,6 +1386,10 @@ int bch2_journal_read(struct bch_fs *c, struct journal_start_info *info)
if (jlist.ret)
return jlist.ret;
*last_seq = 0;
*start_seq = 0;
*blacklist_seq = 0;
/*
* Find most recent flush entry, and ignore newer non flush entries -
* those entries will be blacklisted:
@ -1395,8 +1400,8 @@ int bch2_journal_read(struct bch_fs *c, struct journal_start_info *info)
if (journal_replay_ignore(i))
continue;
if (!info->start_seq)
info->start_seq = le64_to_cpu(i->j.seq) + 1;
if (!*start_seq)
*blacklist_seq = *start_seq = le64_to_cpu(i->j.seq) + 1;
if (JSET_NO_FLUSH(&i->j)) {
i->ignore_blacklisted = true;
@ -1421,28 +1426,27 @@ int bch2_journal_read(struct bch_fs *c, struct journal_start_info *info)
le64_to_cpu(i->j.seq)))
i->j.last_seq = i->j.seq;
info->seq_read_start = le64_to_cpu(i->j.last_seq);
info->seq_read_end = le64_to_cpu(i->j.seq);
info->clean = journal_entry_empty(&i->j);
*last_seq = le64_to_cpu(i->j.last_seq);
*blacklist_seq = le64_to_cpu(i->j.seq) + 1;
break;
}
if (!info->start_seq) {
if (!*start_seq) {
bch_info(c, "journal read done, but no entries found");
return 0;
}
if (!info->seq_read_end) {
if (!*last_seq) {
fsck_err(c, dirty_but_no_journal_entries_post_drop_nonflushes,
"journal read done, but no entries found after dropping non-flushes");
return 0;
}
u64 drop_before = info->seq_read_start;
u64 drop_before = *last_seq;
{
CLASS(printbuf, buf)();
prt_printf(&buf, "journal read done, replaying entries %llu-%llu",
info->seq_read_start, info->seq_read_end);
*last_seq, *blacklist_seq - 1);
/*
* Drop blacklisted entries and entries older than last_seq (or start of
@ -1453,11 +1457,9 @@ int bch2_journal_read(struct bch_fs *c, struct journal_start_info *info)
prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind);
}
info->seq_read_start = drop_before;
if (info->seq_read_end + 1 != info->start_seq)
prt_printf(&buf, " (unflushed %llu-%llu)",
info->seq_read_end + 1,
info->start_seq - 1);
*last_seq = drop_before;
if (*start_seq != *blacklist_seq)
prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1);
bch_info(c, "%s", buf.buf);
}
@ -1481,7 +1483,7 @@ int bch2_journal_read(struct bch_fs *c, struct journal_start_info *info)
}
}
try(bch2_journal_check_for_missing(c, drop_before, info->seq_read_end));
try(bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1));
genradix_for_each(&c->journal_entries, radix_iter, _i) {
union bch_replicas_padded replicas = {
@ -1514,6 +1516,17 @@ int bch2_journal_read(struct bch_fs *c, struct journal_start_info *info)
replicas_entry_add_dev(&replicas.e, ptr->dev);
bch2_replicas_entry_sort(&replicas.e);
CLASS(printbuf, buf)();
bch2_replicas_entry_to_text(&buf, &replicas.e);
if (!degraded &&
!bch2_replicas_marked(c, &replicas.e) &&
(le64_to_cpu(i->j.seq) == *last_seq ||
fsck_err(c, journal_entry_replicas_not_marked,
"superblock not marked as containing replicas for journal entry %llu\n%s",
le64_to_cpu(i->j.seq), buf.buf)))
try(bch2_mark_replicas(c, &replicas.e));
}
fsck_err:
return ret;


@ -70,6 +70,6 @@ struct u64_range {
struct u64_range bch2_journal_entry_missing_range(struct bch_fs *, u64, u64);
int bch2_journal_read(struct bch_fs *, struct journal_start_info *);
int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
#endif /* _BCACHEFS_JOURNAL_READ_H */


@ -211,7 +211,7 @@ void bch2_journal_space_available(struct journal *j)
continue;
while (ja->dirty_idx != ja->cur_idx &&
ja->bucket_seq[ja->dirty_idx] < j->last_seq)
ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
while (ja->dirty_idx_ondisk != ja->dirty_idx &&
@ -325,66 +325,37 @@ void bch2_journal_do_discards(struct journal *j)
* entry, holding it open to ensure it gets replayed during recovery:
*/
void bch2_journal_update_last_seq(struct journal *j)
void bch2_journal_reclaim_fast(struct journal *j)
{
bool popped = false;
lockdep_assert_held(&j->lock);
/*
* Unpin journal entries whose reference counts reached zero, meaning
* all btree nodes got written out
*/
u64 old = j->last_seq;
struct journal_entry_pin_list *pin_list;
while (j->last_seq < j->pin.back &&
j->last_seq <= j->seq_ondisk &&
!atomic_read(&(pin_list = journal_seq_pin(j, j->last_seq))->count))
j->last_seq++;
while (!fifo_empty(&j->pin) &&
j->pin.front <= j->seq_ondisk &&
!atomic_read(&(pin_list = &fifo_peek_front(&j->pin))->count)) {
if (old != j->last_seq) {
if (WARN_ON(j->dirty_entry_bytes < pin_list->bytes))
pin_list->bytes = j->dirty_entry_bytes;
j->dirty_entry_bytes -= pin_list->bytes;
pin_list->bytes = 0;
j->pin.front++;
popped = true;
}
if (popped) {
bch2_journal_space_available(j);
__closure_wake_up(&j->reclaim_flush_wait);
}
}
void bch2_journal_update_last_seq_ondisk(struct journal *j, u64 last_seq_ondisk)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
union bch_replicas_padded replicas;
unsigned nr_refs = 0;
size_t dirty_entry_bytes = 0;
scoped_guard(mutex, &j->last_seq_ondisk_lock)
while (j->last_seq_ondisk < last_seq_ondisk) {
struct journal_entry_pin_list *pin_list = journal_seq_pin(j, j->last_seq_ondisk);
if (pin_list->devs.e.nr_devs) {
if (nr_refs &&
!bch2_replicas_entry_eq(&replicas.e, &pin_list->devs.e)) {
bch2_replicas_entry_put_many(c, &replicas.e, nr_refs);
nr_refs = 0;
}
memcpy(&replicas, &pin_list->devs, replicas_entry_bytes(&pin_list->devs.e));
pin_list->devs.e.nr_devs = 0;
nr_refs++;
}
dirty_entry_bytes += pin_list->bytes;
pin_list->bytes = 0;
j->last_seq_ondisk++;
}
scoped_guard(spinlock, &j->lock) {
if (WARN_ON(j->dirty_entry_bytes < dirty_entry_bytes))
dirty_entry_bytes = j->dirty_entry_bytes;
j->dirty_entry_bytes -= dirty_entry_bytes;
}
if (nr_refs)
bch2_replicas_entry_put_many(c, &replicas.e, nr_refs);
}
bool __bch2_journal_pin_put(struct journal *j, u64 seq)
{
struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
@ -396,7 +367,7 @@ void bch2_journal_pin_put(struct journal *j, u64 seq)
{
if (__bch2_journal_pin_put(j, seq)) {
guard(spinlock)(&j->lock);
bch2_journal_update_last_seq(j);
bch2_journal_reclaim_fast(j);
}
}
@ -423,7 +394,7 @@ static inline bool __journal_pin_drop(struct journal *j,
* writing a new last_seq will now make another bucket available:
*/
return atomic_dec_and_test(&pin_list->count) &&
pin_list == journal_seq_pin(j, j->last_seq);
pin_list == &fifo_peek_front(&j->pin);
}
void bch2_journal_pin_drop(struct journal *j,
@ -431,7 +402,7 @@ void bch2_journal_pin_drop(struct journal *j,
{
guard(spinlock)(&j->lock);
if (__journal_pin_drop(j, pin))
bch2_journal_update_last_seq(j);
bch2_journal_reclaim_fast(j);
}
static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin,
@ -482,7 +453,7 @@ void bch2_journal_pin_copy(struct journal *j,
u64 seq = READ_ONCE(src->seq);
if (seq < j->last_seq) {
if (seq < journal_last_seq(j)) {
/*
* bch2_journal_pin_copy() raced with bch2_journal_pin_drop() on
* the src pin - with the pin dropped, the entry to pin might no
@ -497,13 +468,13 @@ void bch2_journal_pin_copy(struct journal *j,
bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(dst, flush_fn));
if (reclaim)
bch2_journal_update_last_seq(j);
bch2_journal_reclaim_fast(j);
/*
* If the journal is currently full, we might want to call flush_fn
* immediately:
*/
if (seq == j->last_seq)
if (seq == journal_last_seq(j))
journal_wake(j);
}
@ -514,19 +485,19 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
bool wake;
scoped_guard(spinlock, &j->lock) {
BUG_ON(seq < j->last_seq);
BUG_ON(seq < journal_last_seq(j));
bool reclaim = __journal_pin_drop(j, pin);
bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
if (reclaim)
bch2_journal_update_last_seq(j);
bch2_journal_reclaim_fast(j);
/*
* If the journal is currently full, we might want to call flush_fn
* immediately:
*/
wake = seq == j->last_seq;
wake = seq == journal_last_seq(j);
}
if (wake)
@ -958,8 +929,8 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
*/
guard(spinlock)(&j->lock);
return !test_bit(JOURNAL_replay_done, &j->flags) ||
j->last_seq > seq_to_flush ||
j->last_seq == j->pin.back;
journal_last_seq(j) > seq_to_flush ||
!fifo_used(&j->pin);
}
bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
@ -993,7 +964,39 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
try(bch2_journal_error(j));
return 0;
guard(mutex)(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, 1 << BCH_DATA_journal);
/*
* Now that we've populated replicas_gc, write to the journal to mark
* active journal devices. This handles the case where the journal might
* be empty. Otherwise we could clear all journal replicas and
* temporarily put the fs into an unrecoverable state. Journal recovery
* expects to find devices marked for journal data on unclean mount.
*/
int ret = bch2_journal_meta(&c->journal);
if (ret)
goto err;
seq = 0;
scoped_guard(spinlock, &j->lock)
while (!ret) {
seq = max(seq, journal_last_seq(j));
if (seq > j->seq_ondisk)
break;
union bch_replicas_padded replicas;
memcpy(&replicas, &journal_seq_pin(j, seq)->devs, sizeof(replicas));
seq++;
if (replicas.e.nr_devs) {
spin_unlock(&j->lock);
ret = bch2_mark_replicas(c, &replicas.e);
spin_lock(&j->lock);
}
}
err:
return bch2_replicas_gc_end(c, ret);
}
bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
@ -1007,7 +1010,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
if (!test_bit(JOURNAL_running, &j->flags))
return true;
*seq = max(*seq, j->last_seq);
*seq = max(*seq, j->pin.front);
if (*seq >= j->pin.back)
return true;


@ -43,9 +43,7 @@ journal_seq_pin(struct journal *j, u64 seq)
return &j->pin.data[seq & j->pin.mask];
}
void bch2_journal_update_last_seq(struct journal *);
void bch2_journal_update_last_seq_ondisk(struct journal *, u64);
void bch2_journal_reclaim_fast(struct journal *);
bool __bch2_journal_pin_put(struct journal *, u64);
void bch2_journal_pin_put(struct journal *, u64);
void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *);


@ -149,7 +149,6 @@ enum journal_space_from {
};
#define JOURNAL_FLAGS() \
x(degraded) \
x(replay_done) \
x(running) \
x(may_skip_flush) \
@ -266,8 +265,6 @@ struct journal {
u64 front, back, size, mask;
struct journal_entry_pin_list *data;
} pin;
u64 last_seq;
size_t dirty_entry_bytes;
struct journal_space space[journal_space_nr];
@ -279,7 +276,6 @@ struct journal {
spinlock_t err_lock;
struct mutex reclaim_lock;
struct mutex last_seq_ondisk_lock;
/*
* Used for waiting until journal reclaim has freed up space in the
* journal:
@ -356,11 +352,4 @@ struct journal_entry_res {
unsigned u64s;
};
struct journal_start_info {
u64 seq_read_start;
u64 seq_read_end;
u64 start_seq;
bool clean;
};
#endif /* _BCACHEFS_JOURNAL_TYPES_H */


@ -189,7 +189,6 @@ static CLOSURE_CALLBACK(journal_write_done)
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
u64 seq = le64_to_cpu(w->data->seq);
u64 seq_wrote = seq;
int err = 0;
bch2_time_stats_update(!JSET_NO_FLUSH(w->data)
@ -198,12 +197,8 @@ static CLOSURE_CALLBACK(journal_write_done)
if (w->had_error) {
struct bch_replicas_entry_v1 *r = &journal_seq_pin(j, seq)->devs.e;
bch2_replicas_entry_put(c, r);
bch2_devlist_to_replicas(r, BCH_DATA_journal, w->devs_written);
err = bch2_replicas_entry_get(c, r);
if (err)
r->nr_devs = 0;
}
if (!w->devs_written.nr)
@ -230,6 +225,7 @@ static CLOSURE_CALLBACK(journal_write_done)
BUG_ON(seq < j->pin.front);
if (err && (!j->err_seq || seq < j->err_seq))
j->err_seq = seq;
w->write_done = true;
if (!j->free_buf || j->free_buf_size < w->buf_size) {
swap(j->free_buf, w->data);
@ -247,31 +243,22 @@ static CLOSURE_CALLBACK(journal_write_done)
}
bool completed = false;
bool last_seq_ondisk_updated = false;
again:
bool do_discards = false;
for (seq = journal_last_unwritten_seq(j);
seq <= journal_cur_seq(j);
seq++) {
w = j->buf + (seq & JOURNAL_BUF_MASK);
if (!w->write_done && seq != seq_wrote)
if (!w->write_done)
break;
if (!j->err_seq && !w->noflush) {
if (j->last_seq_ondisk < w->last_seq) {
spin_unlock(&j->lock);
/*
* this needs to happen _before_ updating
* j->flushed_seq_ondisk, for flushing to work
* properly - when the flush completes replicas
* refs need to have been dropped
* */
bch2_journal_update_last_seq_ondisk(j, w->last_seq);
last_seq_ondisk_updated = true;
spin_lock(&j->lock);
goto again;
}
j->flushed_seq_ondisk = seq;
j->last_seq_ondisk = w->last_seq;
closure_wake_up(&c->freelist_wait);
bch2_reset_alloc_cursors(c);
do_discards = true;
}
j->seq_ondisk = seq;
@ -290,10 +277,8 @@ again:
completed = true;
}
j->buf[seq_wrote & JOURNAL_BUF_MASK].write_done = true;
if (completed) {
bch2_journal_update_last_seq(j);
bch2_journal_reclaim_fast(j);
bch2_journal_space_available(j);
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], false);
@ -301,8 +286,6 @@ again:
journal_wake(j);
}
j->pin.front = min(j->pin.back, j->last_seq_ondisk);
if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
j->reservations.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
struct journal_buf *buf = journal_cur_buf(j);
@ -325,11 +308,8 @@ again:
bch2_journal_do_writes(j);
spin_unlock(&j->lock);
if (last_seq_ondisk_updated) {
bch2_reset_alloc_cursors(c);
closure_wake_up(&c->freelist_wait);
if (do_discards)
bch2_do_discards(c);
}
closure_put(&c->cl);
}
@ -655,6 +635,7 @@ CLOSURE_CALLBACK(bch2_journal_write)
unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_free]);
int ret;
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
BUG_ON(!w->write_started);
BUG_ON(w->write_allocated);
BUG_ON(w->write_done);
@ -721,11 +702,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
*/
struct bch_replicas_entry_v1 *r = &journal_seq_pin(j, le64_to_cpu(w->data->seq))->devs.e;
bch2_devlist_to_replicas(r, BCH_DATA_journal, w->devs_written);
ret = bch2_replicas_entry_get(c, r);
if (ret) {
r->nr_devs = 0;
ret = bch2_mark_replicas(c, r);
if (ret)
goto err;
}
if (c->opts.nochanges)
goto no_io;


@ -256,10 +256,18 @@ const struct bch_sb_field_ops bch_sb_field_ops_clean = {
.to_text = bch2_sb_clean_to_text,
};
void bch2_fs_mark_dirty(struct bch_fs *c)
int bch2_fs_mark_dirty(struct bch_fs *c)
{
/*
* Unconditionally write superblock, to verify it hasn't changed before
* we go rw:
*/
guard(mutex)(&c->sb_lock);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
return bch2_write_super(c);
}
void bch2_fs_mark_clean(struct bch_fs *c)
@ -269,6 +277,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
unsigned u64s;
int ret;
guard(mutex)(&c->sb_lock);
if (BCH_SB_CLEAN(c->disk_sb.sb))
return;
@ -312,4 +321,6 @@ void bch2_fs_mark_clean(struct bch_fs *c)
}
bch2_journal_pos_from_member_info_set(c);
bch2_write_super(c);
}


@ -10,7 +10,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *, struct jset_entry **
extern const struct bch_sb_field_ops bch_sb_field_ops_clean;
void bch2_fs_mark_dirty(struct bch_fs *);
int bch2_fs_mark_dirty(struct bch_fs *);
void bch2_fs_mark_clean(struct bch_fs *);
#endif /* _BCACHEFS_SB_CLEAN_H */


@ -31,6 +31,8 @@ enum counters_flags {
x(data_update_fail, 82, TYPE_COUNTER) \
x(data_update_key, 37, TYPE_SECTORS) \
x(data_update_key_fail, 38, TYPE_COUNTER) \
x(ec_stripe_update_extent, 99, TYPE_COUNTER) \
x(ec_stripe_update_extent_fail, 100, TYPE_COUNTER) \
x(io_move_read, 35, TYPE_SECTORS) \
x(io_move_write, 36, TYPE_SECTORS) \
x(io_move_start_fail, 39, TYPE_COUNTER) \


@ -1021,11 +1021,6 @@ int bch2_write_super(struct bch_fs *c)
closure_init_stack(cl);
memset(&sb_written, 0, sizeof(sb_written));
if (bch2_sb_has_journal(c->disk_sb.sb))
bch2_fs_mark_dirty(c);
else
bch2_fs_mark_clean(c);
/*
* Note: we do writes to RO devices here, and we might want to change
* that in the future.

libbcachefs/vendor/bio_iov_iter.c (new vendored file)

@ -0,0 +1,188 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS
#include <linux/blkdev.h>
#include <linux/uio.h>
#include "vendor/bio_iov_iter.h"
static inline bool bio_full(struct bio *bio, unsigned len)
{
if (bio->bi_vcnt >= bio->bi_max_vecs)
return true;
if (bio->bi_iter.bi_size > UINT_MAX - len)
return true;
return false;
}
static inline void bio_release_page(struct bio *bio, struct page *page)
{
if (bio_flagged(bio, BIO_PAGE_PINNED))
unpin_user_page(page);
}
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
static unsigned int get_contig_folio_len(unsigned int *num_pages,
struct page **pages, unsigned int i,
struct folio *folio, size_t left,
size_t offset)
{
size_t bytes = left;
size_t contig_sz = min_t(size_t, PAGE_SIZE - offset, bytes);
unsigned int j;
/*
* We might COW a single page in the middle of
* a large folio, so we have to check that all
* pages belong to the same folio.
*/
bytes -= contig_sz;
for (j = i + 1; j < i + *num_pages; j++) {
size_t next = min_t(size_t, PAGE_SIZE, bytes);
if (page_folio(pages[j]) != folio ||
pages[j] != pages[j - 1] + 1) {
break;
}
contig_sz += next;
bytes -= next;
}
*num_pages = j - i;
return contig_sz;
}
static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
iov_iter_extraction_t extraction_flags = 0;
unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
ssize_t size;
unsigned int num_pages, i = 0;
size_t offset, folio_offset, left, len;
int ret = 0;
/*
* Move page array up in the allocated memory for the bio vecs as far as
* possible so that we can start filling biovecs from the beginning
* without overwriting the temporary page array.
*/
BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
if (bio->bi_bdev && blk_queue_pci_p2pdma(bio->bi_bdev->bd_disk->queue))
extraction_flags |= ITER_ALLOW_P2PDMA;
size = iov_iter_extract_pages(iter, &pages,
UINT_MAX - bio->bi_iter.bi_size,
nr_pages, extraction_flags, &offset);
if (unlikely(size <= 0))
return size ? size : -EFAULT;
nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
for (left = size, i = 0; left > 0; left -= len, i += num_pages) {
struct page *page = pages[i];
struct folio *folio = page_folio(page);
unsigned int old_vcnt = bio->bi_vcnt;
folio_offset = ((size_t)folio_page_idx(folio, page) <<
PAGE_SHIFT) + offset;
len = min(folio_size(folio) - folio_offset, left);
num_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
if (num_pages > 1)
len = get_contig_folio_len(&num_pages, pages, i,
folio, left, offset);
if (!bio_add_folio(bio, folio, len, folio_offset)) {
WARN_ON_ONCE(1);
ret = -EINVAL;
goto out;
}
if (bio_flagged(bio, BIO_PAGE_PINNED)) {
/*
* We're adding another fragment of a page that already
* was part of the last segment. Undo our pin as the
* page was pinned when an earlier fragment of it was
* added to the bio and __bio_release_pages expects a
* single pin per page.
*/
if (offset && bio->bi_vcnt == old_vcnt)
unpin_user_folio(folio, 1);
}
offset = 0;
}
iov_iter_revert(iter, left);
out:
while (i < nr_pages)
bio_release_page(bio, pages[i++]);
return ret;
}
/*
* Aligns the bio size to the len_align_mask, releasing excessive bio vecs that
* __bio_iov_iter_get_pages may have inserted, and reverts the trimmed length
* for the next iteration.
*/
static int bio_iov_iter_align_down(struct bio *bio, struct iov_iter *iter,
unsigned len_align_mask)
{
size_t nbytes = bio->bi_iter.bi_size & len_align_mask;
if (!nbytes)
return 0;
iov_iter_revert(iter, nbytes);
bio->bi_iter.bi_size -= nbytes;
do {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (nbytes < bv->bv_len) {
bv->bv_len -= nbytes;
break;
}
bio_release_page(bio, bv->bv_page);
bio->bi_vcnt--;
nbytes -= bv->bv_len;
} while (nbytes);
if (!bio->bi_vcnt)
return -EFAULT;
return 0;
}
int bch2_bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
unsigned len_align_mask)
{
int ret = 0;
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return -EIO;
if (iov_iter_is_bvec(iter)) {
bio_iov_bvec_set(bio, iter);
iov_iter_advance(iter, bio->bi_iter.bi_size);
return 0;
}
if (iov_iter_extract_will_pin(iter))
bio_set_flag(bio, BIO_PAGE_PINNED);
do {
ret = __bio_iov_iter_get_pages(bio, iter);
} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
if (bio->bi_vcnt)
return bio_iov_iter_align_down(bio, iter, len_align_mask);
return ret;
}
#endif /* NO_BCACHEFS_FS */

libbcachefs/vendor/bio_iov_iter.h (new vendored file)

@ -0,0 +1,6 @@
#ifndef _BCACHEFS_VENDOR_BIO_IOV_ITER_H
#define _BCACHEFS_VENDOR_BIO_IOV_ITER_H
int bch2_bio_iov_iter_get_pages(struct bio *, struct iov_iter *, unsigned);
#endif /* _BCACHEFS_VENDOR_BIO_IOV_ITER_H */


@ -13,6 +13,8 @@
#include "vfs/direct.h"
#include "vfs/pagecache.h"
#include "vendor/bio_iov_iter.h"
#include "util/enumerated_ref.h"
#include <linux/kthread.h>
@ -148,11 +150,7 @@ start:
bio->bi_iter.bi_sector = offset >> 9;
bio->bi_private = dio;
#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0)
ret = bio_iov_iter_get_pages(bio, iter);
#else
ret = bio_iov_iter_get_pages(bio, iter, 0);
#endif
ret = bch2_bio_iov_iter_get_pages(bio, iter, 0);
if (ret < 0) {
/* XXX: fault inject this path */
bio->bi_status = BLK_STS_RESOURCE;
@ -465,11 +463,7 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
EBUG_ON(current->faults_disabled_mapping);
current->faults_disabled_mapping = mapping;
#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0)
ret = bio_iov_iter_get_pages(bio, &dio->iter);
#else
ret = bio_iov_iter_get_pages(bio, &dio->iter, 0);
#endif
ret = bch2_bio_iov_iter_get_pages(bio, &dio->iter, 0);
dropped_locks = fdm_dropped_locks();