Update bcachefs sources to 5fe20ac58af4 bcachefs: Don't bail out of check_inode() if check_has_case_sensitive() fails

Kent Overstreet 2025-10-30 16:42:35 -04:00
parent b2b4a5e78b
commit dc8c10a4b0
38 changed files with 610 additions and 532 deletions

View File

@ -1 +1 @@
b552eb12225133c8bf869b461faba6b72e35d2be
5fe20ac58af402e8ad9ace0bcf9daad524e3005d

View File

@ -440,25 +440,39 @@ static bool accounting_mem_entry_is_zero(struct accounting_mem_entry *e)
return true;
}
void bch2_accounting_mem_gc(struct bch_fs *c)
void __bch2_accounting_maybe_kill(struct bch_fs *c, struct bpos pos)
{
struct bch_accounting_mem *acc = &c->accounting;
struct disk_accounting_pos acc_k;
bpos_to_disk_accounting_pos(&acc_k, pos);
guard(percpu_write)(&c->mark_lock);
struct accounting_mem_entry *dst = acc->k.data;
if (acc_k.type != BCH_DISK_ACCOUNTING_replicas)
return;
darray_for_each(acc->k, src) {
if (accounting_mem_entry_is_zero(src)) {
free_percpu(src->v[0]);
free_percpu(src->v[1]);
} else {
*dst++ = *src;
guard(mutex)(&c->sb_lock);
scoped_guard(percpu_write, &c->mark_lock) {
struct bch_accounting_mem *acc = &c->accounting;
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &pos);
if (idx < acc->k.nr) {
struct accounting_mem_entry *e = acc->k.data + idx;
if (!accounting_mem_entry_is_zero(e))
return;
free_percpu(e->v[0]);
free_percpu(e->v[1]);
swap(*e, darray_last(acc->k));
--acc->k.nr;
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, NULL);
}
bch2_replicas_entry_kill(c, &acc_k.replicas);
}
acc->k.nr = dst - acc->k.data;
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, NULL);
bch2_write_super(c);
}
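For illustration: __bch2_accounting_maybe_kill() above drops a zeroed entry from the eytzinger-ordered accounting array by swapping it with the last element, shrinking the array, and re-sorting. A minimal standalone sketch of that swap-with-last removal pattern on an ordinary sorted array (plain qsort() stands in for eytzinger0_sort(); none of these names are the bcachefs helpers):

#include <stdio.h>
#include <stdlib.h>

static int cmp_int(const void *a, const void *b)
{
    int l = *(const int *) a, r = *(const int *) b;
    return (l > r) - (l < r);
}

/* Remove entries[idx] without memmove(): swap with the last slot, then re-sort. */
static void remove_and_resort(int *entries, size_t *nr, size_t idx)
{
    int tmp = entries[idx];
    entries[idx] = entries[*nr - 1];
    entries[*nr - 1] = tmp;
    (*nr)--;
    qsort(entries, *nr, sizeof(entries[0]), cmp_int);
}

int main(void)
{
    int entries[] = { 2, 5, 9, 14, 21 };
    size_t nr = 5;

    remove_and_resort(entries, &nr, 2);   /* drop the value 9 */
    for (size_t i = 0; i < nr; i++)
        printf("%d ", entries[i]);        /* prints: 2 5 14 21 */
    printf("\n");
    return 0;
}
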
/*
@ -472,9 +486,6 @@ void bch2_accounting_mem_gc(struct bch_fs *c)
int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
{
struct bch_accounting_mem *acc = &c->accounting;
int ret = 0;
darray_init(usage);
guard(percpu_read)(&c->mark_lock);
darray_for_each(acc->k, i) {
@ -492,24 +503,19 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
bch2_accounting_mem_read_counters(acc, i - acc->k.data, &sectors, 1, false);
u.r.sectors = sectors;
ret = darray_make_room(usage, replicas_usage_bytes(&u.r));
if (ret)
break;
try(darray_make_room(usage, replicas_usage_bytes(&u.r)));
memcpy(&darray_top(*usage), &u.r, replicas_usage_bytes(&u.r));
usage->nr += replicas_usage_bytes(&u.r);
}
if (ret)
darray_exit(usage);
return ret;
return 0;
}
int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned accounting_types_mask)
{
struct bch_accounting_mem *acc = &c->accounting;
int ret = 0;
darray_init(out_buf);
@ -521,10 +527,8 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc
if (!(accounting_types_mask & BIT(a_p.type)))
continue;
ret = darray_make_room(out_buf, sizeof(struct bkey_i_accounting) +
sizeof(u64) * i->nr_counters);
if (ret)
break;
try(darray_make_room(out_buf, sizeof(struct bkey_i_accounting) +
sizeof(u64) * i->nr_counters));
struct bkey_i_accounting *a_out =
bkey_accounting_init((void *) &darray_top(*out_buf));
@ -537,9 +541,7 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc
out_buf->nr += bkey_bytes(&a_out->k);
}
if (ret)
darray_exit(out_buf);
return ret;
return 0;
}
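Several hunks here (and throughout this commit) replace the open-coded "ret = fn(); if (ret) break/return ret;" pattern with try(), plus the errptr_try()/bkey_try() variants for pointer and bkey returns. A rough userspace approximation of the early-return idiom, assuming a macro along these lines (the in-tree definition may differ):

#include <errno.h>
#include <stdio.h>

/* Illustrative only: evaluate an int-returning expression, bail out on error. */
#define try(_expr)                          \
do {                                        \
    int _ret = (_expr);                     \
    if (_ret)                               \
        return _ret;                        \
} while (0)

static int reserve(size_t bytes)
{
    return bytes > 4096 ? -ENOMEM : 0;
}

static int fill_buffer(size_t bytes)
{
    try(reserve(bytes));    /* replaces: ret = reserve(...); if (ret) return ret; */
    printf("reserved %zu bytes\n", bytes);
    return 0;
}

int main(void)
{
    return fill_buffer(512) ?: fill_buffer(1 << 20);
}
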
static void bch2_accounting_free_counters(struct bch_accounting_mem *acc, bool gc)

View File

@ -43,6 +43,21 @@ static inline void bch2_accounting_accumulate(struct bkey_i_accounting *dst,
dst->k.bversion = src.k->bversion;
}
void __bch2_accounting_maybe_kill(struct bch_fs *, struct bpos pos);
static inline void bch2_accounting_accumulate_maybe_kill(struct bch_fs *c,
struct bkey_i_accounting *dst,
struct bkey_s_c_accounting src)
{
bch2_accounting_accumulate(dst, src);
for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++)
if (dst->v.d[i])
return;
__bch2_accounting_maybe_kill(c, dst->k.p);
}
static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage,
enum bch_data_type data_type,
s64 sectors)
@ -137,7 +152,6 @@ enum bch_accounting_mode {
int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode);
int bch2_accounting_mem_insert_locked(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode);
void bch2_accounting_mem_gc(struct bch_fs *);
static inline bool bch2_accounting_is_mem(struct disk_accounting_pos *acc)
{
@ -205,13 +219,10 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &a.k->p)) >= acc->k.nr) {
int ret = 0;
if (unlikely(write_locked))
ret = bch2_accounting_mem_insert_locked(c, a, mode);
try(bch2_accounting_mem_insert_locked(c, a, mode));
else
ret = bch2_accounting_mem_insert(c, a, mode);
if (ret)
return ret;
try(bch2_accounting_mem_insert(c, a, mode));
}
struct accounting_mem_entry *e = &acc->k.data[idx];

View File

@ -12,6 +12,21 @@
#include <linux/sort.h>
DEFINE_CLASS(bch_replicas_cpu, struct bch_replicas_cpu,
kfree(_T.entries),
(struct bch_replicas_cpu) {}, void)
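DEFINE_CLASS()/CLASS() are the kernel's scope-based cleanup helpers from <linux/cleanup.h>: a class pairs an initializer with a destructor, so the CLASS(bch_replicas_cpu, ...) declarations further down free their entries automatically on every exit path. A loose userspace analogue built on __attribute__((cleanup)), shown only to illustrate the shape (these names are hypothetical, not the kernel macros):

#include <stdlib.h>

struct buf { char *data; };

static void buf_release(struct buf *b)
{
    free(b->data);              /* runs automatically when b leaves scope */
}

/* Declare a struct buf that is torn down at end of scope. */
#define SCOPED_BUF(name, size)                                  \
    struct buf name __attribute__((cleanup(buf_release))) =     \
        { .data = malloc(size) }

static int demo(int fail_early)
{
    SCOPED_BUF(b, 64);

    if (fail_early)
        return -1;              /* no explicit free() on the error path */

    /* ... use b.data ... */
    return 0;
}

int main(void)
{
    return demo(0);
}
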
static inline struct bch_replicas_entry_v1 *
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
{
return (void *) r->entries + r->entry_size * i;
}
#define for_each_cpu_replicas_entry(_r, _i) \
for (struct bch_replicas_entry_v1 *_i = (_r)->entries; \
(void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size; \
_i = (void *) (_i) + (_r)->entry_size)
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
struct bch_replicas_cpu *);
@ -129,15 +144,14 @@ bad:
void bch2_cpu_replicas_to_text(struct printbuf *out,
struct bch_replicas_cpu *r)
{
struct bch_replicas_entry_v1 *e;
bool first = true;
for_each_cpu_replicas_entry(r, e) {
for_each_cpu_replicas_entry(r, i) {
if (!first)
prt_printf(out, " ");
first = false;
bch2_replicas_entry_to_text(out, e);
bch2_replicas_entry_to_text(out, i);
}
}
@ -246,45 +260,27 @@ cpu_replicas_add_entry(struct bch_fs *c,
return new;
}
static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
struct bch_replicas_entry_v1 *search)
static inline struct bch_replicas_entry_v1 *
replicas_entry_search(struct bch_replicas_cpu *r,
struct bch_replicas_entry_v1 *search)
{
int idx, entry_size = replicas_entry_bytes(search);
verify_replicas_entry(search);
if (unlikely(entry_size > r->entry_size))
return -1;
#define entry_cmp(_l, _r) memcmp(_l, _r, entry_size)
idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
entry_cmp, search);
#undef entry_cmp
return idx < r->nr ? idx : -1;
}
int bch2_replicas_entry_idx(struct bch_fs *c,
struct bch_replicas_entry_v1 *search)
{
bch2_replicas_entry_sort(search);
return __replicas_entry_idx(&c->replicas, search);
}
static bool __replicas_has_entry(struct bch_replicas_cpu *r,
struct bch_replicas_entry_v1 *search)
{
return __replicas_entry_idx(r, search) >= 0;
size_t entry_size = replicas_entry_bytes(search);
int idx = likely(entry_size <= r->entry_size)
? eytzinger0_find_r(r->entries, r->nr, r->entry_size,
bch2_memcmp, (void *) entry_size, search)
: -1;
return idx >= 0 ? cpu_replicas_entry(r, idx) : NULL;
}
bool bch2_replicas_marked_locked(struct bch_fs *c,
struct bch_replicas_entry_v1 *search)
{
verify_replicas_entry(search);
return !search->nr_devs ||
(__replicas_has_entry(&c->replicas, search) &&
(replicas_entry_search(&c->replicas, search) &&
(likely((!c->replicas_gc.entries)) ||
__replicas_has_entry(&c->replicas_gc, search)));
replicas_entry_search(&c->replicas_gc, search)));
}
bool bch2_replicas_marked(struct bch_fs *c,
@ -298,40 +294,31 @@ noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
struct bch_replicas_entry_v1 *new_entry)
{
struct bch_replicas_cpu new_r, new_gc;
int ret = 0;
verify_replicas_entry(new_entry);
memset(&new_r, 0, sizeof(new_r));
memset(&new_gc, 0, sizeof(new_gc));
CLASS(bch_replicas_cpu, new_r)();
CLASS(bch_replicas_cpu, new_gc)();
guard(mutex)(&c->sb_lock);
if (c->replicas_gc.entries &&
!__replicas_has_entry(&c->replicas_gc, new_entry)) {
!replicas_entry_search(&c->replicas_gc, new_entry)) {
new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
if (!new_gc.entries) {
ret = bch_err_throw(c, ENOMEM_cpu_replicas);
goto out;
}
if (!new_gc.entries)
return bch_err_throw(c, ENOMEM_cpu_replicas);
}
if (!__replicas_has_entry(&c->replicas, new_entry)) {
if (!replicas_entry_search(&c->replicas, new_entry)) {
new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
if (!new_r.entries) {
ret = bch_err_throw(c, ENOMEM_cpu_replicas);
goto out;
}
if (!new_r.entries)
return bch_err_throw(c, ENOMEM_cpu_replicas);
ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
if (ret)
goto out;
try(bch2_cpu_replicas_to_sb_replicas(c, &new_r));
}
if (!new_r.entries &&
!new_gc.entries)
goto out;
return 0;
/* allocations done, now commit: */
@ -345,12 +332,8 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
if (new_gc.entries)
swap(new_gc, c->replicas_gc);
}
out:
kfree(new_r.entries);
kfree(new_gc.entries);
bch_err_msg(c, ret, "adding replicas entry");
return ret;
return 0;
}
int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
@ -387,9 +370,6 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
struct bch_replicas_entry_v1 *e;
unsigned i = 0;
lockdep_assert_held(&c->replicas_gc_lock);
guard(mutex)(&c->sb_lock);
@ -401,7 +381,7 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
for_each_cpu_replicas_entry(&c->replicas, e) {
/* Preserve unknown data types */
if (e->data_type >= BCH_DATA_NR ||
!((1 << e->data_type) & typemask)) {
!(BIT(e->data_type) & typemask)) {
c->replicas_gc.nr++;
c->replicas_gc.entry_size =
max_t(unsigned, c->replicas_gc.entry_size,
@ -417,9 +397,10 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
return bch_err_throw(c, ENOMEM_replicas_gc);
}
unsigned i = 0;
for_each_cpu_replicas_entry(&c->replicas, e)
if (e->data_type >= BCH_DATA_NR ||
!((1 << e->data_type) & typemask))
!(BIT(e->data_type) & typemask))
memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
e, c->replicas_gc.entry_size);
@ -427,73 +408,23 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
return 0;
}
/*
* New much simpler mechanism for clearing out unneeded replicas entries - drop
* replicas entries that have 0 sectors used.
*
* However, we don't track sector counts for journal usage, so this doesn't drop
* any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism
* is retained for that.
*/
int bch2_replicas_gc2(struct bch_fs *c)
void bch2_replicas_entry_kill(struct bch_fs *c, struct bch_replicas_entry_v1 *kill)
{
struct bch_replicas_cpu new = { 0 };
unsigned nr;
int ret = 0;
lockdep_assert_held(&c->mark_lock);
lockdep_assert_held(&c->sb_lock);
bch2_accounting_mem_gc(c);
retry:
nr = READ_ONCE(c->replicas.nr);
new.entry_size = READ_ONCE(c->replicas.entry_size);
new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL);
if (!new.entries) {
bch_err(c, "error allocating c->replicas_gc");
return bch_err_throw(c, ENOMEM_replicas_gc);
}
struct bch_replicas_cpu *r = &c->replicas;
guard(mutex)(&c->sb_lock);
scoped_guard(percpu_write, &c->mark_lock) {
if (nr != c->replicas.nr ||
new.entry_size != c->replicas.entry_size) {
kfree(new.entries);
goto retry;
}
struct bch_replicas_entry_v1 *e = replicas_entry_search(&c->replicas, kill);
if (WARN(!e, "replicas entry not found in sb"))
return;
for (unsigned i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry_v1 *e =
cpu_replicas_entry(&c->replicas, i);
memcpy(e, cpu_replicas_entry(r, --r->nr), r->entry_size);
struct disk_accounting_pos k = {
.type = BCH_DISK_ACCOUNTING_replicas,
};
bch2_cpu_replicas_sort(r);
unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e),
"embedded variable length struct");
struct bpos p = disk_accounting_pos_to_bpos(&k);
struct bch_accounting_mem *acc = &c->accounting;
bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &p) >= acc->k.nr;
if (e->data_type == BCH_DATA_journal || !kill)
memcpy(cpu_replicas_entry(&new, new.nr++),
e, new.entry_size);
}
bch2_cpu_replicas_sort(&new);
ret = bch2_cpu_replicas_to_sb_replicas(c, &new);
if (!ret)
swap(c->replicas, new);
kfree(new.entries);
}
if (!ret)
bch2_write_super(c);
return ret;
int ret = bch2_cpu_replicas_to_sb_replicas(c, r);
WARN(ret, "bch2_cpu_replicas_to_sb_replicas() error: %s", bch2_err_str(ret));
}
/* Replicas tracking - superblock: */
@ -502,7 +433,6 @@ static int
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
struct bch_replicas_cpu *cpu_r)
{
struct bch_replicas_entry_v1 *e, *dst;
unsigned nr = 0, entry_size = 0, idx = 0;
for_each_replicas_entry(sb_r, e) {
@ -519,7 +449,7 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
cpu_r->entry_size = entry_size;
for_each_replicas_entry(sb_r, e) {
dst = cpu_replicas_entry(cpu_r, idx++);
struct bch_replicas_entry_v1 *dst = cpu_replicas_entry(cpu_r, idx++);
memcpy(dst, e, replicas_entry_bytes(e));
bch2_replicas_entry_sort(dst);
}
@ -531,7 +461,6 @@ static int
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
struct bch_replicas_cpu *cpu_r)
{
struct bch_replicas_entry_v0 *e;
unsigned nr = 0, entry_size = 0, idx = 0;
for_each_replicas_entry(sb_r, e) {
@ -550,14 +479,14 @@ __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
cpu_r->nr = nr;
cpu_r->entry_size = entry_size;
for_each_replicas_entry(sb_r, e) {
for_each_replicas_entry(sb_r, src) {
struct bch_replicas_entry_v1 *dst =
cpu_replicas_entry(cpu_r, idx++);
dst->data_type = e->data_type;
dst->nr_devs = e->nr_devs;
dst->data_type = src->data_type;
dst->nr_devs = src->nr_devs;
dst->nr_required = 1;
memcpy(dst->devs, e->devs, e->nr_devs);
memcpy(dst->devs, src->devs, src->nr_devs);
bch2_replicas_entry_sort(dst);
}
@ -568,7 +497,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
struct bch_sb_field_replicas *sb_v1;
struct bch_sb_field_replicas_v0 *sb_v0;
struct bch_replicas_cpu new_r = { 0, 0, NULL };
CLASS(bch_replicas_cpu, new_r)();
if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
try(__bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r));
@ -580,8 +509,6 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
guard(percpu_write)(&c->mark_lock);
swap(c->replicas, new_r);
kfree(new_r.entries);
return 0;
}
@ -590,7 +517,6 @@ static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
{
struct bch_sb_field_replicas_v0 *sb_r;
struct bch_replicas_entry_v0 *dst;
struct bch_replicas_entry_v1 *src;
size_t bytes;
bytes = sizeof(struct bch_sb_field_replicas);
@ -628,7 +554,7 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
struct bch_replicas_cpu *r)
{
struct bch_sb_field_replicas *sb_r;
struct bch_replicas_entry_v1 *dst, *src;
struct bch_replicas_entry_v1 *dst;
bool need_v1 = false;
size_t bytes;
@ -707,12 +633,11 @@ static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
{
struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
struct bch_replicas_cpu cpu_r;
CLASS(bch_replicas_cpu, cpu_r)();
try(__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r));
try(bch2_cpu_replicas_validate(&cpu_r, sb, err));
int ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
kfree(cpu_r.entries);
return ret;
return 0;
}
static void bch2_sb_replicas_to_text(struct printbuf *out,
@ -720,7 +645,6 @@ static void bch2_sb_replicas_to_text(struct printbuf *out,
struct bch_sb_field *f)
{
struct bch_sb_field_replicas *r = field_to_type(f, replicas);
struct bch_replicas_entry_v1 *e;
bool first = true;
for_each_replicas_entry(r, e) {
@ -743,12 +667,11 @@ static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *
{
struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
struct bch_replicas_cpu cpu_r;
CLASS(bch_replicas_cpu, cpu_r)();
try(__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r));
try(bch2_cpu_replicas_validate(&cpu_r, sb, err));
int ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
kfree(cpu_r.entries);
return ret;
return 0;
}
static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
@ -756,7 +679,6 @@ static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
struct bch_sb_field *f)
{
struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
struct bch_replicas_entry_v0 *e;
bool first = true;
for_each_replicas_entry(sb_r, e) {
@ -779,8 +701,6 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
bool bch2_can_read_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, struct printbuf *err)
{
struct bch_replicas_entry_v1 *e;
guard(percpu_read)(&c->mark_lock);
for_each_cpu_replicas_entry(&c->replicas, e) {
unsigned nr_online = 0, nr_failed = 0, dflags = 0;
@ -910,8 +830,6 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
replicas_v0 = bch2_sb_field_get(sb, replicas_v0);
if (replicas) {
struct bch_replicas_entry_v1 *r;
for_each_replicas_entry(replicas, r) {
if (r->data_type >= sizeof(data_has) * 8)
continue;
@ -922,9 +840,7 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
}
} else if (replicas_v0) {
struct bch_replicas_entry_v0 *r;
for_each_replicas_entry_v0(replicas_v0, r) {
for_each_replicas_entry(replicas_v0, r) {
if (r->data_type >= sizeof(data_has) * 8)
continue;

View File

@ -13,15 +13,6 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *,
struct bch_fs *, struct printbuf *);
void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
static inline struct bch_replicas_entry_v1 *
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
{
return (void *) r->entries + r->entry_size * i;
}
int bch2_replicas_entry_idx(struct bch_fs *,
struct bch_replicas_entry_v1 *);
void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *,
enum bch_data_type,
struct bch_devs_list);
@ -53,12 +44,15 @@ unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
int bch2_replicas_gc_end(struct bch_fs *, int);
int bch2_replicas_gc_start(struct bch_fs *, unsigned);
int bch2_replicas_gc2(struct bch_fs *);
void bch2_replicas_entry_kill(struct bch_fs *, struct bch_replicas_entry_v1 *);
#define for_each_cpu_replicas_entry(_r, _i) \
for (_i = (_r)->entries; \
(void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
_i = (void *) (_i) + (_r)->entry_size)
static inline bool bch2_replicas_entry_has_dev(struct bch_replicas_entry_v1 *r, unsigned dev)
{
for (unsigned i = 0; i < r->nr_devs; i++)
if (r->devs[i] == dev)
return true;
return false;
}
/* iterate over superblock replicas - used by userspace tools: */
@ -66,12 +60,7 @@ int bch2_replicas_gc2(struct bch_fs *);
((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i)))
#define for_each_replicas_entry(_r, _i) \
for (_i = (_r)->entries; \
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
(_i) = replicas_entry_next(_i))
#define for_each_replicas_entry_v0(_r, _i) \
for (_i = (_r)->entries; \
for (typeof(&(_r)->entries[0]) _i = (_r)->entries; \
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
(_i) = replicas_entry_next(_i))

View File

@ -8,4 +8,10 @@ struct bch_replicas_cpu {
struct bch_replicas_entry_v1 *entries;
};
union bch_replicas_padded {
u8 bytes[struct_size_t(struct bch_replicas_entry_v1,
devs, BCH_BKEY_PTRS_MAX)];
struct bch_replicas_entry_v1 e;
};
#endif /* _BCACHEFS_REPLICAS_TYPES_H */

View File

@ -609,6 +609,18 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
closure_wake_up(&c->btree_interior_update_wait);
}
static void bch2_btree_update_add_key(btree_update_nodes *nodes,
unsigned level, struct bkey_i *k)
{
BUG_ON(darray_make_room(nodes, 1));
struct btree_update_node *n = &darray_top(*nodes);
nodes->nr++;
*n = (struct btree_update_node) { .level = level };
bkey_copy(&n->key, k);
}
static void bch2_btree_update_add_node(struct bch_fs *c, btree_update_nodes *nodes, struct btree *b)
{
BUG_ON(darray_make_room(nodes, 1));
@ -649,20 +661,26 @@ static void btree_update_new_nodes_mark_sb(struct btree_update *as)
static int btree_update_nodes_written_trans(struct btree_trans *trans,
struct btree_update *as)
{
struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans, as->journal_u64s));
memcpy(e, as->journal_entries, as->journal_u64s * sizeof(u64));
trans->journal_pin = &as->journal;
darray_for_each(as->old_nodes, i)
try(bch2_key_trigger_old(trans, as->btree_id, i->level + 1, bkey_i_to_s_c(&i->key),
BTREE_TRIGGER_transactional));
darray_for_each(as->new_nodes, i)
darray_for_each(as->new_nodes, i) {
try(bch2_key_trigger_new(trans, as->btree_id, i->level + 1, bkey_i_to_s(&i->key),
BTREE_TRIGGER_transactional));
journal_entry_set(errptr_try(bch2_trans_jset_entry_alloc(trans,
jset_u64s(i->key.k.u64s))),
i->root
? BCH_JSET_ENTRY_btree_root
: BCH_JSET_ENTRY_btree_keys,
as->btree_id,
i->root ? i->level : i->level + 1,
&i->key, i->key.k.u64s);
}
return 0;
}
@ -749,11 +767,12 @@ static void btree_update_nodes_written(struct btree_update *as)
* all our new nodes, to avoid racing with
* btree_node_update_key():
*/
darray_for_each(as->new_nodes, i) {
BUG_ON(i->b->will_make_reachable != (unsigned long) as);
i->b->will_make_reachable = 0;
clear_btree_node_will_make_reachable(i->b);
}
darray_for_each(as->new_nodes, i)
if (i->b) {
BUG_ON(i->b->will_make_reachable != (unsigned long) as);
i->b->will_make_reachable = 0;
clear_btree_node_will_make_reachable(i->b);
}
}
/*
@ -841,11 +860,12 @@ static void btree_update_nodes_written(struct btree_update *as)
bch2_journal_pin_drop(&c->journal, &as->journal);
darray_for_each(as->new_nodes, i) {
btree_node_lock_nopath_nofail(trans, &i->b->c, SIX_LOCK_read);
btree_node_write_if_need(trans, i->b, SIX_LOCK_read);
six_unlock_read(&i->b->c.lock);
}
darray_for_each(as->new_nodes, i)
if (i->b) {
btree_node_lock_nopath_nofail(trans, &i->b->c, SIX_LOCK_read);
btree_node_write_if_need(trans, i->b, SIX_LOCK_read);
six_unlock_read(&i->b->c.lock);
}
for (unsigned i = 0; i < as->nr_open_buckets; i++)
bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
@ -931,25 +951,13 @@ static void btree_update_reparent(struct btree_update *as,
static void btree_update_updated_root(struct btree_update *as, struct btree *b)
{
struct bkey_i *insert = &b->key;
struct bch_fs *c = as->c;
BUG_ON(as->mode != BTREE_UPDATE_none);
as->mode = BTREE_UPDATE_root;
BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
ARRAY_SIZE(as->journal_entries));
as->journal_u64s +=
journal_entry_set((void *) &as->journal_entries[as->journal_u64s],
BCH_JSET_ENTRY_btree_root,
b->c.btree_id, b->c.level,
insert, insert->k.u64s);
scoped_guard(mutex, &c->btree_interior_update_lock) {
scoped_guard(mutex, &c->btree_interior_update_lock)
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
as->mode = BTREE_UPDATE_root;
}
}
/*
@ -1323,7 +1331,6 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
{
struct bch_fs *c = as->c;
struct bkey_packed *k;
CLASS(printbuf, buf)();
unsigned long old, new;
BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
@ -1344,15 +1351,6 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
dump_stack();
}
BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
ARRAY_SIZE(as->journal_entries));
as->journal_u64s +=
journal_entry_set((void *) &as->journal_entries[as->journal_u64s],
BCH_JSET_ENTRY_btree_keys,
b->c.btree_id, b->c.level,
insert, insert->k.u64s);
while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
bkey_iter_pos_cmp(b, k, &insert->k.p) < 0)
bch2_btree_node_iter_advance(node_iter, b);
@ -2105,6 +2103,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0);
bch2_btree_update_add_key(&as->new_nodes, n->c.level, &delete);
bch2_btree_update_add_node(c, &as->new_nodes, n);
bch2_btree_node_free_inmem(trans, trans->paths + path, b);
@ -2386,15 +2385,6 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
struct bch_fs *c = trans->c;
if (!btree_node_will_make_reachable(b)) {
if (!skip_triggers) {
try(bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
bkey_i_to_s_c(&b->key),
BTREE_TRIGGER_transactional));
try(bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
bkey_i_to_s(new_key),
BTREE_TRIGGER_transactional));
}
if (!btree_node_is_root(c, b)) {
CLASS(btree_node_iter, parent_iter)(trans,
b->c.btree_id,
@ -2404,15 +2394,32 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
BTREE_ITER_intent);
try(bch2_btree_iter_traverse(&parent_iter));
try(bch2_trans_update(trans, &parent_iter, new_key, BTREE_TRIGGER_norun));
try(bch2_trans_update(trans, &parent_iter, new_key, skip_triggers ? BTREE_TRIGGER_norun : 0));
} else {
struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans,
jset_u64s(new_key->k.u64s)));
if (!skip_triggers)
try(bch2_key_trigger(trans, b->c.btree_id, b->c.level + 1,
bkey_i_to_s_c(&b->key),
bkey_i_to_s(new_key),
BTREE_TRIGGER_insert|
BTREE_TRIGGER_overwrite|
BTREE_TRIGGER_transactional));
journal_entry_set(e,
journal_entry_set(errptr_try(bch2_trans_jset_entry_alloc(trans,
jset_u64s(b->key.k.u64s))),
BCH_JSET_ENTRY_overwrite,
b->c.btree_id, b->c.level + 1,
&b->key, b->key.k.u64s);
journal_entry_set(errptr_try(bch2_trans_jset_entry_alloc(trans,
jset_u64s(new_key->k.u64s))),
BCH_JSET_ENTRY_btree_root,
b->c.btree_id, b->c.level,
new_key, new_key->k.u64s);
/*
* propagated back to c->btree_roots[].key by
* bch2_journal_entry_to_btree_root() incorrect for
*/
}
try(bch2_trans_commit(trans, NULL, NULL, commit_flags));

View File

@ -8,8 +8,6 @@
#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)
#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
int bch2_btree_node_check_topology(struct btree_trans *, struct btree *);
#define BTREE_UPDATE_MODES() \
@ -111,9 +109,6 @@ struct btree_update {
BCH_REPLICAS_MAX];
open_bucket_idx_t nr_open_buckets;
unsigned journal_u64s;
u64 journal_entries[BTREE_UPDATE_JOURNAL_RES];
/* Only here to reduce stack usage on recursive splits: */
struct keylist parent_keys;
/*

View File

@ -736,6 +736,19 @@ void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
/* Btree path: traverse, set_pos: */
static noinline_for_stack int btree_node_root_err(struct btree_trans *trans, struct btree *b)
{
struct bch_fs *c = trans->c;
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
prt_str(&buf, "btree root doesn't cover expected range:\n");
bch2_btree_pos_to_text(&buf, c, b);
prt_newline(&buf);
return __bch2_topology_error(c, &buf);
}
static inline int btree_path_lock_root(struct btree_trans *trans,
struct btree_path *path,
unsigned depth_want,
@ -783,6 +796,13 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
if (likely(b == READ_ONCE(r->b) &&
b->c.level == path->level &&
!race_fault())) {
if (unlikely(!bpos_eq(b->data->min_key, POS_MIN) ||
!bpos_eq(b->key.k.p, SPOS_MAX))) {
ret = btree_node_root_err(trans, b);
six_unlock_type(&b->c.lock, lock_type);
return ret;
}
for (i = 0; i < path->level; i++)
path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_lock_root);
path->l[path->level].b = b;

View File

@ -557,7 +557,7 @@ void *__bch2_trans_subbuf_alloc(struct btree_trans *trans,
int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
enum btree_id btree, struct bpos start, struct bpos end)
{
bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent);
bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent|BTREE_ITER_with_updates);
struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_prev(iter));
if (bpos_lt(iter->pos, start))

View File

@ -158,8 +158,9 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, iter), &u);
if (k.k->type == KEY_TYPE_accounting)
bch2_accounting_accumulate(bkey_i_to_accounting(&wb->k),
bkey_s_c_to_accounting(k));
bch2_accounting_accumulate_maybe_kill(trans->c,
bkey_i_to_accounting(&wb->k),
bkey_s_c_to_accounting(k));
}
*accounting_accumulated = true;

View File

@ -4,12 +4,6 @@
#include "bcachefs_format.h"
union bch_replicas_padded {
u8 bytes[struct_size_t(struct bch_replicas_entry_v1,
devs, BCH_BKEY_PTRS_MAX)];
struct bch_replicas_entry_v1 e;
};
struct stripe {
size_t heap_idx;
u16 sectors;

View File

@ -994,7 +994,6 @@ int bch2_data_job(struct bch_fs *c,
true,
rereplicate_pred, c) ?: ret;
bch2_btree_interior_updates_flush(c);
ret = bch2_replicas_gc2(c) ?: ret;
break;
case BCH_DATA_OP_migrate:
if (op->migrate.dev >= c->sb.nr_devices)
@ -1010,7 +1009,6 @@ int bch2_data_job(struct bch_fs *c,
true,
migrate_pred, op) ?: ret;
bch2_btree_interior_updates_flush(c);
ret = bch2_replicas_gc2(c) ?: ret;
break;
case BCH_DATA_OP_rewrite_old_nodes:
ret = bch2_scan_old_btree_nodes(c, stats);
@ -1020,7 +1018,6 @@ int bch2_data_job(struct bch_fs *c,
writepoint_hashed((unsigned long) current),
true,
drop_extra_replicas_pred, c) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
break;
default:
ret = -EINVAL;

View File

@ -296,7 +296,7 @@ int bch2_bkey_get_io_opts(struct btree_trans *trans,
if (!snapshot_opts) {
bch2_inode_opts_get(c, opts, metadata);
if (k.k->p.snapshot) {
if (!metadata && k.k->p.snapshot) {
struct bch_inode_unpacked inode;
int ret = bch2_inode_find_by_inum_snapshot(trans, k.k->p.inode, k.k->p.snapshot,
&inode, BTREE_ITER_cached);
@ -313,7 +313,7 @@ int bch2_bkey_get_io_opts(struct btree_trans *trans,
snapshot_opts->d.nr = 0;
}
if (k.k->p.snapshot) {
if (!metadata && k.k->p.snapshot) {
if (snapshot_opts->cur_inum != k.k->p.inode) {
snapshot_opts->d.nr = 0;
@ -362,6 +362,8 @@ int bch2_bkey_get_io_opts(struct btree_trans *trans,
#undef x
}
BUG_ON(metadata && opts->erasure_code);
return 0;
}
@ -374,10 +376,46 @@ static const char * const bch2_rebalance_state_strs[] = {
#undef x
};
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum)
static u64 rebalance_scan_encode(struct rebalance_scan s)
{
switch (s.type) {
case REBALANCE_SCAN_fs:
return 0;
case REBALANCE_SCAN_metadata:
return 1;
case REBALANCE_SCAN_device:
return s.dev + 32;
case REBALANCE_SCAN_inum:
return s.inum;
default:
BUG();
}
}
static struct rebalance_scan rebalance_scan_decode(u64 v)
{
if (v == 0)
return (struct rebalance_scan) { .type = REBALANCE_SCAN_fs };
if (v == 1)
return (struct rebalance_scan) { .type = REBALANCE_SCAN_metadata };
if (v < BCACHEFS_ROOT_INO)
return (struct rebalance_scan) {
.type = REBALANCE_SCAN_device,
.dev = v - 32,
};
return (struct rebalance_scan) {
.type = REBALANCE_SCAN_inum,
.inum = v,
};
}
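The scan cookie's key packs its target into a single u64: 0 means a filesystem-wide scan, 1 means metadata, small values offset by 32 name a device, and anything at or above BCACHEFS_ROOT_INO is an inode number. A standalone round-trip check of that mapping, mirroring the two functions above (BCACHEFS_ROOT_INO's value is assumed here, and the names are local to the sketch):

#include <assert.h>
#include <stdint.h>

#define ROOT_INO 4096ULL        /* assumed stand-in for BCACHEFS_ROOT_INO */

enum scan_type { SCAN_FS, SCAN_METADATA, SCAN_DEVICE, SCAN_INUM };
struct scan { enum scan_type type; uint64_t v; };

static uint64_t scan_encode(struct scan s)
{
    switch (s.type) {
    case SCAN_FS:       return 0;
    case SCAN_METADATA: return 1;
    case SCAN_DEVICE:   return s.v + 32;
    default:            return s.v;     /* SCAN_INUM: inum >= ROOT_INO */
    }
}

static struct scan scan_decode(uint64_t v)
{
    if (v == 0)         return (struct scan) { SCAN_FS, 0 };
    if (v == 1)         return (struct scan) { SCAN_METADATA, 0 };
    if (v < ROOT_INO)   return (struct scan) { SCAN_DEVICE, v - 32 };
    return (struct scan) { SCAN_INUM, v };
}

int main(void)
{
    assert(scan_decode(scan_encode((struct scan) { SCAN_DEVICE, 3 })).v == 3);
    assert(scan_decode(scan_encode((struct scan) { SCAN_INUM, 8192 })).type == SCAN_INUM);
    assert(scan_decode(0).type == SCAN_FS);
    return 0;
}
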
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, struct rebalance_scan s)
{
CLASS(btree_iter, iter)(trans, BTREE_ID_rebalance_work,
SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
SPOS(rebalance_scan_encode(s),
REBALANCE_WORK_SCAN_OFFSET,
U32_MAX),
BTREE_ITER_intent);
struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter));
@ -394,16 +432,17 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum)
return bch2_trans_update(trans, &iter, &cookie->k_i, 0);
}
int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
int bch2_set_rebalance_needs_scan(struct bch_fs *c, struct rebalance_scan s)
{
CLASS(btree_trans, trans)(c);
return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_set_rebalance_needs_scan_trans(trans, inum));
bch2_set_rebalance_needs_scan_trans(trans, s));
}
int bch2_set_fs_needs_rebalance(struct bch_fs *c)
{
return bch2_set_rebalance_needs_scan(c, 0);
return bch2_set_rebalance_needs_scan(c,
(struct rebalance_scan) { .type = REBALANCE_SCAN_fs });
}
static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie)
@ -647,7 +686,7 @@ root_err:
noinline_for_stack
static int do_rebalance_scan(struct moving_context *ctxt,
struct per_snapshot_io_opts *snapshot_io_opts,
u64 inum, u64 cookie, u64 *sectors_scanned)
u64 scan_v, u64 cookie, u64 *sectors_scanned)
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
@ -658,7 +697,8 @@ static int do_rebalance_scan(struct moving_context *ctxt,
r->state = BCH_REBALANCE_scanning;
if (!inum) {
struct rebalance_scan s = rebalance_scan_decode(scan_v);
if (s.type == REBALANCE_SCAN_fs) {
r->scan_start = BBPOS_MIN;
r->scan_end = BBPOS_MAX;
@ -670,16 +710,16 @@ static int do_rebalance_scan(struct moving_context *ctxt,
try(do_rebalance_scan_btree(ctxt, snapshot_io_opts, btree, 0,
POS_MIN, SPOS_MAX));
}
} else {
r->scan_start = BBPOS(BTREE_ID_extents, POS(inum, 0));
r->scan_end = BBPOS(BTREE_ID_extents, POS(inum, U64_MAX));
} else if (s.type == REBALANCE_SCAN_inum) {
r->scan_start = BBPOS(BTREE_ID_extents, POS(s.inum, 0));
r->scan_end = BBPOS(BTREE_ID_extents, POS(s.inum, U64_MAX));
try(do_rebalance_scan_btree(ctxt, snapshot_io_opts, BTREE_ID_extents, 0,
r->scan_start.pos, r->scan_end.pos));
}
try(commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_clear_rebalance_needs_scan(trans, inum, cookie)));
bch2_clear_rebalance_needs_scan(trans, scan_v, cookie)));
*sectors_scanned += atomic64_read(&r->scan_stats.sectors_seen);
/*

View File

@ -84,8 +84,22 @@ int bch2_bkey_get_io_opts(struct btree_trans *,
struct per_snapshot_io_opts *, struct bkey_s_c,
struct bch_inode_opts *opts);
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64);
int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum);
struct rebalance_scan {
enum rebalance_scan_type {
REBALANCE_SCAN_fs,
REBALANCE_SCAN_metadata,
REBALANCE_SCAN_device,
REBALANCE_SCAN_inum,
} type;
union {
unsigned dev;
u64 inum;
};
};
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, struct rebalance_scan);
int bch2_set_rebalance_needs_scan(struct bch_fs *, struct rebalance_scan);
int bch2_set_fs_needs_rebalance(struct bch_fs *);
static inline void bch2_rebalance_wakeup(struct bch_fs *c)

View File

@ -693,6 +693,9 @@ static int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
struct bch_inode_opts *io_opts,
unsigned buf_bytes)
{
/* be paranoid */
buf_bytes = round_up(buf_bytes, c->opts.block_size);
unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
m->bvecs = kmalloc_array(nr_vecs, sizeof*(m->bvecs), GFP_KERNEL);
@ -702,7 +705,7 @@ static int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
bio_init(&m->rbio.bio, NULL, m->bvecs, nr_vecs, REQ_OP_READ);
bio_init(&m->op.wbio.bio, NULL, m->bvecs, nr_vecs, 0);
if (bch2_bio_alloc_pages(&m->op.wbio.bio, buf_bytes, GFP_KERNEL)) {
if (bch2_bio_alloc_pages(&m->op.wbio.bio, c->opts.block_size, buf_bytes, GFP_KERNEL)) {
kfree(m->bvecs);
m->bvecs = NULL;
return -ENOMEM;

View File

@ -807,6 +807,19 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
struct bio *bio;
unsigned output_available =
min(wp->sectors_free << 9, src->bi_iter.bi_size);
/*
* XXX: we'll want to delete this later, there's no reason we can't
* issue > 2MB bios if we're allocating high order pages
*
* But bch2_bio_alloc_pages() BUGS() if we ask it to allocate more pages
* than fit in the bio, and we're using bio_alloc_bioset() which is
* limited to BIO_MAX_VECS
*/
output_available = min(output_available, BIO_MAX_VECS * PAGE_SIZE);
BUG_ON(output_available & (c->opts.block_size - 1));
unsigned pages = DIV_ROUND_UP(output_available +
(buf
? ((unsigned long) buf & (PAGE_SIZE - 1))
@ -814,8 +827,7 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
pages = min(pages, BIO_MAX_VECS);
bio = bio_alloc_bioset(NULL, pages, 0,
GFP_NOFS, &c->bio_write);
bio = bio_alloc_bioset(NULL, pages, 0, GFP_NOFS, &c->bio_write);
wbio = wbio_init(bio);
wbio->put_bio = true;
/* copy WRITE_SYNC flag */
@ -839,6 +851,7 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
if (bio->bi_iter.bi_size < output_available)
*page_alloc_failed =
bch2_bio_alloc_pages(bio,
c->opts.block_size,
output_available -
bio->bi_iter.bi_size,
GFP_NOFS) != 0;

View File

@ -196,6 +196,7 @@ read_attribute(btree_reserve_cache);
read_attribute(open_buckets);
read_attribute(open_buckets_partial);
read_attribute(nocow_lock_table);
read_attribute(replicas);
read_attribute(read_refs);
read_attribute(write_refs);
@ -389,6 +390,9 @@ SHOW(bch2_fs)
if (attr == &sysfs_nocow_lock_table)
bch2_nocow_locks_to_text(out, &c->nocow_locks);
if (attr == &sysfs_replicas)
bch2_cpu_replicas_to_text(out, &c->replicas);
if (attr == &sysfs_disk_groups)
bch2_disk_groups_to_text(out, c);
@ -600,6 +604,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_open_buckets_partial,
&sysfs_write_refs,
&sysfs_nocow_lock_table,
&sysfs_replicas,
&sysfs_io_timers_read,
&sysfs_io_timers_write,

View File

@ -913,6 +913,9 @@ static int check_inode(struct btree_trans *trans,
}
ret = bch2_check_inode_has_case_insensitive(trans, &u, &s->ids, &do_update);
if (bch2_err_matches(ret, ENOENT)) /* disconnected inode; will be fixed by a later pass */
ret = 0;
bch_err_msg(c, ret, "bch2_check_inode_has_case_insensitive()");
if (ret)
goto err;
@ -1627,7 +1630,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
new_d->k.p.inode = d.k->p.inode;
new_d->k.p.snapshot = d.k->p.snapshot;
struct btree_iter dup_iter = {};
CLASS(btree_iter_uninit, dup_iter)(trans);
return bch2_hash_delete_at(trans,
bch2_dirent_hash_desc, hash_info, iter,
BTREE_UPDATE_internal_snapshot_node) ?:

View File

@ -549,7 +549,7 @@ int bch2_dirent_lookup_trans(struct btree_trans *trans,
hash_info, dir, &lookup_name, flags));
int ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), inum);
return ret > 0 ? -ENOENT : 0;
return ret > 0 ? -ENOENT : ret;
}
u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,

View File

@ -832,10 +832,8 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
prt_printf(&buf, "casefolded dir with has_case_insensitive not set\ninum %llu:%u ",
inode->bi_inum, inode->bi_snapshot);
ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot,
snapshot_overwrites, &buf);
if (ret)
goto out;
try(bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot,
snapshot_overwrites, &buf));
if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) {
inode->bi_flags |= BCH_INODE_has_case_insensitive;
@ -844,7 +842,7 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
}
if (!(inode->bi_flags & BCH_INODE_has_case_insensitive))
goto out;
return 0;
struct bch_inode_unpacked dir = *inode;
u32 snapshot = dir.bi_snapshot;
@ -852,30 +850,22 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
while (!(dir.bi_inum == BCACHEFS_ROOT_INO &&
dir.bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
if (dir.bi_parent_subvol) {
ret = bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot);
if (ret)
goto out;
try(bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot));
snapshot_overwrites = NULL;
}
ret = bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0);
if (ret)
goto out;
try(bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0));
if (!(dir.bi_flags & BCH_INODE_has_case_insensitive)) {
prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n");
ret = bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot,
snapshot_overwrites, &buf);
if (ret)
goto out;
try(bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot,
snapshot_overwrites, &buf));
if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", buf.buf)) {
dir.bi_flags |= BCH_INODE_has_case_insensitive;
ret = __bch2_fsck_write_inode(trans, &dir);
if (ret)
goto out;
try(__bch2_fsck_write_inode(trans, &dir));
}
}
@ -886,15 +876,11 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
if (!repairing_parents)
break;
}
out:
fsck_err:
bch_err_fn(trans->c, ret);
if (ret)
return ret;
if (repairing_parents)
return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
bch_err_throw(trans->c, transaction_restart_nested);
return 0;
fsck_err:
return ret;
}

View File

@ -118,7 +118,7 @@ static void qc_dqblk_to_text(struct printbuf *out, struct qc_dqblk *q)
prt_printf(out, "d_fieldmask\t%x\n", q->d_fieldmask);
prt_printf(out, "d_spc_hardlimit\t%llu\n", q->d_spc_hardlimit);
prt_printf(out, "d_spc_softlimit\t%llu\n", q->d_spc_softlimit);
prt_printf(out, "d_ino_hardlimit\%llu\n", q->d_ino_hardlimit);
prt_printf(out, "d_ino_hardlimit\t%llu\n", q->d_ino_hardlimit);
prt_printf(out, "d_ino_softlimit\t%llu\n", q->d_ino_softlimit);
prt_printf(out, "d_space\t%llu\n", q->d_space);
prt_printf(out, "d_ino_count\t%llu\n", q->d_ino_count);

View File

@ -218,6 +218,50 @@ static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans
return 0;
}
static int str_hash_dup_entries(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc *desc,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c k,
struct btree_iter *dup_iter, struct bkey_s_c dup_k,
bool *updated_before_k_pos)
{
struct bch_fs *c = trans->c;
CLASS(printbuf, buf)();
int ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k);
if (ret < 0)
return ret;
if (!fsck_err(trans, hash_table_key_duplicate,
"duplicate hash table keys%s:\n%s",
ret != 2 ? "" : ", both point to valid inodes",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k),
prt_newline(&buf),
bch2_bkey_val_to_text(&buf, c, dup_k),
buf.buf)))
return 0;
switch (ret) {
case 0:
try(bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0));
break;
case 1:
try(bch2_hash_delete_at(trans, *desc, hash_info, dup_iter, 0));
break;
case 2:
try(bch2_fsck_rename_dirent(trans, s, *desc, hash_info,
bkey_s_c_to_dirent(k),
updated_before_k_pos));
try(bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0));
break;
}
return bch2_trans_commit_lazy(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
fsck_err:
return ret;
}
/* Put a str_hash key in its proper location, checking for duplicates */
int bch2_str_hash_repair_key(struct btree_trans *trans,
struct snapshots_seen *s,
@ -227,96 +271,65 @@ int bch2_str_hash_repair_key(struct btree_trans *trans,
struct btree_iter *dup_iter, struct bkey_s_c dup_k,
bool *updated_before_k_pos)
{
struct bch_fs *c = trans->c;
CLASS(printbuf, buf)();
bool free_snapshots_seen = false;
int ret = 0;
CLASS(snapshots_seen, s_onstack)();
if (!s) {
s = bch2_trans_kmalloc(trans, sizeof(*s));
ret = PTR_ERR_OR_ZERO(s);
if (ret)
goto out;
s = &s_onstack;
s->pos = k_iter->pos;
darray_init(&s->ids);
ret = bch2_get_snapshot_overwrites(trans, desc->btree_id, k_iter->pos, &s->ids);
if (ret)
goto out;
free_snapshots_seen = true;
try(bch2_get_snapshot_overwrites(trans, desc->btree_id, k_iter->pos, &s->ids));
}
if (!dup_k.k) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
goto out;
struct bkey_i *new = errptr_try(bch2_bkey_make_mut_noupdate(trans, k));
dup_k = bch2_hash_set_or_get_in_snapshot(trans, dup_iter, *desc, hash_info,
dup_k = bkey_try(bch2_hash_set_or_get_in_snapshot(trans, dup_iter, *desc, hash_info,
(subvol_inum) { 0, new->k.p.inode },
new->k.p.snapshot, new,
STR_HASH_must_create|
BTREE_ITER_with_updates|
BTREE_UPDATE_internal_snapshot_node);
ret = bkey_err(dup_k);
if (ret)
goto out;
if (dup_k.k)
goto duplicate_entries;
BTREE_UPDATE_internal_snapshot_node));
if (bpos_lt(new->k.p, k.k->p))
*updated_before_k_pos = true;
ret = bch2_insert_snapshot_whiteouts(trans, desc->btree_id,
k_iter->pos, new->k.p) ?:
bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
BTREE_ITER_with_updates|
BTREE_UPDATE_internal_snapshot_node) ?:
bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
bch_err_throw(c, transaction_restart_commit);
} else {
duplicate_entries:
ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k);
if (ret < 0)
goto out;
if (!fsck_err(trans, hash_table_key_duplicate,
"duplicate hash table keys%s:\n%s",
ret != 2 ? "" : ", both point to valid inodes",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k),
prt_newline(&buf),
bch2_bkey_val_to_text(&buf, c, dup_k),
buf.buf)))
goto out;
switch (ret) {
case 0:
ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
break;
case 1:
ret = bch2_hash_delete_at(trans, *desc, hash_info, dup_iter, 0);
break;
case 2:
ret = bch2_fsck_rename_dirent(trans, s, *desc, hash_info,
bkey_s_c_to_dirent(k),
updated_before_k_pos) ?:
bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
BTREE_ITER_with_updates);
goto out;
if (!dup_k.k) {
try(bch2_insert_snapshot_whiteouts(trans, desc->btree_id,
k_iter->pos, new->k.p));
try(bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
BTREE_UPDATE_internal_snapshot_node));
try(bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new));
try(bch2_trans_commit_lazy(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc));
}
ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
bch_err_throw(c, transaction_restart_commit);
}
out:
if (dup_k.k)
try(str_hash_dup_entries(trans, s, desc, hash_info,
k_iter, k, dup_iter, dup_k,
updated_before_k_pos));
return 0;
}
static int str_hash_bad_hash(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc *desc,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c hash_k,
bool *updated_before_k_pos,
struct btree_iter *iter, u64 hash)
{
CLASS(printbuf, buf)();
int ret = 0;
/*
* Before doing any repair, check hash_info itself:
*/
try(check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info));
if (fsck_err(trans, hash_table_key_wrong_offset,
"hash table key at wrong offset: should be at %llu\n%s",
hash,
(bch2_bkey_val_to_text(&buf, trans->c, hash_k), buf.buf)))
ret = bch2_str_hash_repair_key(trans, s, desc, hash_info,
k_iter, hash_k,
iter, bkey_s_c_null,
updated_before_k_pos);
fsck_err:
bch2_trans_iter_exit(dup_iter);
if (free_snapshots_seen)
darray_exit(&s->ids);
return ret;
}
@ -327,57 +340,36 @@ int __bch2_str_hash_check_key(struct btree_trans *trans,
struct btree_iter *k_iter, struct bkey_s_c hash_k,
bool *updated_before_k_pos)
{
struct bch_fs *c = trans->c;
struct btree_iter iter = {};
CLASS(printbuf, buf)();
u64 hash = desc->hash_bkey(hash_info, hash_k);
CLASS(btree_iter, iter)(trans, desc->btree_id,
SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
BTREE_ITER_slots);
if (hash_k.k->p.offset < hash)
return str_hash_bad_hash(trans, s, desc, hash_info, k_iter, hash_k,
updated_before_k_pos, &iter, hash);
struct bkey_s_c k;
int ret = 0;
u64 hash = desc->hash_bkey(hash_info, hash_k);
if (hash_k.k->p.offset < hash)
goto bad_hash;
bch2_trans_iter_init(trans, &iter, desc->btree_id,
SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
BTREE_ITER_slots|
BTREE_ITER_with_updates);
for_each_btree_key_continue_norestart(iter,
BTREE_ITER_slots|
BTREE_ITER_with_updates, k, ret) {
BTREE_ITER_slots, k, ret) {
if (bkey_eq(k.k->p, hash_k.k->p))
break;
if (k.k->type == desc->key_type &&
!desc->cmp_bkey(k, hash_k)) {
ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode,
hash_info) ?:
bch2_str_hash_repair_key(trans, s, desc, hash_info,
k_iter, hash_k,
&iter, k, updated_before_k_pos);
/* dup */
try(check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info));
try(bch2_str_hash_repair_key(trans, s, desc, hash_info, k_iter, hash_k,
&iter, k, updated_before_k_pos));
break;
}
if (bkey_deleted(k.k))
goto bad_hash;
return str_hash_bad_hash(trans, s, desc, hash_info, k_iter, hash_k,
updated_before_k_pos, &iter, hash);
}
bch2_trans_iter_exit(&iter);
fsck_err:
return ret;
bad_hash:
bch2_trans_iter_exit(&iter);
/*
* Before doing any repair, check hash_info itself:
*/
try(check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info));
if (fsck_err(trans, hash_table_key_wrong_offset,
"hash table key at wrong offset: should be at %llu\n%s",
hash,
(bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)))
ret = bch2_str_hash_repair_key(trans, s, desc, hash_info,
k_iter, hash_k,
&iter, bkey_s_c_null,
updated_before_k_pos);
return ret;
}

View File

@ -447,8 +447,13 @@ int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb, struct prin
lockdep_assert_held(&c->state_lock);
if (le64_to_cpu(sb->sb->seq) >
le64_to_cpu(c->disk_sb.sb->seq))
bch2_sb_to_fs(c, sb->sb);
le64_to_cpu(c->disk_sb.sb->seq)) {
/*
* rewind, we'll lose some updates but it's not safe to call
* bch2_sb_to_fs() after fs is started
*/
sb->sb->seq = c->disk_sb.sb->seq;
}
BUG_ON(!bch2_dev_exists(c, sb->sb->dev_idx));
@ -628,11 +633,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags,
goto err;
}
ret = bch2_replicas_gc2(c);
if (ret) {
prt_printf(err, "bch2_replicas_gc2() error: %s\n", bch2_err_str(ret));
goto err;
}
/*
* flushing the journal should be sufficient, but it's the write buffer
* flush that kills superblock replicas entries after they've gone to 0
* so bch2_dev_has_data() returns the correct value:
*/
data = bch2_dev_has_data(c, ca);
if (data) {

View File

@ -9,6 +9,7 @@
#include "journal/seq_blacklist.h"
#include "alloc/foreground.h"
#include "alloc/replicas.h"
#include "btree/update.h"
/* allocate journal on a device: */
@ -440,11 +441,12 @@ int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq)
if (journal_entry_empty(&i->j))
j->last_empty_seq = le64_to_cpu(i->j.seq);
p = journal_seq_pin(j, seq);
p->devs.nr = 0;
struct bch_devs_list seq_devs = {};
darray_for_each(i->ptrs, ptr)
bch2_dev_list_add_dev(&p->devs, ptr->dev);
seq_devs.data[seq_devs.nr++] = ptr->dev;
p = journal_seq_pin(j, seq);
bch2_devlist_to_replicas(&p->devs.e, BCH_DATA_journal, seq_devs);
had_entries = true;
}

View File

@ -442,6 +442,7 @@ static int journal_entry_open(struct journal *j)
buf->write_started = false;
buf->write_allocated = false;
buf->write_done = false;
buf->had_error = false;
memset(buf->data, 0, sizeof(*buf->data));
buf->data->seq = cpu_to_le64(journal_cur_seq(j));

View File

@ -410,20 +410,14 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
unsigned u64s, unsigned flags,
struct btree_trans *trans)
{
int ret;
EBUG_ON(res->ref);
EBUG_ON(!test_bit(JOURNAL_running, &j->flags));
res->u64s = u64s;
if (journal_res_get_fast(j, res, flags))
goto out;
if (!journal_res_get_fast(j, res, flags))
try(bch2_journal_res_get_slowpath(j, res, flags, trans));
ret = bch2_journal_res_get_slowpath(j, res, flags, trans);
if (ret)
return ret;
out:
if (!(flags & JOURNAL_RES_GET_CHECK)) {
lock_acquire_shared(&j->res_map, 0,
(flags & JOURNAL_RES_GET_NONBLOCK) != 0,

View File

@ -956,8 +956,8 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
scoped_guard(spinlock, &j->lock)
fifo_for_each_entry_ptr(p, &j->pin, iter)
if (dev_idx >= 0
? bch2_dev_list_has_dev(p->devs, dev_idx)
: p->devs.nr < c->opts.metadata_replicas)
? bch2_replicas_entry_has_dev(&p->devs.e, dev_idx)
: p->devs.e.nr_devs < c->opts.metadata_replicas)
seq = iter;
bch2_journal_flush_pins(j, seq);
@ -981,13 +981,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
seq = 0;
scoped_guard(spinlock, &j->lock)
while (!ret) {
union bch_replicas_padded replicas;
seq = max(seq, journal_last_seq(j));
if (seq >= j->pin.back)
if (seq > j->seq_ondisk)
break;
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
journal_seq_pin(j, seq)->devs);
union bch_replicas_padded replicas;
memcpy(&replicas, &journal_seq_pin(j, seq)->devs, sizeof(replicas));
seq++;
if (replicas.e.nr_devs) {
@ -1021,6 +1020,9 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count));
guard(printbuf_indent)(out);
bch2_replicas_entry_to_text(out, &pin_list->devs.e);
prt_newline(out);
prt_printf(out, "unflushed:\n");
for (unsigned i = 0; i < ARRAY_SIZE(pin_list->unflushed); i++)
list_for_each_entry(pin, &pin_list->unflushed[i], list)

View File

@ -26,7 +26,7 @@ static inline void journal_pin_list_init(struct journal_entry_pin_list *p, int c
for (unsigned i = 0; i < ARRAY_SIZE(p->flushed); i++)
INIT_LIST_HEAD(&p->flushed[i]);
atomic_set(&p->count, count);
p->devs.nr = 0;
p->devs.e.nr_devs = 0;
p->bytes = 0;
}

View File

@ -5,6 +5,7 @@
#include <linux/cache.h>
#include <linux/workqueue.h>
#include "alloc/replicas_types.h"
#include "alloc/types.h"
#include "init/dev_types.h"
#include "util/fifo.h"
@ -48,6 +49,7 @@ struct journal_buf {
bool write_started:1;
bool write_allocated:1;
bool write_done:1;
bool had_error:1;
u8 idx;
};
@ -70,7 +72,7 @@ struct journal_entry_pin_list {
struct list_head unflushed[JOURNAL_PIN_TYPE_NR];
struct list_head flushed[JOURNAL_PIN_TYPE_NR];
atomic_t count;
struct bch_devs_list devs;
union bch_replicas_padded devs;
size_t bytes;
};
@ -113,7 +115,14 @@ union journal_res_state {
/* bytes: */
#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
#define JOURNAL_ENTRY_SIZE_MAX (4U << 22) /* 16M */
/*
* The block layer is fragile with large bios - it should be able to process any
* IO incrementally, but...
*
* 4MB corresponds to bio_kmalloc() -> UIO_MAXIOV
*/
#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
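
For reference on the 4M cap: the bio_kmalloc() limit mentioned above is UIO_MAXIOV (1024) bio_vecs, and on 4 KiB-page systems that works out to 1024 × 4 KiB = 4 MiB (the 4 KiB page size is an assumption; larger pages would raise the ceiling).
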
/*
* We stash some journal state as sentinal values in cur_entry_offset:

View File

@ -188,7 +188,6 @@ static CLOSURE_CALLBACK(journal_write_done)
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
union bch_replicas_padded replicas;
u64 seq = le64_to_cpu(w->data->seq);
int err = 0;
@ -196,14 +195,15 @@ static CLOSURE_CALLBACK(journal_write_done)
? j->flush_write_time
: j->noflush_write_time, j->write_start_time);
if (!w->devs_written.nr) {
err = bch_err_throw(c, journal_write_err);
} else {
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
w->devs_written);
err = bch2_mark_replicas(c, &replicas.e);
if (w->had_error) {
struct bch_replicas_entry_v1 *r = &journal_seq_pin(j, seq)->devs.e;
bch2_devlist_to_replicas(r, BCH_DATA_journal, w->devs_written);
}
if (!w->devs_written.nr)
err = bch_err_throw(c, journal_write_err);
if (err && !bch2_journal_error(j)) {
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
@@ -222,8 +222,7 @@ static CLOSURE_CALLBACK(journal_write_done)
closure_debug_destroy(cl);
spin_lock(&j->lock);
if (seq >= j->pin.front)
journal_seq_pin(j, seq)->devs = w->devs_written;
BUG_ON(seq < j->pin.front);
if (err && (!j->err_seq || seq < j->err_seq))
j->err_seq = seq;
w->write_done = true;
@@ -334,6 +333,7 @@ static void journal_write_endio(struct bio *bio)
unsigned long flags;
spin_lock_irqsave(&j->err_lock, flags);
bch2_dev_list_drop_dev(&w->devs_written, ca->dev_idx);
w->had_error = true;
spin_unlock_irqrestore(&j->err_lock, flags);
}
@@ -632,7 +632,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
union bch_replicas_padded replicas;
unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_free]);
int ret;
@@ -701,9 +700,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
* Mark journal replicas before we submit the write to guarantee
* recovery will find the journal entries after a crash.
*/
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
w->devs_written);
ret = bch2_mark_replicas(c, &replicas.e);
struct bch_replicas_entry_v1 *r = &journal_seq_pin(j, le64_to_cpu(w->data->seq))->devs.e;
bch2_devlist_to_replicas(r, BCH_DATA_journal, w->devs_written);
ret = bch2_mark_replicas(c, r);
if (ret)
goto err;

View File

@@ -525,6 +525,37 @@ void bch2_opts_to_text(struct printbuf *out,
}
}
static int opt_hook_io(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum bch_opt_id id, bool post)
{
if (!test_bit(BCH_FS_started, &c->flags))
return 0;
switch (id) {
case Opt_foreground_target:
case Opt_background_target:
case Opt_promote_target:
case Opt_compression:
case Opt_background_compression:
case Opt_data_checksum:
case Opt_data_replicas:
case Opt_erasure_code: {
struct rebalance_scan s = {
.type = !inum ? REBALANCE_SCAN_fs : REBALANCE_SCAN_inum,
.inum = inum,
};
try(bch2_set_rebalance_needs_scan(c, s));
if (post)
bch2_rebalance_wakeup(c);
break;
}
default:
break;
}
return 0;
}
int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum bch_opt_id id, u64 v,
bool change)
{
@@ -546,16 +577,8 @@ int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum b
break;
}
if (change &&
test_bit(BCH_FS_started, &c->flags) &&
(id == Opt_foreground_target ||
id == Opt_background_target ||
id == Opt_promote_target ||
id == Opt_compression ||
id == Opt_background_compression ||
id == Opt_data_checksum ||
id == Opt_data_replicas))
try(bch2_set_rebalance_needs_scan(c, inum));
if (change)
try(opt_hook_io(c, ca, inum, id, false));
return 0;
}
@@ -571,17 +594,7 @@ int bch2_opts_hooks_pre_set(struct bch_fs *c)
void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum,
enum bch_opt_id id, u64 v)
{
if (test_bit(BCH_FS_started, &c->flags) &&
(id == Opt_foreground_target ||
id == Opt_background_target ||
id == Opt_promote_target ||
id == Opt_compression ||
id == Opt_background_compression ||
id == Opt_data_checksum ||
id == Opt_data_replicas)) {
bch2_set_rebalance_needs_scan(c, inum);
bch2_rebalance_wakeup(c);
}
opt_hook_io(c, ca, inum, id, true);
switch (id) {
case Opt_rebalance_enabled:
@@ -838,6 +851,7 @@ void bch2_inode_opts_get(struct bch_fs *c, struct bch_inode_opts *ret, bool meta
ret->background_target = c->opts.metadata_target ?: c->opts.foreground_target;
ret->data_replicas = c->opts.metadata_replicas;
ret->data_checksum = c->opts.metadata_checksum;
ret->erasure_code = false;
} else {
bch2_io_opts_fixups(ret);
}

View File

@@ -72,10 +72,7 @@ static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs)
static inline bool bch2_dev_list_has_dev(struct bch_devs_list devs,
unsigned dev)
{
darray_for_each(devs, i)
if (*i == dev)
return true;
return false;
return darray_find(devs, dev) != NULL;
}
static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs,

View File

@@ -96,7 +96,7 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t, bool);
#define darray_find_p(_d, _i, cond) \
({ \
typeof((_d).data) _ret = NULL; \
typeof(&(_d).data[0]) _ret = NULL; \
\
darray_for_each(_d, _i) \
if (cond) { \
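The darray_find_p tweak above swaps typeof((_d).data) for typeof(&(_d).data[0]). The practical difference, presumably the point of the fix, is that taking the address of element 0 always yields a plain pointer-to-element, whereas typeof() on the member lvalue carries over any qualifier on the darray itself (a const darray, say), which would make the later _ret assignment refuse to compile. A standalone GNU C sketch of the distinction, using an illustrative stand-in struct rather than a real darray:

#include <stddef.h>

struct strings {
	const char	**data;
	size_t		nr;
};

static const char *first_nonempty(const struct strings *d)
{
	/*
	 * typeof((*d).data) would be "const char **const" here, because the
	 * struct is reached through a const pointer, and the assignment in
	 * the loop below would then fail to compile.  &(*d).data[0] is a
	 * plain pointer-to-element, mirroring the new darray_find_p().
	 */
	typeof(&(*d).data[0]) ret = NULL;

	for (size_t i = 0; i < d->nr; i++)
		if (d->data[i][0]) {
			ret = &d->data[i];
			break;
		}

	return ret ? *ret : NULL;
}

int main(void)
{
	const char *strs[] = { "", "hello", "world" };
	struct strings d = { .data = strs, .nr = 3 };

	return first_nonempty(&d) == NULL;	/* 0: found "hello" */
}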

View File

@@ -278,20 +278,51 @@ static inline int eytzinger0_find_ge(void *base, size_t nr, size_t size,
return n - 1;
}
#define eytzinger0_find(base, nr, size, _cmp, search) \
({ \
size_t _size = (size); \
void *_base1 = (void *)(base) - _size; \
const void *_search = (search); \
size_t _nr = (nr); \
size_t _i = 1; \
int _res; \
\
while (_i <= _nr && \
(_res = _cmp(_search, _base1 + _i * _size))) \
_i = eytzinger1_child(_i, _res > 0); \
_i - 1; \
})
/* 0 == not found */
static inline int eytzinger1_find_r(void *base, unsigned nr, unsigned size,
cmp_r_func_t cmp_fn, const void *priv,
const void *search)
{
unsigned i = 1;
while (i <= nr) {
int cmp = cmp_fn(search, base + i * size, priv);
if (!cmp)
return i;
i = eytzinger1_child(i, cmp > 0);
}
return 0;
}
/* 0 == not found */
static inline int eytzinger1_find(void *base, unsigned nr, unsigned size,
cmp_func_t cmp_fn, const void *search)
{
unsigned i = 1;
while (i <= nr) {
int cmp = cmp_fn(search, base + i * size);
if (!cmp)
return i;
i = eytzinger1_child(i, cmp > 0);
}
return 0;
}
/* -1 == not found */
static inline int eytzinger0_find_r(void *base, unsigned nr, unsigned size,
cmp_r_func_t cmp_fn, const void *priv,
const void *search)
{
return eytzinger1_find_r(base - size, nr, size, cmp_fn, priv, search) - 1;
}
/* -1 == not found */
static inline int eytzinger0_find(void *base, unsigned nr, unsigned size,
cmp_func_t cmp_fn, const void *search)
{
return eytzinger1_find(base - size, nr, size, cmp_fn, search) - 1;
}
void eytzinger0_sort_r(void *, size_t, size_t,
cmp_r_func_t, swap_r_func_t, const void *);
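For reference, a self-contained userspace sketch of the 1-indexed descent the new helpers implement, handy for checking the "0 == not found" / "-1 == not found" conventions above. The sample array is the sorted set {1..7} laid out in eytzinger order, and eytzinger1_child() is redefined locally (as (i << 1) + child) so the sketch compiles on its own; everything here is illustrative, not lifted from eytzinger.h.

#include <stdio.h>

static unsigned eytzinger1_child(unsigned i, unsigned right)
{
	return (i << 1) + right;
}

/* 0 == not found, matching eytzinger1_find() above */
static unsigned sketch_eytzinger1_find(const int *base1, unsigned nr, int search)
{
	unsigned i = 1;

	while (i <= nr) {
		int cmp = (search > base1[i]) - (search < base1[i]);

		if (!cmp)
			return i;
		i = eytzinger1_child(i, cmp > 0);
	}
	return 0;
}

int main(void)
{
	/* slot 0 unused: eytzinger1_* indexing starts at 1 */
	static const int tree[] = { 0, 4, 2, 6, 1, 3, 5, 7 };

	printf("5 -> slot %u\n", sketch_eytzinger1_find(tree, 7, 5));	/* 6 */
	printf("9 -> slot %u\n", sketch_eytzinger1_find(tree, 7, 9));	/* 0 */
	return 0;
}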

View File

@@ -612,24 +612,51 @@ void bch2_bio_map(struct bio *bio, void *base, size_t size)
bio_add_virt_nofail(bio, base, size);
}
int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
int bch2_bio_alloc_pages(struct bio *bio, unsigned bs, size_t size, gfp_t gfp_mask)
{
BUG_ON(size & (bs - 1));
unsigned bs_pages = DIV_ROUND_UP(bs, PAGE_SIZE);
/*
* XXX: we could do this by allocating higher order pages, but
*
* - the page allocator gets slower at a certain order (5?) - we'd have
* to check for this
*
* - bch2_bio_free_pages_pool() probably does not handle compound pages
* yet
*/
DARRAY_PREALLOCATED(struct page *, 16) pages;
darray_init(&pages);
darray_make_room_gfp(&pages, bs_pages, gfp_mask|__GFP_NOFAIL);
int ret = 0;
while (size) {
struct page *page = alloc_pages(gfp_mask, 0);
unsigned len = min_t(size_t, PAGE_SIZE, size);
while (pages.nr < bs_pages) {
struct page *page = alloc_pages(gfp_mask, 0);
if (!page) {
ret = -ENOMEM;
goto out;
}
if (!page)
return -ENOMEM;
if (unlikely(!bio_add_page(bio, page, len, 0))) {
__free_page(page);
break;
BUG_ON(darray_push(&pages, page));
}
size -= len;
}
while (pages.nr) {
BUG_ON(!size);
return 0;
unsigned len = min(PAGE_SIZE, size);
size -= len;
struct page *page = darray_pop(&pages);
BUG_ON(!bio_add_page(bio, page, len, 0));
}
}
out:
darray_for_each(pages, i)
__free_page(*i);
darray_exit(&pages);
return ret;
}
u64 bch2_get_random_u64_below(u64 ceil)
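The rewritten bch2_bio_alloc_pages() batches allocation per block: it collects bs_pages pages before adding any of them to the bio, so a failed alloc_pages() can never leave a block half-attached. A userspace sketch of that reserve-then-commit pattern, with malloc() standing in for the page allocator and purely illustrative sizes:

#include <stdio.h>
#include <stdlib.h>

#define SKETCH_PAGE_SIZE	4096u

/* Reserve every page a block needs before committing any of them */
static int fill_block(void **pages, unsigned bs_pages)
{
	unsigned n;

	for (n = 0; n < bs_pages; n++) {
		pages[n] = malloc(SKETCH_PAGE_SIZE);
		if (!pages[n])
			goto err;
	}
	return 0;
err:
	/* Nothing was handed out yet, so unwinding is trivial */
	while (n--)
		free(pages[n]);
	return -1;
}

int main(void)
{
	void *pages[4];		/* e.g. one 16k block on 4k pages */

	if (fill_block(pages, 4)) {
		fprintf(stderr, "allocation failed, block untouched\n");
		return 1;
	}

	/* Commit step: this is where bio_add_page() would run in the real code */
	for (unsigned i = 0; i < 4; i++)
		free(pages[i]);

	return 0;
}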

View File

@@ -370,7 +370,7 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
}
void bch2_bio_map(struct bio *bio, void *base, size_t);
int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
int bch2_bio_alloc_pages(struct bio *, unsigned, size_t, gfp_t);
#define closure_bio_submit(bio, cl) \
do { \

View File

@@ -123,7 +123,10 @@ static int bch2_write_inode_trans(struct btree_trans *trans,
struct bch_extent_rebalance new_r = bch2_inode_rebalance_opts_get(c, &inode_u);
*rebalance_changed = memcmp(&old_r, &new_r, sizeof(new_r));
if (*rebalance_changed)
try(bch2_set_rebalance_needs_scan_trans(trans, inode_u.bi_inum));
try(bch2_set_rebalance_needs_scan_trans(trans,
(struct rebalance_scan) {
.type = REBALANCE_SCAN_inum,
.inum = inode_u.bi_inum }));
try(bch2_inode_write(trans, &iter, &inode_u));
try(bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc));