mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-12-08 00:00:12 +03:00)
Update bcachefs sources to 5fe20ac58af4 bcachefs: Don't bail out of check_inode() if check_has_case_sensitive() fails
Some checks failed
build / bcachefs-tools-msrv (push) Has been cancelled
.deb build orchestrator / source-only (push) Has been cancelled
.deb build orchestrator / obs (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:forky], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:forky], map[build-arch:amd64 host-arch:ppc64el machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:forky], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:trixie], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:trixie], map[build-arch:amd64 host-arch:ppc64el machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:trixie], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:unstable], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:unstable], map[build-arch:amd64 host-arch:ppc64el machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:debian version:unstable], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:plucky], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:plucky], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:questing], map[build-arch:amd64 host-arch:amd64 machine-arch:amd64 runs-on:ubuntu-24.04]) (push) Has been cancelled
.deb build orchestrator / buildd (map[name:ubuntu version:questing], map[build-arch:arm64 host-arch:arm64 machine-arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Has been cancelled
.deb build orchestrator / reprotest (push) Has been cancelled
.deb build orchestrator / publish (push) Has been cancelled
Nix Flake actions / nix-matrix (push) Has been cancelled
Nix Flake actions / ${{ matrix.name }} (${{ matrix.system }}) (push) Has been cancelled
This commit is contained in:
parent b2b4a5e78b
commit dc8c10a4b0
@@ -1 +1 @@
-b552eb12225133c8bf869b461faba6b72e35d2be
+5fe20ac58af402e8ad9ace0bcf9daad524e3005d

@@ -440,25 +440,39 @@ static bool accounting_mem_entry_is_zero(struct accounting_mem_entry *e)
return true;
}

void bch2_accounting_mem_gc(struct bch_fs *c)
void __bch2_accounting_maybe_kill(struct bch_fs *c, struct bpos pos)
{
struct bch_accounting_mem *acc = &c->accounting;
struct disk_accounting_pos acc_k;
bpos_to_disk_accounting_pos(&acc_k, pos);

guard(percpu_write)(&c->mark_lock);
struct accounting_mem_entry *dst = acc->k.data;
if (acc_k.type != BCH_DISK_ACCOUNTING_replicas)
return;

darray_for_each(acc->k, src) {
if (accounting_mem_entry_is_zero(src)) {
free_percpu(src->v[0]);
free_percpu(src->v[1]);
} else {
*dst++ = *src;
guard(mutex)(&c->sb_lock);
scoped_guard(percpu_write, &c->mark_lock) {
struct bch_accounting_mem *acc = &c->accounting;

unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &pos);

if (idx < acc->k.nr) {
struct accounting_mem_entry *e = acc->k.data + idx;
if (!accounting_mem_entry_is_zero(e))
return;

free_percpu(e->v[0]);
free_percpu(e->v[1]);

swap(*e, darray_last(acc->k));
--acc->k.nr;
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, NULL);
}

bch2_replicas_entry_kill(c, &acc_k.replicas);
}

acc->k.nr = dst - acc->k.data;
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, NULL);
bch2_write_super(c);
}

/*
@@ -472,9 +486,6 @@ void bch2_accounting_mem_gc(struct bch_fs *c)
int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
{
struct bch_accounting_mem *acc = &c->accounting;
int ret = 0;

darray_init(usage);

guard(percpu_read)(&c->mark_lock);
darray_for_each(acc->k, i) {
@@ -492,24 +503,19 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
bch2_accounting_mem_read_counters(acc, i - acc->k.data, &sectors, 1, false);
u.r.sectors = sectors;

ret = darray_make_room(usage, replicas_usage_bytes(&u.r));
if (ret)
break;
try(darray_make_room(usage, replicas_usage_bytes(&u.r)));

memcpy(&darray_top(*usage), &u.r, replicas_usage_bytes(&u.r));
usage->nr += replicas_usage_bytes(&u.r);
}

if (ret)
darray_exit(usage);
return ret;
return 0;
}

int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned accounting_types_mask)
{

struct bch_accounting_mem *acc = &c->accounting;
int ret = 0;

darray_init(out_buf);

@@ -521,10 +527,8 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc
if (!(accounting_types_mask & BIT(a_p.type)))
continue;

ret = darray_make_room(out_buf, sizeof(struct bkey_i_accounting) +
sizeof(u64) * i->nr_counters);
if (ret)
break;
try(darray_make_room(out_buf, sizeof(struct bkey_i_accounting) +
sizeof(u64) * i->nr_counters));

struct bkey_i_accounting *a_out =
bkey_accounting_init((void *) &darray_top(*out_buf));
@@ -537,9 +541,7 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc
out_buf->nr += bkey_bytes(&a_out->k);
}

if (ret)
darray_exit(out_buf);
return ret;
return 0;
}

static void bch2_accounting_free_counters(struct bch_accounting_mem *acc, bool gc)

@ -43,6 +43,21 @@ static inline void bch2_accounting_accumulate(struct bkey_i_accounting *dst,
|
||||
dst->k.bversion = src.k->bversion;
|
||||
}
|
||||
|
||||
void __bch2_accounting_maybe_kill(struct bch_fs *, struct bpos pos);
|
||||
|
||||
static inline void bch2_accounting_accumulate_maybe_kill(struct bch_fs *c,
|
||||
struct bkey_i_accounting *dst,
|
||||
struct bkey_s_c_accounting src)
|
||||
{
|
||||
bch2_accounting_accumulate(dst, src);
|
||||
|
||||
for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++)
|
||||
if (dst->v.d[i])
|
||||
return;
|
||||
|
||||
__bch2_accounting_maybe_kill(c, dst->k.p);
|
||||
}
|
||||
|
||||
static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage,
|
||||
enum bch_data_type data_type,
|
||||
s64 sectors)
|
||||
@ -137,7 +152,6 @@ enum bch_accounting_mode {
|
||||
|
||||
int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode);
|
||||
int bch2_accounting_mem_insert_locked(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode);
|
||||
void bch2_accounting_mem_gc(struct bch_fs *);
|
||||
|
||||
static inline bool bch2_accounting_is_mem(struct disk_accounting_pos *acc)
|
||||
{
|
||||
@ -205,13 +219,10 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
|
||||
|
||||
while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
|
||||
accounting_pos_cmp, &a.k->p)) >= acc->k.nr) {
|
||||
int ret = 0;
|
||||
if (unlikely(write_locked))
|
||||
ret = bch2_accounting_mem_insert_locked(c, a, mode);
|
||||
try(bch2_accounting_mem_insert_locked(c, a, mode));
|
||||
else
|
||||
ret = bch2_accounting_mem_insert(c, a, mode);
|
||||
if (ret)
|
||||
return ret;
|
||||
try(bch2_accounting_mem_insert(c, a, mode));
|
||||
}
|
||||
|
||||
struct accounting_mem_entry *e = &acc->k.data[idx];
|
||||
|
||||
@ -12,6 +12,21 @@
|
||||
|
||||
#include <linux/sort.h>
|
||||
|
||||
DEFINE_CLASS(bch_replicas_cpu, struct bch_replicas_cpu,
|
||||
kfree(_T.entries),
|
||||
(struct bch_replicas_cpu) {}, void)
|
||||
|
||||
static inline struct bch_replicas_entry_v1 *
|
||||
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
|
||||
{
|
||||
return (void *) r->entries + r->entry_size * i;
|
||||
}
|
||||
|
||||
#define for_each_cpu_replicas_entry(_r, _i) \
|
||||
for (struct bch_replicas_entry_v1 *_i = (_r)->entries; \
|
||||
(void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size; \
|
||||
_i = (void *) (_i) + (_r)->entry_size)
|
||||
|
||||
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
|
||||
struct bch_replicas_cpu *);
|
||||
|
||||
@ -129,15 +144,14 @@ bad:
|
||||
void bch2_cpu_replicas_to_text(struct printbuf *out,
|
||||
struct bch_replicas_cpu *r)
|
||||
{
|
||||
struct bch_replicas_entry_v1 *e;
|
||||
bool first = true;
|
||||
|
||||
for_each_cpu_replicas_entry(r, e) {
|
||||
for_each_cpu_replicas_entry(r, i) {
|
||||
if (!first)
|
||||
prt_printf(out, " ");
|
||||
first = false;
|
||||
|
||||
bch2_replicas_entry_to_text(out, e);
|
||||
bch2_replicas_entry_to_text(out, i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -246,45 +260,27 @@ cpu_replicas_add_entry(struct bch_fs *c,
|
||||
return new;
|
||||
}
|
||||
|
||||
static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
|
||||
struct bch_replicas_entry_v1 *search)
|
||||
static inline struct bch_replicas_entry_v1 *
|
||||
replicas_entry_search(struct bch_replicas_cpu *r,
|
||||
struct bch_replicas_entry_v1 *search)
|
||||
{
|
||||
int idx, entry_size = replicas_entry_bytes(search);
|
||||
verify_replicas_entry(search);
|
||||
|
||||
if (unlikely(entry_size > r->entry_size))
|
||||
return -1;
|
||||
|
||||
#define entry_cmp(_l, _r) memcmp(_l, _r, entry_size)
|
||||
idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
|
||||
entry_cmp, search);
|
||||
#undef entry_cmp
|
||||
|
||||
return idx < r->nr ? idx : -1;
|
||||
}
|
||||
|
||||
int bch2_replicas_entry_idx(struct bch_fs *c,
|
||||
struct bch_replicas_entry_v1 *search)
|
||||
{
|
||||
bch2_replicas_entry_sort(search);
|
||||
|
||||
return __replicas_entry_idx(&c->replicas, search);
|
||||
}
|
||||
|
||||
static bool __replicas_has_entry(struct bch_replicas_cpu *r,
|
||||
struct bch_replicas_entry_v1 *search)
|
||||
{
|
||||
return __replicas_entry_idx(r, search) >= 0;
|
||||
size_t entry_size = replicas_entry_bytes(search);
|
||||
int idx = likely(entry_size <= r->entry_size)
|
||||
? eytzinger0_find_r(r->entries, r->nr, r->entry_size,
|
||||
bch2_memcmp, (void *) entry_size, search)
|
||||
: -1;
|
||||
return idx >= 0 ? cpu_replicas_entry(r, idx) : NULL;
|
||||
}
|
||||
|
||||
bool bch2_replicas_marked_locked(struct bch_fs *c,
|
||||
struct bch_replicas_entry_v1 *search)
|
||||
{
|
||||
verify_replicas_entry(search);
|
||||
|
||||
return !search->nr_devs ||
|
||||
(__replicas_has_entry(&c->replicas, search) &&
|
||||
(replicas_entry_search(&c->replicas, search) &&
|
||||
(likely((!c->replicas_gc.entries)) ||
|
||||
__replicas_has_entry(&c->replicas_gc, search)));
|
||||
replicas_entry_search(&c->replicas_gc, search)));
|
||||
}
|
||||
|
||||
bool bch2_replicas_marked(struct bch_fs *c,
|
||||
@ -298,40 +294,31 @@ noinline
|
||||
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
|
||||
struct bch_replicas_entry_v1 *new_entry)
|
||||
{
|
||||
struct bch_replicas_cpu new_r, new_gc;
|
||||
int ret = 0;
|
||||
|
||||
verify_replicas_entry(new_entry);
|
||||
|
||||
memset(&new_r, 0, sizeof(new_r));
|
||||
memset(&new_gc, 0, sizeof(new_gc));
|
||||
CLASS(bch_replicas_cpu, new_r)();
|
||||
CLASS(bch_replicas_cpu, new_gc)();
|
||||
|
||||
guard(mutex)(&c->sb_lock);
|
||||
|
||||
if (c->replicas_gc.entries &&
|
||||
!__replicas_has_entry(&c->replicas_gc, new_entry)) {
|
||||
!replicas_entry_search(&c->replicas_gc, new_entry)) {
|
||||
new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
|
||||
if (!new_gc.entries) {
|
||||
ret = bch_err_throw(c, ENOMEM_cpu_replicas);
|
||||
goto out;
|
||||
}
|
||||
if (!new_gc.entries)
|
||||
return bch_err_throw(c, ENOMEM_cpu_replicas);
|
||||
}
|
||||
|
||||
if (!__replicas_has_entry(&c->replicas, new_entry)) {
|
||||
if (!replicas_entry_search(&c->replicas, new_entry)) {
|
||||
new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
|
||||
if (!new_r.entries) {
|
||||
ret = bch_err_throw(c, ENOMEM_cpu_replicas);
|
||||
goto out;
|
||||
}
|
||||
if (!new_r.entries)
|
||||
return bch_err_throw(c, ENOMEM_cpu_replicas);
|
||||
|
||||
ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
|
||||
if (ret)
|
||||
goto out;
|
||||
try(bch2_cpu_replicas_to_sb_replicas(c, &new_r));
|
||||
}
|
||||
|
||||
if (!new_r.entries &&
|
||||
!new_gc.entries)
|
||||
goto out;
|
||||
return 0;
|
||||
|
||||
/* allocations done, now commit: */
|
||||
|
||||
@ -345,12 +332,8 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
|
||||
if (new_gc.entries)
|
||||
swap(new_gc, c->replicas_gc);
|
||||
}
|
||||
out:
|
||||
kfree(new_r.entries);
|
||||
kfree(new_gc.entries);
|
||||
|
||||
bch_err_msg(c, ret, "adding replicas entry");
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
|
||||
@ -387,9 +370,6 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
|
||||
|
||||
int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
|
||||
{
|
||||
struct bch_replicas_entry_v1 *e;
|
||||
unsigned i = 0;
|
||||
|
||||
lockdep_assert_held(&c->replicas_gc_lock);
|
||||
|
||||
guard(mutex)(&c->sb_lock);
|
||||
@ -401,7 +381,7 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
|
||||
for_each_cpu_replicas_entry(&c->replicas, e) {
|
||||
/* Preserve unknown data types */
|
||||
if (e->data_type >= BCH_DATA_NR ||
|
||||
!((1 << e->data_type) & typemask)) {
|
||||
!(BIT(e->data_type) & typemask)) {
|
||||
c->replicas_gc.nr++;
|
||||
c->replicas_gc.entry_size =
|
||||
max_t(unsigned, c->replicas_gc.entry_size,
|
||||
@ -417,9 +397,10 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
|
||||
return bch_err_throw(c, ENOMEM_replicas_gc);
|
||||
}
|
||||
|
||||
unsigned i = 0;
|
||||
for_each_cpu_replicas_entry(&c->replicas, e)
|
||||
if (e->data_type >= BCH_DATA_NR ||
|
||||
!((1 << e->data_type) & typemask))
|
||||
!(BIT(e->data_type) & typemask))
|
||||
memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
|
||||
e, c->replicas_gc.entry_size);
|
||||
|
||||
@ -427,73 +408,23 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* New much simpler mechanism for clearing out unneeded replicas entries - drop
|
||||
* replicas entries that have 0 sectors used.
|
||||
*
|
||||
* However, we don't track sector counts for journal usage, so this doesn't drop
|
||||
* any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism
|
||||
* is retained for that.
|
||||
*/
|
||||
int bch2_replicas_gc2(struct bch_fs *c)
|
||||
void bch2_replicas_entry_kill(struct bch_fs *c, struct bch_replicas_entry_v1 *kill)
|
||||
{
|
||||
struct bch_replicas_cpu new = { 0 };
|
||||
unsigned nr;
|
||||
int ret = 0;
|
||||
lockdep_assert_held(&c->mark_lock);
|
||||
lockdep_assert_held(&c->sb_lock);
|
||||
|
||||
bch2_accounting_mem_gc(c);
|
||||
retry:
|
||||
nr = READ_ONCE(c->replicas.nr);
|
||||
new.entry_size = READ_ONCE(c->replicas.entry_size);
|
||||
new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL);
|
||||
if (!new.entries) {
|
||||
bch_err(c, "error allocating c->replicas_gc");
|
||||
return bch_err_throw(c, ENOMEM_replicas_gc);
|
||||
}
|
||||
struct bch_replicas_cpu *r = &c->replicas;
|
||||
|
||||
guard(mutex)(&c->sb_lock);
|
||||
scoped_guard(percpu_write, &c->mark_lock) {
|
||||
if (nr != c->replicas.nr ||
|
||||
new.entry_size != c->replicas.entry_size) {
|
||||
kfree(new.entries);
|
||||
goto retry;
|
||||
}
|
||||
struct bch_replicas_entry_v1 *e = replicas_entry_search(&c->replicas, kill);
|
||||
if (WARN(!e, "replicas entry not found in sb"))
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < c->replicas.nr; i++) {
|
||||
struct bch_replicas_entry_v1 *e =
|
||||
cpu_replicas_entry(&c->replicas, i);
|
||||
memcpy(e, cpu_replicas_entry(r, --r->nr), r->entry_size);
|
||||
|
||||
struct disk_accounting_pos k = {
|
||||
.type = BCH_DISK_ACCOUNTING_replicas,
|
||||
};
|
||||
bch2_cpu_replicas_sort(r);
|
||||
|
||||
unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e),
|
||||
"embedded variable length struct");
|
||||
|
||||
struct bpos p = disk_accounting_pos_to_bpos(&k);
|
||||
|
||||
struct bch_accounting_mem *acc = &c->accounting;
|
||||
bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
|
||||
accounting_pos_cmp, &p) >= acc->k.nr;
|
||||
|
||||
if (e->data_type == BCH_DATA_journal || !kill)
|
||||
memcpy(cpu_replicas_entry(&new, new.nr++),
|
||||
e, new.entry_size);
|
||||
}
|
||||
|
||||
bch2_cpu_replicas_sort(&new);
|
||||
|
||||
ret = bch2_cpu_replicas_to_sb_replicas(c, &new);
|
||||
|
||||
if (!ret)
|
||||
swap(c->replicas, new);
|
||||
|
||||
kfree(new.entries);
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
bch2_write_super(c);
|
||||
return ret;
|
||||
int ret = bch2_cpu_replicas_to_sb_replicas(c, r);
|
||||
WARN(ret, "bch2_cpu_replicas_to_sb_replicas() error: %s", bch2_err_str(ret));
|
||||
}
|
||||
|
||||
/* Replicas tracking - superblock: */
|
||||
@ -502,7 +433,6 @@ static int
|
||||
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
|
||||
struct bch_replicas_cpu *cpu_r)
|
||||
{
|
||||
struct bch_replicas_entry_v1 *e, *dst;
|
||||
unsigned nr = 0, entry_size = 0, idx = 0;
|
||||
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
@ -519,7 +449,7 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
|
||||
cpu_r->entry_size = entry_size;
|
||||
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
dst = cpu_replicas_entry(cpu_r, idx++);
|
||||
struct bch_replicas_entry_v1 *dst = cpu_replicas_entry(cpu_r, idx++);
|
||||
memcpy(dst, e, replicas_entry_bytes(e));
|
||||
bch2_replicas_entry_sort(dst);
|
||||
}
|
||||
@ -531,7 +461,6 @@ static int
|
||||
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
|
||||
struct bch_replicas_cpu *cpu_r)
|
||||
{
|
||||
struct bch_replicas_entry_v0 *e;
|
||||
unsigned nr = 0, entry_size = 0, idx = 0;
|
||||
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
@ -550,14 +479,14 @@ __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
|
||||
cpu_r->nr = nr;
|
||||
cpu_r->entry_size = entry_size;
|
||||
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
for_each_replicas_entry(sb_r, src) {
|
||||
struct bch_replicas_entry_v1 *dst =
|
||||
cpu_replicas_entry(cpu_r, idx++);
|
||||
|
||||
dst->data_type = e->data_type;
|
||||
dst->nr_devs = e->nr_devs;
|
||||
dst->data_type = src->data_type;
|
||||
dst->nr_devs = src->nr_devs;
|
||||
dst->nr_required = 1;
|
||||
memcpy(dst->devs, e->devs, e->nr_devs);
|
||||
memcpy(dst->devs, src->devs, src->nr_devs);
|
||||
bch2_replicas_entry_sort(dst);
|
||||
}
|
||||
|
||||
@ -568,7 +497,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
|
||||
{
|
||||
struct bch_sb_field_replicas *sb_v1;
|
||||
struct bch_sb_field_replicas_v0 *sb_v0;
|
||||
struct bch_replicas_cpu new_r = { 0, 0, NULL };
|
||||
CLASS(bch_replicas_cpu, new_r)();
|
||||
|
||||
if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
|
||||
try(__bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r));
|
||||
@ -580,8 +509,6 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
|
||||
guard(percpu_write)(&c->mark_lock);
|
||||
swap(c->replicas, new_r);
|
||||
|
||||
kfree(new_r.entries);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -590,7 +517,6 @@ static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
|
||||
{
|
||||
struct bch_sb_field_replicas_v0 *sb_r;
|
||||
struct bch_replicas_entry_v0 *dst;
|
||||
struct bch_replicas_entry_v1 *src;
|
||||
size_t bytes;
|
||||
|
||||
bytes = sizeof(struct bch_sb_field_replicas);
|
||||
@ -628,7 +554,7 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
|
||||
struct bch_replicas_cpu *r)
|
||||
{
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_replicas_entry_v1 *dst, *src;
|
||||
struct bch_replicas_entry_v1 *dst;
|
||||
bool need_v1 = false;
|
||||
size_t bytes;
|
||||
|
||||
@ -707,12 +633,11 @@ static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
|
||||
{
|
||||
struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
|
||||
|
||||
struct bch_replicas_cpu cpu_r;
|
||||
CLASS(bch_replicas_cpu, cpu_r)();
|
||||
try(__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r));
|
||||
try(bch2_cpu_replicas_validate(&cpu_r, sb, err));
|
||||
|
||||
int ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
|
||||
kfree(cpu_r.entries);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bch2_sb_replicas_to_text(struct printbuf *out,
|
||||
@ -720,7 +645,6 @@ static void bch2_sb_replicas_to_text(struct printbuf *out,
|
||||
struct bch_sb_field *f)
|
||||
{
|
||||
struct bch_sb_field_replicas *r = field_to_type(f, replicas);
|
||||
struct bch_replicas_entry_v1 *e;
|
||||
bool first = true;
|
||||
|
||||
for_each_replicas_entry(r, e) {
|
||||
@ -743,12 +667,11 @@ static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *
|
||||
{
|
||||
struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
|
||||
|
||||
struct bch_replicas_cpu cpu_r;
|
||||
CLASS(bch_replicas_cpu, cpu_r)();
|
||||
try(__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r));
|
||||
try(bch2_cpu_replicas_validate(&cpu_r, sb, err));
|
||||
|
||||
int ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
|
||||
kfree(cpu_r.entries);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
|
||||
@ -756,7 +679,6 @@ static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
|
||||
struct bch_sb_field *f)
|
||||
{
|
||||
struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
|
||||
struct bch_replicas_entry_v0 *e;
|
||||
bool first = true;
|
||||
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
@ -779,8 +701,6 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
|
||||
bool bch2_can_read_fs_with_devs(struct bch_fs *c, struct bch_devs_mask devs,
|
||||
unsigned flags, struct printbuf *err)
|
||||
{
|
||||
struct bch_replicas_entry_v1 *e;
|
||||
|
||||
guard(percpu_read)(&c->mark_lock);
|
||||
for_each_cpu_replicas_entry(&c->replicas, e) {
|
||||
unsigned nr_online = 0, nr_failed = 0, dflags = 0;
|
||||
@ -910,8 +830,6 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
|
||||
replicas_v0 = bch2_sb_field_get(sb, replicas_v0);
|
||||
|
||||
if (replicas) {
|
||||
struct bch_replicas_entry_v1 *r;
|
||||
|
||||
for_each_replicas_entry(replicas, r) {
|
||||
if (r->data_type >= sizeof(data_has) * 8)
|
||||
continue;
|
||||
@ -922,9 +840,7 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
|
||||
}
|
||||
|
||||
} else if (replicas_v0) {
|
||||
struct bch_replicas_entry_v0 *r;
|
||||
|
||||
for_each_replicas_entry_v0(replicas_v0, r) {
|
||||
for_each_replicas_entry(replicas_v0, r) {
|
||||
if (r->data_type >= sizeof(data_has) * 8)
|
||||
continue;
|
||||
|
||||
|
||||
@ -13,15 +13,6 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *,
|
||||
struct bch_fs *, struct printbuf *);
|
||||
void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
|
||||
|
||||
static inline struct bch_replicas_entry_v1 *
|
||||
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
|
||||
{
|
||||
return (void *) r->entries + r->entry_size * i;
|
||||
}
|
||||
|
||||
int bch2_replicas_entry_idx(struct bch_fs *,
|
||||
struct bch_replicas_entry_v1 *);
|
||||
|
||||
void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *,
|
||||
enum bch_data_type,
|
||||
struct bch_devs_list);
|
||||
@ -53,12 +44,15 @@ unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
|
||||
|
||||
int bch2_replicas_gc_end(struct bch_fs *, int);
|
||||
int bch2_replicas_gc_start(struct bch_fs *, unsigned);
|
||||
int bch2_replicas_gc2(struct bch_fs *);
|
||||
void bch2_replicas_entry_kill(struct bch_fs *, struct bch_replicas_entry_v1 *);
|
||||
|
||||
#define for_each_cpu_replicas_entry(_r, _i) \
|
||||
for (_i = (_r)->entries; \
|
||||
(void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
|
||||
_i = (void *) (_i) + (_r)->entry_size)
|
||||
static inline bool bch2_replicas_entry_has_dev(struct bch_replicas_entry_v1 *r, unsigned dev)
|
||||
{
|
||||
for (unsigned i = 0; i < r->nr_devs; i++)
|
||||
if (r->devs[i] == dev)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* iterate over superblock replicas - used by userspace tools: */
|
||||
|
||||
@ -66,12 +60,7 @@ int bch2_replicas_gc2(struct bch_fs *);
|
||||
((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i)))
|
||||
|
||||
#define for_each_replicas_entry(_r, _i) \
|
||||
for (_i = (_r)->entries; \
|
||||
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
|
||||
(_i) = replicas_entry_next(_i))
|
||||
|
||||
#define for_each_replicas_entry_v0(_r, _i) \
|
||||
for (_i = (_r)->entries; \
|
||||
for (typeof(&(_r)->entries[0]) _i = (_r)->entries; \
|
||||
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
|
||||
(_i) = replicas_entry_next(_i))
|
||||
|
||||
|
||||
@ -8,4 +8,10 @@ struct bch_replicas_cpu {
|
||||
struct bch_replicas_entry_v1 *entries;
|
||||
};
|
||||
|
||||
union bch_replicas_padded {
|
||||
u8 bytes[struct_size_t(struct bch_replicas_entry_v1,
|
||||
devs, BCH_BKEY_PTRS_MAX)];
|
||||
struct bch_replicas_entry_v1 e;
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_REPLICAS_TYPES_H */
|
||||
|
||||
@ -609,6 +609,18 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
|
||||
closure_wake_up(&c->btree_interior_update_wait);
|
||||
}
|
||||
|
||||
static void bch2_btree_update_add_key(btree_update_nodes *nodes,
|
||||
unsigned level, struct bkey_i *k)
|
||||
{
|
||||
BUG_ON(darray_make_room(nodes, 1));
|
||||
|
||||
struct btree_update_node *n = &darray_top(*nodes);
|
||||
nodes->nr++;
|
||||
|
||||
*n = (struct btree_update_node) { .level = level };
|
||||
bkey_copy(&n->key, k);
|
||||
}
|
||||
|
||||
static void bch2_btree_update_add_node(struct bch_fs *c, btree_update_nodes *nodes, struct btree *b)
|
||||
{
|
||||
BUG_ON(darray_make_room(nodes, 1));
|
||||
@ -649,20 +661,26 @@ static void btree_update_new_nodes_mark_sb(struct btree_update *as)
|
||||
static int btree_update_nodes_written_trans(struct btree_trans *trans,
|
||||
struct btree_update *as)
|
||||
{
|
||||
struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans, as->journal_u64s));
|
||||
|
||||
memcpy(e, as->journal_entries, as->journal_u64s * sizeof(u64));
|
||||
|
||||
trans->journal_pin = &as->journal;
|
||||
|
||||
darray_for_each(as->old_nodes, i)
|
||||
try(bch2_key_trigger_old(trans, as->btree_id, i->level + 1, bkey_i_to_s_c(&i->key),
|
||||
BTREE_TRIGGER_transactional));
|
||||
|
||||
darray_for_each(as->new_nodes, i)
|
||||
darray_for_each(as->new_nodes, i) {
|
||||
try(bch2_key_trigger_new(trans, as->btree_id, i->level + 1, bkey_i_to_s(&i->key),
|
||||
BTREE_TRIGGER_transactional));
|
||||
|
||||
journal_entry_set(errptr_try(bch2_trans_jset_entry_alloc(trans,
|
||||
jset_u64s(i->key.k.u64s))),
|
||||
i->root
|
||||
? BCH_JSET_ENTRY_btree_root
|
||||
: BCH_JSET_ENTRY_btree_keys,
|
||||
as->btree_id,
|
||||
i->root ? i->level : i->level + 1,
|
||||
&i->key, i->key.k.u64s);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -749,11 +767,12 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
* all our new nodes, to avoid racing with
|
||||
* btree_node_update_key():
|
||||
*/
|
||||
darray_for_each(as->new_nodes, i) {
|
||||
BUG_ON(i->b->will_make_reachable != (unsigned long) as);
|
||||
i->b->will_make_reachable = 0;
|
||||
clear_btree_node_will_make_reachable(i->b);
|
||||
}
|
||||
darray_for_each(as->new_nodes, i)
|
||||
if (i->b) {
|
||||
BUG_ON(i->b->will_make_reachable != (unsigned long) as);
|
||||
i->b->will_make_reachable = 0;
|
||||
clear_btree_node_will_make_reachable(i->b);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -841,11 +860,12 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
|
||||
bch2_journal_pin_drop(&c->journal, &as->journal);
|
||||
|
||||
darray_for_each(as->new_nodes, i) {
|
||||
btree_node_lock_nopath_nofail(trans, &i->b->c, SIX_LOCK_read);
|
||||
btree_node_write_if_need(trans, i->b, SIX_LOCK_read);
|
||||
six_unlock_read(&i->b->c.lock);
|
||||
}
|
||||
darray_for_each(as->new_nodes, i)
|
||||
if (i->b) {
|
||||
btree_node_lock_nopath_nofail(trans, &i->b->c, SIX_LOCK_read);
|
||||
btree_node_write_if_need(trans, i->b, SIX_LOCK_read);
|
||||
six_unlock_read(&i->b->c.lock);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < as->nr_open_buckets; i++)
|
||||
bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
|
||||
@ -931,25 +951,13 @@ static void btree_update_reparent(struct btree_update *as,
|
||||
|
||||
static void btree_update_updated_root(struct btree_update *as, struct btree *b)
|
||||
{
|
||||
struct bkey_i *insert = &b->key;
|
||||
struct bch_fs *c = as->c;
|
||||
|
||||
BUG_ON(as->mode != BTREE_UPDATE_none);
|
||||
as->mode = BTREE_UPDATE_root;
|
||||
|
||||
BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
|
||||
ARRAY_SIZE(as->journal_entries));
|
||||
|
||||
as->journal_u64s +=
|
||||
journal_entry_set((void *) &as->journal_entries[as->journal_u64s],
|
||||
BCH_JSET_ENTRY_btree_root,
|
||||
b->c.btree_id, b->c.level,
|
||||
insert, insert->k.u64s);
|
||||
|
||||
scoped_guard(mutex, &c->btree_interior_update_lock) {
|
||||
scoped_guard(mutex, &c->btree_interior_update_lock)
|
||||
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
|
||||
|
||||
as->mode = BTREE_UPDATE_root;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1323,7 +1331,6 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
struct bkey_packed *k;
|
||||
CLASS(printbuf, buf)();
|
||||
unsigned long old, new;
|
||||
|
||||
BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
|
||||
@ -1344,15 +1351,6 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
|
||||
ARRAY_SIZE(as->journal_entries));
|
||||
|
||||
as->journal_u64s +=
|
||||
journal_entry_set((void *) &as->journal_entries[as->journal_u64s],
|
||||
BCH_JSET_ENTRY_btree_keys,
|
||||
b->c.btree_id, b->c.level,
|
||||
insert, insert->k.u64s);
|
||||
|
||||
while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
|
||||
bkey_iter_pos_cmp(b, k, &insert->k.p) < 0)
|
||||
bch2_btree_node_iter_advance(node_iter, b);
|
||||
@ -2105,6 +2103,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
|
||||
bch2_btree_update_get_open_buckets(as, n);
|
||||
bch2_btree_node_write_trans(trans, n, SIX_LOCK_intent, 0);
|
||||
bch2_btree_update_add_key(&as->new_nodes, n->c.level, &delete);
|
||||
bch2_btree_update_add_node(c, &as->new_nodes, n);
|
||||
|
||||
bch2_btree_node_free_inmem(trans, trans->paths + path, b);
|
||||
@ -2386,15 +2385,6 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
if (!btree_node_will_make_reachable(b)) {
|
||||
if (!skip_triggers) {
|
||||
try(bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
|
||||
bkey_i_to_s_c(&b->key),
|
||||
BTREE_TRIGGER_transactional));
|
||||
try(bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
|
||||
bkey_i_to_s(new_key),
|
||||
BTREE_TRIGGER_transactional));
|
||||
}
|
||||
|
||||
if (!btree_node_is_root(c, b)) {
|
||||
CLASS(btree_node_iter, parent_iter)(trans,
|
||||
b->c.btree_id,
|
||||
@ -2404,15 +2394,32 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
|
||||
BTREE_ITER_intent);
|
||||
|
||||
try(bch2_btree_iter_traverse(&parent_iter));
|
||||
try(bch2_trans_update(trans, &parent_iter, new_key, BTREE_TRIGGER_norun));
|
||||
try(bch2_trans_update(trans, &parent_iter, new_key, skip_triggers ? BTREE_TRIGGER_norun : 0));
|
||||
} else {
|
||||
struct jset_entry *e = errptr_try(bch2_trans_jset_entry_alloc(trans,
|
||||
jset_u64s(new_key->k.u64s)));
|
||||
if (!skip_triggers)
|
||||
try(bch2_key_trigger(trans, b->c.btree_id, b->c.level + 1,
|
||||
bkey_i_to_s_c(&b->key),
|
||||
bkey_i_to_s(new_key),
|
||||
BTREE_TRIGGER_insert|
|
||||
BTREE_TRIGGER_overwrite|
|
||||
BTREE_TRIGGER_transactional));
|
||||
|
||||
journal_entry_set(e,
|
||||
journal_entry_set(errptr_try(bch2_trans_jset_entry_alloc(trans,
|
||||
jset_u64s(b->key.k.u64s))),
|
||||
BCH_JSET_ENTRY_overwrite,
|
||||
b->c.btree_id, b->c.level + 1,
|
||||
&b->key, b->key.k.u64s);
|
||||
|
||||
journal_entry_set(errptr_try(bch2_trans_jset_entry_alloc(trans,
|
||||
jset_u64s(new_key->k.u64s))),
|
||||
BCH_JSET_ENTRY_btree_root,
|
||||
b->c.btree_id, b->c.level,
|
||||
new_key, new_key->k.u64s);
|
||||
|
||||
/*
|
||||
* propagated back to c->btree_roots[].key by
|
||||
* bch2_journal_entry_to_btree_root() incorrect for
|
||||
*/
|
||||
}
|
||||
|
||||
try(bch2_trans_commit(trans, NULL, NULL, commit_flags));
|
||||
|
||||
@ -8,8 +8,6 @@
|
||||
|
||||
#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)
|
||||
|
||||
#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
|
||||
|
||||
int bch2_btree_node_check_topology(struct btree_trans *, struct btree *);
|
||||
|
||||
#define BTREE_UPDATE_MODES() \
|
||||
@ -111,9 +109,6 @@ struct btree_update {
|
||||
BCH_REPLICAS_MAX];
|
||||
open_bucket_idx_t nr_open_buckets;
|
||||
|
||||
unsigned journal_u64s;
|
||||
u64 journal_entries[BTREE_UPDATE_JOURNAL_RES];
|
||||
|
||||
/* Only here to reduce stack usage on recursive splits: */
|
||||
struct keylist parent_keys;
|
||||
/*
|
||||
|
||||
@ -736,6 +736,19 @@ void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
|
||||
|
||||
/* Btree path: traverse, set_pos: */
|
||||
|
||||
static noinline_for_stack int btree_node_root_err(struct btree_trans *trans, struct btree *b)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
CLASS(printbuf, buf)();
|
||||
bch2_log_msg_start(c, &buf);
|
||||
|
||||
prt_str(&buf, "btree root doesn't cover expected range:\n");
|
||||
bch2_btree_pos_to_text(&buf, c, b);
|
||||
prt_newline(&buf);
|
||||
|
||||
return __bch2_topology_error(c, &buf);
|
||||
}
|
||||
|
||||
static inline int btree_path_lock_root(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned depth_want,
|
||||
@ -783,6 +796,13 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
|
||||
if (likely(b == READ_ONCE(r->b) &&
|
||||
b->c.level == path->level &&
|
||||
!race_fault())) {
|
||||
if (unlikely(!bpos_eq(b->data->min_key, POS_MIN) ||
|
||||
!bpos_eq(b->key.k.p, SPOS_MAX))) {
|
||||
ret = btree_node_root_err(trans, b);
|
||||
six_unlock_type(&b->c.lock, lock_type);
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (i = 0; i < path->level; i++)
|
||||
path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_lock_root);
|
||||
path->l[path->level].b = b;
|
||||
|
||||
@ -557,7 +557,7 @@ void *__bch2_trans_subbuf_alloc(struct btree_trans *trans,
|
||||
int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
|
||||
enum btree_id btree, struct bpos start, struct bpos end)
|
||||
{
|
||||
bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent);
|
||||
bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent|BTREE_ITER_with_updates);
|
||||
struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_prev(iter));
|
||||
|
||||
if (bpos_lt(iter->pos, start))
|
||||
|
||||
@ -158,8 +158,9 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
|
||||
struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, iter), &u);
|
||||
|
||||
if (k.k->type == KEY_TYPE_accounting)
|
||||
bch2_accounting_accumulate(bkey_i_to_accounting(&wb->k),
|
||||
bkey_s_c_to_accounting(k));
|
||||
bch2_accounting_accumulate_maybe_kill(trans->c,
|
||||
bkey_i_to_accounting(&wb->k),
|
||||
bkey_s_c_to_accounting(k));
|
||||
}
|
||||
*accounting_accumulated = true;
|
||||
|
||||
|
||||
@ -4,12 +4,6 @@
|
||||
|
||||
#include "bcachefs_format.h"
|
||||
|
||||
union bch_replicas_padded {
|
||||
u8 bytes[struct_size_t(struct bch_replicas_entry_v1,
|
||||
devs, BCH_BKEY_PTRS_MAX)];
|
||||
struct bch_replicas_entry_v1 e;
|
||||
};
|
||||
|
||||
struct stripe {
|
||||
size_t heap_idx;
|
||||
u16 sectors;
|
||||
|
||||
@ -994,7 +994,6 @@ int bch2_data_job(struct bch_fs *c,
|
||||
true,
|
||||
rereplicate_pred, c) ?: ret;
|
||||
bch2_btree_interior_updates_flush(c);
|
||||
ret = bch2_replicas_gc2(c) ?: ret;
|
||||
break;
|
||||
case BCH_DATA_OP_migrate:
|
||||
if (op->migrate.dev >= c->sb.nr_devices)
|
||||
@ -1010,7 +1009,6 @@ int bch2_data_job(struct bch_fs *c,
|
||||
true,
|
||||
migrate_pred, op) ?: ret;
|
||||
bch2_btree_interior_updates_flush(c);
|
||||
ret = bch2_replicas_gc2(c) ?: ret;
|
||||
break;
|
||||
case BCH_DATA_OP_rewrite_old_nodes:
|
||||
ret = bch2_scan_old_btree_nodes(c, stats);
|
||||
@ -1020,7 +1018,6 @@ int bch2_data_job(struct bch_fs *c,
|
||||
writepoint_hashed((unsigned long) current),
|
||||
true,
|
||||
drop_extra_replicas_pred, c) ?: ret;
|
||||
ret = bch2_replicas_gc2(c) ?: ret;
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
|
||||
@ -296,7 +296,7 @@ int bch2_bkey_get_io_opts(struct btree_trans *trans,
|
||||
if (!snapshot_opts) {
|
||||
bch2_inode_opts_get(c, opts, metadata);
|
||||
|
||||
if (k.k->p.snapshot) {
|
||||
if (!metadata && k.k->p.snapshot) {
|
||||
struct bch_inode_unpacked inode;
|
||||
int ret = bch2_inode_find_by_inum_snapshot(trans, k.k->p.inode, k.k->p.snapshot,
|
||||
&inode, BTREE_ITER_cached);
|
||||
@ -313,7 +313,7 @@ int bch2_bkey_get_io_opts(struct btree_trans *trans,
|
||||
snapshot_opts->d.nr = 0;
|
||||
}
|
||||
|
||||
if (k.k->p.snapshot) {
|
||||
if (!metadata && k.k->p.snapshot) {
|
||||
if (snapshot_opts->cur_inum != k.k->p.inode) {
|
||||
snapshot_opts->d.nr = 0;
|
||||
|
||||
@ -362,6 +362,8 @@ int bch2_bkey_get_io_opts(struct btree_trans *trans,
|
||||
#undef x
|
||||
}
|
||||
|
||||
BUG_ON(metadata && opts->erasure_code);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -374,10 +376,46 @@ static const char * const bch2_rebalance_state_strs[] = {
|
||||
#undef x
|
||||
};
|
||||
|
||||
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum)
|
||||
static u64 rebalance_scan_encode(struct rebalance_scan s)
|
||||
{
|
||||
switch (s.type) {
|
||||
case REBALANCE_SCAN_fs:
|
||||
return 0;
|
||||
case REBALANCE_SCAN_metadata:
|
||||
return 1;
|
||||
case REBALANCE_SCAN_device:
|
||||
return s.dev + 32;
|
||||
case REBALANCE_SCAN_inum:
|
||||
return s.inum;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static struct rebalance_scan rebalance_scan_decode(u64 v)
|
||||
{
|
||||
if (v == 0)
|
||||
return (struct rebalance_scan) { .type = REBALANCE_SCAN_fs };
|
||||
if (v == 1)
|
||||
return (struct rebalance_scan) { .type = REBALANCE_SCAN_metadata };
|
||||
if (v < BCACHEFS_ROOT_INO)
|
||||
return (struct rebalance_scan) {
|
||||
.type = REBALANCE_SCAN_device,
|
||||
.dev = v - 32,
|
||||
};
|
||||
|
||||
return (struct rebalance_scan) {
|
||||
.type = REBALANCE_SCAN_inum,
|
||||
.inum = v,
|
||||
};
|
||||
}
|
||||
|
||||
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, struct rebalance_scan s)
|
||||
{
|
||||
CLASS(btree_iter, iter)(trans, BTREE_ID_rebalance_work,
|
||||
SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
|
||||
SPOS(rebalance_scan_encode(s),
|
||||
REBALANCE_WORK_SCAN_OFFSET,
|
||||
U32_MAX),
|
||||
BTREE_ITER_intent);
|
||||
struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter));
|
||||
|
||||
@ -394,16 +432,17 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum)
|
||||
return bch2_trans_update(trans, &iter, &cookie->k_i, 0);
|
||||
}
|
||||
|
||||
int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
|
||||
int bch2_set_rebalance_needs_scan(struct bch_fs *c, struct rebalance_scan s)
|
||||
{
|
||||
CLASS(btree_trans, trans)(c);
|
||||
return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_set_rebalance_needs_scan_trans(trans, inum));
|
||||
bch2_set_rebalance_needs_scan_trans(trans, s));
|
||||
}
|
||||
|
||||
int bch2_set_fs_needs_rebalance(struct bch_fs *c)
|
||||
{
|
||||
return bch2_set_rebalance_needs_scan(c, 0);
|
||||
return bch2_set_rebalance_needs_scan(c,
|
||||
(struct rebalance_scan) { .type = REBALANCE_SCAN_fs });
|
||||
}
|
||||
|
||||
static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie)
|
||||
@ -647,7 +686,7 @@ root_err:
|
||||
noinline_for_stack
|
||||
static int do_rebalance_scan(struct moving_context *ctxt,
|
||||
struct per_snapshot_io_opts *snapshot_io_opts,
|
||||
u64 inum, u64 cookie, u64 *sectors_scanned)
|
||||
u64 scan_v, u64 cookie, u64 *sectors_scanned)
|
||||
{
|
||||
struct btree_trans *trans = ctxt->trans;
|
||||
struct bch_fs *c = trans->c;
|
||||
@ -658,7 +697,8 @@ static int do_rebalance_scan(struct moving_context *ctxt,
|
||||
|
||||
r->state = BCH_REBALANCE_scanning;
|
||||
|
||||
if (!inum) {
|
||||
struct rebalance_scan s = rebalance_scan_decode(scan_v);
|
||||
if (s.type == REBALANCE_SCAN_fs) {
|
||||
r->scan_start = BBPOS_MIN;
|
||||
r->scan_end = BBPOS_MAX;
|
||||
|
||||
@ -670,16 +710,16 @@ static int do_rebalance_scan(struct moving_context *ctxt,
|
||||
try(do_rebalance_scan_btree(ctxt, snapshot_io_opts, btree, 0,
|
||||
POS_MIN, SPOS_MAX));
|
||||
}
|
||||
} else {
|
||||
r->scan_start = BBPOS(BTREE_ID_extents, POS(inum, 0));
|
||||
r->scan_end = BBPOS(BTREE_ID_extents, POS(inum, U64_MAX));
|
||||
} else if (s.type == REBALANCE_SCAN_inum) {
|
||||
r->scan_start = BBPOS(BTREE_ID_extents, POS(s.inum, 0));
|
||||
r->scan_end = BBPOS(BTREE_ID_extents, POS(s.inum, U64_MAX));
|
||||
|
||||
try(do_rebalance_scan_btree(ctxt, snapshot_io_opts, BTREE_ID_extents, 0,
|
||||
r->scan_start.pos, r->scan_end.pos));
|
||||
}
|
||||
|
||||
try(commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_clear_rebalance_needs_scan(trans, inum, cookie)));
|
||||
bch2_clear_rebalance_needs_scan(trans, scan_v, cookie)));
|
||||
|
||||
*sectors_scanned += atomic64_read(&r->scan_stats.sectors_seen);
|
||||
/*
|
||||
|
||||
@ -84,8 +84,22 @@ int bch2_bkey_get_io_opts(struct btree_trans *,
|
||||
struct per_snapshot_io_opts *, struct bkey_s_c,
|
||||
struct bch_inode_opts *opts);
|
||||
|
||||
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64);
|
||||
int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum);
|
||||
struct rebalance_scan {
|
||||
enum rebalance_scan_type {
|
||||
REBALANCE_SCAN_fs,
|
||||
REBALANCE_SCAN_metadata,
|
||||
REBALANCE_SCAN_device,
|
||||
REBALANCE_SCAN_inum,
|
||||
} type;
|
||||
|
||||
union {
|
||||
unsigned dev;
|
||||
u64 inum;
|
||||
};
|
||||
};
|
||||
|
||||
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, struct rebalance_scan);
|
||||
int bch2_set_rebalance_needs_scan(struct bch_fs *, struct rebalance_scan);
|
||||
int bch2_set_fs_needs_rebalance(struct bch_fs *);
|
||||
|
||||
static inline void bch2_rebalance_wakeup(struct bch_fs *c)
|
||||
|
||||
@ -693,6 +693,9 @@ static int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
|
||||
struct bch_inode_opts *io_opts,
|
||||
unsigned buf_bytes)
|
||||
{
|
||||
/* be paranoid */
|
||||
buf_bytes = round_up(buf_bytes, c->opts.block_size);
|
||||
|
||||
unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
|
||||
|
||||
m->bvecs = kmalloc_array(nr_vecs, sizeof*(m->bvecs), GFP_KERNEL);
|
||||
@ -702,7 +705,7 @@ static int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
|
||||
bio_init(&m->rbio.bio, NULL, m->bvecs, nr_vecs, REQ_OP_READ);
|
||||
bio_init(&m->op.wbio.bio, NULL, m->bvecs, nr_vecs, 0);
|
||||
|
||||
if (bch2_bio_alloc_pages(&m->op.wbio.bio, buf_bytes, GFP_KERNEL)) {
|
||||
if (bch2_bio_alloc_pages(&m->op.wbio.bio, c->opts.block_size, buf_bytes, GFP_KERNEL)) {
|
||||
kfree(m->bvecs);
|
||||
m->bvecs = NULL;
|
||||
return -ENOMEM;
|
||||
|
||||
@ -807,6 +807,19 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
|
||||
struct bio *bio;
|
||||
unsigned output_available =
|
||||
min(wp->sectors_free << 9, src->bi_iter.bi_size);
|
||||
|
||||
/*
|
||||
* XXX: we'll want to delete this later, there's no reason we can't
|
||||
* issue > 2MB bios if we're allocating high order pages
|
||||
*
|
||||
* But bch2_bio_alloc_pages() BUGS() if we ask it to allocate more pages
|
||||
* than fit in the bio, and we're using bio_alloc_bioset() which is
|
||||
* limited to BIO_MAX_VECS
|
||||
*/
|
||||
output_available = min(output_available, BIO_MAX_VECS * PAGE_SIZE);
|
||||
|
||||
BUG_ON(output_available & (c->opts.block_size - 1));
|
||||
|
||||
unsigned pages = DIV_ROUND_UP(output_available +
|
||||
(buf
|
||||
? ((unsigned long) buf & (PAGE_SIZE - 1))
|
||||
@ -814,8 +827,7 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
|
||||
|
||||
pages = min(pages, BIO_MAX_VECS);
|
||||
|
||||
bio = bio_alloc_bioset(NULL, pages, 0,
|
||||
GFP_NOFS, &c->bio_write);
|
||||
bio = bio_alloc_bioset(NULL, pages, 0, GFP_NOFS, &c->bio_write);
|
||||
wbio = wbio_init(bio);
|
||||
wbio->put_bio = true;
|
||||
/* copy WRITE_SYNC flag */
|
||||
@ -839,6 +851,7 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
|
||||
if (bio->bi_iter.bi_size < output_available)
|
||||
*page_alloc_failed =
|
||||
bch2_bio_alloc_pages(bio,
|
||||
c->opts.block_size,
|
||||
output_available -
|
||||
bio->bi_iter.bi_size,
|
||||
GFP_NOFS) != 0;
|
||||
|
||||
@ -196,6 +196,7 @@ read_attribute(btree_reserve_cache);
|
||||
read_attribute(open_buckets);
|
||||
read_attribute(open_buckets_partial);
|
||||
read_attribute(nocow_lock_table);
|
||||
read_attribute(replicas);
|
||||
|
||||
read_attribute(read_refs);
|
||||
read_attribute(write_refs);
|
||||
@ -389,6 +390,9 @@ SHOW(bch2_fs)
|
||||
if (attr == &sysfs_nocow_lock_table)
|
||||
bch2_nocow_locks_to_text(out, &c->nocow_locks);
|
||||
|
||||
if (attr == &sysfs_replicas)
|
||||
bch2_cpu_replicas_to_text(out, &c->replicas);
|
||||
|
||||
if (attr == &sysfs_disk_groups)
|
||||
bch2_disk_groups_to_text(out, c);
|
||||
|
||||
@ -600,6 +604,7 @@ struct attribute *bch2_fs_internal_files[] = {
|
||||
&sysfs_open_buckets_partial,
|
||||
&sysfs_write_refs,
|
||||
&sysfs_nocow_lock_table,
|
||||
&sysfs_replicas,
|
||||
&sysfs_io_timers_read,
|
||||
&sysfs_io_timers_write,
|
||||
|
||||
|
||||
@ -913,6 +913,9 @@ static int check_inode(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
ret = bch2_check_inode_has_case_insensitive(trans, &u, &s->ids, &do_update);
|
||||
if (bch2_err_matches(ret, ENOENT)) /* disconnected inode; will be fixed by a later pass */
|
||||
ret = 0;
|
||||
bch_err_msg(c, ret, "bch2_check_inode_has_case_insensitive()");
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -1627,7 +1630,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
|
||||
new_d->k.p.inode = d.k->p.inode;
|
||||
new_d->k.p.snapshot = d.k->p.snapshot;
|
||||
|
||||
struct btree_iter dup_iter = {};
|
||||
CLASS(btree_iter_uninit, dup_iter)(trans);
|
||||
return bch2_hash_delete_at(trans,
|
||||
bch2_dirent_hash_desc, hash_info, iter,
|
||||
BTREE_UPDATE_internal_snapshot_node) ?:
|
||||
|
||||
@ -549,7 +549,7 @@ int bch2_dirent_lookup_trans(struct btree_trans *trans,
|
||||
hash_info, dir, &lookup_name, flags));
|
||||
|
||||
int ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), inum);
|
||||
return ret > 0 ? -ENOENT : 0;
|
||||
return ret > 0 ? -ENOENT : ret;
|
||||
}
|
||||
|
||||
u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
|
||||
|
||||
@ -832,10 +832,8 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
|
||||
prt_printf(&buf, "casefolded dir with has_case_insensitive not set\ninum %llu:%u ",
|
||||
inode->bi_inum, inode->bi_snapshot);
|
||||
|
||||
ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot,
|
||||
snapshot_overwrites, &buf);
|
||||
if (ret)
|
||||
goto out;
|
||||
try(bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot,
|
||||
snapshot_overwrites, &buf));
|
||||
|
||||
if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) {
|
||||
inode->bi_flags |= BCH_INODE_has_case_insensitive;
|
||||
@ -844,7 +842,7 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
|
||||
}
|
||||
|
||||
if (!(inode->bi_flags & BCH_INODE_has_case_insensitive))
|
||||
goto out;
|
||||
return 0;
|
||||
|
||||
struct bch_inode_unpacked dir = *inode;
|
||||
u32 snapshot = dir.bi_snapshot;
|
||||
@ -852,30 +850,22 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
|
||||
while (!(dir.bi_inum == BCACHEFS_ROOT_INO &&
|
||||
dir.bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
|
||||
if (dir.bi_parent_subvol) {
|
||||
ret = bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot);
|
||||
if (ret)
|
||||
goto out;
|
||||
try(bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot));
|
||||
|
||||
snapshot_overwrites = NULL;
|
||||
}
|
||||
|
||||
ret = bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
try(bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0));
|
||||
|
||||
if (!(dir.bi_flags & BCH_INODE_has_case_insensitive)) {
|
||||
prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n");
|
||||
|
||||
ret = bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot,
|
||||
snapshot_overwrites, &buf);
|
||||
if (ret)
|
||||
goto out;
|
||||
try(bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot,
|
||||
snapshot_overwrites, &buf));
|
||||
|
||||
if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", buf.buf)) {
|
||||
dir.bi_flags |= BCH_INODE_has_case_insensitive;
|
||||
ret = __bch2_fsck_write_inode(trans, &dir);
|
||||
if (ret)
|
||||
goto out;
|
||||
try(__bch2_fsck_write_inode(trans, &dir));
|
||||
}
|
||||
}
|
||||
|
||||
@ -886,15 +876,11 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
|
||||
if (!repairing_parents)
|
||||
break;
|
||||
}
|
||||
out:
|
||||
fsck_err:
|
||||
bch_err_fn(trans->c, ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (repairing_parents)
|
||||
return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
|
||||
bch_err_throw(trans->c, transaction_restart_nested);
|
||||
|
||||
return 0;
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -118,7 +118,7 @@ static void qc_dqblk_to_text(struct printbuf *out, struct qc_dqblk *q)
|
||||
prt_printf(out, "d_fieldmask\t%x\n", q->d_fieldmask);
|
||||
prt_printf(out, "d_spc_hardlimit\t%llu\n", q->d_spc_hardlimit);
|
||||
prt_printf(out, "d_spc_softlimit\t%llu\n", q->d_spc_softlimit);
|
||||
prt_printf(out, "d_ino_hardlimit\%llu\n", q->d_ino_hardlimit);
|
||||
prt_printf(out, "d_ino_hardlimit\t%llu\n", q->d_ino_hardlimit);
|
||||
prt_printf(out, "d_ino_softlimit\t%llu\n", q->d_ino_softlimit);
|
||||
prt_printf(out, "d_space\t%llu\n", q->d_space);
|
||||
prt_printf(out, "d_ino_count\t%llu\n", q->d_ino_count);
|
||||
|
||||
@@ -218,6 +218,50 @@ static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans
	return 0;
}

static int str_hash_dup_entries(struct btree_trans *trans,
				struct snapshots_seen *s,
				const struct bch_hash_desc *desc,
				struct bch_hash_info *hash_info,
				struct btree_iter *k_iter, struct bkey_s_c k,
				struct btree_iter *dup_iter, struct bkey_s_c dup_k,
				bool *updated_before_k_pos)
{
	struct bch_fs *c = trans->c;
	CLASS(printbuf, buf)();
	int ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k);
	if (ret < 0)
		return ret;

	if (!fsck_err(trans, hash_table_key_duplicate,
		      "duplicate hash table keys%s:\n%s",
		      ret != 2 ? "" : ", both point to valid inodes",
		      (printbuf_reset(&buf),
		       bch2_bkey_val_to_text(&buf, c, k),
		       prt_newline(&buf),
		       bch2_bkey_val_to_text(&buf, c, dup_k),
		       buf.buf)))
		return 0;

	switch (ret) {
	case 0:
		try(bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0));
		break;
	case 1:
		try(bch2_hash_delete_at(trans, *desc, hash_info, dup_iter, 0));
		break;
	case 2:
		try(bch2_fsck_rename_dirent(trans, s, *desc, hash_info,
					    bkey_s_c_to_dirent(k),
					    updated_before_k_pos));
		try(bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0));
		break;
	}

	return bch2_trans_commit_lazy(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
fsck_err:
	return ret;
}

/* Put a str_hash key in its proper location, checking for duplicates */
int bch2_str_hash_repair_key(struct btree_trans *trans,
			     struct snapshots_seen *s,
@@ -227,96 +271,65 @@ int bch2_str_hash_repair_key(struct btree_trans *trans,
			     struct btree_iter *dup_iter, struct bkey_s_c dup_k,
			     bool *updated_before_k_pos)
{
	struct bch_fs *c = trans->c;
	CLASS(printbuf, buf)();
	bool free_snapshots_seen = false;
	int ret = 0;
	CLASS(snapshots_seen, s_onstack)();

	if (!s) {
		s = bch2_trans_kmalloc(trans, sizeof(*s));
		ret = PTR_ERR_OR_ZERO(s);
		if (ret)
			goto out;

		s = &s_onstack;
		s->pos = k_iter->pos;
		darray_init(&s->ids);

		ret = bch2_get_snapshot_overwrites(trans, desc->btree_id, k_iter->pos, &s->ids);
		if (ret)
			goto out;

		free_snapshots_seen = true;
		try(bch2_get_snapshot_overwrites(trans, desc->btree_id, k_iter->pos, &s->ids));
	}

	if (!dup_k.k) {
		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
		ret = PTR_ERR_OR_ZERO(new);
		if (ret)
			goto out;
		struct bkey_i *new = errptr_try(bch2_bkey_make_mut_noupdate(trans, k));

		dup_k = bch2_hash_set_or_get_in_snapshot(trans, dup_iter, *desc, hash_info,
		dup_k = bkey_try(bch2_hash_set_or_get_in_snapshot(trans, dup_iter, *desc, hash_info,
				(subvol_inum) { 0, new->k.p.inode },
				new->k.p.snapshot, new,
				STR_HASH_must_create|
				BTREE_ITER_with_updates|
				BTREE_UPDATE_internal_snapshot_node);
		ret = bkey_err(dup_k);
		if (ret)
			goto out;
		if (dup_k.k)
			goto duplicate_entries;
				BTREE_UPDATE_internal_snapshot_node));

		if (bpos_lt(new->k.p, k.k->p))
			*updated_before_k_pos = true;

		ret = bch2_insert_snapshot_whiteouts(trans, desc->btree_id,
						     k_iter->pos, new->k.p) ?:
			bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
					    BTREE_ITER_with_updates|
					    BTREE_UPDATE_internal_snapshot_node) ?:
			bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?:
			bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
			bch_err_throw(c, transaction_restart_commit);
	} else {
duplicate_entries:
		ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k);
		if (ret < 0)
			goto out;

		if (!fsck_err(trans, hash_table_key_duplicate,
			      "duplicate hash table keys%s:\n%s",
			      ret != 2 ? "" : ", both point to valid inodes",
			      (printbuf_reset(&buf),
			       bch2_bkey_val_to_text(&buf, c, k),
			       prt_newline(&buf),
			       bch2_bkey_val_to_text(&buf, c, dup_k),
			       buf.buf)))
			goto out;

		switch (ret) {
		case 0:
			ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
			break;
		case 1:
			ret = bch2_hash_delete_at(trans, *desc, hash_info, dup_iter, 0);
			break;
		case 2:
			ret = bch2_fsck_rename_dirent(trans, s, *desc, hash_info,
						      bkey_s_c_to_dirent(k),
						      updated_before_k_pos) ?:
				bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
						    BTREE_ITER_with_updates);
			goto out;
		if (!dup_k.k) {
			try(bch2_insert_snapshot_whiteouts(trans, desc->btree_id,
							   k_iter->pos, new->k.p));
			try(bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
						BTREE_UPDATE_internal_snapshot_node));
			try(bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new));
			try(bch2_trans_commit_lazy(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc));
		}

		ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
			bch_err_throw(c, transaction_restart_commit);
	}
out:

	if (dup_k.k)
		try(str_hash_dup_entries(trans, s, desc, hash_info,
					 k_iter, k, dup_iter, dup_k,
					 updated_before_k_pos));
	return 0;
}

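bch2_str_hash_repair_key() above also leans on the GNU C "a ?: b" extension to chain fallible steps: each call returns 0 on success or a nonzero error, and the first nonzero value is returned without evaluating the rest. A self-contained sketch of the idiom, with made-up function names purely for illustration (requires GCC or Clang):

#include <stdio.h>

/* Hypothetical steps: each returns 0 on success or a nonzero error code. */
static int step_one(void)   { return 0; }
static int step_two(void)   { return -5; }
static int step_three(void) { return 0; }

int main(void)
{
	/* "a ?: b" evaluates a once; if nonzero it is the result, otherwise
	 * b is evaluated. Chaining gives "first error wins"; here step_three()
	 * is never called because step_two() already failed. */
	int ret = step_one() ?: step_two() ?: step_three();

	printf("ret = %d\n", ret);	/* prints -5 */
	return 0;
}
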
static int str_hash_bad_hash(struct btree_trans *trans,
			     struct snapshots_seen *s,
			     const struct bch_hash_desc *desc,
			     struct bch_hash_info *hash_info,
			     struct btree_iter *k_iter, struct bkey_s_c hash_k,
			     bool *updated_before_k_pos,
			     struct btree_iter *iter, u64 hash)
{
	CLASS(printbuf, buf)();
	int ret = 0;
	/*
	 * Before doing any repair, check hash_info itself:
	 */
	try(check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info));

	if (fsck_err(trans, hash_table_key_wrong_offset,
		     "hash table key at wrong offset: should be at %llu\n%s",
		     hash,
		     (bch2_bkey_val_to_text(&buf, trans->c, hash_k), buf.buf)))
		ret = bch2_str_hash_repair_key(trans, s, desc, hash_info,
					       k_iter, hash_k,
					       iter, bkey_s_c_null,
					       updated_before_k_pos);
fsck_err:
	bch2_trans_iter_exit(dup_iter);
	if (free_snapshots_seen)
		darray_exit(&s->ids);
	return ret;
}

@@ -327,57 +340,36 @@ int __bch2_str_hash_check_key(struct btree_trans *trans,
			      struct btree_iter *k_iter, struct bkey_s_c hash_k,
			      bool *updated_before_k_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter = {};
	CLASS(printbuf, buf)();
	u64 hash = desc->hash_bkey(hash_info, hash_k);

	CLASS(btree_iter, iter)(trans, desc->btree_id,
				SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
				BTREE_ITER_slots);

	if (hash_k.k->p.offset < hash)
		return str_hash_bad_hash(trans, s, desc, hash_info, k_iter, hash_k,
					 updated_before_k_pos, &iter, hash);

	struct bkey_s_c k;
	int ret = 0;

	u64 hash = desc->hash_bkey(hash_info, hash_k);
	if (hash_k.k->p.offset < hash)
		goto bad_hash;

	bch2_trans_iter_init(trans, &iter, desc->btree_id,
			     SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
			     BTREE_ITER_slots|
			     BTREE_ITER_with_updates);

	for_each_btree_key_continue_norestart(iter,
			BTREE_ITER_slots|
			BTREE_ITER_with_updates, k, ret) {
			BTREE_ITER_slots, k, ret) {
		if (bkey_eq(k.k->p, hash_k.k->p))
			break;

		if (k.k->type == desc->key_type &&
		    !desc->cmp_bkey(k, hash_k)) {
			ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode,
								 hash_info) ?:
				bch2_str_hash_repair_key(trans, s, desc, hash_info,
							 k_iter, hash_k,
							 &iter, k, updated_before_k_pos);
			/* dup */
			try(check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info));
			try(bch2_str_hash_repair_key(trans, s, desc, hash_info, k_iter, hash_k,
						     &iter, k, updated_before_k_pos));
			break;
		}

		if (bkey_deleted(k.k))
			goto bad_hash;
			return str_hash_bad_hash(trans, s, desc, hash_info, k_iter, hash_k,
						 updated_before_k_pos, &iter, hash);
	}
	bch2_trans_iter_exit(&iter);
fsck_err:
	return ret;
bad_hash:
	bch2_trans_iter_exit(&iter);
	/*
	 * Before doing any repair, check hash_info itself:
	 */
	try(check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info));

	if (fsck_err(trans, hash_table_key_wrong_offset,
		     "hash table key at wrong offset: should be at %llu\n%s",
		     hash,
		     (bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)))
		ret = bch2_str_hash_repair_key(trans, s, desc, hash_info,
					       k_iter, hash_k,
					       &iter, bkey_s_c_null,
					       updated_before_k_pos);
	return ret;
}

@@ -447,8 +447,13 @@ int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb, struct prin
	lockdep_assert_held(&c->state_lock);

	if (le64_to_cpu(sb->sb->seq) >
	    le64_to_cpu(c->disk_sb.sb->seq))
		bch2_sb_to_fs(c, sb->sb);
	    le64_to_cpu(c->disk_sb.sb->seq)) {
		/*
		 * rewind, we'll lose some updates but it's not safe to call
		 * bch2_sb_to_fs() after fs is started
		 */
		sb->sb->seq = c->disk_sb.sb->seq;
	}

	BUG_ON(!bch2_dev_exists(c, sb->sb->dev_idx));

@@ -628,11 +633,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags,
		goto err;
	}

	ret = bch2_replicas_gc2(c);
	if (ret) {
		prt_printf(err, "bch2_replicas_gc2() error: %s\n", bch2_err_str(ret));
		goto err;
	}
	/*
	 * flushing the journal should be sufficient, but it's the write buffer
	 * flush that kills superblock replicas entries after they've gone to 0
	 * so bch2_dev_has_data() returns the correct value:
	 */

	data = bch2_dev_has_data(c, ca);
	if (data) {

@@ -9,6 +9,7 @@
#include "journal/seq_blacklist.h"

#include "alloc/foreground.h"
#include "alloc/replicas.h"
#include "btree/update.h"

/* allocate journal on a device: */
@@ -440,11 +441,12 @@ int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq)
		if (journal_entry_empty(&i->j))
			j->last_empty_seq = le64_to_cpu(i->j.seq);

		p = journal_seq_pin(j, seq);

		p->devs.nr = 0;
		struct bch_devs_list seq_devs = {};
		darray_for_each(i->ptrs, ptr)
			bch2_dev_list_add_dev(&p->devs, ptr->dev);
			seq_devs.data[seq_devs.nr++] = ptr->dev;

		p = journal_seq_pin(j, seq);
		bch2_devlist_to_replicas(&p->devs.e, BCH_DATA_journal, seq_devs);

		had_entries = true;
	}

@@ -442,6 +442,7 @@ static int journal_entry_open(struct journal *j)
	buf->write_started = false;
	buf->write_allocated = false;
	buf->write_done = false;
	buf->had_error = false;

	memset(buf->data, 0, sizeof(*buf->data));
	buf->data->seq = cpu_to_le64(journal_cur_seq(j));

@@ -410,20 +410,14 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
				       unsigned u64s, unsigned flags,
				       struct btree_trans *trans)
{
	int ret;

	EBUG_ON(res->ref);
	EBUG_ON(!test_bit(JOURNAL_running, &j->flags));

	res->u64s = u64s;

	if (journal_res_get_fast(j, res, flags))
		goto out;
	if (!journal_res_get_fast(j, res, flags))
		try(bch2_journal_res_get_slowpath(j, res, flags, trans));

	ret = bch2_journal_res_get_slowpath(j, res, flags, trans);
	if (ret)
		return ret;
out:
	if (!(flags & JOURNAL_RES_GET_CHECK)) {
		lock_acquire_shared(&j->res_map, 0,
				    (flags & JOURNAL_RES_GET_NONBLOCK) != 0,

@@ -956,8 +956,8 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
	scoped_guard(spinlock, &j->lock)
		fifo_for_each_entry_ptr(p, &j->pin, iter)
			if (dev_idx >= 0
			    ? bch2_dev_list_has_dev(p->devs, dev_idx)
			    : p->devs.nr < c->opts.metadata_replicas)
			    ? bch2_replicas_entry_has_dev(&p->devs.e, dev_idx)
			    : p->devs.e.nr_devs < c->opts.metadata_replicas)
				seq = iter;

	bch2_journal_flush_pins(j, seq);
@@ -981,13 +981,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
	seq = 0;
	scoped_guard(spinlock, &j->lock)
		while (!ret) {
			union bch_replicas_padded replicas;

			seq = max(seq, journal_last_seq(j));
			if (seq >= j->pin.back)
			if (seq > j->seq_ondisk)
				break;
			bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
						 journal_seq_pin(j, seq)->devs);

			union bch_replicas_padded replicas;
			memcpy(&replicas, &journal_seq_pin(j, seq)->devs, sizeof(replicas));
			seq++;

			if (replicas.e.nr_devs) {
@@ -1021,6 +1020,9 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
	prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count));
	guard(printbuf_indent)(out);

	bch2_replicas_entry_to_text(out, &pin_list->devs.e);
	prt_newline(out);

	prt_printf(out, "unflushed:\n");
	for (unsigned i = 0; i < ARRAY_SIZE(pin_list->unflushed); i++)
		list_for_each_entry(pin, &pin_list->unflushed[i], list)

@@ -26,7 +26,7 @@ static inline void journal_pin_list_init(struct journal_entry_pin_list *p, int c
	for (unsigned i = 0; i < ARRAY_SIZE(p->flushed); i++)
		INIT_LIST_HEAD(&p->flushed[i]);
	atomic_set(&p->count, count);
	p->devs.nr = 0;
	p->devs.e.nr_devs = 0;
	p->bytes = 0;
}

@@ -5,6 +5,7 @@
#include <linux/cache.h>
#include <linux/workqueue.h>

#include "alloc/replicas_types.h"
#include "alloc/types.h"
#include "init/dev_types.h"
#include "util/fifo.h"
@@ -48,6 +49,7 @@ struct journal_buf {
	bool write_started:1;
	bool write_allocated:1;
	bool write_done:1;
	bool had_error:1;
	u8 idx;
};

@@ -70,7 +72,7 @@ struct journal_entry_pin_list {
	struct list_head unflushed[JOURNAL_PIN_TYPE_NR];
	struct list_head flushed[JOURNAL_PIN_TYPE_NR];
	atomic_t count;
	struct bch_devs_list devs;
	union bch_replicas_padded devs;
	size_t bytes;
};

@@ -113,7 +115,14 @@ union journal_res_state {

/* bytes: */
#define JOURNAL_ENTRY_SIZE_MIN	(64U << 10)	/* 64k */
#define JOURNAL_ENTRY_SIZE_MAX	(4U << 22)	/* 16M */

/*
 * The block layer is fragile with large bios - it should be able to process any
 * IO incrementally, but...
 *
 * 4MB corresponds to bio_kmalloc() -> UIO_MAXIOV
 */
#define JOURNAL_ENTRY_SIZE_MAX	(4U << 20)	/* 4M */

/*
 * We stash some journal state as sentinal values in cur_entry_offset:

@@ -188,7 +188,6 @@ static CLOSURE_CALLBACK(journal_write_done)
	closure_type(w, struct journal_buf, io);
	struct journal *j = container_of(w, struct journal, buf[w->idx]);
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	union bch_replicas_padded replicas;
	u64 seq = le64_to_cpu(w->data->seq);
	int err = 0;

@@ -196,14 +195,15 @@ static CLOSURE_CALLBACK(journal_write_done)
			       ? j->flush_write_time
			       : j->noflush_write_time, j->write_start_time);

	if (!w->devs_written.nr) {
		err = bch_err_throw(c, journal_write_err);
	} else {
		bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
					 w->devs_written);
		err = bch2_mark_replicas(c, &replicas.e);
	if (w->had_error) {
		struct bch_replicas_entry_v1 *r = &journal_seq_pin(j, seq)->devs.e;

		bch2_devlist_to_replicas(r, BCH_DATA_journal, w->devs_written);
	}

	if (!w->devs_written.nr)
		err = bch_err_throw(c, journal_write_err);

	if (err && !bch2_journal_error(j)) {
		CLASS(printbuf, buf)();
		bch2_log_msg_start(c, &buf);
@@ -222,8 +222,7 @@ static CLOSURE_CALLBACK(journal_write_done)
	closure_debug_destroy(cl);

	spin_lock(&j->lock);
	if (seq >= j->pin.front)
		journal_seq_pin(j, seq)->devs = w->devs_written;
	BUG_ON(seq < j->pin.front);
	if (err && (!j->err_seq || seq < j->err_seq))
		j->err_seq = seq;
	w->write_done = true;
@@ -334,6 +333,7 @@ static void journal_write_endio(struct bio *bio)
		unsigned long flags;
		spin_lock_irqsave(&j->err_lock, flags);
		bch2_dev_list_drop_dev(&w->devs_written, ca->dev_idx);
		w->had_error = true;
		spin_unlock_irqrestore(&j->err_lock, flags);
	}

@@ -632,7 +632,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
	closure_type(w, struct journal_buf, io);
	struct journal *j = container_of(w, struct journal, buf[w->idx]);
	struct bch_fs *c = container_of(j, struct bch_fs, journal);
	union bch_replicas_padded replicas;
	unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_free]);
	int ret;

@@ -701,9 +700,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
	 * Mark journal replicas before we submit the write to guarantee
	 * recovery will find the journal entries after a crash.
	 */
	bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
				 w->devs_written);
	ret = bch2_mark_replicas(c, &replicas.e);
	struct bch_replicas_entry_v1 *r = &journal_seq_pin(j, le64_to_cpu(w->data->seq))->devs.e;
	bch2_devlist_to_replicas(r, BCH_DATA_journal, w->devs_written);
	ret = bch2_mark_replicas(c, r);
	if (ret)
		goto err;

@@ -525,6 +525,37 @@ void bch2_opts_to_text(struct printbuf *out,
	}
}

static int opt_hook_io(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum bch_opt_id id, bool post)
{
	if (!test_bit(BCH_FS_started, &c->flags))
		return 0;

	switch (id) {
	case Opt_foreground_target:
	case Opt_background_target:
	case Opt_promote_target:
	case Opt_compression:
	case Opt_background_compression:
	case Opt_data_checksum:
	case Opt_data_replicas:
	case Opt_erasure_code: {
		struct rebalance_scan s = {
			.type = !inum ? REBALANCE_SCAN_fs : REBALANCE_SCAN_inum,
			.inum = inum,
		};

		try(bch2_set_rebalance_needs_scan(c, s));
		if (post)
			bch2_rebalance_wakeup(c);
		break;
	}
	default:
		break;
	}

	return 0;
}

int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum bch_opt_id id, u64 v,
			  bool change)
{
@@ -546,16 +577,8 @@ int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum b
		break;
	}

	if (change &&
	    test_bit(BCH_FS_started, &c->flags) &&
	    (id == Opt_foreground_target ||
	     id == Opt_background_target ||
	     id == Opt_promote_target ||
	     id == Opt_compression ||
	     id == Opt_background_compression ||
	     id == Opt_data_checksum ||
	     id == Opt_data_replicas))
		try(bch2_set_rebalance_needs_scan(c, inum));
	if (change)
		try(opt_hook_io(c, ca, inum, id, false));

	return 0;
}
@@ -571,17 +594,7 @@ int bch2_opts_hooks_pre_set(struct bch_fs *c)
void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum,
			    enum bch_opt_id id, u64 v)
{
	if (test_bit(BCH_FS_started, &c->flags) &&
	    (id == Opt_foreground_target ||
	     id == Opt_background_target ||
	     id == Opt_promote_target ||
	     id == Opt_compression ||
	     id == Opt_background_compression ||
	     id == Opt_data_checksum ||
	     id == Opt_data_replicas)) {
		bch2_set_rebalance_needs_scan(c, inum);
		bch2_rebalance_wakeup(c);
	}
	opt_hook_io(c, ca, inum, id, true);

	switch (id) {
	case Opt_rebalance_enabled:
@@ -838,6 +851,7 @@ void bch2_inode_opts_get(struct bch_fs *c, struct bch_inode_opts *ret, bool meta
		ret->background_target = c->opts.metadata_target ?: c->opts.foreground_target;
		ret->data_replicas = c->opts.metadata_replicas;
		ret->data_checksum = c->opts.metadata_checksum;
		ret->erasure_code = false;
	} else {
		bch2_io_opts_fixups(ret);
	}

@@ -72,10 +72,7 @@ static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs)
static inline bool bch2_dev_list_has_dev(struct bch_devs_list devs,
					 unsigned dev)
{
	darray_for_each(devs, i)
		if (*i == dev)
			return true;
	return false;
	return darray_find(devs, dev) != NULL;
}

static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs,

@@ -96,7 +96,7 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t, bool);

#define darray_find_p(_d, _i, cond)				\
({								\
	typeof((_d).data) _ret = NULL;				\
	typeof(&(_d).data[0]) _ret = NULL;			\
								\
	darray_for_each(_d, _i)					\
		if (cond) {					\

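The darray_find_p change swaps typeof((_d).data) for typeof(&(_d).data[0]). One plausible reading of why: the two spellings agree when data is a pointer member, but when data is a fixed array (as in bch_devs_list, which the bch2_dev_list_has_dev hunk above now feeds to darray_find) the first yields an array type that cannot be initialized to NULL, while the second always yields a pointer-to-element type. A standalone sketch of the distinction, with made-up types (GNU C typeof):

#include <stddef.h>

struct fixed_list {
	unsigned nr;
	unsigned data[16];	/* array member, not a pointer */
};

int main(void)
{
	struct fixed_list l = { .nr = 0 };

	/* typeof(l.data) is "unsigned[16]", which cannot be declared "= NULL";
	 * typeof(&l.data[0]) is "unsigned *", which can. */
	typeof(&l.data[0]) found = NULL;

	for (unsigned i = 0; i < l.nr; i++)
		if (l.data[i] == 42)
			found = &l.data[i];

	return found != NULL;
}
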
@@ -278,20 +278,51 @@ static inline int eytzinger0_find_ge(void *base, size_t nr, size_t size,
	return n - 1;
}

#define eytzinger0_find(base, nr, size, _cmp, search)		\
({								\
	size_t _size = (size);					\
	void *_base1 = (void *)(base) - _size;			\
	const void *_search = (search);				\
	size_t _nr = (nr);					\
	size_t _i = 1;						\
	int _res;						\
								\
	while (_i <= _nr &&					\
	       (_res = _cmp(_search, _base1 + _i * _size)))	\
		_i = eytzinger1_child(_i, _res > 0);		\
	_i - 1;							\
})
/* 0 == not found */
static inline int eytzinger1_find_r(void *base, unsigned nr, unsigned size,
				    cmp_r_func_t cmp_fn, const void *priv,
				    const void *search)
{
	unsigned i = 1;
	while (i <= nr) {
		int cmp = cmp_fn(search, base + i * size, priv);
		if (!cmp)
			return i;
		i = eytzinger1_child(i, cmp > 0);
	}

	return 0;
}

/* 0 == not found */
static inline int eytzinger1_find(void *base, unsigned nr, unsigned size,
				  cmp_func_t cmp_fn, const void *search)
{
	unsigned i = 1;
	while (i <= nr) {
		int cmp = cmp_fn(search, base + i * size);
		if (!cmp)
			return i;
		i = eytzinger1_child(i, cmp > 0);
	}

	return 0;
}

/* -1 == not found */
static inline int eytzinger0_find_r(void *base, unsigned nr, unsigned size,
				    cmp_r_func_t cmp_fn, const void *priv,
				    const void *search)
{
	return eytzinger1_find_r(base - size, nr, size, cmp_fn, priv, search) - 1;
}

/* -1 == not found */
static inline int eytzinger0_find(void *base, unsigned nr, unsigned size,
				  cmp_func_t cmp_fn, const void *search)
{
	return eytzinger1_find(base - size, nr, size, cmp_fn, search) - 1;
}

void eytzinger0_sort_r(void *, size_t, size_t,
		       cmp_r_func_t, swap_r_func_t, const void *);

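The rewritten lookup helpers walk a 1-based Eytzinger (BFS-order) layout: the children of node i sit at 2*i and 2*i + 1, and the sign of the comparison picks which child to descend to. A standalone sketch of the same search over a plain int array laid out in Eytzinger order; it mirrors the loop in eytzinger1_find() above rather than calling it, since the real helpers live in the bcachefs tree:

#include <stdio.h>

/* 1-based Eytzinger layout of the sorted set {1..7}; index 0 is unused.
 * Node i has children at 2*i and 2*i + 1. */
static const int eytz[] = { 0, 4, 2, 6, 1, 3, 5, 7 };

/* Same structure as eytzinger1_find(): return the 1-based index of the
 * match, or 0 if not found. */
static unsigned eytz_find(const int *base, unsigned nr, int search)
{
	unsigned i = 1;

	while (i <= nr) {
		int cmp = (search > base[i]) - (search < base[i]);

		if (!cmp)
			return i;
		i = 2 * i + (cmp > 0);
	}

	return 0;
}

int main(void)
{
	printf("5 found at index %u\n", eytz_find(eytz, 7, 5));	/* prints 6 */
	printf("8 found at index %u\n", eytz_find(eytz, 7, 8));	/* prints 0 */
	return 0;
}
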
@@ -612,24 +612,51 @@ void bch2_bio_map(struct bio *bio, void *base, size_t size)
	bio_add_virt_nofail(bio, base, size);
}

int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
int bch2_bio_alloc_pages(struct bio *bio, unsigned bs, size_t size, gfp_t gfp_mask)
{
	BUG_ON(size & (bs - 1));
	unsigned bs_pages = DIV_ROUND_UP(bs, PAGE_SIZE);

	/*
	 * XXX: we could do this by allocating higher order pages, but
	 *
	 * - the page allocator gets slower at a certain order (5?) - we'd have
	 *   to check for this
	 *
	 * - bch2_bio_free_pages_pool() probably does not handle compound pages
	 *   yet
	 */
	DARRAY_PREALLOCATED(struct page *, 16) pages;
	darray_init(&pages);
	darray_make_room_gfp(&pages, bs_pages, gfp_mask|__GFP_NOFAIL);

	int ret = 0;
	while (size) {
		struct page *page = alloc_pages(gfp_mask, 0);
		unsigned len = min_t(size_t, PAGE_SIZE, size);
		while (pages.nr < bs_pages) {
			struct page *page = alloc_pages(gfp_mask, 0);
			if (!page) {
				ret = -ENOMEM;
				goto out;
			}

		if (!page)
			return -ENOMEM;

		if (unlikely(!bio_add_page(bio, page, len, 0))) {
			__free_page(page);
			break;
			BUG_ON(darray_push(&pages, page));
		}

		size -= len;
		}
		while (pages.nr) {
			BUG_ON(!size);

	return 0;
			unsigned len = min(PAGE_SIZE, size);
			size -= len;

			struct page *page = darray_pop(&pages);
			BUG_ON(!bio_add_page(bio, page, len, 0));
		}
	}
out:
	darray_for_each(pages, i)
		__free_page(*i);
	darray_exit(&pages);
	return ret;
}

u64 bch2_get_random_u64_below(u64 ceil)

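The reworked bch2_bio_alloc_pages() above now allocates a whole block's worth of pages before committing any of them to the bio, so a block either gets all of its pages or the partially allocated group is freed and -ENOMEM is returned. With hypothetical numbers, PAGE_SIZE = 4096 and bs = 16384 gives bs_pages = DIV_ROUND_UP(16384, 4096) = 4. A loose userspace analogy of that allocate-then-commit-per-group pattern, using malloc in place of alloc_pages and made-up names throughout:

#include <stdio.h>
#include <stdlib.h>

#define CHUNK	4	/* pages per block, e.g. DIV_ROUND_UP(16384, 4096) */

/* Allocate n groups of CHUNK "pages"; a group is committed to pages[] only
 * once every allocation in it succeeded, otherwise the partial group is
 * released and the whole call fails. */
static int alloc_groups(void *pages[], size_t n)
{
	for (size_t g = 0; g < n; g++) {
		void *tmp[CHUNK] = { 0 };

		for (size_t i = 0; i < CHUNK; i++) {
			tmp[i] = malloc(4096);
			if (!tmp[i]) {
				while (i--)
					free(tmp[i]);
				return -1;	/* nothing from this group leaks */
			}
		}

		for (size_t i = 0; i < CHUNK; i++)
			pages[g * CHUNK + i] = tmp[i];
	}

	return 0;
}

int main(void)
{
	void *pages[2 * CHUNK] = { 0 };

	if (!alloc_groups(pages, 2))
		printf("allocated %d pages in block-sized groups\n", 2 * CHUNK);

	for (size_t i = 0; i < 2 * CHUNK; i++)
		free(pages[i]);
	return 0;
}
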
@@ -370,7 +370,7 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
}

void bch2_bio_map(struct bio *bio, void *base, size_t);
int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
int bch2_bio_alloc_pages(struct bio *, unsigned, size_t, gfp_t);

#define closure_bio_submit(bio, cl)				\
do {								\

@@ -123,7 +123,10 @@ static int bch2_write_inode_trans(struct btree_trans *trans,
	struct bch_extent_rebalance new_r = bch2_inode_rebalance_opts_get(c, &inode_u);
	*rebalance_changed = memcmp(&old_r, &new_r, sizeof(new_r));
	if (*rebalance_changed)
		try(bch2_set_rebalance_needs_scan_trans(trans, inode_u.bi_inum));
		try(bch2_set_rebalance_needs_scan_trans(trans,
				(struct rebalance_scan) {
					.type = REBALANCE_SCAN_inum,
					.inum = inode_u.bi_inum }));

	try(bch2_inode_write(trans, &iter, &inode_u));
	try(bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc));