Update bcachefs sources to f595b42bf8ea bcachefs: set reconcile.pending on -ENOSPC

Kent Overstreet 2025-11-15 18:35:33 -05:00
parent 2bb681dbd1
commit 070b9ab882
25 changed files with 370 additions and 240 deletions

View File

@ -1 +1 @@
b3616d33633509373c40c441e8c32ea554e2fa35
f595b42bf8eae730a95de7636238556ef9e86cee

View File

@ -1196,36 +1196,19 @@ deallocate_extra_replicas(struct bch_fs *c,
/*
* Get us an open_bucket we can allocate from, return with it locked:
*/
int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
unsigned target,
unsigned erasure_code,
struct write_point_specifier write_point,
struct bch_devs_list *devs_have,
unsigned nr_replicas,
unsigned nr_replicas_required,
enum bch_watermark watermark,
enum bch_write_flags flags,
struct closure *cl,
struct write_point **wp_ret)
int bch2_alloc_sectors_req(struct btree_trans *trans,
struct alloc_request *req,
struct write_point_specifier write_point,
unsigned nr_replicas_required,
struct closure *cl,
struct write_point **wp_ret)
{
struct bch_fs *c = trans->c;
struct open_bucket *ob;
unsigned write_points_nr;
int i;
struct alloc_request *req = errptr_try(bch2_trans_kmalloc_nomemzero(trans, sizeof(*req)));
if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
erasure_code = false;
req->nr_replicas = nr_replicas;
req->target = target;
req->ec = erasure_code;
req->watermark = watermark;
req->flags = flags;
req->devs_have = devs_have;
BUG_ON(!nr_replicas || !nr_replicas_required);
BUG_ON(!req->nr_replicas || !nr_replicas_required);
retry:
req->ptrs.nr = 0;
req->nr_effective = 0;
@ -1244,7 +1227,7 @@ retry:
if (req->data_type != BCH_DATA_user)
req->have_cache = true;
if (target && !(flags & BCH_WRITE_only_specified_devs)) {
if (req->target && !(req->flags & BCH_WRITE_only_specified_devs)) {
ret = open_bucket_add_buckets(trans, req, NULL);
if (!ret ||
bch2_err_matches(ret, BCH_ERR_transaction_restart))
@ -1275,7 +1258,7 @@ retry:
alloc_done:
BUG_ON(!ret && req->nr_effective < req->nr_replicas);
if (erasure_code && !ec_open_bucket(c, &req->ptrs))
if (req->ec && !ec_open_bucket(c, &req->ptrs))
pr_debug("failed to get ec bucket: ret %u", ret);
if (ret == -BCH_ERR_insufficient_devices &&
@ -1341,7 +1324,7 @@ err:
if (cl && bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
ret = bch_err_throw(c, bucket_alloc_blocked);
if (cl && !(flags & BCH_WRITE_alloc_nowait) &&
if (cl && !(req->flags & BCH_WRITE_alloc_nowait) &&
bch2_err_matches(ret, BCH_ERR_freelist_empty))
ret = bch_err_throw(c, bucket_alloc_blocked);

View File

@ -5,6 +5,7 @@
#include "bcachefs.h"
#include "alloc/buckets.h"
#include "alloc/types.h"
#include "btree/iter.h"
#include "data/extents.h"
#include "data/write_types.h"
#include "sb/members.h"
@ -221,15 +222,51 @@ enum bch_write_flags;
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct alloc_request *,
struct dev_stripe_state *, struct closure *);
int bch2_alloc_sectors_start_trans(struct btree_trans *,
unsigned, unsigned,
struct write_point_specifier,
struct bch_devs_list *,
unsigned, unsigned,
enum bch_watermark,
enum bch_write_flags,
struct closure *,
struct write_point **);
int bch2_alloc_sectors_req(struct btree_trans *, struct alloc_request *,
struct write_point_specifier, unsigned,
struct closure *, struct write_point **);
static inline struct alloc_request *alloc_request_get(struct btree_trans *trans,
unsigned target,
unsigned erasure_code,
struct bch_devs_list *devs_have,
unsigned nr_replicas,
enum bch_watermark watermark,
enum bch_write_flags flags)
{
struct alloc_request *req = bch2_trans_kmalloc_nomemzero(trans, sizeof(*req));
if (IS_ERR(req))
return req;
if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
erasure_code = false;
req->nr_replicas = nr_replicas;
req->target = target;
req->ec = erasure_code;
req->watermark = watermark;
req->flags = flags;
req->devs_have = devs_have;
return req;
}
static inline int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
unsigned target,
unsigned erasure_code,
struct write_point_specifier write_point,
struct bch_devs_list *devs_have,
unsigned nr_replicas,
unsigned nr_replicas_required,
enum bch_watermark watermark,
enum bch_write_flags flags,
struct closure *cl,
struct write_point **wp_ret)
{
struct alloc_request *req = errptr_try(alloc_request_get(trans, target, erasure_code,
devs_have, nr_replicas,
watermark, flags));
return bch2_alloc_sectors_req(trans, req, write_point, nr_replicas_required, cl, wp_ret);
}
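
These hunks split the old ten-argument bch2_alloc_sectors_start_trans() into alloc_request_get(), which builds a transaction-allocated alloc_request, and bch2_alloc_sectors_req(), which consumes it; the old name survives as the inline wrapper above. A minimal userspace sketch of the same refactor shape, with plain malloc()/free() standing in for bch2_trans_kmalloc_nomemzero() (which is freed with the transaction, not by the wrapper) and all names hypothetical:

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* Hypothetical stand-ins for the alloc_request fields shown above. */
	struct alloc_req {
		unsigned	target;
		unsigned	nr_replicas;
		bool		erasure_code;
		unsigned	flags;
	};

	/* Builder: one place that fills in the request. */
	static struct alloc_req *alloc_req_get(unsigned target, unsigned nr_replicas,
					       bool erasure_code, unsigned flags)
	{
		struct alloc_req *req = malloc(sizeof(*req));
		if (!req)
			return NULL;
		req->target		= target;
		req->nr_replicas	= nr_replicas;
		req->erasure_code	= erasure_code;
		req->flags		= flags;
		return req;
	}

	/* Worker takes the request instead of a long positional argument list. */
	static int alloc_sectors_req(struct alloc_req *req, unsigned replicas_required)
	{
		if (!req->nr_replicas || !replicas_required)
			return -1;
		printf("allocating %u replicas (target %u)\n",
		       req->nr_replicas, req->target);
		return 0;
	}

	/* Old entry point survives as a thin wrapper, so callers need no churn. */
	static int alloc_sectors_start(unsigned target, unsigned nr_replicas,
				       unsigned replicas_required, unsigned flags)
	{
		struct alloc_req *req = alloc_req_get(target, nr_replicas, false, flags);
		if (!req)
			return -1;
		int ret = alloc_sectors_req(req, replicas_required);
		free(req);
		return ret;
	}

	int main(void)
	{
		return alloc_sectors_start(1, 2, 1, 0);
	}

The point of the split is visible in the btree node allocator below: callers can build the request once, adjust it (metadata target, watermark, write flags), and then retry the allocation without re-marshalling arguments.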
static inline struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)
{

View File

@ -205,12 +205,21 @@ static __always_inline bool bversion_eq(struct bversion l, struct bversion r)
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
return l.k->u64s == r.k->u64s &&
l.k->type == r.k->type &&
bpos_eq(l.k->p, r.k->p) &&
bversion_eq(l.k->bversion, r.k->bversion) &&
l.k->size == r.k->size &&
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
if (!(l.k->u64s == r.k->u64s &&
l.k->type == r.k->type &&
bpos_eq(l.k->p, r.k->p) &&
bversion_eq(l.k->bversion, r.k->bversion) &&
l.k->size == r.k->size))
return false;
if (l.k->type != KEY_TYPE_btree_ptr_v2) {
return !memcmp(l.v, r.v, bkey_val_bytes(l.k));
} else {
/* don't compare bch_btree_ptr_v2.mem_ptr */
return !memcmp((void *) l.v + 8,
(void *) r.v + 8,
bkey_val_bytes(l.k) - 8);
}
}
#define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 })
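
The rewritten bkey_and_val_eq() special-cases KEY_TYPE_btree_ptr_v2 because the value begins with mem_ptr, in-memory scratch state that may be rewritten unlocked (see the comment added in the next hunk), so the comparison starts 8 bytes into the value. A standalone sketch of the same skip-a-volatile-prefix comparison; the struct is illustrative, not the real bch_btree_ptr_v2 layout:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <string.h>

	/* Illustrative value whose first 8 bytes are in-memory scratch state. */
	struct val {
		uint64_t	mem_ptr;	/* may change underneath us, unlocked */
		uint64_t	seq;
		uint64_t	sectors;
	};

	static bool val_eq_skip_mem_ptr(const struct val *l, const struct val *r)
	{
		/* Compare everything past the scratch field. */
		return !memcmp((const char *) l + 8,
			       (const char *) r + 8,
			       sizeof(*l) - 8);
	}

	int main(void)
	{
		struct val a = { .mem_ptr = 0x1000, .seq = 7, .sectors = 512 };
		struct val b = { .mem_ptr = 0xdead, .seq = 7, .sectors = 512 };

		assert(val_eq_skip_mem_ptr(&a, &b));	/* mem_ptr ignored */
		b.seq++;
		assert(!val_eq_skip_mem_ptr(&a, &b));	/* real fields still compared */
		return 0;
	}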

View File

@ -67,13 +67,15 @@ static void verify_update_old_key(struct btree_trans *trans, struct btree_insert
k = bkey_i_to_s_c(j_k);
}
/* when updating btree ptrs, mem_ptr may change underneath us, unlocked */
struct bkey_s_c old = { &i->old_k, i->old_v };
if (unlikely(!bkey_and_val_eq(k, old))) {
CLASS(printbuf, buf)();
prt_str(&buf, "updated cached old key doesn't match\n");
prt_str(&buf, "cached: ");
prt_str(&buf, "updated cached old key doesn't match");
prt_str(&buf, "\ncached: ");
bch2_bkey_val_to_text(&buf, c, old);
prt_str(&buf, "real: ");
prt_str(&buf, "\nreal: ");
bch2_bkey_val_to_text(&buf, c, k);
panic("%s\n", buf.buf);
}

View File

@ -321,16 +321,13 @@ static bool can_use_btree_node(struct bch_fs *c,
static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct disk_reservation *res,
struct closure *cl,
bool interior_node,
unsigned target,
enum bch_trans_commit_flags flags)
struct alloc_request *req,
struct closure *cl)
{
struct bch_fs *c = trans->c;
struct write_point *wp;
struct btree *b;
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
int ret;
b = bch2_btree_node_mem_alloc(trans, interior_node);
@ -339,18 +336,10 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
BUG_ON(b->ob.nr);
retry:
ret = bch2_alloc_sectors_start_trans(trans,
target ?:
c->opts.metadata_target ?:
c->opts.foreground_target,
0,
ret = bch2_alloc_sectors_req(trans, req,
writepoint_ptr(&c->btree_write_point),
&devs_have,
res->nr_replicas,
min(res->nr_replicas,
c->opts.metadata_replicas_required),
watermark,
target ? BCH_WRITE_only_specified_devs : 0,
cl, &wp);
if (unlikely(ret))
goto err;
@ -373,7 +362,9 @@ retry:
/* check if it has sufficient durability */
if (can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) {
if (can_use_btree_node(c, res,
req->flags & BCH_WRITE_only_specified_devs ? req->target : 0,
bkey_i_to_s_c(&a->k))) {
bkey_copy(&b->key, &a->k);
b->ob = a->ob;
mutex_unlock(&c->btree_reserve_cache_lock);
@ -546,8 +537,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
static int bch2_btree_reserve_get(struct btree_trans *trans,
struct btree_update *as,
unsigned nr_nodes[2],
unsigned target,
unsigned flags,
struct alloc_request *req,
struct closure *cl)
{
BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX);
@ -564,7 +554,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
while (p->nr < nr_nodes[interior]) {
struct btree *b = __bch2_btree_node_alloc(trans, &as->disk_res,
cl, interior, target, flags);
interior, req, cl);
ret = PTR_ERR_OR_ZERO(b);
if (ret)
goto err;
@ -1196,16 +1186,17 @@ static struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
unsigned level_start, bool split,
unsigned target,
enum bch_trans_commit_flags flags)
enum bch_trans_commit_flags commit_flags,
enum bch_write_flags write_flags)
{
struct bch_fs *c = trans->c;
struct btree_update *as;
u64 start_time = local_clock();
int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc)
int disk_res_flags = (commit_flags & BCH_TRANS_COMMIT_no_enospc)
? BCH_DISK_RESERVATION_NOFAIL : 0;
unsigned nr_nodes[2] = { 0, 0 };
unsigned level_end = level_start;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
enum bch_watermark watermark = commit_flags & BCH_WATERMARK_MASK;
int ret = 0;
u32 restart_count = trans->restart_count;
@ -1216,12 +1207,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
if (watermark < BCH_WATERMARK_btree)
watermark = BCH_WATERMARK_btree;
flags &= ~BCH_WATERMARK_MASK;
flags |= watermark;
commit_flags &= ~BCH_WATERMARK_MASK;
commit_flags |= watermark;
if (watermark < BCH_WATERMARK_reclaim &&
journal_low_on_space(&c->journal)) {
if (flags & BCH_TRANS_COMMIT_journal_reclaim)
if (commit_flags & BCH_TRANS_COMMIT_journal_reclaim)
return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock);
ret = drop_locks_do(trans,
@ -1271,7 +1262,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
as->start_time = start_time;
as->ip_started = _RET_IP_;
as->mode = BTREE_UPDATE_none;
as->flags = flags;
as->flags = commit_flags;
as->took_gc_lock = true;
as->btree_id = path->btree_id;
as->update_level_start = level_start;
@ -1314,26 +1305,38 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
if (ret)
goto err;
ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, NULL);
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
struct alloc_request *req = alloc_request_get(trans,
target ?:
c->opts.metadata_target ?:
c->opts.foreground_target,
false,
&devs_have,
as->disk_res.nr_replicas,
watermark,
write_flags);
ret = PTR_ERR_OR_ZERO(req);
if (ret)
goto err;
ret = bch2_btree_reserve_get(trans, as, nr_nodes, req, NULL);
if (bch2_err_matches(ret, ENOSPC) ||
bch2_err_matches(ret, ENOMEM)) {
struct closure cl;
/*
* XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
* flag
*/
if (bch2_err_matches(ret, ENOSPC) &&
(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
(commit_flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark < BCH_WATERMARK_reclaim) {
ret = bch_err_throw(c, journal_reclaim_would_deadlock);
goto err;
}
closure_init_stack(&cl);
CLASS(closure_stack, cl)();
do {
ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, &cl);
ret = bch2_btree_reserve_get(trans, as, nr_nodes, req, &cl);
if (!bch2_err_matches(ret, BCH_ERR_operation_blocked))
break;
bch2_trans_unlock(trans);
@ -1945,7 +1948,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
as = bch2_btree_update_start(trans, trans->paths + path,
trans->paths[path].level,
true, 0, flags);
true, 0, flags, 0);
if (IS_ERR(as))
return PTR_ERR(as);
@ -2016,7 +2019,7 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path,
struct btree_update *as =
bch2_btree_update_start(trans, trans->paths + path, b->c.level,
true, 0, flags);
true, 0, flags, 0);
if (IS_ERR(as))
return PTR_ERR(as);
@ -2144,7 +2147,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
parent = btree_node_parent(trans->paths + path, b);
as = bch2_btree_update_start(trans, trans->paths + path, level, false,
0, BCH_TRANS_COMMIT_no_enospc|flags);
0, BCH_TRANS_COMMIT_no_enospc|flags, 0);
ret = PTR_ERR_OR_ZERO(as);
if (ret)
goto err;
@ -2246,11 +2249,12 @@ int bch2_btree_node_get_iter(struct btree_trans *trans, struct btree_iter *iter,
return 0;
}
int bch2_btree_node_rewrite(struct btree_trans *trans,
struct btree_iter *iter,
struct btree *b,
unsigned target,
enum bch_trans_commit_flags flags)
static int bch2_btree_node_rewrite(struct btree_trans *trans,
struct btree_iter *iter,
struct btree *b,
unsigned target,
enum bch_trans_commit_flags commit_flags,
enum bch_write_flags write_flags)
{
BUG_ON(btree_node_fake(b));
@ -2260,12 +2264,12 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
btree_path_idx_t new_path = 0;
int ret;
flags |= BCH_TRANS_COMMIT_no_enospc;
commit_flags |= BCH_TRANS_COMMIT_no_enospc;
struct btree_path *path = btree_iter_path(trans, iter);
parent = btree_node_parent(path, b);
as = bch2_btree_update_start(trans, path, b->c.level,
false, target, flags);
as = bch2_btree_update_start(trans, path, b->c.level, false, target,
commit_flags, write_flags);
ret = PTR_ERR_OR_ZERO(as);
if (ret)
goto out;
@ -2326,7 +2330,7 @@ int bch2_btree_node_rewrite_key(struct btree_trans *trans,
bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(k);
return found
? bch2_btree_node_rewrite(trans, &iter, b, 0, flags)
? bch2_btree_node_rewrite(trans, &iter, b, 0, flags, 0)
: -ENOENT;
}
@ -2334,7 +2338,8 @@ int bch2_btree_node_rewrite_pos(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bpos pos,
unsigned target,
enum bch_trans_commit_flags flags)
enum bch_trans_commit_flags commit_flags,
enum bch_write_flags write_flags)
{
BUG_ON(!level);
@ -2342,19 +2347,7 @@ int bch2_btree_node_rewrite_pos(struct btree_trans *trans,
CLASS(btree_node_iter, iter)(trans, btree, pos, 0, level - 1, 0);
struct btree *b = errptr_try(bch2_btree_iter_peek_node(&iter));
return bch2_btree_node_rewrite(trans, &iter, b, target, flags);
}
int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans,
struct btree *b,
enum bch_trans_commit_flags flags)
{
CLASS(btree_iter_uninit, iter)(trans);
int ret = bch2_btree_node_get_iter(trans, &iter, b);
if (ret)
return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
return bch2_btree_node_rewrite(trans, &iter, b, 0, flags);
return bch2_btree_node_rewrite(trans, &iter, b, target, commit_flags, write_flags);
}
struct async_btree_rewrite {
@ -2476,6 +2469,11 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
bool skip_triggers)
{
struct bch_fs *c = trans->c;
unsigned level = b->c.level;
struct btree_path *path = btree_iter_path(trans, iter);
BUG_ON(path->l[b->c.level].b != b);
BUG_ON(!btree_node_intent_locked(path, b->c.level));
if (!btree_node_will_make_reachable(b)) {
if (!btree_node_is_root(c, b)) {
@ -2518,6 +2516,17 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
CLASS(disk_reservation, res)(c);
try(bch2_trans_commit(trans, &res.r, NULL, commit_flags));
struct btree *new_b = btree_iter_path(trans, iter)->l[level].b;
if (new_b != b) {
/*
* We were asked to update the key for a node that was
* also modified during the commit (due to triggers),
* and that node was freed:
*/
BUG_ON(!btree_node_will_make_reachable(new_b));
return 0;
}
bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c);
bkey_copy(&b->key, new_key);
bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b);
@ -2580,11 +2589,10 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id, unsigned level)
{
struct bch_fs *c = trans->c;
struct closure cl;
struct btree *b;
int ret;
closure_init_stack(&cl);
CLASS(closure_stack, cl)();
do {
ret = bch2_btree_cache_cannibalize_lock(trans, &cl);

View File

@ -5,6 +5,7 @@
#include "btree/cache.h"
#include "btree/locking.h"
#include "btree/update.h"
#include "data/write_types.h"
#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)
@ -167,9 +168,6 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
int bch2_btree_node_get_iter(struct btree_trans *, struct btree_iter *, struct btree *);
int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
struct btree *, unsigned,
enum bch_trans_commit_flags);
int bch2_btree_node_rewrite_key(struct btree_trans *,
enum btree_id, unsigned,
struct bkey_i *,
@ -177,10 +175,8 @@ int bch2_btree_node_rewrite_key(struct btree_trans *,
int bch2_btree_node_rewrite_pos(struct btree_trans *,
enum btree_id, unsigned,
struct bpos, unsigned,
enum bch_trans_commit_flags);
int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *,
struct btree *,
enum bch_trans_commit_flags);
enum bch_trans_commit_flags,
enum bch_write_flags);
void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);

View File

@ -1057,9 +1057,7 @@ retry_all:
trans_set_locked(trans, false);
if (unlikely(trans->memory_allocation_failure)) {
struct closure cl;
closure_init_stack(&cl);
CLASS(closure_stack, cl)();
do {
ret = bch2_btree_cache_cannibalize_lock(trans, &cl);

View File

@ -270,10 +270,9 @@ err:
static int read_btree_nodes(struct find_btree_nodes *f)
{
struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
struct closure cl;
int ret = 0;
closure_init_stack(&cl);
CLASS(closure_stack, cl)();
CLASS(printbuf, buf)();
prt_printf(&buf, "scanning for btree nodes on");

View File

@ -1165,11 +1165,10 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
const struct bkey_i *k, unsigned level)
{
struct bch_fs *c = trans->c;
struct closure cl;
struct btree *b;
int ret;
closure_init_stack(&cl);
CLASS(closure_stack, cl)();
do {
ret = bch2_btree_cache_cannibalize_lock(trans, &cl);

View File

@ -826,8 +826,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
if (ret)
return stripe_reconstruct_err(c, orig_k, "-ENOMEM");
struct closure cl;
closure_init_stack(&cl);
CLASS(closure_stack, cl)();
for (unsigned i = 0; i < v->nr_blocks; i++)
ec_block_io(c, buf, REQ_OP_READ, i, &cl);

View File

@ -48,8 +48,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
struct bkey_buf new __cleanup(bch2_bkey_buf_exit);
bch2_bkey_buf_init(&new);
struct closure cl;
closure_init_stack(&cl);
CLASS(closure_stack, cl)();
struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(iter));

View File

@ -341,7 +341,8 @@ int bch2_move_extent(struct moving_context *ctxt,
if (!bkey_is_btree_ptr(k.k))
ret = __bch2_move_extent(ctxt, bucket_in_flight, iter, k, opts, data_opts);
else if (data_opts.type != BCH_DATA_UPDATE_scrub)
ret = bch2_btree_node_rewrite_pos(trans, iter->btree_id, level, k.k->p, data_opts.target, 0);
ret = bch2_btree_node_rewrite_pos(trans, iter->btree_id, level, k.k->p,
data_opts.target, 0, data_opts.write_flags);
else
ret = bch2_btree_node_scrub(trans, iter->btree_id, level, k, data_opts.read_dev);

View File

@ -23,6 +23,7 @@
#include "init/progress.h"
#include "fs/inode.h"
#include "fs/namei.h"
#include "snapshots/subvolume.h"
@ -32,6 +33,61 @@
#include <linux/kthread.h>
#include <linux/sched/cputime.h>
#define RECONCILE_WORK_IDS() \
x(none) \
x(hipri) \
x(normal) \
x(pending)
enum reconcile_work_id {
#define x(t) RECONCILE_WORK_##t,
RECONCILE_WORK_IDS()
#undef x
};
#define x(n) #n,
static const char * const reconcile_opts[] = {
BCH_REBALANCE_OPTS()
NULL
};
static const char * const reconcile_work_ids[] = {
RECONCILE_WORK_IDS()
NULL
};
static const char * const rebalance_scan_strs[] = {
RECONCILE_SCAN_TYPES()
};
#undef x
#define RECONCILE_SCAN_COOKIE_device 32
#define RECONCILE_SCAN_COOKIE_pending 2
#define RECONCILE_SCAN_COOKIE_metadata 1
#define RECONCILE_SCAN_COOKIE_fs 0
static const enum btree_id reconcile_work_btree[] = {
[RECONCILE_WORK_hipri] = BTREE_ID_reconcile_hipri,
[RECONCILE_WORK_normal] = BTREE_ID_reconcile_work,
[RECONCILE_WORK_pending] = BTREE_ID_reconcile_pending,
};
static enum reconcile_work_id btree_to_reconcile_work_id(enum btree_id btree)
{
switch (btree) {
case BTREE_ID_reconcile_hipri:
return RECONCILE_WORK_hipri;
case BTREE_ID_reconcile_work:
return RECONCILE_WORK_normal;
case BTREE_ID_reconcile_pending:
return RECONCILE_WORK_pending;
default:
BUG();
}
}
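
The reconcile work classes are now generated from a single x-macro list, so the enum, the name table used by the status output further down, and the work-class-to-btree table cannot drift out of sync. A self-contained sketch of the idiom, with made-up names:

	#include <stdio.h>

	/* Single source of truth: each x() expansion is one work class. */
	#define WORK_IDS()	\
		x(none)		\
		x(hipri)	\
		x(normal)	\
		x(pending)

	enum work_id {
	#define x(t) WORK_##t,
		WORK_IDS()
	#undef x
		WORK_NR,
	};

	static const char * const work_id_strs[] = {
	#define x(t) #t,
		WORK_IDS()
	#undef x
		NULL
	};

	int main(void)
	{
		for (enum work_id i = 0; i < WORK_NR; i++)
			printf("%u: %s\n", (unsigned) i, work_id_strs[i]);
		return 0;
	}

Adding a class is then a one-line change to the list, and rb_work_btree() below becomes a table lookup instead of an if-chain.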
/* bch_extent_reconcile: */
int bch2_extent_reconcile_validate(struct bch_fs *c,
@ -78,13 +134,6 @@ const struct bch_extent_reconcile *bch2_bkey_reconcile_opts(const struct bch_fs
return bch2_bkey_ptrs_reconcile_opts(c, bch2_bkey_ptrs_c(k));
}
static const char * const reconcile_opts[] = {
#define x(n) #n,
BCH_REBALANCE_OPTS()
#undef x
NULL
};
void bch2_extent_rebalance_v1_to_text(struct printbuf *out, struct bch_fs *c,
const struct bch_extent_rebalance_v1 *r)
{
@ -191,20 +240,20 @@ void bch2_extent_reconcile_to_text(struct printbuf *out, struct bch_fs *c,
}
}
/*
* XXX: check in bkey_validate that if r->hipri or r->pending are set,
* r->data_replicas are also set
*/
static enum reconcile_work_id rb_work_id(const struct bch_extent_reconcile *r)
{
if (!r || !r->need_rb)
return RECONCILE_WORK_none;
if (r->hipri)
return RECONCILE_WORK_hipri;
if (!r->pending)
return RECONCILE_WORK_normal;
return RECONCILE_WORK_pending;
}
static enum btree_id rb_work_btree(const struct bch_extent_reconcile *r)
{
if (!r || !r->need_rb)
return 0;
if (r->hipri)
return BTREE_ID_reconcile_hipri;
if (r->pending)
return BTREE_ID_reconcile_pending;
return BTREE_ID_reconcile_work;
return reconcile_work_btree[rb_work_id(r)];
}
static inline unsigned rb_accounting_counters(const struct bch_extent_reconcile *r)
@ -1008,18 +1057,6 @@ int bch2_bkey_get_io_opts(struct btree_trans *trans,
return 0;
}
static const char * const bch2_reconcile_state_strs[] = {
#define x(t) #t,
BCH_REBALANCE_STATES()
NULL
#undef x
};
#define RECONCILE_SCAN_COOKIE_device 32
#define RECONCILE_SCAN_COOKIE_pending 2
#define RECONCILE_SCAN_COOKIE_metadata 1
#define RECONCILE_SCAN_COOKIE_fs 0
static u64 reconcile_scan_encode(struct reconcile_scan s)
{
switch (s.type) {
@ -1058,6 +1095,24 @@ static struct reconcile_scan reconcile_scan_decode(struct bch_fs *c, u64 v)
return (struct reconcile_scan) { .type = RECONCILE_SCAN_fs};
}
static void reconcile_scan_to_text(struct printbuf *out,
struct bch_fs *c, struct reconcile_scan s)
{
prt_str(out, rebalance_scan_strs[s.type]);
switch (s.type) {
case RECONCILE_SCAN_device:
prt_str(out, ": ");
bch2_prt_member_name(out, c, s.dev);
break;
case RECONCILE_SCAN_inum:
prt_str(out, ": ");
bch2_trans_do(c, bch2_inum_snapshot_to_path(trans, s.inum, 0, NULL, out));
break;
default:
break;
}
}
int bch2_set_reconcile_needs_scan_trans(struct btree_trans *trans, struct reconcile_scan s)
{
CLASS(btree_iter, iter)(trans, BTREE_ID_reconcile_scan,
@ -1324,7 +1379,6 @@ static int __do_reconcile_extent(struct moving_context *ctxt,
u32 restart_count = trans->restart_count;
ctxt->stats = &c->reconcile.work_stats;
c->reconcile.state = BCH_REBALANCE_working;
int ret = bch2_move_extent(ctxt, NULL, snapshot_io_opts,
reconcile_set_data_opts, NULL,
@ -1334,7 +1388,8 @@ static int __do_reconcile_extent(struct moving_context *ctxt,
if (bch2_err_matches(ret, EROFS))
return ret;
if (bch2_err_matches(ret, BCH_ERR_data_update_fail_no_rw_devs) ||
bch2_err_matches(ret, BCH_ERR_insufficient_devices)) {
bch2_err_matches(ret, BCH_ERR_insufficient_devices) ||
bch2_err_matches(ret, ENOSPC)) {
if (rb_work_btree(bch2_bkey_reconcile_opts(c, k)) !=
BTREE_ID_reconcile_pending)
try(bch2_trans_relock(trans) ?:
@ -1343,8 +1398,7 @@ static int __do_reconcile_extent(struct moving_context *ctxt,
return 0;
}
if (ret) {
WARN_ONCE(ret != -BCH_ERR_data_update_fail_no_snapshot &&
ret != -BCH_ERR_data_update_fail_no_rw_devs,
WARN_ONCE(ret != -BCH_ERR_data_update_fail_no_snapshot,
"unhandled error from move_extent: %s", bch2_err_str(ret));
/* skip it and continue */
}
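
This hunk is the change named in the commit title: -ENOSPC from moving an extent now lands in the same bucket as the no-rw-devs and insufficient-devices errors, shunting the extent's entry into the reconcile_pending btree instead of retrying it indefinitely. A schematic sketch of that demote-on-unactionable-error shape, with hypothetical names and plain errno values standing in for the bch2_err_matches() classes:

	#include <errno.h>
	#include <stdio.h>

	enum queue { QUEUE_WORK, QUEUE_PENDING };

	/* Hypothetical stand-in for moving one extent. */
	static int move_one(int fake_err)
	{
		return fake_err;
	}

	static int do_one_work_item(enum queue *q, int fake_err)
	{
		int ret = move_one(fake_err);

		if (ret == -EROFS)
			return ret;	/* filesystem going read-only: give up */

		/*
		 * No space / no writable devices: park the item on the
		 * pending queue and report success so the work loop
		 * keeps making progress on other items.
		 */
		if (ret == -ENOSPC || ret == -ENODEV) {
			if (*q != QUEUE_PENDING) {
				*q = QUEUE_PENDING;
				printf("demoted item to pending\n");
			}
			return 0;
		}
		return ret;
	}

	int main(void)
	{
		enum queue q = QUEUE_WORK;
		return do_one_work_item(&q, -ENOSPC);
	}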
@ -1359,11 +1413,13 @@ static int __do_reconcile_extent(struct moving_context *ctxt,
static int do_reconcile_extent(struct moving_context *ctxt,
struct per_snapshot_io_opts *snapshot_io_opts,
struct bbpos pos)
struct bpos work_pos)
{
struct bbpos data_pos = rb_work_to_data_pos(work_pos);
struct btree_trans *trans = ctxt->trans;
CLASS(btree_iter, iter)(trans, pos.btree, pos.pos, BTREE_ITER_all_snapshots);
CLASS(btree_iter, iter)(trans, data_pos.btree, data_pos.pos, BTREE_ITER_all_snapshots);
struct bkey_s_c k = bkey_try(bch2_btree_iter_peek_slot(&iter));
return __do_reconcile_extent(ctxt, snapshot_io_opts, &iter, k);
@ -1550,8 +1606,6 @@ static int do_reconcile_scan(struct moving_context *ctxt,
bch2_move_stats_init(&r->scan_stats, "reconcile_scan");
ctxt->stats = &r->scan_stats;
r->state = BCH_REBALANCE_scanning;
struct reconcile_scan s = reconcile_scan_decode(c, cookie_pos.offset);
if (s.type == RECONCILE_SCAN_fs) {
try(do_reconcile_scan_fs(ctxt, snapshot_io_opts, false));
@ -1614,10 +1668,10 @@ static void reconcile_wait(struct bch_fs *c)
r->wait_iotime_end = now + (min_member_capacity >> 6);
if (r->state != BCH_REBALANCE_waiting) {
if (r->running) {
r->wait_iotime_start = now;
r->wait_wallclock_start = ktime_get_real_ns();
r->state = BCH_REBALANCE_waiting;
r->running = false;
}
bch2_kthread_io_clock_wait_once(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT);
@ -1653,11 +1707,14 @@ static int do_reconcile(struct moving_context *ctxt)
BTREE_ID_reconcile_pending,
};
unsigned i = 0;
struct bpos work_pos = POS_MIN;
r->work_pos = BBPOS(scan_btrees[i], POS_MIN);
struct bkey_i_cookie pending_cookie;
bkey_init(&pending_cookie.k);
bch2_btree_write_buffer_flush_sync(trans);
while (!bch2_move_ratelimit(ctxt)) {
if (!bch2_reconcile_enabled(c)) {
bch2_moving_ctxt_flush_all(ctxt);
@ -1670,13 +1727,13 @@ static int do_reconcile(struct moving_context *ctxt)
if (kick != r->kick) {
kick = r->kick;
i = 0;
work_pos = POS_MIN;
r->work_pos = BBPOS(scan_btrees[i], POS_MIN);
work.nr = 0;
}
bch2_trans_begin(trans);
struct bkey_s_c k = next_reconcile_entry(trans, &work, scan_btrees[i], &work_pos);
struct bkey_s_c k = next_reconcile_entry(trans, &work, r->work_pos.btree, &r->work_pos.pos);
ret = bkey_err(k);
if (ret)
break;
@ -1685,14 +1742,17 @@ static int do_reconcile(struct moving_context *ctxt)
if (++i == ARRAY_SIZE(scan_btrees))
break;
work_pos = POS_MIN;
r->work_pos = BBPOS(scan_btrees[i], POS_MIN);
if (scan_btrees[i] == BTREE_ID_reconcile_pending &&
if (r->work_pos.btree == BTREE_ID_reconcile_pending &&
bkey_deleted(&pending_cookie.k))
break;
continue;
}
r->running = true;
r->work_pos.pos = k.k->p;
if (k.k->type == KEY_TYPE_cookie &&
reconcile_scan_decode(c, k.k->p.offset).type == RECONCILE_SCAN_pending)
bkey_reassemble(&pending_cookie.k_i, k);
@ -1707,8 +1767,7 @@ static int do_reconcile(struct moving_context *ctxt)
bkey_s_c_to_backpointer(k));
else
ret = lockrestart_do(trans,
do_reconcile_extent(ctxt, &snapshot_io_opts,
rb_work_to_data_pos(k.k->p)));
do_reconcile_extent(ctxt, &snapshot_io_opts, k.k->p));
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
ret = 0;
@ -1718,8 +1777,8 @@ static int do_reconcile(struct moving_context *ctxt)
if (ret)
break;
if (scan_btrees[i] == BTREE_ID_reconcile_scan)
work_pos = bpos_successor(work_pos);
if (r->work_pos.btree == BTREE_ID_reconcile_scan)
r->work_pos.pos = bpos_successor(r->work_pos.pos);
}
if (!ret && !bkey_deleted(&pending_cookie.k))
@ -1788,13 +1847,10 @@ void bch2_reconcile_status_to_text(struct printbuf *out, struct bch_fs *c)
}
prt_newline(out);
guard(printbuf_indent_nextline)(out);
prt_str(out, bch2_reconcile_state_strs[r->state]);
prt_newline(out);
guard(printbuf_indent)(out);
switch (r->state) {
case BCH_REBALANCE_waiting: {
if (!r->running) {
prt_printf(out, "waiting:\n");
u64 now = atomic64_read(&c->io_clock[WRITE].now);
prt_printf(out, "io wait duration:\t");
@ -1808,16 +1864,28 @@ void bch2_reconcile_status_to_text(struct printbuf *out, struct bch_fs *c)
prt_printf(out, "duration waited:\t");
bch2_pr_time_units(out, ktime_get_real_ns() - r->wait_wallclock_start);
prt_newline(out);
break;
} else {
struct bbpos work_pos = r->work_pos;
barrier();
if (work_pos.btree == BTREE_ID_reconcile_scan &&
work_pos.pos.inode == 0) {
prt_printf(out, "scanning:\n");
reconcile_scan_to_text(out, c,
reconcile_scan_decode(c, work_pos.pos.offset));
} else if (work_pos.btree == BTREE_ID_reconcile_scan) {
prt_printf(out, "processing metadata: %s %llu\n",
reconcile_work_ids[work_pos.pos.inode - 1],
work_pos.pos.offset);
} else {
prt_printf(out, "processing data: %s ",
reconcile_work_ids[btree_to_reconcile_work_id(work_pos.btree)]);
bch2_bbpos_to_text(out, rb_work_to_data_pos(work_pos.pos));
prt_newline(out);
}
}
case BCH_REBALANCE_working:
bch2_move_stats_to_text(out, &r->work_stats);
break;
case BCH_REBALANCE_scanning:
bch2_move_stats_to_text(out, &r->scan_stats);
break;
}
prt_newline(out);
struct task_struct *t;
scoped_guard(rcu) {

View File

@ -114,13 +114,18 @@ int bch2_bkey_set_needs_reconcile(struct btree_trans *,
struct per_snapshot_io_opts *, struct bch_inode_opts *,
struct bkey_i *, enum set_needs_reconcile_ctx, u32);
#define RECONCILE_SCAN_TYPES() \
x(fs) \
x(metadata) \
x(pending) \
x(device) \
x(inum)
struct reconcile_scan {
enum reconcile_scan_type {
RECONCILE_SCAN_fs,
RECONCILE_SCAN_metadata,
RECONCILE_SCAN_pending,
RECONCILE_SCAN_device,
RECONCILE_SCAN_inum,
#define x(t) RECONCILE_SCAN_##t,
RECONCILE_SCAN_TYPES()
#undef x
} type;
union {

View File

@ -5,26 +5,16 @@
#include "btree/bbpos_types.h"
#include "move_types.h"
#define BCH_REBALANCE_STATES() \
x(waiting) \
x(working) \
x(scanning)
enum bch_reconcile_states {
#define x(t) BCH_REBALANCE_##t,
BCH_REBALANCE_STATES()
#undef x
};
struct bch_fs_reconcile {
struct task_struct __rcu *thread;
u32 kick;
enum bch_reconcile_states state;
bool running;
u64 wait_iotime_start;
u64 wait_iotime_end;
u64 wait_wallclock_start;
struct bbpos work_pos;
struct bch_move_stats work_stats;
struct bbpos scan_start;

View File

@ -470,11 +470,10 @@ int bch2_update_unwritten_extent(struct btree_trans *trans,
struct bch_fs *c = update->op.c;
struct bkey_i_extent *e;
struct write_point *wp;
struct closure cl;
struct bkey_s_c k;
int ret = 0;
closure_init_stack(&cl);
CLASS(closure_stack, cl)();
bch2_keylist_init(&update->op.insert_keys, update->op.inline_keys);
while (bpos_lt(update->op.pos, update->k.k->k.p)) {

View File

@ -528,9 +528,27 @@ static int __bch2_inum_to_path(struct btree_trans *trans,
DARRAY(subvol_inum) inums = {};
if (!snapshot) {
ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot);
if (ret)
goto disconnected;
if (subvol) {
ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot);
if (ret)
goto disconnected;
} else {
struct bkey_s_c k;
for_each_btree_key_max_norestart(trans, iter,
BTREE_ID_inodes,
POS(0, inum),
SPOS(0, inum, U32_MAX),
BTREE_ITER_all_snapshots, k, ret) {
if (bkey_is_inode(k.k)) {
snapshot = k.k->p.snapshot;
break;
}
}
if (ret)
return ret;
if (!snapshot)
goto disconnected;
}
}
while (true) {
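
New fallback in __bch2_inum_to_path(), evidently for callers like reconcile_scan_to_text() above that only have a bare inode number: when neither snapshot nor subvolume is known, scan every snapshot version of the inode number and take the first key that is actually an inode, treating no match as disconnected. The shape of that lookup, reduced to a flat array with illustrative data:

	#include <stdio.h>

	struct key { unsigned inum, snapshot; int is_inode; };

	/* Illustrative keyspace: same inode number in several snapshots. */
	static const struct key keys[] = {
		{ 42, 3, 0 },	/* e.g. a dirent at that position, not an inode */
		{ 42, 5, 1 },
		{ 42, 9, 1 },
	};

	/* Unknown subvolume: take the first snapshot that holds a live inode. */
	static unsigned snapshot_for_inum(unsigned inum)
	{
		for (unsigned i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
			if (keys[i].inum == inum && keys[i].is_inode)
				return keys[i].snapshot;
		return 0;	/* not found -> "disconnected" in the real code */
	}

	int main(void)
	{
		printf("snapshot: %u\n", snapshot_for_inum(42));	/* 5 */
		return 0;
	}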

View File

@ -1262,7 +1262,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
return c;
}
static bool bch2_fs_may_start(struct bch_fs *c, struct printbuf *err)
static int bch2_fs_may_start(struct bch_fs *c, struct printbuf *err)
{
unsigned flags = 0;

View File

@ -155,8 +155,7 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca
struct journal_device *ja = &ca->journal;
int ret = 0;
struct closure cl;
closure_init_stack(&cl);
CLASS(closure_stack, cl)();
/* don't handle reducing nr of buckets yet: */
if (nr < ja->nr)

View File

@ -410,4 +410,19 @@ void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
int bch2_sb_member_alloc(struct bch_fs *);
void bch2_sb_members_clean_deleted(struct bch_fs *);
static inline void bch2_prt_member_name(struct printbuf *out, struct bch_fs *c, unsigned idx)
{
if (idx == BCH_SB_MEMBER_INVALID) {
prt_str(out, "(none)");
} else {
guard(rcu)();
guard(printbuf_atomic)(out);
struct bch_dev *ca = c ? bch2_dev_rcu_noerror(c, idx) : NULL;
if (ca)
prt_str(out, ca->name);
else
prt_printf(out, "(invalid device %u)", idx);
}
}
#endif /* _BCACHEFS_SB_MEMBERS_H */

View File

@ -299,18 +299,12 @@ DEFINE_GUARD(printbuf_atomic, struct printbuf *,
printbuf_atomic_inc(_T),
printbuf_atomic_dec(_T));
static inline void printbuf_indent_add_2(struct printbuf *out)
{
bch2_printbuf_indent_add(out, 2);
}
static inline void printbuf_indent_sub_2(struct printbuf *out)
{
bch2_printbuf_indent_sub(out, 2);
}
DEFINE_GUARD(printbuf_indent, struct printbuf *,
printbuf_indent_add_2(_T),
printbuf_indent_sub_2(_T));
bch2_printbuf_indent_add(_T, 2),
bch2_printbuf_indent_sub(_T, 2));
DEFINE_GUARD(printbuf_indent_nextline, struct printbuf *,
bch2_printbuf_indent_add_nextline(_T, 2),
bch2_printbuf_indent_sub(_T, 2));
#endif /* _BCACHEFS_PRINTBUF_H */
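
The printbuf_indent guard loses its printbuf_indent_add_2() wrapper functions now that the enter/exit expressions are passed to DEFINE_GUARD() directly. Underneath, such guards are the compiler's cleanup attribute: one expression runs at declaration, the paired one at scope exit. A simplified sketch of the mechanism, not the kernel's actual DEFINE_GUARD() expansion:

	#include <stdio.h>

	struct printbuf { int indent; };

	static void indent_add(struct printbuf *out, int n) { out->indent += n; }
	static void indent_sub(struct printbuf *out, int n) { out->indent -= n; }

	typedef struct printbuf *guard_indent_t;

	static void guard_indent_exit(guard_indent_t *g)
	{
		indent_sub(*g, 2);	/* destructor: undo the indent */
	}

	/* Declaring the guard indents; leaving its scope un-indents. */
	#define guard_indent(out)					\
		__attribute__((cleanup(guard_indent_exit), unused))	\
		guard_indent_t __guard = (indent_add((out), 2), (out))

	int main(void)
	{
		struct printbuf out = { 0 };
		{
			guard_indent(&out);
			printf("inside: indent %d\n", out.indent);	/* 2 */
		}
		printf("after:  indent %d\n", out.indent);		/* 0 */
		return 0;
	}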

View File

@ -336,6 +336,29 @@ static inline void closure_init_stack_release(struct closure *cl)
#endif
}
/*
* Open coded DEFINE_CLASS(closure_stack, ...)
*
* We can't use DEFINE_CLASS() because that defines a destructor that destructs
* a copy...
*/
typedef struct closure class_closure_stack_t;
static inline void class_closure_stack_destructor(struct closure *cl)
{
closure_sync(cl);
}
static inline struct closure class_closure_stack_constructor(void)
{
return (struct closure) {
.remaining.counter = CLOSURE_REMAINING_INITIALIZER,
#ifdef CONFIG_DEBUG_CLOSURES
.magic = CLOSURE_MAGIC_STACK,
#endif
};
}
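
The comment above explains why this class is open-coded: DEFINE_CLASS() generates a destructor that operates on a copy of the object, and a closure, like anything containing wait-list linkage, must be destructed in place. With the class defined, every closure_init_stack() plus trailing closure_sync() pair in this commit collapses into a single CLASS(closure_stack, cl)() declaration, including the __closure_wait_event macros below. A small sketch of the destruct-a-copy hazard, using a self-referential struct as a stand-in for a closure:

	#include <stdio.h>

	struct widget {
		int refs;
		struct widget *self;	/* self-referential: copying breaks it */
	};

	/* What DEFINE_CLASS()-style codegen does: destruct a *copy*. */
	static void bad_destructor(struct widget *p)
	{
		struct widget tmp = *p;
		printf("bad:  self-consistent? %d\n", tmp.self == &tmp);	/* 0 */
	}

	/* Open-coded class destructor: gets the object itself. */
	static void good_destructor(struct widget *p)
	{
		printf("good: self-consistent? %d\n", p->self == p);		/* 1 */
	}

	int main(void)
	{
		__attribute__((cleanup(good_destructor))) struct widget w = { .refs = 1 };
		w.self = &w;
		bad_destructor(&w);	/* shows what a copying destructor would see */
		return 0;
	}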
/**
* closure_wake_up - wake up all closures on a wait list,
* with memory barrier
@ -432,9 +455,7 @@ static inline void closure_call(struct closure *cl, closure_fn fn,
#define __closure_wait_event(waitlist, _cond) \
do { \
struct closure cl; \
\
closure_init_stack(&cl); \
CLASS(closure_stack, cl)(); \
\
while (1) { \
bch2_closure_wait(waitlist, &cl); \
@ -443,7 +464,6 @@ do { \
closure_sync(&cl); \
} \
closure_wake_up(waitlist); \
closure_sync(&cl); \
} while (0)
#define closure_wait_event(waitlist, _cond) \
@ -454,11 +474,9 @@ do { \
#define __closure_wait_event_timeout(waitlist, _cond, _until) \
({ \
struct closure cl; \
CLASS(closure_stack, cl)(); \
long _t; \
\
closure_init_stack(&cl); \
\
while (1) { \
bch2_closure_wait(waitlist, &cl); \
if (_cond) { \
@ -471,7 +489,6 @@ do { \
closure_sync_timeout(&cl, _t); \
} \
closure_wake_up(waitlist); \
closure_sync(&cl); \
_t; \
})

View File

@ -711,8 +711,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc
blk_start_plug(&w->plug);
struct closure cl;
closure_init_stack(&cl);
CLASS(closure_stack, cl)();
struct folio *folio = NULL;
int ret = 0;

View File

@ -101,12 +101,8 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c,
static int bch2_inode_flush_nocow_writes(struct bch_fs *c,
struct bch_inode_info *inode)
{
struct closure cl;
closure_init_stack(&cl);
CLASS(closure_stack, cl)();
bch2_inode_flush_nocow_writes_async(c, inode, &cl);
closure_sync(&cl);
return 0;
}