Update bcachefs sources to 581f7e27bc97 bcachefs: s/rebalance/reconcile

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2025-11-12 17:42:18 -05:00
parent b101c5201a
commit 7a041ef0c9
56 changed files with 3108 additions and 1820 deletions

View File

@ -1 +1 @@
156233ad4b9043c04d1b6b0c1b244ae2af38c52e
581f7e27bc9728b15e7f45d10784542b4ab23a90

View File

@ -462,7 +462,7 @@ static void image_create(struct bch_opt_strs fs_opt_strs,
struct bch_opts opts = bch2_opts_empty();
opt_set(opts, copygc_enabled, false);
opt_set(opts, rebalance_enabled, false);
opt_set(opts, reconcile_enabled, false);
opt_set(opts, nostart, true);
struct bch_fs *c = bch2_fs_open(&device_paths, &opts);
@ -655,7 +655,7 @@ static int image_update(const char *src_path, const char *dst_image,
struct bch_opts opts = bch2_opts_empty();
opt_set(opts, copygc_enabled, false);
opt_set(opts, rebalance_enabled, false);
opt_set(opts, reconcile_enabled, false);
opt_set(opts, nostart, true);
struct bch_fs *c = bch2_fs_open(&device_paths, &opts);

View File

@ -47,7 +47,7 @@ bcachefs-y := \
data/move.o \
data/nocow_locking.o \
data/read.o \
data/rebalance.o \
data/reconcile.o \
data/reflink.o \
data/update.o \
data/write.o \

View File

@ -234,6 +234,9 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
case BCH_DISK_ACCOUNTING_rebalance_work:
end = field_end(acc_k, rebalance_work);
break;
case BCH_DISK_ACCOUNTING_reconcile_work:
end = field_end(acc_k, reconcile_work);
break;
}
bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)),
@ -250,7 +253,9 @@ fsck_err:
return ret;
}
void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_pos *k)
void bch2_accounting_key_to_text(struct printbuf *out,
struct bch_fs *c,
struct disk_accounting_pos *k)
{
if (k->type >= BCH_DISK_ACCOUNTING_TYPE_NR) {
prt_printf(out, "unknown type %u", k->type);
@ -283,6 +288,19 @@ void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_po
prt_str(out, "btree=");
bch2_btree_id_to_text(out, k->btree.id);
break;
case BCH_DISK_ACCOUNTING_reconcile_work:
bch2_prt_reconcile_accounting_type(out, k->reconcile_work.type);
break;
case BCH_DISK_ACCOUNTING_dev_leaving: {
guard(rcu)();
guard(printbuf_atomic)(out);
struct bch_dev *ca = c ? bch2_dev_rcu_noerror(c, k->dev_leaving.dev) : NULL;
if (ca)
prt_printf(out, "%s ", ca->name);
else
prt_printf(out, "%u ", k->dev_leaving.dev);
break;
}
}
}
@ -292,7 +310,7 @@ void bch2_accounting_to_text(struct printbuf *out, struct bch_fs *c, struct bkey
struct disk_accounting_pos acc_k;
bpos_to_disk_accounting_pos(&acc_k, k.k->p);
bch2_accounting_key_to_text(out, &acc_k);
bch2_accounting_key_to_text(out, c, &acc_k);
for (unsigned i = 0; i < bch2_accounting_counters(k.k); i++)
prt_printf(out, " %lli", acc.v->d[i]);
@ -607,7 +625,7 @@ int bch2_gc_accounting_done(struct bch_fs *c)
if (memcmp(dst_v, src_v, nr * sizeof(u64))) {
printbuf_reset(&buf);
prt_str(&buf, "accounting mismatch for ");
bch2_accounting_key_to_text(&buf, &acc_k);
bch2_accounting_key_to_text(&buf, c, &acc_k);
prt_str(&buf, ":\n got");
for (unsigned j = 0; j < nr; j++)
@ -672,7 +690,7 @@ static int disk_accounting_invalid_dev(struct btree_trans *trans,
unsigned dev)
{
CLASS(printbuf, buf)();
bch2_accounting_key_to_text(&buf, acc);
bch2_accounting_key_to_text(&buf, trans->c, acc);
if (ret_fsck_err(trans, accounting_to_invalid_device,
"accounting entry points to invalid device %u\n%s",
@ -716,7 +734,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
trans, accounting_replicas_not_marked,
"accounting not marked in superblock replicas\n%s",
(printbuf_reset(&buf),
bch2_accounting_key_to_text(&buf, acc),
bch2_accounting_key_to_text(&buf, c, acc),
buf.buf)))
try(bch2_mark_replicas(c, &r.e));
break;
@ -846,7 +864,7 @@ static int accounting_read_mem_fixups(struct btree_trans *trans)
bch2_log_msg_start(c, &underflow_err);
prt_printf(&underflow_err, "Accounting underflow for\n");
}
bch2_accounting_key_to_text(&underflow_err, &k);
bch2_accounting_key_to_text(&underflow_err, c, &k);
for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++)
prt_printf(&underflow_err, " %lli", v[j]);

View File

@ -124,7 +124,7 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool);
int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c,
struct bkey_validate_context);
void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *);
void bch2_accounting_key_to_text(struct printbuf *, struct bch_fs *, struct disk_accounting_pos *);
void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
void bch2_accounting_swab(const struct bch_fs *, struct bkey_s);

View File

@ -110,7 +110,9 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
x(snapshot, 5, 1) \
x(btree, 6, 3) \
x(rebalance_work, 7, 1) \
x(inum, 8, 3)
x(inum, 8, 3) \
x(reconcile_work, 9, 1) \
x(dev_leaving, 10, 1)
enum disk_accounting_type {
#define x(f, nr, ...) BCH_DISK_ACCOUNTING_##f = nr,
@ -210,6 +212,19 @@ struct bch_acct_inum {
struct bch_acct_rebalance_work {
};
struct bch_acct_reconcile_work {
__u8 type;
};
struct bch_acct_dev_leaving {
__u32 dev;
};
/*
* XXX: need per-device counters for "how much data are we going to move off of
* this device"
*/
struct disk_accounting_pos {
union {
struct {
@ -224,6 +239,8 @@ struct disk_accounting_pos {
struct bch_acct_btree btree;
struct bch_acct_rebalance_work rebalance_work;
struct bch_acct_inum inum;
struct bch_acct_reconcile_work reconcile_work;
struct bch_acct_dev_leaving dev_leaving;
} __packed;
} __packed;
struct bpos _pad;
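
For illustration, keys for the two new accounting counters would be built like the existing ones - a minimal sketch, assuming the designated-initializer pattern bcachefs already uses for disk_accounting_pos (ca stands in for a struct bch_dev pointer; BCH_REBALANCE_ACCOUNTING_pending comes from reconcile_format.h further down in this commit):

	struct disk_accounting_pos acc = {
		.type			= BCH_DISK_ACCOUNTING_reconcile_work,
		.reconcile_work.type	= BCH_REBALANCE_ACCOUNTING_pending,
	};

	struct disk_accounting_pos dev_acc = {
		.type			= BCH_DISK_ACCOUNTING_dev_leaving,
		.dev_leaving.dev	= ca->dev_idx,
	};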

View File

@ -348,7 +348,6 @@ static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *
struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, k.k->p.inode) : NULL;
prt_newline(out);
guard(printbuf_indent)(out);
prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
bch2_prt_data_type(out, a->data_type);

View File

@ -20,7 +20,7 @@
#include "data/copygc.h"
#include "data/ec.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "data/reflink.h"
#include "fs/inode.h"
@ -312,8 +312,7 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
if (do_update) {
struct bkey_i *new =
errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
sizeof(struct bch_extent_rebalance)));
errptr_try(bch2_trans_kmalloc(trans, BKEY_EXTENT_U64s_MAX * sizeof(u64)));
bkey_reassemble(new, k);
scoped_guard(rcu)
@ -381,7 +380,7 @@ found:
struct bch_inode_opts opts;
try(bch2_bkey_get_io_opts(trans, NULL, k, &opts));
try(bch2_bkey_set_needs_rebalance(c, &opts, new, SET_NEEDS_REBALANCE_opt_change, 0));
try(bch2_bkey_set_needs_reconcile(trans, NULL, &opts, new, SET_NEEDS_REBALANCE_opt_change, 0));
if (!(flags & BTREE_TRIGGER_is_root)) {
CLASS(btree_node_iter, iter)(trans, btree, new->k.p, 0, level,
@ -882,7 +881,7 @@ int bch2_trigger_extent(struct btree_trans *trans,
try(__trigger_extent(trans, btree, level, new.s_c,
flags & ~BTREE_TRIGGER_overwrite));
try(bch2_trigger_extent_rebalance(trans, old, new.s_c, flags));
try(bch2_trigger_extent_reconcile(trans, btree, level, old, new, flags));
}
return 0;

View File

@ -3,6 +3,8 @@
#include "alloc/disk_groups.h"
#include "data/reconcile.h"
#include "init/dev.h"
#include "sb/members.h"
@ -467,9 +469,18 @@ int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
{
guard(mutex)(&c->sb_lock);
return __bch2_dev_group_set(c, ca, name) ?:
bch2_write_super(c);
struct reconcile_scan s = { .type = REBALANCE_SCAN_pending };
try(bch2_set_reconcile_needs_scan(c, s, false));
/* bch2_reconcile_wakeup_pending goes here */
scoped_guard(mutex, &c->sb_lock) {
try(__bch2_dev_group_set(c, ca, name));
try(bch2_write_super(c));
}
try(bch2_set_reconcile_needs_scan(c, s, true));
return 0;
}
int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res,

View File

@ -1005,7 +1005,7 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
bch2_writepoint_stop(c, ca, ec, &c->write_points[i]);
bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point);
bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point);
bch2_writepoint_stop(c, ca, ec, &c->reconcile_write_point);
bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);
scoped_guard(mutex, &c->btree_reserve_cache_lock)
@ -1396,7 +1396,7 @@ void bch2_fs_allocator_foreground_init(struct bch_fs *c)
}
writepoint_init(&c->btree_write_point, BCH_DATA_btree);
writepoint_init(&c->rebalance_write_point, BCH_DATA_user);
writepoint_init(&c->reconcile_write_point, BCH_DATA_user);
writepoint_init(&c->copygc_write_point, BCH_DATA_user);
for (wp = c->write_points;
@ -1501,7 +1501,7 @@ void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
bch2_write_point_to_text(out, c, &c->copygc_write_point);
prt_str(out, "Rebalance write point\n");
bch2_write_point_to_text(out, c, &c->rebalance_write_point);
bch2_write_point_to_text(out, c, &c->reconcile_write_point);
prt_str(out, "Btree write point\n");
bch2_write_point_to_text(out, c, &c->btree_write_point);

View File

@ -249,7 +249,7 @@
#include "data/ec_types.h"
#include "data/keylist_types.h"
#include "data/nocow_locking_types.h"
#include "data/rebalance_types.h"
#include "data/reconcile_types.h"
#include "debug/async_objs_types.h"
#include "debug/trace.h"
@ -995,7 +995,7 @@ struct bch_fs {
open_bucket_idx_t open_buckets_partial_nr;
struct write_point btree_write_point;
struct write_point rebalance_write_point;
struct write_point reconcile_write_point;
struct write_point write_points[WRITE_POINT_MAX];
struct hlist_head write_points_hash[WRITE_POINT_HASH_NR];
@ -1060,7 +1060,7 @@ struct bch_fs {
struct mutex moving_context_lock;
/* REBALANCE */
struct bch_fs_rebalance rebalance;
struct bch_fs_reconcile reconcile;
/* COPYGC */
struct task_struct *copygc_thread;

View File

@ -711,7 +711,8 @@ struct bch_sb_field_ext {
x(extent_snapshot_whiteouts, BCH_VERSION(1, 29)) \
x(31bit_dirent_offset, BCH_VERSION(1, 30)) \
x(btree_node_accounting, BCH_VERSION(1, 31)) \
x(sb_field_extent_type_u64s, BCH_VERSION(1, 32))
x(sb_field_extent_type_u64s, BCH_VERSION(1, 32)) \
x(reconcile, BCH_VERSION(1, 33))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@ -1420,7 +1421,7 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_logged_op_truncate)| \
BIT_ULL(KEY_TYPE_logged_op_finsert)| \
BIT_ULL(KEY_TYPE_inode_alloc_cursor)) \
x(rebalance_work, 18, \
x(reconcile_work, 18, \
BTREE_IS_snapshot_field| \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \
@ -1430,6 +1431,17 @@ enum btree_id_flags {
BTREE_IS_snapshot_field| \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_accounting)) \
x(reconcile_hipri, 21, \
BTREE_IS_snapshot_field| \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_set)) \
x(reconcile_pending, 22, \
BTREE_IS_snapshot_field| \
BTREE_IS_write_buffer, \
BIT_ULL(KEY_TYPE_set)) \
x(reconcile_scan, 23, 0, \
BIT_ULL(KEY_TYPE_cookie)| \
BIT_ULL(KEY_TYPE_backpointer))
enum btree_id {
#define x(name, nr, ...) BTREE_ID_##name = nr,
@ -1470,7 +1482,10 @@ static inline bool btree_id_can_reconstruct(enum btree_id btree)
switch (btree) {
case BTREE_ID_snapshot_trees:
case BTREE_ID_deleted_inodes:
case BTREE_ID_rebalance_work:
case BTREE_ID_reconcile_work:
case BTREE_ID_reconcile_hipri:
case BTREE_ID_reconcile_pending:
case BTREE_ID_reconcile_scan:
case BTREE_ID_subvolume_children:
return true;
default:

View File

@ -336,6 +336,7 @@ void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
bch2_bkey_to_text(out, k.k);
if (bkey_val_bytes(k.k)) {
guard(printbuf_atomic)(out);
prt_printf(out, ": ");
bch2_val_to_text(out, c, k);
}

View File

@ -682,9 +682,11 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
try(bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(k),
BTREE_TRIGGER_check_repair|flags));
if (bch2_trans_has_updates(trans))
return bch2_trans_commit(trans, NULL, NULL, 0) ?:
if (bch2_trans_has_updates(trans)) {
CLASS(disk_reservation, res)(c);
return bch2_trans_commit(trans, &res.r, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
-BCH_ERR_transaction_restart_nested;
}
try(bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(k),
BTREE_TRIGGER_gc|BTREE_TRIGGER_insert|flags));

View File

@ -22,6 +22,7 @@
#include "data/extents.h"
#include "data/keylist.h"
#include "data/reconcile.h"
#include "data/write.h"
#include "init/error.h"
@ -654,6 +655,35 @@ static void btree_update_new_nodes_mark_sb(struct btree_update *as)
bch2_write_super(c);
}
static void bkey_strip_reconcile(const struct bch_fs *c, struct bkey_s k)
{
bool dropped;
do {
dropped = false;
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry;
bkey_extent_entry_for_each(ptrs, entry)
if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_reconcile ||
extent_entry_type(entry) == BCH_EXTENT_ENTRY_reconcile_bp) {
extent_entry_drop(c, k, entry);
dropped = true;
break;
}
} while (dropped);
}
static bool bkey_has_reconcile(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
bkey_extent_entry_for_each(ptrs, entry)
if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_reconcile)
return true;
return false;
}
/*
* The transactional part of an interior btree node update, where we journal the
* update we did to the interior node and update alloc info:
@ -661,26 +691,70 @@ static void btree_update_new_nodes_mark_sb(struct btree_update *as)
static int btree_update_nodes_written_trans(struct btree_trans *trans,
struct btree_update *as)
{
struct bch_fs *c = trans->c;
struct bch_inode_opts opts;
bch2_inode_opts_get(as->c, &opts, true);
trans->journal_pin = &as->journal;
darray_for_each(as->old_nodes, i)
darray_for_each(as->old_nodes, i) {
try(bch2_key_trigger_old(trans, as->btree_id, i->level + 1, bkey_i_to_s_c(&i->key),
BTREE_TRIGGER_transactional));
darray_for_each(as->new_nodes, i) {
try(bch2_key_trigger_new(trans, as->btree_id, i->level + 1, bkey_i_to_s(&i->key),
BTREE_TRIGGER_transactional));
journal_entry_set(errptr_try(bch2_trans_jset_entry_alloc(trans,
jset_u64s(i->key.k.u64s))),
i->root
? BCH_JSET_ENTRY_btree_root
: BCH_JSET_ENTRY_btree_keys,
BCH_JSET_ENTRY_overwrite,
as->btree_id,
i->root ? i->level : i->level + 1,
i->level + 1,
&i->key, i->key.k.u64s);
}
darray_for_each(as->new_nodes, i) {
i->update_node_key = false;
bkey_strip_reconcile(c, bkey_i_to_s(&i->key));
try(bch2_bkey_set_needs_reconcile(trans, NULL, &opts, &i->key,
SET_NEEDS_REBALANCE_foreground, 0));
if (bkey_has_reconcile(c, bkey_i_to_s_c(&i->key))) {
CLASS(btree_iter_uninit, iter)(trans);
int ret = bch2_btree_node_get_iter(trans, &iter, i->b);
if (ret && ret != -BCH_ERR_btree_node_dying)
return ret;
if (!ret)
i->update_node_key = true;
else
bkey_strip_reconcile(c, bkey_i_to_s(&i->key));
}
try(bch2_key_trigger_new(trans, as->btree_id, i->level + 1, bkey_i_to_s(&i->key),
BTREE_TRIGGER_transactional));
if (!i->update_node_key || i->root) {
journal_entry_set(errptr_try(bch2_trans_jset_entry_alloc(trans,
jset_u64s(i->key.k.u64s))),
i->root
? BCH_JSET_ENTRY_btree_root
: BCH_JSET_ENTRY_btree_keys,
as->btree_id,
i->root ? i->level : i->level + 1,
&i->key, i->key.k.u64s);
} else {
CLASS(btree_node_iter, parent_iter)(trans,
as->btree_id,
i->key.k.p,
0,
i->level + 1,
BTREE_ITER_intent);
try(bch2_btree_iter_traverse(&parent_iter));
/*
* XXX: we shouldn't be logging overwrites here, need a
* flag for that
*/
try(bch2_trans_update(trans, &parent_iter, &i->key, BTREE_TRIGGER_norun));
}
}
return 0;
}
@ -760,19 +834,23 @@ static void btree_update_nodes_written(struct btree_update *as)
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_journal_reclaim,
btree_update_nodes_written_trans(trans, as));
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
"%s", bch2_err_str(ret));
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal),
c, "%s", bch2_err_str(ret));
/*
* Clear will_make_reachable while we still hold intent locks on
* all our new nodes, to avoid racing with
* btree_node_update_key():
*/
darray_for_each(as->new_nodes, i)
darray_for_each(as->new_nodes, i) {
if (i->update_node_key)
bkey_copy(&i->b->key, &i->key);
if (i->b) {
BUG_ON(i->b->will_make_reachable != (unsigned long) as);
i->b->will_make_reachable = 0;
clear_btree_node_will_make_reachable(i->b);
}
}
}
/*
@ -2422,7 +2500,8 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
*/
}
try(bch2_trans_commit(trans, NULL, NULL, commit_flags));
CLASS(disk_reservation, res)(c);
try(bch2_trans_commit(trans, &res.r, NULL, commit_flags));
bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c);
bkey_copy(&b->key, new_key);

View File

@ -26,6 +26,7 @@ struct btree_update_node {
struct btree *b;
unsigned level;
bool root;
bool update_node_key;
__le64 seq;
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
};

View File

@ -561,7 +561,7 @@ struct btree_trans {
struct bch_fs_usage_base fs_usage_delta;
unsigned journal_u64s;
unsigned extra_disk_res; /* XXX kill */
u64 extra_disk_res;
__BKEY_PADDED(btree_path_down, BKEY_BTREE_PTR_VAL_U64s_MAX);

View File

@ -6,6 +6,7 @@
#include "btree/sort.h"
#include "btree/write.h"
#include "data/reconcile.h"
#include "data/write.h"
#include "debug/async_objs.h"
@ -100,7 +101,9 @@ static int btree_node_write_update_key(struct btree_trans *trans,
if (ret)
return ret == -BCH_ERR_btree_node_dying ? 0 : ret;
struct bkey_i *n = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(&b->key.k)));
struct bkey_i *n = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(&b->key.k) +
sizeof(struct bch_extent_reconcile) +
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX));
bkey_copy(n, &b->key);
bkey_i_to_btree_ptr_v2(n)->v.sectors_written =
@ -112,6 +115,13 @@ static int btree_node_write_update_key(struct btree_trans *trans,
if (!bch2_bkey_nr_dirty_ptrs(c, bkey_i_to_s_c(n)))
return bch_err_throw(c, btree_node_write_all_failed);
if (wbio->wbio.failed.nr) {
struct bch_inode_opts opts;
try(bch2_bkey_get_io_opts(trans, NULL, bkey_i_to_s_c(n), &opts));
try(bch2_bkey_set_needs_reconcile(trans, NULL, &opts, n,
SET_NEEDS_REBALANCE_opt_change, 0));
}
return bch2_btree_node_update_key(trans, &iter, b, n,
BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_journal_reclaim|

View File

@ -143,6 +143,17 @@ static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c,
return bch2_csum_opt_to_type(opts.data_checksum, true);
}
static inline enum bch_csum_type bch2_data_checksum_type_rb(struct bch_fs *c,
struct bch_extent_reconcile opts)
{
if (c->sb.encryption_type)
return c->opts.wide_macs
? BCH_CSUM_chacha20_poly1305_128
: BCH_CSUM_chacha20_poly1305_80;
return bch2_csum_opt_to_type(opts.data_checksum, true);
}
static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c)
{
if (c->sb.encryption_type)

View File

@ -24,7 +24,7 @@
#include "data/read.h"
#include "data/write.h"
#include "data/keylist.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "sb/io.h"
@ -141,6 +141,10 @@ int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k,
"invalid csum granularity (%u >= 64)",
s->csum_granularity_bits);
bkey_fsck_err_on(!s->sectors,
c, stripe_sectors_zero,
"invalid sectors zero");
ret = bch2_bkey_ptrs_validate(c, k, from);
fsck_err:
return ret;
@ -1091,6 +1095,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
struct ec_stripe_buf *s,
struct bkey_s_c_backpointer bp,
struct stripe_update_bucket_stats *stats,
struct disk_reservation *res,
struct wb_maybe_flush *last_flushed)
{
struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
@ -1156,7 +1161,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
.idx = s->key.k.p.offset,
};
struct bkey_i *n = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(stripe_ptr)));
struct bkey_i *n = errptr_try(bch2_trans_kmalloc(trans, BKEY_EXTENT_U64s_MAX * sizeof(u64)));
bkey_reassemble(n, k);
@ -1172,10 +1177,9 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
struct bch_inode_opts opts;
try(bch2_bkey_get_io_opts(trans, NULL, bkey_i_to_s_c(n), &opts));
try(bch2_bkey_set_needs_rebalance(trans->c, &opts, n,
SET_NEEDS_REBALANCE_other, 0));
try(bch2_bkey_set_needs_reconcile(trans, NULL, &opts, n, SET_NEEDS_REBALANCE_other, 0));
try(bch2_trans_update(trans, &iter, n, 0));
try(bch2_trans_commit(trans, NULL, NULL,
try(bch2_trans_commit(trans, res, NULL,
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc));
@ -1205,6 +1209,8 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
struct stripe_update_bucket_stats stats = {};
CLASS(disk_reservation, res)(c);
try(for_each_btree_key_max(trans, bp_iter, BTREE_ID_backpointers,
bucket_pos_to_bp_start(ca, bucket_pos),
bucket_pos_to_bp_end(ca, bucket_pos), 0, bp_k, ({
@ -1220,7 +1226,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
wb_maybe_flush_inc(&last_flushed);
ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, bp,
&stats, &last_flushed);
&stats, &res.r, &last_flushed);
})));
if (trace_stripe_update_bucket_enabled()) {

View File

@ -19,7 +19,7 @@
#include "data/checksum.h"
#include "data/compress.h"
#include "data/extents.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "fs/inode.h"
@ -796,7 +796,7 @@ void bch2_bkey_propagate_incompressible(const struct bch_fs *c, struct bkey_i *d
/*
* XXX: if some data actually is compressed, we want
* bch_extent_rebalance.wont_recompress_smaller
* bch_extent_reconcile.wont_recompress_smaller
*/
bkey_extent_entry_for_each(ptrs, entry) {
@ -884,6 +884,15 @@ static unsigned bch2_bkey_durability_safe(struct bch_fs *c, struct bkey_s_c k)
return durability;
}
void bch2_bkey_extent_entry_drop_s(const struct bch_fs *c, struct bkey_s k, union bch_extent_entry *entry)
{
union bch_extent_entry *end = bkey_val_end(k);
union bch_extent_entry *next = extent_entry_next(c, entry);
memmove_u64s(entry, next, (u64 *) end - (u64 *) next);
k.k->u64s -= extent_entry_u64s(c, entry);
}
void bch2_bkey_extent_entry_drop(const struct bch_fs *c, struct bkey_i *k, union bch_extent_entry *entry)
{
union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
@ -1388,7 +1397,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
if (c)
prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k));
guard(printbuf_indent)(out);
prt_newline(out);
guard(printbuf_atomic)(out);
guard(rcu)();
@ -1421,14 +1430,22 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, "idx %llu block %u", (u64) ec->idx, ec->block);
break;
}
case BCH_EXTENT_ENTRY_rebalance:
bch2_extent_rebalance_to_text(out, c, &entry->rebalance);
case BCH_EXTENT_ENTRY_rebalance_v1:
bch2_extent_rebalance_v1_to_text(out, c, &entry->rebalance_v1);
break;
case BCH_EXTENT_ENTRY_reconcile:
bch2_extent_reconcile_to_text(out, c, &entry->reconcile);
break;
case BCH_EXTENT_ENTRY_flags:
prt_bitflags(out, bch2_extent_flags_strs, entry->flags.flags);
break;
case BCH_EXTENT_ENTRY_reconcile_bp:
prt_printf(out, "idx %llu", (u64) entry->reconcile_bp.idx);
break;
default:
prt_printf(out, "(unknown extent entry %.16llx)", *((u64 *) entry));
return;
@ -1482,6 +1499,18 @@ fsck_err:
return ret;
}
static inline bool btree_ptr_entry_type_allowed(enum bch_extent_entry_type type)
{
switch (type) {
case BCH_EXTENT_ENTRY_ptr:
case BCH_EXTENT_ENTRY_reconcile:
case BCH_EXTENT_ENTRY_reconcile_bp:
return true;
default:
return false;
};
}
int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
struct bkey_validate_context from)
{
@ -1492,23 +1521,27 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
unsigned nonce = UINT_MAX;
unsigned nr_ptrs = 0;
bool have_written = false, have_unwritten = false, have_ec = false, crc_since_last_ptr = false;
bool have_inval_dev_ptrs = false, have_non_inval_dev_ptrs = false;
int ret = 0;
if (bkey_is_btree_ptr(k.k))
size_ondisk = btree_sectors(c);
bkey_extent_entry_for_each(ptrs, entry) {
bkey_fsck_err_on(extent_entry_type(entry) >= c->extent_types_known,
unsigned type = extent_entry_type(entry);
bkey_fsck_err_on(type >= c->extent_types_known,
c, extent_ptrs_invalid_entry,
"invalid extent entry type (got %u, max %u)",
extent_entry_type(entry), c->extent_types_known);
type, c->extent_types_known);
bkey_fsck_err_on(bkey_is_btree_ptr(k.k) &&
!extent_entry_is_ptr(entry),
type < BCH_EXTENT_ENTRY_MAX &&
!btree_ptr_entry_type_allowed(type),
c, btree_ptr_has_non_ptr,
"has non ptr field");
"has non allowed field");
switch (extent_entry_type(entry)) {
switch (type) {
case BCH_EXTENT_ENTRY_ptr:
try(extent_ptr_validate(c, k, from, &entry->ptr, size_ondisk, false));
@ -1523,6 +1556,12 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
have_ec = false;
crc_since_last_ptr = false;
if (entry->ptr.dev == BCH_SB_MEMBER_INVALID)
have_inval_dev_ptrs = true;
else
have_non_inval_dev_ptrs = true;
nr_ptrs++;
break;
case BCH_EXTENT_ENTRY_crc32:
@ -1570,30 +1609,18 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
c, ptr_stripe_redundant,
"redundant stripe entry");
have_ec = true;
have_non_inval_dev_ptrs = true;
break;
case BCH_EXTENT_ENTRY_rebalance: {
/*
* this shouldn't be a fsck error, for forward
* compatibility; the rebalance code should just refetch
* the compression opt if it's unknown
*/
#if 0
const struct bch_extent_rebalance *r = &entry->rebalance;
if (!bch2_compression_opt_valid(r->compression)) {
union bch_compression_opt opt = { .value = r->compression };
prt_printf(err, "invalid compression opt %u:%u",
opt.type, opt.level);
return bch_err_throw(c, invalid_bkey);
}
#endif
case BCH_EXTENT_ENTRY_reconcile:
try(bch2_extent_reconcile_validate(c, k, from, &entry->reconcile));
break;
}
case BCH_EXTENT_ENTRY_flags:
bkey_fsck_err_on(entry != ptrs.start,
c, extent_flags_not_at_start,
"extent flags entry not at start");
break;
case BCH_EXTENT_ENTRY_reconcile_bp:
break;
}
}
@ -1615,6 +1642,15 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
bkey_fsck_err_on(have_ec,
c, extent_ptrs_redundant_stripe,
"redundant stripe entry");
/*
* we don't use KEY_TYPE_error for dead btree nodes - we still want the
* other fields in bch_btree_ptr_v2
*/
bkey_fsck_err_on(!bkey_is_btree_ptr(k.k) &&
have_inval_dev_ptrs && !have_non_inval_dev_ptrs,
c, extent_ptrs_all_invalid,
"extent ptrs all to BCH_SB_MEMBER_INVALID");
fsck_err:
return ret;
}
@ -1651,7 +1687,8 @@ void bch2_ptr_swab(const struct bch_fs *c, struct bkey_s k)
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
break;
case BCH_EXTENT_ENTRY_rebalance:
case BCH_EXTENT_ENTRY_rebalance_v1:
case BCH_EXTENT_ENTRY_reconcile:
break;
default:
/* Bad entry type: will be caught by validate() */
@ -1725,8 +1762,10 @@ int bch2_cut_front_s(const struct bch_fs *c, struct bpos where, struct bkey_s k)
entry->crc128.offset += sub;
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
case BCH_EXTENT_ENTRY_rebalance:
case BCH_EXTENT_ENTRY_rebalance_v1:
case BCH_EXTENT_ENTRY_reconcile:
case BCH_EXTENT_ENTRY_flags:
case BCH_EXTENT_ENTRY_reconcile_bp:
break;
}

View File

@ -603,6 +603,7 @@ bool bch2_bkey_devs_rw(struct bch_fs *, struct bkey_s_c);
bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
bool bch2_bkey_in_target(struct bch_fs *, struct bkey_s_c, unsigned);
void bch2_bkey_extent_entry_drop_s(const struct bch_fs *, struct bkey_s, union bch_extent_entry *);
void bch2_bkey_extent_entry_drop(const struct bch_fs *, struct bkey_i *, union bch_extent_entry *);
static inline void bch2_bkey_append_ptr(const struct bch_fs *c, struct bkey_i *k, struct bch_extent_ptr ptr)

View File

@ -79,9 +79,11 @@
x(crc64, 2) \
x(crc128, 3) \
x(stripe_ptr, 4) \
x(rebalance, 5) \
x(flags, 6)
#define BCH_EXTENT_ENTRY_MAX 7
x(rebalance_v1, 5) \
x(flags, 6) \
x(reconcile, 7) \
x(reconcile_bp, 8)
#define BCH_EXTENT_ENTRY_MAX 9
enum bch_extent_entry_type {
#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
@ -221,8 +223,8 @@ struct bch_extent_flags {
#endif
};
/* bch_extent_rebalance: */
#include "rebalance_format.h"
/* bch_extent_reconcile: */
#include "reconcile_format.h"
union bch_extent_entry {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64
@ -270,13 +272,13 @@ struct bch_extent {
} __packed __aligned(8);
/* Maximum size (in u64s) a single pointer could be: */
#define BKEY_EXTENT_PTR_U64s_MAX\
#define BKEY_EXTENT_PTR_U64s_MAX \
((sizeof(struct bch_extent_crc128) + \
sizeof(struct bch_extent_ptr)) / sizeof(__u64))
/* Maximum possible size of an entire extent value: */
#define BKEY_EXTENT_VAL_U64s_MAX \
(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
(5 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
/* Maximum possible size of an entire extent, key + value: */
#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
@ -284,7 +286,9 @@ struct bch_extent {
/* Btree pointers don't carry around checksums: */
#define BKEY_BTREE_PTR_VAL_U64s_MAX \
((sizeof(struct bch_btree_ptr_v2) + \
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX + \
sizeof(struct bch_extent_reconcile) + \
sizeof(struct bch_extent_reconcile_bp)) / sizeof(__u64))
#define BKEY_BTREE_PTR_U64s_MAX \
(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
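
A quick size note on the headroom constants changed above: both new extent entry types added by this commit are single-__u64 bitfield structs (see reconcile_format.h below), so the following is just arithmetic on those definitions:

	/*
	 * sizeof(struct bch_extent_reconcile)    == 8  -> 1 u64
	 * sizeof(struct bch_extent_reconcile_bp) == 8  -> 1 u64
	 *
	 * Bumping the constant term in BKEY_EXTENT_VAL_U64s_MAX from 1 to 5
	 * adds 4 u64s of headroom per extent value: enough for one reconcile
	 * entry, one reconcile_bp entry, and some slack.
	 */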

View File

@ -14,7 +14,7 @@
#include "data/extents.h"
#include "data/extent_update.h"
#include "data/io_misc.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "data/write.h"
#include "fs/inode.h"

View File

@ -20,7 +20,7 @@
#include "data/keylist.h"
#include "data/migrate.h"
#include "data/move.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "journal/journal.h"
@ -41,7 +41,8 @@ static struct bkey_i *drop_dev_ptrs(struct btree_trans *trans, struct bkey_s_c k
return NULL;
struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
sizeof(struct bch_extent_rebalance));
sizeof(struct bch_extent_reconcile) +
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX);
if (IS_ERR(n))
return n;
bkey_reassemble(n, k);
@ -63,7 +64,7 @@ static struct bkey_i *drop_dev_ptrs(struct btree_trans *trans, struct bkey_s_c k
if (n->k.type != KEY_TYPE_error) {
struct bch_inode_opts opts;
int ret = bch2_bkey_get_io_opts(trans, NULL, k, &opts) ?:
bch2_bkey_set_needs_rebalance(c, &opts, n,
bch2_bkey_set_needs_reconcile(trans, NULL, &opts, n,
SET_NEEDS_REBALANCE_opt_change, 0);
if (ret)
return ERR_PTR(ret);
@ -125,6 +126,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c,
unsigned flags, struct printbuf *err)
{
CLASS(btree_trans, trans)(c);
CLASS(disk_reservation, res)(c);
/* FIXME: this does not handle unknown btrees with data pointers */
for (unsigned id = 0; id < BTREE_ID_NR; id++) {
@ -135,14 +137,13 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c,
if (id == BTREE_ID_stripes)
continue;
int ret = for_each_btree_key_commit(trans, iter, id, POS_MIN,
try(for_each_btree_key_commit(trans, iter, id, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
&res.r, NULL, BCH_TRANS_COMMIT_no_enospc, ({
bch2_disk_reservation_put(c, &res.r);
bch2_progress_update_iter(trans, progress, &iter, "dropping user data") ?:
bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags, err);
}));
if (ret)
return ret;
})));
}
return 0;
@ -225,6 +226,7 @@ int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsig
struct printbuf *err)
{
CLASS(btree_trans, trans)(c);
CLASS(disk_reservation, res)(c);
struct wb_maybe_flush last_flushed __cleanup(wb_maybe_flush_exit);
wb_maybe_flush_init(&last_flushed);
@ -233,11 +235,12 @@ int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsig
for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
POS(dev_idx, 0),
POS(dev_idx, U64_MAX), 0, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
&res.r, NULL, BCH_TRANS_COMMIT_no_enospc, ({
if (k.k->type != KEY_TYPE_backpointer)
continue;
wb_maybe_flush_inc(&last_flushed);
bch2_disk_reservation_put(c, &res.r);
data_drop_bp(trans, dev_idx, bkey_s_c_to_backpointer(k),
&last_flushed, flags, err);

View File

@ -20,7 +20,7 @@
#include "data/keylist.h"
#include "data/move.h"
#include "data/read.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "data/reflink.h"
#include "data/write.h"
@ -319,8 +319,11 @@ int bch2_move_extent(struct moving_context *ctxt,
struct bch_inode_opts opts;
try(bch2_bkey_get_io_opts(trans, snapshot_io_opts, k, &opts));
try(bch2_update_rebalance_opts(trans, &opts, iter, k, SET_NEEDS_REBALANCE_other));
try(bch2_trans_commit_lazy(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc));
try(bch2_update_reconcile_opts(trans, snapshot_io_opts, &opts, iter, level, k,
SET_NEEDS_REBALANCE_other));
CLASS(disk_reservation, res)(c);
try(bch2_trans_commit_lazy(trans, &res.r, NULL, BCH_TRANS_COMMIT_no_enospc));
struct data_update_opts data_opts = { .read_dev = -1 };
int ret = pred(trans, arg, iter->btree_id, k, &opts, &data_opts);
@ -496,46 +499,6 @@ next_nondata:
return ret;
}
static int bch2_move_data(struct bch_fs *c,
struct bbpos start,
struct bbpos end,
unsigned min_depth,
struct bch_ratelimit *rate,
struct bch_move_stats *stats,
struct write_point_specifier wp,
bool wait_on_copygc,
move_pred_fn pred, void *arg)
{
struct moving_context ctxt __cleanup(bch2_moving_ctxt_exit);
bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
for (enum btree_id id = start.btree;
id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
id++) {
ctxt.stats->pos = BBPOS(id, POS_MIN);
if (!bch2_btree_id_root(c, id)->b)
continue;
unsigned min_depth_this_btree = min_depth;
/* Stripe keys have pointers, but are handled separately */
if (!btree_type_has_data_ptrs(id) ||
id == BTREE_ID_stripes)
min_depth_this_btree = max(min_depth_this_btree, 1);
for (unsigned level = min_depth_this_btree;
level < BTREE_MAX_DEPTH;
level++)
try(bch2_move_data_btree(&ctxt,
id == start.btree ? start.pos : POS_MIN,
id == end.btree ? end.pos : POS_MAX,
pred, arg, id, level));
}
return 0;
}
static int __bch2_move_data_phys(struct moving_context *ctxt,
struct move_bucket *bucket_in_flight,
unsigned dev,
@ -717,218 +680,6 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
evacuate_bucket_pred, &arg);
}
typedef bool (*move_btree_pred)(struct bch_fs *, void *,
struct btree *, struct bch_inode_opts *,
struct data_update_opts *);
static int bch2_move_btree(struct bch_fs *c,
struct bbpos start,
struct bbpos end,
move_btree_pred pred, void *arg,
struct bch_move_stats *stats)
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct btree *b;
enum btree_id btree;
int ret = 0;
struct bch_inode_opts io_opts;
bch2_inode_opts_get(c, &io_opts, true);
struct moving_context ctxt __cleanup(bch2_moving_ctxt_exit);
bch2_moving_ctxt_init(&ctxt, c, NULL, stats, writepoint_ptr(&c->btree_write_point), true);
struct btree_trans *trans = ctxt.trans;
CLASS(btree_iter_uninit, iter)(trans);
stats->data_type = BCH_DATA_btree;
for (btree = start.btree;
btree <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
btree ++) {
stats->pos = BBPOS(btree, POS_MIN);
if (!bch2_btree_id_root(c, btree)->b)
continue;
bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, 0,
BTREE_ITER_prefetch);
retry:
ret = 0;
while (bch2_trans_begin(trans),
(b = bch2_btree_iter_peek_node(&iter)) &&
!(ret = PTR_ERR_OR_ZERO(b))) {
if (kthread && kthread_should_stop())
break;
if ((cmp_int(btree, end.btree) ?:
bpos_cmp(b->key.k.p, end.pos)) > 0)
break;
stats->pos = BBPOS(iter.btree_id, iter.pos);
if (btree_node_fake(b))
goto next;
struct data_update_opts data_opts = {};
if (!pred(c, arg, b, &io_opts, &data_opts))
goto next;
ret = bch2_btree_node_rewrite(trans, &iter, b, 0, 0) ?: ret;
if (ret)
break;
next:
bch2_btree_iter_next_node(&iter);
}
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (kthread && kthread_should_stop())
break;
}
bch2_trans_unlock(trans);
bch2_btree_interior_updates_flush(c);
bch_err_fn(c, ret);
return ret;
}
static int rereplicate_pred(struct btree_trans *trans, void *arg,
enum btree_id btree, struct bkey_s_c k,
struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
struct bch_fs *c = trans->c;
unsigned nr_good = bch2_bkey_durability(c, k);
unsigned replicas = bkey_is_btree_ptr(k.k)
? c->opts.metadata_replicas
: io_opts->data_replicas;
guard(rcu)();
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
unsigned i = 0;
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
if (!ptr->cached &&
(!ca || !ca->mi.durability))
data_opts->ptrs_kill |= BIT(i);
i++;
}
if (!data_opts->ptrs_kill &&
(!nr_good || nr_good >= replicas))
return false;
data_opts->extra_replicas = replicas - nr_good;
return true;
}
static int migrate_pred(struct btree_trans *trans, void *arg,
enum btree_id btree, struct bkey_s_c k,
struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
struct bch_ioctl_data *op = arg;
unsigned ptr_bit = 1;
bkey_for_each_ptr(ptrs, ptr) {
if (ptr->dev == op->migrate.dev)
data_opts->ptrs_rewrite |= ptr_bit;
ptr_bit <<= 1;
}
return data_opts->ptrs_rewrite != 0;
}
/*
* Ancient versions of bcachefs produced packed formats which could represent
* keys that the in memory format cannot represent; this checks for those
* formats so we can get rid of them.
*/
static bool bformat_needs_redo(struct bkey_format *f)
{
for (unsigned i = 0; i < f->nr_fields; i++)
if (bch2_bkey_format_field_overflows(f, i))
return true;
return false;
}
static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
struct btree *b,
struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
if (b->version_ondisk != c->sb.version ||
btree_node_need_rewrite(b) ||
bformat_needs_redo(&b->format))
return true;
return false;
}
int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
{
int ret;
ret = bch2_move_btree(c,
BBPOS_MIN,
BBPOS_MAX,
rewrite_old_nodes_pred, c, stats);
if (!ret) {
guard(mutex)(&c->sb_lock);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
c->disk_sb.sb->version_min = c->disk_sb.sb->version;
bch2_write_super(c);
}
bch_err_fn(c, ret);
return ret;
}
static int drop_extra_replicas_pred(struct btree_trans *trans, void *arg,
enum btree_id btree, struct bkey_s_c k,
struct bch_inode_opts *io_opts,
struct data_update_opts *data_opts)
{
struct bch_fs *c = trans->c;
unsigned durability = bch2_bkey_durability(c, k);
unsigned replicas = bkey_is_btree_ptr(k.k)
? c->opts.metadata_replicas
: io_opts->data_replicas;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
unsigned i = 0;
guard(rcu)();
bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
unsigned d = bch2_extent_ptr_durability(c, &p);
if (d && durability - d >= replicas) {
data_opts->ptrs_kill |= BIT(i);
durability -= d;
}
i++;
}
i = 0;
bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
if (p.has_ec && durability - p.ec.redundancy >= replicas) {
data_opts->ptrs_kill_ec |= BIT(i);
durability -= p.ec.redundancy;
}
i++;
}
return (data_opts->ptrs_kill|data_opts->ptrs_kill_ec) != 0;
}
static int scrub_pred(struct btree_trans *trans, void *_arg,
enum btree_id btree, struct bkey_s_c k,
struct bch_inode_opts *io_opts,
@ -958,8 +709,6 @@ int bch2_data_job(struct bch_fs *c,
struct bch_move_stats *stats,
struct bch_ioctl_data *op)
{
struct bbpos start = BBPOS(op->start_btree, op->start_pos);
struct bbpos end = BBPOS(op->end_btree, op->end_pos);
int ret = 0;
if (op->op >= BCH_DATA_OP_NR)
@ -984,39 +733,6 @@ int bch2_data_job(struct bch_fs *c,
scrub_pred, op) ?: ret;
break;
case BCH_DATA_OP_rereplicate:
stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, -1);
ret = bch2_move_data(c, start, end, 0, NULL, stats,
writepoint_hashed((unsigned long) current),
true,
rereplicate_pred, c) ?: ret;
bch2_btree_interior_updates_flush(c);
break;
case BCH_DATA_OP_migrate:
if (op->migrate.dev >= c->sb.nr_devices)
return -EINVAL;
stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, op->migrate.dev);
ret = bch2_move_data_phys(c, op->migrate.dev, 0, U64_MAX,
~0,
NULL,
stats,
writepoint_hashed((unsigned long) current),
true,
migrate_pred, op) ?: ret;
bch2_btree_interior_updates_flush(c);
break;
case BCH_DATA_OP_rewrite_old_nodes:
ret = bch2_scan_old_btree_nodes(c, stats);
break;
case BCH_DATA_OP_drop_extra_replicas:
ret = bch2_move_data(c, start, end, 0, NULL, stats,
writepoint_hashed((unsigned long) current),
true,
drop_extra_replicas_pred, c) ?: ret;
break;
default:
ret = -EINVAL;
}

View File

@ -86,8 +86,6 @@ void bch2_moving_ctxt_flush_all(struct moving_context *);
void bch2_move_ctxt_wait_for_io(struct moving_context *);
int bch2_move_ratelimit(struct moving_context *);
int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
struct per_snapshot_io_opts;
int bch2_move_extent(struct moving_context *, struct move_bucket *,
struct per_snapshot_io_opts *, move_pred_fn, void *,

File diff suppressed because it is too large

View File

@ -1,126 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_REBALANCE_H
#define _BCACHEFS_REBALANCE_H
#include "data/compress.h"
#include "alloc/disk_groups.h"
#include "rebalance_types.h"
static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_fs *c,
struct bch_inode_opts *opts)
{
struct bch_extent_rebalance r = {
.type = BIT(BCH_EXTENT_ENTRY_rebalance),
#define x(_name) \
._name = opts->_name, \
._name##_from_inode = opts->_name##_from_inode,
BCH_REBALANCE_OPTS()
#undef x
};
if (r.background_target &&
!bch2_target_accepts_data(c, BCH_DATA_user, r.background_target))
r.background_target = 0;
return r;
};
void bch2_extent_rebalance_to_text(struct printbuf *, struct bch_fs *,
const struct bch_extent_rebalance *);
int bch2_trigger_extent_rebalance(struct btree_trans *,
struct bkey_s_c, struct bkey_s_c,
enum btree_iter_update_trigger_flags);
u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c);
enum set_needs_rebalance_ctx {
SET_NEEDS_REBALANCE_opt_change,
SET_NEEDS_REBALANCE_opt_change_indirect,
SET_NEEDS_REBALANCE_foreground,
SET_NEEDS_REBALANCE_other,
};
int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_inode_opts *,
struct bkey_i *, enum set_needs_rebalance_ctx, u32);
/* Inodes in different snapshots may have different IO options: */
struct snapshot_io_opts_entry {
u32 snapshot;
struct bch_inode_opts io_opts;
};
struct per_snapshot_io_opts {
u64 cur_inum;
bool metadata;
struct bch_inode_opts fs_io_opts;
DARRAY(struct snapshot_io_opts_entry) d;
};
static inline struct per_snapshot_io_opts per_snapshot_io_opts_init(struct bch_fs *c)
{
return (struct per_snapshot_io_opts) {
/* io_opts->fs_io_opts will be initialized when we know the key type */
.fs_io_opts.change_cookie = atomic_read(&c->opt_change_cookie) - 1,
};
}
static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
{
darray_exit(&io_opts->d);
}
DEFINE_CLASS(per_snapshot_io_opts, struct per_snapshot_io_opts,
per_snapshot_io_opts_exit(&_T),
per_snapshot_io_opts_init(c),
struct bch_fs *c);
int bch2_update_rebalance_opts(struct btree_trans *,
struct bch_inode_opts *,
struct btree_iter *,
struct bkey_s_c,
enum set_needs_rebalance_ctx);
int bch2_bkey_get_io_opts(struct btree_trans *,
struct per_snapshot_io_opts *, struct bkey_s_c,
struct bch_inode_opts *opts);
struct rebalance_scan {
enum rebalance_scan_type {
REBALANCE_SCAN_fs,
REBALANCE_SCAN_metadata,
REBALANCE_SCAN_device,
REBALANCE_SCAN_inum,
} type;
union {
unsigned dev;
u64 inum;
};
};
int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, struct rebalance_scan);
int bch2_set_rebalance_needs_scan(struct bch_fs *, struct rebalance_scan);
int bch2_set_fs_needs_rebalance(struct bch_fs *);
static inline void bch2_rebalance_wakeup(struct bch_fs *c)
{
c->rebalance.kick++;
guard(rcu)();
struct task_struct *p = rcu_dereference(c->rebalance.thread);
if (p)
wake_up_process(p);
}
void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *);
void bch2_rebalance_stop(struct bch_fs *);
int bch2_rebalance_start(struct bch_fs *);
void bch2_fs_rebalance_exit(struct bch_fs *);
int bch2_fs_rebalance_init(struct bch_fs *);
int bch2_check_rebalance_work(struct bch_fs *);
#endif /* _BCACHEFS_REBALANCE_H */

View File

@ -1,53 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_REBALANCE_FORMAT_H
#define _BCACHEFS_REBALANCE_FORMAT_H
struct bch_extent_rebalance {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:6,
unused:3,
promote_target_from_inode:1,
erasure_code_from_inode:1,
data_checksum_from_inode:1,
background_compression_from_inode:1,
data_replicas_from_inode:1,
background_target_from_inode:1,
promote_target:16,
erasure_code:1,
data_checksum:4,
data_replicas:4,
background_compression:8, /* enum bch_compression_opt */
background_target:16;
#elif defined (__BIG_ENDIAN_BITFIELD)
__u64 background_target:16,
background_compression:8,
data_replicas:4,
data_checksum:4,
erasure_code:1,
promote_target:16,
background_target_from_inode:1,
data_replicas_from_inode:1,
background_compression_from_inode:1,
data_checksum_from_inode:1,
erasure_code_from_inode:1,
promote_target_from_inode:1,
unused:3,
type:6;
#endif
};
/* subset of BCH_INODE_OPTS */
#define BCH_REBALANCE_OPTS() \
x(data_checksum) \
x(background_compression) \
x(data_replicas) \
x(promote_target) \
x(background_target) \
x(erasure_code)
#endif /* _BCACHEFS_REBALANCE_FORMAT_H */

libbcachefs/data/reconcile.c (new file, 2206 lines added)

File diff suppressed because it is too large

View File

@ -0,0 +1,156 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_REBALANCE_H
#define _BCACHEFS_REBALANCE_H
#include "data/compress.h"
#include "alloc/disk_groups.h"
#include "reconcile_types.h"
int bch2_extent_reconcile_validate(struct bch_fs *, struct bkey_s_c,
struct bkey_validate_context,
const struct bch_extent_reconcile *);
static inline struct bch_extent_reconcile io_opts_to_reconcile_opts(struct bch_fs *c,
struct bch_inode_opts *opts)
{
return (struct bch_extent_reconcile) {
.type = BIT(BCH_EXTENT_ENTRY_reconcile),
#define x(_name) \
._name = opts->_name, \
._name##_from_inode = opts->_name##_from_inode,
BCH_REBALANCE_OPTS()
#undef x
};
};
void bch2_extent_rebalance_v1_to_text(struct printbuf *, struct bch_fs *,
const struct bch_extent_rebalance_v1 *);
void bch2_extent_reconcile_to_text(struct printbuf *, struct bch_fs *,
const struct bch_extent_reconcile *);
const struct bch_extent_reconcile *bch2_bkey_reconcile_opts(const struct bch_fs *, struct bkey_s_c);
int __bch2_trigger_extent_reconcile(struct btree_trans *,
enum btree_id, unsigned,
struct bkey_s_c, struct bkey_s,
const struct bch_extent_reconcile *,
const struct bch_extent_reconcile *,
enum btree_iter_update_trigger_flags);
static inline unsigned rb_needs_trigger(const struct bch_extent_reconcile *r)
{
return r ? r->need_rb|r->ptrs_moving : 0;
}
static inline int bch2_trigger_extent_reconcile(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s new,
enum btree_iter_update_trigger_flags flags)
{
struct bch_fs *c = trans->c;
const struct bch_extent_reconcile *old_r = bch2_bkey_reconcile_opts(c, old);
const struct bch_extent_reconcile *new_r = bch2_bkey_reconcile_opts(c, new.s_c);
return rb_needs_trigger(old_r) || rb_needs_trigger(new_r)
? __bch2_trigger_extent_reconcile(trans, btree, level, old, new, old_r, new_r, flags)
: 0;
}
enum set_needs_reconcile_ctx {
SET_NEEDS_REBALANCE_opt_change,
SET_NEEDS_REBALANCE_opt_change_indirect,
SET_NEEDS_REBALANCE_foreground,
SET_NEEDS_REBALANCE_other,
};
/* Inodes in different snapshots may have different IO options: */
struct snapshot_io_opts_entry {
u32 snapshot;
struct bch_inode_opts io_opts;
};
struct per_snapshot_io_opts {
u64 cur_inum;
bool metadata;
bool fs_scan_cookie;
bool inum_scan_cookie;
struct bch_devs_mask dev_cookie;
struct bch_inode_opts fs_io_opts;
DARRAY(struct snapshot_io_opts_entry) d;
};
static inline struct per_snapshot_io_opts per_snapshot_io_opts_init(struct bch_fs *c)
{
return (struct per_snapshot_io_opts) {
/* io_opts->fs_io_opts will be initialized when we know the key type */
.fs_io_opts.change_cookie = atomic_read(&c->opt_change_cookie) - 1,
};
}
static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
{
darray_exit(&io_opts->d);
}
DEFINE_CLASS(per_snapshot_io_opts, struct per_snapshot_io_opts,
per_snapshot_io_opts_exit(&_T),
per_snapshot_io_opts_init(c),
struct bch_fs *c);
int bch2_bkey_get_io_opts(struct btree_trans *,
struct per_snapshot_io_opts *, struct bkey_s_c,
struct bch_inode_opts *opts);
int bch2_update_reconcile_opts(struct btree_trans *,
struct per_snapshot_io_opts *,
struct bch_inode_opts *,
struct btree_iter *,
unsigned level,
struct bkey_s_c,
enum set_needs_reconcile_ctx);
int bch2_bkey_set_needs_reconcile(struct btree_trans *,
struct per_snapshot_io_opts *, struct bch_inode_opts *,
struct bkey_i *, enum set_needs_reconcile_ctx, u32);
struct reconcile_scan {
enum reconcile_scan_type {
REBALANCE_SCAN_fs,
REBALANCE_SCAN_metadata,
REBALANCE_SCAN_pending,
REBALANCE_SCAN_device,
REBALANCE_SCAN_inum,
} type;
union {
unsigned dev;
u64 inum;
};
};
int bch2_set_reconcile_needs_scan_trans(struct btree_trans *, struct reconcile_scan);
int bch2_set_reconcile_needs_scan(struct bch_fs *, struct reconcile_scan, bool);
int bch2_set_fs_needs_reconcile(struct bch_fs *);
static inline void bch2_reconcile_wakeup(struct bch_fs *c)
{
c->reconcile.kick++;
guard(rcu)();
struct task_struct *p = rcu_dereference(c->reconcile.thread);
if (p)
wake_up_process(p);
}
void bch2_reconcile_status_to_text(struct printbuf *, struct bch_fs *);
void bch2_reconcile_scan_pending_to_text(struct printbuf *, struct bch_fs *);
void bch2_reconcile_stop(struct bch_fs *);
int bch2_reconcile_start(struct bch_fs *);
void bch2_fs_reconcile_exit(struct bch_fs *);
int bch2_fs_reconcile_init(struct bch_fs *);
int bch2_check_reconcile_work(struct bch_fs *);
#endif /* _BCACHEFS_REBALANCE_H */
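
A hedged usage sketch for the scan-cookie API above, mirroring the bch2_dev_group_set() change earlier in this commit; the meaning of the final bool argument (false before applying a change, true once it has been applied) is inferred from that caller, and ca stands in for a struct bch_dev pointer:

	struct reconcile_scan s = {
		.type	= REBALANCE_SCAN_device,
		.dev	= ca->dev_idx,
	};

	int ret = bch2_set_reconcile_needs_scan(c, s, false);
	if (!ret) {
		/* ... apply the option / superblock / label change ... */
		ret = bch2_set_reconcile_needs_scan(c, s, true);
		bch2_reconcile_wakeup(c);
	}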

View File

@ -0,0 +1,178 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_REBALANCE_FORMAT_H
#define _BCACHEFS_REBALANCE_FORMAT_H
/*
* rebalance on disk data structures:
*
* extents will contain a bch_extent_reconcile if they have background
* processing pending; additionally, indirect extents will always have a
* bch_extent_reconcile if they had any io path options set on the inode, since
* we don't (yet) have backpointers that would let us look up the "owning" inode
* of an indirect extent to recover the io path options.
*
* We also have 4 btrees for keeping track of pending rebalance work:
*
* BTREE_ID_reconcile_scan:
* Inum 0:
* Holds "scan cookies", which are created on option change to indicate that
* new options need to be propagated to each extent; this happens before the
* actual data processing.
*
* A scan cookie may be for the entire filesystem, a specific device, or a
* specific inode.
*
* Inum 1:
* Btree nodes that need background processing cannot be tracked by the
* other rebalance btrees; instead they have backpointers
* (KEY_TYPE_backpointer) created here.
*
* This has the added benefit that btree nodes will be processed before
* regular data, which is beneficial if e.g. we're recovering from data
* being degraded.
*
* BTREE_ID_reconcile_work:
* The main "pending rebalance work" btree: it's a simple bitset btree where
* a set bit indicates that an extent in BTREE_ID_extents or
* BTREE_ID_reflink needs to be processed.
*
* BTREE_ID_reconcile_hipri:
* If bch_extent_reconcile.hipri is set, the extent will be tracked here
* instead of BTREE_ID_reconcile_work and processed ahead of extents in
* BTREE_ID_reconcile_work; this is so that we can evacuate failed devices
* before other work.
*
* BTREE_ID_reconcile_pending:
* If we'd like to move an extent to a specific target, but can't because the
* target is full, we set bch_extent_reconcile.pending and switch to tracking
* it here; pending rebalance work is re-attempted on device resize, add, or
* label change.
*/
struct bch_extent_rebalance_v1 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:6,
unused:3,
promote_target_from_inode:1,
erasure_code_from_inode:1,
data_checksum_from_inode:1,
background_compression_from_inode:1,
data_replicas_from_inode:1,
background_target_from_inode:1,
promote_target:16,
erasure_code:1,
data_checksum:4,
data_replicas:4,
background_compression:8, /* enum bch_compression_opt */
background_target:16;
#elif defined (__BIG_ENDIAN_BITFIELD)
__u64 background_target:16,
background_compression:8,
data_replicas:4,
data_checksum:4,
erasure_code:1,
promote_target:16,
background_target_from_inode:1,
data_replicas_from_inode:1,
background_compression_from_inode:1,
data_checksum_from_inode:1,
erasure_code_from_inode:1,
promote_target_from_inode:1,
unused:3,
type:6;
#endif
};
struct bch_extent_reconcile {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:8,
unused:2,
ptrs_moving:5,
hipri:1,
pending:1,
need_rb:5,
data_replicas_from_inode:1,
data_checksum_from_inode:1,
erasure_code_from_inode:1,
background_compression_from_inode:1,
background_target_from_inode:1,
promote_target_from_inode:1,
data_replicas:3,
data_checksum:4,
erasure_code:1,
background_compression:8, /* enum bch_compression_opt */
background_target:10,
promote_target:10;
#elif defined (__BIG_ENDIAN_BITFIELD)
__u64 promote_target:10,
background_target:10,
background_compression:8,
erasure_code:1,
data_checksum:4,
data_replicas:3,
promote_target_from_inode:1,
background_target_from_inode:1,
background_compression_from_inode:1,
erasure_code_from_inode:1,
data_checksum_from_inode:1,
data_replicas_from_inode:1,
need_rb:5,
pending:1,
hipri:1,
ptrs_moving:5,
unused:2,
type:8;
#endif
};
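/*
 * Sanity check on the layout above: 8 + 2 + 5 + 1 + 1 + 5 + 6 (the
 * *_from_inode bits) + 3 + 4 + 1 + 8 + 10 + 10 = 64, so
 * bch_extent_reconcile is a single u64, the same size as
 * bch_extent_rebalance_v1.
 */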
struct bch_extent_reconcile_bp {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:9,
idx:55;
#elif defined (__BIG_ENDIAN_BITFIELD)
__u64 idx:55,
type:9;
#endif
};
/* subset of BCH_INODE_OPTS */
#define BCH_REBALANCE_OPTS() \
x(data_replicas) \
x(data_checksum) \
x(erasure_code) \
x(background_compression) \
x(background_target) \
x(promote_target)
enum bch_reconcile_opts {
#define x(n) BCH_REBALANCE_##n,
BCH_REBALANCE_OPTS()
#undef x
};
#define BCH_REBALANCE_ACCOUNTING() \
x(replicas, 0) \
x(checksum, 1) \
x(erasure_code, 2) \
x(compression, 3) \
x(target, 4) \
x(high_priority, 5) \
x(pending, 6) \
enum bch_reconcile_accounting_type {
#define x(t, n) BCH_REBALANCE_ACCOUNTING_##t = n,
BCH_REBALANCE_ACCOUNTING()
#undef x
BCH_REBALANCE_ACCOUNTING_NR,
};
#endif /* _BCACHEFS_REBALANCE_FORMAT_H */
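
For reference, the accounting x-macro above expands to the following enumerators (written out by hand here; the header only defines them via BCH_REBALANCE_ACCOUNTING()). These are the values stored in bch_acct_reconcile_work.type and printed by bch2_prt_reconcile_accounting_type() in the accounting changes earlier in this commit:

	enum bch_reconcile_accounting_type {
		BCH_REBALANCE_ACCOUNTING_replicas	= 0,
		BCH_REBALANCE_ACCOUNTING_checksum	= 1,
		BCH_REBALANCE_ACCOUNTING_erasure_code	= 2,
		BCH_REBALANCE_ACCOUNTING_compression	= 3,
		BCH_REBALANCE_ACCOUNTING_target		= 4,
		BCH_REBALANCE_ACCOUNTING_high_priority	= 5,
		BCH_REBALANCE_ACCOUNTING_pending	= 6,
		BCH_REBALANCE_ACCOUNTING_NR,		/* == 7 */
	};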

View File

@ -10,18 +10,17 @@
x(working) \
x(scanning)
enum bch_rebalance_states {
enum bch_reconcile_states {
#define x(t) BCH_REBALANCE_##t,
BCH_REBALANCE_STATES()
#undef x
};
struct bch_fs_rebalance {
struct bch_fs_reconcile {
struct task_struct __rcu *thread;
u32 kick;
struct bch_pd_controller pd;
enum bch_rebalance_states state;
enum bch_reconcile_states state;
u64 wait_iotime_start;
u64 wait_iotime_end;
u64 wait_wallclock_start;

View File

@ -8,7 +8,7 @@
#include "data/extents.h"
#include "data/io_misc.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "data/reflink.h"
#include "data/write.h"

View File

@ -15,7 +15,7 @@
#include "data/keylist.h"
#include "data/move.h"
#include "data/nocow_locking.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "data/update.h"
#include "data/write.h"
@ -161,14 +161,23 @@ static int data_update_index_update_key(struct btree_trans *trans,
/* make a local copy, so that we can trace it after the transaction commit: */
k = bkey_i_to_s_c(errptr_try(bch2_bkey_make_mut_noupdate(trans, k)));
/*
* We're calling set_needs_reconcile() on both @insert and @new,
* and it can add a bch_extent_reconcile and additional
* pointers to BCH_SB_MEMBER_INVALID if the extent is now
* degraded due to option changes:
*/
struct bkey_i_extent *new = bkey_i_to_extent(bch2_keylist_front(&u->op.insert_keys));
new = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(&new->k)));
new = errptr_try(bch2_trans_kmalloc(trans, bkey_bytes(&new->k) +
sizeof(struct bch_extent_reconcile) +
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX));
bkey_copy(&new->k_i, bch2_keylist_front(&u->op.insert_keys));
struct bkey_i *insert = errptr_try(bch2_trans_kmalloc(trans,
bkey_bytes(k.k) +
bkey_val_bytes(&new->k) +
sizeof(struct bch_extent_rebalance)));
sizeof(struct bch_extent_reconcile) +
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX));
bkey_reassemble(insert, k);
if (!bch2_extents_match(c, k, old)) {
@ -274,7 +283,17 @@ static int data_update_index_update_key(struct btree_trans *trans,
try(bch2_insert_snapshot_whiteouts(trans, u->btree_id, k.k->p, bkey_start_pos(&insert->k)));
try(bch2_insert_snapshot_whiteouts(trans, u->btree_id, k.k->p, insert->k.p));
try(bch2_bkey_set_needs_rebalance(c, &opts, insert,
/*
 * This set_needs_reconcile() call is only for verifying that the data we
 * just wrote was written correctly: without it, we could fail to flag
 * incorrectly written data because needs_rb was already set on the
 * existing extent.
*/
try(bch2_bkey_set_needs_reconcile(trans, NULL, &opts, &new->k_i,
SET_NEEDS_REBALANCE_foreground,
u->op.opts.change_cookie));
/* This is the real set_needs_reconcile() call */
try(bch2_bkey_set_needs_reconcile(trans, NULL, &opts, insert,
SET_NEEDS_REBALANCE_foreground,
u->op.opts.change_cookie));
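Both bch2_trans_kmalloc() calls in this hunk reserve the same extra headroom beyond the key itself: one bch_extent_reconcile entry plus up to BCH_REPLICAS_MAX pointers to BCH_SB_MEMBER_INVALID, which bch2_bkey_set_needs_reconcile() may append when the extent ends up degraded. A stand-in sketch of that arithmetic follows; the struct definitions and the value of BCH_REPLICAS_MAX are placeholders, not the real on-disk sizes.

#include <stddef.h>
#include <stdio.h>

#define BCH_REPLICAS_MAX	4	/* placeholder value for the sketch */

struct fake_extent_reconcile	{ unsigned long long v; };	/* stand-in */
struct fake_extent_ptr		{ unsigned long long v; };	/* stand-in */

/* Worst-case size a key can grow to once needs-reconcile state is added. */
static size_t needs_reconcile_worst_case(size_t bkey_bytes)
{
	return bkey_bytes +
		sizeof(struct fake_extent_reconcile) +
		sizeof(struct fake_extent_ptr) * BCH_REPLICAS_MAX;
}

int main(void)
{
	printf("worst case for a 64-byte key: %zu bytes\n",
	       needs_reconcile_worst_case(64));
	return 0;
}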
@ -404,7 +423,8 @@ static void data_update_trace(struct data_update *u, int ret)
trace_data_update_no_io(c, buf.buf);
}
count_event(c, data_update_no_io);
} else if (ret != -BCH_ERR_data_update_fail_no_rw_devs) {
} else if (ret != -BCH_ERR_data_update_fail_no_rw_devs &&
ret != -BCH_ERR_insufficient_devices) {
if (trace_data_update_fail_enabled()) {
CLASS(printbuf, buf)();
bch2_data_update_to_text(&buf, u);
@ -689,10 +709,6 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
if (*i != BCH_SB_MEMBER_INVALID)
__clear_bit(*i, devs.d);
bool trace = trace_data_update_fail_enabled();
CLASS(printbuf, buf)();
guard(printbuf_atomic)(&buf);
guard(rcu)();
unsigned nr_replicas = 0, i;
@ -705,10 +721,6 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
bch2_dev_usage_read_fast(ca, &usage);
u64 nr_free = dev_buckets_free(ca, usage, m->op.watermark);
if (trace)
prt_printf(&buf, "%s=%llu ", ca->name, nr_free);
if (!nr_free)
continue;
@ -717,24 +729,8 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m)
break;
}
if (!nr_replicas) {
/*
* If it's a promote that's failing because the promote target
* is full - we expect that in normal operation; it'll still
* show up in io_read_nopromote and error_throw:
*/
if (m->opts.type != BCH_DATA_UPDATE_promote) {
if (trace) {
prt_printf(&buf, " - got replicas %u\n", nr_replicas);
bch2_data_update_to_text(&buf, m);
prt_printf(&buf, "\nret:\t%s\n", bch2_err_str(-BCH_ERR_data_update_fail_no_rw_devs));
trace_data_update_fail(c, buf.buf);
}
count_event(c, data_update_fail);
}
if (!nr_replicas)
return bch_err_throw(c, data_update_fail_no_rw_devs);
}
return 0;
}

View File

@ -13,7 +13,7 @@ struct moving_context;
#define BCH_DATA_UPDATE_TYPES() \
x(other) \
x(copygc) \
x(rebalance) \
x(reconcile) \
x(promote) \
x(self_heal) \
x(scrub)

View File

@ -20,7 +20,7 @@
#include "data/keylist.h"
#include "data/move.h"
#include "data/nocow_locking.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "data/write.h"
#include "debug/async_objs.h"
@ -348,7 +348,7 @@ int bch2_extent_update(struct btree_trans *trans,
bch2_inode_opts_get_inode(c, &inode, &opts);
try(bch2_bkey_set_needs_rebalance(c, &opts, k,
try(bch2_bkey_set_needs_reconcile(trans, NULL, &opts, k,
SET_NEEDS_REBALANCE_foreground,
change_cookie));
try(bch2_trans_update(trans, iter, k, 0));
@ -383,6 +383,13 @@ static int bch2_write_index_default(struct bch_write_op *op)
bch2_trans_begin(trans);
k = bch2_keylist_front(keys);
/*
* If we did a degraded write, bch2_bkey_set_needs_reconcile() will add
* pointers to BCH_SB_MEMBER_INVALID so the extent is accounted as
* degraded
*/
bch2_bkey_buf_realloc(&sk, k->k.u64s + 1 + BCH_REPLICAS_MAX);
bch2_bkey_buf_copy(&sk, k);
int ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &sk.k->k.p.snapshot);
@ -1220,8 +1227,15 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
return 0;
}
/*
* If we did a degraded write, bch2_bkey_set_needs_reconcile() will add
* pointers to BCH_SB_MEMBER_INVALID so the extent is accounted as
* degraded
*/
struct bkey_i *new = errptr_try(bch2_trans_kmalloc_nomemzero(trans,
bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance)));
bkey_bytes(k.k) +
sizeof(struct bch_extent_reconcile) +
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX));
bkey_reassemble(new, k);
bch2_cut_front(c, bkey_start_pos(&orig->k), new);
@ -1240,7 +1254,7 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
*/
/*
* For transactional consistency, set_needs_rebalance() has to be called
* For transactional consistency, set_needs_reconcile() has to be called
* with the io_opts from the btree in the same transaction:
*/
struct bch_inode_unpacked inode;
@ -1249,7 +1263,7 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
return bch2_extent_update_i_size_sectors(trans, iter,
min(new->k.p.offset << 9, new_i_size), 0, &inode) ?:
(bch2_inode_opts_get_inode(c, &inode, &opts),
bch2_bkey_set_needs_rebalance(c, &opts, new,
bch2_bkey_set_needs_reconcile(trans, NULL, &opts, new,
SET_NEEDS_REBALANCE_foreground,
op->opts.change_cookie)) ?:
bch2_trans_update(trans, iter, new,
@ -1266,7 +1280,8 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents,
bkey_start_pos(&orig->k), orig->k.p,
BTREE_ITER_intent, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
&op->res, NULL,
BCH_TRANS_COMMIT_no_enospc, ({
bch2_nocow_write_convert_one_unwritten(trans, &iter, op, orig, k, op->new_i_size);
}));
if (ret)

View File

@ -32,7 +32,7 @@
#include "data/ec.h"
#include "data/move.h"
#include "data/nocow_locking.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "debug/sysfs.h"
#include "debug/tests.h"
@ -216,8 +216,8 @@ rw_attribute(label);
read_attribute(copy_gc_wait);
sysfs_pd_controller_attribute(rebalance);
read_attribute(rebalance_status);
read_attribute(reconcile_status);
read_attribute(reconcile_scan_pending);
read_attribute(snapshot_delete_status);
read_attribute(recovery_status);
@ -332,13 +332,14 @@ SHOW(bch2_fs)
if (attr == &sysfs_gc_gens_pos)
bch2_gc_gens_pos_to_text(out, c);
sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */
if (attr == &sysfs_copy_gc_wait)
bch2_copygc_wait_to_text(out, c);
if (attr == &sysfs_rebalance_status)
bch2_rebalance_status_to_text(out, c);
if (attr == &sysfs_reconcile_status)
bch2_reconcile_status_to_text(out, c);
if (attr == &sysfs_reconcile_scan_pending)
bch2_reconcile_scan_pending_to_text(out, c);
if (attr == &sysfs_snapshot_delete_status)
bch2_snapshot_delete_status_to_text(out, c);
@ -409,8 +410,6 @@ STORE(bch2_fs)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
sysfs_pd_controller_store(rebalance, &c->rebalance.pd);
/* Debugging: */
if (!test_bit(BCH_FS_started, &c->flags))
@ -516,7 +515,8 @@ struct attribute *bch2_fs_files[] = {
&sysfs_btree_cache_size,
&sysfs_btree_write_stats,
&sysfs_rebalance_status,
&sysfs_reconcile_status,
&sysfs_reconcile_scan_pending,
&sysfs_snapshot_delete_status,
&sysfs_recovery_status,
@ -627,8 +627,6 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_copy_gc_wait,
sysfs_pd_controller_files(rebalance),
&sysfs_moving_ctxts,
&sysfs_internal_uuid,

View File

@ -576,8 +576,7 @@ fsck_err:
static void __bch2_inode_unpacked_to_text(struct printbuf *out,
struct bch_inode_unpacked *inode)
{
prt_printf(out, "\n");
guard(printbuf_indent)(out);
prt_newline(out);
prt_printf(out, "mode=%o\n", inode->bi_mode);
prt_str(out, "flags=");
@ -604,6 +603,7 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
{
prt_printf(out, "inum: %llu:%u ", inode->bi_inum, inode->bi_snapshot);
guard(printbuf_indent)(out);
__bch2_inode_unpacked_to_text(out, inode);
}
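This hunk moves the guard(printbuf_indent)() scope out to the caller, so the extra indentation covers everything __bch2_inode_unpacked_to_text() prints and is dropped automatically when bch2_inode_unpacked_to_text() returns. The kernel's guard() machinery is built on the compiler's cleanup attribute; a minimal user-space analogue of scope-based indentation, with illustrative names rather than the bcachefs printbuf API:

#include <stdio.h>

static int indent_level;

static void indent_exit(int **unused)
{
	(void) unused;
	--indent_level;
}

/* Bump the indent level for the current scope; undo it automatically on exit. */
#define INDENT_GUARD() \
	__attribute__((cleanup(indent_exit))) int *__indent_guard = &indent_level; \
	++indent_level

static void print_inode(void)
{
	INDENT_GUARD();
	printf("%*smode=%o\n", indent_level * 2, "", 0644);
}

int main(void)
{
	printf("before: %d\n", indent_level);
	print_inode();
	printf("after:  %d\n", indent_level);
	return 0;
}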

View File

@ -292,14 +292,14 @@ void bch2_inode_opts_get_inode(struct bch_fs *, struct bch_inode_unpacked *, str
int bch2_inode_set_casefold(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *, unsigned);
#include "data/rebalance.h"
#include "data/reconcile.h"
static inline struct bch_extent_rebalance
bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode)
static inline struct bch_extent_reconcile
bch2_inode_reconcile_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode)
{
struct bch_inode_opts io_opts;
bch2_inode_opts_get_inode(c, inode, &io_opts);
return io_opts_to_rebalance_opts(c, &io_opts);
return io_opts_to_reconcile_opts(c, &io_opts);
}
#define BCACHEFS_ROOT_SUBVOL_INUM \

View File

@ -6,7 +6,7 @@
#include "btree/update.h"
#include "data/extents.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "fs/acl.h"
#include "fs/dirent.h"

View File

@ -7,9 +7,11 @@
#include "alloc/check.h"
#include "alloc/replicas.h"
#include "btree/interior.h"
#include "data/ec.h"
#include "data/migrate.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "debug/sysfs.h"
@ -520,7 +522,7 @@ int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb, struct prin
bch2_dev_sysfs_online(c, ca);
bch2_rebalance_wakeup(c);
bch2_reconcile_wakeup(c);
return 0;
}
@ -598,6 +600,17 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
bch_notice(ca, "%s", bch2_member_states[new_state]);
bool do_reconcile_scan =
new_state == BCH_MEMBER_STATE_rw ||
new_state == BCH_MEMBER_STATE_failed;
struct reconcile_scan s = new_state == BCH_MEMBER_STATE_rw
? (struct reconcile_scan) { .type = REBALANCE_SCAN_pending }
: (struct reconcile_scan) { .type = REBALANCE_SCAN_device, .dev = ca->dev_idx };
if (do_reconcile_scan)
try(bch2_set_reconcile_needs_scan(c, s, false));
scoped_guard(mutex, &c->sb_lock) {
struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
SET_BCH_MEMBER_STATE(m, new_state);
@ -607,7 +620,8 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
if (new_state == BCH_MEMBER_STATE_rw)
__bch2_dev_read_write(c, ca);
bch2_rebalance_wakeup(c);
if (do_reconcile_scan)
try(bch2_set_reconcile_needs_scan(c, s, true));
return ret;
}
@ -648,6 +662,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags,
if (ret)
goto err;
bch2_btree_interior_updates_flush(c);
/* Check if device still has data before blowing away alloc info */
struct bch_dev_usage usage = bch2_dev_usage_read(ca);
for (unsigned i = 0; i < BCH_DATA_NR; i++)
@ -795,6 +811,15 @@ int bch2_dev_add(struct bch_fs *c, const char *path, struct printbuf *err)
if (ret)
goto err;
struct reconcile_scan s = { .type = REBALANCE_SCAN_pending };
if (test_bit(BCH_FS_started, &c->flags)) {
/*
* Technically incorrect, but 'bcachefs image update' is the
* only thing that adds a device to a not-started filesystem:
*/
try(bch2_set_reconcile_needs_scan(c, s, false));
}
scoped_guard(rwsem_write, &c->state_lock) {
scoped_guard(mutex, &c->sb_lock) {
SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true);
@ -879,6 +904,9 @@ int bch2_dev_add(struct bch_fs *c, const char *path, struct printbuf *err)
};
kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp);
}
if (test_bit(BCH_FS_started, &c->flags))
try(bch2_set_reconcile_needs_scan(c, s, true));
out:
bch_err_fn(c, ret);
return ret;
@ -983,6 +1011,11 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets, struct p
return -EINVAL;
}
bool wakeup_reconcile_pending = nbuckets > ca->mi.nbuckets;
struct reconcile_scan s = { .type = REBALANCE_SCAN_pending };
if (wakeup_reconcile_pending)
try(bch2_set_reconcile_needs_scan(c, s, false));
if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) {
prt_printf(err, "New device size too big (%llu greater than max %u)\n",
nbuckets, BCH_MEMBER_NBUCKETS_MAX);
@ -1026,6 +1059,9 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets, struct p
}
bch2_recalc_capacity(c);
if (wakeup_reconcile_pending)
try(bch2_set_reconcile_needs_scan(c, s, true));
return 0;
}
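Device state changes, device adds, and resizes above all follow the same two-phase pattern: bch2_set_reconcile_needs_scan() is called once before the change lands and once after it, and the added boolean appears to fold in the old "wake the worker afterwards" step (compare the opt_hook_io() hunk, where the separate bch2_rebalance_wakeup() call disappears into it). A schematic user-space sketch of the pattern; the types and function bodies are placeholders, not the bcachefs implementation.

#include <stdbool.h>
#include <stdio.h>

struct scan { const char *type; };	/* stand-in for struct reconcile_scan */

/* Stand-in: in bcachefs this would persist the scan request; 'wake'
 * mirrors the new boolean argument and wakes the worker when true. */
static int set_needs_scan(struct scan s, bool wake)
{
	printf("scan %s queued%s\n", s.type, wake ? ", waking worker" : "");
	return 0;
}

static int change_device_state(void)
{
	struct scan s = { .type = "device" };

	if (set_needs_scan(s, false))	/* record intent before the change */
		return -1;

	/* ... apply the state change ... */

	return set_needs_scan(s, true);	/* arm and wake the worker afterwards */
}

int main(void)
{
	return change_device_state();
}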

View File

@ -33,7 +33,7 @@
#include "data/move.h"
#include "data/nocow_locking.h"
#include "data/read.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "data/write.h"
#include "debug/async_objs.h"
@ -266,7 +266,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
bch2_fs_ec_stop(c);
bch2_open_buckets_stop(c, NULL, true);
bch2_rebalance_stop(c);
bch2_reconcile_stop(c);
bch2_copygc_stop(c);
bch2_fs_ec_flush(c);
@ -514,7 +514,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
int ret = bch2_journal_reclaim_start(&c->journal) ?:
bch2_copygc_start(c) ?:
bch2_rebalance_start(c);
bch2_reconcile_start(c);
if (ret) {
bch2_fs_read_only(c);
return ret;
@ -559,7 +559,7 @@ static void __bch2_fs_free(struct bch_fs *c)
utf8_unload(c->cf_encoding);
#endif
bch2_rebalance_stop(c);
bch2_reconcile_stop(c);
bch2_copygc_stop(c);
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
@ -568,7 +568,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_fs_snapshots_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_replicas_exit(c);
bch2_fs_rebalance_exit(c);
bch2_fs_reconcile_exit(c);
bch2_fs_quota_exit(c);
bch2_fs_nocow_locking_exit(c);
bch2_fs_journal_exit(&c->journal);
@ -769,7 +769,7 @@ int bch2_fs_init_rw(struct bch_fs *c)
bch2_fs_journal_init(&c->journal) ?:
bch2_journal_reclaim_start(&c->journal) ?:
bch2_copygc_start(c) ?:
bch2_rebalance_start(c);
bch2_reconcile_start(c);
if (ret)
return ret;
@ -1000,16 +1000,17 @@ static int bch2_fs_opt_version_init(struct bch_fs *c, struct printbuf *out)
unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
if (BCH_SB_INITIALIZED(c->disk_sb.sb)) {
if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) {
if (!(c->sb.features & BIT_ULL(BCH_FEATURE_new_extent_overwrite))) {
prt_str_indented(out, "feature new_extent_overwrite not set, filesystem no longer supported\n");
return -EINVAL;
}
if (!c->sb.clean &&
!(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) {
prt_str_indented(out, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix\n");
if (c->sb.version_min < bcachefs_metadata_version_btree_ptr_sectors_written) {
prt_str_indented(out, "version_min < version_btree_ptr_sectors_written\n");
prt_str_indented(out, "filesystem needs upgrade from older version; run fsck from older bcachefs-tools to fix\n");
return -EINVAL;
}
}
return 0;
@ -1179,7 +1180,7 @@ static int bch2_fs_init(struct bch_fs *c, struct bch_sb *sb,
try(bch2_fs_fsio_init(c));
try(bch2_fs_fs_io_direct_init(c));
try(bch2_fs_io_read_init(c));
try(bch2_fs_rebalance_init(c));
try(bch2_fs_reconcile_init(c));
try(bch2_fs_sb_errors_init(c));
try(bch2_fs_vfs_init(c));

View File

@ -13,7 +13,7 @@
#include "data/copygc.h"
#include "data/ec.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "fs/check.h"
#include "fs/inode.h"
@ -571,7 +571,7 @@ static int __bch2_run_recovery_passes(struct bch_fs *c, u64 orig_passes_to_run,
if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
r->pass_done > BCH_RECOVERY_PASS_check_snapshots) {
bch2_copygc_wakeup(c);
bch2_rebalance_wakeup(c);
bch2_reconcile_wakeup(c);
}
}

View File

@ -60,11 +60,11 @@
x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \
x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
x(check_nlinks, 31, PASS_FSCK) \
x(check_rebalance_work, 43, PASS_ONLINE|PASS_FSCK) \
x(check_reconcile_work, 43, PASS_ONLINE|PASS_FSCK) \
x(resume_logged_ops, 23, PASS_ALWAYS) \
x(delete_dead_inodes, 32, PASS_ALWAYS) \
x(fix_reflink_p, 33, 0) \
x(set_fs_needs_rebalance, 34, 0) \
x(set_fs_needs_reconcile, 34, 0) \
x(lookup_root_inode, 42, PASS_ALWAYS|PASS_SILENT)
/* We normally enumerate recovery passes in the order we run them: */

View File

@ -16,7 +16,7 @@
#include "data/move.h"
#include "data/copygc.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "fs/dirent.h"
#include "fs/logged_ops.h"
@ -875,21 +875,6 @@ use_clean:
bch2_write_super(c);
}
if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) ||
c->sb.version_min < bcachefs_metadata_version_btree_ptr_sectors_written) {
struct bch_move_stats stats;
bch2_move_stats_init(&stats, "recovery");
CLASS(printbuf, buf)();
bch2_version_to_text(&buf, c->sb.version_min);
bch_info(c, "scanning for old btree nodes: min_version %s", buf.buf);
try(bch2_fs_read_write_early(c));
try(bch2_scan_old_btree_nodes(c, &stats));
bch_info(c, "scanning for old btree nodes done");
}
if (test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) &&
!c->opts.nochanges) {
bch2_fs_read_write_early(c);
@ -1009,7 +994,7 @@ int bch2_fs_initialize(struct bch_fs *c)
c->recovery.pass_done = BCH_RECOVERY_PASS_NR - 1;
bch2_copygc_wakeup(c);
bch2_rebalance_wakeup(c);
bch2_reconcile_wakeup(c);
if (enabled_qtypes(c))
try(bch2_fs_quota_read(c));

View File

@ -11,7 +11,7 @@
#include "data/compress.h"
#include "data/copygc.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "init/dev.h"
#include "init/error.h"
@ -108,6 +108,11 @@ static const char * const __bch2_fs_usage_types[] = {
NULL
};
const char * const __bch2_reconcile_accounting_types[] = {
BCH_REBALANCE_ACCOUNTING()
NULL
};
#undef x
static void prt_str_opt_boundscheck(struct printbuf *out, const char * const opts[],
@ -132,6 +137,7 @@ PRT_STR_OPT_BOUNDSCHECKED(csum_opt, enum bch_csum_opt);
PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type);
PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type);
PRT_STR_OPT_BOUNDSCHECKED(str_hash_type, enum bch_str_hash_type);
PRT_STR_OPT_BOUNDSCHECKED(reconcile_accounting_type, enum bch_reconcile_accounting_type);
static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
struct printbuf *err)
@ -525,7 +531,8 @@ void bch2_opts_to_text(struct printbuf *out,
}
}
static int opt_hook_io(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum bch_opt_id id, bool post)
static int opt_hook_io(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum bch_opt_id id,
u64 v, bool post)
{
if (!test_bit(BCH_FS_started, &c->flags))
return 0;
@ -539,16 +546,28 @@ static int opt_hook_io(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum bch_
case Opt_data_checksum:
case Opt_data_replicas:
case Opt_erasure_code: {
struct rebalance_scan s = {
struct reconcile_scan s = {
.type = !inum ? REBALANCE_SCAN_fs : REBALANCE_SCAN_inum,
.inum = inum,
};
try(bch2_set_rebalance_needs_scan(c, s));
if (post)
bch2_rebalance_wakeup(c);
try(bch2_set_reconcile_needs_scan(c, s, post));
break;
}
case Opt_metadata_target:
case Opt_metadata_checksum:
case Opt_metadata_replicas:
try(bch2_set_reconcile_needs_scan(c,
(struct reconcile_scan) { .type = REBALANCE_SCAN_metadata, .dev = inum }, post));
break;
case Opt_durability:
if (!post && v > ca->mi.durability)
try(bch2_set_reconcile_needs_scan(c,
(struct reconcile_scan) { .type = REBALANCE_SCAN_pending }, post));
try(bch2_set_reconcile_needs_scan(c,
(struct reconcile_scan) { .type = REBALANCE_SCAN_device, .dev = inum }, post));
break;
default:
break;
}
@ -584,7 +603,7 @@ int bch2_opt_hook_pre_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, enum b
}
if (change)
try(opt_hook_io(c, ca, inum, id, false));
try(opt_hook_io(c, ca, inum, id, v, false));
return 0;
}
@ -600,11 +619,11 @@ int bch2_opts_hooks_pre_set(struct bch_fs *c)
void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum,
enum bch_opt_id id, u64 v)
{
opt_hook_io(c, ca, inum, id, true);
opt_hook_io(c, ca, inum, id, v, true);
switch (id) {
case Opt_rebalance_enabled:
bch2_rebalance_wakeup(c);
case Opt_reconcile_enabled:
bch2_reconcile_wakeup(c);
break;
case Opt_copygc_enabled:
bch2_copygc_wakeup(c);

View File

@ -25,6 +25,7 @@ extern const char * const __bch2_str_hash_types[];
extern const char * const bch2_str_hash_opts[];
extern const char * const __bch2_data_types[];
extern const char * const bch2_member_states[];
extern const char * const __bch2_reconcile_accounting_types[];
extern const char * const bch2_d_types[];
void bch2_prt_jset_entry_type(struct printbuf *, enum bch_jset_entry_type);
@ -34,6 +35,7 @@ void bch2_prt_csum_opt(struct printbuf *, enum bch_csum_opt);
void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type);
void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type);
void bch2_prt_str_hash_type(struct printbuf *, enum bch_str_hash_type);
void bch2_prt_reconcile_accounting_type(struct printbuf *, enum bch_reconcile_accounting_type);
static inline const char *bch2_d_type_str(unsigned d_type)
{
@ -500,17 +502,17 @@ enum fsck_err_opts {
BCH2_NO_SB_OPT, true, \
NULL, "Enable copygc: disable for debugging, or to\n"\
"quiet the system when doing performance testing\n")\
x(rebalance_enabled, u8, \
x(reconcile_enabled, u8, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, true, \
NULL, "Enable rebalance: disable for debugging, or to\n"\
NULL, "Enable reconcile: disable for debugging, or to\n"\
"quiet the system when doing performance testing\n")\
x(rebalance_on_ac_only, u8, \
x(reconcile_on_ac_only, u8, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH_SB_REBALANCE_AC_ONLY, false, \
NULL, "Enable rebalance while on mains power only\n") \
NULL, "Enable reconcile while on mains power only\n") \
x(auto_snapshot_deletion, u8, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \

View File

@ -275,8 +275,6 @@ void bch2_fs_mark_clean(struct bch_fs *c)
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_metadata);
c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_extents_above_btree_updates));
c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_btree_updates_journalled));
u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;

View File

@ -37,7 +37,7 @@ enum counters_flags {
x(io_move_write, 36, TYPE_SECTORS) \
x(io_move_start_fail, 39, TYPE_COUNTER) \
x(io_move_noop, 92, TYPE_COUNTER) \
x(io_move_created_rebalance, 83, TYPE_COUNTER) \
x(io_move_created_reconcile, 83, TYPE_COUNTER) \
x(io_move_evacuate_bucket, 84, TYPE_COUNTER) \
x(rebalance_extent, 96, TYPE_COUNTER) \
x(bucket_invalidate, 3, TYPE_COUNTER) \

View File

@ -52,7 +52,7 @@
BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \
x(rebalance_work, \
BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) \
BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_reconcile)) \
x(subvolume_fs_parent, \
BIT_ULL(BCH_RECOVERY_PASS_check_dirents), \
BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \
@ -110,7 +110,16 @@
BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set)\
x(btree_node_accounting, \
BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
BCH_FSCK_ERR_accounting_mismatch)
BCH_FSCK_ERR_accounting_mismatch) \
x(reconcile, \
BIT_ULL(BCH_RECOVERY_PASS_check_reconcile_work), \
BCH_FSCK_ERR_accounting_mismatch, \
BCH_FSCK_ERR_extent_io_opts_not_set)
#define UPGRADE_TABLE_INCOMPAT() \
x(reconcile, \
BIT_ULL(BCH_RECOVERY_PASS_check_reconcile_work), \
BCH_FSCK_ERR_extent_io_opts_not_set)
#define DOWNGRADE_TABLE() \
x(bucket_stripe_sectors, \
@ -175,17 +184,32 @@ struct upgrade_downgrade_entry {
UPGRADE_TABLE()
#undef x
#define x(ver, passes, ...) static const u16 upgrade_incompat_##ver##_errors[] = { __VA_ARGS__ };
UPGRADE_TABLE_INCOMPAT()
#undef x
static const struct upgrade_downgrade_entry upgrade_table[] = {
#define x(ver, passes, ...) { \
.recovery_passes = passes, \
.version = bcachefs_metadata_version_##ver,\
.nr_errors = ARRAY_SIZE(upgrade_##ver##_errors), \
.errors = upgrade_##ver##_errors, \
#define x(ver, passes, ...) { \
.recovery_passes = passes, \
.version = bcachefs_metadata_version_##ver, \
.nr_errors = ARRAY_SIZE(upgrade_##ver##_errors), \
.errors = upgrade_##ver##_errors, \
},
UPGRADE_TABLE()
#undef x
};
static const struct upgrade_downgrade_entry upgrade_table_incompat[] = {
#define x(ver, passes, ...) { \
.recovery_passes = passes, \
.version = bcachefs_metadata_version_##ver, \
.nr_errors = ARRAY_SIZE(upgrade_incompat_##ver##_errors), \
.errors = upgrade_incompat_##ver##_errors, \
},
UPGRADE_TABLE_INCOMPAT()
#undef x
};
static int have_stripes(struct bch_fs *c)
{
if (IS_ERR_OR_NULL(c->btree_roots_known[BTREE_ID_stripes].b))
@ -219,17 +243,17 @@ int bch2_sb_set_upgrade_extra(struct bch_fs *c)
return ret < 0 ? ret : 0;
}
void bch2_sb_set_upgrade(struct bch_fs *c,
unsigned old_version,
unsigned new_version)
static void __bch2_sb_set_upgrade(struct bch_fs *c,
unsigned old_version,
unsigned new_version,
const struct upgrade_downgrade_entry *table,
size_t nr_entries)
{
lockdep_assert_held(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
for (const struct upgrade_downgrade_entry *i = upgrade_table;
i < upgrade_table + ARRAY_SIZE(upgrade_table);
i++)
for (const struct upgrade_downgrade_entry *i = table; i < table + nr_entries; i++)
if (i->version > old_version && i->version <= new_version) {
u64 passes = i->recovery_passes;
@ -245,6 +269,24 @@ void bch2_sb_set_upgrade(struct bch_fs *c,
}
}
void bch2_sb_set_upgrade(struct bch_fs *c,
unsigned old_version,
unsigned new_version)
{
return __bch2_sb_set_upgrade(c, old_version, new_version,
upgrade_table,
ARRAY_SIZE(upgrade_table));
}
void bch2_sb_set_upgrade_incompat(struct bch_fs *c,
unsigned old_version,
unsigned new_version)
{
return __bch2_sb_set_upgrade(c, old_version, new_version,
upgrade_table_incompat,
ARRAY_SIZE(upgrade_table_incompat));
}
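bch2_sb_set_upgrade() and the new bch2_sb_set_upgrade_incompat() now share __bch2_sb_set_upgrade(), which walks whichever table it is given and applies every entry whose version falls in the half-open range (old_version, new_version]. A self-contained sketch of that selection logic, with made-up version numbers and pass bits:

#include <stddef.h>
#include <stdio.h>

struct upgrade_entry {
	unsigned		version;
	unsigned long long	recovery_passes;
};

static const struct upgrade_entry table[] = {
	{ .version = 10, .recovery_passes = 1ULL << 3 },
	{ .version = 12, .recovery_passes = 1ULL << 5 },
	{ .version = 15, .recovery_passes = 1ULL << 7 },
};

static unsigned long long passes_for_upgrade(unsigned old_v, unsigned new_v)
{
	unsigned long long passes = 0;

	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].version > old_v && table[i].version <= new_v)
			passes |= table[i].recovery_passes;
	return passes;
}

int main(void)
{
	/* upgrading from 10 to 14 picks up only the version-12 entry */
	printf("passes: 0x%llx\n", passes_for_upgrade(10, 14));
	return 0;
}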
#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ };
DOWNGRADE_TABLE()
#undef x

View File

@ -6,6 +6,7 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade;
int bch2_sb_downgrade_update(struct bch_fs *);
void bch2_sb_set_upgrade(struct bch_fs *, unsigned, unsigned);
void bch2_sb_set_upgrade_incompat(struct bch_fs *, unsigned, unsigned);
int bch2_sb_set_upgrade_extra(struct bch_fs *);
void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);

View File

@ -160,6 +160,10 @@ enum bch_fsck_flags {
x(extent_ptrs_redundant_stripe, 139, 0) \
x(extent_ptrs_unwritten, 140, 0) \
x(extent_ptrs_written_and_unwritten, 141, 0) \
x(extent_ptrs_all_invalid, 338, 0) \
x(extent_reconcile_bad_pending, 332, 0) \
x(extent_reconcile_bad_hipri, 333, 0) \
x(extent_reconcile_bad_replicas, 339, 0) \
x(ptr_to_invalid_device, 142, 0) \
x(ptr_to_removed_device, 322, FSCK_AUTOFIX) \
x(ptr_to_duplicate_device, 143, 0) \
@ -194,6 +198,7 @@ enum bch_fsck_flags {
x(stripe_pos_bad, 167, 0) \
x(stripe_val_size_bad, 168, 0) \
x(stripe_csum_granularity_bad, 290, 0) \
x(stripe_sectors_zero, 340, 0) \
x(stripe_sector_count_wrong, 169, 0) \
x(snapshot_tree_pos_bad, 170, 0) \
x(snapshot_tree_to_missing_snapshot, 171, 0) \
@ -338,10 +343,16 @@ enum bch_fsck_flags {
x(directory_size_mismatch, 303, FSCK_AUTOFIX) \
x(dirent_cf_name_too_big, 304, 0) \
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
x(reconcile_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(validate_error_in_commit, 329, 0) \
x(MAX, 330, 0)
x(extent_io_opts_not_set, 330, FSCK_AUTOFIX) \
x(extent_io_opts_unneeded, 331, FSCK_AUTOFIX) \
x(reconcile_bp_to_missing_btree_ptr, 310, FSCK_AUTOFIX) \
x(reconcile_bp_to_leaf_node_key, 334, FSCK_AUTOFIX) \
x(btree_ptr_with_no_reconcile_bp, 335, FSCK_AUTOFIX) \
x(btree_ptr_with_bad_reconcile_bp, 336, FSCK_AUTOFIX) \
x(btree_ptr_to_bad_reconcile_bp, 337, FSCK_AUTOFIX) \
x(MAX, 341, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,

View File

@ -1282,6 +1282,8 @@ void bch2_sb_upgrade_incompat(struct bch_fs *c)
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb,
max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), c->sb.version));
bch2_sb_set_upgrade_incompat(c, c->sb.version_incompat_allowed, c->sb.version);
bch2_write_super(c);
}

View File

@ -8,7 +8,7 @@
#include "btree/update.h"
#include "data/rebalance.h"
#include "data/reconcile.h"
#include "fs/acl.h"
#include "fs/check.h"
@ -108,23 +108,23 @@ static int bch2_write_inode_trans(struct btree_trans *trans,
struct bch_inode_info *inode,
inode_set_fn set,
void *p, unsigned fields,
bool *rebalance_changed)
bool *reconcile_changed)
{
struct bch_fs *c = trans->c;
CLASS(btree_iter_uninit, iter)(trans);
struct bch_inode_unpacked inode_u;
try(bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent));
struct bch_extent_rebalance old_r = bch2_inode_rebalance_opts_get(c, &inode_u);
struct bch_extent_reconcile old_r = bch2_inode_reconcile_opts_get(c, &inode_u);
if (set)
try(set(trans, inode, &inode_u, p));
struct bch_extent_rebalance new_r = bch2_inode_rebalance_opts_get(c, &inode_u);
*rebalance_changed = memcmp(&old_r, &new_r, sizeof(new_r));
if (*rebalance_changed)
try(bch2_set_rebalance_needs_scan_trans(trans,
(struct rebalance_scan) {
struct bch_extent_reconcile new_r = bch2_inode_reconcile_opts_get(c, &inode_u);
*reconcile_changed = memcmp(&old_r, &new_r, sizeof(new_r));
if (*reconcile_changed)
try(bch2_set_reconcile_needs_scan_trans(trans,
(struct reconcile_scan) {
.type = REBALANCE_SCAN_inum,
.inum = inode_u.bi_inum }));
@ -145,12 +145,12 @@ int __must_check bch2_write_inode(struct bch_fs *c,
void *p, unsigned fields)
{
CLASS(btree_trans, trans)(c);
bool rebalance_changed = false;
bool reconcile_changed = false;
int ret = lockrestart_do(trans, bch2_write_inode_trans(trans, inode, set, p,
fields, &rebalance_changed));
fields, &reconcile_changed));
if (!ret && rebalance_changed)
bch2_rebalance_wakeup(c);
if (!ret && reconcile_changed)
bch2_reconcile_wakeup(c);
bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
"%s: inode %llu:%llu not found when updating",