mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-12-10 00:00:24 +03:00)
Update bcachefs sources to b4927db2cdc7 bcachefs: bcachefs_metadata_version_fast_device_removal
Some checks failed
build / bcachefs-tools-deb (ubuntu-22.04) (push) Has been cancelled
build / bcachefs-tools-deb (ubuntu-24.04) (push) Has been cancelled
build / bcachefs-tools-rpm (push) Has been cancelled
build / bcachefs-tools-msrv (push) Has been cancelled
Nix Flake actions / nix-matrix (push) Has been cancelled
Nix Flake actions / ${{ matrix.name }} (${{ matrix.system }}) (push) Has been cancelled
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent 6e4bda5ad5
commit 401a20ed98
@@ -1 +1 @@
-5a0455ae19afb354634b3c5c9bf55d2171005a2f
+b4927db2cdc7f124f968f9eaa1d785298ae31c1a
@@ -1255,6 +1255,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
 	if (unlikely(ret))
 		return ret;
 
+	if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
+		erasure_code = false;
+
 	req->nr_replicas = nr_replicas;
 	req->target = target;
 	req->ec = erasure_code;
@@ -1262,9 +1265,6 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
 	req->flags = flags;
 	req->devs_have = devs_have;
 
-	if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
-		erasure_code = false;
-
 	BUG_ON(!nr_replicas || !nr_replicas_required);
 retry:
 	req->ptrs.nr = 0;
@@ -696,7 +696,8 @@ struct bch_sb_field_ext {
 	x(stripe_lru,			BCH_VERSION(1, 23))	\
 	x(casefolding,			BCH_VERSION(1, 24))	\
 	x(extent_flags,			BCH_VERSION(1, 25))	\
-	x(snapshot_deletion_v2,		BCH_VERSION(1, 26))
+	x(snapshot_deletion_v2,		BCH_VERSION(1, 26))	\
+	x(fast_device_removal,		BCH_VERSION(1, 27))
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
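Aside - an illustrative, self-contained sketch (names here are stand-ins, not the real header) of the x-macro expansion the version table above relies on: appending an entry at the end of the list mints the next metadata version number, which is why fast_device_removal becomes 1.27.

#include <stdio.h>

#define BCH_VERSION(major, minor)	(((major) << 10) | (minor))

/* trimmed stand-in for the full table in the hunk above */
#define METADATA_VERSIONS()				\
	x(snapshot_deletion_v2,	BCH_VERSION(1, 26))	\
	x(fast_device_removal,	BCH_VERSION(1, 27))

enum metadata_version {
#define x(t, n) version_##t = n,
	METADATA_VERSIONS()
#undef x
};

int main(void)
{
	/* 1.27 encodes as (1 << 10) | 27 = 1051 */
	printf("fast_device_removal = %u\n", version_fast_device_removal);
	return 0;
}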
@@ -1079,6 +1079,10 @@ out:
 	 * allocator thread - issue wakeup in case they blocked on gc_lock:
 	 */
 	closure_wake_up(&c->freelist_wait);
+
+	if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags))
+		bch2_sb_members_clean_deleted(c);
+
 	bch_err_fn(c, ret);
 	return ret;
 }
@@ -212,17 +212,13 @@ bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target)
 	case TARGET_DEV:
 		return dev == t.dev;
 	case TARGET_GROUP: {
-		rcu_read_lock();
 		struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
 		const struct bch_devs_mask *m =
 			g && t.group < g->nr && !g->entries[t.group].deleted
 			? &g->entries[t.group].devs
 			: NULL;
 
-		bool ret = m ? test_bit(dev, m->d) : false;
-		rcu_read_unlock();
-
-		return ret;
+		return m ? test_bit(dev, m->d) : false;
 	}
 	default:
 		BUG();
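The hunk above removes the helper's own RCU critical section; the rebalance.c hunks further down add rcu_read_lock()/rcu_read_unlock() around the callers instead, so one read-side section covers a whole batch of lookups. A minimal standalone model of that contract, using a pthread rwlock as a stand-in for RCU (all names illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static int group_of[8];	/* group_of[dev] = group id */

/* old shape: helper takes and drops the lock per lookup */
static bool dev_in_group_locking(unsigned dev, unsigned group)
{
	pthread_rwlock_rdlock(&lock);
	bool ret = group_of[dev] == (int) group;
	pthread_rwlock_unlock(&lock);
	return ret;
}

/* new shape: caller owns the read-side critical section */
static bool dev_in_group(unsigned dev, unsigned group)
{
	return group_of[dev] == (int) group;
}

static unsigned count_group(unsigned group)
{
	unsigned n = 0;

	pthread_rwlock_rdlock(&lock);	/* one lock for the whole batch */
	for (unsigned dev = 0; dev < 8; dev++)
		n += dev_in_group(dev, group);
	pthread_rwlock_unlock(&lock);
	return n;
}

int main(void)
{
	group_of[3] = 1;
	printf("%d %u\n", dev_in_group_locking(3, 1), count_group(1));
	return 0;
}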
@@ -1121,8 +1121,9 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke
 static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts,
 			    struct bch_extent_ptr *ptr)
 {
-	if (!opts->promote_target ||
-	    !bch2_dev_in_target(c, ptr->dev, opts->promote_target))
+	unsigned target = opts->promote_target ?: opts->foreground_target;
+
+	if (target && !bch2_dev_in_target(c, ptr->dev, target))
 		return false;
 
 	struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
@@ -1135,33 +1136,43 @@ void bch2_extent_ptr_set_cached(struct bch_fs *c,
 				struct bkey_s k,
 				struct bch_extent_ptr *ptr)
 {
-	struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+	struct bkey_ptrs ptrs;
 	union bch_extent_entry *entry;
 	struct extent_ptr_decoded p;
+	bool have_cached_ptr;
 
 	rcu_read_lock();
-	if (!want_cached_ptr(c, opts, ptr)) {
-		bch2_bkey_drop_ptr_noerror(k, ptr);
-		goto out;
+restart_drop_ptrs:
+	ptrs = bch2_bkey_ptrs(k);
+	have_cached_ptr = false;
+
+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		/*
+		 * Check if it's erasure coded - stripes can't contain cached
+		 * data. Possibly something we can fix in the future?
+		 */
+		if (&entry->ptr == ptr && p.has_ec)
+			goto drop;
+
+		if (p.ptr.cached) {
+			if (have_cached_ptr || !want_cached_ptr(c, opts, &p.ptr)) {
+				bch2_bkey_drop_ptr_noerror(k, &entry->ptr);
+				goto restart_drop_ptrs;
+			}
+
+			have_cached_ptr = true;
+		}
 	}
 
-	/*
-	 * Stripes can't contain cached data, for - reasons.
-	 *
-	 * Possibly something we can fix in the future?
-	 */
-	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-		if (&entry->ptr == ptr) {
-			if (p.has_ec)
-				bch2_bkey_drop_ptr_noerror(k, ptr);
-			else
-				ptr->cached = true;
-			goto out;
-		}
+	if (have_cached_ptr || !want_cached_ptr(c, opts, ptr))
+		goto drop;
 
-	BUG();
-out:
+	ptr->cached = true;
 	rcu_read_unlock();
+	return;
+drop:
+	rcu_read_unlock();
+	bch2_bkey_drop_ptr_noerror(k, ptr);
 }
 
 /*
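The restart_drop_ptrs label above is the standard "restart after mutation" idiom: dropping a pointer shifts the remaining entries, invalidating the iteration cursor, so the loop starts over from the top. A self-contained toy model (array and names are illustrative, not bcachefs types):

#include <stdbool.h>
#include <stdio.h>

struct ptrs { unsigned nr; int dev[8]; };	/* negative dev = cached ptr */

static void drop(struct ptrs *p, unsigned i)
{
	for (; i + 1 < p->nr; i++)
		p->dev[i] = p->dev[i + 1];
	p->nr--;
}

static bool want(int dev)
{
	return dev == -1;	/* pretend only device 1's cache is wanted */
}

/* keep at most one wanted cached pointer, dropping the rest */
static void drop_extra_cached(struct ptrs *p)
{
	bool have_cached;
restart:
	have_cached = false;
	for (unsigned i = 0; i < p->nr; i++) {
		if (p->dev[i] >= 0)
			continue;
		if (have_cached || !want(p->dev[i])) {
			drop(p, i);
			goto restart;	/* indices shifted: start over */
		}
		have_cached = true;
	}
}

int main(void)
{
	struct ptrs p = { 4, { 0, -1, -2, -1 } };
	drop_extra_cached(&p);
	printf("%u ptrs left\n", p.nr);	/* 2: dev 0 plus one cached */
	return 0;
}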
@@ -790,6 +790,7 @@ static int ref_visible2(struct bch_fs *c,
 
 struct inode_walker_entry {
 	struct bch_inode_unpacked inode;
+	bool	whiteout;
 	u64	count;
 	u64	i_size;
 };
@@ -818,12 +819,20 @@ static struct inode_walker inode_walker_init(void)
 static int add_inode(struct bch_fs *c, struct inode_walker *w,
 		     struct bkey_s_c inode)
 {
-	struct bch_inode_unpacked u;
-
-	return bch2_inode_unpack(inode, &u) ?:
-		darray_push(&w->inodes, ((struct inode_walker_entry) {
-		.inode = u,
+	int ret = darray_push(&w->inodes, ((struct inode_walker_entry) {
+		.whiteout = !bkey_is_inode(inode.k),
 	}));
+	if (ret)
+		return ret;
+
+	struct inode_walker_entry *n = &darray_last(w->inodes);
+	if (!n->whiteout) {
+		return bch2_inode_unpack(inode, &n->inode);
+	} else {
+		n->inode.bi_inum	= inode.k->p.inode;
+		n->inode.bi_snapshot	= inode.k->p.snapshot;
+		return 0;
+	}
 }
 
 static int get_inodes_all_snapshots(struct btree_trans *trans,
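The rewritten add_inode() above uses a push-then-fill pattern: append a zero-initialized entry first, then write through a pointer to the tail element so the unpack fills the array slot directly instead of going through a stack copy. A minimal standalone darray stand-in (not the kernel's darray) showing the shape:

#include <stdio.h>
#include <stdlib.h>

struct entry { unsigned long inum; unsigned snapshot; int whiteout; };
struct darray { struct entry *data; size_t nr, size; };

static int darray_push(struct darray *d, struct entry e)
{
	if (d->nr == d->size) {
		size_t n = d->size ? d->size * 2 : 8;
		void *p = realloc(d->data, n * sizeof(*d->data));
		if (!p)
			return -1;
		d->data = p;
		d->size = n;
	}
	d->data[d->nr++] = e;
	return 0;
}

static int add_entry(struct darray *d, unsigned long inum, unsigned snap,
		     int whiteout)
{
	/* push first, then fill the tail element in place */
	int ret = darray_push(d, (struct entry) { .whiteout = whiteout });
	if (ret)
		return ret;

	struct entry *n = &d->data[d->nr - 1];	/* darray_last() */
	n->inum = inum;
	n->snapshot = snap;
	return 0;
}

int main(void)
{
	struct darray d = { 0 };
	add_entry(&d, 4096, 7, 0);
	printf("%zu entries, inum %lu\n", d.nr, d.data[0].inum);
	free(d.data);
	return 0;
}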
@@ -843,13 +852,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
 	w->recalculate_sums = false;
 	w->inodes.nr = 0;
 
-	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
-				     BTREE_ITER_all_snapshots, k, ret) {
-		if (k.k->p.offset != inum)
+	for_each_btree_key_max_norestart(trans, iter,
+			BTREE_ID_inodes, POS(0, inum), SPOS(0, inum, U32_MAX),
+			BTREE_ITER_all_snapshots, k, ret) {
+		ret = add_inode(c, w, k);
+		if (ret)
 			break;
-
-		if (bkey_is_inode(k.k))
-			add_inode(c, w, k);
 	}
 	bch2_trans_iter_exit(trans, &iter);
@@ -861,63 +869,6 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
 	return 0;
 }
 
-static struct inode_walker_entry *
-lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, struct bkey_s_c k)
-{
-	struct bch_fs *c = trans->c;
-
-	struct inode_walker_entry *i;
-	__darray_for_each(w->inodes, i)
-		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->inode.bi_snapshot))
-			goto found;
-
-	return NULL;
-found:
-	BUG_ON(k.k->p.snapshot > i->inode.bi_snapshot);
-
-	struct printbuf buf = PRINTBUF;
-	int ret = 0;
-
-	if (fsck_err_on(k.k->p.snapshot != i->inode.bi_snapshot,
-			trans, snapshot_key_missing_inode_snapshot,
-			"have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
-			"unexpected because we should always update the inode when we update a key in that inode\n"
-			"%s",
-			w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot,
-			(bch2_bkey_val_to_text(&buf, c, k),
-			 buf.buf))) {
-		struct bch_inode_unpacked new = i->inode;
-
-		new.bi_snapshot = k.k->p.snapshot;
-
-		ret = __bch2_fsck_write_inode(trans, &new) ?:
-			bch2_trans_commit(trans, NULL, NULL, 0) ?:
-			-BCH_ERR_transaction_restart_nested;
-		goto fsck_err;
-	}
-
-	printbuf_exit(&buf);
-	return i;
-fsck_err:
-	printbuf_exit(&buf);
-	return ERR_PTR(ret);
-}
-
-static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
-					     struct inode_walker *w,
-					     struct bkey_s_c k)
-{
-	if (w->last_pos.inode != k.k->p.inode) {
-		int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
-		if (ret)
-			return ERR_PTR(ret);
-	}
-
-	w->last_pos = k.k->p;
-
-	return lookup_inode_for_snapshot(trans, w, k);
-}
-
 static int get_visible_inodes(struct btree_trans *trans,
 			      struct inode_walker *w,
 			      struct snapshots_seen *s,
@@ -953,6 +904,80 @@ static int get_visible_inodes(struct btree_trans *trans,
 	return ret;
 }
 
+static struct inode_walker_entry *
+lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, struct bkey_s_c k)
+{
+	struct bch_fs *c = trans->c;
+
+	struct inode_walker_entry *i;
+	__darray_for_each(w->inodes, i)
+		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->inode.bi_snapshot))
+			goto found;
+
+	return NULL;
+found:
+	BUG_ON(k.k->p.snapshot > i->inode.bi_snapshot);
+
+	struct printbuf buf = PRINTBUF;
+	int ret = 0;
+
+	if (fsck_err_on(k.k->p.snapshot != i->inode.bi_snapshot,
+			trans, snapshot_key_missing_inode_snapshot,
+			"have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
+			"unexpected because we should always update the inode when we update a key in that inode\n"
+			"%s",
+			w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot,
+			(bch2_bkey_val_to_text(&buf, c, k),
+			 buf.buf))) {
+		struct bch_inode_unpacked new = i->inode;
+
+		new.bi_snapshot = k.k->p.snapshot;
+
+		ret = __bch2_fsck_write_inode(trans, &new) ?:
+			bch2_trans_commit(trans, NULL, NULL, 0);
+		if (ret)
+			goto fsck_err;
+
+		struct inode_walker_entry new_entry = *i;
+
+		new_entry.inode.bi_snapshot	= k.k->p.snapshot;
+		new_entry.count			= 0;
+		new_entry.i_size		= 0;
+
+		while (i > w->inodes.data && i[-1].inode.bi_snapshot > k.k->p.snapshot)
+			--i;
+
+		size_t pos = i - w->inodes.data;
+		ret = darray_insert_item(&w->inodes, pos, new_entry);
+		if (ret)
+			goto fsck_err;
+
+		ret = -BCH_ERR_transaction_restart_nested;
+		goto fsck_err;
+	}
+
+	printbuf_exit(&buf);
+	return i;
+fsck_err:
+	printbuf_exit(&buf);
+	return ERR_PTR(ret);
+}
+
+static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
+					     struct inode_walker *w,
+					     struct bkey_s_c k)
+{
+	if (w->last_pos.inode != k.k->p.inode) {
+		int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	w->last_pos = k.k->p;
+
+	return lookup_inode_for_snapshot(trans, w, k);
+}
+
 /*
  * Prefer to delete the first one, since that will be the one at the wrong
  * offset:
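The darray_insert_item() call above walks the cursor backwards first so the per-inode entries stay ordered by snapshot id. A standalone model of that order-preserving insert (plain unsigned array as a stand-in for the inode_walker_entry darray):

#include <stdio.h>
#include <string.h>

/* insert snap into v[0..*nr), keeping ascending order */
static int insert_sorted(unsigned *v, size_t *nr, size_t cap, unsigned snap)
{
	if (*nr == cap)
		return -1;

	size_t i = *nr;
	while (i > 0 && v[i - 1] > snap)
		--i;

	memmove(&v[i + 1], &v[i], (*nr - i) * sizeof(*v));
	v[i] = snap;
	(*nr)++;
	return 0;
}

int main(void)
{
	unsigned v[8] = { 2, 5, 9 };
	size_t nr = 3;

	insert_sorted(v, &nr, 8, 7);
	for (size_t i = 0; i < nr; i++)
		printf("%u ", v[i]);	/* 2 5 7 9 */
	printf("\n");
	return 0;
}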
@@ -240,6 +240,7 @@ static int bch2_inode_unpack_v3(struct bkey_s_c k,
 	u64 v[2];
 
 	unpacked->bi_inum	= inode.k->p.offset;
+	unpacked->bi_snapshot	= inode.k->p.snapshot;
 	unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
 	unpacked->bi_hash_seed	= inode.v->bi_hash_seed;
 	unpacked->bi_flags	= le64_to_cpu(inode.v->bi_flags);
@@ -284,13 +285,12 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
 {
 	memset(unpacked, 0, sizeof(*unpacked));
 
-	unpacked->bi_snapshot = k.k->p.snapshot;
-
 	switch (k.k->type) {
 	case KEY_TYPE_inode: {
 		struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
 
 		unpacked->bi_inum	= inode.k->p.offset;
+		unpacked->bi_snapshot	= inode.k->p.snapshot;
 		unpacked->bi_journal_seq= 0;
 		unpacked->bi_hash_seed	= inode.v->bi_hash_seed;
 		unpacked->bi_flags	= le32_to_cpu(inode.v->bi_flags);
@@ -309,6 +309,7 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
 		struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
 
 		unpacked->bi_inum	= inode.k->p.offset;
+		unpacked->bi_snapshot	= inode.k->p.snapshot;
 		unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
 		unpacked->bi_hash_seed	= inode.v->bi_hash_seed;
 		unpacked->bi_flags	= le64_to_cpu(inode.v->bi_flags);
@@ -326,8 +327,6 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
 int bch2_inode_unpack(struct bkey_s_c k,
 		      struct bch_inode_unpacked *unpacked)
 {
-	unpacked->bi_snapshot = k.k->p.snapshot;
-
 	return likely(k.k->type == KEY_TYPE_inode_v3)
 		? bch2_inode_unpack_v3(k, unpacked)
 		: bch2_inode_unpack_slowpath(k, unpacked);
@@ -1465,6 +1465,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 
+	rcu_read_lock();
 	darray_for_each(*devs, i) {
 		struct bch_dev *ca = rcu_dereference(c->devs[*i]);
 		if (!ca)
@@ -1486,6 +1487,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
 			ja->bucket_seq[ja->cur_idx] = le64_to_cpu(seq);
 		}
 	}
+	rcu_read_unlock();
 }
 
 static void __journal_write_alloc(struct journal *j,
@@ -1498,7 +1500,8 @@ static void __journal_write_alloc(struct journal *j,
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 
 	darray_for_each(*devs, i) {
-		struct bch_dev *ca = rcu_dereference(c->devs[*i]);
+		struct bch_dev *ca = bch2_dev_get_ioref(c, *i, WRITE,
+					BCH_DEV_WRITE_REF_journal_write);
 		if (!ca)
 			continue;
 
@@ -1512,8 +1515,10 @@ static void __journal_write_alloc(struct journal *j,
 		    ca->mi.state != BCH_MEMBER_STATE_rw ||
 		    !ja->nr ||
 		    bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) ||
-		    sectors > ja->sectors_free)
+		    sectors > ja->sectors_free) {
+			enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
 			continue;
+		}
 
 		bch2_dev_stripe_increment(ca, &j->wp.stripe);
@@ -1536,15 +1541,8 @@ static void __journal_write_alloc(struct journal *j,
 	}
 }
 
-/**
- * journal_write_alloc - decide where to write next journal entry
- *
- * @j:		journal object
- * @w:		journal buf (entry to be written)
- *
- * Returns: 0 on success, or -BCH_ERR_insufficient_devices on failure
- */
-static int journal_write_alloc(struct journal *j, struct journal_buf *w)
+static int journal_write_alloc(struct journal *j, struct journal_buf *w,
+			       unsigned *replicas)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bch_devs_mask devs;
@@ -1552,29 +1550,18 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w)
 	unsigned sectors = vstruct_sectors(w->data, c->block_bits);
 	unsigned target = c->opts.metadata_target ?:
 		c->opts.foreground_target;
-	unsigned replicas = 0, replicas_want =
-		READ_ONCE(c->opts.metadata_replicas);
+	unsigned replicas_want = READ_ONCE(c->opts.metadata_replicas);
 	unsigned replicas_need = min_t(unsigned, replicas_want,
 				       READ_ONCE(c->opts.metadata_replicas_required));
 	bool advance_done = false;
 
-	rcu_read_lock();
-
-	/* We might run more than once if we have to stop and do discards: */
-	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&w->key));
-	bkey_for_each_ptr(ptrs, p) {
-		struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->dev);
-		if (ca)
-			replicas += ca->mi.durability;
-	}
-
 retry_target:
 	devs = target_rw_devs(c, BCH_DATA_journal, target);
 	devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs);
 retry_alloc:
-	__journal_write_alloc(j, w, &devs_sorted, sectors, &replicas, replicas_want);
+	__journal_write_alloc(j, w, &devs_sorted, sectors, replicas, replicas_want);
 
-	if (likely(replicas >= replicas_want))
+	if (likely(*replicas >= replicas_want))
 		goto done;
 
 	if (!advance_done) {
@@ -1583,18 +1570,16 @@ retry_alloc:
 		goto retry_alloc;
 	}
 
-	if (replicas < replicas_want && target) {
+	if (*replicas < replicas_want && target) {
 		/* Retry from all devices: */
 		target = 0;
 		advance_done = false;
 		goto retry_target;
 	}
 done:
-	rcu_read_unlock();
-
 	BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
 
-	return replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
+	return *replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
 }
 
 static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
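The new *replicas out-parameter above lets the count of replicas already allocated survive the caller's discard-and-retry loop, instead of being recomputed from the key on every pass. A toy model of that contract (all names are stand-ins):

#include <stdio.h>

/* each attempt may add capacity; progress accumulates in *replicas */
static int try_alloc(unsigned *replicas, unsigned want)
{
	if (*replicas < want)
		(*replicas)++;	/* stand-in for allocating one more device */
	return *replicas >= want ? 0 : -1;
}

static int alloc_with_retries(unsigned want, unsigned need)
{
	unsigned replicas = 0;	/* owned by the caller, as in the diff */

	for (int attempt = 0; attempt < 3; attempt++)
		if (!try_alloc(&replicas, want))
			break;	/* otherwise: discard old buckets, retry */

	/* degraded success is allowed down to replicas_need */
	return replicas >= need ? 0 : -1;
}

int main(void)
{
	printf("%d\n", alloc_with_retries(2, 1));	/* 0: degraded ok */
	return 0;
}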
@@ -1780,13 +1765,7 @@ static CLOSURE_CALLBACK(journal_write_submit)
 	unsigned sectors = vstruct_sectors(w->data, c->block_bits);
 
 	extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
-		struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE,
-					BCH_DEV_WRITE_REF_journal_write);
-		if (!ca) {
-			/* XXX: fix this */
-			bch_err(c, "missing device %u for journal write", ptr->dev);
-			continue;
-		}
+		struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
 
 		this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
 			     sectors);
@@ -2066,57 +2045,45 @@ CLOSURE_CALLBACK(bch2_journal_write)
 
 	j->write_start_time = local_clock();
 
-	mutex_lock(&j->buf_lock);
-	journal_buf_realloc(j, w);
-
-	ret = bch2_journal_write_prep(j, w);
-	mutex_unlock(&j->buf_lock);
-
-	if (unlikely(ret))
-		goto err;
-
 	spin_lock(&j->lock);
 	if (nr_rw_members > 1)
 		w->separate_flush = true;
 
 	ret = bch2_journal_write_pick_flush(j, w);
 	spin_unlock(&j->lock);
-	if (ret)
+
+	if (unlikely(ret))
 		goto err;
 
+	mutex_lock(&j->buf_lock);
+	journal_buf_realloc(j, w);
+
+	ret = bch2_journal_write_prep(j, w);
+	mutex_unlock(&j->buf_lock);
+	if (ret)
+		goto err;
+
+	j->entry_bytes_written += vstruct_bytes(w->data);
+
+	unsigned replicas_allocated = 0;
 	while (1) {
 		spin_lock(&j->lock);
-		ret = journal_write_alloc(j, w);
+		ret = journal_write_alloc(j, w, &replicas_allocated);
 		if (!ret || !j->can_discard)
 			break;
 
 		spin_unlock(&j->lock);
 		bch2_journal_do_discards(j);
 	}
 
-	if (ret && !bch2_journal_error(j)) {
-		struct printbuf buf = PRINTBUF;
-		buf.atomic++;
-
-		__bch2_journal_debug_to_text(&buf, j);
-		spin_unlock(&j->lock);
-		prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
-			   le64_to_cpu(w->data->seq),
-			   vstruct_sectors(w->data, c->block_bits),
-			   bch2_err_str(ret));
-		bch2_print_str(c, KERN_ERR, buf.buf);
-		printbuf_exit(&buf);
-	}
-	if (ret)
-		goto err;
+	if (unlikely(ret))
+		goto err_allocate_write;
 
+	spin_lock(&j->lock);
 	/*
 	 * write is allocated, no longer need to account for it in
 	 * bch2_journal_space_available():
 	 */
 	w->sectors = 0;
 	w->write_allocated = true;
-	j->entry_bytes_written += vstruct_bytes(w->data);
 
 	/*
 	 * journal entry has been compacted and allocated, recalculate space
@@ -2128,9 +2095,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
 
 	w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
 
-	if (c->opts.nochanges)
-		goto no_io;
-
 	/*
 	 * Mark journal replicas before we submit the write to guarantee
 	 * recovery will find the journal entries after a crash.
@@ -2141,15 +2105,33 @@ CLOSURE_CALLBACK(bch2_journal_write)
 	if (ret)
 		goto err;
 
+	if (c->opts.nochanges)
+		goto no_io;
+
 	if (!JSET_NO_FLUSH(w->data))
 		continue_at(cl, journal_write_preflush, j->wq);
 	else
 		continue_at(cl, journal_write_submit, j->wq);
 	return;
-no_io:
-	continue_at(cl, journal_write_done, j->wq);
-	return;
+err_allocate_write:
+	if (!bch2_journal_error(j)) {
+		struct printbuf buf = PRINTBUF;
+
+		bch2_journal_debug_to_text(&buf, j);
+		prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
+			   le64_to_cpu(w->data->seq),
+			   vstruct_sectors(w->data, c->block_bits),
+			   bch2_err_str(ret));
+		bch2_print_str(c, KERN_ERR, buf.buf);
+		printbuf_exit(&buf);
+	}
 err:
 	bch2_fatal_error(c);
+no_io:
+	extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
+		struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
+		enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
+	}
+
 	continue_at(cl, journal_write_done, j->wq);
 }
@@ -4,9 +4,11 @@
  */
 
 #include "bcachefs.h"
+#include "backpointers.h"
 #include "bkey_buf.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
+#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "errcode.h"
 #include "extents.h"
@@ -20,7 +22,7 @@
 #include "super-io.h"
 
 static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
-			 unsigned dev_idx, int flags, bool metadata)
+			 unsigned dev_idx, unsigned flags, bool metadata)
 {
 	unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
 	unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST;
@@ -37,11 +39,28 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
 	return 0;
 }
 
+static int drop_btree_ptrs(struct btree_trans *trans, struct btree_iter *iter,
+			   struct btree *b, unsigned dev_idx, unsigned flags)
+{
+	struct bch_fs *c = trans->c;
+	struct bkey_buf k;
+
+	bch2_bkey_buf_init(&k);
+	bch2_bkey_buf_copy(&k, c, &b->key);
+
+	int ret = drop_dev_ptrs(c, bkey_i_to_s(k.k), dev_idx, flags, true) ?:
+		bch2_btree_node_update_key(trans, iter, b, k.k, 0, false);
+
+	bch_err_fn(c, ret);
+	bch2_bkey_buf_exit(&k, c);
+	return ret;
+}
+
 static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
 				     struct btree_iter *iter,
 				     struct bkey_s_c k,
 				     unsigned dev_idx,
-				     int flags)
+				     unsigned flags)
 {
 	struct bch_fs *c = trans->c;
 	struct bkey_i *n;
@@ -77,9 +96,27 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
 	return 0;
 }
 
+static int bch2_dev_btree_drop_key(struct btree_trans *trans,
+				   struct bkey_s_c_backpointer bp,
+				   unsigned dev_idx,
+				   struct bkey_buf *last_flushed,
+				   unsigned flags)
+{
+	struct btree_iter iter;
+	struct btree *b = bch2_backpointer_get_node(trans, bp, &iter, last_flushed);
+	int ret = PTR_ERR_OR_ZERO(b);
+	if (ret)
+		return ret == -BCH_ERR_backpointer_to_overwritten_btree_node ? 0 : ret;
+
+	ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags);
+
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
 static int bch2_dev_usrdata_drop(struct bch_fs *c,
 				 struct progress_indicator_state *progress,
-				 unsigned dev_idx, int flags)
+				 unsigned dev_idx, unsigned flags)
 {
 	struct btree_trans *trans = bch2_trans_get(c);
 	enum btree_id id;
@@ -106,7 +143,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c,
 
 static int bch2_dev_metadata_drop(struct bch_fs *c,
 				  struct progress_indicator_state *progress,
-				  unsigned dev_idx, int flags)
+				  unsigned dev_idx, unsigned flags)
 {
 	struct btree_trans *trans;
 	struct btree_iter iter;
@@ -137,20 +174,12 @@ retry:
 		if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
 			goto next;
 
-		bch2_bkey_buf_copy(&k, c, &b->key);
-
-		ret = drop_dev_ptrs(c, bkey_i_to_s(k.k),
-				    dev_idx, flags, true);
-		if (ret)
-			break;
-
-		ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false);
+		ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags);
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
 			ret = 0;
 			continue;
 		}
 
-		bch_err_msg(c, ret, "updating btree node key");
 		if (ret)
 			break;
 next:
@@ -176,7 +205,57 @@ err:
 	return ret;
 }
 
-int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+static int data_drop_bp(struct btree_trans *trans, unsigned dev_idx,
+			struct bkey_s_c_backpointer bp, struct bkey_buf *last_flushed,
+			unsigned flags)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed);
+	int ret = bkey_err(k);
+	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+		return 0;
+	if (ret)
+		return ret;
+
+	if (!bch2_bkey_has_device_c(k, dev_idx))
+		goto out;
+
+	ret = bkey_is_btree_ptr(k.k)
+		? bch2_dev_btree_drop_key(trans, bp, dev_idx, last_flushed, flags)
+		: bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags);
+out:
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
+int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsigned flags)
+{
+	struct btree_trans *trans = bch2_trans_get(c);
+
+	struct bkey_buf last_flushed;
+	bch2_bkey_buf_init(&last_flushed);
+	bkey_init(&last_flushed.k->k);
+
+	int ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+		for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
+				POS(dev_idx, 0),
+				POS(dev_idx, U64_MAX), 0, k,
+				NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+			if (k.k->type != KEY_TYPE_backpointer)
+				continue;
+
+			data_drop_bp(trans, dev_idx, bkey_s_c_to_backpointer(k),
+				     &last_flushed, flags);
+		}));
+
+	bch2_bkey_buf_exit(&last_flushed, trans->c);
+	bch2_trans_put(trans);
+	bch_err_fn(c, ret);
+	return ret;
+}
+
+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, unsigned flags)
 {
 	struct progress_indicator_state progress;
 	bch2_progress_init(&progress, c,
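This is what makes the removal "fast": backpointers are keyed by device, so everything a device holds falls in the single key range POS(dev_idx, 0)..POS(dev_idx, U64_MAX), and removal walks that range instead of scanning every btree for matching pointers. A toy model of the range walk (flat sorted array standing in for the backpointers btree):

#include <stdio.h>

struct bp { unsigned dev; unsigned long long bucket_offset; };

/* entries for one device form one contiguous run when keys are
 * sorted by (dev, offset), mirroring the btree key order */
static void drop_dev_data(const struct bp *sorted, size_t nr, unsigned dev_idx)
{
	for (size_t i = 0; i < nr; i++) {
		if (sorted[i].dev < dev_idx)
			continue;
		if (sorted[i].dev > dev_idx)
			break;	/* past the device's range: done */
		printf("rewrite extent behind bp %llu\n",
		       sorted[i].bucket_offset);
	}
}

int main(void)
{
	const struct bp bps[] = { {0, 1}, {1, 8}, {1, 9}, {2, 3} };
	drop_dev_data(bps, 4, 1);
	return 0;
}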
@@ -2,6 +2,7 @@
 #ifndef _BCACHEFS_MIGRATE_H
 #define _BCACHEFS_MIGRATE_H
 
-int bch2_dev_data_drop(struct bch_fs *, unsigned, int);
+int bch2_dev_data_drop_by_backpointers(struct bch_fs *, unsigned, unsigned);
+int bch2_dev_data_drop(struct bch_fs *, unsigned, unsigned);
 
 #endif /* _BCACHEFS_MIGRATE_H */
@@ -80,11 +80,13 @@ static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c,
 	unsigned ptr_bit = 1;
 	unsigned rewrite_ptrs = 0;
 
+	rcu_read_lock();
 	bkey_for_each_ptr(ptrs, ptr) {
 		if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target))
 			rewrite_ptrs |= ptr_bit;
 		ptr_bit <<= 1;
 	}
+	rcu_read_unlock();
 
 	return rewrite_ptrs;
 }
@@ -132,10 +134,14 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
 		}
 	}
 incompressible:
-	if (opts->background_target)
+	if (opts->background_target) {
+		rcu_read_lock();
 		bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-			if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
+			if (!p.ptr.cached &&
+			    !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
 				sectors += p.crc.compressed_size;
+		rcu_read_unlock();
+	}
 
 	return sectors;
 }
@@ -737,11 +737,6 @@ int bch2_fs_recovery(struct bch_fs *c)
 		c->opts.read_only = true;
 	}
 
-	if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
-		bch_info(c, "filesystem is an unresized image file, mounting ro");
-		c->opts.read_only = true;
-	}
-
 	mutex_lock(&c->sb_lock);
 	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
 	bool write_sb = false;
@@ -895,6 +890,17 @@ use_clean:
 	if (ret)
 		goto err;
 
+	ret = bch2_fs_resize_on_mount(c);
+	if (ret) {
+		up_write(&c->state_lock);
+		goto err;
+	}
+
+	if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
+		bch_info(c, "filesystem is an unresized image file, mounting ro");
+		c->opts.read_only = true;
+	}
+
 	if (!c->opts.read_only &&
 	    (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))) {
 		bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate");
@@ -525,6 +525,7 @@ int bch2_sb_member_alloc(struct bch_fs *c)
 	unsigned u64s;
 	int best = -1;
 	u64 best_last_mount = 0;
+	unsigned nr_deleted = 0;
 
 	if (dev_idx < BCH_SB_MEMBERS_MAX)
 		goto have_slot;
@@ -535,7 +536,10 @@ int bch2_sb_member_alloc(struct bch_fs *c)
 			continue;
 
 		struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
-		if (bch2_member_alive(&m))
+
+		nr_deleted += uuid_equal(&m.uuid, &BCH_SB_MEMBER_DELETED_UUID);
+
+		if (!bch2_is_zero(&m.uuid, sizeof(m.uuid)))
 			continue;
 
 		u64 last_mount = le64_to_cpu(m.last_mount);
@@ -549,6 +553,10 @@ int bch2_sb_member_alloc(struct bch_fs *c)
 		goto have_slot;
 	}
 
+	if (nr_deleted)
+		bch_err(c, "unable to allocate new member, but have %u deleted: run fsck",
+			nr_deleted);
+
 	return -BCH_ERR_ENOSPC_sb_members;
 have_slot:
 	nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
@@ -564,3 +572,22 @@ have_slot:
 	c->disk_sb.sb->nr_devices = nr_devices;
 	return dev_idx;
 }
+
+void bch2_sb_members_clean_deleted(struct bch_fs *c)
+{
+	mutex_lock(&c->sb_lock);
+	bool write_sb = false;
+
+	for (unsigned i = 0; i < c->sb.nr_devices; i++) {
+		struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, i);
+
+		if (uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID)) {
+			memset(&m->uuid, 0, sizeof(m->uuid));
+			write_sb = true;
+		}
+	}
+
+	if (write_sb)
+		bch2_write_super(c);
+	mutex_unlock(&c->sb_lock);
+}
@@ -320,7 +320,8 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2;
 
 static inline bool bch2_member_alive(struct bch_member *m)
 {
-	return !bch2_is_zero(&m->uuid, sizeof(m->uuid));
+	return !bch2_is_zero(&m->uuid, sizeof(m->uuid)) &&
+		!uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID);
 }
 
 static inline bool bch2_member_exists(struct bch_sb *sb, unsigned dev)
@@ -381,5 +382,6 @@ bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
 void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
 
 int bch2_sb_member_alloc(struct bch_fs *);
+void bch2_sb_members_clean_deleted(struct bch_fs *);
 
 #endif /* _BCACHEFS_SB_MEMBERS_H */
@@ -13,6 +13,10 @@
  */
 #define BCH_SB_MEMBER_INVALID		255
 
+#define BCH_SB_MEMBER_DELETED_UUID					\
+	UUID_INIT(0xffffffff, 0xffff, 0xffff,				\
+		  0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef)
+
 #define BCH_MIN_NR_NBUCKETS	(1 << 6)
 
 #define BCH_IOPS_MEASUREMENTS()					\
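Putting the sb-members hunks together: fast removal stamps a member slot with the sentinel UUID above instead of zeroing it, so the slot stays reserved (stale pointers to the device may still exist) until fsck completes cleanly and bch2_sb_members_clean_deleted() zeroes it. A self-contained sketch of that lifecycle - byte layout simplified, helpers are stand-ins for the kernel's uuid_equal()/bch2_is_zero():

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct uuid { unsigned char b[16]; };

/* stand-in for the UUID_INIT() sentinel above */
static const struct uuid deleted_uuid = {
	.b = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	       0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef }
};

static bool uuid_eq(const struct uuid *a, const struct uuid *b)
{
	return !memcmp(a, b, sizeof(*a));
}

/* deleted slots are neither zero (free) nor alive */
static bool member_alive(const struct uuid *u)
{
	static const struct uuid zero;
	return !uuid_eq(u, &zero) && !uuid_eq(u, &deleted_uuid);
}

static void fast_remove(struct uuid *slot)
{
	*slot = deleted_uuid;	/* slot stays reserved until fsck passes */
}

static void fsck_clean_deleted(struct uuid *slot)
{
	if (uuid_eq(slot, &deleted_uuid))
		memset(slot, 0, sizeof(*slot));	/* now reusable */
}

int main(void)
{
	struct uuid slot = { .b = { 1 } };

	fast_remove(&slot);
	printf("alive after remove: %d\n", member_alive(&slot));	/* 0 */
	fsck_clean_deleted(&slot);
	printf("free after fsck: %d\n", uuid_eq(&slot, &(struct uuid){0}));
	return 0;
}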
@@ -1427,6 +1427,12 @@ static unsigned live_child(struct bch_fs *c, u32 id)
 	return ret;
 }
 
+static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id)
+{
+	return snapshot_list_has_id(&d->delete_leaves, id) ||
+		interior_delete_has_id(&d->delete_interior, id) != 0;
+}
+
 static int delete_dead_snapshots_process_key(struct btree_trans *trans,
 					     struct btree_iter *iter,
 					     struct bkey_s_c k)
@@ -1468,11 +1474,20 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
 	return 0;
 }
 
-static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter)
+static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter, u64 *prev_inum)
 {
 	struct bch_fs *c = trans->c;
 	struct snapshot_delete *d = &c->snapshot_delete;
 
+	u64 inum = iter->btree_id != BTREE_ID_inodes
+		? iter->pos.inode
+		: iter->pos.offset;
+
+	if (*prev_inum == inum)
+		return false;
+
+	*prev_inum = inum;
+
 	bool ret = !snapshot_list_has_id(&d->deleting_from_trees,
 					 bch2_snapshot_tree(c, iter->pos.snapshot));
 	if (unlikely(ret)) {
@@ -1486,6 +1501,129 @@ static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree
 	return ret;
 }
 
+static int delete_dead_snapshot_keys_v1(struct btree_trans *trans)
+{
+	struct bch_fs *c = trans->c;
+	struct snapshot_delete *d = &c->snapshot_delete;
+
+	for (d->pos.btree = 0; d->pos.btree < BTREE_ID_NR; d->pos.btree++) {
+		struct disk_reservation res = { 0 };
+		u64 prev_inum = 0;
+
+		d->pos.pos = POS_MIN;
+
+		if (!btree_type_has_snapshots(d->pos.btree))
+			continue;
+
+		int ret = for_each_btree_key_commit(trans, iter,
+				d->pos.btree, POS_MIN,
+				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+				&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+			d->pos.pos = iter.pos;
+
+			if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+				continue;
+
+			delete_dead_snapshots_process_key(trans, &iter, k);
+		}));
+
+		bch2_disk_reservation_put(c, &res);
+
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int delete_dead_snapshot_keys_range(struct btree_trans *trans, enum btree_id btree,
+					   struct bpos start, struct bpos end)
+{
+	struct bch_fs *c = trans->c;
+	struct snapshot_delete *d = &c->snapshot_delete;
+	struct disk_reservation res = { 0 };
+
+	d->pos.btree = btree;
+	d->pos.pos = POS_MIN;
+
+	int ret = for_each_btree_key_max_commit(trans, iter,
+			btree, start, end,
+			BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+			&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+		d->pos.pos = iter.pos;
+		delete_dead_snapshots_process_key(trans, &iter, k);
+	}));
+
+	bch2_disk_reservation_put(c, &res);
+	return ret;
+}
+
+static int delete_dead_snapshot_keys_v2(struct btree_trans *trans)
+{
+	struct bch_fs *c = trans->c;
+	struct snapshot_delete *d = &c->snapshot_delete;
+	struct disk_reservation res = { 0 };
+	u64 prev_inum = 0;
+	int ret = 0;
+
+	struct btree_iter iter;
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, POS_MIN,
+			     BTREE_ITER_prefetch|BTREE_ITER_all_snapshots);
+
+	while (1) {
+		struct bkey_s_c k;
+		ret = lockrestart_do(trans,
+				bkey_err(k = bch2_btree_iter_peek(trans, &iter)));
+		if (ret)
+			break;
+
+		if (!k.k)
+			break;
+
+		d->pos.btree = iter.btree_id;
+		d->pos.pos = iter.pos;
+
+		if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+			continue;
+
+		if (snapshot_id_dying(d, k.k->p.snapshot)) {
+			struct bpos start	= POS(k.k->p.offset, 0);
+			struct bpos end		= POS(k.k->p.offset, U64_MAX);
+
+			ret = delete_dead_snapshot_keys_range(trans, BTREE_ID_extents, start, end) ?:
+				delete_dead_snapshot_keys_range(trans, BTREE_ID_dirents, start, end) ?:
+				delete_dead_snapshot_keys_range(trans, BTREE_ID_xattrs, start, end);
+			if (ret)
+				break;
+
+			bch2_btree_iter_set_pos(trans, &iter, POS(0, k.k->p.offset + 1));
+		} else {
+			bch2_btree_iter_advance(trans, &iter);
+		}
+	}
+	bch2_trans_iter_exit(trans, &iter);
+
+	if (ret)
+		goto err;
+
+	prev_inum = 0;
+	ret = for_each_btree_key_commit(trans, iter,
+			BTREE_ID_inodes, POS_MIN,
+			BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+			&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+		d->pos.btree = iter.btree_id;
+		d->pos.pos = iter.pos;
+
+		if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+			continue;
+
+		delete_dead_snapshots_process_key(trans, &iter, k);
+	}));
+err:
+	bch2_disk_reservation_put(c, &res);
+	return ret;
+}
+
 /*
  * For a given snapshot, if it doesn't have a subvolume that points to it, and
  * it doesn't have child snapshot nodes - it's now redundant and we can mark it
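The contrast between the two paths added above: v1 scans every snapshot-aware btree end to end, while v2 walks the inodes btree once and, for each inode key in a dying snapshot, range-deletes only that inode's extents, dirents and xattrs. A toy model of the v2 shape (flat keyspaces standing in for btrees, inode numbers illustrative):

#include <stdio.h>

enum btree { EXTENTS, DIRENTS, XATTRS, NR_BTREES };

/* stand-in for for_each_btree_key_max_commit() over
 * POS(inum, 0)..POS(inum, U64_MAX) in one btree */
static void delete_range(enum btree t, unsigned long long inum)
{
	printf("btree %d: delete keys of inode %llu\n", (int) t, inum);
}

/* only inodes actually seen in dying snapshots pay a deletion pass */
static void delete_dead_keys_v2(const unsigned long long *dying, size_t nr)
{
	for (size_t i = 0; i < nr; i++)
		for (int t = EXTENTS; t < NR_BTREES; t++)
			delete_range((enum btree) t, dying[i]);
}

int main(void)
{
	const unsigned long long dying[] = { 4099, 4131 };
	delete_dead_keys_v2(dying, 2);
	return 0;
}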
@@ -1500,6 +1638,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
 	struct snapshot_delete *d = &c->snapshot_delete;
 	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
 	unsigned live_children = 0;
+	int ret = 0;
 
 	if (BCH_SNAPSHOT_SUBVOL(s.v))
 		return 0;
@@ -1507,6 +1646,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
 	if (BCH_SNAPSHOT_DELETED(s.v))
 		return 0;
 
+	mutex_lock(&d->lock);
 	for (unsigned i = 0; i < 2; i++) {
 		u32 child = le32_to_cpu(s.v->children[i]);
@@ -1517,7 +1657,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
 	u32 tree = bch2_snapshot_tree(c, s.k->p.offset);
 
 	if (live_children == 0) {
-		return snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
+		ret = snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
 			snapshot_list_add(c, &d->delete_leaves, s.k->p.offset);
 	} else if (live_children == 1) {
 		struct snapshot_interior_delete n = {
@@ -1527,14 +1667,15 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
 
 		if (!n.live_child) {
 			bch_err(c, "error finding live child of snapshot %u", n.id);
-			return -EINVAL;
+			ret = -EINVAL;
+		} else {
+			ret = snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
+				darray_push(&d->delete_interior, n);
 		}
-
-		return snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
-			darray_push(&d->delete_interior, n);
-	} else {
-		return 0;
 	}
+	mutex_unlock(&d->lock);
+
+	return ret;
 }
 
 static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
@@ -1641,13 +1782,11 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
 	 * For every snapshot node: If we have no live children and it's not
 	 * pointed to by a subvolume, delete it:
 	 */
-	mutex_lock(&d->lock);
 	d->running = true;
 	d->pos = BBPOS_MIN;
 
 	ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
 		check_should_delete_snapshot(trans, k));
-	mutex_unlock(&d->lock);
 	if (!bch2_err_matches(ret, EROFS))
 		bch_err_msg(c, ret, "walking snapshots");
 	if (ret)
@@ -1666,33 +1805,13 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
 		goto err;
 	}
 
-	for (d->pos.btree = 0; d->pos.btree < BTREE_ID_NR; d->pos.btree++) {
-		struct disk_reservation res = { 0 };
-
-		d->pos.pos = POS_MIN;
-
-		if (!btree_type_has_snapshots(d->pos.btree))
-			continue;
-
-		ret = for_each_btree_key_commit(trans, iter,
-				d->pos.btree, POS_MIN,
-				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
-				&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
-			d->pos.pos = iter.pos;
-
-			if (skip_unrelated_snapshot_tree(trans, &iter))
-				continue;
-
-			delete_dead_snapshots_process_key(trans, &iter, k);
-		}));
-
-		bch2_disk_reservation_put(c, &res);
-
-		if (!bch2_err_matches(ret, EROFS))
-			bch_err_msg(c, ret, "deleting keys from dying snapshots");
-		if (ret)
-			goto err;
-	}
+	ret = !bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)
+		? delete_dead_snapshot_keys_v2(trans)
+		: delete_dead_snapshot_keys_v1(trans);
+	if (!bch2_err_matches(ret, EROFS))
+		bch_err_msg(c, ret, "deleting keys from dying snapshots");
+	if (ret)
+		goto err;
 
 	darray_for_each(d->delete_leaves, i) {
 		ret = commit_do(trans, NULL, NULL, 0,
@@ -87,7 +87,8 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v
 		struct printbuf buf = PRINTBUF;
 		prt_str(&buf, "requested incompat feature ");
 		bch2_version_to_text(&buf, version);
-		prt_str(&buf, " currently not enabled");
+		prt_str(&buf, " currently not enabled, allowed up to ");
+		bch2_version_to_text(&buf, version);
 		prt_printf(&buf, "\n  set version_upgrade=incompat to enable");
 
 		bch_notice(c, "%s", buf.buf);
@@ -214,7 +214,6 @@ static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
 static void bch2_dev_io_ref_stop(struct bch_dev *, int);
 static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
 static int bch2_fs_init_rw(struct bch_fs *);
-static int bch2_fs_resize_on_mount(struct bch_fs *);
 
 struct bch_fs *bch2_dev_to_fs(dev_t dev)
 {
@@ -1150,15 +1149,11 @@ int bch2_fs_start(struct bch_fs *c)
 			cpu_to_le64(now);
 	rcu_read_unlock();
 
-	bch2_write_super(c);
+	/*
+	 * Don't write superblock yet: recovery might have to downgrade
+	 */
 	mutex_unlock(&c->sb_lock);
 
-	ret = bch2_fs_resize_on_mount(c);
-	if (ret) {
-		up_write(&c->state_lock);
-		goto err;
-	}
-
 	rcu_read_lock();
 	for_each_online_member_rcu(c, ca)
 		if (ca->mi.state == BCH_MEMBER_STATE_rw)
@@ -1724,6 +1719,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 {
 	struct bch_member *m;
 	unsigned dev_idx = ca->dev_idx, data;
+	bool fast_device_removal = !bch2_request_incompat_feature(c,
+					bcachefs_metadata_version_fast_device_removal);
 	int ret;
 
 	down_write(&c->state_lock);
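Note the leading `!` above: as used throughout this commit, bch2_request_incompat_feature() returns 0 when the feature may be used (the superblock's allowed incompat version covers it) and an error otherwise, so legacy behavior remains the fallback. A toy model of that gating (boolean stand-in, illustrative version numbers):

#include <stdbool.h>
#include <stdio.h>

/* stand-in: 0 = feature granted (and recorded as in use),
 * nonzero = superblock doesn't allow it yet */
static int request_incompat(unsigned allowed, unsigned want)
{
	return want <= allowed ? 0 : -1;
}

static const char *removal_path(unsigned allowed, unsigned fast_version)
{
	bool fast = !request_incompat(allowed, fast_version);

	return fast ? "drop via backpointers" : "full btree scan";
}

int main(void)
{
	printf("%s\n", removal_path(1050, 1051));	/* full btree scan */
	printf("%s\n", removal_path(1051, 1051));	/* drop via backpointers */
	return 0;
}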
@@ -1742,11 +1739,24 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 
 	__bch2_dev_read_only(c, ca);
 
-	ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
-	bch_err_msg(ca, ret, "bch2_dev_data_drop()");
+	ret = fast_device_removal
+		? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags)
+		: bch2_dev_data_drop(c, ca->dev_idx, flags);
 	if (ret)
 		goto err;
 
+	/* Check if device still has data */
+	struct bch_dev_usage usage = bch2_dev_usage_read(ca);
+	for (unsigned i = 0; i < BCH_DATA_NR; i++)
+		if (!data_type_is_empty(i) &&
+		    !data_type_is_hidden(i) &&
+		    usage.buckets[i]) {
+			bch_err(ca, "Remove failed: still has data (%s, %llu buckets)",
+				__bch2_data_types[i], usage.buckets[i]);
+			ret = -EBUSY;
+			goto err;
+		}
+
 	ret = bch2_dev_remove_alloc(c, ca);
 	bch_err_msg(ca, ret, "bch2_dev_remove_alloc()");
 	if (ret)
@@ -1810,7 +1820,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 	 */
 	mutex_lock(&c->sb_lock);
 	m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
-	memset(&m->uuid, 0, sizeof(m->uuid));
+
+	if (fast_device_removal)
+		m->uuid = BCH_SB_MEMBER_DELETED_UUID;
+	else
+		memset(&m->uuid, 0, sizeof(m->uuid));
 
 	bch2_write_super(c);
@@ -2120,7 +2134,7 @@ err:
 	return ret;
 }
 
-static int bch2_fs_resize_on_mount(struct bch_fs *c)
+int bch2_fs_resize_on_mount(struct bch_fs *c)
 {
 	for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) {
 		u64 old_nbuckets = ca->mi.nbuckets;
@@ -38,6 +38,8 @@ void bch2_fs_read_only(struct bch_fs *);
 int bch2_fs_read_write(struct bch_fs *);
 int bch2_fs_read_write_early(struct bch_fs *);
 
+int bch2_fs_resize_on_mount(struct bch_fs *);
+
 void __bch2_fs_stop(struct bch_fs *);
 void bch2_fs_free(struct bch_fs *);
 void bch2_fs_stop(struct bch_fs *);
@@ -455,8 +455,10 @@ ssize_t bch2_stdio_redirect_vprintf(struct stdio_redirect *stdio, bool nonblocki
 	struct stdio_buf *buf = &stdio->output;
 	unsigned long flags;
 	ssize_t ret;
+
+again:
 	if (stdio->done)
 		return -EPIPE;
 
 	spin_lock_irqsave(&buf->lock, flags);
 	ret = bch2_darray_vprintf(&buf->buf, GFP_NOWAIT, fmt, args);
 	spin_unlock_irqrestore(&buf->lock, flags);