Mirror of https://github.com/koverstreet/bcachefs-tools.git
Synced 2025-02-22 00:00:03 +03:00
Update bcachefs sources to 8c94740b1bf8 bcachefs: Add missing validation for jset_entry_data_usage
This commit is contained in:
parent 138397d892
commit 3a0cc86e76
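The headline change in this sync is new validation of jset_entry_data_usage journal entries: the replicas entry embedded in the journal entry is now checked with bch2_replicas_entry_validate() (see the journal_io.c and replicas.c hunks below). As a rough, self-contained sketch of what that check does — the struct layout, helper names and device lookup here are simplified stand-ins, not the real bcachefs definitions:

/*
 * Simplified model of the replicas-entry checks used by the new
 * journal-entry validation (cf. bch2_replicas_entry_validate() below).
 */
#include <stdio.h>
#include <stdbool.h>

struct replicas_entry {
	unsigned nr_devs;
	unsigned nr_required;
	unsigned devs[8];
};

/* stand-in for bch2_dev_exists(): pretend only devices 0..3 exist */
static bool dev_exists(unsigned dev)
{
	return dev < 4;
}

static int replicas_entry_validate(const struct replicas_entry *r)
{
	/* an entry with no devices is never valid */
	if (!r->nr_devs) {
		fprintf(stderr, "no devices in entry\n");
		return -1;
	}

	/* nr_required other than 1 must be strictly less than nr_devs */
	if (r->nr_required > 1 && r->nr_required >= r->nr_devs) {
		fprintf(stderr, "bad nr_required in entry\n");
		return -1;
	}

	/* every device referenced by the entry must actually exist */
	for (unsigned i = 0; i < r->nr_devs; i++)
		if (!dev_exists(r->devs[i])) {
			fprintf(stderr, "invalid device %u in entry\n", r->devs[i]);
			return -1;
		}

	return 0;
}

int main(void)
{
	struct replicas_entry good = { .nr_devs = 2, .nr_required = 1, .devs = { 0, 1 } };
	struct replicas_entry bad  = { .nr_devs = 1, .nr_required = 0, .devs = { 7 } };

	printf("good entry: %d\n", replicas_entry_validate(&good)); /* 0 */
	printf("bad entry:  %d\n", replicas_entry_validate(&bad));  /* -1, device 7 does not exist */
	return 0;
}

The real code reports failures through journal_entry_err_on() and a printbuf, and returns -BCH_ERR_invalid_replicas_entry rather than -1.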
@@ -1 +1 @@
-783085c3cc440183ba5e987b1aa7791cc1ca42ba
+8c94740b1bf8645d3398170f41c9c88b78332252
@@ -261,10 +261,8 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
case BCH_DATA_free:
case BCH_DATA_need_gc_gens:
case BCH_DATA_need_discard:
bkey_fsck_err_on(a.v->dirty_sectors ||
a.v->cached_sectors ||
a.v->stripe, c, err,
alloc_key_empty_but_have_data,
bkey_fsck_err_on(bch2_bucket_sectors(*a.v) || a.v->stripe,
c, err, alloc_key_empty_but_have_data,
"empty data type free but have data");
break;
case BCH_DATA_sb:
@@ -272,22 +270,21 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
case BCH_DATA_btree:
case BCH_DATA_user:
case BCH_DATA_parity:
bkey_fsck_err_on(!a.v->dirty_sectors, c, err,
alloc_key_dirty_sectors_0,
bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v),
c, err, alloc_key_dirty_sectors_0,
"data_type %s but dirty_sectors==0",
bch2_data_types[a.v->data_type]);
break;
case BCH_DATA_cached:
bkey_fsck_err_on(!a.v->cached_sectors ||
a.v->dirty_sectors ||
a.v->stripe, c, err,
alloc_key_cached_inconsistency,
bch2_bucket_sectors_dirty(*a.v) ||
a.v->stripe,
c, err, alloc_key_cached_inconsistency,
"data type inconsistency");

bkey_fsck_err_on(!a.v->io_time[READ] &&
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
c, err,
alloc_key_cached_but_read_time_zero,
c, err, alloc_key_cached_but_read_time_zero,
"cached bucket with read_time == 0");
break;
case BCH_DATA_stripe:
@ -790,8 +787,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
|
||||
|
||||
new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
|
||||
|
||||
if (new_a->dirty_sectors > old_a->dirty_sectors ||
|
||||
new_a->cached_sectors > old_a->cached_sectors) {
|
||||
if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) {
|
||||
new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
|
||||
new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
|
||||
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
|
||||
@ -1509,6 +1505,27 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
|
||||
if (a->data_type != BCH_DATA_cached)
|
||||
return 0;
|
||||
|
||||
if (fsck_err_on(!a->io_time[READ], c,
|
||||
alloc_key_cached_but_read_time_zero,
|
||||
"cached bucket with read_time 0\n"
|
||||
" %s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
|
||||
struct bkey_i_alloc_v4 *a_mut =
|
||||
bch2_alloc_to_v4_mut(trans, alloc_k);
|
||||
ret = PTR_ERR_OR_ZERO(a_mut);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
|
||||
ret = bch2_trans_update(trans, alloc_iter,
|
||||
&a_mut->k_i, BTREE_TRIGGER_NORUN);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
a = &a_mut->v;
|
||||
}
|
||||
|
||||
lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
|
||||
lru_pos(alloc_k.k->p.inode,
|
||||
bucket_to_u64(alloc_k.k->p),
|
||||
@ -1517,41 +1534,18 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (fsck_err_on(!a->io_time[READ], c,
|
||||
alloc_key_cached_but_read_time_zero,
|
||||
"cached bucket with read_time 0\n"
|
||||
" %s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) ||
|
||||
fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
|
||||
if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
|
||||
alloc_key_to_missing_lru_entry,
|
||||
"missing lru entry\n"
|
||||
" %s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
|
||||
u64 read_time = a->io_time[READ] ?:
|
||||
atomic64_read(&c->io_clock[READ].now);
|
||||
|
||||
ret = bch2_lru_set(trans,
|
||||
alloc_k.k->p.inode,
|
||||
bucket_to_u64(alloc_k.k->p),
|
||||
read_time);
|
||||
a->io_time[READ]);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (a->io_time[READ] != read_time) {
|
||||
struct bkey_i_alloc_v4 *a_mut =
|
||||
bch2_alloc_to_v4_mut(trans, alloc_k);
|
||||
ret = PTR_ERR_OR_ZERO(a_mut);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
a_mut->v.io_time[READ] = read_time;
|
||||
ret = bch2_trans_update(trans, alloc_iter,
|
||||
&a_mut->k_i, BTREE_TRIGGER_NORUN);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
err:
|
||||
fsck_err:
|
||||
@ -1564,14 +1558,12 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
ret = bch2_trans_run(c,
|
||||
int ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
|
||||
POS_MIN, BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
|
||||
bch2_check_alloc_to_lru_ref(trans, &iter)));
|
||||
if (ret)
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
@ -1734,28 +1726,25 @@ void bch2_do_discards(struct bch_fs *c)
|
||||
static int invalidate_one_bucket(struct btree_trans *trans,
|
||||
struct btree_iter *lru_iter,
|
||||
struct bkey_s_c lru_k,
|
||||
struct bpos *last_flushed_pos,
|
||||
s64 *nr_to_invalidate)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter alloc_iter = { NULL };
|
||||
struct bkey_i_alloc_v4 *a = NULL;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
|
||||
unsigned cached_sectors;
|
||||
int ret = 0;
|
||||
|
||||
if (*nr_to_invalidate <= 0)
|
||||
return 1;
|
||||
|
||||
if (!bch2_dev_bucket_exists(c, bucket)) {
|
||||
prt_str(&buf, "lru entry points to invalid bucket");
|
||||
goto err;
|
||||
}
|
||||
ret = bch2_check_lru_key(trans, lru_iter, lru_k, last_flushed_pos);
|
||||
if (ret)
|
||||
return ret < 0 ? ret : 0;
|
||||
|
||||
struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
|
||||
if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
|
||||
return 0;
|
||||
|
||||
a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
|
||||
struct btree_iter alloc_iter;
|
||||
struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
|
||||
ret = PTR_ERR_OR_ZERO(a);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -1769,7 +1758,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
|
||||
if (!a->v.cached_sectors)
|
||||
bch_err(c, "invalidating empty bucket, confused");
|
||||
|
||||
cached_sectors = a->v.cached_sectors;
|
||||
unsigned cached_sectors = a->v.cached_sectors;
|
||||
|
||||
SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
|
||||
a->v.gen++;
|
||||
@ -1791,28 +1780,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
|
||||
--*nr_to_invalidate;
|
||||
out:
|
||||
bch2_trans_iter_exit(trans, &alloc_iter);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
err:
|
||||
prt_str(&buf, "\n lru key: ");
|
||||
bch2_bkey_val_to_text(&buf, c, lru_k);
|
||||
|
||||
prt_str(&buf, "\n lru entry: ");
|
||||
bch2_lru_pos_to_text(&buf, lru_iter->pos);
|
||||
|
||||
prt_str(&buf, "\n alloc key: ");
|
||||
if (!a)
|
||||
bch2_bpos_to_text(&buf, bucket);
|
||||
else
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
|
||||
|
||||
bch_err(c, "%s", buf.buf);
|
||||
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) {
|
||||
bch2_inconsistent_error(c);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
static void bch2_do_invalidates_work(struct work_struct *work)
|
||||
@ -1822,6 +1790,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bpos last_flushed_pos = POS_MIN;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
@ -1837,7 +1806,8 @@ static void bch2_do_invalidates_work(struct work_struct *work)
|
||||
lru_pos(ca->dev_idx, 0, 0),
|
||||
lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
|
||||
BTREE_ITER_INTENT, k,
|
||||
invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
|
||||
invalidate_one_bucket(trans, &iter, k, &last_flushed_pos,
|
||||
&nr_to_invalidate));
|
||||
|
||||
if (ret < 0) {
|
||||
percpu_ref_put(&ca->ref);
|
||||
|
@@ -71,6 +71,24 @@ static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type)
return data_type == BCH_DATA_stripe ? BCH_DATA_user : data_type;
}

static inline unsigned bch2_bucket_sectors(struct bch_alloc_v4 a)
{
return a.dirty_sectors + a.cached_sectors;
}

static inline unsigned bch2_bucket_sectors_dirty(struct bch_alloc_v4 a)
{
return a.dirty_sectors;
}

static inline unsigned bch2_bucket_sectors_fragmented(struct bch_dev *ca,
struct bch_alloc_v4 a)
{
unsigned d = bch2_bucket_sectors_dirty(a);

return d ? max(0U, ca->mi.bucket_size - d) : 0;
}

static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
{
return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
@@ -90,10 +108,11 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
struct bch_dev *ca)
{
if (!data_type_movable(a.data_type) ||
a.dirty_sectors >= ca->mi.bucket_size)
!bch2_bucket_sectors_fragmented(ca, a))
return 0;

return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size);
u64 d = bch2_bucket_sectors_dirty(a);
return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
}

static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a)
@@ -1345,6 +1345,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
int ret;
int i;

if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
erasure_code = false;

BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);

BUG_ON(!nr_replicas || !nr_replicas_required);
@@ -935,7 +935,7 @@ struct bch_fs {
mempool_t compression_bounce[2];
mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR];
mempool_t decompress_workspace;
ZSTD_parameters zstd_params;
size_t zstd_workspace_size;

struct crypto_shash *sha256;
struct crypto_sync_skcipher *chacha20;
@@ -151,7 +151,11 @@ struct bpos {
#else
#error edit for your odd byteorder.
#endif
} __packed __aligned(4);
} __packed
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
__aligned(4)
#endif
;

#define KEY_INODE_MAX ((__u64)~0ULL)
#define KEY_OFFSET_MAX ((__u64)~0ULL)
@@ -2203,7 +2207,7 @@ struct jset_entry_dev_usage {
__le32 dev;
__u32 pad;

__le64 buckets_ec;
__le64 _buckets_ec; /* No longer used */
__le64 _buckets_unavailable; /* No longer used */

struct jset_entry_dev_usage_type d[];
@ -81,6 +81,8 @@ struct bch_ioctl_incremental {
|
||||
#define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16, struct bch_ioctl_subvolume)
|
||||
#define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17, struct bch_ioctl_subvolume)
|
||||
|
||||
#define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2)
|
||||
|
||||
/* ioctl below act on a particular file, not the filesystem as a whole: */
|
||||
|
||||
#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *)
|
||||
@ -298,7 +300,20 @@ struct bch_ioctl_dev_usage {
|
||||
__u64 buckets;
|
||||
__u64 sectors;
|
||||
__u64 fragmented;
|
||||
} d[BCH_DATA_NR];
|
||||
} d[10];
|
||||
};
|
||||
|
||||
struct bch_ioctl_dev_usage_v2 {
|
||||
__u64 dev;
|
||||
__u32 flags;
|
||||
__u8 state;
|
||||
__u8 nr_data_types;
|
||||
__u8 pad[6];
|
||||
|
||||
__u32 bucket_size;
|
||||
__u64 nr_buckets;
|
||||
|
||||
struct bch_ioctl_dev_usage_type d[0];
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -1254,9 +1254,6 @@ static int bch2_gc_done(struct bch_fs *c,
|
||||
copy_dev_field(dev_usage_fragmented_wrong,
|
||||
d[i].fragmented, "%s fragmented", bch2_data_types[i]);
|
||||
}
|
||||
|
||||
copy_dev_field(dev_usage_buckets_ec_wrong,
|
||||
buckets_ec, "buckets_ec");
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -361,7 +361,6 @@ noinline static int
|
||||
btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
|
||||
struct btree_path *path, unsigned new_u64s)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_insert_entry *i;
|
||||
struct bkey_cached *ck = (void *) path->l[0].b;
|
||||
struct bkey_i *new_k;
|
||||
@ -372,7 +371,7 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
|
||||
|
||||
new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
|
||||
if (!new_k) {
|
||||
bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
|
||||
bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
|
||||
bch2_btree_id_str(path->btree_id), new_u64s);
|
||||
return -BCH_ERR_ENOMEM_btree_key_cache_insert;
|
||||
}
|
||||
|
@ -29,14 +29,12 @@ static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_re
|
||||
#ifdef CONFIG_X86_64
|
||||
int cmp;
|
||||
|
||||
asm(".intel_syntax noprefix;"
|
||||
"mov rax, [%[l]];"
|
||||
"sub rax, [%[r]];"
|
||||
"mov rax, [%[l] + 8];"
|
||||
"sbb rax, [%[r] + 8];"
|
||||
"mov rax, [%[l] + 16];"
|
||||
"sbb rax, [%[r] + 16];"
|
||||
".att_syntax prefix;"
|
||||
asm("mov (%[l]), %%rax;"
|
||||
"sub (%[r]), %%rax;"
|
||||
"mov 8(%[l]), %%rax;"
|
||||
"sbb 8(%[r]), %%rax;"
|
||||
"mov 16(%[l]), %%rax;"
|
||||
"sbb 16(%[r]), %%rax;"
|
||||
: "=@ccae" (cmp)
|
||||
: [l] "r" (l), [r] "r" (r)
|
||||
: "rax", "cc");
|
||||
@ -297,7 +295,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
|
||||
struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
|
||||
|
||||
skipped++;
|
||||
n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);;
|
||||
n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
|
||||
k->journal_seq = 0;
|
||||
continue;
|
||||
}
|
||||
|
@ -277,12 +277,28 @@ void bch2_dev_usage_init(struct bch_dev *ca)
|
||||
ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
|
||||
}
|
||||
|
||||
static inline int bucket_sectors_fragmented(struct bch_dev *ca,
|
||||
struct bch_alloc_v4 a)
|
||||
void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
|
||||
{
|
||||
return a.dirty_sectors
|
||||
? max(0, (int) ca->mi.bucket_size - (int) a.dirty_sectors)
|
||||
: 0;
|
||||
prt_tab(out);
|
||||
prt_str(out, "buckets");
|
||||
prt_tab_rjust(out);
|
||||
prt_str(out, "sectors");
|
||||
prt_tab_rjust(out);
|
||||
prt_str(out, "fragmented");
|
||||
prt_tab_rjust(out);
|
||||
prt_newline(out);
|
||||
|
||||
for (unsigned i = 0; i < BCH_DATA_NR; i++) {
|
||||
prt_str(out, bch2_data_types[i]);
|
||||
prt_tab(out);
|
||||
prt_u64(out, usage->d[i].buckets);
|
||||
prt_tab_rjust(out);
|
||||
prt_u64(out, usage->d[i].sectors);
|
||||
prt_tab_rjust(out);
|
||||
prt_u64(out, usage->d[i].fragmented);
|
||||
prt_tab_rjust(out);
|
||||
prt_newline(out);
|
||||
}
|
||||
}
|
||||
|
||||
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
||||
@ -306,41 +322,37 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
||||
u->d[old.data_type].buckets--;
|
||||
u->d[new.data_type].buckets++;
|
||||
|
||||
u->buckets_ec -= (int) !!old.stripe;
|
||||
u->buckets_ec += (int) !!new.stripe;
|
||||
|
||||
u->d[old.data_type].sectors -= old.dirty_sectors;
|
||||
u->d[new.data_type].sectors += new.dirty_sectors;
|
||||
u->d[old.data_type].sectors -= bch2_bucket_sectors_dirty(old);
|
||||
u->d[new.data_type].sectors += bch2_bucket_sectors_dirty(new);
|
||||
|
||||
u->d[BCH_DATA_cached].sectors += new.cached_sectors;
|
||||
u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
|
||||
|
||||
u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old);
|
||||
u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
|
||||
u->d[old.data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, old);
|
||||
u->d[new.data_type].fragmented += bch2_bucket_sectors_fragmented(ca, new);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
|
||||
{
|
||||
return (struct bch_alloc_v4) {
|
||||
.gen = b.gen,
|
||||
.data_type = b.data_type,
|
||||
.dirty_sectors = b.dirty_sectors,
|
||||
.cached_sectors = b.cached_sectors,
|
||||
.stripe = b.stripe,
|
||||
};
|
||||
}
|
||||
|
||||
static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct bucket old, struct bucket new,
|
||||
u64 journal_seq, bool gc)
|
||||
{
|
||||
struct bch_alloc_v4 old_a = {
|
||||
.gen = old.gen,
|
||||
.data_type = old.data_type,
|
||||
.dirty_sectors = old.dirty_sectors,
|
||||
.cached_sectors = old.cached_sectors,
|
||||
.stripe = old.stripe,
|
||||
};
|
||||
struct bch_alloc_v4 new_a = {
|
||||
.gen = new.gen,
|
||||
.data_type = new.data_type,
|
||||
.dirty_sectors = new.dirty_sectors,
|
||||
.cached_sectors = new.cached_sectors,
|
||||
.stripe = new.stripe,
|
||||
};
|
||||
|
||||
bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
|
||||
bch2_dev_usage_update(c, ca,
|
||||
bucket_m_to_alloc(old),
|
||||
bucket_m_to_alloc(new),
|
||||
journal_seq, gc);
|
||||
}
|
||||
|
||||
static inline int __update_replicas(struct bch_fs *c,
|
||||
@ -640,7 +652,6 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
||||
g->data_type = data_type;
|
||||
g->dirty_sectors += sectors;
|
||||
new = *g;
|
||||
@ -657,14 +668,11 @@ static int check_bucket_ref(struct btree_trans *trans,
|
||||
const struct bch_extent_ptr *ptr,
|
||||
s64 sectors, enum bch_data_type ptr_data_type,
|
||||
u8 b_gen, u8 bucket_data_type,
|
||||
u32 dirty_sectors, u32 cached_sectors)
|
||||
u32 bucket_sectors)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
|
||||
u32 bucket_sectors = !ptr->cached
|
||||
? dirty_sectors
|
||||
: cached_sectors;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret = 0;
|
||||
|
||||
@ -799,7 +807,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
|
||||
|
||||
ret = check_bucket_ref(trans, k, ptr, sectors, data_type,
|
||||
g->gen, g->data_type,
|
||||
g->dirty_sectors, g->cached_sectors);
|
||||
g->dirty_sectors);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -829,8 +837,7 @@ static int __mark_pointer(struct btree_trans *trans,
|
||||
? dirty_sectors
|
||||
: cached_sectors;
|
||||
int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
|
||||
bucket_gen, *bucket_data_type,
|
||||
*dirty_sectors, *cached_sectors);
|
||||
bucket_gen, *bucket_data_type, *dst_sectors);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -1559,7 +1566,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
|
||||
|
||||
ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type,
|
||||
a->v.gen, a->v.data_type,
|
||||
a->v.dirty_sectors, a->v.cached_sectors);
|
||||
a->v.dirty_sectors);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -2073,8 +2080,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
bucket_gens->first_bucket = ca->mi.first_bucket;
|
||||
bucket_gens->nbuckets = nbuckets;
|
||||
|
||||
bch2_copygc_stop(c);
|
||||
|
||||
if (resize) {
|
||||
down_write(&c->gc_lock);
|
||||
down_write(&ca->bucket_lock);
|
||||
|
@ -203,6 +203,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
|
||||
}
|
||||
|
||||
void bch2_dev_usage_init(struct bch_dev *);
|
||||
void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *);
|
||||
|
||||
static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark)
|
||||
{
|
||||
|
@ -33,8 +33,6 @@ struct bucket_gens {
|
||||
};
|
||||
|
||||
struct bch_dev_usage {
|
||||
u64 buckets_ec;
|
||||
|
||||
struct {
|
||||
u64 buckets;
|
||||
u64 sectors; /* _compressed_ sectors: */
|
||||
|
@ -23,6 +23,12 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
__must_check
|
||||
static int copy_to_user_errcode(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
return copy_to_user(to, from, n) ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
/* returns with ref on ca->ref */
|
||||
static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
|
||||
unsigned flags)
|
||||
@ -149,10 +155,8 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg)
|
||||
static long bch2_ioctl_query_uuid(struct bch_fs *c,
|
||||
struct bch_ioctl_query_uuid __user *user_arg)
|
||||
{
|
||||
if (copy_to_user(&user_arg->uuid, &c->sb.user_uuid,
|
||||
sizeof(c->sb.user_uuid)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid,
|
||||
sizeof(c->sb.user_uuid));
|
||||
}
|
||||
|
||||
#if 0
|
||||
@ -341,10 +345,7 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
|
||||
if (len < sizeof(e))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_to_user(buf, &e, sizeof(e)))
|
||||
return -EFAULT;
|
||||
|
||||
return sizeof(e);
|
||||
return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e);
|
||||
}
|
||||
|
||||
static const struct file_operations bcachefs_data_ops = {
|
||||
@ -474,14 +475,15 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
if (copy_to_user(user_arg, arg,
|
||||
sizeof(*arg) + arg->replica_entries_bytes))
|
||||
ret = -EFAULT;
|
||||
|
||||
ret = copy_to_user_errcode(user_arg, arg,
|
||||
sizeof(*arg) + arg->replica_entries_bytes);
|
||||
err:
|
||||
kfree(arg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* obsolete, didn't allow for new data types: */
|
||||
static long bch2_ioctl_dev_usage(struct bch_fs *c,
|
||||
struct bch_ioctl_dev_usage __user *user_arg)
|
||||
{
|
||||
@ -511,7 +513,6 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
|
||||
arg.state = ca->mi.state;
|
||||
arg.bucket_size = ca->mi.bucket_size;
|
||||
arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
|
||||
arg.buckets_ec = src.buckets_ec;
|
||||
|
||||
for (i = 0; i < BCH_DATA_NR; i++) {
|
||||
arg.d[i].buckets = src.d[i].buckets;
|
||||
@ -521,10 +522,58 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
|
||||
|
||||
percpu_ref_put(&ca->ref);
|
||||
|
||||
if (copy_to_user(user_arg, &arg, sizeof(arg)))
|
||||
return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
|
||||
}
|
||||
|
||||
static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
|
||||
struct bch_ioctl_dev_usage_v2 __user *user_arg)
|
||||
{
|
||||
struct bch_ioctl_dev_usage_v2 arg;
|
||||
struct bch_dev_usage src;
|
||||
struct bch_dev *ca;
|
||||
int ret = 0;
|
||||
|
||||
if (!test_bit(BCH_FS_STARTED, &c->flags))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&arg, user_arg, sizeof(arg)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
if ((arg.flags & ~BCH_BY_INDEX) ||
|
||||
arg.pad[0] ||
|
||||
arg.pad[1] ||
|
||||
arg.pad[2])
|
||||
return -EINVAL;
|
||||
|
||||
ca = bch2_device_lookup(c, arg.dev, arg.flags);
|
||||
if (IS_ERR(ca))
|
||||
return PTR_ERR(ca);
|
||||
|
||||
src = bch2_dev_usage_read(ca);
|
||||
|
||||
arg.state = ca->mi.state;
|
||||
arg.bucket_size = ca->mi.bucket_size;
|
||||
arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR);
|
||||
arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
|
||||
|
||||
ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
for (unsigned i = 0; i < arg.nr_data_types; i++) {
|
||||
struct bch_ioctl_dev_usage_type t = {
|
||||
.buckets = src.d[i].buckets,
|
||||
.sectors = src.d[i].sectors,
|
||||
.fragmented = src.d[i].fragmented,
|
||||
};
|
||||
|
||||
ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t));
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
err:
|
||||
percpu_ref_put(&ca->ref);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long bch2_ioctl_read_super(struct bch_fs *c,
|
||||
@ -561,9 +610,8 @@ static long bch2_ioctl_read_super(struct bch_fs *c,
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (copy_to_user((void __user *)(unsigned long)arg.sb, sb,
|
||||
vstruct_bytes(sb)))
|
||||
ret = -EFAULT;
|
||||
ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb,
|
||||
vstruct_bytes(sb));
|
||||
err:
|
||||
if (!IS_ERR_OR_NULL(ca))
|
||||
percpu_ref_put(&ca->ref);
|
||||
@ -663,6 +711,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
|
||||
return bch2_ioctl_fs_usage(c, arg);
|
||||
case BCH_IOCTL_DEV_USAGE:
|
||||
return bch2_ioctl_dev_usage(c, arg);
|
||||
case BCH_IOCTL_DEV_USAGE_V2:
|
||||
return bch2_ioctl_dev_usage_v2(c, arg);
|
||||
#if 0
|
||||
case BCH_IOCTL_START:
|
||||
BCH_IOCTL(start, struct bch_ioctl_start);
|
||||
|
@ -354,8 +354,7 @@ static int attempt_compress(struct bch_fs *c,
|
||||
*/
|
||||
unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
|
||||
ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
|
||||
ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
|
||||
zstd_cctx_workspace_bound(¶ms.cParams));
|
||||
ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);
|
||||
|
||||
/*
|
||||
* ZSTD requires that when we decompress we pass in the exact
|
||||
@ -371,7 +370,7 @@ static int attempt_compress(struct bch_fs *c,
|
||||
size_t len = zstd_compress_cctx(ctx,
|
||||
dst + 4, dst_len - 4 - 7,
|
||||
src, src_len,
|
||||
&c->zstd_params);
|
||||
¶ms);
|
||||
if (zstd_is_error(len))
|
||||
return 0;
|
||||
|
||||
@ -572,6 +571,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
|
||||
size_t decompress_workspace_size = 0;
|
||||
ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
|
||||
c->opts.encoded_extent_max);
|
||||
|
||||
/*
|
||||
* ZSTD is lying: if we allocate the size of the workspace it says it
|
||||
* requires, it returns memory allocation errors
|
||||
*/
|
||||
c->zstd_workspace_size = zstd_cctx_workspace_bound(¶ms.cParams) * 2;
|
||||
|
||||
struct {
|
||||
unsigned feature;
|
||||
enum bch_compression_type type;
|
||||
@ -585,13 +591,11 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
|
||||
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
|
||||
zlib_inflate_workspacesize(), },
|
||||
{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
|
||||
zstd_cctx_workspace_bound(¶ms.cParams),
|
||||
c->zstd_workspace_size,
|
||||
zstd_dctx_workspace_bound() },
|
||||
}, *i;
|
||||
bool have_compressed = false;
|
||||
|
||||
c->zstd_params = params;
|
||||
|
||||
for (i = compression_types;
|
||||
i < compression_types + ARRAY_SIZE(compression_types);
|
||||
i++)
|
||||
|
@ -267,6 +267,20 @@ restart_drop_extra_replicas:
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (trace_data_update_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_str(&buf, "\nold: ");
|
||||
bch2_bkey_val_to_text(&buf, c, old);
|
||||
prt_str(&buf, "\nk: ");
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
prt_str(&buf, "\nnew: ");
|
||||
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
|
||||
|
||||
trace_data_update(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
|
||||
k.k->p, bkey_start_pos(&insert->k)) ?:
|
||||
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
|
||||
@ -356,7 +370,7 @@ void bch2_data_update_exit(struct data_update *update)
|
||||
bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
|
||||
}
|
||||
|
||||
void bch2_update_unwritten_extent(struct btree_trans *trans,
|
||||
static void bch2_update_unwritten_extent(struct btree_trans *trans,
|
||||
struct data_update *update)
|
||||
{
|
||||
struct bch_fs *c = update->op.c;
|
||||
@ -436,7 +450,51 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
|
||||
}
|
||||
}
|
||||
|
||||
int bch2_extent_drop_ptrs(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k,
|
||||
struct data_update_opts data_opts)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_i *n;
|
||||
int ret;
|
||||
|
||||
n = bch2_bkey_make_mut_noupdate(trans, k);
|
||||
ret = PTR_ERR_OR_ZERO(n);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
while (data_opts.kill_ptrs) {
|
||||
unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
|
||||
struct bch_extent_ptr *ptr;
|
||||
|
||||
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
|
||||
data_opts.kill_ptrs ^= 1U << drop;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the new extent no longer has any pointers, bch2_extent_normalize()
|
||||
* will do the appropriate thing with it (turning it into a
|
||||
* KEY_TYPE_error key, or just a discard if it was a cached extent)
|
||||
*/
|
||||
bch2_extent_normalize(c, bkey_i_to_s(n));
|
||||
|
||||
/*
|
||||
* Since we're not inserting through an extent iterator
|
||||
* (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
|
||||
* we aren't using the extent overwrite path to delete, we're
|
||||
* just using the normal key deletion path:
|
||||
*/
|
||||
if (bkey_deleted(&n->k))
|
||||
n->k.size = 0;
|
||||
|
||||
return bch2_trans_relock(trans) ?:
|
||||
bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||
}
|
||||
|
||||
int bch2_data_update_init(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct moving_context *ctxt,
|
||||
struct data_update *m,
|
||||
struct write_point_specifier wp,
|
||||
@ -452,7 +510,7 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
const struct bch_extent_ptr *ptr;
|
||||
unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
|
||||
unsigned ptrs_locked = 0;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
bch2_bkey_buf_init(&m->k);
|
||||
bch2_bkey_buf_reassemble(&m->k, c, k);
|
||||
@ -478,6 +536,8 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
bkey_for_each_ptr(ptrs, ptr)
|
||||
percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
|
||||
|
||||
unsigned durability_have = 0, durability_removing = 0;
|
||||
|
||||
i = 0;
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
bool locked;
|
||||
@ -489,8 +549,11 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
reserve_sectors += k.k->size;
|
||||
|
||||
m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
|
||||
} else if (!p.ptr.cached) {
|
||||
durability_removing += bch2_extent_ptr_desired_durability(c, &p);
|
||||
} else if (!p.ptr.cached &&
|
||||
!((1U << i) & m->data_opts.kill_ptrs)) {
|
||||
bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
|
||||
durability_have += bch2_extent_ptr_durability(c, &p);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -529,6 +592,29 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
i++;
|
||||
}
|
||||
|
||||
/*
|
||||
* If current extent durability is less than io_opts.data_replicas,
|
||||
* we're not trying to rereplicate the extent up to data_replicas here -
|
||||
* unless extra_replicas was specified
|
||||
*
|
||||
* Increasing replication is an explicit operation triggered by
|
||||
* rereplicate, currently, so that users don't get an unexpected -ENOSPC
|
||||
*/
|
||||
if (durability_have >= io_opts.data_replicas) {
|
||||
m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
|
||||
m->data_opts.rewrite_ptrs = 0;
|
||||
/* if iter == NULL, it's just a promote */
|
||||
if (iter)
|
||||
ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts);
|
||||
goto done;
|
||||
}
|
||||
|
||||
m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
|
||||
m->data_opts.extra_replicas;
|
||||
m->op.nr_replicas_required = m->op.nr_replicas;
|
||||
|
||||
BUG_ON(!m->op.nr_replicas);
|
||||
|
||||
if (reserve_sectors) {
|
||||
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
|
||||
m->data_opts.extra_replicas
|
||||
@ -538,14 +624,11 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
goto err;
|
||||
}
|
||||
|
||||
m->op.nr_replicas += m->data_opts.extra_replicas;
|
||||
m->op.nr_replicas_required = m->op.nr_replicas;
|
||||
if (bkey_extent_is_unwritten(k)) {
|
||||
bch2_update_unwritten_extent(trans, m);
|
||||
goto done;
|
||||
}
|
||||
|
||||
BUG_ON(!m->op.nr_replicas);
|
||||
|
||||
/* Special handling required: */
|
||||
if (bkey_extent_is_unwritten(k))
|
||||
return -BCH_ERR_unwritten_extent_update;
|
||||
return 0;
|
||||
err:
|
||||
i = 0;
|
||||
@ -560,6 +643,9 @@ err:
|
||||
bch2_bkey_buf_exit(&m->k, c);
|
||||
bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
|
||||
return ret;
|
||||
done:
|
||||
bch2_data_update_exit(m);
|
||||
return ret ?: -BCH_ERR_data_update_done;
|
||||
}
|
||||
|
||||
void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
|
||||
|
@ -32,9 +32,14 @@ int bch2_data_update_index_update(struct bch_write_op *);
|
||||
void bch2_data_update_read_done(struct data_update *,
|
||||
struct bch_extent_crc_unpacked);
|
||||
|
||||
int bch2_extent_drop_ptrs(struct btree_trans *,
|
||||
struct btree_iter *,
|
||||
struct bkey_s_c,
|
||||
struct data_update_opts);
|
||||
|
||||
void bch2_data_update_exit(struct data_update *);
|
||||
void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
|
||||
int bch2_data_update_init(struct btree_trans *, struct moving_context *,
|
||||
int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
|
||||
struct moving_context *,
|
||||
struct data_update *,
|
||||
struct write_point_specifier,
|
||||
struct bch_io_opts, struct data_update_opts,
|
||||
|
@@ -160,7 +160,7 @@
x(BCH_ERR_fsck, fsck_repair_unimplemented) \
x(BCH_ERR_fsck, fsck_repair_impossible) \
x(0, restart_recovery) \
x(0, unwritten_extent_update) \
x(0, data_update_done) \
x(EINVAL, device_state_not_allowed) \
x(EINVAL, member_info_missing) \
x(EINVAL, mismatched_block_size) \
@@ -208,6 +208,7 @@
x(BCH_ERR_invalid_sb, invalid_sb_members) \
x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \
x(BCH_ERR_invalid_sb, invalid_sb_replicas) \
x(BCH_ERR_invalid_sb, invalid_replicas_entry) \
x(BCH_ERR_invalid_sb, invalid_sb_journal) \
x(BCH_ERR_invalid_sb, invalid_sb_journal_seq_blacklist) \
x(BCH_ERR_invalid_sb, invalid_sb_crypt) \
@ -649,37 +649,31 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
|
||||
return replicas;
|
||||
}
|
||||
|
||||
unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
|
||||
static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
|
||||
if (p->ptr.cached)
|
||||
return 0;
|
||||
|
||||
ca = bch_dev_bkey_exists(c, p->ptr.dev);
|
||||
return p->has_ec
|
||||
? p->ec.redundancy + 1
|
||||
: ca->mi.durability;
|
||||
}
|
||||
|
||||
return ca->mi.durability +
|
||||
(p->has_ec
|
||||
? p->ec.redundancy
|
||||
: 0);
|
||||
unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
|
||||
{
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
|
||||
|
||||
return __extent_ptr_durability(ca, p);
|
||||
}
|
||||
|
||||
unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
|
||||
if (p->ptr.cached)
|
||||
return 0;
|
||||
|
||||
ca = bch_dev_bkey_exists(c, p->ptr.dev);
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
|
||||
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_failed)
|
||||
return 0;
|
||||
|
||||
return ca->mi.durability +
|
||||
(p->has_ec
|
||||
? p->ec.redundancy
|
||||
: 0);
|
||||
return __extent_ptr_durability(ca, p);
|
||||
}
|
||||
|
||||
unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
|
||||
|
@ -209,7 +209,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
|
||||
bio = &op->write.op.wbio.bio;
|
||||
bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
|
||||
|
||||
ret = bch2_data_update_init(trans, NULL, &op->write,
|
||||
ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
|
||||
writepoint_hashed((unsigned long) current),
|
||||
opts,
|
||||
(struct data_update_opts) {
|
||||
|
@@ -548,6 +548,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
struct jset_entry_data_usage *u =
container_of(entry, struct jset_entry_data_usage, entry);
unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
struct printbuf err = PRINTBUF;
int ret = 0;

if (journal_entry_err_on(bytes < sizeof(*u) ||
@@ -556,10 +557,19 @@
journal_entry_data_usage_bad_size,
"invalid journal entry usage: bad size")) {
journal_entry_null_range(entry, vstruct_next(entry));
return ret;
goto out;
}

if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
c, version, jset, entry,
journal_entry_data_usage_bad_size,
"invalid journal entry usage: %s", err.buf)) {
journal_entry_null_range(entry, vstruct_next(entry));
goto out;
}
out:
fsck_err:
printbuf_exit(&err);
return ret;
}

@@ -676,8 +686,6 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
le64_to_cpu(u->d[i].sectors),
le64_to_cpu(u->d[i].fragmented));
}

prt_printf(out, " buckets_ec: %llu", le64_to_cpu(u->buckets_ec));
}

static int journal_entry_log_validate(struct bch_fs *c,
@ -40,7 +40,7 @@ void bch2_lru_pos_to_text(struct printbuf *out, struct bpos lru)
|
||||
u64_to_bucket(lru.offset).offset);
|
||||
}
|
||||
|
||||
static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
|
||||
static inline int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
|
||||
u64 dev_bucket, u64 time, bool set)
|
||||
{
|
||||
return time
|
||||
@ -51,12 +51,12 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
|
||||
|
||||
int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
|
||||
{
|
||||
return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted);
|
||||
return __bch2_lru_set(trans, lru_id, dev_bucket, time, false);
|
||||
}
|
||||
|
||||
int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
|
||||
{
|
||||
return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set);
|
||||
return __bch2_lru_set(trans, lru_id, dev_bucket, time, true);
|
||||
}
|
||||
|
||||
int bch2_lru_change(struct btree_trans *trans,
|
||||
@ -66,8 +66,8 @@ int bch2_lru_change(struct btree_trans *trans,
|
||||
if (old_time == new_time)
|
||||
return 0;
|
||||
|
||||
return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?:
|
||||
bch2_lru_set(trans, lru_id, dev_bucket, new_time);
|
||||
return __bch2_lru_set(trans, lru_id, dev_bucket, old_time, false) ?:
|
||||
__bch2_lru_set(trans, lru_id, dev_bucket, new_time, true);
|
||||
}
|
||||
|
||||
static const char * const bch2_lru_types[] = {
|
||||
@ -77,7 +77,8 @@ static const char * const bch2_lru_types[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static int bch2_check_lru_key(struct btree_trans *trans,
|
||||
/* Returns 1 if key has been deleted */
|
||||
int bch2_check_lru_key(struct btree_trans *trans,
|
||||
struct btree_iter *lru_iter,
|
||||
struct bkey_s_c lru_k,
|
||||
struct bpos *last_flushed_pos)
|
||||
@ -89,7 +90,6 @@ static int bch2_check_lru_key(struct btree_trans *trans,
|
||||
const struct bch_alloc_v4 *a;
|
||||
struct printbuf buf1 = PRINTBUF;
|
||||
struct printbuf buf2 = PRINTBUF;
|
||||
enum bch_lru_type type = lru_type(lru_k);
|
||||
struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset);
|
||||
u64 idx;
|
||||
int ret;
|
||||
@ -98,7 +98,7 @@ static int bch2_check_lru_key(struct btree_trans *trans,
|
||||
lru_entry_to_invalid_bucket,
|
||||
"lru key points to nonexistent device:bucket %llu:%llu",
|
||||
alloc_pos.inode, alloc_pos.offset))
|
||||
return bch2_btree_delete_at(trans, lru_iter, 0);
|
||||
goto delete;
|
||||
|
||||
k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0);
|
||||
ret = bkey_err(k);
|
||||
@ -107,6 +107,7 @@ static int bch2_check_lru_key(struct btree_trans *trans,
|
||||
|
||||
a = bch2_alloc_to_v4(k, &a_convert);
|
||||
|
||||
enum bch_lru_type type = lru_type(lru_k);
|
||||
switch (type) {
|
||||
case BCH_LRU_read:
|
||||
idx = alloc_lru_idx_read(*a);
|
||||
@ -114,6 +115,9 @@ static int bch2_check_lru_key(struct btree_trans *trans,
|
||||
case BCH_LRU_fragmentation:
|
||||
idx = a->fragmentation_lru;
|
||||
break;
|
||||
default:
|
||||
/* unknown LRU type, don't check: */
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (lru_k.k->type != KEY_TYPE_set ||
|
||||
@ -125,16 +129,18 @@ static int bch2_check_lru_key(struct btree_trans *trans,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (c->opts.reconstruct_alloc ||
|
||||
if ((c->opts.reconstruct_alloc &&
|
||||
c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_lrus) ||
|
||||
fsck_err(c, lru_entry_bad,
|
||||
"incorrect lru entry: lru %s time %llu\n"
|
||||
" %s\n"
|
||||
" for %s",
|
||||
"for\n"
|
||||
" %s",
|
||||
bch2_lru_types[type],
|
||||
lru_pos_time(lru_k.k->p),
|
||||
(bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
|
||||
(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf)))
|
||||
ret = bch2_btree_delete_at(trans, lru_iter, 0);
|
||||
goto delete;
|
||||
}
|
||||
out:
|
||||
err:
|
||||
@ -143,6 +149,14 @@ fsck_err:
|
||||
printbuf_exit(&buf2);
|
||||
printbuf_exit(&buf1);
|
||||
return ret;
|
||||
delete:
|
||||
ret = bch2_btree_delete_at(trans, lru_iter, 0) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BCH_WATERMARK_btree|
|
||||
BCH_TRANS_COMMIT_lazy_rw|
|
||||
BCH_TRANS_COMMIT_no_enospc) ?:
|
||||
1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
int bch2_check_lrus(struct bch_fs *c)
|
||||
@ -150,15 +164,14 @@ int bch2_check_lrus(struct bch_fs *c)
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bpos last_flushed_pos = POS_MIN;
|
||||
int ret = 0;
|
||||
|
||||
ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter,
|
||||
BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
|
||||
bch2_check_lru_key(trans, &iter, k, &last_flushed_pos)));
|
||||
if (ret)
|
||||
int ret = bch2_trans_run(c,
|
||||
for_each_btree_key2(trans, iter,
|
||||
BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, ({
|
||||
int ret2 = bch2_check_lru_key(trans, &iter, k, &last_flushed_pos);
|
||||
|
||||
ret2 < 0 ? ret2 : 0;
|
||||
})));
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
@@ -64,6 +64,8 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64);
int bch2_lru_set(struct btree_trans *, u16, u64, u64);
int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);

int bch2_check_lru_key(struct btree_trans *, struct btree_iter *,
struct bkey_s_c, struct bpos *);
int bch2_check_lrus(struct bch_fs *);

#endif /* _BCACHEFS_LRU_H */
@ -173,6 +173,7 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
|
||||
static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
|
||||
{
|
||||
move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
|
||||
bch2_trans_unlock_long(ctxt->trans);
|
||||
closure_sync(&ctxt->cl);
|
||||
}
|
||||
|
||||
@ -235,49 +236,6 @@ void bch2_move_stats_init(struct bch_move_stats *stats, const char *name)
|
||||
scnprintf(stats->name, sizeof(stats->name), "%s", name);
|
||||
}
|
||||
|
||||
static int bch2_extent_drop_ptrs(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k,
|
||||
struct data_update_opts data_opts)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_i *n;
|
||||
int ret;
|
||||
|
||||
n = bch2_bkey_make_mut_noupdate(trans, k);
|
||||
ret = PTR_ERR_OR_ZERO(n);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
while (data_opts.kill_ptrs) {
|
||||
unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
|
||||
struct bch_extent_ptr *ptr;
|
||||
|
||||
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
|
||||
data_opts.kill_ptrs ^= 1U << drop;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the new extent no longer has any pointers, bch2_extent_normalize()
|
||||
* will do the appropriate thing with it (turning it into a
|
||||
* KEY_TYPE_error key, or just a discard if it was a cached extent)
|
||||
*/
|
||||
bch2_extent_normalize(c, bkey_i_to_s(n));
|
||||
|
||||
/*
|
||||
* Since we're not inserting through an extent iterator
|
||||
* (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
|
||||
* we aren't using the extent overwrite path to delete, we're
|
||||
* just using the normal key deletion path:
|
||||
*/
|
||||
if (bkey_deleted(&n->k))
|
||||
n->k.size = 0;
|
||||
|
||||
return bch2_trans_relock(trans) ?:
|
||||
bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||
}
|
||||
|
||||
int bch2_move_extent(struct moving_context *ctxt,
|
||||
struct move_bucket_in_flight *bucket_in_flight,
|
||||
struct btree_iter *iter,
|
||||
@ -347,19 +305,11 @@ int bch2_move_extent(struct moving_context *ctxt,
|
||||
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
|
||||
io->rbio.bio.bi_end_io = move_read_endio;
|
||||
|
||||
ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
|
||||
ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
|
||||
io_opts, data_opts, iter->btree_id, k);
|
||||
if (ret && ret != -BCH_ERR_unwritten_extent_update)
|
||||
if (ret)
|
||||
goto err_free_pages;
|
||||
|
||||
if (ret == -BCH_ERR_unwritten_extent_update) {
|
||||
bch2_update_unwritten_extent(trans, &io->write);
|
||||
move_free(io);
|
||||
return 0;
|
||||
}
|
||||
|
||||
BUG_ON(ret);
|
||||
|
||||
io->write.op.end_io = move_write_done;
|
||||
|
||||
if (ctxt->rate)
|
||||
@ -403,6 +353,9 @@ err_free_pages:
|
||||
err_free:
|
||||
kfree(io);
|
||||
err:
|
||||
if (ret == -BCH_ERR_data_update_done)
|
||||
return 0;
|
||||
|
||||
this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
|
||||
trace_move_extent_alloc_mem_fail2(c, k);
|
||||
return ret;
|
||||
@ -506,22 +459,13 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
|
||||
do {
|
||||
delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
|
||||
|
||||
|
||||
if (delay) {
|
||||
if (delay > HZ / 10)
|
||||
bch2_trans_unlock_long(ctxt->trans);
|
||||
else
|
||||
bch2_trans_unlock(ctxt->trans);
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
}
|
||||
|
||||
if (kthread_should_stop()) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
if (kthread_should_stop())
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (delay)
|
||||
schedule_timeout(delay);
|
||||
move_ctxt_wait_event_timeout(ctxt,
|
||||
freezing(current) || kthread_should_stop(),
|
||||
delay);
|
||||
|
||||
if (unlikely(freezing(current))) {
|
||||
bch2_moving_ctxt_flush_all(ctxt);
|
||||
@ -729,7 +673,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
|
||||
}
|
||||
|
||||
a = bch2_alloc_to_v4(k, &a_convert);
|
||||
dirty_sectors = a->dirty_sectors;
|
||||
dirty_sectors = bch2_bucket_sectors_dirty(*a);
|
||||
bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
|
||||
fragmentation = a->fragmentation_lru;
|
||||
|
||||
|
@ -38,6 +38,25 @@ struct moving_context {
|
||||
wait_queue_head_t wait;
|
||||
};
|
||||
|
||||
#define move_ctxt_wait_event_timeout(_ctxt, _cond, _timeout) \
|
||||
({ \
|
||||
int _ret = 0; \
|
||||
while (true) { \
|
||||
bool cond_finished = false; \
|
||||
bch2_moving_ctxt_do_pending_writes(_ctxt); \
|
||||
\
|
||||
if (_cond) \
|
||||
break; \
|
||||
bch2_trans_unlock_long((_ctxt)->trans); \
|
||||
_ret = __wait_event_timeout((_ctxt)->wait, \
|
||||
bch2_moving_ctxt_next_pending_write(_ctxt) || \
|
||||
(cond_finished = (_cond)), _timeout); \
|
||||
if (_ret || ( cond_finished)) \
|
||||
break; \
|
||||
} \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
#define move_ctxt_wait_event(_ctxt, _cond) \
|
||||
do { \
|
||||
bool cond_finished = false; \
|
||||
|
@ -91,7 +91,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
|
||||
|
||||
a = bch2_alloc_to_v4(k, &_a);
|
||||
b->k.gen = a->gen;
|
||||
b->sectors = a->dirty_sectors;
|
||||
b->sectors = bch2_bucket_sectors_dirty(*a);
|
||||
|
||||
ret = data_type_movable(a->data_type) &&
|
||||
a->fragmentation_lru &&
|
||||
@ -149,6 +149,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
|
||||
struct bkey_s_c k;
|
||||
size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4);
|
||||
size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
|
||||
struct bpos last_flushed_pos = POS_MIN;
|
||||
int ret;
|
||||
|
||||
move_buckets_wait(ctxt, buckets_in_flight, false);
|
||||
@ -165,11 +166,16 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
|
||||
lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
|
||||
lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
|
||||
0, k, ({
|
||||
struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
|
||||
int ret2 = 0;
|
||||
int ret2 = bch2_check_lru_key(trans, &iter, k, &last_flushed_pos);
|
||||
if (ret2) {
|
||||
ret2 = ret2 < 0 ? ret2 : 0;
|
||||
goto next;
|
||||
}
|
||||
|
||||
saw++;
|
||||
|
||||
struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
|
||||
|
||||
if (!bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p)))
|
||||
not_movable++;
|
||||
else if (bucket_in_flight(buckets_in_flight, b.k))
|
||||
@ -179,6 +185,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
|
||||
if (ret2 >= 0)
|
||||
sectors += b.sectors;
|
||||
}
|
||||
next:
|
||||
ret2;
|
||||
}));
|
||||
|
||||
|
@ -171,6 +171,20 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
|
||||
return bkey_s_c_null;
|
||||
}
|
||||
|
||||
if (trace_rebalance_extent_enabled()) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
prt_str(&buf, "target=");
|
||||
bch2_target_to_text(&buf, c, r->target);
|
||||
prt_str(&buf, " compression=");
|
||||
prt_str(&buf, bch2_compression_opts[r->compression]);
|
||||
prt_str(&buf, " ");
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
|
||||
trace_rebalance_extent(c, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
|
@ -302,8 +302,6 @@ static int journal_replay_entry_early(struct bch_fs *c,
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, le32_to_cpu(u->dev));
|
||||
unsigned i, nr_types = jset_entry_dev_usage_nr_types(u);
|
||||
|
||||
ca->usage_base->buckets_ec = le64_to_cpu(u->buckets_ec);
|
||||
|
||||
for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) {
|
||||
ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets);
|
||||
ca->usage_base->d[i].sectors = le64_to_cpu(u->d[i].sectors);
|
||||
|
@@ -68,6 +68,33 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
prt_printf(out, "]");
}

int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
struct bch_sb *sb,
struct printbuf *err)
{
if (!r->nr_devs) {
prt_printf(err, "no devices in entry ");
goto bad;
}

if (r->nr_required > 1 &&
r->nr_required >= r->nr_devs) {
prt_printf(err, "bad nr_required in entry ");
goto bad;
}

for (unsigned i = 0; i < r->nr_devs; i++)
if (!bch2_dev_exists(sb, r->devs[i])) {
prt_printf(err, "invalid device %u in entry ", r->devs[i]);
goto bad;
}

return 0;
bad:
bch2_replicas_entry_to_text(err, r);
return -BCH_ERR_invalid_replicas_entry;
}

void bch2_cpu_replicas_to_text(struct printbuf *out,
struct bch_replicas_cpu *r)
{
@ -163,7 +190,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
|
||||
}
|
||||
|
||||
static struct bch_replicas_cpu
|
||||
cpu_replicas_add_entry(struct bch_replicas_cpu *old,
|
||||
cpu_replicas_add_entry(struct bch_fs *c,
|
||||
struct bch_replicas_cpu *old,
|
||||
struct bch_replicas_entry_v1 *new_entry)
|
||||
{
|
||||
unsigned i;
|
||||
@ -173,6 +201,9 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
|
||||
replicas_entry_bytes(new_entry)),
|
||||
};
|
||||
|
||||
for (i = 0; i < new_entry->nr_devs; i++)
|
||||
BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i]));
|
||||
|
||||
BUG_ON(!new_entry->data_type);
|
||||
verify_replicas_entry(new_entry);
|
||||
|
||||
@ -382,7 +413,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
|
||||
|
||||
if (c->replicas_gc.entries &&
|
||||
!__replicas_has_entry(&c->replicas_gc, new_entry)) {
|
||||
new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry);
|
||||
new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
|
||||
if (!new_gc.entries) {
|
||||
ret = -BCH_ERR_ENOMEM_cpu_replicas;
|
||||
goto err;
|
||||
@ -390,7 +421,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
|
||||
}
|
||||
|
||||
if (!__replicas_has_entry(&c->replicas, new_entry)) {
|
||||
new_r = cpu_replicas_add_entry(&c->replicas, new_entry);
|
||||
new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
|
||||
if (!new_r.entries) {
|
||||
ret = -BCH_ERR_ENOMEM_cpu_replicas;
|
||||
goto err;
|
||||
@ -598,7 +629,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
|
||||
if (idx < 0) {
|
||||
struct bch_replicas_cpu n;
|
||||
|
||||
n = cpu_replicas_add_entry(&c->replicas, r);
|
||||
n = cpu_replicas_add_entry(c, &c->replicas, r);
|
||||
if (!n.entries)
|
||||
return -BCH_ERR_ENOMEM_cpu_replicas;
|
||||
|
||||
@ -797,7 +828,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
|
||||
struct bch_sb *sb,
|
||||
struct printbuf *err)
|
||||
{
|
||||
unsigned i, j;
|
||||
unsigned i;
|
||||
|
||||
sort_cmp_size(cpu_r->entries,
|
||||
cpu_r->nr,
|
||||
@ -808,31 +839,9 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
|
||||
struct bch_replicas_entry_v1 *e =
|
||||
cpu_replicas_entry(cpu_r, i);
|
||||
|
||||
if (e->data_type >= BCH_DATA_NR) {
|
||||
prt_printf(err, "invalid data type in entry ");
|
||||
bch2_replicas_entry_to_text(err, e);
|
||||
return -BCH_ERR_invalid_sb_replicas;
|
||||
}
|
||||
|
||||
if (!e->nr_devs) {
|
||||
prt_printf(err, "no devices in entry ");
|
||||
bch2_replicas_entry_to_text(err, e);
|
||||
return -BCH_ERR_invalid_sb_replicas;
|
||||
}
|
||||
|
||||
if (e->nr_required > 1 &&
|
||||
e->nr_required >= e->nr_devs) {
|
||||
prt_printf(err, "bad nr_required in entry ");
|
||||
bch2_replicas_entry_to_text(err, e);
|
||||
return -BCH_ERR_invalid_sb_replicas;
|
||||
}
|
||||
|
||||
for (j = 0; j < e->nr_devs; j++)
|
||||
if (!bch2_dev_exists(sb, e->devs[j])) {
|
||||
prt_printf(err, "invalid device %u in entry ", e->devs[j]);
|
||||
bch2_replicas_entry_to_text(err, e);
|
||||
return -BCH_ERR_invalid_sb_replicas;
|
||||
}
|
||||
int ret = bch2_replicas_entry_validate(e, sb, err);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (i + 1 < cpu_r->nr) {
|
||||
struct bch_replicas_entry_v1 *n =
|
||||
|
@@ -9,6 +9,8 @@
void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
void bch2_replicas_entry_to_text(struct printbuf *,
struct bch_replicas_entry_v1 *);
int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *,
struct bch_sb *, struct printbuf *);
void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);

static inline struct bch_replicas_entry_v1 *
@@ -256,7 +256,6 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,

u->entry.type = BCH_JSET_ENTRY_dev_usage;
u->dev = cpu_to_le32(dev);
u->buckets_ec = cpu_to_le64(ca->usage_base->buckets_ec);

for (i = 0; i < BCH_DATA_NR; i++) {
u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets);
@ -259,6 +259,11 @@ static void member_to_text(struct printbuf *out,
|
||||
prt_printf(out, "(none)");
|
||||
prt_newline(out);
|
||||
|
||||
prt_str(out, "Durability:");
|
||||
prt_tab(out);
|
||||
prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m));
|
||||
prt_newline(out);
|
||||
|
||||
prt_printf(out, "Discard:");
|
||||
prt_tab(out);
|
||||
prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m));
|
||||
|
@ -658,7 +658,7 @@ reread:
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __bch2_read_super(const char *path, struct bch_opts *opts,
|
||||
static int __bch2_read_super(const char *path, struct bch_opts *opts,
|
||||
struct bch_sb_handle *sb, bool ignore_notbchfs_msg)
|
||||
{
|
||||
u64 offset = opt_get(*opts, sb);
|
||||
|
@ -270,6 +270,8 @@ void bch2_fs_read_only(struct bch_fs *c)

BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));

bch_verbose(c, "going read-only");

/*
* Block new foreground-end write operations from starting - any new
* writes will return -EROFS:
@ -297,13 +299,21 @@ void bch2_fs_read_only(struct bch_fs *c)
test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) ||
test_bit(BCH_FS_EMERGENCY_RO, &c->flags));

bool writes_disabled = test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
if (writes_disabled)
bch_verbose(c, "finished waiting for writes to stop");

__bch2_fs_read_only(c);

wait_event(bch2_read_only_wait,
test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));

if (!writes_disabled)
bch_verbose(c, "finished waiting for writes to stop");

clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
clear_bit(BCH_FS_GOING_RO, &c->flags);
clear_bit(BCH_FS_RW, &c->flags);

if (!bch2_journal_error(&c->journal) &&
!test_bit(BCH_FS_ERROR, &c->flags) &&
@ -319,9 +329,9 @@ void bch2_fs_read_only(struct bch_fs *c)

bch_verbose(c, "marking filesystem clean");
bch2_fs_mark_clean(c);
} else {
bch_verbose(c, "done going read-only, filesystem not clean");
}

clear_bit(BCH_FS_RW, &c->flags);
}

static void bch2_fs_read_only_work(struct work_struct *work)
@ -424,6 +434,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);

set_bit(BCH_FS_RW, &c->flags);
set_bit(BCH_FS_WAS_RW, &c->flags);

#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_reinit(&c->writes);
#else
for (i = 0; i < BCH_WRITE_REF_NR; i++) {
BUG_ON(atomic_long_read(&c->writes[i]));
atomic_long_inc(&c->writes[i]);
}
#endif

ret = bch2_gc_thread_start(c);
if (ret) {
bch_err(c, "error starting gc thread");
@ -440,23 +462,15 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
goto err;
}

#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_reinit(&c->writes);
#else
for (i = 0; i < BCH_WRITE_REF_NR; i++) {
BUG_ON(atomic_long_read(&c->writes[i]));
atomic_long_inc(&c->writes[i]);
}
#endif
set_bit(BCH_FS_RW, &c->flags);
set_bit(BCH_FS_WAS_RW, &c->flags);

bch2_do_discards(c);
bch2_do_invalidates(c);
bch2_do_stripe_deletes(c);
bch2_do_pending_node_rewrites(c);
return 0;
err:
if (test_bit(BCH_FS_RW, &c->flags))
bch2_fs_read_only(c);
else
__bch2_fs_read_only(c);
return ret;
}

@ -258,15 +258,16 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
struct btree_iter iter;
struct bkey_s_c k;
enum btree_id id;
u64 nr_uncompressed_extents = 0,
nr_compressed_extents = 0,
nr_incompressible_extents = 0,
uncompressed_sectors = 0,
incompressible_sectors = 0,
compressed_sectors_compressed = 0,
compressed_sectors_uncompressed = 0;
struct compression_type_stats {
u64 nr_extents;
u64 sectors_compressed;
u64 sectors_uncompressed;
} s[BCH_COMPRESSION_TYPE_NR];
u64 compressed_incompressible = 0;
int ret = 0;

memset(s, 0, sizeof(s));

if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;

@ -279,36 +280,30 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
for_each_btree_key(trans, iter, id, POS_MIN,
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
bool compressed = false, uncompressed = false, incompressible = false;
bool compressed = false, incompressible = false;

bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
switch (p.crc.compression_type) {
case BCH_COMPRESSION_TYPE_none:
uncompressed = true;
uncompressed_sectors += k.k->size;
break;
case BCH_COMPRESSION_TYPE_incompressible:
incompressible = true;
incompressible_sectors += k.k->size;
break;
default:
compressed_sectors_compressed +=
p.crc.compressed_size;
compressed_sectors_uncompressed +=
p.crc.uncompressed_size;
compressed = true;
break;
bkey_for_each_crc(k.k, ptrs, crc, entry) {
incompressible |= crc.compression_type == BCH_COMPRESSION_TYPE_incompressible;
compressed |= crc_is_compressed(crc);

if (crc_is_compressed(crc)) {
s[crc.compression_type].nr_extents++;
s[crc.compression_type].sectors_compressed += crc.compressed_size;
s[crc.compression_type].sectors_uncompressed += crc.uncompressed_size;
}
}

if (incompressible)
nr_incompressible_extents++;
else if (uncompressed)
nr_uncompressed_extents++;
else if (compressed)
nr_compressed_extents++;
compressed_incompressible += compressed && incompressible;

if (!compressed) {
unsigned t = incompressible ? BCH_COMPRESSION_TYPE_incompressible : 0;

s[t].nr_extents++;
s[t].sectors_compressed += k.k->size;
s[t].sectors_uncompressed += k.k->size;
}
}
bch2_trans_iter_exit(trans, &iter);
}
@ -318,26 +313,45 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
if (ret)
return ret;

prt_printf(out, "uncompressed:\n");
prt_printf(out, " nr extents: %llu\n", nr_uncompressed_extents);
prt_printf(out, " size: ");
prt_human_readable_u64(out, uncompressed_sectors << 9);
prt_printf(out, "\n");
prt_str(out, "type");
printbuf_tabstop_push(out, 12);
prt_tab(out);

prt_printf(out, "compressed:\n");
prt_printf(out, " nr extents: %llu\n", nr_compressed_extents);
prt_printf(out, " compressed size: ");
prt_human_readable_u64(out, compressed_sectors_compressed << 9);
prt_printf(out, "\n");
prt_printf(out, " uncompressed size: ");
prt_human_readable_u64(out, compressed_sectors_uncompressed << 9);
prt_printf(out, "\n");
prt_str(out, "compressed");
printbuf_tabstop_push(out, 16);
prt_tab_rjust(out);

prt_str(out, "uncompressed");
printbuf_tabstop_push(out, 16);
prt_tab_rjust(out);

prt_str(out, "average extent size");
printbuf_tabstop_push(out, 24);
prt_tab_rjust(out);
prt_newline(out);

for (unsigned i = 0; i < ARRAY_SIZE(s); i++) {
prt_str(out, bch2_compression_types[i]);
prt_tab(out);

prt_human_readable_u64(out, s[i].sectors_compressed << 9);
prt_tab_rjust(out);

prt_human_readable_u64(out, s[i].sectors_uncompressed << 9);
prt_tab_rjust(out);

prt_human_readable_u64(out, s[i].nr_extents
? div_u64(s[i].sectors_uncompressed << 9, s[i].nr_extents)
: 0);
prt_tab_rjust(out);
prt_newline(out);
}

if (compressed_incompressible) {
prt_printf(out, "%llu compressed & incompressible extents", compressed_incompressible);
prt_newline(out);
}

prt_printf(out, "incompressible:\n");
prt_printf(out, " nr extents: %llu\n", nr_incompressible_extents);
prt_printf(out, " size: ");
prt_human_readable_u64(out, incompressible_sectors << 9);
prt_printf(out, "\n");
return 0;
}

@ -786,32 +800,7 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);

prt_tab(out);
prt_str(out, "buckets");
prt_tab_rjust(out);
prt_str(out, "sectors");
prt_tab_rjust(out);
prt_str(out, "fragmented");
prt_tab_rjust(out);
prt_newline(out);

for (i = 0; i < BCH_DATA_NR; i++) {
prt_str(out, bch2_data_types[i]);
prt_tab(out);
prt_u64(out, stats.d[i].buckets);
prt_tab_rjust(out);
prt_u64(out, stats.d[i].sectors);
prt_tab_rjust(out);
prt_u64(out, stats.d[i].fragmented);
prt_tab_rjust(out);
prt_newline(out);
}

prt_str(out, "ec");
prt_tab(out);
prt_u64(out, stats.buckets_ec);
prt_tab_rjust(out);
prt_newline(out);
bch2_dev_usage_to_text(out, &stats);

prt_newline(out);

@ -32,19 +32,21 @@ DECLARE_EVENT_CLASS(bpos,
TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot)
);

DECLARE_EVENT_CLASS(bkey,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k),
DECLARE_EVENT_CLASS(str,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str),

TP_STRUCT__entry(
__string(k, k )
__field(dev_t, dev )
__string(str, str )
),

TP_fast_assign(
__assign_str(k, k);
__entry->dev = c->dev;
__assign_str(str, str);
),

TP_printk("%s", __get_str(k))
TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str))
);

DECLARE_EVENT_CLASS(btree_node,
@ -736,22 +738,22 @@ TRACE_EVENT(bucket_evacuate,
__entry->dev_idx, __entry->bucket)
);

DEFINE_EVENT(bkey, move_extent,
DEFINE_EVENT(str, move_extent,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
);

DEFINE_EVENT(bkey, move_extent_read,
DEFINE_EVENT(str, move_extent_read,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
);

DEFINE_EVENT(bkey, move_extent_write,
DEFINE_EVENT(str, move_extent_write,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
);

DEFINE_EVENT(bkey, move_extent_finish,
DEFINE_EVENT(str, move_extent_finish,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
);
@ -773,7 +775,7 @@ TRACE_EVENT(move_extent_fail,
TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
);

DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
DEFINE_EVENT(str, move_extent_alloc_mem_fail,
TP_PROTO(struct bch_fs *c, const char *k),
TP_ARGS(c, k)
);
@ -1366,6 +1368,16 @@ TRACE_EVENT(write_buffer_flush_slowpath,
TP_printk("%zu/%zu", __entry->slowpath, __entry->total)
);

DEFINE_EVENT(str, rebalance_extent,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);

DEFINE_EVENT(str, data_update,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);

#endif /* _TRACE_BCACHEFS_H */

/* This part must be outside protection */