mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-22 00:00:03 +03:00
Update bcachefs sources to 15f6e66e86 bcachefs: pass around bset_tree less
This commit is contained in:
parent
9c62b2b4f1
commit
cef2f30ae2
@ -1 +1 @@
|
||||
eab3b355cf6fcabbf07d7a9032c68e95cab37ad0
|
||||
15f6e66e86a97245d967fedcb2f33598c174fd96
|
||||
|
@ -204,9 +204,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id,
|
||||
|
||||
buf[0] = '\t';
|
||||
|
||||
for_each_btree_node_key_unpack(b, k, &node_iter,
|
||||
btree_node_is_extents(b),
|
||||
&unpacked) {
|
||||
for_each_btree_node_key_unpack(b, k, &node_iter, &unpacked) {
|
||||
bch2_bkey_val_to_text(c, bkey_type(0, btree_id),
|
||||
buf + 1, sizeof(buf) - 1, k);
|
||||
puts(buf);
|
||||
|
@ -121,7 +121,7 @@ static void update_inode(struct bch_fs *c,
|
||||
|
||||
bch2_inode_pack(&packed, inode);
|
||||
ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
NULL, NULL, 0);
|
||||
if (ret)
|
||||
die("error creating file: %s", strerror(-ret));
|
||||
}
|
||||
@ -350,7 +350,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
|
||||
extent_i_to_s_c(e).s_c);
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
|
||||
&res, NULL, NULL, 0);
|
||||
&res, NULL, 0);
|
||||
if (ret)
|
||||
die("btree insert error %s", strerror(-ret));
|
||||
|
||||
|
@ -101,6 +101,10 @@
|
||||
#define swap(a, b) \
|
||||
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
|
||||
|
||||
/* This counts to 12. Any more, it will return 13th argument. */
|
||||
#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
|
||||
#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
|
||||
|
||||
#define _RET_IP_ (unsigned long)__builtin_return_address(0)
|
||||
#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
/*
|
||||
* deal with unrepresentable constant logarithms
|
||||
*/
|
||||
extern __attribute__((const, noreturn))
|
||||
extern __attribute__((const))
|
||||
int ____ilog2_NaN(void);
|
||||
|
||||
/*
|
||||
|
@ -146,6 +146,7 @@ static inline struct timespec current_kernel_time(void)
|
||||
return ts;
|
||||
}
|
||||
|
||||
#define current_kernel_time64() current_kernel_time()
|
||||
#define CURRENT_TIME (current_kernel_time())
|
||||
|
||||
#endif /* __TOOLS_LINUX_SCHED_H */
|
||||
|
@ -3,6 +3,8 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define timespec64 timespec
|
||||
|
||||
typedef __s64 time64_t;
|
||||
|
||||
/* Parameters used to convert the timespec values: */
|
||||
@ -42,4 +44,8 @@ static inline struct timespec timespec_trunc(struct timespec t, unsigned gran)
|
||||
return t;
|
||||
}
|
||||
|
||||
#define ns_to_timespec64 ns_to_timespec
|
||||
#define timespec64_to_ns timespec_to_ns
|
||||
#define timespec64_trunc timespec_trunc
|
||||
|
||||
#endif /* _LINUX_TIME64_H */
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
DECLARE_EVENT_CLASS(bpos,
|
||||
TP_PROTO(struct bpos p),
|
||||
TP_PROTO(struct bpos *p),
|
||||
TP_ARGS(p),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
@ -16,8 +16,8 @@ DECLARE_EVENT_CLASS(bpos,
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->inode = p.inode;
|
||||
__entry->offset = p.offset;
|
||||
__entry->inode = p->inode;
|
||||
__entry->offset = p->offset;
|
||||
),
|
||||
|
||||
TP_printk("%llu:%llu", __entry->inode, __entry->offset)
|
||||
@ -43,21 +43,6 @@ DECLARE_EVENT_CLASS(bkey,
|
||||
__entry->offset, __entry->size)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(bch_dev,
|
||||
TP_PROTO(struct bch_dev *ca),
|
||||
TP_ARGS(ca),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, uuid, 16 )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
memcpy(__entry->uuid, ca->uuid.b, 16);
|
||||
),
|
||||
|
||||
TP_printk("%pU", __entry->uuid)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(bch_fs,
|
||||
TP_PROTO(struct bch_fs *c),
|
||||
TP_ARGS(c),
|
||||
@ -138,7 +123,7 @@ DEFINE_EVENT(bio, journal_write,
|
||||
/* bset.c: */
|
||||
|
||||
DEFINE_EVENT(bpos, bkey_pack_pos_fail,
|
||||
TP_PROTO(struct bpos p),
|
||||
TP_PROTO(struct bpos *p),
|
||||
TP_ARGS(p)
|
||||
);
|
||||
|
||||
@ -360,16 +345,6 @@ DEFINE_EVENT(bch_fs, gc_coalesce_end,
|
||||
TP_ARGS(c)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bch_dev, sectors_saturated,
|
||||
TP_PROTO(struct bch_dev *ca),
|
||||
TP_ARGS(ca)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bch_fs, gc_sectors_saturated,
|
||||
TP_PROTO(struct bch_fs *c),
|
||||
TP_ARGS(c)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(bch_fs, gc_cannot_inc_gens,
|
||||
TP_PROTO(struct bch_fs *c),
|
||||
TP_ARGS(c)
|
||||
|
@ -284,10 +284,9 @@ static int inode_update_for_set_acl_fn(struct bch_inode_info *inode,
|
||||
void *p)
|
||||
{
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct timespec now = current_time(&inode->v);
|
||||
umode_t mode = (unsigned long) p;
|
||||
|
||||
bi->bi_ctime = timespec_to_bch2_time(c, now);
|
||||
bi->bi_ctime = bch2_current_time(c);
|
||||
bi->bi_mode = mode;
|
||||
return 0;
|
||||
}
|
||||
@ -301,13 +300,14 @@ int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
|
||||
umode_t mode = inode->v.i_mode;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&inode->ei_update_lock);
|
||||
bch2_trans_init(&trans, c);
|
||||
|
||||
if (type == ACL_TYPE_ACCESS && acl) {
|
||||
ret = posix_acl_update_mode(&inode->v, &mode, &acl);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch2_trans_init(&trans, c);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
@ -318,7 +318,7 @@ retry:
|
||||
bch2_write_inode_trans(&trans, inode, &inode_u,
|
||||
inode_update_for_set_acl_fn,
|
||||
(void *)(unsigned long) mode) ?:
|
||||
bch2_trans_commit(&trans, NULL, NULL,
|
||||
bch2_trans_commit(&trans, NULL,
|
||||
&inode->ei_journal_seq,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOUNLOCK);
|
||||
@ -333,6 +333,7 @@ retry:
|
||||
set_cached_acl(&inode->v, type, acl);
|
||||
err:
|
||||
bch2_trans_exit(&trans);
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -372,7 +373,7 @@ int bch2_acl_chmod(struct btree_trans *trans,
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch2_trans_update(trans, iter, &new->k_i, 0);
|
||||
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &new->k_i));
|
||||
*new_acl = acl;
|
||||
acl = NULL;
|
||||
err:
|
||||
|
@ -154,8 +154,8 @@ const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void bch2_alloc_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
int bch2_alloc_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
{
|
||||
buf[0] = '\0';
|
||||
|
||||
@ -163,6 +163,8 @@ void bch2_alloc_to_text(struct bch_fs *c, char *buf,
|
||||
case BCH_ALLOC:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned get_alloc_field(const u8 **p, unsigned bytes)
|
||||
@ -288,53 +290,41 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
|
||||
|
||||
static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, struct btree_iter *iter,
|
||||
u64 *journal_seq, bool nowait)
|
||||
u64 *journal_seq, unsigned flags)
|
||||
{
|
||||
struct bucket_mark m;
|
||||
__BKEY_PADDED(k, DIV_ROUND_UP(sizeof(struct bch_alloc), 8)) alloc_key;
|
||||
struct bucket *g;
|
||||
struct bkey_i_alloc *a;
|
||||
u8 *d;
|
||||
int ret;
|
||||
unsigned flags = BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
BTREE_INSERT_USE_ALLOC_RESERVE;
|
||||
|
||||
if (nowait)
|
||||
flags |= BTREE_INSERT_NOWAIT;
|
||||
percpu_down_read_preempt_disable(&c->usage_lock);
|
||||
g = bucket(ca, b);
|
||||
|
||||
bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
|
||||
m = READ_ONCE(g->mark);
|
||||
a = bkey_alloc_init(&alloc_key.k);
|
||||
a->k.p = POS(ca->dev_idx, b);
|
||||
a->v.fields = 0;
|
||||
a->v.gen = m.gen;
|
||||
set_bkey_val_u64s(&a->k, bch_alloc_val_u64s(&a->v));
|
||||
|
||||
do {
|
||||
ret = btree_iter_err(bch2_btree_iter_peek_slot(iter));
|
||||
if (ret)
|
||||
break;
|
||||
d = a->v.data;
|
||||
if (a->v.fields & (1 << BCH_ALLOC_FIELD_READ_TIME))
|
||||
put_alloc_field(&d, 2, g->io_time[READ]);
|
||||
if (a->v.fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
|
||||
put_alloc_field(&d, 2, g->io_time[WRITE]);
|
||||
percpu_up_read_preempt_enable(&c->usage_lock);
|
||||
|
||||
percpu_down_read_preempt_disable(&c->usage_lock);
|
||||
g = bucket(ca, b);
|
||||
bch2_btree_iter_cond_resched(iter);
|
||||
|
||||
/* read mark under btree node lock: */
|
||||
m = READ_ONCE(g->mark);
|
||||
a = bkey_alloc_init(&alloc_key.k);
|
||||
a->k.p = iter->pos;
|
||||
a->v.fields = 0;
|
||||
a->v.gen = m.gen;
|
||||
set_bkey_val_u64s(&a->k, bch_alloc_val_u64s(&a->v));
|
||||
bch2_btree_iter_set_pos(iter, a->k.p);
|
||||
|
||||
d = a->v.data;
|
||||
if (a->v.fields & (1 << BCH_ALLOC_FIELD_READ_TIME))
|
||||
put_alloc_field(&d, 2, g->io_time[READ]);
|
||||
if (a->v.fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
|
||||
put_alloc_field(&d, 2, g->io_time[WRITE]);
|
||||
percpu_up_read_preempt_enable(&c->usage_lock);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq, flags,
|
||||
BTREE_INSERT_ENTRY(iter, &a->k_i));
|
||||
bch2_btree_iter_cond_resched(iter);
|
||||
} while (ret == -EINTR);
|
||||
|
||||
return ret;
|
||||
return bch2_btree_insert_at(c, NULL, journal_seq,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
BTREE_INSERT_USE_ALLOC_RESERVE|
|
||||
flags,
|
||||
BTREE_INSERT_ENTRY(iter, &a->k_i));
|
||||
}
|
||||
|
||||
int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
|
||||
@ -354,8 +344,7 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
|
||||
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS_MIN,
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
||||
|
||||
ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter,
|
||||
NULL, false);
|
||||
ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter, NULL, 0);
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
return ret;
|
||||
}
|
||||
@ -375,8 +364,8 @@ int bch2_alloc_write(struct bch_fs *c)
|
||||
|
||||
down_read(&ca->bucket_lock);
|
||||
for_each_set_bit(bucket, ca->buckets_dirty, ca->mi.nbuckets) {
|
||||
ret = __bch2_alloc_write_key(c, ca, bucket, &iter,
|
||||
NULL, false);
|
||||
ret = __bch2_alloc_write_key(c, ca, bucket,
|
||||
&iter, NULL, 0);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
@ -582,47 +571,6 @@ static bool bch2_can_invalidate_bucket(struct bch_dev *ca,
|
||||
return gc_gen < BUCKET_GC_GEN_MAX;
|
||||
}
|
||||
|
||||
static void bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t bucket)
|
||||
{
|
||||
struct bucket_mark m;
|
||||
|
||||
percpu_down_read_preempt_disable(&c->usage_lock);
|
||||
spin_lock(&c->freelist_lock);
|
||||
|
||||
if (!bch2_invalidate_bucket(c, ca, bucket, &m)) {
|
||||
spin_unlock(&c->freelist_lock);
|
||||
percpu_up_read_preempt_enable(&c->usage_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
verify_not_on_freelist(c, ca, bucket);
|
||||
BUG_ON(!fifo_push(&ca->free_inc, bucket));
|
||||
|
||||
spin_unlock(&c->freelist_lock);
|
||||
percpu_up_read_preempt_enable(&c->usage_lock);
|
||||
|
||||
/* gc lock held: */
|
||||
bucket_io_clock_reset(c, ca, bucket, READ);
|
||||
bucket_io_clock_reset(c, ca, bucket, WRITE);
|
||||
|
||||
if (m.cached_sectors) {
|
||||
ca->allocator_invalidating_data = true;
|
||||
} else if (m.journal_seq_valid) {
|
||||
u64 journal_seq = atomic64_read(&c->journal.seq);
|
||||
u64 bucket_seq = journal_seq;
|
||||
|
||||
bucket_seq &= ~((u64) U16_MAX);
|
||||
bucket_seq |= m.journal_seq;
|
||||
|
||||
if (bucket_seq > journal_seq)
|
||||
bucket_seq -= 1 << 16;
|
||||
|
||||
ca->allocator_journal_seq_flush =
|
||||
max(ca->allocator_journal_seq_flush, bucket_seq);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Determines what order we're going to reuse buckets, smallest bucket_key()
|
||||
* first.
|
||||
@ -674,11 +622,18 @@ static inline int bucket_alloc_cmp(alloc_heap *h,
|
||||
(l.bucket > r.bucket) - (l.bucket < r.bucket);
|
||||
}
|
||||
|
||||
static inline int bucket_idx_cmp(const void *_l, const void *_r)
|
||||
{
|
||||
const struct alloc_heap_entry *l = _l, *r = _r;
|
||||
|
||||
return (l->bucket > r->bucket) - (l->bucket < r->bucket);
|
||||
}
|
||||
|
||||
static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct bucket_array *buckets;
|
||||
struct alloc_heap_entry e = { 0 };
|
||||
size_t b;
|
||||
size_t b, i, nr = 0;
|
||||
|
||||
ca->alloc_heap.used = 0;
|
||||
|
||||
@ -720,55 +675,58 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
|
||||
if (e.nr)
|
||||
heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
|
||||
|
||||
for (i = 0; i < ca->alloc_heap.used; i++)
|
||||
nr += ca->alloc_heap.data[i].nr;
|
||||
|
||||
while (nr - ca->alloc_heap.data[0].nr >= ALLOC_SCAN_BATCH(ca)) {
|
||||
nr -= ca->alloc_heap.data[0].nr;
|
||||
heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp);
|
||||
}
|
||||
|
||||
up_read(&ca->bucket_lock);
|
||||
mutex_unlock(&c->bucket_clock[READ].lock);
|
||||
|
||||
heap_resort(&ca->alloc_heap, bucket_alloc_cmp);
|
||||
|
||||
while (heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp)) {
|
||||
for (b = e.bucket;
|
||||
b < e.bucket + e.nr;
|
||||
b++) {
|
||||
if (fifo_full(&ca->free_inc))
|
||||
return;
|
||||
|
||||
bch2_invalidate_one_bucket(c, ca, b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct bucket_array *buckets = bucket_array(ca);
|
||||
struct bucket_mark m;
|
||||
size_t b, checked;
|
||||
size_t b, start;
|
||||
|
||||
for (checked = 0;
|
||||
checked < ca->mi.nbuckets && !fifo_full(&ca->free_inc);
|
||||
checked++) {
|
||||
if (ca->fifo_last_bucket < ca->mi.first_bucket ||
|
||||
ca->fifo_last_bucket >= ca->mi.nbuckets)
|
||||
if (ca->fifo_last_bucket < ca->mi.first_bucket ||
|
||||
ca->fifo_last_bucket >= ca->mi.nbuckets)
|
||||
ca->fifo_last_bucket = ca->mi.first_bucket;
|
||||
|
||||
start = ca->fifo_last_bucket;
|
||||
|
||||
do {
|
||||
ca->fifo_last_bucket++;
|
||||
if (ca->fifo_last_bucket == ca->mi.nbuckets)
|
||||
ca->fifo_last_bucket = ca->mi.first_bucket;
|
||||
|
||||
b = ca->fifo_last_bucket++;
|
||||
|
||||
b = ca->fifo_last_bucket;
|
||||
m = READ_ONCE(buckets->b[b].mark);
|
||||
|
||||
if (bch2_can_invalidate_bucket(ca, b, m))
|
||||
bch2_invalidate_one_bucket(c, ca, b);
|
||||
if (bch2_can_invalidate_bucket(ca, b, m)) {
|
||||
struct alloc_heap_entry e = { .bucket = b, .nr = 1, };
|
||||
|
||||
heap_add(&ca->alloc_heap, e, bucket_alloc_cmp);
|
||||
if (heap_full(&ca->alloc_heap))
|
||||
break;
|
||||
}
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
} while (ca->fifo_last_bucket != start);
|
||||
}
|
||||
|
||||
static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct bucket_array *buckets = bucket_array(ca);
|
||||
struct bucket_mark m;
|
||||
size_t checked;
|
||||
size_t checked, i;
|
||||
|
||||
for (checked = 0;
|
||||
checked < ca->mi.nbuckets / 2 && !fifo_full(&ca->free_inc);
|
||||
checked < ca->mi.nbuckets / 2;
|
||||
checked++) {
|
||||
size_t b = bch2_rand_range(ca->mi.nbuckets -
|
||||
ca->mi.first_bucket) +
|
||||
@ -776,17 +734,34 @@ static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca
|
||||
|
||||
m = READ_ONCE(buckets->b[b].mark);
|
||||
|
||||
if (bch2_can_invalidate_bucket(ca, b, m))
|
||||
bch2_invalidate_one_bucket(c, ca, b);
|
||||
if (bch2_can_invalidate_bucket(ca, b, m)) {
|
||||
struct alloc_heap_entry e = { .bucket = b, .nr = 1, };
|
||||
|
||||
heap_add(&ca->alloc_heap, e, bucket_alloc_cmp);
|
||||
if (heap_full(&ca->alloc_heap))
|
||||
break;
|
||||
}
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
sort(ca->alloc_heap.data,
|
||||
ca->alloc_heap.used,
|
||||
sizeof(ca->alloc_heap.data[0]),
|
||||
bucket_idx_cmp, NULL);
|
||||
|
||||
/* remove duplicates: */
|
||||
for (i = 0; i + 1 < ca->alloc_heap.used; i++)
|
||||
if (ca->alloc_heap.data[i].bucket ==
|
||||
ca->alloc_heap.data[i + 1].bucket)
|
||||
ca->alloc_heap.data[i].nr = 0;
|
||||
}
|
||||
|
||||
static void find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
|
||||
static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
size_t i, nr = 0;
|
||||
|
||||
ca->inc_gen_needs_gc = 0;
|
||||
ca->inc_gen_really_needs_gc = 0;
|
||||
|
||||
switch (ca->mi.replacement) {
|
||||
case CACHE_REPLACEMENT_LRU:
|
||||
@ -799,86 +774,132 @@ static void find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
|
||||
find_reclaimable_buckets_random(c, ca);
|
||||
break;
|
||||
}
|
||||
|
||||
heap_resort(&ca->alloc_heap, bucket_alloc_cmp);
|
||||
|
||||
for (i = 0; i < ca->alloc_heap.used; i++)
|
||||
nr += ca->alloc_heap.data[i].nr;
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
static int size_t_cmp(const void *_l, const void *_r)
|
||||
static inline long next_alloc_bucket(struct bch_dev *ca)
|
||||
{
|
||||
const size_t *l = _l, *r = _r;
|
||||
struct alloc_heap_entry e, *top = ca->alloc_heap.data;
|
||||
|
||||
return (*l > *r) - (*l < *r);
|
||||
while (ca->alloc_heap.used) {
|
||||
if (top->nr) {
|
||||
size_t b = top->bucket;
|
||||
|
||||
top->bucket++;
|
||||
top->nr--;
|
||||
return b;
|
||||
}
|
||||
|
||||
heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp);
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void sort_free_inc(struct bch_fs *c, struct bch_dev *ca)
|
||||
static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t bucket, u64 *flush_seq)
|
||||
{
|
||||
BUG_ON(ca->free_inc.front);
|
||||
struct bucket_mark m;
|
||||
|
||||
percpu_down_read_preempt_disable(&c->usage_lock);
|
||||
spin_lock(&c->freelist_lock);
|
||||
sort(ca->free_inc.data,
|
||||
ca->free_inc.back,
|
||||
sizeof(ca->free_inc.data[0]),
|
||||
size_t_cmp, NULL);
|
||||
|
||||
bch2_invalidate_bucket(c, ca, bucket, &m);
|
||||
|
||||
verify_not_on_freelist(c, ca, bucket);
|
||||
BUG_ON(!fifo_push(&ca->free_inc, bucket));
|
||||
|
||||
spin_unlock(&c->freelist_lock);
|
||||
|
||||
bucket_io_clock_reset(c, ca, bucket, READ);
|
||||
bucket_io_clock_reset(c, ca, bucket, WRITE);
|
||||
|
||||
percpu_up_read_preempt_enable(&c->usage_lock);
|
||||
|
||||
if (m.journal_seq_valid) {
|
||||
u64 journal_seq = atomic64_read(&c->journal.seq);
|
||||
u64 bucket_seq = journal_seq;
|
||||
|
||||
bucket_seq &= ~((u64) U16_MAX);
|
||||
bucket_seq |= m.journal_seq;
|
||||
|
||||
if (bucket_seq > journal_seq)
|
||||
bucket_seq -= 1 << 16;
|
||||
|
||||
*flush_seq = max(*flush_seq, bucket_seq);
|
||||
}
|
||||
|
||||
return m.cached_sectors != 0;
|
||||
}
|
||||
|
||||
static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
|
||||
u64 *journal_seq, size_t nr,
|
||||
bool nowait)
|
||||
/*
|
||||
* Pull buckets off ca->alloc_heap, invalidate them, move them to ca->free_inc:
|
||||
*/
|
||||
static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
u64 journal_seq = 0;
|
||||
int ret = 0;
|
||||
long b;
|
||||
|
||||
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
||||
|
||||
/* Only use nowait if we've already invalidated at least one bucket: */
|
||||
while (ca->nr_invalidated < min(nr, fifo_used(&ca->free_inc))) {
|
||||
size_t b = fifo_idx_entry(&ca->free_inc, ca->nr_invalidated);
|
||||
while (!ret &&
|
||||
!fifo_full(&ca->free_inc) &&
|
||||
(b = next_alloc_bucket(ca)) >= 0) {
|
||||
bool must_flush =
|
||||
bch2_invalidate_one_bucket(c, ca, b, &journal_seq);
|
||||
|
||||
ret = __bch2_alloc_write_key(c, ca, b, &iter, journal_seq,
|
||||
nowait && ca->nr_invalidated);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
ca->nr_invalidated++;
|
||||
ret = __bch2_alloc_write_key(c, ca, b, &iter,
|
||||
must_flush ? &journal_seq : NULL,
|
||||
!fifo_empty(&ca->free_inc) ? BTREE_INSERT_NOWAIT : 0);
|
||||
}
|
||||
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
/* If we used NOWAIT, don't return the error: */
|
||||
return ca->nr_invalidated ? 0 : ret;
|
||||
}
|
||||
if (!fifo_empty(&ca->free_inc))
|
||||
ret = 0;
|
||||
if (ret) {
|
||||
bch_err(ca, "error invalidating buckets: %i", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool __push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket)
|
||||
{
|
||||
unsigned i;
|
||||
if (journal_seq)
|
||||
ret = bch2_journal_flush_seq(&c->journal, journal_seq);
|
||||
if (ret) {
|
||||
bch_err(ca, "journal error: %i", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't remove from free_inc until after it's added to
|
||||
* freelist, so gc can find it:
|
||||
*/
|
||||
spin_lock(&c->freelist_lock);
|
||||
for (i = 0; i < RESERVE_NR; i++)
|
||||
if (fifo_push(&ca->free[i], bucket)) {
|
||||
fifo_pop(&ca->free_inc, bucket);
|
||||
--ca->nr_invalidated;
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
spin_unlock(&c->freelist_lock);
|
||||
return true;
|
||||
}
|
||||
spin_unlock(&c->freelist_lock);
|
||||
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket)
|
||||
{
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
while (1) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
if (__push_invalidated_bucket(c, ca, bucket))
|
||||
break;
|
||||
spin_lock(&c->freelist_lock);
|
||||
for (i = 0; i < RESERVE_NR; i++)
|
||||
if (fifo_push(&ca->free[i], bucket)) {
|
||||
fifo_pop(&ca->free_inc, bucket);
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
spin_unlock(&c->freelist_lock);
|
||||
goto out;
|
||||
}
|
||||
spin_unlock(&c->freelist_lock);
|
||||
|
||||
if ((current->flags & PF_KTHREAD) &&
|
||||
kthread_should_stop()) {
|
||||
@ -889,22 +910,20 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
|
||||
schedule();
|
||||
try_to_freeze();
|
||||
}
|
||||
|
||||
out:
|
||||
__set_current_state(TASK_RUNNING);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given an invalidated, ready to use bucket: issue a discard to it if enabled,
|
||||
* then add it to the freelist, waiting until there's room if necessary:
|
||||
* Pulls buckets off free_inc, discards them (if enabled), then adds them to
|
||||
* freelists, waiting until there's room if necessary:
|
||||
*/
|
||||
static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
while (ca->nr_invalidated) {
|
||||
while (!fifo_empty(&ca->free_inc)) {
|
||||
size_t bucket = fifo_peek(&ca->free_inc);
|
||||
|
||||
BUG_ON(fifo_empty(&ca->free_inc) || !ca->nr_invalidated);
|
||||
|
||||
if (ca->mi.discard &&
|
||||
blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
|
||||
blkdev_issue_discard(ca->disk_sb.bdev,
|
||||
@ -930,68 +949,37 @@ static int bch2_allocator_thread(void *arg)
|
||||
{
|
||||
struct bch_dev *ca = arg;
|
||||
struct bch_fs *c = ca->fs;
|
||||
u64 journal_seq;
|
||||
size_t nr;
|
||||
int ret;
|
||||
|
||||
set_freezable();
|
||||
|
||||
while (1) {
|
||||
while (1) {
|
||||
cond_resched();
|
||||
cond_resched();
|
||||
|
||||
pr_debug("discarding %zu invalidated buckets",
|
||||
ca->nr_invalidated);
|
||||
pr_debug("discarding %zu invalidated buckets",
|
||||
fifo_used(&ca->free_inc));
|
||||
|
||||
ret = discard_invalidated_buckets(c, ca);
|
||||
if (ret)
|
||||
goto stop;
|
||||
ret = discard_invalidated_buckets(c, ca);
|
||||
if (ret)
|
||||
goto stop;
|
||||
|
||||
if (fifo_empty(&ca->free_inc))
|
||||
break;
|
||||
down_read(&c->gc_lock);
|
||||
|
||||
pr_debug("invalidating %zu buckets",
|
||||
fifo_used(&ca->free_inc));
|
||||
ret = bch2_invalidate_buckets(c, ca);
|
||||
if (ret) {
|
||||
up_read(&c->gc_lock);
|
||||
goto stop;
|
||||
}
|
||||
|
||||
journal_seq = 0;
|
||||
ret = bch2_invalidate_free_inc(c, ca, &journal_seq,
|
||||
SIZE_MAX, true);
|
||||
if (ret) {
|
||||
bch_err(ca, "error invalidating buckets: %i", ret);
|
||||
goto stop;
|
||||
}
|
||||
|
||||
if (!ca->nr_invalidated) {
|
||||
bch_err(ca, "allocator thread unable to make forward progress!");
|
||||
goto stop;
|
||||
}
|
||||
|
||||
if (ca->allocator_invalidating_data)
|
||||
ret = bch2_journal_flush_seq(&c->journal, journal_seq);
|
||||
else if (ca->allocator_journal_seq_flush)
|
||||
ret = bch2_journal_flush_seq(&c->journal,
|
||||
ca->allocator_journal_seq_flush);
|
||||
|
||||
/*
|
||||
* journal error - buckets haven't actually been
|
||||
* invalidated, can't discard them:
|
||||
*/
|
||||
if (ret) {
|
||||
bch_err(ca, "journal error: %i", ret);
|
||||
goto stop;
|
||||
}
|
||||
if (!fifo_empty(&ca->free_inc)) {
|
||||
up_read(&c->gc_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
pr_debug("free_inc now empty");
|
||||
|
||||
/* Reset front/back so we can easily sort fifo entries later: */
|
||||
ca->free_inc.front = ca->free_inc.back = 0;
|
||||
ca->allocator_journal_seq_flush = 0;
|
||||
ca->allocator_invalidating_data = false;
|
||||
|
||||
down_read(&c->gc_lock);
|
||||
while (1) {
|
||||
size_t prev = fifo_used(&ca->free_inc);
|
||||
|
||||
do {
|
||||
if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
|
||||
up_read(&c->gc_lock);
|
||||
bch_err(ca, "gc failure");
|
||||
@ -1007,56 +995,46 @@ static int bch2_allocator_thread(void *arg)
|
||||
|
||||
pr_debug("scanning for reclaimable buckets");
|
||||
|
||||
find_reclaimable_buckets(c, ca);
|
||||
nr = find_reclaimable_buckets(c, ca);
|
||||
|
||||
pr_debug("found %zu buckets (free_inc %zu/%zu)",
|
||||
fifo_used(&ca->free_inc) - prev,
|
||||
fifo_used(&ca->free_inc), ca->free_inc.size);
|
||||
pr_debug("found %zu buckets", nr);
|
||||
|
||||
trace_alloc_batch(ca, fifo_used(&ca->free_inc),
|
||||
ca->free_inc.size);
|
||||
trace_alloc_batch(ca, nr, ca->alloc_heap.size);
|
||||
|
||||
if ((ca->inc_gen_needs_gc >= ca->free_inc.size ||
|
||||
(!fifo_full(&ca->free_inc) &&
|
||||
ca->inc_gen_really_needs_gc >=
|
||||
fifo_free(&ca->free_inc))) &&
|
||||
if ((ca->inc_gen_needs_gc >= ALLOC_SCAN_BATCH(ca) ||
|
||||
ca->inc_gen_really_needs_gc) &&
|
||||
c->gc_thread) {
|
||||
atomic_inc(&c->kick_gc);
|
||||
wake_up_process(c->gc_thread);
|
||||
}
|
||||
|
||||
if (fifo_full(&ca->free_inc))
|
||||
break;
|
||||
|
||||
if (!fifo_empty(&ca->free_inc) &&
|
||||
!fifo_full(&ca->free[RESERVE_MOVINGGC]))
|
||||
break;
|
||||
|
||||
/*
|
||||
* copygc may be waiting until either its reserve fills
|
||||
* up, or we can't make forward progress:
|
||||
* If we found any buckets, we have to invalidate them
|
||||
* before we scan for more - but if we didn't find very
|
||||
* many we may want to wait on more buckets being
|
||||
* available so we don't spin:
|
||||
*/
|
||||
ca->allocator_blocked = true;
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
if (!nr ||
|
||||
(nr < ALLOC_SCAN_BATCH(ca) &&
|
||||
!fifo_full(&ca->free[RESERVE_MOVINGGC]))) {
|
||||
ca->allocator_blocked = true;
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
|
||||
ret = wait_buckets_available(c, ca);
|
||||
if (ret) {
|
||||
up_read(&c->gc_lock);
|
||||
goto stop;
|
||||
ret = wait_buckets_available(c, ca);
|
||||
if (ret) {
|
||||
up_read(&c->gc_lock);
|
||||
goto stop;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (!nr);
|
||||
|
||||
ca->allocator_blocked = false;
|
||||
up_read(&c->gc_lock);
|
||||
|
||||
pr_debug("free_inc now %zu/%zu",
|
||||
fifo_used(&ca->free_inc),
|
||||
ca->free_inc.size);
|
||||
|
||||
sort_free_inc(c, ca);
|
||||
pr_debug("%zu buckets to invalidate", nr);
|
||||
|
||||
/*
|
||||
* free_inc is now full of newly-invalidated buckets: next,
|
||||
* alloc_heap is now full of newly-invalidated buckets: next,
|
||||
* write out the new bucket gens:
|
||||
*/
|
||||
}
|
||||
@ -1733,7 +1711,7 @@ void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
|
||||
void bch2_recalc_capacity(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
u64 total_capacity, capacity = 0, reserved_sectors = 0;
|
||||
u64 capacity = 0, reserved_sectors = 0, gc_reserve;
|
||||
unsigned long ra_pages = 0;
|
||||
unsigned i, j;
|
||||
|
||||
@ -1748,7 +1726,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
|
||||
bch2_set_ra_pages(c, ra_pages);
|
||||
|
||||
for_each_rw_member(ca, c, i) {
|
||||
size_t reserve = 0;
|
||||
u64 dev_reserve = 0;
|
||||
|
||||
/*
|
||||
* We need to reserve buckets (from the number
|
||||
@ -1767,30 +1745,36 @@ void bch2_recalc_capacity(struct bch_fs *c)
|
||||
* not -ENOSPC calculations.
|
||||
*/
|
||||
for (j = 0; j < RESERVE_NONE; j++)
|
||||
reserve += ca->free[j].size;
|
||||
dev_reserve += ca->free[j].size;
|
||||
|
||||
reserve += ca->free_inc.size;
|
||||
dev_reserve += ca->free_inc.size;
|
||||
|
||||
reserve += ARRAY_SIZE(c->write_points);
|
||||
dev_reserve += ARRAY_SIZE(c->write_points);
|
||||
|
||||
reserve += 1; /* btree write point */
|
||||
dev_reserve += 1; /* btree write point */
|
||||
dev_reserve += 1; /* copygc write point */
|
||||
dev_reserve += 1; /* rebalance write point */
|
||||
dev_reserve += WRITE_POINT_COUNT;
|
||||
|
||||
reserved_sectors += bucket_to_sector(ca, reserve);
|
||||
dev_reserve *= ca->mi.bucket_size;
|
||||
|
||||
ca->copygc_threshold = dev_reserve;
|
||||
|
||||
capacity += bucket_to_sector(ca, ca->mi.nbuckets -
|
||||
ca->mi.first_bucket);
|
||||
|
||||
reserved_sectors += dev_reserve * 2;
|
||||
}
|
||||
|
||||
total_capacity = capacity;
|
||||
gc_reserve = c->opts.gc_reserve_bytes
|
||||
? c->opts.gc_reserve_bytes >> 9
|
||||
: div64_u64(capacity * c->opts.gc_reserve_percent, 100);
|
||||
|
||||
capacity *= (100 - c->opts.gc_reserve_percent);
|
||||
capacity = div64_u64(capacity, 100);
|
||||
reserved_sectors = max(gc_reserve, reserved_sectors);
|
||||
|
||||
BUG_ON(reserved_sectors > total_capacity);
|
||||
reserved_sectors = min(reserved_sectors, capacity);
|
||||
|
||||
capacity = min(capacity, total_capacity - reserved_sectors);
|
||||
|
||||
c->capacity = capacity;
|
||||
c->capacity = capacity - reserved_sectors;
|
||||
|
||||
if (c->capacity) {
|
||||
bch2_io_timer_add(&c->io_clock[READ],
|
||||
@ -1946,39 +1930,83 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void flush_held_btree_writes(struct bch_fs *c)
|
||||
{
|
||||
struct bucket_table *tbl;
|
||||
struct rhash_head *pos;
|
||||
struct btree *b;
|
||||
bool flush_updates;
|
||||
size_t i, nr_pending_updates;
|
||||
|
||||
clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
|
||||
again:
|
||||
pr_debug("flushing dirty btree nodes");
|
||||
cond_resched();
|
||||
|
||||
flush_updates = false;
|
||||
nr_pending_updates = bch2_btree_interior_updates_nr_pending(c);
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_cached_btree(b, c, tbl, i, pos)
|
||||
if (btree_node_dirty(b) && (!b->written || b->level)) {
|
||||
if (btree_node_may_write(b)) {
|
||||
rcu_read_unlock();
|
||||
btree_node_lock_type(c, b, SIX_LOCK_read);
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_read);
|
||||
six_unlock_read(&b->lock);
|
||||
goto again;
|
||||
} else {
|
||||
flush_updates = true;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (c->btree_roots_dirty)
|
||||
bch2_journal_meta(&c->journal);
|
||||
|
||||
/*
|
||||
* This is ugly, but it's needed to flush btree node writes
|
||||
* without spinning...
|
||||
*/
|
||||
if (flush_updates) {
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
bch2_btree_interior_updates_nr_pending(c) <
|
||||
nr_pending_updates);
|
||||
goto again;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void allocator_start_issue_discards(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned dev_iter;
|
||||
size_t i, bu;
|
||||
|
||||
for_each_rw_member(ca, c, dev_iter) {
|
||||
unsigned done = 0;
|
||||
|
||||
fifo_for_each_entry(bu, &ca->free_inc, i) {
|
||||
if (done == ca->nr_invalidated)
|
||||
break;
|
||||
size_t bu;
|
||||
|
||||
for_each_rw_member(ca, c, dev_iter)
|
||||
while (fifo_pop(&ca->free_inc, bu))
|
||||
blkdev_issue_discard(ca->disk_sb.bdev,
|
||||
bucket_to_sector(ca, bu),
|
||||
ca->mi.bucket_size, GFP_NOIO, 0);
|
||||
done++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int __bch2_fs_allocator_start(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
size_t bu, i;
|
||||
unsigned dev_iter;
|
||||
u64 journal_seq = 0;
|
||||
long bu;
|
||||
bool invalidating_data = false;
|
||||
int ret = 0;
|
||||
|
||||
if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
|
||||
return -1;
|
||||
|
||||
if (test_alloc_startup(c)) {
|
||||
invalidating_data = true;
|
||||
goto not_enough;
|
||||
}
|
||||
|
||||
/* Scan for buckets that are already invalidated: */
|
||||
for_each_rw_member(ca, c, dev_iter) {
|
||||
struct btree_iter iter;
|
||||
@ -2003,7 +2031,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
|
||||
percpu_up_read_preempt_enable(&c->usage_lock);
|
||||
|
||||
fifo_push(&ca->free_inc, bu);
|
||||
ca->nr_invalidated++;
|
||||
|
||||
if (fifo_full(&ca->free_inc))
|
||||
break;
|
||||
@ -2022,24 +2049,23 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
|
||||
not_enough:
|
||||
pr_debug("did not find enough empty buckets; issuing discards");
|
||||
|
||||
/* clear out free_inc - find_reclaimable_buckets() assumes it's empty */
|
||||
/* clear out free_inc, we'll be using it again below: */
|
||||
for_each_rw_member(ca, c, dev_iter)
|
||||
discard_invalidated_buckets(c, ca);
|
||||
|
||||
pr_debug("scanning for reclaimable buckets");
|
||||
|
||||
for_each_rw_member(ca, c, dev_iter) {
|
||||
BUG_ON(!fifo_empty(&ca->free_inc));
|
||||
ca->free_inc.front = ca->free_inc.back = 0;
|
||||
|
||||
find_reclaimable_buckets(c, ca);
|
||||
sort_free_inc(c, ca);
|
||||
|
||||
invalidating_data |= ca->allocator_invalidating_data;
|
||||
while (!fifo_full(&ca->free[RESERVE_BTREE]) &&
|
||||
(bu = next_alloc_bucket(ca)) >= 0) {
|
||||
invalidating_data |=
|
||||
bch2_invalidate_one_bucket(c, ca, bu, &journal_seq);
|
||||
|
||||
fifo_for_each_entry(bu, &ca->free_inc, i)
|
||||
if (!fifo_push(&ca->free[RESERVE_BTREE], bu))
|
||||
break;
|
||||
fifo_push(&ca->free[RESERVE_BTREE], bu);
|
||||
set_bit(bu, ca->buckets_dirty);
|
||||
}
|
||||
}
|
||||
|
||||
pr_debug("done scanning for reclaimable buckets");
|
||||
@ -2054,6 +2080,8 @@ not_enough:
|
||||
* invalidated on disk:
|
||||
*/
|
||||
if (invalidating_data) {
|
||||
BUG();
|
||||
pr_info("holding writes");
|
||||
pr_debug("invalidating existing data");
|
||||
set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
|
||||
} else {
|
||||
@ -2065,16 +2093,9 @@ not_enough:
|
||||
* XXX: it's possible for this to deadlock waiting on journal reclaim,
|
||||
* since we're holding btree writes. What then?
|
||||
*/
|
||||
|
||||
for_each_rw_member(ca, c, dev_iter) {
|
||||
ret = bch2_invalidate_free_inc(c, ca, &journal_seq,
|
||||
ca->free[RESERVE_BTREE].size,
|
||||
false);
|
||||
if (ret) {
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
ret = bch2_alloc_write(c);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (invalidating_data) {
|
||||
pr_debug("flushing journal");
|
||||
@ -2087,57 +2108,11 @@ not_enough:
|
||||
allocator_start_issue_discards(c);
|
||||
}
|
||||
|
||||
for_each_rw_member(ca, c, dev_iter)
|
||||
while (ca->nr_invalidated) {
|
||||
BUG_ON(!fifo_pop(&ca->free_inc, bu));
|
||||
ca->nr_invalidated--;
|
||||
}
|
||||
|
||||
set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
|
||||
|
||||
/* now flush dirty btree nodes: */
|
||||
if (invalidating_data) {
|
||||
struct bucket_table *tbl;
|
||||
struct rhash_head *pos;
|
||||
struct btree *b;
|
||||
bool flush_updates;
|
||||
size_t nr_pending_updates;
|
||||
|
||||
clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
|
||||
again:
|
||||
pr_debug("flushing dirty btree nodes");
|
||||
cond_resched();
|
||||
|
||||
flush_updates = false;
|
||||
nr_pending_updates = bch2_btree_interior_updates_nr_pending(c);
|
||||
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_cached_btree(b, c, tbl, i, pos)
|
||||
if (btree_node_dirty(b) && (!b->written || b->level)) {
|
||||
if (btree_node_may_write(b)) {
|
||||
rcu_read_unlock();
|
||||
btree_node_lock_type(c, b, SIX_LOCK_read);
|
||||
bch2_btree_node_write(c, b, SIX_LOCK_read);
|
||||
six_unlock_read(&b->lock);
|
||||
goto again;
|
||||
} else {
|
||||
flush_updates = true;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* This is ugly, but it's needed to flush btree node writes
|
||||
* without spinning...
|
||||
*/
|
||||
if (flush_updates) {
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
bch2_btree_interior_updates_nr_pending(c) <
|
||||
nr_pending_updates);
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
if (invalidating_data)
|
||||
flush_held_btree_writes(c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -9,8 +9,10 @@ struct bch_dev;
|
||||
struct bch_fs;
|
||||
struct bch_devs_List;
|
||||
|
||||
#define ALLOC_SCAN_BATCH(ca) ((ca)->mi.nbuckets >> 9)
|
||||
|
||||
const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_alloc_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
int bch2_alloc_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
|
||||
#define bch2_bkey_alloc_ops (struct bkey_ops) { \
|
||||
.key_invalid = bch2_alloc_invalid, \
|
||||
|
@ -267,6 +267,10 @@ do { \
|
||||
"Store the journal sequence number in the version " \
|
||||
"number of every btree key, and verify that btree " \
|
||||
"update ordering is preserved during recovery") \
|
||||
BCH_DEBUG_PARAM(test_alloc_startup, \
|
||||
"Force allocator startup to use the slowpath where it" \
|
||||
"can't find enough free buckets without invalidating" \
|
||||
"cached data")
|
||||
|
||||
#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG()
|
||||
|
||||
@ -400,7 +404,6 @@ struct bch_dev {
|
||||
alloc_fifo free[RESERVE_NR];
|
||||
alloc_fifo free_inc;
|
||||
spinlock_t freelist_lock;
|
||||
size_t nr_invalidated;
|
||||
|
||||
u8 open_buckets_partial[OPEN_BUCKETS_COUNT];
|
||||
unsigned open_buckets_partial_nr;
|
||||
@ -410,11 +413,8 @@ struct bch_dev {
|
||||
/* last calculated minimum prio */
|
||||
u16 max_last_bucket_io[2];
|
||||
|
||||
atomic_long_t saturated_count;
|
||||
size_t inc_gen_needs_gc;
|
||||
size_t inc_gen_really_needs_gc;
|
||||
u64 allocator_journal_seq_flush;
|
||||
bool allocator_invalidating_data;
|
||||
bool allocator_blocked;
|
||||
|
||||
alloc_heap alloc_heap;
|
||||
@ -424,6 +424,7 @@ struct bch_dev {
|
||||
copygc_heap copygc_heap;
|
||||
struct bch_pd_controller copygc_pd;
|
||||
struct write_point copygc_write_point;
|
||||
u64 copygc_threshold;
|
||||
|
||||
atomic64_t rebalance_work;
|
||||
|
||||
@ -576,6 +577,8 @@ struct bch_fs {
|
||||
struct mutex btree_interior_update_lock;
|
||||
struct closure_waitlist btree_interior_update_wait;
|
||||
|
||||
mempool_t btree_iters_pool;
|
||||
|
||||
struct workqueue_struct *wq;
|
||||
/* copygc needs its own workqueue for index updates.. */
|
||||
struct workqueue_struct *copygc_wq;
|
||||
@ -716,7 +719,7 @@ struct bch_fs {
|
||||
|
||||
struct journal journal;
|
||||
|
||||
unsigned bucket_journal_seq;
|
||||
u64 last_bucket_seq_cleanup;
|
||||
|
||||
/* The rest of this all shows up in sysfs */
|
||||
atomic_long_t read_realloc_races;
|
||||
|
@ -1214,6 +1214,7 @@ LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64);
|
||||
|
||||
LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE,
|
||||
struct bch_sb, flags[2], 0, 4);
|
||||
LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64);
|
||||
|
||||
/* Features: */
|
||||
enum bch_sb_features {
|
||||
|
@ -122,16 +122,27 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
|
||||
|
||||
#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
|
||||
|
||||
int bch2_bpos_to_text(char *buf, size_t size, struct bpos pos)
|
||||
{
|
||||
char *out = buf, *end = buf + size;
|
||||
|
||||
if (!bkey_cmp(pos, POS_MIN))
|
||||
p("POS_MIN");
|
||||
else if (!bkey_cmp(pos, POS_MAX))
|
||||
p("POS_MAX");
|
||||
else
|
||||
p("%llu:%llu", pos.inode, pos.offset);
|
||||
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
int bch2_bkey_to_text(char *buf, size_t size, const struct bkey *k)
|
||||
{
|
||||
char *out = buf, *end = buf + size;
|
||||
|
||||
p("u64s %u type %u ", k->u64s, k->type);
|
||||
|
||||
if (bkey_cmp(k->p, POS_MAX))
|
||||
p("%llu:%llu", k->p.inode, k->p.offset);
|
||||
else
|
||||
p("POS_MAX");
|
||||
out += bch2_bpos_to_text(out, end - out, k->p);
|
||||
|
||||
p(" snap %u len %u ver %llu", k->p.snapshot, k->size, k->version.lo);
|
||||
|
||||
@ -159,7 +170,7 @@ int bch2_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||
break;
|
||||
default:
|
||||
if (k.k->type >= KEY_TYPE_GENERIC_NR && ops->val_to_text)
|
||||
ops->val_to_text(c, buf, size, k);
|
||||
out += ops->val_to_text(c, out, end - out, k);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -56,7 +56,7 @@ struct bkey_ops {
|
||||
struct bkey_s_c);
|
||||
void (*key_debugcheck)(struct bch_fs *, struct btree *,
|
||||
struct bkey_s_c);
|
||||
void (*val_to_text)(struct bch_fs *, char *,
|
||||
int (*val_to_text)(struct bch_fs *, char *,
|
||||
size_t, struct bkey_s_c);
|
||||
void (*swab)(const struct bkey_format *, struct bkey_packed *);
|
||||
key_filter_fn key_normalize;
|
||||
@ -72,6 +72,7 @@ const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c);
|
||||
|
||||
void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
|
||||
|
||||
int bch2_bpos_to_text(char *, size_t, struct bpos);
|
||||
int bch2_bkey_to_text(char *, size_t, const struct bkey *);
|
||||
int bch2_val_to_text(struct bch_fs *, enum bkey_type,
|
||||
char *, size_t, struct bkey_s_c);
|
||||
|
@ -21,14 +21,19 @@
|
||||
#include "alloc_types.h"
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *,
|
||||
struct btree *);
|
||||
|
||||
struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k)
|
||||
{
|
||||
unsigned offset = __btree_node_key_to_offset(b, k);
|
||||
struct bset_tree *t;
|
||||
|
||||
for_each_bset(b, t)
|
||||
if (k >= btree_bkey_first(b, t) &&
|
||||
k < btree_bkey_last(b, t))
|
||||
if (offset <= t->end_offset) {
|
||||
EBUG_ON(offset < btree_bkey_first_offset(t));
|
||||
return t;
|
||||
}
|
||||
|
||||
BUG();
|
||||
}
|
||||
@ -64,8 +69,8 @@ void bch2_dump_bset(struct btree *b, struct bset *i, unsigned set)
|
||||
_n = bkey_next(_k);
|
||||
|
||||
bch2_bkey_to_text(buf, sizeof(buf), &k);
|
||||
printk(KERN_ERR "block %u key %zi/%u: %s\n", set,
|
||||
_k->_data - i->_data, i->u64s, buf);
|
||||
printk(KERN_ERR "block %u key %5u: %s\n", set,
|
||||
__btree_node_key_to_offset(b, _k), buf);
|
||||
|
||||
if (_n == vstruct_last(i))
|
||||
continue;
|
||||
@ -121,20 +126,6 @@ void bch2_dump_btree_node_iter(struct btree *b,
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
|
||||
static bool keys_out_of_order(struct btree *b,
|
||||
const struct bkey_packed *prev,
|
||||
const struct bkey_packed *next,
|
||||
bool is_extents)
|
||||
{
|
||||
struct bkey nextu = bkey_unpack_key(b, next);
|
||||
|
||||
return bkey_cmp_left_packed_byval(b, prev, bkey_start_pos(&nextu)) > 0 ||
|
||||
((is_extents
|
||||
? !bkey_deleted(next)
|
||||
: !bkey_deleted(prev)) &&
|
||||
!bkey_cmp_packed(b, prev, next));
|
||||
}
|
||||
|
||||
void __bch2_verify_btree_nr_keys(struct btree *b)
|
||||
{
|
||||
struct bset_tree *t;
|
||||
@ -151,16 +142,21 @@ void __bch2_verify_btree_nr_keys(struct btree *b)
|
||||
BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
|
||||
}
|
||||
|
||||
static void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
|
||||
struct btree *b,
|
||||
struct bkey_packed *k)
|
||||
static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
|
||||
struct btree *b)
|
||||
{
|
||||
const struct bkey_packed *n = bch2_btree_node_iter_peek_all(iter, b);
|
||||
struct btree_node_iter iter = *_iter;
|
||||
const struct bkey_packed *k, *n;
|
||||
|
||||
k = bch2_btree_node_iter_peek_all(&iter, b);
|
||||
__bch2_btree_node_iter_advance(&iter, b);
|
||||
n = bch2_btree_node_iter_peek_all(&iter, b);
|
||||
|
||||
bkey_unpack_key(b, k);
|
||||
|
||||
if (n &&
|
||||
keys_out_of_order(b, k, n, iter->is_extents)) {
|
||||
__btree_node_iter_cmp(b, k, n) > 0) {
|
||||
struct btree_node_iter_set *set;
|
||||
struct bkey ku = bkey_unpack_key(b, k);
|
||||
struct bkey nu = bkey_unpack_key(b, n);
|
||||
char buf1[80], buf2[80];
|
||||
@ -168,106 +164,104 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
|
||||
bch2_dump_btree_node(b);
|
||||
bch2_bkey_to_text(buf1, sizeof(buf1), &ku);
|
||||
bch2_bkey_to_text(buf2, sizeof(buf2), &nu);
|
||||
panic("out of order/overlapping:\n%s\n%s\n", buf1, buf2);
|
||||
printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n",
|
||||
buf1, buf2);
|
||||
printk(KERN_ERR "iter was:");
|
||||
|
||||
btree_node_iter_for_each(_iter, set) {
|
||||
struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
|
||||
struct bset_tree *t = bch2_bkey_to_bset(b, k);
|
||||
printk(" [%zi %zi]", t - b->set,
|
||||
k->_data - bset(b, t)->_data);
|
||||
}
|
||||
panic("\n");
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
|
||||
struct btree *b)
|
||||
struct btree *b)
|
||||
{
|
||||
struct btree_node_iter_set *set, *prev = NULL;
|
||||
struct btree_node_iter_set *set, *s2;
|
||||
struct bset_tree *t;
|
||||
struct bkey_packed *k, *first;
|
||||
|
||||
if (bch2_btree_node_iter_end(iter))
|
||||
return;
|
||||
/* Verify no duplicates: */
|
||||
btree_node_iter_for_each(iter, set)
|
||||
btree_node_iter_for_each(iter, s2)
|
||||
BUG_ON(set != s2 && set->end == s2->end);
|
||||
|
||||
/* Verify that set->end is correct: */
|
||||
btree_node_iter_for_each(iter, set) {
|
||||
k = __btree_node_offset_to_key(b, set->k);
|
||||
t = bch2_bkey_to_bset(b, k);
|
||||
|
||||
BUG_ON(__btree_node_offset_to_key(b, set->end) !=
|
||||
btree_bkey_last(b, t));
|
||||
|
||||
BUG_ON(prev &&
|
||||
btree_node_iter_cmp(iter, b, *prev, *set) > 0);
|
||||
|
||||
prev = set;
|
||||
for_each_bset(b, t)
|
||||
if (set->end == t->end_offset)
|
||||
goto found;
|
||||
BUG();
|
||||
found:
|
||||
BUG_ON(set->k < btree_bkey_first_offset(t) ||
|
||||
set->k >= t->end_offset);
|
||||
}
|
||||
|
||||
first = __btree_node_offset_to_key(b, iter->data[0].k);
|
||||
|
||||
for_each_bset(b, t)
|
||||
if (bch2_btree_node_iter_bset_pos(iter, b, t) ==
|
||||
btree_bkey_last(b, t) &&
|
||||
(k = bch2_bkey_prev_all(b, t, btree_bkey_last(b, t))))
|
||||
BUG_ON(__btree_node_iter_cmp(iter->is_extents, b,
|
||||
k, first) > 0);
|
||||
/* Verify iterator is sorted: */
|
||||
btree_node_iter_for_each(iter, set)
|
||||
BUG_ON(set != iter->data &&
|
||||
btree_node_iter_cmp(b, set[-1], set[0]) > 0);
|
||||
}
|
||||
|
||||
void bch2_verify_key_order(struct btree *b,
|
||||
struct btree_node_iter *iter,
|
||||
struct bkey_packed *where)
|
||||
void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
|
||||
struct bkey_packed *insert, unsigned clobber_u64s)
|
||||
{
|
||||
struct bset_tree *t = bch2_bkey_to_bset(b, where);
|
||||
struct bkey_packed *k, *prev;
|
||||
struct bkey uk, uw = bkey_unpack_key(b, where);
|
||||
|
||||
k = bch2_bkey_prev_all(b, t, where);
|
||||
if (k &&
|
||||
keys_out_of_order(b, k, where, iter->is_extents)) {
|
||||
char buf1[100], buf2[100];
|
||||
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
|
||||
struct bkey_packed *next = (void *) (where->_data + clobber_u64s);
|
||||
#if 0
|
||||
BUG_ON(prev &&
|
||||
__btree_node_iter_cmp(b, prev, insert) > 0);
|
||||
#else
|
||||
if (prev &&
|
||||
__btree_node_iter_cmp(b, prev, insert) > 0) {
|
||||
struct bkey k1 = bkey_unpack_key(b, prev);
|
||||
struct bkey k2 = bkey_unpack_key(b, insert);
|
||||
char buf1[100];
|
||||
char buf2[100];
|
||||
|
||||
bch2_dump_btree_node(b);
|
||||
uk = bkey_unpack_key(b, k);
|
||||
bch2_bkey_to_text(buf1, sizeof(buf1), &uk);
|
||||
bch2_bkey_to_text(buf2, sizeof(buf2), &uw);
|
||||
panic("out of order with prev:\n%s\n%s\n",
|
||||
buf1, buf2);
|
||||
bch2_bkey_to_text(buf1, sizeof(buf1), &k1);
|
||||
bch2_bkey_to_text(buf2, sizeof(buf2), &k2);
|
||||
|
||||
panic("prev > insert:\n"
|
||||
"prev key %5u %s\n"
|
||||
"insert key %5u %s\n",
|
||||
__btree_node_key_to_offset(b, prev), buf1,
|
||||
__btree_node_key_to_offset(b, insert), buf2);
|
||||
}
|
||||
#endif
|
||||
#if 0
|
||||
BUG_ON(next != btree_bkey_last(b, t) &&
|
||||
__btree_node_iter_cmp(b, insert, next) > 0);
|
||||
#else
|
||||
if (next != btree_bkey_last(b, t) &&
|
||||
__btree_node_iter_cmp(b, insert, next) > 0) {
|
||||
struct bkey k1 = bkey_unpack_key(b, insert);
|
||||
struct bkey k2 = bkey_unpack_key(b, next);
|
||||
char buf1[100];
|
||||
char buf2[100];
|
||||
|
||||
k = bkey_next(where);
|
||||
BUG_ON(k != btree_bkey_last(b, t) &&
|
||||
keys_out_of_order(b, where, k, iter->is_extents));
|
||||
bch2_dump_btree_node(b);
|
||||
bch2_bkey_to_text(buf1, sizeof(buf1), &k1);
|
||||
bch2_bkey_to_text(buf2, sizeof(buf2), &k2);
|
||||
|
||||
for_each_bset(b, t) {
|
||||
if (where >= btree_bkey_first(b, t) ||
|
||||
where < btree_bkey_last(b, t))
|
||||
continue;
|
||||
|
||||
k = bch2_btree_node_iter_bset_pos(iter, b, t);
|
||||
|
||||
if (k == btree_bkey_last(b, t))
|
||||
k = bch2_bkey_prev_all(b, t, k);
|
||||
|
||||
while (bkey_cmp_left_packed_byval(b, k, bkey_start_pos(&uw)) > 0 &&
|
||||
(prev = bch2_bkey_prev_all(b, t, k)))
|
||||
k = prev;
|
||||
|
||||
for (;
|
||||
k != btree_bkey_last(b, t);
|
||||
k = bkey_next(k)) {
|
||||
uk = bkey_unpack_key(b, k);
|
||||
|
||||
if (iter->is_extents) {
|
||||
BUG_ON(!(bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0 ||
|
||||
bkey_cmp(uk.p, bkey_start_pos(&uw)) <= 0));
|
||||
} else {
|
||||
BUG_ON(!bkey_cmp(uw.p, uk.p) &&
|
||||
!bkey_deleted(&uk));
|
||||
}
|
||||
|
||||
if (bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0)
|
||||
break;
|
||||
}
|
||||
panic("insert > next:\n"
|
||||
"insert key %5u %s\n"
|
||||
"next key %5u %s\n",
|
||||
__btree_node_key_to_offset(b, insert), buf1,
|
||||
__btree_node_key_to_offset(b, next), buf2);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
|
||||
struct btree *b,
|
||||
struct bkey_packed *k) {}
|
||||
struct btree *b) {}
|
||||
|
||||
#endif
|
||||
|
||||
@ -622,28 +616,30 @@ static unsigned rw_aux_tree_bsearch(struct btree *b,
|
||||
struct bset_tree *t,
|
||||
unsigned offset)
|
||||
{
|
||||
unsigned l = 0, r = t->size;
|
||||
unsigned bset_offs = offset - btree_bkey_first_offset(t);
|
||||
unsigned bset_u64s = t->end_offset - btree_bkey_first_offset(t);
|
||||
unsigned idx = bset_u64s ? bset_offs * t->size / bset_u64s : 0;
|
||||
|
||||
EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE);
|
||||
EBUG_ON(!t->size);
|
||||
EBUG_ON(idx > t->size);
|
||||
|
||||
while (l < r) {
|
||||
unsigned m = (l + r) >> 1;
|
||||
while (idx < t->size &&
|
||||
rw_aux_tree(b, t)[idx].offset < offset)
|
||||
idx++;
|
||||
|
||||
if (rw_aux_tree(b, t)[m].offset < offset)
|
||||
l = m + 1;
|
||||
else
|
||||
r = m;
|
||||
}
|
||||
while (idx &&
|
||||
rw_aux_tree(b, t)[idx - 1].offset >= offset)
|
||||
idx--;
|
||||
|
||||
EBUG_ON(l < t->size &&
|
||||
rw_aux_tree(b, t)[l].offset < offset);
|
||||
EBUG_ON(l &&
|
||||
rw_aux_tree(b, t)[l - 1].offset >= offset);
|
||||
EBUG_ON(idx < t->size &&
|
||||
rw_aux_tree(b, t)[idx].offset < offset);
|
||||
EBUG_ON(idx && rw_aux_tree(b, t)[idx - 1].offset >= offset);
|
||||
EBUG_ON(idx + 1 < t->size &&
|
||||
rw_aux_tree(b, t)[idx].offset ==
|
||||
rw_aux_tree(b, t)[idx + 1].offset);
|
||||
|
||||
EBUG_ON(l > r);
|
||||
EBUG_ON(l > t->size);
|
||||
|
||||
return l;
|
||||
return idx;
|
||||
}
|
||||
|
||||
static inline unsigned bfloat_mantissa(const struct bkey_float *f,
|
||||
@ -1129,9 +1125,10 @@ static void ro_aux_tree_fix_invalidated_key(struct btree *b,
|
||||
* modified, fix any auxiliary search tree by remaking all the nodes in the
|
||||
* auxiliary search tree that @k corresponds to
|
||||
*/
|
||||
void bch2_bset_fix_invalidated_key(struct btree *b, struct bset_tree *t,
|
||||
struct bkey_packed *k)
|
||||
void bch2_bset_fix_invalidated_key(struct btree *b, struct bkey_packed *k)
|
||||
{
|
||||
struct bset_tree *t = bch2_bkey_to_bset(b, k);
|
||||
|
||||
switch (bset_aux_tree_type(t)) {
|
||||
case BSET_NO_AUX_TREE:
|
||||
break;
|
||||
@ -1158,13 +1155,9 @@ static void bch2_bset_fix_lookup_table(struct btree *b,
|
||||
if (!bset_has_rw_aux_tree(t))
|
||||
return;
|
||||
|
||||
/* returns first entry >= where */
|
||||
l = rw_aux_tree_bsearch(b, t, where);
|
||||
|
||||
/* l is first >= than @where */
|
||||
|
||||
EBUG_ON(l < t->size && rw_aux_tree(b, t)[l].offset < where);
|
||||
EBUG_ON(l && rw_aux_tree(b, t)[l - 1].offset >= where);
|
||||
|
||||
if (!l) /* never delete first entry */
|
||||
l++;
|
||||
else if (l < t->size &&
|
||||
@ -1242,6 +1235,7 @@ void bch2_bset_insert(struct btree *b,
|
||||
struct bkey_packed packed, *src = bkey_to_packed(insert);
|
||||
|
||||
bch2_bset_verify_rw_aux_tree(b, t);
|
||||
bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s);
|
||||
|
||||
if (bch2_bkey_pack_key(&packed, &insert->k, f))
|
||||
src = &packed;
|
||||
@ -1268,7 +1262,6 @@ void bch2_bset_insert(struct btree *b,
|
||||
|
||||
bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s);
|
||||
|
||||
bch2_verify_key_order(b, iter, where);
|
||||
bch2_verify_btree_nr_keys(b);
|
||||
}
|
||||
|
||||
@ -1474,11 +1467,11 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter,
|
||||
noinline __flatten __attribute__((cold))
|
||||
static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
|
||||
struct btree *b, struct bpos search,
|
||||
bool strictly_greater, bool is_extents)
|
||||
bool strictly_greater)
|
||||
{
|
||||
struct bset_tree *t;
|
||||
|
||||
trace_bkey_pack_pos_fail(search);
|
||||
trace_bkey_pack_pos_fail(&search);
|
||||
|
||||
for_each_bset(b, t)
|
||||
__bch2_btree_node_iter_push(iter, b,
|
||||
@ -1531,7 +1524,7 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
|
||||
*/
|
||||
void bch2_btree_node_iter_init(struct btree_node_iter *iter,
|
||||
struct btree *b, struct bpos search,
|
||||
bool strictly_greater, bool is_extents)
|
||||
bool strictly_greater)
|
||||
{
|
||||
struct bset_tree *t;
|
||||
struct bkey_packed p, *packed_search = NULL;
|
||||
@ -1539,7 +1532,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
|
||||
EBUG_ON(bkey_cmp(search, b->data->min_key) < 0);
|
||||
bset_aux_tree_verify(b);
|
||||
|
||||
__bch2_btree_node_iter_init(iter, is_extents);
|
||||
memset(iter, 0, sizeof(*iter));
|
||||
|
||||
switch (bch2_bkey_pack_pos_lossy(&p, search, b)) {
|
||||
case BKEY_PACK_POS_EXACT:
|
||||
@ -1550,7 +1543,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
|
||||
break;
|
||||
case BKEY_PACK_POS_FAIL:
|
||||
btree_node_iter_init_pack_failed(iter, b, search,
|
||||
strictly_greater, is_extents);
|
||||
strictly_greater);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1565,12 +1558,11 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
|
||||
}
|
||||
|
||||
void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter,
|
||||
struct btree *b,
|
||||
bool is_extents)
|
||||
struct btree *b)
|
||||
{
|
||||
struct bset_tree *t;
|
||||
|
||||
__bch2_btree_node_iter_init(iter, is_extents);
|
||||
memset(iter, 0, sizeof(*iter));
|
||||
|
||||
for_each_bset(b, t)
|
||||
__bch2_btree_node_iter_push(iter, b,
|
||||
@ -1598,7 +1590,7 @@ static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter,
|
||||
{
|
||||
bool ret;
|
||||
|
||||
if ((ret = (btree_node_iter_cmp(iter, b,
|
||||
if ((ret = (btree_node_iter_cmp(b,
|
||||
iter->data[first],
|
||||
iter->data[first + 1]) > 0)))
|
||||
swap(iter->data[first], iter->data[first + 1]);
|
||||
@ -1653,23 +1645,14 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
|
||||
btree_node_iter_sort_two(iter, b, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* bch_btree_node_iter_advance - advance @iter by one key
|
||||
*
|
||||
* Doesn't do debugchecks - for cases where (insert_fixup_extent()) a bset might
|
||||
* momentarily have out of order extents.
|
||||
*/
|
||||
void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
|
||||
struct btree *b)
|
||||
{
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
struct bkey_packed *k = bch2_btree_node_iter_peek_all(iter, b);
|
||||
|
||||
__bch2_btree_node_iter_advance(iter, b);
|
||||
bch2_btree_node_iter_next_check(iter, b, k);
|
||||
#else
|
||||
__bch2_btree_node_iter_advance(iter, b);
|
||||
bch2_btree_node_iter_verify(iter, b);
|
||||
bch2_btree_node_iter_next_check(iter, b);
|
||||
#endif
|
||||
__bch2_btree_node_iter_advance(iter, b);
|
||||
}
|
||||
|
||||
static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
|
||||
@ -1702,8 +1685,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite
|
||||
bch2_btree_node_iter_bset_pos(iter, b, t),
|
||||
min_key_type);
|
||||
if (k &&
|
||||
(!prev || __btree_node_iter_cmp(iter->is_extents, b,
|
||||
k, prev) > 0)) {
|
||||
(!prev || __btree_node_iter_cmp(b, k, prev) > 0)) {
|
||||
prev = k;
|
||||
end = t->end_offset;
|
||||
}
|
||||
@ -1736,11 +1718,11 @@ out:
|
||||
struct btree_node_iter iter2 = *iter;
|
||||
|
||||
if (prev)
|
||||
bch2_btree_node_iter_advance(&iter2, b);
|
||||
__bch2_btree_node_iter_advance(&iter2, b);
|
||||
|
||||
while ((k = bch2_btree_node_iter_peek_all(&iter2, b)) != orig_pos) {
|
||||
BUG_ON(k->type >= min_key_type);
|
||||
bch2_btree_node_iter_advance(&iter2, b);
|
||||
__bch2_btree_node_iter_advance(&iter2, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -342,8 +342,7 @@ void bch2_bset_init_first(struct btree *, struct bset *);
|
||||
void bch2_bset_init_next(struct bch_fs *, struct btree *,
|
||||
struct btree_node_entry *);
|
||||
void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool);
|
||||
void bch2_bset_fix_invalidated_key(struct btree *, struct bset_tree *,
|
||||
struct bkey_packed *);
|
||||
void bch2_bset_fix_invalidated_key(struct btree *, struct bkey_packed *);
|
||||
|
||||
void bch2_bset_insert(struct btree *, struct btree_node_iter *,
|
||||
struct bkey_packed *, struct bkey_i *, unsigned);
|
||||
@ -368,6 +367,17 @@ static inline int bkey_cmp_p_or_unp(const struct btree *b,
|
||||
return __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
|
||||
}
|
||||
|
||||
/* Returns true if @k is after iterator position @pos */
|
||||
static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
|
||||
const struct bkey *k)
|
||||
{
|
||||
int cmp = bkey_cmp(k->p, iter->pos);
|
||||
|
||||
return cmp > 0 ||
|
||||
(cmp == 0 &&
|
||||
!(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k));
|
||||
}
|
||||
|
||||
/* Returns true if @k is after iterator position @pos */
|
||||
static inline bool btree_iter_pos_cmp_packed(const struct btree *b,
|
||||
struct bpos *pos,
|
||||
@ -418,7 +428,7 @@ enum bch_extent_overlap {
|
||||
|
||||
/* Returns how k overlaps with m */
|
||||
static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
|
||||
const struct bkey *m)
|
||||
const struct bkey *m)
|
||||
{
|
||||
int cmp1 = bkey_cmp(k->p, m->p) < 0;
|
||||
int cmp2 = bkey_cmp(bkey_start_pos(k),
|
||||
@ -429,20 +439,13 @@ static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
|
||||
|
||||
/* Btree key iteration */
|
||||
|
||||
static inline void __bch2_btree_node_iter_init(struct btree_node_iter *iter,
|
||||
bool is_extents)
|
||||
{
|
||||
iter->is_extents = is_extents;
|
||||
memset(iter->data, 0, sizeof(iter->data));
|
||||
}
|
||||
|
||||
void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,
|
||||
const struct bkey_packed *,
|
||||
const struct bkey_packed *);
|
||||
void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *,
|
||||
struct bpos, bool, bool);
|
||||
struct bpos, bool);
|
||||
void bch2_btree_node_iter_init_from_start(struct btree_node_iter *,
|
||||
struct btree *, bool);
|
||||
struct btree *);
|
||||
struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *,
|
||||
struct btree *,
|
||||
struct bset_tree *);
|
||||
@ -469,32 +472,21 @@ static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
|
||||
return __btree_node_iter_set_end(iter, 0);
|
||||
}
|
||||
|
||||
static inline int __btree_node_iter_cmp(bool is_extents,
|
||||
struct btree *b,
|
||||
struct bkey_packed *l,
|
||||
struct bkey_packed *r)
|
||||
static inline int __btree_node_iter_cmp(struct btree *b,
|
||||
const struct bkey_packed *l,
|
||||
const struct bkey_packed *r)
|
||||
{
|
||||
/*
|
||||
* For non extents, when keys compare equal the deleted keys have to
|
||||
* come first - so that bch2_btree_node_iter_next_check() can detect
|
||||
* duplicate nondeleted keys (and possibly other reasons?)
|
||||
*
|
||||
* For extents, bkey_deleted() is used as a proxy for k->size == 0, so
|
||||
* deleted keys have to sort last.
|
||||
*/
|
||||
/* When keys compare equal deleted keys come first */
|
||||
return bkey_cmp_packed(b, l, r)
|
||||
?: (is_extents
|
||||
? (int) bkey_deleted(l) - (int) bkey_deleted(r)
|
||||
: (int) bkey_deleted(r) - (int) bkey_deleted(l))
|
||||
?: (int) bkey_deleted(r) - (int) bkey_deleted(l)
|
||||
?: (l > r) - (l < r);
|
||||
}
|
||||
|
||||
static inline int btree_node_iter_cmp(struct btree_node_iter *iter,
|
||||
struct btree *b,
|
||||
static inline int btree_node_iter_cmp(struct btree *b,
|
||||
struct btree_node_iter_set l,
|
||||
struct btree_node_iter_set r)
|
||||
{
|
||||
return __btree_node_iter_cmp(iter->is_extents, b,
|
||||
return __btree_node_iter_cmp(b,
|
||||
__btree_node_offset_to_key(b, l.k),
|
||||
__btree_node_offset_to_key(b, r.k));
|
||||
}
|
||||
@ -581,21 +573,12 @@ bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
|
||||
return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterates over all _live_ keys - skipping deleted (and potentially
|
||||
* overlapping) keys
|
||||
*/
|
||||
#define for_each_btree_node_key(b, k, iter, _is_extents) \
|
||||
for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
|
||||
((k) = bch2_btree_node_iter_peek(iter, b)); \
|
||||
bch2_btree_node_iter_advance(iter, b))
|
||||
|
||||
struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
|
||||
struct btree *,
|
||||
struct bkey *);
|
||||
|
||||
#define for_each_btree_node_key_unpack(b, k, iter, _is_extents, unpacked)\
|
||||
for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\
|
||||
#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \
|
||||
for (bch2_btree_node_iter_init_from_start((iter), (b)); \
|
||||
(k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\
|
||||
bch2_btree_node_iter_advance(iter, b))
|
||||
|
||||
@ -620,6 +603,13 @@ static inline void btree_keys_account_key(struct btree_nr_keys *n,
|
||||
#define btree_keys_account_key_drop(_nr, _bset_idx, _k) \
|
||||
btree_keys_account_key(_nr, _bset_idx, _k, -1)
|
||||
|
||||
/*
 * Account for key @_k in @_b's key counts (@_b->nr) with a delta of +1.
 * The bset index is derived by looking up the bset that contains @_k with
 * bch2_bkey_to_bset() and subtracting the base of @_b->set.
 * Note: @_b is evaluated more than once.
 */
#define btree_account_key_add(_b, _k) \
|
||||
btree_keys_account_key(&(_b)->nr, \
|
||||
bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, 1)
|
||||
/*
 * Counterpart of btree_account_key_add(): same lookup, delta of -1.
 * Note: @_b is evaluated more than once.
 */
#define btree_account_key_drop(_b, _k) \
|
||||
btree_keys_account_key(&(_b)->nr, \
|
||||
bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, -1)
|
||||
|
||||
struct bset_stats {
|
||||
struct {
|
||||
size_t nr, bytes;
|
||||
@ -645,17 +635,18 @@ void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *);
|
||||
|
||||
void __bch2_verify_btree_nr_keys(struct btree *);
|
||||
void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
|
||||
void bch2_verify_key_order(struct btree *, struct btree_node_iter *,
|
||||
struct bkey_packed *);
|
||||
void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
|
||||
struct bkey_packed *, unsigned);
|
||||
|
||||
#else
|
||||
|
||||
static inline void __bch2_verify_btree_nr_keys(struct btree *b) {}
|
||||
static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
|
||||
struct btree *b) {}
|
||||
static inline void bch2_verify_key_order(struct btree *b,
|
||||
struct btree_node_iter *iter,
|
||||
struct bkey_packed *where) {}
|
||||
static inline void bch2_verify_insert_pos(struct btree *b,
|
||||
struct bkey_packed *where,
|
||||
struct bkey_packed *insert,
|
||||
unsigned clobber_u64s) {}
|
||||
#endif
|
||||
|
||||
static inline void bch2_verify_btree_nr_keys(struct btree *b)
|
||||
|
@ -122,13 +122,14 @@ static u8 bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
|
||||
|
||||
switch (type) {
|
||||
case BKEY_TYPE_BTREE:
|
||||
bch2_mark_key(c, k, c->opts.btree_node_size, true, pos, NULL,
|
||||
bch2_mark_key(c, k, c->opts.btree_node_size,
|
||||
BCH_DATA_BTREE, pos, NULL,
|
||||
0, flags|
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
|
||||
BCH_BUCKET_MARK_GC_LOCK_HELD);
|
||||
break;
|
||||
case BKEY_TYPE_EXTENTS:
|
||||
bch2_mark_key(c, k, k.k->size, false, pos, NULL,
|
||||
bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
|
||||
0, flags|
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
|
||||
BCH_BUCKET_MARK_GC_LOCK_HELD);
|
||||
@ -215,7 +216,6 @@ static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
|
||||
|
||||
if (btree_node_has_ptrs(b))
|
||||
for_each_btree_node_key_unpack(b, k, &iter,
|
||||
btree_node_is_extents(b),
|
||||
&unpacked) {
|
||||
bch2_bkey_debugcheck(c, b, k);
|
||||
stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
|
||||
@ -324,9 +324,16 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
|
||||
unsigned i;
|
||||
u64 b;
|
||||
|
||||
/*
|
||||
* This conditional is kind of gross, but we may be called from the
|
||||
* device add path, before the new device has actually been added to the
|
||||
* running filesystem:
|
||||
*/
|
||||
if (c) {
|
||||
lockdep_assert_held(&c->sb_lock);
|
||||
percpu_down_read_preempt_disable(&c->usage_lock);
|
||||
} else {
|
||||
preempt_disable();
|
||||
}
|
||||
|
||||
for (i = 0; i < layout->nr_superblocks; i++) {
|
||||
@ -354,6 +361,8 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
|
||||
if (c) {
|
||||
percpu_up_read_preempt_enable(&c->usage_lock);
|
||||
spin_unlock(&c->journal.lock);
|
||||
} else {
|
||||
preempt_enable();
|
||||
}
|
||||
}
|
||||
|
||||
@ -386,7 +395,8 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
|
||||
for_each_pending_btree_node_free(c, as, d)
|
||||
if (d->index_update_done)
|
||||
bch2_mark_key(c, bkey_i_to_s_c(&d->key),
|
||||
c->opts.btree_node_size, true, pos,
|
||||
c->opts.btree_node_size,
|
||||
BCH_DATA_BTREE, pos,
|
||||
&stats, 0,
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
|
||||
BCH_BUCKET_MARK_GC_LOCK_HELD);
|
||||
@ -479,7 +489,8 @@ static void bch2_gc_start(struct bch_fs *c)
|
||||
struct bch_fs_usage *p =
|
||||
per_cpu_ptr(c->usage_percpu, cpu);
|
||||
|
||||
memset(p->s, 0, sizeof(p->s));
|
||||
memset(p->replicas, 0, sizeof(p->replicas));
|
||||
memset(p->buckets, 0, sizeof(p->buckets));
|
||||
}
|
||||
|
||||
percpu_up_write(&c->usage_lock);
|
||||
@ -558,9 +569,6 @@ void bch2_gc(struct bch_fs *c)
|
||||
bch2_mark_pending_btree_node_frees(c);
|
||||
bch2_mark_allocator_buckets(c);
|
||||
|
||||
for_each_member_device(ca, c, i)
|
||||
atomic_long_set(&ca->saturated_count, 0);
|
||||
|
||||
/* Indicates that gc is no longer in progress: */
|
||||
gc_pos_set(c, gc_phase(GC_PHASE_DONE));
|
||||
c->gc_count++;
|
||||
@ -587,15 +595,14 @@ out:
|
||||
|
||||
static void recalc_packed_keys(struct btree *b)
|
||||
{
|
||||
struct bset *i = btree_bset_first(b);
|
||||
struct bkey_packed *k;
|
||||
|
||||
memset(&b->nr, 0, sizeof(b->nr));
|
||||
|
||||
BUG_ON(b->nsets != 1);
|
||||
|
||||
for (k = btree_bkey_first(b, b->set);
|
||||
k != btree_bkey_last(b, b->set);
|
||||
k = bkey_next(k))
|
||||
vstruct_for_each(i, k)
|
||||
btree_keys_account_key_add(&b->nr, 0, k);
|
||||
}
|
||||
|
||||
@ -1032,7 +1039,6 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
|
||||
struct bkey_s_c k;
|
||||
|
||||
for_each_btree_node_key_unpack(b, k, &node_iter,
|
||||
btree_node_is_extents(b),
|
||||
&unpacked) {
|
||||
ret = bch2_btree_mark_key_initial(c,
|
||||
btree_node_type(b), k);
|
||||
|
@ -22,7 +22,7 @@
|
||||
/* btree_node_iter_large: */
|
||||
|
||||
#define btree_node_iter_cmp_heap(h, _l, _r) \
|
||||
__btree_node_iter_cmp((iter)->is_extents, b, \
|
||||
__btree_node_iter_cmp(b, \
|
||||
__btree_node_offset_to_key(b, (_l).k), \
|
||||
__btree_node_offset_to_key(b, (_r).k))
|
||||
|
||||
@ -248,6 +248,9 @@ static unsigned sort_extent_whiteouts(struct bkey_packed *dst,
|
||||
sort_iter_sort(iter, sort_extent_whiteouts_cmp);
|
||||
|
||||
while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
|
||||
if (bkey_deleted(in))
|
||||
continue;
|
||||
|
||||
EBUG_ON(bkeyp_val_u64s(f, in));
|
||||
EBUG_ON(in->type != KEY_TYPE_DISCARD);
|
||||
|
||||
@ -309,7 +312,7 @@ static unsigned should_compact_bset(struct btree *b, struct bset_tree *t,
|
||||
|
||||
if (mode == COMPACT_LAZY) {
|
||||
if (should_compact_bset_lazy(b, t) ||
|
||||
(compacting && bset_unwritten(b, bset(b, t))))
|
||||
(compacting && !bset_written(b, bset(b, t))))
|
||||
return dead_u64s;
|
||||
} else {
|
||||
if (bset_written(b, bset(b, t)))
|
||||
@ -356,7 +359,7 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
|
||||
struct bkey_packed *k, *n, *out, *start, *end;
|
||||
struct btree_node_entry *src = NULL, *dst = NULL;
|
||||
|
||||
if (t != b->set && bset_unwritten(b, i)) {
|
||||
if (t != b->set && !bset_written(b, i)) {
|
||||
src = container_of(i, struct btree_node_entry, keys);
|
||||
dst = max(write_block(b),
|
||||
(void *) btree_bkey_last(b, t -1));
|
||||
@ -396,7 +399,7 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
|
||||
continue;
|
||||
|
||||
if (bkey_whiteout(k)) {
|
||||
unreserve_whiteout(b, t, k);
|
||||
unreserve_whiteout(b, k);
|
||||
memcpy_u64s(u_pos, k, bkeyp_key_u64s(f, k));
|
||||
set_bkeyp_val_u64s(f, u_pos, 0);
|
||||
u_pos = bkey_next(u_pos);
|
||||
@ -467,7 +470,7 @@ static bool bch2_drop_whiteouts(struct btree *b)
|
||||
start = btree_bkey_first(b, t);
|
||||
end = btree_bkey_last(b, t);
|
||||
|
||||
if (bset_unwritten(b, i) &&
|
||||
if (!bset_written(b, i) &&
|
||||
t != b->set) {
|
||||
struct bset *dst =
|
||||
max_t(struct bset *, write_block(b),
|
||||
@ -785,8 +788,7 @@ void bch2_btree_sort_into(struct bch_fs *c,
|
||||
|
||||
bch2_bset_set_no_aux_tree(dst, dst->set);
|
||||
|
||||
bch2_btree_node_iter_init_from_start(&src_iter, src,
|
||||
btree_node_is_extents(src));
|
||||
bch2_btree_node_iter_init_from_start(&src_iter, src);
|
||||
|
||||
if (btree_node_ops(src)->key_normalize ||
|
||||
btree_node_ops(src)->key_merge)
|
||||
@ -829,7 +831,7 @@ static bool btree_node_compact(struct bch_fs *c, struct btree *b,
|
||||
for (unwritten_idx = 0;
|
||||
unwritten_idx < b->nsets;
|
||||
unwritten_idx++)
|
||||
if (bset_unwritten(b, bset(b, &b->set[unwritten_idx])))
|
||||
if (!bset_written(b, bset(b, &b->set[unwritten_idx])))
|
||||
break;
|
||||
|
||||
if (b->nsets - unwritten_idx > 1) {
|
||||
@ -852,7 +854,7 @@ void bch2_btree_build_aux_trees(struct btree *b)
|
||||
|
||||
for_each_bset(b, t)
|
||||
bch2_bset_build_aux_tree(b, t,
|
||||
bset_unwritten(b, bset(b, t)) &&
|
||||
!bset_written(b, bset(b, t)) &&
|
||||
t == bset_tree_last(b));
|
||||
}
|
||||
|
||||
@ -1171,7 +1173,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
||||
int ret, retry_read = 0, write = READ;
|
||||
|
||||
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
|
||||
__bch2_btree_node_iter_large_init(iter, btree_node_is_extents(b));
|
||||
iter->used = 0;
|
||||
|
||||
if (bch2_meta_read_fault("btree"))
|
||||
btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL,
|
||||
@ -1945,9 +1947,9 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
|
||||
clear_btree_node_just_written(b);
|
||||
|
||||
/*
|
||||
* Note: immediately after write, bset_unwritten()/bset_written() don't
|
||||
* work - the amount of data we had to write after compaction might have
|
||||
* been smaller than the offset of the last bset.
|
||||
* Note: immediately after write, bset_written() doesn't work - the
|
||||
* amount of data we had to write after compaction might have been
|
||||
* smaller than the offset of the last bset.
|
||||
*
|
||||
* However, we know that all bsets have been written here, as long as
|
||||
* we're still holding the write lock:
|
||||
|
@ -145,20 +145,11 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *);
|
||||
/* Sorting */
|
||||
|
||||
struct btree_node_iter_large {
|
||||
u8 is_extents;
|
||||
u16 used;
|
||||
|
||||
struct btree_node_iter_set data[MAX_BSETS];
|
||||
};
|
||||
|
||||
static inline void
|
||||
__bch2_btree_node_iter_large_init(struct btree_node_iter_large *iter,
|
||||
bool is_extents)
|
||||
{
|
||||
iter->used = 0;
|
||||
iter->is_extents = is_extents;
|
||||
}
|
||||
|
||||
void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *,
|
||||
struct btree *);
|
||||
|
||||
|
@ -34,10 +34,10 @@ void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter)
|
||||
struct btree_iter *linked;
|
||||
|
||||
EBUG_ON(iter->l[b->level].b != b);
|
||||
EBUG_ON(iter->lock_seq[b->level] + 1 != b->lock.state.seq);
|
||||
EBUG_ON(iter->l[b->level].lock_seq + 1 != b->lock.state.seq);
|
||||
|
||||
for_each_btree_iter_with_node(iter, b, linked)
|
||||
linked->lock_seq[b->level] += 2;
|
||||
linked->l[b->level].lock_seq += 2;
|
||||
|
||||
six_unlock_write(&b->lock);
|
||||
}
|
||||
@ -68,26 +68,6 @@ void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
|
||||
&b->lock.state.counter);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock a btree node if we already have it locked on one of our linked
|
||||
* iterators:
|
||||
*/
|
||||
static inline bool btree_node_lock_increment(struct btree_iter *iter,
|
||||
struct btree *b, unsigned level,
|
||||
enum btree_node_locked_type want)
|
||||
{
|
||||
struct btree_iter *linked;
|
||||
|
||||
for_each_linked_btree_iter(iter, linked)
|
||||
if (linked->l[level].b == b &&
|
||||
btree_node_locked_type(linked, level) >= want) {
|
||||
six_lock_increment(&b->lock, want);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
|
||||
{
|
||||
struct btree *b = btree_iter_node(iter, level);
|
||||
@ -99,8 +79,8 @@ bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
|
||||
if (race_fault())
|
||||
return false;
|
||||
|
||||
if (!six_relock_type(&b->lock, want, iter->lock_seq[level]) &&
|
||||
!(iter->lock_seq[level] >> 1 == b->lock.state.seq >> 1 &&
|
||||
if (!six_relock_type(&b->lock, want, iter->l[level].lock_seq) &&
|
||||
!(iter->l[level].lock_seq >> 1 == b->lock.state.seq >> 1 &&
|
||||
btree_node_lock_increment(iter, b, level, want)))
|
||||
return false;
|
||||
|
||||
@ -125,10 +105,10 @@ static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
|
||||
|
||||
if (btree_node_locked(iter, level)
|
||||
? six_lock_tryupgrade(&b->lock)
|
||||
: six_relock_type(&b->lock, SIX_LOCK_intent, iter->lock_seq[level]))
|
||||
: six_relock_type(&b->lock, SIX_LOCK_intent, iter->l[level].lock_seq))
|
||||
goto success;
|
||||
|
||||
if (iter->lock_seq[level] >> 1 == b->lock.state.seq >> 1 &&
|
||||
if (iter->l[level].lock_seq >> 1 == b->lock.state.seq >> 1 &&
|
||||
btree_node_lock_increment(iter, b, level, BTREE_NODE_INTENT_LOCKED)) {
|
||||
btree_node_unlock(iter, level);
|
||||
goto success;
|
||||
@ -189,34 +169,12 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
struct btree_iter *linked;
|
||||
bool ret = true;
|
||||
|
||||
/* Can't have children locked before ancestors: */
|
||||
EBUG_ON(iter->nodes_locked && level > __ffs(iter->nodes_locked));
|
||||
|
||||
/*
|
||||
* Can't hold any read locks while we block taking an intent lock - see
|
||||
* below for reasoning, and we should have already dropped any read
|
||||
* locks in the current iterator
|
||||
*/
|
||||
EBUG_ON(type == SIX_LOCK_intent &&
|
||||
iter->nodes_locked != iter->nodes_intent_locked);
|
||||
|
||||
if (btree_node_lock_increment(iter, b, level, type))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Must lock btree nodes in key order - this case happens when locking
|
||||
* the prev sibling in btree node merging:
|
||||
*/
|
||||
if (iter->nodes_locked &&
|
||||
__ffs(iter->nodes_locked) <= level &&
|
||||
__btree_iter_cmp(iter->btree_id, pos, iter))
|
||||
return false;
|
||||
|
||||
for_each_linked_btree_iter(iter, linked) {
|
||||
/* Check if it's safe to block: */
|
||||
for_each_btree_iter(iter, linked) {
|
||||
if (!linked->nodes_locked)
|
||||
continue;
|
||||
|
||||
/* We have to lock btree nodes in key order: */
|
||||
/* * Must lock btree nodes in key order: */
|
||||
if (__btree_iter_cmp(iter->btree_id, pos, linked) < 0)
|
||||
ret = false;
|
||||
|
||||
@ -251,9 +209,10 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
if (linked->btree_id == iter->btree_id &&
|
||||
level > __fls(linked->nodes_locked)) {
|
||||
if (may_drop_locks) {
|
||||
linked->locks_want = max_t(unsigned,
|
||||
linked->locks_want,
|
||||
iter->locks_want);
|
||||
linked->locks_want =
|
||||
max(level + 1, max_t(unsigned,
|
||||
linked->locks_want,
|
||||
iter->locks_want));
|
||||
btree_iter_get_locks(linked, true);
|
||||
}
|
||||
ret = false;
|
||||
@ -415,14 +374,20 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
|
||||
struct btree_node_iter tmp = l->iter;
|
||||
struct bkey_packed *k;
|
||||
|
||||
if (iter->uptodate > BTREE_ITER_NEED_PEEK)
|
||||
return;
|
||||
|
||||
bch2_btree_node_iter_verify(&l->iter, b);
|
||||
|
||||
/*
|
||||
* For interior nodes, the iterator will have skipped past
|
||||
* deleted keys:
|
||||
*
|
||||
* For extents, the iterator may have skipped past deleted keys (but not
|
||||
* whiteouts)
|
||||
*/
|
||||
k = b->level
|
||||
? bch2_btree_node_iter_prev(&tmp, b)
|
||||
k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS
|
||||
? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD)
|
||||
: bch2_btree_node_iter_prev_all(&tmp, b);
|
||||
if (k && btree_iter_pos_cmp_packed(b, &iter->pos, k,
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS)) {
|
||||
@ -430,7 +395,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
|
||||
struct bkey uk = bkey_unpack_key(b, k);
|
||||
|
||||
bch2_bkey_to_text(buf, sizeof(buf), &uk);
|
||||
panic("prev key should be before after pos:\n%s\n%llu:%llu\n",
|
||||
panic("prev key should be before iter pos:\n%s\n%llu:%llu\n",
|
||||
buf, iter->pos.inode, iter->pos.offset);
|
||||
}
|
||||
|
||||
@ -441,15 +406,16 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter,
|
||||
struct bkey uk = bkey_unpack_key(b, k);
|
||||
|
||||
bch2_bkey_to_text(buf, sizeof(buf), &uk);
|
||||
panic("next key should be before iter pos:\n%llu:%llu\n%s\n",
|
||||
panic("iter should be after current key:\n"
|
||||
"iter pos %llu:%llu\n"
|
||||
"cur key %s\n",
|
||||
iter->pos.inode, iter->pos.offset, buf);
|
||||
}
|
||||
|
||||
if (iter->uptodate == BTREE_ITER_UPTODATE &&
|
||||
(iter->flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES) {
|
||||
BUG_ON(!bkey_whiteout(&iter->k) &&
|
||||
bch2_btree_node_iter_end(&l->iter));
|
||||
}
|
||||
BUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
|
||||
(iter->flags & BTREE_ITER_TYPE) == BTREE_ITER_KEYS &&
|
||||
!bkey_whiteout(&iter->k) &&
|
||||
bch2_btree_node_iter_end(&l->iter));
|
||||
}
|
||||
|
||||
void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
|
||||
@ -460,6 +426,11 @@ void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b)
|
||||
__bch2_btree_iter_verify(linked, b);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline void __bch2_btree_iter_verify(struct btree_iter *iter,
|
||||
struct btree *b) {}
|
||||
|
||||
#endif
|
||||
|
||||
static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
|
||||
@ -474,7 +445,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
|
||||
struct btree_node_iter_set *set;
|
||||
unsigned offset = __btree_node_key_to_offset(b, where);
|
||||
int shift = new_u64s - clobber_u64s;
|
||||
unsigned old_end = (int) __btree_node_key_to_offset(b, end) - shift;
|
||||
unsigned old_end = t->end_offset - shift;
|
||||
|
||||
btree_node_iter_for_each(node_iter, set)
|
||||
if (set->end == old_end)
|
||||
@ -496,7 +467,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
|
||||
}
|
||||
return;
|
||||
found:
|
||||
set->end = (int) set->end + shift;
|
||||
set->end = t->end_offset;
|
||||
|
||||
/* Iterator hasn't gotten to the key that changed yet: */
|
||||
if (set->k < offset)
|
||||
@ -557,8 +528,7 @@ iter_current_key_not_modified:
|
||||
k = bch2_bkey_prev_all(b, t,
|
||||
bch2_btree_node_iter_bset_pos(node_iter, b, t));
|
||||
if (k &&
|
||||
__btree_node_iter_cmp(node_iter, b,
|
||||
k, where) > 0) {
|
||||
__btree_node_iter_cmp(b, k, where) > 0) {
|
||||
struct btree_node_iter_set *set;
|
||||
unsigned offset =
|
||||
__btree_node_key_to_offset(b, bkey_next(k));
|
||||
@ -580,13 +550,13 @@ next_bset:
|
||||
}
|
||||
|
||||
void bch2_btree_node_iter_fix(struct btree_iter *iter,
|
||||
struct btree *b,
|
||||
struct btree_node_iter *node_iter,
|
||||
struct bset_tree *t,
|
||||
struct bkey_packed *where,
|
||||
unsigned clobber_u64s,
|
||||
unsigned new_u64s)
|
||||
struct btree *b,
|
||||
struct btree_node_iter *node_iter,
|
||||
struct bkey_packed *where,
|
||||
unsigned clobber_u64s,
|
||||
unsigned new_u64s)
|
||||
{
|
||||
struct bset_tree *t = bch2_bkey_to_bset(b, where);
|
||||
struct btree_iter *linked;
|
||||
|
||||
if (node_iter != &iter->l[b->level].iter)
|
||||
@ -597,10 +567,6 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
|
||||
__bch2_btree_node_iter_fix(linked, b,
|
||||
&linked->l[b->level].iter, t,
|
||||
where, clobber_u64s, new_u64s);
|
||||
|
||||
/* interior node iterators are... special... */
|
||||
if (!b->level)
|
||||
bch2_btree_iter_verify(iter, b);
|
||||
}
|
||||
|
||||
static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
|
||||
@ -687,17 +653,6 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
|
||||
btree_node_unlock(iter, b->level + 1);
|
||||
}
|
||||
|
||||
/* Returns true if @k is after iterator position @pos */
|
||||
static inline bool btree_iter_pos_cmp(struct btree_iter *iter,
|
||||
const struct bkey *k)
|
||||
{
|
||||
int cmp = bkey_cmp(k->p, iter->pos);
|
||||
|
||||
return cmp > 0 ||
|
||||
(cmp == 0 &&
|
||||
!(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k));
|
||||
}
|
||||
|
||||
static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
|
||||
struct btree *b)
|
||||
{
|
||||
@ -719,8 +674,7 @@ static inline void __btree_iter_init(struct btree_iter *iter,
|
||||
struct btree_iter_level *l = &iter->l[b->level];
|
||||
|
||||
bch2_btree_node_iter_init(&l->iter, b, iter->pos,
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS,
|
||||
btree_node_is_extents(b));
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS);
|
||||
|
||||
/* Skip to first non whiteout: */
|
||||
if (b->level)
|
||||
@ -737,7 +691,7 @@ static inline void btree_iter_node_set(struct btree_iter *iter,
|
||||
EBUG_ON(!btree_iter_pos_in_node(iter, b));
|
||||
EBUG_ON(b->lock.state.seq & 1);
|
||||
|
||||
iter->lock_seq[b->level] = b->lock.state.seq;
|
||||
iter->l[b->level].lock_seq = b->lock.state.seq;
|
||||
iter->l[b->level].b = b;
|
||||
__btree_iter_init(iter, b);
|
||||
}
|
||||
@ -1020,8 +974,6 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
if (__bch2_btree_iter_relock(iter))
|
||||
return 0;
|
||||
|
||||
iter->flags &= ~BTREE_ITER_AT_END_OF_LEAF;
|
||||
|
||||
/*
|
||||
* XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos
|
||||
* here unnecessary
|
||||
@ -1062,7 +1014,9 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
}
|
||||
|
||||
iter->uptodate = BTREE_ITER_NEED_PEEK;
|
||||
|
||||
bch2_btree_iter_verify_locks(iter);
|
||||
__bch2_btree_iter_verify(iter, iter->l[iter->level].b);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1083,7 +1037,6 @@ static inline void bch2_btree_iter_checks(struct btree_iter *iter,
|
||||
enum btree_iter_type type)
|
||||
{
|
||||
EBUG_ON(iter->btree_id >= BTREE_ID_NR);
|
||||
EBUG_ON((iter->flags & BTREE_ITER_TYPE) != type);
|
||||
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
|
||||
(iter->btree_id == BTREE_ID_EXTENTS &&
|
||||
type != BTREE_ITER_NODES));
|
||||
@ -1199,10 +1152,8 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
|
||||
iter->flags & BTREE_ITER_IS_EXTENTS))
|
||||
__btree_iter_advance(l);
|
||||
|
||||
if (!k && btree_iter_pos_after_node(iter, l->b)) {
|
||||
if (!k && btree_iter_pos_after_node(iter, l->b))
|
||||
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
|
||||
iter->flags |= BTREE_ITER_AT_END_OF_LEAF;
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
|
||||
@ -1403,9 +1354,10 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
|
||||
}
|
||||
|
||||
static inline struct bkey_s_c
|
||||
__bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_iter_level *l = &iter->l[0];
|
||||
struct btree_node_iter node_iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey n;
|
||||
int ret;
|
||||
@ -1416,6 +1368,17 @@ recheck:
|
||||
bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0)
|
||||
__btree_iter_advance(l);
|
||||
|
||||
/*
|
||||
* iterator is now at the correct position for inserting at iter->pos,
|
||||
* but we need to keep iterating until we find the first non whiteout so
|
||||
* we know how big a hole we have, if any:
|
||||
*/
|
||||
|
||||
node_iter = l->iter;
|
||||
if (k.k && bkey_whiteout(k.k))
|
||||
k = __btree_iter_unpack(iter, l, &iter->k,
|
||||
bch2_btree_node_iter_peek(&node_iter, l->b));
|
||||
|
||||
/*
|
||||
* If we got to the end of the node, check if we need to traverse to the
|
||||
* next node:
|
||||
@ -1432,6 +1395,13 @@ recheck:
|
||||
if (k.k &&
|
||||
!bkey_whiteout(k.k) &&
|
||||
bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
|
||||
/*
|
||||
* if we skipped forward to find the first non whiteout and
|
||||
* there _wasn't_ actually a hole, we want the iterator to be
|
||||
* pointed at the key we found:
|
||||
*/
|
||||
l->iter = node_iter;
|
||||
|
||||
EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
|
||||
EBUG_ON(bkey_deleted(k.k));
|
||||
iter->uptodate = BTREE_ITER_UPTODATE;
|
||||
@ -1439,36 +1409,39 @@ recheck:
|
||||
}
|
||||
|
||||
/* hole */
|
||||
|
||||
/* holes can't span inode numbers: */
|
||||
if (iter->pos.offset == KEY_OFFSET_MAX) {
|
||||
if (iter->pos.inode == KEY_INODE_MAX)
|
||||
return bkey_s_c_null;
|
||||
|
||||
iter->pos = bkey_successor(iter->pos);
|
||||
goto recheck;
|
||||
}
|
||||
|
||||
if (!k.k)
|
||||
k.k = &l->b->key.k;
|
||||
|
||||
bkey_init(&n);
|
||||
n.p = iter->pos;
|
||||
bch2_key_resize(&n,
|
||||
min_t(u64, KEY_SIZE_MAX,
|
||||
(k.k->p.inode == n.p.inode
|
||||
? bkey_start_offset(k.k)
|
||||
: KEY_OFFSET_MAX) -
|
||||
n.p.offset));
|
||||
|
||||
if (iter->flags & BTREE_ITER_IS_EXTENTS) {
|
||||
if (n.p.offset == KEY_OFFSET_MAX) {
|
||||
if (n.p.inode == KEY_INODE_MAX)
|
||||
return bkey_s_c_null;
|
||||
//EBUG_ON(!n.size);
|
||||
if (!n.size) {
|
||||
char buf[100];
|
||||
bch2_dump_btree_node(iter->l[0].b);
|
||||
|
||||
iter->pos = bkey_successor(iter->pos);
|
||||
goto recheck;
|
||||
}
|
||||
|
||||
if (k.k && bkey_whiteout(k.k)) {
|
||||
struct btree_node_iter node_iter = l->iter;
|
||||
|
||||
k = __btree_iter_unpack(iter, l, &iter->k,
|
||||
bch2_btree_node_iter_peek(&node_iter, l->b));
|
||||
}
|
||||
|
||||
if (!k.k)
|
||||
k.k = &l->b->key.k;
|
||||
|
||||
bch2_key_resize(&n,
|
||||
min_t(u64, KEY_SIZE_MAX,
|
||||
(k.k->p.inode == n.p.inode
|
||||
? bkey_start_offset(k.k)
|
||||
: KEY_OFFSET_MAX) -
|
||||
n.p.offset));
|
||||
|
||||
EBUG_ON(!n.size);
|
||||
bch2_bkey_to_text(buf, sizeof(buf), k.k);
|
||||
panic("iter at %llu:%llu\n"
|
||||
"next key %s\n",
|
||||
iter->pos.inode,
|
||||
iter->pos.offset,
|
||||
buf);
|
||||
}
|
||||
|
||||
iter->k = n;
|
||||
@ -1476,6 +1449,50 @@ recheck:
|
||||
return (struct bkey_s_c) { &iter->k, NULL };
|
||||
}
|
||||
|
||||
static inline struct bkey_s_c
|
||||
__bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_iter_level *l = &iter->l[0];
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
if (iter->flags & BTREE_ITER_IS_EXTENTS)
|
||||
return __bch2_btree_iter_peek_slot_extents(iter);
|
||||
|
||||
recheck:
|
||||
while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k &&
|
||||
bkey_deleted(k.k) &&
|
||||
bkey_cmp(k.k->p, iter->pos) == 0)
|
||||
__btree_iter_advance(l);
|
||||
|
||||
/*
|
||||
* If we got to the end of the node, check if we need to traverse to the
|
||||
* next node:
|
||||
*/
|
||||
if (unlikely(!k.k && btree_iter_pos_after_node(iter, l->b))) {
|
||||
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (unlikely(ret))
|
||||
return bkey_s_c_err(ret);
|
||||
|
||||
goto recheck;
|
||||
}
|
||||
|
||||
if (k.k &&
|
||||
!bkey_deleted(k.k) &&
|
||||
!bkey_cmp(iter->pos, k.k->p)) {
|
||||
iter->uptodate = BTREE_ITER_UPTODATE;
|
||||
return k;
|
||||
} else {
|
||||
/* hole */
|
||||
bkey_init(&iter->k);
|
||||
iter->k.p = iter->pos;
|
||||
|
||||
iter->uptodate = BTREE_ITER_UPTODATE;
|
||||
return (struct bkey_s_c) { &iter->k, NULL };
|
||||
}
|
||||
}
|
||||
|
||||
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
{
|
||||
int ret;
|
||||
@ -1611,17 +1628,29 @@ static void btree_trans_verify(struct btree_trans *trans)
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned btree_trans_iter_idx(struct btree_trans *trans,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
ssize_t idx = iter - trans->iters;
|
||||
|
||||
BUG_ON(idx < 0 || idx >= trans->nr_iters);
|
||||
BUG_ON(!(trans->iters_live & (1U << idx)));
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
void bch2_trans_iter_put(struct btree_trans *trans,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
ssize_t idx = btree_trans_iter_idx(trans, iter);
|
||||
|
||||
trans->iters_live &= ~(1U << idx);
|
||||
}
|
||||
|
||||
void bch2_trans_iter_free(struct btree_trans *trans,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
unsigned idx;
|
||||
|
||||
for (idx = 0; idx < trans->nr_iters; idx++)
|
||||
if (&trans->iters[idx] == iter)
|
||||
goto found;
|
||||
BUG();
|
||||
found:
|
||||
BUG_ON(!(trans->iters_linked & (1U << idx)));
|
||||
ssize_t idx = btree_trans_iter_idx(trans, iter);
|
||||
|
||||
trans->iters_live &= ~(1U << idx);
|
||||
trans->iters_linked &= ~(1U << idx);
|
||||
@ -1635,10 +1664,7 @@ static int btree_trans_realloc_iters(struct btree_trans *trans)
|
||||
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
new_iters = kmalloc(sizeof(struct btree_iter) * BTREE_ITER_MAX,
|
||||
GFP_NOFS);
|
||||
if (!new_iters)
|
||||
return -ENOMEM;
|
||||
new_iters = mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS);
|
||||
|
||||
memcpy(new_iters, trans->iters,
|
||||
sizeof(struct btree_iter) * trans->nr_iters);
|
||||
@ -1666,12 +1692,10 @@ static int btree_trans_realloc_iters(struct btree_trans *trans)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_trans_preload_iters(struct btree_trans *trans)
|
||||
void bch2_trans_preload_iters(struct btree_trans *trans)
|
||||
{
|
||||
if (trans->iters != trans->iters_onstack)
|
||||
return 0;
|
||||
|
||||
return btree_trans_realloc_iters(trans);
|
||||
if (trans->iters == trans->iters_onstack)
|
||||
btree_trans_realloc_iters(trans);
|
||||
}
|
||||
|
||||
static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
|
||||
@ -1711,10 +1735,6 @@ got_slot:
|
||||
} else {
|
||||
iter = &trans->iters[idx];
|
||||
|
||||
BUG_ON(iter->btree_id != btree_id);
|
||||
BUG_ON((iter->flags ^ flags) &
|
||||
(BTREE_ITER_SLOTS|BTREE_ITER_IS_EXTENTS));
|
||||
|
||||
iter->flags &= ~(BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
|
||||
iter->flags |= flags & (BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
|
||||
}
|
||||
@ -1731,6 +1751,9 @@ got_slot:
|
||||
|
||||
btree_trans_verify(trans);
|
||||
|
||||
BUG_ON(iter->btree_id != btree_id);
|
||||
BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE);
|
||||
|
||||
return iter;
|
||||
}
|
||||
|
||||
@ -1855,7 +1878,7 @@ int bch2_trans_exit(struct btree_trans *trans)
|
||||
|
||||
kfree(trans->mem);
|
||||
if (trans->iters != trans->iters_onstack)
|
||||
kfree(trans->iters);
|
||||
mempool_free(trans->iters, &trans->c->btree_iters_pool);
|
||||
trans->mem = (void *) 0x1;
|
||||
trans->iters = (void *) 0x1;
|
||||
return ret;
|
||||
|
@ -40,7 +40,7 @@ static inline bool __iter_has_node(const struct btree_iter *iter,
|
||||
*/
|
||||
|
||||
return iter->l[b->level].b == b &&
|
||||
iter->lock_seq[b->level] >> 1 == b->lock.state.seq >> 1;
|
||||
iter->l[b->level].lock_seq >> 1 == b->lock.state.seq >> 1;
|
||||
}
|
||||
|
||||
static inline struct btree_iter *
|
||||
@ -100,8 +100,8 @@ static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
|
||||
#endif
|
||||
|
||||
void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
|
||||
struct btree_node_iter *, struct bset_tree *,
|
||||
struct bkey_packed *, unsigned, unsigned);
|
||||
struct btree_node_iter *, struct bkey_packed *,
|
||||
unsigned, unsigned);
|
||||
|
||||
int bch2_btree_iter_unlock(struct btree_iter *);
|
||||
|
||||
@ -271,9 +271,9 @@ static inline int btree_iter_err(struct bkey_s_c k)
|
||||
|
||||
/* new multiple iterator interface: */
|
||||
|
||||
int bch2_trans_preload_iters(struct btree_trans *);
|
||||
void bch2_trans_iter_free(struct btree_trans *,
|
||||
struct btree_iter *);
|
||||
void bch2_trans_preload_iters(struct btree_trans *);
|
||||
void bch2_trans_iter_put(struct btree_trans *, struct btree_iter *);
|
||||
void bch2_trans_iter_free(struct btree_trans *, struct btree_iter *);
|
||||
|
||||
struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id,
|
||||
struct bpos, unsigned, u64);
|
||||
@ -308,6 +308,11 @@ bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src)
|
||||
|
||||
void __bch2_trans_begin(struct btree_trans *);
|
||||
|
||||
static inline void bch2_trans_begin_updates(struct btree_trans *trans)
|
||||
{
|
||||
trans->nr_updates = 0;
|
||||
}
|
||||
|
||||
void *bch2_trans_kmalloc(struct btree_trans *, size_t);
|
||||
int bch2_trans_unlock(struct btree_trans *);
|
||||
void bch2_trans_init(struct btree_trans *, struct bch_fs *);
|
||||
|
@ -146,6 +146,26 @@ static inline void btree_node_lock_type(struct bch_fs *c, struct btree *b,
|
||||
__btree_node_lock_type(c, b, type);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock a btree node if we already have it locked on one of our linked
|
||||
* iterators:
|
||||
*/
|
||||
static inline bool btree_node_lock_increment(struct btree_iter *iter,
|
||||
struct btree *b, unsigned level,
|
||||
enum btree_node_locked_type want)
|
||||
{
|
||||
struct btree_iter *linked;
|
||||
|
||||
for_each_linked_btree_iter(iter, linked)
|
||||
if (linked->l[level].b == b &&
|
||||
btree_node_locked_type(linked, level) >= want) {
|
||||
six_lock_increment(&b->lock, want);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
|
||||
struct btree_iter *, enum six_lock_type, bool);
|
||||
|
||||
@ -158,6 +178,7 @@ static inline bool btree_node_lock(struct btree *b, struct bpos pos,
|
||||
EBUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
|
||||
return likely(six_trylock_type(&b->lock, type)) ||
|
||||
btree_node_lock_increment(iter, b, level, type) ||
|
||||
__bch2_btree_node_lock(b, pos, level, iter,
|
||||
type, may_drop_locks);
|
||||
}
|
||||
@ -184,7 +205,7 @@ void __bch2_btree_node_lock_write(struct btree *, struct btree_iter *);
|
||||
static inline void bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
|
||||
{
|
||||
EBUG_ON(iter->l[b->level].b != b);
|
||||
EBUG_ON(iter->lock_seq[b->level] != b->lock.state.seq);
|
||||
EBUG_ON(iter->l[b->level].lock_seq != b->lock.state.seq);
|
||||
|
||||
if (!six_trylock_write(&b->lock))
|
||||
__bch2_btree_node_lock_write(b, iter);
|
||||
|
@ -175,8 +175,6 @@ struct btree_cache {
|
||||
};
|
||||
|
||||
struct btree_node_iter {
|
||||
u8 is_extents;
|
||||
|
||||
struct btree_node_iter_set {
|
||||
u16 k, end;
|
||||
} data[MAX_BSETS];
|
||||
@ -197,11 +195,7 @@ enum btree_iter_type {
|
||||
* @pos or the first key strictly greater than @pos
|
||||
*/
|
||||
#define BTREE_ITER_IS_EXTENTS (1 << 4)
|
||||
/*
|
||||
* indicates we need to call bch2_btree_iter_traverse() to revalidate iterator:
|
||||
*/
|
||||
#define BTREE_ITER_AT_END_OF_LEAF (1 << 5)
|
||||
#define BTREE_ITER_ERROR (1 << 6)
|
||||
#define BTREE_ITER_ERROR (1 << 5)
|
||||
|
||||
enum btree_iter_uptodate {
|
||||
BTREE_ITER_UPTODATE = 0,
|
||||
@ -232,10 +226,9 @@ struct btree_iter {
|
||||
struct btree_iter_level {
|
||||
struct btree *b;
|
||||
struct btree_node_iter iter;
|
||||
u32 lock_seq;
|
||||
} l[BTREE_MAX_DEPTH];
|
||||
|
||||
u32 lock_seq[BTREE_MAX_DEPTH];
|
||||
|
||||
/*
|
||||
* Current unpacked key - so that bch2_btree_iter_next()/
|
||||
* bch2_btree_iter_next_slot() can correctly advance pos.
|
||||
@ -258,12 +251,6 @@ struct btree_iter {
|
||||
struct btree_insert_entry {
|
||||
struct btree_iter *iter;
|
||||
struct bkey_i *k;
|
||||
unsigned extra_res;
|
||||
/*
|
||||
* true if entire key was inserted - can only be false for
|
||||
* extents
|
||||
*/
|
||||
bool done;
|
||||
};
|
||||
|
||||
struct btree_trans {
|
||||
@ -339,10 +326,38 @@ static inline struct bset_tree *bset_tree_last(struct btree *b)
|
||||
return b->set + b->nsets - 1;
|
||||
}
|
||||
|
||||
static inline void *
|
||||
__btree_node_offset_to_ptr(const struct btree *b, u16 offset)
|
||||
{
|
||||
return (void *) ((u64 *) b->data + 1 + offset);
|
||||
}
|
||||
|
||||
static inline u16
|
||||
__btree_node_ptr_to_offset(const struct btree *b, const void *p)
|
||||
{
|
||||
u16 ret = (u64 *) p - 1 - (u64 *) b->data;
|
||||
|
||||
EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct bset *bset(const struct btree *b,
|
||||
const struct bset_tree *t)
|
||||
{
|
||||
return (void *) b->data + t->data_offset * sizeof(u64);
|
||||
return __btree_node_offset_to_ptr(b, t->data_offset);
|
||||
}
|
||||
|
||||
static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t)
|
||||
{
|
||||
t->end_offset =
|
||||
__btree_node_ptr_to_offset(b, vstruct_last(bset(b, t)));
|
||||
}
|
||||
|
||||
static inline void set_btree_bset(struct btree *b, struct bset_tree *t,
|
||||
const struct bset *i)
|
||||
{
|
||||
t->data_offset = __btree_node_ptr_to_offset(b, i);
|
||||
set_btree_bset_end(b, t);
|
||||
}
|
||||
|
||||
static inline struct bset *btree_bset_first(struct btree *b)
|
||||
@ -358,19 +373,27 @@ static inline struct bset *btree_bset_last(struct btree *b)
|
||||
static inline u16
|
||||
__btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k)
|
||||
{
|
||||
size_t ret = (u64 *) k - (u64 *) b->data - 1;
|
||||
|
||||
EBUG_ON(ret > U16_MAX);
|
||||
return ret;
|
||||
return __btree_node_ptr_to_offset(b, k);
|
||||
}
|
||||
|
||||
static inline struct bkey_packed *
|
||||
__btree_node_offset_to_key(const struct btree *b, u16 k)
|
||||
{
|
||||
return (void *) ((u64 *) b->data + k + 1);
|
||||
return __btree_node_offset_to_ptr(b, k);
|
||||
}
|
||||
|
||||
#define btree_bkey_first(_b, _t) (bset(_b, _t)->start)
|
||||
static inline unsigned btree_bkey_first_offset(const struct bset_tree *t)
|
||||
{
|
||||
return t->data_offset + offsetof(struct bset, _data) / sizeof(u64);
|
||||
}
|
||||
|
||||
/*
 * First key of the bset described by @_t in btree node @_b.
 *
 * Both arguments are evaluated exactly once, regardless of whether
 * EBUG_ON() is compiled in, so arguments with side effects behave the same
 * in every build configuration. EBUG_ON() cross-checks that the bset's
 * start agrees with the offset computed by btree_bkey_first_offset().
 */
#define btree_bkey_first(_b, _t)					\
({									\
	const struct btree *_bkf_b = (_b);				\
	const struct bset_tree *_bkf_t = (_t);				\
	struct bset *_bkf_i = bset(_bkf_b, _bkf_t);			\
									\
	EBUG_ON(_bkf_i->start !=					\
		__btree_node_offset_to_key(_bkf_b,			\
				btree_bkey_first_offset(_bkf_t)));	\
									\
	_bkf_i->start;							\
})
|
||||
|
||||
#define btree_bkey_last(_b, _t) \
|
||||
({ \
|
||||
@ -380,23 +403,6 @@ __btree_node_offset_to_key(const struct btree *b, u16 k)
|
||||
__btree_node_offset_to_key(_b, (_t)->end_offset); \
|
||||
})
|
||||
|
||||
static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t)
|
||||
{
|
||||
t->end_offset =
|
||||
__btree_node_key_to_offset(b, vstruct_last(bset(b, t)));
|
||||
btree_bkey_last(b, t);
|
||||
}
|
||||
|
||||
static inline void set_btree_bset(struct btree *b, struct bset_tree *t,
|
||||
const struct bset *i)
|
||||
{
|
||||
t->data_offset = (u64 *) i - (u64 *) b->data;
|
||||
|
||||
EBUG_ON(bset(b, t) != i);
|
||||
|
||||
set_btree_bset_end(b, t);
|
||||
}
|
||||
|
||||
static inline unsigned bset_byte_offset(struct btree *b, void *i)
|
||||
{
|
||||
return i - (void *) b->data;
|
||||
@ -439,28 +445,17 @@ struct btree_root {
|
||||
* we're holding the write lock and we know what key is about to be overwritten:
|
||||
*/
|
||||
|
||||
struct btree_iter;
|
||||
struct btree_node_iter;
|
||||
|
||||
enum btree_insert_ret {
|
||||
BTREE_INSERT_OK,
|
||||
/* extent spanned multiple leaf nodes: have to traverse to next node: */
|
||||
BTREE_INSERT_NEED_TRAVERSE,
|
||||
/* write lock held for too long */
|
||||
BTREE_INSERT_NEED_RESCHED,
|
||||
/* leaf node needs to be split */
|
||||
BTREE_INSERT_BTREE_NODE_FULL,
|
||||
BTREE_INSERT_JOURNAL_RES_FULL,
|
||||
BTREE_INSERT_ENOSPC,
|
||||
BTREE_INSERT_NEED_GC_LOCK,
|
||||
};
|
||||
|
||||
struct extent_insert_hook {
|
||||
enum btree_insert_ret
|
||||
(*fn)(struct extent_insert_hook *, struct bpos, struct bpos,
|
||||
struct bkey_s_c, const struct bkey_i *);
|
||||
};
|
||||
|
||||
enum btree_gc_coalesce_fail_reason {
|
||||
BTREE_GC_COALESCE_FAIL_RESERVE_GET,
|
||||
BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,
|
||||
|
@ -22,7 +22,6 @@ struct btree_insert {
|
||||
struct disk_reservation *disk_res;
|
||||
struct journal_res journal_res;
|
||||
u64 *journal_seq;
|
||||
struct extent_insert_hook *hook;
|
||||
unsigned flags;
|
||||
bool did_work;
|
||||
|
||||
@ -32,22 +31,10 @@ struct btree_insert {
|
||||
|
||||
int __bch2_btree_insert_at(struct btree_insert *);
|
||||
|
||||
#define _TENTH_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...) N
|
||||
#define COUNT_ARGS(...) _TENTH_ARG(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1)
|
||||
|
||||
#define BTREE_INSERT_ENTRY(_iter, _k) \
|
||||
((struct btree_insert_entry) { \
|
||||
.iter = (_iter), \
|
||||
.k = (_k), \
|
||||
.done = false, \
|
||||
})
|
||||
|
||||
#define BTREE_INSERT_ENTRY_EXTRA_RES(_iter, _k, _extra) \
|
||||
((struct btree_insert_entry) { \
|
||||
.iter = (_iter), \
|
||||
.k = (_k), \
|
||||
.extra_res = (_extra), \
|
||||
.done = false, \
|
||||
})
|
||||
|
||||
/**
|
||||
@ -63,13 +50,11 @@ int __bch2_btree_insert_at(struct btree_insert *);
|
||||
* -EROFS: filesystem read only
|
||||
* -EIO: journal or btree node IO error
|
||||
*/
|
||||
#define bch2_btree_insert_at(_c, _disk_res, _hook, \
|
||||
_journal_seq, _flags, ...) \
|
||||
#define bch2_btree_insert_at(_c, _disk_res, _journal_seq, _flags, ...) \
|
||||
__bch2_btree_insert_at(&(struct btree_insert) { \
|
||||
.c = (_c), \
|
||||
.disk_res = (_disk_res), \
|
||||
.journal_seq = (_journal_seq), \
|
||||
.hook = (_hook), \
|
||||
.flags = (_flags), \
|
||||
.nr = COUNT_ARGS(__VA_ARGS__), \
|
||||
.entries = (struct btree_insert_entry[]) { \
|
||||
@ -123,17 +108,13 @@ enum {
|
||||
int bch2_btree_delete_at(struct btree_iter *, unsigned);
|
||||
|
||||
int bch2_btree_insert_list_at(struct btree_iter *, struct keylist *,
|
||||
struct disk_reservation *,
|
||||
struct extent_insert_hook *, u64 *, unsigned);
|
||||
struct disk_reservation *, u64 *, unsigned);
|
||||
|
||||
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
|
||||
struct disk_reservation *,
|
||||
struct extent_insert_hook *, u64 *, int flags);
|
||||
struct disk_reservation *, u64 *, int flags);
|
||||
|
||||
int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
|
||||
struct bpos, struct bpos, struct bversion,
|
||||
struct disk_reservation *,
|
||||
struct extent_insert_hook *, u64 *);
|
||||
struct bpos, struct bpos, u64 *);
|
||||
|
||||
int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
|
||||
__le64, unsigned);
|
||||
@ -142,11 +123,17 @@ int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
|
||||
|
||||
/* new transactional interface: */
|
||||
|
||||
void bch2_trans_update(struct btree_trans *, struct btree_iter *,
|
||||
struct bkey_i *, unsigned);
|
||||
static inline void
|
||||
bch2_trans_update(struct btree_trans *trans,
|
||||
struct btree_insert_entry entry)
|
||||
{
|
||||
BUG_ON(trans->nr_updates >= ARRAY_SIZE(trans->updates));
|
||||
|
||||
trans->updates[trans->nr_updates++] = entry;
|
||||
}
|
||||
|
||||
int bch2_trans_commit(struct btree_trans *,
|
||||
struct disk_reservation *,
|
||||
struct extent_insert_hook *,
|
||||
u64 *, unsigned);
|
||||
|
||||
#define bch2_trans_do(_c, _journal_seq, _flags, _do) \
|
||||
@ -159,7 +146,7 @@ int bch2_trans_commit(struct btree_trans *,
|
||||
do { \
|
||||
bch2_trans_begin(&trans); \
|
||||
\
|
||||
_ret = (_do) ?: bch2_trans_commit(&trans, NULL, NULL, \
|
||||
_ret = (_do) ?: bch2_trans_commit(&trans, NULL, \
|
||||
(_journal_seq), (_flags)); \
|
||||
} while (_ret == -EINTR); \
|
||||
\
|
||||
|
@ -34,7 +34,7 @@ static void btree_node_interior_verify(struct btree *b)
|
||||
|
||||
BUG_ON(!b->level);
|
||||
|
||||
bch2_btree_node_iter_init(&iter, b, b->key.k.p, false, false);
|
||||
bch2_btree_node_iter_init(&iter, b, b->key.k.p, false);
|
||||
#if 1
|
||||
BUG_ON(!(k = bch2_btree_node_iter_peek(&iter, b)) ||
|
||||
bkey_cmp_left_packed(b, k, &b->key.k.p));
|
||||
@ -183,7 +183,8 @@ found:
|
||||
*/
|
||||
replicas = bch2_extent_nr_dirty_ptrs(k);
|
||||
if (replicas)
|
||||
stats->s[replicas - 1].data[S_META] -= c->opts.btree_node_size;
|
||||
stats->replicas[replicas - 1].data[BCH_DATA_BTREE] -=
|
||||
c->opts.btree_node_size * replicas;
|
||||
|
||||
/*
|
||||
* We're dropping @k from the btree, but it's still live until the
|
||||
@ -210,7 +211,7 @@ found:
|
||||
struct bch_fs_usage tmp = { 0 };
|
||||
|
||||
bch2_mark_key(c, bkey_i_to_s_c(&d->key),
|
||||
-c->opts.btree_node_size, true, b
|
||||
-c->opts.btree_node_size, BCH_DATA_BTREE, b
|
||||
? gc_pos_btree_node(b)
|
||||
: gc_pos_btree_root(as->btree_id),
|
||||
&tmp, 0, 0);
|
||||
@ -289,7 +290,7 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
|
||||
BUG_ON(!pending->index_update_done);
|
||||
|
||||
bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
|
||||
-c->opts.btree_node_size, true,
|
||||
-c->opts.btree_node_size, BCH_DATA_BTREE,
|
||||
gc_phase(GC_PHASE_PENDING_DELETE),
|
||||
&stats, 0, 0);
|
||||
/*
|
||||
@ -578,6 +579,8 @@ static void bch2_btree_update_free(struct btree_update *as)
|
||||
{
|
||||
struct bch_fs *c = as->c;
|
||||
|
||||
bch2_journal_pin_flush(&c->journal, &as->journal);
|
||||
|
||||
BUG_ON(as->nr_new_nodes);
|
||||
BUG_ON(as->nr_pending);
|
||||
|
||||
@ -1095,7 +1098,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
|
||||
__bch2_btree_set_root_inmem(c, b);
|
||||
|
||||
bch2_mark_key(c, bkey_i_to_s_c(&b->key),
|
||||
c->opts.btree_node_size, true,
|
||||
c->opts.btree_node_size, BCH_DATA_BTREE,
|
||||
gc_pos_btree_root(b->btree_id),
|
||||
&stats, 0, 0);
|
||||
|
||||
@ -1142,7 +1145,8 @@ static void bch2_btree_set_root(struct btree_update *as, struct btree *b,
|
||||
struct btree *old;
|
||||
|
||||
trace_btree_set_root(c, b);
|
||||
BUG_ON(!b->written);
|
||||
BUG_ON(!b->written &&
|
||||
!test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags));
|
||||
|
||||
old = btree_node_root(c, b);
|
||||
|
||||
@ -1182,7 +1186,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
|
||||
|
||||
if (bkey_extent_is_data(&insert->k))
|
||||
bch2_mark_key(c, bkey_i_to_s_c(insert),
|
||||
c->opts.btree_node_size, true,
|
||||
c->opts.btree_node_size, BCH_DATA_BTREE,
|
||||
gc_pos_btree_node(b), &stats, 0, 0);
|
||||
|
||||
while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
|
||||
@ -1317,7 +1321,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
|
||||
|
||||
BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE);
|
||||
|
||||
bch2_btree_node_iter_init(&node_iter, b, k->k.p, false, false);
|
||||
bch2_btree_node_iter_init(&node_iter, b, k->k.p, false);
|
||||
|
||||
while (!bch2_keylist_empty(keys)) {
|
||||
k = bch2_keylist_front(keys);
|
||||
@ -1963,7 +1967,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
|
||||
bch2_btree_node_lock_write(b, iter);
|
||||
|
||||
bch2_mark_key(c, bkey_i_to_s_c(&new_key->k_i),
|
||||
c->opts.btree_node_size, true,
|
||||
c->opts.btree_node_size, BCH_DATA_BTREE,
|
||||
gc_pos_btree_root(b->btree_id),
|
||||
&stats, 0, 0);
|
||||
bch2_btree_node_free_index(as, NULL,
|
||||
@ -2150,7 +2154,7 @@ ssize_t bch2_btree_updates_print(struct bch_fs *c, char *buf)
|
||||
as->mode,
|
||||
as->nodes_written,
|
||||
atomic_read(&as->cl.remaining) & CLOSURE_REMAINING_MASK,
|
||||
bch2_journal_pin_seq(&c->journal, &as->journal));
|
||||
as->journal.seq);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
||||
return out - buf;
|
||||
|
@ -160,15 +160,6 @@ static inline void bch2_foreground_maybe_merge_sibling(struct bch_fs *c,
|
||||
{
|
||||
struct btree *b;
|
||||
|
||||
/*
|
||||
* iterators are inconsistent when they hit end of leaf, until
|
||||
* traversed again
|
||||
*
|
||||
* XXX inconsistent how?
|
||||
*/
|
||||
if (iter->flags & BTREE_ITER_AT_END_OF_LEAF)
|
||||
return;
|
||||
|
||||
if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
|
||||
return;
|
||||
|
||||
@ -240,14 +231,19 @@ static inline void *write_block(struct btree *b)
|
||||
return (void *) b->data + (b->written << 9);
|
||||
}
|
||||
|
||||
static inline bool bset_written(struct btree *b, struct bset *i)
|
||||
static inline bool __btree_addr_written(struct btree *b, void *p)
|
||||
{
|
||||
return (void *) i < write_block(b);
|
||||
return p < write_block(b);
|
||||
}
|
||||
|
||||
static inline bool bset_unwritten(struct btree *b, struct bset *i)
|
||||
static inline bool bset_written(struct btree *b, struct bset *i)
|
||||
{
|
||||
return (void *) i > write_block(b);
|
||||
return __btree_addr_written(b, i);
|
||||
}
|
||||
|
||||
static inline bool bkey_written(struct btree *b, struct bkey_packed *k)
|
||||
{
|
||||
return __btree_addr_written(b, k);
|
||||
}
|
||||
|
||||
static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
|
||||
@ -306,10 +302,9 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void unreserve_whiteout(struct btree *b, struct bset_tree *t,
|
||||
struct bkey_packed *k)
|
||||
static inline void unreserve_whiteout(struct btree *b, struct bkey_packed *k)
|
||||
{
|
||||
if (bset_written(b, bset(b, t))) {
|
||||
if (bkey_written(b, k)) {
|
||||
EBUG_ON(b->uncompacted_whiteout_u64s <
|
||||
bkeyp_key_u64s(&b->format, k));
|
||||
b->uncompacted_whiteout_u64s -=
|
||||
@ -317,10 +312,9 @@ static inline void unreserve_whiteout(struct btree *b, struct bset_tree *t,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void reserve_whiteout(struct btree *b, struct bset_tree *t,
|
||||
struct bkey_packed *k)
|
||||
static inline void reserve_whiteout(struct btree *b, struct bkey_packed *k)
|
||||
{
|
||||
if (bset_written(b, bset(b, t))) {
|
||||
if (bkey_written(b, k)) {
|
||||
BUG_ON(!k->needs_whiteout);
|
||||
b->uncompacted_whiteout_u64s +=
|
||||
bkeyp_key_u64s(&b->format, k);
|
||||
@ -332,40 +326,14 @@ static inline void reserve_whiteout(struct btree *b, struct bset_tree *t,
|
||||
* insert into could be written out from under us)
|
||||
*/
|
||||
static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
|
||||
struct btree *b, unsigned u64s)
|
||||
struct btree *b, unsigned u64s)
|
||||
{
|
||||
if (unlikely(btree_node_fake(b)))
|
||||
return false;
|
||||
|
||||
if (btree_node_is_extents(b)) {
|
||||
/* The insert key might split an existing key
|
||||
* (bch2_insert_fixup_extent() -> BCH_EXTENT_OVERLAP_MIDDLE case:
|
||||
*/
|
||||
u64s += BKEY_EXTENT_U64s_MAX;
|
||||
}
|
||||
|
||||
return u64s <= bch_btree_keys_u64s_remaining(c, b);
|
||||
}
|
||||
|
||||
static inline bool journal_res_insert_fits(struct btree_insert *trans,
|
||||
struct btree_insert_entry *insert)
|
||||
{
|
||||
unsigned u64s = 0;
|
||||
struct btree_insert_entry *i;
|
||||
|
||||
/*
|
||||
* If we didn't get a journal reservation, we're in journal replay and
|
||||
* we're not journalling updates:
|
||||
*/
|
||||
if (!trans->journal_res.ref)
|
||||
return true;
|
||||
|
||||
for (i = insert; i < trans->entries + trans->nr; i++)
|
||||
u64s += jset_u64s(i->k->k.u64s + i->extra_res);
|
||||
|
||||
return u64s <= trans->journal_res.u64s;
|
||||
}
|
||||
|
||||
ssize_t bch2_btree_updates_print(struct bch_fs *, char *);
|
||||
|
||||
size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);
|
||||
|
@ -24,7 +24,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
|
||||
{
|
||||
const struct bkey_format *f = &b->format;
|
||||
struct bkey_packed *k;
|
||||
struct bset_tree *t;
|
||||
unsigned clobber_u64s;
|
||||
|
||||
EBUG_ON(btree_node_just_written(b));
|
||||
@ -37,9 +36,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
|
||||
if (k && !bkey_cmp_packed(b, k, &insert->k)) {
|
||||
BUG_ON(bkey_whiteout(k));
|
||||
|
||||
t = bch2_bkey_to_bset(b, k);
|
||||
|
||||
if (bset_unwritten(b, bset(b, t)) &&
|
||||
if (!bkey_written(b, k) &&
|
||||
bkey_val_u64s(&insert->k) == bkeyp_val_u64s(f, k) &&
|
||||
!bkey_whiteout(&insert->k)) {
|
||||
k->type = insert->k.type;
|
||||
@ -50,9 +47,9 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
|
||||
|
||||
insert->k.needs_whiteout = k->needs_whiteout;
|
||||
|
||||
btree_keys_account_key_drop(&b->nr, t - b->set, k);
|
||||
btree_account_key_drop(b, k);
|
||||
|
||||
if (t == bset_tree_last(b)) {
|
||||
if (k >= btree_bset_last(b)->start) {
|
||||
clobber_u64s = k->u64s;
|
||||
|
||||
/*
|
||||
@ -62,8 +59,9 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
|
||||
*/
|
||||
if (bkey_whiteout(&insert->k) && !k->needs_whiteout) {
|
||||
bch2_bset_delete(b, k, clobber_u64s);
|
||||
bch2_btree_node_iter_fix(iter, b, node_iter, t,
|
||||
k, clobber_u64s, 0);
|
||||
bch2_btree_node_iter_fix(iter, b, node_iter,
|
||||
k, clobber_u64s, 0);
|
||||
bch2_btree_iter_verify(iter, b);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -71,11 +69,12 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
|
||||
}
|
||||
|
||||
k->type = KEY_TYPE_DELETED;
|
||||
bch2_btree_node_iter_fix(iter, b, node_iter, t, k,
|
||||
k->u64s, k->u64s);
|
||||
bch2_btree_node_iter_fix(iter, b, node_iter, k,
|
||||
k->u64s, k->u64s);
|
||||
bch2_btree_iter_verify(iter, b);
|
||||
|
||||
if (bkey_whiteout(&insert->k)) {
|
||||
reserve_whiteout(b, t, k);
|
||||
reserve_whiteout(b, k);
|
||||
return true;
|
||||
} else {
|
||||
k->needs_whiteout = false;
|
||||
@ -90,14 +89,14 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
|
||||
insert->k.needs_whiteout = false;
|
||||
}
|
||||
|
||||
t = bset_tree_last(b);
|
||||
k = bch2_btree_node_iter_bset_pos(node_iter, b, t);
|
||||
k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b));
|
||||
clobber_u64s = 0;
|
||||
overwrite:
|
||||
bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
|
||||
if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k))
|
||||
bch2_btree_node_iter_fix(iter, b, node_iter, t, k,
|
||||
clobber_u64s, k->u64s);
|
||||
bch2_btree_node_iter_fix(iter, b, node_iter, k,
|
||||
clobber_u64s, k->u64s);
|
||||
bch2_btree_iter_verify(iter, b);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -110,8 +109,7 @@ static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
|
||||
|
||||
btree_node_lock_type(c, b, SIX_LOCK_read);
|
||||
bch2_btree_node_write_cond(c, b,
|
||||
(btree_current_write(b) == w &&
|
||||
w->journal.pin_list == journal_seq_pin(j, seq)));
|
||||
(btree_current_write(b) == w && w->journal.seq == seq));
|
||||
six_unlock_read(&b->lock);
|
||||
}
|
||||
|
||||
@ -297,6 +295,30 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
|
||||
|
||||
/* Normal update interface: */
|
||||
|
||||
static enum btree_insert_ret
|
||||
btree_key_can_insert(struct btree_insert *trans,
|
||||
struct btree_insert_entry *insert,
|
||||
unsigned *u64s)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree *b = insert->iter->l[0].b;
|
||||
static enum btree_insert_ret ret;
|
||||
|
||||
if (unlikely(btree_node_fake(b)))
|
||||
return BTREE_INSERT_BTREE_NODE_FULL;
|
||||
|
||||
ret = !btree_node_is_extents(b)
|
||||
? BTREE_INSERT_OK
|
||||
: bch2_extent_can_insert(trans, insert, u64s);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (*u64s > bch_btree_keys_u64s_remaining(c, b))
|
||||
return BTREE_INSERT_BTREE_NODE_FULL;
|
||||
|
||||
return BTREE_INSERT_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get journal reservation, take write locks, and attempt to do btree update(s):
|
||||
*/
|
||||
@ -309,14 +331,12 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
|
||||
unsigned u64s;
|
||||
int ret;
|
||||
|
||||
trans_for_each_entry(trans, i) {
|
||||
BUG_ON(i->done);
|
||||
trans_for_each_entry(trans, i)
|
||||
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
|
||||
}
|
||||
|
||||
u64s = 0;
|
||||
trans_for_each_entry(trans, i)
|
||||
u64s += jset_u64s(i->k->k.u64s + i->extra_res);
|
||||
u64s += jset_u64s(i->k->k.u64s);
|
||||
|
||||
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
|
||||
|
||||
@ -336,24 +356,34 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the insert will fit in the leaf node with the write lock
|
||||
* held, otherwise another thread could write the node changing the
|
||||
* amount of space available:
|
||||
*/
|
||||
u64s = 0;
|
||||
trans_for_each_entry(trans, i) {
|
||||
/* Multiple inserts might go to same leaf: */
|
||||
if (!same_leaf_as_prev(trans, i))
|
||||
u64s = 0;
|
||||
|
||||
/*
|
||||
* bch2_btree_node_insert_fits() must be called under write lock:
|
||||
* with only an intent lock, another thread can still call
|
||||
* bch2_btree_node_write(), converting an unwritten bset to a
|
||||
* written one
|
||||
*/
|
||||
u64s += i->k->k.u64s + i->extra_res;
|
||||
if (!bch2_btree_node_insert_fits(c,
|
||||
i->iter->l[0].b, u64s)) {
|
||||
u64s += i->k->k.u64s;
|
||||
switch (btree_key_can_insert(trans, i, &u64s)) {
|
||||
case BTREE_INSERT_OK:
|
||||
break;
|
||||
case BTREE_INSERT_BTREE_NODE_FULL:
|
||||
ret = -EINTR;
|
||||
*split = i->iter;
|
||||
goto out;
|
||||
case BTREE_INSERT_ENOSPC:
|
||||
ret = -ENOSPC;
|
||||
goto out;
|
||||
case BTREE_INSERT_NEED_GC_LOCK:
|
||||
ret = -EINTR;
|
||||
*cycle_gc_lock = true;
|
||||
goto out;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
@ -369,34 +399,14 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
|
||||
trans_for_each_entry(trans, i) {
|
||||
switch (btree_insert_key_leaf(trans, i)) {
|
||||
case BTREE_INSERT_OK:
|
||||
i->done = true;
|
||||
break;
|
||||
case BTREE_INSERT_JOURNAL_RES_FULL:
|
||||
case BTREE_INSERT_NEED_TRAVERSE:
|
||||
case BTREE_INSERT_NEED_RESCHED:
|
||||
BUG_ON((trans->flags & BTREE_INSERT_ATOMIC));
|
||||
ret = -EINTR;
|
||||
break;
|
||||
case BTREE_INSERT_BTREE_NODE_FULL:
|
||||
ret = -EINTR;
|
||||
*split = i->iter;
|
||||
break;
|
||||
case BTREE_INSERT_ENOSPC:
|
||||
ret = -ENOSPC;
|
||||
break;
|
||||
case BTREE_INSERT_NEED_GC_LOCK:
|
||||
ret = -EINTR;
|
||||
*cycle_gc_lock = true;
|
||||
break;
|
||||
goto out;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
/*
|
||||
* If we did some work (i.e. inserted part of an extent),
|
||||
* we have to do all the other updates as well:
|
||||
*/
|
||||
if (!trans->did_work && (ret || *split))
|
||||
break;
|
||||
}
|
||||
out:
|
||||
multi_unlock_write(trans);
|
||||
@ -490,13 +500,8 @@ out:
|
||||
bch2_btree_iter_verify_locks(linked);
|
||||
BUG_ON((trans->flags & BTREE_INSERT_NOUNLOCK) &&
|
||||
trans->did_work &&
|
||||
linked->uptodate >= BTREE_ITER_NEED_RELOCK);
|
||||
!btree_node_locked(linked, 0));
|
||||
}
|
||||
|
||||
/* make sure we didn't lose an error: */
|
||||
if (!ret)
|
||||
trans_for_each_entry(trans, i)
|
||||
BUG_ON(!i->done);
|
||||
}
|
||||
|
||||
BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
|
||||
@ -581,29 +586,8 @@ err:
|
||||
goto out;
|
||||
}
|
||||
|
||||
void bch2_trans_update(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_i *k,
|
||||
unsigned extra_journal_res)
|
||||
{
|
||||
struct btree_insert_entry *i;
|
||||
|
||||
BUG_ON(trans->nr_updates >= ARRAY_SIZE(trans->updates));
|
||||
|
||||
i = &trans->updates[trans->nr_updates++];
|
||||
|
||||
*i = (struct btree_insert_entry) {
|
||||
.iter = iter,
|
||||
.k = k,
|
||||
.extra_res = extra_journal_res,
|
||||
};
|
||||
|
||||
btree_insert_entry_checks(trans->c, i);
|
||||
}
|
||||
|
||||
int bch2_trans_commit(struct btree_trans *trans,
|
||||
struct disk_reservation *disk_res,
|
||||
struct extent_insert_hook *hook,
|
||||
u64 *journal_seq,
|
||||
unsigned flags)
|
||||
{
|
||||
@ -631,7 +615,7 @@ int bch2_btree_delete_at(struct btree_iter *iter, unsigned flags)
|
||||
bkey_init(&k.k);
|
||||
k.k.p = iter->pos;
|
||||
|
||||
return bch2_btree_insert_at(iter->c, NULL, NULL, NULL,
|
||||
return bch2_btree_insert_at(iter->c, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|flags,
|
||||
BTREE_INSERT_ENTRY(iter, &k));
|
||||
@ -640,7 +624,6 @@ int bch2_btree_delete_at(struct btree_iter *iter, unsigned flags)
|
||||
int bch2_btree_insert_list_at(struct btree_iter *iter,
|
||||
struct keylist *keys,
|
||||
struct disk_reservation *disk_res,
|
||||
struct extent_insert_hook *hook,
|
||||
u64 *journal_seq, unsigned flags)
|
||||
{
|
||||
BUG_ON(flags & BTREE_INSERT_ATOMIC);
|
||||
@ -648,7 +631,7 @@ int bch2_btree_insert_list_at(struct btree_iter *iter,
|
||||
bch2_verify_keylist_sorted(keys);
|
||||
|
||||
while (!bch2_keylist_empty(keys)) {
|
||||
int ret = bch2_btree_insert_at(iter->c, disk_res, hook,
|
||||
int ret = bch2_btree_insert_at(iter->c, disk_res,
|
||||
journal_seq, flags,
|
||||
BTREE_INSERT_ENTRY(iter, bch2_keylist_front(keys)));
|
||||
if (ret)
|
||||
@ -670,7 +653,6 @@ int bch2_btree_insert_list_at(struct btree_iter *iter,
|
||||
int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
|
||||
struct bkey_i *k,
|
||||
struct disk_reservation *disk_res,
|
||||
struct extent_insert_hook *hook,
|
||||
u64 *journal_seq, int flags)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
@ -678,7 +660,7 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
|
||||
|
||||
bch2_btree_iter_init(&iter, c, id, bkey_start_pos(&k->k),
|
||||
BTREE_ITER_INTENT);
|
||||
ret = bch2_btree_insert_at(c, disk_res, hook, journal_seq, flags,
|
||||
ret = bch2_btree_insert_at(c, disk_res, journal_seq, flags,
|
||||
BTREE_INSERT_ENTRY(&iter, k));
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
@ -691,12 +673,8 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
|
||||
* Range is a half open interval - [start, end)
|
||||
*/
|
||||
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
||||
struct bpos start,
|
||||
struct bpos end,
|
||||
struct bversion version,
|
||||
struct disk_reservation *disk_res,
|
||||
struct extent_insert_hook *hook,
|
||||
u64 *journal_seq)
|
||||
struct bpos start, struct bpos end,
|
||||
u64 *journal_seq)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
@ -706,14 +684,12 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
||||
BTREE_ITER_INTENT);
|
||||
|
||||
while ((k = bch2_btree_iter_peek(&iter)).k &&
|
||||
!(ret = btree_iter_err(k))) {
|
||||
!(ret = btree_iter_err(k)) &&
|
||||
bkey_cmp(iter.pos, end) < 0) {
|
||||
unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
|
||||
/* really shouldn't be using a bare, unpadded bkey_i */
|
||||
struct bkey_i delete;
|
||||
|
||||
if (bkey_cmp(iter.pos, end) >= 0)
|
||||
break;
|
||||
|
||||
bkey_init(&delete.k);
|
||||
|
||||
/*
|
||||
@ -727,7 +703,6 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
||||
* bkey_start_pos(k.k)).
|
||||
*/
|
||||
delete.k.p = iter.pos;
|
||||
delete.k.version = version;
|
||||
|
||||
if (iter.flags & BTREE_ITER_IS_EXTENTS) {
|
||||
/* create the biggest key we can */
|
||||
@ -735,7 +710,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
||||
bch2_cut_back(end, &delete.k);
|
||||
}
|
||||
|
||||
ret = bch2_btree_insert_at(c, disk_res, hook, journal_seq,
|
||||
ret = bch2_btree_insert_at(c, NULL, journal_seq,
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BTREE_INSERT_ENTRY(&iter, &delete));
|
||||
if (ret)
|
||||
|
@ -72,6 +72,8 @@
|
||||
#include <linux/preempt.h>
|
||||
#include <trace/events/bcachefs.h>
|
||||
|
||||
static inline u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
|
||||
|
||||
#ifdef DEBUG_BUCKETS
|
||||
|
||||
#define lg_local_lock lg_global_lock
|
||||
@ -81,22 +83,26 @@ static void bch2_fs_stats_verify(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_usage stats =
|
||||
__bch2_fs_usage_read(c);
|
||||
unsigned i;
|
||||
unsigned i, j;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(stats.s); i++) {
|
||||
if ((s64) stats.s[i].data[S_META] < 0)
|
||||
panic("replicas %u meta underflow: %lli\n",
|
||||
i + 1, stats.s[i].data[S_META]);
|
||||
for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) {
|
||||
for (j = 0; j < ARRAY_SIZE(stats.replicas[i].data); j++)
|
||||
if ((s64) stats.replicas[i].data[j] < 0)
|
||||
panic("replicas %u %s sectors underflow: %lli\n",
|
||||
i + 1, bch_data_types[j],
|
||||
stats.replicas[i].data[j]);
|
||||
|
||||
if ((s64) stats.s[i].data[S_DIRTY] < 0)
|
||||
panic("replicas %u dirty underflow: %lli\n",
|
||||
i + 1, stats.s[i].data[S_DIRTY]);
|
||||
|
||||
if ((s64) stats.s[i].persistent_reserved < 0)
|
||||
if ((s64) stats.replicas[i].persistent_reserved < 0)
|
||||
panic("replicas %u reserved underflow: %lli\n",
|
||||
i + 1, stats.s[i].persistent_reserved);
|
||||
i + 1, stats.replicas[i].persistent_reserved);
|
||||
}
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(stats.buckets); j++)
|
||||
if ((s64) stats.replicas[i].data_buckets[j] < 0)
|
||||
panic("%s buckets underflow: %lli\n",
|
||||
bch_data_types[j],
|
||||
stats.buckets[j]);
|
||||
|
||||
if ((s64) stats.online_reserved < 0)
|
||||
panic("sectors_online_reserved underflow: %lli\n",
|
||||
stats.online_reserved);
|
||||
@ -146,6 +152,7 @@ static void bch2_disk_reservations_verify(struct bch_fs *c, int flags) {}
|
||||
*/
|
||||
void bch2_bucket_seq_cleanup(struct bch_fs *c)
|
||||
{
|
||||
u64 journal_seq = atomic64_read(&c->journal.seq);
|
||||
u16 last_seq_ondisk = c->journal.last_seq_ondisk;
|
||||
struct bch_dev *ca;
|
||||
struct bucket_array *buckets;
|
||||
@ -153,6 +160,12 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c)
|
||||
struct bucket_mark m;
|
||||
unsigned i;
|
||||
|
||||
if (journal_seq - c->last_bucket_seq_cleanup <
|
||||
(1U << (BUCKET_JOURNAL_SEQ_BITS - 2)))
|
||||
return;
|
||||
|
||||
c->last_bucket_seq_cleanup = journal_seq;
|
||||
|
||||
for_each_member_device(ca, c, i) {
|
||||
down_read(&ca->bucket_lock);
|
||||
buckets = bucket_array(ca);
|
||||
@ -232,7 +245,9 @@ bch2_fs_usage_read(struct bch_fs *c)
|
||||
}
|
||||
|
||||
struct fs_usage_sum {
|
||||
u64 hidden;
|
||||
u64 data;
|
||||
u64 cached;
|
||||
u64 reserved;
|
||||
};
|
||||
|
||||
@ -241,10 +256,19 @@ static inline struct fs_usage_sum __fs_usage_sum(struct bch_fs_usage stats)
|
||||
struct fs_usage_sum sum = { 0 };
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(stats.s); i++) {
|
||||
sum.data += (stats.s[i].data[S_META] +
|
||||
stats.s[i].data[S_DIRTY]) * (i + 1);
|
||||
sum.reserved += stats.s[i].persistent_reserved * (i + 1);
|
||||
/*
|
||||
* For superblock and journal we count bucket usage, not sector usage,
|
||||
* because any internal fragmentation should _not_ be counted as
|
||||
* free space:
|
||||
*/
|
||||
sum.hidden += stats.buckets[BCH_DATA_SB];
|
||||
sum.hidden += stats.buckets[BCH_DATA_JOURNAL];
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) {
|
||||
sum.data += stats.replicas[i].data[BCH_DATA_BTREE];
|
||||
sum.data += stats.replicas[i].data[BCH_DATA_USER];
|
||||
sum.cached += stats.replicas[i].data[BCH_DATA_CACHED];
|
||||
sum.reserved += stats.replicas[i].persistent_reserved;
|
||||
}
|
||||
|
||||
sum.reserved += stats.online_reserved;
|
||||
@ -260,14 +284,14 @@ static u64 reserve_factor(u64 r)
|
||||
|
||||
/*
 * Scale @r by (1 << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1).
 *
 * The whole divisor has to be parenthesised: written as
 * "(r << RESERVE_FACTOR) / (1 << RESERVE_FACTOR) + 1", the division binds
 * first and the expression reduces to r + 1 (barring overflow of the shift),
 * i.e. it grows the value instead of shrinking it.
 */
static u64 avail_factor(u64 r)
{
	return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
}
|
||||
|
||||
u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
|
||||
static inline u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
|
||||
{
|
||||
struct fs_usage_sum sum = __fs_usage_sum(stats);
|
||||
|
||||
return sum.data + reserve_factor(sum.reserved);
|
||||
return sum.hidden + sum.data + reserve_factor(sum.reserved);
|
||||
}
|
||||
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
|
||||
@ -275,9 +299,9 @@ u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
|
||||
return min(c->capacity, __bch2_fs_sectors_used(c, stats));
|
||||
}
|
||||
|
||||
u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
|
||||
static u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
|
||||
{
|
||||
return avail_factor(c->capacity - bch2_fs_sectors_used(c, stats));
|
||||
return c->capacity - bch2_fs_sectors_used(c, stats);
|
||||
}
|
||||
|
||||
static inline int is_unavailable_bucket(struct bucket_mark m)
|
||||
@ -313,9 +337,9 @@ static bool bucket_became_unavailable(struct bch_fs *c,
|
||||
}
|
||||
|
||||
void bch2_fs_usage_apply(struct bch_fs *c,
|
||||
struct bch_fs_usage *stats,
|
||||
struct disk_reservation *disk_res,
|
||||
struct gc_pos gc_pos)
|
||||
struct bch_fs_usage *stats,
|
||||
struct disk_reservation *disk_res,
|
||||
struct gc_pos gc_pos)
|
||||
{
|
||||
struct fs_usage_sum sum = __fs_usage_sum(*stats);
|
||||
s64 added = sum.data + sum.reserved;
|
||||
@ -347,21 +371,21 @@ void bch2_fs_usage_apply(struct bch_fs *c,
|
||||
}
|
||||
|
||||
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct bch_fs_usage *stats,
|
||||
struct bucket_mark old, struct bucket_mark new)
|
||||
{
|
||||
struct bch_dev_usage *dev_usage;
|
||||
|
||||
if (c)
|
||||
percpu_rwsem_assert_held(&c->usage_lock);
|
||||
percpu_rwsem_assert_held(&c->usage_lock);
|
||||
|
||||
if (old.data_type && new.data_type &&
|
||||
old.data_type != new.data_type) {
|
||||
BUG_ON(!c);
|
||||
bch2_fs_inconsistent(c,
|
||||
"different types of data in same bucket: %s, %s",
|
||||
bch2_data_types[old.data_type],
|
||||
bch2_data_types[new.data_type]);
|
||||
}
|
||||
bch2_fs_inconsistent_on(old.data_type && new.data_type &&
|
||||
old.data_type != new.data_type, c,
|
||||
"different types of data in same bucket: %s, %s",
|
||||
bch2_data_types[old.data_type],
|
||||
bch2_data_types[new.data_type]);
|
||||
|
||||
stats->buckets[bucket_type(old)] -= ca->mi.bucket_size;
|
||||
stats->buckets[bucket_type(new)] += ca->mi.bucket_size;
|
||||
|
||||
dev_usage = this_cpu_ptr(ca->usage_percpu);
|
||||
|
||||
@ -386,17 +410,18 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
|
||||
bch2_dev_stats_verify(ca);
|
||||
}
|
||||
|
||||
/*
 * Run bucket_cmpxchg() on @g with @expr, then hand the old and new bucket
 * marks to bch2_dev_usage_update() together with @stats.
 *
 * Evaluates to the old mark. @new is written by bucket_cmpxchg() and read
 * again afterwards, so it must be a plain lvalue.
 */
#define bucket_data_cmpxchg(c, ca, stats, g, new, expr)		\
({								\
	struct bucket_mark _old = bucket_cmpxchg(g, new, expr);	\
								\
	bch2_dev_usage_update(c, ca, stats, _old, new);		\
	_old;							\
})
|
||||
|
||||
bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, struct bucket_mark *old)
|
||||
{
|
||||
struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu);
|
||||
struct bucket *g;
|
||||
struct bucket_mark new;
|
||||
|
||||
@ -404,11 +429,8 @@ bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
g = bucket(ca, b);
|
||||
|
||||
*old = bucket_data_cmpxchg(c, ca, g, new, ({
|
||||
if (!is_available_bucket(new)) {
|
||||
percpu_up_read_preempt_enable(&c->usage_lock);
|
||||
return false;
|
||||
}
|
||||
*old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
|
||||
BUG_ON(!is_available_bucket(new));
|
||||
|
||||
new.owned_by_allocator = 1;
|
||||
new.data_type = 0;
|
||||
@ -417,16 +439,22 @@ bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
new.gen++;
|
||||
}));
|
||||
|
||||
/*
|
||||
* This isn't actually correct yet, since fs usage is still
|
||||
* uncompressed sectors:
|
||||
*/
|
||||
stats->replicas[0].data[BCH_DATA_CACHED] -= old->cached_sectors;
|
||||
|
||||
if (!old->owned_by_allocator && old->cached_sectors)
|
||||
trace_invalidate(ca, bucket_to_sector(ca, b),
|
||||
old->cached_sectors);
|
||||
return true;
|
||||
}
|
||||
|
||||
void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b, bool owned_by_allocator,
|
||||
struct gc_pos pos, unsigned flags)
|
||||
{
|
||||
struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu);
|
||||
struct bucket *g;
|
||||
struct bucket_mark old, new;
|
||||
|
||||
@ -437,7 +465,7 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
gc_will_visit(c, pos))
|
||||
return;
|
||||
|
||||
old = bucket_data_cmpxchg(c, ca, g, new, ({
|
||||
old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
|
||||
new.owned_by_allocator = owned_by_allocator;
|
||||
}));
|
||||
|
||||
@ -445,17 +473,11 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
c->gc_pos.phase == GC_PHASE_DONE);
|
||||
}
|
||||
|
||||
/*
 * Add @b to the counter @a, and BUG if the sum does not survive being stored
 * back into @a.
 *
 * The sum is computed as an unsigned, assigned to @a and then compared with
 * what @a actually holds. For a counter narrower than unsigned this trips on
 * both overflow and a result that went negative.
 *
 * @a is evaluated several times and must be an lvalue without side effects.
 */
#define checked_add(a, b)					\
do {								\
	unsigned _res = (unsigned) (a) + (b);			\
	(a) = _res;						\
	BUG_ON((a) != _res);					\
} while (0)
|
||||
|
||||
void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
@ -463,10 +485,12 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
unsigned sectors, struct gc_pos pos,
|
||||
unsigned flags)
|
||||
{
|
||||
struct bch_fs_usage *stats;
|
||||
struct bucket *g;
|
||||
struct bucket_mark old, new;
|
||||
|
||||
BUG_ON(!type);
|
||||
BUG_ON(type != BCH_DATA_SB &&
|
||||
type != BCH_DATA_JOURNAL);
|
||||
|
||||
if (likely(c)) {
|
||||
percpu_rwsem_assert_held(&c->usage_lock);
|
||||
@ -474,25 +498,32 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
|
||||
gc_will_visit(c, pos))
|
||||
return;
|
||||
|
||||
stats = this_cpu_ptr(c->usage_percpu);
|
||||
|
||||
g = bucket(ca, b);
|
||||
old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
|
||||
new.data_type = type;
|
||||
checked_add(new.dirty_sectors, sectors);
|
||||
}));
|
||||
|
||||
stats->replicas[0].data[type] += sectors;
|
||||
} else {
|
||||
rcu_read_lock();
|
||||
|
||||
g = bucket(ca, b);
|
||||
old = bucket_cmpxchg(g, new, ({
|
||||
new.data_type = type;
|
||||
checked_add(new.dirty_sectors, sectors);
|
||||
}));
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
g = bucket(ca, b);
|
||||
old = bucket_data_cmpxchg(c, ca, g, new, ({
|
||||
saturated_add(ca, new.dirty_sectors, sectors,
|
||||
GC_MAX_SECTORS_USED);
|
||||
new.data_type = type;
|
||||
}));
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
|
||||
bucket_became_unavailable(c, old, new));
|
||||
}
|
||||
|
||||
/* Reverting this until the copygc + compression issue is fixed: */
|
||||
|
||||
static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors)
|
||||
{
|
||||
if (!sectors)
|
||||
@ -511,16 +542,15 @@ static void bch2_mark_pointer(struct bch_fs *c,
|
||||
struct bkey_s_c_extent e,
|
||||
const struct bch_extent_ptr *ptr,
|
||||
struct bch_extent_crc_unpacked crc,
|
||||
s64 sectors, enum s_alloc type,
|
||||
struct bch_fs_usage *stats,
|
||||
s64 sectors, enum bch_data_type data_type,
|
||||
unsigned replicas,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
u64 journal_seq, unsigned flags)
|
||||
{
|
||||
struct bucket_mark old, new;
|
||||
unsigned saturated;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
struct bucket *g = PTR_BUCKET(ca, ptr);
|
||||
enum bch_data_type data_type = type == S_META
|
||||
? BCH_DATA_BTREE : BCH_DATA_USER;
|
||||
s64 uncompressed_sectors = sectors;
|
||||
u64 v;
|
||||
|
||||
if (crc.compression_type) {
|
||||
@ -538,6 +568,20 @@ static void bch2_mark_pointer(struct bch_fs *c,
|
||||
+__disk_sectors(crc, new_sectors);
|
||||
}
|
||||
|
||||
/*
|
||||
* fs level usage (which determines free space) is in uncompressed
|
||||
* sectors, until copygc + compression is sorted out:
|
||||
*
|
||||
* note also that we always update @fs_usage, even when we otherwise
|
||||
* wouldn't do anything because gc is running - this is because the
|
||||
* caller still needs to account w.r.t. its disk reservation. It is
|
||||
* caller's responsibility to not apply @fs_usage if gc is in progress.
|
||||
*/
|
||||
fs_usage->replicas
|
||||
[!ptr->cached && replicas ? replicas - 1 : 0].data
|
||||
[!ptr->cached ? data_type : BCH_DATA_CACHED] +=
|
||||
uncompressed_sectors;
|
||||
|
||||
if (flags & BCH_BUCKET_MARK_GC_WILL_VISIT) {
|
||||
if (journal_seq)
|
||||
bucket_cmpxchg(g, new, ({
|
||||
@ -551,7 +595,6 @@ static void bch2_mark_pointer(struct bch_fs *c,
|
||||
v = atomic64_read(&g->_mark.v);
|
||||
do {
|
||||
new.v.counter = old.v.counter = v;
|
||||
saturated = 0;
|
||||
|
||||
/*
|
||||
* Check this after reading bucket mark to guard against
|
||||
@ -565,17 +608,10 @@ static void bch2_mark_pointer(struct bch_fs *c,
|
||||
return;
|
||||
}
|
||||
|
||||
if (!ptr->cached &&
|
||||
new.dirty_sectors == GC_MAX_SECTORS_USED &&
|
||||
sectors < 0)
|
||||
saturated = -sectors;
|
||||
|
||||
if (ptr->cached)
|
||||
saturated_add(ca, new.cached_sectors, sectors,
|
||||
GC_MAX_SECTORS_USED);
|
||||
if (!ptr->cached)
|
||||
checked_add(new.dirty_sectors, sectors);
|
||||
else
|
||||
saturated_add(ca, new.dirty_sectors, sectors,
|
||||
GC_MAX_SECTORS_USED);
|
||||
checked_add(new.cached_sectors, sectors);
|
||||
|
||||
if (!new.dirty_sectors &&
|
||||
!new.cached_sectors) {
|
||||
@ -597,28 +633,22 @@ static void bch2_mark_pointer(struct bch_fs *c,
|
||||
old.v.counter,
|
||||
new.v.counter)) != old.v.counter);
|
||||
|
||||
bch2_dev_usage_update(c, ca, old, new);
|
||||
bch2_dev_usage_update(c, ca, fs_usage, old, new);
|
||||
|
||||
BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
|
||||
bucket_became_unavailable(c, old, new));
|
||||
|
||||
if (saturated &&
|
||||
atomic_long_add_return(saturated,
|
||||
&ca->saturated_count) >=
|
||||
bucket_to_sector(ca, ca->free_inc.size)) {
|
||||
if (c->gc_thread) {
|
||||
trace_gc_sectors_saturated(c);
|
||||
wake_up_process(c->gc_thread);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
s64 sectors, bool metadata,
|
||||
s64 sectors, enum bch_data_type data_type,
|
||||
struct gc_pos pos,
|
||||
struct bch_fs_usage *stats,
|
||||
u64 journal_seq, unsigned flags)
|
||||
{
|
||||
unsigned replicas = bch2_extent_nr_dirty_ptrs(k);
|
||||
|
||||
BUG_ON(replicas && replicas - 1 > ARRAY_SIZE(stats->replicas));
|
||||
|
||||
/*
|
||||
* synchronization w.r.t. GC:
|
||||
*
|
||||
@ -661,34 +691,20 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
|
||||
const struct bch_extent_ptr *ptr;
|
||||
struct bch_extent_crc_unpacked crc;
|
||||
enum s_alloc type = metadata ? S_META : S_DIRTY;
|
||||
unsigned replicas = 0;
|
||||
|
||||
BUG_ON(metadata && bkey_extent_is_cached(e.k));
|
||||
BUG_ON(!sectors);
|
||||
|
||||
extent_for_each_ptr_crc(e, ptr, crc) {
|
||||
bch2_mark_pointer(c, e, ptr, crc, sectors, type,
|
||||
stats, journal_seq, flags);
|
||||
replicas += !ptr->cached;
|
||||
}
|
||||
|
||||
if (replicas) {
|
||||
BUG_ON(replicas - 1 > ARRAY_SIZE(stats->s));
|
||||
stats->s[replicas - 1].data[type] += sectors;
|
||||
}
|
||||
extent_for_each_ptr_crc(e, ptr, crc)
|
||||
bch2_mark_pointer(c, e, ptr, crc, sectors, data_type,
|
||||
replicas, stats, journal_seq, flags);
|
||||
break;
|
||||
}
|
||||
case BCH_RESERVATION: {
|
||||
struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
|
||||
|
||||
if (r.v->nr_replicas) {
|
||||
BUG_ON(r.v->nr_replicas - 1 > ARRAY_SIZE(stats->s));
|
||||
stats->s[r.v->nr_replicas - 1].persistent_reserved += sectors;
|
||||
}
|
||||
case BCH_RESERVATION:
|
||||
if (replicas)
|
||||
stats->replicas[replicas - 1].persistent_reserved +=
|
||||
sectors * replicas;
|
||||
break;
|
||||
}
|
||||
}
|
||||
percpu_up_read_preempt_enable(&c->usage_lock);
|
||||
}
|
||||
|
||||
@ -701,7 +717,7 @@ static u64 __recalc_sectors_available(struct bch_fs *c)
|
||||
for_each_possible_cpu(cpu)
|
||||
per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0;
|
||||
|
||||
return bch2_fs_sectors_free(c, bch2_fs_usage_read(c));
|
||||
return avail_factor(bch2_fs_sectors_free(c, bch2_fs_usage_read(c)));
|
||||
}
|
||||
|
||||
/* Used by gc when it's starting: */
|
||||
@ -833,9 +849,10 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
|
||||
ca->mi.bucket_size / c->opts.btree_node_size);
|
||||
/* XXX: these should be tunable */
|
||||
size_t reserve_none = max_t(size_t, 4, ca->mi.nbuckets >> 9);
|
||||
size_t copygc_reserve = max_t(size_t, 16, ca->mi.nbuckets >> 7);
|
||||
size_t free_inc_reserve = copygc_reserve / 2;
|
||||
size_t reserve_none = max_t(size_t, 4, nbuckets >> 9);
|
||||
size_t copygc_reserve = max_t(size_t, 16, nbuckets >> 7);
|
||||
size_t free_inc_nr = max(max_t(size_t, 16, nbuckets >> 12),
|
||||
btree_reserve);
|
||||
bool resize = ca->buckets != NULL,
|
||||
start_copygc = ca->copygc_thread != NULL;
|
||||
int ret = -ENOMEM;
|
||||
@ -858,8 +875,8 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
|
||||
!init_fifo(&free[RESERVE_MOVINGGC],
|
||||
copygc_reserve, GFP_KERNEL) ||
|
||||
!init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
|
||||
!init_fifo(&free_inc, free_inc_reserve, GFP_KERNEL) ||
|
||||
!init_heap(&alloc_heap, free_inc_reserve, GFP_KERNEL) ||
|
||||
!init_fifo(&free_inc, free_inc_nr, GFP_KERNEL) ||
|
||||
!init_heap(&alloc_heap, ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL) ||
|
||||
!init_heap(©gc_heap, copygc_reserve, GFP_KERNEL))
|
||||
goto err;
|
||||
|
||||
|
@ -114,11 +114,6 @@ static inline u8 ptr_stale(struct bch_dev *ca,
|
||||
|
||||
/* bucket gc marks */
|
||||
|
||||
/* The dirty and cached sector counts saturate. If this occurs,
|
||||
* reference counting alone will not free the bucket, and a btree
|
||||
* GC must be performed. */
|
||||
#define GC_MAX_SECTORS_USED ((1U << 15) - 1)
|
||||
|
||||
static inline unsigned bucket_sectors_used(struct bucket_mark mark)
|
||||
{
|
||||
return mark.dirty_sectors + mark.cached_sectors;
|
||||
@ -172,26 +167,12 @@ static inline u64 dev_buckets_free(struct bch_fs *c, struct bch_dev *ca)
|
||||
|
||||
/* Filesystem usage: */
|
||||
|
||||
static inline enum bch_data_type s_alloc_to_data_type(enum s_alloc s)
|
||||
{
|
||||
switch (s) {
|
||||
case S_META:
|
||||
return BCH_DATA_BTREE;
|
||||
case S_DIRTY:
|
||||
return BCH_DATA_USER;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *);
|
||||
struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *);
|
||||
void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
|
||||
struct disk_reservation *, struct gc_pos);
|
||||
|
||||
u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
|
||||
u64 bch2_fs_sectors_free(struct bch_fs *, struct bch_fs_usage);
|
||||
|
||||
static inline bool is_available_bucket(struct bucket_mark mark)
|
||||
{
|
||||
@ -209,7 +190,7 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
|
||||
|
||||
void bch2_bucket_seq_cleanup(struct bch_fs *);
|
||||
|
||||
bool bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *,
|
||||
void bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *,
|
||||
size_t, struct bucket_mark *);
|
||||
void bch2_mark_alloc_bucket(struct bch_fs *, struct bch_dev *,
|
||||
size_t, bool, struct gc_pos, unsigned);
|
||||
@ -222,8 +203,8 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
|
||||
#define BCH_BUCKET_MARK_GC_WILL_VISIT (1 << 2)
|
||||
#define BCH_BUCKET_MARK_GC_LOCK_HELD (1 << 3)
|
||||
|
||||
void bch2_mark_key(struct bch_fs *, struct bkey_s_c, s64, bool, struct gc_pos,
|
||||
struct bch_fs_usage *, u64, unsigned);
|
||||
void bch2_mark_key(struct bch_fs *, struct bkey_s_c, s64, enum bch_data_type,
|
||||
struct gc_pos, struct bch_fs_usage *, u64, unsigned);
|
||||
|
||||
void bch2_recalc_sectors_available(struct bch_fs *);
|
||||
|
||||
|
@ -1,8 +1,11 @@
|
||||
#ifndef _BUCKETS_TYPES_H
|
||||
#define _BUCKETS_TYPES_H
|
||||
|
||||
#include "bcachefs_format.h"
|
||||
#include "util.h"
|
||||
|
||||
#define BUCKET_JOURNAL_SEQ_BITS 16
|
||||
|
||||
struct bucket_mark {
|
||||
union {
|
||||
struct {
|
||||
@ -56,23 +59,17 @@ struct bch_dev_usage {
|
||||
u64 sectors_fragmented;
|
||||
};
|
||||
|
||||
/* kill, switch to bch_data_type? */
|
||||
enum s_alloc {
|
||||
S_META,
|
||||
S_DIRTY,
|
||||
S_ALLOC_NR,
|
||||
};
|
||||
|
||||
struct bch_fs_usage {
|
||||
/* all fields are in units of 512 byte sectors: */
|
||||
/* _uncompressed_ sectors: */
|
||||
u64 online_reserved;
|
||||
u64 available_cache;
|
||||
|
||||
struct {
|
||||
u64 data[S_ALLOC_NR];
|
||||
u64 data[BCH_DATA_NR];
|
||||
u64 persistent_reserved;
|
||||
} s[BCH_REPLICAS_MAX];
|
||||
} replicas[BCH_REPLICAS_MAX];
|
||||
|
||||
u64 buckets[BCH_DATA_NR];
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -403,11 +403,10 @@ static long bch2_ioctl_usage(struct bch_fs *c,
|
||||
|
||||
for (i = 0; i < BCH_REPLICAS_MAX; i++) {
|
||||
dst.persistent_reserved[i] =
|
||||
src.s[i].persistent_reserved;
|
||||
src.replicas[i].persistent_reserved;
|
||||
|
||||
for (j = 0; j < S_ALLOC_NR; j++)
|
||||
dst.sectors[s_alloc_to_data_type(j)][i] =
|
||||
src.s[i].data[j];
|
||||
for (j = 0; j < BCH_DATA_NR; j++)
|
||||
dst.sectors[j][i] = src.replicas[i].data[j];
|
||||
}
|
||||
|
||||
ret = copy_to_user(&user_arg->fs, &dst, sizeof(dst));
|
||||
|
@ -121,24 +121,26 @@ const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_dirent_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
int bch2_dirent_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
{
|
||||
char *out = buf, *end = buf + size;
|
||||
struct bkey_s_c_dirent d;
|
||||
size_t n = 0;
|
||||
|
||||
switch (k.k->type) {
|
||||
case BCH_DIRENT:
|
||||
d = bkey_s_c_to_dirent(k);
|
||||
|
||||
n += bch_scnmemcpy(buf + n, size - n, d.v->d_name,
|
||||
bch2_dirent_name_bytes(d));
|
||||
n += scnprintf(buf + n, size - n, " -> %llu", d.v->d_inum);
|
||||
out += bch_scnmemcpy(out, end - out, d.v->d_name,
|
||||
bch2_dirent_name_bytes(d));
|
||||
out += scnprintf(out, end - out, " -> %llu", d.v->d_inum);
|
||||
break;
|
||||
case BCH_DIRENT_WHITEOUT:
|
||||
scnprintf(buf, size, "whiteout");
|
||||
out += scnprintf(out, end - out, "whiteout");
|
||||
break;
|
||||
}
|
||||
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
|
||||
@ -289,7 +291,9 @@ int bch2_dirent_rename(struct btree_trans *trans,
|
||||
* new_dst at the src position:
|
||||
*/
|
||||
new_dst->k.p = src_iter->pos;
|
||||
bch2_trans_update(trans, src_iter, &new_dst->k_i, 0);
|
||||
bch2_trans_update(trans,
|
||||
BTREE_INSERT_ENTRY(src_iter,
|
||||
&new_dst->k_i));
|
||||
return 0;
|
||||
} else {
|
||||
/* If we're overwriting, we can't insert new_dst
|
||||
@ -312,8 +316,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
|
||||
}
|
||||
}
|
||||
|
||||
bch2_trans_update(trans, src_iter, &new_src->k_i, 0);
|
||||
bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0);
|
||||
bch2_trans_update(trans, BTREE_INSERT_ENTRY(src_iter, &new_src->k_i));
|
||||
bch2_trans_update(trans, BTREE_INSERT_ENTRY(dst_iter, &new_dst->k_i));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
extern const struct bch_hash_desc bch2_dirent_hash_desc;
|
||||
|
||||
const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_dirent_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
int bch2_dirent_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
|
||||
#define bch2_bkey_dirent_ops (struct bkey_ops) { \
|
||||
.key_invalid = bch2_dirent_invalid, \
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -11,14 +11,13 @@ struct btree_node_iter;
|
||||
struct btree_node_iter_large;
|
||||
struct btree_insert;
|
||||
struct btree_insert_entry;
|
||||
struct extent_insert_hook;
|
||||
struct bch_devs_mask;
|
||||
union bch_extent_crc;
|
||||
|
||||
const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *,
|
||||
struct bkey_s_c);
|
||||
void bch2_btree_ptr_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
int bch2_btree_ptr_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
|
||||
|
||||
#define bch2_bkey_btree_ops (struct bkey_ops) { \
|
||||
@ -30,7 +29,7 @@ void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
|
||||
|
||||
const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
|
||||
void bch2_extent_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
int bch2_extent_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
bool bch2_ptr_normalize(struct bch_fs *, struct btree *, struct bkey_s);
|
||||
enum merge_result bch2_extent_merge(struct bch_fs *, struct btree *,
|
||||
struct bkey_i *, struct bkey_i *);
|
||||
@ -61,9 +60,22 @@ int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
|
||||
struct bch_devs_mask *,
|
||||
struct extent_pick_ptr *);
|
||||
|
||||
void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
|
||||
|
||||
static inline bool bch2_extent_is_atomic(struct bkey *k,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
struct btree *b = iter->l[0].b;
|
||||
|
||||
return bkey_cmp(k->p, b->key.k.p) <= 0 &&
|
||||
bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0;
|
||||
}
|
||||
|
||||
enum btree_insert_ret
|
||||
bch2_insert_fixup_extent(struct btree_insert *,
|
||||
struct btree_insert_entry *);
|
||||
bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *,
|
||||
unsigned *);
|
||||
enum btree_insert_ret
|
||||
bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *);
|
||||
|
||||
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
|
||||
void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
|
||||
|
@ -108,17 +108,17 @@ do { \
|
||||
#define fifo_peek(fifo) fifo_peek_front(fifo)
|
||||
|
||||
/*
 * Iterate over the entries of @_fifo from front to back, copying each one
 * into @_entry. @_iter is the caller's cursor; typecheck() requires it to
 * have the same type as (_fifo)->front. Indices are masked with
 * (_fifo)->mask on every access, so the cursor may run past the array size.
 */
#define fifo_for_each_entry(_entry, _fifo, _iter)			\
	for (typecheck(typeof((_fifo)->front), _iter),			\
	     (_iter) = (_fifo)->front;					\
	     (((_iter) != (_fifo)->back) &&				\
	      ((_entry) = (_fifo)->data[(_iter) & (_fifo)->mask], true)); \
	     (_iter)++)
|
||||
|
||||
/*
 * Iterate over the entries of @_fifo from front to back, pointing @_ptr at
 * each one in place. @_iter is the caller's cursor; typecheck() requires it
 * to have the same type as (_fifo)->front. Indices are masked with
 * (_fifo)->mask on every access.
 */
#define fifo_for_each_entry_ptr(_ptr, _fifo, _iter)			\
	for (typecheck(typeof((_fifo)->front), _iter),			\
	     (_iter) = (_fifo)->front;					\
	     (((_iter) != (_fifo)->back) &&				\
	      ((_ptr) = &(_fifo)->data[(_iter) & (_fifo)->mask], true)); \
	     (_iter)++)
|
||||
|
||||
#endif /* _BCACHEFS_FIFO_H */
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -11,79 +11,6 @@
|
||||
|
||||
#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32)
|
||||
|
||||
/* Inode flags: */
|
||||
|
||||
/* bcachefs inode flags -> vfs inode flags: */
|
||||
static const unsigned bch_flags_to_vfs[] = {
|
||||
[__BCH_INODE_SYNC] = S_SYNC,
|
||||
[__BCH_INODE_IMMUTABLE] = S_IMMUTABLE,
|
||||
[__BCH_INODE_APPEND] = S_APPEND,
|
||||
[__BCH_INODE_NOATIME] = S_NOATIME,
|
||||
};
|
||||
|
||||
/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
|
||||
static const unsigned bch_flags_to_uflags[] = {
|
||||
[__BCH_INODE_SYNC] = FS_SYNC_FL,
|
||||
[__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL,
|
||||
[__BCH_INODE_APPEND] = FS_APPEND_FL,
|
||||
[__BCH_INODE_NODUMP] = FS_NODUMP_FL,
|
||||
[__BCH_INODE_NOATIME] = FS_NOATIME_FL,
|
||||
};
|
||||
|
||||
/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
|
||||
static const unsigned bch_flags_to_xflags[] = {
|
||||
[__BCH_INODE_SYNC] = FS_XFLAG_SYNC,
|
||||
[__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE,
|
||||
[__BCH_INODE_APPEND] = FS_XFLAG_APPEND,
|
||||
[__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP,
|
||||
[__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME,
|
||||
//[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
|
||||
};
|
||||
|
||||
#define set_flags(_map, _in, _out) \
|
||||
do { \
|
||||
unsigned _i; \
|
||||
\
|
||||
for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
|
||||
if ((_in) & (1 << _i)) \
|
||||
(_out) |= _map[_i]; \
|
||||
else \
|
||||
(_out) &= ~_map[_i]; \
|
||||
} while (0)
|
||||
|
||||
#define map_flags(_map, _in) \
|
||||
({ \
|
||||
unsigned _out = 0; \
|
||||
\
|
||||
set_flags(_map, _in, _out); \
|
||||
_out; \
|
||||
})
|
||||
|
||||
#define map_flags_rev(_map, _in) \
|
||||
({ \
|
||||
unsigned _i, _out = 0; \
|
||||
\
|
||||
for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
|
||||
if ((_in) & _map[_i]) { \
|
||||
(_out) |= 1 << _i; \
|
||||
(_in) &= ~_map[_i]; \
|
||||
} \
|
||||
(_out); \
|
||||
})
|
||||
|
||||
#define map_defined(_map) \
|
||||
({ \
|
||||
unsigned _in = ~0; \
|
||||
\
|
||||
map_flags_rev(_map, _in); \
|
||||
})
|
||||
|
||||
/* Set VFS inode flags from bcachefs inode: */
|
||||
void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
|
||||
{
|
||||
set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
|
||||
}
|
||||
|
||||
struct flags_set {
|
||||
unsigned mask;
|
||||
unsigned flags;
|
||||
@ -95,6 +22,7 @@ static int bch2_inode_flags_set(struct bch_inode_info *inode,
|
||||
struct bch_inode_unpacked *bi,
|
||||
void *p)
|
||||
{
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
/*
|
||||
* We're relying on btree locking here for exclusion with other ioctl
|
||||
* calls - use the flags in the btree (@bi), not inode->i_flags:
|
||||
@ -107,14 +35,15 @@ static int bch2_inode_flags_set(struct bch_inode_info *inode,
|
||||
!capable(CAP_LINUX_IMMUTABLE))
|
||||
return -EPERM;
|
||||
|
||||
if (!S_ISREG(inode->v.i_mode) &&
|
||||
!S_ISDIR(inode->v.i_mode) &&
|
||||
if (!S_ISREG(bi->bi_mode) &&
|
||||
!S_ISDIR(bi->bi_mode) &&
|
||||
(newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags)
|
||||
return -EINVAL;
|
||||
|
||||
bi->bi_flags &= ~s->mask;
|
||||
bi->bi_flags |= newflags;
|
||||
inode->v.i_ctime = current_time(&inode->v);
|
||||
|
||||
bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -152,10 +81,8 @@ static int bch2_ioc_setflags(struct bch_fs *c,
|
||||
}
|
||||
|
||||
mutex_lock(&inode->ei_update_lock);
|
||||
ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &s, 0);
|
||||
|
||||
if (!ret)
|
||||
bch2_inode_flags_to_vfs(inode);
|
||||
ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
|
||||
ATTR_CTIME);
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
|
||||
setflags_out:
|
||||
@ -241,9 +168,8 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
|
||||
if (ret)
|
||||
goto err_unlock;
|
||||
|
||||
ret = __bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, 0);
|
||||
if (!ret)
|
||||
bch2_inode_flags_to_vfs(inode);
|
||||
ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
|
||||
ATTR_CTIME);
|
||||
err_unlock:
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
err:
|
||||
|
@ -1,7 +1,78 @@
|
||||
#ifndef _BCACHEFS_FS_IOCTL_H
|
||||
#define _BCACHEFS_FS_IOCTL_H
|
||||
|
||||
void bch2_inode_flags_to_vfs(struct bch_inode_info *);
|
||||
/* Inode flags: */
|
||||
|
||||
/* bcachefs inode flags -> vfs inode flags: */
|
||||
static const unsigned bch_flags_to_vfs[] = {
|
||||
[__BCH_INODE_SYNC] = S_SYNC,
|
||||
[__BCH_INODE_IMMUTABLE] = S_IMMUTABLE,
|
||||
[__BCH_INODE_APPEND] = S_APPEND,
|
||||
[__BCH_INODE_NOATIME] = S_NOATIME,
|
||||
};
|
||||
|
||||
/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
|
||||
static const unsigned bch_flags_to_uflags[] = {
|
||||
[__BCH_INODE_SYNC] = FS_SYNC_FL,
|
||||
[__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL,
|
||||
[__BCH_INODE_APPEND] = FS_APPEND_FL,
|
||||
[__BCH_INODE_NODUMP] = FS_NODUMP_FL,
|
||||
[__BCH_INODE_NOATIME] = FS_NOATIME_FL,
|
||||
};
|
||||
|
||||
/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
|
||||
static const unsigned bch_flags_to_xflags[] = {
|
||||
[__BCH_INODE_SYNC] = FS_XFLAG_SYNC,
|
||||
[__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE,
|
||||
[__BCH_INODE_APPEND] = FS_XFLAG_APPEND,
|
||||
[__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP,
|
||||
[__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME,
|
||||
//[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
|
||||
};
|
||||
|
||||
/*
 * For every index i of the array @_map: if bit i of @_in is set, OR
 * _map[i] into @_out, otherwise clear the _map[i] bits from @_out.
 *
 * @_out must be an lvalue. The bit test uses an unsigned constant so that
 * index 31 does not shift into the sign bit of an int.
 */
#define set_flags(_map, _in, _out)					\
do {									\
	unsigned _i;							\
									\
	for (_i = 0; _i < ARRAY_SIZE(_map); _i++)			\
		if ((_in) & (1U << _i))					\
			(_out) |= (_map)[_i];				\
		else							\
			(_out) &= ~(_map)[_i];				\
} while (0)
|
||||
|
||||
#define map_flags(_map, _in) \
|
||||
({ \
|
||||
unsigned _out = 0; \
|
||||
\
|
||||
set_flags(_map, _in, _out); \
|
||||
_out; \
|
||||
})
|
||||
|
||||
#define map_flags_rev(_map, _in) \
|
||||
({ \
|
||||
unsigned _i, _out = 0; \
|
||||
\
|
||||
for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
|
||||
if ((_in) & _map[_i]) { \
|
||||
(_out) |= 1 << _i; \
|
||||
(_in) &= ~_map[_i]; \
|
||||
} \
|
||||
(_out); \
|
||||
})
|
||||
|
||||
#define map_defined(_map) \
|
||||
({ \
|
||||
unsigned _in = ~0; \
|
||||
\
|
||||
map_flags_rev(_map, _in); \
|
||||
})
|
||||
|
||||
/*
 * Set VFS inode flags from bcachefs inode: for every flag listed in
 * bch_flags_to_vfs, set_flags() sets or clears the mapped bit in
 * inode->v.i_flags according to inode->ei_inode.bi_flags.
 */
|
||||
static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
|
||||
{
|
||||
set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
|
||||
}
|
||||
|
||||
long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long);
|
||||
long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long);
|
||||
|
138
libbcachefs/fs.c
138
libbcachefs/fs.c
@ -47,6 +47,30 @@ static void journal_seq_copy(struct bch_inode_info *dst,
|
||||
} while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old);
|
||||
}
|
||||
|
||||
/*
 * Three-way comparison of two pointers by address: returns -1, 0 or 1 when
 * @l is respectively below, equal to or above @r.
 *
 * The addresses are compared as integers rather than with the relational
 * operators directly: C only defines '<' and '>' for pointers into the same
 * object, whereas this comparator is used to sort pointers to unrelated
 * objects.
 */
static inline int ptrcmp(void *l, void *r)
{
	unsigned long lhs = (unsigned long) l;
	unsigned long rhs = (unsigned long) r;

	return (lhs > rhs) - (lhs < rhs);
}
|
||||
|
||||
/*
 * __bch2_lock_inodes - lock (@_lock true) or unlock (@_lock false) the
 * ei_update_lock of a set of inodes.
 *
 * The variadic arguments are struct bch_inode_info pointers; NULL entries
 * and duplicates are allowed.  The pointers are sorted by address and walked
 * from the highest address downwards, so the order in which the mutexes are
 * taken depends only on the set of inodes, not on argument order.  Slot 0
 * holds a NULL sentinel: it (or the first NULL argument, since NULL sorts
 * lowest) terminates the walk, and an entry equal to its lower neighbour is
 * skipped so that each distinct inode is handled exactly once.  The array
 * index is passed to mutex_lock_nested() as the lock subclass.
 *
 * The macro's locals carry underscore-prefixed names, and the array is
 * initialised before the index is declared, so that argument expressions
 * naming caller variables (e.g. "i.src_dir") are never captured by the
 * macro's own variables.
 */
#define __bch2_lock_inodes(_lock, ...)					\
do {									\
	struct bch_inode_info *_inodes[] = { NULL, __VA_ARGS__ };	\
	unsigned _idx;							\
									\
	bubble_sort(&_inodes[1], ARRAY_SIZE(_inodes) - 1, ptrcmp);	\
									\
	for (_idx = ARRAY_SIZE(_inodes) - 1; _inodes[_idx]; --_idx)	\
		if (_inodes[_idx] != _inodes[_idx - 1]) {		\
			if (_lock)					\
				mutex_lock_nested(&_inodes[_idx]->ei_update_lock, _idx);\
			else						\
				mutex_unlock(&_inodes[_idx]->ei_update_lock);\
		}							\
} while (0)
|
||||
|
||||
#define bch2_lock_inodes(...) __bch2_lock_inodes(true, __VA_ARGS__)
|
||||
#define bch2_unlock_inodes(...) __bch2_lock_inodes(false, __VA_ARGS__)
|
||||
|
||||
/*
|
||||
* I_SIZE_DIRTY requires special handling:
|
||||
*
|
||||
@ -96,6 +120,8 @@ void bch2_inode_update_after_write(struct bch_fs *c,
|
||||
|
||||
inode->ei_inode = *bi;
|
||||
inode->ei_qid = bch_qid(bi);
|
||||
|
||||
bch2_inode_flags_to_vfs(inode);
|
||||
}
|
||||
|
||||
int __must_check bch2_write_inode_trans(struct btree_trans *trans,
|
||||
@ -106,35 +132,22 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans,
|
||||
{
|
||||
struct btree_iter *iter;
|
||||
struct bkey_inode_buf *inode_p;
|
||||
struct bkey_s_c k;
|
||||
u64 inum = inode->v.i_ino;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&inode->ei_update_lock);
|
||||
|
||||
iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(inum, 0),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
||||
iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
|
||||
POS(inode->v.i_ino, 0),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
||||
if (IS_ERR(iter))
|
||||
return PTR_ERR(iter);
|
||||
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
if ((ret = btree_iter_err(k)))
|
||||
/* The btree node lock is our lock on the inode: */
|
||||
ret = bch2_btree_iter_traverse(iter);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (WARN_ONCE(k.k->type != BCH_INODE_FS,
|
||||
"inode %llu not found when updating", inum))
|
||||
return -ENOENT;
|
||||
|
||||
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode_u);
|
||||
if (WARN_ONCE(ret,
|
||||
"error %i unpacking inode %llu", ret, inum))
|
||||
return -ENOENT;
|
||||
|
||||
BUG_ON(inode_u->bi_size != inode->ei_inode.bi_size);
|
||||
|
||||
BUG_ON(inode_u->bi_size != inode->ei_inode.bi_size &&
|
||||
!(inode_u->bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
|
||||
inode_u->bi_size > i_size_read(&inode->v));
|
||||
*inode_u = inode->ei_inode;
|
||||
|
||||
if (set) {
|
||||
ret = set(inode, inode_u, p);
|
||||
@ -147,14 +160,14 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans,
|
||||
return PTR_ERR(inode_p);
|
||||
|
||||
bch2_inode_pack(inode_p, inode_u);
|
||||
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
|
||||
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &inode_p->inode.k_i));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __must_check __bch2_write_inode(struct bch_fs *c,
|
||||
struct bch_inode_info *inode,
|
||||
inode_set_fn set,
|
||||
void *p, unsigned fields)
|
||||
int __must_check bch2_write_inode(struct bch_fs *c,
|
||||
struct bch_inode_info *inode,
|
||||
inode_set_fn set,
|
||||
void *p, unsigned fields)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct bch_inode_unpacked inode_u;
|
||||
@ -165,7 +178,7 @@ retry:
|
||||
bch2_trans_begin(&trans);
|
||||
|
||||
ret = bch2_write_inode_trans(&trans, inode, &inode_u, set, p) ?:
|
||||
bch2_trans_commit(&trans, NULL, NULL,
|
||||
bch2_trans_commit(&trans, NULL,
|
||||
&inode->ei_journal_seq,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOUNLOCK|
|
||||
@ -235,9 +248,8 @@ static int inode_update_for_create_fn(struct bch_inode_info *inode,
|
||||
{
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct bch_inode_unpacked *new_inode = p;
|
||||
struct timespec now = current_time(&inode->v);
|
||||
|
||||
bi->bi_mtime = bi->bi_ctime = timespec_to_bch2_time(c, now);
|
||||
bi->bi_mtime = bi->bi_ctime = bch2_current_time(c);
|
||||
|
||||
if (S_ISDIR(new_inode->bi_mode))
|
||||
bi->bi_nlink++;
|
||||
@ -256,6 +268,7 @@ __bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
|
||||
struct bch_inode_unpacked inode_u;
|
||||
struct bch_hash_info hash_info;
|
||||
struct posix_acl *default_acl = NULL, *acl = NULL;
|
||||
u64 journal_seq = 0;
|
||||
int ret;
|
||||
|
||||
bch2_inode_init(c, &inode_u, 0, 0, 0, rdev, &dir->ei_inode);
|
||||
@ -288,6 +301,9 @@ __bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!tmpfile)
|
||||
mutex_lock(&dir->ei_update_lock);
|
||||
|
||||
bch2_trans_init(&trans, c);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
@ -316,8 +332,8 @@ retry:
|
||||
inode_update_for_create_fn,
|
||||
&inode_u)
|
||||
: 0) ?:
|
||||
bch2_trans_commit(&trans, NULL, NULL,
|
||||
&inode->ei_journal_seq,
|
||||
bch2_trans_commit(&trans, NULL,
|
||||
&journal_seq,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOUNLOCK);
|
||||
if (ret == -EINTR)
|
||||
@ -331,9 +347,11 @@ retry:
|
||||
bch2_inode_update_after_write(c, dir, &dir_u,
|
||||
ATTR_MTIME|ATTR_CTIME);
|
||||
journal_seq_copy(dir, inode->ei_journal_seq);
|
||||
mutex_unlock(&dir->ei_update_lock);
|
||||
}
|
||||
|
||||
bch2_vfs_inode_init(c, inode, &inode_u);
|
||||
journal_seq_copy(inode, journal_seq);
|
||||
|
||||
set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
|
||||
set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
|
||||
@ -369,6 +387,9 @@ out:
|
||||
posix_acl_release(acl);
|
||||
return inode;
|
||||
err_trans:
|
||||
if (!tmpfile)
|
||||
mutex_unlock(&dir->ei_update_lock);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
make_bad_inode(&inode->v);
|
||||
iput(&inode->v);
|
||||
@ -416,9 +437,8 @@ static int inode_update_for_link_fn(struct bch_inode_info *inode,
|
||||
void *p)
|
||||
{
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct timespec now = current_time(&inode->v);
|
||||
|
||||
bi->bi_ctime = timespec_to_bch2_time(c, now);
|
||||
bi->bi_ctime = bch2_current_time(c);
|
||||
|
||||
if (bi->bi_flags & BCH_INODE_UNLINKED)
|
||||
bi->bi_flags &= ~BCH_INODE_UNLINKED;
|
||||
@ -437,8 +457,7 @@ static int __bch2_link(struct bch_fs *c,
|
||||
struct bch_inode_unpacked inode_u;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&inode->v.i_rwsem);
|
||||
|
||||
mutex_lock(&inode->ei_update_lock);
|
||||
bch2_trans_init(&trans, c);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
@ -452,7 +471,7 @@ retry:
|
||||
bch2_write_inode_trans(&trans, inode, &inode_u,
|
||||
inode_update_for_link_fn,
|
||||
NULL) ?:
|
||||
bch2_trans_commit(&trans, NULL, NULL,
|
||||
bch2_trans_commit(&trans, NULL,
|
||||
&inode->ei_journal_seq,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOUNLOCK);
|
||||
@ -464,6 +483,7 @@ retry:
|
||||
bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -475,6 +495,8 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
|
||||
struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&inode->v.i_rwsem);
|
||||
|
||||
ret = __bch2_link(c, inode, dir, dentry);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
@ -490,9 +512,8 @@ static int inode_update_dir_for_unlink_fn(struct bch_inode_info *inode,
|
||||
{
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct bch_inode_info *unlink_inode = p;
|
||||
struct timespec now = current_time(&inode->v);
|
||||
|
||||
bi->bi_mtime = bi->bi_ctime = timespec_to_bch2_time(c, now);
|
||||
bi->bi_mtime = bi->bi_ctime = bch2_current_time(c);
|
||||
|
||||
bi->bi_nlink -= S_ISDIR(unlink_inode->v.i_mode);
|
||||
|
||||
@ -504,9 +525,8 @@ static int inode_update_for_unlink_fn(struct bch_inode_info *inode,
|
||||
void *p)
|
||||
{
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct timespec now = current_time(&inode->v);
|
||||
|
||||
bi->bi_ctime = timespec_to_bch2_time(c, now);
|
||||
bi->bi_ctime = bch2_current_time(c);
|
||||
if (bi->bi_nlink)
|
||||
bi->bi_nlink--;
|
||||
else
|
||||
@ -524,6 +544,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
|
||||
struct btree_trans trans;
|
||||
int ret;
|
||||
|
||||
bch2_lock_inodes(dir, inode);
|
||||
bch2_trans_init(&trans, c);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
@ -537,7 +558,7 @@ retry:
|
||||
bch2_write_inode_trans(&trans, inode, &inode_u,
|
||||
inode_update_for_unlink_fn,
|
||||
NULL) ?:
|
||||
bch2_trans_commit(&trans, NULL, NULL,
|
||||
bch2_trans_commit(&trans, NULL,
|
||||
&dir->ei_journal_seq,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOUNLOCK|
|
||||
@ -556,6 +577,7 @@ retry:
|
||||
ATTR_MTIME);
|
||||
err:
|
||||
bch2_trans_exit(&trans);
|
||||
bch2_unlock_inodes(dir, inode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -683,8 +705,6 @@ static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry,
|
||||
{
|
||||
struct bch_fs *c = src_vdir->i_sb->s_fs_info;
|
||||
struct rename_info i = {
|
||||
.now = timespec_to_bch2_time(c,
|
||||
current_time(src_vdir)),
|
||||
.src_dir = to_bch_ei(src_vdir),
|
||||
.dst_dir = to_bch_ei(dst_vdir),
|
||||
.src_inode = to_bch_ei(src_dentry->d_inode),
|
||||
@ -718,10 +738,15 @@ static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry,
|
||||
return ret;
|
||||
}
|
||||
|
||||
bch2_lock_inodes(i.src_dir,
|
||||
i.dst_dir,
|
||||
i.src_inode,
|
||||
i.dst_inode);
|
||||
|
||||
bch2_trans_init(&trans, c);
|
||||
retry:
|
||||
bch2_trans_begin(&trans);
|
||||
i.now = timespec_to_bch2_time(c, current_time(src_vdir)),
|
||||
i.now = bch2_current_time(c);
|
||||
|
||||
ret = bch2_dirent_rename(&trans,
|
||||
i.src_dir, &src_dentry->d_name,
|
||||
@ -739,7 +764,7 @@ retry:
|
||||
? bch2_write_inode_trans(&trans, i.dst_inode, &dst_inode_u,
|
||||
inode_update_for_rename_fn, &i)
|
||||
: 0 ) ?:
|
||||
bch2_trans_commit(&trans, NULL, NULL,
|
||||
bch2_trans_commit(&trans, NULL,
|
||||
&journal_seq,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOUNLOCK);
|
||||
@ -758,6 +783,10 @@ retry:
|
||||
journal_seq_copy(i.dst_dir, journal_seq);
|
||||
}
|
||||
|
||||
journal_seq_copy(i.src_inode, journal_seq);
|
||||
if (i.dst_inode)
|
||||
journal_seq_copy(i.dst_inode, journal_seq);
|
||||
|
||||
bch2_inode_update_after_write(c, i.src_inode, &src_inode_u,
|
||||
ATTR_CTIME);
|
||||
if (i.dst_inode)
|
||||
@ -765,6 +794,10 @@ retry:
|
||||
ATTR_CTIME);
|
||||
err:
|
||||
bch2_trans_exit(&trans);
|
||||
bch2_unlock_inodes(i.src_dir,
|
||||
i.dst_dir,
|
||||
i.src_inode,
|
||||
i.dst_inode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -849,7 +882,7 @@ retry:
|
||||
(iattr->ia_valid & ATTR_MODE
|
||||
? bch2_acl_chmod(&trans, inode, iattr->ia_mode, &acl)
|
||||
: 0) ?:
|
||||
bch2_trans_commit(&trans, NULL, NULL,
|
||||
bch2_trans_commit(&trans, NULL,
|
||||
&inode->ei_journal_seq,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOUNLOCK|
|
||||
@ -1198,8 +1231,6 @@ static void bch2_vfs_inode_init(struct bch_fs *c,
|
||||
inode->ei_quota_reserved = 0;
|
||||
inode->ei_str_hash = bch2_hash_info_init(c, bi);
|
||||
|
||||
bch2_inode_flags_to_vfs(inode);
|
||||
|
||||
inode->v.i_mapping->a_ops = &bch_address_space_operations;
|
||||
|
||||
switch (inode->v.i_mode & S_IFMT) {
|
||||
@ -1272,8 +1303,8 @@ static int bch2_vfs_write_inode(struct inode *vinode,
|
||||
int ret;
|
||||
|
||||
mutex_lock(&inode->ei_update_lock);
|
||||
ret = __bch2_write_inode(c, inode, inode_update_times_fn, NULL,
|
||||
ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
|
||||
ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
|
||||
ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
|
||||
if (c->opts.journal_flush_disabled)
|
||||
@ -1312,13 +1343,16 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
{
|
||||
struct super_block *sb = dentry->d_sb;
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
struct bch_fs_usage usage = bch2_fs_usage_read(c);
|
||||
u64 hidden_metadata = usage.buckets[BCH_DATA_SB] +
|
||||
usage.buckets[BCH_DATA_JOURNAL];
|
||||
unsigned shift = sb->s_blocksize_bits - 9;
|
||||
u64 fsid;
|
||||
|
||||
buf->f_type = BCACHEFS_STATFS_MAGIC;
|
||||
buf->f_bsize = sb->s_blocksize;
|
||||
buf->f_blocks = c->capacity >> PAGE_SECTOR_SHIFT;
|
||||
buf->f_bfree = bch2_fs_sectors_free(c, bch2_fs_usage_read(c)) >>
|
||||
PAGE_SECTOR_SHIFT;
|
||||
buf->f_blocks = (c->capacity - hidden_metadata) >> shift;
|
||||
buf->f_bfree = (c->capacity - bch2_fs_sectors_used(c, usage)) >> shift;
|
||||
buf->f_bavail = buf->f_bfree;
|
||||
buf->f_files = atomic_long_read(&c->nr_inodes);
|
||||
buf->f_ffree = U64_MAX;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#ifndef _BCACHEFS_FS_H
|
||||
#define _BCACHEFS_FS_H
|
||||
|
||||
#include "inode.h"
|
||||
#include "opts.h"
|
||||
#include "str_hash.h"
|
||||
#include "quota_types.h"
|
||||
@ -43,6 +44,11 @@ static inline unsigned nlink_bias(umode_t mode)
|
||||
return S_ISDIR(mode) ? 2 : 1;
|
||||
}
|
||||
|
||||
/* Current time from current_kernel_time64(), converted to a bcachefs timestamp for filesystem @c by timespec_to_bch2_time(). */
static inline u64 bch2_current_time(struct bch_fs *c)
|
||||
{
|
||||
return timespec_to_bch2_time(c, current_kernel_time64());
|
||||
}
|
||||
|
||||
struct bch_inode_unpacked;
|
||||
|
||||
#ifndef NO_BCACHEFS_FS
|
||||
@ -59,10 +65,8 @@ int __must_check bch2_write_inode_trans(struct btree_trans *,
|
||||
struct bch_inode_info *,
|
||||
struct bch_inode_unpacked *,
|
||||
inode_set_fn, void *);
|
||||
int __must_check __bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
|
||||
inode_set_fn, void *, unsigned);
|
||||
int __must_check bch2_write_inode(struct bch_fs *,
|
||||
struct bch_inode_info *);
|
||||
int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
|
||||
inode_set_fn, void *, unsigned);
|
||||
|
||||
void bch2_vfs_exit(void);
|
||||
int bch2_vfs_init(void);
|
||||
|
@ -72,8 +72,7 @@ static int reattach_inode(struct bch_fs *c,
|
||||
bch2_inode_pack(&packed, lostfound_inode);
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
|
||||
NULL, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL);
|
||||
if (ret) {
|
||||
bch_err(c, "error %i reattaching inode %llu while updating lost+found",
|
||||
ret, inum);
|
||||
@ -201,7 +200,7 @@ retry:
|
||||
}
|
||||
|
||||
ret = bch2_hash_delete_at(&trans, desc, info, iter) ?:
|
||||
bch2_trans_commit(&trans, NULL, NULL, NULL,
|
||||
bch2_trans_commit(&trans, NULL, NULL,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL);
|
||||
err:
|
||||
@ -289,6 +288,13 @@ fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Drop every extent of inode @inode_nr that lies at or beyond @new_size.
 *
 * @new_size is in bytes; it is rounded up to the filesystem block size and
 * converted to 512-byte sectors to form the start of the deleted range, which
 * runs to the first position of the next inode number.  Returns the result of
 * bch2_btree_delete_range().
 */
static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size)
{
	u64 first_sector = round_up(new_size, block_bytes(c)) >> 9;
	struct bpos from = POS(inode_nr, first_sector);
	struct bpos to = POS(inode_nr + 1, 0);

	return bch2_btree_delete_range(c, BTREE_ID_EXTENTS, from, to, NULL);
}
|
||||
|
||||
/*
|
||||
* Walk extents: verify that extents have a corresponding S_ISREG inode, and
|
||||
* that i_size and i_sectors are consistent
|
||||
@ -319,7 +325,7 @@ static int check_extents(struct bch_fs *c)
|
||||
k.k->type, k.k->p.inode, w.inode.bi_mode)) {
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
ret = bch2_inode_truncate(c, k.k->p.inode, 0, NULL, NULL);
|
||||
ret = bch2_inode_truncate(c, k.k->p.inode, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
continue;
|
||||
@ -341,10 +347,7 @@ static int check_extents(struct bch_fs *c)
|
||||
bch2_inode_pack(&p, &w.inode);
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_INODES,
|
||||
&p.inode.k_i,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
&p.inode.k_i, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
if (ret) {
|
||||
bch_err(c, "error in fs gc: error %i "
|
||||
@ -365,8 +368,7 @@ static int check_extents(struct bch_fs *c)
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
ret = bch2_inode_truncate(c, k.k->p.inode,
|
||||
round_up(w.inode.bi_size, PAGE_SIZE) >> 9,
|
||||
NULL, NULL);
|
||||
w.inode.bi_size);
|
||||
if (ret)
|
||||
goto err;
|
||||
continue;
|
||||
@ -397,7 +399,7 @@ static int check_dirents(struct bch_fs *c)
|
||||
|
||||
bch2_trans_init(&trans, c);
|
||||
|
||||
BUG_ON(bch2_trans_preload_iters(&trans));
|
||||
bch2_trans_preload_iters(&trans);
|
||||
|
||||
iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
|
||||
POS(BCACHEFS_ROOT_INO, 0), 0);
|
||||
@ -507,7 +509,7 @@ static int check_dirents(struct bch_fs *c)
|
||||
bkey_reassemble(&n->k_i, d.s_c);
|
||||
n->v.d_type = mode_to_type(target.bi_mode);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BTREE_INSERT_ENTRY(iter, &n->k_i));
|
||||
kfree(n);
|
||||
@ -538,7 +540,7 @@ static int check_xattrs(struct bch_fs *c)
|
||||
|
||||
bch2_trans_init(&trans, c);
|
||||
|
||||
BUG_ON(bch2_trans_preload_iters(&trans));
|
||||
bch2_trans_preload_iters(&trans);
|
||||
|
||||
iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
|
||||
POS(BCACHEFS_ROOT_INO, 0), 0);
|
||||
@ -601,7 +603,7 @@ create_root:
|
||||
bch2_inode_pack(&packed, root_inode);
|
||||
|
||||
return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
|
||||
NULL, NULL, NULL, BTREE_INSERT_NOFAIL);
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL);
|
||||
}
|
||||
|
||||
/* Get lost+found, create if it doesn't exist: */
|
||||
@ -645,7 +647,7 @@ create_lostfound:
|
||||
bch2_inode_pack(&packed, root_inode);
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
|
||||
NULL, NULL, NULL, BTREE_INSERT_NOFAIL);
|
||||
NULL, NULL, BTREE_INSERT_NOFAIL);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -1093,9 +1095,7 @@ static int check_inode(struct bch_fs *c,
|
||||
* just switch units to bytes and that issue goes away
|
||||
*/
|
||||
|
||||
ret = bch2_inode_truncate(c, u.bi_inum,
|
||||
round_up(u.bi_size, PAGE_SIZE) >> 9,
|
||||
NULL, NULL);
|
||||
ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size);
|
||||
if (ret) {
|
||||
bch_err(c, "error in fs gc: error %i "
|
||||
"truncating inode", ret);
|
||||
@ -1141,7 +1141,7 @@ static int check_inode(struct bch_fs *c,
|
||||
|
||||
bch2_inode_pack(&p, &u);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
|
||||
if (ret && ret != -EINTR)
|
||||
|
@ -227,8 +227,8 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_inode_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
int bch2_inode_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
{
|
||||
char *out = buf, *end = out + size;
|
||||
struct bkey_s_c_inode inode;
|
||||
@ -248,6 +248,8 @@ void bch2_inode_to_text(struct bch_fs *c, char *buf,
|
||||
#undef BCH_INODE_FIELD
|
||||
break;
|
||||
}
|
||||
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
|
||||
@ -255,8 +257,8 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
|
||||
struct bch_inode_unpacked *parent)
|
||||
{
|
||||
s64 now = timespec_to_bch2_time(c,
|
||||
timespec_trunc(current_kernel_time(),
|
||||
c->sb.time_precision));
|
||||
timespec64_trunc(current_kernel_time64(),
|
||||
c->sb.time_precision));
|
||||
|
||||
memset(inode_u, 0, sizeof(*inode_u));
|
||||
|
||||
@ -347,7 +349,8 @@ again:
|
||||
inode_u->bi_generation = bkey_generation(k);
|
||||
|
||||
bch2_inode_pack(inode_p, inode_u);
|
||||
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
|
||||
bch2_trans_update(trans,
|
||||
BTREE_INSERT_ENTRY(iter, &inode_p->inode.k_i));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -369,33 +372,14 @@ int bch2_inode_create(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
|
||||
__bch2_inode_create(&trans, inode_u, min, max, hint));
|
||||
}
|
||||
|
||||
int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size,
|
||||
struct extent_insert_hook *hook, u64 *journal_seq)
|
||||
{
|
||||
return bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
|
||||
POS(inode_nr, new_size),
|
||||
POS(inode_nr + 1, 0),
|
||||
ZERO_VERSION, NULL, hook,
|
||||
journal_seq);
|
||||
}
|
||||
|
||||
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_i_inode_generation delete;
|
||||
struct bpos start = POS(inode_nr, 0);
|
||||
struct bpos end = POS(inode_nr + 1, 0);
|
||||
int ret;
|
||||
|
||||
ret = bch2_inode_truncate(c, inode_nr, 0, NULL, NULL);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS,
|
||||
POS(inode_nr, 0),
|
||||
POS(inode_nr + 1, 0),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* If this was a directory, there shouldn't be any real dirents left -
|
||||
* but there could be whiteouts (from hash collisions) that we should
|
||||
@ -404,11 +388,13 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
|
||||
* XXX: the dirent code ideally would delete whiteouts when they're no
|
||||
* longer needed
|
||||
*/
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
|
||||
POS(inode_nr, 0),
|
||||
POS(inode_nr + 1, 0),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
if (ret < 0)
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
|
||||
start, end, NULL) ?:
|
||||
bch2_btree_delete_range(c, BTREE_ID_XATTRS,
|
||||
start, end, NULL) ?:
|
||||
bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
|
||||
start, end, NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inode_nr, 0),
|
||||
@ -452,7 +438,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
|
||||
delete.v.bi_generation = cpu_to_le32(bi_generation);
|
||||
}
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BTREE_INSERT_ENTRY(&iter, &delete.k_i));
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <linux/math64.h>
|
||||
|
||||
const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_inode_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
int bch2_inode_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
|
||||
#define bch2_bkey_inode_ops (struct bkey_ops) { \
|
||||
.key_invalid = bch2_inode_invalid, \
|
||||
@ -45,21 +45,19 @@ int __bch2_inode_create(struct btree_trans *,
|
||||
int bch2_inode_create(struct bch_fs *, struct bch_inode_unpacked *,
|
||||
u64, u64, u64 *);
|
||||
|
||||
int bch2_inode_truncate(struct bch_fs *, u64, u64,
|
||||
struct extent_insert_hook *, u64 *);
|
||||
int bch2_inode_rm(struct bch_fs *, u64);
|
||||
|
||||
int bch2_inode_find_by_inum(struct bch_fs *, u64,
|
||||
struct bch_inode_unpacked *);
|
||||
|
||||
static inline struct timespec bch2_time_to_timespec(struct bch_fs *c, u64 time)
|
||||
static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time)
|
||||
{
|
||||
return ns_to_timespec(time * c->sb.time_precision + c->sb.time_base_lo);
|
||||
return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo);
|
||||
}
|
||||
|
||||
static inline u64 timespec_to_bch2_time(struct bch_fs *c, struct timespec ts)
|
||||
static inline u64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts)
|
||||
{
|
||||
s64 ns = timespec_to_ns(&ts) - c->sb.time_base_lo;
|
||||
s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo;
|
||||
|
||||
if (c->sb.time_precision == 1)
|
||||
return ns;
|
||||
|
@ -285,7 +285,7 @@ int bch2_write_index_default(struct bch_write_op *op)
|
||||
BTREE_ITER_INTENT);
|
||||
|
||||
ret = bch2_btree_insert_list_at(&iter, keys, &op->res,
|
||||
NULL, op_journal_seq(op),
|
||||
op_journal_seq(op),
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE);
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
@ -1388,7 +1388,7 @@ retry:
|
||||
if (!bch2_extent_narrow_crcs(e, new_crc))
|
||||
goto out;
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_NOWAIT,
|
||||
|
@ -32,14 +32,8 @@ void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
|
||||
test_bit(JOURNAL_NEED_WRITE, &j->flags))
|
||||
bch2_time_stats_update(j->delay_time,
|
||||
j->need_write_time);
|
||||
#if 0
|
||||
closure_call(&j->io, bch2_journal_write, NULL, NULL);
|
||||
#else
|
||||
/* Shut sparse up: */
|
||||
closure_init(&j->io, NULL);
|
||||
set_closure_fn(&j->io, bch2_journal_write, NULL);
|
||||
bch2_journal_write(&j->io);
|
||||
#endif
|
||||
|
||||
closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
|
||||
}
|
||||
|
||||
static void journal_pin_new_entry(struct journal *j, int count)
|
||||
@ -96,7 +90,7 @@ static enum {
|
||||
} journal_buf_switch(struct journal *j, bool need_write_just_set)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct journal_buf *buf;
|
||||
struct journal_buf *buf = journal_cur_buf(j);
|
||||
union journal_res_state old, new;
|
||||
u64 v = atomic64_read(&j->reservations.counter);
|
||||
|
||||
@ -107,8 +101,11 @@ static enum {
|
||||
if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL)
|
||||
return JOURNAL_ENTRY_CLOSED;
|
||||
|
||||
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
|
||||
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) {
|
||||
/* this entry will never be written: */
|
||||
closure_wake_up(&buf->wait);
|
||||
return JOURNAL_ENTRY_ERROR;
|
||||
}
|
||||
|
||||
if (new.prev_buf_unwritten)
|
||||
return JOURNAL_ENTRY_INUSE;
|
||||
@ -129,7 +126,6 @@ static enum {
|
||||
|
||||
clear_bit(JOURNAL_NEED_WRITE, &j->flags);
|
||||
|
||||
buf = &j->buf[old.idx];
|
||||
buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
|
||||
|
||||
j->prev_buf_sectors =
|
||||
@ -138,8 +134,26 @@ static enum {
|
||||
c->opts.block_size;
|
||||
BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
|
||||
|
||||
/*
|
||||
* We have to set last_seq here, _before_ opening a new journal entry:
|
||||
*
|
||||
* A thread may replace an old pin with a new pin on its current
|
||||
* journal reservation - the expectation being that the journal will
|
||||
* contain either what the old pin protected or what the new pin
|
||||
* protects.
|
||||
*
|
||||
* After the old pin is dropped journal_last_seq() won't include the old
|
||||
* pin, so we can only write the updated last_seq on the entry that
|
||||
* contains whatever the new pin protects.
|
||||
*
|
||||
* Restated, we can _not_ update last_seq for a given entry if there
|
||||
* could be a newer entry open with reservations/pins that have been
|
||||
* taken against it.
|
||||
*
|
||||
* Hence, we want to update/set last_seq on the current journal entry right
|
||||
* before we open a new one:
|
||||
*/
|
||||
bch2_journal_reclaim_fast(j);
|
||||
/* XXX: why set this here, and not in bch2_journal_write()? */
|
||||
buf->data->last_seq = cpu_to_le64(journal_last_seq(j));
|
||||
|
||||
if (journal_entry_empty(buf->data))
|
||||
@ -154,13 +168,6 @@ static enum {
|
||||
cancel_delayed_work(&j->write_work);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
if (c->bucket_journal_seq > 1 << 14) {
|
||||
c->bucket_journal_seq = 0;
|
||||
bch2_bucket_seq_cleanup(c);
|
||||
}
|
||||
|
||||
c->bucket_journal_seq++;
|
||||
|
||||
/* ugh - might be called from __journal_res_get() under wait_event() */
|
||||
__set_current_state(TASK_RUNNING);
|
||||
bch2_journal_buf_put(j, old.idx, need_write_just_set);
|
||||
@ -265,34 +272,41 @@ static int journal_entry_open(struct journal *j)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* returns true if there's nothing to flush and no journal write still in flight
|
||||
*/
|
||||
static bool journal_flush_write(struct journal *j)
|
||||
static bool __journal_entry_close(struct journal *j)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
ret = !j->reservations.prev_buf_unwritten;
|
||||
bool set_need_write;
|
||||
|
||||
if (!journal_entry_is_open(j)) {
|
||||
spin_unlock(&j->lock);
|
||||
return ret;
|
||||
return true;
|
||||
}
|
||||
|
||||
set_bit(JOURNAL_NEED_WRITE, &j->flags);
|
||||
if (journal_buf_switch(j, false) == JOURNAL_UNLOCKED)
|
||||
ret = false;
|
||||
else
|
||||
set_need_write = !test_and_set_bit(JOURNAL_NEED_WRITE, &j->flags);
|
||||
if (set_need_write)
|
||||
j->need_write_time = local_clock();
|
||||
|
||||
switch (journal_buf_switch(j, set_need_write)) {
|
||||
case JOURNAL_ENTRY_INUSE:
|
||||
spin_unlock(&j->lock);
|
||||
return ret;
|
||||
return false;
|
||||
default:
|
||||
spin_unlock(&j->lock);
|
||||
case JOURNAL_UNLOCKED:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Locked wrapper around __journal_entry_close(): takes j->lock, then returns
 * whatever __journal_entry_close() returns.  j->lock is not released here.
 * NOTE(review): __journal_entry_close() appears to drop the lock on each of
 * its return paths - confirm before adding callers.
 */
static bool journal_entry_close(struct journal *j)
|
||||
{
|
||||
spin_lock(&j->lock);
|
||||
return __journal_entry_close(j);
|
||||
}
|
||||
|
||||
static void journal_write_work(struct work_struct *work)
|
||||
{
|
||||
struct journal *j = container_of(work, struct journal, write_work.work);
|
||||
|
||||
journal_flush_write(j);
|
||||
journal_entry_close(j);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -462,6 +476,37 @@ int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *pare
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Report the error state for journal sequence number @seq:
 *  - @seq is the current sequence: returns bch2_journal_error(j).
 *  - @seq is the one immediately before the current sequence, the snapshot of
 *    j->reservations has prev_buf_unwritten clear, and @seq is still greater
 *    than j->seq_ondisk: returns -EIO.
 *  - otherwise: returns 0.
 * NOTE(review): the -EIO case presumably means the previous entry's write
 * finished without @seq reaching disk - confirm.
 */
static int journal_seq_error(struct journal *j, u64 seq)
|
||||
{
|
||||
/* Single snapshot of the reservation state, taken before the checks below: */
union journal_res_state state = READ_ONCE(j->reservations);
|
||||
|
||||
if (seq == journal_cur_seq(j))
|
||||
return bch2_journal_error(j);
|
||||
|
||||
if (seq + 1 == journal_cur_seq(j) &&
|
||||
!state.prev_buf_unwritten &&
|
||||
seq > j->seq_ondisk)
|
||||
return -EIO;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Map journal sequence number @seq to its in-memory journal buffer:
 *  - the current buffer when @seq is the current sequence;
 *  - the previous buffer when @seq is one before the current sequence and
 *    j->reservations.prev_buf_unwritten is set;
 *  - NULL otherwise.
 * BUGs if @seq is greater than the current sequence, or equals it while the
 * current entry's offset is JOURNAL_ENTRY_CLOSED_VAL.
 * NOTE(review): the callers visible here hold j->lock - confirm that this is
 * required.
 */
static inline struct journal_buf *
|
||||
journal_seq_to_buf(struct journal *j, u64 seq)
|
||||
{
|
||||
/* seq should be for a journal entry that has been opened: */
|
||||
BUG_ON(seq > journal_cur_seq(j));
|
||||
BUG_ON(seq == journal_cur_seq(j) &&
|
||||
j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL);
|
||||
|
||||
if (seq == journal_cur_seq(j))
|
||||
return journal_cur_buf(j);
|
||||
if (seq + 1 == journal_cur_seq(j) &&
|
||||
j->reservations.prev_buf_unwritten)
|
||||
return journal_prev_buf(j);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch2_journal_wait_on_seq - wait for a journal entry to be written
|
||||
*
|
||||
@ -470,31 +515,22 @@ int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *pare
|
||||
* can wait for an arbitrary amount of time (up to @j->write_delay_ms, which is
|
||||
* configurable).
|
||||
*/
|
||||
void bch2_journal_wait_on_seq(struct journal *j, u64 seq, struct closure *parent)
|
||||
void bch2_journal_wait_on_seq(struct journal *j, u64 seq,
|
||||
struct closure *parent)
|
||||
{
|
||||
struct journal_buf *buf;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
BUG_ON(seq > journal_cur_seq(j));
|
||||
|
||||
if (bch2_journal_error(j)) {
|
||||
spin_unlock(&j->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
if (seq == journal_cur_seq(j)) {
|
||||
if (!closure_wait(&journal_cur_buf(j)->wait, parent))
|
||||
BUG();
|
||||
} else if (seq + 1 == journal_cur_seq(j) &&
|
||||
j->reservations.prev_buf_unwritten) {
|
||||
if (!closure_wait(&journal_prev_buf(j)->wait, parent))
|
||||
if ((buf = journal_seq_to_buf(j, seq))) {
|
||||
if (!closure_wait(&buf->wait, parent))
|
||||
BUG();
|
||||
|
||||
smp_mb();
|
||||
|
||||
/* check if raced with write completion (or failure) */
|
||||
if (!j->reservations.prev_buf_unwritten ||
|
||||
bch2_journal_error(j))
|
||||
closure_wake_up(&journal_prev_buf(j)->wait);
|
||||
if (seq == journal_cur_seq(j)) {
|
||||
smp_mb();
|
||||
if (bch2_journal_error(j))
|
||||
closure_wake_up(&buf->wait);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
@ -506,108 +542,35 @@ void bch2_journal_wait_on_seq(struct journal *j, u64 seq, struct closure *parent
|
||||
* like bch2_journal_wait_on_seq, except that it triggers a write immediately if
|
||||
* necessary
|
||||
*/
|
||||
void bch2_journal_flush_seq_async(struct journal *j, u64 seq, struct closure *parent)
|
||||
void bch2_journal_flush_seq_async(struct journal *j, u64 seq,
|
||||
struct closure *parent)
|
||||
{
|
||||
struct journal_buf *buf;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
|
||||
BUG_ON(seq > journal_cur_seq(j));
|
||||
|
||||
if (bch2_journal_error(j)) {
|
||||
spin_unlock(&j->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
if (seq == journal_cur_seq(j)) {
|
||||
bool set_need_write = false;
|
||||
|
||||
buf = journal_cur_buf(j);
|
||||
|
||||
if (parent && !closure_wait(&buf->wait, parent))
|
||||
BUG();
|
||||
|
||||
if (!test_and_set_bit(JOURNAL_NEED_WRITE, &j->flags)) {
|
||||
j->need_write_time = local_clock();
|
||||
set_need_write = true;
|
||||
}
|
||||
|
||||
switch (journal_buf_switch(j, set_need_write)) {
|
||||
case JOURNAL_ENTRY_ERROR:
|
||||
if (parent)
|
||||
closure_wake_up(&buf->wait);
|
||||
break;
|
||||
case JOURNAL_ENTRY_CLOSED:
|
||||
/*
|
||||
* Journal entry hasn't been opened yet, but caller
|
||||
* claims it has something
|
||||
*/
|
||||
BUG();
|
||||
case JOURNAL_ENTRY_INUSE:
|
||||
break;
|
||||
case JOURNAL_UNLOCKED:
|
||||
return;
|
||||
}
|
||||
} else if (parent &&
|
||||
seq + 1 == journal_cur_seq(j) &&
|
||||
j->reservations.prev_buf_unwritten) {
|
||||
buf = journal_prev_buf(j);
|
||||
|
||||
if (parent &&
|
||||
(buf = journal_seq_to_buf(j, seq)))
|
||||
if (!closure_wait(&buf->wait, parent))
|
||||
BUG();
|
||||
|
||||
smp_mb();
|
||||
|
||||
/* check if raced with write completion (or failure) */
|
||||
if (!j->reservations.prev_buf_unwritten ||
|
||||
bch2_journal_error(j))
|
||||
closure_wake_up(&buf->wait);
|
||||
}
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
if (seq == journal_cur_seq(j))
|
||||
__journal_entry_close(j);
|
||||
else
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
static int journal_seq_flushed(struct journal *j, u64 seq)
|
||||
{
|
||||
struct journal_buf *buf;
|
||||
int ret = 1;
|
||||
int ret;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
BUG_ON(seq > journal_cur_seq(j));
|
||||
ret = seq <= j->seq_ondisk ? 1 : journal_seq_error(j, seq);
|
||||
|
||||
if (seq == journal_cur_seq(j)) {
|
||||
bool set_need_write = false;
|
||||
|
||||
ret = 0;
|
||||
|
||||
buf = journal_cur_buf(j);
|
||||
|
||||
if (!test_and_set_bit(JOURNAL_NEED_WRITE, &j->flags)) {
|
||||
j->need_write_time = local_clock();
|
||||
set_need_write = true;
|
||||
}
|
||||
|
||||
switch (journal_buf_switch(j, set_need_write)) {
|
||||
case JOURNAL_ENTRY_ERROR:
|
||||
ret = -EIO;
|
||||
break;
|
||||
case JOURNAL_ENTRY_CLOSED:
|
||||
/*
|
||||
* Journal entry hasn't been opened yet, but caller
|
||||
* claims it has something
|
||||
*/
|
||||
BUG();
|
||||
case JOURNAL_ENTRY_INUSE:
|
||||
break;
|
||||
case JOURNAL_UNLOCKED:
|
||||
return 0;
|
||||
}
|
||||
} else if (seq + 1 == journal_cur_seq(j) &&
|
||||
j->reservations.prev_buf_unwritten) {
|
||||
ret = bch2_journal_error(j);
|
||||
}
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
if (seq == journal_cur_seq(j))
|
||||
__journal_entry_close(j);
|
||||
else
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -727,6 +690,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
if (!journal_buckets)
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* We may be called from the device add path, before the new device has
|
||||
* actually been added to the running filesystem:
|
||||
*/
|
||||
if (c)
|
||||
spin_lock(&c->journal.lock);
|
||||
|
||||
@ -743,10 +710,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
long bucket;
|
||||
|
||||
if (new_fs) {
|
||||
percpu_down_read_preempt_disable(&c->usage_lock);
|
||||
bucket = bch2_bucket_alloc_new_fs(ca);
|
||||
percpu_up_read_preempt_enable(&c->usage_lock);
|
||||
|
||||
if (bucket < 0) {
|
||||
ret = -ENOSPC;
|
||||
goto err;
|
||||
@ -765,6 +729,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
if (c) {
|
||||
percpu_down_read_preempt_disable(&c->usage_lock);
|
||||
spin_lock(&c->journal.lock);
|
||||
} else {
|
||||
preempt_disable();
|
||||
}
|
||||
|
||||
__array_insert_item(ja->buckets, ja->nr, ja->last_idx);
|
||||
@ -792,6 +758,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
if (c) {
|
||||
spin_unlock(&c->journal.lock);
|
||||
percpu_up_read_preempt_enable(&c->usage_lock);
|
||||
} else {
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
if (!new_fs)
|
||||
@ -904,13 +872,16 @@ void bch2_fs_journal_stop(struct journal *j)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
|
||||
wait_event(j->wait, journal_flush_write(j));
|
||||
wait_event(j->wait, journal_entry_close(j));
|
||||
|
||||
/* do we need to write another journal entry? */
|
||||
if (test_bit(JOURNAL_NOT_EMPTY, &j->flags) ||
|
||||
c->btree_roots_dirty)
|
||||
bch2_journal_meta(j);
|
||||
|
||||
BUG_ON(journal_entry_is_open(j) ||
|
||||
j->reservations.prev_buf_unwritten);
|
||||
|
||||
BUG_ON(!bch2_journal_error(j) &&
|
||||
test_bit(JOURNAL_NOT_EMPTY, &j->flags));
|
||||
|
||||
@ -920,6 +891,7 @@ void bch2_fs_journal_stop(struct journal *j)
|
||||
|
||||
void bch2_fs_journal_start(struct journal *j)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct journal_seq_blacklist *bl;
|
||||
u64 blacklist = 0;
|
||||
|
||||
@ -941,6 +913,8 @@ void bch2_fs_journal_start(struct journal *j)
|
||||
journal_pin_new_entry(j, 1);
|
||||
bch2_journal_buf_init(j);
|
||||
|
||||
c->last_bucket_seq_cleanup = journal_cur_seq(j);
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
/*
|
||||
@ -1014,6 +988,7 @@ int bch2_fs_journal_init(struct journal *j)
|
||||
init_waitqueue_head(&j->wait);
|
||||
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
|
||||
INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
|
||||
init_waitqueue_head(&j->pin_flush_wait);
|
||||
mutex_init(&j->blacklist_lock);
|
||||
INIT_LIST_HEAD(&j->seq_blacklist);
|
||||
mutex_init(&j->reclaim_lock);
|
||||
|
@ -901,7 +901,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
|
||||
bch2_disk_reservation_init(c, 0);
|
||||
|
||||
ret = bch2_btree_insert(c, entry->btree_id, k,
|
||||
&disk_res, NULL, NULL,
|
||||
&disk_res, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_JOURNAL_REPLAY);
|
||||
}
|
||||
@ -1204,6 +1204,9 @@ static void journal_write_done(struct closure *cl)
|
||||
struct bch_devs_list devs =
|
||||
bch2_extent_devs(bkey_i_to_s_c_extent(&w->key));
|
||||
u64 seq = le64_to_cpu(w->data->seq);
|
||||
u64 last_seq = le64_to_cpu(w->data->last_seq);
|
||||
|
||||
bch2_time_stats_update(j->write_time, j->write_start_time);
|
||||
|
||||
if (!devs.nr) {
|
||||
bch_err(c, "unable to write journal to sufficient devices");
|
||||
@ -1212,11 +1215,11 @@ static void journal_write_done(struct closure *cl)
|
||||
|
||||
if (bch2_mark_replicas(c, BCH_DATA_JOURNAL, devs))
|
||||
goto err;
|
||||
out:
|
||||
bch2_time_stats_update(j->write_time, j->write_start_time);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
j->last_seq_ondisk = seq;
|
||||
j->seq_ondisk = seq;
|
||||
j->last_seq_ondisk = last_seq;
|
||||
|
||||
if (seq >= j->pin.front)
|
||||
journal_seq_pin(j, seq)->devs = devs;
|
||||
|
||||
@ -1228,7 +1231,7 @@ out:
|
||||
* bch2_fs_journal_stop():
|
||||
*/
|
||||
mod_delayed_work(system_freezable_wq, &j->reclaim_work, 0);
|
||||
|
||||
out:
|
||||
/* also must come before signalling write completion: */
|
||||
closure_debug_destroy(cl);
|
||||
|
||||
@ -1246,6 +1249,7 @@ out:
|
||||
err:
|
||||
bch2_fatal_error(c);
|
||||
bch2_journal_halt(j);
|
||||
spin_lock(&j->lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1385,6 +1389,8 @@ no_io:
|
||||
extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr)
|
||||
ptr->offset += sectors;
|
||||
|
||||
bch2_bucket_seq_cleanup(c);
|
||||
|
||||
continue_at(cl, journal_write_done, system_highpri_wq);
|
||||
return;
|
||||
err:
|
||||
|
@ -10,34 +10,18 @@
|
||||
* entry, holding it open to ensure it gets replayed during recovery:
|
||||
*/
|
||||
|
||||
/*
 * journal_pin_seq - translate @pin_list into a journal sequence number
 *
 * The result is the absolute index of @pin_list within the j->pin fifo, as
 * computed by fifo_entry_idx_abs().
 */
static inline u64 journal_pin_seq(struct journal *j,
|
||||
struct journal_entry_pin_list *pin_list)
|
||||
{
|
||||
return fifo_entry_idx_abs(&j->pin, pin_list);
|
||||
}
|
||||
|
||||
/*
 * bch2_journal_pin_seq - sequence number that @pin is pinning
 *
 * Returns journal_pin_seq() of @pin's pin list when the pin is active, or 0
 * when it is not.  j->lock is held around the check and the lookup.
 *
 * NOTE(review): other lines in this diff rendering replace pin->pin_list
 * with pin->seq, so this function may be one that the change removes -
 * confirm against the real tree.
 */
u64 bch2_journal_pin_seq(struct journal *j, struct journal_entry_pin *pin)
|
||||
{
|
||||
u64 ret = 0; /* value reported for an inactive pin */
|
||||
|
||||
spin_lock(&j->lock);
|
||||
if (journal_pin_active(pin))
|
||||
ret = journal_pin_seq(j, pin->pin_list);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void __journal_pin_add(struct journal *j,
|
||||
struct journal_entry_pin_list *pin_list,
|
||||
u64 seq,
|
||||
struct journal_entry_pin *pin,
|
||||
journal_pin_flush_fn flush_fn)
|
||||
{
|
||||
struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
|
||||
|
||||
BUG_ON(journal_pin_active(pin));
|
||||
BUG_ON(!atomic_read(&pin_list->count));
|
||||
|
||||
atomic_inc(&pin_list->count);
|
||||
pin->pin_list = pin_list;
|
||||
pin->seq = seq;
|
||||
pin->flush = flush_fn;
|
||||
|
||||
if (flush_fn)
|
||||
@ -57,19 +41,20 @@ void bch2_journal_pin_add(struct journal *j, u64 seq,
|
||||
journal_pin_flush_fn flush_fn)
|
||||
{
|
||||
spin_lock(&j->lock);
|
||||
__journal_pin_add(j, journal_seq_pin(j, seq), pin, flush_fn);
|
||||
__journal_pin_add(j, seq, pin, flush_fn);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
static inline void __journal_pin_drop(struct journal *j,
|
||||
struct journal_entry_pin *pin)
|
||||
{
|
||||
struct journal_entry_pin_list *pin_list = pin->pin_list;
|
||||
struct journal_entry_pin_list *pin_list;
|
||||
|
||||
if (!journal_pin_active(pin))
|
||||
return;
|
||||
|
||||
pin->pin_list = NULL;
|
||||
pin_list = journal_seq_pin(j, pin->seq);
|
||||
pin->seq = 0;
|
||||
list_del_init(&pin->list);
|
||||
|
||||
/*
|
||||
@ -82,7 +67,7 @@ static inline void __journal_pin_drop(struct journal *j,
|
||||
}
|
||||
|
||||
void bch2_journal_pin_drop(struct journal *j,
|
||||
struct journal_entry_pin *pin)
|
||||
struct journal_entry_pin *pin)
|
||||
{
|
||||
spin_lock(&j->lock);
|
||||
__journal_pin_drop(j, pin);
|
||||
@ -98,15 +83,21 @@ void bch2_journal_pin_add_if_older(struct journal *j,
|
||||
|
||||
if (journal_pin_active(src_pin) &&
|
||||
(!journal_pin_active(pin) ||
|
||||
journal_pin_seq(j, src_pin->pin_list) <
|
||||
journal_pin_seq(j, pin->pin_list))) {
|
||||
src_pin->seq < pin->seq)) {
|
||||
__journal_pin_drop(j, pin);
|
||||
__journal_pin_add(j, src_pin->pin_list, pin, flush_fn);
|
||||
__journal_pin_add(j, src_pin->seq, pin, flush_fn);
|
||||
}
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
/*
 * bch2_journal_pin_flush - wait until @pin is not the pin being flushed
 *
 * @pin must already be inactive (BUG otherwise).  Sleeps on
 * j->pin_flush_wait until j->flush_in_progress no longer points at @pin.
 */
void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
|
||||
{
|
||||
BUG_ON(journal_pin_active(pin));
|
||||
|
||||
wait_event(j->pin_flush_wait, j->flush_in_progress != pin);
|
||||
}
|
||||
|
||||
/*
|
||||
* Journal reclaim: flush references to open journal entries to reclaim space in
|
||||
* the journal
|
||||
@ -144,41 +135,42 @@ void bch2_journal_reclaim_fast(struct journal *j)
|
||||
journal_wake(j);
|
||||
}
|
||||
|
||||
static struct journal_entry_pin *
|
||||
__journal_get_next_pin(struct journal *j, u64 seq_to_flush, u64 *seq)
|
||||
static void journal_pin_mark_flushing(struct journal *j,
|
||||
struct journal_entry_pin *pin,
|
||||
u64 seq)
|
||||
{
|
||||
struct journal_entry_pin_list *pin_list;
|
||||
struct journal_entry_pin *ret;
|
||||
u64 iter;
|
||||
lockdep_assert_held(&j->reclaim_lock);
|
||||
|
||||
/* no need to iterate over empty fifo entries: */
|
||||
bch2_journal_reclaim_fast(j);
|
||||
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
|
||||
BUG_ON(j->flush_in_progress);
|
||||
j->flush_in_progress = pin;
|
||||
}
|
||||
|
||||
fifo_for_each_entry_ptr(pin_list, &j->pin, iter) {
|
||||
if (iter > seq_to_flush)
|
||||
break;
|
||||
static void journal_pin_flush(struct journal *j,
|
||||
struct journal_entry_pin *pin,
|
||||
u64 seq)
|
||||
{
|
||||
pin->flush(j, pin, seq);
|
||||
|
||||
ret = list_first_entry_or_null(&pin_list->list,
|
||||
struct journal_entry_pin, list);
|
||||
if (ret) {
|
||||
/* must be list_del_init(), see bch2_journal_pin_drop() */
|
||||
list_move(&ret->list, &pin_list->flushed);
|
||||
*seq = iter;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
BUG_ON(j->flush_in_progress != pin);
|
||||
j->flush_in_progress = NULL;
|
||||
wake_up(&j->pin_flush_wait);
|
||||
}
|
||||
|
||||
static struct journal_entry_pin *
|
||||
journal_get_next_pin(struct journal *j, u64 seq_to_flush, u64 *seq)
|
||||
{
|
||||
struct journal_entry_pin *ret;
|
||||
struct journal_entry_pin_list *pin_list;
|
||||
struct journal_entry_pin *ret = NULL;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
ret = __journal_get_next_pin(j, seq_to_flush, seq);
|
||||
spin_unlock(&j->lock);
|
||||
/* no need to iterate over empty fifo entries: */
|
||||
bch2_journal_reclaim_fast(j);
|
||||
|
||||
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq)
|
||||
if (*seq > seq_to_flush ||
|
||||
(ret = list_first_entry_or_null(&pin_list->list,
|
||||
struct journal_entry_pin, list)))
|
||||
break;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -278,15 +270,11 @@ void bch2_journal_reclaim_work(struct work_struct *work)
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
if (reclaim_lock_held)
|
||||
mutex_unlock(&j->reclaim_lock);
|
||||
|
||||
/* Also flush if the pin fifo is more than half full */
|
||||
spin_lock(&j->lock);
|
||||
seq_to_flush = max_t(s64, seq_to_flush,
|
||||
(s64) journal_cur_seq(j) -
|
||||
(j->pin.size >> 1));
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
/*
|
||||
* If it's been longer than j->reclaim_delay_ms since we last flushed,
|
||||
@ -298,13 +286,31 @@ void bch2_journal_reclaim_work(struct work_struct *work)
|
||||
while ((pin = journal_get_next_pin(j, need_flush
|
||||
? U64_MAX
|
||||
: seq_to_flush, &seq))) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
pin->flush(j, pin, seq);
|
||||
need_flush = false;
|
||||
if (!reclaim_lock_held) {
|
||||
spin_unlock(&j->lock);
|
||||
__set_current_state(TASK_RUNNING);
|
||||
mutex_lock(&j->reclaim_lock);
|
||||
reclaim_lock_held = true;
|
||||
spin_lock(&j->lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
journal_pin_mark_flushing(j, pin, seq);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
journal_pin_flush(j, pin, seq);
|
||||
|
||||
need_flush = false;
|
||||
j->last_flushed = jiffies;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
}
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
if (reclaim_lock_held)
|
||||
mutex_unlock(&j->reclaim_lock);
|
||||
|
||||
if (!test_bit(BCH_FS_RO, &c->flags))
|
||||
queue_delayed_work(system_freezable_wq, &j->reclaim_work,
|
||||
msecs_to_jiffies(j->reclaim_delay_ms));
|
||||
@ -327,11 +333,14 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
|
||||
* If journal replay hasn't completed, the unreplayed journal entries
|
||||
* hold refs on their corresponding sequence numbers
|
||||
*/
|
||||
ret = (*pin = __journal_get_next_pin(j, seq_to_flush, pin_seq)) != NULL ||
|
||||
ret = (*pin = journal_get_next_pin(j, seq_to_flush, pin_seq)) != NULL ||
|
||||
!test_bit(JOURNAL_REPLAY_DONE, &j->flags) ||
|
||||
journal_last_seq(j) > seq_to_flush ||
|
||||
(fifo_used(&j->pin) == 1 &&
|
||||
atomic_read(&fifo_peek_front(&j->pin).count) == 1);
|
||||
if (*pin)
|
||||
journal_pin_mark_flushing(j, *pin, *pin_seq);
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
@ -345,14 +354,18 @@ void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
|
||||
if (!test_bit(JOURNAL_STARTED, &j->flags))
|
||||
return;
|
||||
|
||||
mutex_lock(&j->reclaim_lock);
|
||||
|
||||
while (1) {
|
||||
wait_event(j->wait, journal_flush_done(j, seq_to_flush,
|
||||
&pin, &pin_seq));
|
||||
if (!pin)
|
||||
break;
|
||||
|
||||
pin->flush(j, pin, pin_seq);
|
||||
journal_pin_flush(j, pin, pin_seq);
|
||||
}
|
||||
|
||||
mutex_unlock(&j->reclaim_lock);
|
||||
}
|
||||
|
||||
int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
|
||||
|
@ -5,19 +5,17 @@
|
||||
|
||||
/*
 * journal_pin_active - report whether @pin is currently in use
 *
 * NOTE(review): two return statements appear back to back; as written only
 * the first one (pin_list != NULL) can execute.  This looks like the old and
 * the new line of a diff rendered without +/- markers - confirm which test
 * (non-NULL pin_list or non-zero seq) is the live one.
 */
static inline bool journal_pin_active(struct journal_entry_pin *pin)
|
||||
{
|
||||
return pin->pin_list != NULL;
|
||||
return pin->seq != 0;
|
||||
}
|
||||
|
||||
/*
 * journal_seq_pin - pin list slot for journal sequence number @seq
 *
 * @seq is asserted to lie in [j->pin.front, j->pin.back); the slot returned
 * is j->pin.data[seq & j->pin.mask].
 *
 * NOTE(review): the same range check appears once as BUG_ON and once as
 * EBUG_ON; this looks like the old and the new line of a diff rendered
 * without +/- markers - confirm which assertion is the live one.
 */
static inline struct journal_entry_pin_list *
|
||||
journal_seq_pin(struct journal *j, u64 seq)
|
||||
{
|
||||
BUG_ON(seq < j->pin.front || seq >= j->pin.back);
|
||||
EBUG_ON(seq < j->pin.front || seq >= j->pin.back);
|
||||
|
||||
return &j->pin.data[seq & j->pin.mask];
|
||||
}
|
||||
|
||||
u64 bch2_journal_pin_seq(struct journal *, struct journal_entry_pin *);
|
||||
|
||||
void bch2_journal_pin_add(struct journal *, u64, struct journal_entry_pin *,
|
||||
journal_pin_flush_fn);
|
||||
void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *);
|
||||
@ -25,6 +23,7 @@ void bch2_journal_pin_add_if_older(struct journal *,
|
||||
struct journal_entry_pin *,
|
||||
struct journal_entry_pin *,
|
||||
journal_pin_flush_fn);
|
||||
void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *);
|
||||
|
||||
void bch2_journal_reclaim_fast(struct journal *);
|
||||
void bch2_journal_reclaim_work(struct work_struct *);
|
||||
|
@ -47,7 +47,7 @@ typedef void (*journal_pin_flush_fn)(struct journal *j,
|
||||
/*
 * A single pin on a journal entry.
 *
 * NOTE(review): both pin_list and seq are shown as members; elsewhere in this
 * diff rendering pin->pin_list uses are replaced by pin->seq, so pin_list is
 * probably the removed member - confirm against the real tree.
 */
struct journal_entry_pin {
|
||||
struct list_head list; /* links the pin on a pin list's ->list or ->flushed list */
|
||||
journal_pin_flush_fn flush; /* callback invoked as pin->flush(j, pin, seq) */
|
||||
struct journal_entry_pin_list *pin_list; /* set to NULL when the pin is dropped */
|
||||
u64 seq; /* pinned sequence number; reset to 0 when the pin is dropped */
|
||||
};
|
||||
|
||||
/* corresponds to a btree node with a blacklisted bset: */
|
||||
@ -150,7 +150,8 @@ struct journal {
|
||||
/* Sequence number of most recent journal entry (last entry in @pin) */
|
||||
atomic64_t seq;
|
||||
|
||||
/* last_seq from the most recent journal entry written */
|
||||
/* seq, last_seq from the most recent journal entry successfully written */
|
||||
u64 seq_ondisk;
|
||||
u64 last_seq_ondisk;
|
||||
|
||||
/*
|
||||
@ -173,6 +174,10 @@ struct journal {
|
||||
u64 front, back, size, mask;
|
||||
struct journal_entry_pin_list *data;
|
||||
} pin;
|
||||
|
||||
struct journal_entry_pin *flush_in_progress;
|
||||
wait_queue_head_t pin_flush_wait;
|
||||
|
||||
u64 replay_journal_seq;
|
||||
|
||||
struct mutex blacklist_lock;
|
||||
|
@ -78,7 +78,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
|
||||
iter.pos = bkey_start_pos(&tmp.key.k);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL,
|
||||
BTREE_INSERT_ENTRY(&iter, &tmp.key));
|
||||
|
@ -158,7 +158,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
|
||||
break;
|
||||
|
||||
ret = bch2_btree_insert_at(c, &op->res,
|
||||
NULL, op_journal_seq(op),
|
||||
op_journal_seq(op),
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
|
@ -227,16 +227,10 @@ static int bch2_copygc_thread(void *arg)
|
||||
|
||||
last = atomic_long_read(&clock->now);
|
||||
|
||||
reserve = div64_u64((ca->mi.nbuckets - ca->mi.first_bucket) *
|
||||
ca->mi.bucket_size *
|
||||
c->opts.gc_reserve_percent, 200);
|
||||
reserve = ca->copygc_threshold;
|
||||
|
||||
usage = bch2_dev_usage_read(c, ca);
|
||||
|
||||
/*
|
||||
* don't start copygc until less than half the gc reserve is
|
||||
* available:
|
||||
*/
|
||||
available = __dev_buckets_available(ca, usage) *
|
||||
ca->mi.bucket_size;
|
||||
if (available > reserve) {
|
||||
|
@ -113,9 +113,12 @@ enum opt_type {
|
||||
BCH_OPT(inodes_32bit, u8, OPT_RUNTIME, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_INODE_32BIT, false) \
|
||||
BCH_OPT(gc_reserve_percent, u8, OPT_MOUNT, \
|
||||
BCH_OPT(gc_reserve_percent, u8, OPT_RUNTIME, \
|
||||
OPT_UINT(5, 21), \
|
||||
BCH_SB_GC_RESERVE, 8) \
|
||||
BCH_OPT(gc_reserve_bytes, u64, OPT_RUNTIME, \
|
||||
OPT_UINT(0, U64_MAX), \
|
||||
BCH_SB_GC_RESERVE_BYTES, 0) \
|
||||
BCH_OPT(root_reserve_percent, u8, OPT_MOUNT, \
|
||||
OPT_UINT(0, 100), \
|
||||
BCH_SB_ROOT_RESERVE, 0) \
|
||||
|
@ -45,10 +45,10 @@ static const char * const bch2_quota_counters[] = {
|
||||
"inodes",
|
||||
};
|
||||
|
||||
void bch2_quota_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
int bch2_quota_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
{
|
||||
char *out = buf, *end= buf + size;
|
||||
char *out = buf, *end = buf + size;
|
||||
struct bkey_s_c_quota dq;
|
||||
unsigned i;
|
||||
|
||||
@ -63,6 +63,8 @@ void bch2_quota_to_text(struct bch_fs *c, char *buf,
|
||||
le64_to_cpu(dq.v->c[i].softlimit));
|
||||
break;
|
||||
}
|
||||
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_QUOTA
|
||||
@ -538,7 +540,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS,
|
||||
POS(QTYP_USR, 0),
|
||||
POS(QTYP_USR + 1, 0),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -550,7 +552,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS,
|
||||
POS(QTYP_GRP, 0),
|
||||
POS(QTYP_GRP + 1, 0),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -562,7 +564,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS,
|
||||
POS(QTYP_PRJ, 0),
|
||||
POS(QTYP_PRJ + 1, 0),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -761,7 +763,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
|
||||
if (qdq->d_fieldmask & QC_INO_HARD)
|
||||
new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
|
||||
BTREE_INSERT_ENTRY(&iter, &new_quota.k_i));
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
extern const struct bch_sb_field_ops bch_sb_field_ops_quota;
|
||||
|
||||
const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_quota_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
int bch2_quota_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
|
||||
#define bch2_bkey_quota_ops (struct bkey_ops) { \
|
||||
.key_invalid = bch2_quota_invalid, \
|
||||
|
@ -330,7 +330,7 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
err = "error creating root directory";
|
||||
ret = bch2_btree_insert(c, BTREE_ID_INODES,
|
||||
&packed_inode.inode.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
NULL, NULL, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -343,7 +343,7 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
err = "error creating lost+found";
|
||||
ret = bch2_btree_insert(c, BTREE_ID_INODES,
|
||||
&packed_inode.inode.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
NULL, NULL, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
|
@ -254,14 +254,14 @@ not_found:
|
||||
return -ENOENT;
|
||||
|
||||
insert->k.p = slot->pos;
|
||||
bch2_trans_update(trans, slot, insert, 0);
|
||||
bch2_trans_update(trans, BTREE_INSERT_ENTRY(slot, insert));
|
||||
return 0;
|
||||
found:
|
||||
if (flags & BCH_HASH_SET_MUST_CREATE)
|
||||
return -EEXIST;
|
||||
|
||||
insert->k.p = iter->pos;
|
||||
bch2_trans_update(trans, iter, insert, 0);
|
||||
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, insert));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -296,7 +296,7 @@ static inline int bch2_hash_delete_at(struct btree_trans *trans,
|
||||
delete->k.p = iter->pos;
|
||||
delete->k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED;
|
||||
|
||||
bch2_trans_update(trans, iter, delete, 0);
|
||||
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, delete));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -403,6 +403,7 @@ static void bch2_fs_free(struct bch_fs *c)
|
||||
bch2_fs_compress_exit(c);
|
||||
percpu_free_rwsem(&c->usage_lock);
|
||||
free_percpu(c->usage_percpu);
|
||||
mempool_exit(&c->btree_iters_pool);
|
||||
mempool_exit(&c->btree_bounce_pool);
|
||||
bioset_exit(&c->btree_bio);
|
||||
mempool_exit(&c->btree_interior_update_pool);
|
||||
@ -435,6 +436,8 @@ void bch2_fs_stop(struct bch_fs *c)
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
|
||||
bch_verbose(c, "shutting down");
|
||||
|
||||
for_each_member_device(ca, c, i)
|
||||
if (ca->kobj.state_in_sysfs &&
|
||||
ca->disk_sb.bdev)
|
||||
@ -476,6 +479,8 @@ void bch2_fs_stop(struct bch_fs *c)
|
||||
if (c->devs[i])
|
||||
bch2_dev_free(rcu_dereference_protected(c->devs[i], 1));
|
||||
|
||||
bch_verbose(c, "shutdown complete");
|
||||
|
||||
kobject_put(&c->kobj);
|
||||
}
|
||||
|
||||
@ -628,6 +633,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
percpu_init_rwsem(&c->usage_lock) ||
|
||||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
|
||||
btree_bytes(c)) ||
|
||||
mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
|
||||
sizeof(struct btree_iter) * BTREE_ITER_MAX) ||
|
||||
bch2_io_clock_init(&c->io_clock[READ]) ||
|
||||
bch2_io_clock_init(&c->io_clock[WRITE]) ||
|
||||
bch2_fs_journal_init(&c->journal) ||
|
||||
@ -1019,14 +1026,6 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
|
||||
ca->disk_sb.bdev->bd_holder = ca;
|
||||
memset(sb, 0, sizeof(*sb));
|
||||
|
||||
if (ca->fs)
|
||||
mutex_lock(&ca->fs->sb_lock);
|
||||
|
||||
bch2_mark_dev_superblock(ca->fs, ca, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
|
||||
|
||||
if (ca->fs)
|
||||
mutex_unlock(&ca->fs->sb_lock);
|
||||
|
||||
percpu_ref_reinit(&ca->io_ref);
|
||||
|
||||
return 0;
|
||||
@ -1052,6 +1051,11 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
bch2_mark_dev_superblock(ca->fs, ca,
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
bch2_dev_sysfs_online(c, ca);
|
||||
|
||||
if (c->sb.nr_devices == 1)
|
||||
@ -1280,8 +1284,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_ALLOC,
|
||||
POS(ca->dev_idx, 0),
|
||||
POS(ca->dev_idx + 1, 0),
|
||||
ZERO_VERSION,
|
||||
NULL, NULL, NULL);
|
||||
NULL);
|
||||
if (ret) {
|
||||
bch_err(ca, "Remove failed, error deleting alloc info");
|
||||
goto err;
|
||||
@ -1329,6 +1332,24 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * dev_usage_clear - reset all usage accounting and bucket state of @ca
 *
 * Zeroes the struct bch_dev_usage of every possible CPU, then zeroes the
 * whole buckets->b[] array (nbuckets entries) of the device's bucket array.
 *
 * NOTE(review): only the read side of ca->bucket_lock is taken although the
 * bucket contents are overwritten; presumably the lock only protects the
 * bucket array pointer itself - confirm.
 */
static void dev_usage_clear(struct bch_dev *ca)
|
||||
{
|
||||
struct bucket_array *buckets;
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct bch_dev_usage *p =
|
||||
per_cpu_ptr(ca->usage_percpu, cpu);
|
||||
memset(p, 0, sizeof(*p)); /* wipe this CPU's usage counters */
|
||||
}
|
||||
|
||||
down_read(&ca->bucket_lock);
|
||||
buckets = bucket_array(ca);
|
||||
|
||||
memset(buckets->b, 0, sizeof(buckets->b[0]) * buckets->nbuckets);
|
||||
up_read(&ca->bucket_lock);
|
||||
}
|
||||
|
||||
/* Add new device to running filesystem: */
|
||||
int bch2_dev_add(struct bch_fs *c, const char *path)
|
||||
{
|
||||
@ -1367,11 +1388,28 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to allocate journal on the new device before adding the new
|
||||
* device to the filesystem because allocating after we attach requires
|
||||
* spinning up the allocator thread, and the allocator thread requires
|
||||
* doing btree writes, which if the existing devices are RO isn't going
|
||||
* to work
|
||||
*
|
||||
* So we have to mark where the superblocks are, but marking allocated
|
||||
* data normally updates the filesystem usage too, so we have to mark,
|
||||
* allocate the journal, reset all the marks, then remark after we
|
||||
* attach...
|
||||
*/
|
||||
bch2_mark_dev_superblock(ca->fs, ca,
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
|
||||
|
||||
err = "journal alloc failed";
|
||||
ret = bch2_dev_journal_alloc(ca);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
dev_usage_clear(ca);
|
||||
|
||||
mutex_lock(&c->state_lock);
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
||||
@ -1422,6 +1460,9 @@ have_slot:
|
||||
ca->disk_sb.sb->dev_idx = dev_idx;
|
||||
bch2_dev_attach(c, ca, dev_idx);
|
||||
|
||||
bch2_mark_dev_superblock(c, ca,
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
|
||||
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
|
@ -229,41 +229,42 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
|
||||
|
||||
static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
|
||||
{
|
||||
char *out = buf, *end = buf + PAGE_SIZE;
|
||||
struct bch_fs_usage stats = bch2_fs_usage_read(c);
|
||||
unsigned replicas, type;
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE,
|
||||
"capacity:\t\t%llu\n"
|
||||
"1 replicas:\n"
|
||||
"\tmeta:\t\t%llu\n"
|
||||
"\tdirty:\t\t%llu\n"
|
||||
"\treserved:\t%llu\n"
|
||||
"2 replicas:\n"
|
||||
"\tmeta:\t\t%llu\n"
|
||||
"\tdirty:\t\t%llu\n"
|
||||
"\treserved:\t%llu\n"
|
||||
"3 replicas:\n"
|
||||
"\tmeta:\t\t%llu\n"
|
||||
"\tdirty:\t\t%llu\n"
|
||||
"\treserved:\t%llu\n"
|
||||
"4 replicas:\n"
|
||||
"\tmeta:\t\t%llu\n"
|
||||
"\tdirty:\t\t%llu\n"
|
||||
"\treserved:\t%llu\n"
|
||||
out += scnprintf(out, end - out,
|
||||
"capacity:\t\t%llu\n",
|
||||
c->capacity);
|
||||
|
||||
for (replicas = 0; replicas < ARRAY_SIZE(stats.replicas); replicas++) {
|
||||
out += scnprintf(out, end - out,
|
||||
"%u replicas:\n",
|
||||
replicas + 1);
|
||||
|
||||
for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
|
||||
out += scnprintf(out, end - out,
|
||||
"\t%s:\t\t%llu\n",
|
||||
bch2_data_types[type],
|
||||
stats.replicas[replicas].data[type]);
|
||||
out += scnprintf(out, end - out,
|
||||
"\treserved:\t%llu\n",
|
||||
stats.replicas[replicas].persistent_reserved);
|
||||
}
|
||||
|
||||
out += scnprintf(out, end - out, "bucket usage\n");
|
||||
|
||||
for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
|
||||
out += scnprintf(out, end - out,
|
||||
"\t%s:\t\t%llu\n",
|
||||
bch2_data_types[type],
|
||||
stats.buckets[type]);
|
||||
|
||||
out += scnprintf(out, end - out,
|
||||
"online reserved:\t%llu\n",
|
||||
c->capacity,
|
||||
stats.s[0].data[S_META],
|
||||
stats.s[0].data[S_DIRTY],
|
||||
stats.s[0].persistent_reserved,
|
||||
stats.s[1].data[S_META],
|
||||
stats.s[1].data[S_DIRTY],
|
||||
stats.s[1].persistent_reserved,
|
||||
stats.s[2].data[S_META],
|
||||
stats.s[2].data[S_DIRTY],
|
||||
stats.s[2].persistent_reserved,
|
||||
stats.s[3].data[S_META],
|
||||
stats.s[3].data[S_DIRTY],
|
||||
stats.s[3].persistent_reserved,
|
||||
stats.online_reserved);
|
||||
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
|
||||
@ -779,13 +780,15 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
|
||||
" meta: %llu\n"
|
||||
" user: %llu\n"
|
||||
" cached: %llu\n"
|
||||
" available: %llu\n"
|
||||
" available: %lli\n"
|
||||
"sectors:\n"
|
||||
" sb: %llu\n"
|
||||
" journal: %llu\n"
|
||||
" meta: %llu\n"
|
||||
" user: %llu\n"
|
||||
" cached: %llu\n"
|
||||
" fragmented: %llu\n"
|
||||
" copygc threshold: %llu\n"
|
||||
"freelist_wait: %s\n"
|
||||
"open buckets: %u/%u (reserved %u)\n"
|
||||
"open_buckets_wait: %s\n",
|
||||
@ -800,12 +803,14 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
|
||||
stats.buckets[BCH_DATA_BTREE],
|
||||
stats.buckets[BCH_DATA_USER],
|
||||
stats.buckets[BCH_DATA_CACHED],
|
||||
__dev_buckets_available(ca, stats),
|
||||
ca->mi.nbuckets - ca->mi.first_bucket - stats.buckets_unavailable,
|
||||
stats.sectors[BCH_DATA_SB],
|
||||
stats.sectors[BCH_DATA_JOURNAL],
|
||||
stats.sectors[BCH_DATA_BTREE],
|
||||
stats.sectors[BCH_DATA_USER],
|
||||
stats.sectors[BCH_DATA_CACHED],
|
||||
stats.sectors_fragmented,
|
||||
ca->copygc_threshold,
|
||||
c->freelist_wait.list.first ? "waiting" : "empty",
|
||||
c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE,
|
||||
c->open_buckets_wait.list.first ? "waiting" : "empty");
|
||||
|
@ -14,12 +14,12 @@ static void delete_test_keys(struct bch_fs *c)
|
||||
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
|
||||
POS(0, 0), POS(0, U64_MAX),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
NULL);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
|
||||
POS(0, 0), POS(0, U64_MAX),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
NULL);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
@ -39,7 +39,7 @@ static void test_delete(struct bch_fs *c, u64 nr)
|
||||
ret = bch2_btree_iter_traverse(&iter);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
|
||||
BTREE_INSERT_ENTRY(&iter, &k.k_i));
|
||||
BUG_ON(ret);
|
||||
|
||||
@ -68,7 +68,7 @@ static void test_delete_written(struct bch_fs *c, u64 nr)
|
||||
ret = bch2_btree_iter_traverse(&iter);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
|
||||
BTREE_INSERT_ENTRY(&iter, &k.k_i));
|
||||
BUG_ON(ret);
|
||||
|
||||
@ -98,7 +98,7 @@ static void test_iterate(struct bch_fs *c, u64 nr)
|
||||
k.k.p.offset = i;
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
NULL, NULL, 0);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
@ -140,7 +140,7 @@ static void test_iterate_extents(struct bch_fs *c, u64 nr)
|
||||
k.k.size = 8;
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
NULL, NULL, 0);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
@ -185,7 +185,7 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr)
|
||||
k.k.p.offset = i * 2;
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
NULL, NULL, 0);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
@ -235,7 +235,7 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
|
||||
k.k.size = 8;
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
NULL, NULL, 0);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
@ -270,6 +270,63 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
}
|
||||
|
||||
/* extent unit tests */
|
||||
|
||||
/*
 * Monotonic counter used to stamp each test extent with a distinct version
 * (see insert_test_extent()).  File-local: nothing outside the test code
 * needs it, so keep it out of the global namespace.
 */
static u64 test_version;
|
||||
|
||||
/*
 * Insert a single cookie key into the extents btree for testing.
 *
 * The key's position offset is set to @end and its size to @end - @start;
 * its version is taken from (and then advances) the test_version counter.
 * Any insert failure is fatal (BUG_ON).
 */
static void insert_test_extent(struct bch_fs *c,
			       u64 start, u64 end)
{
	struct bkey_i_cookie cookie;
	struct bkey *key = &cookie.k_i.k;
	int ret;

	bkey_cookie_init(&cookie.k_i);

	key->p.offset	= end;
	key->size	= end - start;
	key->version.lo	= test_version++;

	ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &cookie.k_i,
				NULL, NULL, 0);
	BUG_ON(ret);
}
|
||||
|
||||
/*
 * Run one overwrite scenario: insert extent 1 (@e1_start..@e1_end), then
 * insert extent 2 (@e2_start..@e2_end) on top of it, and finally clear the
 * test keys again via delete_test_keys().
 */
static void __test_extent_overwrite(struct bch_fs *c,
				    u64 e1_start, u64 e1_end,
				    u64 e2_start, u64 e2_end)
{
	/* Order matters: the second insert is the one doing the overwriting. */
	insert_test_extent(c, e1_start, e1_end);
	insert_test_extent(c, e2_start, e2_end);

	delete_test_keys(c);
}
|
||||
|
||||
/*
 * Second extent overlaps the front of the first.
 *
 * @nr is unused here; presumably it only exists so the function matches the
 * signature expected by perf_test() -- confirm against that macro.
 */
static void test_extent_overwrite_front(struct bch_fs *c, u64 nr)
{
	/* same start, second ends inside the first */
	__test_extent_overwrite(c, 0, 64, 0, 32);

	/* second starts before the first and ends inside it */
	__test_extent_overwrite(c, 8, 64, 0, 32);
}
|
||||
|
||||
/*
 * Second extent overlaps the back of the first.
 *
 * @nr is unused here; presumably it only exists so the function matches the
 * signature expected by perf_test() -- confirm against that macro.
 */
static void test_extent_overwrite_back(struct bch_fs *c, u64 nr)
{
	/* second starts inside the first, both end at the same offset */
	__test_extent_overwrite(c, 0, 64, 32, 64);

	/* second starts inside the first and runs past its end */
	__test_extent_overwrite(c, 0, 64, 32, 72);
}
|
||||
|
||||
/*
 * Second extent (32..40) lies strictly inside the first (0..64).
 *
 * @nr is unused here; presumably it only exists so the function matches the
 * signature expected by perf_test() -- confirm against that macro.
 */
static void test_extent_overwrite_middle(struct bch_fs *c, u64 nr)
{
	__test_extent_overwrite(c, 0, 64, 32, 40);
}
|
||||
|
||||
/*
 * Second extent covers the whole of the first (32..64), with every
 * combination of "starts before / at" and "ends at / after" it.
 *
 * @nr is unused here; presumably it only exists so the function matches the
 * signature expected by perf_test() -- confirm against that macro.
 */
static void test_extent_overwrite_all(struct bch_fs *c, u64 nr)
{
	/* starts before, ends at the same offset */
	__test_extent_overwrite(c, 32, 64,  0,  64);

	/* starts before, ends after */
	__test_extent_overwrite(c, 32, 64,  0, 128);

	/* exactly the same range */
	__test_extent_overwrite(c, 32, 64, 32,  64);

	/* same start, ends after */
	__test_extent_overwrite(c, 32, 64, 32, 128);
}
|
||||
|
||||
/* perf tests */
|
||||
|
||||
static u64 test_rand(void)
|
||||
@ -294,7 +351,7 @@ static void rand_insert(struct bch_fs *c, u64 nr)
|
||||
k.k.p.offset = test_rand();
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k.k_i,
|
||||
NULL, NULL, NULL, 0);
|
||||
NULL, NULL, 0);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
}
|
||||
@ -335,7 +392,7 @@ static void rand_mixed(struct bch_fs *c, u64 nr)
|
||||
bkey_cookie_init(&k.k_i);
|
||||
k.k.p = iter.pos;
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
|
||||
BTREE_INSERT_ENTRY(&iter, &k.k_i));
|
||||
BUG_ON(ret);
|
||||
}
|
||||
@ -356,7 +413,7 @@ static void rand_delete(struct bch_fs *c, u64 nr)
|
||||
k.k.p.offset = test_rand();
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k,
|
||||
NULL, NULL, NULL, 0);
|
||||
NULL, NULL, 0);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
}
|
||||
@ -375,7 +432,7 @@ static void seq_insert(struct bch_fs *c, u64 nr)
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) {
|
||||
insert.k.p = iter.pos;
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
|
||||
BTREE_INSERT_ENTRY(&iter, &insert.k_i));
|
||||
BUG_ON(ret);
|
||||
|
||||
@ -407,7 +464,7 @@ static void seq_overwrite(struct bch_fs *c, u64 nr)
|
||||
|
||||
bkey_reassemble(&u.k_i, k);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, 0,
|
||||
BTREE_INSERT_ENTRY(&iter, &u.k_i));
|
||||
BUG_ON(ret);
|
||||
}
|
||||
@ -420,7 +477,7 @@ static void seq_delete(struct bch_fs *c, u64 nr)
|
||||
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
|
||||
POS(0, 0), POS(0, U64_MAX),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
NULL);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
@ -498,6 +555,11 @@ void bch2_btree_perf_test(struct bch_fs *c, const char *testname,
|
||||
perf_test(test_iterate_slots);
|
||||
perf_test(test_iterate_slots_extents);
|
||||
|
||||
perf_test(test_extent_overwrite_front);
|
||||
perf_test(test_extent_overwrite_back);
|
||||
perf_test(test_extent_overwrite_middle);
|
||||
perf_test(test_extent_overwrite_all);
|
||||
|
||||
if (!j.fn) {
|
||||
pr_err("unknown test %s", testname);
|
||||
return;
|
||||
|
@ -110,12 +110,12 @@ const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_xattr_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
int bch2_xattr_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
{
|
||||
char *out = buf, *end = buf + size;
|
||||
const struct xattr_handler *handler;
|
||||
struct bkey_s_c_xattr xattr;
|
||||
size_t n = 0;
|
||||
|
||||
switch (k.k->type) {
|
||||
case BCH_XATTR:
|
||||
@ -123,24 +123,26 @@ void bch2_xattr_to_text(struct bch_fs *c, char *buf,
|
||||
|
||||
handler = bch2_xattr_type_to_handler(xattr.v->x_type);
|
||||
if (handler && handler->prefix)
|
||||
n += scnprintf(buf + n, size - n, "%s", handler->prefix);
|
||||
out += scnprintf(out, end - out, "%s", handler->prefix);
|
||||
else if (handler)
|
||||
n += scnprintf(buf + n, size - n, "(type %u)",
|
||||
xattr.v->x_type);
|
||||
out += scnprintf(out, end - out, "(type %u)",
|
||||
xattr.v->x_type);
|
||||
else
|
||||
n += scnprintf(buf + n, size - n, "(unknown type %u)",
|
||||
xattr.v->x_type);
|
||||
out += scnprintf(out, end - out, "(unknown type %u)",
|
||||
xattr.v->x_type);
|
||||
|
||||
n += bch_scnmemcpy(buf + n, size - n, xattr.v->x_name,
|
||||
xattr.v->x_name_len);
|
||||
n += scnprintf(buf + n, size - n, ":");
|
||||
n += bch_scnmemcpy(buf + n, size - n, xattr_val(xattr.v),
|
||||
le16_to_cpu(xattr.v->x_val_len));
|
||||
out += bch_scnmemcpy(out, end - out, xattr.v->x_name,
|
||||
xattr.v->x_name_len);
|
||||
out += scnprintf(out, end - out, ":");
|
||||
out += bch_scnmemcpy(out, end - out, xattr_val(xattr.v),
|
||||
le16_to_cpu(xattr.v->x_val_len));
|
||||
break;
|
||||
case BCH_XATTR_WHITEOUT:
|
||||
scnprintf(buf, size, "whiteout");
|
||||
out += scnprintf(out, end - out, "whiteout");
|
||||
break;
|
||||
}
|
||||
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
|
||||
@ -433,7 +435,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
|
||||
}
|
||||
|
||||
mutex_lock(&inode->ei_update_lock);
|
||||
ret = __bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0);
|
||||
ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0);
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
|
||||
if (value &&
|
||||
|
@ -6,7 +6,7 @@
|
||||
extern const struct bch_hash_desc bch2_xattr_hash_desc;
|
||||
|
||||
const char *bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_xattr_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
int bch2_xattr_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
|
||||
|
||||
#define bch2_bkey_xattr_ops (struct bkey_ops) { \
|
||||
.key_invalid = bch2_xattr_invalid, \
|
||||
|
Loading…
Reference in New Issue
Block a user