Mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-02-22 00:00:03 +03:00)
Update bcachefs sources to 9abf628c70 bcachefs: Fix a spurious error in fsck

parent d7bfc55d23
commit bb6f4111fb
@@ -1 +1 @@
f65603966f7474213e6bf22b046e374d01fd6639
9abf628c701ad92670d697624f674cc01d42705e
include/linux/percpu-rwsem.h (new file, 72 lines)

@@ -0,0 +1,72 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PERCPU_RWSEM_H
#define _LINUX_PERCPU_RWSEM_H

#include <pthread.h>
#include <linux/preempt.h>

struct percpu_rw_semaphore {
	pthread_rwlock_t	lock;
};

#define DEFINE_STATIC_PERCPU_RWSEM(name)				\
static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name);		\
static struct percpu_rw_semaphore name = {				\
	.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),	\
	.read_count = &__percpu_rwsem_rc_##name,			\
	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
	.writer = __RCUWAIT_INITIALIZER(name.writer),			\
}

extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
extern void __percpu_up_read(struct percpu_rw_semaphore *);

static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem)
{
	pthread_rwlock_rdlock(&sem->lock);
	preempt_disable();
}

static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
{
	pthread_rwlock_rdlock(&sem->lock);
}

static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
	return !pthread_rwlock_tryrdlock(&sem->lock);
}

static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem)
{
	preempt_enable();
	pthread_rwlock_unlock(&sem->lock);
}

static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
{
	pthread_rwlock_unlock(&sem->lock);
}

static inline void percpu_down_write(struct percpu_rw_semaphore *sem)
{
	pthread_rwlock_wrlock(&sem->lock);
}

static inline void percpu_up_write(struct percpu_rw_semaphore *sem)
{
	pthread_rwlock_unlock(&sem->lock);
}

static inline void percpu_free_rwsem(struct percpu_rw_semaphore *sem) {}

static inline int percpu_init_rwsem(struct percpu_rw_semaphore *sem)
{
	pthread_rwlock_init(&sem->lock, NULL);
	return 0;
}

#define percpu_rwsem_assert_held(sem)	do {} while (0)

#endif
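The shim above maps the kernel's percpu-rwsem API onto a plain pthread rwlock, which is what lets the usage_lock conversion later in this diff compile in userspace. A minimal sketch of how the new API is exercised (only calls defined in the shim are used; the wrapper function and static variable are hypothetical):

static struct percpu_rw_semaphore usage_lock;

static void usage_lock_demo(void)
{
	percpu_init_rwsem(&usage_lock);			/* pthread_rwlock_init() underneath */

	/* reader side: what replaces lg_local_lock()/lg_local_unlock() in this commit */
	percpu_down_read_preempt_disable(&usage_lock);
	/* ... read or update per-cpu usage counters ... */
	percpu_up_read_preempt_enable(&usage_lock);

	/* writer side: what replaces lg_global_lock()/lg_global_unlock() */
	percpu_down_write(&usage_lock);
	/* ... recalculate totals with all readers excluded ... */
	percpu_up_write(&usage_lock);

	percpu_free_rwsem(&usage_lock);			/* no-op in this userspace shim */
}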
@@ -9,6 +9,7 @@ extern size_t strlcpy(char *dest, const char *src, size_t size);
extern char *skip_spaces(const char *);
extern char *strim(char *);
extern void memzero_explicit(void *, size_t);
int match_string(const char * const *, size_t, const char *);

#define kstrndup(s, n, gfp)		strndup(s, n)
@@ -296,6 +296,11 @@ DEFINE_EVENT(btree_node, btree_compact,
	TP_ARGS(c, b)
);

DEFINE_EVENT(btree_node, btree_merge,
	TP_PROTO(struct bch_fs *c, struct btree *b),
	TP_ARGS(c, b)
);

DEFINE_EVENT(btree_node, btree_set_root,
	TP_PROTO(struct bch_fs *c, struct btree *b),
	TP_ARGS(c, b)
@@ -176,34 +176,19 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type)
	return acl;
}

int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
int __bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
{
	struct bch_inode_info *inode = to_bch_ei(vinode);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	umode_t mode = inode->v.i_mode;
	int name_index;
	void *value = NULL;
	size_t size = 0;
	int ret;

	if (type == ACL_TYPE_ACCESS && acl) {
		ret = posix_acl_update_mode(&inode->v, &mode, &acl);
		if (ret)
			return ret;
	}

	switch (type) {
	case ACL_TYPE_ACCESS:
		name_index = BCH_XATTR_INDEX_POSIX_ACL_ACCESS;
		if (acl) {
			ret = posix_acl_equiv_mode(acl, &inode->v.i_mode);
			if (ret < 0)
				return ret;
			if (ret == 0)
				acl = NULL;
		}
		break;

	case ACL_TYPE_DEFAULT:
		name_index = BCH_XATTR_INDEX_POSIX_ACL_DEFAULT;
		if (!S_ISDIR(inode->v.i_mode))

@@ -220,20 +205,7 @@ int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
		return (int)PTR_ERR(value);
	}

	if (mode != inode->v.i_mode) {
		mutex_lock(&inode->ei_update_lock);
		inode->v.i_mode = mode;
		inode->v.i_ctime = current_time(&inode->v);

		ret = bch2_write_inode(c, inode);
		mutex_unlock(&inode->ei_update_lock);

		if (ret)
			goto err;
	}

	ret = bch2_xattr_set(c, inode, "", value, size, 0, name_index);
err:
	kfree(value);

	if (ret == -ERANGE)

@@ -245,4 +217,33 @@ err:
	return ret;
}

int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
{
	struct bch_inode_info *inode = to_bch_ei(vinode);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	umode_t mode = inode->v.i_mode;
	int ret;

	if (type == ACL_TYPE_ACCESS && acl) {
		ret = posix_acl_update_mode(&inode->v, &mode, &acl);
		if (ret)
			return ret;
	}

	ret = __bch2_set_acl(vinode, acl, type);
	if (ret)
		return ret;

	if (mode != inode->v.i_mode) {
		mutex_lock(&inode->ei_update_lock);
		inode->v.i_mode = mode;
		inode->v.i_ctime = current_time(&inode->v);

		ret = bch2_write_inode(c, inode);
		mutex_unlock(&inode->ei_update_lock);
	}

	return ret;
}

#endif /* CONFIG_BCACHEFS_POSIX_ACL */
@@ -52,10 +52,16 @@ static inline int bch2_acl_count(size_t size)
struct posix_acl;

extern struct posix_acl *bch2_get_acl(struct inode *, int);
extern int __bch2_set_acl(struct inode *, struct posix_acl *, int);
extern int bch2_set_acl(struct inode *, struct posix_acl *, int);

#else

static inline int __bch2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
	return 0;
}

static inline int bch2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
	return 0;
@@ -223,7 +223,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
	if (a.k->p.offset >= ca->mi.nbuckets)
		return;

	lg_local_lock(&c->usage_lock);
	percpu_down_read_preempt_disable(&c->usage_lock);

	g = bucket(ca, a.k->p.offset);
	bucket_cmpxchg(g, new, ({

@@ -237,7 +237,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
	if (a.v->fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
		g->io_time[WRITE] = get_alloc_field(&d, 2);

	lg_local_unlock(&c->usage_lock);
	percpu_up_read_preempt_enable(&c->usage_lock);
}

int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)

@@ -288,7 +288,7 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)

static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
				  size_t b, struct btree_iter *iter,
				  u64 *journal_seq)
				  u64 *journal_seq, bool nowait)
{
	struct bucket_mark m;
	__BKEY_PADDED(k, DIV_ROUND_UP(sizeof(struct bch_alloc), 8)) alloc_key;

@@ -296,6 +296,13 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
	struct bkey_i_alloc *a;
	u8 *d;
	int ret;
	unsigned flags = BTREE_INSERT_ATOMIC|
		BTREE_INSERT_NOFAIL|
		BTREE_INSERT_USE_RESERVE|
		BTREE_INSERT_USE_ALLOC_RESERVE;

	if (nowait)
		flags |= BTREE_INSERT_NOWAIT;

	bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));

@@ -304,7 +311,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
		if (ret)
			break;

		lg_local_lock(&c->usage_lock);
		percpu_down_read_preempt_disable(&c->usage_lock);
		g = bucket(ca, b);

		/* read mark under btree node lock: */

@@ -320,14 +327,9 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
			put_alloc_field(&d, 2, g->io_time[READ]);
		if (a->v.fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
			put_alloc_field(&d, 2, g->io_time[WRITE]);
		lg_local_unlock(&c->usage_lock);
		percpu_up_read_preempt_enable(&c->usage_lock);

		ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq,
					   BTREE_INSERT_ATOMIC|
					   BTREE_INSERT_NOFAIL|
					   BTREE_INSERT_USE_RESERVE|
					   BTREE_INSERT_USE_ALLOC_RESERVE|
					   BTREE_INSERT_NOWAIT,
		ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq, flags,
					   BTREE_INSERT_ENTRY(iter, &a->k_i));
		bch2_btree_iter_cond_resched(iter);
	} while (ret == -EINTR);

@@ -352,7 +354,8 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
	bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS_MIN,
			     BTREE_ITER_SLOTS|BTREE_ITER_INTENT);

	ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter, NULL);
	ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter,
				     NULL, false);
	bch2_btree_iter_unlock(&iter);
	return ret;
}

@@ -372,7 +375,8 @@ int bch2_alloc_write(struct bch_fs *c)

	down_read(&ca->bucket_lock);
	for_each_set_bit(bucket, ca->buckets_dirty, ca->mi.nbuckets) {
		ret = __bch2_alloc_write_key(c, ca, bucket, &iter, NULL);
		ret = __bch2_alloc_write_key(c, ca, bucket, &iter,
					     NULL, false);
		if (ret)
			break;

@@ -583,15 +587,20 @@ static void bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
{
	struct bucket_mark m;

	percpu_down_read_preempt_disable(&c->usage_lock);
	spin_lock(&c->freelist_lock);

	if (!bch2_invalidate_bucket(c, ca, bucket, &m)) {
		spin_unlock(&c->freelist_lock);
		percpu_up_read_preempt_enable(&c->usage_lock);
		return;
	}

	verify_not_on_freelist(c, ca, bucket);
	BUG_ON(!fifo_push(&ca->free_inc, bucket));

	spin_unlock(&c->freelist_lock);
	percpu_up_read_preempt_enable(&c->usage_lock);

	/* gc lock held: */
	bucket_io_clock_reset(c, ca, bucket, READ);

@@ -812,7 +821,8 @@ static void sort_free_inc(struct bch_fs *c, struct bch_dev *ca)
}

static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
				    u64 *journal_seq, size_t nr)
				    u64 *journal_seq, size_t nr,
				    bool nowait)
{
	struct btree_iter iter;
	int ret = 0;

@@ -820,14 +830,12 @@ static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
	bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
			     BTREE_ITER_SLOTS|BTREE_ITER_INTENT);

	/*
	 * XXX: if ca->nr_invalidated != 0, just return if we'd block doing the
	 * btree update or journal_res_get
	 */
	/* Only use nowait if we've already invalidated at least one bucket: */
	while (ca->nr_invalidated < min(nr, fifo_used(&ca->free_inc))) {
		size_t b = fifo_idx_entry(&ca->free_inc, ca->nr_invalidated);

		ret = __bch2_alloc_write_key(c, ca, b, &iter, journal_seq);
		ret = __bch2_alloc_write_key(c, ca, b, &iter, journal_seq,
					     nowait && ca->nr_invalidated);
		if (ret)
			break;

@@ -835,7 +843,9 @@ static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
	}

	bch2_btree_iter_unlock(&iter);
	return ret;

	/* If we used NOWAIT, don't return the error: */
	return ca->nr_invalidated ? 0 : ret;
}

static bool __push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket)

@@ -943,7 +953,8 @@ static int bch2_allocator_thread(void *arg)
					 fifo_used(&ca->free_inc));

			journal_seq = 0;
			ret = bch2_invalidate_free_inc(c, ca, &journal_seq, SIZE_MAX);
			ret = bch2_invalidate_free_inc(c, ca, &journal_seq,
						       SIZE_MAX, true);
			if (ret) {
				bch_err(ca, "error invalidating buckets: %i", ret);
				goto stop;

@@ -1077,11 +1088,15 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
{
	struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);

	percpu_down_read_preempt_disable(&c->usage_lock);
	spin_lock(&ob->lock);

	bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr),
			       false, gc_pos_alloc(c, ob), 0);
	ob->valid = false;

	spin_unlock(&ob->lock);
	percpu_up_read_preempt_enable(&c->usage_lock);

	spin_lock(&c->freelist_lock);
	ob->freelist = c->open_buckets_freelist;

@@ -1151,6 +1166,7 @@ int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
	long bucket;

	spin_lock(&c->freelist_lock);

	if (may_alloc_partial &&
	    ca->open_buckets_partial_nr) {
		int ret = ca->open_buckets_partial[--ca->open_buckets_partial_nr];

@@ -1202,7 +1218,6 @@ out:
	ob = bch2_open_bucket_alloc(c);

	spin_lock(&ob->lock);
	lg_local_lock(&c->usage_lock);
	buckets = bucket_array(ca);

	ob->valid = true;

@@ -1215,8 +1230,6 @@ out:

	bucket_io_clock_reset(c, ca, bucket, READ);
	bucket_io_clock_reset(c, ca, bucket, WRITE);

	lg_local_unlock(&c->usage_lock);
	spin_unlock(&ob->lock);

	spin_unlock(&c->freelist_lock);

@@ -1296,7 +1309,6 @@ static enum bucket_alloc_ret bch2_bucket_alloc_set(struct bch_fs *c,
	if (nr_ptrs_effective >= nr_replicas)
		return ALLOC_SUCCESS;

	rcu_read_lock();
	devs_sorted = bch2_wp_alloc_list(c, wp, devs);

	for (i = 0; i < devs_sorted.nr; i++) {

@@ -1337,7 +1349,6 @@ static enum bucket_alloc_ret bch2_bucket_alloc_set(struct bch_fs *c,
			break;
		}
	}
	rcu_read_unlock();

	EBUG_ON(reserve == RESERVE_MOVINGGC &&
		ret != ALLOC_SUCCESS &&

@@ -1422,8 +1433,13 @@ static int open_bucket_add_buckets(struct bch_fs *c,
				   struct closure *cl)
{
	struct bch_devs_mask devs = c->rw_devs[wp->type];
	const struct bch_devs_mask *t;
	struct open_bucket *ob;
	unsigned i;
	int ret;

	percpu_down_read_preempt_disable(&c->usage_lock);
	rcu_read_lock();

	/* Don't allocate from devices we already have pointers to: */
	for (i = 0; i < devs_have->nr; i++)

@@ -1432,17 +1448,16 @@ static int open_bucket_add_buckets(struct bch_fs *c,
	writepoint_for_each_ptr_all(wp, ob, i)
		__clear_bit(ob->ptr.dev, devs.d);

	if (target) {
		const struct bch_devs_mask *t;
		t = bch2_target_to_mask(c, target);
		if (t)
			bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);

		rcu_read_lock();
		t = bch2_target_to_mask(c, target);
		if (t)
			bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
		rcu_read_unlock();
	}
	ret = bch2_bucket_alloc_set(c, wp, nr_replicas, reserve, &devs, cl);

	return bch2_bucket_alloc_set(c, wp, nr_replicas, reserve, &devs, cl);
	rcu_read_unlock();
	percpu_up_read_preempt_enable(&c->usage_lock);

	return ret;
}

static struct write_point *__writepoint_find(struct hlist_head *head,

@@ -1980,10 +1995,12 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
		if (!is_available_bucket(m) || m.cached_sectors)
			continue;

		percpu_down_read_preempt_disable(&c->usage_lock);
		bch2_mark_alloc_bucket(c, ca, bu, true,
				       gc_pos_alloc(c, NULL),
				       BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
				       BCH_BUCKET_MARK_GC_LOCK_HELD);
		percpu_up_read_preempt_enable(&c->usage_lock);

		fifo_push(&ca->free_inc, bu);
		ca->nr_invalidated++;

@@ -2051,7 +2068,8 @@ not_enough:

	for_each_rw_member(ca, c, dev_iter) {
		ret = bch2_invalidate_free_inc(c, ca, &journal_seq,
					       ca->free[RESERVE_BTREE].size);
					       ca->free[RESERVE_BTREE].size,
					       false);
		if (ret) {
			percpu_ref_put(&ca->io_ref);
			return ret;
@@ -182,10 +182,10 @@
#include <linux/bio.h>
#include <linux/closure.h>
#include <linux/kobject.h>
#include <linux/lglock.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
#include <linux/rhashtable.h>
#include <linux/rwsem.h>
#include <linux/seqlock.h>

@@ -302,21 +302,14 @@ enum bch_time_stats {
#include "rebalance_types.h"
#include "super_types.h"

/*
 * Number of nodes we might have to allocate in a worst case btree split
 * operation - we split all the way up to the root, then allocate a new root.
 */
#define btree_reserve_required_nodes(depth)	(((depth) + 1) * 2 + 1)

/* Number of nodes btree coalesce will try to coalesce at once */
#define GC_MERGE_NODES		4U

/* Maximum number of nodes we might need to allocate atomically: */
#define BTREE_RESERVE_MAX \
	(btree_reserve_required_nodes(BTREE_MAX_DEPTH) + GC_MERGE_NODES)
#define BTREE_RESERVE_MAX	(BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))

/* Size of the freelist we allocate btree nodes from: */
#define BTREE_NODE_RESERVE	(BTREE_RESERVE_MAX * 4)
#define BTREE_NODE_RESERVE	(BTREE_RESERVE_MAX * 4)

struct btree;

@@ -591,7 +584,7 @@ struct bch_fs {

	struct bch_fs_usage __percpu	*usage_percpu;
	struct bch_fs_usage		usage_cached;
	struct lglock			usage_lock;
	struct percpu_rw_semaphore	usage_lock;

	struct closure_waitlist		freelist_wait;
@@ -118,20 +118,17 @@ static u8 bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
			   struct bkey_s_c k, unsigned flags)
{
	struct gc_pos pos = { 0 };
	struct bch_fs_usage *stats;
	u8 ret = 0;

	preempt_disable();
	stats = this_cpu_ptr(c->usage_percpu);
	switch (type) {
	case BKEY_TYPE_BTREE:
		bch2_mark_key(c, k, c->opts.btree_node_size, true, pos, stats,
		bch2_mark_key(c, k, c->opts.btree_node_size, true, pos, NULL,
			      0, flags|
			      BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
			      BCH_BUCKET_MARK_GC_LOCK_HELD);
		break;
	case BKEY_TYPE_EXTENTS:
		bch2_mark_key(c, k, k.k->size, false, pos, stats,
		bch2_mark_key(c, k, k.k->size, false, pos, NULL,
			      0, flags|
			      BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
			      BCH_BUCKET_MARK_GC_LOCK_HELD);

@@ -140,7 +137,6 @@ static u8 bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
	default:
		BUG();
	}
	preempt_enable();

	return ret;
}

@@ -320,8 +316,10 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
	unsigned i;
	u64 b;

	if (c)
	if (c) {
		lockdep_assert_held(&c->sb_lock);
		percpu_down_read_preempt_disable(&c->usage_lock);
	}

	for (i = 0; i < layout->nr_superblocks; i++) {
		u64 offset = le64_to_cpu(layout->sb_offset[i]);

@@ -345,8 +343,10 @@ void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
				  gc_phase(GC_PHASE_SB), flags);
	}

	if (c)
	if (c) {
		percpu_up_read_preempt_enable(&c->usage_lock);
		spin_unlock(&c->journal.lock);
	}
}

static void bch2_mark_superblocks(struct bch_fs *c)

@@ -397,6 +397,8 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
	size_t i, j, iter;
	unsigned ci;

	percpu_down_read_preempt_disable(&c->usage_lock);

	spin_lock(&c->freelist_lock);
	gc_pos_set(c, gc_pos_alloc(c, NULL));

@@ -433,6 +435,8 @@ static void bch2_mark_allocator_buckets(struct bch_fs *c)
		}
		spin_unlock(&ob->lock);
	}

	percpu_up_read_preempt_enable(&c->usage_lock);
}

static void bch2_gc_start(struct bch_fs *c)

@@ -444,7 +448,7 @@ static void bch2_gc_start(struct bch_fs *c)
	size_t b;
	int cpu;

	lg_global_lock(&c->usage_lock);
	percpu_down_write(&c->usage_lock);

	/*
	 * Indicates to buckets code that gc is now in progress - done under

@@ -470,7 +474,7 @@ static void bch2_gc_start(struct bch_fs *c)
		memset(p->s, 0, sizeof(p->s));
	}

	lg_global_unlock(&c->usage_lock);
	percpu_up_write(&c->usage_lock);

	/* Clear bucket marks: */
	for_each_member_device(ca, c, i) {
@@ -152,7 +152,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
	 * the prev sibling in btree node merging:
	 */
	if (iter->nodes_locked &&
	    __ffs(iter->nodes_locked) == level &&
	    __ffs(iter->nodes_locked) <= level &&
	    __btree_iter_cmp(iter->btree_id, pos, iter))
		return false;

@@ -592,6 +592,8 @@ static inline void __btree_iter_init(struct btree_iter *iter,
	/* Skip to first non whiteout: */
	if (b->level)
		bch2_btree_node_iter_peek(&l->iter, b);

	btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
}

static inline void btree_iter_node_set(struct btree_iter *iter,

@@ -1084,6 +1086,8 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
	EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
		(iter->btree_id == BTREE_ID_EXTENTS));
	EBUG_ON(iter->flags & BTREE_ITER_SLOTS);
	EBUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
		!btree_node_locked(iter, 0));

	if (iter->uptodate == BTREE_ITER_UPTODATE) {
		struct bkey_packed *k =

@@ -1093,8 +1097,6 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
			.v = bkeyp_val(&l->b->format, k)
		};

		EBUG_ON(!btree_node_locked(iter, 0));

		if (debug_check_bkeys(iter->c))
			bch2_bkey_debugcheck(iter->c, l->b, ret);
		return ret;

@@ -1257,16 +1259,16 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
	EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
		(iter->btree_id == BTREE_ID_EXTENTS));
	EBUG_ON(!(iter->flags & BTREE_ITER_SLOTS));
	EBUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
		!btree_node_locked(iter, 0));

	if (iter->uptodate == BTREE_ITER_UPTODATE) {
		struct bkey_s_c ret = { .k = &iter->k };;
		struct bkey_s_c ret = { .k = &iter->k };

		if (!bkey_deleted(&iter->k))
			ret.v = bkeyp_val(&l->b->format,
					  __bch2_btree_node_iter_peek_all(&l->iter, l->b));

		EBUG_ON(!btree_node_locked(iter, 0));

		if (debug_check_bkeys(iter->c))
			bch2_bkey_debugcheck(iter->c, l->b, ret);
		return ret;
@@ -1564,11 +1564,15 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
	struct btree_update *as;
	struct closure cl;
	int ret = 0;
	struct btree_iter *linked;

	/*
	 * We already have a disk reservation and open buckets pinned; this
	 * allocation must not block:
	 */
	for_each_linked_btree_iter(iter, linked)
		if (linked->btree_id == BTREE_ID_EXTENTS)
			btree_reserve_flags |= BTREE_INSERT_USE_RESERVE;
	if (iter->btree_id == BTREE_ID_EXTENTS)
		btree_reserve_flags |= BTREE_INSERT_USE_RESERVE;

@@ -1704,15 +1708,17 @@ retry:
	}

	as = bch2_btree_update_start(c, iter->btree_id,
				     btree_update_reserve_required(c, b),
				     BTREE_INSERT_NOFAIL|
				     BTREE_INSERT_USE_RESERVE,
				     &cl);
				     btree_update_reserve_required(c, parent) + 1,
				     BTREE_INSERT_NOFAIL|
				     BTREE_INSERT_USE_RESERVE,
				     &cl);
	if (IS_ERR(as)) {
		ret = PTR_ERR(as);
		goto out_unlock;
	}

	trace_btree_merge(c, b);

	bch2_btree_interior_update_will_free_node(as, b);
	bch2_btree_interior_update_will_free_node(as, m);

@@ -1778,8 +1784,10 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
	struct btree_update *as;

	as = bch2_btree_update_start(c, iter->btree_id,
				     btree_update_reserve_required(c, b),
				     flags, cl);
				     (parent
				      ? btree_update_reserve_required(c, parent)
				      : 0) + 1,
				     flags, cl);
	if (IS_ERR(as)) {
		trace_btree_gc_rewrite_node_fail(c, b);
		return PTR_ERR(as);

@@ -1966,6 +1974,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
			       struct btree *b, struct bkey_i_extent *new_key)
{
	struct btree *parent = btree_node_parent(iter, b);
	struct btree_update *as = NULL;
	struct btree *new_hash = NULL;
	struct closure cl;

@@ -2003,11 +2012,12 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
	}

	as = bch2_btree_update_start(c, iter->btree_id,
				     btree_update_reserve_required(c, b),
				     BTREE_INSERT_NOFAIL|
				     BTREE_INSERT_USE_RESERVE|
				     BTREE_INSERT_USE_ALLOC_RESERVE,
				     &cl);
				     parent ? btree_update_reserve_required(c, parent) : 0,
				     BTREE_INSERT_NOFAIL|
				     BTREE_INSERT_USE_RESERVE|
				     BTREE_INSERT_USE_ALLOC_RESERVE,
				     &cl);

	if (IS_ERR(as)) {
		ret = PTR_ERR(as);
		if (ret == -EAGAIN)
@@ -183,9 +183,14 @@ void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
static inline unsigned btree_update_reserve_required(struct bch_fs *c,
						     struct btree *b)
{
	unsigned depth = btree_node_root(c, b)->level - b->level;
	unsigned depth = btree_node_root(c, b)->level - b->level + 1;

	return btree_reserve_required_nodes(depth);
	/*
	 * Number of nodes we might have to allocate in a worst case btree
	 * split operation - we split all the way up to the root, then allocate
	 * a new root.
	 */
	return depth * 2 + 1;
}

static inline void btree_node_reset_sib_u64s(struct btree *b)
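A quick sanity check of the new formula (the numbers are illustrative, not taken from the source): for a leaf b under a root at level 3, depth = 3 - 0 + 1 = 4, so btree_update_reserve_required() returns 4 * 2 + 1 = 9 nodes: two per level for splits all the way up, plus one for a possible new root.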
@@ -331,7 +331,7 @@ void bch2_fs_usage_apply(struct bch_fs *c,
		stats->online_reserved	-= added;
	}

	lg_local_lock(&c->usage_lock);
	percpu_down_read_preempt_disable(&c->usage_lock);
	/* online_reserved not subject to gc: */
	this_cpu_ptr(c->usage_percpu)->online_reserved +=
		stats->online_reserved;

@@ -341,7 +341,7 @@ void bch2_fs_usage_apply(struct bch_fs *c,
		bch2_usage_add(this_cpu_ptr(c->usage_percpu), stats);

	bch2_fs_stats_verify(c);
	lg_local_unlock(&c->usage_lock);
	percpu_up_read_preempt_enable(&c->usage_lock);

	memset(stats, 0, sizeof(*stats));
}

@@ -352,7 +352,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
	struct bch_dev_usage *dev_usage;

	if (c)
		lockdep_assert_held(&c->usage_lock);
	percpu_rwsem_assert_held(&c->usage_lock);

	if (old.data_type && new.data_type &&
	    old.data_type != new.data_type) {

@@ -399,12 +399,13 @@ bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
	struct bucket *g;
	struct bucket_mark new;

	lg_local_lock(&c->usage_lock);
	percpu_rwsem_assert_held(&c->usage_lock);

	g = bucket(ca, b);

	*old = bucket_data_cmpxchg(c, ca, g, new, ({
		if (!is_available_bucket(new)) {
			lg_local_unlock(&c->usage_lock);
			percpu_up_read_preempt_enable(&c->usage_lock);
			return false;
		}

@@ -414,7 +415,6 @@ bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
		new.dirty_sectors = 0;
		new.gen++;
	}));
	lg_local_unlock(&c->usage_lock);

	if (!old->owned_by_allocator && old->cached_sectors)
		trace_invalidate(ca, bucket_to_sector(ca, b),

@@ -429,19 +429,16 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
	struct bucket *g;
	struct bucket_mark old, new;

	lg_local_lock(&c->usage_lock);
	percpu_rwsem_assert_held(&c->usage_lock);
	g = bucket(ca, b);

	if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
	    gc_will_visit(c, pos)) {
		lg_local_unlock(&c->usage_lock);
	    gc_will_visit(c, pos))
		return;
	}

	old = bucket_data_cmpxchg(c, ca, g, new, ({
		new.owned_by_allocator = owned_by_allocator;
	}));
	lg_local_unlock(&c->usage_lock);

	BUG_ON(!owned_by_allocator && !old.owned_by_allocator &&
	       c->gc_pos.phase == GC_PHASE_DONE);

@@ -471,16 +468,14 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
	BUG_ON(!type);

	if (likely(c)) {
		lg_local_lock(&c->usage_lock);
		percpu_rwsem_assert_held(&c->usage_lock);

		if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
		    gc_will_visit(c, pos)) {
			lg_local_unlock(&c->usage_lock);
		    gc_will_visit(c, pos))
			return;
		}
	}

	preempt_disable();
	rcu_read_lock();

	g = bucket(ca, b);
	old = bucket_data_cmpxchg(c, ca, g, new, ({

@@ -489,10 +484,7 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
		new.data_type	= type;
	}));

	preempt_enable();

	if (likely(c))
		lg_local_unlock(&c->usage_lock);
	rcu_read_unlock();

	BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
	       bucket_became_unavailable(c, old, new));

@@ -654,11 +646,14 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
	 * (e.g. the btree node lock, or the relevant allocator lock).
	 */

	lg_local_lock(&c->usage_lock);
	percpu_down_read_preempt_disable(&c->usage_lock);
	if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
	    gc_will_visit(c, pos))
		flags |= BCH_BUCKET_MARK_GC_WILL_VISIT;

	if (!stats)
		stats = this_cpu_ptr(c->usage_percpu);

	switch (k.k->type) {
	case BCH_EXTENT:
	case BCH_EXTENT_CACHED: {

@@ -693,7 +688,7 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
		break;
	}
	}
	lg_local_unlock(&c->usage_lock);
	percpu_up_read_preempt_enable(&c->usage_lock);
}

/* Disk reservations: */

@@ -711,19 +706,19 @@ static u64 __recalc_sectors_available(struct bch_fs *c)
/* Used by gc when it's starting: */
void bch2_recalc_sectors_available(struct bch_fs *c)
{
	lg_global_lock(&c->usage_lock);
	percpu_down_write(&c->usage_lock);
	atomic64_set(&c->sectors_available, __recalc_sectors_available(c));
	lg_global_unlock(&c->usage_lock);
	percpu_up_write(&c->usage_lock);
}

void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
{
	lg_local_lock(&c->usage_lock);
	percpu_down_read_preempt_disable(&c->usage_lock);
	this_cpu_sub(c->usage_percpu->online_reserved,
		     res->sectors);

	bch2_fs_stats_verify(c);
	lg_local_unlock(&c->usage_lock);
	percpu_up_read_preempt_enable(&c->usage_lock);

	res->sectors = 0;
}

@@ -738,7 +733,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
	s64 sectors_available;
	int ret;

	lg_local_lock(&c->usage_lock);
	percpu_down_read_preempt_disable(&c->usage_lock);
	stats = this_cpu_ptr(c->usage_percpu);

	if (sectors <= stats->available_cache)

@@ -750,7 +745,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
		get = min((u64) sectors + SECTORS_CACHE, old);

		if (get < sectors) {
			lg_local_unlock(&c->usage_lock);
			percpu_up_read_preempt_enable(&c->usage_lock);
			goto recalculate;
		}
	} while ((v = atomic64_cmpxchg(&c->sectors_available,

@@ -765,7 +760,7 @@ out:

	bch2_disk_reservations_verify(c, flags);
	bch2_fs_stats_verify(c);
	lg_local_unlock(&c->usage_lock);
	percpu_up_read_preempt_enable(&c->usage_lock);
	return 0;

recalculate:

@@ -785,8 +780,8 @@ recalculate:
		else if (!down_read_trylock(&c->gc_lock))
			return -EINTR;
	}
	lg_global_lock(&c->usage_lock);

	percpu_down_write(&c->usage_lock);
	sectors_available = __recalc_sectors_available(c);

	if (sectors <= sectors_available ||

@@ -804,7 +799,8 @@ recalculate:
	}

	bch2_fs_stats_verify(c);
	lg_global_unlock(&c->usage_lock);
	percpu_up_write(&c->usage_lock);

	if (!(flags & BCH_DISK_RESERVATION_GC_LOCK_HELD))
		up_read(&c->gc_lock);

@@ -874,7 +870,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
	if (resize) {
		down_write(&c->gc_lock);
		down_write(&ca->bucket_lock);
		lg_global_lock(&c->usage_lock);
		percpu_down_write(&c->usage_lock);
	}

	old_buckets = bucket_array(ca);

@@ -900,7 +896,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
	swap(ca->buckets_dirty, buckets_dirty);

	if (resize)
		lg_global_unlock(&c->usage_lock);
		percpu_up_write(&c->usage_lock);

	spin_lock(&c->freelist_lock);
	for (i = 0; i < RESERVE_NR; i++) {
@@ -32,7 +32,7 @@ static inline struct bucket_array *bucket_array(struct bch_dev *ca)
{
	return rcu_dereference_check(ca->buckets,
				     !ca->fs ||
				     lockdep_is_held(&ca->fs->usage_lock) ||
				     percpu_rwsem_is_held(&ca->fs->usage_lock) ||
				     lockdep_is_held(&ca->fs->gc_lock) ||
				     lockdep_is_held(&ca->bucket_lock));
}
@@ -176,6 +176,8 @@ const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned targe
	struct target t = target_decode(target);

	switch (t.type) {
	case TARGET_NULL:
		return NULL;
	case TARGET_DEV: {
		struct bch_dev *ca = t.dev < c->sb.nr_devices
			? rcu_dereference(c->devs[t.dev])
@@ -1702,6 +1702,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
	struct bio *bio;
	loff_t offset = req->ki_pos;
	bool sync = is_sync_kiocb(req);
	size_t shorten;
	ssize_t ret;

	if ((offset|iter->count) & (block_bytes(c) - 1))

@@ -1709,11 +1710,13 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)

	ret = min_t(loff_t, iter->count,
		    max_t(loff_t, 0, i_size_read(&inode->v) - offset));
	iov_iter_truncate(iter, round_up(ret, block_bytes(c)));

	if (!ret)
		return ret;

	shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c));
	iter->count -= shorten;

	bio = bio_alloc_bioset(GFP_KERNEL,
			       iov_iter_npages(iter, BIO_MAX_PAGES),
			       &c->dio_read_bioset);

@@ -1769,6 +1772,8 @@ start:
		bch2_read(c, rbio_init(bio, opts), inode->v.i_ino);
	}

	iter->count += shorten;

	if (sync) {
		closure_sync(&dio->cl);
		closure_debug_destroy(&dio->cl);

@@ -1822,6 +1827,13 @@ static long bch2_dio_write_loop(struct dio_write *dio)
		if (unlikely(ret < 0))
			goto err;

		/* gup might have faulted pages back in: */
		ret = write_invalidate_inode_pages_range(mapping,
				req->ki_pos + (dio->iop.op.written << 9),
				req->ki_pos + iov_iter_count(&dio->iter) - 1);
		if (unlikely(ret))
			goto err;

		dio->iop.op.pos = POS(inode->v.i_ino,
				      (req->ki_pos >> 9) + dio->iop.op.written);

@@ -2280,7 +2292,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
	loff_t new_size;
	int ret;

	if ((offset | len) & (PAGE_SIZE - 1))
	if ((offset | len) & (block_bytes(c) - 1))
		return -EINVAL;

	bch2_btree_iter_init(&dst, c, BTREE_ID_EXTENTS,

@@ -2354,8 +2366,11 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
btree_iter_err:
		if (ret == -EINTR)
			ret = 0;
		if (ret)
		if (ret) {
			bch2_btree_iter_unlock(&src);
			bch2_btree_iter_unlock(&dst);
			goto err_put_sectors_dirty;
		}
		/*
		 * XXX: if we error here we've left data with multiple
		 * pointers... which isn't a _super_ serious problem...

@@ -2368,7 +2383,7 @@ btree_iter_err:
	bch2_btree_iter_unlock(&dst);

	ret = bch2_inode_truncate(c, inode->v.i_ino,
				  round_up(new_size, PAGE_SIZE) >> 9,
				  round_up(new_size, block_bytes(c)) >> 9,
				  &i_sectors_hook.hook,
				  &inode->ei_journal_seq);
	if (ret)

@@ -2381,9 +2396,6 @@ err_put_sectors_dirty:
err:
	pagecache_block_put(&mapping->add_lock);
	inode_unlock(&inode->v);

	bch2_btree_iter_unlock(&src);
	bch2_btree_iter_unlock(&dst);
	return ret;
}

@@ -2483,7 +2495,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
				&i_sectors_hook.quota_res,
				sectors, true);
		if (unlikely(ret))
			goto err_put_sectors_dirty;
			goto btree_iter_err;
	}

	if (reservation.v.nr_replicas < replicas ||

@@ -2491,7 +2503,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
			ret = bch2_disk_reservation_get(c, &disk_res, sectors,
							replicas, 0);
			if (unlikely(ret))
				goto err_put_sectors_dirty;
				goto btree_iter_err;

			reservation.v.nr_replicas = disk_res.nr_replicas;
		}

@@ -2503,8 +2515,12 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
					  BTREE_INSERT_ENTRY(&iter, &reservation.k_i));
		bch2_disk_reservation_put(c, &disk_res);
btree_iter_err:
		if (ret < 0 && ret != -EINTR)
		if (ret == -EINTR)
			ret = 0;
		if (ret) {
			bch2_btree_iter_unlock(&iter);
			goto err_put_sectors_dirty;
		}

	}
	bch2_btree_iter_unlock(&iter);

@@ -2544,7 +2560,6 @@ btree_iter_err:
err_put_sectors_dirty:
	ret = i_sectors_dirty_finish(c, &i_sectors_hook) ?: ret;
err:
	bch2_btree_iter_unlock(&iter);
	pagecache_block_put(&mapping->add_lock);
	inode_unlock(&inode->v);
	return ret;
@@ -243,13 +243,13 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c,
	atomic_long_inc(&c->nr_inodes);

	if (default_acl) {
		ret = bch2_set_acl(&inode->v, default_acl, ACL_TYPE_DEFAULT);
		ret = __bch2_set_acl(&inode->v, default_acl, ACL_TYPE_DEFAULT);
		if (unlikely(ret))
			goto err;
	}

	if (acl) {
		ret = bch2_set_acl(&inode->v, acl, ACL_TYPE_ACCESS);
		ret = __bch2_set_acl(&inode->v, acl, ACL_TYPE_ACCESS);
		if (unlikely(ret))
			goto err;
	}
@@ -747,8 +747,13 @@ up:
	}

	for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
		if (k.k->type != BCH_INODE_FS ||
		    !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
		if (k.k->type != BCH_INODE_FS)
			continue;

		if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
			continue;

		if (!bch2_empty_dir(c, k.k->p.inode))
			continue;

		if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
@@ -1698,9 +1698,9 @@ noclone:
	if (!rbio->have_ioref)
		goto no_device_postclone;

	lg_local_lock(&c->usage_lock);
	percpu_down_read_preempt_disable(&c->usage_lock);
	bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ);
	lg_local_unlock(&c->usage_lock);
	percpu_up_read_preempt_enable(&c->usage_lock);

	this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_USER],
		     bio_sectors(&rbio->bio));
@@ -725,7 +725,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
		long bucket;

		if (new_fs) {
			percpu_down_read_preempt_disable(&c->usage_lock);
			bucket = bch2_bucket_alloc_new_fs(ca);
			percpu_up_read_preempt_enable(&c->usage_lock);

			if (bucket < 0) {
				ret = -ENOSPC;
				goto err;

@@ -741,8 +744,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
			bucket = sector_to_bucket(ca, ob->ptr.offset);
		}

		if (c)
		if (c) {
			percpu_down_read_preempt_disable(&c->usage_lock);
			spin_lock(&c->journal.lock);
		}

		__array_insert_item(ja->buckets, ja->nr, ja->last_idx);
		__array_insert_item(ja->bucket_seq, ja->nr, ja->last_idx);

@@ -759,9 +764,6 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
		}
		ja->nr++;

		if (c)
			spin_unlock(&c->journal.lock);

		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
					  ca->mi.bucket_size,
					  gc_phase(GC_PHASE_SB),

@@ -769,6 +771,11 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
					  ? BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE
					  : 0);

		if (c) {
			spin_unlock(&c->journal.lock);
			percpu_up_read_preempt_enable(&c->usage_lock);
		}

		if (!new_fs)
			bch2_open_bucket_put(c, ob);
	}
@@ -218,7 +218,7 @@ int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt,
			return -ERANGE;
		break;
	case BCH_OPT_STR:
		ret = bch2_read_string_list(val, opt->choices);
		ret = match_string(opt->choices, -1, val);
		if (ret < 0)
			return ret;
@@ -1,6 +1,61 @@
#ifndef _BCACHEFS_SIX_H
#define _BCACHEFS_SIX_H

/*
 * Shared/intent/exclusive locks: sleepable read/write locks, much like rw
 * semaphores, except with a third intermediate state, intent. Basic operations
 * are:
 *
 *   six_lock_read(&foo->lock);
 *   six_unlock_read(&foo->lock);
 *
 *   six_lock_intent(&foo->lock);
 *   six_unlock_intent(&foo->lock);
 *
 *   six_lock_write(&foo->lock);
 *   six_unlock_write(&foo->lock);
 *
 * Intent locks block other intent locks, but do not block read locks, and you
 * must have an intent lock held before taking a write lock, like so:
 *
 *   six_lock_intent(&foo->lock);
 *   six_lock_write(&foo->lock);
 *   six_unlock_write(&foo->lock);
 *   six_unlock_intent(&foo->lock);
 *
 * Other operations:
 *
 *   six_trylock_read()
 *   six_trylock_intent()
 *   six_trylock_write()
 *
 *   six_lock_downgrade():	convert from intent to read
 *   six_lock_tryupgrade():	attempt to convert from read to intent
 *
 * Locks also embed a sequence number, which is incremented when the lock is
 * locked or unlocked for write. The current sequence number can be grabbed
 * while a lock is held from lock->state.seq; then, if you drop the lock you can
 * use six_relock_(read|intent_write)(lock, seq) to attempt to retake the lock
 * iff it hasn't been locked for write in the meantime.
 *
 * There are also operations that take the lock type as a parameter, where the
 * type is one of SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write:
 *
 *   six_lock_type(lock, type)
 *   six_unlock_type(lock, type)
 *   six_relock(lock, type, seq)
 *   six_trylock_type(lock, type)
 *   six_trylock_convert(lock, from, to)
 *
 * A lock may be held multiple types by the same thread (for read or intent,
 * not write) - up to SIX_LOCK_MAX_RECURSE. However, the six locks code does
 * _not_ implement the actual recursive checks itself though - rather, if your
 * code (e.g. btree iterator code) knows that the current thread already has a
 * lock held, and for the correct type, six_lock_increment() may be used to
 * bump up the counter for that type - the only effect is that one more call to
 * unlock will be required before the lock is unlocked.
 */

#include <linux/lockdep.h>
#include <linux/osq_lock.h>
#include <linux/sched.h>

@@ -10,21 +65,6 @@

#define SIX_LOCK_SEPARATE_LOCKFNS

/*
 * LOCK STATES:
 *
 * read, intent, write (i.e. shared/intent/exclusive, hence the name)
 *
 * read and write work as with normal read/write locks - a lock can have
 * multiple readers, but write excludes reads and other write locks.
 *
 * Intent does not block read, but it does block other intent locks. The idea is
 * by taking an intent lock, you can then later upgrade to a write lock without
 * dropping your read lock and without deadlocking - because no other thread has
 * the intent lock and thus no other thread could be trying to take the write
 * lock.
 */

union six_lock_state {
	struct {
		atomic64_t	counter;
|
||||
bch2_io_clock_exit(&c->io_clock[WRITE]);
|
||||
bch2_io_clock_exit(&c->io_clock[READ]);
|
||||
bch2_fs_compress_exit(c);
|
||||
lg_lock_free(&c->usage_lock);
|
||||
percpu_free_rwsem(&c->usage_lock);
|
||||
free_percpu(c->usage_percpu);
|
||||
mempool_exit(&c->btree_bounce_pool);
|
||||
bioset_exit(&c->btree_bio);
|
||||
@ -643,7 +643,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
offsetof(struct btree_write_bio, wbio.bio)),
|
||||
BIOSET_NEED_BVECS) ||
|
||||
!(c->usage_percpu = alloc_percpu(struct bch_fs_usage)) ||
|
||||
lg_lock_init(&c->usage_lock) ||
|
||||
percpu_init_rwsem(&c->usage_lock) ||
|
||||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
|
||||
btree_bytes(c)) ||
|
||||
bch2_io_clock_init(&c->io_clock[READ]) ||
|
||||
@ -1215,6 +1215,8 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_dev_sysfs_online(c, ca);
|
||||
|
||||
if (c->sb.nr_devices == 1)
|
||||
bdevname(ca->disk_sb.bdev, c->name);
|
||||
bdevname(ca->disk_sb.bdev, ca->name);
|
||||
|
@ -921,7 +921,7 @@ STORE(bch2_dev)
|
||||
}
|
||||
|
||||
if (attr == &sysfs_cache_replacement_policy) {
|
||||
ssize_t v = bch2_read_string_list(buf, bch2_cache_replacement_policies);
|
||||
ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf);
|
||||
|
||||
if (v < 0)
|
||||
return v;
|
||||
|
@ -126,24 +126,6 @@ ssize_t bch2_scnprint_string_list(char *buf, size_t size,
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
ssize_t bch2_read_string_list(const char *buf, const char * const list[])
|
||||
{
|
||||
size_t i, len;
|
||||
|
||||
buf = skip_spaces(buf);
|
||||
|
||||
len = strlen(buf);
|
||||
while (len && isspace(buf[len - 1]))
|
||||
--len;
|
||||
|
||||
for (i = 0; list[i]; i++)
|
||||
if (strlen(list[i]) == len &&
|
||||
!memcmp(buf, list[i], len))
|
||||
break;
|
||||
|
||||
return list[i] ? i : -EINVAL;
|
||||
}
|
||||
|
||||
ssize_t bch2_scnprint_flag_list(char *buf, size_t size,
|
||||
const char * const list[], u64 flags)
|
||||
{
|
||||
@ -178,7 +160,7 @@ u64 bch2_read_flag_list(char *opt, const char * const list[])
|
||||
s = strim(d);
|
||||
|
||||
while ((p = strsep(&s, ","))) {
|
||||
int flag = bch2_read_string_list(p, list);
|
||||
int flag = match_string(list, -1, p);
|
||||
if (flag < 0) {
|
||||
ret = -1;
|
||||
break;
|
||||
|
@ -227,57 +227,6 @@ do { \
|
||||
heap_sift_down(heap, _i, cmp); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Simple array based allocator - preallocates a number of elements and you can
|
||||
* never allocate more than that, also has no locking.
|
||||
*
|
||||
* Handy because if you know you only need a fixed number of elements you don't
|
||||
* have to worry about memory allocation failure, and sometimes a mempool isn't
|
||||
* what you want.
|
||||
*
|
||||
* We treat the free elements as entries in a singly linked list, and the
|
||||
* freelist as a stack - allocating and freeing push and pop off the freelist.
|
||||
*/
|
||||
|
||||
#define DECLARE_ARRAY_ALLOCATOR(type, name, size) \
|
||||
struct { \
|
||||
type *freelist; \
|
||||
type data[size]; \
|
||||
} name
|
||||
|
||||
#define array_alloc(array) \
|
||||
({ \
|
||||
typeof((array)->freelist) _ret = (array)->freelist; \
|
||||
\
|
||||
if (_ret) \
|
||||
(array)->freelist = *((typeof((array)->freelist) *) _ret);\
|
||||
\
|
||||
_ret; \
|
||||
})
|
||||
|
||||
#define array_free(array, ptr) \
|
||||
do { \
|
||||
typeof((array)->freelist) _ptr = ptr; \
|
||||
\
|
||||
*((typeof((array)->freelist) *) _ptr) = (array)->freelist; \
|
||||
(array)->freelist = _ptr; \
|
||||
} while (0)
|
||||
|
||||
#define array_allocator_init(array) \
|
||||
do { \
|
||||
typeof((array)->freelist) _i; \
|
||||
\
|
||||
BUILD_BUG_ON(sizeof((array)->data[0]) < sizeof(void *)); \
|
||||
(array)->freelist = NULL; \
|
||||
\
|
||||
for (_i = (array)->data; \
|
||||
_i < (array)->data + ARRAY_SIZE((array)->data); \
|
||||
_i++) \
|
||||
array_free(array, _i); \
|
||||
} while (0)
|
||||
|
||||
#define array_freelist_empty(array) ((array)->freelist == NULL)
|
||||
|
||||
#define ANYSINT_MAX(t) \
|
||||
((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1)
|
||||
|
||||
@ -359,8 +308,6 @@ bool bch2_is_zero(const void *, size_t);
|
||||
|
||||
ssize_t bch2_scnprint_string_list(char *, size_t, const char * const[], size_t);
|
||||
|
||||
ssize_t bch2_read_string_list(const char *, const char * const[]);
|
||||
|
||||
ssize_t bch2_scnprint_flag_list(char *, size_t, const char * const[], u64);
|
||||
u64 bch2_read_flag_list(char *, const char * const[]);
|
||||
|
||||
|
@@ -95,3 +95,19 @@ void memzero_explicit(void *s, size_t count)
	memset(s, 0, count);
	barrier_data(s);
}

int match_string(const char * const *array, size_t n, const char *string)
{
	int index;
	const char *item;

	for (index = 0; index < n; index++) {
		item = array[index];
		if (!item)
			break;
		if (!strcmp(item, string))
			return index;
	}

	return -EINVAL;
}
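A small usage sketch of the helper added above; the policies array is hypothetical, but the callers converted elsewhere in this diff (bch2_opt_parse(), read_string_list_or_die(), bch2_read_flag_list()) follow the same pattern:

static const char * const policies[] = { "lru", "fifo", "random", NULL };

static int parse_policy(const char *arg)
{
	/* n = -1 wraps to SIZE_MAX, so the loop stops at the NULL terminator */
	int idx = match_string(policies, -1, arg);

	return idx;	/* "fifo" -> 1, unknown string -> -EINVAL */
}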
@@ -218,7 +218,7 @@ u64 read_file_u64(int dirfd, const char *path)
ssize_t read_string_list_or_die(const char *opt, const char * const list[],
				const char *msg)
{
	ssize_t v = bch2_read_string_list(opt, list);
	ssize_t v = match_string(list, -1, opt);
	if (v < 0)
		die("Bad %s %s", msg, opt);