mirror of https://github.com/koverstreet/bcachefs-tools.git
Update bcachefs sources to 31c09369cd six locks: Fix an uninitialized var
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent b8b8dcfaed
commit 1f78fed469
@@ -1 +1 @@
-799716df00709f7480f575e8fd626915bafba006
+31c09369cd01b34fb8ba845fa09776576b03a1e2
@@ -32,6 +32,8 @@ typedef struct {
 #define __ATOMIC_SUB(v, p)	uatomic_sub(p, v)
 #define __ATOMIC_INC(p)		uatomic_inc(p)
 #define __ATOMIC_DEC(p)		uatomic_dec(p)
+#define __ATOMIC_AND(v, p)	uatomic_and(p, v)
+#define __ATOMIC_OR(v, p)	uatomic_or(p, v)
 
 #define xchg(p, v)		uatomic_xchg(p, v)
 #define xchg_acquire(p, v)	uatomic_xchg(p, v)
@@ -56,6 +58,8 @@ typedef struct {
 #define __ATOMIC_SUB_RETURN(v, p)	__atomic_sub_fetch(p, v, __ATOMIC_RELAXED)
 #define __ATOMIC_SUB_RETURN_RELEASE(v, p)				\
 	__atomic_sub_fetch(p, v, __ATOMIC_RELEASE)
+#define __ATOMIC_AND(v, p)	__atomic_and_fetch(p, v, __ATOMIC_RELAXED)
+#define __ATOMIC_OR(v, p)	__atomic_or_fetch(p, v, __ATOMIC_RELAXED)
 
 #define xchg(p, v)		__atomic_exchange_n(p, v, __ATOMIC_SEQ_CST)
 #define xchg_acquire(p, v)	__atomic_exchange_n(p, v, __ATOMIC_ACQUIRE)
@@ -244,6 +248,16 @@ static inline bool a_type##_inc_not_zero(a_type##_t *v)	\
 	return a_type##_add_unless(v, 1, 0);			\
 }								\
 								\
+static inline void a_type##_and(i_type a, a_type##_t *v)	\
+{								\
+	__ATOMIC_AND(a, v);					\
+}								\
+								\
+static inline void a_type##_or(i_type a, a_type##_t *v)	\
+{								\
+	__ATOMIC_OR(a, v);					\
+}								\
+								\
 static inline i_type a_type##_xchg(a_type##_t *v, i_type i)	\
 {								\
 	return xchg(&v->counter, i);				\
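For context, the two new defines give the userspace shim the kernel's atomic bitwise API, and the DEF_ATOMIC_OPS block above stamps out atomic_and()/atomic_or() on top of them. A minimal sketch of the generated helpers in use, mirroring what six.c (further down) does with six_set_bitmask()/six_clear_bitmask(); the mask values are illustrative only:

	/* set and clear flag bits in an atomic 32-bit word: */
	static void set_flag(atomic_t *state, unsigned mask)
	{
		atomic_or(mask, state);
	}

	static void clear_flag(atomic_t *state, unsigned mask)
	{
		atomic_and(~mask, state);
	}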
@@ -2,122 +2,112 @@
 #ifndef MEAN_AND_VARIANCE_H_
 #define MEAN_AND_VARIANCE_H_
 
-#include <linux/kernel.h>
 #include <linux/types.h>
+#include <linux/kernel.h>
 #include <linux/limits.h>
 #include <linux/math64.h>
-#include <stdlib.h>
 
 #define SQRT_U64_MAX 4294967295ULL
 
-/**
- * abs - return absolute value of an argument
- * @x: the value. If it is unsigned type, it is converted to signed type first.
- *     char is treated as if it was signed (regardless of whether it really is)
- *     but the macro's return type is preserved as char.
- *
- * Return: an absolute value of x.
+/*
+ * u128_u: u128 user mode, because not all architectures support a real int128
+ * type
  */
-#define abs(x)	__abs_choose_expr(x, long long,				\
-		__abs_choose_expr(x, long,				\
-		__abs_choose_expr(x, int,				\
-		__abs_choose_expr(x, short,				\
-		__abs_choose_expr(x, char,				\
-		__builtin_choose_expr(					\
-			__builtin_types_compatible_p(typeof(x), char),	\
-			(char)({ signed char __x = (x); __x<0?-__x:__x; }), \
-			((void)0)))))))
-
-#define __abs_choose_expr(x, type, other) __builtin_choose_expr(	\
-	__builtin_types_compatible_p(typeof(x), signed type) ||	\
-	__builtin_types_compatible_p(typeof(x), unsigned type),	\
-	({ signed type __x = (x); __x < 0 ? -__x : __x; }), other)
 
-#ifdef __SIZEOF_INT128__
+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
 
-typedef unsigned __int128 u128;
+typedef struct {
+	unsigned __int128 v;
+} __aligned(16) u128_u;
 
-static inline u128 u64_to_u128(u64 a)
+static inline u128_u u64_to_u128(u64 a)
 {
-	return (u128)a;
+	return (u128_u) { .v = a };
 }
 
-static inline u64 u128_to_u64(u128 a)
+static inline u64 u128_lo(u128_u a)
 {
-	return (u64)a;
+	return a.v;
 }
 
-static inline u64 u128_shr64_to_u64(u128 a)
+static inline u64 u128_hi(u128_u a)
 {
-	return (u64)(a >> 64);
+	return a.v >> 64;
 }
 
-static inline u128 u128_add(u128 a, u128 b)
+static inline u128_u u128_add(u128_u a, u128_u b)
 {
-	return a + b;
+	a.v += b.v;
+	return a;
 }
 
-static inline u128 u128_sub(u128 a, u128 b)
+static inline u128_u u128_sub(u128_u a, u128_u b)
 {
-	return a - b;
+	a.v -= b.v;
+	return a;
 }
 
-static inline u128 u128_shl(u128 i, s8 shift)
+static inline u128_u u128_shl(u128_u a, s8 shift)
 {
-	return i << shift;
+	a.v <<= shift;
+	return a;
 }
 
-static inline u128 u128_shl64_add(u64 a, u64 b)
+static inline u128_u u128_square(u64 a)
 {
-	return ((u128)a << 64) + b;
-}
+	u128_u b = u64_to_u128(a);
 
-static inline u128 u128_square(u64 i)
-{
-	return i*i;
+	b.v *= b.v;
+	return b;
 }
 
 #else
 
 typedef struct {
 	u64 hi, lo;
-} u128;
+} __aligned(16) u128_u;
+
+/* conversions */
 
-static inline u128 u64_to_u128(u64 a)
+static inline u128_u u64_to_u128(u64 a)
 {
-	return (u128) { .lo = a };
+	return (u128_u) { .lo = a };
 }
 
-static inline u64 u128_to_u64(u128 a)
+static inline u64 u128_lo(u128_u a)
 {
 	return a.lo;
 }
 
-static inline u64 u128_shr64_to_u64(u128 a)
+static inline u64 u128_hi(u128_u a)
 {
 	return a.hi;
 }
 
-static inline u128 u128_add(u128 a, u128 b)
+/* arithmetic */
+
+static inline u128_u u128_add(u128_u a, u128_u b)
 {
-	u128 c;
+	u128_u c;
 
 	c.lo = a.lo + b.lo;
 	c.hi = a.hi + b.hi + (c.lo < a.lo);
 	return c;
 }
 
-static inline u128 u128_sub(u128 a, u128 b)
+static inline u128_u u128_sub(u128_u a, u128_u b)
 {
-	u128 c;
+	u128_u c;
 
 	c.lo = a.lo - b.lo;
 	c.hi = a.hi - b.hi - (c.lo > a.lo);
 	return c;
 }
 
-static inline u128 u128_shl(u128 i, s8 shift)
+static inline u128_u u128_shl(u128_u i, s8 shift)
 {
-	u128 r;
+	u128_u r;
 
 	r.lo = i.lo << shift;
 	if (shift < 64)
@@ -129,15 +119,10 @@ static inline u128 u128_shl(u128 i, s8 shift)
 	return r;
 }
 
-static inline u128 u128_shl64_add(u64 a, u64 b)
+static inline u128_u u128_square(u64 i)
 {
-	return u128_add(u128_shl(u64_to_u128(a), 64), u64_to_u128(b));
-}
-
-static inline u128 u128_square(u64 i)
-{
-	u128 r;
-	u64 h = i >> 32, l = i & (u64)U32_MAX;
+	u128_u r;
+	u64 h = i >> 32, l = i & U32_MAX;
 
 	r = u128_shl(u64_to_u128(h*h), 64);
 	r = u128_add(r, u128_shl(u64_to_u128(h*l), 32));
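For reference, the fallback u128_square() rests on the usual 32-bit decomposition: writing i = 2^32*h + l gives i^2 = 2^64*h^2 + 2^33*h*l + l^2, where each partial product fits in 64 bits, so the pieces can be accumulated with u128_shl()/u128_add(). A quick userspace check of that identity (assumes a compiler with __int128; not part of the diff):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t i = 0xdeadbeefcafebabeULL;
		uint64_t h = i >> 32, l = i & 0xffffffffULL;
		unsigned __int128 sq = (unsigned __int128)i * i;
		/* 2^64*h^2 + 2*(2^32*h*l) + l^2: */
		unsigned __int128 r = ((unsigned __int128)(h * h) << 64)
			+ (((unsigned __int128)(h * l)) << 32) * 2
			+ (unsigned __int128)(l * l);

		assert(sq == r);
		return 0;
	}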
@@ -148,85 +133,69 @@ static inline u128 u128_square(u64 i)
 
 #endif
 
-static inline u128 u128_div(u128 n, u64 d)
+static inline u128_u u64s_to_u128(u64 hi, u64 lo)
 {
-	u128 r;
-	u64 rem;
-	u64 hi = u128_shr64_to_u64(n);
-	u64 lo = u128_to_u64(n);
-	u64 h = hi & ((u64)U32_MAX << 32);
-	u64 l = (hi & (u64)U32_MAX) << 32;
+	u128_u c = u64_to_u128(hi);
 
-	r =             u128_shl(u64_to_u128(div64_u64_rem(h, d, &rem)), 64);
-	r = u128_add(r, u128_shl(u64_to_u128(div64_u64_rem(l + (rem << 32), d, &rem)), 32));
-	r = u128_add(r,          u64_to_u128(div64_u64_rem(lo + (rem << 32), d, &rem)));
-	return r;
+	c = u128_shl(c, 64);
+	c = u128_add(c, u64_to_u128(lo));
+	return c;
 }
 
+u128_u u128_div(u128_u n, u64 d);
+
 struct mean_and_variance {
-	s64 n;
-	s64 sum;
-	u128 sum_squares;
+	s64		n;
+	s64		sum;
+	u128_u		sum_squares;
 };
 
 /* exponentially weighted variant */
 struct mean_and_variance_weighted {
-	bool init;
-	u8 w;
-	s64 mean;
-	u64 variance;
+	bool		init;
+	u8		weight; /* base 2 logarithm */
+	s64		mean;
+	u64		variance;
 };
 
-s64 fast_divpow2(s64 n, u8 d);
+/**
+ * fast_divpow2() - fast approximation for n / (1 << d)
+ * @n: numerator
+ * @d: the power of 2 denominator.
+ *
+ * note: this rounds towards 0.
+ */
+static inline s64 fast_divpow2(s64 n, u8 d)
+{
+	return (n + ((n < 0) ? ((1 << d) - 1) : 0)) >> d;
+}
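The adjustment in fast_divpow2() only fires for negative n: an arithmetic shift alone rounds toward negative infinity, so for example -7 >> 1 == -4, while fast_divpow2(-7, 1) == (-7 + 1) >> 1 == -3, matching C's truncating -7 / 2. For non-negative n the shift and the division already agree.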
 
+/**
+ * mean_and_variance_update() - update a mean_and_variance struct @s1 with a new sample @v1
+ * and return it.
+ * @s1: the mean_and_variance to update.
+ * @v1: the new sample.
+ *
+ * see linked pdf equation 12.
+ */
 static inline struct mean_and_variance
-mean_and_variance_update_inlined(struct mean_and_variance s1, s64 v1)
+mean_and_variance_update(struct mean_and_variance s, s64 v)
 {
-	struct mean_and_variance s2;
-	u64 v2 = abs(v1);
-
-	s2.n           = s1.n + 1;
-	s2.sum         = s1.sum + v1;
-	s2.sum_squares = u128_add(s1.sum_squares, u128_square(v2));
-	return s2;
+	return (struct mean_and_variance) {
+		.n		= s.n + 1,
+		.sum		= s.sum + v,
+		.sum_squares	= u128_add(s.sum_squares, u128_square(abs(v))),
+	};
 }
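A minimal usage sketch of the running-statistics API after this change, assuming a zero-initialized accumulator and samples that fit comfortably in s64 (illustrative only, not part of the diff):

	struct mean_and_variance mv = {};
	s64 samples[] = { 10, 12, 14 };
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(samples); i++)
		mv = mean_and_variance_update(mv, samples[i]);

	/* mean = 12; variance = E[x^2] - E[x]^2 = (100+144+196)/3 - 144 */
	pr_info("mean %lld variance %llu\n",
		mean_and_variance_get_mean(mv),
		mean_and_variance_get_variance(mv));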
 
-static inline struct mean_and_variance_weighted
-mean_and_variance_weighted_update_inlined(struct mean_and_variance_weighted s1, s64 x)
-{
-	struct mean_and_variance_weighted s2;
-	// previous weighted variance.
-	u64 var_w0 = s1.variance;
-	u8 w = s2.w = s1.w;
-	// new value weighted.
-	s64 x_w = x << w;
-	s64 diff_w = x_w - s1.mean;
-	s64 diff = fast_divpow2(diff_w, w);
-	// new mean weighted.
-	s64 u_w1 = s1.mean + diff;
-
-	BUG_ON(w % 2 != 0);
-
-	if (!s1.init) {
-		s2.mean = x_w;
-		s2.variance = 0;
-	} else {
-		s2.mean = u_w1;
-		s2.variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w;
-	}
-	s2.init = true;
-
-	return s2;
-}
-
-struct mean_and_variance mean_and_variance_update(struct mean_and_variance s1, s64 v1);
 s64 mean_and_variance_get_mean(struct mean_and_variance s);
 u64 mean_and_variance_get_variance(struct mean_and_variance s1);
 u32 mean_and_variance_get_stddev(struct mean_and_variance s);
 
-struct mean_and_variance_weighted mean_and_variance_weighted_update(struct mean_and_variance_weighted s1, s64 v1);
+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 v);
+
 s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s);
 u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s);
 u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s);
 
 #endif // MEAN_AND_VARIANCE_H_
@@ -3,59 +3,124 @@
 #ifndef _LINUX_SIX_H
 #define _LINUX_SIX_H
 
-/*
- * Shared/intent/exclusive locks: sleepable read/write locks, much like rw
- * semaphores, except with a third intermediate state, intent. Basic operations
- * are:
+/**
+ * DOC: SIX locks overview
 *
- * six_lock_read(&foo->lock);
- * six_unlock_read(&foo->lock);
+ * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
+ * but with an additional state: read/shared, intent, exclusive/write
 *
- * six_lock_intent(&foo->lock);
- * six_unlock_intent(&foo->lock);
+ * The purpose of the intent state is to allow for greater concurrency on tree
+ * structures without deadlocking. In general, a read can't be upgraded to a
+ * write lock without deadlocking, so an operation that updates multiple nodes
+ * will have to take write locks for the full duration of the operation.
 *
- * six_lock_write(&foo->lock);
- * six_unlock_write(&foo->lock);
+ * But by adding an intent state, which is exclusive with other intent locks but
+ * not with readers, we can take intent locks at the start of the operation,
+ * and then take write locks only for the actual update to each individual
+ * node, without deadlocking.
 *
- * Intent locks block other intent locks, but do not block read locks, and you
- * must have an intent lock held before taking a write lock, like so:
+ * Example usage:
+ *   six_lock_read(&foo->lock);
+ *   six_unlock_read(&foo->lock);
 *
- * six_lock_intent(&foo->lock);
- * six_lock_write(&foo->lock);
- * six_unlock_write(&foo->lock);
- * six_unlock_intent(&foo->lock);
+ * An intent lock must be held before taking a write lock:
+ *   six_lock_intent(&foo->lock);
+ *   six_lock_write(&foo->lock);
+ *   six_unlock_write(&foo->lock);
+ *   six_unlock_intent(&foo->lock);
 *
 * Other operations:
 *
 *   six_trylock_read()
 *   six_trylock_intent()
 *   six_trylock_write()
 *
- *   six_lock_downgrade():	convert from intent to read
- *   six_lock_tryupgrade():	attempt to convert from read to intent
+ *   six_lock_downgrade()	convert from intent to read
+ *   six_lock_tryupgrade()	attempt to convert from read to intent, may fail
 *
- * Locks also embed a sequence number, which is incremented when the lock is
- * locked or unlocked for write. The current sequence number can be grabbed
- * while a lock is held from lock->state.seq; then, if you drop the lock you can
- * use six_relock_(read|intent_write)(lock, seq) to attempt to retake the lock
- * iff it hasn't been locked for write in the meantime.
+ * There are also interfaces that take the lock type as an enum:
 *
- * There are also operations that take the lock type as a parameter, where the
- * type is one of SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write:
+ *   six_lock_type(&foo->lock, SIX_LOCK_read);
+ *   six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent)
+ *   six_lock_type(&foo->lock, SIX_LOCK_write);
+ *   six_unlock_type(&foo->lock, SIX_LOCK_write);
+ *   six_unlock_type(&foo->lock, SIX_LOCK_intent);
 *
- *   six_lock_type(lock, type)
- *   six_unlock_type(lock, type)
- *   six_relock(lock, type, seq)
- *   six_trylock_type(lock, type)
- *   six_trylock_convert(lock, from, to)
+ * Lock sequence numbers - unlock(), relock():
 *
- * A lock may be held multiple times by the same thread (for read or intent,
- * not write). However, the six locks code does _not_ implement the actual
- * recursive checks itself though - rather, if your code (e.g. btree iterator
- * code) knows that the current thread already has a lock held, and for the
- * correct type, six_lock_increment() may be used to bump up the counter for
- * that type - the only effect is that one more call to unlock will be required
- * before the lock is unlocked.
+ *   Locks embed sequence numbers, which are incremented on write lock/unlock.
+ *   This allows locks to be dropped and then retaken iff the state they protect
+ *   hasn't changed; this makes it much easier to avoid holding locks while e.g.
+ *   doing IO or allocating memory.
+ *
+ *   Example usage:
+ *     six_lock_read(&foo->lock);
+ *     u32 seq = six_lock_seq(&foo->lock);
+ *     six_unlock_read(&foo->lock);
+ *
+ *     some_operation_that_may_block();
+ *
+ *     if (six_relock_read(&foo->lock, seq)) { ... }
+ *
+ *   If the relock operation succeeds, it is as if the lock was never unlocked.
+ *
+ * Reentrancy:
+ *
+ *   Six locks are not by themselves reentrant, but have counters for both the
+ *   read and intent states that can be used to provide reentrancy by an upper
+ *   layer that tracks held locks. If a lock is known to already be held in the
+ *   read or intent state, six_lock_increment() can be used to bump the "lock
+ *   held in this state" counter, increasing the number of unlock calls that
+ *   will be required to fully unlock it.
+ *
+ *   Example usage:
+ *     six_lock_read(&foo->lock);
+ *     six_lock_increment(&foo->lock, SIX_LOCK_read);
+ *     six_unlock_read(&foo->lock);
+ *     six_unlock_read(&foo->lock);
+ *   foo->lock is now fully unlocked.
+ *
+ *   Since the intent state supersedes read, it's legal to increment the read
+ *   counter when holding an intent lock, but not the reverse.
+ *
+ *   A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
+ *   is not legal.
+ *
+ * should_sleep_fn:
+ *
+ *   There is a six_lock() variant that takes a function pointer that is called
+ *   immediately prior to schedule() when blocking, and may return an error to
+ *   abort.
+ *
+ *   One possible use for this feature is when objects being locked are part of
+ *   a cache and may be reused, and lock ordering is based on a property of the
+ *   object that will change when the object is reused - i.e. logical key order.
+ *
+ *   If looking up an object in the cache may race with object reuse, and lock
+ *   ordering is required to prevent deadlock, object reuse may change the
+ *   correct lock order for that object and cause a deadlock. should_sleep_fn
+ *   can be used to check if the object is still the object we want and avoid
+ *   this deadlock.
+ *
+ * Wait list entry interface:
+ *
+ *   There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
+ *   wait list entry. By embedding six_lock_waiter into another object, and by
+ *   traversing lock waitlists, it is then possible for an upper layer to
+ *   implement full cycle detection for deadlock avoidance.
+ *
+ *   should_sleep_fn should be used for invoking the cycle detector, walking the
+ *   graph of held locks to check for a deadlock. The upper layer must track
+ *   held locks for each thread, and each thread's held locks must be reachable
+ *   from its six_lock_waiter object.
+ *
+ *   six_lock_waiter() will add the wait object to the waitlist after re-trying
+ *   taking the lock, and before calling should_sleep_fn, and the wait object
+ *   will not be removed from the waitlist until either the lock has been
+ *   successfully acquired, or we aborted because should_sleep_fn returned an
+ *   error.
+ *
+ *   Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
+ *   have timestamps in strictly ascending order - this is so the timestamp can
+ *   be used as a cursor for lock graph traversal.
 */
 
 #include <linux/lockdep.h>
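To make the should_sleep_fn contract above concrete, here is a hedged sketch of the object-reuse check the comment describes; the cached_obj layout and key field are hypothetical, not part of six.h:

	struct cached_obj {
		struct six_lock	lock;
		u64		key;	/* lock ordering is by key; changes on reuse */
	};

	static int obj_reuse_check(struct six_lock *lock, void *p)
	{
		struct cached_obj *obj = container_of(lock, struct cached_obj, lock);
		u64 *expected_key = p;

		/* abort the lock attempt if the object was reused out from under us: */
		return *expected_key == obj->key ? 0 : -EAGAIN;
	}

	/* six_lock_read(&obj->lock, obj_reuse_check, &key) then returns -EAGAIN
	 * instead of sleeping behind a lock we no longer want. */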
@@ -63,41 +128,6 @@
 #include <linux/sched.h>
 #include <linux/types.h>
 
-#define SIX_LOCK_SEPARATE_LOCKFNS
-
-union six_lock_state {
-	struct {
-		atomic64_t	counter;
-	};
-
-	struct {
-		u64		v;
-	};
-
-	struct {
-		/* for waitlist_bitnr() */
-		unsigned long	l;
-	};
-
-	struct {
-		unsigned	read_lock:26;
-		unsigned	write_locking:1;
-		unsigned	intent_lock:1;
-		unsigned	nospin:1;
-		unsigned	waiters:3;
-		/*
-		 * seq works much like in seqlocks: it's incremented every time
-		 * we lock and unlock for write.
-		 *
-		 * If it's odd write lock is held, even unlocked.
-		 *
-		 * Thus readers can unlock, and then lock again later iff it
-		 * hasn't been modified in the meantime.
-		 */
-		u32		seq;
-	};
-};
-
 enum six_lock_type {
 	SIX_LOCK_read,
 	SIX_LOCK_intent,
@@ -105,7 +135,8 @@ enum six_lock_type {
 };
 
 struct six_lock {
-	union six_lock_state	state;
+	atomic_t		state;
+	u32			seq;
 	unsigned		intent_lock_recurse;
 	struct task_struct	*owner;
 	unsigned __percpu	*readers;
@@ -127,59 +158,210 @@ struct six_lock_waiter {
 
 typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);
 
-static __always_inline void __six_lock_init(struct six_lock *lock,
-					    const char *name,
-					    struct lock_class_key *key)
-{
-	atomic64_set(&lock->state.counter, 0);
-	raw_spin_lock_init(&lock->wait_lock);
-	INIT_LIST_HEAD(&lock->wait_list);
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	debug_check_no_locks_freed((void *) lock, sizeof(*lock));
-	lockdep_init_map(&lock->dep_map, name, key, 0);
-#endif
-}
+void six_lock_exit(struct six_lock *lock);
 
-#define six_lock_init(lock)						\
+enum six_lock_init_flags {
+	SIX_LOCK_INIT_PCPU	= 1U << 0,
+};
+
+void __six_lock_init(struct six_lock *lock, const char *name,
+		     struct lock_class_key *key, enum six_lock_init_flags flags);
+
+/**
+ * six_lock_init - initialize a six lock
+ * @lock: lock to initialize
+ * @flags: optional flags, i.e. SIX_LOCK_INIT_PCPU
+ */
+#define six_lock_init(lock, flags)					\
 do {									\
 	static struct lock_class_key __key;				\
 									\
-	__six_lock_init((lock), #lock, &__key);				\
+	__six_lock_init((lock), #lock, &__key, flags);			\
 } while (0)
 
-#define __SIX_VAL(field, _v)	(((union six_lock_state) { .field = _v }).v)
+/**
+ * six_lock_seq - obtain current lock sequence number
+ * @lock: six_lock to obtain sequence number for
+ *
+ * @lock should be held for read or intent, and not write
+ *
+ * By saving the lock sequence number, we can unlock @lock and then (typically
+ * after some blocking operation) attempt to relock it: the relock will succeed
+ * if the sequence number hasn't changed, meaning no write locks have been taken
+ * and state corresponding to what @lock protects is still valid.
+ */
+static inline u32 six_lock_seq(const struct six_lock *lock)
+{
+	return lock->seq;
+}
+
+bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
+
+/**
+ * six_trylock_type - attempt to take a six lock without blocking
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ *
+ * Return: true on success, false on failure.
+ */
+static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
+{
+	return six_trylock_ip(lock, type, _THIS_IP_);
+}
+
+int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
+		       struct six_lock_waiter *wait,
+		       six_lock_should_sleep_fn should_sleep_fn, void *p,
+		       unsigned long ip);
+
+/**
+ * six_lock_waiter - take a lock, with full waitlist interface
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @wait: pointer to wait object, which will be added to lock's waitlist
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ *		to scheduling
+ * @p: passed through to @should_sleep_fn
+ *
+ * This is a convenience wrapper around six_lock_ip_waiter(), see that function
+ * for full documentation.
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
+				  struct six_lock_waiter *wait,
+				  six_lock_should_sleep_fn should_sleep_fn, void *p)
+{
+	return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
+}
+
+/**
+ * six_lock_ip - take a six lock
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ *		to scheduling
+ * @p: passed through to @should_sleep_fn
+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
+			      six_lock_should_sleep_fn should_sleep_fn, void *p,
+			      unsigned long ip)
+{
+	struct six_lock_waiter wait;
+
+	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
+}
+
+/**
+ * six_lock_type - take a six lock
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ *		to scheduling
+ * @p: passed through to @should_sleep_fn
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
+				six_lock_should_sleep_fn should_sleep_fn, void *p)
+{
+	struct six_lock_waiter wait;
+
+	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
+}
+
+bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
+		   unsigned seq, unsigned long ip);
+
+/**
+ * six_relock_type - attempt to re-take a lock that was held previously
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @seq: lock sequence number obtained from six_lock_seq() while lock was
+ *	 held previously
+ *
+ * Return: true on success, false on failure.
+ */
+static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
+				   unsigned seq)
+{
+	return six_relock_ip(lock, type, seq, _THIS_IP_);
+}
+
+void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
+
+/**
+ * six_unlock_type - drop a six lock
+ * @lock: lock to unlock
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ *
+ * When a lock is held multiple times (because six_lock_increment() was used),
+ * this decrements the 'lock held' counter by one.
+ *
+ * For example:
+ *   six_lock_read(&foo->lock);				read count 1
+ *   six_lock_increment(&foo->lock, SIX_LOCK_read);	read count 2
+ *   six_lock_unlock(&foo->lock, SIX_LOCK_read);	read count 1
+ *   six_lock_unlock(&foo->lock, SIX_LOCK_read);	read count 0
+ */
+static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
+{
+	six_unlock_ip(lock, type, _THIS_IP_);
+}
 
 #define __SIX_LOCK(type)						\
-bool six_trylock_ip_##type(struct six_lock *, unsigned long);		\
-bool six_relock_ip_##type(struct six_lock *, u32, unsigned long);	\
-int six_lock_ip_##type(struct six_lock *, six_lock_should_sleep_fn,	\
-		       void *, unsigned long);				\
-int six_lock_ip_waiter_##type(struct six_lock *, struct six_lock_waiter *,\
-			six_lock_should_sleep_fn, void *, unsigned long);\
-void six_unlock_ip_##type(struct six_lock *, unsigned long);		\
+static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\
+{									\
+	return six_trylock_ip(lock, SIX_LOCK_##type, ip);		\
+}									\
+									\
 static inline bool six_trylock_##type(struct six_lock *lock)		\
 {									\
-	return six_trylock_ip_##type(lock, _THIS_IP_);			\
+	return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_);	\
 }									\
 									\
+static inline int six_lock_ip_waiter_##type(struct six_lock *lock,	\
+					struct six_lock_waiter *wait,	\
+					six_lock_should_sleep_fn should_sleep_fn, void *p,\
+					unsigned long ip)		\
+{									\
+	return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
+}									\
+									\
+static inline int six_lock_ip_##type(struct six_lock *lock,		\
+				     six_lock_should_sleep_fn should_sleep_fn, void *p,\
+				     unsigned long ip)			\
+{									\
+	return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
+}									\
+									\
+static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
+{									\
+	return six_relock_ip(lock, SIX_LOCK_##type, seq, ip);		\
+}									\
+									\
 static inline bool six_relock_##type(struct six_lock *lock, u32 seq)	\
 {									\
-	return six_relock_ip_##type(lock, seq, _THIS_IP_);		\
+	return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_);	\
 }									\
 									\
 static inline int six_lock_##type(struct six_lock *lock,		\
 				  six_lock_should_sleep_fn fn, void *p)\
 {									\
 	return six_lock_ip_##type(lock, fn, p, _THIS_IP_);		\
 }									\
-static inline int six_lock_waiter_##type(struct six_lock *lock,	\
-					 struct six_lock_waiter *wait,	\
-					 six_lock_should_sleep_fn fn, void *p)	\
-{									\
-	return six_lock_ip_waiter_##type(lock, wait, fn, p, _THIS_IP_);\
-}									\
 									\
+static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip)	\
+{									\
+	six_unlock_ip(lock, SIX_LOCK_##type, ip);			\
+}									\
+									\
 static inline void six_unlock_##type(struct six_lock *lock)		\
 {									\
-	return six_unlock_ip_##type(lock, _THIS_IP_);			\
+	six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_);		\
 }
 
 __SIX_LOCK(read)
@@ -187,55 +369,6 @@ __SIX_LOCK(intent)
 __SIX_LOCK(write)
 #undef __SIX_LOCK
 
-#define SIX_LOCK_DISPATCH(type, fn, ...)			\
-	switch (type) {						\
-	case SIX_LOCK_read:					\
-		return fn##_read(__VA_ARGS__);			\
-	case SIX_LOCK_intent:					\
-		return fn##_intent(__VA_ARGS__);		\
-	case SIX_LOCK_write:					\
-		return fn##_write(__VA_ARGS__);			\
-	default:						\
-		BUG();						\
-	}
-
-static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
-{
-	SIX_LOCK_DISPATCH(type, six_trylock, lock);
-}
-
-static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
-				   unsigned seq)
-{
-	SIX_LOCK_DISPATCH(type, six_relock, lock, seq);
-}
-
-static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
-				six_lock_should_sleep_fn should_sleep_fn, void *p)
-{
-	SIX_LOCK_DISPATCH(type, six_lock, lock, should_sleep_fn, p);
-}
-
-static inline int six_lock_type_ip_waiter(struct six_lock *lock, enum six_lock_type type,
-					  struct six_lock_waiter *wait,
-					  six_lock_should_sleep_fn should_sleep_fn, void *p,
-					  unsigned long ip)
-{
-	SIX_LOCK_DISPATCH(type, six_lock_ip_waiter, lock, wait, should_sleep_fn, p, ip);
-}
-
-static inline int six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type,
-				       struct six_lock_waiter *wait,
-				       six_lock_should_sleep_fn should_sleep_fn, void *p)
-{
-	SIX_LOCK_DISPATCH(type, six_lock_waiter, lock, wait, should_sleep_fn, p);
-}
-
-static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
-{
-	SIX_LOCK_DISPATCH(type, six_unlock, lock);
-}
-
 void six_lock_downgrade(struct six_lock *);
 bool six_lock_tryupgrade(struct six_lock *);
 bool six_trylock_convert(struct six_lock *, enum six_lock_type,
@@ -245,13 +378,11 @@ void six_lock_increment(struct six_lock *, enum six_lock_type);
 
 void six_lock_wakeup_all(struct six_lock *);
 
-void six_lock_pcpu_free(struct six_lock *);
-void six_lock_pcpu_alloc(struct six_lock *);
-
 struct six_lock_count {
 	unsigned n[3];
 };
 
 struct six_lock_count six_lock_counts(struct six_lock *);
+void six_lock_readers_add(struct six_lock *, int);
 
 #endif /* _LINUX_SIX_H */
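With six_lock_pcpu_alloc() gone, callers choose percpu-reader mode up front; a sketch of the before/after shape, matching the btree_cache.c and btree_key_cache.c hunks below (struct name illustrative):

	struct foo {
		struct six_lock lock;
	};

	static void foo_init(struct foo *f, bool pcpu_readers)
	{
		/* old API: six_lock_init(&f->lock); then optionally
		 * six_lock_pcpu_alloc(&f->lock);
		 * new API: one call, mode chosen at init time: */
		six_lock_init(&f->lock, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
	}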
@@ -269,9 +269,9 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
 	struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
 	int rw = flags & WRITE;
 
-	if (alloc_v4_u64s(a.v) != bkey_val_u64s(k.k)) {
-		prt_printf(err, "bad val size (%lu != %u)",
-			   bkey_val_u64s(k.k), alloc_v4_u64s(a.v));
+	if (alloc_v4_u64s(a.v) > bkey_val_u64s(k.k)) {
+		prt_printf(err, "bad val size (%u > %lu)",
+			   alloc_v4_u64s(a.v), bkey_val_u64s(k.k));
 		return -BCH_ERR_invalid_bkey;
 	}
 
@@ -724,7 +724,7 @@ unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k)
 	return 0;
 }
 
-#ifdef CONFIG_X86_64
+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
 
 #define I(_x)			(*(out)++ = (_x))
 #define I1(i0)			I(i0)
@@ -9,9 +9,17 @@
 #include "util.h"
 #include "vstructs.h"
 
+#if 0
+
+/*
+ * compiled unpack functions are disabled, pending a new interface for
+ * dynamically allocating executable memory:
+ */
+
 #ifdef CONFIG_X86_64
 #define HAVE_BCACHEFS_COMPILED_UNPACK	1
 #endif
+#endif
 
 void bch2_bkey_packed_to_binary_text(struct printbuf *,
 				     const struct bkey_format *,
@@ -62,10 +62,12 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 
 	EBUG_ON(btree_node_write_in_flight(b));
 
+	clear_btree_node_just_written(b);
+
 	kvpfree(b->data, btree_bytes(c));
 	b->data = NULL;
 #ifdef __KERNEL__
-	vfree(b->aux_data);
+	kvfree(b->aux_data);
 #else
 	munmap(b->aux_data, btree_aux_data_bytes(b));
 #endif
@@ -100,7 +102,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
 	if (!b->data)
 		return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
 #ifdef __KERNEL__
-	b->aux_data = vmalloc_exec(btree_aux_data_bytes(b), gfp);
+	b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp);
 #else
 	b->aux_data = mmap(NULL, btree_aux_data_bytes(b),
 			   PROT_READ|PROT_WRITE|PROT_EXEC,
@@ -126,7 +128,6 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
 		return NULL;
 
 	bkey_btree_ptr_init(&b->key);
-	bch2_btree_lock_init(&b->c);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	lockdep_set_no_check_recursion(&b->c.lock.dep_map);
 #endif
@@ -150,6 +151,8 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
 		return NULL;
 	}
 
+	bch2_btree_lock_init(&b->c, 0);
+
 	bc->used++;
 	list_add(&b->list, &bc->freeable);
 	return b;
@@ -484,7 +487,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 	while (!list_empty(&bc->freed_nonpcpu)) {
 		b = list_first_entry(&bc->freed_nonpcpu, struct btree, list);
 		list_del(&b->list);
-		six_lock_pcpu_free(&b->c.lock);
+		six_lock_exit(&b->c.lock);
 		kfree(b);
 	}
 
@@ -645,8 +648,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
 		mutex_lock(&bc->lock);
 	}
 
-	if (pcpu_read_locks)
-		six_lock_pcpu_alloc(&b->c.lock);
+	bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0);
 
 	BUG_ON(!six_trylock_intent(&b->c.lock));
 	BUG_ON(!six_trylock_write(&b->c.lock));
@@ -700,6 +702,7 @@ err:
 	/* Try to cannibalize another cached btree node: */
 	if (bc->alloc_lock == current) {
 		b2 = btree_node_cannibalize(c);
+		clear_btree_node_just_written(b2);
 		bch2_btree_node_hash_remove(bc, b2);
 
 		if (b) {
@@ -784,7 +787,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
 	set_btree_node_read_in_flight(b);
 
 	six_unlock_write(&b->c.lock);
-	seq = b->c.lock.state.seq;
+	seq = six_lock_seq(&b->c.lock);
 	six_unlock_intent(&b->c.lock);
 
 	/* Unlock before doing IO: */
@@ -908,7 +911,7 @@ retry:
 	}
 
 	if (unlikely(btree_node_read_in_flight(b))) {
-		u32 seq = b->c.lock.state.seq;
+		u32 seq = six_lock_seq(&b->c.lock);
 
 		six_unlock_type(&b->c.lock, lock_type);
 		bch2_trans_unlock(trans);
@@ -1006,7 +1009,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
 	}
 
 	if (unlikely(btree_node_read_in_flight(b))) {
-		u32 seq = b->c.lock.state.seq;
+		u32 seq = six_lock_seq(&b->c.lock);
 
 		six_unlock_type(&b->c.lock, lock_type);
 		bch2_trans_unlock(trans);
@@ -483,7 +483,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
 	struct btree_node_entry *bne;
 	bool reinit_iter = false;
 
-	EBUG_ON(!(b->c.lock.state.seq & 1));
+	EBUG_ON(!six_lock_counts(&b->c.lock).n[SIX_LOCK_write]);
 	BUG_ON(bset_written(b, bset(b, &b->set[1])));
 	BUG_ON(btree_node_just_written(b));
 
@@ -652,9 +652,8 @@ void bch2_btree_path_level_init(struct btree_trans *trans,
 	BUG_ON(path->cached);
 
 	EBUG_ON(!btree_path_pos_in_node(path, b));
-	EBUG_ON(b->c.lock.state.seq & 1);
 
-	path->l[b->c.level].lock_seq = b->c.lock.state.seq;
+	path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
 	path->l[b->c.level].b = b;
 	__btree_path_level_init(path, b->c.level);
 }
@@ -42,14 +42,7 @@ static inline struct btree *btree_path_node(struct btree_path *path,
 static inline bool btree_node_lock_seq_matches(const struct btree_path *path,
 					const struct btree *b, unsigned level)
 {
-	/*
-	 * We don't compare the low bits of the lock sequence numbers because
-	 * @path might have taken a write lock on @b, and we don't want to skip
-	 * the linked path if the sequence numbers were equal before taking that
-	 * write lock. The lock sequence number is incremented by taking and
-	 * releasing write locks and is even when unlocked:
-	 */
-	return path->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1;
+	return path->l[level].lock_seq == six_lock_seq(&b->c.lock);
 }
 
 static inline struct btree *btree_node_parent(struct btree_path *path,
@@ -252,7 +252,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 	}
 
 	path->l[0].b		= (void *) ck;
-	path->l[0].lock_seq	= ck->c.lock.state.seq;
+	path->l[0].lock_seq	= six_lock_seq(&ck->c.lock);
 	mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
 
 	ret = bch2_btree_node_lock_write(trans, path, &ck->c);
@@ -283,9 +283,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
 		return NULL;
 init:
 	INIT_LIST_HEAD(&ck->list);
-	bch2_btree_lock_init(&ck->c);
-	if (pcpu_readers)
-		six_lock_pcpu_alloc(&ck->c.lock);
+	bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
 
 	ck->c.cached = true;
 	BUG_ON(!six_trylock_intent(&ck->c.lock));
@@ -341,9 +339,6 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
 		}
 
 		mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
-	} else {
-		if (path->btree_id == BTREE_ID_subvolumes)
-			six_lock_pcpu_alloc(&ck->c.lock);
 	}
 
 	ck->c.level		= 0;
@@ -512,7 +507,7 @@ retry:
 		mark_btree_node_locked(trans, path, 0, lock_want);
 	}
 
-	path->l[0].lock_seq	= ck->c.lock.state.seq;
+	path->l[0].lock_seq	= six_lock_seq(&ck->c.lock);
 	path->l[0].b		= (void *) ck;
 fill:
 	path->uptodate = BTREE_ITER_UPTODATE;
@@ -594,7 +589,7 @@ retry:
 		mark_btree_node_locked(trans, path, 0, lock_want);
 	}
 
-	path->l[0].lock_seq	= ck->c.lock.state.seq;
+	path->l[0].lock_seq	= six_lock_seq(&ck->c.lock);
 	path->l[0].b		= (void *) ck;
 fill:
 	if (!ck->valid)
@@ -872,7 +867,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 			break;
 
 		list_del(&ck->list);
-		six_lock_pcpu_free(&ck->c.lock);
+		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 		atomic_long_dec(&bc->nr_freed);
 		scanned++;
@@ -888,7 +883,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 			break;
 
 		list_del(&ck->list);
-		six_lock_pcpu_free(&ck->c.lock);
+		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 		atomic_long_dec(&bc->nr_freed);
 		scanned++;
@@ -1013,7 +1008,7 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 
 		list_del(&ck->list);
 		kfree(ck->k);
-		six_lock_pcpu_free(&ck->c.lock);
+		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 	}
 
@@ -6,9 +6,10 @@
 
 static struct lock_class_key bch2_btree_node_lock_key;
 
-void bch2_btree_lock_init(struct btree_bkey_cached_common *b)
+void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
+			  enum six_lock_init_flags flags)
 {
-	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key);
+	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
 }
 
 #ifdef CONFIG_LOCKDEP
@@ -20,16 +21,6 @@ void bch2_assert_btree_nodes_not_locked(void)
 
 /* Btree node locking: */
 
-static inline void six_lock_readers_add(struct six_lock *lock, int nr)
-{
-	if (lock->readers)
-		this_cpu_add(*lock->readers, nr);
-	else if (nr > 0)
-		atomic64_add(__SIX_VAL(read_lock, nr), &lock->state.counter);
-	else
-		atomic64_sub(__SIX_VAL(read_lock, -nr), &lock->state.counter);
-}
-
 struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
 					    struct btree_path *skip,
 					    struct btree_bkey_cached_common *b,
@@ -14,7 +14,7 @@
 
 #include "btree_iter.h"
 
-void bch2_btree_lock_init(struct btree_bkey_cached_common *);
+void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags);
 
 #ifdef CONFIG_LOCKDEP
 void bch2_assert_btree_nodes_not_locked(void);
@@ -176,13 +176,13 @@ bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_pat
 	struct btree_path *linked;
 
 	EBUG_ON(path->l[b->c.level].b != b);
-	EBUG_ON(path->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq);
+	EBUG_ON(path->l[b->c.level].lock_seq != six_lock_seq(&b->c.lock));
 	EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write);
 
 	mark_btree_node_locked_noreset(path, b->c.level, SIX_LOCK_intent);
 
 	trans_for_each_path_with_node(trans, b, linked)
-		linked->l[b->c.level].lock_seq += 2;
+		linked->l[b->c.level].lock_seq++;
 
 	six_unlock_write(&b->c.lock);
 }
@@ -206,8 +206,8 @@ static inline int __btree_node_lock_nopath(struct btree_trans *trans,
 	trans->lock_must_abort = false;
 	trans->locking = b;
 
-	ret = six_lock_type_ip_waiter(&b->lock, type, &trans->locking_wait,
-				      bch2_six_check_for_deadlock, trans, ip);
+	ret = six_lock_ip_waiter(&b->lock, type, &trans->locking_wait,
+				 bch2_six_check_for_deadlock, trans, ip);
 	WRITE_ONCE(trans->locking, NULL);
 	WRITE_ONCE(trans->locking_wait.start_time, 0);
 	return ret;
@@ -284,7 +284,7 @@ static inline int __btree_node_lock_write(struct btree_trans *trans,
 					  bool lock_may_not_fail)
 {
 	EBUG_ON(&path->l[b->level].b->c != b);
-	EBUG_ON(path->l[b->level].lock_seq != b->lock.state.seq);
+	EBUG_ON(path->l[b->level].lock_seq != six_lock_seq(&b->lock));
 	EBUG_ON(!btree_node_intent_locked(path, b->level));
 
 	/*
@@ -688,7 +688,7 @@ err:
 	bch2_trans_unlock(&trans);
 	btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
 	mark_btree_node_locked(&trans, path, b->c.level, SIX_LOCK_intent);
-	path->l[b->c.level].lock_seq = b->c.lock.state.seq;
+	path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
 	path->l[b->c.level].b = b;
 
 	bch2_btree_node_lock_write_nofail(&trans, path, &b->c);
@@ -137,17 +137,17 @@ u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
 struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
 {
 	struct bch_fs_usage_online *ret;
-	unsigned seq, i, v, u64s = fs_usage_u64s(c) + 1;
+	unsigned nr_replicas = READ_ONCE(c->replicas.nr);
+	unsigned seq, i;
 retry:
-	ret = kmalloc(u64s * sizeof(u64), GFP_NOFS);
+	ret = kmalloc(__fs_usage_online_u64s(nr_replicas) * sizeof(u64), GFP_NOFS);
 	if (unlikely(!ret))
 		return NULL;
 
 	percpu_down_read(&c->mark_lock);
 
-	v = fs_usage_u64s(c) + 1;
-	if (unlikely(u64s != v)) {
-		u64s = v;
+	if (nr_replicas != c->replicas.nr) {
+		nr_replicas = c->replicas.nr;
 		percpu_up_read(&c->mark_lock);
 		kfree(ret);
 		goto retry;
@@ -157,10 +157,12 @@ retry:
 
 	do {
 		seq = read_seqcount_begin(&c->usage_lock);
-		unsafe_memcpy(&ret->u, c->usage_base, u64s * sizeof(u64),
+		unsafe_memcpy(&ret->u, c->usage_base,
+			      __fs_usage_u64s(nr_replicas) * sizeof(u64),
 			      "embedded variable length struct");
 		for (i = 0; i < ARRAY_SIZE(c->usage); i++)
-			acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i], u64s);
+			acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i],
+					__fs_usage_u64s(nr_replicas));
 	} while (read_seqcount_retry(&c->usage_lock, seq));
 
 	return ret;
@@ -207,10 +207,24 @@ static inline u64 dev_buckets_available(struct bch_dev *ca,
 
 /* Filesystem usage: */
 
+static inline unsigned __fs_usage_u64s(unsigned nr_replicas)
+{
+	return sizeof(struct bch_fs_usage) / sizeof(u64) + nr_replicas;
+}
+
 static inline unsigned fs_usage_u64s(struct bch_fs *c)
 {
-	return sizeof(struct bch_fs_usage) / sizeof(u64) +
-		READ_ONCE(c->replicas.nr);
+	return __fs_usage_u64s(READ_ONCE(c->replicas.nr));
+}
+
+static inline unsigned __fs_usage_online_u64s(unsigned nr_replicas)
+{
+	return sizeof(struct bch_fs_usage_online) / sizeof(u64) + nr_replicas;
+}
+
+static inline unsigned fs_usage_online_u64s(struct bch_fs *c)
+{
+	return __fs_usage_online_u64s(READ_ONCE(c->replicas.nr));
 }
 
 static inline unsigned dev_usage_u64s(void)
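Both pairs of helpers encode the usual flexible-array sizing rule: the fixed part of the struct measured in u64 units, plus one u64 per replicas entry. A sketch of the allocation this implies, simplified from the bch2_fs_usage_read() hunk above:

	unsigned nr = READ_ONCE(c->replicas.nr);
	struct bch_fs_usage_online *u =
		kmalloc(__fs_usage_online_u64s(nr) * sizeof(u64), GFP_NOFS);
	/* then revalidate nr under mark_lock and retry if it grew */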
@@ -420,7 +420,9 @@ TRACE_EVENT(btree_path_relock_fail,
 		else
 			scnprintf(__entry->node, sizeof(__entry->node), "%px", b);
 		__entry->iter_lock_seq		= path->l[level].lock_seq;
-		__entry->node_lock_seq		= is_btree_node(path, level) ? path->l[level].b->c.lock.state.seq : 0;
+		__entry->node_lock_seq		= is_btree_node(path, level)
+			? six_lock_seq(&path->l[level].b->c.lock)
+			: 0;
 	),
 
 	TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u node %s iter seq %u lock seq %u",
@@ -475,7 +477,9 @@ TRACE_EVENT(btree_path_upgrade_fail,
 		__entry->read_count		= c.n[SIX_LOCK_read];
 		__entry->intent_count		= c.n[SIX_LOCK_read];
 		__entry->iter_lock_seq		= path->l[level].lock_seq;
-		__entry->node_lock_seq		= is_btree_node(path, level) ? path->l[level].b->c.lock.state.seq : 0;
+		__entry->node_lock_seq		= is_btree_node(path, level)
+			? six_lock_seq(&path->l[level].b->c.lock)
+			: 0;
 	),
 
 	TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u locked %u held %u:%u lock count %u:%u iter seq %u lock seq %u",
@@ -350,11 +350,8 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
 
 	if (time_after64(end, start)) {
 		duration = end - start;
-		stats->duration_stats = mean_and_variance_update_inlined(stats->duration_stats,
-								 duration);
-		stats->duration_stats_weighted = mean_and_variance_weighted_update(
-			stats->duration_stats_weighted,
-			duration);
+		stats->duration_stats = mean_and_variance_update(stats->duration_stats, duration);
+		mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration);
 		stats->max_duration = max(stats->max_duration, duration);
 		stats->min_duration = min(stats->min_duration, duration);
 		bch2_quantiles_update(&stats->quantiles, duration);
@@ -362,10 +359,8 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
 
 	if (time_after64(end, stats->last_event)) {
 		freq = end - stats->last_event;
-		stats->freq_stats = mean_and_variance_update_inlined(stats->freq_stats, freq);
-		stats->freq_stats_weighted = mean_and_variance_weighted_update(
-			stats->freq_stats_weighted,
-			freq);
+		stats->freq_stats = mean_and_variance_update(stats->freq_stats, freq);
+		mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
 		stats->max_freq = max(stats->max_freq, freq);
 		stats->min_freq = min(stats->min_freq, freq);
 		stats->last_event = end;
@@ -594,8 +589,8 @@ void bch2_time_stats_exit(struct bch2_time_stats *stats)
 void bch2_time_stats_init(struct bch2_time_stats *stats)
 {
 	memset(stats, 0, sizeof(*stats));
-	stats->duration_stats_weighted.w = 8;
-	stats->freq_stats_weighted.w = 8;
+	stats->duration_stats_weighted.weight = 8;
+	stats->freq_stats_weighted.weight = 8;
 	stats->min_duration = U64_MAX;
 	stats->min_freq = U64_MAX;
 	spin_lock_init(&stats->lock);
@@ -43,38 +43,28 @@
 #include <linux/mean_and_variance.h>
 #include <linux/module.h>
 
-/**
- * fast_divpow2() - fast approximation for n / (1 << d)
- * @n: numerator
- * @d: the power of 2 denominator.
- *
- * note: this rounds towards 0.
- */
-s64 fast_divpow2(s64 n, u8 d)
+u128_u u128_div(u128_u n, u64 d)
 {
-	return (n + ((n < 0) ? ((1 << d) - 1) : 0)) >> d;
-}
+	u128_u r;
+	u64 rem;
+	u64 hi = u128_hi(n);
+	u64 lo = u128_lo(n);
+	u64 h = hi & ((u64) U32_MAX << 32);
+	u64 l = (hi & (u64) U32_MAX) << 32;
 
-/**
- * mean_and_variance_update() - update a mean_and_variance struct @s1 with a new sample @v1
- * and return it.
- * @s1: the mean_and_variance to update.
- * @v1: the new sample.
- *
- * see linked pdf equation 12.
- */
-struct mean_and_variance mean_and_variance_update(struct mean_and_variance s1, s64 v1)
-{
-	return mean_and_variance_update_inlined(s1, v1);
+	r =             u128_shl(u64_to_u128(div64_u64_rem(h, d, &rem)), 64);
+	r = u128_add(r, u128_shl(u64_to_u128(div64_u64_rem(l + (rem << 32), d, &rem)), 32));
+	r = u128_add(r,          u64_to_u128(div64_u64_rem(lo + (rem << 32), d, &rem)));
+	return r;
 }
-EXPORT_SYMBOL_GPL(mean_and_variance_update);
+EXPORT_SYMBOL_GPL(u128_div);
 
 /**
  * mean_and_variance_get_mean() - get mean from @s
  */
 s64 mean_and_variance_get_mean(struct mean_and_variance s)
 {
-	return div64_u64(s.sum, s.n);
+	return s.n ? div64_u64(s.sum, s.n) : 0;
 }
 EXPORT_SYMBOL_GPL(mean_and_variance_get_mean);
 
@@ -85,10 +75,14 @@ EXPORT_SYMBOL_GPL(mean_and_variance_get_mean);
 */
 u64 mean_and_variance_get_variance(struct mean_and_variance s1)
 {
-	u128 s2 = u128_div(s1.sum_squares, s1.n);
-	u64  s3 = abs(mean_and_variance_get_mean(s1));
+	if (s1.n) {
+		u128_u s2 = u128_div(s1.sum_squares, s1.n);
+		u64  s3 = abs(mean_and_variance_get_mean(s1));
 
-	return u128_to_u64(u128_sub(s2, u128_square(s3)));
+		return u128_lo(u128_sub(s2, u128_square(s3)));
+	} else {
+		return 0;
+	}
 }
 EXPORT_SYMBOL_GPL(mean_and_variance_get_variance);
 
@@ -109,10 +103,26 @@ EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);
 * see linked pdf: function derived from equations 140-143 where alpha = 2^w.
 * values are stored bitshifted for performance and added precision.
 */
-struct mean_and_variance_weighted mean_and_variance_weighted_update(struct mean_and_variance_weighted s1,
-								    s64 x)
+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 x)
 {
-	return mean_and_variance_weighted_update_inlined(s1, x);
+	// previous weighted variance.
+	u8 w		= s->weight;
+	u64 var_w0	= s->variance;
+	// new value weighted.
+	s64 x_w		= x << w;
+	s64 diff_w	= x_w - s->mean;
+	s64 diff	= fast_divpow2(diff_w, w);
+	// new mean weighted.
+	s64 u_w1	= s->mean + diff;
+
+	if (!s->init) {
+		s->mean = x_w;
+		s->variance = 0;
+	} else {
+		s->mean = u_w1;
+		s->variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w;
+	}
+	s->init = true;
 }
 EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
 
@@ -121,7 +131,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
 */
 s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s)
 {
-	return fast_divpow2(s.mean, s.w);
+	return fast_divpow2(s.mean, s.weight);
 }
 EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
 
@@ -131,7 +141,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
 u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s)
 {
 	// always positive don't need fast divpow2
-	return s.variance >> s.w;
+	return s.variance >> s.weight;
 }
 EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);
 
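Unshifting the stored values (mean and variance are kept multiplied by 2^w), the recurrences mean_and_variance_weighted_update() implements are the standard exponentially weighted forms with alpha = 2^-w:

	mean'     = mean + alpha * (x - mean)
	variance' = (1 - alpha) * variance + alpha * (x - mean) * (x - mean')

This is a sketch of the algebra behind the function, not additional API; the doc comment above says alpha = 2^w because it describes the bitshifted representation the code actually operates on.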
linux/six.c
@@ -14,9 +14,9 @@
 #include <trace/events/lock.h>
 
 #ifdef DEBUG
-#define EBUG_ON(cond)		BUG_ON(cond)
+#define EBUG_ON(cond)			BUG_ON(cond)
 #else
-#define EBUG_ON(cond)		do {} while (0)
+#define EBUG_ON(cond)			do {} while (0)
 #endif
 
 #define six_acquire(l, t, r, ip)	lock_acquire(l, 0, t, r, 1, NULL, ip)
@ -24,59 +24,69 @@
|
||||
|
||||
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
|
||||
|
||||
#define SIX_LOCK_HELD_read_OFFSET 0
|
||||
#define SIX_LOCK_HELD_read ~(~0U << 26)
|
||||
#define SIX_LOCK_HELD_intent (1U << 26)
|
||||
#define SIX_LOCK_HELD_write (1U << 27)
|
||||
#define SIX_LOCK_WAITING_read (1U << (28 + SIX_LOCK_read))
|
||||
#define SIX_LOCK_WAITING_intent (1U << (28 + SIX_LOCK_intent))
|
||||
#define SIX_LOCK_WAITING_write (1U << (28 + SIX_LOCK_write))
|
||||
#define SIX_LOCK_NOSPIN (1U << 31)
|
||||
|
||||
struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u64			lock_val;
	u32			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u64			lock_fail;

	/* Value we add to the lock in order to release the lock: */
	u64			unlock_val;
	u32			lock_fail;

	/* Mask that indicates lock is held for this type: */
	u64			held_mask;
	u32			held_mask;

	/* Waitlist we wakeup when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};

#define __SIX_LOCK_HELD_read	__SIX_VAL(read_lock, ~0)
#define __SIX_LOCK_HELD_intent	__SIX_VAL(intent_lock, ~0)
#define __SIX_LOCK_HELD_write	__SIX_VAL(seq, 1)

#define LOCK_VALS {							\
	[SIX_LOCK_read] = {						\
		.lock_val	= __SIX_VAL(read_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_write + __SIX_VAL(write_locking, 1),\
		.unlock_val	= -__SIX_VAL(read_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_read,			\
		.lock_val	= 1U << SIX_LOCK_HELD_read_OFFSET,	\
		.lock_fail	= SIX_LOCK_HELD_write,			\
		.held_mask	= SIX_LOCK_HELD_read,			\
		.unlock_wakeup	= SIX_LOCK_write,			\
	},								\
	[SIX_LOCK_intent] = {						\
		.lock_val	= __SIX_VAL(intent_lock, 1),		\
		.lock_fail	= __SIX_LOCK_HELD_intent,		\
		.unlock_val	= -__SIX_VAL(intent_lock, 1),		\
		.held_mask	= __SIX_LOCK_HELD_intent,		\
		.lock_val	= SIX_LOCK_HELD_intent,			\
		.lock_fail	= SIX_LOCK_HELD_intent,			\
		.held_mask	= SIX_LOCK_HELD_intent,			\
		.unlock_wakeup	= SIX_LOCK_intent,			\
	},								\
	[SIX_LOCK_write] = {						\
		.lock_val	= __SIX_VAL(seq, 1),			\
		.lock_fail	= __SIX_LOCK_HELD_read,			\
		.unlock_val	= __SIX_VAL(seq, 1),			\
		.held_mask	= __SIX_LOCK_HELD_write,		\
		.lock_val	= SIX_LOCK_HELD_write,			\
		.lock_fail	= SIX_LOCK_HELD_read,			\
		.held_mask	= SIX_LOCK_HELD_write,			\
		.unlock_wakeup	= SIX_LOCK_read,			\
	},								\
}

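With the state in a single word, taking a lock is adding lock_val, releasing it is subtracting lock_val (which is why the separate unlock_val field could be dropped), and a trylock failure is a mask test against lock_fail. A toy demonstration of the read-lock row of the table above (values copied by hand; not the kernel code):

#include <stdint.h>
#include <stdio.h>

#define HELD_read_MASK	(~(~0U << 26))
#define HELD_write	(1U << 27)

int main(void)
{
	uint32_t state = 3;		/* three readers, bits 0-25 */

	/* read trylock: .lock_fail is SIX_LOCK_HELD_write */
	if (!(state & HELD_write))
		state += 1;		/* .lock_val for SIX_LOCK_read */

	printf("readers: %u\n", state & HELD_read_MASK);	/* 4 */

	state -= 1;			/* unlock: subtract .lock_val */
	printf("readers: %u\n", state & HELD_read_MASK);	/* 3 */
	return 0;
}
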
static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
{
	if ((atomic_read(&lock->state) & mask) != mask)
		atomic_or(mask, &lock->state);
}

static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
{
	if (atomic_read(&lock->state) & mask)
		atomic_and(~mask, &lock->state);
}

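Both helpers read the state first and skip the atomic RMW when it would be a no-op, so repeated calls don't keep bouncing the lock's cache line between CPUs. The same pattern in portable C11 atomics (a sketch, not the kernel helpers):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static void set_bits(_Atomic uint32_t *word, uint32_t mask)
{
	/* cheap shared read first; only RMW if some bit is actually unset */
	if ((atomic_load(word) & mask) != mask)
		atomic_fetch_or(word, mask);
}

int main(void)
{
	_Atomic uint32_t state = 0;

	set_bits(&state, 1U << 28);	/* performs the atomic OR */
	set_bits(&state, 1U << 28);	/* already set: skips the RMW */
	printf("state %x\n", (unsigned)atomic_load(&state));
	return 0;
}
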
static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
				 union six_lock_state old,
				 struct task_struct *owner)
				 u32 old, struct task_struct *owner)
{
	if (type != SIX_LOCK_intent)
		return;

	if (!old.intent_lock) {
	if (!(old & SIX_LOCK_HELD_intent)) {
		EBUG_ON(lock->owner);
		lock->owner = owner;
	} else {
@ -94,22 +104,25 @@ static inline unsigned pcpu_read_count(struct six_lock *lock)
	return read_count;
}

/* This is probably up there with the more evil things I've done */
#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))

static int __do_six_trylock_type(struct six_lock *lock,
				 enum six_lock_type type,
				 struct task_struct *task,
				 bool try)
/*
 * __do_six_trylock() - main trylock routine
 *
 * Returns 1 on success, 0 on failure
 *
 * In percpu reader mode, a failed trylock may cause a spurious trylock failure
 * for another thread taking the competing lock type, and we may have to do a
 * wakeup: when a wakeup is required, we return -1 - wakeup_type.
 */
static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
			    struct task_struct *task, bool try)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old, new;
	int ret;
	u64 v;
	u32 old, new, v;

	EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
	EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1));
	EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking)));
	EBUG_ON(type == SIX_LOCK_write &&
		(try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));

	/*
	 * Percpu reader mode:
@ -124,101 +137,75 @@ static int __do_six_trylock_type(struct six_lock *lock,
	 * the lock, then issues a full memory barrier, then reads from the
	 * other thread's variable to check if the other thread thinks it has
	 * the lock. If we raced, we back off and retry/sleep.
	 *
	 * Failure to take the lock may cause a spurious trylock failure in
	 * another thread, because we temporarily set the lock to indicate that
	 * we held it. This would be a problem for a thread in six_lock(), when
	 * they are calling trylock after adding themselves to the waitlist and
	 * prior to sleeping.
	 *
	 * Therefore, if we fail to get the lock, and there were waiters of the
	 * type we conflict with, we will have to issue a wakeup.
	 *
	 * Since we may be called under wait_lock (and by the wakeup code
	 * itself), we return that the wakeup has to be done instead of doing it
	 * here.
	 */

	if (type == SIX_LOCK_read && lock->readers) {
		preempt_disable();
		this_cpu_inc(*lock->readers); /* signal that we own lock */

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail);
		old = atomic_read(&lock->state);
		ret = !(old & l[type].lock_fail);

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * If we failed because a writer was trying to take the
		 * lock, issue a wakeup because we might have caused a
		 * spurious trylock failure:
		 */
#if 0
		/*
		 * This code should be sufficient, but we're seeing unexplained
		 * lost wakeups:
		 */
		if (old.write_locking)
		if (!ret && (old & SIX_LOCK_WAITING_write))
			ret = -1 - SIX_LOCK_write;
#else
		if (!ret)
			ret = -1 - SIX_LOCK_write;
#endif
	} else if (type == SIX_LOCK_write && lock->readers) {
		if (try) {
			atomic64_add(__SIX_VAL(write_locking, 1),
				     &lock->state.counter);
			smp_mb__after_atomic();
		} else if (!(lock->state.waiters & (1 << SIX_LOCK_write))) {
			atomic64_add(__SIX_VAL(waiters, 1 << SIX_LOCK_write),
				     &lock->state.counter);
			/*
			 * pairs with barrier after unlock and before checking
			 * for readers in unlock path
			 */
			atomic_add(SIX_LOCK_HELD_write, &lock->state);
			smp_mb__after_atomic();
		}

		ret = !pcpu_read_count(lock);

		/*
		 * On success, we increment lock->seq; also we clear
		 * write_locking unless we failed from the lock path:
		 */
		v = 0;
		if (ret)
			v += __SIX_VAL(seq, 1);
		if (ret || try)
			v -= __SIX_VAL(write_locking, 1);

		if (try && !ret) {
			old.v = atomic64_add_return(v, &lock->state.counter);
			if (old.waiters & (1 << SIX_LOCK_read))
			old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
			if (old & SIX_LOCK_WAITING_read)
				ret = -1 - SIX_LOCK_read;
		} else {
			atomic64_add(v, &lock->state.counter);
		}
	} else {
		v = READ_ONCE(lock->state.v);
		v = atomic_read(&lock->state);
		do {
			new.v = old.v = v;
			new = old = v;

			if (!(old.v & l[type].lock_fail)) {
				new.v += l[type].lock_val;
			ret = !(old & l[type].lock_fail);

				if (type == SIX_LOCK_write)
					new.write_locking = 0;
			} else if (!try && !(new.waiters & (1 << type)))
				new.waiters |= 1 << type;
			else
				break; /* waiting bit already set */
		} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
					old.v, new.v)) != old.v);
			if (!ret || (type == SIX_LOCK_write && !try)) {
				smp_mb();
				break;
			}

		ret = !(old.v & l[type].lock_fail);
			new += l[type].lock_val;
		} while ((v = atomic_cmpxchg_acquire(&lock->state, old, new)) != old);

		EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
		EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
	}

	if (ret > 0)
		six_set_owner(lock, type, old, task);

	EBUG_ON(type == SIX_LOCK_write && (try || ret > 0) && (lock->state.write_locking));
	EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
		(atomic_read(&lock->state) & SIX_LOCK_HELD_write));

	return ret;
}

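The percpu-reader branch above is a Dekker-style handshake: each side publishes its claim, issues a full barrier, then checks for the other side's claim, backing off if both raced. Stripped of the six-lock specifics, the protocol looks like this (a self-contained sketch; C11 seq_cst atomics stand in for the per-cpu counters and smp_mb()):

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic int readers;	/* stand-in for the per-cpu read counts */
static _Atomic int writer;	/* stand-in for SIX_LOCK_HELD_write */

static bool read_trylock(void)
{
	atomic_fetch_add(&readers, 1);		/* publish: "I hold it" */
	if (!atomic_load(&writer))		/* then look for a writer */
		return true;
	atomic_fetch_sub(&readers, 1);		/* raced: back off */
	return false;
}

static bool write_trylock(void)
{
	atomic_store(&writer, 1);		/* publish before checking */
	if (!atomic_load(&readers))		/* then look for readers */
		return true;
	atomic_store(&writer, 0);		/* raced: back off */
	return false;
}

int main(void)
{
	/* a held read lock makes the write trylock fail */
	return !(read_trylock() && !write_trylock());
}

Note the failure side effect: a reader that backs off may still have made a concurrent write trylock fail spuriously, which is exactly why the kernel code above returns -1 - SIX_LOCK_write so the caller can issue a wakeup.
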
static inline void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
{
	struct six_lock_waiter *w, *next;
	struct task_struct *task;
@ -237,7 +224,7 @@ again:
			goto unlock;
		saw_one = true;

		ret = __do_six_trylock_type(lock, lock_type, w->task, false);
		ret = __do_six_trylock(lock, lock_type, w->task, false);
		if (ret <= 0)
			goto unlock;

@ -252,7 +239,7 @@ again:
			wake_up_process(task);
	}

	clear_bit(waitlist_bitnr(lock_type), (unsigned long *) &lock->state.v);
	six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
unlock:
	raw_spin_unlock(&lock->wait_lock);

@ -262,96 +249,74 @@ unlock:
	}
}

static inline void six_lock_wakeup(struct six_lock *lock,
				   union six_lock_state state,
				   enum six_lock_type lock_type)
__always_inline
static void six_lock_wakeup(struct six_lock *lock, u32 state,
			    enum six_lock_type lock_type)
{
	if (lock_type == SIX_LOCK_write && state.read_lock)
	if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
		return;

	if (!(state.waiters & (1 << lock_type)))
	if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
		return;

	__six_lock_wakeup(lock, lock_type);
}

static bool do_six_trylock_type(struct six_lock *lock,
				enum six_lock_type type,
				bool try)
__always_inline
static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
{
	int ret;

	ret = __do_six_trylock_type(lock, type, current, try);
	ret = __do_six_trylock(lock, type, current, try);
	if (ret < 0)
		__six_lock_wakeup(lock, -ret - 1);

	return ret > 0;
}

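do_six_trylock() decodes the negative return convention from __do_six_trylock(): -1 - type means "failed, and waiters of that type may have seen a spurious failure, so wake them". The encoding is its own inverse, as a standalone two-assert check shows (enum values as in six.h):

#include <assert.h>

enum six_lock_type { SIX_LOCK_read, SIX_LOCK_intent, SIX_LOCK_write };

int main(void)
{
	int ret = -1 - SIX_LOCK_write;		/* encode failure + wakeup type */

	assert(ret < 0);
	assert(-ret - 1 == SIX_LOCK_write);	/* decode, as above */
	return 0;
}
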
__always_inline __flatten
static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type,
			       unsigned long ip)
/**
 * six_trylock_ip - attempt to take a six lock without blocking
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * Return: true on success, false on failure.
 */
bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
{
	if (!do_six_trylock_type(lock, type, true))
	if (!do_six_trylock(lock, type, true))
		return false;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
	return true;
}
EXPORT_SYMBOL_GPL(six_trylock_ip);

__always_inline __flatten
static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
			      unsigned seq, unsigned long ip)
/**
 * six_relock_ip - attempt to re-take a lock that was held previously
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @seq:	lock sequence number obtained from six_lock_seq() while lock was
 *		held previously
 * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * Return: true on success, false on failure.
 */
bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
		   unsigned seq, unsigned long ip)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state old;
	u64 v;
	if (lock->seq != seq || !six_trylock_ip(lock, type, ip))
		return false;

	EBUG_ON(type == SIX_LOCK_write);

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		bool ret;

		preempt_disable();
		this_cpu_inc(*lock->readers);

		smp_mb();

		old.v = READ_ONCE(lock->state.v);
		ret = !(old.v & l[type].lock_fail) && old.seq == seq;

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		/*
		 * Similar to the lock path, we may have caused a spurious write
		 * lock fail and need to issue a wakeup:
		 */
		if (ret)
			six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
		else
			six_lock_wakeup(lock, old, SIX_LOCK_write);

		return ret;
	if (lock->seq != seq) {
		six_unlock_ip(lock, type, ip);
		return false;
	}

	v = READ_ONCE(lock->state.v);
	do {
		old.v = v;

		if (old.seq != seq || old.v & l[type].lock_fail)
			return false;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v,
				old.v + l[type].lock_val)) != old.v);

	six_set_owner(lock, type, old, current);
	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
	return true;
}
EXPORT_SYMBOL_GPL(six_relock_ip);

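Typical relock usage pairs this with six_lock_seq(): record the sequence number while the lock is held, drop the lock around a blocking operation, then try the cheap re-take. A hedged fragment (the six_lock_read()/six_relock_read() wrappers from six.h and the do_blocking_work() caller are assumptions, not code from this file):

	six_lock_read(&b->lock, NULL, NULL);
	u32 seq = six_lock_seq(&b->lock);
	six_unlock_read(&b->lock);

	do_blocking_work();

	if (!six_relock_read(&b->lock, seq)) {
		/* someone write-locked it meanwhile: take it the slow way */
		six_lock_read(&b->lock, NULL, NULL);
		/* ... and revalidate whatever the lock protects ... */
	}
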
#ifdef CONFIG_LOCK_SPIN_ON_OWNER

@ -371,17 +336,6 @@ static inline bool six_can_spin_on_owner(struct six_lock *lock)
	return ret;
}

static inline void six_set_nospin(struct six_lock *lock)
{
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);

	do {
		new.v = old.v = v;
		new.nospin = true;
	} while ((v = atomic64_cmpxchg(&lock->state.counter, old.v, new.v)) != old.v);
}

static inline bool six_spin_on_owner(struct six_lock *lock,
				     struct task_struct *owner,
				     u64 end_time)
@ -405,7 +359,7 @@ static inline bool six_spin_on_owner(struct six_lock *lock,
		}

		if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
			six_set_nospin(lock);
			six_set_bitmask(lock, SIX_LOCK_NOSPIN);
			ret = false;
			break;
		}
@ -445,7 +399,7 @@ static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type
		if (owner && !six_spin_on_owner(lock, owner, end_time))
			break;

		if (do_six_trylock_type(lock, type, false)) {
		if (do_six_trylock(lock, type, false)) {
			osq_unlock(&lock->osq);
			preempt_enable();
			return true;
@ -494,17 +448,16 @@ static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type
#endif

noinline
static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type,
				    struct six_lock_waiter *wait,
				    six_lock_should_sleep_fn should_sleep_fn, void *p,
				    unsigned long ip)
static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
			     struct six_lock_waiter *wait,
			     six_lock_should_sleep_fn should_sleep_fn, void *p,
			     unsigned long ip)
{
	union six_lock_state old;
	int ret = 0;

	if (type == SIX_LOCK_write) {
		EBUG_ON(lock->state.write_locking);
		atomic64_add(__SIX_VAL(write_locking, 1), &lock->state.counter);
		EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
		atomic_add(SIX_LOCK_HELD_write, &lock->state);
		smp_mb__after_atomic();
	}

@ -519,13 +472,12 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty
	wait->lock_acquired = false;

	raw_spin_lock(&lock->wait_lock);
	if (!(lock->state.waiters & (1 << type)))
		set_bit(waitlist_bitnr(type), (unsigned long *) &lock->state.v);
	six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
	/*
	 * Retry taking the lock after taking waitlist lock, have raced with an
	 * unlock:
	 * Retry taking the lock after taking waitlist lock, in case we raced
	 * with an unlock:
	 */
	ret = __do_six_trylock_type(lock, type, current, false);
	ret = __do_six_trylock(lock, type, current, false);
	if (ret <= 0) {
		wait->start_time = local_clock();

@ -565,7 +517,7 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty
		list_del(&wait->list);
		raw_spin_unlock(&lock->wait_lock);

		if (wait->lock_acquired)
		if (unlikely(wait->lock_acquired))
			do_six_unlock_type(lock, type);
		break;
	}
@ -575,21 +527,49 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty

	__set_current_state(TASK_RUNNING);
out:
	if (ret && type == SIX_LOCK_write && lock->state.write_locking) {
		old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1),
					    &lock->state.counter);
		six_lock_wakeup(lock, old, SIX_LOCK_read);
	if (ret && type == SIX_LOCK_write) {
		six_clear_bitmask(lock, SIX_LOCK_HELD_write);
		six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
	}
	trace_contention_end(lock, 0);

	return ret;
}

__always_inline __flatten
static int __six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type,
				  struct six_lock_waiter *wait,
				  six_lock_should_sleep_fn should_sleep_fn, void *p,
				  unsigned long ip)
/**
 * six_lock_ip_waiter - take a lock, with full waitlist interface
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @wait:	pointer to wait object, which will be added to lock's waitlist
 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
 *		to scheduling
 * @p:		passed through to @should_sleep_fn
 * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * This is the most general six_lock() variant, with parameters to support full
 * cycle detection for deadlock avoidance.
 *
 * The code calling this function must implement tracking of held locks, and the
 * @wait object should be embedded into the struct that tracks held locks -
 * which must also be accessible in a thread-safe way.
 *
 * @should_sleep_fn should invoke the cycle detector; it should walk each
 * lock's waiters, and for each waiter recursively walk their held locks.
 *
 * When this function must block, @wait will be added to @lock's waitlist before
 * calling trylock, and before calling @should_sleep_fn, and @wait will not be
 * removed from the lock waitlist until the lock has been successfully acquired,
 * or we abort.
 *
 * @wait.start_time will be monotonically increasing for any given waitlist, and
 * thus may be used as a loop cursor.
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
		       struct six_lock_waiter *wait,
		       six_lock_should_sleep_fn should_sleep_fn, void *p,
		       unsigned long ip)
{
	int ret;

@ -598,8 +578,8 @@ static int __six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type
	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);

	ret = do_six_trylock_type(lock, type, true) ? 0
		: __six_lock_type_slowpath(lock, type, wait, should_sleep_fn, p, ip);
	ret = do_six_trylock(lock, type, true) ? 0
		: six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);

	if (ret && type != SIX_LOCK_write)
		six_release(&lock->dep_map, ip);
@ -608,22 +588,13 @@ static int __six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type

	return ret;
}
EXPORT_SYMBOL_GPL(six_lock_ip_waiter);

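A minimal caller fragment for the waiter interface (hypothetical: a real user embeds @wait in its per-thread lock-tracking structure and runs a cycle detector in the callback):

static int my_should_sleep(struct six_lock *lock, void *p)
{
	/*
	 * Walk lock's waiters and each waiter's held locks here;
	 * returning nonzero aborts the lock attempt with that code.
	 */
	return 0;
}

	struct six_lock_waiter wait;
	int ret = six_lock_ip_waiter(&b->lock, SIX_LOCK_intent, &wait,
				     my_should_sleep, NULL, _THIS_IP_);
	if (ret)
		return ret;	/* aborted by my_should_sleep() */
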
__always_inline
static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
			   six_lock_should_sleep_fn should_sleep_fn, void *p,
			   unsigned long ip)
{
	struct six_lock_waiter wait;

	return __six_lock_type_waiter(lock, type, &wait, should_sleep_fn, p, ip);
}

__always_inline __flatten
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	const struct six_lock_vals l[] = LOCK_VALS;
	union six_lock_state state;
	u32 state;

	if (type == SIX_LOCK_intent)
		lock->owner = NULL;
@ -633,26 +604,39 @@ static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
		smp_mb(); /* unlock barrier */
		this_cpu_dec(*lock->readers);
		smp_mb(); /* between unlocking and checking for waiters */
		state.v = READ_ONCE(lock->state.v);
		state = atomic_read(&lock->state);
	} else {
		u64 v = l[type].unlock_val;
		u32 v = l[type].lock_val;

		if (type != SIX_LOCK_read)
			v -= lock->state.v & __SIX_VAL(nospin, 1);
			v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;

		EBUG_ON(!(lock->state.v & l[type].held_mask));
		state.v = atomic64_add_return_release(v, &lock->state.counter);
		EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
		state = atomic_sub_return_release(v, &lock->state);
	}

	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
}

__always_inline __flatten
static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type,
			      unsigned long ip)
/**
 * six_unlock_ip - drop a six lock
 * @lock:	lock to unlock
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * When a lock is held multiple times (because six_lock_increment() was used),
 * this decrements the 'lock held' counter by one.
 *
 * For example:
 * six_lock_read(&foo->lock);				read count 1
 * six_lock_increment(&foo->lock, SIX_LOCK_read);	read count 2
 * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 1
 * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 0
 */
void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
{
	EBUG_ON(type == SIX_LOCK_write &&
		!(lock->state.v & __SIX_LOCK_HELD_intent));
		!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
	EBUG_ON((type == SIX_LOCK_write ||
		 type == SIX_LOCK_intent) &&
		lock->owner != current);
@ -666,52 +650,18 @@ static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type,
		return;
	}

	lock->seq += type == SIX_LOCK_write;

	do_six_unlock_type(lock, type);
}
EXPORT_SYMBOL_GPL(six_unlock_ip);

#define __SIX_LOCK(type)						\
bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)	\
{									\
	return __six_trylock_type(lock, SIX_LOCK_##type, ip);		\
}									\
EXPORT_SYMBOL_GPL(six_trylock_ip_##type);				\
									\
bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
{									\
	return __six_relock_type(lock, SIX_LOCK_##type, seq, ip);	\
}									\
EXPORT_SYMBOL_GPL(six_relock_ip_##type);				\
									\
int six_lock_ip_##type(struct six_lock *lock,				\
		       six_lock_should_sleep_fn should_sleep_fn, void *p,\
		       unsigned long ip)				\
{									\
	return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
}									\
EXPORT_SYMBOL_GPL(six_lock_ip_##type);					\
									\
int six_lock_ip_waiter_##type(struct six_lock *lock,			\
			      struct six_lock_waiter *wait,		\
			      six_lock_should_sleep_fn should_sleep_fn, void *p,\
			      unsigned long ip)				\
{									\
	return __six_lock_type_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
}									\
EXPORT_SYMBOL_GPL(six_lock_ip_waiter_##type);				\
									\
void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip)	\
{									\
	__six_unlock_type(lock, SIX_LOCK_##type, ip);			\
}									\
EXPORT_SYMBOL_GPL(six_unlock_ip_##type);

__SIX_LOCK(read)
__SIX_LOCK(intent)
__SIX_LOCK(write)

#undef __SIX_LOCK

/* Convert from intent to read: */
/**
 * six_lock_downgrade - convert an intent lock to a read lock
 * @lock:	lock to downgrade
 *
 * @lock will have read count incremented and intent count decremented
 */
void six_lock_downgrade(struct six_lock *lock)
{
	six_lock_increment(lock, SIX_LOCK_read);
@ -719,25 +669,33 @@ void six_lock_downgrade(struct six_lock *lock)
}
EXPORT_SYMBOL_GPL(six_lock_downgrade);

/**
 * six_lock_tryupgrade - attempt to convert read lock to an intent lock
 * @lock:	lock to upgrade
 *
 * On success, @lock will have intent count incremented and read count
 * decremented
 *
 * Return: true on success, false on failure
 */
bool six_lock_tryupgrade(struct six_lock *lock)
{
	union six_lock_state old, new;
	u64 v = READ_ONCE(lock->state.v);
	const struct six_lock_vals l[] = LOCK_VALS;
	u32 old, new, v = atomic_read(&lock->state);

	do {
		new.v = old.v = v;
		new = old = v;

		if (new.intent_lock)
		if (new & SIX_LOCK_HELD_intent)
			return false;

		if (!lock->readers) {
			EBUG_ON(!new.read_lock);
			new.read_lock--;
			EBUG_ON(!(new & SIX_LOCK_HELD_read));
			new -= l[SIX_LOCK_read].lock_val;
		}

		new.intent_lock = 1;
	} while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
				old.v, new.v)) != old.v);
		new |= SIX_LOCK_HELD_intent;
	} while ((v = atomic_cmpxchg_acquire(&lock->state, old, new)) != old);

	if (lock->readers)
		this_cpu_dec(*lock->readers);
@ -748,6 +706,17 @@ bool six_lock_tryupgrade(struct six_lock *lock)
}
EXPORT_SYMBOL_GPL(six_lock_tryupgrade);

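The usual caller pattern falls back to drop-and-relock when the upgrade races with another intent holder (a fragment, not code from this file; revalidation is the caller's responsibility):

	if (!six_lock_tryupgrade(&b->lock)) {
		six_unlock_read(&b->lock);
		six_lock_intent(&b->lock, NULL, NULL);
		/* ... revalidate: the lock was dropped in between ... */
	}
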
/**
 * six_trylock_convert - attempt to convert a held lock from one type to another
 * @lock:	lock to convert
 * @from:	SIX_LOCK_read or SIX_LOCK_intent
 * @to:		SIX_LOCK_read or SIX_LOCK_intent
 *
 * On success, @lock will be held once for @to and no longer held for @from
 *
 * Return: true on success, false on failure
 */
bool six_trylock_convert(struct six_lock *lock,
			 enum six_lock_type from,
			 enum six_lock_type to)
@ -766,9 +735,16 @@ bool six_trylock_convert(struct six_lock *lock,
}
EXPORT_SYMBOL_GPL(six_trylock_convert);

/*
 * Increment read/intent lock count, assuming we already have it read or intent
 * locked:
/**
 * six_lock_increment - increase held lock count on a lock that is already held
 * @lock:	lock to increment
 * @type:	SIX_LOCK_read or SIX_LOCK_intent
 *
 * @lock must already be held, with a lock type that is greater than or equal to
 * @type
 *
 * A corresponding six_unlock_type() call will be required for @lock to be fully
 * unlocked.
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
{
@ -783,13 +759,14 @@ void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
		if (lock->readers) {
			this_cpu_inc(*lock->readers);
		} else {
			EBUG_ON(!lock->state.read_lock &&
				!lock->state.intent_lock);
			atomic64_add(l[type].lock_val, &lock->state.counter);
			EBUG_ON(!(atomic_read(&lock->state) &
				  (SIX_LOCK_HELD_read|
				   SIX_LOCK_HELD_intent)));
			atomic_add(l[type].lock_val, &lock->state);
		}
		break;
	case SIX_LOCK_intent:
		EBUG_ON(!lock->state.intent_lock);
		EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
		lock->intent_lock_recurse++;
		break;
	case SIX_LOCK_write:
@ -799,9 +776,19 @@ void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
}
EXPORT_SYMBOL_GPL(six_lock_increment);

/**
 * six_lock_wakeup_all - wake up all waiters on @lock
 * @lock:	lock to wake up waiters for
 *
 * Waking up waiters will cause them to re-run should_sleep_fn, which may then
 * abort the lock operation.
 *
 * This function is never needed in a bug-free program; it's only useful in
 * debug code, e.g. to determine if a cycle detector is at fault.
 */
void six_lock_wakeup_all(struct six_lock *lock)
{
	union six_lock_state state = lock->state;
	u32 state = atomic_read(&lock->state);
	struct six_lock_waiter *w;

	six_lock_wakeup(lock, state, SIX_LOCK_read);
@ -815,38 +802,96 @@ void six_lock_wakeup_all(struct six_lock *lock)
}
EXPORT_SYMBOL_GPL(six_lock_wakeup_all);

void six_lock_pcpu_free(struct six_lock *lock)
{
	BUG_ON(lock->readers && pcpu_read_count(lock));
	BUG_ON(lock->state.read_lock);

	free_percpu(lock->readers);
	lock->readers = NULL;
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_free);

void six_lock_pcpu_alloc(struct six_lock *lock)
{
#ifdef __KERNEL__
	if (!lock->readers)
		lock->readers = alloc_percpu(unsigned);
#endif
}
EXPORT_SYMBOL_GPL(six_lock_pcpu_alloc);

/*
 * Returns lock held counts, for both read and intent
/**
 * six_lock_counts - return held lock counts, for each lock type
 * @lock:	lock to return counters for
 *
 * Return: the number of times a lock is held for read, intent and write.
 */
struct six_lock_count six_lock_counts(struct six_lock *lock)
{
	struct six_lock_count ret;

	ret.n[SIX_LOCK_read]	= !lock->readers
		? lock->state.read_lock
		? atomic_read(&lock->state) & SIX_LOCK_HELD_read
		: pcpu_read_count(lock);
	ret.n[SIX_LOCK_intent]	= lock->state.intent_lock + lock->intent_lock_recurse;
	ret.n[SIX_LOCK_write]	= lock->state.seq & 1;
	ret.n[SIX_LOCK_intent]	= !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
		lock->intent_lock_recurse;
	ret.n[SIX_LOCK_write]	= !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);

	return ret;
}
EXPORT_SYMBOL_GPL(six_lock_counts);

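six_lock_counts() is mainly useful for assertions; for example (a fragment, not code from this file):

	struct six_lock_count c = six_lock_counts(&b->lock);

	/* taking a write lock requires already holding an intent lock: */
	BUG_ON(!c.n[SIX_LOCK_intent]);
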
/**
 * six_lock_readers_add - directly manipulate reader count of a lock
 * @lock:	lock to add/subtract readers for
 * @nr:		reader count to add/subtract
 *
 * When an upper layer is implementing lock reentrancy, we may have both read
 * and intent locks on the same lock.
 *
 * When we need to take a write lock, the read locks will cause self-deadlock,
 * because six locks themselves do not track which read locks are held by the
 * current thread and which are held by a different thread - they do no
 * per-thread tracking of held locks.
 *
 * The upper layer that is tracking held locks may however, if trylock() has
 * failed, count up its own read locks, subtract them, take the write lock, and
 * then re-add them.
 *
 * As in any other situation when taking a write lock, @lock must be held for
 * intent one (or more) times, so @lock will never be left unlocked.
 */
void six_lock_readers_add(struct six_lock *lock, int nr)
{
	if (lock->readers) {
		this_cpu_add(*lock->readers, nr);
	} else {
		EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
		/* reader count starts at bit 0 */
		atomic_add(nr, &lock->state);
	}
}
EXPORT_SYMBOL_GPL(six_lock_readers_add);

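The dance described above looks like this in a caller (a fragment; nr_read is a hypothetical count kept by the caller's own held-lock tracking):

	/* trylock failed against our own read locks: */
	six_lock_readers_add(&b->lock, -nr_read);	/* hide our readers */
	six_lock_write(&b->lock, NULL, NULL);		/* safe: no self-deadlock */
	six_lock_readers_add(&b->lock, nr_read);	/* restore the count */
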
/**
 * six_lock_exit - release resources held by a lock prior to freeing
 * @lock:	lock to exit
 *
 * When a lock was initialized in percpu mode (SIX_LOCK_INIT_PCPU), this is
 * required to free the percpu read counts.
 */
void six_lock_exit(struct six_lock *lock)
{
	WARN_ON(lock->readers && pcpu_read_count(lock));
	WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);

	free_percpu(lock->readers);
	lock->readers = NULL;
}
EXPORT_SYMBOL_GPL(six_lock_exit);

void __six_lock_init(struct six_lock *lock, const char *name,
		     struct lock_class_key *key, enum six_lock_init_flags flags)
{
	atomic_set(&lock->state, 0);
	raw_spin_lock_init(&lock->wait_lock);
	INIT_LIST_HEAD(&lock->wait_list);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	debug_check_no_locks_freed((void *) lock, sizeof(*lock));
	lockdep_init_map(&lock->dep_map, name, key, 0);
#endif

	if (flags & SIX_LOCK_INIT_PCPU) {
		/*
		 * We don't return an error here on memory allocation failure
		 * since percpu is an optimization, and locks will work with the
		 * same semantics in non-percpu mode: callers can check for
		 * failure if they wish by checking lock->readers, but generally
		 * will not want to treat it as an error.
		 */
		lock->readers = alloc_percpu(unsigned);
	}
}
EXPORT_SYMBOL_GPL(__six_lock_init);
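Initialization normally goes through the six_lock_init() wrapper from six.h, which supplies the static lock_class_key; a usage sketch (assuming the wrapper forwards @flags as in this tree):

	struct six_lock l;

	six_lock_init(&l, SIX_LOCK_INIT_PCPU);

	/* percpu mode is best-effort: l.readers is NULL if the allocation */
	/* failed, and the lock then behaves identically in non-percpu mode */
	six_lock_intent(&l, NULL, NULL);
	six_unlock_intent(&l);

	six_lock_exit(&l);	/* frees the percpu read counts, if any */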