Update bcachefs sources to 18686af684 bcachefs: Inode backpointers

Author: Kent Overstreet, 2021-03-28 17:38:28 -04:00
Parent: bb6eccc2ec
Commit: a2094890a9
51 changed files with 3458 additions and 789 deletions


@ -1 +1 @@
-ad68801b939cdda0530f54cd07b3212e98fe1d75
+18686af68412ebfad9c2adc6ee976ffdb9e1b886


@ -156,6 +156,10 @@ update-bcachefs-sources:
 	git add linux/six.c
 	cp $(LINUX_DIR)/include/linux/six.h include/linux/
 	git add include/linux/six.h
+	cp $(LINUX_DIR)/include/linux/list_nulls.h include/linux/
+	git add include/linux/list_nulls.h
+	cp $(LINUX_DIR)/include/linux/poison.h include/linux/
+	git add include/linux/poison.h
 	$(RM) libbcachefs/*.mod.c
 	git -C $(LINUX_DIR) rev-parse HEAD | tee .bcachefs_revision
 	git add .bcachefs_revision


@ -323,9 +323,7 @@ static void print_node_ondisk(struct bch_fs *c, struct btree *b)
 			le64_to_cpu(i->journal_seq));
 		offset += sectors;

-		for (k = i->start;
-		     k != vstruct_last(i);
-		     k = bkey_next_skip_noops(k, vstruct_last(i))) {
+		for (k = i->start; k != vstruct_last(i); k = bkey_next(k)) {
 			struct bkey u;
 			char buf[4096];

include/linux/list_nulls.h (new file, 145 lines)

@ -0,0 +1,145 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_LIST_NULLS_H
#define _LINUX_LIST_NULLS_H
#include <linux/poison.h>
#include <linux/const.h>
/*
* Special version of lists, where end of list is not a NULL pointer,
* but a 'nulls' marker, which can have many different values.
* (up to 2^31 different values guaranteed on all platforms)
*
* In the standard hlist, termination of a list is the NULL pointer.
* In this special 'nulls' variant, we use the fact that objects stored in
* a list are aligned on a word (4 or 8 bytes alignment).
* We therefore use the least significant bit of 'ptr' :
* Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1)
* Set to 0 : This is a pointer to some object (ptr)
*/
struct hlist_nulls_head {
struct hlist_nulls_node *first;
};
struct hlist_nulls_node {
struct hlist_nulls_node *next, **pprev;
};
#define NULLS_MARKER(value) (1UL | (((long)value) << 1))
#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \
((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
#define hlist_nulls_entry_safe(ptr, type, member) \
({ typeof(ptr) ____ptr = (ptr); \
!is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
})
/**
* is_a_nulls - Test if a ptr is a nulls marker
* @ptr: ptr to be tested
*
*/
static inline int is_a_nulls(const struct hlist_nulls_node *ptr)
{
return ((unsigned long)ptr & 1);
}
/**
* get_nulls_value - Get the 'nulls' value of the end of chain
* @ptr: end of chain
*
* Should be called only if is_a_nulls(ptr);
*/
static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr)
{
return ((unsigned long)ptr) >> 1;
}
/**
* hlist_nulls_unhashed - Has node been removed and reinitialized?
* @h: Node to be checked
*
* Note that not all removal functions will leave a node in unhashed state.
* For example, hlist_del_init_rcu() leaves the node in unhashed state,
* but hlist_nulls_del() does not.
*/
static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h)
{
return !h->pprev;
}
/**
* hlist_nulls_unhashed_lockless - Has node been removed and reinitialized?
* @h: Node to be checked
*
* Note that not all removal functions will leave a node in unhashed state.
* For example, hlist_del_init_rcu() leaves the node in unhashed state,
* but hlist_nulls_del() does not. Unlike hlist_nulls_unhashed(), this
* function may be used locklessly.
*/
static inline int hlist_nulls_unhashed_lockless(const struct hlist_nulls_node *h)
{
return !READ_ONCE(h->pprev);
}
static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
{
return is_a_nulls(READ_ONCE(h->first));
}
static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
struct hlist_nulls_head *h)
{
struct hlist_nulls_node *first = h->first;
n->next = first;
WRITE_ONCE(n->pprev, &h->first);
h->first = n;
if (!is_a_nulls(first))
WRITE_ONCE(first->pprev, &n->next);
}
static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
{
struct hlist_nulls_node *next = n->next;
struct hlist_nulls_node **pprev = n->pprev;
WRITE_ONCE(*pprev, next);
if (!is_a_nulls(next))
WRITE_ONCE(next->pprev, pprev);
}
static inline void hlist_nulls_del(struct hlist_nulls_node *n)
{
__hlist_nulls_del(n);
WRITE_ONCE(n->pprev, LIST_POISON2);
}
/**
* hlist_nulls_for_each_entry - iterate over list of given type
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_node to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_node within the struct.
*
*/
#define hlist_nulls_for_each_entry(tpos, pos, head, member) \
for (pos = (head)->first; \
(!is_a_nulls(pos)) && \
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
pos = pos->next)
/**
* hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_node to use as a loop cursor.
* @member: the name of the hlist_node within the struct.
*
*/
#define hlist_nulls_for_each_entry_from(tpos, pos, member) \
for (; (!is_a_nulls(pos)) && \
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
pos = pos->next)
#endif
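As a rough illustration of the comment at the top of this header, here is a minimal userspace-style sketch of a nulls list. The struct entry type, the nulls value 7, and the printf output are made up for illustration and are not part of this commit; the point is that iteration ends at the nulls marker rather than at a NULL pointer.

#include <stdio.h>
#include <linux/list_nulls.h>

struct entry {
	struct hlist_nulls_node	node;
	int			key;
};

static void nulls_list_demo(void)
{
	static struct entry e = { .key = 42 };
	struct hlist_nulls_head head;
	struct hlist_nulls_node *n;
	struct entry *pos;

	/* Terminate the empty list with the 'nulls' value 7 instead of NULL */
	INIT_HLIST_NULLS_HEAD(&head, 7);
	hlist_nulls_add_head(&e.node, &head);

	/* The walk stops when it reaches a nulls marker, not a NULL pointer */
	hlist_nulls_for_each_entry(pos, n, &head, node)
		printf("key %d\n", pos->key);

	/* The marker that ended the walk still carries the value passed at init */
	if (is_a_nulls(n))
		printf("list ended at nulls value %lu\n", get_nulls_value(n));
}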

include/linux/overflow.h (new file, 346 lines)

@ -0,0 +1,346 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
#ifndef __LINUX_OVERFLOW_H
#define __LINUX_OVERFLOW_H
#include <linux/compiler.h>
#include <linux/limits.h>
/*
* In the fallback code below, we need to compute the minimum and
* maximum values representable in a given type. These macros may also
* be useful elsewhere, so we provide them outside the
* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
*
* It would seem more obvious to do something like
*
* #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
* #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
*
* Unfortunately, the middle expressions, strictly speaking, have
* undefined behaviour, and at least some versions of gcc warn about
* the type_max expression (but not if -fsanitize=undefined is in
* effect; in that case, the warning is deferred to runtime...).
*
* The slightly excessive casting in type_min is to make sure the
* macros also produce sensible values for the exotic type _Bool. [The
* overflow checkers only almost work for _Bool, but that's
* a-feature-not-a-bug, since people shouldn't be doing arithmetic on
* _Bools. Besides, the gcc builtins don't allow _Bool* as third
* argument.]
*
* Idea stolen from
* https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html -
* credit to Christian Biere.
*/
#define is_signed_type(type) (((type)(-1)) < (type)1)
#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
#define type_min(T) ((T)((T)-type_max(T)-(T)1))
/*
* Avoids triggering -Wtype-limits compilation warning,
* while using unsigned data types to check a < 0.
*/
#define is_non_negative(a) ((a) > 0 || (a) == 0)
#define is_negative(a) (!(is_non_negative(a)))
/*
* Allows for effectively applying __must_check to a macro so we can have
* both the type-agnostic benefits of the macros while also being able to
* enforce that the return value is, in fact, checked.
*/
static inline bool __must_check __must_check_overflow(bool overflow)
{
return unlikely(overflow);
}
#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
/*
* For simplicity and code hygiene, the fallback code below insists on
* a, b and *d having the same type (similar to the min() and max()
* macros), whereas gcc's type-generic overflow checkers accept
* different types. Hence we don't just make check_add_overflow an
* alias for __builtin_add_overflow, but add type checks similar to
* below.
*/
#define check_add_overflow(a, b, d) __must_check_overflow(({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
__builtin_add_overflow(__a, __b, __d); \
}))
#define check_sub_overflow(a, b, d) __must_check_overflow(({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
__builtin_sub_overflow(__a, __b, __d); \
}))
#define check_mul_overflow(a, b, d) __must_check_overflow(({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
__builtin_mul_overflow(__a, __b, __d); \
}))
#else
/* Checking for unsigned overflow is relatively easy without causing UB. */
#define __unsigned_add_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = __a + __b; \
*__d < __a; \
})
#define __unsigned_sub_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = __a - __b; \
__a < __b; \
})
/*
* If one of a or b is a compile-time constant, this avoids a division.
*/
#define __unsigned_mul_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = __a * __b; \
__builtin_constant_p(__b) ? \
__b > 0 && __a > type_max(typeof(__a)) / __b : \
__a > 0 && __b > type_max(typeof(__b)) / __a; \
})
/*
* For signed types, detecting overflow is much harder, especially if
* we want to avoid UB. But the interface of these macros is such that
* we must provide a result in *d, and in fact we must produce the
* result promised by gcc's builtins, which is simply the possibly
* wrapped-around value. Fortunately, we can just formally do the
* operations in the widest relevant unsigned type (u64) and then
* truncate the result - gcc is smart enough to generate the same code
* with and without the (u64) casts.
*/
/*
* Adding two signed integers can overflow only if they have the same
* sign, and overflow has happened iff the result has the opposite
* sign.
*/
#define __signed_add_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = (u64)__a + (u64)__b; \
(((~(__a ^ __b)) & (*__d ^ __a)) \
& type_min(typeof(__a))) != 0; \
})
/*
* Subtraction is similar, except that overflow can now happen only
* when the signs are opposite. In this case, overflow has happened if
* the result has the opposite sign of a.
*/
#define __signed_sub_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = (u64)__a - (u64)__b; \
((((__a ^ __b)) & (*__d ^ __a)) \
& type_min(typeof(__a))) != 0; \
})
/*
* Signed multiplication is rather hard. gcc always follows C99, so
* division is truncated towards 0. This means that we can write the
* overflow check like this:
*
* (a > 0 && (b > MAX/a || b < MIN/a)) ||
* (a < -1 && (b > MIN/a || b < MAX/a)) ||
* (a == -1 && b == MIN)
*
* The redundant casts of -1 are to silence an annoying -Wtype-limits
* (included in -Wextra) warning: When the type is u8 or u16, the
* __b_c_e in check_mul_overflow obviously selects
* __unsigned_mul_overflow, but unfortunately gcc still parses this
* code and warns about the limited range of __b.
*/
#define __signed_mul_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
typeof(a) __tmax = type_max(typeof(a)); \
typeof(a) __tmin = type_min(typeof(a)); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = (u64)__a * (u64)__b; \
(__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
(__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
(__b == (typeof(__b))-1 && __a == __tmin); \
})
#define check_add_overflow(a, b, d) __must_check_overflow( \
__builtin_choose_expr(is_signed_type(typeof(a)), \
__signed_add_overflow(a, b, d), \
__unsigned_add_overflow(a, b, d)))
#define check_sub_overflow(a, b, d) __must_check_overflow( \
__builtin_choose_expr(is_signed_type(typeof(a)), \
__signed_sub_overflow(a, b, d), \
__unsigned_sub_overflow(a, b, d)))
#define check_mul_overflow(a, b, d) __must_check_overflow( \
__builtin_choose_expr(is_signed_type(typeof(a)), \
__signed_mul_overflow(a, b, d), \
__unsigned_mul_overflow(a, b, d)))
#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
/** check_shl_overflow() - Calculate a left-shifted value and check overflow
*
* @a: Value to be shifted
* @s: How many bits left to shift
* @d: Pointer to where to store the result
*
* Computes *@d = (@a << @s)
*
* Returns true if '*d' cannot hold the result or when 'a << s' doesn't
* make sense. Example conditions:
* - 'a << s' causes bits to be lost when stored in *d.
* - 's' is garbage (e.g. negative) or so large that the result of
* 'a << s' is guaranteed to be 0.
* - 'a' is negative.
* - 'a << s' sets the sign bit, if any, in '*d'.
*
* '*d' will hold the results of the attempted shift, but is not
* considered "safe for use" if false is returned.
*/
#define check_shl_overflow(a, s, d) __must_check_overflow(({ \
typeof(a) _a = a; \
typeof(s) _s = s; \
typeof(d) _d = d; \
u64 _a_full = _a; \
unsigned int _to_shift = \
is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \
*_d = (_a_full << _to_shift); \
(_to_shift != _s || is_negative(*_d) || is_negative(_a) || \
(*_d >> _to_shift) != _a); \
}))
/**
* array_size() - Calculate size of 2-dimensional array.
*
* @a: dimension one
* @b: dimension two
*
* Calculates size of 2-dimensional array: @a * @b.
*
* Returns: number of bytes needed to represent the array or SIZE_MAX on
* overflow.
*/
static inline __must_check size_t array_size(size_t a, size_t b)
{
size_t bytes;
if (check_mul_overflow(a, b, &bytes))
return SIZE_MAX;
return bytes;
}
/**
* array3_size() - Calculate size of 3-dimensional array.
*
* @a: dimension one
* @b: dimension two
* @c: dimension three
*
* Calculates size of 3-dimensional array: @a * @b * @c.
*
* Returns: number of bytes needed to represent the array or SIZE_MAX on
* overflow.
*/
static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
{
size_t bytes;
if (check_mul_overflow(a, b, &bytes))
return SIZE_MAX;
if (check_mul_overflow(bytes, c, &bytes))
return SIZE_MAX;
return bytes;
}
/*
* Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for
* struct_size() below.
*/
static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
{
size_t bytes;
if (check_mul_overflow(a, b, &bytes))
return SIZE_MAX;
if (check_add_overflow(bytes, c, &bytes))
return SIZE_MAX;
return bytes;
}
/**
* struct_size() - Calculate size of structure with trailing array.
* @p: Pointer to the structure.
* @member: Name of the array member.
* @count: Number of elements in the array.
*
* Calculates size of memory needed for structure @p followed by an
* array of @count number of @member elements.
*
* Return: number of bytes needed or SIZE_MAX on overflow.
*/
#define struct_size(p, member, count) \
__ab_c_size(count, \
sizeof(*(p)->member) + __must_be_array((p)->member),\
sizeof(*(p)))
/**
* flex_array_size() - Calculate size of a flexible array member
* within an enclosing structure.
*
* @p: Pointer to the structure.
* @member: Name of the flexible array member.
* @count: Number of elements in the array.
*
* Calculates size of a flexible array of @count number of @member
* elements, at the end of structure @p.
*
* Return: number of bytes needed or SIZE_MAX on overflow.
*/
#define flex_array_size(p, member, count) \
array_size(count, \
sizeof(*(p)->member) + __must_be_array((p)->member))
#endif /* __LINUX_OVERFLOW_H */
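To make the intent of the helpers above concrete, here is a small sketch of the typical pattern: size a structure with a trailing flexible array and bail out instead of allocating a truncated buffer. struct blob and blob_alloc() are hypothetical names, not part of this commit, and the sketch assumes the usual compiler.h helpers (e.g. __must_be_array) are available.

#include <stdint.h>
#include <stdlib.h>
#include <linux/overflow.h>

struct blob {
	unsigned int	nr;
	unsigned char	data[];		/* flexible array member */
};

static struct blob *blob_alloc(unsigned int nr)
{
	struct blob *b = NULL;
	/* struct_size() returns SIZE_MAX rather than a wrapped-around size */
	size_t bytes = struct_size(b, data, nr);

	if (bytes == SIZE_MAX)
		return NULL;

	b = malloc(bytes);
	if (b)
		b->nr = nr;
	return b;
}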

include/linux/poison.h (new file, 85 lines)

@ -0,0 +1,85 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_POISON_H
#define _LINUX_POISON_H
/********** include/linux/list.h **********/
/*
* Architectures might want to move the poison pointer offset
* into some well-recognized area such as 0xdead000000000000,
* that is also not mappable by user-space exploits:
*/
#ifdef CONFIG_ILLEGAL_POINTER_VALUE
# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL)
#else
# define POISON_POINTER_DELTA 0
#endif
/*
* These are non-NULL pointers that will result in page faults
* under normal circumstances, used to verify that nobody uses
* non-initialized list entries.
*/
#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA)
#define LIST_POISON2 ((void *) 0x122 + POISON_POINTER_DELTA)
/********** include/linux/timer.h **********/
#define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA)
/********** mm/page_poison.c **********/
#ifdef CONFIG_PAGE_POISONING_ZERO
#define PAGE_POISON 0x00
#else
#define PAGE_POISON 0xaa
#endif
/********** mm/page_alloc.c ************/
#define TAIL_MAPPING ((void *) 0x400 + POISON_POINTER_DELTA)
/********** mm/slab.c **********/
/*
* Magic nums for obj red zoning.
* Placed in the first word before and the first word after an obj.
*/
#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
#define SLUB_RED_INACTIVE 0xbb
#define SLUB_RED_ACTIVE 0xcc
/* ...and for poisoning */
#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
#define POISON_FREE 0x6b /* for use-after-free poisoning */
#define POISON_END 0xa5 /* end-byte of poisoning */
/********** arch/$ARCH/mm/init.c **********/
#define POISON_FREE_INITMEM 0xcc
/********** arch/ia64/hp/common/sba_iommu.c **********/
/*
* arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a
* value of "SBAIOMMU POISON\0" for spill-over poisoning.
*/
/********** fs/jbd/journal.c **********/
#define JBD_POISON_FREE 0x5b
#define JBD2_POISON_FREE 0x5c
/********** drivers/base/dmapool.c **********/
#define POOL_POISON_FREED 0xa7 /* !inuse */
#define POOL_POISON_ALLOCATED 0xa9 /* !initted */
/********** drivers/atm/ **********/
#define ATM_POISON_FREE 0x12
#define ATM_POISON 0xdeadbeef
/********** kernel/mutexes **********/
#define MUTEX_DEBUG_INIT 0x11
#define MUTEX_DEBUG_FREE 0x22
#define MUTEX_POISON_WW_CTX ((void *) 0x500 + POISON_POINTER_DELTA)
/********** security/ **********/
#define KEY_DESTROY 0xbd
#endif


@ -45,6 +45,7 @@ static inline type get_random_##type(void) \
 get_random_type(int);
 get_random_type(long);
+get_random_type(u32);
 get_random_type(u64);

 #endif /* _LINUX_RANDOM_H */


@ -13,4 +13,32 @@
#define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v)
/* Has the specified rcu_head structure been handed to call_rcu()? */
/**
* rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu()
* @rhp: The rcu_head structure to initialize.
*
* If you intend to invoke rcu_head_after_call_rcu() to test whether a
* given rcu_head structure has already been passed to call_rcu(), then
* you must also invoke this rcu_head_init() function on it just after
* allocating that structure. Calls to this function must not race with
* calls to call_rcu(), rcu_head_after_call_rcu(), or callback invocation.
*/
static inline void rcu_head_init(struct rcu_head *rhp)
{
rhp->func = (void *)~0L;
}
static inline bool
rcu_head_after_call_rcu(struct rcu_head *rhp,
void (*f)(struct rcu_head *head))
{
void (*func)(struct rcu_head *head) = READ_ONCE(rhp->func);
if (func == f)
return true;
return false;
}
#endif /* __TOOLS_LINUX_RCUPDATE_H */
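The two helpers above are meant to be used together; the sketch below shows the intended pattern with a hypothetical refcounted object. struct obj, obj_new() and obj_free_rcu() are illustrative, kzalloc()/call_rcu() come from the surrounding shims, and the caveat from the comment applies: the check must not race with call_rcu() or with callback invocation.

struct obj {
	struct rcu_head	rcu;
	int		val;
};

static void obj_free_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct obj, rcu));
}

static struct obj *obj_new(void)
{
	struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

	if (o)
		rcu_head_init(&o->rcu);	/* "not yet handed to call_rcu()" */
	return o;
}

static void obj_release(struct obj *o)
{
	/* Debug check: complain if this object was already queued for freeing */
	BUG_ON(rcu_head_after_call_rcu(&o->rcu, obj_free_rcu));

	call_rcu(&o->rcu, obj_free_rcu);
}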


@ -0,0 +1,135 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Resizable, Scalable, Concurrent Hash Table
*
* Simple structures that might be needed in include
* files.
*/
#ifndef _LINUX_RHASHTABLE_TYPES_H
#define _LINUX_RHASHTABLE_TYPES_H
#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
struct rhash_head {
struct rhash_head __rcu *next;
};
struct rhlist_head {
struct rhash_head rhead;
struct rhlist_head __rcu *next;
};
struct bucket_table;
/**
* struct rhashtable_compare_arg - Key for the function rhashtable_compare
* @ht: Hash table
* @key: Key to compare against
*/
struct rhashtable_compare_arg {
struct rhashtable *ht;
const void *key;
};
typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
const void *obj);
/**
* struct rhashtable_params - Hash table construction parameters
* @nelem_hint: Hint on number of elements, should be 75% of desired size
* @key_len: Length of key
* @key_offset: Offset of key in struct to be hashed
* @head_offset: Offset of rhash_head in struct to be hashed
* @max_size: Maximum size while expanding
* @min_size: Minimum size while shrinking
* @automatic_shrinking: Enable automatic shrinking of tables
* @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
* @obj_hashfn: Function to hash object
* @obj_cmpfn: Function to compare key with object
*/
struct rhashtable_params {
u16 nelem_hint;
u16 key_len;
u16 key_offset;
u16 head_offset;
unsigned int max_size;
u16 min_size;
bool automatic_shrinking;
rht_hashfn_t hashfn;
rht_obj_hashfn_t obj_hashfn;
rht_obj_cmpfn_t obj_cmpfn;
};
/**
* struct rhashtable - Hash table handle
* @tbl: Bucket table
* @key_len: Key length for hashfn
* @max_elems: Maximum number of elements in table
* @p: Configuration parameters
* @rhlist: True if this is an rhltable
* @run_work: Deferred worker to expand/shrink asynchronously
* @mutex: Mutex to protect current/future table swapping
* @lock: Spin lock to protect walker list
* @nelems: Number of elements in table
*/
struct rhashtable {
struct bucket_table __rcu *tbl;
unsigned int key_len;
unsigned int max_elems;
struct rhashtable_params p;
bool rhlist;
struct work_struct run_work;
struct mutex mutex;
spinlock_t lock;
atomic_t nelems;
};
/**
* struct rhltable - Hash table with duplicate objects in a list
* @ht: Underlying rhashtable
*/
struct rhltable {
struct rhashtable ht;
};
/**
* struct rhashtable_walker - Hash table walker
* @list: List entry on list of walkers
* @tbl: The table that we were walking over
*/
struct rhashtable_walker {
struct list_head list;
struct bucket_table *tbl;
};
/**
* struct rhashtable_iter - Hash table iterator
* @ht: Table to iterate through
* @p: Current pointer
* @list: Current hash list pointer
* @walker: Associated rhashtable walker
* @slot: Current slot
* @skip: Number of entries to skip in slot
*/
struct rhashtable_iter {
struct rhashtable *ht;
struct rhash_head *p;
struct rhlist_head *list;
struct rhashtable_walker walker;
unsigned int slot;
unsigned int skip;
bool end_of_table;
};
int rhashtable_init(struct rhashtable *ht,
const struct rhashtable_params *params);
int rhltable_init(struct rhltable *hlt,
const struct rhashtable_params *params);
#endif /* _LINUX_RHASHTABLE_TYPES_H */
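For orientation, this is roughly how the parameters above get filled in by a user of the hash table. struct my_obj and its fields are hypothetical and the include path is assumed; only the rhashtable types and rhashtable_init() come from this header.

#include <stddef.h>
#include <linux/rhashtable-types.h>

struct my_obj {
	u64			id;	/* lookup key */
	struct rhash_head	hash;	/* links the object into the table */
};

static const struct rhashtable_params my_obj_params = {
	.head_offset		= offsetof(struct my_obj, hash),
	.key_offset		= offsetof(struct my_obj, id),
	.key_len		= sizeof(u64),
	.automatic_shrinking	= true,
};

/*
 * Then, once at startup:
 *
 *	struct rhashtable table;
 *	int ret = rhashtable_init(&table, &my_obj_params);
 */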

File diff suppressed because it is too large.


@ -196,6 +196,7 @@ void six_lock_increment(struct six_lock *, enum six_lock_type);
 void six_lock_wakeup_all(struct six_lock *);

+void six_lock_pcpu_free_rcu(struct six_lock *);
 void six_lock_pcpu_free(struct six_lock *);
 void six_lock_pcpu_alloc(struct six_lock *);


@ -66,6 +66,7 @@ static inline void *krealloc(void *old, size_t size, gfp_t flags)
 #define kzfree(p) free(p)
 #define kvmalloc(size, flags) kmalloc(size, flags)
+#define kvzalloc(size, flags) kzalloc(size, flags)
 #define kvfree(p) kfree(p)

 static inline struct page *alloc_pages(gfp_t flags, unsigned int order)


@ -11,6 +11,8 @@
 #define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
 #include <asm/types.h>
+#include <linux/cache.h>

 #define BITS_PER_LONG __BITS_PER_LONG

 struct page;


@ -138,19 +138,18 @@ struct bpos {
#define KEY_SNAPSHOT_MAX ((__u32)~0U) #define KEY_SNAPSHOT_MAX ((__u32)~0U)
#define KEY_SIZE_MAX ((__u32)~0U) #define KEY_SIZE_MAX ((__u32)~0U)
-static inline struct bpos POS(__u64 inode, __u64 offset)
+static inline struct bpos SPOS(__u64 inode, __u64 offset, __u32 snapshot)
 {
-	struct bpos ret;
-
-	ret.inode	= inode;
-	ret.offset	= offset;
-	ret.snapshot	= 0;
-
-	return ret;
+	return (struct bpos) {
+		.inode		= inode,
+		.offset		= offset,
+		.snapshot	= snapshot,
+	};
 }

-#define POS_MIN		POS(0, 0)
-#define POS_MAX		POS(KEY_INODE_MAX, KEY_OFFSET_MAX)
+#define POS_MIN		SPOS(0, 0, 0)
+#define POS_MAX		SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX)
+#define POS(_inode, _offset)	SPOS(_inode, _offset, 0)
/* Empty placeholder struct, for container_of() */ /* Empty placeholder struct, for container_of() */
struct bch_val { struct bch_val {
@ -707,7 +706,9 @@ struct bch_inode_generation {
x(bi_foreground_target, 16) \ x(bi_foreground_target, 16) \
x(bi_background_target, 16) \ x(bi_background_target, 16) \
x(bi_erasure_code, 16) \ x(bi_erasure_code, 16) \
-	x(bi_fields_set, 16)
+	x(bi_fields_set, 16) \
+	x(bi_dir, 64) \
+	x(bi_dir_offset, 64)
/* subset of BCH_INODE_FIELDS */ /* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \ #define BCH_INODE_OPTS() \
@ -743,6 +744,7 @@ enum {
__BCH_INODE_I_SIZE_DIRTY= 5, __BCH_INODE_I_SIZE_DIRTY= 5,
__BCH_INODE_I_SECTORS_DIRTY= 6, __BCH_INODE_I_SECTORS_DIRTY= 6,
__BCH_INODE_UNLINKED = 7, __BCH_INODE_UNLINKED = 7,
__BCH_INODE_BACKPTR_UNTRUSTED = 8,
/* bits 20+ reserved for packed fields below: */ /* bits 20+ reserved for packed fields below: */
}; };
@ -755,6 +757,7 @@ enum {
#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY) #define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY) #define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED) #define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED)
#define BCH_INODE_BACKPTR_UNTRUSTED (1 << __BCH_INODE_BACKPTR_UNTRUSTED)
LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
@ -1204,7 +1207,9 @@ enum bcachefs_metadata_version {
bcachefs_metadata_version_new_versioning = 10, bcachefs_metadata_version_new_versioning = 10,
bcachefs_metadata_version_bkey_renumber = 10, bcachefs_metadata_version_bkey_renumber = 10,
bcachefs_metadata_version_inode_btree_change = 11, bcachefs_metadata_version_inode_btree_change = 11,
bcachefs_metadata_version_max = 12, bcachefs_metadata_version_snapshot = 12,
bcachefs_metadata_version_inode_backpointers = 13,
bcachefs_metadata_version_max = 14,
}; };
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
@ -1736,7 +1741,7 @@ struct btree_node {
/* Closed interval: */ /* Closed interval: */
struct bpos min_key; struct bpos min_key;
struct bpos max_key; struct bpos max_key;
-	struct bch_extent_ptr	ptr;
+	struct bch_extent_ptr	_ptr; /* not used anymore */
struct bkey_format format; struct bkey_format format;
union { union {


@ -614,15 +614,19 @@ const char *bch2_bkey_format_validate(struct bkey_format *f)
return "incorrect number of fields"; return "incorrect number of fields";
for (i = 0; i < f->nr_fields; i++) { for (i = 0; i < f->nr_fields; i++) {
+		unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
+		u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
 		u64 field_offset = le64_to_cpu(f->field_offset[i]);

-		if (f->bits_per_field[i] > 64)
+		if (f->bits_per_field[i] > unpacked_bits)
 			return "field too large";

-		if (field_offset &&
-		    (f->bits_per_field[i] == 64 ||
-		     (field_offset + ((1ULL << f->bits_per_field[i]) - 1) <
-		      field_offset)))
+		if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
+			return "offset + bits overflow";
+
+		if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
+		     unpacked_mask) <
+		    field_offset)
 			return "offset + bits overflow";
bits += f->bits_per_field[i]; bits += f->bits_per_field[i];
@ -1045,7 +1049,7 @@ int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l,
high_word(f, r), high_word(f, r),
b->nr_key_bits); b->nr_key_bits);
EBUG_ON(ret != bkey_cmp(bkey_unpack_pos(b, l), EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
bkey_unpack_pos(b, r))); bkey_unpack_pos(b, r)));
return ret; return ret;
} }
@ -1055,7 +1059,7 @@ int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b,
const struct bkey_packed *l, const struct bkey_packed *l,
const struct bpos *r) const struct bpos *r)
{ {
return bkey_cmp(bkey_unpack_pos_format_checked(b, l), *r); return bpos_cmp(bkey_unpack_pos_format_checked(b, l), *r);
} }
__pure __flatten __pure __flatten
@ -1076,7 +1080,7 @@ int bch2_bkey_cmp_packed(const struct btree *b,
r = (void*) &unpacked; r = (void*) &unpacked;
} }
return bkey_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p); return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
} }
__pure __flatten __pure __flatten
@ -1087,7 +1091,7 @@ int __bch2_bkey_cmp_left_packed(const struct btree *b,
const struct bkey *l_unpacked; const struct bkey *l_unpacked;
return unlikely(l_unpacked = packed_to_bkey_c(l)) return unlikely(l_unpacked = packed_to_bkey_c(l))
? bkey_cmp(l_unpacked->p, *r) ? bpos_cmp(l_unpacked->p, *r)
: __bch2_bkey_cmp_left_packed_format_checked(b, l, r); : __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
} }
@ -1123,11 +1127,12 @@ void bch2_bkey_pack_test(void)
struct bkey_packed p; struct bkey_packed p;
 	struct bkey_format test_format = {
-		.key_u64s	= 2,
+		.key_u64s	= 3,
 		.nr_fields	= BKEY_NR_FIELDS,
 		.bits_per_field = {
 			13,
 			64,
+			32,
 		},
}; };


@ -33,16 +33,6 @@ struct bkey_s {
#define bkey_next(_k) vstruct_next(_k) #define bkey_next(_k) vstruct_next(_k)
static inline struct bkey_packed *bkey_next_skip_noops(struct bkey_packed *k,
struct bkey_packed *end)
{
k = bkey_next(k);
while (k != end && !k->u64s)
k = (void *) ((u64 *) k + 1);
return k;
}
#define bkey_val_u64s(_k) ((_k)->u64s - BKEY_U64s) #define bkey_val_u64s(_k) ((_k)->u64s - BKEY_U64s)
static inline size_t bkey_val_bytes(const struct bkey *k) static inline size_t bkey_val_bytes(const struct bkey *k)
@ -150,29 +140,27 @@ static inline int bkey_cmp_left_packed_byval(const struct btree *b,
return bkey_cmp_left_packed(b, l, &r); return bkey_cmp_left_packed(b, l, &r);
} }
-#if 1
+static __always_inline int bpos_cmp(struct bpos l, struct bpos r)
+{
+	return  cmp_int(l.inode,    r.inode) ?:
+		cmp_int(l.offset,   r.offset) ?:
+		cmp_int(l.snapshot, r.snapshot);
+}
+
 static __always_inline int bkey_cmp(struct bpos l, struct bpos r)
 {
-	if (l.inode != r.inode)
-		return l.inode < r.inode ? -1 : 1;
-	if (l.offset != r.offset)
-		return l.offset < r.offset ? -1 : 1;
-	if (l.snapshot != r.snapshot)
-		return l.snapshot < r.snapshot ? -1 : 1;
-	return 0;
+	return  cmp_int(l.inode,    r.inode) ?:
+		cmp_int(l.offset,   r.offset);
 }
-#else
-int bkey_cmp(struct bpos l, struct bpos r);
-#endif

 static inline struct bpos bpos_min(struct bpos l, struct bpos r)
 {
-	return bkey_cmp(l, r) < 0 ? l : r;
+	return bpos_cmp(l, r) < 0 ? l : r;
 }

 static inline struct bpos bpos_max(struct bpos l, struct bpos r)
 {
-	return bkey_cmp(l, r) > 0 ? l : r;
+	return bpos_cmp(l, r) > 0 ? l : r;
 }
#define sbb(a, b, borrow) \ #define sbb(a, b, borrow) \
@ -200,7 +188,7 @@ static inline struct bpos bpos_sub(struct bpos a, struct bpos b)
static inline struct bpos bpos_diff(struct bpos l, struct bpos r) static inline struct bpos bpos_diff(struct bpos l, struct bpos r)
{ {
if (bkey_cmp(l, r) > 0) if (bpos_cmp(l, r) > 0)
swap(l, r); swap(l, r);
return bpos_sub(r, l); return bpos_sub(r, l);
@ -262,24 +250,46 @@ static inline unsigned bkey_format_key_bits(const struct bkey_format *format)
format->bits_per_field[BKEY_FIELD_SNAPSHOT]; format->bits_per_field[BKEY_FIELD_SNAPSHOT];
} }
-static inline struct bpos bkey_successor(struct bpos p)
+static inline struct bpos bpos_successor(struct bpos p)
 {
-	struct bpos ret = p;
-
-	if (!++ret.offset)
-		BUG_ON(!++ret.inode);
-
-	return ret;
+	if (!++p.snapshot &&
+	    !++p.offset &&
+	    !++p.inode)
+		BUG();
+
+	return p;
 }

-static inline struct bpos bkey_predecessor(struct bpos p)
+static inline struct bpos bpos_predecessor(struct bpos p)
 {
-	struct bpos ret = p;
-
-	if (!ret.offset--)
-		BUG_ON(!ret.inode--);
-
-	return ret;
+	if (!p.snapshot-- &&
+	    !p.offset-- &&
+	    !p.inode--)
+		BUG();
+
+	return p;
+}
+
+static inline struct bpos bpos_nosnap_successor(struct bpos p)
+{
+	p.snapshot = 0;
+
+	if (!++p.offset &&
+	    !++p.inode)
+		BUG();
+
+	return p;
+}
+
+static inline struct bpos bpos_nosnap_predecessor(struct bpos p)
+{
+	p.snapshot = 0;
+
+	if (!p.offset-- &&
+	    !p.inode--)
+		BUG();
+
+	return p;
 }
static inline u64 bkey_start_offset(const struct bkey *k) static inline u64 bkey_start_offset(const struct bkey *k)


@ -119,9 +119,16 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
return "nonzero size field"; return "nonzero size field";
} }
-	if (k.k->p.snapshot)
+	if (type != BKEY_TYPE_btree &&
+	    !btree_type_has_snapshots(type) &&
+	    k.k->p.snapshot)
 		return "nonzero snapshot";

+	if (type != BKEY_TYPE_btree &&
+	    btree_type_has_snapshots(type) &&
+	    k.k->p.snapshot != U32_MAX)
+		return "invalid snapshot field";
+
 	if (type != BKEY_TYPE_btree &&
 	    !bkey_cmp(k.k->p, POS_MAX))
 		return "POS_MAX key";
@ -138,10 +145,10 @@ const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k)
{ {
if (bkey_cmp(k.k->p, b->data->min_key) < 0) if (bpos_cmp(k.k->p, b->data->min_key) < 0)
return "key before start of btree node"; return "key before start of btree node";
if (bkey_cmp(k.k->p, b->data->max_key) > 0) if (bpos_cmp(k.k->p, b->data->max_key) > 0)
return "key past end of btree node"; return "key past end of btree node";
return NULL; return NULL;
@ -165,9 +172,9 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
void bch2_bpos_to_text(struct printbuf *out, struct bpos pos) void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
{ {
if (!bkey_cmp(pos, POS_MIN)) if (!bpos_cmp(pos, POS_MIN))
pr_buf(out, "POS_MIN"); pr_buf(out, "POS_MIN");
else if (!bkey_cmp(pos, POS_MAX)) else if (!bpos_cmp(pos, POS_MAX))
pr_buf(out, "POS_MAX"); pr_buf(out, "POS_MAX");
else { else {
if (pos.inode == U64_MAX) if (pos.inode == U64_MAX)
@ -256,7 +263,7 @@ enum merge_result bch2_bkey_merge(struct bch_fs *c,
!ops->key_merge || !ops->key_merge ||
l.k->type != r.k->type || l.k->type != r.k->type ||
bversion_cmp(l.k->version, r.k->version) || bversion_cmp(l.k->version, r.k->version) ||
bkey_cmp(l.k->p, bkey_start_pos(r.k))) bpos_cmp(l.k->p, bkey_start_pos(r.k)))
return BCH_MERGE_NOMERGE; return BCH_MERGE_NOMERGE;
ret = ops->key_merge(c, l, r); ret = ops->key_merge(c, l, r);
@ -310,14 +317,15 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
const struct bkey_ops *ops; const struct bkey_ops *ops;
struct bkey uk; struct bkey uk;
struct bkey_s u; struct bkey_s u;
+	unsigned nr_compat = 5;
 	int i;

 	/*
 	 * Do these operations in reverse order in the write path:
 	 */

-	for (i = 0; i < 4; i++)
-	switch (!write ? i : 3 - i) {
+	for (i = 0; i < nr_compat; i++)
+	switch (!write ? i : nr_compat - 1 - i) {
case 0: case 0:
if (big_endian != CPU_BIG_ENDIAN) if (big_endian != CPU_BIG_ENDIAN)
bch2_bkey_swab_key(f, k); bch2_bkey_swab_key(f, k);
@ -351,6 +359,28 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
} }
break; break;
case 3: case 3:
if (version < bcachefs_metadata_version_snapshot &&
(level || btree_type_has_snapshots(btree_id))) {
struct bkey_i *u = packed_to_bkey(k);
if (u) {
u->k.p.snapshot = write
? 0 : U32_MAX;
} else {
u64 min_packed = f->field_offset[BKEY_FIELD_SNAPSHOT];
u64 max_packed = min_packed +
~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
uk = __bch2_bkey_unpack_key(f, k);
uk.p.snapshot = write
? min_packed : min_t(u64, U32_MAX, max_packed);
BUG_ON(!bch2_bkey_pack_key(k, &uk, f));
}
}
break;
case 4:
if (!bkey_packed(k)) { if (!bkey_packed(k)) {
u = bkey_i_to_s(packed_to_bkey(k)); u = bkey_i_to_s(packed_to_bkey(k));
} else { } else {


@ -45,7 +45,7 @@ static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
BUG_ON(!iter->used); BUG_ON(!iter->used);
i->k = bkey_next_skip_noops(i->k, i->end); i->k = bkey_next(i->k);
BUG_ON(i->k > i->end); BUG_ON(i->k > i->end);


@ -78,7 +78,7 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b,
for (_k = i->start; for (_k = i->start;
_k < vstruct_last(i); _k < vstruct_last(i);
_k = _n) { _k = _n) {
_n = bkey_next_skip_noops(_k, vstruct_last(i)); _n = bkey_next(_k);
k = bkey_disassemble(b, _k, &uk); k = bkey_disassemble(b, _k, &uk);
if (c) if (c)
@ -93,13 +93,13 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b,
n = bkey_unpack_key(b, _n); n = bkey_unpack_key(b, _n);
if (bkey_cmp(bkey_start_pos(&n), k.k->p) < 0) { if (bpos_cmp(n.p, k.k->p) < 0) {
printk(KERN_ERR "Key skipped backwards\n"); printk(KERN_ERR "Key skipped backwards\n");
continue; continue;
} }
if (!bkey_deleted(k.k) && if (!bkey_deleted(k.k) &&
!bkey_cmp(n.p, k.k->p)) !bpos_cmp(n.p, k.k->p))
printk(KERN_ERR "Duplicate keys\n"); printk(KERN_ERR "Duplicate keys\n");
} }
} }
@ -534,7 +534,7 @@ static void bch2_bset_verify_rw_aux_tree(struct btree *b,
goto start; goto start;
while (1) { while (1) {
if (rw_aux_to_bkey(b, t, j) == k) { if (rw_aux_to_bkey(b, t, j) == k) {
BUG_ON(bkey_cmp(rw_aux_tree(b, t)[j].k, BUG_ON(bpos_cmp(rw_aux_tree(b, t)[j].k,
bkey_unpack_pos(b, k))); bkey_unpack_pos(b, k)));
start: start:
if (++j == t->size) if (++j == t->size)
@ -544,7 +544,7 @@ start:
rw_aux_tree(b, t)[j - 1].offset); rw_aux_tree(b, t)[j - 1].offset);
} }
k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); k = bkey_next(k);
BUG_ON(k >= btree_bkey_last(b, t)); BUG_ON(k >= btree_bkey_last(b, t));
} }
} }
@ -686,17 +686,21 @@ static void make_bfloat(struct btree *b, struct bset_tree *t,
if (is_power_of_2(j) && if (is_power_of_2(j) &&
!min_key->u64s) { !min_key->u64s) {
if (!bkey_pack_pos(min_key, b->data->min_key, b)) {
k = (void *) min_key; k = (void *) min_key;
bkey_init(&k->k); bkey_init(&k->k);
k->k.p = b->data->min_key; k->k.p = b->data->min_key;
} }
}
if (is_power_of_2(j + 1) && if (is_power_of_2(j + 1) &&
!max_key->u64s) { !max_key->u64s) {
if (!bkey_pack_pos(max_key, b->data->max_key, b)) {
k = (void *) max_key; k = (void *) max_key;
bkey_init(&k->k); bkey_init(&k->k);
k->k.p = t->max_key; k->k.p = t->max_key;
} }
}
__make_bfloat(b, t, j, min_key, max_key); __make_bfloat(b, t, j, min_key, max_key);
} }
@ -759,7 +763,7 @@ retry:
/* First we figure out where the first key in each cacheline is */ /* First we figure out where the first key in each cacheline is */
eytzinger1_for_each(j, t->size) { eytzinger1_for_each(j, t->size) {
while (bkey_to_cacheline(b, t, k) < cacheline) while (bkey_to_cacheline(b, t, k) < cacheline)
prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); prev = k, k = bkey_next(k);
if (k >= btree_bkey_last(b, t)) { if (k >= btree_bkey_last(b, t)) {
/* XXX: this path sucks */ /* XXX: this path sucks */
@ -776,14 +780,19 @@ retry:
} }
while (k != btree_bkey_last(b, t)) while (k != btree_bkey_last(b, t))
prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t)); prev = k, k = bkey_next(k);
t->max_key = bkey_unpack_pos(b, prev); t->max_key = bkey_unpack_pos(b, prev);
if (!bkey_pack_pos(bkey_to_packed(&min_key), b->data->min_key, b)) {
bkey_init(&min_key.k); bkey_init(&min_key.k);
min_key.k.p = b->data->min_key; min_key.k.p = b->data->min_key;
}
if (!bkey_pack_pos(bkey_to_packed(&max_key), b->data->max_key, b)) {
bkey_init(&max_key.k); bkey_init(&max_key.k);
max_key.k.p = t->max_key; max_key.k.p = t->max_key;
}
/* Then we build the tree */ /* Then we build the tree */
eytzinger1_for_each(j, t->size) eytzinger1_for_each(j, t->size)
@ -911,7 +920,7 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
struct bkey_packed *p, *i, *ret = NULL, *orig_k = k; struct bkey_packed *p, *i, *ret = NULL, *orig_k = k;
while ((p = __bkey_prev(b, t, k)) && !ret) { while ((p = __bkey_prev(b, t, k)) && !ret) {
for (i = p; i != k; i = bkey_next_skip_noops(i, k)) for (i = p; i != k; i = bkey_next(i))
if (i->type >= min_key_type) if (i->type >= min_key_type)
ret = i; ret = i;
@ -922,10 +931,10 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
BUG_ON(ret >= orig_k); BUG_ON(ret >= orig_k);
for (i = ret for (i = ret
? bkey_next_skip_noops(ret, orig_k) ? bkey_next(ret)
: btree_bkey_first(b, t); : btree_bkey_first(b, t);
i != orig_k; i != orig_k;
i = bkey_next_skip_noops(i, orig_k)) i = bkey_next(i))
BUG_ON(i->type >= min_key_type); BUG_ON(i->type >= min_key_type);
} }
@ -960,7 +969,7 @@ static void ro_aux_tree_fix_invalidated_key(struct btree *b,
/* signal to make_bfloat() that they're uninitialized: */ /* signal to make_bfloat() that they're uninitialized: */
min_key.u64s = max_key.u64s = 0; min_key.u64s = max_key.u64s = 0;
if (bkey_next_skip_noops(k, btree_bkey_last(b, t)) == btree_bkey_last(b, t)) { if (bkey_next(k) == btree_bkey_last(b, t)) {
t->max_key = bkey_unpack_pos(b, k); t->max_key = bkey_unpack_pos(b, k);
for (j = 1; j < t->size; j = j * 2 + 1) for (j = 1; j < t->size; j = j * 2 + 1)
@ -1084,7 +1093,7 @@ static void bch2_bset_fix_lookup_table(struct btree *b,
struct bkey_packed *k = start; struct bkey_packed *k = start;
while (1) { while (1) {
k = bkey_next_skip_noops(k, end); k = bkey_next(k);
if (k == end) if (k == end)
break; break;
@ -1170,15 +1179,14 @@ void bch2_bset_delete(struct btree *b,
__flatten __flatten
static struct bkey_packed *bset_search_write_set(const struct btree *b, static struct bkey_packed *bset_search_write_set(const struct btree *b,
struct bset_tree *t, struct bset_tree *t,
struct bpos *search, struct bpos *search)
const struct bkey_packed *packed_search)
{ {
unsigned l = 0, r = t->size; unsigned l = 0, r = t->size;
while (l + 1 != r) { while (l + 1 != r) {
unsigned m = (l + r) >> 1; unsigned m = (l + r) >> 1;
if (bkey_cmp(rw_aux_tree(b, t)[m].k, *search) < 0) if (bpos_cmp(rw_aux_tree(b, t)[m].k, *search) < 0)
l = m; l = m;
else else
r = m; r = m;
@ -1238,9 +1246,6 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
prefetch(&base->f[n << 4]); prefetch(&base->f[n << 4]);
f = &base->f[n]; f = &base->f[n];
if (!unlikely(packed_search))
goto slowpath;
if (unlikely(f->exponent >= BFLOAT_FAILED)) if (unlikely(f->exponent >= BFLOAT_FAILED))
goto slowpath; goto slowpath;
@ -1304,7 +1309,7 @@ struct bkey_packed *__bch2_bset_search(struct btree *b,
case BSET_NO_AUX_TREE: case BSET_NO_AUX_TREE:
return btree_bkey_first(b, t); return btree_bkey_first(b, t);
case BSET_RW_AUX_TREE: case BSET_RW_AUX_TREE:
return bset_search_write_set(b, t, search, lossy_packed_search); return bset_search_write_set(b, t, search);
case BSET_RO_AUX_TREE: case BSET_RO_AUX_TREE:
/* /*
* Each node in the auxiliary search tree covers a certain range * Each node in the auxiliary search tree covers a certain range
@ -1313,7 +1318,7 @@ struct bkey_packed *__bch2_bset_search(struct btree *b,
* start and end - handle that here: * start and end - handle that here:
*/ */
if (bkey_cmp(*search, t->max_key) > 0) if (bpos_cmp(*search, t->max_key) > 0)
return btree_bkey_last(b, t); return btree_bkey_last(b, t);
return bset_search_tree(b, t, search, lossy_packed_search); return bset_search_tree(b, t, search, lossy_packed_search);
@ -1334,12 +1339,12 @@ struct bkey_packed *bch2_bset_search_linear(struct btree *b,
while (m != btree_bkey_last(b, t) && while (m != btree_bkey_last(b, t) &&
bkey_iter_cmp_p_or_unp(b, m, bkey_iter_cmp_p_or_unp(b, m,
lossy_packed_search, search) < 0) lossy_packed_search, search) < 0)
m = bkey_next_skip_noops(m, btree_bkey_last(b, t)); m = bkey_next(m);
if (!packed_search) if (!packed_search)
while (m != btree_bkey_last(b, t) && while (m != btree_bkey_last(b, t) &&
bkey_iter_pos_cmp(b, m, search) < 0) bkey_iter_pos_cmp(b, m, search) < 0)
m = bkey_next_skip_noops(m, btree_bkey_last(b, t)); m = bkey_next(m);
if (bch2_expensive_debug_checks) { if (bch2_expensive_debug_checks) {
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m); struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
@ -1403,16 +1408,15 @@ noinline __flatten __attribute__((cold))
static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
struct btree *b, struct bpos *search) struct btree *b, struct bpos *search)
{ {
struct bset_tree *t; struct bkey_packed *k;
trace_bkey_pack_pos_fail(search); trace_bkey_pack_pos_fail(search);
for_each_bset(b, t) bch2_btree_node_iter_init_from_start(iter, b);
__bch2_btree_node_iter_push(iter, b,
bch2_bset_search(b, t, search, NULL, NULL),
btree_bkey_last(b, t));
bch2_btree_node_iter_sort(iter, b); while ((k = bch2_btree_node_iter_peek(iter, b)) &&
bkey_iter_pos_cmp(b, k, search) < 0)
bch2_btree_node_iter_advance(iter, b);
} }
/** /**
@ -1446,7 +1450,7 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
* to the search key is going to have 0 sectors after the search key. * to the search key is going to have 0 sectors after the search key.
* *
* But this does mean that we can't just search for * But this does mean that we can't just search for
* bkey_successor(start_of_range) to get the first extent that overlaps with * bpos_successor(start_of_range) to get the first extent that overlaps with
* the range we want - if we're unlucky and there's an extent that ends * the range we want - if we're unlucky and there's an extent that ends
* exactly where we searched, then there could be a deleted key at the same * exactly where we searched, then there could be a deleted key at the same
* position and we'd get that when we search instead of the preceding extent * position and we'd get that when we search instead of the preceding extent
@ -1464,7 +1468,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
struct bkey_packed *k[MAX_BSETS]; struct bkey_packed *k[MAX_BSETS];
unsigned i; unsigned i;
EBUG_ON(bkey_cmp(*search, b->data->min_key) < 0); EBUG_ON(bpos_cmp(*search, b->data->min_key) < 0);
bset_aux_tree_verify(b); bset_aux_tree_verify(b);
memset(iter, 0, sizeof(*iter)); memset(iter, 0, sizeof(*iter));


@ -305,7 +305,7 @@ static inline struct bkey_s __bkey_disassemble(struct btree *b,
#define bset_tree_for_each_key(_b, _t, _k) \ #define bset_tree_for_each_key(_b, _t, _k) \
for (_k = btree_bkey_first(_b, _t); \ for (_k = btree_bkey_first(_b, _t); \
_k != btree_bkey_last(_b, _t); \ _k != btree_bkey_last(_b, _t); \
_k = bkey_next_skip_noops(_k, btree_bkey_last(_b, _t))) _k = bkey_next(_k))
static inline bool bset_has_ro_aux_tree(struct bset_tree *t) static inline bool bset_has_ro_aux_tree(struct bset_tree *t)
{ {
@ -378,7 +378,7 @@ static inline int bkey_cmp_p_or_unp(const struct btree *b,
EBUG_ON(r_packed && !bkey_packed(r_packed)); EBUG_ON(r_packed && !bkey_packed(r_packed));
if (unlikely(!bkey_packed(l))) if (unlikely(!bkey_packed(l)))
return bkey_cmp(packed_to_bkey_c(l)->p, *r); return bpos_cmp(packed_to_bkey_c(l)->p, *r);
if (likely(r_packed)) if (likely(r_packed))
return __bch2_bkey_cmp_packed_format_checked(l, r_packed, b); return __bch2_bkey_cmp_packed_format_checked(l, r_packed, b);
@ -403,24 +403,6 @@ bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
return bch2_bkey_prev_filter(b, t, k, 1); return bch2_bkey_prev_filter(b, t, k, 1);
} }
enum bch_extent_overlap {
BCH_EXTENT_OVERLAP_ALL = 0,
BCH_EXTENT_OVERLAP_BACK = 1,
BCH_EXTENT_OVERLAP_FRONT = 2,
BCH_EXTENT_OVERLAP_MIDDLE = 3,
};
/* Returns how k overlaps with m */
static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
const struct bkey *m)
{
int cmp1 = bkey_cmp(k->p, m->p) < 0;
int cmp2 = bkey_cmp(bkey_start_pos(k),
bkey_start_pos(m)) > 0;
return (cmp1 << 1) + cmp2;
}
/* Btree key iteration */ /* Btree key iteration */
void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *, void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,


@ -149,7 +149,7 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
if (level) if (level)
six_lock_pcpu_alloc(&b->c.lock); six_lock_pcpu_alloc(&b->c.lock);
else else
six_lock_pcpu_free(&b->c.lock); six_lock_pcpu_free_rcu(&b->c.lock);
mutex_lock(&bc->lock); mutex_lock(&bc->lock);
ret = __bch2_btree_node_hash_insert(bc, b); ret = __bch2_btree_node_hash_insert(bc, b);
@ -814,9 +814,9 @@ lock_node:
EBUG_ON(b->c.btree_id != iter->btree_id); EBUG_ON(b->c.btree_id != iter->btree_id);
EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
EBUG_ON(bkey_cmp(b->data->max_key, k->k.p)); EBUG_ON(bpos_cmp(b->data->max_key, k->k.p));
EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
bkey_cmp(b->data->min_key, bpos_cmp(b->data->min_key,
bkey_i_to_btree_ptr_v2(&b->key)->v.min_key)); bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
return b; return b;
@ -897,9 +897,9 @@ lock_node:
EBUG_ON(b->c.btree_id != btree_id); EBUG_ON(b->c.btree_id != btree_id);
EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
EBUG_ON(bkey_cmp(b->data->max_key, k->k.p)); EBUG_ON(bpos_cmp(b->data->max_key, k->k.p));
EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
bkey_cmp(b->data->min_key, bpos_cmp(b->data->min_key,
bkey_i_to_btree_ptr_v2(&b->key)->v.min_key)); bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
out: out:
bch2_btree_cache_cannibalize_unlock(c); bch2_btree_cache_cannibalize_unlock(c);
@ -1011,7 +1011,7 @@ out:
if (sib != btree_prev_sib) if (sib != btree_prev_sib)
swap(n1, n2); swap(n1, n2);
if (bkey_cmp(bkey_successor(n1->key.k.p), if (bpos_cmp(bpos_successor(n1->key.k.p),
n2->data->min_key)) { n2->data->min_key)) {
char buf1[200], buf2[200]; char buf1[200], buf2[200];


@ -64,7 +64,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
struct bpos node_end = b->data->max_key; struct bpos node_end = b->data->max_key;
struct bpos expected_start = bkey_deleted(&prev->k->k) struct bpos expected_start = bkey_deleted(&prev->k->k)
? node_start ? node_start
: bkey_successor(prev->k->k.p); : bpos_successor(prev->k->k.p);
char buf1[200], buf2[200]; char buf1[200], buf2[200];
bool update_min = false; bool update_min = false;
bool update_max = false; bool update_max = false;
@ -81,7 +81,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k)); bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k));
} }
if (fsck_err_on(bkey_cmp(expected_start, bp->v.min_key), c, if (fsck_err_on(bpos_cmp(expected_start, bp->v.min_key), c,
"btree node with incorrect min_key at btree %s level %u:\n" "btree node with incorrect min_key at btree %s level %u:\n"
" prev %s\n" " prev %s\n"
" cur %s", " cur %s",
@ -92,7 +92,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
} }
if (fsck_err_on(is_last && if (fsck_err_on(is_last &&
bkey_cmp(cur.k->k.p, node_end), c, bpos_cmp(cur.k->k.p, node_end), c,
"btree node with incorrect max_key at btree %s level %u:\n" "btree node with incorrect max_key at btree %s level %u:\n"
" %s\n" " %s\n"
" expected %s", " expected %s",
@ -470,8 +470,8 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
bkey_init(&prev.k->k); bkey_init(&prev.k->k);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
BUG_ON(bkey_cmp(k.k->p, b->data->min_key) < 0); BUG_ON(bpos_cmp(k.k->p, b->data->min_key) < 0);
BUG_ON(bkey_cmp(k.k->p, b->data->max_key) > 0); BUG_ON(bpos_cmp(k.k->p, b->data->max_key) > 0);
ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false, ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false,
k, &max_stale, true); k, &max_stale, true);
@ -560,13 +560,13 @@ static int bch2_gc_btree_init(struct bch_fs *c,
return 0; return 0;
six_lock_read(&b->c.lock, NULL, NULL); six_lock_read(&b->c.lock, NULL, NULL);
if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c, if (fsck_err_on(bpos_cmp(b->data->min_key, POS_MIN), c,
"btree root with incorrect min_key: %s", "btree root with incorrect min_key: %s",
(bch2_bpos_to_text(&PBUF(buf), b->data->min_key), buf))) { (bch2_bpos_to_text(&PBUF(buf), b->data->min_key), buf))) {
BUG(); BUG();
} }
if (fsck_err_on(bkey_cmp(b->data->max_key, POS_MAX), c, if (fsck_err_on(bpos_cmp(b->data->max_key, POS_MAX), c,
"btree root with incorrect max_key: %s", "btree root with incorrect max_key: %s",
(bch2_bpos_to_text(&PBUF(buf), b->data->max_key), buf))) { (bch2_bpos_to_text(&PBUF(buf), b->data->max_key), buf))) {
BUG(); BUG();
@ -1148,7 +1148,9 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
bch2_trans_init(&trans, c, 0, 0); bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
BTREE_ITER_PREFETCH); BTREE_ITER_PREFETCH|
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_ALL_SNAPSHOTS);
while ((k = bch2_btree_iter_peek(iter)).k && while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k))) { !(ret = bkey_err(k))) {
@ -1171,6 +1173,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
bch2_btree_iter_advance(iter); bch2_btree_iter_advance(iter);
} }
bch2_trans_iter_put(&trans, iter);
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&sk, c); bch2_bkey_buf_exit(&sk, c);
@ -1271,6 +1274,9 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
/* Find a format that all keys in @old_nodes can pack into */ /* Find a format that all keys in @old_nodes can pack into */
bch2_bkey_format_init(&format_state); bch2_bkey_format_init(&format_state);
/*
* XXX: this won't correctly take into account the new min/max keys:
*/
for (i = 0; i < nr_old_nodes; i++) for (i = 0; i < nr_old_nodes; i++)
__bch2_btree_calc_format(&format_state, old_nodes[i]); __bch2_btree_calc_format(&format_state, old_nodes[i]);
@ -1333,7 +1339,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
k < vstruct_last(s2) && k < vstruct_last(s2) &&
vstruct_blocks_plus(n1->data, c->block_bits, vstruct_blocks_plus(n1->data, c->block_bits,
u64s + k->u64s) <= blocks; u64s + k->u64s) <= blocks;
k = bkey_next_skip_noops(k, vstruct_last(s2))) { k = bkey_next(k)) {
last = k; last = k;
u64s += k->u64s; u64s += k->u64s;
} }
@ -1362,7 +1368,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
n1->key.k.p = n1->data->max_key = n1->key.k.p = n1->data->max_key =
bkey_unpack_pos(n1, last); bkey_unpack_pos(n1, last);
n2->data->min_key = bkey_successor(n1->data->max_key); n2->data->min_key = bpos_successor(n1->data->max_key);
memcpy_u64s(vstruct_last(s1), memcpy_u64s(vstruct_last(s1),
s2->start, u64s); s2->start, u64s);
@ -1405,7 +1411,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
unsigned j; unsigned j;
for (j = 0; j < nr_new_nodes; j++) for (j = 0; j < nr_new_nodes; j++)
if (!bkey_cmp(old_nodes[i]->key.k.p, if (!bpos_cmp(old_nodes[i]->key.k.p,
new_nodes[j]->key.k.p)) new_nodes[j]->key.k.p))
goto next; goto next;


@ -45,13 +45,9 @@ static inline struct gc_pos gc_phase(enum gc_phase phase)
static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r) static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
{ {
if (l.phase != r.phase) return cmp_int(l.phase, r.phase) ?:
return l.phase < r.phase ? -1 : 1; bpos_cmp(l.pos, r.pos) ?:
if (bkey_cmp(l.pos, r.pos)) cmp_int(l.level, r.level);
return bkey_cmp(l.pos, r.pos);
if (l.level != r.level)
return l.level < r.level ? -1 : 1;
return 0;
} }
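
The rewritten gc_pos_cmp() above leans on the cmp_int()/?: chaining idiom: each comparison yields -1, 0 or 1, and the GNU ?: operator falls through to the next field only on a tie. A minimal standalone sketch of the idiom, not part of the commit (field names invented, cmp_int defined the same way bcachefs defines it):

#include <stdio.h>

#define cmp_int(l, r)   ((l > r) - (l < r))

struct pos3 { int phase, inode, level; };

static int pos3_cmp(struct pos3 l, struct pos3 r)
{
        /* GNU a ?: b yields a when nonzero, otherwise b */
        return  cmp_int(l.phase, r.phase) ?:
                cmp_int(l.inode, r.inode) ?:
                cmp_int(l.level, r.level);
}

int main(void)
{
        struct pos3 a = { 1, 10, 0 }, b = { 1, 10, 2 };

        /* phase and inode tie, so the level comparison decides: prints -1 */
        printf("%d\n", pos3_cmp(a, b));
        return 0;
}
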
static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id) static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id)


@ -32,13 +32,13 @@ static void verify_no_dups(struct btree *b,
if (start == end) if (start == end)
return; return;
for (p = start, k = bkey_next_skip_noops(start, end); for (p = start, k = bkey_next(start);
k != end; k != end;
p = k, k = bkey_next_skip_noops(k, end)) { p = k, k = bkey_next(k)) {
struct bkey l = bkey_unpack_key(b, p); struct bkey l = bkey_unpack_key(b, p);
struct bkey r = bkey_unpack_key(b, k); struct bkey r = bkey_unpack_key(b, k);
BUG_ON(bkey_cmp(l.p, bkey_start_pos(&r)) >= 0); BUG_ON(bpos_cmp(l.p, bkey_start_pos(&r)) >= 0);
} }
#endif #endif
} }
@ -47,9 +47,7 @@ static void set_needs_whiteout(struct bset *i, int v)
{ {
struct bkey_packed *k; struct bkey_packed *k;
for (k = i->start; for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
k != vstruct_last(i);
k = bkey_next_skip_noops(k, vstruct_last(i)))
k->needs_whiteout = v; k->needs_whiteout = v;
} }
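
With bkey_next_skip_noops() gone, every bset walk in this file collapses to the plain three-part loop above: start at i->start, stop at vstruct_last(i), and advance by the key's own length. A simplified standalone model of that walk, not the real structs (types and sizes invented for illustration):

#include <stdint.h>
#include <stdio.h>

/* a packed record that carries its own length in 64-bit words, like bkey_packed */
struct rec { uint8_t u64s; uint64_t data[]; };

static struct rec *rec_next(struct rec *k)
{
        /* same shape as bkey_next(): advance by the record's own length */
        return (struct rec *) ((uint64_t *) k + k->u64s);
}

int main(void)
{
        uint64_t buf[8] = { 0 };
        struct rec *k, *end;

        ((struct rec *) &buf[0])->u64s = 2;     /* first record: 2 words  */
        ((struct rec *) &buf[2])->u64s = 3;     /* second record: 3 words */
        end = (struct rec *) &buf[5];           /* vstruct_last() analogue */

        for (k = (struct rec *) buf; k != end; k = rec_next(k))
                printf("record of %u words\n", (unsigned) k->u64s);
        return 0;
}
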
@ -213,7 +211,7 @@ static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode)
out = i->start; out = i->start;
for (k = start; k != end; k = n) { for (k = start; k != end; k = n) {
n = bkey_next_skip_noops(k, end); n = bkey_next(k);
if (!bkey_deleted(k)) { if (!bkey_deleted(k)) {
bkey_copy(out, k); bkey_copy(out, k);
@ -614,12 +612,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
BTREE_ERR_MUST_RETRY, c, ca, b, i, BTREE_ERR_MUST_RETRY, c, ca, b, i,
"incorrect level"); "incorrect level");
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
u64 *p = (u64 *) &bn->ptr;
*p = swab64(*p);
}
if (!write) if (!write)
compat_btree_node(b->c.level, b->c.btree_id, version, compat_btree_node(b->c.level, b->c.btree_id, version,
BSET_BIG_ENDIAN(i), write, bn); BSET_BIG_ENDIAN(i), write, bn);
@ -633,14 +625,14 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
b->data->max_key = b->key.k.p; b->data->max_key = b->key.k.p;
} }
btree_err_on(bkey_cmp(b->data->min_key, bp->min_key), btree_err_on(bpos_cmp(b->data->min_key, bp->min_key),
BTREE_ERR_MUST_RETRY, c, ca, b, NULL, BTREE_ERR_MUST_RETRY, c, ca, b, NULL,
"incorrect min_key: got %s should be %s", "incorrect min_key: got %s should be %s",
(bch2_bpos_to_text(&PBUF(buf1), bn->min_key), buf1), (bch2_bpos_to_text(&PBUF(buf1), bn->min_key), buf1),
(bch2_bpos_to_text(&PBUF(buf2), bp->min_key), buf2)); (bch2_bpos_to_text(&PBUF(buf2), bp->min_key), buf2));
} }
btree_err_on(bkey_cmp(bn->max_key, b->key.k.p), btree_err_on(bpos_cmp(bn->max_key, b->key.k.p),
BTREE_ERR_MUST_RETRY, c, ca, b, i, BTREE_ERR_MUST_RETRY, c, ca, b, i,
"incorrect max key %s", "incorrect max key %s",
(bch2_bpos_to_text(&PBUF(buf1), bn->max_key), buf1)); (bch2_bpos_to_text(&PBUF(buf1), bn->max_key), buf1));
@ -754,7 +746,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
} }
prev = k; prev = k;
k = bkey_next_skip_noops(k, vstruct_last(i)); k = bkey_next(k);
} }
fsck_err: fsck_err:
return ret; return ret;
@ -947,7 +939,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
bp.v->mem_ptr = 0; bp.v->mem_ptr = 0;
} }
k = bkey_next_skip_noops(k, vstruct_last(i)); k = bkey_next(k);
} }
bch2_bset_build_aux_tree(b, b->set, false); bch2_bset_build_aux_tree(b, b->set, false);
@ -1327,8 +1319,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_btree)) if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_btree))
return -1; return -1;
ret = validate_bset(c, NULL, b, i, sectors, WRITE, false) ?: ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?:
validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false); validate_bset(c, NULL, b, i, sectors, WRITE, false);
if (ret) { if (ret) {
bch2_inconsistent_error(c); bch2_inconsistent_error(c);
dump_stack(); dump_stack();
@ -1481,7 +1473,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
validate_before_checksum = true; validate_before_checksum = true;
/* validate_bset will be modifying: */ /* validate_bset will be modifying: */
if (le16_to_cpu(i->version) <= bcachefs_metadata_version_inode_btree_change) if (le16_to_cpu(i->version) < bcachefs_metadata_version_current)
validate_before_checksum = true; validate_before_checksum = true;
/* if we're going to be encrypting, check metadata validity first: */ /* if we're going to be encrypting, check metadata validity first: */


@ -199,6 +199,16 @@ static inline void compat_bformat(unsigned level, enum btree_id btree_id,
swap(f->field_offset[BKEY_FIELD_INODE], swap(f->field_offset[BKEY_FIELD_INODE],
f->field_offset[BKEY_FIELD_OFFSET]); f->field_offset[BKEY_FIELD_OFFSET]);
} }
if (version < bcachefs_metadata_version_snapshot &&
(level || btree_type_has_snapshots(btree_id))) {
u64 max_packed =
~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
f->field_offset[BKEY_FIELD_SNAPSHOT] = write
? 0
: U32_MAX - max_packed;
}
} }
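
In the new compat_bformat() hunk, ~(~0ULL << bits) is the largest value representable in a snapshot field of that many bits; biasing field_offset by U32_MAX - max_packed makes keys written before the snapshot format unpack with snapshot == U32_MAX. A small worked example of the bit arithmetic (editor's illustration, not from the commit):

#include <stdio.h>

int main(void)
{
        unsigned bits[] = { 0, 1, 4, 32 };

        for (unsigned i = 0; i < 4; i++) {
                unsigned long long max_packed = ~(~0ULL << bits[i]);
                printf("bits=%-2u max_packed=%llu\n", bits[i], max_packed);
        }
        /* prints 0, 1, 15, 4294967295: with a 0-bit snapshot field the
         * offset becomes U32_MAX - 0, so old keys read back with snapshot
         * U32_MAX, matching the read-side fixup in compat_btree_node(). */
        return 0;
}
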
static inline void compat_bpos(unsigned level, enum btree_id btree_id, static inline void compat_bpos(unsigned level, enum btree_id btree_id,
@ -220,18 +230,26 @@ static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
{ {
if (version < bcachefs_metadata_version_inode_btree_change && if (version < bcachefs_metadata_version_inode_btree_change &&
btree_node_type_is_extents(btree_id) && btree_node_type_is_extents(btree_id) &&
bkey_cmp(bn->min_key, POS_MIN) && bpos_cmp(bn->min_key, POS_MIN) &&
write) write)
bn->min_key = bkey_predecessor(bn->min_key); bn->min_key = bpos_nosnap_predecessor(bn->min_key);
if (version < bcachefs_metadata_version_snapshot &&
write)
bn->max_key.snapshot = 0;
compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key); compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key); compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
if (version < bcachefs_metadata_version_snapshot &&
!write)
bn->max_key.snapshot = U32_MAX;
if (version < bcachefs_metadata_version_inode_btree_change && if (version < bcachefs_metadata_version_inode_btree_change &&
btree_node_type_is_extents(btree_id) && btree_node_type_is_extents(btree_id) &&
bkey_cmp(bn->min_key, POS_MIN) && bpos_cmp(bn->min_key, POS_MIN) &&
!write) !write)
bn->min_key = bkey_successor(bn->min_key); bn->min_key = bpos_nosnap_successor(bn->min_key);
} }
#endif /* _BCACHEFS_BTREE_IO_H */ #endif /* _BCACHEFS_BTREE_IO_H */


@ -18,6 +18,36 @@
static void btree_iter_set_search_pos(struct btree_iter *, struct bpos); static void btree_iter_set_search_pos(struct btree_iter *, struct bpos);
static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
{
EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);
/* Are we iterating over keys in all snapshots? */
if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
p = bpos_successor(p);
} else {
p = bpos_nosnap_successor(p);
p.snapshot = iter->snapshot;
}
return p;
}
static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
{
EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);
/* Are we iterating over keys in all snapshots? */
if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
p = bpos_predecessor(p);
} else {
p = bpos_nosnap_predecessor(p);
p.snapshot = iter->snapshot;
}
return p;
}
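
These two helpers pick between the snapshot-aware bpos_successor()/bpos_predecessor() and their _nosnap_ variants depending on whether the iterator walks all snapshots. As an editor's illustration of the snapshot-aware form (field widths shrunk and names invented), the snapshot field is the least significant and carries upward on overflow:

#include <stdio.h>
#include <stdint.h>

struct pos { uint8_t inode, offset, snapshot; };

static struct pos pos_successor(struct pos p)
{
        if (!++p.snapshot && !++p.offset && !++p.inode)
                ;       /* the real code BUG()s on wrapping past the maximum position */
        return p;
}

int main(void)
{
        struct pos p = { 1, 2, 255 };

        p = pos_successor(p);
        /* snapshot wrapped, so offset carries: prints 1 3 0 */
        printf("%u %u %u\n", p.inode, p.offset, p.snapshot);
        return 0;
}
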
static inline bool is_btree_node(struct btree_iter *iter, unsigned l) static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
{ {
return l < BTREE_MAX_DEPTH && return l < BTREE_MAX_DEPTH &&
@ -30,20 +60,20 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
if ((iter->flags & BTREE_ITER_IS_EXTENTS) && if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
bkey_cmp(pos, POS_MAX)) bkey_cmp(pos, POS_MAX))
pos = bkey_successor(pos); pos = bkey_successor(iter, pos);
return pos; return pos;
} }
static inline bool btree_iter_pos_before_node(struct btree_iter *iter, static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
struct btree *b) struct btree *b)
{ {
return bkey_cmp(iter->real_pos, b->data->min_key) < 0; return bpos_cmp(iter->real_pos, b->data->min_key) < 0;
} }
static inline bool btree_iter_pos_after_node(struct btree_iter *iter, static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
struct btree *b) struct btree *b)
{ {
return bkey_cmp(b->key.k.p, iter->real_pos) < 0; return bpos_cmp(b->key.k.p, iter->real_pos) < 0;
} }
static inline bool btree_iter_pos_in_node(struct btree_iter *iter, static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
@ -285,7 +315,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
/* Must lock btree nodes in key order: */ /* Must lock btree nodes in key order: */
if (btree_node_locked(linked, level) && if (btree_node_locked(linked, level) &&
bkey_cmp(pos, btree_node_pos((void *) linked->l[level].b, bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
btree_iter_type(linked))) <= 0) { btree_iter_type(linked))) <= 0) {
deadlock_iter = linked; deadlock_iter = linked;
reason = 7; reason = 7;
@ -583,10 +613,24 @@ err:
static void bch2_btree_iter_verify(struct btree_iter *iter) static void bch2_btree_iter_verify(struct btree_iter *iter)
{ {
enum btree_iter_type type = btree_iter_type(iter);
unsigned i; unsigned i;
EBUG_ON(iter->btree_id >= BTREE_ID_NR); EBUG_ON(iter->btree_id >= BTREE_ID_NR);
BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
iter->pos.snapshot != iter->snapshot);
BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
BUG_ON(type == BTREE_ITER_NODES &&
!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
BUG_ON(type != BTREE_ITER_NODES &&
(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
!btree_type_has_snapshots(iter->btree_id));
bch2_btree_iter_verify_locks(iter); bch2_btree_iter_verify_locks(iter);
for (i = 0; i < BTREE_MAX_DEPTH; i++) for (i = 0; i < BTREE_MAX_DEPTH; i++)
@ -597,6 +641,9 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
{ {
enum btree_iter_type type = btree_iter_type(iter); enum btree_iter_type type = btree_iter_type(iter);
BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
iter->pos.snapshot != iter->snapshot);
BUG_ON((type == BTREE_ITER_KEYS || BUG_ON((type == BTREE_ITER_KEYS ||
type == BTREE_ITER_CACHED) && type == BTREE_ITER_CACHED) &&
(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 || (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
@ -1384,7 +1431,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
if (!b) if (!b)
return NULL; return NULL;
BUG_ON(bkey_cmp(b->key.k.p, iter->pos) < 0); BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0);
iter->pos = iter->real_pos = b->key.k.p; iter->pos = iter->real_pos = b->key.k.p;
@ -1421,12 +1468,12 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
if (!b) if (!b)
return NULL; return NULL;
if (bkey_cmp(iter->pos, b->key.k.p) < 0) { if (bpos_cmp(iter->pos, b->key.k.p) < 0) {
/* /*
* Haven't gotten to the end of the parent node: go back down to * Haven't gotten to the end of the parent node: go back down to
* the next child node * the next child node
*/ */
btree_iter_set_search_pos(iter, bkey_successor(iter->pos)); btree_iter_set_search_pos(iter, bpos_successor(iter->pos));
/* Unlock to avoid screwing up our lock invariants: */ /* Unlock to avoid screwing up our lock invariants: */
btree_node_unlock(iter, iter->level); btree_node_unlock(iter, iter->level);
@ -1453,7 +1500,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos) static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos)
{ {
int cmp = bkey_cmp(new_pos, iter->real_pos); int cmp = bpos_cmp(new_pos, iter->real_pos);
unsigned l = iter->level; unsigned l = iter->level;
if (!cmp) if (!cmp)
@ -1497,10 +1544,10 @@ out:
inline bool bch2_btree_iter_advance(struct btree_iter *iter) inline bool bch2_btree_iter_advance(struct btree_iter *iter)
{ {
struct bpos pos = iter->k.p; struct bpos pos = iter->k.p;
bool ret = bkey_cmp(pos, POS_MAX) != 0; bool ret = bpos_cmp(pos, POS_MAX) != 0;
if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
pos = bkey_successor(pos); pos = bkey_successor(iter, pos);
bch2_btree_iter_set_pos(iter, pos); bch2_btree_iter_set_pos(iter, pos);
return ret; return ret;
} }
@ -1508,10 +1555,10 @@ inline bool bch2_btree_iter_advance(struct btree_iter *iter)
inline bool bch2_btree_iter_rewind(struct btree_iter *iter) inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
{ {
struct bpos pos = bkey_start_pos(&iter->k); struct bpos pos = bkey_start_pos(&iter->k);
bool ret = bkey_cmp(pos, POS_MIN) != 0; bool ret = bpos_cmp(pos, POS_MIN) != 0;
if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
pos = bkey_predecessor(pos); pos = bkey_predecessor(iter, pos);
bch2_btree_iter_set_pos(iter, pos); bch2_btree_iter_set_pos(iter, pos);
return ret; return ret;
} }
@ -1519,7 +1566,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter) static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
{ {
struct bpos next_pos = iter->l[0].b->key.k.p; struct bpos next_pos = iter->l[0].b->key.k.p;
bool ret = bkey_cmp(next_pos, POS_MAX) != 0; bool ret = bpos_cmp(next_pos, POS_MAX) != 0;
/* /*
* Typically, we don't want to modify iter->pos here, since that * Typically, we don't want to modify iter->pos here, since that
@ -1527,7 +1574,7 @@ static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
* btree, in that case we want iter->pos to reflect that: * btree, in that case we want iter->pos to reflect that:
*/ */
if (ret) if (ret)
btree_iter_set_search_pos(iter, bkey_successor(next_pos)); btree_iter_set_search_pos(iter, bpos_successor(next_pos));
else else
bch2_btree_iter_set_pos(iter, POS_MAX); bch2_btree_iter_set_pos(iter, POS_MAX);
@ -1537,10 +1584,10 @@ static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter) static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
{ {
struct bpos next_pos = iter->l[0].b->data->min_key; struct bpos next_pos = iter->l[0].b->data->min_key;
bool ret = bkey_cmp(next_pos, POS_MIN) != 0; bool ret = bpos_cmp(next_pos, POS_MIN) != 0;
if (ret) if (ret)
btree_iter_set_search_pos(iter, bkey_predecessor(next_pos)); btree_iter_set_search_pos(iter, bpos_predecessor(next_pos));
else else
bch2_btree_iter_set_pos(iter, POS_MIN); bch2_btree_iter_set_pos(iter, POS_MIN);
@ -1586,13 +1633,13 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, bool wi
k = btree_iter_level_peek(iter, &iter->l[0]); k = btree_iter_level_peek(iter, &iter->l[0]);
if (next_update && if (next_update &&
bkey_cmp(next_update->k.p, iter->real_pos) <= 0) bpos_cmp(next_update->k.p, iter->real_pos) <= 0)
k = bkey_i_to_s_c(next_update); k = bkey_i_to_s_c(next_update);
if (likely(k.k)) { if (likely(k.k)) {
if (bkey_deleted(k.k)) { if (bkey_deleted(k.k)) {
btree_iter_set_search_pos(iter, btree_iter_set_search_pos(iter,
bkey_successor(k.k->p)); bkey_successor(iter, k.k->p));
continue; continue;
} }
@ -1731,7 +1778,7 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
if (iter->pos.inode == KEY_INODE_MAX) if (iter->pos.inode == KEY_INODE_MAX)
return bkey_s_c_null; return bkey_s_c_null;
bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos)); bch2_btree_iter_set_pos(iter, bkey_successor(iter, iter->pos));
} }
pos = iter->pos; pos = iter->pos;
@ -1965,6 +2012,14 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
{ {
struct btree_iter *iter, *best = NULL; struct btree_iter *iter, *best = NULL;
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
!btree_type_has_snapshots(btree_id))
flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
if (!(flags & BTREE_ITER_ALL_SNAPSHOTS))
pos.snapshot = btree_type_has_snapshots(btree_id)
? U32_MAX : 0;
/* We always want a fresh iterator for node iterators: */ /* We always want a fresh iterator for node iterators: */
if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES) if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES)
goto alloc_iter; goto alloc_iter;
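
The new flag handling above drops BTREE_ITER_ALL_SNAPSHOTS on btrees that don't have snapshots and otherwise pins pos.snapshot; the gc and debug changes elsewhere in this commit use the flag roughly as in the sketch below (error handling trimmed, loop body elided):

/* Sketch, not from the commit: walking every key, including all
 * snapshot versions, on a snapshot-capable btree. */
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
int ret;

bch2_trans_init(&trans, c, 0, 0);

iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN,
                           BTREE_ITER_PREFETCH|
                           BTREE_ITER_NOT_EXTENTS|
                           BTREE_ITER_ALL_SNAPSHOTS);

while ((k = bch2_btree_iter_peek(iter)).k &&
       !(ret = bkey_err(k))) {
        /* ... use k; k.k->p.snapshot identifies the snapshot version ... */
        bch2_btree_iter_advance(iter);
}

bch2_trans_iter_put(&trans, iter);
bch2_trans_exit(&trans);
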
@ -1999,11 +2054,14 @@ alloc_iter:
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES && if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
btree_node_type_is_extents(btree_id) && btree_node_type_is_extents(btree_id) &&
!(flags & BTREE_ITER_NOT_EXTENTS)) !(flags & BTREE_ITER_NOT_EXTENTS) &&
!(flags & BTREE_ITER_ALL_SNAPSHOTS))
flags |= BTREE_ITER_IS_EXTENTS; flags |= BTREE_ITER_IS_EXTENTS;
iter->flags = flags; iter->flags = flags;
iter->snapshot = pos.snapshot;
if (!(iter->flags & BTREE_ITER_INTENT)) if (!(iter->flags & BTREE_ITER_INTENT))
bch2_btree_iter_downgrade(iter); bch2_btree_iter_downgrade(iter);
else if (!iter->locks_want) else if (!iter->locks_want)
@ -2026,6 +2084,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
__bch2_trans_get_iter(trans, btree_id, pos, __bch2_trans_get_iter(trans, btree_id, pos,
BTREE_ITER_NODES| BTREE_ITER_NODES|
BTREE_ITER_NOT_EXTENTS| BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_ALL_SNAPSHOTS|
flags); flags);
unsigned i; unsigned i;
@ -2127,6 +2186,7 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
trans->nr_updates2 = 0; trans->nr_updates2 = 0;
trans->mem_top = 0; trans->mem_top = 0;
trans->hooks = NULL;
trans->extra_journal_entries = NULL; trans->extra_journal_entries = NULL;
trans->extra_journal_entry_u64s = 0; trans->extra_journal_entry_u64s = 0;
@ -2137,6 +2197,7 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
(void *) &trans->fs_usage_deltas->memset_start); (void *) &trans->fs_usage_deltas->memset_start);
} }
if (!(flags & TRANS_RESET_NOUNLOCK))
bch2_trans_cond_resched(trans); bch2_trans_cond_resched(trans);
if (!(flags & TRANS_RESET_NOTRAVERSE)) if (!(flags & TRANS_RESET_NOTRAVERSE))


@ -172,6 +172,9 @@ bool bch2_btree_iter_rewind(struct btree_iter *);
static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{ {
if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
new_pos.snapshot = iter->snapshot;
bkey_init(&iter->k); bkey_init(&iter->k);
iter->k.p = iter->pos = new_pos; iter->k.p = iter->pos = new_pos;
} }
@ -303,6 +306,7 @@ static inline void set_btree_iter_dontneed(struct btree_trans *trans, struct btr
} }
#define TRANS_RESET_NOTRAVERSE (1 << 0) #define TRANS_RESET_NOTRAVERSE (1 << 0)
#define TRANS_RESET_NOUNLOCK (1 << 1)
void bch2_trans_reset(struct btree_trans *, unsigned); void bch2_trans_reset(struct btree_trans *, unsigned);


@ -21,7 +21,7 @@ static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
const struct bkey_cached_key *key = arg->key; const struct bkey_cached_key *key = arg->key;
return cmp_int(ck->key.btree_id, key->btree_id) ?: return cmp_int(ck->key.btree_id, key->btree_id) ?:
bkey_cmp(ck->key.pos, key->pos); bpos_cmp(ck->key.pos, key->pos);
} }
static const struct rhashtable_params bch2_btree_key_cache_params = { static const struct rhashtable_params bch2_btree_key_cache_params = {
@ -70,7 +70,7 @@ static void bkey_cached_evict(struct btree_key_cache *c,
bch2_btree_key_cache_params)); bch2_btree_key_cache_params));
memset(&ck->key, ~0, sizeof(ck->key)); memset(&ck->key, ~0, sizeof(ck->key));
c->nr_keys--; atomic_long_dec(&c->nr_keys);
} }
static void bkey_cached_free(struct btree_key_cache *bc, static void bkey_cached_free(struct btree_key_cache *bc,
@ -99,12 +99,6 @@ bkey_cached_alloc(struct btree_key_cache *c)
{ {
struct bkey_cached *ck; struct bkey_cached *ck;
list_for_each_entry_reverse(ck, &c->freed, list)
if (bkey_cached_lock_for_evict(ck)) {
c->nr_freed--;
return ck;
}
ck = kmem_cache_alloc(bch2_key_cache, GFP_NOFS|__GFP_ZERO); ck = kmem_cache_alloc(bch2_key_cache, GFP_NOFS|__GFP_ZERO);
if (likely(ck)) { if (likely(ck)) {
INIT_LIST_HEAD(&ck->list); INIT_LIST_HEAD(&ck->list);
@ -114,11 +108,39 @@ bkey_cached_alloc(struct btree_key_cache *c)
return ck; return ck;
} }
list_for_each_entry(ck, &c->clean, list) return NULL;
}
static struct bkey_cached *
bkey_cached_reuse(struct btree_key_cache *c)
{
struct bucket_table *tbl;
struct rhash_head *pos;
struct bkey_cached *ck;
unsigned i;
mutex_lock(&c->lock);
list_for_each_entry_reverse(ck, &c->freed, list)
if (bkey_cached_lock_for_evict(ck)) { if (bkey_cached_lock_for_evict(ck)) {
bkey_cached_evict(c, ck); c->nr_freed--;
list_del(&ck->list);
mutex_unlock(&c->lock);
return ck; return ck;
} }
mutex_unlock(&c->lock);
rcu_read_lock();
tbl = rht_dereference_rcu(c->table.tbl, &c->table);
for (i = 0; i < tbl->size; i++)
rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
bkey_cached_lock_for_evict(ck)) {
bkey_cached_evict(c, ck);
rcu_read_unlock();
return ck;
}
}
rcu_read_unlock();
return NULL; return NULL;
} }
@ -129,11 +151,18 @@ btree_key_cache_create(struct btree_key_cache *c,
struct bpos pos) struct bpos pos)
{ {
struct bkey_cached *ck; struct bkey_cached *ck;
bool was_new = true;
ck = bkey_cached_alloc(c); ck = bkey_cached_alloc(c);
if (!ck)
if (unlikely(!ck)) {
ck = bkey_cached_reuse(c);
if (unlikely(!ck))
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
was_new = false;
}
ck->c.level = 0; ck->c.level = 0;
ck->c.btree_id = btree_id; ck->c.btree_id = btree_id;
ck->key.btree_id = btree_id; ck->key.btree_id = btree_id;
@ -141,17 +170,26 @@ btree_key_cache_create(struct btree_key_cache *c,
ck->valid = false; ck->valid = false;
ck->flags = 1U << BKEY_CACHED_ACCESSED; ck->flags = 1U << BKEY_CACHED_ACCESSED;
if (rhashtable_lookup_insert_fast(&c->table, if (unlikely(rhashtable_lookup_insert_fast(&c->table,
&ck->hash, &ck->hash,
bch2_btree_key_cache_params)) { bch2_btree_key_cache_params))) {
/* We raced with another fill: */ /* We raced with another fill: */
if (likely(was_new)) {
six_unlock_write(&ck->c.lock);
six_unlock_intent(&ck->c.lock);
kfree(ck);
} else {
mutex_lock(&c->lock);
bkey_cached_free(c, ck); bkey_cached_free(c, ck);
mutex_unlock(&c->lock);
}
return NULL; return NULL;
} }
c->nr_keys++; atomic_long_inc(&c->nr_keys);
list_move(&ck->list, &c->clean);
six_unlock_write(&ck->c.lock); six_unlock_write(&ck->c.lock);
return ck; return ck;
@ -213,7 +251,7 @@ static int bkey_cached_check_fn(struct six_lock *lock, void *p)
const struct btree_iter *iter = p; const struct btree_iter *iter = p;
return ck->key.btree_id == iter->btree_id && return ck->key.btree_id == iter->btree_id &&
!bkey_cmp(ck->key.pos, iter->pos) ? 0 : -1; !bpos_cmp(ck->key.pos, iter->pos) ? 0 : -1;
} }
__flatten __flatten
@ -238,11 +276,8 @@ retry:
return 0; return 0;
} }
mutex_lock(&c->btree_key_cache.lock);
ck = btree_key_cache_create(&c->btree_key_cache, ck = btree_key_cache_create(&c->btree_key_cache,
iter->btree_id, iter->pos); iter->btree_id, iter->pos);
mutex_unlock(&c->btree_key_cache.lock);
ret = PTR_ERR_OR_ZERO(ck); ret = PTR_ERR_OR_ZERO(ck);
if (ret) if (ret)
goto err; goto err;
@ -257,7 +292,7 @@ retry:
if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want, if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
bkey_cached_check_fn, iter, _THIS_IP_)) { bkey_cached_check_fn, iter, _THIS_IP_)) {
if (ck->key.btree_id != iter->btree_id || if (ck->key.btree_id != iter->btree_id ||
bkey_cmp(ck->key.pos, iter->pos)) { bpos_cmp(ck->key.pos, iter->pos)) {
goto retry; goto retry;
} }
@ -267,7 +302,7 @@ retry:
} }
if (ck->key.btree_id != iter->btree_id || if (ck->key.btree_id != iter->btree_id ||
bkey_cmp(ck->key.pos, iter->pos)) { bpos_cmp(ck->key.pos, iter->pos)) {
six_unlock_type(&ck->c.lock, lock_want); six_unlock_type(&ck->c.lock, lock_want);
goto retry; goto retry;
} }
@ -370,15 +405,13 @@ err:
bch2_journal_pin_drop(j, &ck->journal); bch2_journal_pin_drop(j, &ck->journal);
bch2_journal_preres_put(j, &ck->res); bch2_journal_preres_put(j, &ck->res);
BUG_ON(!btree_node_locked(c_iter, 0));
if (!evict) { if (!evict) {
mutex_lock(&c->btree_key_cache.lock);
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
clear_bit(BKEY_CACHED_DIRTY, &ck->flags); clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
c->btree_key_cache.nr_dirty--; atomic_long_dec(&c->btree_key_cache.nr_dirty);
} }
list_move_tail(&ck->list, &c->btree_key_cache.clean);
mutex_unlock(&c->btree_key_cache.lock);
} else { } else {
evict: evict:
BUG_ON(!btree_node_intent_locked(c_iter, 0)); BUG_ON(!btree_node_intent_locked(c_iter, 0));
@ -388,13 +421,14 @@ evict:
six_lock_write(&ck->c.lock, NULL, NULL); six_lock_write(&ck->c.lock, NULL, NULL);
mutex_lock(&c->btree_key_cache.lock);
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
clear_bit(BKEY_CACHED_DIRTY, &ck->flags); clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
c->btree_key_cache.nr_dirty--; atomic_long_dec(&c->btree_key_cache.nr_dirty);
} }
bkey_cached_evict(&c->btree_key_cache, ck); bkey_cached_evict(&c->btree_key_cache, ck);
mutex_lock(&c->btree_key_cache.lock);
bkey_cached_free(&c->btree_key_cache, ck); bkey_cached_free(&c->btree_key_cache, ck);
mutex_unlock(&c->btree_key_cache.lock); mutex_unlock(&c->btree_key_cache.lock);
} }
@ -475,16 +509,11 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
ck->valid = true; ck->valid = true;
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
mutex_lock(&c->btree_key_cache.lock);
list_move(&ck->list, &c->btree_key_cache.dirty);
set_bit(BKEY_CACHED_DIRTY, &ck->flags); set_bit(BKEY_CACHED_DIRTY, &ck->flags);
c->btree_key_cache.nr_dirty++; atomic_long_inc(&c->btree_key_cache.nr_dirty);
if (bch2_nr_btree_keys_need_flush(c)) if (bch2_nr_btree_keys_need_flush(c))
kick_reclaim = true; kick_reclaim = true;
mutex_unlock(&c->btree_key_cache.lock);
} }
bch2_journal_pin_update(&c->journal, trans->journal_res.seq, bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
@ -509,9 +538,11 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
struct bch_fs *c = container_of(shrink, struct bch_fs, struct bch_fs *c = container_of(shrink, struct bch_fs,
btree_key_cache.shrink); btree_key_cache.shrink);
struct btree_key_cache *bc = &c->btree_key_cache; struct btree_key_cache *bc = &c->btree_key_cache;
struct bucket_table *tbl;
struct bkey_cached *ck, *t; struct bkey_cached *ck, *t;
size_t scanned = 0, freed = 0, nr = sc->nr_to_scan; size_t scanned = 0, freed = 0, nr = sc->nr_to_scan;
unsigned flags; unsigned start, flags;
int srcu_idx;
/* Return -1 if we can't do anything right now */ /* Return -1 if we can't do anything right now */
if (sc->gfp_mask & __GFP_FS) if (sc->gfp_mask & __GFP_FS)
@ -519,6 +550,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
else if (!mutex_trylock(&bc->lock)) else if (!mutex_trylock(&bc->lock))
return -1; return -1;
srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
flags = memalloc_nofs_save(); flags = memalloc_nofs_save();
/* /*
@ -540,7 +572,19 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
if (scanned >= nr) if (scanned >= nr)
goto out; goto out;
list_for_each_entry_safe(ck, t, &bc->clean, list) { rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
if (bc->shrink_iter >= tbl->size)
bc->shrink_iter = 0;
start = bc->shrink_iter;
do {
struct rhash_head *pos, *next;
rht_for_each_entry_safe(ck, pos, next, tbl, bc->shrink_iter, hash) {
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
continue;
if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
clear_bit(BKEY_CACHED_ACCESSED, &ck->flags); clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
else if (bkey_cached_lock_for_evict(ck)) { else if (bkey_cached_lock_for_evict(ck)) {
@ -549,14 +593,19 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
} }
scanned++; scanned++;
if (scanned >= nr) { if (scanned >= nr)
if (&t->list != &bc->clean) break;
list_move_tail(&bc->clean, &t->list);
goto out;
}
} }
bc->shrink_iter++;
if (bc->shrink_iter >= tbl->size)
bc->shrink_iter = 0;
} while (scanned < nr && bc->shrink_iter != start);
rcu_read_unlock();
out: out:
memalloc_nofs_restore(flags); memalloc_nofs_restore(flags);
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
mutex_unlock(&bc->lock); mutex_unlock(&bc->lock);
return freed; return freed;
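
The shrinker now walks the rhashtable's bucket array directly and remembers where it stopped in bc->shrink_iter, so successive calls resume where the last one left off and give up after at most one full lap. A standalone model of that resumable wrap-around scan (table size and bookkeeping simplified; not the real shrinker):

#include <stdio.h>

#define TABLE_SIZE 8
static unsigned shrink_iter;            /* persists between calls, like bc->shrink_iter */

static unsigned scan(unsigned nr_to_scan)
{
        unsigned scanned = 0, start;

        if (shrink_iter >= TABLE_SIZE)
                shrink_iter = 0;
        start = shrink_iter;

        do {
                printf("visiting bucket %u\n", shrink_iter);
                scanned++;              /* stand-in for walking one hash bucket */

                shrink_iter++;
                if (shrink_iter >= TABLE_SIZE)
                        shrink_iter = 0;
        } while (scanned < nr_to_scan && shrink_iter != start);

        return scanned;
}

int main(void)
{
        scan(3);        /* visits buckets 0, 1, 2 */
        scan(3);        /* resumes at bucket 3 */
        return 0;
}
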
@ -569,41 +618,45 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
btree_key_cache.shrink); btree_key_cache.shrink);
struct btree_key_cache *bc = &c->btree_key_cache; struct btree_key_cache *bc = &c->btree_key_cache;
return bc->nr_keys; return atomic_long_read(&bc->nr_keys);
} }
void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
{ {
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
struct bucket_table *tbl;
struct bkey_cached *ck, *n; struct bkey_cached *ck, *n;
struct rhash_head *pos;
unsigned i;
if (bc->shrink.list.next) if (bc->shrink.list.next)
unregister_shrinker(&bc->shrink); unregister_shrinker(&bc->shrink);
mutex_lock(&bc->lock); mutex_lock(&bc->lock);
list_splice(&bc->dirty, &bc->clean);
list_for_each_entry_safe(ck, n, &bc->clean, list) { rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
for (i = 0; i < tbl->size; i++)
rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
bkey_cached_evict(bc, ck);
list_add(&ck->list, &bc->freed);
}
rcu_read_unlock();
list_for_each_entry_safe(ck, n, &bc->freed, list) {
cond_resched(); cond_resched();
bch2_journal_pin_drop(&c->journal, &ck->journal); bch2_journal_pin_drop(&c->journal, &ck->journal);
bch2_journal_preres_put(&c->journal, &ck->res); bch2_journal_preres_put(&c->journal, &ck->res);
list_del(&ck->list);
kfree(ck->k); kfree(ck->k);
list_del(&ck->list);
kmem_cache_free(bch2_key_cache, ck);
bc->nr_keys--;
}
BUG_ON(bc->nr_dirty && !bch2_journal_error(&c->journal));
BUG_ON(bc->nr_keys);
list_for_each_entry_safe(ck, n, &bc->freed, list) {
cond_resched();
list_del(&ck->list);
kmem_cache_free(bch2_key_cache, ck); kmem_cache_free(bch2_key_cache, ck);
} }
BUG_ON(atomic_long_read(&bc->nr_dirty) && !bch2_journal_error(&c->journal));
BUG_ON(atomic_long_read(&bc->nr_keys));
mutex_unlock(&bc->lock); mutex_unlock(&bc->lock);
if (bc->table_init_done) if (bc->table_init_done)
@ -614,8 +667,6 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
{ {
mutex_init(&c->lock); mutex_init(&c->lock);
INIT_LIST_HEAD(&c->freed); INIT_LIST_HEAD(&c->freed);
INIT_LIST_HEAD(&c->clean);
INIT_LIST_HEAD(&c->dirty);
} }
int bch2_fs_btree_key_cache_init(struct btree_key_cache *c) int bch2_fs_btree_key_cache_init(struct btree_key_cache *c)
@ -641,8 +692,8 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *c)
void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c) void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
{ {
pr_buf(out, "nr_freed:\t%zu\n", c->nr_freed); pr_buf(out, "nr_freed:\t%zu\n", c->nr_freed);
pr_buf(out, "nr_keys:\t%zu\n", c->nr_keys); pr_buf(out, "nr_keys:\t%zu\n", atomic_long_read(&c->nr_keys));
pr_buf(out, "nr_dirty:\t%zu\n", c->nr_dirty); pr_buf(out, "nr_dirty:\t%zu\n", atomic_long_read(&c->nr_dirty));
} }
void bch2_btree_key_cache_exit(void) void bch2_btree_key_cache_exit(void)


@ -3,8 +3,8 @@
static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c) static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
{ {
size_t nr_dirty = READ_ONCE(c->btree_key_cache.nr_dirty); size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
size_t nr_keys = READ_ONCE(c->btree_key_cache.nr_keys); size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
size_t max_dirty = 1024 + nr_keys / 2; size_t max_dirty = 1024 + nr_keys / 2;
return max_t(ssize_t, 0, nr_dirty - max_dirty); return max_t(ssize_t, 0, nr_dirty - max_dirty);
@ -12,8 +12,8 @@ static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
{ {
size_t nr_dirty = READ_ONCE(c->btree_key_cache.nr_dirty); size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
size_t nr_keys = READ_ONCE(c->btree_key_cache.nr_keys); size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
size_t max_dirty = 4096 + (nr_keys * 3) / 4; size_t max_dirty = 4096 + (nr_keys * 3) / 4;
return nr_dirty > max_dirty && return nr_dirty > max_dirty &&
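
With the counters now atomic_long_t, these two helpers read them locklessly and derive the flush and throttle thresholds. A quick worked example of the arithmetic (numbers are illustrative only):

#include <stdio.h>

int main(void)
{
        long nr_keys = 100000, nr_dirty = 60000;
        long flush_thresh = 1024 + nr_keys / 2;         /* 51024 */
        long wait_thresh  = 4096 + (nr_keys * 3) / 4;   /* 79096 */

        printf("need to flush %ld keys\n",
               nr_dirty > flush_thresh ? nr_dirty - flush_thresh : 0);
        printf("must wait: %s\n", nr_dirty > wait_thresh ? "yes" : "no");
        return 0;
}
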


@ -216,6 +216,7 @@ enum btree_iter_type {
#define BTREE_ITER_CACHED_NOFILL (1 << 9) #define BTREE_ITER_CACHED_NOFILL (1 << 9)
#define BTREE_ITER_CACHED_NOCREATE (1 << 10) #define BTREE_ITER_CACHED_NOCREATE (1 << 10)
#define BTREE_ITER_NOT_EXTENTS (1 << 11) #define BTREE_ITER_NOT_EXTENTS (1 << 11)
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
enum btree_iter_uptodate { enum btree_iter_uptodate {
BTREE_ITER_UPTODATE = 0, BTREE_ITER_UPTODATE = 0,
@ -245,6 +246,8 @@ struct btree_iter {
/* what we're searching for/what the iterator actually points to: */ /* what we're searching for/what the iterator actually points to: */
struct bpos real_pos; struct bpos real_pos;
struct bpos pos_after_commit; struct bpos pos_after_commit;
/* When we're filtering by snapshot, the snapshot ID we're looking for: */
unsigned snapshot;
u16 flags; u16 flags;
u8 idx; u8 idx;
@ -292,13 +295,12 @@ struct btree_key_cache {
struct rhashtable table; struct rhashtable table;
bool table_init_done; bool table_init_done;
struct list_head freed; struct list_head freed;
struct list_head clean;
struct list_head dirty;
struct shrinker shrink; struct shrinker shrink;
unsigned shrink_iter;
size_t nr_freed; size_t nr_freed;
size_t nr_keys; atomic_long_t nr_keys;
size_t nr_dirty; atomic_long_t nr_dirty;
}; };
struct bkey_cached_key { struct bkey_cached_key {
@ -330,7 +332,7 @@ struct bkey_cached {
struct btree_insert_entry { struct btree_insert_entry {
unsigned trigger_flags; unsigned trigger_flags;
u8 bkey_type; u8 bkey_type;
u8 btree_id; enum btree_id btree_id:8;
u8 level; u8 level;
unsigned trans_triggers_run:1; unsigned trans_triggers_run:1;
unsigned is_extent:1; unsigned is_extent:1;
@ -344,6 +346,14 @@ struct btree_insert_entry {
#define BTREE_ITER_MAX 32 #define BTREE_ITER_MAX 32
#endif #endif
struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
struct btree_trans_commit_hook {
btree_trans_commit_hook_fn *fn;
struct btree_trans_commit_hook *next;
};
struct btree_trans { struct btree_trans {
struct bch_fs *c; struct bch_fs *c;
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG
@ -378,6 +388,7 @@ struct btree_trans {
struct btree_insert_entry *updates2; struct btree_insert_entry *updates2;
/* update path: */ /* update path: */
struct btree_trans_commit_hook *hooks;
struct jset_entry *extra_journal_entries; struct jset_entry *extra_journal_entries;
unsigned extra_journal_entry_u64s; unsigned extra_journal_entry_u64s;
struct journal_entry_pin *journal_pin; struct journal_entry_pin *journal_pin;
@ -600,6 +611,17 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter)
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
BTREE_NODE_TYPE_HAS_MEM_TRIGGERS) BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
#define BTREE_ID_HAS_SNAPSHOTS \
((1U << BTREE_ID_extents)| \
(1U << BTREE_ID_inodes)| \
(1U << BTREE_ID_dirents)| \
(1U << BTREE_ID_xattrs))
static inline bool btree_type_has_snapshots(enum btree_id id)
{
return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
}
enum btree_trigger_flags { enum btree_trigger_flags {
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */


@ -77,6 +77,8 @@ int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
int bch2_trans_update(struct btree_trans *, struct btree_iter *, int bch2_trans_update(struct btree_trans *, struct btree_iter *,
struct bkey_i *, enum btree_trigger_flags); struct bkey_i *, enum btree_trigger_flags);
void bch2_trans_commit_hook(struct btree_trans *,
struct btree_trans_commit_hook *);
int __bch2_trans_commit(struct btree_trans *); int __bch2_trans_commit(struct btree_trans *);
/** /**


@ -50,7 +50,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
break; break;
bp = bkey_s_c_to_btree_ptr_v2(k); bp = bkey_s_c_to_btree_ptr_v2(k);
if (bkey_cmp(next_node, bp.v->min_key)) { if (bpos_cmp(next_node, bp.v->min_key)) {
bch2_dump_btree_node(c, b); bch2_dump_btree_node(c, b);
panic("expected next min_key %s got %s\n", panic("expected next min_key %s got %s\n",
(bch2_bpos_to_text(&PBUF(buf1), next_node), buf1), (bch2_bpos_to_text(&PBUF(buf1), next_node), buf1),
@ -60,7 +60,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
bch2_btree_node_iter_advance(&iter, b); bch2_btree_node_iter_advance(&iter, b);
if (bch2_btree_node_iter_end(&iter)) { if (bch2_btree_node_iter_end(&iter)) {
if (bkey_cmp(k.k->p, b->key.k.p)) { if (bpos_cmp(k.k->p, b->key.k.p)) {
bch2_dump_btree_node(c, b); bch2_dump_btree_node(c, b);
panic("expected end %s got %s\n", panic("expected end %s got %s\n",
(bch2_bpos_to_text(&PBUF(buf1), b->key.k.p), buf1), (bch2_bpos_to_text(&PBUF(buf1), b->key.k.p), buf1),
@ -69,7 +69,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
break; break;
} }
next_node = bkey_successor(k.k->p); next_node = bpos_successor(k.k->p);
} }
#endif #endif
} }
@ -82,8 +82,6 @@ void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
struct bset_tree *t; struct bset_tree *t;
struct bkey uk; struct bkey uk;
bch2_bkey_format_add_pos(s, b->data->min_key);
for_each_bset(b, t) for_each_bset(b, t)
bset_tree_for_each_key(b, t, k) bset_tree_for_each_key(b, t, k)
if (!bkey_deleted(k)) { if (!bkey_deleted(k)) {
@ -97,6 +95,8 @@ static struct bkey_format bch2_btree_calc_format(struct btree *b)
struct bkey_format_state s; struct bkey_format_state s;
bch2_bkey_format_init(&s); bch2_bkey_format_init(&s);
bch2_bkey_format_add_pos(&s, b->data->min_key);
bch2_bkey_format_add_pos(&s, b->data->max_key);
__bch2_btree_calc_format(&s, b); __bch2_btree_calc_format(&s, b);
return bch2_bkey_format_done(&s); return bch2_bkey_format_done(&s);
@ -289,7 +289,6 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
b->data->flags = 0; b->data->flags = 0;
SET_BTREE_NODE_ID(b->data, as->btree_id); SET_BTREE_NODE_ID(b->data, as->btree_id);
SET_BTREE_NODE_LEVEL(b->data, level); SET_BTREE_NODE_LEVEL(b->data, level);
b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr;
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key); struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);
@ -1095,10 +1094,12 @@ static struct btree *__btree_split_node(struct btree_update *as,
struct btree *n1, struct btree *n1,
struct btree_iter *iter) struct btree_iter *iter)
{ {
struct bkey_format_state s;
size_t nr_packed = 0, nr_unpacked = 0; size_t nr_packed = 0, nr_unpacked = 0;
struct btree *n2; struct btree *n2;
struct bset *set1, *set2; struct bset *set1, *set2;
struct bkey_packed *k, *prev = NULL; struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL;
struct bpos n1_pos;
n2 = bch2_btree_node_alloc(as, n1->c.level); n2 = bch2_btree_node_alloc(as, n1->c.level);
bch2_btree_update_add_new_node(as, n2); bch2_btree_update_add_new_node(as, n2);
@ -1108,8 +1109,6 @@ static struct btree *__btree_split_node(struct btree_update *as,
SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data)); SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data));
n2->key.k.p = n1->key.k.p; n2->key.k.p = n1->key.k.p;
btree_node_set_format(n2, n2->data->format);
set1 = btree_bset_first(n1); set1 = btree_bset_first(n1);
set2 = btree_bset_first(n2); set2 = btree_bset_first(n2);
@ -1119,7 +1118,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
*/ */
k = set1->start; k = set1->start;
while (1) { while (1) {
struct bkey_packed *n = bkey_next_skip_noops(k, vstruct_last(set1)); struct bkey_packed *n = bkey_next(k);
if (n == vstruct_last(set1)) if (n == vstruct_last(set1))
break; break;
@ -1136,33 +1135,53 @@ static struct btree *__btree_split_node(struct btree_update *as,
} }
BUG_ON(!prev); BUG_ON(!prev);
set2_start = k;
set2_end = vstruct_last(set1);
btree_set_max(n1, bkey_unpack_pos(n1, prev)); set1->u64s = cpu_to_le16((u64 *) set2_start - set1->_data);
btree_set_min(n2, bkey_successor(n1->key.k.p));
set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k);
set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s));
set_btree_bset_end(n1, n1->set); set_btree_bset_end(n1, n1->set);
set_btree_bset_end(n2, n2->set);
n2->nr.live_u64s = le16_to_cpu(set2->u64s);
n2->nr.bset_u64s[0] = le16_to_cpu(set2->u64s);
n2->nr.packed_keys = n1->nr.packed_keys - nr_packed;
n2->nr.unpacked_keys = n1->nr.unpacked_keys - nr_unpacked;
n1->nr.live_u64s = le16_to_cpu(set1->u64s); n1->nr.live_u64s = le16_to_cpu(set1->u64s);
n1->nr.bset_u64s[0] = le16_to_cpu(set1->u64s); n1->nr.bset_u64s[0] = le16_to_cpu(set1->u64s);
n1->nr.packed_keys = nr_packed; n1->nr.packed_keys = nr_packed;
n1->nr.unpacked_keys = nr_unpacked; n1->nr.unpacked_keys = nr_unpacked;
n1_pos = bkey_unpack_pos(n1, prev);
if (as->c->sb.version < bcachefs_metadata_version_snapshot)
n1_pos.snapshot = U32_MAX;
btree_set_max(n1, n1_pos);
btree_set_min(n2, bpos_successor(n1->key.k.p));
bch2_bkey_format_init(&s);
bch2_bkey_format_add_pos(&s, n2->data->min_key);
bch2_bkey_format_add_pos(&s, n2->data->max_key);
for (k = set2_start; k != set2_end; k = bkey_next(k)) {
struct bkey uk = bkey_unpack_key(n1, k);
bch2_bkey_format_add_key(&s, &uk);
}
n2->data->format = bch2_bkey_format_done(&s);
btree_node_set_format(n2, n2->data->format);
out = set2->start;
memset(&n2->nr, 0, sizeof(n2->nr));
for (k = set2_start; k != set2_end; k = bkey_next(k)) {
BUG_ON(!bch2_bkey_transform(&n2->format, out, bkey_packed(k)
? &n1->format : &bch2_bkey_format_current, k));
out->format = KEY_FORMAT_LOCAL_BTREE;
btree_keys_account_key_add(&n2->nr, 0, out);
out = bkey_next(out);
}
set2->u64s = cpu_to_le16((u64 *) out - set2->_data);
set_btree_bset_end(n2, n2->set);
BUG_ON(!set1->u64s); BUG_ON(!set1->u64s);
BUG_ON(!set2->u64s); BUG_ON(!set2->u64s);
memcpy_u64s(set2->start,
vstruct_end(set1),
le16_to_cpu(set2->u64s));
btree_node_reset_sib_u64s(n1); btree_node_reset_sib_u64s(n1);
btree_node_reset_sib_u64s(n2); btree_node_reset_sib_u64s(n2);
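
__btree_split_node() now rebuilds n2's format after the split point is known instead of inheriting n1's. A condensed recipe from the hunk above, with declarations, accounting and BUG_ON checks elided (this is a sketch of the same code, not an alternative implementation):

bch2_bkey_format_init(&s);
bch2_bkey_format_add_pos(&s, n2->data->min_key);        /* a format must cover both bounds */
bch2_bkey_format_add_pos(&s, n2->data->max_key);
for (k = set2_start; k != set2_end; k = bkey_next(k)) {
        struct bkey uk = bkey_unpack_key(n1, k);        /* keys are still in n1's format */
        bch2_bkey_format_add_key(&s, &uk);
}
n2->data->format = bch2_bkey_format_done(&s);
btree_node_set_format(n2, n2->data->format);

/* then repack every key into the new format: */
out = set2->start;
for (k = set2_start; k != set2_end; k = bkey_next(k)) {
        bch2_bkey_transform(&n2->format, out,
                            bkey_packed(k) ? &n1->format : &bch2_bkey_format_current, k);
        out->format = KEY_FORMAT_LOCAL_BTREE;
        out = bkey_next(out);
}
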
@ -1216,7 +1235,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
i = btree_bset_first(b); i = btree_bset_first(b);
src = dst = i->start; src = dst = i->start;
while (src != vstruct_last(i)) { while (src != vstruct_last(i)) {
n = bkey_next_skip_noops(src, vstruct_last(i)); n = bkey_next(src);
if (!bkey_deleted(src)) { if (!bkey_deleted(src)) {
memmove_u64s_down(dst, src, src->u64s); memmove_u64s_down(dst, src, src->u64s);
dst = bkey_next(dst); dst = bkey_next(dst);
@ -1563,8 +1582,10 @@ retry:
} }
bch2_bkey_format_init(&new_s); bch2_bkey_format_init(&new_s);
__bch2_btree_calc_format(&new_s, b); bch2_bkey_format_add_pos(&new_s, prev->data->min_key);
__bch2_btree_calc_format(&new_s, m); __bch2_btree_calc_format(&new_s, prev);
__bch2_btree_calc_format(&new_s, next);
bch2_bkey_format_add_pos(&new_s, next->data->max_key);
new_f = bch2_bkey_format_done(&new_s); new_f = bch2_bkey_format_done(&new_s);
sib_u64s = btree_node_u64s_with_format(b, &new_f) + sib_u64s = btree_node_u64s_with_format(b, &new_f) +


@ -26,7 +26,7 @@ static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
{ {
return cmp_int(l->btree_id, r->btree_id) ?: return cmp_int(l->btree_id, r->btree_id) ?:
-cmp_int(l->level, r->level) ?: -cmp_int(l->level, r->level) ?:
bkey_cmp(l->k->k.p, r->k->k.p); bpos_cmp(l->k->k.p, r->k->k.p);
} }
static inline bool same_leaf_as_prev(struct btree_trans *trans, static inline bool same_leaf_as_prev(struct btree_trans *trans,
@ -70,8 +70,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
EBUG_ON(btree_node_just_written(b)); EBUG_ON(btree_node_just_written(b));
EBUG_ON(bset_written(b, btree_bset_last(b))); EBUG_ON(bset_written(b, btree_bset_last(b)));
EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0); EBUG_ON(bpos_cmp(insert->k.p, b->data->min_key) < 0);
EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0); EBUG_ON(bpos_cmp(insert->k.p, b->data->max_key) > 0);
EBUG_ON(insert->k.u64s > EBUG_ON(insert->k.u64s >
bch_btree_keys_u64s_remaining(iter->trans->c, b)); bch_btree_keys_u64s_remaining(iter->trans->c, b));
EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
@ -223,9 +223,17 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
BUG_ON(bch2_debug_check_bkeys && if (bch2_debug_check_bkeys) {
bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type)); const char *invalid = bch2_bkey_invalid(c,
BUG_ON(bkey_cmp(i->k->k.p, i->iter->real_pos)); bkey_i_to_s_c(i->k), i->bkey_type);
if (invalid) {
char buf[200];
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
panic("invalid bkey %s on insert: %s\n", buf, invalid);
}
}
BUG_ON(!i->is_extent && bpos_cmp(i->k->k.p, i->iter->real_pos));
BUG_ON(i->level != i->iter->level); BUG_ON(i->level != i->iter->level);
BUG_ON(i->btree_id != i->iter->btree_id); BUG_ON(i->btree_id != i->iter->btree_id);
} }
@ -369,6 +377,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bch_fs_usage *fs_usage = NULL; struct bch_fs_usage *fs_usage = NULL;
struct btree_insert_entry *i; struct btree_insert_entry *i;
struct btree_trans_commit_hook *h;
unsigned u64s = 0; unsigned u64s = 0;
bool marking = false; bool marking = false;
int ret; int ret;
@ -386,6 +395,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
prefetch(&trans->c->journal.flags); prefetch(&trans->c->journal.flags);
h = trans->hooks;
while (h) {
ret = h->fn(trans, h);
if (ret)
return ret;
h = h->next;
}
trans_for_each_update2(trans, i) { trans_for_each_update2(trans, i) {
/* Multiple inserts might go to same leaf: */ /* Multiple inserts might go to same leaf: */
if (!same_leaf_as_prev(trans, i)) if (!same_leaf_as_prev(trans, i))
@ -556,6 +573,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
if (trans->flags & BTREE_INSERT_NOUNLOCK) if (trans->flags & BTREE_INSERT_NOUNLOCK)
trans->nounlock = true; trans->nounlock = true;
if (!(trans->flags & BTREE_INSERT_NOUNLOCK))
trans_for_each_update2(trans, i) trans_for_each_update2(trans, i)
if (btree_iter_type(i->iter) != BTREE_ITER_CACHED && if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
!same_leaf_as_prev(trans, i)) !same_leaf_as_prev(trans, i))
@ -826,7 +844,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
struct btree_insert_entry *i = NULL; struct btree_insert_entry *i = NULL;
struct btree_iter *iter; struct btree_iter *iter;
bool trans_trigger_run; bool trans_trigger_run;
unsigned u64s; unsigned u64s, reset_flags = 0;
int ret = 0; int ret = 0;
if (!trans->nr_updates) if (!trans->nr_updates)
@ -940,7 +958,11 @@ out:
if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
percpu_ref_put(&trans->c->writes); percpu_ref_put(&trans->c->writes);
out_reset: out_reset:
bch2_trans_reset(trans, !ret ? TRANS_RESET_NOTRAVERSE : 0); if (!ret)
reset_flags |= TRANS_RESET_NOTRAVERSE;
if (!ret && (trans->flags & BTREE_INSERT_NOUNLOCK))
reset_flags |= TRANS_RESET_NOUNLOCK;
bch2_trans_reset(trans, reset_flags);
return ret; return ret;
err: err:
@ -1053,6 +1075,13 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
return 0; return 0;
} }
void bch2_trans_commit_hook(struct btree_trans *trans,
struct btree_trans_commit_hook *h)
{
h->next = trans->hooks;
trans->hooks = h;
}
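
bch2_trans_commit_hook() above just pushes onto a singly linked list that bch2_trans_commit_write_locked() walks (see the hunk earlier in this file); a hedged usage sketch follows. The my_hook type and my_hook_fn are invented names, and the hook struct would typically be embedded in a caller-owned object that stays live until the commit:

struct my_hook {
        struct btree_trans_commit_hook h;
        u64 inum;
};

static int my_hook_fn(struct btree_trans *trans,
                      struct btree_trans_commit_hook *h)
{
        struct my_hook *hook = container_of(h, struct my_hook, h);

        /* runs in the write-locked commit path, just before the updates
         * are applied; a nonzero return fails this commit attempt */
        return hook->inum ? 0 : -EINVAL;
}

/* inside a transaction, before bch2_trans_commit(): */
hook->h.fn = my_hook_fn;
bch2_trans_commit_hook(trans, &hook->h);
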
int __bch2_btree_insert(struct btree_trans *trans, int __bch2_btree_insert(struct btree_trans *trans,
enum btree_id id, struct bkey_i *k) enum btree_id id, struct bkey_i *k)
{ {


@ -222,7 +222,9 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
bch2_trans_init(&trans, i->c, 0, 0); bch2_trans_init(&trans, i->c, 0, 0);
iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); iter = bch2_trans_get_iter(&trans, i->id, i->from,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS);
k = bch2_btree_iter_peek(iter); k = bch2_btree_iter_peek(iter);
while (k.k && !(err = bkey_err(k))) { while (k.k && !(err = bkey_err(k))) {
@ -273,7 +275,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
if (err) if (err)
return err; return err;
if (!i->size || !bkey_cmp(POS_MAX, i->from)) if (!i->size || !bpos_cmp(POS_MAX, i->from))
return i->ret; return i->ret;
bch2_trans_init(&trans, i->c, 0, 0); bch2_trans_init(&trans, i->c, 0, 0);
@ -289,8 +291,8 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
* can't easily correctly restart a btree node traversal across * can't easily correctly restart a btree node traversal across
* all nodes, meh * all nodes, meh
*/ */
i->from = bkey_cmp(POS_MAX, b->key.k.p) i->from = bpos_cmp(POS_MAX, b->key.k.p)
? bkey_successor(b->key.k.p) ? bpos_successor(b->key.k.p)
: b->key.k.p; : b->key.k.p;
if (!i->size) if (!i->size)


@ -141,7 +141,7 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
int bch2_dirent_create(struct btree_trans *trans, int bch2_dirent_create(struct btree_trans *trans,
u64 dir_inum, const struct bch_hash_info *hash_info, u64 dir_inum, const struct bch_hash_info *hash_info,
u8 type, const struct qstr *name, u64 dst_inum, u8 type, const struct qstr *name, u64 dst_inum,
int flags) u64 *dir_offset, int flags)
{ {
struct bkey_i_dirent *dirent; struct bkey_i_dirent *dirent;
int ret; int ret;
@ -151,8 +151,11 @@ int bch2_dirent_create(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
return bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
dir_inum, &dirent->k_i, flags); dir_inum, &dirent->k_i, flags);
*dir_offset = dirent->k.p.offset;
return ret;
} }
static void dirent_copy_target(struct bkey_i_dirent *dst, static void dirent_copy_target(struct bkey_i_dirent *dst,
@ -165,8 +168,8 @@ static void dirent_copy_target(struct bkey_i_dirent *dst,
int bch2_dirent_rename(struct btree_trans *trans, int bch2_dirent_rename(struct btree_trans *trans,
u64 src_dir, struct bch_hash_info *src_hash, u64 src_dir, struct bch_hash_info *src_hash,
u64 dst_dir, struct bch_hash_info *dst_hash, u64 dst_dir, struct bch_hash_info *dst_hash,
const struct qstr *src_name, u64 *src_inum, const struct qstr *src_name, u64 *src_inum, u64 *src_offset,
const struct qstr *dst_name, u64 *dst_inum, const struct qstr *dst_name, u64 *dst_inum, u64 *dst_offset,
enum bch_rename_mode mode) enum bch_rename_mode mode)
{ {
struct btree_iter *src_iter = NULL, *dst_iter = NULL; struct btree_iter *src_iter = NULL, *dst_iter = NULL;
@ -255,7 +258,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
new_dst->k.p = src_iter->pos; new_dst->k.p = src_iter->pos;
bch2_trans_update(trans, src_iter, bch2_trans_update(trans, src_iter,
&new_dst->k_i, 0); &new_dst->k_i, 0);
goto out; goto out_set_offset;
} else { } else {
/* If we're overwriting, we can't insert new_dst /* If we're overwriting, we can't insert new_dst
* at a different slot because it has to * at a different slot because it has to
@ -278,6 +281,9 @@ int bch2_dirent_rename(struct btree_trans *trans,
bch2_trans_update(trans, src_iter, &new_src->k_i, 0); bch2_trans_update(trans, src_iter, &new_src->k_i, 0);
bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0); bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0);
out_set_offset:
*src_offset = new_src->k.p.offset;
*dst_offset = new_dst->k.p.offset;
out: out:
bch2_trans_iter_put(trans, src_iter); bch2_trans_iter_put(trans, src_iter);
bch2_trans_iter_put(trans, dst_iter); bch2_trans_iter_put(trans, dst_iter);


@ -31,7 +31,7 @@ static inline unsigned dirent_val_u64s(unsigned len)
int bch2_dirent_create(struct btree_trans *, u64, int bch2_dirent_create(struct btree_trans *, u64,
const struct bch_hash_info *, u8, const struct bch_hash_info *, u8,
const struct qstr *, u64, int); const struct qstr *, u64, u64 *, int);
int bch2_dirent_delete_at(struct btree_trans *, int bch2_dirent_delete_at(struct btree_trans *,
const struct bch_hash_info *, const struct bch_hash_info *,
@ -46,8 +46,8 @@ enum bch_rename_mode {
int bch2_dirent_rename(struct btree_trans *, int bch2_dirent_rename(struct btree_trans *,
u64, struct bch_hash_info *, u64, struct bch_hash_info *,
u64, struct bch_hash_info *, u64, struct bch_hash_info *,
const struct qstr *, u64 *, const struct qstr *, u64 *, u64 *,
const struct qstr *, u64 *, const struct qstr *, u64 *, u64 *,
enum bch_rename_mode); enum bch_rename_mode);
struct btree_iter * struct btree_iter *


@ -873,6 +873,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
if (ret) if (ret)
break; break;
} }
bch2_trans_iter_put(&trans, iter);
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&sk, c); bch2_bkey_buf_exit(&sk, c);


@ -180,7 +180,8 @@ const char *bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
return "value too big"; return "value too big";
if (bp.v->min_key.snapshot) if (c->sb.version < bcachefs_metadata_version_snapshot &&
bp.v->min_key.snapshot)
return "invalid min_key.snapshot"; return "invalid min_key.snapshot";
return bch2_bkey_ptrs_invalid(c, k); return bch2_bkey_ptrs_invalid(c, k);
@ -212,8 +213,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
btree_node_type_is_extents(btree_id) && btree_node_type_is_extents(btree_id) &&
bkey_cmp(bp.v->min_key, POS_MIN)) bkey_cmp(bp.v->min_key, POS_MIN))
bp.v->min_key = write bp.v->min_key = write
? bkey_predecessor(bp.v->min_key) ? bpos_nosnap_predecessor(bp.v->min_key)
: bkey_successor(bp.v->min_key); : bpos_nosnap_successor(bp.v->min_key);
} }
/* KEY_TYPE_extent: */ /* KEY_TYPE_extent: */


@ -582,6 +582,24 @@ void bch2_ptr_swab(struct bkey_s);
/* Generic extent code: */ /* Generic extent code: */
enum bch_extent_overlap {
BCH_EXTENT_OVERLAP_ALL = 0,
BCH_EXTENT_OVERLAP_BACK = 1,
BCH_EXTENT_OVERLAP_FRONT = 2,
BCH_EXTENT_OVERLAP_MIDDLE = 3,
};
/* Returns how k overlaps with m */
static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
const struct bkey *m)
{
int cmp1 = bkey_cmp(k->p, m->p) < 0;
int cmp2 = bkey_cmp(bkey_start_pos(k),
bkey_start_pos(m)) > 0;
return (cmp1 << 1) + cmp2;
}
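
bch2_extent_overlap() packs two comparisons into a two-bit code describing how extent k overlaps extent m. An editor's worked example of the four cases (offsets only; it assumes the extents do overlap, which callers are expected to know already):

/* m spans [10, 20); bkey_start_pos() is the start of an extent, k->p its end.
 *
 *   k = [ 5, 25)  ->  cmp1 = 0, cmp2 = 0  ->  0 = BCH_EXTENT_OVERLAP_ALL
 *   k = [15, 25)  ->  cmp1 = 0, cmp2 = 1  ->  1 = BCH_EXTENT_OVERLAP_BACK
 *   k = [ 5, 15)  ->  cmp1 = 1, cmp2 = 0  ->  2 = BCH_EXTENT_OVERLAP_FRONT
 *   k = [12, 18)  ->  cmp1 = 1, cmp2 = 1  ->  3 = BCH_EXTENT_OVERLAP_MIDDLE
 *
 * cmp1 asks "does k end before m ends?", cmp2 asks "does k start after
 * m starts?"; (cmp1 << 1) + cmp2 is the enum value.
 */
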
int bch2_cut_front_s(struct bpos, struct bkey_s); int bch2_cut_front_s(struct bpos, struct bkey_s);
int bch2_cut_back_s(struct bpos, struct bkey_s); int bch2_cut_back_s(struct bpos, struct bkey_s);


@@ -20,8 +20,10 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
 {
	struct bch_fs *c = trans->c;
	struct btree_iter *dir_iter = NULL;
+	struct btree_iter *inode_iter = NULL;
	struct bch_hash_info hash = bch2_hash_info_init(c, new_inode);
-	u64 now = bch2_current_time(trans->c);
+	u64 now = bch2_current_time(c);
+	u64 dir_offset = 0;
	int ret;

	dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT);
@@ -34,7 +36,8 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
	if (!name)
		new_inode->bi_flags |= BCH_INODE_UNLINKED;

-	ret = bch2_inode_create(trans, new_inode);
+	inode_iter = bch2_inode_create(trans, new_inode);
+	ret = PTR_ERR_OR_ZERO(inode_iter);
	if (ret)
		goto err;
@@ -66,11 +69,20 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
		ret = bch2_dirent_create(trans, dir_inum, &dir_hash,
					 mode_to_type(new_inode->bi_mode),
					 name, new_inode->bi_inum,
+					 &dir_offset,
					 BCH_HASH_SET_MUST_CREATE);
		if (ret)
			goto err;
	}

+	if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
+		new_inode->bi_dir		= dir_u->bi_inum;
+		new_inode->bi_dir_offset	= dir_offset;
+	}
+
+	ret = bch2_inode_write(trans, inode_iter, new_inode);
 err:
+	bch2_trans_iter_put(trans, inode_iter);
	bch2_trans_iter_put(trans, dir_iter);
	return ret;
 }
@@ -79,9 +91,11 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
		    u64 inum, struct bch_inode_unpacked *dir_u,
		    struct bch_inode_unpacked *inode_u, const struct qstr *name)
 {
+	struct bch_fs *c = trans->c;
	struct btree_iter *dir_iter = NULL, *inode_iter = NULL;
	struct bch_hash_info dir_hash;
-	u64 now = bch2_current_time(trans->c);
+	u64 now = bch2_current_time(c);
+	u64 dir_offset = 0;
	int ret;

	inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT);
@@ -92,6 +106,8 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
	inode_u->bi_ctime = now;
	bch2_inode_nlink_inc(inode_u);
+	inode_u->bi_flags |= BCH_INODE_BACKPTR_UNTRUSTED;

	dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, 0);
	ret = PTR_ERR_OR_ZERO(dir_iter);
	if (ret)
@@ -99,12 +115,21 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
	dir_u->bi_mtime = dir_u->bi_ctime = now;

-	dir_hash = bch2_hash_info_init(trans->c, dir_u);
+	dir_hash = bch2_hash_info_init(c, dir_u);

	ret = bch2_dirent_create(trans, dir_inum, &dir_hash,
				 mode_to_type(inode_u->bi_mode),
-				 name, inum, BCH_HASH_SET_MUST_CREATE) ?:
-	      bch2_inode_write(trans, dir_iter, dir_u) ?:
+				 name, inum, &dir_offset,
+				 BCH_HASH_SET_MUST_CREATE);
+	if (ret)
+		goto err;
+
+	if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
+		inode_u->bi_dir		= dir_inum;
+		inode_u->bi_dir_offset	= dir_offset;
+	}
+
+	ret = bch2_inode_write(trans, dir_iter, dir_u) ?:
	      bch2_inode_write(trans, inode_iter, inode_u);
 err:
	bch2_trans_iter_put(trans, dir_iter);
@@ -117,10 +142,11 @@ int bch2_unlink_trans(struct btree_trans *trans,
		      struct bch_inode_unpacked *inode_u,
		      const struct qstr *name)
 {
+	struct bch_fs *c = trans->c;
	struct btree_iter *dir_iter = NULL, *dirent_iter = NULL,
			  *inode_iter = NULL;
	struct bch_hash_info dir_hash;
-	u64 inum, now = bch2_current_time(trans->c);
+	u64 inum, now = bch2_current_time(c);
	struct bkey_s_c k;
	int ret;
@@ -129,7 +155,7 @@ int bch2_unlink_trans(struct btree_trans *trans,
	if (ret)
		goto err;

-	dir_hash = bch2_hash_info_init(trans->c, dir_u);
+	dir_hash = bch2_hash_info_init(c, dir_u);

	dirent_iter = __bch2_dirent_lookup_trans(trans, dir_inum, &dir_hash,
						 name, BTREE_ITER_INTENT);
@@ -195,10 +221,12 @@ int bch2_rename_trans(struct btree_trans *trans,
		      const struct qstr *dst_name,
		      enum bch_rename_mode mode)
 {
+	struct bch_fs *c = trans->c;
	struct btree_iter *src_dir_iter = NULL, *dst_dir_iter = NULL;
	struct btree_iter *src_inode_iter = NULL, *dst_inode_iter = NULL;
	struct bch_hash_info src_hash, dst_hash;
-	u64 src_inode, dst_inode, now = bch2_current_time(trans->c);
+	u64 src_inode, src_offset, dst_inode, dst_offset;
+	u64 now = bch2_current_time(c);
	int ret;

	src_dir_iter = bch2_inode_peek(trans, src_dir_u, src_dir,
@@ -207,7 +235,7 @@ int bch2_rename_trans(struct btree_trans *trans,
	if (ret)
		goto err;

-	src_hash = bch2_hash_info_init(trans->c, src_dir_u);
+	src_hash = bch2_hash_info_init(c, src_dir_u);

	if (dst_dir != src_dir) {
		dst_dir_iter = bch2_inode_peek(trans, dst_dir_u, dst_dir,
@@ -216,7 +244,7 @@ int bch2_rename_trans(struct btree_trans *trans,
		if (ret)
			goto err;

-		dst_hash = bch2_hash_info_init(trans->c, dst_dir_u);
+		dst_hash = bch2_hash_info_init(c, dst_dir_u);
	} else {
		dst_dir_u = src_dir_u;
		dst_hash = src_hash;
@@ -225,8 +253,8 @@ int bch2_rename_trans(struct btree_trans *trans,
	ret = bch2_dirent_rename(trans,
				 src_dir, &src_hash,
				 dst_dir, &dst_hash,
-				 src_name, &src_inode,
-				 dst_name, &dst_inode,
+				 src_name, &src_inode, &src_offset,
+				 dst_name, &dst_inode, &dst_offset,
				 mode);
	if (ret)
		goto err;
@@ -245,6 +273,16 @@ int bch2_rename_trans(struct btree_trans *trans,
			goto err;
	}

+	if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
+		src_inode_u->bi_dir		= dst_dir_u->bi_inum;
+		src_inode_u->bi_dir_offset	= dst_offset;
+
+		if (mode == BCH_RENAME_EXCHANGE) {
+			dst_inode_u->bi_dir		= src_dir_u->bi_inum;
+			dst_inode_u->bi_dir_offset	= src_offset;
+		}
+	}
+
	if (mode == BCH_RENAME_OVERWRITE) {
		if (S_ISDIR(src_inode_u->bi_mode) !=
		    S_ISDIR(dst_inode_u->bi_mode)) {
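
Aside: the backpointer being stored in these hunks is simply the dirent's btree position (directory inode and offset) recorded in the child inode, which is what the fsck hunk further down cross-checks. A toy, stand-alone C model of that invariant — the structs are simplified stand-ins rather than the real on-disk types; only the bi_dir/bi_dir_offset field names come from the diff:

#include <stdint.h>
#include <stdio.h>

/* A dirent lives at btree position (pos_inode, pos_offset) and names d_inum. */
struct toy_dirent { uint64_t pos_inode, pos_offset, d_inum; };
/* The inode it points at remembers that position in its backpointer fields. */
struct toy_inode  { uint64_t bi_inum, bi_dir, bi_dir_offset; };

static int backpointer_matches(const struct toy_inode *inode,
			       const struct toy_dirent *d)
{
	return inode->bi_dir == d->pos_inode &&
	       inode->bi_dir_offset == d->pos_offset;
}

int main(void)
{
	struct toy_dirent d = { .pos_inode = 4096, .pos_offset = 17, .d_inum = 4242 };
	struct toy_inode  i = { .bi_inum = 4242, .bi_dir = 4096, .bi_dir_offset = 17 };

	printf("backpointer %s\n", backpointer_matches(&i, &d) ? "ok" : "mismatch");
	return 0;
}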


@@ -675,6 +675,39 @@ retry:
			continue;
		}

+		if (!target.bi_nlink &&
+		    !(target.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) &&
+		    (target.bi_dir != k.k->p.inode ||
+		     target.bi_dir_offset != k.k->p.offset) &&
+		    (fsck_err_on(c->sb.version >= bcachefs_metadata_version_inode_backpointers, c,
+				 "inode %llu has wrong backpointer:\n"
+				 "got %llu:%llu\n"
+				 "should be %llu:%llu",
+				 d_inum,
+				 target.bi_dir,
+				 target.bi_dir_offset,
+				 k.k->p.inode,
+				 k.k->p.offset) ||
+		     c->opts.version_upgrade)) {
+			struct bkey_inode_buf p;
+
+			target.bi_dir		= k.k->p.inode;
+			target.bi_dir_offset	= k.k->p.offset;
+			bch2_trans_unlock(&trans);
+
+			bch2_inode_pack(c, &p, &target);
+
+			ret = bch2_btree_insert(c, BTREE_ID_inodes,
+						&p.inode.k_i, NULL, NULL,
+						BTREE_INSERT_NOFAIL|
+						BTREE_INSERT_LAZY_RW);
+			if (ret) {
+				bch_err(c, "error in fsck: error %i updating inode", ret);
+				goto err;
+			}
+			continue;
+		}
+
		if (fsck_err_on(have_target &&
				d.v->d_type !=
				mode_to_type(target.bi_mode), c,
@@ -1314,6 +1347,16 @@ static int check_inode(struct btree_trans *trans,
		do_update = true;
	}

+	if (!S_ISDIR(u.bi_mode) &&
+	    u.bi_nlink &&
+	    !(u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) &&
+	    (fsck_err_on(c->sb.version >= bcachefs_metadata_version_inode_backpointers, c,
+			 "inode missing BCH_INODE_BACKPTR_UNTRUSTED flags") ||
+	     c->opts.version_upgrade)) {
+		u.bi_flags |= BCH_INODE_BACKPTR_UNTRUSTED;
+		do_update = true;
+	}
+
	if (do_update) {
		struct bkey_inode_buf p;


@@ -332,6 +332,7 @@ int bch2_inode_write(struct btree_trans *trans,
		return PTR_ERR(inode_p);

	bch2_inode_pack(trans->c, inode_p, inode);
+	inode_p->inode.k.p.snapshot = iter->snapshot;
	bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
	return 0;
 }
@@ -469,11 +470,10 @@ static inline u32 bkey_generation(struct bkey_s_c k)
	}
 }

-int bch2_inode_create(struct btree_trans *trans,
-		      struct bch_inode_unpacked *inode_u)
+struct btree_iter *bch2_inode_create(struct btree_trans *trans,
+				     struct bch_inode_unpacked *inode_u)
 {
	struct bch_fs *c = trans->c;
-	struct bkey_inode_buf *inode_p;
	struct btree_iter *iter = NULL;
	struct bkey_s_c k;
	u64 min, max, start, *hint;
@@ -493,10 +493,6 @@
	if (start >= max || start < min)
		start = min;

-	inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
-	if (IS_ERR(inode_p))
-		return PTR_ERR(inode_p);
-
 again:
	for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, start),
			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
@@ -520,7 +516,7 @@
	bch2_trans_iter_put(trans, iter);
	if (ret)
-		return ret;
+		return ERR_PTR(ret);

	if (start != min) {
		/* Retry from start */
@@ -528,15 +524,12 @@
		goto again;
	}

-	return -ENOSPC;
+	return ERR_PTR(-ENOSPC);
 found_slot:
	*hint			= k.k->p.offset;
	inode_u->bi_inum	= k.k->p.offset;
	inode_u->bi_generation	= bkey_generation(k);

-	ret = bch2_inode_write(trans, iter, inode_u);
-	bch2_trans_iter_put(trans, iter);
-	return ret;
+	return iter;
 }

 int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached)


@@ -69,7 +69,8 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
		     uid_t, gid_t, umode_t, dev_t,
		     struct bch_inode_unpacked *);

-int bch2_inode_create(struct btree_trans *, struct bch_inode_unpacked *);
+struct btree_iter *bch2_inode_create(struct btree_trans *,
+				     struct bch_inode_unpacked *);

 int bch2_inode_rm(struct bch_fs *, u64, bool);


@@ -322,6 +322,9 @@ int bch2_extent_update(struct btree_trans *trans,
	if (i_sectors_delta || new_i_size) {
		bch2_inode_pack(trans->c, &inode_p, &inode_u);
+
+		inode_p.inode.k.p.snapshot = iter->snapshot;
+
		bch2_trans_update(trans, inode_iter,
				  &inode_p.inode.k_i, 0);
	}
@@ -437,6 +440,8 @@ int bch2_write_index_default(struct bch_write_op *op)
		k = bch2_keylist_front(keys);

+		k->k.p.snapshot = iter->snapshot;
+
		bch2_bkey_buf_realloc(&sk, c, k->k.u64s);
		bkey_copy(sk.k, k);
		bch2_cut_front(iter->pos, sk.k);


@@ -914,14 +914,17 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
	if (dynamic_fault("bcachefs:add:journal_alloc"))
		return -ENOMEM;

+	/* 1/128th of the device by default: */
+	nr = ca->mi.nbuckets >> 7;
+
	/*
-	 * clamp journal size to 1024 buckets or 512MB (in sectors), whichever
+	 * clamp journal size to 8192 buckets or 8GB (in sectors), whichever
	 * is smaller:
	 */
-	nr = clamp_t(unsigned, ca->mi.nbuckets >> 8,
+	nr = clamp_t(unsigned, nr,
		     BCH_JOURNAL_BUCKETS_MIN,
-		     min(1 << 10,
-			 (1 << 20) / ca->mi.bucket_size));
+		     min(1 << 13,
+			 (1 << 24) / ca->mi.bucket_size));

	return __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
 }
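
Aside: a quick worked example of the new journal sizing. The device geometry and the BCH_JOURNAL_BUCKETS_MIN value below are assumptions for illustration only:

#include <stdio.h>

#define BCH_JOURNAL_BUCKETS_MIN	8	/* assumed value, for illustration */

static unsigned clamp_u(unsigned v, unsigned lo, unsigned hi)
{
	return v < lo ? lo : v > hi ? hi : v;
}

int main(void)
{
	/* hypothetical device: 1M buckets of 1 MiB (2048 sectors) = 1 TiB */
	unsigned long long nbuckets	= 1ULL << 20;
	unsigned	   bucket_size	= 2048;			/* sectors */

	unsigned nr  = nbuckets >> 7;				/* 1/128th = 8192 buckets */
	unsigned cap = (1 << 24) / bucket_size;			/* 8 GiB in buckets = 8192 */
	unsigned hi  = (1 << 13) < cap ? (1 << 13) : cap;	/* min(8192 buckets, 8 GiB) */

	nr = clamp_u(nr, BCH_JOURNAL_BUCKETS_MIN, hi);
	printf("journal buckets: %u (cap %u)\n", nr, hi);	/* 8192 (cap 8192) */
	return 0;
}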


@@ -1452,7 +1452,7 @@ void bch2_journal_write(struct closure *cl)
	if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
		validate_before_checksum = true;

-	if (le32_to_cpu(jset->version) <= bcachefs_metadata_version_inode_btree_change)
+	if (le32_to_cpu(jset->version) < bcachefs_metadata_version_current)
		validate_before_checksum = true;

	if (validate_before_checksum &&


@@ -610,8 +610,8 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
		 j->prereserved.remaining,
		 atomic_read(&c->btree_cache.dirty),
		 c->btree_cache.used,
-		 c->btree_key_cache.nr_dirty,
-		 c->btree_key_cache.nr_keys);
+		 atomic_long_read(&c->btree_key_cache.nr_dirty),
+		 atomic_long_read(&c->btree_key_cache.nr_keys));

	nr_flushed = journal_flush_pins(j, seq_to_flush, min_nr);


@@ -48,14 +48,14 @@ static int __journal_key_cmp(enum btree_id l_btree_id,
 {
	return (cmp_int(l_btree_id, r->btree_id) ?:
		cmp_int(l_level, r->level) ?:
-		bkey_cmp(l_pos, r->k->k.p));
+		bpos_cmp(l_pos, r->k->k.p));
 }

 static int journal_key_cmp(struct journal_key *l, struct journal_key *r)
 {
	return (cmp_int(l->btree_id, r->btree_id) ?:
		cmp_int(l->level, r->level) ?:
-		bkey_cmp(l->k->k.p, r->k->k.p));
+		bpos_cmp(l->k->k.p, r->k->k.p));
 }

 static size_t journal_key_search(struct journal_keys *journal_keys,
@@ -90,7 +90,7 @@ static void journal_iter_fix(struct bch_fs *c, struct journal_iter *iter, unsigned
	if (iter->idx > idx ||
	    (iter->idx == idx &&
	     biter->last &&
-	     bkey_cmp(n->k.p, biter->unpacked.p) <= 0))
+	     bpos_cmp(n->k.p, biter->unpacked.p) <= 0))
		iter->idx++;
 }
@@ -238,7 +238,7 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *
		bkey_i_to_s_c(bch2_journal_iter_peek(&iter->journal));

	if (btree_k.k && journal_k.k) {
-		int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p);
+		int cmp = bpos_cmp(btree_k.k->p, journal_k.k->p);

		if (!cmp)
			bch2_journal_iter_advance_btree(iter);
@@ -256,7 +256,7 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *
	ret = iter->last == journal ? journal_k : btree_k;

	if (iter->b &&
-	    bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) {
+	    bpos_cmp(ret.k->p, iter->b->data->max_key) > 0) {
		iter->journal.idx = iter->journal.keys->nr;
		iter->last = none;
		return bkey_s_c_null;
@@ -419,7 +419,7 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
	return  cmp_int(l->btree_id, r->btree_id) ?:
		cmp_int(l->level, r->level) ?:
-		bkey_cmp(l->k->k.p, r->k->k.p) ?:
+		bpos_cmp(l->k->k.p, r->k->k.p) ?:
		cmp_int(l->journal_seq, r->journal_seq) ?:
		cmp_int(l->journal_offset, r->journal_offset);
 }
@@ -490,7 +490,7 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
		while (src + 1 < keys.d + keys.nr &&
		       src[0].btree_id == src[1].btree_id &&
		       src[0].level == src[1].level &&
-		       !bkey_cmp(src[0].k->k.p, src[1].k->k.p))
+		       !bpos_cmp(src[0].k->k.p, src[1].k->k.p))
			src++;

		*dst++ = *src++;
@@ -581,7 +581,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
	return  cmp_int(r->level, l->level) ?:
		cmp_int(l->journal_seq, r->journal_seq) ?:
		cmp_int(l->btree_id, r->btree_id) ?:
-		bkey_cmp(l->k->k.p, r->k->k.p);
+		bpos_cmp(l->k->k.p, r->k->k.p);
 }

 static int bch2_journal_replay(struct bch_fs *c,
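
Aside: the bkey_cmp() → bpos_cmp() switches above appear to be about snapshot awareness — as far as I can tell (an assumption, not spelled out in these hunks), bpos_cmp() also orders by the snapshot field while bkey_cmp() ignores it. A stand-alone sketch of that distinction, with a simplified struct standing in for struct bpos:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct bpos; not the real layout. */
struct pos { uint64_t inode, offset; uint32_t snapshot; };

static int cmp_u64(uint64_t l, uint64_t r) { return (l > r) - (l < r); }

/* Orders by inode:offset only (assumed bkey_cmp() behaviour). */
static int bkey_cmp_sketch(struct pos l, struct pos r)
{
	int c = cmp_u64(l.inode, r.inode);
	return c ? c : cmp_u64(l.offset, r.offset);
}

/* Also distinguishes the snapshot field (assumed bpos_cmp() behaviour). */
static int bpos_cmp_sketch(struct pos l, struct pos r)
{
	int c = bkey_cmp_sketch(l, r);
	return c ? c : cmp_u64(l.snapshot, r.snapshot);
}

int main(void)
{
	struct pos a = { 1, 100, 1 }, b = { 1, 100, 2 };

	printf("bkey_cmp: %d  bpos_cmp: %d\n",
	       bkey_cmp_sketch(a, b), bpos_cmp_sketch(a, b));	/* 0 and -1 */
	return 0;
}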
@@ -998,6 +998,13 @@ int bch2_fs_recovery(struct bch_fs *c)
		goto err;
	}

+	if (!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_BFORMAT_OVERFLOW_DONE))) {
+		bch_err(c, "filesystem may have incompatible bkey formats; run fsck from the compat branch to fix");
+		ret = -EINVAL;
+		goto err;
+	}
+
	if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) {
		bch_info(c, "alloc_v2 feature bit not set, fsck required");
		c->opts.fsck = true;
@@ -1338,6 +1345,7 @@ int bch2_fs_initialize(struct bch_fs *c)
			  S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
	root_inode.bi_inum = BCACHEFS_ROOT_INO;
	bch2_inode_pack(c, &packed_inode, &root_inode);
+	packed_inode.inode.k.p.snapshot = U32_MAX;

	err = "error creating root directory";
	ret = bch2_btree_insert(c, BTREE_ID_inodes,


@@ -67,6 +67,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
		goto err;
	}
 err:
+	bch2_trans_iter_put(&trans, iter);
	bch2_trans_exit(&trans);
	return ret;
 }
@@ -106,6 +107,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
		goto err;
	}
 err:
+	bch2_trans_iter_put(&trans, iter);
	bch2_trans_exit(&trans);
	return ret;
 }
@@ -113,7 +115,7 @@ err:
 static int test_iterate(struct bch_fs *c, u64 nr)
 {
	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter *iter = NULL;
	struct bkey_s_c k;
	u64 i;
	int ret = 0;
@@ -159,6 +161,7 @@ static int test_iterate(struct bch_fs *c, u64 nr)
	BUG_ON(i);
 err:
+	bch2_trans_iter_put(&trans, iter);
	bch2_trans_exit(&trans);
	return ret;
 }
@@ -166,7 +169,7 @@ err:
 static int test_iterate_extents(struct bch_fs *c, u64 nr)
 {
	struct btree_trans trans;
-	struct btree_iter *iter;
+	struct btree_iter *iter = NULL;
	struct bkey_s_c k;
	u64 i;
	int ret = 0;
@@ -213,6 +216,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
	BUG_ON(i);
 err:
+	bch2_trans_iter_put(&trans, iter);
	bch2_trans_exit(&trans);
	return ret;
 }
@@ -257,7 +261,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
		BUG_ON(k.k->p.offset != i);
		i += 2;
	}
-	bch2_trans_iter_free(&trans, iter);
+	bch2_trans_iter_put(&trans, iter);

	BUG_ON(i != nr * 2);
@@ -274,6 +278,7 @@
		if (i == nr * 2)
			break;
	}
+	bch2_trans_iter_put(&trans, iter);
 err:
	bch2_trans_exit(&trans);
	return ret;
@@ -318,7 +323,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
		BUG_ON(k.k->size != 8);
		i += 16;
	}
-	bch2_trans_iter_free(&trans, iter);
+	bch2_trans_iter_put(&trans, iter);

	BUG_ON(i != nr);
@@ -337,6 +342,7 @@
		if (i == nr)
			break;
	}
+	bch2_trans_iter_put(&trans, iter);
 err:
	bch2_trans_exit(&trans);
	return 0;
@@ -362,6 +368,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
	k = bch2_btree_iter_peek(iter);
	BUG_ON(k.k);

+	bch2_trans_iter_put(&trans, iter);
+
	bch2_trans_exit(&trans);
	return 0;
 }
@@ -382,6 +390,8 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr)
	k = bch2_btree_iter_peek(iter);
	BUG_ON(k.k);

+	bch2_trans_iter_put(&trans, iter);
+
	bch2_trans_exit(&trans);
	return 0;
 }
@@ -473,6 +483,7 @@ static int rand_insert(struct bch_fs *c, u64 nr)
	for (i = 0; i < nr; i++) {
		bkey_cookie_init(&k.k_i);
		k.k.p.offset = test_rand();
+		k.k.p.snapshot = U32_MAX;

		ret = __bch2_trans_do(&trans, NULL, NULL, 0,
			__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i));
@@ -508,7 +519,7 @@
		}
	}

-	bch2_trans_iter_free(&trans, iter);
+	bch2_trans_iter_put(&trans, iter);
	bch2_trans_exit(&trans);
	return ret;
 }
@@ -549,7 +560,7 @@
		}
	}

-	bch2_trans_iter_free(&trans, iter);
+	bch2_trans_iter_put(&trans, iter);
	bch2_trans_exit(&trans);
	return ret;
 }
@@ -630,6 +641,8 @@ static int seq_insert(struct bch_fs *c, u64 nr)
		if (++i == nr)
			break;
	}
+	bch2_trans_iter_put(&trans, iter);
+
	bch2_trans_exit(&trans);
	return ret;
 }
@@ -645,6 +658,8 @@
	for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, 0, k, ret)
		;
+	bch2_trans_iter_put(&trans, iter);
+
	bch2_trans_exit(&trans);
	return ret;
 }
@@ -671,6 +686,8 @@
			break;
		}
	}
+	bch2_trans_iter_put(&trans, iter);
+
	bch2_trans_exit(&trans);
	return ret;
 }

File diff suppressed because it is too large


@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/sched/rt.h>
 #include <linux/six.h>
+#include <linux/slab.h>

 #ifdef DEBUG
 #define EBUG_ON(cond)		BUG_ON(cond)
@@ -309,6 +310,9 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
		wake_up_process(p);
	}

+	if (ret)
+		six_acquire(&lock->dep_map, 1);
+
	return ret;
 }
@@ -560,6 +564,7 @@ static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
	    lock->readers) {
		smp_mb(); /* unlock barrier */
		this_cpu_dec(*lock->readers);
+		smp_mb(); /* between unlocking and checking for waiters */
		state.v = READ_ONCE(lock->state.v);
	} else {
		EBUG_ON(!(lock->state.v & l[type].held_mask));
@@ -705,6 +710,34 @@ void six_lock_wakeup_all(struct six_lock *lock)
 }
 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);

+struct free_pcpu_rcu {
+	struct rcu_head		rcu;
+	void __percpu		*p;
+};
+
+static void free_pcpu_rcu_fn(struct rcu_head *_rcu)
+{
+	struct free_pcpu_rcu *rcu =
+		container_of(_rcu, struct free_pcpu_rcu, rcu);
+
+	free_percpu(rcu->p);
+	kfree(rcu);
+}
+
+void six_lock_pcpu_free_rcu(struct six_lock *lock)
+{
+	struct free_pcpu_rcu *rcu = kzalloc(sizeof(*rcu), GFP_KERNEL);
+
+	if (!rcu)
+		return;
+
+	rcu->p = lock->readers;
+	lock->readers = NULL;
+
+	call_rcu(&rcu->rcu, free_pcpu_rcu_fn);
+}
+EXPORT_SYMBOL_GPL(six_lock_pcpu_free_rcu);
+
 void six_lock_pcpu_free(struct six_lock *lock)
 {
	BUG_ON(lock->readers && pcpu_read_count(lock));
@@ -717,8 +750,6 @@ EXPORT_SYMBOL_GPL(six_lock_pcpu_free);

 void six_lock_pcpu_alloc(struct six_lock *lock)
 {
-	BUG_ON(lock->readers && pcpu_read_count(lock));
-	BUG_ON(lock->state.read_lock);
 #ifdef __KERNEL__
	if (!lock->readers)
		lock->readers = alloc_percpu(unsigned);
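
Aside: six_lock_pcpu_free_rcu() defers the free_percpu() of the per-cpu reader counts past an RCU grace period, so a racing lockless reader of lock->readers never touches freed memory. A hypothetical caller (this usage is a sketch, not something taken from this commit) might pair it with an RCU-deferred free of the enclosing object:

#include <linux/six.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

/* Hypothetical object embedding a six lock that uses per-cpu reader counts. */
struct my_object {
	struct six_lock	lock;
	struct rcu_head	rcu;
};

static void my_object_free(struct my_object *obj)
{
	/*
	 * Hand the per-cpu reader counts to RCU first: a reader that still
	 * holds a stale reference to obj must not dereference an
	 * already-freed percpu pointer.
	 */
	six_lock_pcpu_free_rcu(&obj->lock);
	kfree_rcu(obj, rcu);	/* the object itself is also RCU-deferred */
}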