Update bcachefs sources to 5a0455ae19af bcachefs: bcachefs_metadata_version_snapshot_deletion_v2

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2025-05-02 13:39:43 -04:00
parent a4babd1d64
commit 8376810564
33 changed files with 855 additions and 529 deletions


@@ -1 +1 @@
-9b4ab159abcd84cf0c25ee851dda8c40baffecc8
+5a0455ae19afb354634b3c5c9bf55d2171005a2f


@@ -4,14 +4,12 @@
 #include <linux/compiler.h>
 #include <linux/limits.h>
+#include <linux/const.h>

 /*
- * In the fallback code below, we need to compute the minimum and
- * maximum values representable in a given type. These macros may also
- * be useful elsewhere, so we provide them outside the
- * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
- *
- * It would seem more obvious to do something like
+ * We need to compute the minimum and maximum values representable in a given
+ * type. These macros may also be useful elsewhere. It would seem more obvious
+ * to do something like:
  *
  * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
  * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
@@ -33,8 +31,10 @@
  * credit to Christian Biere.
  */
 #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
-#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
-#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+#define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_max(t) __type_max(typeof(t))
+#define __type_min(T) ((T)((T)-type_max(T)-(T)1))
+#define type_min(t) __type_min(typeof(t))

 /*
  * Avoids triggering -Wtype-limits compilation warning,
@@ -53,194 +53,153 @@ static inline bool __must_check __must_check_overflow(bool overflow)
 	return unlikely(overflow);
 }

-#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
-/*
- * For simplicity and code hygiene, the fallback code below insists on
- * a, b and *d having the same type (similar to the min() and max()
- * macros), whereas gcc's type-generic overflow checkers accept
- * different types. Hence we don't just make check_add_overflow an
- * alias for __builtin_add_overflow, but add type checks similar to
- * below.
- */
-#define check_add_overflow(a, b, d) __must_check_overflow(({ \
-	typeof(a) __a = (a); \
-	typeof(b) __b = (b); \
-	typeof(d) __d = (d); \
-	(void) (&__a == &__b); \
-	(void) (&__a == __d); \
-	__builtin_add_overflow(__a, __b, __d); \
-}))
-
-#define check_sub_overflow(a, b, d) __must_check_overflow(({ \
-	typeof(a) __a = (a); \
-	typeof(b) __b = (b); \
-	typeof(d) __d = (d); \
-	(void) (&__a == &__b); \
-	(void) (&__a == __d); \
-	__builtin_sub_overflow(__a, __b, __d); \
-}))
-
-#define check_mul_overflow(a, b, d) __must_check_overflow(({ \
-	typeof(a) __a = (a); \
-	typeof(b) __b = (b); \
-	typeof(d) __d = (d); \
-	(void) (&__a == &__b); \
-	(void) (&__a == __d); \
-	__builtin_mul_overflow(__a, __b, __d); \
-}))
-
-#else
-
-/* Checking for unsigned overflow is relatively easy without causing UB. */
-#define __unsigned_add_overflow(a, b, d) ({ \
-	typeof(a) __a = (a); \
-	typeof(b) __b = (b); \
-	typeof(d) __d = (d); \
-	(void) (&__a == &__b); \
-	(void) (&__a == __d); \
-	*__d = __a + __b; \
-	*__d < __a; \
-})
-
-#define __unsigned_sub_overflow(a, b, d) ({ \
-	typeof(a) __a = (a); \
-	typeof(b) __b = (b); \
-	typeof(d) __d = (d); \
-	(void) (&__a == &__b); \
-	(void) (&__a == __d); \
-	*__d = __a - __b; \
-	__a < __b; \
-})
-
-/*
- * If one of a or b is a compile-time constant, this avoids a division.
- */
-#define __unsigned_mul_overflow(a, b, d) ({ \
-	typeof(a) __a = (a); \
-	typeof(b) __b = (b); \
-	typeof(d) __d = (d); \
-	(void) (&__a == &__b); \
-	(void) (&__a == __d); \
-	*__d = __a * __b; \
-	__builtin_constant_p(__b) ? \
-	__b > 0 && __a > type_max(typeof(__a)) / __b : \
-	__a > 0 && __b > type_max(typeof(__b)) / __a; \
-})
-
-/*
- * For signed types, detecting overflow is much harder, especially if
- * we want to avoid UB. But the interface of these macros is such that
- * we must provide a result in *d, and in fact we must produce the
- * result promised by gcc's builtins, which is simply the possibly
- * wrapped-around value. Fortunately, we can just formally do the
- * operations in the widest relevant unsigned type (u64) and then
- * truncate the result - gcc is smart enough to generate the same code
- * with and without the (u64) casts.
- */
-
-/*
- * Adding two signed integers can overflow only if they have the same
- * sign, and overflow has happened iff the result has the opposite
- * sign.
- */
-#define __signed_add_overflow(a, b, d) ({ \
-	typeof(a) __a = (a); \
-	typeof(b) __b = (b); \
-	typeof(d) __d = (d); \
-	(void) (&__a == &__b); \
-	(void) (&__a == __d); \
-	*__d = (u64)__a + (u64)__b; \
-	(((~(__a ^ __b)) & (*__d ^ __a)) \
-		& type_min(typeof(__a))) != 0; \
-})
-
-/*
- * Subtraction is similar, except that overflow can now happen only
- * when the signs are opposite. In this case, overflow has happened if
- * the result has the opposite sign of a.
- */
-#define __signed_sub_overflow(a, b, d) ({ \
-	typeof(a) __a = (a); \
-	typeof(b) __b = (b); \
-	typeof(d) __d = (d); \
-	(void) (&__a == &__b); \
-	(void) (&__a == __d); \
-	*__d = (u64)__a - (u64)__b; \
-	((((__a ^ __b)) & (*__d ^ __a)) \
-		& type_min(typeof(__a))) != 0; \
-})
-
-/*
- * Signed multiplication is rather hard. gcc always follows C99, so
- * division is truncated towards 0. This means that we can write the
- * overflow check like this:
- *
- * (a > 0 && (b > MAX/a || b < MIN/a)) ||
- * (a < -1 && (b > MIN/a || b < MAX/a) ||
- * (a == -1 && b == MIN)
- *
- * The redundant casts of -1 are to silence an annoying -Wtype-limits
- * (included in -Wextra) warning: When the type is u8 or u16, the
- * __b_c_e in check_mul_overflow obviously selects
- * __unsigned_mul_overflow, but unfortunately gcc still parses this
- * code and warns about the limited range of __b.
- */
-
-#define __signed_mul_overflow(a, b, d) ({ \
-	typeof(a) __a = (a); \
-	typeof(b) __b = (b); \
-	typeof(d) __d = (d); \
-	typeof(a) __tmax = type_max(typeof(a)); \
-	typeof(a) __tmin = type_min(typeof(a)); \
-	(void) (&__a == &__b); \
-	(void) (&__a == __d); \
-	*__d = (u64)__a * (u64)__b; \
-	(__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
-	(__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
-	(__b == (typeof(__b))-1 && __a == __tmin); \
-})
-
-#define check_add_overflow(a, b, d) __must_check_overflow( \
-	__builtin_choose_expr(is_signed_type(typeof(a)), \
-			__signed_add_overflow(a, b, d), \
-			__unsigned_add_overflow(a, b, d)))
-
-#define check_sub_overflow(a, b, d) __must_check_overflow( \
-	__builtin_choose_expr(is_signed_type(typeof(a)), \
-			__signed_sub_overflow(a, b, d), \
-			__unsigned_sub_overflow(a, b, d)))
-
-#define check_mul_overflow(a, b, d) __must_check_overflow( \
-	__builtin_choose_expr(is_signed_type(typeof(a)), \
-			__signed_mul_overflow(a, b, d), \
-			__unsigned_mul_overflow(a, b, d)))
-
-#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
+/**
+ * check_add_overflow() - Calculate addition with overflow checking
+ * @a: first addend
+ * @b: second addend
+ * @d: pointer to store sum
+ *
+ * Returns true on wrap-around, false otherwise.
+ *
+ * *@d holds the results of the attempted addition, regardless of whether
+ * wrap-around occurred.
+ */
+#define check_add_overflow(a, b, d) \
+	__must_check_overflow(__builtin_add_overflow(a, b, d))
+
+/**
+ * wrapping_add() - Intentionally perform a wrapping addition
+ * @type: type for result of calculation
+ * @a: first addend
+ * @b: second addend
+ *
+ * Return the potentially wrapped-around addition without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_add(type, a, b) \
+	({ \
+		type __val; \
+		__builtin_add_overflow(a, b, &__val); \
+		__val; \
+	})
+
+/**
+ * wrapping_assign_add() - Intentionally perform a wrapping increment assignment
+ * @var: variable to be incremented
+ * @offset: amount to add
+ *
+ * Increments @var by @offset with wrap-around. Returns the resulting
+ * value of @var. Will not trip any wrap-around sanitizers.
+ *
+ * Returns the new value of @var.
+ */
+#define wrapping_assign_add(var, offset) \
+	({ \
+		typeof(var) *__ptr = &(var); \
+		*__ptr = wrapping_add(typeof(var), *__ptr, offset); \
+	})
+
+/**
+ * check_sub_overflow() - Calculate subtraction with overflow checking
+ * @a: minuend; value to subtract from
+ * @b: subtrahend; value to subtract from @a
+ * @d: pointer to store difference
+ *
+ * Returns true on wrap-around, false otherwise.
+ *
+ * *@d holds the results of the attempted subtraction, regardless of whether
+ * wrap-around occurred.
+ */
+#define check_sub_overflow(a, b, d) \
+	__must_check_overflow(__builtin_sub_overflow(a, b, d))
+
+/**
+ * wrapping_sub() - Intentionally perform a wrapping subtraction
+ * @type: type for result of calculation
+ * @a: minuend; value to subtract from
+ * @b: subtrahend; value to subtract from @a
+ *
+ * Return the potentially wrapped-around subtraction without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_sub(type, a, b) \
+	({ \
+		type __val; \
+		__builtin_sub_overflow(a, b, &__val); \
+		__val; \
+	})
+
+/**
+ * wrapping_assign_sub() - Intentionally perform a wrapping decrement assign
+ * @var: variable to be decremented
+ * @offset: amount to subtract
+ *
+ * Decrements @var by @offset with wrap-around. Returns the resulting
+ * value of @var. Will not trip any wrap-around sanitizers.
+ *
+ * Returns the new value of @var.
+ */
+#define wrapping_assign_sub(var, offset) \
+	({ \
+		typeof(var) *__ptr = &(var); \
+		*__ptr = wrapping_sub(typeof(var), *__ptr, offset); \
+	})
+
+/**
+ * check_mul_overflow() - Calculate multiplication with overflow checking
+ * @a: first factor
+ * @b: second factor
+ * @d: pointer to store product
+ *
+ * Returns true on wrap-around, false otherwise.
+ *
+ * *@d holds the results of the attempted multiplication, regardless of whether
+ * wrap-around occurred.
+ */
+#define check_mul_overflow(a, b, d) \
+	__must_check_overflow(__builtin_mul_overflow(a, b, d))
+
+/**
+ * wrapping_mul() - Intentionally perform a wrapping multiplication
+ * @type: type for result of calculation
+ * @a: first factor
+ * @b: second factor
+ *
+ * Return the potentially wrapped-around multiplication without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_mul(type, a, b) \
+	({ \
+		type __val; \
+		__builtin_mul_overflow(a, b, &__val); \
+		__val; \
+	})

 /**
  * check_shl_overflow() - Calculate a left-shifted value and check overflow
- *
  * @a: Value to be shifted
  * @s: How many bits left to shift
  * @d: Pointer to where to store the result
  *
  * Computes *@d = (@a << @s)
  *
- * Returns true if '*d' cannot hold the result or when 'a << s' doesn't
+ * Returns true if '*@d' cannot hold the result or when '@a << @s' doesn't
  * make sense. Example conditions:
- * - 'a << s' causes bits to be lost when stored in *d.
- * - 's' is garbage (e.g. negative) or so large that the result of
- *   'a << s' is guaranteed to be 0.
- * - 'a' is negative.
- * - 'a << s' sets the sign bit, if any, in '*d'.
  *
- * '*d' will hold the results of the attempted shift, but is not
- * considered "safe for use" if false is returned.
+ * - '@a << @s' causes bits to be lost when stored in *@d.
+ * - '@s' is garbage (e.g. negative) or so large that the result of
+ *   '@a << @s' is guaranteed to be 0.
+ * - '@a' is negative.
+ * - '@a << @s' sets the sign bit, if any, in '*@d'.
+ *
+ * '*@d' will hold the results of the attempted shift, but is not
+ * considered "safe for use" if true is returned.
  */
 #define check_shl_overflow(a, s, d) __must_check_overflow(({ \
 	typeof(a) _a = a; \
 	typeof(s) _s = s; \
 	typeof(d) _d = d; \
-	u64 _a_full = _a; \
+	unsigned long long _a_full = _a; \
 	unsigned int _to_shift = \
 		is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \
 	*_d = (_a_full << _to_shift); \
@@ -248,9 +207,115 @@ static inline bool __must_check __must_check_overflow(bool overflow)
 		(*_d >> _to_shift) != _a); \
 }))

+#define __overflows_type_constexpr(x, T) ( \
+	is_unsigned_type(typeof(x)) ? \
+	(x) > type_max(T) : \
+	is_unsigned_type(typeof(T)) ? \
+	(x) < 0 || (x) > type_max(T) : \
+	(x) < type_min(T) || (x) > type_max(T))
+
+#define __overflows_type(x, T) ({ \
+	typeof(T) v = 0; \
+	check_add_overflow((x), v, &v); \
+})
+
+/**
+ * overflows_type - helper for checking the overflows between value, variables,
+ *		    or data type
+ *
+ * @n: source constant value or variable to be checked
+ * @T: destination variable or data type proposed to store @x
+ *
+ * Compares the @x expression for whether or not it can safely fit in
+ * the storage of the type in @T. @x and @T can have different types.
+ * If @x is a constant expression, this will also resolve to a constant
+ * expression.
+ *
+ * Returns: true if overflow can occur, false otherwise.
+ */
+#define overflows_type(n, T) \
+	__builtin_choose_expr(__is_constexpr(n), \
+			      __overflows_type_constexpr(n, T), \
+			      __overflows_type(n, T))
+
+/**
+ * castable_to_type - like __same_type(), but also allows for casted literals
+ *
+ * @n: variable or constant value
+ * @T: variable or data type
+ *
+ * Unlike the __same_type() macro, this allows a constant value as the
+ * first argument. If this value would not overflow into an assignment
+ * of the second argument's type, it returns true. Otherwise, this falls
+ * back to __same_type().
+ */
+#define castable_to_type(n, T) \
+	__builtin_choose_expr(__is_constexpr(n), \
+			      !__overflows_type_constexpr(n, T), \
+			      __same_type(n, T))
+
+/**
+ * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX
+ * @factor1: first factor
+ * @factor2: second factor
+ *
+ * Returns: calculate @factor1 * @factor2, both promoted to size_t,
+ * with any overflow causing the return value to be SIZE_MAX. The
+ * lvalue must be size_t to avoid implicit type conversion.
+ */
+static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
+{
+	size_t bytes;
+
+	if (check_mul_overflow(factor1, factor2, &bytes))
+		return SIZE_MAX;
+
+	return bytes;
+}
+
+/**
+ * size_add() - Calculate size_t addition with saturation at SIZE_MAX
+ * @addend1: first addend
+ * @addend2: second addend
+ *
+ * Returns: calculate @addend1 + @addend2, both promoted to size_t,
+ * with any overflow causing the return value to be SIZE_MAX. The
+ * lvalue must be size_t to avoid implicit type conversion.
+ */
+static inline size_t __must_check size_add(size_t addend1, size_t addend2)
+{
+	size_t bytes;
+
+	if (check_add_overflow(addend1, addend2, &bytes))
+		return SIZE_MAX;
+
+	return bytes;
+}
+
+/**
+ * size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX
+ * @minuend: value to subtract from
+ * @subtrahend: value to subtract from @minuend
+ *
+ * Returns: calculate @minuend - @subtrahend, both promoted to size_t,
+ * with any overflow causing the return value to be SIZE_MAX. For
+ * composition with the size_add() and size_mul() helpers, neither
+ * argument may be SIZE_MAX (or the result with be forced to SIZE_MAX).
+ * The lvalue must be size_t to avoid implicit type conversion.
+ */
+static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
+{
+	size_t bytes;
+
+	if (minuend == SIZE_MAX || subtrahend == SIZE_MAX ||
+	    check_sub_overflow(minuend, subtrahend, &bytes))
+		return SIZE_MAX;
+
+	return bytes;
+}
+
 /**
  * array_size() - Calculate size of 2-dimensional array.
- *
  * @a: dimension one
  * @b: dimension two
  *
@@ -259,19 +324,10 @@ static inline bool __must_check __must_check_overflow(bool overflow)
  * Returns: number of bytes needed to represent the array or SIZE_MAX on
  * overflow.
  */
-static inline __must_check size_t array_size(size_t a, size_t b)
-{
-	size_t bytes;
-
-	if (check_mul_overflow(a, b, &bytes))
-		return SIZE_MAX;
-
-	return bytes;
-}
+#define array_size(a, b) size_mul(a, b)

 /**
  * array3_size() - Calculate size of 3-dimensional array.
- *
  * @a: dimension one
  * @b: dimension two
  * @c: dimension three
@@ -281,54 +337,11 @@ static inline __must_check size_t array_size(size_t a, size_t b)
  * Returns: number of bytes needed to represent the array or SIZE_MAX on
  * overflow.
  */
-static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
-{
-	size_t bytes;
-
-	if (check_mul_overflow(a, b, &bytes))
-		return SIZE_MAX;
-	if (check_mul_overflow(bytes, c, &bytes))
-		return SIZE_MAX;
-
-	return bytes;
-}
+#define array3_size(a, b, c) size_mul(size_mul(a, b), c)

-/*
- * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for
- * struct_size() below.
- */
-static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
-{
-	size_t bytes;
-
-	if (check_mul_overflow(a, b, &bytes))
-		return SIZE_MAX;
-	if (check_add_overflow(bytes, c, &bytes))
-		return SIZE_MAX;
-
-	return bytes;
-}
-
-/**
- * struct_size() - Calculate size of structure with trailing array.
- * @p: Pointer to the structure.
- * @member: Name of the array member.
- * @count: Number of elements in the array.
- *
- * Calculates size of memory needed for structure @p followed by an
- * array of @count number of @member elements.
- *
- * Return: number of bytes needed or SIZE_MAX on overflow.
- */
-#define struct_size(p, member, count) \
-	__ab_c_size(count, \
-		    sizeof(*(p)->member) + __must_be_array((p)->member), \
-		    sizeof(*(p)))
-
 /**
  * flex_array_size() - Calculate size of a flexible array member
  *                     within an enclosing structure.
- *
  * @p: Pointer to the structure.
  * @member: Name of the flexible array member.
  * @count: Number of elements in the array.
@@ -339,7 +352,92 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
  * Return: number of bytes needed or SIZE_MAX on overflow.
  */
 #define flex_array_size(p, member, count) \
-	array_size(count, \
-		    sizeof(*(p)->member) + __must_be_array((p)->member))
+	__builtin_choose_expr(__is_constexpr(count), \
+		(count) * sizeof(*(p)->member) + __must_be_array((p)->member), \
+		size_mul(count, sizeof(*(p)->member) + __must_be_array((p)->member)))
+
+/**
+ * struct_size() - Calculate size of structure with trailing flexible array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array.
+ *
+ * Calculates size of memory needed for structure of @p followed by an
+ * array of @count number of @member elements.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define struct_size(p, member, count) \
+	__builtin_choose_expr(__is_constexpr(count), \
+		sizeof(*(p)) + flex_array_size(p, member, count), \
+		size_add(sizeof(*(p)), flex_array_size(p, member, count)))
+
+/**
+ * struct_size_t() - Calculate size of structure with trailing flexible array
+ * @type: structure type name.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array.
+ *
+ * Calculates size of memory needed for structure @type followed by an
+ * array of @count number of @member elements. Prefer using struct_size()
+ * when possible instead, to keep calculations associated with a specific
+ * instance variable of type @type.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define struct_size_t(type, member, count) \
+	struct_size((type *)NULL, member, count)
+
+/**
+ * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
+ * Enables caller macro to pass (different) initializer.
+ *
+ * @type: structure type name, including "struct" keyword.
+ * @name: Name for a variable to define.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array; must be compile-time const.
+ * @initializer: initializer expression (could be empty for no init).
+ */
+#define _DEFINE_FLEX(type, name, member, count, initializer...) \
+	_Static_assert(__builtin_constant_p(count), \
+		       "onstack flex array members require compile-time const count"); \
+	union { \
+		u8 bytes[struct_size_t(type, member, count)]; \
+		type obj; \
+	} name##_u initializer; \
+	type *name = (type *)&name##_u
+
+/**
+ * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing
+ * flexible array member, when it does not have a __counted_by annotation.
+ *
+ * @type: structure type name, including "struct" keyword.
+ * @name: Name for a variable to define.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array; must be compile-time const.
+ *
+ * Define a zeroed, on-stack, instance of @type structure with a trailing
+ * flexible array member.
+ * Use __struct_size(@name) to get compile-time size of it afterwards.
+ */
+#define DEFINE_RAW_FLEX(type, name, member, count) \
+	_DEFINE_FLEX(type, name, member, count, = {})
+
+/**
+ * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing
+ * flexible array member.
+ *
+ * @TYPE: structure type name, including "struct" keyword.
+ * @NAME: Name for a variable to define.
+ * @MEMBER: Name of the array member.
+ * @COUNTER: Name of the __counted_by member.
+ * @COUNT: Number of elements in the array; must be compile-time const.
+ *
+ * Define a zeroed, on-stack, instance of @TYPE structure with a trailing
+ * flexible array member.
+ * Use __struct_size(@NAME) to get compile-time size of it afterwards.
+ */
+#define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \
+	_DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, })

 #endif /* __LINUX_OVERFLOW_H */
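For readers new to this header, a minimal usage sketch of the macros updated above (illustration only, not part of the commit; struct demo and the function names are invented):

    #include <linux/overflow.h>
    #include <linux/slab.h>

    struct demo {
        u32 nr;
        u32 entries[];          /* flexible array member */
    };

    static struct demo *demo_alloc(u32 n)
    {
        /*
         * struct_size() computes sizeof(struct demo) + n * sizeof(u32),
         * saturating at SIZE_MAX on overflow so the allocation fails
         * cleanly instead of returning an undersized buffer:
         */
        struct demo *d = kzalloc(struct_size(d, entries, n), GFP_KERNEL);
        if (d)
            d->nr = n;
        return d;
    }

    /* wrapping_add() marks arithmetic as intentionally modular, so the
     * wrap-around sanitizers mentioned above stay quiet: */
    static u32 demo_next_seq(u32 seq)
    {
        return wrapping_add(u32, seq, 1);
    }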


@@ -216,6 +216,7 @@
 #include "recovery_passes_types.h"
 #include "sb-errors_types.h"
 #include "seqmutex.h"
+#include "snapshot_types.h"
 #include "time_stats.h"
 #include "util.h"
@@ -709,7 +710,7 @@ struct btree_transaction_stats {
 	unsigned nr_max_paths;
 	unsigned journal_entries_size;
 	unsigned max_mem;
-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
 	darray_trans_kmalloc_trace trans_kmalloc_trace;
 #endif
 	char *max_paths_text;
@@ -869,7 +870,7 @@ struct bch_fs {
 	struct mutex snapshot_table_lock;
 	struct rw_semaphore snapshot_create_lock;

-	struct work_struct snapshot_delete_work;
+	struct snapshot_delete snapshot_delete;
 	struct work_struct snapshot_wait_for_pagecache_and_delete_work;
 	snapshot_id_list snapshots_unlinked;
 	struct mutex snapshots_unlinked_lock;


@@ -695,7 +695,8 @@ struct bch_sb_field_ext {
 	x(stripe_backpointers, BCH_VERSION(1, 22)) \
 	x(stripe_lru, BCH_VERSION(1, 23)) \
 	x(casefolding, BCH_VERSION(1, 24)) \
-	x(extent_flags, BCH_VERSION(1, 25))
+	x(extent_flags, BCH_VERSION(1, 25)) \
+	x(snapshot_deletion_v2, BCH_VERSION(1, 26))

 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
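For context, each x() entry in the list above expands into an enumerator. A sketch of the expansion (editor's illustration; the BCH_VERSION() packing shown is an assumption based on the header's major/minor scheme, where a version is (major << 10) | minor):

    #define BCH_VERSION(major, minor) (((major) << 10) | (minor))

    /* the entry added above becomes: */
    enum {
        bcachefs_metadata_version_snapshot_deletion_v2 = BCH_VERSION(1, 26),
    };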


@@ -3089,7 +3089,7 @@ void bch2_trans_copy_iter(struct btree_trans *trans,
 	dst->key_cache_path = 0;
 }

-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
 void bch2_trans_kmalloc_trace_to_text(struct printbuf *out,
 				      darray_trans_kmalloc_trace *trace)
 {
@@ -3112,7 +3112,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
 	void *p;

 	if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) {
-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
 		struct printbuf buf = PRINTBUF;
 		bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace);
 		bch2_print_str(c, KERN_ERR, buf.buf);
@@ -3127,7 +3127,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
 	struct btree_transaction_stats *s = btree_trans_stats(trans);
 	if (new_bytes > s->max_mem) {
 		mutex_lock(&s->lock);
-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
 		darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr);
 		s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size,
 						trans->trans_kmalloc_trace.nr);
@@ -3314,7 +3314,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
 	}
 #endif

-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
 	trans->trans_kmalloc_trace.nr = 0;
 #endif
@@ -3486,6 +3486,8 @@ void bch2_trans_put(struct btree_trans *trans)
 #ifdef CONFIG_BCACHEFS_DEBUG
 	darray_exit(&trans->last_restarted_trace);
+#endif
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
 	darray_exit(&trans->trans_kmalloc_trace);
 #endif
@@ -3642,7 +3644,7 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
 	for (s = c->btree_transaction_stats;
 	     s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
 	     s++) {
-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
 		darray_exit(&s->trans_kmalloc_trace);
 #endif
 		kfree(s->max_paths_text);


@@ -543,7 +543,7 @@ void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btre

 void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *);

-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
 void bch2_trans_kmalloc_trace_to_text(struct printbuf *,
 				      darray_trans_kmalloc_trace *);
 #endif
@@ -553,7 +553,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *, size_t, unsigned long);
 static inline void bch2_trans_kmalloc_trace(struct btree_trans *trans, size_t size,
 					    unsigned long ip)
 {
-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
 	darray_push(&trans->trans_kmalloc_trace,
 		    ((struct trans_kmalloc_trace) { .ip = ip, .bytes = size }));
 #endif


@@ -495,7 +495,7 @@ struct btree_trans {
 	void *mem;
 	unsigned mem_top;
 	unsigned mem_bytes;
-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
 	darray_trans_kmalloc_trace trans_kmalloc_trace;
 #endif


@@ -760,7 +760,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
 		mutex_lock(&s->lock);

 		prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem);
-#ifdef CONFIG_BCACHEFS_DEBUG
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
 		printbuf_indent_add(&i->buf, 2);
 		bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace);
 		printbuf_indent_sub(&i->buf, 2);


@@ -692,7 +692,7 @@ static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subv
 			vfs_d_type(d.v->d_type));
 	if (ret)
 		ctx->pos = d.k->p.offset + 1;
-	return ret;
+	return !ret;
 }

 int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
@@ -717,7 +717,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
 			if (ret2 > 0)
 				continue;

-			ret2 ?: drop_locks_do(trans, bch2_dir_emit(ctx, dirent, target));
+			ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target));
 		})));

 	bch2_bkey_buf_exit(&sk, c);


@@ -287,7 +287,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc

 static int bch2_accounting_update_sb_one(struct bch_fs *c, struct bpos p)
 {
-	struct bch_replicas_padded r;
+	union bch_replicas_padded r;
 	return accounting_to_replicas(&r.e, p)
 		? bch2_mark_replicas(c, &r.e)
 		: 0;
@@ -361,7 +361,7 @@ err:
 int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
 			       enum bch_accounting_mode mode)
 {
-	struct bch_replicas_padded r;
+	union bch_replicas_padded r;

 	if (mode != BCH_ACCOUNTING_read &&
 	    accounting_to_replicas(&r.e, a.k->p) &&
@@ -425,10 +425,12 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)

 	percpu_down_read(&c->mark_lock);
 	darray_for_each(acc->k, i) {
-		struct {
+		union {
+			u8 bytes[struct_size_t(struct bch_replicas_usage, r.devs,
+					       BCH_BKEY_PTRS_MAX)];
 			struct bch_replicas_usage r;
-			u8 pad[BCH_BKEY_PTRS_MAX];
 		} u;
+		u.r.r.nr_devs = BCH_BKEY_PTRS_MAX;

 		if (!accounting_to_replicas(&u.r.r, i->pos))
 			continue;
@@ -627,7 +629,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans,

 	switch (acc->type) {
 	case BCH_DISK_ACCOUNTING_replicas: {
-		struct bch_replicas_padded r;
+		union bch_replicas_padded r;
 		__accounting_to_replicas(&r.e, acc);

 		for (unsigned i = 0; i < r.e.nr_devs; i++)


@@ -86,35 +86,6 @@ err:
 	return ret;
 }

-void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c)
-{
-	out->atomic++;
-	rcu_read_lock();
-	struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
-	if (!g)
-		goto out;
-
-	for (unsigned i = 0; i < g->nr; i++) {
-		if (i)
-			prt_printf(out, " ");
-
-		if (g->entries[i].deleted) {
-			prt_printf(out, "[deleted]");
-			continue;
-		}
-
-		prt_printf(out, "[parent %d devs", g->entries[i].parent);
-		for_each_member_device_rcu(c, ca, &g->entries[i].devs)
-			prt_printf(out, " %s", ca->name);
-		prt_printf(out, "]");
-	}
-
-out:
-	rcu_read_unlock();
-	out->atomic--;
-}
-
 static void bch2_sb_disk_groups_to_text(struct printbuf *out,
 					struct bch_sb *sb,
 					struct bch_sb_field *f)
@@ -241,17 +212,14 @@ bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target)
 	case TARGET_DEV:
 		return dev == t.dev;
 	case TARGET_GROUP: {
-		struct bch_disk_groups_cpu *g;
-		const struct bch_devs_mask *m;
-		bool ret;
-
 		rcu_read_lock();
-		g = rcu_dereference(c->disk_groups);
-		m = g && t.group < g->nr && !g->entries[t.group].deleted
+		struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
+		const struct bch_devs_mask *m =
+			g && t.group < g->nr && !g->entries[t.group].deleted
 			? &g->entries[t.group].devs
 			: NULL;

-		ret = m ? test_bit(dev, m->d) : false;
+		bool ret = m ? test_bit(dev, m->d) : false;
 		rcu_read_unlock();

 		return ret;
@@ -377,54 +345,81 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name)
 	return v;
 }

-void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
+static void __bch2_disk_path_to_text(struct printbuf *out, struct bch_disk_groups_cpu *g,
+				     unsigned v)
 {
-	struct bch_disk_groups_cpu *groups;
-	struct bch_disk_group_cpu *g;
-	unsigned nr = 0;
 	u16 path[32];
+	unsigned nr = 0;
-
-	out->atomic++;
-	rcu_read_lock();
-	groups = rcu_dereference(c->disk_groups);
-	if (!groups)
-		goto invalid;

 	while (1) {
 		if (nr == ARRAY_SIZE(path))
 			goto invalid;

-		if (v >= groups->nr)
+		if (v >= (g ? g->nr : 0))
 			goto invalid;

-		g = groups->entries + v;
+		struct bch_disk_group_cpu *e = g->entries + v;

-		if (g->deleted)
+		if (e->deleted)
 			goto invalid;

 		path[nr++] = v;

-		if (!g->parent)
+		if (!e->parent)
 			break;

-		v = g->parent - 1;
+		v = e->parent - 1;
 	}

 	while (nr) {
-		v = path[--nr];
-		g = groups->entries + v;
+		struct bch_disk_group_cpu *e = g->entries + path[--nr];

-		prt_printf(out, "%.*s", (int) sizeof(g->label), g->label);
+		prt_printf(out, "%.*s", (int) sizeof(e->label), e->label);

 		if (nr)
 			prt_printf(out, ".");
 	}
-out:
-	rcu_read_unlock();
-	out->atomic--;
 	return;
 invalid:
 	prt_printf(out, "invalid label %u", v);
-	goto out;
+}
+
+void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c)
+{
+	bch2_printbuf_make_room(out, 4096);
+
+	out->atomic++;
+	rcu_read_lock();
+	struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
+
+	for (unsigned i = 0; i < (g ? g->nr : 0); i++) {
+		prt_printf(out, "%2u: ", i);
+
+		if (g->entries[i].deleted) {
+			prt_printf(out, "[deleted]");
+			goto next;
+		}
+
+		__bch2_disk_path_to_text(out, g, i);
+
+		prt_printf(out, " devs");
+		for_each_member_device_rcu(c, ca, &g->entries[i].devs)
+			prt_printf(out, " %s", ca->name);
+next:
+		prt_newline(out);
+	}
+
+	rcu_read_unlock();
+	out->atomic--;
+}
+
+void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
+{
+	out->atomic++;
+	rcu_read_lock();
+	__bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v),
+	rcu_read_unlock();
+	--out->atomic;
 }

 void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
@@ -470,23 +465,22 @@ inval:
 int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
 {
-	struct bch_member *mi;
-	int ret, v = -1;
+	lockdep_assert_held(&c->sb_lock);

-	if (!strlen(name) || !strcmp(name, "none"))
-		return 0;
-
-	v = bch2_disk_path_find_or_create(&c->disk_sb, name);
-	if (v < 0)
-		return v;
-
-	ret = bch2_sb_disk_groups_to_cpu(c);
-	if (ret)
-		return ret;
+	if (!strlen(name) || !strcmp(name, "none")) {
+		struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+		SET_BCH_MEMBER_GROUP(mi, 0);
+	} else {
+		int v = bch2_disk_path_find_or_create(&c->disk_sb, name);
+		if (v < 0)
+			return v;

-	mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
-	SET_BCH_MEMBER_GROUP(mi, v + 1);
-	return 0;
+		struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+		SET_BCH_MEMBER_GROUP(mi, v + 1);
+	}
+
+	return bch2_sb_disk_groups_to_cpu(c);
 }

 int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)


@@ -2223,10 +2223,10 @@ void bch2_fs_ec_stop(struct bch_fs *c)

 static bool bch2_fs_ec_flush_done(struct bch_fs *c)
 {
-	bool ret;
+	sched_annotate_sleep();

 	mutex_lock(&c->ec_stripe_new_lock);
-	ret = list_empty(&c->ec_stripe_new_list);
+	bool ret = list_empty(&c->ec_stripe_new_list);
 	mutex_unlock(&c->ec_stripe_new_lock);

 	return ret;


@@ -4,9 +4,10 @@

 #include "bcachefs_format.h"

-struct bch_replicas_padded {
+union bch_replicas_padded {
+	u8 bytes[struct_size_t(struct bch_replicas_entry_v1,
+			       devs, BCH_BKEY_PTRS_MAX)];
 	struct bch_replicas_entry_v1 e;
-	u8 pad[BCH_BKEY_PTRS_MAX];
 };

 struct stripe {
@@ -28,7 +29,7 @@ struct gc_stripe {
 	u16 block_sectors[BCH_BKEY_PTRS_MAX];
 	struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX];

-	struct bch_replicas_padded r;
+	union bch_replicas_padded r;
 };

 #endif /* _BCACHEFS_EC_TYPES_H */
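The union form above is worth a note: placing another member (the old pad[]) after a struct whose last member is a flexible array relies on a GNU layout extension, whereas a union of a typed view with a worst-case byte buffer sized by struct_size_t() expresses the same reservation legitimately and makes the footprint explicit. A standalone sketch of the pattern (editor's illustration; entry and MAX_ELEMS are invented names, with struct_size_t() open-coded):

    #include <stdint.h>

    #define MAX_ELEMS 16

    struct entry {
        uint8_t nr;
        uint8_t devs[];     /* flexible array member */
    };

    /* open-coded struct_size_t(struct entry, devs, MAX_ELEMS) */
    #define ENTRY_BYTES(n) (sizeof(struct entry) + (n) * sizeof(uint8_t))

    union entry_padded {
        uint8_t bytes[ENTRY_BYTES(MAX_ELEMS)];  /* worst-case reservation */
        struct entry e;                         /* typed view of the buffer */
    };

Sizing the buffer through the same helper that allocations use keeps the reservation and the access bounds in sync.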


@@ -147,10 +147,24 @@ int __must_check bch2_write_inode_size(struct bch_fs *c,
 void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
 			   struct quota_res *quota_res, s64 sectors)
 {
-	bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c,
-				"inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
-				inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
-				inode->ei_inode.bi_sectors);
+	if (unlikely((s64) inode->v.i_blocks + sectors < 0)) {
+		struct printbuf buf = PRINTBUF;
+		bch2_log_msg_start(c, &buf);
+		prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
+			   inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
+			   inode->ei_inode.bi_sectors);
+
+		bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &buf);
+		if (print)
+			bch2_print_str(c, KERN_ERR, buf.buf);
+		printbuf_exit(&buf);
+
+		if (sectors < 0)
+			sectors = -inode->v.i_blocks;
+		else
+			sectors = 0;
+	}
+
 	inode->v.i_blocks += sectors;

 #ifdef CONFIG_BCACHEFS_QUOTA
@@ -244,7 +258,6 @@ out:
 	if (!ret)
 		ret = err;

-	bch_err_fn(c, ret);
 	return ret;
 }
@@ -506,11 +519,20 @@ int bchfs_truncate(struct mnt_idmap *idmap,
 		goto err;
 	}

-	bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks &&
-				!bch2_journal_error(&c->journal), c,
-				"inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
-				inode->v.i_ino, (u64) inode->v.i_blocks,
-				inode->ei_inode.bi_sectors);
+	if (unlikely(!inode->v.i_size && inode->v.i_blocks &&
+		     !bch2_journal_error(&c->journal))) {
+		struct printbuf buf = PRINTBUF;
+		bch2_log_msg_start(c, &buf);
+		prt_printf(&buf,
+			   "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
+			   inode->v.i_ino, (u64) inode->v.i_blocks,
+			   inode->ei_inode.bi_sectors);
+
+		bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &buf);
+		if (print)
+			bch2_print_str(c, KERN_ERR, buf.buf);
+		printbuf_exit(&buf);
+	}

 	ret = bch2_setattr_nonsize(idmap, inode, iattr);
 err:


@@ -785,12 +785,11 @@ static int ref_visible2(struct bch_fs *c,
 #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \
 	for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
-	     (_i)->snapshot <= (_snapshot); _i++) \
-		if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
+	     (_i)->inode.bi_snapshot <= (_snapshot); _i++) \
+		if (key_visible_in_snapshot(_c, _s, _i->inode.bi_snapshot, _snapshot))

 struct inode_walker_entry {
 	struct bch_inode_unpacked inode;
-	u32 snapshot;
 	u64 count;
 	u64 i_size;
 };
@@ -824,7 +823,6 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w,
 	return bch2_inode_unpack(inode, &u) ?:
 		darray_push(&w->inodes, ((struct inode_walker_entry) {
 		.inode = u,
-		.snapshot = inode.k->p.snapshot,
 	}));
 }
@@ -864,47 +862,45 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
 }

 static struct inode_walker_entry *
-lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
+lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, struct bkey_s_c k)
 {
-	bool is_whiteout = k.k->type == KEY_TYPE_whiteout;
+	struct bch_fs *c = trans->c;

 	struct inode_walker_entry *i;
 	__darray_for_each(w->inodes, i)
-		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot))
+		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->inode.bi_snapshot))
 			goto found;

 	return NULL;
 found:
-	BUG_ON(k.k->p.snapshot > i->snapshot);
+	BUG_ON(k.k->p.snapshot > i->inode.bi_snapshot);

-	if (k.k->p.snapshot != i->snapshot && !is_whiteout) {
-		struct inode_walker_entry new = *i;
-
-		new.snapshot = k.k->p.snapshot;
-		new.count = 0;
-		new.i_size = 0;
-
-		struct printbuf buf = PRINTBUF;
-		bch2_bkey_val_to_text(&buf, c, k);
-
-		bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
-			 "unexpected because we should always update the inode when we update a key in that inode\n"
-			 "%s",
-			 w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf);
-		printbuf_exit(&buf);
-
-		while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot)
-			--i;
-
-		size_t pos = i - w->inodes.data;
-		int ret = darray_insert_item(&w->inodes, pos, new);
-		if (ret)
-			return ERR_PTR(ret);
-
-		i = w->inodes.data + pos;
-	}
+	struct printbuf buf = PRINTBUF;
+	int ret = 0;
+
+	if (fsck_err_on(k.k->p.snapshot != i->inode.bi_snapshot,
+			trans, snapshot_key_missing_inode_snapshot,
+			"have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
+			"unexpected because we should always update the inode when we update a key in that inode\n"
+			"%s",
+			w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot,
+			(bch2_bkey_val_to_text(&buf, c, k),
+			 buf.buf))) {
+		struct bch_inode_unpacked new = i->inode;
+		new.bi_snapshot = k.k->p.snapshot;
+
+		ret = __bch2_fsck_write_inode(trans, &new) ?:
+			bch2_trans_commit(trans, NULL, NULL, 0) ?:
+			-BCH_ERR_transaction_restart_nested;
+		goto fsck_err;
+	}

+	printbuf_exit(&buf);
 	return i;
+fsck_err:
+	printbuf_exit(&buf);
+	return ERR_PTR(ret);
 }

 static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
@@ -919,7 +915,7 @@ static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
 	w->last_pos = k.k->p;

-	return lookup_inode_for_snapshot(trans->c, w, k);
+	return lookup_inode_for_snapshot(trans, w, k);
 }

 static int get_visible_inodes(struct btree_trans *trans,
@@ -1496,21 +1492,21 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal
 		if (i->inode.bi_sectors == i->count)
 			continue;

-		count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot);
+		count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->inode.bi_snapshot);

 		if (w->recalculate_sums)
 			i->count = count2;

 		if (i->count != count2) {
 			bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
-					    w->last_pos.inode, i->snapshot, i->count, count2);
+					    w->last_pos.inode, i->inode.bi_snapshot, i->count, count2);
 			i->count = count2;
 		}

 		if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty),
 				trans, inode_i_sectors_wrong,
 				"inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
-				w->last_pos.inode, i->snapshot,
+				w->last_pos.inode, i->inode.bi_snapshot,
 				i->inode.bi_sectors, i->count)) {
 			i->inode.bi_sectors = i->count;
 			ret = bch2_fsck_write_inode(trans, &i->inode);
@@ -1821,20 +1817,20 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 	for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
 	     inode->inodes.data && i >= inode->inodes.data;
 	     --i) {
-		if (i->snapshot > k.k->p.snapshot ||
-		    !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
+		if (i->inode.bi_snapshot > k.k->p.snapshot ||
+		    !key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot))
 			continue;

 		if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
 				!bkey_extent_is_reservation(k),
 				trans, extent_past_end_of_inode,
 				"extent type past end of inode %llu:%u, i_size %llu\n%s",
-				i->inode.bi_inum, i->snapshot, i->inode.bi_size,
+				i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size,
 				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
 			struct btree_iter iter2;

 			bch2_trans_copy_iter(trans, &iter2, iter);
-			bch2_btree_iter_set_snapshot(trans, &iter2, i->snapshot);
+			bch2_btree_iter_set_snapshot(trans, &iter2, i->inode.bi_snapshot);
 			ret = bch2_btree_iter_traverse(trans, &iter2) ?:
 				bch2_btree_delete_at(trans, &iter2,
 					BTREE_UPDATE_internal_snapshot_node);
@@ -1856,8 +1852,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 	for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
 	     inode->inodes.data && i >= inode->inodes.data;
 	     --i) {
-		if (i->snapshot > k.k->p.snapshot ||
-		    !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
+		if (i->inode.bi_snapshot > k.k->p.snapshot ||
+		    !key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot))
 			continue;

 		i->count += k.k->size;
@@ -1939,13 +1935,13 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
 		if (i->inode.bi_nlink == i->count)
 			continue;

-		count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot);
+		count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->inode.bi_snapshot);
 		if (count2 < 0)
 			return count2;

 		if (i->count != count2) {
 			bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
-					    w->last_pos.inode, i->snapshot, i->count, count2);
+					    w->last_pos.inode, i->inode.bi_snapshot, i->count, count2);
 			i->count = count2;
 			if (i->inode.bi_nlink == i->count)
 				continue;
@@ -1954,7 +1950,7 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
 		if (fsck_err_on(i->inode.bi_nlink != i->count,
 				trans, inode_dir_wrong_nlink,
 				"directory %llu:%u with wrong i_nlink: got %u, should be %llu",
-				w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
+				w->last_pos.inode, i->inode.bi_snapshot, i->inode.bi_nlink, i->count)) {
 			i->inode.bi_nlink = i->count;
 			ret = bch2_fsck_write_inode(trans, &i->inode);
 			if (ret)


@@ -1172,8 +1172,6 @@ retry_pick:

 		bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
 		rbio->bounce = true;
-
-		async_object_list_add(c, rbio, rbio, &rbio->list_idx);
 	} else if (flags & BCH_READ_must_clone) {
 		/*
 		 * Have to clone if there were any splits, due to error
@@ -1187,8 +1185,6 @@ retry_pick:
 						 &c->bio_read_split),
 				 orig);
 		rbio->bio.bi_iter = iter;
-
-		async_object_list_add(c, rbio, rbio, &rbio->list_idx);
 	} else {
 		rbio = orig;
 		rbio->bio.bi_iter = iter;
@@ -1219,6 +1215,8 @@ retry_pick:
 	rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
 	rbio->bio.bi_end_io = bch2_read_endio;

+	async_object_list_add(c, rbio, rbio, &rbio->list_idx);
+
 	/* XXX: also nvme read recovery level */
 	if (unlikely(failed && bch2_dev_io_failures(failed, pick.ptr.dev)))
 		rbio->bio.bi_opf |= REQ_FUA;


@@ -256,10 +256,35 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
 	}

 	if (i_sectors_delta) {
+		s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors);
+		if (unlikely(bi_sectors + i_sectors_delta < 0)) {
+			struct bch_fs *c = trans->c;
+			struct printbuf buf = PRINTBUF;
+			bch2_log_msg_start(c, &buf);
+			prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0",
+				   extent_iter->pos.inode, bi_sectors, i_sectors_delta);
+
+			bool print = bch2_count_fsck_err(c, inode_i_sectors_underflow, &buf);
+			if (print)
+				bch2_print_str(c, KERN_ERR, buf.buf);
+			printbuf_exit(&buf);
+
+			if (i_sectors_delta < 0)
+				i_sectors_delta = -bi_sectors;
+			else
+				i_sectors_delta = 0;
+		}
+
 		le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta);
 		inode_update_flags = 0;
 	}

+	/*
+	 * extents, dirents and xattrs updates require that an inode update also
+	 * happens - to ensure that if a key exists in one of those btrees with
+	 * a given snapshot ID an inode is also present - so we may have to skip
+	 * the nojournal optimization:
+	 */
 	if (inode->k.p.snapshot != iter.snapshot) {
 		inode->k.p.snapshot = iter.snapshot;
 		inode_update_flags = 0;
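This hunk and the i_blocks hunk earlier follow the same recovery pattern: count and log an fsck error, then clamp the delta so the counter saturates at zero rather than wrapping negative. Distilled to its essentials (an editor's sketch using kernel types, not the commit's code):

    /* Apply a signed delta to a counter that must never go negative;
     * on would-be underflow, saturate at zero instead of wrapping. */
    static s64 apply_clamped_delta(u64 *counter, s64 delta)
    {
        if (unlikely((s64) *counter + delta < 0))
            delta = delta < 0 ? -(s64) *counter : 0;
        *counter += delta;
        return delta;
    }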


@@ -1404,7 +1404,7 @@ int bch2_journal_read(struct bch_fs *c,
 	}

 	genradix_for_each(&c->journal_entries, radix_iter, _i) {
-		struct bch_replicas_padded replicas = {
+		union bch_replicas_padded replicas = {
 			.e.data_type = BCH_DATA_journal,
 			.e.nr_devs = 0,
 			.e.nr_required = 1,
@@ -1632,7 +1632,7 @@ static CLOSURE_CALLBACK(journal_write_done)
 	closure_type(w, struct journal_buf, io);
 	struct journal *j = container_of(w, struct journal, buf[w->idx]);
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct bch_replicas_padded replicas;
+	union bch_replicas_padded replicas;
 	u64 seq = le64_to_cpu(w->data->seq);
 	int err = 0;
@@ -1784,7 +1784,7 @@ static CLOSURE_CALLBACK(journal_write_submit)
 					     BCH_DEV_WRITE_REF_journal_write);
 		if (!ca) {
 			/* XXX: fix this */
-			bch_err(c, "missing device for journal write\n");
+			bch_err(c, "missing device %u for journal write", ptr->dev);
 			continue;
 		}
@@ -2055,7 +2055,7 @@ CLOSURE_CALLBACK(bch2_journal_write)
 	closure_type(w, struct journal_buf, io);
 	struct journal *j = container_of(w, struct journal, buf[w->idx]);
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct bch_replicas_padded replicas;
+	union bch_replicas_padded replicas;
 	unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_journal]);
 	int ret;


@@ -955,7 +955,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
 	seq = 0;
 	spin_lock(&j->lock);
 	while (!ret) {
-		struct bch_replicas_padded replicas;
+		union bch_replicas_padded replicas;

 		seq = max(seq, journal_last_seq(j));
 		if (seq >= j->pin.back)


@@ -209,6 +209,7 @@ enum bch_fsck_flags {
 	x(subvol_to_missing_root, 188, 0) \
 	x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \
 	x(bkey_in_missing_snapshot, 190, 0) \
+	x(bkey_in_deleted_snapshot, 315, 0) \
 	x(inode_pos_inode_nonzero, 191, 0) \
 	x(inode_pos_blockdev_range, 192, 0) \
 	x(inode_alloc_cursor_inode_bad, 301, 0) \
@@ -216,6 +217,7 @@ enum bch_fsck_flags {
 	x(inode_str_hash_invalid, 194, 0) \
 	x(inode_v3_fields_start_bad, 195, 0) \
 	x(inode_snapshot_mismatch, 196, 0) \
+	x(snapshot_key_missing_inode_snapshot, 314, FSCK_AUTOFIX) \
 	x(inode_unlinked_but_clean, 197, 0) \
 	x(inode_unlinked_but_nlink_nonzero, 198, 0) \
 	x(inode_unlinked_and_not_open, 281, 0) \
@@ -236,6 +238,9 @@ enum bch_fsck_flags {
 	x(inode_has_child_snapshots_wrong, 287, 0) \
 	x(inode_unreachable, 210, FSCK_AUTOFIX) \
 	x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \
+	x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \
+	x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \
+	x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \
 	x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \
 	x(deleted_inode_missing, 212, FSCK_AUTOFIX) \
 	x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \
@@ -320,7 +325,7 @@ enum bch_fsck_flags {
 	x(dirent_stray_data_after_cf_name, 305, 0) \
 	x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
 	x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
-	x(MAX, 311, 0)
+	x(MAX, 316, 0)
 enum bch_sb_error_id {
 #define x(t, n, ...) BCH_FSCK_ERR_##t = n,
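The entries above feed the x-macro directly below them; as an illustrative expansion (what the preprocessor generates from the added lines, not text from the patch):

enum bch_sb_error_id {
	/* ...existing enumerators... */
	BCH_FSCK_ERR_vfs_inode_i_blocks_underflow = 311,
	BCH_FSCK_ERR_inode_i_sectors_underflow = 312,
	BCH_FSCK_ERR_vfs_inode_i_blocks_not_zero_at_truncate = 313,
	BCH_FSCK_ERR_snapshot_key_missing_inode_snapshot = 314,
	BCH_FSCK_ERR_bkey_in_deleted_snapshot = 315,
	BCH_FSCK_ERR_MAX = 316,
};

The numeric codes are persistent identifiers (the superblock's errors field records fsck error counts per code), so new entries take the next free numbers regardless of where they sit in the list, and MAX is bumped from 311 to 316.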


@@ -139,6 +139,11 @@ int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
 	struct bch_sb_field_members_v1 *mi1;
 	struct bch_sb_field_members_v2 *mi2;
+	if (BCH_SB_VERSION_INCOMPAT(disk_sb->sb) > bcachefs_metadata_version_extent_flags) {
+		bch2_sb_field_resize(disk_sb, members_v1, 0);
+		return 0;
+	}
 	mi1 = bch2_sb_field_resize(disk_sb, members_v1,
 			DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES *
 				     disk_sb->sb->nr_devices, sizeof(u64)));


@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
+#include "bbpos.h"
 #include "bkey_buf.h"
 #include "btree_cache.h"
 #include "btree_key_cache.h"
@@ -212,7 +213,7 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
 	prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
 		   BCH_SNAPSHOT_SUBVOL(s.v),
-		   BCH_SNAPSHOT_DELETED(s.v),
+		   BCH_SNAPSHOT_WILL_DELETE(s.v),
 		   le32_to_cpu(s.v->parent),
 		   le32_to_cpu(s.v->children[0]),
 		   le32_to_cpu(s.v->children[1]),
@@ -313,7 +314,9 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
 	if (new.k->type == KEY_TYPE_snapshot) {
 		struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
-		t->live = true;
+		t->state = !BCH_SNAPSHOT_DELETED(s.v)
+			? SNAPSHOT_ID_live
+			: SNAPSHOT_ID_deleted;
 		t->parent = le32_to_cpu(s.v->parent);
 		t->children[0] = le32_to_cpu(s.v->children[0]);
 		t->children[1] = le32_to_cpu(s.v->children[1]);
@@ -338,7 +341,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
 		    parent - id - 1 < IS_ANCESTOR_BITMAP)
 			__set_bit(parent - id - 1, t->is_ancestor);
-		if (BCH_SNAPSHOT_DELETED(s.v)) {
+		if (BCH_SNAPSHOT_WILL_DELETE(s.v)) {
 			set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
 			if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots)
 				bch2_delete_dead_snapshots_async(c);
@@ -710,6 +713,9 @@ static int check_snapshot(struct btree_trans *trans,
 	memset(&s, 0, sizeof(s));
 	memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k)));
+	if (BCH_SNAPSHOT_DELETED(&s))
+		return 0;
 	id = le32_to_cpu(s.parent);
 	if (id) {
 		ret = bch2_snapshot_lookup(trans, id, &v);
@@ -747,7 +753,7 @@ static int check_snapshot(struct btree_trans *trans,
 	}
 	bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
-		!BCH_SNAPSHOT_DELETED(&s);
+		!BCH_SNAPSHOT_WILL_DELETE(&s);
 	if (should_have_subvol) {
 		id = le32_to_cpu(s.subvol);
@@ -997,7 +1003,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c)
 		snapshot_id_list_to_text(&buf, t);
 		darray_for_each(*t, id) {
-			if (fsck_err_on(!bch2_snapshot_exists(c, *id),
+			if (fsck_err_on(bch2_snapshot_id_state(c, *id) == SNAPSHOT_ID_empty,
 					trans, snapshot_node_missing,
 					"snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) {
 				if (t->nr > 1) {
@@ -1022,22 +1028,38 @@ err:
 	return ret;
 }
-int bch2_check_key_has_snapshot(struct btree_trans *trans,
+int __bch2_check_key_has_snapshot(struct btree_trans *trans,
 				struct btree_iter *iter,
 				struct bkey_s_c k)
 {
 	struct bch_fs *c = trans->c;
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
+	enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot);
-	if (fsck_err_on(!bch2_snapshot_exists(c, k.k->p.snapshot),
+	/* Snapshot was definitively deleted, this error is marked autofix */
+	if (fsck_err_on(state == SNAPSHOT_ID_deleted,
+			trans, bkey_in_deleted_snapshot,
+			"key in deleted snapshot %s, delete?",
+			(bch2_btree_id_to_text(&buf, iter->btree_id),
+			 prt_char(&buf, ' '),
+			 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+		ret = bch2_btree_delete_at(trans, iter,
+					   BTREE_UPDATE_internal_snapshot_node) ?: 1;
+	/*
+	 * Snapshot missing: we should have caught this with btree_lost_data and
+	 * kicked off reconstruct_snapshots, so if we end up here we have no
+	 * idea what happened:
+	 */
+	if (fsck_err_on(state == SNAPSHOT_ID_empty,
 			trans, bkey_in_missing_snapshot,
 			"key in missing snapshot %s, delete?",
 			(bch2_btree_id_to_text(&buf, iter->btree_id),
 			 prt_char(&buf, ' '),
 			 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
 		ret = bch2_btree_delete_at(trans, iter,
 					   BTREE_UPDATE_internal_snapshot_node) ?: 1;
 fsck_err:
 	printbuf_exit(&buf);
 	return ret;
@@ -1061,10 +1083,10 @@ int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
 	}
 	/* already deleted? */
-	if (BCH_SNAPSHOT_DELETED(&s->v))
+	if (BCH_SNAPSHOT_WILL_DELETE(&s->v))
 		goto err;
-	SET_BCH_SNAPSHOT_DELETED(&s->v, true);
+	SET_BCH_SNAPSHOT_WILL_DELETE(&s->v, true);
 	SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
 	s->v.subvol = 0;
 err:
@@ -1084,24 +1106,25 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
 	struct btree_iter iter, p_iter = {};
 	struct btree_iter c_iter = {};
 	struct btree_iter tree_iter = {};
-	struct bkey_s_c_snapshot s;
 	u32 parent_id, child_id;
 	unsigned i;
 	int ret = 0;
-	s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
-				     BTREE_ITER_intent, snapshot);
-	ret = bkey_err(s);
+	struct bkey_i_snapshot *s =
+		bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
+					BTREE_ITER_intent, snapshot);
+	ret = PTR_ERR_OR_ZERO(s);
 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
 				"missing snapshot %u", id);
 	if (ret)
 		goto err;
-	BUG_ON(s.v->children[1]);
+	BUG_ON(BCH_SNAPSHOT_DELETED(&s->v));
+	BUG_ON(s->v.children[1]);
-	parent_id = le32_to_cpu(s.v->parent);
-	child_id = le32_to_cpu(s.v->children[0]);
+	parent_id = le32_to_cpu(s->v.parent);
+	child_id = le32_to_cpu(s->v.children[0]);
 	if (parent_id) {
 		struct bkey_i_snapshot *parent;
@@ -1159,24 +1182,38 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
 		 */
 		struct bkey_i_snapshot_tree *s_t;
-		BUG_ON(s.v->children[1]);
+		BUG_ON(s->v.children[1]);
 		s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
-			BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
+			BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)),
			0, snapshot_tree);
 		ret = PTR_ERR_OR_ZERO(s_t);
 		if (ret)
 			goto err;
-		if (s.v->children[0]) {
-			s_t->v.root_snapshot = s.v->children[0];
+		if (s->v.children[0]) {
+			s_t->v.root_snapshot = s->v.children[0];
 		} else {
 			s_t->k.type = KEY_TYPE_deleted;
 			set_bkey_val_u64s(&s_t->k, 0);
 		}
 	}
-	ret = bch2_btree_delete_at(trans, &iter, 0);
+	if (!bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)) {
+		SET_BCH_SNAPSHOT_DELETED(&s->v, true);
+		s->v.parent = 0;
+		s->v.children[0] = 0;
+		s->v.children[1] = 0;
+		s->v.subvol = 0;
+		s->v.tree = 0;
+		s->v.depth = 0;
+		s->v.skip[0] = 0;
+		s->v.skip[1] = 0;
+		s->v.skip[2] = 0;
+	} else {
+		s->k.type = KEY_TYPE_deleted;
+		set_bkey_val_u64s(&s->k, 0);
+	}
 err:
 	bch2_trans_iter_exit(trans, &tree_iter);
 	bch2_trans_iter_exit(trans, &p_iter);
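The bch2_request_incompat_feature() gate above is the core of snapshot_deletion_v2. Summarizing the two on-disk outcomes as a comment sketch (wording ours, behavior taken from the code above):

/*
 * Deleting snapshot node `id`:
 *
 *   v2 available: the key survives with BCH_SNAPSHOT_DELETED set and all
 *                 topology fields zeroed; the ID stays reserved, so a stray
 *                 key left in that snapshot is recognizable later
 *                 (SNAPSHOT_ID_deleted, autofixed as bkey_in_deleted_snapshot).
 *
 *   v2 refused:   the filesystem must stay compatible with older versions,
 *                 so the key is deleted outright as before; a stray key
 *                 would then appear to be in a "missing" snapshot.
 */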
@@ -1346,12 +1383,6 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
 	 * that key to snapshot leaf nodes, where we can mutate it
 	 */
-struct snapshot_interior_delete {
-	u32 id;
-	u32 live_child;
-};
-typedef DARRAY(struct snapshot_interior_delete) interior_delete_list;
 static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
 {
 	darray_for_each(*l, i)
@@ -1385,28 +1416,28 @@ static unsigned __live_child(struct snapshot_table *t, u32 id,
 	return 0;
 }
-static unsigned live_child(struct bch_fs *c, u32 id,
-			   snapshot_id_list *delete_leaves,
-			   interior_delete_list *delete_interior)
+static unsigned live_child(struct bch_fs *c, u32 id)
 {
+	struct snapshot_delete *d = &c->snapshot_delete;
 	rcu_read_lock();
 	u32 ret = __live_child(rcu_dereference(c->snapshots), id,
-			       delete_leaves, delete_interior);
+			       &d->delete_leaves, &d->delete_interior);
 	rcu_read_unlock();
 	return ret;
 }
 static int delete_dead_snapshots_process_key(struct btree_trans *trans,
 					     struct btree_iter *iter,
-					     struct bkey_s_c k,
-					     snapshot_id_list *delete_leaves,
-					     interior_delete_list *delete_interior)
+					     struct bkey_s_c k)
 {
-	if (snapshot_list_has_id(delete_leaves, k.k->p.snapshot))
+	struct snapshot_delete *d = &trans->c->snapshot_delete;
+	if (snapshot_list_has_id(&d->delete_leaves, k.k->p.snapshot))
 		return bch2_btree_delete_at(trans, iter,
 					    BTREE_UPDATE_internal_snapshot_node);
-	u32 live_child = interior_delete_has_id(delete_interior, k.k->p.snapshot);
+	u32 live_child = interior_delete_has_id(&d->delete_interior, k.k->p.snapshot);
 	if (live_child) {
 		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
 		int ret = PTR_ERR_OR_ZERO(new);
@@ -1437,46 +1468,70 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
 	return 0;
 }
+static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter)
+{
+	struct bch_fs *c = trans->c;
+	struct snapshot_delete *d = &c->snapshot_delete;
+	bool ret = !snapshot_list_has_id(&d->deleting_from_trees,
+					 bch2_snapshot_tree(c, iter->pos.snapshot));
+	if (unlikely(ret)) {
+		struct bpos pos = iter->pos;
+		pos.snapshot = 0;
+		if (iter->btree_id != BTREE_ID_inodes)
+			pos.offset = U64_MAX;
+		bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(pos));
+	}
+	return ret;
+}
 /*
  * For a given snapshot, if it doesn't have a subvolume that points to it, and
  * it doesn't have child snapshot nodes - it's now redundant and we can mark it
  * as deleted.
  */
-static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k,
-					snapshot_id_list *delete_leaves,
-					interior_delete_list *delete_interior)
+static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k)
 {
 	if (k.k->type != KEY_TYPE_snapshot)
 		return 0;
 	struct bch_fs *c = trans->c;
+	struct snapshot_delete *d = &c->snapshot_delete;
 	struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
 	unsigned live_children = 0;
 	if (BCH_SNAPSHOT_SUBVOL(s.v))
 		return 0;
+	if (BCH_SNAPSHOT_DELETED(s.v))
+		return 0;
 	for (unsigned i = 0; i < 2; i++) {
 		u32 child = le32_to_cpu(s.v->children[i]);
 		live_children += child &&
-			!snapshot_list_has_id(delete_leaves, child);
+			!snapshot_list_has_id(&d->delete_leaves, child);
 	}
+	u32 tree = bch2_snapshot_tree(c, s.k->p.offset);
 	if (live_children == 0) {
-		return snapshot_list_add(c, delete_leaves, s.k->p.offset);
+		return snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
+			snapshot_list_add(c, &d->delete_leaves, s.k->p.offset);
 	} else if (live_children == 1) {
-		struct snapshot_interior_delete d = {
+		struct snapshot_interior_delete n = {
 			.id = s.k->p.offset,
-			.live_child = live_child(c, s.k->p.offset, delete_leaves, delete_interior),
+			.live_child = live_child(c, s.k->p.offset),
 		};
-		if (!d.live_child) {
-			bch_err(c, "error finding live child of snapshot %u", d.id);
+		if (!n.live_child) {
+			bch_err(c, "error finding live child of snapshot %u", n.id);
 			return -EINVAL;
 		}
-		return darray_push(delete_interior, d);
+		return snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
+			darray_push(&d->delete_interior, n);
 	} else {
 		return 0;
 	}
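The branch structure above is dense; here is a stand-alone model of the decision (hypothetical helper, for illustration only - the real function additionally records the affected snapshot tree in deleting_from_trees):

enum delete_kind { KEEP, DELETE_LEAF, DELETE_INTERIOR };

/* mirrors check_should_delete_snapshot(): a node not referenced by a
 * subvolume is classified by how many of its children are still live */
static enum delete_kind classify(bool has_subvol, bool already_deleted,
				 unsigned live_children)
{
	if (has_subvol || already_deleted)
		return KEEP;		/* still referenced, or nothing to do */
	if (live_children == 0)
		return DELETE_LEAF;	/* keys in this snapshot are dropped */
	if (live_children == 1)
		return DELETE_INTERIOR;	/* keys move to the one live child */
	return KEEP;			/* two live children: still a needed ancestor */
}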
@@ -1508,6 +1563,9 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
 	struct bkey_i_snapshot *s;
 	int ret;
+	if (!bch2_snapshot_exists(c, k.k->p.offset))
+		return 0;
 	if (k.k->type != KEY_TYPE_snapshot)
 		return 0;
@@ -1555,39 +1613,52 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
 	return bch2_trans_update(trans, iter, &s->k_i, 0);
 }
+static void bch2_snapshot_delete_nodes_to_text(struct printbuf *out, struct snapshot_delete *d)
+{
+	prt_printf(out, "deleting from trees");
+	darray_for_each(d->deleting_from_trees, i)
+		prt_printf(out, " %u", *i);
+	prt_printf(out, "deleting leaves");
+	darray_for_each(d->delete_leaves, i)
+		prt_printf(out, " %u", *i);
+	prt_printf(out, " interior");
+	darray_for_each(d->delete_interior, i)
+		prt_printf(out, " %u->%u", i->id, i->live_child);
+}
 int bch2_delete_dead_snapshots(struct bch_fs *c)
 {
 	if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
 		return 0;
 	struct btree_trans *trans = bch2_trans_get(c);
-	snapshot_id_list delete_leaves = {};
-	interior_delete_list delete_interior = {};
+	struct snapshot_delete *d = &c->snapshot_delete;
 	int ret = 0;
 	/*
 	 * For every snapshot node: If we have no live children and it's not
 	 * pointed to by a subvolume, delete it:
 	 */
+	mutex_lock(&d->lock);
+	d->running = true;
+	d->pos = BBPOS_MIN;
 	ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
-		check_should_delete_snapshot(trans, k, &delete_leaves, &delete_interior));
+		check_should_delete_snapshot(trans, k));
+	mutex_unlock(&d->lock);
 	if (!bch2_err_matches(ret, EROFS))
 		bch_err_msg(c, ret, "walking snapshots");
 	if (ret)
 		goto err;
-	if (!delete_leaves.nr && !delete_interior.nr)
+	if (!d->delete_leaves.nr && !d->delete_interior.nr)
 		goto err;
 	{
 		struct printbuf buf = PRINTBUF;
-		prt_printf(&buf, "deleting leaves");
-		darray_for_each(delete_leaves, i)
-			prt_printf(&buf, " %u", *i);
-		prt_printf(&buf, " interior");
-		darray_for_each(delete_interior, i)
-			prt_printf(&buf, " %u->%u", i->id, i->live_child);
+		bch2_snapshot_delete_nodes_to_text(&buf, d);
 		ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf));
 		printbuf_exit(&buf);
@@ -1595,19 +1666,25 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
 			goto err;
 	}
-	for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
+	for (d->pos.btree = 0; d->pos.btree < BTREE_ID_NR; d->pos.btree++) {
 		struct disk_reservation res = { 0 };
-		if (!btree_type_has_snapshots(btree))
+		d->pos.pos = POS_MIN;
+		if (!btree_type_has_snapshots(d->pos.btree))
 			continue;
 		ret = for_each_btree_key_commit(trans, iter,
-				btree, POS_MIN,
+				d->pos.btree, POS_MIN,
 				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
-				&res, NULL, BCH_TRANS_COMMIT_no_enospc,
-			delete_dead_snapshots_process_key(trans, &iter, k,
-							  &delete_leaves,
-							  &delete_interior));
+				&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+			d->pos.pos = iter.pos;
+			if (skip_unrelated_snapshot_tree(trans, &iter))
+				continue;
+			delete_dead_snapshots_process_key(trans, &iter, k);
+		}));
 		bch2_disk_reservation_put(c, &res);
@@ -1617,7 +1694,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
 			goto err;
 	}
-	darray_for_each(delete_leaves, i) {
+	darray_for_each(d->delete_leaves, i) {
 		ret = commit_do(trans, NULL, NULL, 0,
 			bch2_snapshot_node_delete(trans, *i));
 		if (!bch2_err_matches(ret, EROFS))
@@ -1634,11 +1711,11 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
 	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
 			BTREE_ITER_intent, k,
 			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
-		bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &delete_interior));
+		bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior));
 	if (ret)
 		goto err;
-	darray_for_each(delete_interior, i) {
+	darray_for_each(d->delete_interior, i) {
 		ret = commit_do(trans, NULL, NULL, 0,
 			bch2_snapshot_node_delete(trans, i->id));
 		if (!bch2_err_matches(ret, EROFS))
@@ -1647,8 +1724,12 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
 			goto err;
 	}
 err:
-	darray_exit(&delete_interior);
-	darray_exit(&delete_leaves);
+	mutex_lock(&d->lock);
+	darray_exit(&d->deleting_from_trees);
+	darray_exit(&d->delete_interior);
+	darray_exit(&d->delete_leaves);
+	d->running = false;
+	mutex_unlock(&d->lock);
 	bch2_trans_put(trans);
 	if (!bch2_err_matches(ret, EROFS))
 		bch_err_fn(c, ret);
@@ -1657,7 +1738,7 @@ err:
 void bch2_delete_dead_snapshots_work(struct work_struct *work)
 {
-	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
+	struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete.work);
 	set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
@@ -1672,10 +1753,27 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *c)
 	BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
-	if (!queue_work(c->write_ref_wq, &c->snapshot_delete_work))
+	if (!queue_work(c->write_ref_wq, &c->snapshot_delete.work))
 		enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
 }
+void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c)
+{
+	struct snapshot_delete *d = &c->snapshot_delete;
+	if (!d->running) {
+		prt_str(out, "(not running)");
+		return;
+	}
+	mutex_lock(&d->lock);
+	bch2_snapshot_delete_nodes_to_text(out, d);
+	prt_newline(out);
+	mutex_unlock(&d->lock);
+	bch2_bbpos_to_text(out, d->pos);
+}
 int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
 				       enum btree_id id,
 				       struct bpos pos)
@@ -1714,7 +1812,7 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct
 		return 0;
 	struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
-	if (BCH_SNAPSHOT_DELETED(snap.v) ||
+	if (BCH_SNAPSHOT_WILL_DELETE(snap.v) ||
 	    interior_snapshot_needs_delete(snap))
 		set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags);
@@ -1750,3 +1848,10 @@ void bch2_fs_snapshots_exit(struct bch_fs *c)
 {
 	kvfree(rcu_dereference_protected(c->snapshots, true));
 }
+void bch2_fs_snapshots_init_early(struct bch_fs *c)
+{
+	INIT_WORK(&c->snapshot_delete.work, bch2_delete_dead_snapshots_work);
+	mutex_init(&c->snapshot_delete.lock);
+	mutex_init(&c->snapshots_unlinked_lock);
+}
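Note the container_of() changes above: moving the work item from a bare bch_fs member into struct snapshot_delete means the handler must name the nested member path. A minimal sketch of the pattern (simplified stand-in types, not the patch's own):

#include <linux/container_of.h>
#include <linux/workqueue.h>

struct delete_state {		/* stand-in for struct snapshot_delete */
	struct work_struct work;
};

struct fs {			/* stand-in for struct bch_fs */
	struct delete_state snapshot_delete;
};

static void example_work_fn(struct work_struct *work)
{
	/* offsetof() accepts nested members, so container_of() can step
	 * over the embedded struct in one expression: */
	struct fs *c = container_of(work, struct fs, snapshot_delete.work);
	(void) c;
}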


@@ -120,19 +120,24 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
 	return id;
 }
-static inline bool __bch2_snapshot_exists(struct bch_fs *c, u32 id)
+static inline enum snapshot_id_state __bch2_snapshot_id_state(struct bch_fs *c, u32 id)
 {
 	const struct snapshot_t *s = snapshot_t(c, id);
-	return s ? s->live : 0;
+	return s ? s->state : SNAPSHOT_ID_empty;
+}
+static inline enum snapshot_id_state bch2_snapshot_id_state(struct bch_fs *c, u32 id)
+{
+	rcu_read_lock();
+	enum snapshot_id_state ret = __bch2_snapshot_id_state(c, id);
+	rcu_read_unlock();
+	return ret;
 }
 static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id)
 {
-	rcu_read_lock();
-	bool ret = __bch2_snapshot_exists(c, id);
-	rcu_read_unlock();
-	return ret;
+	return bch2_snapshot_id_state(c, id) == SNAPSHOT_ID_live;
 }
 static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
@@ -241,10 +246,19 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
 int bch2_check_snapshot_trees(struct bch_fs *);
 int bch2_check_snapshots(struct bch_fs *);
 int bch2_reconstruct_snapshots(struct bch_fs *);
-int bch2_check_key_has_snapshot(struct btree_trans *, struct btree_iter *, struct bkey_s_c);
+int __bch2_check_key_has_snapshot(struct btree_trans *, struct btree_iter *, struct bkey_s_c);
+static inline int bch2_check_key_has_snapshot(struct btree_trans *trans,
+					      struct btree_iter *iter,
+					      struct bkey_s_c k)
+{
+	return likely(bch2_snapshot_exists(trans->c, k.k->p.snapshot))
+		? 0
+		: __bch2_check_key_has_snapshot(trans, iter, k);
+}
 int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
-void bch2_delete_dead_snapshots_work(struct work_struct *);
 int __bch2_key_has_snapshot_overwrites(struct btree_trans *, enum btree_id, struct bpos);
@@ -259,7 +273,13 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
 	return __bch2_key_has_snapshot_overwrites(trans, id, pos);
 }
+int bch2_delete_dead_snapshots(struct bch_fs *);
+void bch2_delete_dead_snapshots_work(struct work_struct *);
+void bch2_delete_dead_snapshots_async(struct bch_fs *);
+void bch2_snapshot_delete_status_to_text(struct printbuf *, struct bch_fs *);
 int bch2_snapshots_read(struct bch_fs *);
 void bch2_fs_snapshots_exit(struct bch_fs *);
+void bch2_fs_snapshots_init_early(struct bch_fs *);
 #endif /* _BCACHEFS_SNAPSHOT_H */
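The new inline wrapper keeps the common case - the key's snapshot ID is live - down to one RCU-protected table lookup, and only the unhappy paths call into snapshot.c. A sketch of a caller honoring the return convention (hypothetical fsck-style helper; the 1 return comes from the "?: 1" in __bch2_check_key_has_snapshot() above):

/* returns: 0 key is fine, 1 key was deleted, negative on error */
static int check_one_key(struct btree_trans *trans, struct btree_iter *iter,
			 struct bkey_s_c k)
{
	int ret = bch2_check_key_has_snapshot(trans, iter, k);
	if (ret < 0)
		return ret;	/* transaction error: bubble up */
	if (ret)
		return 0;	/* key was in a dead/missing snapshot and is gone */
	/* snapshot ID is live: continue checking the key here */
	return 0;
}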


@@ -15,10 +15,10 @@ struct bch_snapshot {
 	bch_le128 btime;
 };
-LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
+LE32_BITMASK(BCH_SNAPSHOT_WILL_DELETE, struct bch_snapshot, flags, 0, 1)
 /* True if a subvolume points to this snapshot node: */
 LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2)
+LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 2, 3)
 /*
  * Snapshot trees:
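The flags word now carries two distinct deletion bits: bit 0 (WILL_DELETE, the renamed old DELETED) marks a node queued for deletion, bit 1 remains SUBVOL, and the new bit 2 (DELETED) marks a node whose deletion has been made permanent under snapshot_deletion_v2. LE32_BITMASK() generates the accessors; approximately (paraphrased sketch, not the literal macro expansion):

static inline __u64 BCH_SNAPSHOT_DELETED(const struct bch_snapshot *k)
{
	return (__le32_to_cpu(k->flags) >> 2) & 1;	/* bits [2,3) */
}

static inline void SET_BCH_SNAPSHOT_DELETED(struct bch_snapshot *k, __u64 v)
{
	__u32 f = __le32_to_cpu(k->flags);

	f &= ~(1U << 2);
	f |= (v & 1) << 2;
	k->flags = __cpu_to_le32(f);
}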


@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SNAPSHOT_TYPES_H
+#define _BCACHEFS_SNAPSHOT_TYPES_H
+#include "bbpos_types.h"
+#include "darray.h"
+#include "subvolume_types.h"
+typedef DARRAY(u32) snapshot_id_list;
+#define IS_ANCESTOR_BITMAP 128
+struct snapshot_t {
+	enum snapshot_id_state {
+		SNAPSHOT_ID_empty,
+		SNAPSHOT_ID_live,
+		SNAPSHOT_ID_deleted,
+	} state;
+	u32 parent;
+	u32 skip[3];
+	u32 depth;
+	u32 children[2];
+	u32 subvol; /* Nonzero only if a subvolume points to this node: */
+	u32 tree;
+	unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)];
+};
+struct snapshot_table {
+	struct rcu_head rcu;
+	size_t nr;
+#ifndef RUST_BINDGEN
+	DECLARE_FLEX_ARRAY(struct snapshot_t, s);
+#else
+	struct snapshot_t s[0];
+#endif
+};
+struct snapshot_interior_delete {
+	u32 id;
+	u32 live_child;
+};
+typedef DARRAY(struct snapshot_interior_delete) interior_delete_list;
+struct snapshot_delete {
+	struct work_struct work;
+	struct mutex lock;
+	snapshot_id_list deleting_from_trees;
+	snapshot_id_list delete_leaves;
+	interior_delete_list delete_interior;
+	bool running;
+	struct bbpos pos;
+};
+#endif /* _BCACHEFS_SNAPSHOT_TYPES_H */
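The three lists in struct snapshot_delete drive the pass: deleting_from_trees lets whole unrelated snapshot trees be skipped, delete_leaves holds IDs whose keys are simply dropped, and delete_interior maps each dying interior node to the live child that inherits its keys. A worked example (ours, following the logic of check_should_delete_snapshot() and __live_child() earlier in this commit; parents have higher IDs than their children, as the is_ancestor bitmap math assumes):

/*
 * Snapshot chain 3 (root) -> 2 -> 1, where only the newest node, 1, is
 * still pointed to by a subvolume:
 *
 *   node 1: has a subvolume                  -> kept
 *   node 2: one live child (1)               -> delete_interior entry {2, 1}
 *   node 3: one live child (1, via node 2)   -> delete_interior entry {3, 1}
 *
 * Keys at snapshot 2 or 3 are rewritten to snapshot 1 by
 * delete_dead_snapshots_process_key(), then both nodes are removed.
 */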


@@ -730,8 +730,6 @@ int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
 void bch2_fs_subvolumes_init_early(struct bch_fs *c)
 {
-	INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
 	INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
 		  bch2_subvolume_wait_for_pagecache_and_delete);
-	mutex_init(&c->snapshots_unlinked_lock);
 }


@@ -77,9 +77,6 @@ bch2_btree_iter_peek_in_subvolume_max_type(struct btree_trans *trans, struct btr
 			_end, _subvolid, _flags, _k, _do); \
 })
-int bch2_delete_dead_snapshots(struct bch_fs *);
-void bch2_delete_dead_snapshots_async(struct bch_fs *);
 int bch2_subvolume_unlink(struct btree_trans *, u32);
 int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);


@@ -2,33 +2,6 @@
 #ifndef _BCACHEFS_SUBVOLUME_TYPES_H
 #define _BCACHEFS_SUBVOLUME_TYPES_H
-#include "darray.h"
-typedef DARRAY(u32) snapshot_id_list;
-#define IS_ANCESTOR_BITMAP 128
-struct snapshot_t {
-	bool live;
-	u32 parent;
-	u32 skip[3];
-	u32 depth;
-	u32 children[2];
-	u32 subvol; /* Nonzero only if a subvolume points to this node: */
-	u32 tree;
-	unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)];
-};
-struct snapshot_table {
-	struct rcu_head rcu;
-	size_t nr;
-#ifndef RUST_BINDGEN
-	DECLARE_FLEX_ARRAY(struct snapshot_t, s);
-#else
-	struct snapshot_t s[0];
-#endif
-};
 typedef struct {
 	/* we can't have padding in this struct: */
 	u64 subvol;


@@ -864,6 +864,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
 	bch2_fs_quota_init(c);
 	bch2_fs_rebalance_init(c);
 	bch2_fs_sb_errors_init_early(c);
+	bch2_fs_snapshots_init_early(c);
 	bch2_fs_subvolumes_init_early(c);
 	INIT_LIST_HEAD(&c->list);
@@ -1488,7 +1489,9 @@ static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca,
 {
 	ca->dev_idx = dev_idx;
 	__set_bit(ca->dev_idx, ca->self.d);
-	scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
+	if (!ca->name[0])
+		scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
 	ca->fs = c;
 	rcu_assign_pointer(c->devs[ca->dev_idx], ca);
@@ -1540,6 +1543,11 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
 	if (ret)
 		return ret;
+	struct printbuf name = PRINTBUF;
+	prt_bdevname(&name, sb->bdev);
+	strscpy(ca->name, name.buf, sizeof(ca->name));
+	printbuf_exit(&name);
 	/* Commit: */
 	ca->disk_sb = *sb;
 	memset(sb, 0, sizeof(*sb));
@@ -1581,11 +1589,6 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
 	bch2_dev_sysfs_online(c, ca);
-	struct printbuf name = PRINTBUF;
-	prt_bdevname(&name, ca->disk_sb.bdev);
-	strscpy(ca->name, name.buf, sizeof(ca->name));
-	printbuf_exit(&name);
 	bch2_rebalance_wakeup(c);
 	return 0;
 }


@@ -147,8 +147,9 @@ write_attribute(trigger_journal_flush);
 write_attribute(trigger_journal_writes);
 write_attribute(trigger_btree_cache_shrink);
 write_attribute(trigger_btree_key_cache_shrink);
-write_attribute(trigger_freelist_wakeup);
 write_attribute(trigger_btree_updates);
+write_attribute(trigger_freelist_wakeup);
+write_attribute(trigger_recalc_capacity);
 read_attribute(gc_gens_pos);
 __sysfs_attribute(read_fua_test, 0400);
@@ -199,6 +200,7 @@ read_attribute(copy_gc_wait);
 sysfs_pd_controller_attribute(rebalance);
 read_attribute(rebalance_status);
+read_attribute(snapshot_delete_status);
 read_attribute(new_stripes);
@@ -431,6 +433,9 @@ SHOW(bch2_fs)
 	if (attr == &sysfs_rebalance_status)
 		bch2_rebalance_status_to_text(out, c);
+	if (attr == &sysfs_snapshot_delete_status)
+		bch2_snapshot_delete_status_to_text(out, c);
 	/* Debugging: */
 	if (attr == &sysfs_journal_debug)
@@ -540,6 +545,12 @@ STORE(bch2_fs)
 	if (attr == &sysfs_trigger_freelist_wakeup)
 		closure_wake_up(&c->freelist_wait);
+	if (attr == &sysfs_trigger_recalc_capacity) {
+		down_read(&c->state_lock);
+		bch2_recalc_capacity(c);
+		up_read(&c->state_lock);
+	}
 #ifdef CONFIG_BCACHEFS_TESTS
 	if (attr == &sysfs_perf_test) {
 		char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
@@ -571,6 +582,7 @@ struct attribute *bch2_fs_files[] = {
 	&sysfs_btree_write_stats,
 	&sysfs_rebalance_status,
+	&sysfs_snapshot_delete_status,
 	&sysfs_compression_stats,
@@ -665,8 +677,9 @@ struct attribute *bch2_fs_internal_files[] = {
 	&sysfs_trigger_journal_writes,
 	&sysfs_trigger_btree_cache_shrink,
 	&sysfs_trigger_btree_key_cache_shrink,
-	&sysfs_trigger_freelist_wakeup,
 	&sysfs_trigger_btree_updates,
+	&sysfs_trigger_freelist_wakeup,
+	&sysfs_trigger_recalc_capacity,
 	&sysfs_gc_gens_pos,
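The new read attribute surfaces as a regular sysfs file; a trivial userspace check (hypothetical example program, assuming the usual /sys/fs/bcachefs/<uuid>/ layout):

#include <stdio.h>

int main(int argc, char **argv)
{
	/* argv[1]: e.g. /sys/fs/bcachefs/<uuid>/snapshot_delete_status */
	char buf[4096];
	FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;

	if (!f)
		return 1;
	size_t n = fread(buf, 1, sizeof(buf) - 1, f);
	buf[n] = '\0';
	fputs(buf, stdout);	/* "(not running)", or the node lists plus position */
	fclose(f);
	return 0;
}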


@@ -38,7 +38,7 @@ static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
 	struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k);
 	return bch2_xattr_hash(info,
-		 &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len));
+		 &X_SEARCH(x.v->x_type, x.v->x_name_and_value, x.v->x_name_len));
 }
 static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r)
@@ -48,7 +48,7 @@ static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r)
 	return l.v->x_type != r->type ||
 		l.v->x_name_len != r->name.len ||
-		memcmp(l.v->x_name, r->name.name, r->name.len);
+		memcmp(l.v->x_name_and_value, r->name.name, r->name.len);
 }
 static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
@@ -58,7 +58,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
 	return l.v->x_type != r.v->x_type ||
 		l.v->x_name_len != r.v->x_name_len ||
-		memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len);
+		memcmp(l.v->x_name_and_value, r.v->x_name_and_value, r.v->x_name_len);
 }
 const struct bch_hash_desc bch2_xattr_hash_desc = {
@@ -96,7 +96,7 @@ int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k,
 			 c, xattr_invalid_type,
 			 "invalid type (%u)", xattr.v->x_type);
-	bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len),
+	bkey_fsck_err_on(memchr(xattr.v->x_name_and_value, '\0', xattr.v->x_name_len),
			 c, xattr_name_invalid_chars,
			 "xattr name has invalid characters");
 fsck_err:
@@ -120,13 +120,13 @@ void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
 	unsigned name_len = xattr.v->x_name_len;
 	unsigned val_len = le16_to_cpu(xattr.v->x_val_len);
 	unsigned max_name_val_bytes = bkey_val_bytes(xattr.k) -
-		offsetof(struct bch_xattr, x_name);
+		offsetof(struct bch_xattr, x_name_and_value);
 	val_len = min_t(int, val_len, max_name_val_bytes - name_len);
 	name_len = min(name_len, max_name_val_bytes);
 	prt_printf(out, "%.*s:%.*s",
-		   name_len, xattr.v->x_name,
+		   name_len, xattr.v->x_name_and_value,
 		   val_len, (char *) xattr_val(xattr.v));
 	if (xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS ||
@@ -176,6 +176,11 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
 	if (ret)
 		return ret;
+	/*
+	 * Besides the ctime update, extents, dirents and xattrs updates require
+	 * that an inode update also happens - to ensure that if a key exists in
+	 * one of those btrees with a given snapshot ID an inode is also present
+	 */
 	inode_u->bi_ctime = bch2_current_time(c);
 	ret = bch2_inode_write(trans, &inode_iter, inode_u);
@@ -202,7 +207,7 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
 		xattr->v.x_type = type;
 		xattr->v.x_name_len = namelen;
 		xattr->v.x_val_len = cpu_to_le16(size);
-		memcpy(xattr->v.x_name, name, namelen);
+		memcpy(xattr->v.x_name_and_value, name, namelen);
 		memcpy(xattr_val(&xattr->v), value, size);
 		ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
@@ -270,7 +275,7 @@ static int bch2_xattr_emit(struct dentry *dentry,
 	if (!prefix)
 		return 0;
-	return __bch2_xattr_emit(prefix, xattr->x_name, xattr->x_name_len, buf);
+	return __bch2_xattr_emit(prefix, xattr->x_name_and_value, xattr->x_name_len, buf);
 }
 static int bch2_xattr_list_bcachefs(struct bch_fs *c,


@@ -18,12 +18,12 @@ void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
 {
-	return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) +
+	return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name_and_value) +
			    name_len + val_len, sizeof(u64));
 }
 #define xattr_val(_xattr) \
-	((void *) (_xattr)->x_name + (_xattr)->x_name_len)
+	((void *) (_xattr)->x_name_and_value + (_xattr)->x_name_len)
 struct xattr_search_key {
 	u8 type;


@@ -13,7 +13,13 @@ struct bch_xattr {
 	__u8 x_type;
 	__u8 x_name_len;
 	__le16 x_val_len;
-	__u8 x_name[] __counted_by(x_name_len);
+	/*
+	 * x_name contains the name and value counted by
+	 * x_name_len + x_val_len. The introduction of
+	 * __counted_by(x_name_len) previously caused a false positive
+	 * detection of an out of bounds write.
+	 */
+	__u8 x_name_and_value[];
 } __packed __aligned(8);
 #endif /* _BCACHEFS_XATTR_FORMAT_H */
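The rename makes the real layout explicit: the flexible array holds x_name_len name bytes immediately followed by the value, which is why xattr_val() (xattr.h, above) is plain pointer arithmetic and why __counted_by(x_name_len) made the value memcpy() in bch2_xattr_set() look like an out-of-bounds write. A worked size example using the patch's own xattr_val_u64s():

/*
 * Key with name "user.foo" (8 bytes) and a 5-byte value:
 *
 *   offset 0:  x_type, x_name_len = 8, x_val_len = 5   (4-byte header)
 *   offset 4:  'u' 's' 'e' 'r' '.' 'f' 'o' 'o'         (name bytes)
 *   offset 12: 5 value bytes                           (xattr_val() points here)
 *
 * Total 4 + 8 + 5 = 17 bytes, so xattr_val_u64s(8, 5) == DIV_ROUND_UP(17, 8) == 3.
 */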