Update bcachefs sources to 5a0455ae19af bcachefs: bcachefs_metadata_version_snapshot_deletion_v2

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2025-05-02 13:39:43 -04:00
parent a4babd1d64
commit 8376810564
33 changed files with 855 additions and 529 deletions

View File

@ -1 +1 @@
9b4ab159abcd84cf0c25ee851dda8c40baffecc8
5a0455ae19afb354634b3c5c9bf55d2171005a2f

View File

@ -4,14 +4,12 @@
#include <linux/compiler.h>
#include <linux/limits.h>
#include <linux/const.h>
/*
* In the fallback code below, we need to compute the minimum and
* maximum values representable in a given type. These macros may also
* be useful elsewhere, so we provide them outside the
* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
*
* It would seem more obvious to do something like
* We need to compute the minimum and maximum values representable in a given
* type. These macros may also be useful elsewhere. It would seem more obvious
* to do something like:
*
* #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
* #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
@ -33,8 +31,10 @@
* credit to Christian Biere.
*/
#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
#define type_min(T) ((T)((T)-type_max(T)-(T)1))
#define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
#define type_max(t) __type_max(typeof(t))
#define __type_min(T) ((T)((T)-type_max(T)-(T)1))
#define type_min(t) __type_min(typeof(t))
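For a quick feel of what the half-max trick yields, here is a standalone userspace sketch (not part of this patch; the sketch_* names and the main() harness are invented for illustration):

/* Standalone sketch of the type_max()/type_min() construction above. */
#include <stdint.h>
#include <stdio.h>

#define sketch_is_signed(type)	(((type)(-1)) < (type)1)
#define sketch_half_max(type)	((type)1 << (8*sizeof(type) - 1 - sketch_is_signed(type)))
#define sketch_type_max(T)	((T)((sketch_half_max(T) - 1) + sketch_half_max(T)))
#define sketch_type_min(T)	((T)((T)-sketch_type_max(T) - (T)1))

int main(void)
{
	printf("uint8_t: min %u max %u\n",
	       (unsigned)sketch_type_min(uint8_t),
	       (unsigned)sketch_type_max(uint8_t));	/* 0 255 */
	printf("int16_t: min %d max %d\n",
	       (int)sketch_type_min(int16_t),
	       (int)sketch_type_max(int16_t));		/* -32768 32767 */
	return 0;
}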
/*
* Avoids triggering -Wtype-limits compilation warning,
@ -53,194 +53,153 @@ static inline bool __must_check __must_check_overflow(bool overflow)
return unlikely(overflow);
}
#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
/*
* For simplicity and code hygiene, the fallback code below insists on
* a, b and *d having the same type (similar to the min() and max()
* macros), whereas gcc's type-generic overflow checkers accept
* different types. Hence we don't just make check_add_overflow an
* alias for __builtin_add_overflow, but add type checks similar to
* below.
*/
#define check_add_overflow(a, b, d) __must_check_overflow(({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
__builtin_add_overflow(__a, __b, __d); \
}))
#define check_sub_overflow(a, b, d) __must_check_overflow(({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
__builtin_sub_overflow(__a, __b, __d); \
}))
#define check_mul_overflow(a, b, d) __must_check_overflow(({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
__builtin_mul_overflow(__a, __b, __d); \
}))
#else
/* Checking for unsigned overflow is relatively easy without causing UB. */
#define __unsigned_add_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = __a + __b; \
*__d < __a; \
})
#define __unsigned_sub_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = __a - __b; \
__a < __b; \
})
/*
* If one of a or b is a compile-time constant, this avoids a division.
*/
#define __unsigned_mul_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = __a * __b; \
__builtin_constant_p(__b) ? \
__b > 0 && __a > type_max(typeof(__a)) / __b : \
__a > 0 && __b > type_max(typeof(__b)) / __a; \
})
/*
* For signed types, detecting overflow is much harder, especially if
* we want to avoid UB. But the interface of these macros is such that
* we must provide a result in *d, and in fact we must produce the
* result promised by gcc's builtins, which is simply the possibly
* wrapped-around value. Fortunately, we can just formally do the
* operations in the widest relevant unsigned type (u64) and then
* truncate the result - gcc is smart enough to generate the same code
* with and without the (u64) casts.
*/
/*
* Adding two signed integers can overflow only if they have the same
* sign, and overflow has happened iff the result has the opposite
* sign.
*/
#define __signed_add_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = (u64)__a + (u64)__b; \
(((~(__a ^ __b)) & (*__d ^ __a)) \
& type_min(typeof(__a))) != 0; \
})
/*
* Subtraction is similar, except that overflow can now happen only
* when the signs are opposite. In this case, overflow has happened if
* the result has the opposite sign of a.
*/
#define __signed_sub_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = (u64)__a - (u64)__b; \
((((__a ^ __b)) & (*__d ^ __a)) \
& type_min(typeof(__a))) != 0; \
})
/*
* Signed multiplication is rather hard. gcc always follows C99, so
* division is truncated towards 0. This means that we can write the
* overflow check like this:
/**
* check_add_overflow() - Calculate addition with overflow checking
* @a: first addend
* @b: second addend
* @d: pointer to store sum
*
* (a > 0 && (b > MAX/a || b < MIN/a)) ||
* (a < -1 && (b > MIN/a || b < MAX/a) ||
* (a == -1 && b == MIN)
* Returns true on wrap-around, false otherwise.
*
* The redundant casts of -1 are to silence an annoying -Wtype-limits
* (included in -Wextra) warning: When the type is u8 or u16, the
* __b_c_e in check_mul_overflow obviously selects
* __unsigned_mul_overflow, but unfortunately gcc still parses this
* code and warns about the limited range of __b.
* *@d holds the results of the attempted addition, regardless of whether
* wrap-around occurred.
*/
#define check_add_overflow(a, b, d) \
__must_check_overflow(__builtin_add_overflow(a, b, d))
#define __signed_mul_overflow(a, b, d) ({ \
typeof(a) __a = (a); \
typeof(b) __b = (b); \
typeof(d) __d = (d); \
typeof(a) __tmax = type_max(typeof(a)); \
typeof(a) __tmin = type_min(typeof(a)); \
(void) (&__a == &__b); \
(void) (&__a == __d); \
*__d = (u64)__a * (u64)__b; \
(__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
(__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
(__b == (typeof(__b))-1 && __a == __tmin); \
})
#define check_add_overflow(a, b, d) __must_check_overflow( \
__builtin_choose_expr(is_signed_type(typeof(a)), \
__signed_add_overflow(a, b, d), \
__unsigned_add_overflow(a, b, d)))
#define check_sub_overflow(a, b, d) __must_check_overflow( \
__builtin_choose_expr(is_signed_type(typeof(a)), \
__signed_sub_overflow(a, b, d), \
__unsigned_sub_overflow(a, b, d)))
#define check_mul_overflow(a, b, d) __must_check_overflow( \
__builtin_choose_expr(is_signed_type(typeof(a)), \
__signed_mul_overflow(a, b, d), \
__unsigned_mul_overflow(a, b, d)))
#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
/** check_shl_overflow() - Calculate a left-shifted value and check overflow
/**
* wrapping_add() - Intentionally perform a wrapping addition
* @type: type for result of calculation
* @a: first addend
* @b: second addend
*
* Return the potentially wrapped-around addition without
* tripping any wrap-around sanitizers that may be enabled.
*/
#define wrapping_add(type, a, b) \
({ \
type __val; \
__builtin_add_overflow(a, b, &__val); \
__val; \
})
/**
* wrapping_assign_add() - Intentionally perform a wrapping increment assignment
* @var: variable to be incremented
* @offset: amount to add
*
* Increments @var by @offset with wrap-around. Returns the resulting
* value of @var. Will not trip any wrap-around sanitizers.
*
* Returns the new value of @var.
*/
#define wrapping_assign_add(var, offset) \
({ \
typeof(var) *__ptr = &(var); \
*__ptr = wrapping_add(typeof(var), *__ptr, offset); \
})
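A minimal userspace sketch of how the checked and wrapping flavours differ, using the same GCC/Clang builtins these macros wrap (the variable names are invented; this is an illustration, not the kernel code):

/* Sketch: checked addition vs. intentional wrap-around on a u8-sized value. */
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	uint8_t sum;
	/* check_add_overflow() style: the caller must look at the return value. */
	bool ovf = __builtin_add_overflow((uint8_t)200, (uint8_t)100, &sum);
	printf("ovf=%d sum=%u\n", ovf, (unsigned)sum);		/* ovf=1 sum=44 */

	/* wrapping_add()/wrapping_assign_add() style: the wrapped value is the point,
	 * and no wrap-around sanitizer fires because the builtin defines the wrap. */
	uint8_t counter = 250;
	(void)__builtin_add_overflow(counter, 10, &counter);
	printf("counter=%u\n", (unsigned)counter);		/* 4 */
	return 0;
}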
/**
* check_sub_overflow() - Calculate subtraction with overflow checking
* @a: minuend; value to subtract from
* @b: subtrahend; value to subtract from @a
* @d: pointer to store difference
*
* Returns true on wrap-around, false otherwise.
*
* *@d holds the results of the attempted subtraction, regardless of whether
* wrap-around occurred.
*/
#define check_sub_overflow(a, b, d) \
__must_check_overflow(__builtin_sub_overflow(a, b, d))
/**
* wrapping_sub() - Intentionally perform a wrapping subtraction
* @type: type for result of calculation
* @a: minuend; value to subtract from
* @b: subtrahend; value to subtract from @a
*
* Return the potentially wrapped-around subtraction without
* tripping any wrap-around sanitizers that may be enabled.
*/
#define wrapping_sub(type, a, b) \
({ \
type __val; \
__builtin_sub_overflow(a, b, &__val); \
__val; \
})
/**
* wrapping_assign_sub() - Intentionally perform a wrapping decrement assign
* @var: variable to be decremented
* @offset: amount to subtract
*
* Decrements @var by @offset with wrap-around. Returns the resulting
* value of @var. Will not trip any wrap-around sanitizers.
*
* Returns the new value of @var.
*/
#define wrapping_assign_sub(var, offset) \
({ \
typeof(var) *__ptr = &(var); \
*__ptr = wrapping_sub(typeof(var), *__ptr, offset); \
})
/**
* check_mul_overflow() - Calculate multiplication with overflow checking
* @a: first factor
* @b: second factor
* @d: pointer to store product
*
* Returns true on wrap-around, false otherwise.
*
* *@d holds the results of the attempted multiplication, regardless of whether
* wrap-around occurred.
*/
#define check_mul_overflow(a, b, d) \
__must_check_overflow(__builtin_mul_overflow(a, b, d))
/**
* wrapping_mul() - Intentionally perform a wrapping multiplication
* @type: type for result of calculation
* @a: first factor
* @b: second factor
*
* Return the potentially wrapped-around multiplication without
* tripping any wrap-around sanitizers that may be enabled.
*/
#define wrapping_mul(type, a, b) \
({ \
type __val; \
__builtin_mul_overflow(a, b, &__val); \
__val; \
})
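The subtraction and multiplication helpers behave the same way; a tiny userspace sketch for the multiply case (illustrative only):

/* Sketch: check_mul_overflow()/wrapping_mul() semantics on a 16-bit product. */
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	uint16_t prod;
	bool ovf = __builtin_mul_overflow((uint16_t)300, (uint16_t)300, &prod);
	printf("ovf=%d prod=%u\n", ovf, (unsigned)prod);	/* ovf=1 prod=24464 (90000 mod 65536) */
	return 0;
}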
/**
* check_shl_overflow() - Calculate a left-shifted value and check overflow
* @a: Value to be shifted
* @s: How many bits left to shift
* @d: Pointer to where to store the result
*
* Computes *@d = (@a << @s)
*
* Returns true if '*d' cannot hold the result or when 'a << s' doesn't
* Returns true if '*@d' cannot hold the result or when '@a << @s' doesn't
* make sense. Example conditions:
* - 'a << s' causes bits to be lost when stored in *d.
* - 's' is garbage (e.g. negative) or so large that the result of
* 'a << s' is guaranteed to be 0.
* - 'a' is negative.
* - 'a << s' sets the sign bit, if any, in '*d'.
*
* '*d' will hold the results of the attempted shift, but is not
* considered "safe for use" if false is returned.
* - '@a << @s' causes bits to be lost when stored in *@d.
* - '@s' is garbage (e.g. negative) or so large that the result of
* '@a << @s' is guaranteed to be 0.
* - '@a' is negative.
* - '@a << @s' sets the sign bit, if any, in '*@d'.
*
* '*@d' will hold the results of the attempted shift, but is not
* considered "safe for use" if true is returned.
*/
#define check_shl_overflow(a, s, d) __must_check_overflow(({ \
typeof(a) _a = a; \
typeof(s) _s = s; \
typeof(d) _d = d; \
u64 _a_full = _a; \
unsigned long long _a_full = _a; \
unsigned int _to_shift = \
is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \
*_d = (_a_full << _to_shift); \
@ -248,9 +207,115 @@ static inline bool __must_check __must_check_overflow(bool overflow)
(*_d >> _to_shift) != _a); \
}))
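A small userspace sketch of the same rules check_shl_overflow() applies (it mirrors the macro rather than using it; the function and variable names are invented):

/* Sketch: reject left shifts that lose bits or use a nonsense shift count. */
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

static bool shl_overflows_u32(uint32_t a, unsigned int s, uint32_t *d)
{
	unsigned int to_shift = s < 32 ? s : 0;	/* out-of-range counts shift by 0 */
	*d = a << to_shift;
	/* overflow if the count was clamped, or if shifting back loses bits */
	return to_shift != s || (*d >> to_shift) != a;
}

int main(void)
{
	uint32_t out;
	printf("%d\n", shl_overflows_u32(1, 4, &out));			/* 0: fits */
	printf("%d\n", shl_overflows_u32(0x80000000u, 1, &out));	/* 1: top bit lost */
	printf("%d\n", shl_overflows_u32(1, 40, &out));			/* 1: bogus shift count */
	return 0;
}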
#define __overflows_type_constexpr(x, T) ( \
is_unsigned_type(typeof(x)) ? \
(x) > type_max(T) : \
is_unsigned_type(typeof(T)) ? \
(x) < 0 || (x) > type_max(T) : \
(x) < type_min(T) || (x) > type_max(T))
#define __overflows_type(x, T) ({ \
typeof(T) v = 0; \
check_add_overflow((x), v, &v); \
})
/**
* overflows_type - helper for checking whether a value overflows a variable
* or data type
*
* @n: source constant value or variable to be checked
* @T: destination variable or data type proposed to store @n
*
* Compares the @n expression for whether or not it can safely fit in
* the storage of the type in @T. @n and @T can have different types.
* If @n is a constant expression, this will also resolve to a constant
* expression.
*
* Returns: true if overflow can occur, false otherwise.
*/
#define overflows_type(n, T) \
__builtin_choose_expr(__is_constexpr(n), \
__overflows_type_constexpr(n, T), \
__overflows_type(n, T))
/**
* castable_to_type - like __same_type(), but also allows for casted literals
*
* @n: variable or constant value
* @T: variable or data type
*
* Unlike the __same_type() macro, this allows a constant value as the
* first argument. If this value would not overflow into an assignment
* of the second argument's type, it returns true. Otherwise, this falls
* back to __same_type().
*/
#define castable_to_type(n, T) \
__builtin_choose_expr(__is_constexpr(n), \
!__overflows_type_constexpr(n, T), \
__same_type(n, T))
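A userspace sketch of the non-constant-expression path: assigning @n into a zero of the destination type via __builtin_add_overflow() reveals whether it fits (the fits_in() name is invented, and note it returns the opposite sense of overflows_type()):

/* Sketch: "does this value fit in that type?" via the add-zero-and-check trick. */
#include <stdint.h>
#include <stdio.h>

#define fits_in(x, T) ({ T __v = 0; !__builtin_add_overflow((x), __v, &__v); })

int main(void)
{
	printf("%d\n", fits_in(200, int8_t));	/* 0: 200 > 127 */
	printf("%d\n", fits_in(200, uint8_t));	/* 1: fits */
	printf("%d\n", fits_in(-1, uint8_t));	/* 0: negative value into unsigned */
	return 0;
}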
/**
* size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX
* @factor1: first factor
* @factor2: second factor
*
* Returns: calculate @factor1 * @factor2, both promoted to size_t,
* with any overflow causing the return value to be SIZE_MAX. The
* lvalue must be size_t to avoid implicit type conversion.
*/
static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
{
size_t bytes;
if (check_mul_overflow(factor1, factor2, &bytes))
return SIZE_MAX;
return bytes;
}
/**
* size_add() - Calculate size_t addition with saturation at SIZE_MAX
* @addend1: first addend
* @addend2: second addend
*
* Returns: calculate @addend1 + @addend2, both promoted to size_t,
* with any overflow causing the return value to be SIZE_MAX. The
* lvalue must be size_t to avoid implicit type conversion.
*/
static inline size_t __must_check size_add(size_t addend1, size_t addend2)
{
size_t bytes;
if (check_add_overflow(addend1, addend2, &bytes))
return SIZE_MAX;
return bytes;
}
/**
* size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX
* @minuend: value to subtract from
* @subtrahend: value to subtract from @minuend
*
* Returns: calculate @minuend - @subtrahend, both promoted to size_t,
* with any overflow causing the return value to be SIZE_MAX. For
* composition with the size_add() and size_mul() helpers, neither
* argument may be SIZE_MAX (or the result will be forced to SIZE_MAX).
* The lvalue must be size_t to avoid implicit type conversion.
*/
static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
{
size_t bytes;
if (minuend == SIZE_MAX || subtrahend == SIZE_MAX ||
check_sub_overflow(minuend, subtrahend, &bytes))
return SIZE_MAX;
return bytes;
}
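Sketch of how the saturating helpers compose in a size calculation (userspace stand-ins for size_mul()/size_add(); the point is that SIZE_MAX propagates through the whole expression):

/* Sketch: saturating size arithmetic -- any overflow pins the result at SIZE_MAX. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static size_t size_mul_sketch(size_t a, size_t b)
{
	size_t r;
	return __builtin_mul_overflow(a, b, &r) ? SIZE_MAX : r;
}

static size_t size_add_sketch(size_t a, size_t b)
{
	size_t r;
	return __builtin_add_overflow(a, b, &r) ? SIZE_MAX : r;
}

int main(void)
{
	size_t n = SIZE_MAX / 8;	/* deliberately huge element count */
	/* header + n * element_size: the multiply saturates, and the add keeps it there */
	size_t bytes = size_add_sketch(64, size_mul_sketch(n, 16));
	printf("%zu\n", bytes);		/* SIZE_MAX */
	return 0;
}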
/**
* array_size() - Calculate size of 2-dimensional array.
*
* @a: dimension one
* @b: dimension two
*
@ -259,19 +324,10 @@ static inline bool __must_check __must_check_overflow(bool overflow)
* Returns: number of bytes needed to represent the array or SIZE_MAX on
* overflow.
*/
static inline __must_check size_t array_size(size_t a, size_t b)
{
size_t bytes;
if (check_mul_overflow(a, b, &bytes))
return SIZE_MAX;
return bytes;
}
#define array_size(a, b) size_mul(a, b)
/**
* array3_size() - Calculate size of 3-dimensional array.
*
* @a: dimension one
* @b: dimension two
* @c: dimension three
@ -281,54 +337,11 @@ static inline __must_check size_t array_size(size_t a, size_t b)
* Returns: number of bytes needed to represent the array or SIZE_MAX on
* overflow.
*/
static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
{
size_t bytes;
if (check_mul_overflow(a, b, &bytes))
return SIZE_MAX;
if (check_mul_overflow(bytes, c, &bytes))
return SIZE_MAX;
return bytes;
}
/*
* Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for
* struct_size() below.
*/
static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
{
size_t bytes;
if (check_mul_overflow(a, b, &bytes))
return SIZE_MAX;
if (check_add_overflow(bytes, c, &bytes))
return SIZE_MAX;
return bytes;
}
/**
* struct_size() - Calculate size of structure with trailing array.
* @p: Pointer to the structure.
* @member: Name of the array member.
* @count: Number of elements in the array.
*
* Calculates size of memory needed for structure @p followed by an
* array of @count number of @member elements.
*
* Return: number of bytes needed or SIZE_MAX on overflow.
*/
#define struct_size(p, member, count) \
__ab_c_size(count, \
sizeof(*(p)->member) + __must_be_array((p)->member),\
sizeof(*(p)))
#define array3_size(a, b, c) size_mul(size_mul(a, b), c)
/**
* flex_array_size() - Calculate size of a flexible array member
* within an enclosing structure.
*
* @p: Pointer to the structure.
* @member: Name of the flexible array member.
* @count: Number of elements in the array.
@ -339,7 +352,92 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
* Return: number of bytes needed or SIZE_MAX on overflow.
*/
#define flex_array_size(p, member, count) \
array_size(count, \
sizeof(*(p)->member) + __must_be_array((p)->member))
__builtin_choose_expr(__is_constexpr(count), \
(count) * sizeof(*(p)->member) + __must_be_array((p)->member), \
size_mul(count, sizeof(*(p)->member) + __must_be_array((p)->member)))
/**
* struct_size() - Calculate size of structure with trailing flexible array.
* @p: Pointer to the structure.
* @member: Name of the array member.
* @count: Number of elements in the array.
*
* Calculates size of memory needed for structure of @p followed by an
* array of @count number of @member elements.
*
* Return: number of bytes needed or SIZE_MAX on overflow.
*/
#define struct_size(p, member, count) \
__builtin_choose_expr(__is_constexpr(count), \
sizeof(*(p)) + flex_array_size(p, member, count), \
size_add(sizeof(*(p)), flex_array_size(p, member, count)))
/**
* struct_size_t() - Calculate size of structure with trailing flexible array
* @type: structure type name.
* @member: Name of the array member.
* @count: Number of elements in the array.
*
* Calculates size of memory needed for structure @type followed by an
* array of @count number of @member elements. Prefer using struct_size()
* when possible instead, to keep calculations associated with a specific
* instance variable of type @type.
*
* Return: number of bytes needed or SIZE_MAX on overflow.
*/
#define struct_size_t(type, member, count) \
struct_size((type *)NULL, member, count)
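The typical consumer is an allocation for a struct with a trailing flexible array; a userspace sketch of the size struct_size() computes (struct sample and its field names are invented for illustration, and this plain expression wraps rather than saturating like the kernel macro):

/* Sketch: sizing "header + n trailing elements", which struct_size() encapsulates. */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct sample {
	unsigned int nr;
	unsigned int data[];	/* flexible array member */
};

int main(void)
{
	unsigned int n = 8;
	size_t bytes = sizeof(struct sample) + (size_t)n * sizeof(unsigned int);
	struct sample *p = malloc(bytes);
	if (!p)
		return 1;
	p->nr = n;
	for (unsigned int i = 0; i < n; i++)
		p->data[i] = i;
	printf("%zu bytes for %u elements, last=%u\n", bytes, p->nr, p->data[n - 1]);
	free(p);
	return 0;
}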
/**
* _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
* Enables caller macro to pass (different) initializer.
*
* @type: structure type name, including "struct" keyword.
* @name: Name for a variable to define.
* @member: Name of the array member.
* @count: Number of elements in the array; must be compile-time const.
* @initializer: initializer expression (could be empty for no init).
*/
#define _DEFINE_FLEX(type, name, member, count, initializer...) \
_Static_assert(__builtin_constant_p(count), \
"onstack flex array members require compile-time const count"); \
union { \
u8 bytes[struct_size_t(type, member, count)]; \
type obj; \
} name##_u initializer; \
type *name = (type *)&name##_u
/**
* DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing
* flexible array member, when it does not have a __counted_by annotation.
*
* @type: structure type name, including "struct" keyword.
* @name: Name for a variable to define.
* @member: Name of the array member.
* @count: Number of elements in the array; must be compile-time const.
*
* Define a zeroed, on-stack, instance of @type structure with a trailing
* flexible array member.
* Use __struct_size(@name) to get compile-time size of it afterwards.
*/
#define DEFINE_RAW_FLEX(type, name, member, count) \
_DEFINE_FLEX(type, name, member, count, = {})
/**
* DEFINE_FLEX() - Define an on-stack instance of structure with a trailing
* flexible array member.
*
* @TYPE: structure type name, including "struct" keyword.
* @NAME: Name for a variable to define.
* @MEMBER: Name of the array member.
* @COUNTER: Name of the __counted_by member.
* @COUNT: Number of elements in the array; must be compile-time const.
*
* Define a zeroed, on-stack, instance of @TYPE structure with a trailing
* flexible array member.
* Use __struct_size(@NAME) to get compile-time size of it afterwards.
*/
#define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \
_DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, })
#endif /* __LINUX_OVERFLOW_H */
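A userspace sketch of the on-stack trick _DEFINE_FLEX() relies on: a union sized for the header plus a fixed element count, with a typed pointer aliasing it (struct msg and its fields are invented; the real macros additionally static-assert that the count is a compile-time constant):

/* Sketch: an on-stack instance of a flexible-array struct via a sized union. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct msg {
	uint8_t nr;
	uint8_t data[];		/* flexible array member */
};

int main(void)
{
	/* roughly what DEFINE_RAW_FLEX(struct msg, m, data, 4) expands to */
	union {
		uint8_t bytes[sizeof(struct msg) + 4 * sizeof(uint8_t)];
		struct msg obj;
	} m_u = {};
	struct msg *m = &m_u.obj;

	m->nr = 4;
	memcpy(m->data, "\x01\x02\x03\x04", 4);
	printf("nr=%u first=%u\n", (unsigned)m->nr, (unsigned)m->data[0]);
	return 0;
}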

View File

@ -216,6 +216,7 @@
#include "recovery_passes_types.h"
#include "sb-errors_types.h"
#include "seqmutex.h"
#include "snapshot_types.h"
#include "time_stats.h"
#include "util.h"
@ -709,7 +710,7 @@ struct btree_transaction_stats {
unsigned nr_max_paths;
unsigned journal_entries_size;
unsigned max_mem;
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif
char *max_paths_text;
@ -869,7 +870,7 @@ struct bch_fs {
struct mutex snapshot_table_lock;
struct rw_semaphore snapshot_create_lock;
struct work_struct snapshot_delete_work;
struct snapshot_delete snapshot_delete;
struct work_struct snapshot_wait_for_pagecache_and_delete_work;
snapshot_id_list snapshots_unlinked;
struct mutex snapshots_unlinked_lock;

View File

@ -695,7 +695,8 @@ struct bch_sb_field_ext {
x(stripe_backpointers, BCH_VERSION(1, 22)) \
x(stripe_lru, BCH_VERSION(1, 23)) \
x(casefolding, BCH_VERSION(1, 24)) \
x(extent_flags, BCH_VERSION(1, 25))
x(extent_flags, BCH_VERSION(1, 25)) \
x(snapshot_deletion_v2, BCH_VERSION(1, 26))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,

View File

@ -3089,7 +3089,7 @@ void bch2_trans_copy_iter(struct btree_trans *trans,
dst->key_cache_path = 0;
}
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
void bch2_trans_kmalloc_trace_to_text(struct printbuf *out,
darray_trans_kmalloc_trace *trace)
{
@ -3112,7 +3112,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
void *p;
if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) {
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
struct printbuf buf = PRINTBUF;
bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace);
bch2_print_str(c, KERN_ERR, buf.buf);
@ -3127,7 +3127,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
struct btree_transaction_stats *s = btree_trans_stats(trans);
if (new_bytes > s->max_mem) {
mutex_lock(&s->lock);
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr);
s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size,
trans->trans_kmalloc_trace.nr);
@ -3314,7 +3314,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
}
#endif
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
trans->trans_kmalloc_trace.nr = 0;
#endif
@ -3486,6 +3486,8 @@ void bch2_trans_put(struct btree_trans *trans)
#ifdef CONFIG_BCACHEFS_DEBUG
darray_exit(&trans->last_restarted_trace);
#endif
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_exit(&trans->trans_kmalloc_trace);
#endif
@ -3642,7 +3644,7 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
for (s = c->btree_transaction_stats;
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
s++) {
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_exit(&s->trans_kmalloc_trace);
#endif
kfree(s->max_paths_text);

View File

@ -543,7 +543,7 @@ void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btre
void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *);
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
void bch2_trans_kmalloc_trace_to_text(struct printbuf *,
darray_trans_kmalloc_trace *);
#endif
@ -553,7 +553,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *, size_t, unsigned long);
static inline void bch2_trans_kmalloc_trace(struct btree_trans *trans, size_t size,
unsigned long ip)
{
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_push(&trans->trans_kmalloc_trace,
((struct trans_kmalloc_trace) { .ip = ip, .bytes = size }));
#endif

View File

@ -495,7 +495,7 @@ struct btree_trans {
void *mem;
unsigned mem_top;
unsigned mem_bytes;
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif

View File

@ -760,7 +760,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
mutex_lock(&s->lock);
prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem);
#ifdef CONFIG_BCACHEFS_DEBUG
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
printbuf_indent_add(&i->buf, 2);
bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace);
printbuf_indent_sub(&i->buf, 2);

View File

@ -692,7 +692,7 @@ static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subv
vfs_d_type(d.v->d_type));
if (ret)
ctx->pos = d.k->p.offset + 1;
return ret;
return !ret;
}
int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
@ -717,7 +717,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
if (ret2 > 0)
continue;
ret2 ?: drop_locks_do(trans, bch2_dir_emit(ctx, dirent, target));
ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target));
})));
bch2_bkey_buf_exit(&sk, c);

View File

@ -287,7 +287,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc
static int bch2_accounting_update_sb_one(struct bch_fs *c, struct bpos p)
{
struct bch_replicas_padded r;
union bch_replicas_padded r;
return accounting_to_replicas(&r.e, p)
? bch2_mark_replicas(c, &r.e)
: 0;
@ -361,7 +361,7 @@ err:
int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
enum bch_accounting_mode mode)
{
struct bch_replicas_padded r;
union bch_replicas_padded r;
if (mode != BCH_ACCOUNTING_read &&
accounting_to_replicas(&r.e, a.k->p) &&
@ -425,10 +425,12 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage)
percpu_down_read(&c->mark_lock);
darray_for_each(acc->k, i) {
struct {
union {
u8 bytes[struct_size_t(struct bch_replicas_usage, r.devs,
BCH_BKEY_PTRS_MAX)];
struct bch_replicas_usage r;
u8 pad[BCH_BKEY_PTRS_MAX];
} u;
u.r.r.nr_devs = BCH_BKEY_PTRS_MAX;
if (!accounting_to_replicas(&u.r.r, i->pos))
continue;
@ -627,7 +629,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
switch (acc->type) {
case BCH_DISK_ACCOUNTING_replicas: {
struct bch_replicas_padded r;
union bch_replicas_padded r;
__accounting_to_replicas(&r.e, acc);
for (unsigned i = 0; i < r.e.nr_devs; i++)

View File

@ -86,35 +86,6 @@ err:
return ret;
}
void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c)
{
out->atomic++;
rcu_read_lock();
struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
if (!g)
goto out;
for (unsigned i = 0; i < g->nr; i++) {
if (i)
prt_printf(out, " ");
if (g->entries[i].deleted) {
prt_printf(out, "[deleted]");
continue;
}
prt_printf(out, "[parent %d devs", g->entries[i].parent);
for_each_member_device_rcu(c, ca, &g->entries[i].devs)
prt_printf(out, " %s", ca->name);
prt_printf(out, "]");
}
out:
rcu_read_unlock();
out->atomic--;
}
static void bch2_sb_disk_groups_to_text(struct printbuf *out,
struct bch_sb *sb,
struct bch_sb_field *f)
@ -241,17 +212,14 @@ bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target)
case TARGET_DEV:
return dev == t.dev;
case TARGET_GROUP: {
struct bch_disk_groups_cpu *g;
const struct bch_devs_mask *m;
bool ret;
rcu_read_lock();
g = rcu_dereference(c->disk_groups);
m = g && t.group < g->nr && !g->entries[t.group].deleted
struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
const struct bch_devs_mask *m =
g && t.group < g->nr && !g->entries[t.group].deleted
? &g->entries[t.group].devs
: NULL;
ret = m ? test_bit(dev, m->d) : false;
bool ret = m ? test_bit(dev, m->d) : false;
rcu_read_unlock();
return ret;
@ -377,54 +345,81 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name)
return v;
}
void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
static void __bch2_disk_path_to_text(struct printbuf *out, struct bch_disk_groups_cpu *g,
unsigned v)
{
struct bch_disk_groups_cpu *groups;
struct bch_disk_group_cpu *g;
unsigned nr = 0;
u16 path[32];
out->atomic++;
rcu_read_lock();
groups = rcu_dereference(c->disk_groups);
if (!groups)
goto invalid;
unsigned nr = 0;
while (1) {
if (nr == ARRAY_SIZE(path))
goto invalid;
if (v >= groups->nr)
if (v >= (g ? g->nr : 0))
goto invalid;
g = groups->entries + v;
struct bch_disk_group_cpu *e = g->entries + v;
if (g->deleted)
if (e->deleted)
goto invalid;
path[nr++] = v;
if (!g->parent)
if (!e->parent)
break;
v = g->parent - 1;
v = e->parent - 1;
}
while (nr) {
v = path[--nr];
g = groups->entries + v;
struct bch_disk_group_cpu *e = g->entries + path[--nr];
prt_printf(out, "%.*s", (int) sizeof(g->label), g->label);
prt_printf(out, "%.*s", (int) sizeof(e->label), e->label);
if (nr)
prt_printf(out, ".");
}
out:
rcu_read_unlock();
out->atomic--;
return;
invalid:
prt_printf(out, "invalid label %u", v);
goto out;
}
void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c)
{
bch2_printbuf_make_room(out, 4096);
out->atomic++;
rcu_read_lock();
struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
for (unsigned i = 0; i < (g ? g->nr : 0); i++) {
prt_printf(out, "%2u: ", i);
if (g->entries[i].deleted) {
prt_printf(out, "[deleted]");
goto next;
}
__bch2_disk_path_to_text(out, g, i);
prt_printf(out, " devs");
for_each_member_device_rcu(c, ca, &g->entries[i].devs)
prt_printf(out, " %s", ca->name);
next:
prt_newline(out);
}
rcu_read_unlock();
out->atomic--;
}
void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
{
out->atomic++;
rcu_read_lock();
__bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v),
rcu_read_unlock();
--out->atomic;
}
void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
@ -470,23 +465,22 @@ inval:
int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
{
struct bch_member *mi;
int ret, v = -1;
lockdep_assert_held(&c->sb_lock);
if (!strlen(name) || !strcmp(name, "none"))
return 0;
v = bch2_disk_path_find_or_create(&c->disk_sb, name);
if (v < 0)
return v;
if (!strlen(name) || !strcmp(name, "none")) {
struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
SET_BCH_MEMBER_GROUP(mi, 0);
} else {
int v = bch2_disk_path_find_or_create(&c->disk_sb, name);
if (v < 0)
return v;
ret = bch2_sb_disk_groups_to_cpu(c);
if (ret)
return ret;
struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
SET_BCH_MEMBER_GROUP(mi, v + 1);
}
mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
SET_BCH_MEMBER_GROUP(mi, v + 1);
return 0;
return bch2_sb_disk_groups_to_cpu(c);
}
int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)

View File

@ -2223,10 +2223,10 @@ void bch2_fs_ec_stop(struct bch_fs *c)
static bool bch2_fs_ec_flush_done(struct bch_fs *c)
{
bool ret;
sched_annotate_sleep();
mutex_lock(&c->ec_stripe_new_lock);
ret = list_empty(&c->ec_stripe_new_list);
bool ret = list_empty(&c->ec_stripe_new_list);
mutex_unlock(&c->ec_stripe_new_lock);
return ret;

View File

@ -4,9 +4,10 @@
#include "bcachefs_format.h"
struct bch_replicas_padded {
union bch_replicas_padded {
u8 bytes[struct_size_t(struct bch_replicas_entry_v1,
devs, BCH_BKEY_PTRS_MAX)];
struct bch_replicas_entry_v1 e;
u8 pad[BCH_BKEY_PTRS_MAX];
};
struct stripe {
@ -28,7 +29,7 @@ struct gc_stripe {
u16 block_sectors[BCH_BKEY_PTRS_MAX];
struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX];
struct bch_replicas_padded r;
union bch_replicas_padded r;
};
#endif /* _BCACHEFS_EC_TYPES_H */

View File

@ -147,10 +147,24 @@ int __must_check bch2_write_inode_size(struct bch_fs *c,
void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
struct quota_res *quota_res, s64 sectors)
{
bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c,
"inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
inode->ei_inode.bi_sectors);
if (unlikely((s64) inode->v.i_blocks + sectors < 0)) {
struct printbuf buf = PRINTBUF;
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
inode->ei_inode.bi_sectors);
bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &buf);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
if (sectors < 0)
sectors = -inode->v.i_blocks;
else
sectors = 0;
}
inode->v.i_blocks += sectors;
#ifdef CONFIG_BCACHEFS_QUOTA
@ -244,7 +258,6 @@ out:
if (!ret)
ret = err;
bch_err_fn(c, ret);
return ret;
}
@ -506,11 +519,20 @@ int bchfs_truncate(struct mnt_idmap *idmap,
goto err;
}
bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks &&
!bch2_journal_error(&c->journal), c,
"inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks,
inode->ei_inode.bi_sectors);
if (unlikely(!inode->v.i_size && inode->v.i_blocks &&
!bch2_journal_error(&c->journal))) {
struct printbuf buf = PRINTBUF;
bch2_log_msg_start(c, &buf);
prt_printf(&buf,
"inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks,
inode->ei_inode.bi_sectors);
bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &buf);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
ret = bch2_setattr_nonsize(idmap, inode, iattr);
err:

View File

@ -785,12 +785,11 @@ static int ref_visible2(struct bch_fs *c,
#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \
for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
(_i)->snapshot <= (_snapshot); _i++) \
if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
(_i)->inode.bi_snapshot <= (_snapshot); _i++) \
if (key_visible_in_snapshot(_c, _s, _i->inode.bi_snapshot, _snapshot))
struct inode_walker_entry {
struct bch_inode_unpacked inode;
u32 snapshot;
u64 count;
u64 i_size;
};
@ -824,7 +823,6 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w,
return bch2_inode_unpack(inode, &u) ?:
darray_push(&w->inodes, ((struct inode_walker_entry) {
.inode = u,
.snapshot = inode.k->p.snapshot,
}));
}
@ -864,47 +862,45 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
}
static struct inode_walker_entry *
lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, struct bkey_s_c k)
{
bool is_whiteout = k.k->type == KEY_TYPE_whiteout;
struct bch_fs *c = trans->c;
struct inode_walker_entry *i;
__darray_for_each(w->inodes, i)
if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot))
if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->inode.bi_snapshot))
goto found;
return NULL;
found:
BUG_ON(k.k->p.snapshot > i->snapshot);
BUG_ON(k.k->p.snapshot > i->inode.bi_snapshot);
if (k.k->p.snapshot != i->snapshot && !is_whiteout) {
struct inode_walker_entry new = *i;
struct printbuf buf = PRINTBUF;
int ret = 0;
new.snapshot = k.k->p.snapshot;
new.count = 0;
new.i_size = 0;
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, k);
bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
if (fsck_err_on(k.k->p.snapshot != i->inode.bi_snapshot,
trans, snapshot_key_missing_inode_snapshot,
"have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
"unexpected because we should always update the inode when we update a key in that inode\n"
"%s",
w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf);
printbuf_exit(&buf);
w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot,
(bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
struct bch_inode_unpacked new = i->inode;
while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot)
--i;
new.bi_snapshot = k.k->p.snapshot;
size_t pos = i - w->inodes.data;
int ret = darray_insert_item(&w->inodes, pos, new);
if (ret)
return ERR_PTR(ret);
i = w->inodes.data + pos;
ret = __bch2_fsck_write_inode(trans, &new) ?:
bch2_trans_commit(trans, NULL, NULL, 0) ?:
-BCH_ERR_transaction_restart_nested;
goto fsck_err;
}
printbuf_exit(&buf);
return i;
fsck_err:
printbuf_exit(&buf);
return ERR_PTR(ret);
}
static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
@ -919,7 +915,7 @@ static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
w->last_pos = k.k->p;
return lookup_inode_for_snapshot(trans->c, w, k);
return lookup_inode_for_snapshot(trans, w, k);
}
static int get_visible_inodes(struct btree_trans *trans,
@ -1496,21 +1492,21 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal
if (i->inode.bi_sectors == i->count)
continue;
count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot);
count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->inode.bi_snapshot);
if (w->recalculate_sums)
i->count = count2;
if (i->count != count2) {
bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
w->last_pos.inode, i->snapshot, i->count, count2);
w->last_pos.inode, i->inode.bi_snapshot, i->count, count2);
i->count = count2;
}
if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty),
trans, inode_i_sectors_wrong,
"inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
w->last_pos.inode, i->snapshot,
w->last_pos.inode, i->inode.bi_snapshot,
i->inode.bi_sectors, i->count)) {
i->inode.bi_sectors = i->count;
ret = bch2_fsck_write_inode(trans, &i->inode);
@ -1821,20 +1817,20 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
inode->inodes.data && i >= inode->inodes.data;
--i) {
if (i->snapshot > k.k->p.snapshot ||
!key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
if (i->inode.bi_snapshot > k.k->p.snapshot ||
!key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot))
continue;
if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
!bkey_extent_is_reservation(k),
trans, extent_past_end_of_inode,
"extent type past end of inode %llu:%u, i_size %llu\n%s",
i->inode.bi_inum, i->snapshot, i->inode.bi_size,
i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
struct btree_iter iter2;
bch2_trans_copy_iter(trans, &iter2, iter);
bch2_btree_iter_set_snapshot(trans, &iter2, i->snapshot);
bch2_btree_iter_set_snapshot(trans, &iter2, i->inode.bi_snapshot);
ret = bch2_btree_iter_traverse(trans, &iter2) ?:
bch2_btree_delete_at(trans, &iter2,
BTREE_UPDATE_internal_snapshot_node);
@ -1856,8 +1852,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
inode->inodes.data && i >= inode->inodes.data;
--i) {
if (i->snapshot > k.k->p.snapshot ||
!key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
if (i->inode.bi_snapshot > k.k->p.snapshot ||
!key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot))
continue;
i->count += k.k->size;
@ -1939,13 +1935,13 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
if (i->inode.bi_nlink == i->count)
continue;
count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot);
count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->inode.bi_snapshot);
if (count2 < 0)
return count2;
if (i->count != count2) {
bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
w->last_pos.inode, i->snapshot, i->count, count2);
w->last_pos.inode, i->inode.bi_snapshot, i->count, count2);
i->count = count2;
if (i->inode.bi_nlink == i->count)
continue;
@ -1954,7 +1950,7 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
if (fsck_err_on(i->inode.bi_nlink != i->count,
trans, inode_dir_wrong_nlink,
"directory %llu:%u with wrong i_nlink: got %u, should be %llu",
w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
w->last_pos.inode, i->inode.bi_snapshot, i->inode.bi_nlink, i->count)) {
i->inode.bi_nlink = i->count;
ret = bch2_fsck_write_inode(trans, &i->inode);
if (ret)

View File

@ -1172,8 +1172,6 @@ retry_pick:
bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
rbio->bounce = true;
async_object_list_add(c, rbio, rbio, &rbio->list_idx);
} else if (flags & BCH_READ_must_clone) {
/*
* Have to clone if there were any splits, due to error
@ -1187,8 +1185,6 @@ retry_pick:
&c->bio_read_split),
orig);
rbio->bio.bi_iter = iter;
async_object_list_add(c, rbio, rbio, &rbio->list_idx);
} else {
rbio = orig;
rbio->bio.bi_iter = iter;
@ -1219,6 +1215,8 @@ retry_pick:
rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
rbio->bio.bi_end_io = bch2_read_endio;
async_object_list_add(c, rbio, rbio, &rbio->list_idx);
/* XXX: also nvme read recovery level */
if (unlikely(failed && bch2_dev_io_failures(failed, pick.ptr.dev)))
rbio->bio.bi_opf |= REQ_FUA;

View File

@ -256,10 +256,35 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
}
if (i_sectors_delta) {
s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors);
if (unlikely(bi_sectors + i_sectors_delta < 0)) {
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0",
extent_iter->pos.inode, bi_sectors, i_sectors_delta);
bool print = bch2_count_fsck_err(c, inode_i_sectors_underflow, &buf);
if (print)
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
if (i_sectors_delta < 0)
i_sectors_delta = -bi_sectors;
else
i_sectors_delta = 0;
}
le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta);
inode_update_flags = 0;
}
/*
* extents, dirents and xattrs updates require that an inode update also
* happens - to ensure that if a key exists in one of those btrees with
* a given snapshot ID an inode is also present - so we may have to skip
* the nojournal optimization:
*/
if (inode->k.p.snapshot != iter.snapshot) {
inode->k.p.snapshot = iter.snapshot;
inode_update_flags = 0;

View File

@ -1404,7 +1404,7 @@ int bch2_journal_read(struct bch_fs *c,
}
genradix_for_each(&c->journal_entries, radix_iter, _i) {
struct bch_replicas_padded replicas = {
union bch_replicas_padded replicas = {
.e.data_type = BCH_DATA_journal,
.e.nr_devs = 0,
.e.nr_required = 1,
@ -1632,7 +1632,7 @@ static CLOSURE_CALLBACK(journal_write_done)
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_replicas_padded replicas;
union bch_replicas_padded replicas;
u64 seq = le64_to_cpu(w->data->seq);
int err = 0;
@ -1784,7 +1784,7 @@ static CLOSURE_CALLBACK(journal_write_submit)
BCH_DEV_WRITE_REF_journal_write);
if (!ca) {
/* XXX: fix this */
bch_err(c, "missing device for journal write\n");
bch_err(c, "missing device %u for journal write", ptr->dev);
continue;
}
@ -2055,7 +2055,7 @@ CLOSURE_CALLBACK(bch2_journal_write)
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_replicas_padded replicas;
union bch_replicas_padded replicas;
unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_journal]);
int ret;

View File

@ -955,7 +955,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
seq = 0;
spin_lock(&j->lock);
while (!ret) {
struct bch_replicas_padded replicas;
union bch_replicas_padded replicas;
seq = max(seq, journal_last_seq(j));
if (seq >= j->pin.back)

View File

@ -209,6 +209,7 @@ enum bch_fsck_flags {
x(subvol_to_missing_root, 188, 0) \
x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \
x(bkey_in_missing_snapshot, 190, 0) \
x(bkey_in_deleted_snapshot, 315, 0) \
x(inode_pos_inode_nonzero, 191, 0) \
x(inode_pos_blockdev_range, 192, 0) \
x(inode_alloc_cursor_inode_bad, 301, 0) \
@ -216,6 +217,7 @@ enum bch_fsck_flags {
x(inode_str_hash_invalid, 194, 0) \
x(inode_v3_fields_start_bad, 195, 0) \
x(inode_snapshot_mismatch, 196, 0) \
x(snapshot_key_missing_inode_snapshot, 314, FSCK_AUTOFIX) \
x(inode_unlinked_but_clean, 197, 0) \
x(inode_unlinked_but_nlink_nonzero, 198, 0) \
x(inode_unlinked_and_not_open, 281, 0) \
@ -236,6 +238,9 @@ enum bch_fsck_flags {
x(inode_has_child_snapshots_wrong, 287, 0) \
x(inode_unreachable, 210, FSCK_AUTOFIX) \
x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \
x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \
x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \
x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \
x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \
x(deleted_inode_missing, 212, FSCK_AUTOFIX) \
x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \
@ -320,7 +325,7 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
x(MAX, 311, 0)
x(MAX, 316, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,

View File

@ -139,6 +139,11 @@ int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
struct bch_sb_field_members_v1 *mi1;
struct bch_sb_field_members_v2 *mi2;
if (BCH_SB_VERSION_INCOMPAT(disk_sb->sb) > bcachefs_metadata_version_extent_flags) {
bch2_sb_field_resize(disk_sb, members_v1, 0);
return 0;
}
mi1 = bch2_sb_field_resize(disk_sb, members_v1,
DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES *
disk_sb->sb->nr_devices, sizeof(u64)));

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bbpos.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_key_cache.h"
@ -212,7 +213,7 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
BCH_SNAPSHOT_SUBVOL(s.v),
BCH_SNAPSHOT_DELETED(s.v),
BCH_SNAPSHOT_WILL_DELETE(s.v),
le32_to_cpu(s.v->parent),
le32_to_cpu(s.v->children[0]),
le32_to_cpu(s.v->children[1]),
@ -313,7 +314,9 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
if (new.k->type == KEY_TYPE_snapshot) {
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
t->live = true;
t->state = !BCH_SNAPSHOT_DELETED(s.v)
? SNAPSHOT_ID_live
: SNAPSHOT_ID_deleted;
t->parent = le32_to_cpu(s.v->parent);
t->children[0] = le32_to_cpu(s.v->children[0]);
t->children[1] = le32_to_cpu(s.v->children[1]);
@ -338,7 +341,7 @@ static int __bch2_mark_snapshot(struct btree_trans *trans,
parent - id - 1 < IS_ANCESTOR_BITMAP)
__set_bit(parent - id - 1, t->is_ancestor);
if (BCH_SNAPSHOT_DELETED(s.v)) {
if (BCH_SNAPSHOT_WILL_DELETE(s.v)) {
set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots)
bch2_delete_dead_snapshots_async(c);
@ -710,6 +713,9 @@ static int check_snapshot(struct btree_trans *trans,
memset(&s, 0, sizeof(s));
memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k)));
if (BCH_SNAPSHOT_DELETED(&s))
return 0;
id = le32_to_cpu(s.parent);
if (id) {
ret = bch2_snapshot_lookup(trans, id, &v);
@ -747,7 +753,7 @@ static int check_snapshot(struct btree_trans *trans,
}
bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
!BCH_SNAPSHOT_DELETED(&s);
!BCH_SNAPSHOT_WILL_DELETE(&s);
if (should_have_subvol) {
id = le32_to_cpu(s.subvol);
@ -997,7 +1003,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c)
snapshot_id_list_to_text(&buf, t);
darray_for_each(*t, id) {
if (fsck_err_on(!bch2_snapshot_exists(c, *id),
if (fsck_err_on(bch2_snapshot_id_state(c, *id) == SNAPSHOT_ID_empty,
trans, snapshot_node_missing,
"snapshot node %u from tree %s missing, recreate?", *id, buf.buf)) {
if (t->nr > 1) {
@ -1022,22 +1028,38 @@ err:
return ret;
}
int bch2_check_key_has_snapshot(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
int __bch2_check_key_has_snapshot(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
int ret = 0;
enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot);
if (fsck_err_on(!bch2_snapshot_exists(c, k.k->p.snapshot),
/* Snapshot was definitively deleted, this error is marked autofix */
if (fsck_err_on(state == SNAPSHOT_ID_deleted,
trans, bkey_in_deleted_snapshot,
"key in deleted snapshot %s, delete?",
(bch2_btree_id_to_text(&buf, iter->btree_id),
prt_char(&buf, ' '),
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node) ?: 1;
/*
* Snapshot missing: we should have caught this with btree_lost_data and
* kicked off reconstruct_snapshots, so if we end up here we have no
* idea what happened:
*/
if (fsck_err_on(state == SNAPSHOT_ID_empty,
trans, bkey_in_missing_snapshot,
"key in missing snapshot %s, delete?",
(bch2_btree_id_to_text(&buf, iter->btree_id),
prt_char(&buf, ' '),
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node) ?: 1;
BTREE_UPDATE_internal_snapshot_node) ?: 1;
fsck_err:
printbuf_exit(&buf);
return ret;
@ -1061,10 +1083,10 @@ int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
}
/* already deleted? */
if (BCH_SNAPSHOT_DELETED(&s->v))
if (BCH_SNAPSHOT_WILL_DELETE(&s->v))
goto err;
SET_BCH_SNAPSHOT_DELETED(&s->v, true);
SET_BCH_SNAPSHOT_WILL_DELETE(&s->v, true);
SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
s->v.subvol = 0;
err:
@ -1084,24 +1106,25 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
struct btree_iter iter, p_iter = {};
struct btree_iter c_iter = {};
struct btree_iter tree_iter = {};
struct bkey_s_c_snapshot s;
u32 parent_id, child_id;
unsigned i;
int ret = 0;
s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
BTREE_ITER_intent, snapshot);
ret = bkey_err(s);
struct bkey_i_snapshot *s =
bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
BTREE_ITER_intent, snapshot);
ret = PTR_ERR_OR_ZERO(s);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
"missing snapshot %u", id);
if (ret)
goto err;
BUG_ON(s.v->children[1]);
BUG_ON(BCH_SNAPSHOT_DELETED(&s->v));
BUG_ON(s->v.children[1]);
parent_id = le32_to_cpu(s.v->parent);
child_id = le32_to_cpu(s.v->children[0]);
parent_id = le32_to_cpu(s->v.parent);
child_id = le32_to_cpu(s->v.children[0]);
if (parent_id) {
struct bkey_i_snapshot *parent;
@ -1159,24 +1182,38 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
*/
struct bkey_i_snapshot_tree *s_t;
BUG_ON(s.v->children[1]);
BUG_ON(s->v.children[1]);
s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)),
0, snapshot_tree);
ret = PTR_ERR_OR_ZERO(s_t);
if (ret)
goto err;
if (s.v->children[0]) {
s_t->v.root_snapshot = s.v->children[0];
if (s->v.children[0]) {
s_t->v.root_snapshot = s->v.children[0];
} else {
s_t->k.type = KEY_TYPE_deleted;
set_bkey_val_u64s(&s_t->k, 0);
}
}
ret = bch2_btree_delete_at(trans, &iter, 0);
if (!bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)) {
SET_BCH_SNAPSHOT_DELETED(&s->v, true);
s->v.parent = 0;
s->v.children[0] = 0;
s->v.children[1] = 0;
s->v.subvol = 0;
s->v.tree = 0;
s->v.depth = 0;
s->v.skip[0] = 0;
s->v.skip[1] = 0;
s->v.skip[2] = 0;
} else {
s->k.type = KEY_TYPE_deleted;
set_bkey_val_u64s(&s->k, 0);
}
err:
bch2_trans_iter_exit(trans, &tree_iter);
bch2_trans_iter_exit(trans, &p_iter);
@ -1346,12 +1383,6 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
* that key to snapshot leaf nodes, where we can mutate it
*/
struct snapshot_interior_delete {
u32 id;
u32 live_child;
};
typedef DARRAY(struct snapshot_interior_delete) interior_delete_list;
static inline u32 interior_delete_has_id(interior_delete_list *l, u32 id)
{
darray_for_each(*l, i)
@ -1385,28 +1416,28 @@ static unsigned __live_child(struct snapshot_table *t, u32 id,
return 0;
}
static unsigned live_child(struct bch_fs *c, u32 id,
snapshot_id_list *delete_leaves,
interior_delete_list *delete_interior)
static unsigned live_child(struct bch_fs *c, u32 id)
{
struct snapshot_delete *d = &c->snapshot_delete;
rcu_read_lock();
u32 ret = __live_child(rcu_dereference(c->snapshots), id,
delete_leaves, delete_interior);
&d->delete_leaves, &d->delete_interior);
rcu_read_unlock();
return ret;
}
static int delete_dead_snapshots_process_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
snapshot_id_list *delete_leaves,
interior_delete_list *delete_interior)
struct bkey_s_c k)
{
if (snapshot_list_has_id(delete_leaves, k.k->p.snapshot))
struct snapshot_delete *d = &trans->c->snapshot_delete;
if (snapshot_list_has_id(&d->delete_leaves, k.k->p.snapshot))
return bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node);
u32 live_child = interior_delete_has_id(delete_interior, k.k->p.snapshot);
u32 live_child = interior_delete_has_id(&d->delete_interior, k.k->p.snapshot);
if (live_child) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
int ret = PTR_ERR_OR_ZERO(new);
@ -1437,46 +1468,70 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
return 0;
}
static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter)
{
struct bch_fs *c = trans->c;
struct snapshot_delete *d = &c->snapshot_delete;
bool ret = !snapshot_list_has_id(&d->deleting_from_trees,
bch2_snapshot_tree(c, iter->pos.snapshot));
if (unlikely(ret)) {
struct bpos pos = iter->pos;
pos.snapshot = 0;
if (iter->btree_id != BTREE_ID_inodes)
pos.offset = U64_MAX;
bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(pos));
}
return ret;
}
/*
* For a given snapshot, if it doesn't have a subvolume that points to it, and
* it doesn't have child snapshot nodes - it's now redundant and we can mark it
* as deleted.
*/
static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k,
snapshot_id_list *delete_leaves,
interior_delete_list *delete_interior)
static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k)
{
if (k.k->type != KEY_TYPE_snapshot)
return 0;
struct bch_fs *c = trans->c;
struct snapshot_delete *d = &c->snapshot_delete;
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
unsigned live_children = 0;
if (BCH_SNAPSHOT_SUBVOL(s.v))
return 0;
if (BCH_SNAPSHOT_DELETED(s.v))
return 0;
for (unsigned i = 0; i < 2; i++) {
u32 child = le32_to_cpu(s.v->children[i]);
live_children += child &&
!snapshot_list_has_id(delete_leaves, child);
!snapshot_list_has_id(&d->delete_leaves, child);
}
u32 tree = bch2_snapshot_tree(c, s.k->p.offset);
if (live_children == 0) {
return snapshot_list_add(c, delete_leaves, s.k->p.offset);
return snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
snapshot_list_add(c, &d->delete_leaves, s.k->p.offset);
} else if (live_children == 1) {
struct snapshot_interior_delete d = {
struct snapshot_interior_delete n = {
.id = s.k->p.offset,
.live_child = live_child(c, s.k->p.offset, delete_leaves, delete_interior),
.live_child = live_child(c, s.k->p.offset),
};
if (!d.live_child) {
bch_err(c, "error finding live child of snapshot %u", d.id);
if (!n.live_child) {
bch_err(c, "error finding live child of snapshot %u", n.id);
return -EINVAL;
}
return darray_push(delete_interior, d);
return snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
darray_push(&d->delete_interior, n);
} else {
return 0;
}
@ -1508,6 +1563,9 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
struct bkey_i_snapshot *s;
int ret;
if (!bch2_snapshot_exists(c, k.k->p.offset))
return 0;
if (k.k->type != KEY_TYPE_snapshot)
return 0;
@ -1555,39 +1613,52 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
return bch2_trans_update(trans, iter, &s->k_i, 0);
}
static void bch2_snapshot_delete_nodes_to_text(struct printbuf *out, struct snapshot_delete *d)
{
prt_printf(out, "deleting from trees");
darray_for_each(d->deleting_from_trees, i)
prt_printf(out, " %u", *i);
prt_printf(out, "deleting leaves");
darray_for_each(d->delete_leaves, i)
prt_printf(out, " %u", *i);
prt_printf(out, " interior");
darray_for_each(d->delete_interior, i)
prt_printf(out, " %u->%u", i->id, i->live_child);
}
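
This formatter now backs both the journal log message emitted below and the new sysfs status file. A minimal sketch of driving it directly, using the same printbuf pattern as the call sites (the snapshot IDs are made up):

struct snapshot_delete *d = &c->snapshot_delete;
struct printbuf buf = PRINTBUF;

mutex_lock(&d->lock);
bch2_snapshot_delete_nodes_to_text(&buf, d);
mutex_unlock(&d->lock);

/*
 * With delete_leaves = {7, 9} and delete_interior = {{ .id = 5, .live_child = 6 }},
 * buf.buf ends with roughly "deleting leaves 7 9 interior 5->6" (exact spacing
 * follows the prt_printf() calls above).
 */
printbuf_exit(&buf);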
int bch2_delete_dead_snapshots(struct bch_fs *c)
{
if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
return 0;
struct btree_trans *trans = bch2_trans_get(c);
snapshot_id_list delete_leaves = {};
interior_delete_list delete_interior = {};
struct snapshot_delete *d = &c->snapshot_delete;
int ret = 0;
/*
* For every snapshot node: If we have no live children and it's not
* pointed to by a subvolume, delete it:
*/
mutex_lock(&d->lock);
d->running = true;
d->pos = BBPOS_MIN;
ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
check_should_delete_snapshot(trans, k, &delete_leaves, &delete_interior));
check_should_delete_snapshot(trans, k));
mutex_unlock(&d->lock);
if (!bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "walking snapshots");
if (ret)
goto err;
if (!delete_leaves.nr && !delete_interior.nr)
if (!d->delete_leaves.nr && !d->delete_interior.nr)
goto err;
{
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "deleting leaves");
darray_for_each(delete_leaves, i)
prt_printf(&buf, " %u", *i);
prt_printf(&buf, " interior");
darray_for_each(delete_interior, i)
prt_printf(&buf, " %u->%u", i->id, i->live_child);
bch2_snapshot_delete_nodes_to_text(&buf, d);
ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf));
printbuf_exit(&buf);
@ -1595,19 +1666,25 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
goto err;
}
for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
for (d->pos.btree = 0; d->pos.btree < BTREE_ID_NR; d->pos.btree++) {
struct disk_reservation res = { 0 };
if (!btree_type_has_snapshots(btree))
d->pos.pos = POS_MIN;
if (!btree_type_has_snapshots(d->pos.btree))
continue;
ret = for_each_btree_key_commit(trans, iter,
btree, POS_MIN,
d->pos.btree, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
&res, NULL, BCH_TRANS_COMMIT_no_enospc,
delete_dead_snapshots_process_key(trans, &iter, k,
&delete_leaves,
&delete_interior));
&res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
d->pos.pos = iter.pos;
if (skip_unrelated_snapshot_tree(trans, &iter))
continue;
delete_dead_snapshots_process_key(trans, &iter, k);
}));
bch2_disk_reservation_put(c, &res);
@ -1617,7 +1694,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
goto err;
}
darray_for_each(delete_leaves, i) {
darray_for_each(d->delete_leaves, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i));
if (!bch2_err_matches(ret, EROFS))
@ -1634,11 +1711,11 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
BTREE_ITER_intent, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &delete_interior));
bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior));
if (ret)
goto err;
darray_for_each(delete_interior, i) {
darray_for_each(d->delete_interior, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, i->id));
if (!bch2_err_matches(ret, EROFS))
@ -1647,8 +1724,12 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
goto err;
}
err:
darray_exit(&delete_interior);
darray_exit(&delete_leaves);
mutex_lock(&d->lock);
darray_exit(&d->deleting_from_trees);
darray_exit(&d->delete_interior);
darray_exit(&d->delete_leaves);
d->running = false;
mutex_unlock(&d->lock);
bch2_trans_put(trans);
if (!bch2_err_matches(ret, EROFS))
bch_err_fn(c, ret);
@ -1657,7 +1738,7 @@ err:
void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete.work);
set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
@ -1672,10 +1753,27 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *c)
BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
if (!queue_work(c->write_ref_wq, &c->snapshot_delete_work))
if (!queue_work(c->write_ref_wq, &c->snapshot_delete.work))
enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}
void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c)
{
struct snapshot_delete *d = &c->snapshot_delete;
if (!d->running) {
prt_str(out, "(not running)");
return;
}
mutex_lock(&d->lock);
bch2_snapshot_delete_nodes_to_text(out, d);
prt_newline(out);
mutex_unlock(&d->lock);
bch2_bbpos_to_text(out, d->pos);
}
int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
enum btree_id id,
struct bpos pos)
@ -1714,7 +1812,7 @@ static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct
return 0;
struct bkey_s_c_snapshot snap = bkey_s_c_to_snapshot(k);
if (BCH_SNAPSHOT_DELETED(snap.v) ||
if (BCH_SNAPSHOT_WILL_DELETE(snap.v) ||
interior_snapshot_needs_delete(snap))
set_bit(BCH_FS_need_delete_dead_snapshots, &trans->c->flags);
@ -1750,3 +1848,10 @@ void bch2_fs_snapshots_exit(struct bch_fs *c)
{
kvfree(rcu_dereference_protected(c->snapshots, true));
}
void bch2_fs_snapshots_init_early(struct bch_fs *c)
{
INIT_WORK(&c->snapshot_delete.work, bch2_delete_dead_snapshots_work);
mutex_init(&c->snapshot_delete.lock);
mutex_init(&c->snapshots_unlinked_lock);
}

View File

@ -120,19 +120,24 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
return id;
}
static inline bool __bch2_snapshot_exists(struct bch_fs *c, u32 id)
static inline enum snapshot_id_state __bch2_snapshot_id_state(struct bch_fs *c, u32 id)
{
const struct snapshot_t *s = snapshot_t(c, id);
return s ? s->live : 0;
return s ? s->state : SNAPSHOT_ID_empty;
}
static inline enum snapshot_id_state bch2_snapshot_id_state(struct bch_fs *c, u32 id)
{
rcu_read_lock();
enum snapshot_id_state ret = __bch2_snapshot_id_state(c, id);
rcu_read_unlock();
return ret;
}
static inline bool bch2_snapshot_exists(struct bch_fs *c, u32 id)
{
rcu_read_lock();
bool ret = __bch2_snapshot_exists(c, id);
rcu_read_unlock();
return ret;
return bch2_snapshot_id_state(c, id) == SNAPSHOT_ID_live;
}
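
A small sketch of distinguishing the three states a caller can now see (c and id as in the helpers above; the enum comes from snapshot_types.h in this patch, the per-branch comments are interpretation rather than anything the patch states):

switch (bch2_snapshot_id_state(c, id)) {
case SNAPSHOT_ID_empty:
	/* never allocated, or out of range - a key carrying this ID is orphaned */
	break;
case SNAPSHOT_ID_live:
	/* normal case; the only state where bch2_snapshot_exists() returns true */
	break;
case SNAPSHOT_ID_deleted:
	/* node is being (or has been) torn down by snapshot deletion */
	break;
}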
static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
@ -241,10 +246,19 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
int bch2_check_snapshot_trees(struct bch_fs *);
int bch2_check_snapshots(struct bch_fs *);
int bch2_reconstruct_snapshots(struct bch_fs *);
int bch2_check_key_has_snapshot(struct btree_trans *, struct btree_iter *, struct bkey_s_c);
int __bch2_check_key_has_snapshot(struct btree_trans *, struct btree_iter *, struct bkey_s_c);
static inline int bch2_check_key_has_snapshot(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
{
return likely(bch2_snapshot_exists(trans->c, k.k->p.snapshot))
? 0
: __bch2_check_key_has_snapshot(trans, iter, k);
}
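
The split keeps the hot path inline: when the key's snapshot ID is live - by far the common case - the check costs a single RCU-protected table lookup and no function call; only keys whose snapshot ID is not live fall through to the out-of-line __bch2_check_key_has_snapshot() repair path.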
int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
void bch2_delete_dead_snapshots_work(struct work_struct *);
int __bch2_key_has_snapshot_overwrites(struct btree_trans *, enum btree_id, struct bpos);
@ -259,7 +273,13 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
return __bch2_key_has_snapshot_overwrites(trans, id, pos);
}
int bch2_delete_dead_snapshots(struct bch_fs *);
void bch2_delete_dead_snapshots_work(struct work_struct *);
void bch2_delete_dead_snapshots_async(struct bch_fs *);
void bch2_snapshot_delete_status_to_text(struct printbuf *, struct bch_fs *);
int bch2_snapshots_read(struct bch_fs *);
void bch2_fs_snapshots_exit(struct bch_fs *);
void bch2_fs_snapshots_init_early(struct bch_fs *);
#endif /* _BCACHEFS_SNAPSHOT_H */

View File

@ -15,10 +15,10 @@ struct bch_snapshot {
bch_le128 btime;
};
LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
LE32_BITMASK(BCH_SNAPSHOT_WILL_DELETE, struct bch_snapshot, flags, 0, 1)
/* True if a subvolume points to this snapshot node: */
LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2)
LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 2, 3)
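
The single DELETED flag becomes two, which reads as a two-phase scheme; note that WILL_DELETE takes over the old flag's bit, so nodes written by older versions presumably read as "will delete". The phase boundaries below are interpretation, and the SET_* setters are the ones LE32_BITMASK conventionally generates next to these getters, not shown in this hunk. Given a struct bkey_s_c k of type KEY_TYPE_snapshot:

struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);

if (BCH_SNAPSHOT_DELETED(s.v)) {
	/* phase 2: node fully processed, keys referencing it are gone */
} else if (BCH_SNAPSHOT_WILL_DELETE(s.v)) {
	/* phase 1: queued for deletion (SET_BCH_SNAPSHOT_WILL_DELETE()); keys may still exist */
}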
/*
* Snapshot trees:

View File

@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_SNAPSHOT_TYPES_H
#define _BCACHEFS_SNAPSHOT_TYPES_H
#include "bbpos_types.h"
#include "darray.h"
#include "subvolume_types.h"
typedef DARRAY(u32) snapshot_id_list;
#define IS_ANCESTOR_BITMAP 128
struct snapshot_t {
enum snapshot_id_state {
SNAPSHOT_ID_empty,
SNAPSHOT_ID_live,
SNAPSHOT_ID_deleted,
} state;
u32 parent;
u32 skip[3];
u32 depth;
u32 children[2];
u32 subvol; /* Nonzero only if a subvolume points to this node: */
u32 tree;
unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)];
};
struct snapshot_table {
struct rcu_head rcu;
size_t nr;
#ifndef RUST_BINDGEN
DECLARE_FLEX_ARRAY(struct snapshot_t, s);
#else
struct snapshot_t s[0];
#endif
};
struct snapshot_interior_delete {
u32 id;
u32 live_child;
};
typedef DARRAY(struct snapshot_interior_delete) interior_delete_list;
struct snapshot_delete {
struct work_struct work;
struct mutex lock;
snapshot_id_list deleting_from_trees;
snapshot_id_list delete_leaves;
interior_delete_list delete_interior;
bool running;
struct bbpos pos;
};
#endif /* _BCACHEFS_SNAPSHOT_TYPES_H */
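
Both list types are plain darrays, so the helpers already used in snapshot.c apply directly. A short sketch of building and querying the state (IDs made up, error handling trimmed; the real struct lives in bch_fs and is set up in bch2_fs_snapshots_init_early(), so d below is a local purely for illustration):

struct snapshot_delete d = {};
struct snapshot_interior_delete n = { .id = 5, .live_child = 6 };

int ret = snapshot_list_add_nodup(c, &d.deleting_from_trees, 1) ?:
	  snapshot_list_add(c, &d.delete_leaves, 7) ?:
	  darray_push(&d.delete_interior, n);		/* check ret in real code */

bool is_leaf  = snapshot_list_has_id(&d.delete_leaves, 7);	/* true: keys at 7 get deleted */
u32 new_child = interior_delete_has_id(&d.delete_interior, 5);	/* 6: keys at 5 re-point to 6 */

darray_exit(&d.deleting_from_trees);
darray_exit(&d.delete_leaves);
darray_exit(&d.delete_interior);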

View File

@ -730,8 +730,6 @@ int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
void bch2_fs_subvolumes_init_early(struct bch_fs *c)
{
INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
bch2_subvolume_wait_for_pagecache_and_delete);
mutex_init(&c->snapshots_unlinked_lock);
}

View File

@ -77,9 +77,6 @@ bch2_btree_iter_peek_in_subvolume_max_type(struct btree_trans *trans, struct btr
_end, _subvolid, _flags, _k, _do); \
})
int bch2_delete_dead_snapshots(struct bch_fs *);
void bch2_delete_dead_snapshots_async(struct bch_fs *);
int bch2_subvolume_unlink(struct btree_trans *, u32);
int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);

View File

@ -2,33 +2,6 @@
#ifndef _BCACHEFS_SUBVOLUME_TYPES_H
#define _BCACHEFS_SUBVOLUME_TYPES_H
#include "darray.h"
typedef DARRAY(u32) snapshot_id_list;
#define IS_ANCESTOR_BITMAP 128
struct snapshot_t {
bool live;
u32 parent;
u32 skip[3];
u32 depth;
u32 children[2];
u32 subvol; /* Nonzero only if a subvolume points to this node: */
u32 tree;
unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)];
};
struct snapshot_table {
struct rcu_head rcu;
size_t nr;
#ifndef RUST_BINDGEN
DECLARE_FLEX_ARRAY(struct snapshot_t, s);
#else
struct snapshot_t s[0];
#endif
};
typedef struct {
/* we can't have padding in this struct: */
u64 subvol;

View File

@ -864,6 +864,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
bch2_fs_quota_init(c);
bch2_fs_rebalance_init(c);
bch2_fs_sb_errors_init_early(c);
bch2_fs_snapshots_init_early(c);
bch2_fs_subvolumes_init_early(c);
INIT_LIST_HEAD(&c->list);
@ -1488,7 +1489,9 @@ static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca,
{
ca->dev_idx = dev_idx;
__set_bit(ca->dev_idx, ca->self.d);
scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
if (!ca->name[0])
scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
ca->fs = c;
rcu_assign_pointer(c->devs[ca->dev_idx], ca);
@ -1540,6 +1543,11 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
if (ret)
return ret;
struct printbuf name = PRINTBUF;
prt_bdevname(&name, sb->bdev);
strscpy(ca->name, name.buf, sizeof(ca->name));
printbuf_exit(&name);
/* Commit: */
ca->disk_sb = *sb;
memset(sb, 0, sizeof(*sb));
@ -1581,11 +1589,6 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
bch2_dev_sysfs_online(c, ca);
struct printbuf name = PRINTBUF;
prt_bdevname(&name, ca->disk_sb.bdev);
strscpy(ca->name, name.buf, sizeof(ca->name));
printbuf_exit(&name);
bch2_rebalance_wakeup(c);
return 0;
}

View File

@ -147,8 +147,9 @@ write_attribute(trigger_journal_flush);
write_attribute(trigger_journal_writes);
write_attribute(trigger_btree_cache_shrink);
write_attribute(trigger_btree_key_cache_shrink);
write_attribute(trigger_freelist_wakeup);
write_attribute(trigger_btree_updates);
write_attribute(trigger_freelist_wakeup);
write_attribute(trigger_recalc_capacity);
read_attribute(gc_gens_pos);
__sysfs_attribute(read_fua_test, 0400);
@ -199,6 +200,7 @@ read_attribute(copy_gc_wait);
sysfs_pd_controller_attribute(rebalance);
read_attribute(rebalance_status);
read_attribute(snapshot_delete_status);
read_attribute(new_stripes);
@ -431,6 +433,9 @@ SHOW(bch2_fs)
if (attr == &sysfs_rebalance_status)
bch2_rebalance_status_to_text(out, c);
if (attr == &sysfs_snapshot_delete_status)
bch2_snapshot_delete_status_to_text(out, c);
/* Debugging: */
if (attr == &sysfs_journal_debug)
@ -540,6 +545,12 @@ STORE(bch2_fs)
if (attr == &sysfs_trigger_freelist_wakeup)
closure_wake_up(&c->freelist_wait);
if (attr == &sysfs_trigger_recalc_capacity) {
down_read(&c->state_lock);
bch2_recalc_capacity(c);
up_read(&c->state_lock);
}
#ifdef CONFIG_BCACHEFS_TESTS
if (attr == &sysfs_perf_test) {
char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
@ -571,6 +582,7 @@ struct attribute *bch2_fs_files[] = {
&sysfs_btree_write_stats,
&sysfs_rebalance_status,
&sysfs_snapshot_delete_status,
&sysfs_compression_stats,
@ -665,8 +677,9 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_trigger_journal_writes,
&sysfs_trigger_btree_cache_shrink,
&sysfs_trigger_btree_key_cache_shrink,
&sysfs_trigger_freelist_wakeup,
&sysfs_trigger_btree_updates,
&sysfs_trigger_freelist_wakeup,
&sysfs_trigger_recalc_capacity,
&sysfs_gc_gens_pos,

View File

@ -38,7 +38,7 @@ static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k);
return bch2_xattr_hash(info,
&X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len));
&X_SEARCH(x.v->x_type, x.v->x_name_and_value, x.v->x_name_len));
}
static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r)
@ -48,7 +48,7 @@ static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r)
return l.v->x_type != r->type ||
l.v->x_name_len != r->name.len ||
memcmp(l.v->x_name, r->name.name, r->name.len);
memcmp(l.v->x_name_and_value, r->name.name, r->name.len);
}
static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
@ -58,7 +58,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
return l.v->x_type != r.v->x_type ||
l.v->x_name_len != r.v->x_name_len ||
memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len);
memcmp(l.v->x_name_and_value, r.v->x_name_and_value, r.v->x_name_len);
}
const struct bch_hash_desc bch2_xattr_hash_desc = {
@ -96,7 +96,7 @@ int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k,
c, xattr_invalid_type,
"invalid type (%u)", xattr.v->x_type);
bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len),
bkey_fsck_err_on(memchr(xattr.v->x_name_and_value, '\0', xattr.v->x_name_len),
c, xattr_name_invalid_chars,
"xattr name has invalid characters");
fsck_err:
@ -120,13 +120,13 @@ void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
unsigned name_len = xattr.v->x_name_len;
unsigned val_len = le16_to_cpu(xattr.v->x_val_len);
unsigned max_name_val_bytes = bkey_val_bytes(xattr.k) -
offsetof(struct bch_xattr, x_name);
offsetof(struct bch_xattr, x_name_and_value);
val_len = min_t(int, val_len, max_name_val_bytes - name_len);
name_len = min(name_len, max_name_val_bytes);
prt_printf(out, "%.*s:%.*s",
name_len, xattr.v->x_name,
name_len, xattr.v->x_name_and_value,
val_len, (char *) xattr_val(xattr.v));
if (xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS ||
@ -176,6 +176,11 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
if (ret)
return ret;
/*
* Besides the ctime update, extents, dirents and xattrs updates require
* that an inode update also happens - to ensure that if a key exists in
* one of those btrees with a given snapshot ID an inode is also present
*/
inode_u->bi_ctime = bch2_current_time(c);
ret = bch2_inode_write(trans, &inode_iter, inode_u);
@ -202,7 +207,7 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
xattr->v.x_type = type;
xattr->v.x_name_len = namelen;
xattr->v.x_val_len = cpu_to_le16(size);
memcpy(xattr->v.x_name, name, namelen);
memcpy(xattr->v.x_name_and_value, name, namelen);
memcpy(xattr_val(&xattr->v), value, size);
ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
@ -270,7 +275,7 @@ static int bch2_xattr_emit(struct dentry *dentry,
if (!prefix)
return 0;
return __bch2_xattr_emit(prefix, xattr->x_name, xattr->x_name_len, buf);
return __bch2_xattr_emit(prefix, xattr->x_name_and_value, xattr->x_name_len, buf);
}
static int bch2_xattr_list_bcachefs(struct bch_fs *c,

View File

@ -18,12 +18,12 @@ void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
{
return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) +
return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name_and_value) +
name_len + val_len, sizeof(u64));
}
#define xattr_val(_xattr) \
((void *) (_xattr)->x_name + (_xattr)->x_name_len)
((void *) (_xattr)->x_name_and_value + (_xattr)->x_name_len)
struct xattr_search_key {
u8 type;

View File

@ -13,7 +13,13 @@ struct bch_xattr {
__u8 x_type;
__u8 x_name_len;
__le16 x_val_len;
__u8 x_name[] __counted_by(x_name_len);
/*
* x_name contains the name and value counted by
* x_name_len + x_val_len. The introduction of
* __counted_by(x_name_len) previously caused a false positive
* detection of an out of bounds write.
*/
__u8 x_name_and_value[];
} __packed __aligned(8);
#endif /* _BCACHEFS_XATTR_FORMAT_H */
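
The rename makes the layout explicit: the flexible array holds the name immediately followed by the value, which is exactly why __counted_by(x_name_len) tripped the bounds checker. A reading-side sketch using the helpers from xattr.h above, given a const struct bch_xattr *x:

/*
 * [ struct bch_xattr header | name: x_name_len bytes | value: x_val_len bytes ]
 */
const char *name = (const char *) x->x_name_and_value;	/* not NUL-terminated */
const void *val  = xattr_val(x);			/* == x_name_and_value + x_name_len */
unsigned    u64s = xattr_val_u64s(x->x_name_len,	/* size of the whole bkey value, in u64s */
				  le16_to_cpu(x->x_val_len));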