Update bcachefs sources to e48731a188 bcachefs: Fix BTREE_TRIGGER_WANTS_OLD_AND_NEW

Kent Overstreet 2022-03-13 19:14:01 -04:00
parent bf7924f552
commit d34e731082
36 changed files with 1273 additions and 303 deletions


@ -1 +1 @@
e318fabeb424d4b8fdd46329125c30aaa4f9006a
e48731a188639563444d475622782b7963df4b47


@ -1,10 +1,447 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of https://github.com/facebook/zstd) and
* the GPLv2 (found in the COPYING file in the root directory of
* https://github.com/facebook/zstd). You may select, at your option, one of the
* above-listed licenses.
*/
#ifndef LINUX_ZSTD_H
#define LINUX_ZSTD_H
/**
* This is a kernel-style API that wraps the upstream zstd API, which cannot be
* used directly because the symbols aren't exported. It exposes the minimal
* functionality which is currently required by users of zstd in the kernel.
* Expose extra functions from lib/zstd/zstd.h as needed.
*/
/* ====== Dependency ====== */
#include <linux/types.h>
#include <zstd.h>
#include <linux/zstd_errors.h>
#define ZSTD_initDCtx(w, s) ZSTD_initStaticDCtx(w, s)
#define ZSTD_initCCtx(w, s) ZSTD_initStaticCCtx(w, s)
/* ====== Helper Functions ====== */
/**
* zstd_compress_bound() - maximum compressed size in worst case scenario
* @src_size: The size of the data to compress.
*
* Return: The maximum compressed size in the worst case scenario.
*/
size_t zstd_compress_bound(size_t src_size);
#define ZSTD_compressCCtx(w, dst, d_len, src, src_len, params) \
ZSTD_compressCCtx(w, dst, d_len, src, src_len, 0)
/**
* zstd_is_error() - tells if a size_t function result is an error code
* @code: The function result to check for error.
*
* Return: Non-zero iff the code is an error.
*/
unsigned int zstd_is_error(size_t code);
#define ZSTD_CCtxWorkspaceBound(p) ZSTD_estimateCCtxSize(0)
#define ZSTD_DCtxWorkspaceBound() ZSTD_estimateDCtxSize()
/**
* enum zstd_error_code - zstd error codes
*/
typedef ZSTD_ErrorCode zstd_error_code;
/**
* zstd_get_error_code() - translates an error function result to an error code
* @code: The function result for which zstd_is_error(code) is true.
*
* Return: A unique error code for this error.
*/
zstd_error_code zstd_get_error_code(size_t code);
/**
* zstd_get_error_name() - translates an error function result to a string
* @code: The function result for which zstd_is_error(code) is true.
*
* Return: An error string corresponding to the error code.
*/
const char *zstd_get_error_name(size_t code);
/**
* zstd_min_clevel() - minimum allowed compression level
*
* Return: The minimum allowed compression level.
*/
int zstd_min_clevel(void);
/**
* zstd_max_clevel() - maximum allowed compression level
*
* Return: The maximum allowed compression level.
*/
int zstd_max_clevel(void);
/* ====== Parameter Selection ====== */
/**
* enum zstd_strategy - zstd compression search strategy
*
* From faster to stronger. See zstd_lib.h.
*/
typedef ZSTD_strategy zstd_strategy;
/**
* struct zstd_compression_parameters - zstd compression parameters
* @windowLog: Log of the largest match distance. Larger means more
* compression, and more memory needed during decompression.
* @chainLog: Fully searched segment. Larger means more compression,
* slower, and more memory (useless for fast).
* @hashLog: Dispatch table. Larger means more compression,
* slower, and more memory.
* @searchLog: Number of searches. Larger means more compression and slower.
* @searchLength: Match length searched. Larger means faster decompression,
* sometimes less compression.
* @targetLength: Acceptable match size for optimal parser (only). Larger means
* more compression, and slower.
* @strategy: The zstd compression strategy.
*
* See zstd_lib.h.
*/
typedef ZSTD_compressionParameters zstd_compression_parameters;
/**
* struct zstd_frame_parameters - zstd frame parameters
* @contentSizeFlag: Controls whether content size will be present in the
* frame header (when known).
* @checksumFlag: Controls whether a 32-bit checksum is generated at the
* end of the frame for error detection.
* @noDictIDFlag: Controls whether dictID will be saved into the frame
* header when using dictionary compression.
*
* The default value is all fields set to 0. See zstd_lib.h.
*/
typedef ZSTD_frameParameters zstd_frame_parameters;
/**
* struct zstd_parameters - zstd parameters
* @cParams: The compression parameters.
* @fParams: The frame parameters.
*/
typedef ZSTD_parameters zstd_parameters;
/**
* zstd_get_params() - returns zstd_parameters for selected level
* @level: The compression level
* @estimated_src_size: The estimated source size to compress or 0
* if unknown.
*
* Return: The selected zstd_parameters.
*/
zstd_parameters zstd_get_params(int level,
unsigned long long estimated_src_size);
/* ====== Single-pass Compression ====== */
typedef ZSTD_CCtx zstd_cctx;
/**
* zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
* @parameters: The compression parameters to be used.
*
* If multiple compression parameters might be used, the caller must call
* zstd_cctx_workspace_bound() for each set of parameters and use the maximum
* size.
*
* Return: A lower bound on the size of the workspace that is passed to
* zstd_init_cctx().
*/
size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
/**
* zstd_init_cctx() - initialize a zstd compression context
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to
* determine how large the workspace must be.
*
* Return: A zstd compression context or NULL on error.
*/
zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size);
/**
* zstd_compress_cctx() - compress src into dst with the initialized parameters
* @cctx: The context. Must have been initialized with zstd_init_cctx().
* @dst: The buffer to compress src into.
* @dst_capacity: The size of the destination buffer. May be any size, but
* zstd_compress_bound(src_size) is guaranteed to be large enough.
* @src: The data to compress.
* @src_size: The size of the data to compress.
* @parameters: The compression parameters to be used.
*
* Return: The compressed size or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size, const zstd_parameters *parameters);
/* ====== Single-pass Decompression ====== */
typedef ZSTD_DCtx zstd_dctx;
/**
* zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
*
* Return: A lower bound on the size of the workspace that is passed to
* zstd_init_dctx().
*/
size_t zstd_dctx_workspace_bound(void);
/**
* zstd_init_dctx() - initialize a zstd decompression context
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to
* determine how large the workspace must be.
*
* Return: A zstd decompression context or NULL on error.
*/
zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size);
/**
* zstd_decompress_dctx() - decompress zstd compressed src into dst
* @dctx: The decompression context.
* @dst: The buffer to decompress src into.
* @dst_capacity: The size of the destination buffer. Must be at least as large
* as the decompressed size. If the caller cannot upper bound the
* decompressed size, then it's better to use the streaming API.
* @src: The zstd compressed data to decompress. Multiple concatenated
* frames and skippable frames are allowed.
* @src_size: The exact size of the data to decompress.
*
* Return: The decompressed size or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size);
/* ====== Streaming Buffers ====== */
/**
* struct zstd_in_buffer - input buffer for streaming
* @src: Start of the input buffer.
* @size: Size of the input buffer.
* @pos: Position where reading stopped. Will be updated.
* Necessarily 0 <= pos <= size.
*
* See zstd_lib.h.
*/
typedef ZSTD_inBuffer zstd_in_buffer;
/**
* struct zstd_out_buffer - output buffer for streaming
* @dst: Start of the output buffer.
* @size: Size of the output buffer.
* @pos: Position where writing stopped. Will be updated.
* Necessarily 0 <= pos <= size.
*
* See zstd_lib.h.
*/
typedef ZSTD_outBuffer zstd_out_buffer;
/* ====== Streaming Compression ====== */
typedef ZSTD_CStream zstd_cstream;
/**
* zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
* @cparams: The compression parameters to be used for compression.
*
* Return: A lower bound on the size of the workspace that is passed to
* zstd_init_cstream().
*/
size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
/**
* zstd_init_cstream() - initialize a zstd streaming compression context
* @parameters: The zstd parameters to use for compression.
* @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller
* must pass the source size (zero means empty source).
* Otherwise, the caller may optionally pass the source
* size, or zero if unknown.
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspace_size: The size of workspace.
* Use zstd_cstream_workspace_bound(params->cparams) to
* determine how large the workspace must be.
*
* Return: The zstd streaming compression context or NULL on error.
*/
zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
/**
* zstd_reset_cstream() - reset the context using parameters from creation
* @cstream: The zstd streaming compression context to reset.
* @pledged_src_size: Optionally the source size, or zero if unknown.
*
* Resets the context using the parameters from creation. Skips dictionary
* loading, since it can be reused. If `pledged_src_size` is non-zero the frame
* content size is always written into the frame header.
*
* Return: Zero or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_reset_cstream(zstd_cstream *cstream,
unsigned long long pledged_src_size);
/**
* zstd_compress_stream() - streaming compress some of input into output
* @cstream: The zstd streaming compression context.
* @output: Destination buffer. `output->pos` is updated to indicate how much
* compressed data was written.
* @input: Source buffer. `input->pos` is updated to indicate how much data
* was read. Note that it may not consume the entire input, in which
* case `input->pos < input->size`, and it's up to the caller to
* present remaining data again.
*
* The `input` and `output` buffers may be any size. Guaranteed to make some
* forward progress if `input` and `output` are not empty.
*
* Return: A hint for the number of bytes to use as the input for the next
* function call or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
zstd_in_buffer *input);
/**
* zstd_flush_stream() - flush internal buffers into output
* @cstream: The zstd streaming compression context.
* @output: Destination buffer. `output->pos` is updated to indicate how much
* compressed data was written.
*
* zstd_flush_stream() must be called until it returns 0, meaning all the data
* has been flushed. Since zstd_flush_stream() causes a block to be ended,
* calling it too often will degrade the compression ratio.
*
* Return: The number of bytes still present within internal buffers or an
* error, which can be checked using zstd_is_error().
*/
size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
/**
* zstd_end_stream() - flush internal buffers into output and end the frame
* @cstream: The zstd streaming compression context.
* @output: Destination buffer. `output->pos` is updated to indicate how much
* compressed data was written.
*
* zstd_end_stream() must be called until it returns 0, meaning all the data has
* been flushed and the frame epilogue has been written.
*
* Return: The number of bytes still present within internal buffers or an
* error, which can be checked using zstd_is_error().
*/
size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
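/*
 * Illustrative sketch (an editorial addition, not part of this commit):
 * the compress-then-end loop implied by the comments above. Feed input
 * with zstd_compress_stream(), then call zstd_end_stream() until it
 * returns 0. Refilling a full output buffer and the -EIO error returns
 * are assumptions left to the caller.
 *
 *	zstd_in_buffer in = { src, src_size, 0 };
 *	zstd_out_buffer out = { dst, dst_capacity, 0 };
 *	size_t ret;
 *
 *	while (in.pos < in.size) {
 *		ret = zstd_compress_stream(cstream, &out, &in);
 *		if (zstd_is_error(ret))
 *			return -EIO;
 *	}
 *	do {
 *		ret = zstd_end_stream(cstream, &out);
 *		if (zstd_is_error(ret))
 *			return -EIO;
 *	} while (ret != 0);
 */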
/* ====== Streaming Decompression ====== */
typedef ZSTD_DStream zstd_dstream;
/**
* zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
* @max_window_size: The maximum window size allowed for compressed frames.
*
* Return: A lower bound on the size of the workspace that is passed
* to zstd_init_dstream().
*/
size_t zstd_dstream_workspace_bound(size_t max_window_size);
/**
* zstd_init_dstream() - initialize a zstd streaming decompression context
* @max_window_size: The maximum window size allowed for compressed frames.
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspace_size: The size of workspace.
* Use zstd_dstream_workspace_bound(max_window_size) to
* determine how large the workspace must be.
*
* Return: The zstd streaming decompression context.
*/
zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
size_t workspace_size);
/**
* zstd_reset_dstream() - reset the context using parameters from creation
* @dstream: The zstd streaming decompression context to reset.
*
* Resets the context using the parameters from creation. Skips dictionary
* loading, since it can be reused.
*
* Return: Zero or an error, which can be checked using zstd_is_error().
*/
size_t zstd_reset_dstream(zstd_dstream *dstream);
/**
* zstd_decompress_stream() - streaming decompress some of input into output
* @dstream: The zstd streaming decompression context.
* @output: Destination buffer. `output.pos` is updated to indicate how much
* decompressed data was written.
* @input: Source buffer. `input.pos` is updated to indicate how much data was
* read. Note that it may not consume the entire input, in which case
* `input.pos < input.size`, and it's up to the caller to present
* remaining data again.
*
* The `input` and `output` buffers may be any size. Guaranteed to make some
* forward progress if `input` and `output` are not empty.
* zstd_decompress_stream() will not consume the last byte of the frame until
* the entire frame is flushed.
*
* Return: Returns 0 iff a frame is completely decoded and fully flushed.
* Otherwise returns a hint for the number of bytes to use as the
* input for the next function call or an error, which can be checked
* using zstd_is_error(). The size hint will never request more than
* the remainder of the frame.
*/
size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
zstd_in_buffer *input);
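/*
 * Illustrative sketch (an editorial addition, not part of this commit):
 * decompressing one frame with zstd_decompress_stream(). Per the
 * comment above, a return of 0 means the frame is decoded and flushed.
 * The buffers and the -EIO error return are assumptions.
 *
 *	zstd_in_buffer in = { src, src_size, 0 };
 *	zstd_out_buffer out = { dst, dst_capacity, 0 };
 *
 *	while (in.pos < in.size) {
 *		size_t ret = zstd_decompress_stream(dstream, &out, &in);
 *
 *		if (zstd_is_error(ret))
 *			return -EIO;
 *		if (ret == 0)
 *			break;
 *	}
 */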
/* ====== Frame Inspection Functions ====== */
/**
* zstd_find_frame_compressed_size() - returns the size of a compressed frame
* @src: Source buffer. It should point to the start of a zstd encoded
* frame or a skippable frame.
* @src_size: The size of the source buffer. It must be at least as large as the
* size of the frame.
*
* Return: The compressed size of the frame pointed to by `src` or an error,
* which can be checked using zstd_is_error().
* Suitable to pass to ZSTD_decompress() or similar functions.
*/
size_t zstd_find_frame_compressed_size(const void *src, size_t src_size);
/**
* struct zstd_frame_params - zstd frame parameters stored in the frame header
* @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
* present.
* @windowSize: The window size, or 0 if the frame is a skippable frame.
* @blockSizeMax: The maximum block size.
* @frameType: The frame type (zstd or skippable)
* @headerSize: The size of the frame header.
* @dictID: The dictionary id, or 0 if not present.
* @checksumFlag: Whether a checksum was used.
*
* See zstd_lib.h.
*/
typedef ZSTD_frameHeader zstd_frame_header;
/**
* zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
* @params: On success the frame parameters are written here.
* @src: The source buffer. It must point to a zstd or skippable frame.
* @src_size: The size of the source buffer.
*
* Return: 0 on success. If more data is required it returns how many bytes
* must be provided to make forward progress. Otherwise it returns
* an error, which can be checked using zstd_is_error().
*/
size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
size_t src_size);
#endif /* LINUX_ZSTD_H */
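The wrapper above is exercised by the compress.c hunks later in this commit. As a standalone reference, here is a minimal single-pass round trip through the declared API; the kvmalloc() allocation, the compression level of 3, and the error returns are illustrative assumptions, not part of the commit:

	/* illustrative only: compress src into dst and decompress it back */
	static int zstd_roundtrip_example(const void *src, size_t src_size,
					  void *dst, size_t dst_capacity)
	{
		zstd_parameters params = zstd_get_params(3, src_size);
		size_t bound = zstd_compress_bound(src_size);
		size_t c_wksp_size = zstd_cctx_workspace_bound(&params.cParams);
		size_t d_wksp_size = zstd_dctx_workspace_bound();
		void *c_wksp = kvmalloc(c_wksp_size, GFP_KERNEL);
		void *d_wksp = kvmalloc(d_wksp_size, GFP_KERNEL);
		void *compressed = kvmalloc(bound, GFP_KERNEL);
		zstd_cctx *cctx;
		zstd_dctx *dctx;
		size_t c_len, d_len;
		int ret = -ENOMEM;

		if (!c_wksp || !d_wksp || !compressed)
			goto out;

		ret = -EIO;
		cctx = zstd_init_cctx(c_wksp, c_wksp_size);
		dctx = zstd_init_dctx(d_wksp, d_wksp_size);
		if (!cctx || !dctx)
			goto out;

		/* returns the compressed size, or an error code */
		c_len = zstd_compress_cctx(cctx, compressed, bound,
					   src, src_size, &params);
		if (zstd_is_error(c_len))
			goto out;

		d_len = zstd_decompress_dctx(dctx, dst, dst_capacity,
					     compressed, c_len);
		ret = !zstd_is_error(d_len) && d_len == src_size ? 0 : -EIO;
	out:
		kvfree(compressed);
		kvfree(d_wksp);
		kvfree(c_wksp);
		return ret;
	}

In-tree users would normally keep the workspaces in a mempool rather than allocating per call, as the compress.c hunks below do.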


@ -0,0 +1,77 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_ERRORS_H_398273423
#define ZSTD_ERRORS_H_398273423
/*===== dependency =====*/
#include <linux/types.h> /* size_t */
/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
#define ZSTDERRORLIB_VISIBILITY
#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
/*-*********************************************
* Error codes list
*-*********************************************
* Error codes _values_ are pinned down since v1.3.1 only.
* Therefore, don't rely on values if you may link to any version < v1.3.1.
*
* Only values < 100 are considered stable.
*
* note 1 : this API shall be used with static linking only.
* dynamic linking is not yet officially supported.
* note 2 : Prefer relying on the enum than on its value whenever possible
* This is the only supported way to use the error list < v1.3.1
* note 3 : ZSTD_isError() is always correct, whatever the library version.
**********************************************/
typedef enum {
ZSTD_error_no_error = 0,
ZSTD_error_GENERIC = 1,
ZSTD_error_prefix_unknown = 10,
ZSTD_error_version_unsupported = 12,
ZSTD_error_frameParameter_unsupported = 14,
ZSTD_error_frameParameter_windowTooLarge = 16,
ZSTD_error_corruption_detected = 20,
ZSTD_error_checksum_wrong = 22,
ZSTD_error_dictionary_corrupted = 30,
ZSTD_error_dictionary_wrong = 32,
ZSTD_error_dictionaryCreation_failed = 34,
ZSTD_error_parameter_unsupported = 40,
ZSTD_error_parameter_outOfBound = 42,
ZSTD_error_tableLog_tooLarge = 44,
ZSTD_error_maxSymbolValue_tooLarge = 46,
ZSTD_error_maxSymbolValue_tooSmall = 48,
ZSTD_error_stage_wrong = 60,
ZSTD_error_init_missing = 62,
ZSTD_error_memory_allocation = 64,
ZSTD_error_workSpace_tooSmall = 66,
ZSTD_error_dstSize_tooSmall = 70,
ZSTD_error_srcSize_wrong = 72,
ZSTD_error_dstBuffer_null = 74,
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102,
ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_srcBuffer_wrong = 105,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;
/*! ZSTD_getErrorCode() :
convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
which can be used to compare with enum list published above */
ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
#endif /* ZSTD_ERRORS_H_398273423 */
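A short illustrative sketch (not part of this commit) of mapping a size_t result onto the enum above; cctx, dst, src and params are assumed from surrounding context, and the pr_warn()/pr_err() reporting is an assumption:

	size_t ret = zstd_compress_cctx(cctx, dst, dst_capacity,
					src, src_size, &params);

	if (zstd_is_error(ret)) {
		ZSTD_ErrorCode err = ZSTD_getErrorCode(ret);

		/* compare against the enum, never against raw values */
		if (err == ZSTD_error_dstSize_tooSmall)
			pr_warn("zstd: destination buffer too small\n");

		pr_err("zstd: %s\n", ZSTD_getErrorString(err));
	}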


@ -65,16 +65,19 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_alloc (struct bkey_ops) { \
.key_invalid = bch2_alloc_v1_invalid, \
.val_to_text = bch2_alloc_to_text, \
.atomic_trigger = bch2_mark_alloc, \
}
#define bch2_bkey_ops_alloc_v2 (struct bkey_ops) { \
.key_invalid = bch2_alloc_v2_invalid, \
.val_to_text = bch2_alloc_to_text, \
.atomic_trigger = bch2_mark_alloc, \
}
#define bch2_bkey_ops_alloc_v3 (struct bkey_ops) { \
.key_invalid = bch2_alloc_v3_invalid, \
.val_to_text = bch2_alloc_to_text, \
.atomic_trigger = bch2_mark_alloc, \
}
static inline bool bkey_is_alloc(const struct bkey *k)


@ -6,6 +6,7 @@
struct bch_fs;
struct btree;
struct btree_trans;
struct bkey;
enum btree_node_type;
@ -20,6 +21,10 @@ struct bkey_ops {
void (*swab)(struct bkey_s);
bool (*key_normalize)(struct bch_fs *, struct bkey_s);
bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
int (*trans_trigger)(struct btree_trans *, struct bkey_s_c,
struct bkey_i *, unsigned);
int (*atomic_trigger)(struct btree_trans *, struct bkey_s_c,
struct bkey_s_c, unsigned);
void (*compat)(enum btree_id id, unsigned version,
unsigned big_endian, int write,
struct bkey_s);
@ -57,6 +62,28 @@ static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct b
bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
static inline int bch2_mark_key(struct btree_trans *trans,
struct bkey_s_c old,
struct bkey_s_c new,
unsigned flags)
{
const struct bkey_ops *ops = &bch2_bkey_ops[old.k->type ?: new.k->type];
return ops->atomic_trigger
? ops->atomic_trigger(trans, old, new, flags)
: 0;
}
static inline int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
struct bkey_i *new, unsigned flags)
{
const struct bkey_ops *ops = &bch2_bkey_ops[old.k->type ?: new->k.type];
return ops->trans_trigger
? ops->trans_trigger(trans, old, new, flags)
: 0;
}
void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned,


@ -1626,6 +1626,8 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
if (new & (1U << BTREE_NODE_write_in_flight))
__bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED);
else
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
}
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
@ -2094,7 +2096,6 @@ restart:
rcu_read_unlock();
wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE);
goto restart;
}
rcu_read_unlock();
}


@ -189,7 +189,7 @@ bool __bch2_btree_node_relock(struct btree_trans *trans,
if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
(btree_node_lock_seq_matches(path, b, level) &&
btree_node_lock_increment(trans, b, level, want))) {
mark_btree_node_locked(path, level, want);
mark_btree_node_locked(trans, path, level, want);
return true;
}
fail:
@ -240,7 +240,7 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
return false;
success:
mark_btree_node_intent_locked(path, level);
mark_btree_node_intent_locked(trans, path, level);
return true;
}
@ -486,14 +486,15 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
* before interior nodes - now that's handled by
* bch2_btree_path_traverse_all().
*/
trans_for_each_path(trans, linked)
if (linked != path &&
linked->cached == path->cached &&
linked->btree_id == path->btree_id &&
linked->locks_want < new_locks_want) {
linked->locks_want = new_locks_want;
btree_path_get_locks(trans, linked, true);
}
if (!path->cached && !trans->in_traverse_all)
trans_for_each_path(trans, linked)
if (linked != path &&
linked->cached == path->cached &&
linked->btree_id == path->btree_id &&
linked->locks_want < new_locks_want) {
linked->locks_want = new_locks_want;
btree_path_get_locks(trans, linked, true);
}
return false;
}
@ -1167,7 +1168,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
t != BTREE_NODE_UNLOCKED) {
btree_node_unlock(path, b->c.level);
six_lock_increment(&b->c.lock, t);
mark_btree_node_locked(path, b->c.level, t);
mark_btree_node_locked(trans, path, b->c.level, t);
}
btree_path_level_init(trans, path, b);
@ -1244,7 +1245,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
path->l[i].b = NULL;
mark_btree_node_locked(path, path->level, lock_type);
mark_btree_node_locked(trans, path, path->level, lock_type);
btree_path_level_init(trans, path, b);
return 0;
}
@ -1409,7 +1410,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
if (unlikely(ret))
goto err;
mark_btree_node_locked(path, level, lock_type);
mark_btree_node_locked(trans, path, level, lock_type);
btree_path_level_init(trans, path, b);
if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
@ -1442,6 +1443,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
trans->in_traverse_all = true;
retry_all:
trans->restarted = false;
trans->traverse_all_idx = U8_MAX;
trans_for_each_path(trans, path)
path->should_be_locked = false;
@ -1474,9 +1476,9 @@ retry_all:
}
/* Now, redo traversals in correct order: */
i = 0;
while (i < trans->nr_sorted) {
path = trans->paths + trans->sorted[i];
trans->traverse_all_idx = 0;
while (trans->traverse_all_idx < trans->nr_sorted) {
path = trans->paths + trans->sorted[trans->traverse_all_idx];
/*
* Traversing a path can cause another path to be added at about
@ -1484,10 +1486,13 @@ retry_all:
*/
if (path->uptodate) {
ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
if (ret)
if (ret == -EINTR || ret == -ENOMEM)
goto retry_all;
if (ret)
goto err;
BUG_ON(path->uptodate);
} else {
i++;
trans->traverse_all_idx++;
}
}
@ -1498,7 +1503,7 @@ retry_all:
*/
trans_for_each_path(trans, path)
BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
err:
bch2_btree_cache_cannibalize_unlock(c);
trans->in_traverse_all = false;
@ -1807,18 +1812,48 @@ free:
__bch2_path_free(trans, path);
}
void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
{
struct btree_insert_entry *i;
pr_buf(buf, "transaction updates for %s journal seq %llu\n",
trans->fn, trans->journal_res.seq);
trans_for_each_update(trans, i) {
struct bkey_s_c old = { &i->old_k, i->old_v };
pr_buf(buf, "update: btree %s %pS\n old ",
bch2_btree_ids[i->btree_id],
(void *) i->ip_allocated);
bch2_bkey_val_to_text(buf, trans->c, old);
pr_buf(buf, "\n new ");
bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
pr_buf(buf, "\n");
}
}
noinline __cold
void bch2_dump_trans_updates(struct btree_trans *trans)
{
struct printbuf buf = PRINTBUF;
bch2_trans_updates_to_text(&buf, trans);
bch_err(trans->c, "%s", buf.buf);
printbuf_exit(&buf);
}
noinline __cold
void bch2_dump_trans_paths_updates(struct btree_trans *trans)
{
struct btree_path *path;
struct btree_insert_entry *i;
struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
struct printbuf buf = PRINTBUF;
unsigned idx;
trans_for_each_path_inorder(trans, path, idx) {
printbuf_reset(&buf1);
printbuf_reset(&buf);
bch2_bpos_to_text(&buf1, path->pos);
bch2_bpos_to_text(&buf, path->pos);
printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n",
path->idx, path->ref, path->intent_ref,
@ -1826,7 +1861,7 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
path->preserve ? " P" : "",
bch2_btree_ids[path->btree_id],
path->level,
buf1.buf,
buf.buf,
path->nodes_locked,
#ifdef CONFIG_BCACHEFS_DEBUG
(void *) path->ip_allocated
@ -1836,23 +1871,9 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
);
}
trans_for_each_update(trans, i) {
struct bkey u;
struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u);
printbuf_exit(&buf);
printbuf_reset(&buf1);
printbuf_reset(&buf2);
bch2_bkey_val_to_text(&buf1, trans->c, old);
bch2_bkey_val_to_text(&buf2, trans->c, bkey_i_to_s_c(i->k));
printk(KERN_ERR "update: btree %s %pS\n old %s\n new %s",
bch2_btree_ids[i->btree_id],
(void *) i->ip_allocated,
buf1.buf, buf2.buf);
}
printbuf_exit(&buf2);
printbuf_exit(&buf1);
bch2_dump_trans_updates(trans);
}
static struct btree_path *btree_path_alloc(struct btree_trans *trans,
@ -2337,11 +2358,12 @@ out:
* bch2_btree_iter_peek: returns first key greater than or equal to iterator's
* current position
*/
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
{
struct btree_trans *trans = iter->trans;
struct bpos search_key = btree_iter_search_key(iter);
struct bkey_s_c k;
struct bpos iter_pos;
int ret;
if (iter->update_path) {
@ -2357,6 +2379,24 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
if (!k.k || bkey_err(k))
goto out;
/*
* iter->pos should be monotonically increasing, and always be
* equal to the key we just returned - except extents can
* straddle iter->pos:
*/
if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
iter_pos = k.k->p;
else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
iter_pos = bkey_start_pos(k.k);
else
iter_pos = iter->pos;
if (bkey_cmp(iter_pos, end) > 0) {
bch2_btree_iter_set_pos(iter, end);
k = bkey_s_c_null;
goto out;
}
if (iter->update_path &&
bkey_cmp(iter->update_path->pos, k.k->p)) {
bch2_path_put(trans, iter->update_path,
@ -2387,10 +2427,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
iter->update_path = bch2_btree_path_set_pos(trans,
iter->update_path, pos,
iter->flags & BTREE_ITER_INTENT,
btree_iter_ip_allocated(iter));
BUG_ON(!(iter->update_path->nodes_locked & 1));
iter->update_path->should_be_locked = true;
_THIS_IP_);
}
/*
@ -2414,14 +2451,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
break;
}
/*
* iter->pos should be monotonically increasing, and always be equal to
* the key we just returned - except extents can straddle iter->pos:
*/
if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
iter->pos = k.k->p;
else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
iter->pos = bkey_start_pos(k.k);
iter->pos = iter_pos;
iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
iter->flags & BTREE_ITER_INTENT,
@ -2429,8 +2459,13 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
BUG_ON(!iter->path->nodes_locked);
out:
if (iter->update_path) {
BUG_ON(!(iter->update_path->nodes_locked & 1));
iter->update_path->should_be_locked = true;
if (iter->update_path->uptodate &&
!bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_)) {
k = bkey_s_c_err(-EINTR);
} else {
BUG_ON(!(iter->update_path->nodes_locked & 1));
iter->update_path->should_be_locked = true;
}
}
iter->path->should_be_locked = true;
@ -2661,9 +2696,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (iter->flags & BTREE_ITER_INTENT) {
struct btree_iter iter2;
struct bpos end = iter->pos;
if (iter->flags & BTREE_ITER_IS_EXTENTS)
end.offset = U64_MAX;
bch2_trans_copy_iter(&iter2, iter);
k = bch2_btree_iter_peek(&iter2);
k = bch2_btree_iter_peek_upto(&iter2, end);
if (k.k && !bkey_err(k)) {
iter->k = iter2.k;
@ -2831,6 +2870,11 @@ static inline void btree_path_list_add(struct btree_trans *trans,
path->sorted_idx = pos ? pos->sorted_idx + 1 : 0;
if (trans->in_traverse_all &&
trans->traverse_all_idx != U8_MAX &&
trans->traverse_all_idx >= path->sorted_idx)
trans->traverse_all_idx++;
array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx);
for (i = path->sorted_idx; i < trans->nr_sorted; i++)


@ -209,9 +209,14 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *);
struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
{
return bch2_btree_iter_peek_upto(iter, SPOS_MAX);
}
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
@ -306,13 +311,26 @@ static inline int bkey_err(struct bkey_s_c k)
}
static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
unsigned flags)
unsigned flags)
{
return flags & BTREE_ITER_SLOTS
? bch2_btree_iter_peek_slot(iter)
: bch2_btree_iter_peek(iter);
}
static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter,
struct bpos end,
unsigned flags)
{
if (!(flags & BTREE_ITER_SLOTS))
return bch2_btree_iter_peek_upto(iter, end);
if (bkey_cmp(iter->pos, end) > 0)
return bkey_s_c_null;
return bch2_btree_iter_peek_slot(iter);
}
static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{
return hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2
@ -349,6 +367,14 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \
_start, _end, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
(_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
#define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret) \
for (; \
(_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
@ -363,6 +389,8 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
/* new multiple iterator interface: */
void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *);
void __bch2_trans_init(struct btree_trans *, struct bch_fs *,
unsigned, size_t, const char *);


@ -309,7 +309,7 @@ retry:
if (!ck)
goto retry;
mark_btree_node_locked(path, 0, SIX_LOCK_intent);
mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
path->locks_want = 1;
} else {
enum six_lock_type lock_want = __btree_lock_want(path, 0);
@ -330,7 +330,7 @@ retry:
goto retry;
}
mark_btree_node_locked(path, 0, lock_want);
mark_btree_node_locked(trans, path, 0, lock_want);
}
path->l[0].lock_seq = ck->c.lock.state.seq;


@ -58,7 +58,8 @@ static inline void mark_btree_node_unlocked(struct btree_path *path,
path->nodes_intent_locked &= ~(1 << level);
}
static inline void mark_btree_node_locked(struct btree_path *path,
static inline void mark_btree_node_locked(struct btree_trans *trans,
struct btree_path *path,
unsigned level,
enum six_lock_type type)
{
@ -66,14 +67,17 @@ static inline void mark_btree_node_locked(struct btree_path *path,
BUILD_BUG_ON(SIX_LOCK_read != 0);
BUILD_BUG_ON(SIX_LOCK_intent != 1);
BUG_ON(trans->in_traverse_all && path->sorted_idx > trans->traverse_all_idx);
path->nodes_locked |= 1 << level;
path->nodes_intent_locked |= type << level;
}
static inline void mark_btree_node_intent_locked(struct btree_path *path,
static inline void mark_btree_node_intent_locked(struct btree_trans *trans,
struct btree_path *path,
unsigned level)
{
mark_btree_node_locked(path, level, SIX_LOCK_intent);
mark_btree_node_locked(trans, path, level, SIX_LOCK_intent);
}
static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)


@ -361,7 +361,11 @@ struct btree_insert_entry {
unsigned long ip_allocated;
};
#ifndef CONFIG_LOCKDEP
#define BTREE_ITER_MAX 64
#else
#define BTREE_ITER_MAX 32
#endif
struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
@ -388,6 +392,7 @@ struct btree_trans {
u8 nr_sorted;
u8 nr_updates;
u8 traverse_all_idx;
bool used_mempool:1;
bool in_traverse_all:1;
bool restarted:1;


@ -213,7 +213,7 @@ inline void bch2_btree_add_journal_pin(struct bch_fs *c,
/**
* btree_insert_key - insert one key into a leaf node
*/
static bool btree_insert_key_leaf(struct btree_trans *trans,
static void btree_insert_key_leaf(struct btree_trans *trans,
struct btree_insert_entry *insert)
{
struct bch_fs *c = trans->c;
@ -226,7 +226,7 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
&insert_l(insert)->iter, insert->k)))
return false;
return;
i->journal_seq = cpu_to_le64(max(trans->journal_res.seq,
le64_to_cpu(i->journal_seq)));
@ -247,8 +247,6 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
if (u64s_added > live_u64s_added &&
bch2_maybe_compact_whiteouts(c, b))
bch2_trans_node_reinit_iter(trans, b);
return true;
}
/* Cached btree updates: */
@ -400,18 +398,16 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
bool did_work;
EBUG_ON(trans->journal_res.ref !=
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));
i->k->k.needs_whiteout = false;
did_work = !i->cached
? btree_insert_key_leaf(trans, i)
: bch2_btree_insert_key_cached(trans, i->path, i->k);
if (!did_work)
return;
if (!i->cached)
btree_insert_key_leaf(trans, i);
else
bch2_btree_insert_key_cached(trans, i->path, i->k);
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
bch2_journal_add_keys(j, &trans->journal_res,
@ -440,7 +436,8 @@ static int run_one_mem_trigger(struct btree_trans *trans,
if (!btree_node_type_needs_gc(i->btree_id))
return 0;
if (old.k->type == new->k.type &&
if (bch2_bkey_ops[old.k->type].atomic_trigger ==
bch2_bkey_ops[i->k->k.type].atomic_trigger &&
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
ret = bch2_mark_key(trans, old, bkey_i_to_s_c(new),
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
@ -485,7 +482,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
if (overwrite) {
ret = bch2_trans_mark_old(trans, old, i->flags);
} else if (old.k->type == i->k->k.type &&
} else if (bch2_bkey_ops[old.k->type].trans_trigger ==
bch2_bkey_ops[i->k->k.type].trans_trigger &&
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
i->overwrite_trigger_run = true;
ret = bch2_trans_mark_key(trans, old, i->k,
@ -652,6 +650,32 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (btree_node_type_needs_gc(i->bkey_type))
marking = true;
/*
* Revalidate before calling mem triggers - XXX, ugly:
*
* - successful btree node splits don't cause transaction
* restarts and will have invalidated the pointer to the bkey
* value
* - btree_node_lock_for_insert() -> btree_node_prep_for_write()
* when it has to re-sort
* - btree_key_can_insert_cached() when it has to reallocate
*
* Ugly because we currently have no way to tell if the
* pointer's been invalidated, which means it's debatable
* whether we should be stashing the old key at all.
*/
i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v;
if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))) {
struct bkey_i *j_k =
bch2_journal_keys_peek(c, i->btree_id, i->level, i->k->k.p);
if (j_k && !bpos_cmp(j_k->k.p, i->k->k.p)) {
i->old_k = j_k->k;
i->old_v = &j_k->v;
}
}
}
/*
@ -1217,7 +1241,7 @@ int bch2_trans_update_extent(struct btree_trans *trans,
BTREE_ITER_INTENT|
BTREE_ITER_WITH_UPDATES|
BTREE_ITER_NOT_EXTENTS);
k = bch2_btree_iter_peek(&iter);
k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
if ((ret = bkey_err(k)))
goto err;
if (!k.k)
@ -1369,7 +1393,8 @@ nomerge1:
goto out;
}
next:
k = bch2_btree_iter_next(&iter);
bch2_btree_iter_advance(&iter);
k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
if ((ret = bkey_err(k)))
goto err;
if (!k.k)
@ -1629,14 +1654,14 @@ int bch2_btree_delete_at(struct btree_trans *trans,
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
struct bpos start, struct bpos end,
unsigned iter_flags,
unsigned update_flags,
u64 *journal_seq)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT|iter_flags);
bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
retry:
while ((bch2_trans_begin(trans),
(k = bch2_btree_iter_peek(&iter)).k) &&
@ -1679,7 +1704,8 @@ retry:
ret = bch2_trans_update(trans, &iter, &delete, 0) ?:
bch2_trans_commit(trans, &disk_res, journal_seq,
BTREE_INSERT_NOFAIL);
BTREE_INSERT_NOFAIL|
update_flags);
bch2_disk_reservation_put(trans->c, &disk_res);
if (ret)
break;
@ -1701,10 +1727,10 @@ retry:
*/
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
struct bpos start, struct bpos end,
unsigned iter_flags,
unsigned update_flags,
u64 *journal_seq)
{
return bch2_trans_do(c, NULL, journal_seq, 0,
bch2_btree_delete_range_trans(&trans, id, start, end,
iter_flags, journal_seq));
update_flags, journal_seq));
}


@ -497,20 +497,25 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
BUG_ON(owned_by_allocator == old.owned_by_allocator);
}
static int bch2_mark_alloc(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
int bch2_mark_alloc(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
bool gc = flags & BTREE_TRIGGER_GC;
u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c;
struct bkey_alloc_unpacked old_u = bch2_alloc_unpack(old);
struct bkey_alloc_unpacked new_u = bch2_alloc_unpack(new);
struct bch_dev *ca;
struct bch_dev *ca = bch_dev_bkey_exists(c, new_u.dev);
struct bucket *g;
struct bucket_mark old_m, m;
int ret = 0;
if (bch2_trans_inconsistent_on(new_u.bucket < ca->mi.first_bucket ||
new_u.bucket >= ca->mi.nbuckets, trans,
"alloc key outside range of device's buckets"))
return -EIO;
/*
* alloc btree is read in by bch2_alloc_read, not gc:
*/
@ -550,11 +555,6 @@ static int bch2_mark_alloc(struct btree_trans *trans,
}
}
ca = bch_dev_bkey_exists(c, new_u.dev);
if (new_u.bucket >= ca->mi.nbuckets)
return 0;
percpu_down_read(&c->mark_lock);
if (!gc && new_u.gen != old_u.gen)
*bucket_gen(ca, new_u.bucket) = new_u.gen;
@ -929,9 +929,9 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans,
return 0;
}
static int bch2_mark_extent(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
int bch2_mark_extent(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c;
@ -1011,9 +1011,9 @@ static int bch2_mark_extent(struct btree_trans *trans,
return 0;
}
static int bch2_mark_stripe(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
int bch2_mark_stripe(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
bool gc = flags & BTREE_TRIGGER_GC;
u64 journal_seq = trans->journal_res.seq;
@ -1118,9 +1118,9 @@ static int bch2_mark_stripe(struct btree_trans *trans,
return 0;
}
static int bch2_mark_inode(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
int bch2_mark_inode(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct bch_fs_usage __percpu *fs_usage;
@ -1149,9 +1149,9 @@ static int bch2_mark_inode(struct btree_trans *trans,
return 0;
}
static int bch2_mark_reservation(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
int bch2_mark_reservation(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
@ -1228,9 +1228,9 @@ fsck_err:
return ret;
}
static int bch2_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
int bch2_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
@ -1267,39 +1267,6 @@ static int bch2_mark_reflink_p(struct btree_trans *trans,
return ret;
}
int bch2_mark_key(struct btree_trans *trans,
struct bkey_s_c old,
struct bkey_s_c new,
unsigned flags)
{
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
switch (k.k->type) {
case KEY_TYPE_alloc:
case KEY_TYPE_alloc_v2:
case KEY_TYPE_alloc_v3:
return bch2_mark_alloc(trans, old, new, flags);
case KEY_TYPE_btree_ptr:
case KEY_TYPE_btree_ptr_v2:
case KEY_TYPE_extent:
case KEY_TYPE_reflink_v:
return bch2_mark_extent(trans, old, new, flags);
case KEY_TYPE_stripe:
return bch2_mark_stripe(trans, old, new, flags);
case KEY_TYPE_inode:
case KEY_TYPE_inode_v2:
return bch2_mark_inode(trans, old, new, flags);
case KEY_TYPE_reservation:
return bch2_mark_reservation(trans, old, new, flags);
case KEY_TYPE_reflink_p:
return bch2_mark_reflink_p(trans, old, new, flags);
case KEY_TYPE_snapshot:
return bch2_mark_snapshot(trans, old, new, flags);
default:
return 0;
}
}
static noinline __cold
void fs_usage_apply_warn(struct btree_trans *trans,
unsigned disk_res_sectors,
@ -1462,7 +1429,6 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_i_stripe *s;
@ -1478,16 +1444,15 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
goto err;
if (k.k->type != KEY_TYPE_stripe) {
bch2_fs_inconsistent(c,
bch2_trans_inconsistent(trans,
"pointer to nonexistent stripe %llu",
(u64) p.ec.idx);
bch2_inconsistent_error(c);
ret = -EIO;
goto err;
}
if (!bch2_ptr_matches_stripe(bkey_s_c_to_stripe(k).v, p)) {
bch2_fs_inconsistent(c,
bch2_trans_inconsistent(trans,
"stripe pointer doesn't match stripe %llu",
(u64) p.ec.idx);
ret = -EIO;
@ -1516,10 +1481,14 @@ err:
return ret;
}
static int bch2_trans_mark_extent(struct btree_trans *trans,
struct bkey_s_c k, unsigned flags)
int bch2_trans_mark_extent(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_i *new,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
? old
: bkey_i_to_s_c(new);
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
@ -1601,8 +1570,8 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
goto err;
if (!deleting) {
if (bch2_fs_inconsistent_on(u.stripe ||
u.stripe_redundancy, c,
if (bch2_trans_inconsistent_on(u.stripe ||
u.stripe_redundancy, trans,
"bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
iter.pos.inode, iter.pos.offset, u.gen,
bch2_data_types[u.data_type],
@ -1612,7 +1581,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
goto err;
}
if (bch2_fs_inconsistent_on(data_type && u.dirty_sectors, c,
if (bch2_trans_inconsistent_on(data_type && u.dirty_sectors, trans,
"bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
iter.pos.inode, iter.pos.offset, u.gen,
bch2_data_types[u.data_type],
@ -1625,8 +1594,8 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
u.stripe = s.k->p.offset;
u.stripe_redundancy = s.v->nr_redundant;
} else {
if (bch2_fs_inconsistent_on(u.stripe != s.k->p.offset ||
u.stripe_redundancy != s.v->nr_redundant, c,
if (bch2_trans_inconsistent_on(u.stripe != s.k->p.offset ||
u.stripe_redundancy != s.v->nr_redundant, trans,
"bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
iter.pos.inode, iter.pos.offset, u.gen,
s.k->p.offset, u.stripe)) {
@ -1650,9 +1619,9 @@ err:
return ret;
}
static int bch2_trans_mark_stripe(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_i *new,
unsigned flags)
int bch2_trans_mark_stripe(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_i *new,
unsigned flags)
{
const struct bch_stripe *old_s = NULL;
struct bch_stripe *new_s = NULL;
@ -1720,10 +1689,10 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
return ret;
}
static int bch2_trans_mark_inode(struct btree_trans *trans,
struct bkey_s_c old,
struct bkey_i *new,
unsigned flags)
int bch2_trans_mark_inode(struct btree_trans *trans,
struct bkey_s_c old,
struct bkey_i *new,
unsigned flags)
{
int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k);
@ -1736,9 +1705,14 @@ static int bch2_trans_mark_inode(struct btree_trans *trans,
return 0;
}
static int bch2_trans_mark_reservation(struct btree_trans *trans,
struct bkey_s_c k, unsigned flags)
int bch2_trans_mark_reservation(struct btree_trans *trans,
struct bkey_s_c old,
struct bkey_i *new,
unsigned flags)
{
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
? old
: bkey_i_to_s_c(new);
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
s64 sectors = (s64) k.k->size;
struct replicas_delta_list *d;
@ -1787,7 +1761,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
refcount = bkey_refcount(n);
if (!refcount) {
bch2_bkey_val_to_text(&buf, c, p.s_c);
bch2_fs_inconsistent(c,
bch2_trans_inconsistent(trans,
"nonexistent indirect extent at %llu while marking\n %s",
*idx, buf.buf);
ret = -EIO;
@ -1796,7 +1770,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
bch2_bkey_val_to_text(&buf, c, p.s_c);
bch2_fs_inconsistent(c,
bch2_trans_inconsistent(trans,
"indirect extent refcount underflow at %llu while marking\n %s",
*idx, buf.buf);
ret = -EIO;
@ -1837,9 +1811,14 @@ err:
return ret;
}
static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c k, unsigned flags)
int bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c old,
struct bkey_i *new,
unsigned flags)
{
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
? old
: bkey_i_to_s_c(new);
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
u64 idx, end_idx;
int ret = 0;
@ -1860,33 +1839,6 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
return ret;
}
int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
struct bkey_i *new, unsigned flags)
{
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
? old
: bkey_i_to_s_c(new);
switch (k.k->type) {
case KEY_TYPE_btree_ptr:
case KEY_TYPE_btree_ptr_v2:
case KEY_TYPE_extent:
case KEY_TYPE_reflink_v:
return bch2_trans_mark_extent(trans, k, flags);
case KEY_TYPE_stripe:
return bch2_trans_mark_stripe(trans, old, new, flags);
case KEY_TYPE_inode:
case KEY_TYPE_inode_v2:
return bch2_trans_mark_inode(trans, old, new, flags);
case KEY_TYPE_reservation:
return bch2_trans_mark_reservation(trans, k, flags);
case KEY_TYPE_reflink_p:
return bch2_trans_mark_reflink_p(trans, k, flags);
default:
return 0;
}
}
static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
struct bch_dev *ca, size_t b,
enum bch_data_type type,


@ -229,6 +229,19 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
size_t, enum bch_data_type, unsigned,
struct gc_pos, unsigned);
int bch2_mark_alloc(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_mark_extent(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_mark_stripe(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_mark_inode(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_trans_mark_extent(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_stripe(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_inode(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,


@ -197,9 +197,11 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
goto err;
workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());
ret = ZSTD_decompressDCtx(ctx,
src_len = le32_to_cpup(src_data.b);
ret = zstd_decompress_dctx(ctx,
dst_data, dst_len,
src_data.b + 4, real_src_len);
@ -333,8 +335,8 @@ static int attempt_compress(struct bch_fs *c,
return strm.total_out;
}
case BCH_COMPRESSION_TYPE_zstd: {
ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));
ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
zstd_cctx_workspace_bound(&c->zstd_params.cParams));
/*
* ZSTD requires that when we decompress we pass in the exact
@ -347,11 +349,11 @@ static int attempt_compress(struct bch_fs *c,
* factor (7 bytes) from the dst buffer size to account for
* that.
*/
size_t len = ZSTD_compressCCtx(ctx,
size_t len = zstd_compress_cctx(ctx,
dst + 4, dst_len - 4 - 7,
src, src_len,
c->zstd_params);
if (ZSTD_isError(len))
&c->zstd_params);
if (zstd_is_error(len))
return 0;
*((__le32 *) dst) = cpu_to_le32(len);
@ -546,7 +548,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
size_t decompress_workspace_size = 0;
bool decompress_workspace_needed;
ZSTD_parameters params = ZSTD_getParams(0, c->opts.encoded_extent_max, 0);
ZSTD_parameters params = zstd_get_params(0, c->opts.encoded_extent_max);
struct {
unsigned feature;
unsigned type;
@ -558,8 +560,8 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
zlib_inflate_workspacesize(), },
{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
ZSTD_CCtxWorkspaceBound(params.cParams),
ZSTD_DCtxWorkspaceBound() },
zstd_cctx_workspace_bound(&params.cParams),
zstd_dctx_workspace_bound() },
}, *i;
int ret = 0;


@ -470,16 +470,13 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
if (ret)
return ret;
for_each_btree_key_norestart(trans, iter, BTREE_ID_dirents,
SPOS(dir.inum, 0, snapshot), 0, k, ret) {
if (k.k->p.inode > dir.inum)
break;
for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
SPOS(dir.inum, 0, snapshot),
POS(dir.inum, U64_MAX), 0, k, ret)
if (k.k->type == KEY_TYPE_dirent) {
ret = -ENOTEMPTY;
break;
}
}
bch2_trans_iter_exit(trans, &iter);
return ret;
@ -503,11 +500,9 @@ retry:
if (ret)
goto err;
for_each_btree_key_norestart(&trans, iter, BTREE_ID_dirents,
SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) {
if (k.k->p.inode > inum.inum)
break;
for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_dirents,
SPOS(inum.inum, ctx->pos, snapshot),
POS(inum.inum, U64_MAX), 0, k, ret) {
if (k.k->type != KEY_TYPE_dirent)
continue;


@ -14,6 +14,8 @@ void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
.key_invalid = bch2_stripe_invalid, \
.val_to_text = bch2_stripe_to_text, \
.swab = bch2_ptr_swab, \
.trans_trigger = bch2_trans_mark_stripe, \
.atomic_trigger = bch2_mark_stripe, \
}
static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)


@ -66,6 +66,26 @@ do { \
_ret; \
})
/*
* When a transaction update discovers or causes a filesystem inconsistency,
* it's helpful to also dump the pending updates:
*/
#define bch2_trans_inconsistent(trans, ...) \
({ \
bch_err(trans->c, __VA_ARGS__); \
bch2_inconsistent_error(trans->c); \
bch2_dump_trans_updates(trans); \
})
#define bch2_trans_inconsistent_on(cond, trans, ...) \
({ \
bool _ret = unlikely(!!(cond)); \
\
if (_ret) \
bch2_trans_inconsistent(trans, __VA_ARGS__); \
_ret; \
})
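/*
 * Illustrative usage (an editorial sketch mirroring bch2_mark_alloc()
 * elsewhere in this commit, not part of the header itself):
 *
 *	if (bch2_trans_inconsistent_on(new_u.bucket >= ca->mi.nbuckets,
 *				       trans,
 *				       "alloc key outside range of device's buckets"))
 *		return -EIO;
 */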
/*
* Fsck errors: inconsistency errors we detect at mount time, and should ideally
* be able to repair:


@ -381,6 +381,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
.key_invalid = bch2_btree_ptr_invalid, \
.val_to_text = bch2_btree_ptr_to_text, \
.swab = bch2_ptr_swab, \
.trans_trigger = bch2_trans_mark_extent, \
.atomic_trigger = bch2_mark_extent, \
}
#define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) { \
@ -388,6 +390,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
.val_to_text = bch2_btree_ptr_v2_to_text, \
.swab = bch2_ptr_swab, \
.compat = bch2_btree_ptr_v2_compat, \
.trans_trigger = bch2_trans_mark_extent, \
.atomic_trigger = bch2_mark_extent, \
}
/* KEY_TYPE_extent: */
@ -402,6 +406,8 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
.swab = bch2_ptr_swab, \
.key_normalize = bch2_extent_normalize, \
.key_merge = bch2_extent_merge, \
.trans_trigger = bch2_trans_mark_extent, \
.atomic_trigger = bch2_mark_extent, \
}
/* KEY_TYPE_reservation: */
@ -414,6 +420,8 @@ bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
.key_invalid = bch2_reservation_invalid, \
.val_to_text = bch2_reservation_to_text, \
.key_merge = bch2_reservation_merge, \
.trans_trigger = bch2_trans_mark_reservation, \
.atomic_trigger = bch2_mark_reservation, \
}
/* Extent checksum entries: */


@@ -35,6 +35,15 @@
#include <trace/events/bcachefs.h>
#include <trace/events/writeback.h>
static inline bool bio_full(struct bio *bio, unsigned len)
{
if (bio->bi_vcnt >= bio->bi_max_vecs)
return true;
if (bio->bi_iter.bi_size > UINT_MAX - len)
return true;
return false;
}
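bio_full() is carried locally here, presumably because it is no longer available from the headers this tree builds against; the intended use is a cheap can-this-bio-take-more-bytes check before adding a page. Illustrative only, not from this commit:

	if (!bio_full(bio, PAGE_SIZE)) {
		__bio_add_page(bio, page, PAGE_SIZE, 0);
	} else {
		submit_bio(bio);	/* flush the full bio, then start a new one */
	}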
static inline struct address_space *faults_disabled_mapping(void)
{
return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL);
@@ -1808,11 +1817,11 @@ again:
* to check that the address is actually valid, when atomic
* usercopies are used, below.
*/
if (unlikely(iov_iter_fault_in_readable(iter, bytes))) {
if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
bytes = min_t(unsigned long, iov_iter_count(iter),
PAGE_SIZE - offset);
if (unlikely(iov_iter_fault_in_readable(iter, bytes))) {
if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
ret = -EFAULT;
break;
}
@@ -1870,7 +1879,7 @@ static void bch2_dio_read_complete(struct closure *cl)
{
struct dio_read *dio = container_of(cl, struct dio_read, cl);
dio->req->ki_complete(dio->req, dio->ret, 0);
dio->req->ki_complete(dio->req, dio->ret);
bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
}
@@ -1919,7 +1928,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
iter->count -= shorten;
bio = bio_alloc_bioset(GFP_KERNEL,
iov_iter_npages(iter, BIO_MAX_VECS),
bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
&c->dio_read_bioset);
bio->bi_end_io = bch2_direct_IO_read_endio;
@@ -1954,7 +1963,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
goto start;
while (iter->count) {
bio = bio_alloc_bioset(GFP_KERNEL,
iov_iter_npages(iter, BIO_MAX_VECS),
bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
&c->bio_read);
bio->bi_end_io = bch2_direct_IO_read_split_endio;
start:
@@ -2101,7 +2110,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
while (1) {
iter_count = dio->iter.count;
if (kthread)
if (kthread && dio->mm)
kthread_use_mm(dio->mm);
BUG_ON(current->faults_disabled_mapping);
current->faults_disabled_mapping = mapping;
@@ -2111,7 +2120,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
dropped_locks = fdm_dropped_locks();
current->faults_disabled_mapping = NULL;
if (kthread)
if (kthread && dio->mm)
kthread_unuse_mm(dio->mm);
/*
@@ -2244,7 +2253,7 @@ err:
inode_dio_end(&inode->v);
if (!sync) {
req->ki_complete(req, ret, 0);
req->ki_complete(req, ret);
ret = -EIOCBQUEUED;
}
return ret;
@@ -2304,9 +2313,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
}
bio = bio_alloc_bioset(GFP_KERNEL,
iov_iter_is_bvec(iter)
? 0
: iov_iter_npages(iter, BIO_MAX_VECS),
bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
&c->dio_write_bioset);
dio = container_of(bio, struct dio_write, op.wbio.bio);
init_completion(&dio->done);

View File

@@ -30,6 +30,7 @@
#include <linux/pagemap.h>
#include <linux/posix_acl.h>
#include <linux/random.h>
#include <linux/seq_file.h>
#include <linux/statfs.h>
#include <linux/string.h>
#include <linux/xattr.h>
@@ -934,9 +935,8 @@ retry:
SPOS(ei->v.i_ino, start, snapshot), 0);
while (!(ret = btree_trans_too_many_iters(&trans)) &&
(k = bch2_btree_iter_peek(&iter)).k &&
!(ret = bkey_err(k)) &&
bkey_cmp(iter.pos, end) < 0) {
(k = bch2_btree_iter_peek_upto(&iter, end)).k &&
!(ret = bkey_err(k))) {
enum btree_id data_btree = BTREE_ID_extents;
if (!bkey_extent_is_data(k.k) &&

View File

@@ -606,12 +606,12 @@ static int bch2_inode_delete_keys(struct btree_trans *trans,
bch2_btree_iter_set_snapshot(&iter, snapshot);
k = bch2_btree_iter_peek(&iter);
k = bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
ret = bkey_err(k);
if (ret)
goto err;
if (!k.k || iter.pos.inode != inum.inum)
if (!k.k)
break;
bkey_init(&delete.k);

View File

@@ -13,11 +13,15 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_inode (struct bkey_ops) { \
.key_invalid = bch2_inode_invalid, \
.val_to_text = bch2_inode_to_text, \
.trans_trigger = bch2_trans_mark_inode, \
.atomic_trigger = bch2_mark_inode, \
}
#define bch2_bkey_ops_inode_v2 (struct bkey_ops) { \
.key_invalid = bch2_inode_v2_invalid, \
.val_to_text = bch2_inode_to_text, \
.trans_trigger = bch2_trans_mark_inode, \
.atomic_trigger = bch2_mark_inode, \
}
static inline bool bkey_is_inode(const struct bkey *k)

View File

@@ -241,6 +241,9 @@ static int journal_entry_open(struct journal *j)
if (u64s <= 0)
return cur_entry_journal_full;
if (fifo_empty(&j->pin) && j->reclaim_thread)
wake_up_process(j->reclaim_thread);
/*
* The fifo_push() needs to happen at the same time as j->seq is
* incremented for journal_last_seq() to be calculated correctly
@@ -628,31 +631,6 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq)
return ret ?: ret2 < 0 ? ret2 : 0;
}
int bch2_journal_meta(struct journal *j)
{
struct journal_buf *buf;
struct journal_res res;
int ret;
memset(&res, 0, sizeof(res));
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
if (ret)
return ret;
buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
buf->must_flush = true;
if (!buf->flush_time) {
buf->flush_time = local_clock() ?: 1;
buf->expires = jiffies;
}
bch2_journal_res_put(j, &res);
return bch2_journal_flush_seq(j, res.seq);
}
/*
* bch2_journal_flush_async - if there is an open journal entry, or a journal
* still being written, write it and wait for the write to complete
@@ -705,6 +683,64 @@ out:
return ret;
}
int bch2_journal_meta(struct journal *j)
{
struct journal_buf *buf;
struct journal_res res;
int ret;
memset(&res, 0, sizeof(res));
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
if (ret)
return ret;
buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
buf->must_flush = true;
if (!buf->flush_time) {
buf->flush_time = local_clock() ?: 1;
buf->expires = jiffies;
}
bch2_journal_res_put(j, &res);
return bch2_journal_flush_seq(j, res.seq);
}
int bch2_journal_log_msg(struct journal *j, const char *fmt, ...)
{
struct jset_entry_log *entry;
struct journal_res res = { 0 };
unsigned msglen, u64s;
va_list args;
int ret;
va_start(args, fmt);
msglen = vsnprintf(NULL, 0, fmt, args) + 1;
va_end(args);
u64s = jset_u64s(DIV_ROUND_UP(msglen, sizeof(u64)));
ret = bch2_journal_res_get(j, &res, u64s, 0);
if (ret)
return ret;
entry = container_of(journal_res_entry(j, &res),
struct jset_entry_log, entry);
memset(entry, 0, u64s * sizeof(u64));
entry->entry.type = BCH_JSET_ENTRY_log;
entry->entry.u64s = u64s - 1;
va_start(args, fmt);
vsnprintf(entry->d, INT_MAX, fmt, args);
va_end(args);
bch2_journal_res_put(j, &res);
return bch2_journal_flush_seq(j, res.seq);
}
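bch2_journal_log_msg() formats its arguments into a BCH_JSET_ENTRY_log entry, padded out to a whole number of u64s, and flushes the sequence it landed in; the recovery change further down uses it to record that journal replay finished. An illustrative call (message text is made up):

	ret = bch2_journal_log_msg(&c->journal, "%s: shutting down", c->name);
	if (ret)
		bch_err(c, "error writing log message to journal");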
/* block/unlock the journal: */
void bch2_journal_unblock(struct journal *j)
@@ -802,7 +838,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
* superblock before inserting into the journal array
*/
pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
pos = ja->discard_idx ?: ja->nr;
__array_insert_item(ja->buckets, ja->nr, pos);
__array_insert_item(ja->bucket_seq, ja->nr, pos);
__array_insert_item(journal_buckets->buckets, ja->nr, pos);

View File

@@ -480,6 +480,7 @@ int bch2_journal_flush_seq(struct journal *, u64);
int bch2_journal_flush(struct journal *);
bool bch2_journal_noflush_seq(struct journal *, u64);
int bch2_journal_meta(struct journal *);
int bch2_journal_log_msg(struct journal *, const char *, ...);
void bch2_journal_halt(struct journal *);

View File

@@ -670,6 +670,7 @@ static int bch2_journal_reclaim_thread(void *arg)
struct journal *j = arg;
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned long delay, now;
bool journal_empty;
int ret = 0;
set_freezable();
@@ -696,10 +697,17 @@ static int bch2_journal_reclaim_thread(void *arg)
break;
if (j->reclaim_kicked)
break;
if (time_after_eq(jiffies, j->next_reclaim))
break;
freezable_schedule_timeout(j->next_reclaim - jiffies);
spin_lock(&j->lock);
journal_empty = fifo_empty(&j->pin);
spin_unlock(&j->lock);
if (journal_empty)
freezable_schedule();
else if (time_after(j->next_reclaim, jiffies))
freezable_schedule_timeout(j->next_reclaim - jiffies);
else
break;
}
__set_current_state(TASK_RUNNING);
}
@@ -809,10 +817,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
seq = 0;
spin_lock(&j->lock);
while (!ret && seq < j->pin.back) {
while (!ret) {
struct bch_replicas_padded replicas;
seq = max(seq, journal_last_seq(j));
if (seq >= j->pin.back)
break;
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
journal_seq_pin(j, seq)->devs);
seq++;

View File

@@ -240,6 +240,10 @@ struct journal {
spinlock_t err_lock;
struct mutex reclaim_lock;
/*
* Used for waiting until journal reclaim has freed up space in the
* journal:
*/
wait_queue_head_t reclaim_wait;
struct task_struct *reclaim_thread;
bool reclaim_kicked;

View File

@@ -578,6 +578,9 @@ static int bch2_journal_replay(struct bch_fs *c)
bch2_journal_set_replay_done(j);
bch2_journal_flush_all_pins(j);
ret = bch2_journal_error(j);
if (keys->nr && !ret)
bch2_journal_log_msg(&c->journal, "journal replay finished");
err:
kvfree(keys_sorted);
return ret;

View File

@@ -10,7 +10,9 @@ bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
#define bch2_bkey_ops_reflink_p (struct bkey_ops) { \
.key_invalid = bch2_reflink_p_invalid, \
.val_to_text = bch2_reflink_p_to_text, \
.key_merge = bch2_reflink_p_merge, \
.key_merge = bch2_reflink_p_merge, \
.trans_trigger = bch2_trans_mark_reflink_p, \
.atomic_trigger = bch2_mark_reflink_p, \
}
const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -21,6 +23,8 @@ void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
.key_invalid = bch2_reflink_v_invalid, \
.val_to_text = bch2_reflink_v_to_text, \
.swab = bch2_ptr_swab, \
.trans_trigger = bch2_trans_mark_extent, \
.atomic_trigger = bch2_mark_extent, \
}
const char *bch2_indirect_inline_data_invalid(const struct bch_fs *,

View File

@@ -163,12 +163,10 @@ bch2_hash_lookup(struct btree_trans *trans,
if (ret)
return ret;
for_each_btree_key_norestart(trans, *iter, desc.btree_id,
for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
SPOS(inum.inum, desc.hash_key(info, key), snapshot),
POS(inum.inum, U64_MAX),
BTREE_ITER_SLOTS|flags, k, ret) {
if (iter->pos.inode != inum.inum)
break;
if (is_visible_key(desc, inum, k)) {
if (!desc.cmp_key(k, key))
return 0;
@@ -199,15 +197,12 @@ bch2_hash_hole(struct btree_trans *trans,
if (ret)
return ret;
for_each_btree_key_norestart(trans, *iter, desc.btree_id,
for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
SPOS(inum.inum, desc.hash_key(info, key), snapshot),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
if (iter->pos.inode != inum.inum)
break;
POS(inum.inum, U64_MAX),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret)
if (!is_visible_key(desc, inum, k))
return 0;
}
bch2_trans_iter_exit(trans, iter);
return ret ?: -ENOSPC;
@@ -260,14 +255,12 @@ int bch2_hash_set(struct btree_trans *trans,
if (ret)
return ret;
for_each_btree_key_norestart(trans, iter, desc.btree_id,
for_each_btree_key_upto_norestart(trans, iter, desc.btree_id,
SPOS(inum.inum,
desc.hash_bkey(info, bkey_i_to_s_c(insert)),
snapshot),
POS(inum.inum, U64_MAX),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
if (iter.pos.inode != inum.inum)
break;
if (is_visible_key(desc, inum, k)) {
if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert)))
goto found;

View File

@@ -607,23 +607,32 @@ STORE(bch2_fs_opts_dir)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
const struct bch_option *opt = container_of(attr, struct bch_option, attr);
int ret, id = opt - bch2_opt_table;
int ret = size, id = opt - bch2_opt_table;
char *tmp;
u64 v;
/*
* We don't need to take c->writes for correctness, but it eliminates an
* unsightly error message in the dmesg log when we're RO:
*/
if (unlikely(!percpu_ref_tryget(&c->writes)))
return -EROFS;
tmp = kstrdup(buf, GFP_KERNEL);
if (!tmp)
return -ENOMEM;
if (!tmp) {
ret = -ENOMEM;
goto err;
}
ret = bch2_opt_parse(c, NULL, opt, strim(tmp), &v);
kfree(tmp);
if (ret < 0)
return ret;
goto err;
ret = bch2_opt_check_may_set(c, id, v);
if (ret < 0)
return ret;
goto err;
bch2_opt_set_sb(c, opt, v);
bch2_opt_set_by_id(&c->opts, id, v);
@@ -633,8 +642,9 @@ STORE(bch2_fs_opts_dir)
bch2_rebalance_add_work(c, S64_MAX);
rebalance_wakeup(c);
}
return size;
err:
percpu_ref_put(&c->writes);
return ret;
}
SYSFS_OPS(bch2_fs_opts_dir);
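The rewritten store path now holds a reference on c->writes across the whole operation, so every exit, success or failure, funnels through the same put. A condensed sketch of the pattern; do_parse_and_apply() is a hypothetical stand-in for the parsing and option-setting steps above:

	if (unlikely(!percpu_ref_tryget(&c->writes)))
		return -EROFS;

	ret = do_parse_and_apply(c, buf);	/* hypothetical stand-in */

	percpu_ref_put(&c->writes);
	return ret < 0 ? ret : size;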

View File

@@ -15,15 +15,14 @@ static void delete_test_keys(struct bch_fs *c)
int ret;
ret = bch2_btree_delete_range(c, BTREE_ID_extents,
POS_MIN, SPOS_MAX,
BTREE_ITER_ALL_SNAPSHOTS,
SPOS(0, 0, U32_MAX), SPOS_MAX,
0,
NULL);
BUG_ON(ret);
ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
POS_MIN, SPOS_MAX,
BTREE_ITER_ALL_SNAPSHOTS,
NULL);
SPOS(0, 0, U32_MAX), SPOS_MAX,
0, NULL);
BUG_ON(ret);
}
@@ -814,9 +813,8 @@ static int seq_delete(struct bch_fs *c, u64 nr)
int ret;
ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
POS_MIN, SPOS_MAX,
BTREE_ITER_ALL_SNAPSHOTS,
NULL);
SPOS(0, 0, U32_MAX), SPOS_MAX,
0, NULL);
if (ret)
bch_err(c, "error in seq_delete: %i", ret);
return ret;

View File

@@ -372,7 +372,7 @@ static inline void pr_time(struct printbuf *out, u64 _time)
#ifdef __KERNEL__
static inline void uuid_unparse_lower(u8 *uuid, char *out)
{
sprintf(out, "%plU", uuid);
sprintf(out, "%pUb", uuid);
}
#else
#include <uuid/uuid.h>

View File

@@ -311,13 +311,9 @@ retry:
if (ret)
goto err;
for_each_btree_key_norestart(&trans, iter, BTREE_ID_xattrs,
SPOS(inum, offset, snapshot), 0, k, ret) {
BUG_ON(k.k->p.inode < inum);
if (k.k->p.inode > inum)
break;
for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_xattrs,
SPOS(inum, offset, snapshot),
POS(inum, U64_MAX), 0, k, ret) {
if (k.k->type != KEY_TYPE_xattr)
continue;

View File

@@ -0,0 +1,157 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/zstd.h>
#define ZSTD_FORWARD_IF_ERR(ret) \
do { \
size_t const __ret = (ret); \
if (ZSTD_isError(__ret)) \
return __ret; \
} while (0)
static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters,
unsigned long long pledged_src_size)
{
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset(
cctx, ZSTD_reset_session_and_parameters));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize(
cctx, pledged_src_size));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_windowLog, parameters->cParams.windowLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_hashLog, parameters->cParams.hashLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_chainLog, parameters->cParams.chainLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_searchLog, parameters->cParams.searchLog));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_minMatch, parameters->cParams.minMatch));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_targetLength, parameters->cParams.targetLength));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_strategy, parameters->cParams.strategy));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag));
ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag));
return 0;
}
int zstd_min_clevel(void)
{
return ZSTD_minCLevel();
}
EXPORT_SYMBOL(zstd_min_clevel);
int zstd_max_clevel(void)
{
return ZSTD_maxCLevel();
}
EXPORT_SYMBOL(zstd_max_clevel);
size_t zstd_compress_bound(size_t src_size)
{
return ZSTD_compressBound(src_size);
}
EXPORT_SYMBOL(zstd_compress_bound);
zstd_parameters zstd_get_params(int level,
unsigned long long estimated_src_size)
{
return ZSTD_getParams(level, estimated_src_size, 0);
}
EXPORT_SYMBOL(zstd_get_params);
size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
{
return ZSTD_estimateCCtxSize_usingCParams(*cparams);
}
EXPORT_SYMBOL(zstd_cctx_workspace_bound);
zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
{
if (workspace == NULL)
return NULL;
return ZSTD_initStaticCCtx(workspace, workspace_size);
}
EXPORT_SYMBOL(zstd_init_cctx);
size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size, const zstd_parameters *parameters)
{
ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size));
return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
}
EXPORT_SYMBOL(zstd_compress_cctx);
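A minimal sketch of driving the wrapped one-shot API, assuming a kvmalloc'd workspace and a caller-supplied dst buffer sized with zstd_compress_bound():

	zstd_parameters params = zstd_get_params(3, src_size);
	size_t ws_size = zstd_cctx_workspace_bound(&params.cParams);
	void *ws = kvmalloc(ws_size, GFP_KERNEL);
	zstd_cctx *cctx;
	size_t n;

	if (!ws)
		return -ENOMEM;

	cctx = zstd_init_cctx(ws, ws_size);	/* NULL only if ws were NULL */
	n = zstd_compress_cctx(cctx, dst, dst_capacity, src, src_size, &params);
	if (zstd_is_error(n))
		pr_err("zstd: %s\n", zstd_get_error_name(n));

	kvfree(ws);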
size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
{
return ZSTD_estimateCStreamSize_usingCParams(*cparams);
}
EXPORT_SYMBOL(zstd_cstream_workspace_bound);
zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
{
zstd_cstream *cstream;
if (workspace == NULL)
return NULL;
cstream = ZSTD_initStaticCStream(workspace, workspace_size);
if (cstream == NULL)
return NULL;
/* 0 means "unknown" in the kernel zstd API, but means a zero-length source in the upstream API */
if (pledged_src_size == 0)
pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size)))
return NULL;
return cstream;
}
EXPORT_SYMBOL(zstd_init_cstream);
size_t zstd_reset_cstream(zstd_cstream *cstream,
unsigned long long pledged_src_size)
{
return ZSTD_resetCStream(cstream, pledged_src_size);
}
EXPORT_SYMBOL(zstd_reset_cstream);
size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
zstd_in_buffer *input)
{
return ZSTD_compressStream(cstream, output, input);
}
EXPORT_SYMBOL(zstd_compress_stream);
size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
{
return ZSTD_flushStream(cstream, output);
}
EXPORT_SYMBOL(zstd_flush_stream);
size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
{
return ZSTD_endStream(cstream, output);
}
EXPORT_SYMBOL(zstd_end_stream);
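For the streaming entry points the shape is similar; a hedged sketch, reusing params/ws/ws_size from the one-shot example above and assuming dst is large enough that a single zstd_end_stream() call drains everything (it returns the number of bytes still buffered, 0 when done):

	zstd_in_buffer in = { .src = src, .size = src_size, .pos = 0 };
	zstd_out_buffer out = { .dst = dst, .size = dst_capacity, .pos = 0 };
	zstd_cstream *cs = zstd_init_cstream(&params, src_size, ws, ws_size);

	if (!cs)
		return -EINVAL;
	while (in.pos < in.size)
		if (zstd_is_error(zstd_compress_stream(cs, &out, &in)))
			return -EIO;
	if (zstd_end_stream(cs, &out) != 0)
		return -EIO;
	/* out.pos now holds the compressed length */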
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Zstd Compressor");

View File

@@ -0,0 +1,103 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/zstd.h>
/* Common symbols. zstd_compress must depend on zstd_decompress. */
unsigned int zstd_is_error(size_t code)
{
return ZSTD_isError(code);
}
EXPORT_SYMBOL(zstd_is_error);
zstd_error_code zstd_get_error_code(size_t code)
{
return ZSTD_getErrorCode(code);
}
EXPORT_SYMBOL(zstd_get_error_code);
const char *zstd_get_error_name(size_t code)
{
return ZSTD_getErrorName(code);
}
EXPORT_SYMBOL(zstd_get_error_name);
/* Decompression symbols. */
size_t zstd_dctx_workspace_bound(void)
{
return ZSTD_estimateDCtxSize();
}
EXPORT_SYMBOL(zstd_dctx_workspace_bound);
zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
{
if (workspace == NULL)
return NULL;
return ZSTD_initStaticDCtx(workspace, workspace_size);
}
EXPORT_SYMBOL(zstd_init_dctx);
size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size)
{
return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
}
EXPORT_SYMBOL(zstd_decompress_dctx);
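Mirror image on the decompression side; again a sketch assuming a kvmalloc'd workspace:

	size_t ws_size = zstd_dctx_workspace_bound();
	void *ws = kvmalloc(ws_size, GFP_KERNEL);
	zstd_dctx *dctx;
	size_t n;

	if (!ws)
		return -ENOMEM;

	dctx = zstd_init_dctx(ws, ws_size);
	n = zstd_decompress_dctx(dctx, dst, dst_capacity, src, src_size);
	if (zstd_is_error(n))
		pr_err("zstd: %s\n", zstd_get_error_name(n));

	kvfree(ws);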
size_t zstd_dstream_workspace_bound(size_t max_window_size)
{
return ZSTD_estimateDStreamSize(max_window_size);
}
EXPORT_SYMBOL(zstd_dstream_workspace_bound);
zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
size_t workspace_size)
{
if (workspace == NULL)
return NULL;
(void)max_window_size;
return ZSTD_initStaticDStream(workspace, workspace_size);
}
EXPORT_SYMBOL(zstd_init_dstream);
size_t zstd_reset_dstream(zstd_dstream *dstream)
{
return ZSTD_resetDStream(dstream);
}
EXPORT_SYMBOL(zstd_reset_dstream);
size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
zstd_in_buffer *input)
{
return ZSTD_decompressStream(dstream, output, input);
}
EXPORT_SYMBOL(zstd_decompress_stream);
size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
{
return ZSTD_findFrameCompressedSize(src, src_size);
}
EXPORT_SYMBOL(zstd_find_frame_compressed_size);
size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
size_t src_size)
{
return ZSTD_getFrameHeader(header, src, src_size);
}
EXPORT_SYMBOL(zstd_get_frame_header);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Zstd Decompressor");