diff --git a/.bcachefs_revision b/.bcachefs_revision index 90d4c53b..ecdb9454 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -fb365e1745d352401a2af47aedb43ab1bf6f8ce1 +3610542890c4b8329d83361ba48fa874d27c97a8 diff --git a/Makefile b/Makefile index d6286dea..a305b4bc 100644 --- a/Makefile +++ b/Makefile @@ -134,5 +134,5 @@ update-bcachefs-sources: .PHONE: update-commit-bcachefs-sources update-commit-bcachefs-sources: update-bcachefs-sources - git commit -m "Update bcachefs sources to `cut -b1-10 .bcachefs_revision`" \ + git commit -m "Update bcachefs sources to `cd $(LINUX_DIR); git show --oneline --no-patch`"\ .bcachefs_revision libbcachefs/ diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index d8bfcc1f..0f2ea7c1 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -13,6 +13,7 @@ #define _CRYPTO_ALGAPI_H #include +#include struct crypto_type { unsigned int (*ctxsize)(struct crypto_alg *alg, u32 type, u32 mask); diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h new file mode 100644 index 00000000..8b5f4254 --- /dev/null +++ b/include/crypto/skcipher.h @@ -0,0 +1,149 @@ +/* + * Symmetric key ciphers. + * + * Copyright (c) 2007-2015 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _CRYPTO_SKCIPHER_H +#define _CRYPTO_SKCIPHER_H + +#include +#include +#include + +struct skcipher_request { + unsigned int cryptlen; + + u8 *iv; + + struct scatterlist *src; + struct scatterlist *dst; + + struct crypto_tfm *tfm; + //struct crypto_async_request base; + + void *__ctx[] CRYPTO_MINALIGN_ATTR; +}; + +struct crypto_skcipher { + int (*setkey)(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen); + int (*encrypt)(struct skcipher_request *req); + int (*decrypt)(struct skcipher_request *req); + + unsigned int ivsize; + unsigned int reqsize; + unsigned int keysize; + + struct crypto_tfm base; +}; + +struct skcipher_alg { + int (*setkey)(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen); + int (*encrypt)(struct skcipher_request *req); + int (*decrypt)(struct skcipher_request *req); + int (*init)(struct crypto_skcipher *tfm); + void (*exit)(struct crypto_skcipher *tfm); + + unsigned int min_keysize; + unsigned int max_keysize; + unsigned int ivsize; + unsigned int chunksize; + unsigned int walksize; + + struct crypto_alg base; +}; + +#define SKCIPHER_REQUEST_ON_STACK(name, tfm) \ + char __##name##_desc[sizeof(struct skcipher_request) + \ + crypto_skcipher_reqsize(tfm)] CRYPTO_MINALIGN_ATTR; \ + struct skcipher_request *name = (void *)__##name##_desc + +static inline void *crypto_skcipher_ctx(struct crypto_skcipher *tfm) +{ + return crypto_tfm_ctx(&tfm->base); +} + +static inline struct crypto_skcipher *__crypto_skcipher_cast( + struct crypto_tfm *tfm) +{ + return container_of(tfm, struct crypto_skcipher, base); +} + +struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, + u32 type, u32 mask); + +static inline struct crypto_tfm *crypto_skcipher_tfm( + struct crypto_skcipher *tfm) +{ + return &tfm->base; +} + +static inline void crypto_free_skcipher(struct crypto_skcipher *tfm) +{ + crypto_destroy_tfm(tfm, crypto_skcipher_tfm(tfm)); +} + +static inline struct skcipher_alg *crypto_skcipher_alg( + struct crypto_skcipher *tfm) +{ + return 
container_of(crypto_skcipher_tfm(tfm)->__crt_alg, + struct skcipher_alg, base); +} + +static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return tfm->setkey(tfm, key, keylen); +} + +static inline struct crypto_skcipher *crypto_skcipher_reqtfm( + struct skcipher_request *req) +{ + return __crypto_skcipher_cast(req->tfm); +} + +static inline int crypto_skcipher_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + + return tfm->encrypt(req); +} + +static inline int crypto_skcipher_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + + return tfm->decrypt(req); +} + +static inline unsigned int crypto_skcipher_reqsize(struct crypto_skcipher *tfm) +{ + return tfm->reqsize; +} + +static inline void skcipher_request_set_tfm(struct skcipher_request *req, + struct crypto_skcipher *tfm) +{ + req->tfm = crypto_skcipher_tfm(tfm); +} + +static inline void skcipher_request_set_crypt( + struct skcipher_request *req, + struct scatterlist *src, struct scatterlist *dst, + unsigned int cryptlen, void *iv) +{ + req->src = src; + req->dst = dst; + req->cryptlen = cryptlen; + req->iv = iv; +} + +#endif /* _CRYPTO_SKCIPHER_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4b06a0cc..f196c704 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -60,7 +60,7 @@ static inline struct inode *file_inode(const struct file *f) #define BDEVNAME_SIZE 32 struct request_queue { - struct backing_dev_info backing_dev_info; + struct backing_dev_info *backing_dev_info; }; struct gendisk { @@ -82,6 +82,9 @@ struct block_device { struct gendisk __bd_disk; int bd_fd; int bd_sync_fd; + + struct backing_dev_info *bd_bdi; + struct backing_dev_info __bd_bdi; }; void generic_make_request(struct bio *); @@ -100,13 +103,6 @@ int blkdev_issue_discard(struct block_device *, sector_t, #define blk_queue_discard(q) ((void) (q), 0) #define blk_queue_nonrot(q) ((void) (q), 0) -static inline struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - return &q->backing_dev_info; -} - unsigned bdev_logical_block_size(struct block_device *bdev); sector_t get_capacity(struct gendisk *disk); diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 6b784c59..394e3d92 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -1,87 +1,648 @@ -#ifndef __LZ4_H__ -#define __LZ4_H__ -/* - * LZ4 Kernel Interface +/* LZ4 Kernel Interface * * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. + * + * This file is based on the original header file + * for LZ4 - Fast LZ compression algorithm. + * + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2016, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * You can contact the author at : + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 */ -#define LZ4_MEM_COMPRESS (16384) -#define LZ4HC_MEM_COMPRESS (262144 + (2 * sizeof(unsigned char *))) + +#ifndef __LZ4_H__ +#define __LZ4_H__ + +#include +#include /* memset, memcpy */ + +/*-************************************************************************ + * CONSTANTS + **************************************************************************/ +/* + * LZ4_MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes + * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio + * Reduced memory usage can improve speed, due to cache effect + * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache + */ +#define LZ4_MEMORY_USAGE 14 + +#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ +#define LZ4_COMPRESSBOUND(isize) (\ + (unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \ + ? 0 \ + : (isize) + ((isize)/255) + 16) + +#define LZ4_ACCELERATION_DEFAULT 1 +#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) +#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) +#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) + +#define LZ4HC_MIN_CLEVEL 3 +#define LZ4HC_DEFAULT_CLEVEL 9 +#define LZ4HC_MAX_CLEVEL 16 + +#define LZ4HC_DICTIONARY_LOGSIZE 16 +#define LZ4HC_MAXD (1<= LZ4_compressBound(inputSize). + * It also runs faster, so it's a recommended setting. + * If the function cannot compress 'source' into a more limited 'dest' budget, + * compression stops *immediately*, and the function result is zero. + * As a consequence, 'dest' content is not valid. + * + * Return: Number of bytes written into buffer 'dest' + * (necessarily <= maxOutputSize) or 0 if compression fails */ -int lz4_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem); +int LZ4_compress_default(const char *source, char *dest, int inputSize, + int maxOutputSize, void *wrkmem); - /* - * lz4hc_compress() - * src : source address of the original data - * src_len : size of the original data - * dst : output buffer address of the compressed data - * This requires 'dst' of size LZ4_COMPRESSBOUND. - * dst_len : is the output size, which is returned after compress done - * workmem : address of the working memory. - * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. 
- * return : Success if return 0 - * Error if return (< 0) - * note : Destination buffer and workmem must be already allocated with - * the defined size. - */ -int lz4hc_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem); - -/* - * lz4_decompress() - * src : source address of the compressed data - * src_len : is the input size, whcih is returned after decompress done - * dest : output buffer address of the decompressed data - * actual_dest_len: is the size of uncompressed data, supposing it's known - * return : Success if return 0 - * Error if return (< 0) - * note : Destination buffer must be already allocated. - * slightly faster than lz4_decompress_unknownoutputsize() +/** + * LZ4_compress_fast() - As LZ4_compress_default providing an acceleration param + * @source: source address of the original data + * @dest: output buffer address of the compressed data + * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE + * @maxOutputSize: full or partial size of buffer 'dest' + * which must be already allocated + * @acceleration: acceleration factor + * @wrkmem: address of the working memory. + * This requires 'workmem' of LZ4_MEM_COMPRESS. + * + * Same as LZ4_compress_default(), but allows to select an "acceleration" + * factor. The larger the acceleration value, the faster the algorithm, + * but also the lesser the compression. It's a trade-off. It can be fine tuned, + * with each successive value providing roughly +~3% to speed. + * An acceleration value of "1" is the same as regular LZ4_compress_default() + * Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT, which is 1. + * + * Return: Number of bytes written into buffer 'dest' + * (necessarily <= maxOutputSize) or 0 if compression fails */ -int lz4_decompress(const unsigned char *src, size_t *src_len, - unsigned char *dest, size_t actual_dest_len); +int LZ4_compress_fast(const char *source, char *dest, int inputSize, + int maxOutputSize, int acceleration, void *wrkmem); -/* - * lz4_decompress_unknownoutputsize() - * src : source address of the compressed data - * src_len : is the input size, therefore the compressed size - * dest : output buffer address of the decompressed data - * dest_len: is the max size of the destination buffer, which is - * returned with actual size of decompressed data after - * decompress done - * return : Success if return 0 - * Error if return (< 0) - * note : Destination buffer must be already allocated. +/** + * LZ4_compress_destSize() - Compress as much data as possible + * from source to dest + * @source: source address of the original data + * @dest: output buffer address of the compressed data + * @sourceSizePtr: will be modified to indicate how many bytes where read + * from 'source' to fill 'dest'. New value is necessarily <= old value. + * @targetDestSize: Size of buffer 'dest' which must be already allocated + * @wrkmem: address of the working memory. + * This requires 'workmem' of LZ4_MEM_COMPRESS. + * + * Reverse the logic, by compressing as much data as possible + * from 'source' buffer into already allocated buffer 'dest' + * of size 'targetDestSize'. + * This function either compresses the entire 'source' content into 'dest' + * if it's large enough, or fill 'dest' buffer completely with as much data as + * possible from 'source'. 
+ * + * Return: Number of bytes written into 'dest' (necessarily <= targetDestSize) + * or 0 if compression fails */ -int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, - unsigned char *dest, size_t *dest_len); +int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr, + int targetDestSize, void *wrkmem); + +/*-************************************************************************ + * Decompression Functions + **************************************************************************/ + +/** + * LZ4_decompress_fast() - Decompresses data from 'source' into 'dest' + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated with 'originalSize' bytes + * @originalSize: is the original and therefore uncompressed size + * + * Decompresses data from 'source' into 'dest'. + * This function fully respect memory boundaries for properly formed + * compressed data. + * It is a bit faster than LZ4_decompress_safe(). + * However, it does not provide any protection against intentionally + * modified data stream (malicious input). + * Use this function in trusted environment only + * (data to decode comes from a trusted source). + * + * Return: number of bytes read from the source buffer + * or a negative result if decompression fails. + */ +int LZ4_decompress_fast(const char *source, char *dest, int originalSize); + +/** + * LZ4_decompress_safe() - Decompression protected against buffer overflow + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated + * @compressedSize: is the precise full size of the compressed block + * @maxDecompressedSize: is the size of 'dest' buffer + * + * Decompresses data fom 'source' into 'dest'. + * If the source stream is detected malformed, the function will + * stop decoding and return a negative result. + * This function is protected against buffer overflow exploits, + * including malicious data packets. It never writes outside output buffer, + * nor reads outside input buffer. + * + * Return: number of bytes decompressed into destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + */ +int LZ4_decompress_safe(const char *source, char *dest, int compressedSize, + int maxDecompressedSize); + +/** + * LZ4_decompress_safe_partial() - Decompress a block of size 'compressedSize' + * at position 'source' into buffer 'dest' + * @source: source address of the compressed data + * @dest: output buffer address of the decompressed data which must be + * already allocated + * @compressedSize: is the precise full size of the compressed block. + * @targetOutputSize: the decompression operation will try + * to stop as soon as 'targetOutputSize' has been reached + * @maxDecompressedSize: is the size of destination buffer + * + * This function decompresses a compressed block of size 'compressedSize' + * at position 'source' into destination buffer 'dest' + * of size 'maxDecompressedSize'. + * The function tries to stop decompressing operation as soon as + * 'targetOutputSize' has been reached, reducing decompression time. + * This function never writes outside of output buffer, + * and never reads outside of input buffer. + * It is therefore protected against malicious data packets. 
+ * + * Return: the number of bytes decoded in the destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + * + */ +int LZ4_decompress_safe_partial(const char *source, char *dest, + int compressedSize, int targetOutputSize, int maxDecompressedSize); + +/*-************************************************************************ + * LZ4 HC Compression + **************************************************************************/ + +/** + * LZ4_compress_HC() - Compress data from `src` into `dst`, using HC algorithm + * @src: source address of the original data + * @dst: output buffer address of the compressed data + * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE + * @dstCapacity: full or partial size of buffer 'dst', + * which must be already allocated + * @compressionLevel: Recommended values are between 4 and 9, although any + * value between 1 and LZ4HC_MAX_CLEVEL will work. + * Values >LZ4HC_MAX_CLEVEL behave the same as 16. + * @wrkmem: address of the working memory. + * This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS. + * + * Compress data from 'src' into 'dst', using the more powerful + * but slower "HC" algorithm. Compression is guaranteed to succeed if + * `dstCapacity >= LZ4_compressBound(srcSize) + * + * Return : the number of bytes written into 'dst' or 0 if compression fails. + */ +int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity, + int compressionLevel, void *wrkmem); + +/** + * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure + * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure + * @compressionLevel: Recommended values are between 4 and 9, although any + * value between 1 and LZ4HC_MAX_CLEVEL will work. + * Values >LZ4HC_MAX_CLEVEL behave the same as 16. + * + * An LZ4_streamHC_t structure can be allocated once + * and re-used multiple times. + * Use this function to init an allocated `LZ4_streamHC_t` structure + * and start a new compression. + */ +void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel); + +/** + * LZ4_loadDictHC() - Load a static dictionary into LZ4_streamHC + * @streamHCPtr: pointer to the LZ4HC_stream_t + * @dictionary: dictionary to load + * @dictSize: size of dictionary + * + * Use this function to load a static dictionary into LZ4HC_stream. + * Any previous data will be forgotten, only 'dictionary' + * will remain in memory. + * Loading a size of 0 is allowed. + * + * Return : dictionary size, in bytes (necessarily <= 64 KB) + */ +int LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary, + int dictSize); + +/** + * LZ4_compress_HC_continue() - Compress 'src' using data from previously + * compressed blocks as a dictionary using the HC algorithm + * @streamHCPtr: Pointer to the previous 'LZ4_streamHC_t' structure + * @src: source address of the original data + * @dst: output buffer address of the compressed data, + * which must be already allocated + * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE + * @maxDstSize: full or partial size of buffer 'dest' + * which must be already allocated + * + * These functions compress data in successive blocks of any size, using + * previous blocks as dictionary. One key assumption is that previous + * blocks (up to 64 KB) remain read-accessible while + * compressing next blocks. There is an exception for ring buffers, + * which can be smaller than 64 KB. 
+ * Ring buffers scenario is automatically detected and handled by + * LZ4_compress_HC_continue(). + * Before starting compression, state must be properly initialized, + * using LZ4_resetStreamHC(). + * A first "fictional block" can then be designated as + * initial dictionary, using LZ4_loadDictHC() (Optional). + * Then, use LZ4_compress_HC_continue() + * to compress each successive block. Previous memory blocks + * (including initial dictionary when present) must remain accessible + * and unmodified during compression. + * 'dst' buffer should be sized to handle worst case scenarios, using + * LZ4_compressBound(), to ensure operation success. + * If, for any reason, previous data blocks can't be preserved unmodified + * in memory during next compression block, + * you must save it to a safer memory space, using LZ4_saveDictHC(). + * Return value of LZ4_saveDictHC() is the size of dictionary + * effectively saved into 'safeBuffer'. + * + * Return: Number of bytes written into buffer 'dst' or 0 if compression fails + */ +int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src, + char *dst, int srcSize, int maxDstSize); + +/** + * LZ4_saveDictHC() - Save static dictionary from LZ4HC_stream + * @streamHCPtr: pointer to the 'LZ4HC_stream_t' structure + * @safeBuffer: buffer to save dictionary to, must be already allocated + * @maxDictSize: size of 'safeBuffer' + * + * If previously compressed data block is not guaranteed + * to remain available at its memory location, + * save it into a safer place (char *safeBuffer). + * Note : you don't need to call LZ4_loadDictHC() afterwards, + * dictionary is immediately usable, you can therefore call + * LZ4_compress_HC_continue(). + * + * Return : saved dictionary size in bytes (necessarily <= maxDictSize), + * or 0 if error. + */ +int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer, + int maxDictSize); + +/*-********************************************* + * Streaming Compression Functions + ***********************************************/ + +/** + * LZ4_resetStream() - Init an allocated 'LZ4_stream_t' structure + * @LZ4_stream: pointer to the 'LZ4_stream_t' structure + * + * An LZ4_stream_t structure can be allocated once + * and re-used multiple times. + * Use this function to init an allocated `LZ4_stream_t` structure + * and start a new compression. + */ +void LZ4_resetStream(LZ4_stream_t *LZ4_stream); + +/** + * LZ4_loadDict() - Load a static dictionary into LZ4_stream + * @streamPtr: pointer to the LZ4_stream_t + * @dictionary: dictionary to load + * @dictSize: size of dictionary + * + * Use this function to load a static dictionary into LZ4_stream. + * Any previous data will be forgotten, only 'dictionary' + * will remain in memory. + * Loading a size of 0 is allowed. + * + * Return : dictionary size, in bytes (necessarily <= 64 KB) + */ +int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary, + int dictSize); + +/** + * LZ4_saveDict() - Save static dictionary from LZ4_stream + * @streamPtr: pointer to the 'LZ4_stream_t' structure + * @safeBuffer: buffer to save dictionary to, must be already allocated + * @dictSize: size of 'safeBuffer' + * + * If previously compressed data block is not guaranteed + * to remain available at its memory location, + * save it into a safer place (char *safeBuffer). + * Note : you don't need to call LZ4_loadDict() afterwards, + * dictionary is immediately usable, you can therefore call + * LZ4_compress_fast_continue(). 
+ * + * Return : saved dictionary size in bytes (necessarily <= dictSize), + * or 0 if error. + */ +int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize); + +/** + * LZ4_compress_fast_continue() - Compress 'src' using data from previously + * compressed blocks as a dictionary + * @streamPtr: Pointer to the previous 'LZ4_stream_t' structure + * @src: source address of the original data + * @dst: output buffer address of the compressed data, + * which must be already allocated + * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE + * @maxDstSize: full or partial size of buffer 'dest' + * which must be already allocated + * @acceleration: acceleration factor + * + * Compress buffer content 'src', using data from previously compressed blocks + * as dictionary to improve compression ratio. + * Important : Previous data blocks are assumed to still + * be present and unmodified ! + * If maxDstSize >= LZ4_compressBound(srcSize), + * compression is guaranteed to succeed, and runs faster. + * + * Return: Number of bytes written into buffer 'dst' or 0 if compression fails + */ +int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src, + char *dst, int srcSize, int maxDstSize, int acceleration); + +/** + * LZ4_setStreamDecode() - Instruct where to find dictionary + * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure + * @dictionary: dictionary to use + * @dictSize: size of dictionary + * + * Use this function to instruct where to find the dictionary. + * Setting a size of 0 is allowed (same effect as reset). + * + * Return: 1 if OK, 0 if error + */ +int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, + const char *dictionary, int dictSize); + +/** + * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode + * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated + * @compressedSize: is the precise full size of the compressed block + * @maxDecompressedSize: is the size of 'dest' buffer + * + * These decoding function allows decompression of multiple blocks + * in "streaming" mode. + * Previously decoded blocks *must* remain available at the memory position + * where they were decoded (up to 64 KB) + * In the case of a ring buffers, decoding buffer must be either : + * - Exactly same size as encoding buffer, with same update rule + * (block boundaries at same positions) In which case, + * the decoding & encoding ring buffer can have any size, + * including very small ones ( < 64 KB). + * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * maxBlockSize is implementation dependent. + * It's the maximum size you intend to compress into a single block. + * In which case, encoding and decoding buffers do not need + * to be synchronized, and encoding ring buffer can have any size, + * including small ones ( < 64 KB). + * - _At least_ 64 KB + 8 bytes + maxBlockSize. + * In which case, encoding and decoding buffers do not need to be + * synchronized, and encoding ring buffer can have any size, + * including larger than decoding buffer. 
W + * Whenever these conditions are not possible, save the last 64KB of decoded + * data into a safe buffer, and indicate where it is saved + * using LZ4_setStreamDecode() + * + * Return: number of bytes decompressed into destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + */ +int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, int compressedSize, + int maxDecompressedSize); + +/** + * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode + * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated with 'originalSize' bytes + * @originalSize: is the original and therefore uncompressed size + * + * These decoding function allows decompression of multiple blocks + * in "streaming" mode. + * Previously decoded blocks *must* remain available at the memory position + * where they were decoded (up to 64 KB) + * In the case of a ring buffers, decoding buffer must be either : + * - Exactly same size as encoding buffer, with same update rule + * (block boundaries at same positions) In which case, + * the decoding & encoding ring buffer can have any size, + * including very small ones ( < 64 KB). + * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * maxBlockSize is implementation dependent. + * It's the maximum size you intend to compress into a single block. + * In which case, encoding and decoding buffers do not need + * to be synchronized, and encoding ring buffer can have any size, + * including small ones ( < 64 KB). + * - _At least_ 64 KB + 8 bytes + maxBlockSize. + * In which case, encoding and decoding buffers do not need to be + * synchronized, and encoding ring buffer can have any size, + * including larger than decoding buffer. W + * Whenever these conditions are not possible, save the last 64KB of decoded + * data into a safe buffer, and indicate where it is saved + * using LZ4_setStreamDecode() + * + * Return: number of bytes decompressed into destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + */ +int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, int originalSize); + +/** + * LZ4_decompress_safe_usingDict() - Same as LZ4_setStreamDecode() + * followed by LZ4_decompress_safe_continue() + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated + * @compressedSize: is the precise full size of the compressed block + * @maxDecompressedSize: is the size of 'dest' buffer + * @dictStart: pointer to the start of the dictionary in memory + * @dictSize: size of dictionary + * + * These decoding function works the same as + * a combination of LZ4_setStreamDecode() followed by + * LZ4_decompress_safe_continue() + * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure. 
+ * + * Return: number of bytes decompressed into destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + */ +int LZ4_decompress_safe_usingDict(const char *source, char *dest, + int compressedSize, int maxDecompressedSize, const char *dictStart, + int dictSize); + +/** + * LZ4_decompress_fast_usingDict() - Same as LZ4_setStreamDecode() + * followed by LZ4_decompress_fast_continue() + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated with 'originalSize' bytes + * @originalSize: is the original and therefore uncompressed size + * @dictStart: pointer to the start of the dictionary in memory + * @dictSize: size of dictionary + * + * These decoding function works the same as + * a combination of LZ4_setStreamDecode() followed by + * LZ4_decompress_safe_continue() + * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure. + * + * Return: number of bytes decompressed into destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + */ +int LZ4_decompress_fast_usingDict(const char *source, char *dest, + int originalSize, const char *dictStart, int dictSize); + #endif diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h new file mode 100644 index 00000000..e69de29b diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h new file mode 100644 index 00000000..e69de29b diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c index a4e412ea..f3ded7b4 100644 --- a/libbcachefs/alloc.c +++ b/libbcachefs/alloc.c @@ -71,6 +71,8 @@ #include #include #include +#include +#include #include static void __bch2_bucket_free(struct bch_dev *, struct bucket *); @@ -283,8 +285,8 @@ int bch2_prio_write(struct bch_dev *ca) r < ca->mi.nbuckets && d < end; r++, d++) { g = ca->buckets + r; - d->read_prio = cpu_to_le16(g->read_prio); - d->write_prio = cpu_to_le16(g->write_prio); + d->prio[READ] = cpu_to_le16(g->prio[READ]); + d->prio[WRITE] = cpu_to_le16(g->prio[WRITE]); d->gen = ca->buckets[r].mark.gen; } @@ -445,8 +447,8 @@ int bch2_prio_read(struct bch_dev *ca) d = p->data; } - ca->buckets[b].read_prio = le16_to_cpu(d->read_prio); - ca->buckets[b].write_prio = le16_to_cpu(d->write_prio); + ca->buckets[b].prio[READ] = le16_to_cpu(d->prio[READ]); + ca->buckets[b].prio[WRITE] = le16_to_cpu(d->prio[WRITE]); bucket_cmpxchg(&ca->buckets[b], new, new.gen = d->gen); } @@ -469,9 +471,9 @@ fsck_err: * If there aren't enough available buckets to fill up free_inc, wait until * there are. 
*/ -static int wait_buckets_available(struct bch_dev *ca) +static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca) { - struct bch_fs *c = ca->fs; + unsigned long gc_count = c->gc_count; int ret = 0; while (1) { @@ -481,27 +483,18 @@ static int wait_buckets_available(struct bch_dev *ca) break; } - if (ca->inc_gen_needs_gc >= fifo_free(&ca->free_inc)) { - if (c->gc_thread) { - trace_gc_cannot_inc_gens(ca->fs); - atomic_inc(&c->kick_gc); - wake_up_process(ca->fs->gc_thread); - } + if (gc_count != c->gc_count) + ca->inc_gen_really_needs_gc = 0; - /* - * We are going to wait for GC to wake us up, even if - * bucket counters tell us enough buckets are available, - * because we are actually waiting for GC to rewrite - * nodes with stale pointers - */ - } else if (dev_buckets_available(ca) >= - fifo_free(&ca->free_inc)) + if ((ssize_t) (dev_buckets_available(ca) - + ca->inc_gen_really_needs_gc) >= + (ssize_t) fifo_free(&ca->free_inc)) break; - up_read(&ca->fs->gc_lock); + up_read(&c->gc_lock); schedule(); try_to_freeze(); - down_read(&ca->fs->gc_lock); + down_read(&c->gc_lock); } __set_current_state(TASK_RUNNING); @@ -639,9 +632,12 @@ static bool bch2_can_invalidate_bucket(struct bch_dev *ca, struct bucket *g, if (!is_available_bucket(mark)) return false; - if (bucket_gc_gen(ca, g) >= BUCKET_GC_GEN_MAX - 1) + if (bucket_gc_gen(ca, g) >= BUCKET_GC_GEN_MAX / 2) ca->inc_gen_needs_gc++; + if (bucket_gc_gen(ca, g) >= BUCKET_GC_GEN_MAX) + ca->inc_gen_really_needs_gc++; + return can_inc_bucket_gen(ca, g); } @@ -651,8 +647,8 @@ static void bch2_invalidate_one_bucket(struct bch_dev *ca, struct bucket *g) bch2_invalidate_bucket(ca, g); - g->read_prio = ca->fs->prio_clock[READ].hand; - g->write_prio = ca->fs->prio_clock[WRITE].hand; + g->prio[READ] = ca->fs->prio_clock[READ].hand; + g->prio[WRITE] = ca->fs->prio_clock[WRITE].hand; verify_not_on_freelist(ca, g - ca->buckets); BUG_ON(!fifo_push(&ca->free_inc, g - ca->buckets)); @@ -672,40 +668,34 @@ static void bch2_invalidate_one_bucket(struct bch_dev *ca, struct bucket *g) * - The number of sectors of cached data in the bucket, which gives us an * indication of the cost in cache misses this eviction will cause. * - * - The difference between the bucket's current gen and oldest gen of any - * pointer into it, which gives us an indication of the cost of an eventual - * btree GC to rewrite nodes with stale pointers. + * - If hotness * sectors used compares equal, we pick the bucket with the + * smallest bucket_gc_gen() - since incrementing the same bucket's generation + * number repeatedly forces us to run mark and sweep gc to avoid generation + * number wraparound. 
*/ -static unsigned long bucket_sort_key(bucket_heap *h, - struct bucket_heap_entry e) +static unsigned long bucket_sort_key(struct bch_dev *ca, + struct bucket *g, + struct bucket_mark m) { - struct bch_dev *ca = container_of(h, struct bch_dev, alloc_heap); - struct bucket *g = ca->buckets + e.bucket; - unsigned long prio = g->read_prio - ca->min_prio[READ]; - prio = (prio * 7) / (ca->fs->prio_clock[READ].hand - - ca->min_prio[READ]); + unsigned long hotness = + (g->prio[READ] - ca->min_prio[READ]) * 7 / + (ca->fs->prio_clock[READ].hand - ca->min_prio[READ]); - return (prio + 1) * bucket_sectors_used(e.mark); + return (((hotness + 1) * bucket_sectors_used(m)) << 8) | + bucket_gc_gen(ca, g); } -static inline int bucket_alloc_cmp(bucket_heap *h, - struct bucket_heap_entry l, - struct bucket_heap_entry r) +static inline int bucket_alloc_cmp(alloc_heap *h, + struct alloc_heap_entry l, + struct alloc_heap_entry r) { - return bucket_sort_key(h, l) - bucket_sort_key(h, r); -} - -static inline long bucket_idx_cmp(bucket_heap *h, - struct bucket_heap_entry l, - struct bucket_heap_entry r) -{ - return l.bucket - r.bucket; + return (l.key > r.key) - (l.key < r.key); } static void invalidate_buckets_lru(struct bch_dev *ca) { - struct bucket_heap_entry e; + struct alloc_heap_entry e; struct bucket *g; ca->alloc_heap.used = 0; @@ -721,23 +711,26 @@ static void invalidate_buckets_lru(struct bch_dev *ca) */ for_each_bucket(g, ca) { struct bucket_mark m = READ_ONCE(g->mark); - struct bucket_heap_entry e = { g - ca->buckets, m }; if (!bch2_can_invalidate_bucket(ca, g, m)) continue; + e = (struct alloc_heap_entry) { + .bucket = g - ca->buckets, + .key = bucket_sort_key(ca, g, m) + }; + heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp); } - /* Sort buckets by physical location on disk for better locality */ - heap_resort(&ca->alloc_heap, bucket_idx_cmp); + heap_resort(&ca->alloc_heap, bucket_alloc_cmp); /* * If we run out of buckets to invalidate, bch2_allocator_thread() will * kick stuff and retry us */ while (!fifo_full(&ca->free_inc) && - heap_pop(&ca->alloc_heap, e, bucket_idx_cmp)) + heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp)) bch2_invalidate_one_bucket(ca, &ca->buckets[e.bucket]); mutex_unlock(&ca->fs->bucket_lock); @@ -790,6 +783,7 @@ static void invalidate_buckets_random(struct bch_dev *ca) static void invalidate_buckets(struct bch_dev *ca) { ca->inc_gen_needs_gc = 0; + ca->inc_gen_really_needs_gc = 0; switch (ca->mi.replacement) { case CACHE_REPLACEMENT_LRU: @@ -852,8 +846,8 @@ static void bch2_find_empty_buckets(struct bch_fs *c, struct bch_dev *ca) spin_lock(&ca->freelist_lock); bch2_mark_alloc_bucket(ca, g, true); - g->read_prio = c->prio_clock[READ].hand; - g->write_prio = c->prio_clock[WRITE].hand; + g->prio[READ] = c->prio_clock[READ].hand; + g->prio[WRITE] = c->prio_clock[WRITE].hand; verify_not_on_freelist(ca, g - ca->buckets); BUG_ON(!fifo_push(&ca->free_inc, g - ca->buckets)); @@ -866,6 +860,13 @@ static void bch2_find_empty_buckets(struct bch_fs *c, struct bch_dev *ca) } } +static int size_t_cmp(const void *_l, const void *_r) +{ + const size_t *l = _l, *r = _r; + + return (*l > *r) - (*l < *r); +} + /** * bch_allocator_thread - move buckets from free_inc to reserves * @@ -923,27 +924,13 @@ static int bch2_allocator_thread(void *arg) __set_current_state(TASK_RUNNING); } - down_read(&c->gc_lock); - - /* - * See if we have buckets we can reuse without invalidating them - * or forcing a journal commit: - */ - //bch2_find_empty_buckets(c, ca); - - if (fifo_used(&ca->free_inc) * 2 
> ca->free_inc.size) { - up_read(&c->gc_lock); - continue; - } - /* We've run out of free buckets! */ - while (!fifo_full(&ca->free_inc)) { - if (wait_buckets_available(ca)) { - up_read(&c->gc_lock); - goto out; - } + BUG_ON(fifo_used(&ca->free_inc)); + ca->free_inc.front = ca->free_inc.back = 0; + down_read(&c->gc_lock); + while (1) { /* * Find some buckets that we can invalidate, either * they're completely unused, or only contain clean data @@ -952,12 +939,38 @@ static int bch2_allocator_thread(void *arg) */ invalidate_buckets(ca); - trace_alloc_batch(ca, fifo_used(&ca->free_inc), - ca->free_inc.size); - } + trace_alloc_batch(ca, fifo_used(&ca->free_inc), + ca->free_inc.size); + + if ((ca->inc_gen_needs_gc >= ca->free_inc.size || + (!fifo_full(&ca->free_inc) && + ca->inc_gen_really_needs_gc >= + fifo_free(&ca->free_inc))) && + c->gc_thread) { + atomic_inc(&c->kick_gc); + wake_up_process(c->gc_thread); + } + + if (fifo_full(&ca->free_inc)) + break; + + if (wait_buckets_available(c, ca)) { + up_read(&c->gc_lock); + goto out; + } + } up_read(&c->gc_lock); + BUG_ON(ca->free_inc.front); + + spin_lock(&ca->freelist_lock); + sort(ca->free_inc.data, + ca->free_inc.back, + sizeof(ca->free_inc.data[0]), + size_t_cmp, NULL); + spin_unlock(&ca->freelist_lock); + /* * free_inc is full of newly-invalidated buckets, must write out * prios and gens before they can be re-used @@ -1022,8 +1035,8 @@ out: g = ca->buckets + r; - g->read_prio = ca->fs->prio_clock[READ].hand; - g->write_prio = ca->fs->prio_clock[WRITE].hand; + g->prio[READ] = ca->fs->prio_clock[READ].hand; + g->prio[WRITE] = ca->fs->prio_clock[WRITE].hand; return r; } @@ -1031,9 +1044,6 @@ out: static void __bch2_bucket_free(struct bch_dev *ca, struct bucket *g) { bch2_mark_free_bucket(ca, g); - - g->read_prio = ca->fs->prio_clock[READ].hand; - g->write_prio = ca->fs->prio_clock[WRITE].hand; } enum bucket_alloc_ret { @@ -1614,8 +1624,7 @@ void bch2_recalc_capacity(struct bch_fs *c) unsigned i, j; for_each_online_member(ca, c, i) { - struct backing_dev_info *bdi = - blk_get_backing_dev_info(ca->disk_sb.bdev); + struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_bdi; ra_pages += bdi->ra_pages; } diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h index 1bf48ef9..ae58d083 100644 --- a/libbcachefs/alloc_types.h +++ b/libbcachefs/alloc_types.h @@ -99,4 +99,11 @@ struct write_point { */ }; +struct alloc_heap_entry { + size_t bucket; + unsigned long key; +}; + +typedef HEAP(struct alloc_heap_entry) alloc_heap; + #endif /* _BCACHE_ALLOC_TYPES_H */ diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 6259b50e..977ac364 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -240,8 +240,6 @@ do { \ "btree node it traverses") \ BCH_DEBUG_PARAM(btree_gc_rewrite_disabled, \ "Disables rewriting of btree nodes during mark and sweep")\ - BCH_DEBUG_PARAM(btree_gc_coalesce_disabled, \ - "Disables coalescing of btree nodes") \ BCH_DEBUG_PARAM(btree_shrinker_disabled, \ "Disables the shrinker callback for the btree node cache") @@ -273,7 +271,6 @@ do { \ #define BCH_TIME_STATS() \ BCH_TIME_STAT(btree_node_mem_alloc, sec, us) \ BCH_TIME_STAT(btree_gc, sec, ms) \ - BCH_TIME_STAT(btree_coalesce, sec, ms) \ BCH_TIME_STAT(btree_split, sec, us) \ BCH_TIME_STAT(btree_sort, ms, us) \ BCH_TIME_STAT(btree_read, ms, us) \ @@ -417,8 +414,9 @@ struct bch_dev { atomic_long_t saturated_count; size_t inc_gen_needs_gc; + size_t inc_gen_really_needs_gc; - bucket_heap alloc_heap; + alloc_heap alloc_heap; bucket_heap copygc_heap; /* Moving 
GC: */ @@ -681,6 +679,7 @@ struct bch_fs { /* GARBAGE COLLECTION */ struct task_struct *gc_thread; atomic_t kick_gc; + unsigned long gc_count; /* * Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos] @@ -716,7 +715,7 @@ struct bch_fs { mempool_t compression_bounce[2]; struct crypto_shash *sha256; - struct crypto_blkcipher *chacha20; + struct crypto_skcipher *chacha20; struct crypto_shash *poly1305; atomic64_t key_version; @@ -762,6 +761,7 @@ struct bch_fs { /* The rest of this all shows up in sysfs */ atomic_long_t read_realloc_races; + unsigned btree_gc_periodic:1; unsigned foreground_write_ratelimit_enabled:1; unsigned copy_gc_enabled:1; unsigned tiering_enabled:1; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index ef854fb1..2d64bcae 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1208,8 +1208,7 @@ struct prio_set { __le64 next_bucket; struct bucket_disk { - __le16 read_prio; - __le16 write_prio; + __le16 prio[2]; __u8 gen; } __attribute__((packed)) data[]; } __attribute__((packed, aligned(8))); diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 99d28f64..d907ef58 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -166,14 +166,14 @@ fsck_err: return ret; } -static bool btree_gc_mark_node(struct bch_fs *c, struct btree *b) +static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b) { - if (btree_node_has_ptrs(b)) { - struct btree_node_iter iter; - struct bkey unpacked; - struct bkey_s_c k; - u8 stale = 0; + struct btree_node_iter iter; + struct bkey unpacked; + struct bkey_s_c k; + u8 stale = 0; + if (btree_node_has_ptrs(b)) for_each_btree_node_key_unpack(b, k, &iter, btree_node_is_extents(b), &unpacked) { @@ -182,17 +182,7 @@ static bool btree_gc_mark_node(struct bch_fs *c, struct btree *b) btree_node_type(b), k)); } - if (btree_gc_rewrite_disabled(c)) - return false; - - if (stale > 10) - return true; - } - - if (btree_gc_always_rewrite(c)) - return true; - - return false; + return stale; } static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) @@ -212,10 +202,10 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id) { struct btree_iter iter; struct btree *b; - bool should_rewrite; struct range_checks r; unsigned depth = btree_id == BTREE_ID_EXTENTS ? 
0 : 1; - int ret; + unsigned max_stale; + int ret = 0; /* * if expensive_debug_checks is on, run range_checks on all leaf nodes: @@ -231,12 +221,21 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id) bch2_verify_btree_nr_keys(b); - should_rewrite = btree_gc_mark_node(c, b); + max_stale = btree_gc_mark_node(c, b); gc_pos_set(c, gc_pos_btree_node(b)); - if (should_rewrite) - bch2_btree_node_rewrite(&iter, b, NULL); + if (max_stale > 32) + bch2_btree_node_rewrite(c, &iter, + b->data->keys.seq, + BTREE_INSERT_USE_RESERVE| + BTREE_INSERT_GC_LOCK_HELD); + else if (!btree_gc_rewrite_disabled(c) && + (btree_gc_always_rewrite(c) || max_stale > 16)) + bch2_btree_node_rewrite(c, &iter, + b->data->keys.seq, + BTREE_INSERT_NOWAIT| + BTREE_INSERT_GC_LOCK_HELD); bch2_btree_iter_cond_resched(&iter); } @@ -507,6 +506,7 @@ void bch2_gc(struct bch_fs *c) /* Indicates that gc is no longer in progress: */ gc_pos_set(c, gc_phase(GC_PHASE_DONE)); + c->gc_count++; up_write(&c->gc_lock); trace_gc_end(c); @@ -835,7 +835,6 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) */ void bch2_coalesce(struct bch_fs *c) { - u64 start_time; enum btree_id id; if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) @@ -843,7 +842,6 @@ void bch2_coalesce(struct bch_fs *c) down_read(&c->gc_lock); trace_gc_coalesce_start(c); - start_time = local_clock(); for (id = 0; id < BTREE_ID_NR; id++) { int ret = c->btree_roots[id].b @@ -858,7 +856,6 @@ void bch2_coalesce(struct bch_fs *c) } } - bch2_time_stats_update(&c->btree_coalesce_time, start_time); trace_gc_coalesce_end(c); up_read(&c->gc_lock); } @@ -873,9 +870,7 @@ static int bch2_gc_thread(void *arg) set_freezable(); while (1) { - unsigned long next = last + c->capacity / 16; - - while (atomic_long_read(&clock->now) < next) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) { @@ -883,21 +878,28 @@ static int bch2_gc_thread(void *arg) return 0; } - if (atomic_read(&c->kick_gc) != last_kick) { - __set_current_state(TASK_RUNNING); + if (atomic_read(&c->kick_gc) != last_kick) break; + + if (c->btree_gc_periodic) { + unsigned long next = last + c->capacity / 16; + + if (atomic_long_read(&clock->now) >= next) + break; + + bch2_io_clock_schedule_timeout(clock, next); + } else { + schedule(); } - bch2_io_clock_schedule_timeout(clock, next); try_to_freeze(); } + __set_current_state(TASK_RUNNING); last = atomic_long_read(&clock->now); last_kick = atomic_read(&c->kick_gc); bch2_gc(c); - if (!btree_gc_coalesce_disabled(c)) - bch2_coalesce(c); debug_check_no_locks_held(); } diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c index 2f67c092..8a4ee6d1 100644 --- a/libbcachefs/btree_update.c +++ b/libbcachefs/btree_update.c @@ -533,6 +533,9 @@ static struct btree_reserve *__bch2_btree_reserve_get(struct bch_fs *c, if (flags & BTREE_INSERT_NOFAIL) disk_res_flags |= BCH_DISK_RESERVATION_NOFAIL; + if (flags & BTREE_INSERT_NOWAIT) + cl = NULL; + /* * This check isn't necessary for correctness - it's just to potentially * prevent us from doing a lot of work that'll end up being wasted: @@ -2279,30 +2282,13 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, return ret; } -/** - * bch_btree_node_rewrite - Rewrite/move a btree node - * - * Returns 0 on success, -EINTR or -EAGAIN on failure (i.e. 
- * btree_check_reserve() has to wait) - */ -int bch2_btree_node_rewrite(struct btree_iter *iter, struct btree *b, - struct closure *cl) +static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, + struct btree *b, unsigned flags, + struct closure *cl) { - struct bch_fs *c = iter->c; struct btree *n, *parent = iter->nodes[b->level + 1]; struct btree_reserve *reserve; struct btree_interior_update *as; - unsigned flags = BTREE_INSERT_NOFAIL; - - /* - * if caller is going to wait if allocating reserve fails, then this is - * a rewrite that must succeed: - */ - if (cl) - flags |= BTREE_INSERT_USE_RESERVE; - - if (!bch2_btree_iter_set_locks_want(iter, U8_MAX)) - return -EINTR; reserve = bch2_btree_reserve_get(c, b, 0, flags, cl); if (IS_ERR(reserve)) { @@ -2341,3 +2327,57 @@ int bch2_btree_node_rewrite(struct btree_iter *iter, struct btree *b, bch2_btree_reserve_put(c, reserve); return 0; } + +/** + * bch_btree_node_rewrite - Rewrite/move a btree node + * + * Returns 0 on success, -EINTR or -EAGAIN on failure (i.e. + * btree_check_reserve() has to wait) + */ +int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, + __le64 seq, unsigned flags) +{ + unsigned locks_want = iter->locks_want; + struct closure cl; + struct btree *b; + int ret; + + flags |= BTREE_INSERT_NOFAIL; + + closure_init_stack(&cl); + + bch2_btree_iter_set_locks_want(iter, U8_MAX); + + if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) { + if (!down_read_trylock(&c->gc_lock)) { + bch2_btree_iter_unlock(iter); + down_read(&c->gc_lock); + } + } + + while (1) { + ret = bch2_btree_iter_traverse(iter); + if (ret) + break; + + b = bch2_btree_iter_peek_node(iter); + if (!b || b->data->keys.seq != seq) + break; + + ret = __btree_node_rewrite(c, iter, b, flags, &cl); + if (ret != -EAGAIN && + ret != -EINTR) + break; + + bch2_btree_iter_unlock(iter); + closure_sync(&cl); + } + + bch2_btree_iter_set_locks_want(iter, locks_want); + + if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) + up_read(&c->gc_lock); + + closure_sync(&cl); + return ret; +} diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index a933d5a9..7c4abe4a 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -380,6 +380,10 @@ int __bch2_btree_insert_at(struct btree_insert *); */ #define BTREE_INSERT_JOURNAL_REPLAY (1 << 3) +/* Don't block on allocation failure (for new btree nodes: */ +#define BTREE_INSERT_NOWAIT (1 << 4) +#define BTREE_INSERT_GC_LOCK_HELD (1 << 5) + int bch2_btree_delete_at(struct btree_iter *, unsigned); int bch2_btree_insert_list_at(struct btree_iter *, struct keylist *, @@ -416,7 +420,8 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id, struct disk_reservation *, struct extent_insert_hook *, u64 *); -int bch2_btree_node_rewrite(struct btree_iter *, struct btree *, struct closure *); +int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *, + __le64, unsigned); #endif /* _BCACHE_BTREE_INSERT_H */ diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h index 18bf1713..68f863f3 100644 --- a/libbcachefs/buckets_types.h +++ b/libbcachefs/buckets_types.h @@ -50,13 +50,7 @@ struct bucket_mark { }; struct bucket { - union { - struct { - u16 read_prio; - u16 write_prio; - }; - u16 prio[2]; - }; + u16 prio[2]; union { struct bucket_mark _mark; diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c index 4545a499..f2883e1f 100644 --- a/libbcachefs/checksum.c +++ b/libbcachefs/checksum.c @@ -178,18 +178,21 @@ static u64 bch2_checksum_update(unsigned type, u64 crc, const 
void *data, size_t } } -static inline void do_encrypt_sg(struct crypto_blkcipher *tfm, +static inline void do_encrypt_sg(struct crypto_skcipher *tfm, struct nonce nonce, struct scatterlist *sg, size_t len) { - struct blkcipher_desc desc = { .tfm = tfm, .info = nonce.d }; + SKCIPHER_REQUEST_ON_STACK(req, tfm); int ret; - ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len); + skcipher_request_set_tfm(req, tfm); + skcipher_request_set_crypt(req, sg, sg, len, nonce.d); + + ret = crypto_skcipher_encrypt(req); BUG_ON(ret); } -static inline void do_encrypt(struct crypto_blkcipher *tfm, +static inline void do_encrypt(struct crypto_skcipher *tfm, struct nonce nonce, void *buf, size_t len) { @@ -202,20 +205,20 @@ static inline void do_encrypt(struct crypto_blkcipher *tfm, int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, void *buf, size_t len) { - struct crypto_blkcipher *chacha20 = - crypto_alloc_blkcipher("chacha20", 0, CRYPTO_ALG_ASYNC); + struct crypto_skcipher *chacha20 = + crypto_alloc_skcipher("chacha20", 0, 0); int ret; if (!chacha20) return PTR_ERR(chacha20); - ret = crypto_blkcipher_setkey(chacha20, (void *) key, sizeof(*key)); + ret = crypto_skcipher_setkey(chacha20, (void *) key, sizeof(*key)); if (ret) goto err; do_encrypt(chacha20, nonce, buf, len); err: - crypto_free_blkcipher(chacha20); + crypto_free_skcipher(chacha20); return ret; } @@ -377,7 +380,7 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key) return PTR_ERR(keyring_key); down_read(&keyring_key->sem); - ukp = user_key_payload(keyring_key); + ukp = dereference_key_locked(keyring_key); if (ukp->datalen == sizeof(*key)) { memcpy(key, ukp->data, ukp->datalen); ret = 0; @@ -454,8 +457,7 @@ err: static int bch2_alloc_ciphers(struct bch_fs *c) { if (!c->chacha20) - c->chacha20 = crypto_alloc_blkcipher("chacha20", 0, - CRYPTO_ALG_ASYNC); + c->chacha20 = crypto_alloc_skcipher("chacha20", 0, 0); if (IS_ERR(c->chacha20)) return PTR_ERR(c->chacha20); @@ -532,7 +534,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) goto err; } - ret = crypto_blkcipher_setkey(c->chacha20, + ret = crypto_skcipher_setkey(c->chacha20, (void *) &key.key, sizeof(key.key)); if (ret) goto err; @@ -560,7 +562,7 @@ void bch2_fs_encryption_exit(struct bch_fs *c) if (!IS_ERR_OR_NULL(c->poly1305)) crypto_free_shash(c->poly1305); if (!IS_ERR_OR_NULL(c->chacha20)) - crypto_free_blkcipher(c->chacha20); + crypto_free_skcipher(c->chacha20); if (!IS_ERR_OR_NULL(c->sha256)) crypto_free_shash(c->sha256); } @@ -587,7 +589,7 @@ int bch2_fs_encryption_init(struct bch_fs *c) if (ret) goto err; - ret = crypto_blkcipher_setkey(c->chacha20, + ret = crypto_skcipher_setkey(c->chacha20, (void *) &key.key, sizeof(key.key)); err: memzero_explicit(&key, sizeof(key)); diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c index 547ea732..62b42042 100644 --- a/libbcachefs/compress.c +++ b/libbcachefs/compress.c @@ -148,9 +148,10 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, switch (crc.compression_type) { case BCH_COMPRESSION_LZ4: - ret = lz4_decompress(src_data, &src_len, - dst_data, dst_len); - if (ret) { + ret = LZ4_decompress_safe(src_data, dst_data, + src_len, dst_len); + + if (ret != dst_len) { ret = -EIO; goto err; } @@ -286,32 +287,27 @@ static int __bio_compress(struct bch_fs *c, switch (compression_type) { case BCH_COMPRESSION_LZ4: { void *workspace; - - *dst_len = dst->bi_iter.bi_size; - *src_len = src->bi_iter.bi_size; + int srclen = src->bi_iter.bi_size; + ret = 0; workspace = 
mempool_alloc(&c->lz4_workspace_pool, GFP_NOIO); - while (*src_len > block_bytes(c) && - (ret = lz4_compress(src_data, *src_len, - dst_data, dst_len, - workspace))) { - /* - * On error, the compressed data was bigger than - * dst_len, and -ret is the amount of data we were able - * to compress - round down to nearest block and try - * again: - */ - BUG_ON(ret > 0); - BUG_ON(-ret >= *src_len); - - *src_len = round_down(-ret, block_bytes(c)); + while (srclen > block_bytes(c) && + (ret = LZ4_compress_destSize(src_data, dst_data, + &srclen, dst->bi_iter.bi_size, + workspace)) && + (srclen & (block_bytes(c) - 1))) { + /* Round down to nearest block and try again: */ + srclen = round_down(srclen, block_bytes(c)); } mempool_free(workspace, &c->lz4_workspace_pool); - if (ret) + if (!ret) goto err; + + *src_len = srclen; + *dst_len = ret; break; } case BCH_COMPRESSION_GZIP: { diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 219b60a3..57bfb4a6 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -559,10 +559,10 @@ static void btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, return; err: bch2_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k); - bch2_fs_bug(c, "%s btree pointer %s: bucket %zi prio %i " + bch2_fs_bug(c, "%s btree pointer %s: bucket %zi " "gen %i last_gc %i mark %08x", err, buf, PTR_BUCKET_NR(ca, ptr), - g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen, + PTR_BUCKET(ca, ptr)->mark.gen, ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)], (unsigned) g->mark.counter); } @@ -1769,10 +1769,9 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, bad_ptr: bch2_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), e.s_c); - bch2_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu prio %i " + bch2_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu " "gen %i last_gc %i mark 0x%08x", - buf, PTR_BUCKET_NR(ca, ptr), - g->read_prio, PTR_BUCKET(ca, ptr)->mark.gen, + buf, PTR_BUCKET_NR(ca, ptr), PTR_BUCKET(ca, ptr)->mark.gen, ca->oldest_gens[PTR_BUCKET_NR(ca, ptr)], (unsigned) g->mark.counter); return; diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 4a680ade..803611d1 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -757,7 +757,7 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter, flags |= BCH_READ_IS_LAST; if (pick.ca) { - PTR_BUCKET(pick.ca, &pick.ptr)->read_prio = + PTR_BUCKET(pick.ca, &pick.ptr)->prio[READ] = c->prio_clock[READ].hand; bch2_read_extent(c, rbio, k, &pick, flags); @@ -1775,16 +1775,17 @@ ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) return ret; } -int bch2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +int bch2_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct file *file = vmf->vma->vm_file; + struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; struct bch_fs *c = inode->i_sb->s_fs_info; int ret = VM_FAULT_LOCKED; sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(file); /* * Not strictly necessary, but helps avoid dio writes livelocking in diff --git a/libbcachefs/fs-io.h b/libbcachefs/fs-io.h index f3fcf947..3fcc1e7d 100644 --- a/libbcachefs/fs-io.h +++ b/libbcachefs/fs-io.h @@ -29,7 +29,7 @@ long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); loff_t bch2_llseek(struct file *, loff_t, int); -int bch2_page_mkwrite(struct vm_area_struct *, struct vm_fault *); +int 
bch2_page_mkwrite(struct vm_fault *); void bch2_invalidatepage(struct page *, unsigned int, unsigned int); int bch2_releasepage(struct page *, gfp_t); int bch2_migrate_page(struct address_space *, struct page *, diff --git a/libbcachefs/io.c b/libbcachefs/io.c index d588f6ab..1145a190 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -1300,11 +1300,11 @@ static void bch2_read_iter(struct bch_fs *c, struct bch_read_bio *rbio, flags |= BCH_READ_IS_LAST; if (pick.ca) { - PTR_BUCKET(pick.ca, &pick.ptr)->read_prio = + PTR_BUCKET(pick.ca, &pick.ptr)->prio[READ] = c->prio_clock[READ].hand; bch2_read_extent_iter(c, rbio, bvec_iter, - k, &pick, flags); + k, &pick, flags); flags &= ~BCH_READ_MAY_REUSE_BIO; } else { diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 510066a2..92364fea 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -164,21 +164,15 @@ static void journal_seq_blacklist_flush(struct journal *j, mutex_unlock(&j->blacklist_lock); __bch2_btree_iter_init(&iter, c, n.btree_id, n.pos, 0, 0, 0); -redo_peek: + b = bch2_btree_iter_peek_node(&iter); /* The node might have already been rewritten: */ if (b->data->keys.seq == n.seq) { - ret = bch2_btree_node_rewrite(&iter, b, &cl); + ret = bch2_btree_node_rewrite(c, &iter, n.seq, 0); if (ret) { bch2_btree_iter_unlock(&iter); - closure_sync(&cl); - - if (ret == -EAGAIN || - ret == -EINTR) - goto redo_peek; - bch2_fs_fatal_error(c, "error %i rewriting btree node with blacklisted journal seq", ret); @@ -190,8 +184,6 @@ redo_peek: bch2_btree_iter_unlock(&iter); } - closure_sync(&cl); - for (i = 0;; i++) { struct btree_interior_update *as; struct pending_btree_node_free *d; diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index 8680b100..8c9e3c25 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -156,9 +156,9 @@ next: * This walks the btree, and for any node on the relevant device it moves the * node elsewhere. */ -static int bch2_move_btree_off(struct bch_dev *ca, enum btree_id id) +static int bch2_move_btree_off(struct bch_fs *c, struct bch_dev *ca, + enum btree_id id) { - struct bch_fs *c = ca->fs; struct btree_iter iter; struct closure cl; struct btree *b; @@ -170,22 +170,11 @@ static int bch2_move_btree_off(struct bch_dev *ca, enum btree_id id) for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key); -retry: + if (!bch2_extent_has_device(e, ca->dev_idx)) continue; - ret = bch2_btree_node_rewrite(&iter, b, &cl); - if (ret == -EINTR || ret == -ENOSPC) { - /* - * Drop locks to upgrade locks or wait on - * reserve: after retaking, recheck in case we - * raced. 
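Note: the rewrite call sites in this hunk and in the journal blacklist flush above lose their closure/-EINTR retry loops; bch2_btree_node_rewrite() now takes the filesystem, the iterator, the node's sequence number and a flags word, and retrying is presumably handled internally. A minimal sketch of the new calling convention follows; the helper name and exact parameter types are assumptions, not taken from this patch.

	static int example_rewrite_node(struct bch_fs *c, struct btree_iter *iter,
					struct btree *b)
	{
		/* the node's seq lets the rewrite notice a racing rewrite */
		return bch2_btree_node_rewrite(c, iter, b->data->keys.seq, 0);
	}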
- */ - bch2_btree_iter_unlock(&iter); - closure_sync(&cl); - b = bch2_btree_iter_peek_node(&iter); - goto retry; - } + ret = bch2_btree_node_rewrite(c, &iter, b->data->keys.seq, 0); if (ret) { bch2_btree_iter_unlock(&iter); return ret; @@ -268,7 +257,7 @@ int bch2_move_metadata_off_device(struct bch_dev *ca) /* 1st, Move the btree nodes off the device */ for (i = 0; i < BTREE_ID_NR; i++) { - ret = bch2_move_btree_off(ca, i); + ret = bch2_move_btree_off(c, ca, i); if (ret) return ret; } diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index fa020af3..130b130f 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -442,7 +442,7 @@ static const char *bch2_blkdev_open(const char *path, fmode_t mode, return "failed to open device"; if (mode & FMODE_WRITE) - bdev_get_queue(bdev)->backing_dev_info.capabilities + bdev_get_queue(bdev)->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; *ret = bdev; diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 528538b5..2a3947e2 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -148,7 +148,7 @@ int bch2_congested(struct bch_fs *c, int bdi_bits) if (bdi_bits & (1 << WB_sync_congested)) { /* Reads - check all devices: */ for_each_readable_member(ca, c, i) { - bdi = blk_get_backing_dev_info(ca->disk_sb.bdev); + bdi = ca->disk_sb.bdev->bd_bdi; if (bdi_congested(bdi, bdi_bits)) { ret = 1; @@ -162,7 +162,7 @@ int bch2_congested(struct bch_fs *c, int bdi_bits) rcu_read_lock(); group_for_each_dev(ca, grp, i) { - bdi = blk_get_backing_dev_info(ca->disk_sb.bdev); + bdi = ca->disk_sb.bdev->bd_bdi; if (bdi_congested(bdi, bdi_bits)) { ret = 1; @@ -1144,7 +1144,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) movinggc_reserve, GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_NONE], reserve_none, GFP_KERNEL) || !init_fifo(&ca->free_inc, free_inc_reserve, GFP_KERNEL) || - !init_heap(&ca->alloc_heap, heap_size, GFP_KERNEL) || + !init_heap(&ca->alloc_heap, free_inc_reserve, GFP_KERNEL) || !init_heap(&ca->copygc_heap,heap_size, GFP_KERNEL) || !(ca->oldest_gens = kvpmalloc(ca->mi.nbuckets * sizeof(u8), diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 3c47f1cb..edfa85b0 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -24,6 +24,7 @@ #include #include +#include #include "util.h" @@ -124,6 +125,7 @@ write_attribute(trigger_journal_flush); write_attribute(trigger_btree_coalesce); write_attribute(trigger_gc); write_attribute(prune_cache); +rw_attribute(btree_gc_periodic); read_attribute(uuid); read_attribute(minor); @@ -319,6 +321,8 @@ SHOW(bch2_fs) sysfs_print(read_realloc_races, atomic_long_read(&c->read_realloc_races)); + sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic); + sysfs_printf(foreground_write_ratelimit_enabled, "%i", c->foreground_write_ratelimit_enabled); sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); @@ -367,6 +371,14 @@ STORE(__bch2_fs) sysfs_strtoul(foreground_write_ratelimit_enabled, c->foreground_write_ratelimit_enabled); + if (attr == &sysfs_btree_gc_periodic) { + ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic) + ?: (ssize_t) size; + + wake_up_process(c->gc_thread); + return ret; + } + if (attr == &sysfs_copy_gc_enabled) { struct bch_dev *ca; unsigned i; diff --git a/libbcachefs/util.c b/libbcachefs/util.c index 6ffc9811..5400dec5 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "util.h" diff --git a/libbcachefs/util.h b/libbcachefs/util.h index d7511aeb..927aa3a9 100644 
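For reference, the blkcipher-to-skcipher conversion above follows one pattern throughout: allocate with crypto_alloc_skcipher(), set the key with crypto_skcipher_setkey(), then build an on-stack request per operation. A minimal sketch of the per-operation part, mirroring do_encrypt_sg(); the helper name is invented for illustration.

	static void example_encrypt_sg(struct crypto_skcipher *tfm, void *iv,
				       struct scatterlist *sg, size_t len)
	{
		SKCIPHER_REQUEST_ON_STACK(req, tfm);
		int ret;

		skcipher_request_set_tfm(req, tfm);
		/* encrypt in place: src == dst, the nonce travels as the IV */
		skcipher_request_set_crypt(req, sg, sg, len, iv);

		ret = crypto_skcipher_encrypt(req);
		BUG_ON(ret);
	}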
--- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -5,9 +5,9 @@ #include #include #include -#include #include #include +#include #include #include #include diff --git a/linux/blkdev.c b/linux/blkdev.c index 7fe638f6..70995c8b 100644 --- a/linux/blkdev.c +++ b/linux/blkdev.c @@ -194,6 +194,8 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, bdev->bd_sync_fd = sync_fd; bdev->bd_holder = holder; bdev->bd_disk = &bdev->__bd_disk; + bdev->bd_bdi = &bdev->__bd_bdi; + bdev->queue.backing_dev_info = bdev->bd_bdi; return bdev; } diff --git a/linux/crypto/api.c b/linux/crypto/api.c index 2d24630e..63efee30 100644 --- a/linux/crypto/api.c +++ b/linux/crypto/api.c @@ -201,3 +201,36 @@ int crypto_register_alg(struct crypto_alg *alg) return 0; } + +/* skcipher: */ + +static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm); + struct skcipher_alg *alg = crypto_skcipher_alg(skcipher); + + skcipher->setkey = alg->setkey; + skcipher->encrypt = alg->encrypt; + skcipher->decrypt = alg->decrypt; + skcipher->ivsize = alg->ivsize; + skcipher->keysize = alg->max_keysize; + + if (alg->init) + return alg->init(skcipher); + + return 0; +} + +static const struct crypto_type crypto_skcipher_type2 = { + .extsize = crypto_alg_extsize, + .init_tfm = crypto_skcipher_init_tfm, + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_BLKCIPHER_MASK, + .tfmsize = offsetof(struct crypto_skcipher, base), +}; + +struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, + u32 type, u32 mask) +{ + return crypto_alloc_tfm(alg_name, &crypto_skcipher_type2, type, mask); +} diff --git a/linux/crypto/chacha20_generic.c b/linux/crypto/chacha20_generic.c index 7ac68321..df4c0e04 100644 --- a/linux/crypto/chacha20_generic.c +++ b/linux/crypto/chacha20_generic.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -25,10 +26,10 @@ struct chacha20_ctx { u32 key[8]; }; -static int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key, +static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keysize) { - struct chacha20_ctx *ctx = crypto_tfm_ctx(tfm); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); int i; if (keysize != CHACHA20_KEY_SIZE) @@ -40,19 +41,18 @@ static int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key, return 0; } -static int crypto_chacha20_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned nbytes) +static int crypto_chacha20_crypt(struct skcipher_request *req) { - struct chacha20_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct scatterlist *sg = src; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct scatterlist *sg = req->src; + unsigned nbytes = req->cryptlen; u32 iv[4]; int ret; - BUG_ON(src != dst); + BUG_ON(req->src != req->dst); - memcpy(iv, desc->info, sizeof(iv)); + memcpy(iv, req->iv, sizeof(iv)); while (1) { ret = crypto_stream_chacha20_xor_ic(sg_virt(sg), @@ -78,22 +78,21 @@ static int crypto_chacha20_crypt(struct blkcipher_desc *desc, return 0; } -static struct crypto_alg alg = { - .cra_name = "chacha20", - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct chacha20_ctx), - .cra_u = { - .blkcipher = { - .setkey = crypto_chacha20_setkey, - .encrypt = crypto_chacha20_crypt, - .decrypt = crypto_chacha20_crypt, - }, - }, +static struct 
skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = crypto_chacha20_crypt, + .decrypt = crypto_chacha20_crypt, }; __attribute__((constructor(110))) static int chacha20_generic_mod_init(void) { - return crypto_register_alg(&alg); + return crypto_register_alg(&alg.base); } diff --git a/linux/lz4_compress.c b/linux/lz4_compress.c index 808fe93e..091b57bb 100644 --- a/linux/lz4_compress.c +++ b/linux/lz4_compress.c @@ -1,19 +1,16 @@ /* * LZ4 - Fast LZ compression algorithm - * Copyright (C) 2011-2012, Yann Collet. - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - + * Copyright (C) 2011 - 2016, Yann Collet. + * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php) * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. - * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -25,200 +22,909 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
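The LZ4 import that begins here replaces the old lz4_compress() and its in/out dst_len convention with the upstream-style API used by libbcachefs/compress.c above: LZ4_compress_destSize() is given the output capacity, consumes as much input as fits, reports the consumed amount through its pointer argument and returns the compressed size, 0 meaning failure. A small sketch of that contract; the wrapper and parameter names are invented.

	static int example_compress_bounded(const void *src, unsigned src_len,
					    void *dst, unsigned dst_capacity,
					    void *workspace, unsigned *src_used)
	{
		int srclen = src_len;
		int ret = LZ4_compress_destSize(src, dst, &srclen,
						dst_capacity, workspace);

		*src_used = srclen;	/* input bytes actually consumed */
		return ret;		/* compressed bytes, or 0 on failure */
	}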
- * * You can contact the author at : - * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - * - LZ4 source repository : http://code.google.com/p/lz4/ + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 * - * Changed for kernel use by: - * Chanho Min + * Changed for kernel usage by: + * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> */ -#include -#include +/*-************************************ + * Dependencies + **************************************/ #include -#include #include "lz4defs.h" +#include +#include -#define LZ4_HASH_VALUE(p, _table) \ - __HASH_VALUE(p, MEMORY_USAGE - ilog2(sizeof(_table[0]))) +static const int LZ4_minLength = (MFLIMIT + 1); +static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT - 1)); -struct lz4_hash_table { - const u8 *(*add)(const struct lz4_hash_table, const u8 *); - void *ctx; - const u8 *base; -}; - -#if __SIZEOF_POINTER__ == 4 -static inline const u8 *hash_table_add32(const struct lz4_hash_table hash, - const u8 *ip) +/*-****************************** + * Compression functions + ********************************/ +static FORCE_INLINE U32 LZ4_hash4( + U32 sequence, + tableType_t const tableType) { - const u8 **table = hash.ctx; - - swap(table[LZ4_HASH_VALUE(ip, table)], ip); - return ip; + if (tableType == byU16) + return ((sequence * 2654435761U) + >> ((MINMATCH * 8) - (LZ4_HASHLOG + 1))); + else + return ((sequence * 2654435761U) + >> ((MINMATCH * 8) - LZ4_HASHLOG)); } + +static FORCE_INLINE U32 LZ4_hash5( + U64 sequence, + tableType_t const tableType) +{ + const U32 hashLog = (tableType == byU16) + ? LZ4_HASHLOG + 1 + : LZ4_HASHLOG; + +#if LZ4_LITTLE_ENDIAN + static const U64 prime5bytes = 889523592379ULL; + + return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); #else -static inline const u8 *hash_table_add32(const struct lz4_hash_table hash, - const u8 *ip) -{ - u32 *table = hash.ctx; - size_t offset = ip - hash.base; + static const U64 prime8bytes = 11400714785074694791ULL; - swap(table[LZ4_HASH_VALUE(ip, table)], offset); - return hash.base + offset; + return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); +#endif } + +static FORCE_INLINE U32 LZ4_hashPosition( + const void *p, + tableType_t const tableType) +{ +#if LZ4_ARCH64 + if (tableType == byU32) + return LZ4_hash5(LZ4_read_ARCH(p), tableType); #endif -static inline const u8 *hash_table_add16(const struct lz4_hash_table hash, - const u8 *ip) -{ - u16 *table = hash.ctx; - size_t offset = ip - hash.base; - - swap(table[LZ4_HASH_VALUE(ip, table)], offset); - return hash.base + offset; + return LZ4_hash4(LZ4_read32(p), tableType); } -static inline const u8 *find_match(const struct lz4_hash_table hash, - const u8 **ip, const u8 *anchor, - const u8 *start, const u8 *mflimit) +static void LZ4_putPositionOnHash( + const BYTE *p, + U32 h, + void *tableBase, + tableType_t const tableType, + const BYTE *srcBase) { - int findmatchattempts = (1U << SKIPSTRENGTH) + 3; + switch (tableType) { + case byPtr: + { + const BYTE **hashTable = (const BYTE **)tableBase; - while (*ip <= mflimit) { - const u8 *ref = hash.add(hash, *ip); + hashTable[h] = p; + return; + } + case byU32: + { + U32 *hashTable = (U32 *) tableBase; - if (ref >= *ip - MAX_DISTANCE && A32(ref) == A32(*ip)) { - /* found match: */ - while (*ip > anchor && - ref > start && - unlikely((*ip)[-1] == ref[-1])) { - (*ip)--; - ref--; - } + hashTable[h] = (U32)(p - srcBase); + return; + } + case byU16: + { + U16 *hashTable = (U16 *) tableBase; - return ref; - } + 
hashTable[h] = (U16)(p - srcBase); + return; + } + } +} - *ip += findmatchattempts++ >> SKIPSTRENGTH; +static FORCE_INLINE void LZ4_putPosition( + const BYTE *p, + void *tableBase, + tableType_t tableType, + const BYTE *srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +static const BYTE *LZ4_getPositionOnHash( + U32 h, + void *tableBase, + tableType_t tableType, + const BYTE *srcBase) +{ + if (tableType == byPtr) { + const BYTE **hashTable = (const BYTE **) tableBase; + + return hashTable[h]; } - return NULL; + if (tableType == byU32) { + const U32 * const hashTable = (U32 *) tableBase; + + return hashTable[h] + srcBase; + } + + { + /* default, to ensure a return */ + const U16 * const hashTable = (U16 *) tableBase; + + return hashTable[h] + srcBase; + } } -static inline int length_len(unsigned length) +static FORCE_INLINE const BYTE *LZ4_getPosition( + const BYTE *p, + void *tableBase, + tableType_t tableType, + const BYTE *srcBase) { - return length / 255 + 1; + U32 const h = LZ4_hashPosition(p, tableType); + + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); } + /* - * LZ4_compressCtx : - * ----------------- - * Compress 'isize' bytes from 'source' into an output buffer 'dest' of - * maximum size 'maxOutputSize'. * If it cannot achieve it, compression - * will stop, and result of the function will be zero. - * return : the number of bytes written in buffer 'dest', or 0 if the - * compression fails + * LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time */ -static inline int lz4_compressctx(const struct lz4_hash_table hash, - const u8 *src, size_t src_len, - u8 *dst, size_t *dst_len) +static FORCE_INLINE int LZ4_compress_generic( + LZ4_stream_t_internal * const dictPtr, + const char * const source, + char * const dest, + const int inputSize, + const int maxOutputSize, + const limitedOutput_directive outputLimited, + const tableType_t tableType, + const dict_directive dict, + const dictIssue_directive dictIssue, + const U32 acceleration) { - const u8 *ip = src, *anchor = ip, *ref; - const u8 *const iend = ip + src_len; - const u8 *const mflimit = iend - MFLIMIT; - const u8 *const matchlimit = iend - LASTLITERALS; - u8 *op = dst, *token; - u8 *const oend = op + *dst_len; - size_t literal_len, match_len, match_offset; + const BYTE *ip = (const BYTE *) source; + const BYTE *base; + const BYTE *lowLimit; + const BYTE * const lowRefLimit = ip - dictPtr->dictSize; + const BYTE * const dictionary = dictPtr->dictionary; + const BYTE * const dictEnd = dictionary + dictPtr->dictSize; + const size_t dictDelta = dictEnd - (const BYTE *)source; + const BYTE *anchor = (const BYTE *) source; + const BYTE * const iend = ip + inputSize; + const BYTE * const mflimit = iend - MFLIMIT; + const BYTE * const matchlimit = iend - LASTLITERALS; - /* Init */ - memset(hash.ctx, 0, LZ4_MEM_COMPRESS); - hash.add(hash, ip); + BYTE *op = (BYTE *) dest; + BYTE * const olimit = op + maxOutputSize; - /* Always start with a literal: */ + U32 forwardH; + size_t refDelta = 0; + + /* Init conditions */ + if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { + /* Unsupported inputSize, too large (or negative) */ + return 0; + } + + switch (dict) { + case noDict: + default: + base = (const BYTE *)source; + lowLimit = (const BYTE *)source; + break; + case withPrefix64k: + base = (const BYTE *)source - dictPtr->currentOffset; + lowLimit = (const BYTE *)source - dictPtr->dictSize; + break; + case 
usingExtDict: + base = (const BYTE *)source - dictPtr->currentOffset; + lowLimit = (const BYTE *)source; + break; + } + + if ((tableType == byU16) + && (inputSize >= LZ4_64Klimit)) { + /* Size too large (not within 64K limit) */ + return 0; + } + + if (inputSize < LZ4_minLength) { + /* Input too small, no compression (all literals) */ + goto _last_literals; + } + + /* First Byte */ + LZ4_putPosition(ip, dictPtr->hashTable, tableType, base); ip++; + forwardH = LZ4_hashPosition(ip, tableType); - while ((ref = find_match(hash, &ip, anchor, src, mflimit))) { - /* - * We found a match; @ip now points to the match and @ref points - * to the prior part of the input we matched with. Everything up - * to @anchor has been encoded; the range from @anchor to @ip - * didn't match and now has to be encoded as a literal: - */ - literal_len = ip - anchor; - match_offset = ip - ref; + /* Main Loop */ + for ( ; ; ) { + const BYTE *match; + BYTE *token; - /* MINMATCH bytes already matched from find_match(): */ - ip += MINMATCH; - ref += MINMATCH; - match_len = common_length(ip, ref, matchlimit); - ip += match_len; + /* Find a match */ + { + const BYTE *forwardIp = ip; + unsigned int step = 1; + unsigned int searchMatchNb = acceleration << LZ4_SKIPTRIGGER; - /* check output limit */ - if (unlikely(op + - 1 + /* token */ - 2 + /* match ofset */ - literal_len + - length_len(literal_len) + - length_len(match_len) + - LASTLITERALS > oend)) - break; + do { + U32 const h = forwardH; - token = op++; - *token = encode_length(&op, literal_len) << ML_BITS; - MEMCPY_ADVANCE_CHUNKED(op, anchor, literal_len); - PUT_LE16_ADVANCE(op, match_offset); - *token += encode_length(&op, match_len); + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_SKIPTRIGGER); + + if (unlikely(forwardIp > mflimit)) + goto _last_literals; + + match = LZ4_getPositionOnHash(h, + dictPtr->hashTable, + tableType, base); + + if (dict == usingExtDict) { + if (match < (const BYTE *)source) { + refDelta = dictDelta; + lowLimit = dictionary; + } else { + refDelta = 0; + lowLimit = (const BYTE *)source; + } } + + forwardH = LZ4_hashPosition(forwardIp, + tableType); + + LZ4_putPositionOnHash(ip, h, dictPtr->hashTable, + tableType, base); + } while (((dictIssue == dictSmall) + ? (match < lowRefLimit) + : 0) + || ((tableType == byU16) + ? 
0 + : (match + MAX_DISTANCE < ip)) + || (LZ4_read32(match + refDelta) + != LZ4_read32(ip))); + } + + /* Catch up */ + while (((ip > anchor) & (match + refDelta > lowLimit)) + && (unlikely(ip[-1] == match[refDelta - 1]))) { + ip--; + match--; + } + + /* Encode Literals */ + { + unsigned const int litLength = (unsigned int)(ip - anchor); + + token = op++; + + if ((outputLimited) && + /* Check output buffer overflow */ + (unlikely(op + litLength + + (2 + 1 + LASTLITERALS) + + (litLength / 255) > olimit))) + return 0; + + if (litLength >= RUN_MASK) { + int len = (int)litLength - RUN_MASK; + + *token = (RUN_MASK << ML_BITS); + + for (; len >= 255; len -= 255) + *op++ = 255; + *op++ = (BYTE)len; + } else + *token = (BYTE)(litLength << ML_BITS); + + /* Copy Literals */ + LZ4_wildCopy(op, anchor, op + litLength); + op += litLength; + } + +_next_match: + /* Encode Offset */ + LZ4_writeLE16(op, (U16)(ip - match)); + op += 2; + + /* Encode MatchLength */ + { + unsigned int matchCode; + + if ((dict == usingExtDict) + && (lowLimit == dictionary)) { + const BYTE *limit; + + match += refDelta; + limit = ip + (dictEnd - match); + + if (limit > matchlimit) + limit = matchlimit; + + matchCode = LZ4_count(ip + MINMATCH, + match + MINMATCH, limit); + + ip += MINMATCH + matchCode; + + if (ip == limit) { + unsigned const int more = LZ4_count(ip, + (const BYTE *)source, + matchlimit); + + matchCode += more; + ip += more; + } + } else { + matchCode = LZ4_count(ip + MINMATCH, + match + MINMATCH, matchlimit); + ip += MINMATCH + matchCode; + } + + if (outputLimited && + /* Check output buffer overflow */ + (unlikely(op + + (1 + LASTLITERALS) + + (matchCode >> 8) > olimit))) + return 0; + + if (matchCode >= ML_MASK) { + *token += ML_MASK; + matchCode -= ML_MASK; + LZ4_write32(op, 0xFFFFFFFF); + + while (matchCode >= 4 * 255) { + op += 4; + LZ4_write32(op, 0xFFFFFFFF); + matchCode -= 4 * 255; + } + + op += matchCode / 255; + *op++ = (BYTE)(matchCode % 255); + } else + *token += (BYTE)(matchCode); + } anchor = ip; + + /* Test end of chunk */ + if (ip > mflimit) + break; + + /* Fill table */ + LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base); + + /* Test next position */ + match = LZ4_getPosition(ip, dictPtr->hashTable, + tableType, base); + + if (dict == usingExtDict) { + if (match < (const BYTE *)source) { + refDelta = dictDelta; + lowLimit = dictionary; + } else { + refDelta = 0; + lowLimit = (const BYTE *)source; + } + } + + LZ4_putPosition(ip, dictPtr->hashTable, tableType, base); + + if (((dictIssue == dictSmall) ? 
(match >= lowRefLimit) : 1) + && (match + MAX_DISTANCE >= ip) + && (LZ4_read32(match + refDelta) == LZ4_read32(ip))) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); } - /* Encode remaining input as literal: */ - literal_len = iend - anchor; - if (unlikely(op + - 1 + - literal_len + - length_len(literal_len) > oend)) { - /* Return how much would be able to fit: */ - ssize_t remaining = oend - op; - ssize_t encoded = anchor - src; +_last_literals: + /* Encode Last Literals */ + { + size_t const lastRun = (size_t)(iend - anchor); - remaining -= length_len(remaining) + 1; + if ((outputLimited) && + /* Check output buffer overflow */ + ((op - (BYTE *)dest) + lastRun + 1 + + ((lastRun + 255 - RUN_MASK) / 255) > (U32)maxOutputSize)) + return 0; - return -max(encoded + remaining, 1L); + if (lastRun >= RUN_MASK) { + size_t accumulator = lastRun - RUN_MASK; + *op++ = RUN_MASK << ML_BITS; + for (; accumulator >= 255; accumulator -= 255) + *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRun << ML_BITS); + } + + memcpy(op, anchor, lastRun); + + op += lastRun; } - token = op++; - *token = encode_length(&op, literal_len) << ML_BITS; - MEMCPY_ADVANCE(op, anchor, literal_len); - /* End */ - BUG_ON(op > oend); - *dst_len = op - dst; - return 0; + return (int) (((char *)op) - dest); } -__attribute__((flatten)) -int lz4_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem) +static int LZ4_compress_fast_extState( + void *state, + const char *source, + char *dest, + int inputSize, + int maxOutputSize, + int acceleration) { - if (src_len < LZ4_64KLIMIT) { - const struct lz4_hash_table hash = { - .add = hash_table_add16, - .ctx = wrkmem, - .base = src, - }; + LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse; +#if LZ4_ARCH64 + const tableType_t tableType = byU32; +#else + const tableType_t tableType = byPtr; +#endif - return lz4_compressctx(hash, src, src_len, dst, dst_len); + LZ4_resetStream((LZ4_stream_t *)state); + + if (acceleration < 1) + acceleration = LZ4_ACCELERATION_DEFAULT; + + if (maxOutputSize >= LZ4_COMPRESSBOUND(inputSize)) { + if (inputSize < LZ4_64Klimit) + return LZ4_compress_generic(ctx, source, + dest, inputSize, 0, + noLimit, byU16, noDict, + noDictIssue, acceleration); + else + return LZ4_compress_generic(ctx, source, + dest, inputSize, 0, + noLimit, tableType, noDict, + noDictIssue, acceleration); } else { - const struct lz4_hash_table hash = { - .add = hash_table_add32, - .ctx = wrkmem, - .base = src, - }; - - return lz4_compressctx(hash, src, src_len, dst, dst_len); + if (inputSize < LZ4_64Klimit) + return LZ4_compress_generic(ctx, source, + dest, inputSize, + maxOutputSize, limitedOutput, byU16, noDict, + noDictIssue, acceleration); + else + return LZ4_compress_generic(ctx, source, + dest, inputSize, + maxOutputSize, limitedOutput, tableType, noDict, + noDictIssue, acceleration); + } +} + +int LZ4_compress_fast(const char *source, char *dest, int inputSize, + int maxOutputSize, int acceleration, void *wrkmem) +{ + return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize, + maxOutputSize, acceleration); +} + +int LZ4_compress_default(const char *source, char *dest, int inputSize, + int maxOutputSize, void *wrkmem) +{ + return LZ4_compress_fast(source, dest, inputSize, + maxOutputSize, LZ4_ACCELERATION_DEFAULT, wrkmem); +} + +/*-****************************** + * *_destSize() variant + 
********************************/ +static int LZ4_compress_destSize_generic( + LZ4_stream_t_internal * const ctx, + const char * const src, + char * const dst, + int * const srcSizePtr, + const int targetDstSize, + const tableType_t tableType) +{ + const BYTE *ip = (const BYTE *) src; + const BYTE *base = (const BYTE *) src; + const BYTE *lowLimit = (const BYTE *) src; + const BYTE *anchor = ip; + const BYTE * const iend = ip + *srcSizePtr; + const BYTE * const mflimit = iend - MFLIMIT; + const BYTE * const matchlimit = iend - LASTLITERALS; + + BYTE *op = (BYTE *) dst; + BYTE * const oend = op + targetDstSize; + BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */ + - 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */; + BYTE * const oMaxMatch = op + targetDstSize + - (LASTLITERALS + 1 /* token */); + BYTE * const oMaxSeq = oMaxLit - 1 /* token */; + + U32 forwardH; + + /* Init conditions */ + /* Impossible to store anything */ + if (targetDstSize < 1) + return 0; + /* Unsupported input size, too large (or negative) */ + if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) + return 0; + /* Size too large (not within 64K limit) */ + if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit)) + return 0; + /* Input too small, no compression (all literals) */ + if (*srcSizePtr < LZ4_minLength) + goto _last_literals; + + /* First Byte */ + *srcSizePtr = 0; + LZ4_putPosition(ip, ctx->hashTable, tableType, base); + ip++; forwardH = LZ4_hashPosition(ip, tableType); + + /* Main Loop */ + for ( ; ; ) { + const BYTE *match; + BYTE *token; + + /* Find a match */ + { + const BYTE *forwardIp = ip; + unsigned int step = 1; + unsigned int searchMatchNb = 1 << LZ4_SKIPTRIGGER; + + do { + U32 h = forwardH; + + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_SKIPTRIGGER); + + if (unlikely(forwardIp > mflimit)) + goto _last_literals; + + match = LZ4_getPositionOnHash(h, ctx->hashTable, + tableType, base); + forwardH = LZ4_hashPosition(forwardIp, + tableType); + LZ4_putPositionOnHash(ip, h, + ctx->hashTable, tableType, + base); + + } while (((tableType == byU16) + ? 
0 + : (match + MAX_DISTANCE < ip)) + || (LZ4_read32(match) != LZ4_read32(ip))); + } + + /* Catch up */ + while ((ip > anchor) + && (match > lowLimit) + && (unlikely(ip[-1] == match[-1]))) { + ip--; + match--; + } + + /* Encode Literal length */ + { + unsigned int litLength = (unsigned int)(ip - anchor); + + token = op++; + if (op + ((litLength + 240) / 255) + + litLength > oMaxLit) { + /* Not enough space for a last match */ + op--; + goto _last_literals; + } + if (litLength >= RUN_MASK) { + unsigned int len = litLength - RUN_MASK; + *token = (RUN_MASK<= 255; len -= 255) + *op++ = 255; + *op++ = (BYTE)len; + } else + *token = (BYTE)(litLength << ML_BITS); + + /* Copy Literals */ + LZ4_wildCopy(op, anchor, op + litLength); + op += litLength; + } + +_next_match: + /* Encode Offset */ + LZ4_writeLE16(op, (U16)(ip - match)); op += 2; + + /* Encode MatchLength */ + { + size_t matchLength = LZ4_count(ip + MINMATCH, + match + MINMATCH, matchlimit); + + if (op + ((matchLength + 240)/255) > oMaxMatch) { + /* Match description too long : reduce it */ + matchLength = (15 - 1) + (oMaxMatch - op) * 255; + } + ip += MINMATCH + matchLength; + + if (matchLength >= ML_MASK) { + *token += ML_MASK; + matchLength -= ML_MASK; + while (matchLength >= 255) { + matchLength -= 255; + *op++ = 255; + } + *op++ = (BYTE)matchLength; + } else + *token += (BYTE)(matchLength); + } + + anchor = ip; + + /* Test end of block */ + if (ip > mflimit) + break; + if (op > oMaxSeq) + break; + + /* Fill table */ + LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base); + + /* Test next position */ + match = LZ4_getPosition(ip, ctx->hashTable, tableType, base); + LZ4_putPosition(ip, ctx->hashTable, tableType, base); + + if ((match + MAX_DISTANCE >= ip) + && (LZ4_read32(match) == LZ4_read32(ip))) { + token = op++; *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); + } + +_last_literals: + /* Encode Last Literals */ + { + size_t lastRunSize = (size_t)(iend - anchor); + + if (op + 1 /* token */ + + ((lastRunSize + 240) / 255) /* litLength */ + + lastRunSize /* literals */ > oend) { + /* adapt lastRunSize to fill 'dst' */ + lastRunSize = (oend - op) - 1; + lastRunSize -= (lastRunSize + 240) / 255; + } + ip = anchor + lastRunSize; + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + + *op++ = RUN_MASK << ML_BITS; + for (; accumulator >= 255; accumulator -= 255) + *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize<= LZ4_COMPRESSBOUND(*srcSizePtr)) { + /* compression success is guaranteed */ + return LZ4_compress_fast_extState( + state, src, dst, *srcSizePtr, + targetDstSize, 1); + } else { + if (*srcSizePtr < LZ4_64Klimit) + return LZ4_compress_destSize_generic( + &state->internal_donotuse, + src, dst, srcSizePtr, + targetDstSize, byU16); + else + return LZ4_compress_destSize_generic( + &state->internal_donotuse, + src, dst, srcSizePtr, + targetDstSize, tableType); + } +} + + +int LZ4_compress_destSize( + const char *src, + char *dst, + int *srcSizePtr, + int targetDstSize, + void *wrkmem) +{ + return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr, + targetDstSize); +} + +/*-****************************** + * Streaming functions + ********************************/ +void LZ4_resetStream(LZ4_stream_t *LZ4_stream) +{ + memset(LZ4_stream, 0, sizeof(LZ4_stream_t)); +} + +int LZ4_loadDict(LZ4_stream_t *LZ4_dict, + const char *dictionary, int dictSize) +{ + LZ4_stream_t_internal *dict = 
&LZ4_dict->internal_donotuse; + const BYTE *p = (const BYTE *)dictionary; + const BYTE * const dictEnd = p + dictSize; + const BYTE *base; + + if ((dict->initCheck) + || (dict->currentOffset > 1 * GB)) { + /* Uninitialized structure, or reuse overflow */ + LZ4_resetStream(LZ4_dict); + } + + if (dictSize < (int)HASH_UNIT) { + dict->dictionary = NULL; + dict->dictSize = 0; + return 0; + } + + if ((dictEnd - p) > 64 * KB) + p = dictEnd - 64 * KB; + dict->currentOffset += 64 * KB; + base = p - dict->currentOffset; + dict->dictionary = p; + dict->dictSize = (U32)(dictEnd - p); + dict->currentOffset += dict->dictSize; + + while (p <= dictEnd - HASH_UNIT) { + LZ4_putPosition(p, dict->hashTable, byU32, base); + p += 3; + } + + return dict->dictSize; +} + +static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict, + const BYTE *src) +{ + if ((LZ4_dict->currentOffset > 0x80000000) || + ((uptrval)LZ4_dict->currentOffset > (uptrval)src)) { + /* address space overflow */ + /* rescale hash table */ + U32 const delta = LZ4_dict->currentOffset - 64 * KB; + const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; + int i; + + for (i = 0; i < LZ4_HASH_SIZE_U32; i++) { + if (LZ4_dict->hashTable[i] < delta) + LZ4_dict->hashTable[i] = 0; + else + LZ4_dict->hashTable[i] -= delta; + } + LZ4_dict->currentOffset = 64 * KB; + if (LZ4_dict->dictSize > 64 * KB) + LZ4_dict->dictSize = 64 * KB; + LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; + } +} + +int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize) +{ + LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse; + const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize; + + if ((U32)dictSize > 64 * KB) { + /* useless to define a dictionary > 64 * KB */ + dictSize = 64 * KB; + } + if ((U32)dictSize > dict->dictSize) + dictSize = dict->dictSize; + + memmove(safeBuffer, previousDictEnd - dictSize, dictSize); + + dict->dictionary = (const BYTE *)safeBuffer; + dict->dictSize = (U32)dictSize; + + return dictSize; +} + +int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source, + char *dest, int inputSize, int maxOutputSize, int acceleration) +{ + LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse; + const BYTE * const dictEnd = streamPtr->dictionary + + streamPtr->dictSize; + + const BYTE *smallest = (const BYTE *) source; + + if (streamPtr->initCheck) { + /* Uninitialized structure detected */ + return 0; + } + + if ((streamPtr->dictSize > 0) && (smallest > dictEnd)) + smallest = dictEnd; + + LZ4_renormDictT(streamPtr, smallest); + + if (acceleration < 1) + acceleration = LZ4_ACCELERATION_DEFAULT; + + /* Check overlapping input/dictionary space */ + { + const BYTE *sourceEnd = (const BYTE *) source + inputSize; + + if ((sourceEnd > streamPtr->dictionary) + && (sourceEnd < dictEnd)) { + streamPtr->dictSize = (U32)(dictEnd - sourceEnd); + if (streamPtr->dictSize > 64 * KB) + streamPtr->dictSize = 64 * KB; + if (streamPtr->dictSize < 4) + streamPtr->dictSize = 0; + streamPtr->dictionary = dictEnd - streamPtr->dictSize; + } + } + + /* prefix mode : source data follows dictionary */ + if (dictEnd == (const BYTE *)source) { + int result; + + if ((streamPtr->dictSize < 64 * KB) && + (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + maxOutputSize, limitedOutput, byU32, + withPrefix64k, dictSmall, acceleration); + } else { + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + maxOutputSize, 
limitedOutput, byU32, + withPrefix64k, noDictIssue, acceleration); + } + streamPtr->dictSize += (U32)inputSize; + streamPtr->currentOffset += (U32)inputSize; + return result; + } + + /* external dictionary mode */ + { + int result; + + if ((streamPtr->dictSize < 64 * KB) && + (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + maxOutputSize, limitedOutput, byU32, + usingExtDict, dictSmall, acceleration); + } else { + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + maxOutputSize, limitedOutput, byU32, + usingExtDict, noDictIssue, acceleration); + } + streamPtr->dictionary = (const BYTE *)source; + streamPtr->dictSize = (U32)inputSize; + streamPtr->currentOffset += (U32)inputSize; + return result; } } diff --git a/linux/lz4_decompress.c b/linux/lz4_decompress.c index 0f3e42dd..5cf910d7 100644 --- a/linux/lz4_decompress.c +++ b/linux/lz4_decompress.c @@ -1,25 +1,16 @@ /* - * LZ4 Decompressor for Linux kernel - * - * Copyright (C) 2013, LG Electronics, Kyungsik Lee - * - * Based on LZ4 implementation by Yann Collet. - * * LZ4 - Fast LZ compression algorithm - * Copyright (C) 2011-2012, Yann Collet. - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * + * Copyright (C) 2011 - 2016, Yann Collet. + * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php) * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. - * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -31,286 +22,471 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
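Decompression changes the same way: lz4_decompress(), which returned 0 or -1 and reported the consumed input size, is replaced below by LZ4_decompress_safe(), which returns the number of bytes written to the output buffer and a negative value on malformed input; __bio_uncompress() above now treats any result other than the expected length as -EIO. A sketch of that check; the wrapper name is invented.

	static int example_decompress_exact(const void *src, int src_len,
					    void *dst, int expected_len)
	{
		int ret = LZ4_decompress_safe(src, dst, src_len, expected_len);

		return ret == expected_len ? 0 : -EIO;
	}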
+ * You can contact the author at : + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 * - * You can contact the author at : - * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - * - LZ4 source repository : http://code.google.com/p/lz4/ + * Changed for kernel usage by: + * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> */ -#ifndef STATIC -#include -#include -#endif +/*-************************************ + * Dependencies + **************************************/ #include - #include "lz4defs.h" +#include +#include -static const int dec32table[8] = {0, 3, 2, 3, 0, 0, 0, 0}; -#if LZ4_ARCH64 -static const int dec64table[8] = {0, 0, 0, -1, 0, 1, 2, 3}; -#else -static const int dec64table[8] = {0, 0, 0, 0, 0, 0, 0, 0}; -#endif - -static inline size_t get_length(const u8 **ip, size_t length) -{ - if (length == LENGTH_LONG) { - size_t len; - - do { - length += (len = *(*ip)++); - } while (len == 255); - } - - return length; -} - -static int lz4_uncompress(const u8 *source, u8 *dest, int osize) -{ - const u8 *ip = source; - const u8 *ref; - u8 *op = dest; - u8 * const oend = op + osize; - u8 *cpy; - unsigned token, offset; - ssize_t length; - - while (1) { - /* get runlength */ - token = *ip++; - length = get_length(&ip, token >> ML_BITS); - - /* copy literals */ - if (unlikely(op + length > oend - COPYLENGTH)) { - /* - * Error: not enough place for another match - * (min 4) + 5 literals - */ - if (op + length != oend) - goto _output_error; - - MEMCPY_ADVANCE(op, ip, length); - break; /* EOF */ - } - MEMCPY_ADVANCE_CHUNKED(op, ip, length); - - /* get match offset */ - offset = GET_LE16_ADVANCE(ip); - ref = op - offset; - - /* Error: offset create reference outside destination buffer */ - if (unlikely(ref < (u8 *const) dest)) - goto _output_error; - - /* get match length */ - length = get_length(&ip, token & ML_MASK); - length += MINMATCH; - - /* copy first STEPSIZE bytes of match: */ - if (unlikely(offset < STEPSIZE)) { - MEMCPY_ADVANCE_BYTES(op, ref, 4); - ref -= dec32table[offset]; - - memcpy(op, ref, 4); - op += STEPSIZE - 4; - ref -= dec64table[offset]; - } else { - MEMCPY_ADVANCE(op, ref, STEPSIZE); - } - length -= STEPSIZE; +/*-***************************** + * Decompression functions + *******************************/ +/* LZ4_decompress_generic() : + * This generic decompression function cover all use cases. + * It shall be instantiated several times, using different sets of directives + * Note that it is important this generic function is really inlined, + * in order to remove useless branches during compilation optimization. + */ +static FORCE_INLINE int LZ4_decompress_generic( + const char * const source, + char * const dest, + int inputSize, /* - * Note - length could have been < STEPSIZE; that's ok, length - * will now be negative and we'll just end up rewinding op: + * If endOnInput == endOnInputSize, + * this value is the max size of Output Buffer. 
*/ + int outputSize, + /* endOnOutputSize, endOnInputSize */ + int endOnInput, + /* full, partial */ + int partialDecoding, + /* only used if partialDecoding == partial */ + int targetOutputSize, + /* noDict, withPrefix64k, usingExtDict */ + int dict, + /* == dest when no prefix */ + const BYTE * const lowPrefix, + /* only if dict == usingExtDict */ + const BYTE * const dictStart, + /* note : = 0 if noDict */ + const size_t dictSize + ) +{ + /* Local Variables */ + const BYTE *ip = (const BYTE *) source; + const BYTE * const iend = ip + inputSize; - /* copy rest of match: */ - cpy = op + length; - if (cpy > oend - COPYLENGTH) { - /* Error: request to write beyond destination buffer */ - if (cpy > oend || - ref + COPYLENGTH > oend) + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + outputSize; + BYTE *cpy; + BYTE *oexit = op + targetOutputSize; + const BYTE * const lowLimit = lowPrefix - dictSize; + + const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize; + const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; + const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; + + const int safeDecode = (endOnInput == endOnInputSize); + const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB))); + + /* Special cases */ + /* targetOutputSize too high => decode everything */ + if ((partialDecoding) && (oexit > oend - MFLIMIT)) + oexit = oend - MFLIMIT; + + /* Empty output buffer */ + if ((endOnInput) && (unlikely(outputSize == 0))) + return ((inputSize == 1) && (*ip == 0)) ? 0 : -1; + + if ((!endOnInput) && (unlikely(outputSize == 0))) + return (*ip == 0 ? 1 : -1); + + /* Main Loop : decode sequences */ + while (1) { + size_t length; + const BYTE *match; + size_t offset; + + /* get literal length */ + unsigned int const token = *ip++; + + length = token>>ML_BITS; + + if (length == RUN_MASK) { + unsigned int s; + + do { + s = *ip++; + length += s; + } while (likely(endOnInput + ? 
ip < iend - RUN_MASK + : 1) & (s == 255)); + + if ((safeDecode) + && unlikely( + (size_t)(op + length) < (size_t)(op))) { + /* overflow detection */ goto _output_error; -#if !LZ4_ARCH64 - if (op + COPYLENGTH > oend) + } + if ((safeDecode) + && unlikely( + (size_t)(ip + length) < (size_t)(ip))) { + /* overflow detection */ goto _output_error; -#endif - MEMCPY_ADVANCE_CHUNKED_NOFIXUP(op, ref, oend - COPYLENGTH); - /* op could be > cpy here */ - while (op < cpy) - *op++ = *ref++; - op = cpy; - /* - * Check EOF (should never happen, since last 5 bytes - * are supposed to be literals) - */ - if (op == oend) - goto _output_error; - } else { - MEMCPY_ADVANCE_CHUNKED(op, ref, length); + } } - } - /* end of decoding */ - return ip - source; - - /* write overflow error detected */ -_output_error: - return -1; -} - -static inline ssize_t get_length_safe(const u8 **ip, ssize_t length) -{ - if (length == 15) { - size_t len; - - do { - length += (len = *(*ip)++); - if (unlikely((ssize_t) length < 0)) - return -1; - - length += len; - } while (len == 255); - } - - return length; -} - -static int lz4_uncompress_unknownoutputsize(const u8 *source, u8 *dest, - int isize, size_t maxoutputsize) -{ - const u8 *ip = source; - const u8 *const iend = ip + isize; - const u8 *ref; - u8 *op = dest; - u8 * const oend = op + maxoutputsize; - u8 *cpy; - unsigned token, offset; - size_t length; - - /* Main Loop */ - while (ip < iend) { - /* get runlength */ - token = *ip++; - length = get_length_safe(&ip, token >> ML_BITS); - if (unlikely((ssize_t) length < 0)) - goto _output_error; /* copy literals */ - if ((op + length > oend - COPYLENGTH) || - (ip + length > iend - COPYLENGTH)) { + cpy = op + length; + if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT)) + || (ip + length > iend - (2 + 1 + LASTLITERALS)))) + || ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) { + if (partialDecoding) { + if (cpy > oend) { + /* + * Error : + * write attempt beyond end of output buffer + */ + goto _output_error; + } + if ((endOnInput) + && (ip + length > iend)) { + /* + * Error : + * read attempt beyond + * end of input buffer + */ + goto _output_error; + } + } else { + if ((!endOnInput) + && (cpy != oend)) { + /* + * Error : + * block decoding must + * stop exactly there + */ + goto _output_error; + } + if ((endOnInput) + && ((ip + length != iend) + || (cpy > oend))) { + /* + * Error : + * input must be consumed + */ + goto _output_error; + } + } - if (op + length > oend) - goto _output_error;/* writes beyond buffer */ - - if (ip + length != iend) - goto _output_error;/* - * Error: LZ4 format requires - * to consume all input - * at this stage - */ - MEMCPY_ADVANCE(op, ip, length); - break;/* Necessarily EOF, due to parsing restrictions */ + memcpy(op, ip, length); + ip += length; + op += length; + /* Necessarily EOF, due to parsing restrictions */ + break; } - MEMCPY_ADVANCE_CHUNKED(op, ip, length); - /* get match offset */ - offset = GET_LE16_ADVANCE(ip); - ref = op - offset; + LZ4_wildCopy(op, ip, cpy); + ip += length; + op = cpy; - /* Error: offset create reference outside destination buffer */ - if (ref < (u8 * const) dest) + /* get offset */ + offset = LZ4_readLE16(ip); + ip += 2; + match = op - offset; + + if ((checkOffset) && (unlikely(match < lowLimit))) { + /* Error : offset outside buffers */ goto _output_error; + } - /* get match length */ - length = get_length_safe(&ip, token & ML_MASK); - if (unlikely((ssize_t) length < 0)) - goto _output_error; + /* costs ~1%; silence an msan warning when offset == 
0 */ + LZ4_write32(op, (U32)offset); + + /* get matchlength */ + length = token & ML_MASK; + if (length == ML_MASK) { + unsigned int s; + + do { + s = *ip++; + + if ((endOnInput) && (ip > iend - LASTLITERALS)) + goto _output_error; + + length += s; + } while (s == 255); + + if ((safeDecode) + && unlikely( + (size_t)(op + length) < (size_t)op)) { + /* overflow detection */ + goto _output_error; + } + } length += MINMATCH; - /* copy first STEPSIZE bytes of match: */ - if (unlikely(offset < STEPSIZE)) { - MEMCPY_ADVANCE_BYTES(op, ref, 4); - ref -= dec32table[offset]; + /* check external dictionary */ + if ((dict == usingExtDict) && (match < lowPrefix)) { + if (unlikely(op + length > oend - LASTLITERALS)) { + /* doesn't respect parsing restriction */ + goto _output_error; + } - memcpy(op, ref, 4); - op += STEPSIZE - 4; - ref -= dec64table[offset]; - } else { - MEMCPY_ADVANCE(op, ref, STEPSIZE); + if (length <= (size_t)(lowPrefix - match)) { + /* + * match can be copied as a single segment + * from external dictionary + */ + memmove(op, dictEnd - (lowPrefix - match), + length); + op += length; + } else { + /* + * match encompass external + * dictionary and current block + */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + + memcpy(op, dictEnd - copySize, copySize); + op += copySize; + + if (restSize > (size_t)(op - lowPrefix)) { + /* overlap copy */ + BYTE * const endOfMatch = op + restSize; + const BYTE *copyFrom = lowPrefix; + + while (op < endOfMatch) + *op++ = *copyFrom++; + } else { + memcpy(op, lowPrefix, restSize); + op += restSize; + } + } + + continue; } - length -= STEPSIZE; - /* copy rest of match: */ + /* copy match within block */ cpy = op + length; - if (cpy > oend - COPYLENGTH) { - /* Error: request to write beyond destination buffer */ - if (cpy > oend || - ref + COPYLENGTH > oend) - goto _output_error; -#if !LZ4_ARCH64 - if (op + COPYLENGTH > oend) - goto _output_error; -#endif - MEMCPY_ADVANCE_CHUNKED_NOFIXUP(op, ref, oend - COPYLENGTH); - while (op < cpy) - *op++ = *ref++; - op = cpy; - /* - * Check EOF (should never happen, since last 5 bytes - * are supposed to be literals) - */ - if (op == oend) - goto _output_error; - } else { - MEMCPY_ADVANCE_CHUNKED(op, ref, length); - } - } - /* end of decoding */ - return op - dest; - /* write overflow error detected */ + if (unlikely(offset < 8)) { + const int dec64 = dec64table[offset]; + + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[offset]; + memcpy(op + 4, match, 4); + match -= dec64; + } else { + LZ4_copy8(op, match); + match += 8; + } + + op += 8; + + if (unlikely(cpy > oend - 12)) { + BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1); + + if (cpy > oend - LASTLITERALS) { + /* + * Error : last LASTLITERALS bytes + * must be literals (uncompressed) + */ + goto _output_error; + } + + if (op < oCopyLimit) { + LZ4_wildCopy(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } + + while (op < cpy) + *op++ = *match++; + } else { + LZ4_copy8(op, match); + + if (length > 16) + LZ4_wildCopy(op + 8, match + 8, cpy); + } + + op = cpy; /* correction */ + } + + /* end of decoding */ + if (endOnInput) { + /* Nb of output bytes decoded */ + return (int) (((char *)op) - dest); + } else { + /* Nb of input bytes read */ + return (int) (((const char *)ip) - source); + } + + /* Overflow error detected */ _output_error: return -1; } -int lz4_decompress(const unsigned char *src, size_t *src_len, - unsigned char 
*dest, size_t actual_dest_len) +int LZ4_decompress_safe(const char *source, char *dest, + int compressedSize, int maxDecompressedSize) { - int ret = -1; - int input_len = 0; - - input_len = lz4_uncompress(src, dest, actual_dest_len); - if (input_len < 0) - goto exit_0; - *src_len = input_len; - - return 0; -exit_0: - return ret; + return LZ4_decompress_generic(source, dest, compressedSize, + maxDecompressedSize, endOnInputSize, full, 0, + noDict, (BYTE *)dest, NULL, 0); } -#ifndef STATIC -EXPORT_SYMBOL(lz4_decompress); -#endif -int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, - unsigned char *dest, size_t *dest_len) +int LZ4_decompress_safe_partial(const char *source, char *dest, + int compressedSize, int targetOutputSize, int maxDecompressedSize) { - int ret = -1; - int out_len = 0; - - out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len, - *dest_len); - if (out_len < 0) - goto exit_0; - *dest_len = out_len; - - return 0; -exit_0: - return ret; + return LZ4_decompress_generic(source, dest, compressedSize, + maxDecompressedSize, endOnInputSize, partial, + targetOutputSize, noDict, (BYTE *)dest, NULL, 0); } -#ifndef STATIC -EXPORT_SYMBOL(lz4_decompress_unknownoutputsize); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("LZ4 Decompressor"); -#endif +int LZ4_decompress_fast(const char *source, char *dest, int originalSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, full, 0, withPrefix64k, + (BYTE *)(dest - 64 * KB), NULL, 64 * KB); +} + +int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, + const char *dictionary, int dictSize) +{ + LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode; + + lz4sd->prefixSize = (size_t) dictSize; + lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize; + lz4sd->externalDict = NULL; + lz4sd->extDictSize = 0; + return 1; +} + +/* + * *_continue() : + * These decoding functions allow decompression of multiple blocks + * in "streaming" mode. + * Previously decoded blocks must still be available at the memory + * position where they were decoded. 
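The streaming decoders declared next keep history across blocks: the caller seeds a decode stream with LZ4_setStreamDecode() and then decodes blocks in order with LZ4_decompress_safe_continue(), keeping previously decoded data in place (or re-registering a saved copy) so later matches can reference it. A rough single-block sketch, assuming LZ4_streamDecode_t can live on the stack as in upstream LZ4; the wrapper name is invented and buffer management is elided.

	static int example_stream_decode(const char *in_block, int in_len,
					 char *out, int out_capacity)
	{
		LZ4_streamDecode_t stream;

		LZ4_setStreamDecode(&stream, NULL, 0);	/* no initial dictionary */

		/* call again with the next block, leaving earlier output in place */
		return LZ4_decompress_safe_continue(&stream, in_block, out,
						    in_len, out_capacity);
	}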
+ * If it's not possible, save the relevant part of + * decoded data into a safe buffer, + * and indicate where it stands using LZ4_setStreamDecode() + */ +int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, int compressedSize, int maxOutputSize) +{ + LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixEnd == (BYTE *)dest) { + result = LZ4_decompress_generic(source, dest, + compressedSize, + maxOutputSize, + endOnInputSize, full, 0, + usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, + lz4sd->externalDict, + lz4sd->extDictSize); + + if (result <= 0) + return result; + + lz4sd->prefixSize += result; + lz4sd->prefixEnd += result; + } else { + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, full, 0, + usingExtDict, (BYTE *)dest, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) + return result; + lz4sd->prefixSize = result; + lz4sd->prefixEnd = (BYTE *)dest + result; + } + + return result; +} + +int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, int originalSize) +{ + LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixEnd == (BYTE *)dest) { + result = LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, full, 0, + usingExtDict, + lz4sd->prefixEnd - lz4sd->prefixSize, + lz4sd->externalDict, lz4sd->extDictSize); + + if (result <= 0) + return result; + + lz4sd->prefixSize += originalSize; + lz4sd->prefixEnd += originalSize; + } else { + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, full, 0, + usingExtDict, (BYTE *)dest, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) + return result; + lz4sd->prefixSize = originalSize; + lz4sd->prefixEnd = (BYTE *)dest + originalSize; + } + + return result; +} + +/* + * Advanced decoding functions : + * *_usingDict() : + * These decoding functions work the same as "_continue" ones, + * the dictionary must be explicitly provided within parameters + */ +static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source, + char *dest, int compressedSize, int maxOutputSize, int safe, + const char *dictStart, int dictSize) +{ + if (dictSize == 0) + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, safe, full, 0, + noDict, (BYTE *)dest, NULL, 0); + if (dictStart + dictSize == dest) { + if (dictSize >= (int)(64 * KB - 1)) + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, safe, full, 0, + withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0); + return LZ4_decompress_generic(source, dest, compressedSize, + maxOutputSize, safe, full, 0, noDict, + (BYTE *)dest - dictSize, NULL, 0); + } + return LZ4_decompress_generic(source, dest, compressedSize, + maxOutputSize, safe, full, 0, usingExtDict, + (BYTE *)dest, (const BYTE *)dictStart, dictSize); +} + +int LZ4_decompress_safe_usingDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + const char *dictStart, int dictSize) +{ + return LZ4_decompress_usingDict_generic(source, dest, + compressedSize, maxOutputSize, 1, dictStart, dictSize); +} + +int LZ4_decompress_fast_usingDict(const char 
*source, char *dest, + int originalSize, const char *dictStart, int dictSize) +{ + return LZ4_decompress_usingDict_generic(source, dest, 0, + originalSize, 0, dictStart, dictSize); +} diff --git a/linux/lz4defs.h b/linux/lz4defs.h index 586b217f..42045272 100644 --- a/linux/lz4defs.h +++ b/linux/lz4defs.h @@ -1,181 +1,228 @@ -/* - * lz4defs.h -- architecture specific defines - * - * Copyright (C) 2013, LG Electronics, Kyungsik Lee - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ +#ifndef __LZ4DEFS_H__ +#define __LZ4DEFS_H__ /* - * Detects 64 bits mode + * lz4defs.h -- common and architecture specific defines for the kernel usage + + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2016, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
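The rewritten lz4defs.h gathers the shared primitives: the fixed-width BYTE/U16/U32/U64 types, the unaligned LZ4_read*/LZ4_write* accessors, and LZ4_wildCopy(), which copies in 8-byte chunks and may write up to 7 bytes beyond dstEnd, so its destination needs that much slack. A tiny sketch of the implied contract; the helper name is invented.

	/* len is assumed non-zero; dst needs room for len rounded up to a
	 * multiple of 8. */
	static void example_copy_with_slack(BYTE *dst, const BYTE *src, size_t len)
	{
		LZ4_wildCopy(dst, src, dst + len);
	}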
+ * You can contact the author at : + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 + * + * Changed for kernel usage by: + * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> */ -#if __SIZEOF_POINTER__ == 8 + +#include +#include +#include /* memset, memcpy */ + +#define FORCE_INLINE __always_inline + +/*-************************************ + * Basic Types + **************************************/ +#include + +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; +typedef uintptr_t uptrval; + +/*-************************************ + * Architecture specifics + **************************************/ +#if defined(CONFIG_64BIT) #define LZ4_ARCH64 1 #else #define LZ4_ARCH64 0 #endif -#include -#include +#if defined(__LITTLE_ENDIAN) +#define LZ4_LITTLE_ENDIAN 1 +#else +#define LZ4_LITTLE_ENDIAN 0 +#endif -#define A32(_p) get_unaligned((u32 *) (_p)) -#define A16(_p) get_unaligned((u16 *) (_p)) +/*-************************************ + * Constants + **************************************/ +#define MINMATCH 4 -#define GET_LE16_ADVANCE(_src) \ -({ \ - u16 _r = get_unaligned_le16(_src); \ - (_src) += 2; \ - _r; \ -}) +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT (WILDCOPYLENGTH + MINMATCH) -#define PUT_LE16_ADVANCE(_dst, _v) \ -do { \ - put_unaligned_le16((_v), (_dst)); \ - (_dst) += 2; \ -} while (0) +/* Increase this value ==> compression run slower on incompressible data */ +#define LZ4_SKIPTRIGGER 6 -#define LENGTH_LONG 15 -#define COPYLENGTH 8 -#define ML_BITS 4 -#define ML_MASK ((1U << ML_BITS) - 1) -#define RUN_BITS (8 - ML_BITS) -#define RUN_MASK ((1U << RUN_BITS) - 1) -#define MEMORY_USAGE 14 -#define MINMATCH 4 -#define SKIPSTRENGTH 6 -#define LASTLITERALS 5 -#define MFLIMIT (COPYLENGTH + MINMATCH) -#define MINLENGTH (MFLIMIT + 1) -#define MAXD_LOG 16 -#define MAXD (1 << MAXD_LOG) -#define MAXD_MASK (u32)(MAXD - 1) -#define MAX_DISTANCE (MAXD - 1) -#define HASH_LOG (MAXD_LOG - 1) -#define HASHTABLESIZE (1 << HASH_LOG) -#define MAX_NB_ATTEMPTS 256 -#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) -#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1)) +#define HASH_UNIT sizeof(size_t) -#define __HASH_VALUE(p, bits) \ - (((A32(p)) * 2654435761U) >> (32 - (bits))) +#define KB (1 << 10) +#define MB (1 << 20) +#define GB (1U << 30) -#define HASH_VALUE(p) __HASH_VALUE(p, HASH_LOG) +#define MAXD_LOG 16 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) +#define STEPSIZE sizeof(size_t) -#define MEMCPY_ADVANCE(_dst, _src, length) \ -do { \ - typeof(length) _length = (length); \ - memcpy(_dst, _src, _length); \ - _src += _length; \ - _dst += _length; \ -} while (0) +#define ML_BITS 4 +#define ML_MASK ((1U << ML_BITS) - 1) +#define RUN_BITS (8 - ML_BITS) +#define RUN_MASK ((1U << RUN_BITS) - 1) -#define MEMCPY_ADVANCE_BYTES(_dst, _src, _length) \ -do { \ - const u8 *_end = (_src) + (_length); \ - while ((_src) < _end) \ - *_dst++ = *_src++; \ -} while (0) +/*-************************************ + * Reading and writing into memory + **************************************/ +static FORCE_INLINE U16 LZ4_read16(const void *ptr) +{ + return get_unaligned((const U16 *)ptr); +} -#define STEPSIZE __SIZEOF_LONG__ +static FORCE_INLINE U32 LZ4_read32(const void *ptr) +{ + return get_unaligned((const U32 *)ptr); +} -#define LZ4_COPYPACKET(_src, _dst) \ -do { \ - MEMCPY_ADVANCE(_dst, _src, STEPSIZE); \ - MEMCPY_ADVANCE(_dst, _src, COPYLENGTH - STEPSIZE);\ -} while (0) +static FORCE_INLINE size_t 
LZ4_read_ARCH(const void *ptr) +{ + return get_unaligned((const size_t *)ptr); +} + +static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value) +{ + put_unaligned(value, (U16 *)memPtr); +} + +static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value) +{ + put_unaligned(value, (U32 *)memPtr); +} + +static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr) +{ + return get_unaligned_le16(memPtr); +} + +static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) +{ + return put_unaligned_le16(value, memPtr); +} + +static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) +{ +#if LZ4_ARCH64 + U64 a = get_unaligned((const U64 *)src); + + put_unaligned(a, (U64 *)dst); +#else + U32 a = get_unaligned((const U32 *)src); + U32 b = get_unaligned((const U32 *)src + 1); + + put_unaligned(a, (U32 *)dst); + put_unaligned(b, (U32 *)dst + 1); +#endif +} /* - * Equivalent to MEMCPY_ADVANCE - except may overrun @_dst and @_src by - * COPYLENGTH: - * - * Note: src and dst may overlap (with src < dst) - we must do the copy in - * STEPSIZE chunks for correctness - * - * Note also: length may be negative - we must not call memcpy if length is - * negative, but still adjust dst and src by length + * customized variant of memcpy, + * which can overwrite up to 7 bytes beyond dstEnd */ -#define MEMCPY_ADVANCE_CHUNKED(_dst, _src, _length) \ -do { \ - u8 *_end = (_dst) + (_length); \ - while ((_dst) < _end) \ - LZ4_COPYPACKET(_src, _dst); \ - _src -= (_dst) - _end; \ - _dst = _end; \ -} while (0) - -#define MEMCPY_ADVANCE_CHUNKED_NOFIXUP(_dst, _src, _end)\ -do { \ - while ((_dst) < (_end)) \ - LZ4_COPYPACKET((_src), (_dst)); \ -} while (0) - -struct lz4_hashtable { -#if LZ4_ARCH64 - const u8 * const base; - u32 *table; -#else - const int base; - const u8 *table; -#endif -}; - -#if LZ4_ARCH64 -#define HTYPE u32 -#else /* 32-bit */ -#define HTYPE const u8* -#endif - -#ifdef __BIG_ENDIAN -#define LZ4_NBCOMMONBYTES(val) (__builtin_clzl(val) >> 3) -#else -#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzl(val) >> 3) -#endif - -static inline unsigned common_length(const u8 *l, const u8 *r, - const u8 *const l_end) +static FORCE_INLINE void LZ4_wildCopy(void *dstPtr, + const void *srcPtr, void *dstEnd) { - const u8 *l_start = l; + BYTE *d = (BYTE *)dstPtr; + const BYTE *s = (const BYTE *)srcPtr; + BYTE *const e = (BYTE *)dstEnd; - while (likely(l <= l_end - sizeof(long))) { - unsigned long diff = - get_unaligned((unsigned long *) l) ^ - get_unaligned((unsigned long *) r); - - if (diff) - return l + LZ4_NBCOMMONBYTES(diff) - l_start; - - l += sizeof(long); - r += sizeof(long); - } -#if LZ4_ARCH64 - if (l <= l_end - 4 && A32(r) == A32(l)) { - l += 4; - r += 4; - } -#endif - if (l <= l_end - 2 && A16(r) == A16(l)) { - l += 2; - r += 2; - } - if (l <= l_end - 1 && *r == *l) { - l++; - r++; - } - - return l - l_start; + do { + LZ4_copy8(d, s); + d += 8; + s += 8; + } while (d < e); } -static inline unsigned encode_length(u8 **op, unsigned length) +static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val) { - if (length >= LENGTH_LONG) { - length -= LENGTH_LONG; - - for (; length > 254 ; length -= 255) - *(*op)++ = 255; - *(*op)++ = length; - return LENGTH_LONG; - } else - return length; +#if LZ4_LITTLE_ENDIAN + return __ffs(val) >> 3; +#else + return (BITS_PER_LONG - 1 - __fls(val)) >> 3; +#endif } + +static FORCE_INLINE unsigned int LZ4_count( + const BYTE *pIn, + const BYTE *pMatch, + const BYTE *pInLimit) +{ + const BYTE *const pStart = pIn; + + while (likely(pIn < pInLimit - (STEPSIZE - 1))) { + size_t 
const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + + if (!diff) { + pIn += STEPSIZE; + pMatch += STEPSIZE; + continue; + } + + pIn += LZ4_NbCommonBytes(diff); + + return (unsigned int)(pIn - pStart); + } + +#if LZ4_ARCH64 + if ((pIn < (pInLimit - 3)) + && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { + pIn += 4; + pMatch += 4; + } +#endif + + if ((pIn < (pInLimit - 1)) + && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { + pIn += 2; + pMatch += 2; + } + + if ((pIn < pInLimit) && (*pMatch == *pIn)) + pIn++; + + return (unsigned int)(pIn - pStart); +} + +typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; +typedef enum { byPtr, byU32, byU16 } tableType_t; + +typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { full = 0, partial = 1 } earlyEnd_directive; + +#endif
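Usage note (not part of the patch): the streaming decoders above, LZ4_decompress_safe_continue() and LZ4_decompress_fast_continue(), are meant to be driven through LZ4_setStreamDecode(). The sketch below uses a hypothetical helper name (lz4_decode_stream) and assumes the LZ4_streamDecode_t declarations from the include/linux/lz4.h added in this series; it decodes dependent blocks back-to-back into one buffer, so every call takes the fast prefix path (lz4sd->prefixEnd == dest).

/*
 * Hypothetical sketch only -- not part of this patch.
 * Decode nblocks dependently-compressed blocks into one contiguous
 * buffer; each call extends the prefix seen by the next one.
 */
#include <linux/errno.h>
#include <linux/lz4.h>

static int lz4_decode_stream(const char **src, const int *src_len, int nblocks,
			     char *dst, int dst_cap)
{
	LZ4_streamDecode_t sd;
	int i, n, total = 0;

	LZ4_setStreamDecode(&sd, NULL, 0);	/* start with an empty dictionary */

	for (i = 0; i < nblocks; i++) {
		n = LZ4_decompress_safe_continue(&sd, src[i], dst + total,
						 src_len[i], dst_cap - total);
		if (n < 0)
			return -EINVAL;		/* malformed input */
		total += n;
	}

	return total;	/* total decompressed bytes */
}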
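For one-shot decoding against an explicit dictionary, LZ4_decompress_safe_usingDict() wraps the same dispatch (noDict / withPrefix64k / usingExtDict) performed by LZ4_decompress_usingDict_generic() above. A minimal, hypothetical caller (lz4_decode_one is not a helper in this tree):

/*
 * Hypothetical sketch only -- not part of this patch.
 * Decode one block that was compressed against an external dictionary;
 * the dictionary does not need to be adjacent to dst.
 */
#include <linux/lz4.h>

static int lz4_decode_one(const char *src, int src_len,
			  char *dst, int dst_cap,
			  const char *dict, int dict_len)
{
	/* returns the decompressed byte count, or < 0 on malformed input */
	return LZ4_decompress_safe_usingDict(src, dst, src_len, dst_cap,
					     dict, dict_len);
}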
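On the match-length side, LZ4_count() compares a machine word at a time and, at the first mismatch, LZ4_NbCommonBytes() turns the XOR of the two words into a byte count: on little-endian the first differing byte holds the lowest set bit, so __ffs(diff) >> 3 is the number of matching bytes. A standalone worked example follows (not part of the patch); it assumes a little-endian host and uses __builtin_ctzll() in place of the kernel's __ffs() so it compiles outside the kernel tree.

/*
 * Standalone illustration of the byte-counting trick in
 * LZ4_NbCommonBytes()/LZ4_count() -- not part of this patch.
 */
#include <stdint.h>
#include <string.h>
#include <assert.h>

static unsigned int nb_common_bytes_le(uint64_t diff)
{
	/* lowest set bit of the XOR lies in the first differing byte */
	return (unsigned int)(__builtin_ctzll(diff) >> 3);
}

int main(void)
{
	uint64_t a, b;

	memcpy(&a, "abcdefgh", sizeof(a));	/* bytes 0-3 match ...      */
	memcpy(&b, "abcdWXYZ", sizeof(b));	/* ... byte 4 is the first
						 * mismatch                 */

	/* XOR is zero in bytes 0-3, nonzero in byte 4 (bits 32-39 on
	 * little-endian), so the count is 4 */
	assert(nb_common_bytes_le(a ^ b) == 4);
	return 0;
}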