diff --git a/.bcachefs_revision b/.bcachefs_revision
index ecdb9454..81d9f67c 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-3610542890c4b8329d83361ba48fa874d27c97a8
+4231dd5cf0f04dd61b0b8bae44a357da8331c0e2
diff --git a/Makefile b/Makefile
index a305b4bc..4d406cc3 100644
--- a/Makefile
+++ b/Makefile
@@ -83,6 +83,8 @@ SRCS=bcachefs.c				\
      libbcachefs/io.c			\
      libbcachefs/journal.c		\
      libbcachefs/keylist.c		\
+     libbcachefs/lz4_compress.c		\
+     libbcachefs/lz4_decompress.c	\
      libbcachefs/migrate.c		\
      libbcachefs/move.c			\
      libbcachefs/movinggc.c		\
diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c
index f3ded7b4..9d54dd80 100644
--- a/libbcachefs/alloc.c
+++ b/libbcachefs/alloc.c
@@ -403,39 +403,44 @@ int bch2_prio_read(struct bch_dev *ca)
 	if (!bucket)
 		return 0;
 
-	unfixable_fsck_err_on(bucket < ca->mi.first_bucket ||
-			      bucket >= ca->mi.nbuckets, c,
-			      "bad prio bucket %llu", bucket);
+	if (mustfix_fsck_err_on(bucket < ca->mi.first_bucket ||
+				bucket >= ca->mi.nbuckets, c,
+				"bad prio bucket %llu", bucket))
+		return 0;
 
 	for (b = 0; b < ca->mi.nbuckets; b++, d++) {
 		if (d == end) {
 			ca->prio_last_buckets[bucket_nr] = bucket;
 			bucket_nr++;
 
-			ret = prio_io(ca, bucket, REQ_OP_READ);
-			if (bch2_dev_fatal_io_err_on(ret, ca,
-					"prior read from bucket %llu",
-					bucket) ||
-			    bch2_meta_read_fault("prio"))
-				return -EIO;
+			ret = prio_io(ca, bucket, REQ_OP_READ) ||
+				bch2_meta_read_fault("prio");
+
+			if (mustfix_fsck_err_on(ret, c,
+					"IO error reading bucket gens (%i)",
+					ret))
+				return 0;
 
 			got = le64_to_cpu(p->magic);
 			expect = pset_magic(c);
-			unfixable_fsck_err_on(got != expect, c,
-				"bad magic (got %llu expect %llu) while reading prios from bucket %llu",
-				got, expect, bucket);
+			if (mustfix_fsck_err_on(got != expect, c,
+					"bad magic (got %llu expect %llu) while reading prios from bucket %llu",
+					got, expect, bucket))
+				return 0;
 
-			unfixable_fsck_err_on(PSET_CSUM_TYPE(p) >= BCH_CSUM_NR, c,
-				"prio bucket with unknown csum type %llu bucket %lluu",
-				PSET_CSUM_TYPE(p), bucket);
+			if (mustfix_fsck_err_on(PSET_CSUM_TYPE(p) >= BCH_CSUM_NR, c,
+					"prio bucket with unknown csum type %llu bucket %lluu",
+					PSET_CSUM_TYPE(p), bucket))
+				return 0;
 
 			csum = bch2_checksum(c, PSET_CSUM_TYPE(p),
 					    prio_nonce(p),
 					    (void *) p + sizeof(p->csum),
 					    bucket_bytes(ca) - sizeof(p->csum));
-			unfixable_fsck_err_on(bch2_crc_cmp(csum, p->csum), c,
-				"bad checksum reading prios from bucket %llu",
-				bucket);
+			if (fsck_err_on(bch2_crc_cmp(csum, p->csum), c,
+					"bad checksum reading prios from bucket %llu",
+					bucket))
+				return 0;
 
 			bch2_encrypt(c, PSET_CSUM_TYPE(p),
 				    prio_nonce(p),
@@ -450,7 +455,10 @@ int bch2_prio_read(struct bch_dev *ca)
 		ca->buckets[b].prio[READ] = le16_to_cpu(d->prio[READ]);
 		ca->buckets[b].prio[WRITE] = le16_to_cpu(d->prio[WRITE]);
 
-		bucket_cmpxchg(&ca->buckets[b], new, new.gen = d->gen);
+		bucket_cmpxchg(&ca->buckets[b], new, ({
+			new.gen = d->gen;
+			new.gen_valid = 1;
+		}));
 	}
 
 	mutex_lock(&c->bucket_lock);
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index d907ef58..78132e40 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -142,14 +142,25 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
 			struct bucket *g = PTR_BUCKET(ca, ptr);
 			struct bucket_mark new;
 
+			if (!g->mark.gen_valid) {
+				bucket_cmpxchg(g, new, ({
+					new.gen = ptr->gen;
+					new.gen_valid = 1;
+				}));
+				ca->need_prio_write = true;
+			}
+
 			if (fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
 					"%s ptr gen in the future: %u > %u",
 					type == BKEY_TYPE_BTREE
 					? "btree" : "data",
 					ptr->gen, g->mark.gen)) {
-				bucket_cmpxchg(g, new, new.gen = ptr->gen);
-				set_bit(BCH_FS_FIXED_GENS, &c->flags);
+				bucket_cmpxchg(g, new, ({
+					new.gen = ptr->gen;
+					new.gen_valid = 1;
+				}));
 				ca->need_prio_write = true;
+				set_bit(BCH_FS_FIXED_GENS, &c->flags);
 			}
 
 		}
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 541fffb6..18469486 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1025,6 +1025,17 @@ static bool extent_contains_ptr(struct bkey_s_c_extent e,
 	return false;
 }
 
+static void bch2_btree_node_read_complete(struct btree_read_bio *rb,
+					  struct btree *b)
+{
+	struct bch_dev *ca = rb->pick.ca; /* read before bio_put(&rb->bio) */
+
+	bio_put(&rb->bio);
+	percpu_ref_put(&ca->io_ref);
+	clear_btree_node_read_in_flight(b); /* clear the flag, then wake waiters */
+	wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
+}
+
 void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
 			      struct bch_dev *ca,
 			      const struct bch_extent_ptr *ptr)
@@ -1196,8 +1207,6 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
 
 	btree_node_reset_sib_u64s(b);
 out:
-	clear_btree_node_read_in_flight(b);
-	wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
 	mempool_free(iter, &c->fill_iter);
 	return;
 err:
@@ -1215,9 +1224,7 @@ static void btree_node_read_work(struct work_struct *work)
 
 	bch2_btree_node_read_done(rb->c, rb->bio.bi_private,
 				  rb->pick.ca, &rb->pick.ptr);
-
-	percpu_ref_put(&rb->pick.ca->io_ref);
-	bio_put(&rb->bio);
+	bch2_btree_node_read_complete(rb, rb->bio.bi_private);
 }
 
 static void btree_node_read_endio(struct bio *bio)
@@ -1231,8 +1238,7 @@ static void btree_node_read_endio(struct bio *bio)
 			PTR_BUCKET_NR(rb->pick.ca, &rb->pick.ptr)) ||
 	    bch2_meta_read_fault("btree")) {
 		set_btree_node_read_error(b);
-		percpu_ref_put(&rb->pick.ca->io_ref);
-		bio_put(bio);
+		bch2_btree_node_read_complete(rb, rb->bio.bi_private);
 		return;
 	}
 
@@ -1249,7 +1255,6 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
 	struct bio *bio;
 
 	trace_btree_read(c, b);
-	set_btree_node_read_in_flight(b);
 
 	pick = bch2_btree_pick_ptr(c, b);
 	if (bch2_fs_fatal_err_on(!pick.ca, c,
@@ -1268,6 +1273,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
 	bio->bi_iter.bi_size	= btree_bytes(c);
 	bch2_bio_map(bio, b->data);
 
+	set_btree_node_read_in_flight(b);
+
 	if (sync) {
 		submit_bio_wait(bio);
 
@@ -1282,8 +1289,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
 		bch2_btree_node_read_done(c, b, pick.ca, &pick.ptr);
 		bch2_time_stats_update(&c->btree_read_time, start_time);
 out:
-		bio_put(bio);
-		percpu_ref_put(&pick.ca->io_ref);
+		bch2_btree_node_read_complete(rb, b);
 	} else {
 		bio->bi_end_io	= btree_node_read_endio;
 		bio->bi_private	= b;
diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h
index 68f863f3..3c8b6447 100644
--- a/libbcachefs/buckets_types.h
+++ b/libbcachefs/buckets_types.h
@@ -20,6 +20,7 @@ struct bucket_mark {
 	struct {
 		u8		gen;
 
+		unsigned	gen_valid:1;
 		unsigned	journal_seq_valid:1;
 
 		/*
diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
index 62b42042..80b12f3b 100644
--- a/libbcachefs/compress.c
+++ b/libbcachefs/compress.c
@@ -4,7 +4,7 @@
 #include "io.h"
 #include "super-io.h"
 
-#include <linux/lz4.h>
+#include "lz4.h"
 #include <linux/zlib.h>
 
 enum bounced {
@@ -148,10 +148,9 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
 
 	switch (crc.compression_type) {
 	case BCH_COMPRESSION_LZ4:
-		ret = LZ4_decompress_safe(src_data, dst_data,
-					  src_len, dst_len);
-
-		if (ret != dst_len) {
+		ret = lz4_decompress(src_data, &src_len,
+				     dst_data, dst_len);
+		if (ret) {
 			ret = -EIO;
 			goto err;
 		}
@@ -287,27 +286,32 @@ static int __bio_compress(struct bch_fs *c,
 	switch (compression_type) {
 	case BCH_COMPRESSION_LZ4: {
 		void *workspace;
-		int srclen = src->bi_iter.bi_size;
-		ret = 0;
+
+		*dst_len = dst->bi_iter.bi_size;
+		*src_len = src->bi_iter.bi_size;
 
 		workspace = mempool_alloc(&c->lz4_workspace_pool, GFP_NOIO);
 
-		while (srclen > block_bytes(c) &&
-		       (ret = LZ4_compress_destSize(src_data, dst_data,
-						    &srclen, dst->bi_iter.bi_size,
-						    workspace)) &&
-		       (srclen & (block_bytes(c) - 1))) {
-			/* Round down to nearest block and try again: */
-			srclen = round_down(srclen, block_bytes(c));
+		while (*src_len > block_bytes(c) &&
+		       (ret = lz4_compress(src_data, *src_len,
+					   dst_data, dst_len,
+					   workspace))) {
+			/*
+			 * On error, the compressed data was bigger than
+			 * dst_len, and -ret is the amount of data we were able
+			 * to compress - round down to nearest block and try
+			 * again:
+			 */
+			BUG_ON(ret > 0);
+			BUG_ON(-ret >= *src_len);
+
+			*src_len = round_down(-ret, block_bytes(c));
 		}
 
 		mempool_free(workspace, &c->lz4_workspace_pool);
 
-		if (!ret)
+		if (ret)
 			goto err;
-
-		*src_len = srclen;
-		*dst_len = ret;
 		break;
 	}
 	case BCH_COMPRESSION_GZIP: {
diff --git a/libbcachefs/lz4.h b/libbcachefs/lz4.h
new file mode 100644
index 00000000..6b784c59
--- /dev/null
+++ b/libbcachefs/lz4.h
@@ -0,0 +1,87 @@
+#ifndef __LZ4_H__
+#define __LZ4_H__
+/*
+ * LZ4 Kernel Interface
+ *
+ * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define LZ4_MEM_COMPRESS	(16384)
+#define LZ4HC_MEM_COMPRESS	(262144 + (2 * sizeof(unsigned char *)))
+
+/*
+ * lz4_compressbound()
+ * Provides the maximum size that LZ4 may output in a "worst case" scenario
+ * (input data not compressible)
+ */
+static inline size_t lz4_compressbound(size_t isize)
+{
+	return isize + (isize / 255) + 16;
+}
+
+/*
+ * lz4_compress()
+ *	src     : source address of the original data
+ *	src_len : size of the original data
+ *	dst	: output buffer address of the compressed data
+ *	dst_len : in: capacity of 'dst' in bytes
+ *		  out: compressed size, written only on success
+ *	wrkmem  : address of the working memory.
+ *		This requires 'wrkmem' of size LZ4_MEM_COMPRESS.
+ *	return  : 0 on success
+ *		  < 0 if the output did not fit in 'dst'; -return is the
+ *		  amount of input, in bytes, that would have been able to fit
+ *	note :  'dst' and 'wrkmem' must already be allocated by the caller.
+ */
+int lz4_compress(const unsigned char *src, size_t src_len,
+		unsigned char *dst, size_t *dst_len, void *wrkmem);
+
+ /*
+  * lz4hc_compress()
+  *	 src	 : source address of the original data
+  *	 src_len : size of the original data
+  *	 dst	 : output buffer address of the compressed data
+  *		This requires 'dst' of size LZ4_COMPRESSBOUND.
+  *	 dst_len : is the output size, which is returned after compress done
+  *	 workmem : address of the working memory.
+  *		This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+  *	 return  : Success if return 0
+  *		   Error if return (< 0)
+  *	 note :  Destination buffer and workmem must be already allocated with
+  *		 the defined size.
+  */
+int lz4hc_compress(const unsigned char *src, size_t src_len,
+		unsigned char *dst, size_t *dst_len, void *wrkmem);
+
+/*
+ * lz4_decompress()
+ *	src     : source address of the compressed data
+ *	src_len : out: number of compressed bytes consumed, set on success
+ *	dest	: output buffer address of the decompressed data
+ *	actual_dest_len: is the size of uncompressed data, supposing it's known
+ *	return  : Success if return 0
+ *		  Error if return (< 0)
+ *	note :  Destination buffer must be already allocated.
+ *		slightly faster than lz4_decompress_unknownoutputsize()
+ */
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+		unsigned char *dest, size_t actual_dest_len);
+
+/*
+ * lz4_decompress_unknownoutputsize()
+ *	src     : source address of the compressed data
+ *	src_len : is the input size, therefore the compressed size
+ *	dest	: output buffer address of the decompressed data
+ *	dest_len: is the max size of the destination buffer, which is
+ *			returned with actual size of decompressed data after
+ *			decompress done
+ *	return  : Success if return 0
+ *		  Error if return (< 0)
+ *	note :  Destination buffer must be already allocated.
+ */
+int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
+		unsigned char *dest, size_t *dest_len);
+#endif
diff --git a/libbcachefs/lz4_compress.c b/libbcachefs/lz4_compress.c
new file mode 100644
index 00000000..de33acf3
--- /dev/null
+++ b/libbcachefs/lz4_compress.c
@@ -0,0 +1,228 @@
+/*
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2012, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at :
+ * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ * - LZ4 source repository : http://code.google.com/p/lz4/
+ *
+ *  Changed for kernel use by:
+ *  Chanho Min <chanho.min@lge.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <asm/unaligned.h>
+#include "lz4.h"
+#include "lz4defs.h"
+
+#define LZ4_HASH_VALUE(p, _table)				\
+	__HASH_VALUE(p, MEMORY_USAGE - ilog2(sizeof(_table[0])))
+
+struct lz4_hash_table {
+	const u8	*(*add)(const struct lz4_hash_table, const u8 *);
+	void		*ctx;
+	const u8	*base;
+};
+
+#if __SIZEOF_POINTER__ == 4
+static inline const u8 *hash_table_add32(const struct lz4_hash_table hash,
+					 const u8 *ip)
+{
+	const u8 **table = hash.ctx;
+
+	swap(table[LZ4_HASH_VALUE(ip, table)], ip);
+	return ip;
+}
+#else
+static inline const u8 *hash_table_add32(const struct lz4_hash_table hash,
+					 const u8 *ip)
+{
+	u32 *table = hash.ctx;
+	size_t offset = ip - hash.base;
+
+	swap(table[LZ4_HASH_VALUE(ip, table)], offset);
+	return hash.base + offset;
+}
+#endif
+
+static inline const u8 *hash_table_add16(const struct lz4_hash_table hash,
+					 const u8 *ip)
+{
+	u16 *table = hash.ctx;
+	size_t offset = ip - hash.base;
+
+	swap(table[LZ4_HASH_VALUE(ip, table)], offset);
+	return hash.base + offset;
+}
+
+static inline const u8 *find_match(const struct lz4_hash_table hash,
+				   const u8 **ip, const u8 *anchor,
+				   const u8 *start, const u8 *mflimit)
+{
+	int findmatchattempts = (1U << SKIPSTRENGTH) + 3;
+
+	while (*ip <= mflimit) {
+		const u8 *ref = hash.add(hash, *ip);
+
+		if (ref >= *ip - MAX_DISTANCE && A32(ref) == A32(*ip)) {
+			/* found match: */
+			while (*ip > anchor &&
+			       ref > start &&
+			       unlikely((*ip)[-1] == ref[-1])) {
+				(*ip)--;
+				ref--;
+			}
+
+			return ref;
+		}
+
+		*ip += findmatchattempts++ >> SKIPSTRENGTH;
+	}
+
+	return NULL;
+}
+
+static inline int length_len(unsigned length)
+{
+	return length / 255 + 1;
+}
+
+/*
+ * lz4_compressctx():
+ * ------------------
+ * Compress @src_len bytes from @src into @dst; on entry *@dst_len is the
+ * capacity of @dst. @hash supplies the match table and its insert helper.
+ * return : 0 on success, with *@dst_len set to the number of bytes written;
+ * negative if the output does not fit - the magnitude is then how much
+ * input would have been able to fit (at least 1)
+ */
+static inline int lz4_compressctx(const struct lz4_hash_table hash,
+				  const u8 *src, size_t src_len,
+				  u8 *dst, size_t *dst_len)
+{
+	const u8 *ip = src, *anchor = ip, *ref;
+	const u8 *const iend = ip + src_len;
+	const u8 *const mflimit = iend - MFLIMIT;
+	const u8 *const matchlimit = iend - LASTLITERALS;
+	u8 *op = dst, *token;
+	u8 *const oend = op + *dst_len;
+	size_t literal_len, match_len, match_offset;
+
+	/* Zero the hash table and insert the first input position: */
+	memset(hash.ctx, 0, LZ4_MEM_COMPRESS);
+	hash.add(hash, ip);
+
+	/* Always start with a literal: */
+	ip++;
+
+	while ((ref = find_match(hash, &ip, anchor, src, mflimit))) {
+		/*
+		 * We found a match; @ip now points to the match and @ref points
+		 * to the prior part of the input we matched with. Everything up
+		 * to @anchor has been encoded; the range from @anchor to @ip
+		 * didn't match and now has to be encoded as a literal:
+		 */
+		literal_len = ip - anchor;
+		match_offset = ip - ref;
+
+		/* MINMATCH bytes already matched from find_match(): */
+		ip += MINMATCH;
+		ref += MINMATCH;
+		match_len = common_length(ip, ref, matchlimit);
+		ip += match_len;
+
+		/* Stop if this sequence plus LASTLITERALS could overrun @dst: */
+		if (unlikely(op +
+			     1 + /* token */
+			     2 + /* match offset */
+			     literal_len +
+			     length_len(literal_len) +
+			     length_len(match_len) +
+			     LASTLITERALS > oend))
+			break;
+
+		token = op++;
+		*token = encode_length(&op, literal_len) << ML_BITS;
+		MEMCPY_ADVANCE_CHUNKED(op, anchor, literal_len);
+		PUT_LE16_ADVANCE(op, match_offset);
+		*token += encode_length(&op, match_len);
+
+		anchor = ip;
+	}
+
+	/* Encode remaining input as literal: */
+	literal_len = iend - anchor;
+	if (unlikely(op +
+		     1 +
+		     literal_len +
+		     length_len(literal_len) > oend)) {
+		/* Return how much would be able to fit: */
+		ssize_t remaining = oend - op;
+		ssize_t encoded = anchor - src;
+
+		remaining -= length_len(remaining) + 1;
+
+		return -max(encoded + remaining, 1L);
+	}
+
+	token = op++;
+	*token = encode_length(&op, literal_len) << ML_BITS;
+	MEMCPY_ADVANCE(op, anchor, literal_len);
+
+	/* Success: report the compressed size through *@dst_len */
+	BUG_ON(op > oend);
+	*dst_len = op - dst;
+	return 0;
+}
+
+__attribute__((flatten))
+int lz4_compress(const unsigned char *src, size_t src_len,
+		 unsigned char *dst, size_t *dst_len, void *wrkmem)
+{
+	if (src_len < LZ4_64KLIMIT) {
+		const struct lz4_hash_table hash = {
+			.add	= hash_table_add16,
+			.ctx	= wrkmem,
+			.base	= src,
+		};
+
+		return lz4_compressctx(hash, src, src_len, dst, dst_len);
+	} else {
+		const struct lz4_hash_table hash = {
+			.add	= hash_table_add32,
+			.ctx	= wrkmem,
+			.base	= src,
+		};
+
+		return lz4_compressctx(hash, src, src_len, dst, dst_len);
+	}
+}
+EXPORT_SYMBOL(lz4_compress);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("LZ4 compressor");
diff --git a/libbcachefs/lz4_decompress.c b/libbcachefs/lz4_decompress.c
new file mode 100644
index 00000000..77c9c391
--- /dev/null
+++ b/libbcachefs/lz4_decompress.c
@@ -0,0 +1,316 @@
+/*
+ * LZ4 Decompressor for Linux kernel
+ *
+ * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ *
+ * Based on LZ4 implementation by Yann Collet.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2012, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You can contact the author at :
+ *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ *  - LZ4 source repository : http://code.google.com/p/lz4/
+ */
+
+#ifndef STATIC
+#include <linux/module.h>
+#include <linux/kernel.h>
+#endif
+
+#include "lz4.h"
+#include "lz4defs.h"
+
+static const int dec32table[8] = {0, 3, 2, 3, 0, 0, 0, 0};
+#if LZ4_ARCH64
+static const int dec64table[8] = {0, 0, 0, -1, 0, 1, 2, 3};
+#else
+static const int dec64table[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+#endif
+
+static inline size_t get_length(const u8 **ip, size_t length)
+{
+	if (length == LENGTH_LONG) {
+		size_t len;
+
+		do {
+			length += (len = *(*ip)++);
+		} while (len == 255);
+	}
+
+	return length;
+}
+
+static int lz4_uncompress(const u8 *source, u8 *dest, int osize)
+{
+	const u8 *ip = source;
+	const u8 *ref;
+	u8 *op = dest;
+	u8 * const oend = op + osize;
+	u8 *cpy;
+	unsigned token, offset;
+	ssize_t length;
+
+	while (1) {
+		/* get runlength */
+		token = *ip++;
+		length = get_length(&ip, token >> ML_BITS);
+
+		/* copy literals */
+		if (unlikely(op + length > oend - COPYLENGTH)) {
+			/*
+			 * Error: not enough place for another match
+			 * (min 4) + 5 literals
+			 */
+			if (op + length != oend)
+				goto _output_error;
+
+			MEMCPY_ADVANCE(op, ip, length);
+			break; /* EOF */
+		}
+		MEMCPY_ADVANCE_CHUNKED(op, ip, length);
+
+		/* get match offset */
+		offset = GET_LE16_ADVANCE(ip);
+		ref = op - offset;
+
+		/* Error: offset create reference outside destination buffer */
+		if (unlikely(ref < (u8 *const) dest))
+			goto _output_error;
+
+		/* get match length */
+		length = get_length(&ip, token & ML_MASK);
+		length += MINMATCH;
+
+		/* copy first STEPSIZE bytes of match: */
+		if (unlikely(offset < STEPSIZE)) {
+			MEMCPY_ADVANCE_BYTES(op, ref, 4);
+			ref -= dec32table[offset];
+
+			memcpy(op, ref, 4);
+			op += STEPSIZE - 4;
+			ref -= dec64table[offset];
+		} else {
+			MEMCPY_ADVANCE(op, ref, STEPSIZE);
+		}
+		length -= STEPSIZE;
+		/*
+		 * Note - length could have been < STEPSIZE; that's ok, length
+		 * will now be negative and we'll just end up rewinding op:
+		 */
+
+		/* copy rest of match: */
+		cpy = op + length;
+		if (cpy > oend - COPYLENGTH) {
+			/* Error: request to write beyond destination buffer */
+			if (cpy              > oend ||
+			    ref + COPYLENGTH > oend)
+				goto _output_error;
+#if !LZ4_ARCH64
+			if (op  + COPYLENGTH > oend)
+				goto _output_error;
+#endif
+			MEMCPY_ADVANCE_CHUNKED_NOFIXUP(op, ref, oend - COPYLENGTH);
+			/* op could be > cpy here */
+			while (op < cpy)
+				*op++ = *ref++;
+			op = cpy;
+			/*
+			 * Check EOF (should never happen, since last 5 bytes
+			 * are supposed to be literals)
+			 */
+			if (op == oend)
+				goto _output_error;
+		} else {
+			MEMCPY_ADVANCE_CHUNKED(op, ref, length);
+		}
+	}
+	/* end of decoding */
+	return ip - source;
+
+	/* write overflow error detected */
+_output_error:
+	return -1;
+}
+
+/* Decode a length field; returns -1 if the accumulated length goes negative. */
+static inline ssize_t get_length_safe(const u8 **ip, ssize_t length)
+{
+	if (length == LENGTH_LONG) {
+		size_t len;
+
+		do {
+			/* Add each extension byte once, like get_length(). */
+			length += (len = *(*ip)++);
+			if (unlikely(length < 0))
+				return -1;
+		} while (len == 255);
+	}
+
+	return length;
+}
+
+static int lz4_uncompress_unknownoutputsize(const u8 *source, u8 *dest,
+				int isize, size_t maxoutputsize)
+{
+	const u8 *ip = source;
+	const u8 *const iend = ip + isize;
+	const u8 *ref;
+	u8 *op = dest;
+	u8 * const oend = op + maxoutputsize;
+	u8 *cpy;
+	unsigned token, offset;
+	size_t length;
+
+	/* Main Loop */
+	while (ip < iend) {
+		/* get runlength */
+		token = *ip++;
+		length = get_length_safe(&ip, token >> ML_BITS);
+		if (unlikely((ssize_t) length < 0))
+			goto _output_error;
+
+		/* copy literals */
+		if ((op + length > oend - COPYLENGTH) ||
+		    (ip + length > iend - COPYLENGTH)) {
+
+			if (op + length > oend)
+				goto _output_error;/* writes beyond buffer */
+
+			if (ip + length != iend)
+				goto _output_error;/*
+						    * Error: LZ4 format requires
+						    * to consume all input
+						    * at this stage
+						    */
+			MEMCPY_ADVANCE(op, ip, length);
+			break;/* Necessarily EOF, due to parsing restrictions */
+		}
+		MEMCPY_ADVANCE_CHUNKED(op, ip, length);
+
+		/* get match offset */
+		offset = GET_LE16_ADVANCE(ip);
+		ref = op - offset;
+
+		/* Error: offset create reference outside destination buffer */
+		if (ref < (u8 * const) dest)
+			goto _output_error;
+
+		/* get match length */
+		length = get_length_safe(&ip, token & ML_MASK);
+		if (unlikely((ssize_t) length < 0))
+			goto _output_error;
+
+		length += MINMATCH;
+
+		/* copy first STEPSIZE bytes of match: */
+		if (unlikely(offset < STEPSIZE)) {
+			MEMCPY_ADVANCE_BYTES(op, ref, 4);
+			ref -= dec32table[offset];
+
+			memcpy(op, ref, 4);
+			op += STEPSIZE - 4;
+			ref -= dec64table[offset];
+		} else {
+			MEMCPY_ADVANCE(op, ref, STEPSIZE);
+		}
+		length -= STEPSIZE;
+
+		/* copy rest of match: */
+		cpy = op + length;
+		if (cpy > oend - COPYLENGTH) {
+			/* Error: request to write beyond destination buffer */
+			if (cpy              > oend ||
+			    ref + COPYLENGTH > oend)
+				goto _output_error;
+#if !LZ4_ARCH64
+			if (op  + COPYLENGTH > oend)
+				goto _output_error;
+#endif
+			MEMCPY_ADVANCE_CHUNKED_NOFIXUP(op, ref, oend - COPYLENGTH);
+			while (op < cpy)
+				*op++ = *ref++;
+			op = cpy;
+			/*
+			 * Check EOF (should never happen, since last 5 bytes
+			 * are supposed to be literals)
+			 */
+			if (op == oend)
+				goto _output_error;
+		} else {
+			MEMCPY_ADVANCE_CHUNKED(op, ref, length);
+		}
+	}
+	/* end of decoding */
+	return op - dest;
+
+	/* write overflow error detected */
+_output_error:
+	return -1;
+}
+
+/* Decompress a block whose uncompressed size is exactly actual_dest_len. */
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+		unsigned char *dest, size_t actual_dest_len)
+{
+	/* Compressed bytes consumed, or negative on a decoding error: */
+	int consumed = lz4_uncompress(src, dest, actual_dest_len);
+
+	if (consumed < 0)
+		return -1;
+
+	/* Report how much of @src was actually used. */
+	*src_len = consumed;
+
+	return 0;
+}
+#ifndef STATIC
+EXPORT_SYMBOL(lz4_decompress);
+#endif
+
+/* Decompress src_len bytes of input into at most *dest_len bytes of output. */
+int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
+		unsigned char *dest, size_t *dest_len)
+{
+	/* Bytes written to @dest, or negative on a decoding error: */
+	int produced = lz4_uncompress_unknownoutputsize(src, dest, src_len,
+						*dest_len);
+
+	if (produced < 0)
+		return -1;
+
+	/* Hand the decompressed size back through the in/out parameter. */
+	*dest_len = produced;
+
+	return 0;
+}
+#ifndef STATIC
+EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("LZ4 Decompressor");
+#endif
diff --git a/libbcachefs/lz4defs.h b/libbcachefs/lz4defs.h
new file mode 100644
index 00000000..29f70f91
--- /dev/null
+++ b/libbcachefs/lz4defs.h
@@ -0,0 +1,182 @@
+/*
+ * lz4defs.h -- architecture specific defines
+ *
+ * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Detects 64 bits mode; must agree with STEPSIZE (__SIZEOF_LONG__) below
+ */
+#if defined(CONFIG_64BIT) || __SIZEOF_LONG__ == 8
+#define LZ4_ARCH64 1
+#else
+#define LZ4_ARCH64 0
+#endif
+
+#include <asm/unaligned.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+
+#define A32(_p) get_unaligned((u32 *) (_p))
+#define A16(_p) get_unaligned((u16 *) (_p))
+
+#define GET_LE16_ADVANCE(_src)				\
+({							\
+	u16 _r = get_unaligned_le16(_src);		\
+	(_src) += 2;					\
+	_r;						\
+})
+
+#define PUT_LE16_ADVANCE(_dst, _v)			\
+do {							\
+	put_unaligned_le16((_v), (_dst));		\
+	(_dst) += 2;					\
+} while (0)
+
+#define LENGTH_LONG		15
+#define COPYLENGTH		8
+#define ML_BITS			4
+#define ML_MASK			((1U << ML_BITS) - 1)
+#define RUN_BITS		(8 - ML_BITS)
+#define RUN_MASK		((1U << RUN_BITS) - 1)
+#define MEMORY_USAGE		14
+#define MINMATCH		4
+#define SKIPSTRENGTH		6
+#define LASTLITERALS		5
+#define MFLIMIT			(COPYLENGTH + MINMATCH)
+#define MINLENGTH		(MFLIMIT + 1)
+#define MAXD_LOG		16
+#define MAXD			(1 << MAXD_LOG)
+#define MAXD_MASK		(u32)(MAXD - 1)
+#define MAX_DISTANCE		(MAXD - 1)
+#define HASH_LOG		(MAXD_LOG - 1)
+#define HASHTABLESIZE		(1 << HASH_LOG)
+#define MAX_NB_ATTEMPTS		256
+#define OPTIMAL_ML		(int)((ML_MASK-1)+MINMATCH)
+#define LZ4_64KLIMIT		((1<<16) + (MFLIMIT - 1))
+
+#define __HASH_VALUE(p, bits)				\
+	(((A32(p)) * 2654435761U) >> (32 - (bits)))
+
+#define HASH_VALUE(p)		__HASH_VALUE(p, HASH_LOG)
+
+#define MEMCPY_ADVANCE(_dst, _src, length)		\
+do {							\
+	typeof(length) _length = (length);		\
+	memcpy(_dst, _src, _length);			\
+	_src += _length;				\
+	_dst += _length;				\
+} while (0)
+
+#define MEMCPY_ADVANCE_BYTES(_dst, _src, _length)	\
+do {							\
+	const u8 *_end = (_src) + (_length);		\
+	while ((_src) < _end)				\
+		*_dst++ = *_src++;			\
+} while (0)
+
+#define STEPSIZE		__SIZEOF_LONG__
+
+#define LZ4_COPYPACKET(_src, _dst)			\
+do {							\
+	MEMCPY_ADVANCE(_dst, _src, STEPSIZE);		\
+	MEMCPY_ADVANCE(_dst, _src, COPYLENGTH - STEPSIZE);\
+} while (0)
+
+/*
+ * Equivalent to MEMCPY_ADVANCE - except may overrun @_dst and @_src by
+ * COPYLENGTH:
+ *
+ * Note: src and dst may overlap (with src < dst) - we must do the copy in
+ * STEPSIZE chunks for correctness
+ *
+ * Note also: length may be negative - we must not call memcpy if length is
+ * negative, but still adjust dst and src by length
+ */
+#define MEMCPY_ADVANCE_CHUNKED(_dst, _src, _length)	\
+do {							\
+	u8 *_end = (_dst) + (_length);			\
+	while ((_dst) < _end)				\
+		LZ4_COPYPACKET(_src, _dst);		\
+	_src -= (_dst) - _end;				\
+	_dst = _end;					\
+} while (0)
+
+#define MEMCPY_ADVANCE_CHUNKED_NOFIXUP(_dst, _src, _end)\
+do {							\
+	while ((_dst) < (_end))				\
+		LZ4_COPYPACKET((_src), (_dst));		\
+} while (0)
+
+struct lz4_hashtable {
+#if LZ4_ARCH64
+	const u8 * const	base;
+	u32			*table;
+#else
+	const int		base;
+	const u8		*table;
+#endif
+};
+
+#if LZ4_ARCH64
+#define HTYPE u32
+#else	/* 32-bit */
+#define HTYPE const u8*
+#endif
+
+#ifdef __BIG_ENDIAN
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzl(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzl(val) >> 3)
+#endif
+
+static inline unsigned common_length(const u8 *l, const u8 *r,
+				     const u8 *const l_end)
+{
+	const u8 *l_start = l;
+
+	while (likely(l <= l_end - sizeof(long))) {
+		unsigned long diff =
+			get_unaligned((unsigned long *) l) ^
+			get_unaligned((unsigned long *) r);
+
+		if (diff)
+			return l + LZ4_NBCOMMONBYTES(diff) - l_start;
+
+		l += sizeof(long);
+		r += sizeof(long);
+	}
+#if LZ4_ARCH64
+	if (l <= l_end - 4 && A32(r) == A32(l)) {
+		l += 4;
+		r += 4;
+	}
+#endif
+	if (l <= l_end - 2 && A16(r) == A16(l)) {
+		l += 2;
+		r += 2;
+	}
+	if (l <= l_end - 1 && *r == *l) {
+		l++;
+		r++;
+	}
+
+	return l - l_start;
+}
+
+static inline unsigned encode_length(u8 **op, unsigned length)
+{
+	/* Short lengths are returned as-is; nothing is written to *op. */
+	if (length < LENGTH_LONG)
+		return length;
+
+	/* Emit the excess as a run of 255s closed by a byte below 255. */
+	for (length -= LENGTH_LONG; length >= 255; length -= 255)
+		*(*op)++ = 255;
+	*(*op)++ = length;
+	return LENGTH_LONG;
+}