From fc50e25e550d8ca4a53dbbbb73d1ae0454fd7fdd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers3@gmail.com>
Date: Sat, 14 Jan 2017 00:56:39 -0800
Subject: [PATCH] lzx_compress: fix corruption with long literal run

The last round of updates to the LZX compressor allowed it to use larger
blocks, up to ~100KB.  Unfortunately, it was overlooked that this permits
literal runs longer than 65535 bytes, while in one place the length of a
literal run was still being stored in a u16.  Therefore, on incompressible
input data the length could wrap around, causing incorrect compression.
Fix this by widening 'litrunlen' to a 24-bit bitfield; 'adjusted_length'
becomes an 8-bit bitfield, so the two fields still total 32 bits.
---
 NEWS               | 9 +++++++++
 src/lzx_compress.c | 4 ++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index d519010a..46cd676c 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,13 @@
 Version 1.11.0-BETA5:
+	Fixed a data corruption bug (incorrect compression) when storing an
+	already highly-compressed file in an LZX-compressed WIM with a chunk
+	size greater than or equal to 64K.  Note that this is not the default
+	setting and such WIMs are not supported by Microsoft's WIM software, so
+	only users who used the --chunk-size option to wimlib-imagex or the
+	wimlib_set_output_chunk_size() API function may have been affected.
+	This bug was introduced in wimlib v1.10.0.  See
+	https://wimlib.net/forums/viewtopic.php?f=1&t=300 for more details.
+
 	On all platforms, sparse files are now extracted as sparse.
 
 	Sparse files captured from UNIX-style filesystems are now marked as
diff --git a/src/lzx_compress.c b/src/lzx_compress.c
index ca25d92f..b4930ee0 100644
--- a/src/lzx_compress.c
+++ b/src/lzx_compress.c
@@ -266,11 +266,11 @@ struct lzx_sequence {
 	/* The number of literals in the run.  This may be 0.  The literals are
 	 * not stored explicitly in this structure; instead, they are read
 	 * directly from the uncompressed data.  */
-	u16 litrunlen;
+	u32 litrunlen : 24;
 
 	/* If the next field doesn't indicate end-of-block, then this is the
 	 * match length minus LZX_MIN_MATCH_LEN.  */
-	u16 adjusted_length;
+	u32 adjusted_length : 8;
 
 	/* If bit 31 is clear, then this field contains the match header in bits
 	 * 0-8, and either the match offset plus LZX_OFFSET_ADJUSTMENT or a
-- 
2.43.0