From fc50e25e550d8ca4a53dbbbb73d1ae0454fd7fdd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Jan 2017 00:56:39 -0800 Subject: [PATCH] lzx_compress: fix corruption with long literal run The last round of updates to the LZX compressor made it able to use larger blocks, up to ~100KB. Unfortunately, it was overlooked that this allows literal runs > 65535 bytes, while in one place the length of a literal run was still being stored in a u16. Therefore, on incompressible input data this length could wrap around, causing incorrect compression. Fix this by enlarging the variable. --- NEWS | 9 +++++++++ src/lzx_compress.c | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index d519010a..46cd676c 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,13 @@ Version 1.11.0-BETA5: + Fixed a data corruption bug (incorrect compression) when storing an + already highly-compressed file in an LZX-compressed WIM with a chunk + size greater than or equal to 64K. Note that this is not the default + setting and such WIMs are not supported by Microsoft's WIM software, so + only users who used the --chunk-size option to wimlib-imagex or the + wimlib_set_output_chunk_size() API function may have been affected. + This bug was introduced in wimlib v1.10.0. See + https://wimlib.net/forums/viewtopic.php?f=1&t=300 for more details. + On all platforms, sparse files are now extracted as sparse. Sparse files captured from UNIX-style filesystems are now marked as diff --git a/src/lzx_compress.c b/src/lzx_compress.c index ca25d92f..b4930ee0 100644 --- a/src/lzx_compress.c +++ b/src/lzx_compress.c @@ -266,11 +266,11 @@ struct lzx_sequence { /* The number of literals in the run. This may be 0. The literals are * not stored explicitly in this structure; instead, they are read * directly from the uncompressed data. */ - u16 litrunlen; + u32 litrunlen : 24; /* If the next field doesn't indicate end-of-block, then this is the * match length minus LZX_MIN_MATCH_LEN. 
*/ - u16 adjusted_length; + u32 adjusted_length : 8; /* If bit 31 is clear, then this field contains the match header in bits * 0-8, and either the match offset plus LZX_OFFSET_ADJUSTMENT or a -- 2.43.0