From 1ab60207e56968e480be6400c67844017598b7dd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 20 Dec 2012 21:59:27 -0600 Subject: [PATCH] LZX decompression cleanups - Improve comments - Get rid of bitstream_read_bytes() because it wasn't clear what it was supposed to do. Just inline what needs to be done in the appropriate places. - Only undo the call instruction preprocessing if 0xe8 bytes may be in the input data. --- src/decompress.c | 44 +----- src/decompress.h | 66 ++++---- src/lzx-common.c | 2 + src/lzx-compress.c | 71 ++++----- src/lzx-decompress.c | 352 ++++++++++++++++++++++++------------------- src/lzx.h | 27 +++- src/resource.c | 2 +- 7 files changed, 285 insertions(+), 279 deletions(-) diff --git a/src/decompress.c b/src/decompress.c index 9a08621a..6d4b41e2 100644 --- a/src/decompress.c +++ b/src/decompress.c @@ -26,48 +26,6 @@ #include "decompress.h" #include -/* Reads @n bytes from the bitstream @stream into the location pointed to by @dest. - * The bitstream must be 16-bit aligned. */ -int bitstream_read_bytes(struct input_bitstream *stream, size_t n, void *dest) -{ - /* Precondition: The bitstream is 16-byte aligned. */ - wimlib_assert2(stream->bitsleft % 16 == 0); - - u8 *p = dest; - - /* Get the bytes currently in the buffer variable. */ - while (stream->bitsleft != 0) { - if (n-- == 0) - return 0; - *p++ = bitstream_peek_bits(stream, 8); - bitstream_remove_bits(stream, 8); - } - - /* Get the rest directly from the pointer to the data. Of course, it's - * necessary to check there are really n bytes available. */ - if (n > stream->data_bytes_left) { - ERROR("Unexpected end of input when reading %zu bytes from " - "bitstream (only have %u bytes left)", - n, stream->data_bytes_left); - return 1; - } - memcpy(p, stream->data, n); - stream->data += n; - stream->data_bytes_left -= n; - - /* It's possible to copy an odd number of bytes and leave the stream in - * an inconsistent state. Fix it by reading the next byte, if it is - * there. */ - if ((n & 1) && stream->data_bytes_left != 0) { - stream->bitsleft = 8; - stream->data_bytes_left--; - stream->bitbuf |= (input_bitbuf_t)(*stream->data) << - (sizeof(input_bitbuf_t) * 8 - 8); - stream->data++; - } - return 0; -} - /* * Builds a fast huffman decoding table from an array that gives the length of * the codeword for each symbol in the alphabet. Originally based on code @@ -356,7 +314,7 @@ int read_huffsym_near_end_of_input(struct input_bitstream *istream, do { if (bitsleft == 0) { ERROR("Input stream exhausted"); - return 1; + return -1; } key_bits = sym + bitstream_peek_bits(istream, 1); bitstream_remove_bits(istream, 1); diff --git a/src/decompress.h b/src/decompress.h index ae1c99be..21d8090e 100644 --- a/src/decompress.h +++ b/src/decompress.h @@ -13,12 +13,16 @@ /* Must be at least 32 bits. */ typedef unsigned long input_bitbuf_t; -/* Structure to provide a bitstream. */ +/* Structure to encapsulate a block of in-memory data that is being interpreted + * as a stream of bits. + * + * This is geared specifically towards the XPRESS and LZX compression formats + * with regards to the actual ordering the bits within the byte sequence. */ struct input_bitstream { /* A variable of length at least 32 bits that is used to hold bits that * have been read from the stream. The bits are ordered from high-order - * to low-order; the next bit is always the high-order bit. */ + * to low-order, and the next bit is always the high-order bit. */ input_bitbuf_t bitbuf; /* Pointer to the next byte to be retrieved from the input. */ @@ -41,7 +45,11 @@ static inline void init_input_bitstream(struct input_bitstream *istream, istream->data_bytes_left = num_data_bytes; } -/* Ensures that the bit buffer contains @num_bits bits. */ +/* Ensures that the bit buffer variable for the bitstream contains @num_bits + * bits. + * + * If there are at least @num_bits bits remaining in the bitstream, 0 is + * returned. Otherwise, -1 is returned. */ static inline int bitstream_ensure_bits(struct input_bitstream *istream, unsigned num_bits) { @@ -70,15 +78,15 @@ static inline int bitstream_ensure_bits(struct input_bitstream *istream, istream->bitsleft += 16; istream->data_bytes_left -= 2; } else { - ret = 1; + ret = -1; } } wimlib_assert2(ret != 0 || istream->bitsleft >= num_bits); return ret; } -/* Returns the next @num_bits bits in the bit buffer. It must contain at least - * @num_bits bits to call this function. */ +/* Returns the next @num_bits bits in the buffer variable, which must contain at + * least @num_bits bits, for the bitstream. */ static inline unsigned bitstream_peek_bits(const struct input_bitstream *istream, unsigned num_bits) { @@ -91,8 +99,8 @@ bitstream_peek_bits(const struct input_bitstream *istream, unsigned num_bits) return ret; } -/* Removes @num_bits bits from the bit buffer. It must contain at least - * @num_bits bits to call this function. */ +/* Removes @num_bits bits from the buffer variable, which must contain at least + * @num_bits bits, for the bitstream. */ static inline void bitstream_remove_bits(struct input_bitstream *istream, unsigned num_bits) { @@ -101,28 +109,31 @@ static inline void bitstream_remove_bits(struct input_bitstream *istream, istream->bitsleft -= num_bits; } -/* Reads and returns @num_bits bits from the input bitstream. */ +/* Reads @num_bits bits from the input bitstream. @num_bits must be 16 or fewer. + * On success, returns 0 and returns the requested bits in @n. If there are + * fewer than @num_bits remaining in the bitstream, -1 is returned. */ static inline int bitstream_read_bits(struct input_bitstream *istream, unsigned num_bits, unsigned *n) { + wimlib_assert2(num_bits <= 16); int ret = bitstream_ensure_bits(istream, num_bits); if (ret == 0) { *n = bitstream_peek_bits(istream, num_bits); bitstream_remove_bits(istream, num_bits); + wimlib_assert2(istream->bitsleft < 16); } else { ERROR("bitstream_read_bits(): Input buffer exhausted"); } return ret; } -/* In the XPRESS format there can be literal length bytes embedded in the - * compressed bitstream. These bytes are basically separate from the bitstream, - * as they come AFTER the bits that are currently in the buffer variable (based - * on reading 16 bits at a time), even though the buffer variable may not be - * empty. +/* In the XPRESS format there can be literal bytes embedded in the bitstream. + * These bytes are basically separate from the bitstream, as they come AFTER the + * bits that are currently in the buffer variable (based on reading 16 bits at a + * time), even though the buffer variable may not be empty. * - * This function returns the next such literal length byte in the input - * bitstream. Returns -1 if we are at the end of the bitstream. */ + * This function returns the next such literal byte, or -1 if there are no more. + */ static inline int bitstream_read_byte(struct input_bitstream *istream) { wimlib_assert2(istream->bitsleft < 32); @@ -138,8 +149,8 @@ static inline int bitstream_read_byte(struct input_bitstream *istream) return ret; } -/* Reads @num_bits bits from the bit buffer without checking to see if that many - * bits are in the buffer or not. */ +/* Reads @num_bits bits from the buffer variable for a bistream without checking + * to see if that many bits are in the buffer or not. */ static inline unsigned bitstream_read_bits_nocheck(struct input_bitstream *istream, unsigned num_bits) { @@ -148,23 +159,6 @@ bitstream_read_bits_nocheck(struct input_bitstream *istream, unsigned num_bits) return n; } -/* Removes the bits that have been read into the bit buffer variable. */ -static inline void flush_input_bitstream(struct input_bitstream *istream) -{ - bitstream_remove_bits(istream, istream->bitsleft); - istream->bitbuf = 0; - wimlib_assert2(istream->bitsleft == 0); -} - -extern int bitstream_read_bytes(struct input_bitstream *istream, size_t n, - void *dest); - -/* Aligns the bitstream on a 16-bit boundary. */ -static inline void align_input_bitstream(struct input_bitstream *istream) -{ - bitstream_remove_bits(istream, istream->bitsleft & 15); -} - extern int read_huffsym_near_end_of_input(struct input_bitstream *istream, const u16 decode_table[], const u8 lens[], @@ -179,7 +173,7 @@ extern int read_huffsym_near_end_of_input(struct input_bitstream *istream, * large WIM file. I'm not sure it could be made much faster that it is, * especially since there isn't enough time to make a big table that allows * decoding multiple symbols per lookup. But if extracting files to a hard - * disk, the IO will be the bottleneck anyway. + * disk, the I/O will be the bottleneck anyway. * * @buf: The input buffer from which the symbol will be read. * @decode_table: The fast Huffman decoding table for the Huffman tree. diff --git a/src/lzx-common.c b/src/lzx-common.c index 9da4ddb0..5b3d118e 100644 --- a/src/lzx-common.c +++ b/src/lzx-common.c @@ -1,5 +1,6 @@ #include "lzx.h" +#ifdef USE_LZX_EXTRA_BITS_ARRAY /* LZX uses what it calls 'position slots' to represent match offsets. * What this means is that a small 'position slot' number and a small * offset from that slot are encoded instead of one large offset for @@ -20,6 +21,7 @@ const u8 lzx_extra_bits[LZX_NUM_POSITION_SLOTS] = { /*17, 17, 17, 17, 17,*/ /*17*/ }; +#endif const u32 lzx_position_base[LZX_NUM_POSITION_SLOTS] = { 0 , 1 , 2 , 3 , 4 , diff --git a/src/lzx-compress.c b/src/lzx-compress.c index b1800468..81a6256e 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -127,17 +127,6 @@ static u32 lzx_record_literal(u8 literal, void *__main_freq_tab) return literal; } -/* Equivalent to lzx_extra_bits[position_slot] except position_slot must be - * between 2 and 37 */ -static inline unsigned lzx_get_num_extra_bits(unsigned position_slot) -{ -#if 0 - return lzx_extra_bits[position_slot]; -#endif - wimlib_assert(position_slot >= 2 && position_slot <= 37); - return (position_slot >> 1) - 1; -} - /* Constructs a match from an offset and a length, and updates the LRU queue and * the frequency of symbols in the main, length, and aligned offset alphabets. * The return value is a 32-bit number that provides the match in an @@ -320,7 +309,7 @@ static int lzx_write_match(struct output_bitstream *out, int block_type, wimlib_assert(position_slot < LZX_NUM_POSITION_SLOTS); - num_extra_bits = lzx_extra_bits[position_slot]; + num_extra_bits = lzx_get_num_extra_bits(position_slot); /* For aligned offset blocks with at least 3 extra bits, output the * verbatim bits literally, then the aligned bits encoded using the @@ -596,41 +585,37 @@ static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs, codes->aligned_codewords); } -/* Do the 'E8' preprocessing, where the targets of x86 CALL instructions were - * changed from relative offsets to absolute offsets. This type of - * preprocessing can be used on any binary data even if it is not actually - * machine code. It seems to always be used in WIM files, even though there is - * no bit to indicate that it actually is used, unlike in the LZX compressed - * format as used in other file formats such as the cabinet format, where a bit - * is reserved for that purpose. */ -static void do_call_insn_preprocessing(u8 uncompressed_data[], - unsigned uncompressed_data_len) +static void do_call_insn_translation(u32 *call_insn_target, int input_pos, + int32_t file_size) { - int i = 0; - int file_size = LZX_MAGIC_FILESIZE; - int32_t rel_offset; int32_t abs_offset; + int32_t rel_offset; - /* Not enabled in the last 6 bytes, which means the 5-byte call - * instruction cannot start in the last *10* bytes. */ - while (i < uncompressed_data_len - 10) { - if (uncompressed_data[i] != 0xe8) { - i++; - continue; + rel_offset = le32_to_cpu(*call_insn_target); + if (rel_offset >= -input_pos && rel_offset < file_size) { + if (rel_offset < file_size - input_pos) { + /* "good translation" */ + abs_offset = rel_offset + input_pos; + } else { + /* "compensating translation" */ + abs_offset = rel_offset - file_size; } - rel_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1)); - - if (rel_offset >= -i && rel_offset < file_size) { - if (rel_offset < file_size - i) { - /* "good translation" */ - abs_offset = rel_offset + i; - } else { - /* "compensating translation" */ - abs_offset = rel_offset - file_size; - } - *(int32_t*)(uncompressed_data + i + 1) = cpu_to_le32(abs_offset); + *call_insn_target = cpu_to_le32(abs_offset); + } +} + +/* This is the reverse of undo_call_insn_preprocessing() in lzx-decompress.c. + * See the comment above that function for more information. */ +static void do_call_insn_preprocessing(u8 uncompressed_data[], + int uncompressed_data_len) +{ + for (int i = 0; i < uncompressed_data_len - 10; i++) { + if (uncompressed_data[i] == 0xe8) { + do_call_insn_translation((u32*)&uncompressed_data[i + 1], + i, + LZX_WIM_MAGIC_FILESIZE); + i += 4; } - i += 5; } } @@ -670,7 +655,7 @@ int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len, void *compressed_data, unsigned *compressed_len_ret) { struct output_bitstream ostream; - u8 uncompressed_data[uncompressed_len + LZX_MAX_MATCH]; + u8 uncompressed_data[uncompressed_len + 8]; struct lzx_freq_tables freq_tabs; struct lzx_codes codes; u32 match_tab[uncompressed_len]; diff --git a/src/lzx-decompress.c b/src/lzx-decompress.c index a66e6d8b..06e3e020 100644 --- a/src/lzx-decompress.c +++ b/src/lzx-decompress.c @@ -233,7 +233,7 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[], char value; ret = read_huffsym_using_pretree(istream, pretree_decode_table, - pretree_lens, &tree_code); + pretree_lens, &tree_code); if (ret != 0) return ret; switch (tree_code) { @@ -264,10 +264,10 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[], if (ret != 0) return ret; num_same += 4; - ret = read_huffsym_using_pretree(istream, - pretree_decode_table, - pretree_lens, &code); + pretree_decode_table, + pretree_lens, + &code); if (ret != 0) return ret; value = (char)*lens - (char)code; @@ -313,12 +313,12 @@ static int lzx_read_block_header(struct input_bitstream *istream, struct lru_queue *queue) { int ret; - int block_type; + unsigned block_type; unsigned block_size; - int s; - int i; + unsigned s; + unsigned i; unsigned len; - int32_t R[3]; + u32 R[3]; ret = bitstream_ensure_bits(istream, 4); if (ret != 0) { @@ -335,8 +335,8 @@ static int lzx_read_block_header(struct input_bitstream *istream, * 16 bits, indicated by a 0 bit. */ s = bitstream_read_bits_nocheck(istream, 1); - if (s == 1) { - block_size = 1 << 15; + if (s) { + block_size = 32768; } else { ret = bitstream_read_bits(istream, 16, &block_size); if (ret != 0) @@ -438,27 +438,39 @@ static int lzx_read_block_header(struct input_bitstream *istream, "Huffman tree"); return ret; } - + /* The bitstream of compressed literals and matches for this + * block directly follows and will be read in + * lzx_decompress_block(). */ break; - case LZX_BLOCKTYPE_UNCOMPRESSED: LZX_DEBUG("Found uncompressed block."); - - /* Mystery bit! */ - ret = bitstream_read_bits(istream, 1, &i); - if (ret != 0) - return ret; - align_input_bitstream(istream); - ret = bitstream_read_bytes(istream, sizeof(R), R); - if (ret != 0) - return ret; - queue->R0 = le32_to_cpu(R[0]); - queue->R1 = le32_to_cpu(R[1]); - queue->R2 = le32_to_cpu(R[2]); + /* Before reading the three LRU match offsets from the + * uncompressed block header, the stream needs to be aligned on + * a 16-bit boundary. But, unexpectedly, if the stream is + * *already* aligned, the correct thing to do is to throw away + * the next 16 bits. */ + if (istream->bitsleft == 0) { + if (istream->data_bytes_left < 14) + return -1; + istream->data += 2; + istream->data_bytes_left -= 2; + } else { + if (istream->data_bytes_left < 12) + return -1; + istream->bitsleft = 0; + istream->bitbuf = 0; + } + queue->R0 = le32_to_cpu(*(u32*)(istream->data + 0)); + queue->R1 = le32_to_cpu(*(u32*)(istream->data + 4)); + queue->R2 = le32_to_cpu(*(u32*)(istream->data + 8)); + istream->data += 12; + istream->data_bytes_left -= 12; + /* The uncompressed data of this block directly follows and will + * be read in lzx_decompress(). */ break; default: - LZX_DEBUG("Found invalid block."); - return 1; + ERROR("lzx_decompress(): Found invalid block"); + return -1; } *block_type_ret = block_type; *block_size_ret = block_size; @@ -466,32 +478,43 @@ static int lzx_read_block_header(struct input_bitstream *istream, } /* - * Decodes a compressed literal match value. It refers to some match_offset to - * a point earlier in the window, and some match_len, for which the data is to - * be copied to the current position in the window. + * Decodes a compressed match from a block of LZX-compressed data. A match + * refers to some match_offset to a point earlier in the window as well as some + * match_len, for which the data is to be copied to the current position in the + * window. * * @main_element: The start of the match data, as decoded using the main - * tree. - * @block_type: The type of the block (LZX_BLOCKTYPE_ALIGNED or - * LZX_BLOCKTYPE_VERBATIM) + * tree. + * + * @block_type: The type of the block (LZX_BLOCKTYPE_ALIGNED or + * LZX_BLOCKTYPE_VERBATIM) + * * @bytes_remaining: The amount of uncompressed data remaining to be - * uncompressed. It is an error if the match - * is longer than @bytes_remaining. - * @window: A pointer to the window into which the uncompressed + * uncompressed in this block. It is an error if the match + * is longer than this number. + * + * @window: A pointer to the window into which the uncompressed * data is being written. - * @window_pos: The current position in the window. - * @tables: Contains the Huffman tables for the block (main, - * length, and also aligned offset only for - * LZX_BLOCKTYPE_ALIGNED) - * @queue: The least-recently used queue for match offsets. - * @istream: The input bitstream. - * - * Returns the length of the match, or -1 on error (match would exceed - * the amount of data needing to be uncompressed, or match refers to data before - * the window, or the input bitstream ended unexpectedly). + * + * @window_pos: The current byte offset in the window. + * + * @tables: The Huffman decoding tables for this LZX block (main + * code, length code, and for LZX_BLOCKTYPE_ALIGNED blocks, + * also the aligned offset code). + * + * @queue: The least-recently used queue for match offsets. + * + * @istream: The input bitstream. + * + * Returns the length of the match, or a negative number on error. The possible + * error cases are: + * - Match would exceed the amount of data remaining to be uncompressed. + * - Match refers to data before the window. + * - The input bitstream ended unexpectedly. */ -static int lzx_decode_match(int main_element, int block_type, - int bytes_remaining, u8 *window, int window_pos, +static int lzx_decode_match(unsigned main_element, int block_type, + unsigned bytes_remaining, u8 *window, + unsigned window_pos, const struct lzx_tables *tables, struct lru_queue *queue, struct input_bitstream *istream) @@ -504,8 +527,8 @@ static int lzx_decode_match(int main_element, int block_type, unsigned num_extra_bits; unsigned verbatim_bits; unsigned aligned_bits; + unsigned i; int ret; - int i; u8 *match_dest; u8 *match_src; @@ -525,9 +548,9 @@ static int lzx_decode_match(int main_element, int block_type, match_len = LZX_MIN_MATCH + length_header; if (length_header == LZX_NUM_PRIMARY_LENS) { ret = read_huffsym_using_lentree(istream, tables, - &additional_len); + &additional_len); if (ret != 0) - return -1; + return ret; match_len += additional_len; } @@ -557,7 +580,7 @@ static int lzx_decode_match(int main_element, int block_type, * decode the match offset. */ /* Look up the number of extra bits that need to be read. */ - num_extra_bits = lzx_extra_bits[position_slot]; + num_extra_bits = lzx_get_num_extra_bits(position_slot); /* For aligned blocks, if there are at least 3 extra bits, the * actual number of extra bits is 3 less, and they encode a @@ -574,16 +597,16 @@ static int lzx_decode_match(int main_element, int block_type, * num_extra_bits == 3, the assignment to verbatim_bits * will just set it to 0. ) */ ret = bitstream_read_bits(istream, num_extra_bits - 3, - &verbatim_bits); + &verbatim_bits); if (ret != 0) - return -1; + return ret; verbatim_bits <<= 3; ret = read_huffsym_using_alignedtree(istream, tables, &aligned_bits); if (ret != 0) - return -1; + return ret; } else { /* For non-aligned blocks, or for aligned blocks with * less than 3 extra bits, the extra bits are added @@ -592,14 +615,14 @@ static int lzx_decode_match(int main_element, int block_type, ret = bitstream_read_bits(istream, num_extra_bits, &verbatim_bits); if (ret != 0) - return -1; + return ret; aligned_bits = 0; } /* Calculate the match offset. */ - match_offset = lzx_position_base[position_slot] + verbatim_bits + - aligned_bits - 2; + match_offset = lzx_position_base[position_slot] + + verbatim_bits + aligned_bits - 2; /* Update the LRU queue. */ queue->R2 = queue->R1; @@ -615,14 +638,14 @@ static int lzx_decode_match(int main_element, int block_type, match_src = match_dest - match_offset; if (match_len > bytes_remaining) { - ERROR("lzx_decode_match(): Match of length %d bytes overflows " + ERROR("lzx_decode_match(): Match of length %u bytes overflows " "uncompressed block size", match_len); return -1; } if (match_src < window) { - ERROR("lzx_decode_match(): Match of length %d bytes references " - "data before window (match_offset = %d, window_pos = %d)", + ERROR("lzx_decode_match(): Match of length %u bytes references " + "data before window (match_offset = %u, window_pos = %u)", match_len, match_offset, window_pos); return -1; } @@ -646,49 +669,62 @@ static int lzx_decode_match(int main_element, int block_type, return match_len; } - - -/* Undo the 'E8' preprocessing, where the targets of x86 CALL instructions were - * changed from relative offsets to absolute offsets. This type of - * preprocessing can be used on any binary data even if it is not actually - * machine code. It seems to always be used in WIM files, even though there is - * no bit to indicate that it actually is used, unlike in the LZX compressed - * format as used in other file formats, where a bit is reserved for that - * purpose. */ -static void undo_call_insn_preprocessing(u8 uncompressed_data[], - unsigned uncompressed_data_len) +static void undo_call_insn_translation(u32 *call_insn_target, int input_pos, + int32_t file_size) { - int i = 0; - int file_size = LZX_MAGIC_FILESIZE; int32_t abs_offset; int32_t rel_offset; - /* Not enabled in the last 6 bytes, which means the 5-byte call - * instruction cannot start in the last *10* bytes. */ - while (i < uncompressed_data_len - 10) { - if (uncompressed_data[i] != 0xe8) { - i++; - continue; + abs_offset = le32_to_cpu(*call_insn_target); + if (abs_offset >= -input_pos && abs_offset < file_size) { + if (abs_offset >= 0) { + /* "good translation" */ + rel_offset = abs_offset - input_pos; + } else { + /* "compensating translation" */ + rel_offset = abs_offset + file_size; } - abs_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1)); - - if (abs_offset >= -i && abs_offset < file_size) { - if (abs_offset >= 0) { - /* "good translation" */ - rel_offset = abs_offset - i; - } else { - /* "compensating translation" */ - rel_offset = abs_offset + file_size; - } - *(int32_t*)(uncompressed_data + i + 1) = - cpu_to_le32(rel_offset); + *call_insn_target = cpu_to_le32(rel_offset); + } +} + +/* Undo the 'E8' preprocessing, where the targets of x86 CALL instructions were + * changed from relative offsets to absolute offsets. + * + * Note that this call instruction preprocessing can and will be used on any + * data even if it is not actually x86 machine code. In fact, this type of + * preprocessing appears to always be used in LZX-compressed resources in WIM + * files; there is no bit to indicate whether it is used or not, unlike in the + * LZX compressed format as used in cabinet files, where a bit is reserved for + * that purpose. + * + * Call instruction preprocessing is disabled in the last 6 bytes of the + * uncompressed data, which really means the 5-byte call instruction cannot + * start in the last 10 bytes of the uncompressed data. This is one of the + * errors in the LZX documentation. + * + * Call instruction preprocessing does not appear to be disabled after the + * 32768th chunk of a WIM stream, which is apparently is yet another difference + * from the LZX compression used in cabinet files. + * + * Call instruction processing is supposed to take the file size as a parameter, + * as it is used in calculating the translated jump targets. But in WIM files, + * this file size is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).*/ +static void undo_call_insn_preprocessing(u8 uncompressed_data[], + int uncompressed_data_len) +{ + for (int i = 0; i < uncompressed_data_len - 10; i++) { + if (uncompressed_data[i] == 0xe8) { + undo_call_insn_translation((u32*)&uncompressed_data[i + 1], + i, + LZX_WIM_MAGIC_FILESIZE); + i += 4; } - i += 5; } } /* - * Decompresses a compressed block of data from which the header has already + * Decompresses a LZX-compressed block of data from which the header has already * been read. * * @block_type: The type of the block (LZX_BLOCKTYPE_VERBATIM or @@ -702,56 +738,63 @@ static void undo_call_insn_preprocessing(u8 uncompressed_data[], * @queue: The least-recently-used queue for match offsets. * @istream: The input bitstream for the compressed literals. */ -static int lzx_decompress_block(int block_type, int block_size, u8 *window, - int window_pos, +static int lzx_decompress_block(int block_type, unsigned block_size, + u8 *window, + unsigned window_pos, const struct lzx_tables *tables, struct lru_queue *queue, struct input_bitstream *istream) { - unsigned bytes_remaining; unsigned main_element; + unsigned end; int match_len; - int ret; - - bytes_remaining = block_size; - while (bytes_remaining > 0) { + int ret = 0; + end = window_pos + block_size; + while (window_pos < end) { ret = read_huffsym_using_maintree(istream, tables, &main_element); if (ret != 0) - return ret; + break; if (main_element < LZX_NUM_CHARS) { /* literal: 0 to LZX_NUM_CHARS - 1 */ - window[window_pos + block_size - bytes_remaining] = - main_element; - bytes_remaining--; + window[window_pos++] = main_element; } else { /* match: LZX_NUM_CHARS to LZX_MAINTREE_NUM_SYMBOLS - 1 */ - match_len = lzx_decode_match(main_element, - block_type, bytes_remaining, window, - block_size + window_pos - - bytes_remaining, - tables, queue, istream); - if (match_len == -1) - return 1; - - bytes_remaining -= match_len; + ret = lzx_decode_match(main_element, + block_type, + end - window_pos, + window, + window_pos, + tables, + queue, + istream); + if (ret < 0) + break; + window_pos += ret; } } return 0; } /* - * Decompresses a block of LZX-compressed data using a window size of 32768. + * Decompresses a block of LZX-compressed data as used in the WIM file format. + * + * Note that this will NOT work unmodified for LZX as used in the cabinet + * format, which is not the same as in the WIM format! * * @compressed_data: A pointer to the compressed data. + * * @compressed_len: The length of the compressed data, in bytes. + * * @uncompressed_data: A pointer to the buffer into which to write the - * uncompressed data. - * @uncompressed_len: The length of the uncompressed data. + * uncompressed data. + * + * @uncompressed_len: The length of the uncompressed data. It must be + * 32768 bytes or less. * - * Return non-zero on failure. + * Return 0 on success; non-zero on failure. */ int lzx_decompress(const void *compressed_data, unsigned compressed_len, void *uncompressed_data, unsigned uncompressed_len) @@ -759,10 +802,11 @@ int lzx_decompress(const void *compressed_data, unsigned compressed_len, struct lzx_tables tables; struct input_bitstream istream; struct lru_queue queue; - unsigned bytes_remaining; + unsigned window_pos; unsigned block_size; unsigned block_type; int ret; + bool e8_preprocessing_done; LZX_DEBUG("lzx_decompress (compressed_data = %p, compressed_len = %d, " "uncompressed_data = %p, uncompressed_len = %d).", @@ -776,31 +820,34 @@ int lzx_decompress(const void *compressed_data, unsigned compressed_len, queue.R0 = 1; queue.R1 = 1; queue.R2 = 1; - bytes_remaining = uncompressed_len; - init_input_bitstream(&istream, compressed_data, compressed_len); + e8_preprocessing_done = false; /* Set to true if there may be 0xe8 bytes + in the uncompressed data. */ + /* The compressed data will consist of one or more blocks. The * following loop decompresses one block, and it runs until there all * the compressed data has been decompressed, so there are no more * blocks. */ - while (bytes_remaining != 0) { - + for (window_pos = 0; + window_pos < uncompressed_len; + window_pos += block_size) + { LZX_DEBUG("Reading block header."); ret = lzx_read_block_header(&istream, &block_size, &block_type, &tables, &queue); if (ret != 0) return ret; - LZX_DEBUG("block_size = %u, bytes_remaining = %u", - block_size, bytes_remaining); + LZX_DEBUG("block_size = %u, window_pos = %u", + block_size, window_pos); - if (block_size > bytes_remaining) { + if (block_size > uncompressed_len - window_pos) { ERROR("lzx_decompress(): Expected a block size of at " "most %u bytes (found %u bytes)", - bytes_remaining, block_size); - return 1; + uncompressed_len - window_pos, block_size); + return -1; } switch (block_type) { @@ -810,41 +857,42 @@ int lzx_decompress(const void *compressed_data, unsigned compressed_len, LZX_DEBUG("LZX_BLOCKTYPE_VERBATIM"); else LZX_DEBUG("LZX_BLOCKTYPE_ALIGNED"); - ret = lzx_decompress_block(block_type, block_size, uncompressed_data, - uncompressed_len - - bytes_remaining, - &tables, &queue, &istream); + window_pos, + &tables, + &queue, + &istream); if (ret != 0) return ret; + if (tables.maintree_lens[0xe8] != 0) + e8_preprocessing_done = true; break; case LZX_BLOCKTYPE_UNCOMPRESSED: LZX_DEBUG("LZX_BLOCKTYPE_UNCOMPRESSED"); - ret = bitstream_read_bytes(&istream, block_size, - uncompressed_data + - uncompressed_len - - bytes_remaining); - if (ret != 0) - return ret; - if (block_size & 1) - align_input_bitstream(&istream); - break; - default: - wimlib_assert(0); + if (istream.data_bytes_left < block_size) { + ERROR("Unexpected end of input when " + "reading %zu bytes from LZX bitstream " + "(only have %u bytes left)", + block_size, istream.data_bytes_left); + return -1; + } + memcpy(&((u8*)uncompressed_data)[window_pos], istream.data, + block_size); + istream.data += block_size; + istream.data_bytes_left -= block_size; + /* Re-align bitstream if an odd number of bytes were + * read. */ + if (istream.data_bytes_left && (block_size & 1)) { + istream.data_bytes_left--; + istream.data++; + } + e8_preprocessing_done = true; break; } - - bytes_remaining -= block_size; - - if (bytes_remaining != 0) - LZX_DEBUG("%d bytes remaining.", bytes_remaining); } - - if (uncompressed_len >= 10) - undo_call_insn_preprocessing(uncompressed_data, - uncompressed_len); - + if (e8_preprocessing_done) + undo_call_insn_preprocessing(uncompressed_data, uncompressed_len); return 0; } diff --git a/src/lzx.h b/src/lzx.h index ab158d86..8a396892 100644 --- a/src/lzx.h +++ b/src/lzx.h @@ -70,16 +70,35 @@ * though the blocks themselves are not this size, and the size of the actual * file resource in the WIM file is very likely to be something entirely * different as well. */ -#define LZX_MAGIC_FILESIZE 12000000 +#define LZX_WIM_MAGIC_FILESIZE 12000000 +#define USE_LZX_EXTRA_BITS_ARRAY + +#ifdef USE_LZX_EXTRA_BITS_ARRAY extern const u8 lzx_extra_bits[LZX_NUM_POSITION_SLOTS]; +#endif + +/* Given the number of a LZX position slot, return the number of extra bits that + * are needed to encode the match offset. */ +static inline unsigned lzx_get_num_extra_bits(unsigned position_slot) +{ +#ifdef USE_LZX_EXTRA_BITS_ARRAY + /* Use a table */ + return lzx_extra_bits[position_slot]; +#else + /* Calculate directly using a shift and subtraction. */ + wimlib_assert(position_slot >= 2 && position_slot <= 37); + return (position_slot >> 1) - 1; +#endif +} + extern const u32 lzx_position_base[LZX_NUM_POSITION_SLOTS]; /* Least-recently used queue for match offsets. */ struct lru_queue { - int R0; - int R1; - int R2; + u32 R0; + u32 R1; + u32 R2; }; extern int lzx_decompress(const void *compressed_data, unsigned compressed_len, diff --git a/src/resource.c b/src/resource.c index 9d6fe2dc..17aaabfa 100644 --- a/src/resource.c +++ b/src/resource.c @@ -620,7 +620,7 @@ int extract_wim_resource(const struct lookup_table_entry *lte, u64 to_read = min(bytes_remaining, sizeof(buf)); ret = read_wim_resource(lte, buf, to_read, offset, 0); if (ret != 0) - break; + return ret; sha1_update(&ctx, buf, to_read); ret = extract_chunk(buf, to_read, offset, extract_chunk_arg); if (ret != 0) { -- 2.43.0