*/
/*
- * Copyright (C) 2012 Eric Biggers
+ * Copyright (C) 2012, 2013 Eric Biggers
*
* This file is part of wimlib, a library for working with WIM files.
*
*/
#include "util.h"
+#include "wimlib.h"
#include "lzx.h"
#include "decompress.h"
#include <string.h>
/*
* Reads a Huffman-encoded symbol using the pre-tree.
*/
-static inline int read_huffsym_using_pretree(struct input_bitstream *istream,
- const u16 pretree_decode_table[],
- const u8 pretree_lens[], unsigned *n)
+static inline int
+read_huffsym_using_pretree(struct input_bitstream *istream,
+ const u16 pretree_decode_table[],
+ const u8 pretree_lens[], unsigned *n)
{
return read_huffsym(istream, pretree_decode_table, pretree_lens,
LZX_PRETREE_NUM_SYMBOLS, LZX_PRETREE_TABLEBITS, n,
}
/* Reads a Huffman-encoded symbol using the main tree. */
-static inline int read_huffsym_using_maintree(struct input_bitstream *istream,
- const struct lzx_tables *tables,
- unsigned *n)
+static inline int
+read_huffsym_using_maintree(struct input_bitstream *istream,
+ const struct lzx_tables *tables,
+ unsigned *n)
{
return read_huffsym(istream, tables->maintree_decode_table,
tables->maintree_lens, LZX_MAINTREE_NUM_SYMBOLS,
}
/* Reads a Huffman-encoded symbol using the length tree. */
-static inline int read_huffsym_using_lentree(struct input_bitstream *istream,
- const struct lzx_tables *tables,
- unsigned *n)
+static inline int
+read_huffsym_using_lentree(struct input_bitstream *istream,
+ const struct lzx_tables *tables,
+ unsigned *n)
{
return read_huffsym(istream, tables->lentree_decode_table,
tables->lentree_lens, LZX_LENTREE_NUM_SYMBOLS,
}
/* Reads a Huffman-encoded symbol using the aligned offset tree. */
-static inline int read_huffsym_using_alignedtree(struct input_bitstream *istream,
- const struct lzx_tables *tables,
- unsigned *n)
+static inline int
+read_huffsym_using_alignedtree(struct input_bitstream *istream,
+ const struct lzx_tables *tables,
+ unsigned *n)
{
return read_huffsym(istream, tables->alignedtree_decode_table,
tables->alignedtree_lens,
* @num_lens: Number of length values to decode and return.
*
*/
-static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
- unsigned num_lens)
+static int
+lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
+ unsigned num_lens)
{
/* Declare the decoding table and length table for the pretree. */
u16 pretree_decode_table[(1 << LZX_PRETREE_TABLEBITS) +
char value;
ret = read_huffsym_using_pretree(istream, pretree_decode_table,
- pretree_lens, &tree_code);
+ pretree_lens, &tree_code);
if (ret != 0)
return ret;
switch (tree_code) {
if (ret != 0)
return ret;
num_same += 4;
-
ret = read_huffsym_using_pretree(istream,
- pretree_decode_table,
- pretree_lens, &code);
+ pretree_decode_table,
+ pretree_lens,
+ &code);
if (ret != 0)
return ret;
value = (char)*lens - (char)code;
* R0, R1, and R2 will be written (only for uncompressed
* blocks, which contain this information in the header)
*/
-static int lzx_read_block_header(struct input_bitstream *istream,
- unsigned *block_size_ret,
- unsigned *block_type_ret,
- struct lzx_tables *tables,
- struct lru_queue *queue)
+static int
+lzx_read_block_header(struct input_bitstream *istream,
+ unsigned *block_size_ret,
+ unsigned *block_type_ret,
+ struct lzx_tables *tables,
+ struct lru_queue *queue)
{
int ret;
- int block_type;
+ unsigned block_type;
unsigned block_size;
- int s;
- int i;
+ unsigned s;
+ unsigned i;
unsigned len;
- int32_t R[3];
ret = bitstream_ensure_bits(istream, 4);
- if (ret != 0) {
+ if (ret) {
ERROR("LZX input stream overrun");
return ret;
}
* 16 bits, indicated by a 0 bit. */
s = bitstream_read_bits_nocheck(istream, 1);
- if (s == 1) {
- block_size = 1 << 15;
+ if (s) {
+ block_size = 32768;
} else {
ret = bitstream_read_bits(istream, 16, &block_size);
- if (ret != 0)
+ if (ret)
return ret;
block_size = le16_to_cpu(block_size);
}
ret = bitstream_read_bits(istream,
LZX_ALIGNEDTREE_ELEMENT_SIZE,
&len);
- if (ret != 0)
+ if (ret)
return ret;
tables->alignedtree_lens[i] = len;
}
LZX_ALIGNEDTREE_TABLEBITS,
tables->alignedtree_lens,
8);
- if (ret != 0) {
+ if (ret) {
ERROR("lzx_decompress(): Failed to make the decode "
"table for the aligned offset tree");
return ret;
* tree. */
ret = lzx_read_code_lens(istream, tables->maintree_lens,
LZX_NUM_CHARS);
- if (ret != 0) {
+ if (ret) {
ERROR("lzx_decompress(): Failed to read the code "
"lengths for the first 256 elements of the "
"main tree");
ret = lzx_read_code_lens(istream,
tables->maintree_lens + LZX_NUM_CHARS,
LZX_MAINTREE_NUM_SYMBOLS - LZX_NUM_CHARS);
- if (ret != 0) {
+ if (ret) {
ERROR("lzx_decompress(): Failed to read the path "
"lengths for the remaining elements of the main "
"tree");
LZX_MAINTREE_TABLEBITS,
tables->maintree_lens,
LZX_MAX_CODEWORD_LEN);
- if (ret != 0) {
+ if (ret) {
ERROR("lzx_decompress(): Failed to make the decode "
"table for the main tree");
return ret;
LZX_DEBUG("Reading path lengths for the length tree.");
ret = lzx_read_code_lens(istream, tables->lentree_lens,
LZX_LENTREE_NUM_SYMBOLS);
- if (ret != 0) {
+ if (ret) {
ERROR("lzx_decompress(): Failed to read the path "
"lengths for the length tree");
return ret;
LZX_LENTREE_TABLEBITS,
tables->lentree_lens,
LZX_MAX_CODEWORD_LEN);
- if (ret != 0) {
+ if (ret) {
ERROR("lzx_decompress(): Failed to build the length "
"Huffman tree");
return ret;
}
-
+ /* The bitstream of compressed literals and matches for this
+ * block directly follows and will be read in
+ * lzx_decompress_block(). */
break;
-
case LZX_BLOCKTYPE_UNCOMPRESSED:
LZX_DEBUG("Found uncompressed block.");
-
- /* Mystery bit! */
- ret = bitstream_read_bits(istream, 1, &i);
- if (ret != 0)
- return ret;
- align_input_bitstream(istream);
- ret = bitstream_read_bytes(istream, sizeof(R), R);
- if (ret != 0)
- return ret;
- queue->R0 = le32_to_cpu(R[0]);
- queue->R1 = le32_to_cpu(R[1]);
- queue->R2 = le32_to_cpu(R[2]);
+ /* Before reading the three LRU match offsets from the
+ * uncompressed block header, the stream needs to be aligned on
+ * a 16-bit boundary. But, unexpectedly, if the stream is
+ * *already* aligned, the correct thing to do is to throw away
+ * the next 16 bits. */
+ if (istream->bitsleft == 0) {
+ if (istream->data_bytes_left < 14) {
+ ERROR("lzx_decompress(): Insufficient length in "
+ "uncompressed block");
+ return -1;
+ }
+ istream->data += 2;
+ istream->data_bytes_left -= 2;
+ } else {
+ if (istream->data_bytes_left < 12) {
+ ERROR("lzx_decompress(): Insufficient length in "
+ "uncompressed block");
+ return -1;
+ }
+ istream->bitsleft = 0;
+ istream->bitbuf = 0;
+ }
+ queue->R0 = le32_to_cpu(*(u32*)(istream->data + 0));
+ queue->R1 = le32_to_cpu(*(u32*)(istream->data + 4));
+ queue->R2 = le32_to_cpu(*(u32*)(istream->data + 8));
+ istream->data += 12;
+ istream->data_bytes_left -= 12;
+ /* The uncompressed data of this block directly follows and will
+ * be read in lzx_decompress(). */
break;
default:
- LZX_DEBUG("Found invalid block.");
- return 1;
+ ERROR("lzx_decompress(): Found invalid block");
+ return -1;
}
*block_type_ret = block_type;
*block_size_ret = block_size;
}
/*
- * Decodes a compressed literal match value. It refers to some match_offset to
- * a point earlier in the window, and some match_len, for which the data is to
- * be copied to the current position in the window.
+ * Decodes a compressed match from a block of LZX-compressed data. A match
+ * refers to some match_offset to a point earlier in the window as well as some
+ * match_len, for which the data is to be copied to the current position in the
+ * window.
*
* @main_element: The start of the match data, as decoded using the main
- * tree.
- * @block_type: The type of the block (LZX_BLOCKTYPE_ALIGNED or
- * LZX_BLOCKTYPE_VERBATIM)
+ * tree.
+ *
+ * @block_type: The type of the block (LZX_BLOCKTYPE_ALIGNED or
+ * LZX_BLOCKTYPE_VERBATIM)
+ *
* @bytes_remaining: The amount of uncompressed data remaining to be
- * uncompressed. It is an error if the match
- * is longer than @bytes_remaining.
- * @window: A pointer to the window into which the uncompressed
+ * uncompressed in this block. It is an error if the match
+ * is longer than this number.
+ *
+ * @window: A pointer to the window into which the uncompressed
* data is being written.
- * @window_pos: The current position in the window.
- * @tables: Contains the Huffman tables for the block (main,
- * length, and also aligned offset only for
- * LZX_BLOCKTYPE_ALIGNED)
- * @queue: The least-recently used queue for match offsets.
- * @istream: The input bitstream.
- *
- * Returns the length of the match, or -1 on error (match would exceed
- * the amount of data needing to be uncompressed, or match refers to data before
- * the window, or the input bitstream ended unexpectedly).
+ *
+ * @window_pos: The current byte offset in the window.
+ *
+ * @tables: The Huffman decoding tables for this LZX block (main
+ * code, length code, and for LZX_BLOCKTYPE_ALIGNED blocks,
+ * also the aligned offset code).
+ *
+ * @queue: The least-recently used queue for match offsets.
+ *
+ * @istream: The input bitstream.
+ *
+ * Returns the length of the match, or a negative number on error. The possible
+ * error cases are:
+ * - Match would exceed the amount of data remaining to be uncompressed.
+ * - Match refers to data before the window.
+ * - The input bitstream ended unexpectedly.
*/
-static int lzx_decode_match(int main_element, int block_type,
- int bytes_remaining, u8 *window, int window_pos,
- const struct lzx_tables *tables,
- struct lru_queue *queue,
- struct input_bitstream *istream)
+static int
+lzx_decode_match(unsigned main_element, int block_type,
+ unsigned bytes_remaining, u8 *window,
+ unsigned window_pos,
+ const struct lzx_tables *tables,
+ struct lru_queue *queue,
+ struct input_bitstream *istream)
{
unsigned length_header;
unsigned position_slot;
unsigned num_extra_bits;
unsigned verbatim_bits;
unsigned aligned_bits;
+ unsigned i;
int ret;
- int i;
u8 *match_dest;
u8 *match_src;
match_len = LZX_MIN_MATCH + length_header;
if (length_header == LZX_NUM_PRIMARY_LENS) {
ret = read_huffsym_using_lentree(istream, tables,
- &additional_len);
+ &additional_len);
if (ret != 0)
- return -1;
+ return ret;
match_len += additional_len;
}
* decode the match offset. */
/* Look up the number of extra bits that need to be read. */
- num_extra_bits = lzx_extra_bits[position_slot];
+ num_extra_bits = lzx_get_num_extra_bits(position_slot);
/* For aligned blocks, if there are at least 3 extra bits, the
* actual number of extra bits is 3 less, and they encode a
* num_extra_bits == 3, the assignment to verbatim_bits
* will just set it to 0. ) */
ret = bitstream_read_bits(istream, num_extra_bits - 3,
- &verbatim_bits);
+ &verbatim_bits);
if (ret != 0)
- return -1;
+ return ret;
verbatim_bits <<= 3;
ret = read_huffsym_using_alignedtree(istream, tables,
&aligned_bits);
if (ret != 0)
- return -1;
+ return ret;
} else {
/* For non-aligned blocks, or for aligned blocks with
* less than 3 extra bits, the extra bits are added
ret = bitstream_read_bits(istream, num_extra_bits,
&verbatim_bits);
if (ret != 0)
- return -1;
+ return ret;
aligned_bits = 0;
}
/* Calculate the match offset. */
- match_offset = lzx_position_base[position_slot] + verbatim_bits +
- aligned_bits - 2;
+ match_offset = lzx_position_base[position_slot] +
+ verbatim_bits + aligned_bits - 2;
/* Update the LRU queue. */
queue->R2 = queue->R1;
match_src = match_dest - match_offset;
if (match_len > bytes_remaining) {
- ERROR("lzx_decode_match(): Match of length %d bytes overflows "
+ ERROR("lzx_decode_match(): Match of length %u bytes overflows "
"uncompressed block size", match_len);
return -1;
}
if (match_src < window) {
- ERROR("lzx_decode_match(): Match of length %d bytes references "
- "data before window (match_offset = %d, window_pos = %d)",
+ ERROR("lzx_decode_match(): Match of length %u bytes references "
+ "data before window (match_offset = %u, window_pos = %u)",
match_len, match_offset, window_pos);
return -1;
}
return match_len;
}
-
-
-/* Undo the 'E8' preprocessing, where the targets of x86 CALL instructions were
- * changed from relative offsets to absolute offsets. This type of
- * preprocessing can be used on any binary data even if it is not actually
- * machine code. It seems to always be used in WIM files, even though there is
- * no bit to indicate that it actually is used, unlike in the LZX compressed
- * format as used in other file formats, where a bit is reserved for that
- * purpose. */
-static void undo_call_insn_preprocessing(u8 uncompressed_data[],
- unsigned uncompressed_data_len)
+static void
+undo_call_insn_translation(u32 *call_insn_target, int input_pos,
+ int32_t file_size)
{
- int i = 0;
- int file_size = LZX_MAGIC_FILESIZE;
int32_t abs_offset;
int32_t rel_offset;
- /* Not enabled in the last 6 bytes, which means the 5-byte call
- * instruction cannot start in the last *10* bytes. */
- while (i < uncompressed_data_len - 10) {
- if (uncompressed_data[i] != 0xe8) {
- i++;
- continue;
+ abs_offset = le32_to_cpu(*call_insn_target);
+ if (abs_offset >= -input_pos && abs_offset < file_size) {
+ if (abs_offset >= 0) {
+ /* "good translation" */
+ rel_offset = abs_offset - input_pos;
+ } else {
+ /* "compensating translation" */
+ rel_offset = abs_offset + file_size;
}
- abs_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1));
-
- if (abs_offset >= -i && abs_offset < file_size) {
- if (abs_offset >= 0) {
- /* "good translation" */
- rel_offset = abs_offset - i;
- } else {
- /* "compensating translation" */
- rel_offset = abs_offset + file_size;
- }
- *(int32_t*)(uncompressed_data + i + 1) =
- cpu_to_le32(rel_offset);
+ *call_insn_target = cpu_to_le32(rel_offset);
+ }
+}
+
+/* Undo the 'E8' preprocessing, where the targets of x86 CALL instructions were
+ * changed from relative offsets to absolute offsets.
+ *
+ * Note that this call instruction preprocessing can and will be used on any
+ * data even if it is not actually x86 machine code. In fact, this type of
+ * preprocessing appears to always be used in LZX-compressed resources in WIM
+ * files; there is no bit to indicate whether it is used or not, unlike in the
+ * LZX compressed format as used in cabinet files, where a bit is reserved for
+ * that purpose.
+ *
+ * Call instruction preprocessing is disabled in the last 6 bytes of the
+ * uncompressed data, which really means the 5-byte call instruction cannot
+ * start in the last 10 bytes of the uncompressed data. This is one of the
+ * errors in the LZX documentation.
+ *
+ * Call instruction preprocessing does not appear to be disabled after the
+ * 32768th chunk of a WIM stream, which is apparently is yet another difference
+ * from the LZX compression used in cabinet files.
+ *
+ * Call instruction processing is supposed to take the file size as a parameter,
+ * as it is used in calculating the translated jump targets. But in WIM files,
+ * this file size is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).*/
+static void
+undo_call_insn_preprocessing(u8 uncompressed_data[], int uncompressed_data_len)
+{
+ for (int i = 0; i < uncompressed_data_len - 10; i++) {
+ if (uncompressed_data[i] == 0xe8) {
+ undo_call_insn_translation((u32*)&uncompressed_data[i + 1],
+ i,
+ LZX_WIM_MAGIC_FILESIZE);
+ i += 4;
}
- i += 5;
}
}
/*
- * Decompresses a compressed block of data from which the header has already
+ * Decompresses a LZX-compressed block of data from which the header has already
* been read.
*
* @block_type: The type of the block (LZX_BLOCKTYPE_VERBATIM or
* @queue: The least-recently-used queue for match offsets.
* @istream: The input bitstream for the compressed literals.
*/
-static int lzx_decompress_block(int block_type, int block_size, u8 *window,
- int window_pos,
- const struct lzx_tables *tables,
- struct lru_queue *queue,
- struct input_bitstream *istream)
+static int
+lzx_decompress_block(int block_type, unsigned block_size,
+ u8 *window,
+ unsigned window_pos,
+ const struct lzx_tables *tables,
+ struct lru_queue *queue,
+ struct input_bitstream *istream)
{
- unsigned bytes_remaining;
unsigned main_element;
- int match_len;
+ unsigned end;
int ret;
+ int match_len;
- bytes_remaining = block_size;
- while (bytes_remaining > 0) {
-
+ end = window_pos + block_size;
+ while (window_pos < end) {
ret = read_huffsym_using_maintree(istream, tables,
&main_element);
- if (ret != 0)
+ if (ret)
return ret;
if (main_element < LZX_NUM_CHARS) {
/* literal: 0 to LZX_NUM_CHARS - 1 */
- window[window_pos + block_size - bytes_remaining] =
- main_element;
- bytes_remaining--;
+ window[window_pos++] = main_element;
} else {
/* match: LZX_NUM_CHARS to LZX_MAINTREE_NUM_SYMBOLS - 1 */
match_len = lzx_decode_match(main_element,
- block_type, bytes_remaining, window,
- block_size + window_pos -
- bytes_remaining,
- tables, queue, istream);
- if (match_len == -1)
- return 1;
-
- bytes_remaining -= match_len;
+ block_type,
+ end - window_pos,
+ window,
+ window_pos,
+ tables,
+ queue,
+ istream);
+ if (match_len < 0)
+ return match_len;
+ window_pos += match_len;
}
}
return 0;
}
-/*
- * Decompresses a block of LZX-compressed data using a window size of 32768.
- *
- * @compressed_data: A pointer to the compressed data.
- * @compressed_len: The length of the compressed data, in bytes.
- * @uncompressed_data: A pointer to the buffer into which to write the
- * uncompressed data.
- * @uncompressed_len: The length of the uncompressed data.
- *
- * Return non-zero on failure.
- */
-int lzx_decompress(const void *compressed_data, unsigned compressed_len,
- void *uncompressed_data, unsigned uncompressed_len)
+/* Documented in wimlib.h */
+WIMLIBAPI int
+wimlib_lzx_decompress(const void *compressed_data, unsigned compressed_len,
+ void *uncompressed_data, unsigned uncompressed_len)
{
struct lzx_tables tables;
struct input_bitstream istream;
struct lru_queue queue;
- unsigned bytes_remaining;
+ unsigned window_pos;
unsigned block_size;
unsigned block_type;
int ret;
+ bool e8_preprocessing_done;
LZX_DEBUG("lzx_decompress (compressed_data = %p, compressed_len = %d, "
"uncompressed_data = %p, uncompressed_len = %d).",
queue.R0 = 1;
queue.R1 = 1;
queue.R2 = 1;
- bytes_remaining = uncompressed_len;
-
init_input_bitstream(&istream, compressed_data, compressed_len);
+ e8_preprocessing_done = false; /* Set to true if there may be 0xe8 bytes
+ in the uncompressed data. */
+
/* The compressed data will consist of one or more blocks. The
* following loop decompresses one block, and it runs until there all
* the compressed data has been decompressed, so there are no more
* blocks. */
- while (bytes_remaining != 0) {
-
+ for (window_pos = 0;
+ window_pos < uncompressed_len;
+ window_pos += block_size)
+ {
LZX_DEBUG("Reading block header.");
ret = lzx_read_block_header(&istream, &block_size,
&block_type, &tables, &queue);
- if (ret != 0)
+ if (ret)
return ret;
- LZX_DEBUG("block_size = %u, bytes_remaining = %u",
- block_size, bytes_remaining);
+ LZX_DEBUG("block_size = %u, window_pos = %u",
+ block_size, window_pos);
- if (block_size > bytes_remaining) {
+ if (block_size > uncompressed_len - window_pos) {
ERROR("lzx_decompress(): Expected a block size of at "
"most %u bytes (found %u bytes)",
- bytes_remaining, block_size);
- return 1;
+ uncompressed_len - window_pos, block_size);
+ return -1;
}
switch (block_type) {
LZX_DEBUG("LZX_BLOCKTYPE_VERBATIM");
else
LZX_DEBUG("LZX_BLOCKTYPE_ALIGNED");
-
ret = lzx_decompress_block(block_type,
block_size,
uncompressed_data,
- uncompressed_len -
- bytes_remaining,
- &tables, &queue, &istream);
- if (ret != 0)
+ window_pos,
+ &tables,
+ &queue,
+ &istream);
+ if (ret)
return ret;
+ if (tables.maintree_lens[0xe8] != 0)
+ e8_preprocessing_done = true;
break;
case LZX_BLOCKTYPE_UNCOMPRESSED:
LZX_DEBUG("LZX_BLOCKTYPE_UNCOMPRESSED");
- ret = bitstream_read_bytes(&istream, block_size,
- uncompressed_data +
- uncompressed_len -
- bytes_remaining);
- if (ret != 0)
- return ret;
- if (block_size & 1)
- align_input_bitstream(&istream);
- break;
- default:
- wimlib_assert(0);
+ if (istream.data_bytes_left < block_size) {
+ ERROR("Unexpected end of input when "
+ "reading %u bytes from LZX bitstream "
+ "(only have %u bytes left)",
+ block_size, istream.data_bytes_left);
+ return -1;
+ }
+ memcpy(&((u8*)uncompressed_data)[window_pos], istream.data,
+ block_size);
+ istream.data += block_size;
+ istream.data_bytes_left -= block_size;
+ /* Re-align bitstream if an odd number of bytes were
+ * read. */
+ if (istream.data_bytes_left && (block_size & 1)) {
+ istream.data_bytes_left--;
+ istream.data++;
+ }
+ e8_preprocessing_done = true;
break;
}
-
- bytes_remaining -= block_size;
-
- if (bytes_remaining != 0)
- LZX_DEBUG("%d bytes remaining.", bytes_remaining);
}
-
- if (uncompressed_len >= 10)
- undo_call_insn_preprocessing(uncompressed_data,
- uncompressed_len);
-
+ if (e8_preprocessing_done)
+ undo_call_insn_preprocessing(uncompressed_data, uncompressed_len);
return 0;
}