]> wimlib.net Git - wimlib/blobdiff - src/lzx-decompress.c
make_huffman_decode_table(): Add SSE2 entry filling (with aliasing handled correctly)
[wimlib] / src / lzx-decompress.c
index 6e4098fffa1c218aae813ec30f558683be4a5df2..93ab84137be886c773757bdb86e86c22b2abcad4 100644 (file)
@@ -7,7 +7,7 @@
  */
 
 /*
- * Copyright (C) 2012 Eric Biggers
+ * Copyright (C) 2012, 2013 Eric Biggers
  *
  * This file is part of wimlib, a library for working with WIM files.
  *
  * succeed.
  */
 
-#include "util.h"
-#include "lzx.h"
-#include "decompress.h"
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib.h"
+#include "wimlib/decompress.h"
+#include "wimlib/lzx.h"
+#include "wimlib/util.h"
+
 #include <string.h>
 
 /* Huffman decoding tables and maps from symbols to code lengths. */
 struct lzx_tables {
 
        u16 maintree_decode_table[(1 << LZX_MAINTREE_TABLEBITS) +
-                                       (LZX_MAINTREE_NUM_SYMBOLS * 2)];
+                                       (LZX_MAINTREE_NUM_SYMBOLS * 2)]
+                                       _aligned_attribute(DECODE_TABLE_ALIGNMENT);
        u8 maintree_lens[LZX_MAINTREE_NUM_SYMBOLS];
 
 
        u16 lentree_decode_table[(1 << LZX_LENTREE_TABLEBITS) +
-                                       (LZX_LENTREE_NUM_SYMBOLS * 2)];
+                                       (LZX_LENTREE_NUM_SYMBOLS * 2)]
+                                       _aligned_attribute(DECODE_TABLE_ALIGNMENT);
        u8 lentree_lens[LZX_LENTREE_NUM_SYMBOLS];
 
 
        u16 alignedtree_decode_table[(1 << LZX_ALIGNEDTREE_TABLEBITS) +
-                                       (LZX_ALIGNEDTREE_NUM_SYMBOLS * 2)];
+                                       (LZX_ALIGNEDTREE_NUM_SYMBOLS * 2)]
+                                       _aligned_attribute(DECODE_TABLE_ALIGNMENT);
        u8 alignedtree_lens[LZX_ALIGNEDTREE_NUM_SYMBOLS];
-};
+} _aligned_attribute(DECODE_TABLE_ALIGNMENT);
 
 
 /*
  * Reads a Huffman-encoded symbol using the pre-tree.
  */
-static inline int read_huffsym_using_pretree(struct input_bitstream *istream,
-                                            const u16 pretree_decode_table[],
-                                            const u8 pretree_lens[], unsigned *n)
+static inline int
+read_huffsym_using_pretree(struct input_bitstream *istream,
+                          const u16 pretree_decode_table[],
+                          const u8 pretree_lens[], unsigned *n)
 {
        return read_huffsym(istream, pretree_decode_table, pretree_lens,
                            LZX_PRETREE_NUM_SYMBOLS, LZX_PRETREE_TABLEBITS, n,
@@ -143,9 +153,10 @@ static inline int read_huffsym_using_pretree(struct input_bitstream *istream,
 }
 
 /* Reads a Huffman-encoded symbol using the main tree. */
-static inline int read_huffsym_using_maintree(struct input_bitstream *istream,
-                                             const struct lzx_tables *tables,
-                                             unsigned *n)
+static inline int
+read_huffsym_using_maintree(struct input_bitstream *istream,
+                           const struct lzx_tables *tables,
+                           unsigned *n)
 {
        return read_huffsym(istream, tables->maintree_decode_table,
                            tables->maintree_lens, LZX_MAINTREE_NUM_SYMBOLS,
@@ -153,9 +164,10 @@ static inline int read_huffsym_using_maintree(struct input_bitstream *istream,
 }
 
 /* Reads a Huffman-encoded symbol using the length tree. */
-static inline int read_huffsym_using_lentree(struct input_bitstream *istream,
-                                            const struct lzx_tables *tables,
-                                            unsigned *n)
+static inline int
+read_huffsym_using_lentree(struct input_bitstream *istream,
+                          const struct lzx_tables *tables,
+                          unsigned *n)
 {
        return read_huffsym(istream, tables->lentree_decode_table,
                            tables->lentree_lens, LZX_LENTREE_NUM_SYMBOLS,
@@ -163,9 +175,10 @@ static inline int read_huffsym_using_lentree(struct input_bitstream *istream,
 }
 
 /* Reads a Huffman-encoded symbol using the aligned offset tree. */
-static inline int read_huffsym_using_alignedtree(struct input_bitstream *istream,
-                                                const struct lzx_tables *tables,
-                                                unsigned *n)
+static inline int
+read_huffsym_using_alignedtree(struct input_bitstream *istream,
+                              const struct lzx_tables *tables,
+                              unsigned *n)
 {
        return read_huffsym(istream, tables->alignedtree_decode_table,
                            tables->alignedtree_lens,
@@ -185,8 +198,9 @@ static inline int read_huffsym_using_alignedtree(struct input_bitstream *istream
  * @num_lens:  Number of length values to decode and return.
  *
  */
-static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
-                             unsigned num_lens)
+static int
+lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
+                  unsigned num_lens)
 {
        /* Declare the decoding table and length table for the pretree. */
        u16 pretree_decode_table[(1 << LZX_PRETREE_TABLEBITS) +
@@ -233,7 +247,7 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
                char value;
 
                ret = read_huffsym_using_pretree(istream, pretree_decode_table,
-                                               pretree_lens, &tree_code);
+                                                pretree_lens, &tree_code);
                if (ret != 0)
                        return ret;
                switch (tree_code) {
@@ -264,10 +278,10 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
                        if (ret != 0)
                                return ret;
                        num_same += 4;
-
                        ret = read_huffsym_using_pretree(istream,
-                                               pretree_decode_table,
-                                               pretree_lens, &code);
+                                                        pretree_decode_table,
+                                                        pretree_lens,
+                                                        &code);
                        if (ret != 0)
                                return ret;
                        value = (char)*lens - (char)code;
@@ -306,22 +320,22 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
  *                     R0, R1, and R2 will be written (only for uncompressed
  *                     blocks, which contain this information in the header)
  */
-static int lzx_read_block_header(struct input_bitstream *istream,
-                                unsigned *block_size_ret,
-                                unsigned *block_type_ret,
-                                struct lzx_tables *tables,
-                                struct lru_queue *queue)
+static int
+lzx_read_block_header(struct input_bitstream *istream,
+                     unsigned *block_size_ret,
+                     unsigned *block_type_ret,
+                     struct lzx_tables *tables,
+                     struct lru_queue *queue)
 {
        int ret;
-       int block_type;
+       unsigned block_type;
        unsigned block_size;
-       int s;
-       int i;
+       unsigned s;
+       unsigned i;
        unsigned len;
-       int32_t R[3];
 
        ret = bitstream_ensure_bits(istream, 4);
-       if (ret != 0) {
+       if (ret) {
                ERROR("LZX input stream overrun");
                return ret;
        }
@@ -335,11 +349,11 @@ static int lzx_read_block_header(struct input_bitstream *istream,
         * 16 bits, indicated by a 0 bit. */
        s = bitstream_read_bits_nocheck(istream, 1);
 
-       if (s == 1) {
-               block_size = 1 << 15;
+       if (s) {
+               block_size = 32768;
        } else {
                ret = bitstream_read_bits(istream, 16, &block_size);
-               if (ret != 0)
+               if (ret)
                        return ret;
                block_size = le16_to_cpu(block_size);
        }
@@ -353,7 +367,7 @@ static int lzx_read_block_header(struct input_bitstream *istream,
                        ret = bitstream_read_bits(istream,
                                                  LZX_ALIGNEDTREE_ELEMENT_SIZE,
                                                  &len);
-                       if (ret != 0)
+                       if (ret)
                                return ret;
                        tables->alignedtree_lens[i] = len;
                }
@@ -364,7 +378,7 @@ static int lzx_read_block_header(struct input_bitstream *istream,
                                                LZX_ALIGNEDTREE_TABLEBITS,
                                                tables->alignedtree_lens,
                                                8);
-               if (ret != 0) {
+               if (ret) {
                        ERROR("lzx_decompress(): Failed to make the decode "
                              "table for the aligned offset tree");
                        return ret;
@@ -382,7 +396,7 @@ static int lzx_read_block_header(struct input_bitstream *istream,
                 * tree. */
                ret = lzx_read_code_lens(istream, tables->maintree_lens,
                                         LZX_NUM_CHARS);
-               if (ret != 0) {
+               if (ret) {
                        ERROR("lzx_decompress(): Failed to read the code "
                              "lengths for the first 256 elements of the "
                              "main tree");
@@ -397,7 +411,7 @@ static int lzx_read_block_header(struct input_bitstream *istream,
                ret = lzx_read_code_lens(istream,
                                         tables->maintree_lens + LZX_NUM_CHARS,
                                         LZX_MAINTREE_NUM_SYMBOLS - LZX_NUM_CHARS);
-               if (ret != 0) {
+               if (ret) {
                        ERROR("lzx_decompress(): Failed to read the path "
                              "lengths for the remaining elements of the main "
                              "tree");
@@ -412,7 +426,7 @@ static int lzx_read_block_header(struct input_bitstream *istream,
                                                LZX_MAINTREE_TABLEBITS,
                                                tables->maintree_lens,
                                                LZX_MAX_CODEWORD_LEN);
-               if (ret != 0) {
+               if (ret) {
                        ERROR("lzx_decompress(): Failed to make the decode "
                              "table for the main tree");
                        return ret;
@@ -421,7 +435,7 @@ static int lzx_read_block_header(struct input_bitstream *istream,
                LZX_DEBUG("Reading path lengths for the length tree.");
                ret = lzx_read_code_lens(istream, tables->lentree_lens,
                                         LZX_LENTREE_NUM_SYMBOLS);
-               if (ret != 0) {
+               if (ret) {
                        ERROR("lzx_decompress(): Failed to read the path "
                              "lengths for the length tree");
                        return ret;
@@ -433,29 +447,50 @@ static int lzx_read_block_header(struct input_bitstream *istream,
                                                LZX_LENTREE_TABLEBITS,
                                                tables->lentree_lens,
                                                LZX_MAX_CODEWORD_LEN);
-               if (ret != 0) {
+               if (ret) {
                        ERROR("lzx_decompress(): Failed to build the length "
                              "Huffman tree");
                        return ret;
                }
-
+               /* The bitstream of compressed literals and matches for this
+                * block directly follows and will be read in
+                * lzx_decompress_block(). */
                break;
-
        case LZX_BLOCKTYPE_UNCOMPRESSED:
                LZX_DEBUG("Found uncompressed block.");
-               ret = align_input_bitstream(istream, true);
-               if (ret != 0)
-                       return ret;
-               ret = bitstream_read_bytes(istream, sizeof(R), R);
-               if (ret != 0)
-                       return ret;
-               queue->R0 = le32_to_cpu(R[0]);
-               queue->R1 = le32_to_cpu(R[1]);
-               queue->R2 = le32_to_cpu(R[2]);
+               /* Before reading the three LRU match offsets from the
+                * uncompressed block header, the stream needs to be aligned on
+                * a 16-bit boundary.  But, unexpectedly, if the stream is
+                * *already* aligned, the correct thing to do is to throw away
+                * the next 16 bits. */
+               if (istream->bitsleft == 0) {
+                       if (istream->data_bytes_left < 14) {
+                               ERROR("lzx_decompress(): Insufficient length in "
+                                     "uncompressed block");
+                               return -1;
+                       }
+                       istream->data += 2;
+                       istream->data_bytes_left -= 2;
+               } else {
+                       if (istream->data_bytes_left < 12) {
+                               ERROR("lzx_decompress(): Insufficient length in "
+                                     "uncompressed block");
+                               return -1;
+                       }
+                       istream->bitsleft = 0;
+                       istream->bitbuf = 0;
+               }
+               queue->R0 = le32_to_cpu(*(u32*)(istream->data + 0));
+               queue->R1 = le32_to_cpu(*(u32*)(istream->data + 4));
+               queue->R2 = le32_to_cpu(*(u32*)(istream->data + 8));
+               istream->data += 12;
+               istream->data_bytes_left -= 12;
+               /* The uncompressed data of this block directly follows and will
+                * be read in lzx_decompress(). */
                break;
        default:
-               LZX_DEBUG("Found invalid block.");
-               return 1;
+               ERROR("lzx_decompress(): Found invalid block");
+               return -1;
        }
        *block_type_ret = block_type;
        *block_size_ret = block_size;
@@ -463,35 +498,47 @@ static int lzx_read_block_header(struct input_bitstream *istream,
 }
 
 /*
- * Decodes a compressed literal match value.  It refers to some match_offset to
- * a point earlier in the window, and some match_len, for which the data is to
- * be copied to the current position in the window.
+ * Decodes a compressed match from a block of LZX-compressed data.  A match
+ * refers to some match_offset to a point earlier in the window as well as some
+ * match_len, for which the data is to be copied to the current position in the
+ * window.
  *
  * @main_element:      The start of the match data, as decoded using the main
- *                             tree.
- * @block_type:        The type of the block (LZX_BLOCKTYPE_ALIGNED or
- *                     LZX_BLOCKTYPE_VERBATIM)
+ *                     tree.
+ *
+ * @block_type:                The type of the block (LZX_BLOCKTYPE_ALIGNED or
+ *                     LZX_BLOCKTYPE_VERBATIM)
+ *
  * @bytes_remaining:   The amount of uncompressed data remaining to be
- *                             uncompressed.  It is an error if the match
- *                             is longer than @bytes_remaining.
- * @window:    A pointer to the window into which the uncompressed
+ *                     uncompressed in this block.  It is an error if the match
+ *                     is longer than this number.
+ *
+ * @window:            A pointer to the window into which the uncompressed
  *                     data is being written.
- * @window_pos:        The current position in the window.
- * @tables:    Contains the Huffman tables for the block (main,
- *                     length, and also aligned offset only for
- *                     LZX_BLOCKTYPE_ALIGNED)
- * @queue:     The least-recently used queue for match offsets.
- * @istream:   The input bitstream.
- *
- * Returns the length of the match, or -1 on error (match would exceed
- * the amount of data needing to be uncompressed, or match refers to data before
- * the window, or the input bitstream ended unexpectedly).
+ *
+ * @window_pos:                The current byte offset in the window.
+ *
+ * @tables:            The Huffman decoding tables for this LZX block (main
+ *                     code, length code, and for LZX_BLOCKTYPE_ALIGNED blocks,
+ *                     also the aligned offset code).
+ *
+ * @queue:             The least-recently used queue for match offsets.
+ *
+ * @istream:           The input bitstream.
+ *
+ * Returns the length of the match, or a negative number on error.  The possible
+ * error cases are:
+ *     - Match would exceed the amount of data remaining to be uncompressed.
+ *     - Match refers to data before the window.
+ *     - The input bitstream ended unexpectedly.
  */
-static int lzx_decode_match(int main_element, int block_type,
-                           int bytes_remaining, u8 *window, int window_pos,
-                           const struct lzx_tables *tables,
-                           struct lru_queue *queue,
-                           struct input_bitstream *istream)
+static int
+lzx_decode_match(unsigned main_element, int block_type,
+                unsigned bytes_remaining, u8 *window,
+                unsigned window_pos,
+                const struct lzx_tables *tables,
+                struct lru_queue *queue,
+                struct input_bitstream *istream)
 {
        unsigned length_header;
        unsigned position_slot;
@@ -501,8 +548,8 @@ static int lzx_decode_match(int main_element, int block_type,
        unsigned num_extra_bits;
        unsigned verbatim_bits;
        unsigned aligned_bits;
+       unsigned i;
        int ret;
-       int i;
        u8 *match_dest;
        u8 *match_src;
 
@@ -522,9 +569,9 @@ static int lzx_decode_match(int main_element, int block_type,
        match_len = LZX_MIN_MATCH + length_header;
        if (length_header == LZX_NUM_PRIMARY_LENS) {
                ret = read_huffsym_using_lentree(istream, tables,
-                                               &additional_len);
+                                                &additional_len);
                if (ret != 0)
-                       return -1;
+                       return ret;
                match_len += additional_len;
        }
 
@@ -554,7 +601,7 @@ static int lzx_decode_match(int main_element, int block_type,
                 * decode the match offset. */
 
                /* Look up the number of extra bits that need to be read. */
-               num_extra_bits = lzx_extra_bits[position_slot];
+               num_extra_bits = lzx_get_num_extra_bits(position_slot);
 
                /* For aligned blocks, if there are at least 3 extra bits, the
                 * actual number of extra bits is 3 less, and they encode a
@@ -571,16 +618,16 @@ static int lzx_decode_match(int main_element, int block_type,
                         * num_extra_bits == 3, the assignment to verbatim_bits
                         * will just set it to 0. ) */
                        ret = bitstream_read_bits(istream, num_extra_bits - 3,
-                                                               &verbatim_bits);
+                                                 &verbatim_bits);
                        if (ret != 0)
-                               return -1;
+                               return ret;
 
                        verbatim_bits <<= 3;
 
                        ret = read_huffsym_using_alignedtree(istream, tables,
                                                             &aligned_bits);
                        if (ret != 0)
-                               return -1;
+                               return ret;
                } else {
                        /* For non-aligned blocks, or for aligned blocks with
                         * less than 3 extra bits, the extra bits are added
@@ -589,14 +636,14 @@ static int lzx_decode_match(int main_element, int block_type,
                        ret = bitstream_read_bits(istream, num_extra_bits,
                                                  &verbatim_bits);
                        if (ret != 0)
-                               return -1;
+                               return ret;
 
                        aligned_bits = 0;
                }
 
                /* Calculate the match offset. */
-               match_offset = lzx_position_base[position_slot] + verbatim_bits +
-                                                       aligned_bits - 2;
+               match_offset = lzx_position_base[position_slot] +
+                              verbatim_bits + aligned_bits - 2;
 
                /* Update the LRU queue. */
                queue->R2 = queue->R1;
@@ -612,14 +659,14 @@ static int lzx_decode_match(int main_element, int block_type,
        match_src = match_dest - match_offset;
 
        if (match_len > bytes_remaining) {
-               ERROR("lzx_decode_match(): Match of length %d bytes overflows "
+               ERROR("lzx_decode_match(): Match of length %u bytes overflows "
                      "uncompressed block size", match_len);
                return -1;
        }
 
        if (match_src < window) {
-               ERROR("lzx_decode_match(): Match of length %d bytes references "
-                     "data before window (match_offset = %d, window_pos = %d)",
+               ERROR("lzx_decode_match(): Match of length %u bytes references "
+                     "data before window (match_offset = %u, window_pos = %u)",
                      match_len, match_offset, window_pos);
                return -1;
        }
@@ -643,49 +690,63 @@ static int lzx_decode_match(int main_element, int block_type,
        return match_len;
 }
 
-
+static void
+undo_call_insn_translation(u32 *call_insn_target, int input_pos,
+                          s32 file_size)
+{
+       s32 abs_offset;
+       s32 rel_offset;
+
+       abs_offset = le32_to_cpu(*call_insn_target);
+       if (abs_offset >= -input_pos && abs_offset < file_size) {
+               if (abs_offset >= 0) {
+                       /* "good translation" */
+                       rel_offset = abs_offset - input_pos;
+               } else {
+                       /* "compensating translation" */
+                       rel_offset = abs_offset + file_size;
+               }
+               *call_insn_target = cpu_to_le32(rel_offset);
+       }
+}
 
 /* Undo the 'E8' preprocessing, where the targets of x86 CALL instructions were
- * changed from relative offsets to absolute offsets.  This type of
- * preprocessing can be used on any binary data even if it is not actually
- * machine code.  It seems to always be used in WIM files, even though there is
- * no bit to indicate that it actually is used, unlike in the LZX compressed
- * format as used in other file formats, where a bit is reserved for that
- * purpose. */
-static void undo_call_insn_preprocessing(u8 uncompressed_data[],
-                                        unsigned uncompressed_data_len)
+ * changed from relative offsets to absolute offsets.
+ *
+ * Note that this call instruction preprocessing can and will be used on any
+ * data even if it is not actually x86 machine code.  In fact, this type of
+ * preprocessing appears to always be used in LZX-compressed resources in WIM
+ * files; there is no bit to indicate whether it is used or not, unlike in the
+ * LZX compressed format as used in cabinet files, where a bit is reserved for
+ * that purpose.
+ *
+ * Call instruction preprocessing is disabled in the last 6 bytes of the
+ * uncompressed data, which really means the 5-byte call instruction cannot
+ * start in the last 10 bytes of the uncompressed data.  This is one of the
+ * errors in the LZX documentation.
+ *
+ * Call instruction preprocessing does not appear to be disabled after the
+ * 32768th chunk of a WIM stream, which is apparently is yet another difference
+ * from the LZX compression used in cabinet files.
+ *
+ * Call instruction processing is supposed to take the file size as a parameter,
+ * as it is used in calculating the translated jump targets.  But in WIM files,
+ * this file size is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).*/
+static void
+undo_call_insn_preprocessing(u8 uncompressed_data[], int uncompressed_data_len)
 {
-       int i = 0;
-       int file_size = LZX_MAGIC_FILESIZE;
-       int32_t abs_offset;
-       int32_t rel_offset;
-
-       /* Not enabled in the last 6 bytes, which means the 5-byte call
-        * instruction cannot start in the last *10* bytes. */
-       while (i < uncompressed_data_len - 10) {
-               if (uncompressed_data[i] != 0xe8) {
-                       i++;
-                       continue;
+       for (int i = 0; i < uncompressed_data_len - 10; i++) {
+               if (uncompressed_data[i] == 0xe8) {
+                       undo_call_insn_translation((u32*)&uncompressed_data[i + 1],
+                                                  i,
+                                                  LZX_WIM_MAGIC_FILESIZE);
+                       i += 4;
                }
-               abs_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1));
-
-               if (abs_offset >= -i && abs_offset < file_size) {
-                       if (abs_offset >= 0) {
-                               /* "good translation" */
-                               rel_offset = abs_offset - i;
-                       } else {
-                               /* "compensating translation" */
-                               rel_offset = abs_offset + file_size;
-                       }
-                       *(int32_t*)(uncompressed_data + i + 1) =
-                                               cpu_to_le32(rel_offset);
-               }
-               i += 5;
        }
 }
 
 /*
- * Decompresses a compressed block of data from which the header has already
+ * Decompresses a LZX-compressed block of data from which the header has already
  * been read.
  *
  * @block_type:        The type of the block (LZX_BLOCKTYPE_VERBATIM or
@@ -699,67 +760,60 @@ static void undo_call_insn_preprocessing(u8 uncompressed_data[],
  * @queue:     The least-recently-used queue for match offsets.
  * @istream:   The input bitstream for the compressed literals.
  */
-static int lzx_decompress_block(int block_type, int block_size, u8 *window,
-                               int window_pos,
-                               const struct lzx_tables *tables,
-                               struct lru_queue *queue,
-                               struct input_bitstream *istream)
+static int
+lzx_decompress_block(int block_type, unsigned block_size,
+                    u8 *window,
+                    unsigned window_pos,
+                    const struct lzx_tables *tables,
+                    struct lru_queue *queue,
+                    struct input_bitstream *istream)
 {
-       unsigned bytes_remaining;
        unsigned main_element;
-       int match_len;
+       unsigned end;
        int ret;
+       int match_len;
 
-       bytes_remaining = block_size;
-       while (bytes_remaining > 0) {
-
+       end = window_pos + block_size;
+       while (window_pos < end) {
                ret = read_huffsym_using_maintree(istream, tables,
                                                  &main_element);
-               if (ret != 0)
+               if (ret)
                        return ret;
 
                if (main_element < LZX_NUM_CHARS) {
                        /* literal: 0 to LZX_NUM_CHARS - 1 */
-                       window[window_pos + block_size - bytes_remaining] =
-                                                       main_element;
-                       bytes_remaining--;
+                       window[window_pos++] = main_element;
                } else {
                        /* match: LZX_NUM_CHARS to LZX_MAINTREE_NUM_SYMBOLS - 1 */
                        match_len = lzx_decode_match(main_element,
-                                               block_type, bytes_remaining, window,
-                                               block_size + window_pos -
-                                                       bytes_remaining,
-                                               tables, queue, istream);
-                       if (match_len == -1)
-                               return 1;
-
-                       bytes_remaining -= match_len;
+                                                    block_type,
+                                                    end - window_pos,
+                                                    window,
+                                                    window_pos,
+                                                    tables,
+                                                    queue,
+                                                    istream);
+                       if (match_len < 0)
+                               return match_len;
+                       window_pos += match_len;
                }
        }
        return 0;
 }
 
-/*
- * Decompresses a block of LZX-compressed data using a window size of 32768.
- *
- * @compressed_data:   A pointer to the compressed data.
- * @compressed_len:    The length of the compressed data, in bytes.
- * @uncompressed_data: A pointer to the buffer into which to write the
- *                             uncompressed data.
- * @uncompressed_len:  The length of the uncompressed data.
- *
- * Return non-zero on failure.
- */
-int lzx_decompress(const void *compressed_data, unsigned compressed_len,
-                  void *uncompressed_data, unsigned uncompressed_len)
+/* Documented in wimlib.h */
+WIMLIBAPI int
+wimlib_lzx_decompress(const void *compressed_data, unsigned compressed_len,
+                     void *uncompressed_data, unsigned uncompressed_len)
 {
        struct lzx_tables tables;
        struct input_bitstream istream;
        struct lru_queue queue;
-       unsigned bytes_remaining;
+       unsigned window_pos;
        unsigned block_size;
        unsigned block_type;
        int ret;
+       bool e8_preprocessing_done;
 
        LZX_DEBUG("lzx_decompress (compressed_data = %p, compressed_len = %d, "
                  "uncompressed_data = %p, uncompressed_len = %d).",
@@ -773,31 +827,34 @@ int lzx_decompress(const void *compressed_data, unsigned compressed_len,
        queue.R0 = 1;
        queue.R1 = 1;
        queue.R2 = 1;
-       bytes_remaining = uncompressed_len;
-
        init_input_bitstream(&istream, compressed_data, compressed_len);
 
+       e8_preprocessing_done = false; /* Set to true if there may be 0xe8 bytes
+                                         in the uncompressed data. */
+
        /* The compressed data will consist of one or more blocks.  The
         * following loop decompresses one block, and it runs until there all
         * the compressed data has been decompressed, so there are no more
         * blocks.  */
 
-       while (bytes_remaining != 0) {
-
+       for (window_pos = 0;
+            window_pos < uncompressed_len;
+            window_pos += block_size)
+       {
                LZX_DEBUG("Reading block header.");
                ret = lzx_read_block_header(&istream, &block_size,
                                            &block_type, &tables, &queue);
-               if (ret != 0)
+               if (ret)
                        return ret;
 
-               LZX_DEBUG("block_size = %u, bytes_remaining = %u",
-                         block_size, bytes_remaining);
+               LZX_DEBUG("block_size = %u, window_pos = %u",
+                         block_size, window_pos);
 
-               if (block_size > bytes_remaining) {
+               if (block_size > uncompressed_len - window_pos) {
                        ERROR("lzx_decompress(): Expected a block size of at "
                              "most %u bytes (found %u bytes)",
-                             bytes_remaining, block_size);
-                       return 1;
+                             uncompressed_len - window_pos, block_size);
+                       return -1;
                }
 
                switch (block_type) {
@@ -807,41 +864,42 @@ int lzx_decompress(const void *compressed_data, unsigned compressed_len,
                                LZX_DEBUG("LZX_BLOCKTYPE_VERBATIM");
                        else
                                LZX_DEBUG("LZX_BLOCKTYPE_ALIGNED");
-
                        ret = lzx_decompress_block(block_type,
                                                   block_size,
                                                   uncompressed_data,
-                                                  uncompressed_len -
-                                                      bytes_remaining,
-                                                  &tables, &queue, &istream);
-                       if (ret != 0)
+                                                  window_pos,
+                                                  &tables,
+                                                  &queue,
+                                                  &istream);
+                       if (ret)
                                return ret;
+                       if (tables.maintree_lens[0xe8] != 0)
+                               e8_preprocessing_done = true;
                        break;
                case LZX_BLOCKTYPE_UNCOMPRESSED:
                        LZX_DEBUG("LZX_BLOCKTYPE_UNCOMPRESSED");
-                       ret = bitstream_read_bytes(&istream, block_size,
-                                                  uncompressed_data +
-                                                  uncompressed_len -
-                                                  bytes_remaining);
-                       if (ret != 0)
-                               return ret;
-                       if (block_size & 1)
-                               align_input_bitstream(&istream, false);
-                       break;
-               default:
-                       wimlib_assert(0);
+                       if (istream.data_bytes_left < block_size) {
+                               ERROR("Unexpected end of input when "
+                                     "reading %u bytes from LZX bitstream "
+                                     "(only have %u bytes left)",
+                                     block_size, istream.data_bytes_left);
+                               return -1;
+                       }
+                       memcpy(&((u8*)uncompressed_data)[window_pos], istream.data,
+                              block_size);
+                       istream.data += block_size;
+                       istream.data_bytes_left -= block_size;
+                       /* Re-align bitstream if an odd number of bytes were
+                        * read.  */
+                       if (istream.data_bytes_left && (block_size & 1)) {
+                               istream.data_bytes_left--;
+                               istream.data++;
+                       }
+                       e8_preprocessing_done = true;
                        break;
                }
-
-               bytes_remaining -= block_size;
-
-               if (bytes_remaining != 0)
-                       LZX_DEBUG("%d bytes remaining.", bytes_remaining);
        }
-
-       if (uncompressed_len >= 10)
-               undo_call_insn_preprocessing(uncompressed_data,
-                                            uncompressed_len);
-
+       if (e8_preprocessing_done)
+               undo_call_insn_preprocessing(uncompressed_data, uncompressed_len);
        return 0;
 }