LZX decompression cleanups
authorEric Biggers <ebiggers3@gmail.com>
Fri, 21 Dec 2012 03:59:27 +0000 (21:59 -0600)
committerEric Biggers <ebiggers3@gmail.com>
Fri, 21 Dec 2012 03:59:27 +0000 (21:59 -0600)
- Improve comments
- Get rid of bitstream_read_bytes() because it wasn't clear what it was supposed
  to do.  Just inline what needs to be done in the appropriate places.
- Only undo the call instruction preprocessing if 0xe8 bytes may be in the input
  data.

src/decompress.c
src/decompress.h
src/lzx-common.c
src/lzx-compress.c
src/lzx-decompress.c
src/lzx.h
src/resource.c

index 9a08621..6d4b41e 100644 (file)
 #include "decompress.h"
 #include <string.h>
 
-/* Reads @n bytes from the bitstream @stream into the location pointed to by @dest.
- * The bitstream must be 16-bit aligned. */
-int bitstream_read_bytes(struct input_bitstream *stream, size_t n, void *dest)
-{
-       /* Precondition:  The bitstream is 16-byte aligned. */
-       wimlib_assert2(stream->bitsleft % 16 == 0);
-
-       u8 *p = dest;
-
-       /* Get the bytes currently in the buffer variable. */
-       while (stream->bitsleft != 0) {
-               if (n-- == 0)
-                       return 0;
-               *p++ = bitstream_peek_bits(stream, 8);
-               bitstream_remove_bits(stream, 8);
-       }
-
-       /* Get the rest directly from the pointer to the data.  Of course, it's
-        * necessary to check there are really n bytes available. */
-       if (n > stream->data_bytes_left) {
-               ERROR("Unexpected end of input when reading %zu bytes from "
-                     "bitstream (only have %u bytes left)",
-                     n, stream->data_bytes_left);
-               return 1;
-       }
-       memcpy(p, stream->data, n);
-       stream->data += n;
-       stream->data_bytes_left -= n;
-
-       /* It's possible to copy an odd number of bytes and leave the stream in
-        * an inconsistent state. Fix it by reading the next byte, if it is
-        * there. */
-       if ((n & 1) && stream->data_bytes_left != 0) {
-               stream->bitsleft = 8;
-               stream->data_bytes_left--;
-               stream->bitbuf |= (input_bitbuf_t)(*stream->data) <<
-                                       (sizeof(input_bitbuf_t) * 8 - 8);
-               stream->data++;
-       }
-       return 0;
-}
-
 /*
  * Builds a fast huffman decoding table from an array that gives the length of
  * the codeword for each symbol in the alphabet.  Originally based on code
@@ -356,7 +314,7 @@ int read_huffsym_near_end_of_input(struct input_bitstream *istream,
                do {
                        if (bitsleft == 0) {
                                ERROR("Input stream exhausted");
-                               return 1;
+                               return -1;
                        }
                        key_bits = sym + bitstream_peek_bits(istream, 1);
                        bitstream_remove_bits(istream, 1);
index ae1c99b..21d8090 100644 (file)
 /* Must be at least 32 bits. */
 typedef unsigned long input_bitbuf_t;
 
-/* Structure to provide a bitstream. */
+/* Structure to encapsulate a block of in-memory data that is being interpreted
+ * as a stream of bits.
+ *
+ * This is geared specifically towards the XPRESS and LZX compression formats
+ * with regards to the actual ordering the bits within the byte sequence. */
 struct input_bitstream {
 
        /* A variable of length at least 32 bits that is used to hold bits that
         * have been read from the stream.  The bits are ordered from high-order
-        * to low-order; the next bit is always the high-order bit. */
+        * to low-order, and the next bit is always the high-order bit. */
        input_bitbuf_t  bitbuf;
 
        /* Pointer to the next byte to be retrieved from the input. */
@@ -41,7 +45,11 @@ static inline void init_input_bitstream(struct input_bitstream *istream,
        istream->data_bytes_left = num_data_bytes;
 }
 
-/* Ensures that the bit buffer contains @num_bits bits. */
+/* Ensures that the bit buffer variable for the bitstream contains @num_bits
+ * bits.
+ *
+ * If there are at least @num_bits bits remaining in the bitstream, 0 is
+ * returned.  Otherwise, -1 is returned.  */
 static inline int bitstream_ensure_bits(struct input_bitstream *istream,
                                        unsigned num_bits)
 {
@@ -70,15 +78,15 @@ static inline int bitstream_ensure_bits(struct input_bitstream *istream,
                        istream->bitsleft += 16;
                        istream->data_bytes_left -= 2;
                } else {
-                       ret = 1;
+                       ret = -1;
                }
        }
        wimlib_assert2(ret != 0 || istream->bitsleft >= num_bits);
        return ret;
 }
 
-/* Returns the next @num_bits bits in the bit buffer.  It must contain at least
- * @num_bits bits to call this function. */
+/* Returns the next @num_bits bits in the buffer variable, which must contain at
+ * least @num_bits bits, for the bitstream. */
 static inline unsigned
 bitstream_peek_bits(const struct input_bitstream *istream, unsigned num_bits)
 {
@@ -91,8 +99,8 @@ bitstream_peek_bits(const struct input_bitstream *istream, unsigned num_bits)
        return ret;
 }
 
-/* Removes @num_bits bits from the bit buffer.  It must contain at least
- * @num_bits bits to call this function. */
+/* Removes @num_bits bits from the buffer variable, which must contain at least
+ * @num_bits bits, for the bitstream. */
 static inline void bitstream_remove_bits(struct input_bitstream *istream,
                                         unsigned num_bits)
 {
@@ -101,28 +109,31 @@ static inline void bitstream_remove_bits(struct input_bitstream *istream,
        istream->bitsleft -= num_bits;
 }
 
-/* Reads and returns @num_bits bits from the input bitstream. */
+/* Reads @num_bits bits from the input bitstream.  @num_bits must be 16 or fewer.
+ * On success, returns 0 and returns the requested bits in @n.  If there are
+ * fewer than @num_bits remaining in the bitstream, -1 is returned. */
 static inline int bitstream_read_bits(struct input_bitstream *istream,
                                      unsigned num_bits, unsigned *n)
 {
+       wimlib_assert2(num_bits <= 16);
        int ret = bitstream_ensure_bits(istream, num_bits);
        if (ret == 0) {
                *n = bitstream_peek_bits(istream, num_bits);
                bitstream_remove_bits(istream, num_bits);
+               wimlib_assert2(istream->bitsleft < 16);
        } else {
                ERROR("bitstream_read_bits(): Input buffer exhausted");
        }
        return ret;
 }
 
-/* In the XPRESS format there can be literal length bytes embedded in the
- * compressed bitstream.  These bytes are basically separate from the bitstream,
- * as they come AFTER the bits that are currently in the buffer variable (based
- * on reading 16 bits at a time), even though the buffer variable may not be
- * empty.
+/* In the XPRESS format there can be literal bytes embedded in the bitstream.
+ * These bytes are basically separate from the bitstream, as they come AFTER the
+ * bits that are currently in the buffer variable (based on reading 16 bits at a
+ * time), even though the buffer variable may not be empty.
  *
- * This function returns the next such literal length byte in the input
- * bitstream.  Returns -1 if we are at the end of the bitstream. */
+ * This function returns the next such literal byte, or -1 if there are no more.
+ */
 static inline int bitstream_read_byte(struct input_bitstream *istream)
 {
        wimlib_assert2(istream->bitsleft < 32);
@@ -138,8 +149,8 @@ static inline int bitstream_read_byte(struct input_bitstream *istream)
        return ret;
 }
 
-/* Reads @num_bits bits from the bit buffer without checking to see if that many
- * bits are in the buffer or not. */
+/* Reads @num_bits bits from the buffer variable for a bistream without checking
+ * to see if that many bits are in the buffer or not. */
 static inline unsigned
 bitstream_read_bits_nocheck(struct input_bitstream *istream, unsigned num_bits)
 {
@@ -148,23 +159,6 @@ bitstream_read_bits_nocheck(struct input_bitstream *istream, unsigned num_bits)
        return n;
 }
 
-/* Removes the bits that have been read into the bit buffer variable. */
-static inline void flush_input_bitstream(struct input_bitstream *istream)
-{
-       bitstream_remove_bits(istream, istream->bitsleft);
-       istream->bitbuf = 0;
-       wimlib_assert2(istream->bitsleft == 0);
-}
-
-extern int bitstream_read_bytes(struct input_bitstream *istream, size_t n,
-                               void *dest);
-
-/* Aligns the bitstream on a 16-bit boundary. */
-static inline void align_input_bitstream(struct input_bitstream *istream)
-{
-       bitstream_remove_bits(istream, istream->bitsleft & 15);
-}
-
 extern int read_huffsym_near_end_of_input(struct input_bitstream *istream,
                                          const u16 decode_table[],
                                          const u8 lens[],
@@ -179,7 +173,7 @@ extern int read_huffsym_near_end_of_input(struct input_bitstream *istream,
  * large WIM file.  I'm not sure it could be made much faster that it is,
  * especially since there isn't enough time to make a big table that allows
  * decoding multiple symbols per lookup.  But if extracting files to a hard
- * disk, the IO will be the bottleneck anyway.
+ * disk, the I/O will be the bottleneck anyway.
  *
  * @buf:       The input buffer from which the symbol will be read.
  * @decode_table:      The fast Huffman decoding table for the Huffman tree.
index 9da4ddb..5b3d118 100644 (file)
@@ -1,5 +1,6 @@
 #include "lzx.h"
 
+#ifdef USE_LZX_EXTRA_BITS_ARRAY
 /* LZX uses what it calls 'position slots' to represent match offsets.
  * What this means is that a small 'position slot' number and a small
  * offset from that slot are encoded instead of one large offset for
@@ -20,6 +21,7 @@ const u8 lzx_extra_bits[LZX_NUM_POSITION_SLOTS] = {
        /*17, 17, 17, 17, 17,*/
        /*17*/
 };
+#endif
 
 const u32 lzx_position_base[LZX_NUM_POSITION_SLOTS] = {
        0      , 1      , 2      , 3      , 4      ,
index b180046..81a6256 100644 (file)
@@ -127,17 +127,6 @@ static u32 lzx_record_literal(u8 literal, void *__main_freq_tab)
        return literal;
 }
 
-/* Equivalent to lzx_extra_bits[position_slot] except position_slot must be
- * between 2 and 37 */
-static inline unsigned lzx_get_num_extra_bits(unsigned position_slot)
-{
-#if 0
-       return lzx_extra_bits[position_slot];
-#endif
-       wimlib_assert(position_slot >= 2 && position_slot <= 37);
-       return (position_slot >> 1) - 1;
-}
-
 /* Constructs a match from an offset and a length, and updates the LRU queue and
  * the frequency of symbols in the main, length, and aligned offset alphabets.
  * The return value is a 32-bit number that provides the match in an
@@ -320,7 +309,7 @@ static int lzx_write_match(struct output_bitstream *out, int block_type,
 
        wimlib_assert(position_slot < LZX_NUM_POSITION_SLOTS);
 
-       num_extra_bits = lzx_extra_bits[position_slot];
+       num_extra_bits = lzx_get_num_extra_bits(position_slot);
 
        /* For aligned offset blocks with at least 3 extra bits, output the
         * verbatim bits literally, then the aligned bits encoded using the
@@ -596,41 +585,37 @@ static void lzx_make_huffman_codes(const struct lzx_freq_tables *freq_tabs,
                                        codes->aligned_codewords);
 }
 
-/* Do the 'E8' preprocessing, where the targets of x86 CALL instructions were
- * changed from relative offsets to absolute offsets.  This type of
- * preprocessing can be used on any binary data even if it is not actually
- * machine code.  It seems to always be used in WIM files, even though there is
- * no bit to indicate that it actually is used, unlike in the LZX compressed
- * format as used in other file formats such as the cabinet format, where a bit
- * is reserved for that purpose. */
-static void do_call_insn_preprocessing(u8 uncompressed_data[],
-                                      unsigned uncompressed_data_len)
+static void do_call_insn_translation(u32 *call_insn_target, int input_pos,
+                                    int32_t file_size)
 {
-       int i = 0;
-       int file_size = LZX_MAGIC_FILESIZE;
-       int32_t rel_offset;
        int32_t abs_offset;
+       int32_t rel_offset;
 
-       /* Not enabled in the last 6 bytes, which means the 5-byte call
-        * instruction cannot start in the last *10* bytes. */
-       while (i < uncompressed_data_len - 10) {
-               if (uncompressed_data[i] != 0xe8) {
-                       i++;
-                       continue;
+       rel_offset = le32_to_cpu(*call_insn_target);
+       if (rel_offset >= -input_pos && rel_offset < file_size) {
+               if (rel_offset < file_size - input_pos) {
+                       /* "good translation" */
+                       abs_offset = rel_offset + input_pos;
+               } else {
+                       /* "compensating translation" */
+                       abs_offset = rel_offset - file_size;
                }
-               rel_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1));
-
-               if (rel_offset >= -i && rel_offset < file_size) {
-                       if (rel_offset < file_size - i) {
-                               /* "good translation" */
-                               abs_offset = rel_offset + i;
-                       } else {
-                               /* "compensating translation" */
-                               abs_offset = rel_offset - file_size;
-                       }
-                       *(int32_t*)(uncompressed_data + i + 1) = cpu_to_le32(abs_offset);
+               *call_insn_target = cpu_to_le32(abs_offset);
+       }
+}
+
+/* This is the reverse of undo_call_insn_preprocessing() in lzx-decompress.c.
+ * See the comment above that function for more information. */
+static void do_call_insn_preprocessing(u8 uncompressed_data[],
+                                      int uncompressed_data_len)
+{
+       for (int i = 0; i < uncompressed_data_len - 10; i++) {
+               if (uncompressed_data[i] == 0xe8) {
+                       do_call_insn_translation((u32*)&uncompressed_data[i + 1],
+                                                i,
+                                                LZX_WIM_MAGIC_FILESIZE);
+                       i += 4;
                }
-               i += 5;
        }
 }
 
@@ -670,7 +655,7 @@ int lzx_compress(const void *__uncompressed_data, unsigned uncompressed_len,
                 void *compressed_data, unsigned *compressed_len_ret)
 {
        struct output_bitstream ostream;
-       u8 uncompressed_data[uncompressed_len + LZX_MAX_MATCH];
+       u8 uncompressed_data[uncompressed_len + 8];
        struct lzx_freq_tables freq_tabs;
        struct lzx_codes codes;
        u32 match_tab[uncompressed_len];
index a66e6d8..06e3e02 100644 (file)
@@ -233,7 +233,7 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
                char value;
 
                ret = read_huffsym_using_pretree(istream, pretree_decode_table,
-                                               pretree_lens, &tree_code);
+                                                pretree_lens, &tree_code);
                if (ret != 0)
                        return ret;
                switch (tree_code) {
@@ -264,10 +264,10 @@ static int lzx_read_code_lens(struct input_bitstream *istream, u8 lens[],
                        if (ret != 0)
                                return ret;
                        num_same += 4;
-
                        ret = read_huffsym_using_pretree(istream,
-                                               pretree_decode_table,
-                                               pretree_lens, &code);
+                                                        pretree_decode_table,
+                                                        pretree_lens,
+                                                        &code);
                        if (ret != 0)
                                return ret;
                        value = (char)*lens - (char)code;
@@ -313,12 +313,12 @@ static int lzx_read_block_header(struct input_bitstream *istream,
                                 struct lru_queue *queue)
 {
        int ret;
-       int block_type;
+       unsigned block_type;
        unsigned block_size;
-       int s;
-       int i;
+       unsigned s;
+       unsigned i;
        unsigned len;
-       int32_t R[3];
+       u32 R[3];
 
        ret = bitstream_ensure_bits(istream, 4);
        if (ret != 0) {
@@ -335,8 +335,8 @@ static int lzx_read_block_header(struct input_bitstream *istream,
         * 16 bits, indicated by a 0 bit. */
        s = bitstream_read_bits_nocheck(istream, 1);
 
-       if (s == 1) {
-               block_size = 1 << 15;
+       if (s) {
+               block_size = 32768;
        } else {
                ret = bitstream_read_bits(istream, 16, &block_size);
                if (ret != 0)
@@ -438,27 +438,39 @@ static int lzx_read_block_header(struct input_bitstream *istream,
                              "Huffman tree");
                        return ret;
                }
-
+               /* The bitstream of compressed literals and matches for this
+                * block directly follows and will be read in
+                * lzx_decompress_block(). */
                break;
-
        case LZX_BLOCKTYPE_UNCOMPRESSED:
                LZX_DEBUG("Found uncompressed block.");
-
-               /* Mystery bit! */
-               ret = bitstream_read_bits(istream, 1, &i);
-               if (ret != 0)
-                       return ret;
-               align_input_bitstream(istream);
-               ret = bitstream_read_bytes(istream, sizeof(R), R);
-               if (ret != 0)
-                       return ret;
-               queue->R0 = le32_to_cpu(R[0]);
-               queue->R1 = le32_to_cpu(R[1]);
-               queue->R2 = le32_to_cpu(R[2]);
+               /* Before reading the three LRU match offsets from the
+                * uncompressed block header, the stream needs to be aligned on
+                * a 16-bit boundary.  But, unexpectedly, if the stream is
+                * *already* aligned, the correct thing to do is to throw away
+                * the next 16 bits. */
+               if (istream->bitsleft == 0) {
+                       if (istream->data_bytes_left < 14)
+                               return -1;
+                       istream->data += 2;
+                       istream->data_bytes_left -= 2;
+               } else {
+                       if (istream->data_bytes_left < 12)
+                               return -1;
+                       istream->bitsleft = 0;
+                       istream->bitbuf = 0;
+               }
+               queue->R0 = le32_to_cpu(*(u32*)(istream->data + 0));
+               queue->R1 = le32_to_cpu(*(u32*)(istream->data + 4));
+               queue->R2 = le32_to_cpu(*(u32*)(istream->data + 8));
+               istream->data += 12;
+               istream->data_bytes_left -= 12;
+               /* The uncompressed data of this block directly follows and will
+                * be read in lzx_decompress(). */
                break;
        default:
-               LZX_DEBUG("Found invalid block.");
-               return 1;
+               ERROR("lzx_decompress(): Found invalid block");
+               return -1;
        }
        *block_type_ret = block_type;
        *block_size_ret = block_size;
@@ -466,32 +478,43 @@ static int lzx_read_block_header(struct input_bitstream *istream,
 }
 
 /*
- * Decodes a compressed literal match value.  It refers to some match_offset to
- * a point earlier in the window, and some match_len, for which the data is to
- * be copied to the current position in the window.
+ * Decodes a compressed match from a block of LZX-compressed data.  A match
+ * refers to some match_offset to a point earlier in the window as well as some
+ * match_len, for which the data is to be copied to the current position in the
+ * window.
  *
  * @main_element:      The start of the match data, as decoded using the main
- *                             tree.
- * @block_type:        The type of the block (LZX_BLOCKTYPE_ALIGNED or
- *                     LZX_BLOCKTYPE_VERBATIM)
+ *                     tree.
+ *
+ * @block_type:                The type of the block (LZX_BLOCKTYPE_ALIGNED or
+ *                     LZX_BLOCKTYPE_VERBATIM)
+ *
  * @bytes_remaining:   The amount of uncompressed data remaining to be
- *                             uncompressed.  It is an error if the match
- *                             is longer than @bytes_remaining.
- * @window:    A pointer to the window into which the uncompressed
+ *                     uncompressed in this block.  It is an error if the match
+ *                     is longer than this number.
+ *
+ * @window:            A pointer to the window into which the uncompressed
  *                     data is being written.
- * @window_pos:        The current position in the window.
- * @tables:    Contains the Huffman tables for the block (main,
- *                     length, and also aligned offset only for
- *                     LZX_BLOCKTYPE_ALIGNED)
- * @queue:     The least-recently used queue for match offsets.
- * @istream:   The input bitstream.
- *
- * Returns the length of the match, or -1 on error (match would exceed
- * the amount of data needing to be uncompressed, or match refers to data before
- * the window, or the input bitstream ended unexpectedly).
+ *
+ * @window_pos:                The current byte offset in the window.
+ *
+ * @tables:            The Huffman decoding tables for this LZX block (main
+ *                     code, length code, and for LZX_BLOCKTYPE_ALIGNED blocks,
+ *                     also the aligned offset code).
+ *
+ * @queue:             The least-recently used queue for match offsets.
+ *
+ * @istream:           The input bitstream.
+ *
+ * Returns the length of the match, or a negative number on error.  The possible
+ * error cases are:
+ *     - Match would exceed the amount of data remaining to be uncompressed.
+ *     - Match refers to data before the window.
+ *     - The input bitstream ended unexpectedly.
  */
-static int lzx_decode_match(int main_element, int block_type,
-                           int bytes_remaining, u8 *window, int window_pos,
+static int lzx_decode_match(unsigned main_element, int block_type,
+                           unsigned bytes_remaining, u8 *window,
+                           unsigned window_pos,
                            const struct lzx_tables *tables,
                            struct lru_queue *queue,
                            struct input_bitstream *istream)
@@ -504,8 +527,8 @@ static int lzx_decode_match(int main_element, int block_type,
        unsigned num_extra_bits;
        unsigned verbatim_bits;
        unsigned aligned_bits;
+       unsigned i;
        int ret;
-       int i;
        u8 *match_dest;
        u8 *match_src;
 
@@ -525,9 +548,9 @@ static int lzx_decode_match(int main_element, int block_type,
        match_len = LZX_MIN_MATCH + length_header;
        if (length_header == LZX_NUM_PRIMARY_LENS) {
                ret = read_huffsym_using_lentree(istream, tables,
-                                               &additional_len);
+                                                &additional_len);
                if (ret != 0)
-                       return -1;
+                       return ret;
                match_len += additional_len;
        }
 
@@ -557,7 +580,7 @@ static int lzx_decode_match(int main_element, int block_type,
                 * decode the match offset. */
 
                /* Look up the number of extra bits that need to be read. */
-               num_extra_bits = lzx_extra_bits[position_slot];
+               num_extra_bits = lzx_get_num_extra_bits(position_slot);
 
                /* For aligned blocks, if there are at least 3 extra bits, the
                 * actual number of extra bits is 3 less, and they encode a
@@ -574,16 +597,16 @@ static int lzx_decode_match(int main_element, int block_type,
                         * num_extra_bits == 3, the assignment to verbatim_bits
                         * will just set it to 0. ) */
                        ret = bitstream_read_bits(istream, num_extra_bits - 3,
-                                                               &verbatim_bits);
+                                                 &verbatim_bits);
                        if (ret != 0)
-                               return -1;
+                               return ret;
 
                        verbatim_bits <<= 3;
 
                        ret = read_huffsym_using_alignedtree(istream, tables,
                                                             &aligned_bits);
                        if (ret != 0)
-                               return -1;
+                               return ret;
                } else {
                        /* For non-aligned blocks, or for aligned blocks with
                         * less than 3 extra bits, the extra bits are added
@@ -592,14 +615,14 @@ static int lzx_decode_match(int main_element, int block_type,
                        ret = bitstream_read_bits(istream, num_extra_bits,
                                                  &verbatim_bits);
                        if (ret != 0)
-                               return -1;
+                               return ret;
 
                        aligned_bits = 0;
                }
 
                /* Calculate the match offset. */
-               match_offset = lzx_position_base[position_slot] + verbatim_bits +
-                                                       aligned_bits - 2;
+               match_offset = lzx_position_base[position_slot] +
+                              verbatim_bits + aligned_bits - 2;
 
                /* Update the LRU queue. */
                queue->R2 = queue->R1;
@@ -615,14 +638,14 @@ static int lzx_decode_match(int main_element, int block_type,
        match_src = match_dest - match_offset;
 
        if (match_len > bytes_remaining) {
-               ERROR("lzx_decode_match(): Match of length %d bytes overflows "
+               ERROR("lzx_decode_match(): Match of length %u bytes overflows "
                      "uncompressed block size", match_len);
                return -1;
        }
 
        if (match_src < window) {
-               ERROR("lzx_decode_match(): Match of length %d bytes references "
-                     "data before window (match_offset = %d, window_pos = %d)",
+               ERROR("lzx_decode_match(): Match of length %u bytes references "
+                     "data before window (match_offset = %u, window_pos = %u)",
                      match_len, match_offset, window_pos);
                return -1;
        }
@@ -646,49 +669,62 @@ static int lzx_decode_match(int main_element, int block_type,
        return match_len;
 }
 
-
-
-/* Undo the 'E8' preprocessing, where the targets of x86 CALL instructions were
- * changed from relative offsets to absolute offsets.  This type of
- * preprocessing can be used on any binary data even if it is not actually
- * machine code.  It seems to always be used in WIM files, even though there is
- * no bit to indicate that it actually is used, unlike in the LZX compressed
- * format as used in other file formats, where a bit is reserved for that
- * purpose. */
-static void undo_call_insn_preprocessing(u8 uncompressed_data[],
-                                        unsigned uncompressed_data_len)
+static void undo_call_insn_translation(u32 *call_insn_target, int input_pos,
+                                      int32_t file_size)
 {
-       int i = 0;
-       int file_size = LZX_MAGIC_FILESIZE;
        int32_t abs_offset;
        int32_t rel_offset;
 
-       /* Not enabled in the last 6 bytes, which means the 5-byte call
-        * instruction cannot start in the last *10* bytes. */
-       while (i < uncompressed_data_len - 10) {
-               if (uncompressed_data[i] != 0xe8) {
-                       i++;
-                       continue;
+       abs_offset = le32_to_cpu(*call_insn_target);
+       if (abs_offset >= -input_pos && abs_offset < file_size) {
+               if (abs_offset >= 0) {
+                       /* "good translation" */
+                       rel_offset = abs_offset - input_pos;
+               } else {
+                       /* "compensating translation" */
+                       rel_offset = abs_offset + file_size;
                }
-               abs_offset = le32_to_cpu(*(int32_t*)(uncompressed_data + i + 1));
-
-               if (abs_offset >= -i && abs_offset < file_size) {
-                       if (abs_offset >= 0) {
-                               /* "good translation" */
-                               rel_offset = abs_offset - i;
-                       } else {
-                               /* "compensating translation" */
-                               rel_offset = abs_offset + file_size;
-                       }
-                       *(int32_t*)(uncompressed_data + i + 1) =
-                                               cpu_to_le32(rel_offset);
+               *call_insn_target = cpu_to_le32(rel_offset);
+       }
+}
+
+/* Undo the 'E8' preprocessing, where the targets of x86 CALL instructions were
+ * changed from relative offsets to absolute offsets.
+ *
+ * Note that this call instruction preprocessing can and will be used on any
+ * data even if it is not actually x86 machine code.  In fact, this type of
+ * preprocessing appears to always be used in LZX-compressed resources in WIM
+ * files; there is no bit to indicate whether it is used or not, unlike in the
+ * LZX compressed format as used in cabinet files, where a bit is reserved for
+ * that purpose.
+ *
+ * Call instruction preprocessing is disabled in the last 6 bytes of the
+ * uncompressed data, which really means the 5-byte call instruction cannot
+ * start in the last 10 bytes of the uncompressed data.  This is one of the
+ * errors in the LZX documentation.
+ *
+ * Call instruction preprocessing does not appear to be disabled after the
+ * 32768th chunk of a WIM stream, which is apparently is yet another difference
+ * from the LZX compression used in cabinet files.
+ *
+ * Call instruction processing is supposed to take the file size as a parameter,
+ * as it is used in calculating the translated jump targets.  But in WIM files,
+ * this file size is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).*/
+static void undo_call_insn_preprocessing(u8 uncompressed_data[],
+                                        int uncompressed_data_len)
+{
+       for (int i = 0; i < uncompressed_data_len - 10; i++) {
+               if (uncompressed_data[i] == 0xe8) {
+                       undo_call_insn_translation((u32*)&uncompressed_data[i + 1],
+                                                  i,
+                                                  LZX_WIM_MAGIC_FILESIZE);
+                       i += 4;
                }
-               i += 5;
        }
 }
 
 /*
- * Decompresses a compressed block of data from which the header has already
+ * Decompresses a LZX-compressed block of data from which the header has already
  * been read.
  *
  * @block_type:        The type of the block (LZX_BLOCKTYPE_VERBATIM or
@@ -702,56 +738,63 @@ static void undo_call_insn_preprocessing(u8 uncompressed_data[],
  * @queue:     The least-recently-used queue for match offsets.
  * @istream:   The input bitstream for the compressed literals.
  */
-static int lzx_decompress_block(int block_type, int block_size, u8 *window,
-                               int window_pos,
+static int lzx_decompress_block(int block_type, unsigned block_size,
+                               u8 *window,
+                               unsigned window_pos,
                                const struct lzx_tables *tables,
                                struct lru_queue *queue,
                                struct input_bitstream *istream)
 {
-       unsigned bytes_remaining;
        unsigned main_element;
+       unsigned end;
        int match_len;
-       int ret;
-
-       bytes_remaining = block_size;
-       while (bytes_remaining > 0) {
+       int ret = 0;
 
+       end = window_pos + block_size;
+       while (window_pos < end) {
                ret = read_huffsym_using_maintree(istream, tables,
                                                  &main_element);
                if (ret != 0)
-                       return ret;
+                       break;
 
                if (main_element < LZX_NUM_CHARS) {
                        /* literal: 0 to LZX_NUM_CHARS - 1 */
-                       window[window_pos + block_size - bytes_remaining] =
-                                                       main_element;
-                       bytes_remaining--;
+                       window[window_pos++] = main_element;
                } else {
                        /* match: LZX_NUM_CHARS to LZX_MAINTREE_NUM_SYMBOLS - 1 */
-                       match_len = lzx_decode_match(main_element,
-                                               block_type, bytes_remaining, window,
-                                               block_size + window_pos -
-                                                       bytes_remaining,
-                                               tables, queue, istream);
-                       if (match_len == -1)
-                               return 1;
-
-                       bytes_remaining -= match_len;
+                       ret = lzx_decode_match(main_element,
+                                              block_type,
+                                              end - window_pos,
+                                              window,
+                                              window_pos,
+                                              tables,
+                                              queue,
+                                              istream);
+                       if (ret < 0)
+                               break;
+                       window_pos += ret;
                }
        }
        return 0;
 }
 
 /*
- * Decompresses a block of LZX-compressed data using a window size of 32768.
+ * Decompresses a block of LZX-compressed data as used in the WIM file format.
+ *
+ * Note that this will NOT work unmodified for LZX as used in the cabinet
+ * format, which is not the same as in the WIM format!
  *
  * @compressed_data:   A pointer to the compressed data.
+ *
  * @compressed_len:    The length of the compressed data, in bytes.
+ *
  * @uncompressed_data: A pointer to the buffer into which to write the
- *                             uncompressed data.
- * @uncompressed_len:  The length of the uncompressed data.
+ *                     uncompressed data.
+ *
+ * @uncompressed_len:  The length of the uncompressed data.  It must be
+ *                     32768 bytes or less.
  *
- * Return non-zero on failure.
+ * Return 0 on success; non-zero on failure.
  */
 int lzx_decompress(const void *compressed_data, unsigned compressed_len,
                   void *uncompressed_data, unsigned uncompressed_len)
@@ -759,10 +802,11 @@ int lzx_decompress(const void *compressed_data, unsigned compressed_len,
        struct lzx_tables tables;
        struct input_bitstream istream;
        struct lru_queue queue;
-       unsigned bytes_remaining;
+       unsigned window_pos;
        unsigned block_size;
        unsigned block_type;
        int ret;
+       bool e8_preprocessing_done;
 
        LZX_DEBUG("lzx_decompress (compressed_data = %p, compressed_len = %d, "
                  "uncompressed_data = %p, uncompressed_len = %d).",
@@ -776,31 +820,34 @@ int lzx_decompress(const void *compressed_data, unsigned compressed_len,
        queue.R0 = 1;
        queue.R1 = 1;
        queue.R2 = 1;
-       bytes_remaining = uncompressed_len;
-
        init_input_bitstream(&istream, compressed_data, compressed_len);
 
+       e8_preprocessing_done = false; /* Set to true if there may be 0xe8 bytes
+                                         in the uncompressed data. */
+
        /* The compressed data will consist of one or more blocks.  The
         * following loop decompresses one block, and it runs until there all
         * the compressed data has been decompressed, so there are no more
         * blocks.  */
 
-       while (bytes_remaining != 0) {
-
+       for (window_pos = 0;
+            window_pos < uncompressed_len;
+            window_pos += block_size)
+       {
                LZX_DEBUG("Reading block header.");
                ret = lzx_read_block_header(&istream, &block_size,
                                            &block_type, &tables, &queue);
                if (ret != 0)
                        return ret;
 
-               LZX_DEBUG("block_size = %u, bytes_remaining = %u",
-                         block_size, bytes_remaining);
+               LZX_DEBUG("block_size = %u, window_pos = %u",
+                         block_size, window_pos);
 
-               if (block_size > bytes_remaining) {
+               if (block_size > uncompressed_len - window_pos) {
                        ERROR("lzx_decompress(): Expected a block size of at "
                              "most %u bytes (found %u bytes)",
-                             bytes_remaining, block_size);
-                       return 1;
+                             uncompressed_len - window_pos, block_size);
+                       return -1;
                }
 
                switch (block_type) {
@@ -810,41 +857,42 @@ int lzx_decompress(const void *compressed_data, unsigned compressed_len,
                                LZX_DEBUG("LZX_BLOCKTYPE_VERBATIM");
                        else
                                LZX_DEBUG("LZX_BLOCKTYPE_ALIGNED");
-
                        ret = lzx_decompress_block(block_type,
                                                   block_size,
                                                   uncompressed_data,
-                                                  uncompressed_len -
-                                                      bytes_remaining,
-                                                  &tables, &queue, &istream);
+                                                  window_pos,
+                                                  &tables,
+                                                  &queue,
+                                                  &istream);
                        if (ret != 0)
                                return ret;
+                       if (tables.maintree_lens[0xe8] != 0)
+                               e8_preprocessing_done = true;
                        break;
                case LZX_BLOCKTYPE_UNCOMPRESSED:
                        LZX_DEBUG("LZX_BLOCKTYPE_UNCOMPRESSED");
-                       ret = bitstream_read_bytes(&istream, block_size,
-                                                  uncompressed_data +
-                                                  uncompressed_len -
-                                                  bytes_remaining);
-                       if (ret != 0)
-                               return ret;
-                       if (block_size & 1)
-                               align_input_bitstream(&istream);
-                       break;
-               default:
-                       wimlib_assert(0);
+                       if (istream.data_bytes_left < block_size) {
+                               ERROR("Unexpected end of input when "
+                                     "reading %zu bytes from LZX bitstream "
+                                     "(only have %u bytes left)",
+                                     block_size, istream.data_bytes_left);
+                               return -1;
+                       }
+                       memcpy(&((u8*)uncompressed_data)[window_pos], istream.data,
+                              block_size);
+                       istream.data += block_size;
+                       istream.data_bytes_left -= block_size;
+                       /* Re-align bitstream if an odd number of bytes were
+                        * read.  */
+                       if (istream.data_bytes_left && (block_size & 1)) {
+                               istream.data_bytes_left--;
+                               istream.data++;
+                       }
+                       e8_preprocessing_done = true;
                        break;
                }
-
-               bytes_remaining -= block_size;
-
-               if (bytes_remaining != 0)
-                       LZX_DEBUG("%d bytes remaining.", bytes_remaining);
        }
-
-       if (uncompressed_len >= 10)
-               undo_call_insn_preprocessing(uncompressed_data,
-                                            uncompressed_len);
-
+       if (e8_preprocessing_done)
+               undo_call_insn_preprocessing(uncompressed_data, uncompressed_len);
        return 0;
 }
index ab158d8..8a39689 100644 (file)
--- a/src/lzx.h
+++ b/src/lzx.h
  * though the blocks themselves are not this size, and the size of the actual
  * file resource in the WIM file is very likely to be something entirely
  * different as well.  */
-#define LZX_MAGIC_FILESIZE           12000000
+#define LZX_WIM_MAGIC_FILESIZE         12000000
 
+#define USE_LZX_EXTRA_BITS_ARRAY
+
+#ifdef USE_LZX_EXTRA_BITS_ARRAY
 extern const u8 lzx_extra_bits[LZX_NUM_POSITION_SLOTS];
+#endif
+
+/* Given the number of a LZX position slot, return the number of extra bits that
+ * are needed to encode the match offset. */
+static inline unsigned lzx_get_num_extra_bits(unsigned position_slot)
+{
+#ifdef USE_LZX_EXTRA_BITS_ARRAY
+       /* Use a table */
+       return lzx_extra_bits[position_slot];
+#else
+       /* Calculate directly using a shift and subtraction. */
+       wimlib_assert(position_slot >= 2 && position_slot <= 37);
+       return (position_slot >> 1) - 1;
+#endif
+}
+
 extern const u32 lzx_position_base[LZX_NUM_POSITION_SLOTS];
 
 /* Least-recently used queue for match offsets. */
 struct lru_queue {
-       int R0;
-       int R1;
-       int R2;
+       u32 R0;
+       u32 R1;
+       u32 R2;
 };
 
 extern int lzx_decompress(const void *compressed_data, unsigned compressed_len,
index 9d6fe2d..17aaabf 100644 (file)
@@ -620,7 +620,7 @@ int extract_wim_resource(const struct lookup_table_entry *lte,
                u64 to_read = min(bytes_remaining, sizeof(buf));
                ret = read_wim_resource(lte, buf, to_read, offset, 0);
                if (ret != 0)
-                       break;
+                       return ret;
                sha1_update(&ctx, buf, to_read);
                ret = extract_chunk(buf, to_read, offset, extract_chunk_arg);
                if (ret != 0) {