Clean up other compression/decompression code
authorEric Biggers <ebiggers3@gmail.com>
Mon, 9 Dec 2013 03:06:57 +0000 (21:06 -0600)
committerEric Biggers <ebiggers3@gmail.com>
Mon, 9 Dec 2013 06:24:46 +0000 (00:24 -0600)
include/wimlib/compress.h
include/wimlib/decompress.h
include/wimlib/lzx.h
include/wimlib/xpress.h
src/compress.c
src/decompress.c
src/lz77.c
src/lzx-compress.c
src/lzx-decompress.c
src/xpress-compress.c
src/xpress-decompress.c

index b6485a0..f3ce6e2 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * compress.h
  *
- * Functions useful for compression, mainly bitstreams.
+ * Header for compression code shared by multiple compression formats.
  */
 
 #ifndef _WIMLIB_COMPRESS_H
 
 typedef u16 output_bitbuf_t;
 
-/* Assuming that WIM chunks are at most 32768 bytes, 16 bits is enough for any
- * symbol frequency. */
-typedef u16 freq_t;
+/* Variable type that can represent all possible window positions.  */
+typedef u32 freq_t;
+#ifndef INPUT_IDX_T_DEFINED
+#define INPUT_IDX_T_DEFINED
+typedef u32 input_idx_t;
+#endif
 
 /* Structure to keep track of the current position in the compressed output. */
 struct output_bitstream {
@@ -26,38 +29,42 @@ struct output_bitstream {
        /* Number of free bits in @bitbuf */
        unsigned free_bits;
 
+       /* Pointer to the start of the output buffer.  */
+       u8 *output_start;
+
+       /* Position at which to write the next 16 bits.  */
        u8 *bit_output;
+
+       /* Next position to write 16 bits, after they are written to bit_output.
+        * This is after @next_bit_output and may be separated from @bit_output
+        * by literal bytes.  */
        u8 *next_bit_output;
 
-       /* Pointer to the next byte in the compressed output. */
+       /* Next position to write literal bytes.  This is after @bit_output and
+        * @next_bit_output, and may be separated from them by literal bytes.
+        */
        u8 *output;
 
+       /* Bytes remaining in @output buffer.  */
+       input_idx_t bytes_remaining;
 
-       /* Number of bytes left in the memory pointed to by @output. */
-       int num_bytes_remaining;
+       /* Set to true if the buffer has been exhausted.  */
+       bool overrun;
 };
 
-static inline int
-bitstream_put_byte(struct output_bitstream *ostream, u8 n)
-{
-       if (ostream->num_bytes_remaining < 1)
-               return 1;
-       *ostream->output = n;
-       ostream->output++;
-       ostream->num_bytes_remaining--;
-       return 0;
-}
-
-static inline int
-bitstream_put_two_bytes(struct output_bitstream *ostream, u16 n)
-{
-       if (ostream->num_bytes_remaining < 2)
-               return 1;
-       *(u16*)ostream->output = cpu_to_le16(n);
-       ostream->output += 2;
-       ostream->num_bytes_remaining -= 2;
-       return 0;
-}
+extern void
+init_output_bitstream(struct output_bitstream *ostream,
+                     void *data, unsigned num_bytes);
+
+extern input_idx_t
+flush_output_bitstream(struct output_bitstream *ostream);
+
+extern void
+bitstream_put_bits(struct output_bitstream *ostream,
+                  output_bitbuf_t bits, unsigned num_bits);
+
+extern void
+bitstream_put_byte(struct output_bitstream *ostream, u8 n);
 
 struct lz_params {
        unsigned min_match;
@@ -69,30 +76,17 @@ struct lz_params {
        unsigned too_far;
 };
 
-typedef u32 (*lz_record_match_t)(unsigned, unsigned, void *, void *);
-typedef u32 (*lz_record_literal_t)(u8, void *);
+typedef void (*lz_record_match_t)(unsigned len, unsigned offset, void *ctx);
+typedef void (*lz_record_literal_t)(u8 lit, void *ctx);
 
-extern unsigned
-lz_analyze_block(const u8 uncompressed_data[],
-                unsigned uncompressed_len,
-                u32 match_tab[],
+extern void
+lz_analyze_block(const u8 window[],
+                input_idx_t window_size,
                 lz_record_match_t record_match,
                 lz_record_literal_t record_literal,
-                void *record_match_arg1,
-                void *record_match_arg2,
-                void *record_literal_arg,
+                void *record_ctx,
                 const struct lz_params *params);
 
-extern int bitstream_put_bits(struct output_bitstream *ostream,
-                             output_bitbuf_t bits, unsigned num_bits);
-
-extern void
-init_output_bitstream(struct output_bitstream *ostream,
-                     void *data, unsigned num_bytes);
-
-extern int
-flush_output_bitstream(struct output_bitstream *ostream);
-
 extern void
 make_canonical_huffman_code(unsigned num_syms,
                            unsigned max_codeword_len,
index 7d0d385..b1f534f 100644 (file)
@@ -1,30 +1,37 @@
 /*
  * decompress.h
  *
- * Functions useful for decompression, mainly bitstreams.
+ * Header for decompression code shared by multiple compression formats.
  */
 
 #ifndef _WIMLIB_DECOMPRESS_H
 #define _WIMLIB_DECOMPRESS_H
 
 #include "wimlib/assert.h"
-#include "wimlib/endianness.h"
+#include "wimlib/compiler.h"
 #include "wimlib/error.h"
+#include "wimlib/endianness.h"
 #include "wimlib/types.h"
 
 /* Must be at least 32 bits. */
-typedef unsigned long input_bitbuf_t;
+typedef u32 input_bitbuf_t;
+#define INPUT_BITBUF_BITS (sizeof(input_bitbuf_t) * 8)
+
+#ifndef INPUT_IDX_T_DEFINED
+#define INPUT_IDX_T_DEFINED
+typedef u32 input_idx_t;
+#endif
 
 /* Structure to encapsulate a block of in-memory data that is being interpreted
  * as a stream of bits.
  *
  * This is geared specifically towards the XPRESS and LZX compression formats
- * with regards to the actual ordering the bits within the byte sequence. */
+ * with regards to the actual ordering the bits within the byte sequence.  */
 struct input_bitstream {
 
        /* A variable of length at least 32 bits that is used to hold bits that
         * have been read from the stream.  The bits are ordered from high-order
-        * to low-order, and the next bit is always the high-order bit. */
+        * to low-order, and the next bit is always the high-order bit.  */
        input_bitbuf_t  bitbuf;
 
        /* Pointer to the next byte to be retrieved from the input. */
@@ -33,14 +40,14 @@ struct input_bitstream {
        /* Number of bits in @bitbuf that are valid. */
        unsigned bitsleft;
 
-       /* Number of words of data that are left. */
-       unsigned data_bytes_left;
+       /* Number of words of data that are left.  */
+       input_idx_t data_bytes_left;
 };
 
 /* Initializes a bitstream to receive its input from @data. */
 static inline void
 init_input_bitstream(struct input_bitstream *istream,
-                    const void *data, unsigned num_data_bytes)
+                    const void *data, input_idx_t num_data_bytes)
 {
        istream->bitbuf          = 0;
        istream->bitsleft        = 0;
@@ -49,7 +56,7 @@ init_input_bitstream(struct input_bitstream *istream,
 }
 
 /* Ensures that the bit buffer variable for the bitstream contains @num_bits
- * bits.
+ * bits, which must be 16 or fewer.
  *
  * If there are at least @num_bits bits remaining in the bitstream, 0 is
  * returned.  Otherwise, -1 is returned.  */
@@ -58,103 +65,83 @@ bitstream_ensure_bits(struct input_bitstream *istream, unsigned num_bits)
 {
        wimlib_assert2(num_bits <= 16);
 
-       int ret = 0;
-
-       /* Unfortunately this needs to be different for the different
-        * compression types.  LZX requires reading no more than the number of
-        * bits needed, otherwise the end of the compressed data may be overrun.
-        * XPRESS, on the other hand, requires that we always return with at
-        * least 16 bits in the buffer, even if fewer are requested.  This is
-        * important because this may change the location of a literal byte
-        * read with bitstream_read_byte(). */
-#ifdef XPRESS_DECOMP
-       if (istream->bitsleft < 16) {
-#else
-       if (istream->bitsleft < num_bits) {
-#endif
-               if (likely(istream->data_bytes_left >= 2)) {
-                       unsigned shift = sizeof(input_bitbuf_t) * 8 - 16 -
-                                        istream->bitsleft;
-                       istream->bitbuf |= (input_bitbuf_t)le16_to_cpu(
-                                               *(le16*)istream->data) << shift;
-                       istream->data += 2;
-                       istream->bitsleft += 16;
-                       istream->data_bytes_left -= 2;
-               } else {
-                       ret = -1;
-               }
-       }
-       wimlib_assert2(ret != 0 || istream->bitsleft >= num_bits);
-       return ret;
+       if (istream->bitsleft >= num_bits)
+               return 0;
+
+       if (unlikely(istream->data_bytes_left < 2))
+               return -1;
+
+       istream->bitbuf |= le16_to_cpu(*(le16*)istream->data) <<
+                          (INPUT_BITBUF_BITS - 16 - istream->bitsleft);
+       istream->data += 2;
+       istream->bitsleft += 16;
+       istream->data_bytes_left -= 2;
+       return 0;
 }
 
 /* Returns the next @num_bits bits in the buffer variable, which must contain at
- * least @num_bits bits, for the bitstream. */
+ * least @num_bits bits, for the bitstream.  */
 static inline unsigned
 bitstream_peek_bits(const struct input_bitstream *istream, unsigned num_bits)
 {
        wimlib_assert2(istream->bitsleft >= num_bits);
-       int ret;
+
        if (unlikely(num_bits == 0))
-               ret = 0;
-       else
-               ret = istream->bitbuf >> (sizeof(input_bitbuf_t) * 8 - num_bits);
-       return ret;
+               return 0;
+
+       return istream->bitbuf >> (INPUT_BITBUF_BITS - num_bits);
 }
 
 /* Removes @num_bits bits from the buffer variable, which must contain at least
- * @num_bits bits, for the bitstream. */
+ * @num_bits bits, for the bitstream.  */
 static inline void
 bitstream_remove_bits(struct input_bitstream *istream, unsigned num_bits)
 {
        wimlib_assert2(istream->bitsleft >= num_bits);
+
        istream->bitbuf <<= num_bits;
        istream->bitsleft -= num_bits;
 }
 
-/* Reads @num_bits bits from the input bitstream.  @num_bits must be 16 or fewer.
- * On success, returns 0 and returns the requested bits in @n.  If there are
- * fewer than @num_bits remaining in the bitstream, -1 is returned. */
+/* Gets and removes @num_bits bits from the buffer variable, which must contain
+ * at least @num_bits bits, for the bitstream.  */
+static inline unsigned
+bitstream_pop_bits(struct input_bitstream *istream,
+                  unsigned num_bits)
+{
+       unsigned n = bitstream_peek_bits(istream, num_bits);
+       bitstream_remove_bits(istream, num_bits);
+       return n;
+}
+
+/* Reads @num_bits bits from the input bitstream.  @num_bits must be 16 or
+ * fewer.  On success, returns 0 and returns the requested bits in @n.  If there
+ * are fewer than @num_bits remaining in the bitstream, -1 is returned. */
 static inline int
 bitstream_read_bits(struct input_bitstream *istream,
                    unsigned num_bits, unsigned *n)
 {
-       wimlib_assert2(num_bits <= 16);
-       int ret = bitstream_ensure_bits(istream, num_bits);
-       if (likely(ret == 0)) {
-               *n = bitstream_peek_bits(istream, num_bits);
-               bitstream_remove_bits(istream, num_bits);
-       } else {
-               ERROR("bitstream_read_bits(): Input buffer exhausted");
-       }
-       return ret;
+       if (unlikely(bitstream_ensure_bits(istream, num_bits)))
+               return -1;
+
+       *n = bitstream_pop_bits(istream, num_bits);
+       return 0;
 }
 
-/* In the XPRESS format there can be literal bytes embedded in the bitstream.
- * These bytes are basically separate from the bitstream, as they come AFTER the
- * bits that are currently in the buffer variable (based on reading 16 bits at a
- * time), even though the buffer variable may not be empty.
- *
- * This function returns the next such literal byte, or -1 if there are no more.
- */
+/* Return the next literal byte embedded in the bitstream, or -1 if the input
+ * was exhausted.  */
 static inline int
 bitstream_read_byte(struct input_bitstream *istream)
 {
-       wimlib_assert2(istream->bitsleft < 32);
-       int ret;
+       if (unlikely(istream->data_bytes_left < 1))
+               return -1;
 
-       if (unlikely(istream->data_bytes_left == 0)) {
-               ERROR("bitstream_read_byte(): Input buffer exhausted");
-               ret = -1;
-       } else {
-               istream->data_bytes_left--;
-               ret = *istream->data++;
-       }
-       return ret;
+       istream->data_bytes_left--;
+       return *istream->data++;
 }
 
 /* Reads @num_bits bits from the buffer variable for a bistream without checking
- * to see if that many bits are in the buffer or not. */
+ * to see if that many bits are in the buffer or not.  */
 static inline unsigned
 bitstream_read_bits_nocheck(struct input_bitstream *istream, unsigned num_bits)
 {
@@ -171,70 +158,45 @@ read_huffsym_near_end_of_input(struct input_bitstream *istream,
                               unsigned table_bits,
                               unsigned *n);
 
-/*
- * Reads a Huffman-encoded symbol from a bitstream.
- *
- * This function may be called hundreds of millions of times when extracting a
- * large WIM file.  I'm not sure it could be made much faster that it is,
- * especially since there isn't enough time to make a big table that allows
- * decoding multiple symbols per lookup.  But if extracting files to a hard
- * disk, the I/O will be the bottleneck anyway.
- *
- * @buf:       The input buffer from which the symbol will be read.
- * @decode_table:      The fast Huffman decoding table for the Huffman tree.
- * @lengths:           The table that gives the length of the code for each
- *                             symbol.
- * @num_symbols:       The number of symbols in the Huffman code.
- * @table_bits:                Huffman codes this length or less can be looked up
- *                             directory in the decode_table, as the
- *                             decode_table contains 2**table_bits entries.
- */
+/* Read a Huffman-encoded symbol from a bitstream.  */
 static inline int
-read_huffsym(struct input_bitstream *istream,
-            const u16 decode_table[],
-            const u8 lens[],
+read_huffsym(struct input_bitstream * restrict istream,
+            const u16 decode_table[restrict],
+            const u8 lens[restrict],
             unsigned num_syms,
             unsigned table_bits,
-            unsigned *n,
+            unsigned *restrict n,
             unsigned max_codeword_len)
 {
-       int ret;
-
-       /* In the most common case, there are at least max_codeword_len bits
-        * remaining in the stream. */
-       if (likely(bitstream_ensure_bits(istream, max_codeword_len) == 0)) {
-
-               /* Use the next table_bits of the input as an index into the
-                * decode_table. */
-               u16 key_bits = bitstream_peek_bits(istream, table_bits);
-
-               u16 sym = decode_table[key_bits];
-
-               /* If the entry in the decode table is not a valid symbol, it is
-                * the offset of the root of its Huffman subtree. */
-               if (likely(sym < num_syms)) {
-                       wimlib_assert2(lens[sym] <= table_bits);
-                       bitstream_remove_bits(istream, lens[sym]);
-               } else {
-                       bitstream_remove_bits(istream, table_bits);
-                       do {
-                               key_bits = sym + bitstream_peek_bits(istream, 1);
-                               bitstream_remove_bits(istream, 1);
-
-                               wimlib_assert2(key_bits < num_syms * 2 +
-                                              (1 << table_bits));
-                       } while ((sym = decode_table[key_bits]) >= num_syms);
-               }
-               *n = sym;
-               ret = 0;
+       /* If there are fewer bits remaining in the input than the maximum
+        * codeword length, use the slow path that has extra checks.  */
+       if (unlikely(bitstream_ensure_bits(istream, max_codeword_len))) {
+               return read_huffsym_near_end_of_input(istream, decode_table,
+                                                     lens, num_syms,
+                                                     table_bits, n);
+       }
+
+       /* Use the next table_bits of the input as an index into the
+        * decode_table.  */
+       u16 key_bits = bitstream_peek_bits(istream, table_bits);
+
+       u16 sym = decode_table[key_bits];
+
+       if (likely(sym < num_syms)) {
+               /* Fast case: The decode table directly provided the symbol.  */
+               bitstream_remove_bits(istream, lens[sym]);
        } else {
-               /* Otherwise, we must be careful to use only the bits that are
-                * actually remaining.  */
-               ret = read_huffsym_near_end_of_input(istream, decode_table,
-                                                    lens, num_syms,
-                                                    table_bits, n);
+               /* Slow case: The symbol took too many bits to include directly
+                * in the decode table, so search for it in a binary tree at the
+                * end of the decode table.  */
+               bitstream_remove_bits(istream, table_bits);
+               do {
+                       key_bits = sym + bitstream_peek_bits(istream, 1);
+                       bitstream_remove_bits(istream, 1);
+               } while ((sym = decode_table[key_bits]) >= num_syms);
        }
-       return ret;
+       *n = sym;
+       return 0;
 }
 
 extern int
@@ -243,7 +205,7 @@ make_huffman_decode_table(u16 decode_table[], unsigned num_syms,
                          unsigned max_codeword_len);
 
 /* Minimum alignment for the decode_table parameter to
- * make_huffman_decode_table(). */
+ * make_huffman_decode_table().  */
 #define DECODE_TABLE_ALIGNMENT 16
 
 #endif /* _WIMLIB_DECOMPRESS_H */
index b6d1ec2..cb17fbe 100644 (file)
@@ -1,6 +1,10 @@
 #ifndef _WIMLIB_LZX_H
 #define _WIMLIB_LZX_H
 
+/* Constants for the LZX data compression format.  See the comments in
+ * lzx-compress.c and lzx-decompress.c for more information about this format.
+ * */
+
 #include "wimlib/assert.h"
 #include "wimlib/types.h"
 
index c07dbae..2b57bae 100644 (file)
@@ -1,13 +1,16 @@
 #ifndef _WIMLIB_XPRESS_H
 #define _WIMLIB_XPRESS_H
 
-/* See the comments in xpress-decompress.c about the XPRESS format. */
+/* Constants for the XPRESS data compression format.  See the comments in
+ * xpress-decompress.c for more information about this format.  */
 
 //#define ENABLE_XPRESS_DEBUG
 #ifdef ENABLE_XPRESS_DEBUG
 #      define XPRESS_DEBUG DEBUG
+#       define XPRESS_ASSERT wimlib_assert
 #else
 #      define XPRESS_DEBUG(format, ...)
+#      define XPRESS_ASSERT(...)
 #endif
 
 #define XPRESS_NUM_CHARS       256
@@ -20,7 +23,7 @@
 #define XPRESS_MIN_OFFSET      1
 #define XPRESS_MAX_OFFSET      65535
 
-#define XPRESS_MIN_MATCH       3
-#define XPRESS_MAX_MATCH       65538
+#define XPRESS_MIN_MATCH_LEN   3
+#define XPRESS_MAX_MATCH_LEN   65538
 
 #endif /* _WIMLIB_XPRESS_H */
index dfaabe6..f98a5a5 100644 (file)
 #  include "config.h"
 #endif
 
-
 #include "wimlib/assert.h"
+#include "wimlib/compiler.h"
 #include "wimlib/compress.h"
 #include "wimlib/util.h"
 
 #include <stdlib.h>
 #include <string.h>
 
-static inline void
-flush_bits(struct output_bitstream *ostream)
-{
-       *(le16*)ostream->bit_output = cpu_to_le16(ostream->bitbuf);
-       ostream->bit_output = ostream->next_bit_output;
-       ostream->next_bit_output = ostream->output;
-       ostream->output += 2;
-       ostream->num_bytes_remaining -= 2;
-}
-
 /* Writes @num_bits bits, given by the @num_bits least significant bits of
  * @bits, to the output @ostream. */
-int
+void
 bitstream_put_bits(struct output_bitstream *ostream, output_bitbuf_t bits,
                   unsigned num_bits)
 {
        unsigned rem_bits;
 
-       wimlib_assert(num_bits <= 16);
        if (num_bits <= ostream->free_bits) {
+               /* Buffer variable had space for the new bits.  */
                ostream->bitbuf = (ostream->bitbuf << num_bits) | bits;
                ostream->free_bits -= num_bits;
-       } else {
+               return;
+       }
 
-               if (ostream->num_bytes_remaining + (ostream->output -
-                                               ostream->bit_output) < 2)
-                       return 1;
-
-               /* It is tricky to output the bits correctly.  The correct way
-                * is to output little-endian 2-byte words, such that the bits
-                * in the SECOND byte logically precede those in the FIRST byte.
-                * While the byte order is little-endian, the bit order is
-                * big-endian; the first bit in a byte is the high-order one.
-                * Any multi-bit numbers are in bit-big-endian form, so the
-                * low-order bit of a multi-bit number is the LAST bit to be
-                * output. */
-               rem_bits = num_bits - ostream->free_bits;
-               ostream->bitbuf <<= ostream->free_bits;
-               ostream->bitbuf |= bits >> rem_bits;
-               flush_bits(ostream);
-               ostream->free_bits = 16 - rem_bits;
-               ostream->bitbuf = bits;
+       /* Buffer variable does not have space for the new bits.  It needs to be
+        * flushed as a 16-bit integer.  Bits in the second byte logically
+        * precede those in the first byte (little-endian), but within each byte
+        * the bits are ordered from high to low.  This is true for both XPRESS
+        * and LZX compression.  */
 
+       wimlib_assert(num_bits <= 16);
+
+       /* There must be at least 2 bytes of space remaining.  */
+       if (unlikely(ostream->bytes_remaining < 2)) {
+               ostream->overrun = true;
+               return;
        }
-       return 0;
+
+       /* Fill the buffer with as many bits that fit.  */
+       rem_bits = num_bits - ostream->free_bits;
+       ostream->bitbuf <<= ostream->free_bits;
+       ostream->bitbuf |= bits >> rem_bits;
+
+       *(le16*)ostream->bit_output = cpu_to_le16(ostream->bitbuf);
+       ostream->bit_output = ostream->next_bit_output;
+       ostream->next_bit_output = ostream->output;
+       ostream->output += 2;
+       ostream->bytes_remaining -= 2;
+
+       ostream->free_bits = 16 - rem_bits;
+       ostream->bitbuf = bits;
 }
 
-/* Flushes any remaining bits in the output buffer to the output byte stream. */
-int
-flush_output_bitstream(struct output_bitstream *ostream)
+void
+bitstream_put_byte(struct output_bitstream *ostream, u8 n)
 {
-       if (ostream->num_bytes_remaining + (ostream->output -
-                                       ostream->bit_output) < 2)
-               return 1;
-       if (ostream->free_bits != 16) {
-               ostream->bitbuf <<= ostream->free_bits;
-               flush_bits(ostream);
+       if (unlikely(ostream->bytes_remaining < 1)) {
+               ostream->overrun = true;
+               return;
        }
-       return 0;
+       *ostream->output++ = n;
+       ostream->bytes_remaining--;
+}
+
+/* Flushes any remaining bits to the output bitstream.
+ *
+ * Returns -1 if the stream has overrun; otherwise returns the total number of
+ * bytes in the output.  */
+input_idx_t
+flush_output_bitstream(struct output_bitstream *ostream)
+{
+       if (unlikely(ostream->overrun))
+               return ~(input_idx_t)0;
+
+       *(le16*)ostream->bit_output =
+               cpu_to_le16((u16)((u32)ostream->bitbuf << ostream->free_bits));
+       *(le16*)ostream->next_bit_output =
+               cpu_to_le16(0);
+
+       return ostream->output - ostream->output_start;
 }
 
 /* Initializes an output bit buffer to write its output to the memory location
@@ -106,10 +118,12 @@ init_output_bitstream(struct output_bitstream *ostream,
 
        ostream->bitbuf              = 0;
        ostream->free_bits           = 16;
+       ostream->output_start        = data;
        ostream->bit_output          = data;
        ostream->next_bit_output     = data + 2;
        ostream->output              = data + 4;
-       ostream->num_bytes_remaining = num_bytes - 4;
+       ostream->bytes_remaining     = num_bytes;
+       ostream->overrun             = false;
 }
 
 typedef struct {
index 227bfb0..502ca47 100644 (file)
@@ -28,6 +28,7 @@
 #endif
 
 #include "wimlib/decompress.h"
+#include "wimlib/error.h"
 #include "wimlib/util.h"
 
 #include <string.h>
@@ -417,10 +418,8 @@ read_huffsym_near_end_of_input(struct input_bitstream *istream,
        if (sym >= num_syms) {
                bitstream_remove_bits(istream, key_size);
                do {
-                       if (bitsleft == 0) {
-                               DEBUG("Input stream exhausted");
+                       if (bitsleft == 0)
                                return -1;
-                       }
                        key_bits = sym + bitstream_peek_bits(istream, 1);
                        bitstream_remove_bits(istream, 1);
                        bitsleft--;
index 5ce8d89..b9a4017 100644 (file)
@@ -27,7 +27,7 @@
  */
 
 #ifdef HAVE_CONFIG_H
-#  include "config.h"
+#  include <config.h>
 #endif
 
 #include "wimlib/compress.h"
@@ -78,7 +78,7 @@ update_hash(unsigned hash, u8 c)
  * indicating the end of the hash chain.
  */
 static inline unsigned
-insert_string(u16 hash_tab[], u16 prev_tab[],
+insert_string(input_idx_t hash_tab[], input_idx_t prev_tab[],
              const u8 window[], unsigned str_pos,
              unsigned hash)
 {
@@ -112,7 +112,7 @@ insert_string(u16 hash_tab[], u16 prev_tab[],
  */
 static unsigned
 longest_match(const u8 window[], unsigned bytes_remaining,
-             unsigned strstart, const u16 prev_tab[],
+             unsigned strstart, const input_idx_t prev_tab[],
              unsigned cur_match, unsigned prev_len,
              unsigned *match_start_ret,
              const struct lz_params *params)
@@ -193,44 +193,23 @@ longest_match(const u8 window[], unsigned bytes_remaining,
  * Determines the sequence of matches and literals that a block of data will be
  * compressed to.
  *
- * @uncompressed_data: The data that is to be compressed.
- * @uncompressed_len:  The length of @uncompressed_data, in bytes.
- * @match_tab:         An array for the intermediate representation of matches.
- * @record_match:      A function that will be called to produce the
- *                             intermediate representation of a match, given
- *                             the offset and length.  This function should also
- *                             update the appropriate symbol frequency counts
- *                             so that any needed Huffman codes can be made
- *                             later.
- * @record_literal:    A function that will be called to produce the
- *                             intermediate representation of a literal, given
- *                             the character of the literal.  This function
- *                             should also update the appropriate symbol
- *                             frequency counts so that any needed Huffman
- *                             codes can be made later.
- * @record_match_arg_1:
- * @record_match_arg_2:        Extra arguments to be passed to @record_match.
- * @record_literal_arg:        Extra arguments to be passed to @record_literal.
+ * @window:            The data that is to be compressed.
+ * @window_size:       The length of @window, in bytes.
+ * @record_match:      Consumer for matches.
+ * @record_literal:    Consumer for literals.
+ * @record_ctx:                Context passed to @record_match and @record_literal.
  * @params:            Structure that contains parameters that affect how the
  *                             analysis proceeds (mainly how good the matches
  *                             have to be).
- *
- * Returns the total number of matches and literal bytes that were found; this
- * is the number of slots in @match_tab that have been filled with the
- * intermediate representation of a match or literal byte.
  */
-unsigned
-lz_analyze_block(const u8 uncompressed_data[],
-                unsigned uncompressed_len,
-                u32 match_tab[],
+void
+lz_analyze_block(const u8 window[],
+                input_idx_t window_size,
                 lz_record_match_t record_match,
                 lz_record_literal_t record_literal,
-                void *record_match_arg1,
-                void *record_match_arg2,
-                void *record_literal_arg,
+                void *record_ctx,
                 const struct lz_params *params)
 {
-       unsigned cur_match_pos = 0;
        unsigned cur_input_pos = 0;
        unsigned hash          = 0;
        unsigned hash_head     = 0;
@@ -239,23 +218,21 @@ lz_analyze_block(const u8 uncompressed_data[],
        unsigned match_len     = params->min_match - 1;
        unsigned match_start   = 0;
        bool match_available = false;
-       u16 hash_tab[HASH_SIZE];
-       u32 match;
-       u16 prev_tab[uncompressed_len];
+       input_idx_t hash_tab[HASH_SIZE];
+       input_idx_t prev_tab[window_size];
 
        ZERO_ARRAY(hash_tab);
-       ZERO_ARRAY(prev_tab);
 
        do {
                /* If there are at least 3 characters remaining in the input,
                 * insert the 3-character string beginning at
-                * uncompressed_data[cur_input_pos] into the hash table.
+                * window[cur_input_pos] into the hash table.
                 *
                 * hash_head is set to the index of the previous string in the
                 * hash bucket, or 0 if there is no such string */
-               if (uncompressed_len - cur_input_pos >= params->min_match) {
+               if (window_size - cur_input_pos >= params->min_match) {
                        hash = insert_string(hash_tab, prev_tab,
-                                            uncompressed_data,
+                                            window,
                                             cur_input_pos, hash);
                        hash_head = prev_tab[cur_input_pos];
                } else {
@@ -273,8 +250,8 @@ lz_analyze_block(const u8 uncompressed_data[],
                         * string of window index 0 (in particular we have to
                         * avoid a match of the string with itself at the start
                         * of the input file).  */
-                       match_len = longest_match(uncompressed_data,
-                                                 uncompressed_len - cur_input_pos,
+                       match_len = longest_match(window,
+                                                 window_size - cur_input_pos,
                                                  cur_input_pos, prev_tab,
                                                  hash_head, prev_len,
                                                  &match_start, params);
@@ -289,26 +266,18 @@ lz_analyze_block(const u8 uncompressed_data[],
                 */
                if (prev_len >= params->min_match && match_len <= prev_len) {
 
-                       /* Do not insert strings in hash table beyond this. */
-                       unsigned max_insert = uncompressed_len - params->min_match;
-
-                       /*DEBUG("Recording match (pos = %u, offset = %u, len = %u)\n",*/
-                                       /*cur_input_pos - 1, */
-                                       /*cur_input_pos - 1 - prev_start,*/
-                                       /*prev_len);*/
 
-                       match = (*record_match)(cur_input_pos - 1 - prev_start,
-                                               prev_len,
-                                               record_match_arg1,
-                                               record_match_arg2);
+                       (*record_match)(prev_len,
+                                       cur_input_pos - 1 - prev_start,
+                                       record_ctx);
 
-                       match_tab[cur_match_pos++] = match;
+                       /* Insert in hash table all strings up to the end of the
+                        * match.  strstart-1 and strstart are already inserted.
+                        * If there is not enough lookahead, the last two
+                        * strings are not inserted in the hash table.  */
 
-                       /* Insert in hash table all strings up to the end of the match.
-                        * strstart-1 and strstart are already inserted. If there is not
-                        * enough lookahead, the last two strings are not inserted in
-                        * the hash table.
-                        */
+                       /* Do not insert strings in hash table beyond this. */
+                       unsigned max_insert = window_size - params->min_match;
 #if LZ_MIN_MATCH == 2
                        if (prev_len >= 3)
 #endif
@@ -318,7 +287,7 @@ lz_analyze_block(const u8 uncompressed_data[],
                                do {
                                        if (++cur_input_pos <= max_insert) {
                                                hash = insert_string(hash_tab, prev_tab,
-                                                                    uncompressed_data,
+                                                                    window,
                                                                     cur_input_pos,
                                                                     hash);
                                        }
@@ -327,30 +296,18 @@ lz_analyze_block(const u8 uncompressed_data[],
                        match_available = false;
                        match_len = params->min_match - 1;
                } else if (match_available) {
-                       /* If there was no match at the previous position, output a
-                        * single literal. If there was a match but the current match
-                        * is longer, truncate the previous match to a single literal.
-                        */
-
-                       /*DEBUG("Recording litrl (pos = %u, value = %u)\n",*/
-                                       /*cur_input_pos - 1, */
-                                       /*uncompressed_data[cur_input_pos - 1]);*/
-
-                       match = (*record_literal)(
-                                       uncompressed_data[cur_input_pos - 1],
-                                                       record_literal_arg);
-                       match_tab[cur_match_pos++] = match;
+                       /* If there was no match at the previous position,
+                        * output a single literal. If there was a match but the
+                        * current match is longer, truncate the previous match
+                        * to a single literal.  */
+                       (*record_literal)(window[cur_input_pos - 1], record_ctx);
                } else {
                        /* There is no previous match to compare with, wait for
                         * the next step to decide.  */
                        match_available = true;
                }
-       } while (++cur_input_pos < uncompressed_len);
-
-       if (match_available) {
-               match = (*record_literal)(uncompressed_data[cur_input_pos - 1],
-                                               record_literal_arg);
-               match_tab[cur_match_pos++] = match;
-       }
-       return cur_match_pos;
+       } while (++cur_input_pos < window_size);
+
+       if (match_available)
+               (*record_literal)(window[cur_input_pos - 1], record_ctx);
 }
index 4c0e6ce..9874a7b 100644 (file)
 #include <string.h>
 
 #ifdef ENABLE_LZX_DEBUG
-#  include <wimlib/decompress.h>
+#  include "wimlib/decompress.h"
 #endif
 
 #include "divsufsort/divsufsort.h"
 
-typedef freq_t input_idx_t;
 typedef u32 block_cost_t;
 #define INFINITE_BLOCK_COST    ((block_cost_t)~0U)
 
@@ -1074,13 +1073,10 @@ lzx_write_all_blocks(struct lzx_compressor *ctx, struct output_bitstream *ostrea
 /* Constructs an LZX match from a literal byte and updates the main code symbol
  * frequencies.  */
 static u32
-lzx_record_literal(u8 literal, void *_freqs)
+lzx_tally_literal(u8 lit, struct lzx_freqs *freqs)
 {
-       struct lzx_freqs *freqs = _freqs;
-
-       freqs->main[literal]++;
-
-       return (u32)literal;
+       freqs->main[lit]++;
+       return (u32)lit;
 }
 
 /* Constructs an LZX match from an offset and a length, and updates the LRU
@@ -1088,11 +1084,9 @@ lzx_record_literal(u8 literal, void *_freqs)
  * alphabets.  The return value is a 32-bit number that provides the match in an
  * intermediate representation documented below.  */
 static u32
-lzx_record_match(unsigned match_offset, unsigned match_len,
-                void *_freqs, void *_queue)
+lzx_tally_match(unsigned match_len, unsigned match_offset,
+               struct lzx_freqs *freqs, struct lzx_lru_queue *queue)
 {
-       struct lzx_freqs *freqs = _freqs;
-       struct lzx_lru_queue *queue = _queue;
        unsigned position_slot;
        unsigned position_footer;
        u32 len_header;
@@ -1166,6 +1160,28 @@ lzx_record_match(unsigned match_offset, unsigned match_len,
                (adjusted_match_len);
 }
 
+struct lzx_record_ctx {
+       struct lzx_freqs freqs;
+       struct lzx_lru_queue queue;
+       struct lzx_match *matches;
+};
+
+static void
+lzx_record_match(unsigned len, unsigned offset, void *_ctx)
+{
+       struct lzx_record_ctx *ctx = _ctx;
+
+       (ctx->matches++)->data = lzx_tally_match(len, offset, &ctx->freqs, &ctx->queue);
+}
+
+static void
+lzx_record_literal(u8 lit, void *_ctx)
+{
+       struct lzx_record_ctx *ctx = _ctx;
+
+       (ctx->matches++)->data = lzx_tally_literal(lit, &ctx->freqs);
+}
+
 /* Returns the cost, in bits, to output a literal byte using the specified cost
  * model.  */
 static unsigned
@@ -1921,11 +1937,11 @@ lzx_optimize_block(struct lzx_compressor *ctx, struct lzx_block_spec *spec,
 
                        raw_match = lzx_lz_get_near_optimal_match(ctx);
                        if (raw_match.len >= LZX_MIN_MATCH_LEN) {
-                               lzx_match.data = lzx_record_match(raw_match.offset, raw_match.len,
-                                                                 &freqs, &ctx->queue);
+                               lzx_match.data = lzx_tally_match(raw_match.len, raw_match.offset,
+                                                                &freqs, &ctx->queue);
                                i += raw_match.len;
                        } else {
-                               lzx_match.data = lzx_record_literal(ctx->window[i], &freqs);
+                               lzx_match.data = lzx_tally_literal(ctx->window[i], &freqs);
                                i += 1;
                        }
                        ctx->chosen_matches[spec->chosen_matches_start_pos +
@@ -2139,9 +2155,6 @@ lzx_prepare_blocks(struct lzx_compressor * ctx)
  *     ctx->window[]
  *     ctx->window_size
  *
- * Working space:
- *     ctx->queue
- *
  * Output --- the block specification and the corresponding match/literal data:
  *
  *     ctx->block_specs[]
@@ -2151,8 +2164,7 @@ lzx_prepare_blocks(struct lzx_compressor * ctx)
 static void
 lzx_prepare_block_fast(struct lzx_compressor * ctx)
 {
-       unsigned num_matches;
-       struct lzx_freqs freqs;
+       struct lzx_record_ctx record_ctx;
        struct lzx_block_spec *spec;
 
        /* Parameters to hash chain LZ match finder
@@ -2170,19 +2182,17 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx)
        };
 
        /* Initialize symbol frequencies and match offset LRU queue.  */
-       memset(&freqs, 0, sizeof(struct lzx_freqs));
-       lzx_lru_queue_init(&ctx->queue);
+       memset(&record_ctx.freqs, 0, sizeof(struct lzx_freqs));
+       lzx_lru_queue_init(&record_ctx.queue);
+       record_ctx.matches = ctx->chosen_matches;
 
        /* Determine series of matches/literals to output.  */
-       num_matches = lz_analyze_block(ctx->window,
-                                      ctx->window_size,
-                                      (u32*)ctx->chosen_matches,
-                                      lzx_record_match,
-                                      lzx_record_literal,
-                                      &freqs,
-                                      &ctx->queue,
-                                      &freqs,
-                                      &lzx_lz_params);
+       lz_analyze_block(ctx->window,
+                        ctx->window_size,
+                        lzx_record_match,
+                        lzx_record_literal,
+                        &record_ctx,
+                        &lzx_lz_params);
 
 
        /* Set up block specification.  */
@@ -2190,9 +2200,9 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx)
        spec->block_type = LZX_BLOCKTYPE_ALIGNED;
        spec->window_pos = 0;
        spec->block_size = ctx->window_size;
-       spec->num_chosen_matches = num_matches;
+       spec->num_chosen_matches = (record_ctx.matches - ctx->chosen_matches);
        spec->chosen_matches_start_pos = 0;
-       lzx_make_huffman_codes(&freqs, &spec->codes);
+       lzx_make_huffman_codes(&record_ctx.freqs, &spec->codes);
        ctx->num_blocks = 1;
 }
 
@@ -2239,7 +2249,7 @@ wimlib_lzx_compress2(const void                   * const restrict uncompressed_data,
 {
        struct lzx_compressor *ctx = (struct lzx_compressor*)lzx_ctx;
        struct output_bitstream ostream;
-       unsigned compressed_len;
+       input_idx_t compressed_len;
 
        if (uncompressed_len < 100) {
                LZX_DEBUG("Too small to bother compressing.");
@@ -2286,16 +2296,12 @@ wimlib_lzx_compress2(const void                 * const restrict uncompressed_data,
        lzx_write_all_blocks(ctx, &ostream);
 
        LZX_DEBUG("Flushing bitstream...");
-       if (flush_output_bitstream(&ostream)) {
-               /* If the bitstream cannot be flushed, then the output space was
-                * exhausted.  */
+       compressed_len = flush_output_bitstream(&ostream);
+       if (compressed_len == ~(input_idx_t)0) {
                LZX_DEBUG("Data did not compress to less than original length!");
                return 0;
        }
 
-       /* Compute the length of the compressed data.  */
-       compressed_len = ostream.bit_output - (u8*)compressed_data;
-
        LZX_DEBUG("Done: compressed %u => %u bytes.",
                  uncompressed_len, compressed_len);
 
@@ -2310,11 +2316,10 @@ wimlib_lzx_compress2(const void                 * const restrict uncompressed_data,
            )
        {
                u8 buf[uncompressed_len];
-               int ret;
 
-               ret = wimlib_lzx_decompress(compressed_data, compressed_len,
-                                           buf, uncompressed_len);
-               if (ret) {
+               if (wimlib_lzx_decompress(compressed_data, compressed_len,
+                                         buf, uncompressed_len))
+               {
                        ERROR("Failed to decompress data we "
                              "compressed using LZX algorithm");
                        wimlib_assert(0);
index 935c636..0a45629 100644 (file)
  *
  * A compressed WIM resource consists of a table of chunk offsets followed by
  * the compressed chunks themselves.  All compressed chunks except possibly the
- * last decompress to WIM_CHUNK_SIZE (= 32768) bytes.  This is quite similar to
- * the cabinet (.cab) file format, but they are not the same.  According to the
- * cabinet format documentation, the LZX block size is independent from the
- * CFDATA blocks, and a LZX block may span several CFDATA blocks.  However, in
- * WIMs, LZX blocks do not appear to ever span multiple WIM chunks.  Note that
- * this means any WIM chunk may be decompressed or compressed independently from
- * any other chunk, which is convenient.
+ * last decompress to 32768 bytes.  This is quite similar to the cabinet (.cab)
+ * file format, but they are not the same.  According to the cabinet format
+ * documentation, the LZX block size is independent from the CFDATA blocks, and
+ * a LZX block may span several CFDATA blocks.  However, in WIMs, LZX blocks do
+ * not appear to ever span multiple WIM chunks.  Note that this means any WIM
+ * chunk may be decompressed or compressed independently from any other chunk,
+ * which is convenient.
  *
  * A LZX compressed WIM chunk contains one or more LZX blocks of the aligned,
  * verbatim, or uncompressed block types.  For aligned and verbatim blocks, the
index 6b90ba9..5a314b6 100644 (file)
 #include "wimlib/util.h"
 #include "wimlib/xpress.h"
 
-#include <stdlib.h>
 #include <string.h>
 
+/* Intermediate XPRESS match/literal representation.  */
+struct xpress_match {
+       u16 adjusted_len;  /* Match length minus XPRESS_MIN_MATCH_LEN */
+       u16 offset;        /* Match offset */
+       /* For literals, offset == 0 and adjusted_len is the literal.  */
+};
+
 /*
  * Writes @match, which is a match given in the intermediate representation for
  * XPRESS matches, to the output stream @ostream.
  *
  * @codewords and @lens provide the Huffman code that is being used.
  */
-static int
-xpress_write_match(struct output_bitstream *restrict ostream,
-                  u32 match,
+static void
+xpress_write_match(struct xpress_match match,
+                  struct output_bitstream *restrict ostream,
                   const u16 codewords[restrict],
                   const u8 lens[restrict])
 {
-       u32 adjusted_match_len = match & 0xffff;
-       u32 match_offset = match >> 16;
-       u32 len_hdr = min(adjusted_match_len, 0xf);
-       u32 offset_bsr = bsr32(match_offset);
-       u32 sym = len_hdr | (offset_bsr << 4) | XPRESS_NUM_CHARS;
-       int ret;
+       u8 len_hdr = min(match.adjusted_len, 0xf);
+       u8 offset_bsr = bsr32(match.offset);
+       unsigned sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr);
 
-       ret = bitstream_put_bits(ostream, codewords[sym], lens[sym]);
-       if (ret)
-               return ret;
+       bitstream_put_bits(ostream, codewords[sym], lens[sym]);
 
-       if (adjusted_match_len >= 0xf) {
-               u8 byte1 = min(adjusted_match_len - 0xf, 0xff);
-               ret = bitstream_put_byte(ostream, byte1);
-               if (ret)
-                       return ret;
+       if (match.adjusted_len >= 0xf) {
+               u8 byte1 = min(match.adjusted_len - 0xf, 0xff);
+               bitstream_put_byte(ostream, byte1);
                if (byte1 == 0xff) {
-                       ret = bitstream_put_two_bytes(ostream, adjusted_match_len);
-                       if (ret)
-                               return ret;
+                       bitstream_put_byte(ostream, match.adjusted_len & 0xff);
+                       bitstream_put_byte(ostream, match.adjusted_len >> 8);
                }
        }
-       return bitstream_put_bits(ostream,
-                                 match_offset ^ (1 << offset_bsr), offset_bsr);
+       bitstream_put_bits(ostream, match.offset ^ (1U << offset_bsr), offset_bsr);
 }
 
-static int
-xpress_write_compressed_literals(struct output_bitstream *ostream,
-                                const u32 match_tab[restrict],
-                                unsigned num_matches,
-                                const u16 codewords[restrict],
-                                const u8 lens[restrict])
+static void
+xpress_write_matches_and_literals(struct output_bitstream *ostream,
+                                 const struct xpress_match matches[restrict],
+                                 input_idx_t num_matches,
+                                 const u16 codewords[restrict],
+                                 const u8 lens[restrict])
 {
-       for (unsigned i = 0; i < num_matches; i++) {
-               int ret;
-               u32 match = match_tab[i];
-
-               if (match >= XPRESS_NUM_CHARS) /* match */
-                       ret = xpress_write_match(ostream, match, codewords,
-                                                lens);
-               else /* literal byte */
-                       ret = bitstream_put_bits(ostream, codewords[match],
-                                                lens[match]);
-               if (ret)
-                       return ret;
+       for (input_idx_t i = 0; i < num_matches; i++) {
+               if (matches[i].offset) {
+                       /* Real match  */
+                       xpress_write_match(matches[i], ostream, codewords, lens);
+               } else {
+                       /* Literal byte  */
+                       u8 lit = matches[i].adjusted_len;
+                       bitstream_put_bits(ostream, codewords[lit], lens[lit]);
+               }
        }
-       return bitstream_put_bits(ostream, codewords[XPRESS_END_OF_DATA],
-                                 lens[XPRESS_END_OF_DATA]);
+       bitstream_put_bits(ostream, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]);
 }
 
-static u32
-xpress_record_literal(u8 literal, void *_freq_tab)
+struct xpress_record_ctx {
+       freq_t freqs[XPRESS_NUM_SYMBOLS];
+       struct xpress_match *matches;
+};
+
+static void
+xpress_record_literal(u8 lit, void *_ctx)
 {
-       freq_t *freq_tab = _freq_tab;
-       freq_tab[literal]++;
-       return literal;
+       struct xpress_record_ctx *ctx = _ctx;
+       ctx->freqs[lit]++;
+       *(ctx->matches++) = (struct xpress_match) { .offset = 0, .adjusted_len = lit };
 }
 
-static u32
-xpress_record_match(unsigned match_offset, unsigned match_len,
-                   void *freq_tab, void *_ignore)
+static void
+xpress_record_match(unsigned len, unsigned offset, void *_ctx)
 {
-       wimlib_assert(match_len >= XPRESS_MIN_MATCH &&
-                     match_len <= XPRESS_MAX_MATCH);
-       wimlib_assert(match_offset >= XPRESS_MIN_OFFSET &&
-                     match_offset <= XPRESS_MAX_OFFSET);
+       struct xpress_record_ctx *ctx = _ctx;
 
-       /*
-        * The intermediate representation of XPRESS matches is as follows:
-        *
-        * bits    description
-        * ----    -----------------------------------------------------------
-        *
-        * 16-31   match offset (XPRESS_MIN_OFFSET < x < XPRESS_MAX_OFFSET)
-        *
-        * 0-15    adjusted match length (0 <= x <= XPRESS_MAX_MATCH - XPRESS_MIN_MATCH)
-        *
-        * Literals are simply represented as themselves and can be
-        * distinguished from matches by the fact that only literals will have
-        * the upper three bytes completely clear. */
+       XPRESS_ASSERT(len >= XPRESS_MIN_MATCH_LEN);
+       XPRESS_ASSERT(len <= XPRESS_MAX_MATCH_LEN);
+       XPRESS_ASSERT(offset >= XPRESS_MIN_OFFSET);
+       XPRESS_ASSERT(offset <= XPRESS_MAX_OFFSET);
+
+       unsigned adjusted_len = len - XPRESS_MIN_MATCH_LEN;
+       unsigned len_hdr = min(adjusted_len, 0xf);
+       unsigned sym = XPRESS_NUM_CHARS + ((bsr32(offset) << 4) | len_hdr);
 
-       u32 adjusted_match_len = match_len - XPRESS_MIN_MATCH;
-       u32 len_hdr = min(adjusted_match_len, 0xf);
-       u32 offset_bsr = bsr32(match_offset);
-       u32 sym = len_hdr | (offset_bsr << 4) | XPRESS_NUM_CHARS;
-       ((freq_t*)freq_tab)[sym]++;
-       return adjusted_match_len | (match_offset << 16);
+       XPRESS_ASSERT(sym >= XPRESS_NUM_CHARS);
+       XPRESS_ASSERT(sym < XPRESS_NUM_SYMBOLS);
+
+       ctx->freqs[sym]++;
+       *(ctx->matches++) = (struct xpress_match) { .offset = offset,
+                                                   .adjusted_len = adjusted_len };
 }
 
 static const struct lz_params xpress_lz_params = {
-       .min_match      = XPRESS_MIN_MATCH,
-       .max_match      = XPRESS_MAX_MATCH,
+       .min_match      = XPRESS_MIN_MATCH_LEN,
+       .max_match      = XPRESS_MAX_MATCH_LEN,
        .good_match     = 16,
        .nice_match     = 32,
        .max_chain_len  = 16,
@@ -152,122 +141,91 @@ static const struct lz_params xpress_lz_params = {
 
 /* API function documented in wimlib.h  */
 WIMLIBAPI unsigned
-wimlib_xpress_compress(const void * restrict _uncompressed_data,
+wimlib_xpress_compress(const void * restrict uncompressed_data,
                       unsigned uncompressed_len,
-                      void * restrict _compressed_data)
+                      void * restrict compressed_data)
 {
-       u8 *compressed_data = _compressed_data;
+       u8 *cptr = compressed_data;
        struct output_bitstream ostream;
-       u32 match_tab[uncompressed_len];
-       freq_t freq_tab[XPRESS_NUM_SYMBOLS];
+
+       struct xpress_record_ctx record_ctx;
+       struct xpress_match matches[uncompressed_len];
+       u8 udata[uncompressed_len + 8];
        u16 codewords[XPRESS_NUM_SYMBOLS];
        u8 lens[XPRESS_NUM_SYMBOLS];
-       unsigned num_matches;
-       unsigned compressed_len;
-       unsigned i;
-       int ret;
-       u8 uncompressed_data[uncompressed_len + 8];
-
-       memcpy(uncompressed_data, _uncompressed_data, uncompressed_len);
-       memset(uncompressed_data + uncompressed_len, 0, 8);
+       input_idx_t num_matches;
+       input_idx_t compressed_len;
+       input_idx_t i;
 
-       wimlib_assert(uncompressed_len <= 32768);
-
-       /* XPRESS requires 256 bytes of overhead for the Huffman tables, so it's
-        * impossible cannot compress 256 bytes or less of data to less than the
+       /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's
+        * impossible to compress 256 bytes or less of data to less than the
         * input size.
         *
         * +1 to take into account that the buffer for compressed data is 1 byte
         * smaller than the buffer for uncompressed data.
         *
         * +4 to take into account that init_output_bitstream() requires at
-        * least 4 bytes of data. */
+        * least 4 bytes of data.  */
        if (uncompressed_len < XPRESS_NUM_SYMBOLS / 2 + 1 + 4)
                return 0;
 
-       ZERO_ARRAY(freq_tab);
-       num_matches = lz_analyze_block(uncompressed_data, uncompressed_len,
-                                      match_tab, xpress_record_match,
-                                      xpress_record_literal, freq_tab,
-                                      NULL, freq_tab,
-                                      &xpress_lz_params);
+       /* Copy the data to a temporary buffer, but only to avoid
+        * inconsequential accesses of uninitialized memory in
+        * lz_analyze_block().  */
+       memcpy(udata, uncompressed_data, uncompressed_len);
+       memset(udata + uncompressed_len, 0, 8);
+
+       /* Determine match/literal sequence to divide the data into.  */
+       ZERO_ARRAY(record_ctx.freqs);
+       record_ctx.matches = matches;
+       lz_analyze_block(udata,
+                        uncompressed_len,
+                        xpress_record_match,
+                        xpress_record_literal,
+                        &record_ctx,
+                        &xpress_lz_params);
 
-       freq_tab[XPRESS_END_OF_DATA]++;
+       num_matches = (record_ctx.matches - matches);
 
+       /* Account for end of data symbol.  */
+       record_ctx.freqs[XPRESS_END_OF_DATA]++;
+
+       /* Build the Huffman code.  */
        make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN,
-                                   freq_tab, lens, codewords);
+                                   record_ctx.freqs, lens, codewords);
 
-       /* IMPORTANT NOTE:
-        *
-        * It's tempting to output the 512 Huffman codeword lengths using the
-        * bitstream_put_bits() function.  However, this is NOT correct because
-        * bitstream_put_bits() will output 2 bytes at a time in little-endian
-        * order, which is the order that is needed for the compressed literals.
-        * However, the bytes in the lengths table are in order, so they need to
-        * be written one at a time without using bitstream_put_bits().
-        *
-        * Because of this, init_output_bitstream() is not called until after
-        * the lengths table is output.
-        */
+       /* Output the Huffman code as a series of 512 4-bit lengths.  */
        for (i = 0; i < XPRESS_NUM_SYMBOLS; i += 2)
-               *compressed_data++ = (lens[i] & 0xf) | (lens[i + 1] << 4);
+               *cptr++ = (lens[i] & 0xf) | (lens[i + 1] << 4);
 
-       init_output_bitstream(&ostream, compressed_data,
+       /* Output the encoded matches/literals.  */
+       init_output_bitstream(&ostream, cptr,
                              uncompressed_len - XPRESS_NUM_SYMBOLS / 2 - 1);
+       xpress_write_matches_and_literals(&ostream, matches,
+                                         num_matches, codewords, lens);
 
-       ret = xpress_write_compressed_literals(&ostream, match_tab,
-                                              num_matches, codewords, lens);
-       if (ret)
+       /* Flush any pending data and get the length of the compressed data.  */
+       compressed_len = flush_output_bitstream(&ostream);
+       if (compressed_len == ~(input_idx_t)0)
                return 0;
+       compressed_len += XPRESS_NUM_SYMBOLS / 2;
 
-       /* Flush any bits that are buffered. */
-       ret = flush_output_bitstream(&ostream);
-       if (ret)
+#if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION) || 1
+       /* Verify that we really get the same thing back when decompressing.  */
+       if (wimlib_xpress_decompress(compressed_data, compressed_len,
+                                    udata, uncompressed_len))
+       {
+               ERROR("Failed to decompress data we "
+                     "compressed using XPRESS algorithm");
+               wimlib_assert(0);
                return 0;
+       }
 
-       /* Assert that there are no output bytes between the ostream.output
-        * pointer and the ostream.next_bit_output pointer.  This can only
-        * happen if bytes had been written at the ostream.output pointer before
-        * the last bit word was written to the stream.  But, this does not
-        * occur since xpress_write_match() always finishes by writing some bits
-        * (a Huffman symbol), and the bitstream was just flushed. */
-       wimlib_assert(ostream.output - ostream.next_bit_output == 2);
-
-       /* The length of the compressed data is supposed to be the value of the
-        * ostream.output pointer before flushing, which is now the
-        * output.next_bit_output pointer after flushing.
-        *
-        * There will be an extra 2 bytes at the ostream.bit_output pointer,
-        * which is zeroed out.  (These 2 bytes may be either the last bytes in
-        * the compressed data, in which case they are actually unnecessary, or
-        * they may precede a number of bytes embedded into the bitstream.) */
-       if (ostream.bit_output >
-           (const u8*)_compressed_data + uncompressed_len - 3)
+       if (memcmp(uncompressed_data, udata, uncompressed_len)) {
+               ERROR("Data we compressed using XPRESS algorithm "
+                     "didn't decompress to original");
+               wimlib_assert(0);
                return 0;
-       *(u16*)ostream.bit_output = cpu_to_le16(0);
-       compressed_len = ostream.next_bit_output - (const u8*)_compressed_data;
-
-       wimlib_assert(compressed_len <= uncompressed_len - 1);
-
-#ifdef ENABLE_VERIFY_COMPRESSION
-       /* Verify that we really get the same thing back when decompressing. */
-       {
-               u8 buf[uncompressed_len];
-               ret = wimlib_xpress_decompress(_compressed_data, compressed_len,
-                                              buf, uncompressed_len);
-               if (ret) {
-                       ERROR("xpress_compress(): Failed to decompress data we "
-                             "compressed");
-                       abort();
-               }
-               for (i = 0; i < uncompressed_len; i++) {
-                       if (buf[i] != uncompressed_data[i]) {
-                               ERROR("xpress_compress(): Data we compressed didn't "
-                                     "decompress to the original data (difference at "
-                                     "byte %u of %u)", i + 1, uncompressed_len);
-                               abort();
-                       }
-               }
        }
 #endif
        return compressed_len;
index cf08a83..b71f6be 100644 (file)
  */
 
 
-
 /*
- * The XPRESS compression format is a LZ77 and Huffman-code based algorithm.
- * That means it is quite similar to LZX compression, but XPRESS is slightly
- * simpler, so it is a little faster to compress and decompress.
+ * The XPRESS compression format is an LZ77 and Huffman-code based algorithm.
+ * That means it is fairly similar to LZX compression, but XPRESS is simpler, so
+ * it is a little faster to compress and decompress.
  *
  * The XPRESS compression format is mostly documented in a file called "[MS-XCA]
  * Xpress Compression Algorithm".  In the MSDN library, it can currently be
  * If you are already familiar with the LZ77 algorithm and Huffman coding, the
  * XPRESS format is fairly simple.  The compressed data begins with 256 bytes
  * that contain 512 4-bit integers that are the lengths of the symbols in the
- * Huffman tree used for decoding compressed literals.  This is the only Huffman
- * tree that is used for the entirety of the compressed data, and the codeword
+ * Huffman code used for match/literal headers.  In contrast with more
+ * complicated formats such as DEFLATE and LZX, this is the only Huffman code
+ * that is used for the entirety of the XPRESS compressed data, and the codeword
  * lengths are not encoded with a pretree.
  *
  * The rest of the compressed data is Huffman-encoded symbols.  Values 0 through
- * 255 are literal bytes.  Values 256 through 511 are matches and may require
- * extra bits or bytes to be read to get the match offset and match length.
- *
- * There is no notion of a "compressed block" in the XPRESS format, so in the
- * XPRESS format, each WIM chunk (32768 bytes) will always use only one Huffman
- * tree.
+ * 255 represent the corresponding literal bytes.  Values 256 through 511
+ * represent matches and may require extra bits or bytes to be read to get the
+ * match offset and match length.
  *
- * The trickiest part is probably the fact that literal bytes for match lengths
- * are encoded "separately" from the bitstream.
+ * The trickiest part is probably the way in which literal bytes for match
+ * lengths are interleaved in the bitstream.
  *
  * Also, a caveat--- according to Microsoft's documentation for XPRESS,
  *
  *     fail during decompression if the Huffman symbol 256 is not found after
  *     the actual data."
  *
- * This is the case for WIM files--- in we must write this extra symbol "256" at
- * the end.  Otherwise Microsoft's software will fail to decompress the
- * XPRESS-compressed data.
- *
- * However, wimlib's decompressor in this file currently does not care if this
- * extra symbol is there or not.
+ * This is the case for the implementation in WIMGAPI.  However, wimlib's
+ * decompressor in this file currently does not care if this extra symbol is
+ * there or not.
  */
 
 #ifdef HAVE_CONFIG_H
 #endif
 
 #include "wimlib.h"
-#include "wimlib/assert.h"
-#define XPRESS_DECOMP
 #include "wimlib/decompress.h"
-#include "wimlib/util.h"
 #include "wimlib/xpress.h"
 
 /*
- * Decodes a symbol @huffsym that begins an XPRESS match.
+ * Decodes a symbol @sym that begins an XPRESS match.
  *
  * The low 8 bits of the symbol are divided into:
  *
  * bits 0-3:  length header
  * bits 4-7:  index of high-order bit of match offset
  *
- * Note: taking the low 8 bits of the symbol is the same as subtracting 256, the
- * number of symbols reserved for literals.
- *
- * Returns the match length, or -1 on error.
+ * Returns the match length, or -1 if the data is invalid.
  */
 static int
-xpress_decode_match(unsigned huffsym, unsigned window_pos,
-                   unsigned window_len, u8 window[restrict],
+xpress_decode_match(unsigned sym, input_idx_t window_pos,
+                   input_idx_t window_len, u8 window[restrict],
                    struct input_bitstream * restrict istream)
 {
-       unsigned match_len;
-       unsigned match_offset;
-       u8 match_sym = (u8)huffsym;
-       u8 len_hdr = match_sym & 0xf;
-       u8 offset_bsr = match_sym >> 4;
+
+       u8 len_hdr;
+       u8 offset_bsr;
        int ret;
        u8 *match_dest;
        u8 *match_src;
        unsigned i;
+       unsigned match_len;
+       unsigned match_offset;
+
+       sym -= XPRESS_NUM_CHARS;
+       len_hdr = sym & 0xf;
+       offset_bsr = sym >> 4;
+
+       if (bitstream_ensure_bits(istream, 16))
+               return -1;
 
-       ret = bitstream_read_bits(istream, offset_bsr, &match_offset);
-       if (ret)
-               return ret;
-       match_offset |= (1 << offset_bsr);
+       match_offset = (1U << offset_bsr) | bitstream_pop_bits(istream, offset_bsr);
 
        if (len_hdr == 0xf) {
                ret = bitstream_read_byte(istream);
                if (ret < 0)
                        return ret;
                match_len = ret;
-               if (match_len == 0xff) {
+               if (unlikely(match_len == 0xff)) {
                        ret = bitstream_read_byte(istream);
                        if (ret < 0)
                                return ret;
@@ -137,24 +129,17 @@ xpress_decode_match(unsigned huffsym, unsigned window_pos,
        } else {
                match_len = len_hdr;
        }
-       match_len += XPRESS_MIN_MATCH;
+       match_len += XPRESS_MIN_MATCH_LEN;
 
-       /* Verify that the match is in the bounds of the part of the window
-        * currently in use, then copy the source of the match to the current
-        * position. */
 
-       if (window_pos + match_len > window_len) {
-               DEBUG("XPRESS decompression error: match of length %u "
-                     "bytes overflows window", match_len);
+       /* Verify the match is in bounds, then copy its data to the the current
+        * position.  */
+
+       if (window_pos + match_len > window_len)
                return -1;
-       }
 
-       if (match_offset > window_pos) {
-               DEBUG("XPRESS decompression error: match of length %u bytes "
-                     "references data before window (match_offset = %u, "
-                     "window_pos = %u)", match_len, match_offset, window_pos);
+       if (match_offset > window_pos)
                return -1;
-       }
 
        match_dest = window + window_pos;
        match_src = match_dest - match_offset;
@@ -165,39 +150,44 @@ xpress_decode_match(unsigned huffsym, unsigned window_pos,
        return match_len;
 }
 
-/* Decodes the Huffman-encoded matches and literal bytes in a block of
- * XPRESS-encoded data. */
+/* Decodes the Huffman-encoded matches and literal bytes in a region of
+ * XPRESS-encoded data.  */
 static int
-xpress_decompress_block(struct input_bitstream * restrict istream,
-                       u8 uncompressed_data[restrict],
-                       unsigned uncompressed_len,
-                       const u8 lens[restrict],
-                       const u16 decode_table[restrict])
+xpress_lz_decode(struct input_bitstream * restrict istream,
+                u8 uncompressed_data[restrict],
+                unsigned uncompressed_len,
+                const u8 lens[restrict],
+                const u16 decode_table[restrict])
 {
-       unsigned curpos;
-       unsigned huffsym;
-       int ret;
-       int match_len;
-
-       curpos = 0;
-       while (curpos < uncompressed_len) {
-               ret = read_huffsym(istream, decode_table, lens,
-                                  XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS,
-                                  &huffsym, XPRESS_MAX_CODEWORD_LEN);
-               if (ret)
-                       return ret;
+       input_idx_t curpos;
+       unsigned match_len;
+
+       for (curpos = 0; curpos < uncompressed_len; curpos += match_len) {
+               unsigned sym;
+               int ret;
+
+               if (unlikely(bitstream_ensure_bits(istream, 16)))
+                       return -1;
 
-               if (huffsym < XPRESS_NUM_CHARS) {
-                       uncompressed_data[curpos++] = huffsym;
+               if (unlikely(read_huffsym(istream, decode_table, lens,
+                                         XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS,
+                                         &sym, XPRESS_MAX_CODEWORD_LEN)))
+                       return -1;
+
+               if (sym < XPRESS_NUM_CHARS) {
+                       /* Literal  */
+                       uncompressed_data[curpos] = sym;
+                       match_len = 1;
                } else {
-                       match_len = xpress_decode_match(huffsym,
-                                                       curpos,
-                                                       uncompressed_len,
-                                                       uncompressed_data,
-                                                       istream);
-                       if (match_len < 0)
-                               return match_len;
-                       curpos += match_len;
+                       /* Match  */
+                       ret = xpress_decode_match(sym,
+                                                 curpos,
+                                                 uncompressed_len,
+                                                 uncompressed_data,
+                                                 istream);
+                       if (unlikely(ret < 0))
+                               return -1;
+                       match_len = ret;
                }
        }
        return 0;
@@ -206,48 +196,38 @@ xpress_decompress_block(struct input_bitstream * restrict istream,
 
 /* API function documented in wimlib.h  */
 WIMLIBAPI int
-wimlib_xpress_decompress(const void * restrict _compressed_data, unsigned compressed_len,
-                        void * restrict uncompressed_data, unsigned uncompressed_len)
+wimlib_xpress_decompress(const void * const restrict _compressed_data,
+                        const unsigned compressed_len,
+                        void * const restrict uncompressed_data,
+                        const unsigned uncompressed_len)
 {
+       const u8 *compressed_data = _compressed_data;
        u8 lens[XPRESS_NUM_SYMBOLS];
+       u8 *lens_p;
        u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS]
                        _aligned_attribute(DECODE_TABLE_ALIGNMENT);
        struct input_bitstream istream;
-       u8 *lens_p;
-       const u8 *compressed_data;
-       unsigned i;
-       int ret;
-
-       compressed_data = _compressed_data;
-       lens_p = lens;
 
-       DEBUG2("compressed_len = %d, uncompressed_len = %d",
-              compressed_len, uncompressed_len);
-
-       /* XPRESS uses only one Huffman tree.  It contains 512 symbols, and the
+       /* XPRESS uses only one Huffman code.  It contains 512 symbols, and the
         * code lengths of these symbols are given literally as 4-bit integers
-        * in the first 256 bytes of the compressed data.
-        */
-       if (compressed_len < XPRESS_NUM_SYMBOLS / 2) {
-               DEBUG("xpress_decompress(): Compressed length too short!");
+        * in the first 256 bytes of the compressed data.  */
+       if (compressed_len < XPRESS_NUM_SYMBOLS / 2)
                return -1;
-       }
 
-       for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
+       lens_p = lens;
+       for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
                *lens_p++ = compressed_data[i] & 0xf;
                *lens_p++ = compressed_data[i] >> 4;
        }
 
-       ret = make_huffman_decode_table(decode_table, XPRESS_NUM_SYMBOLS,
-                                       XPRESS_TABLEBITS, lens,
-                                       XPRESS_MAX_CODEWORD_LEN);
-       if (ret)
-               return ret;
+       if (make_huffman_decode_table(decode_table, XPRESS_NUM_SYMBOLS,
+                                     XPRESS_TABLEBITS, lens,
+                                     XPRESS_MAX_CODEWORD_LEN))
+               return -1;
 
        init_input_bitstream(&istream, compressed_data + XPRESS_NUM_SYMBOLS / 2,
                             compressed_len - XPRESS_NUM_SYMBOLS / 2);
 
-       return xpress_decompress_block(&istream, uncompressed_data,
-                                      uncompressed_len, lens,
-                                      decode_table);
+       return xpress_lz_decode(&istream, uncompressed_data,
+                               uncompressed_len, lens, decode_table);
 }