From 4757f17833c96b8c83a7e17cbc6f374c449d60db Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 8 Dec 2013 21:06:57 -0600 Subject: [PATCH] Clean up other compression/decompression code --- include/wimlib/compress.h | 88 ++++++----- include/wimlib/decompress.h | 226 ++++++++++++---------------- include/wimlib/lzx.h | 4 + include/wimlib/xpress.h | 9 +- src/compress.c | 102 +++++++------ src/decompress.c | 5 +- src/lz77.c | 119 +++++---------- src/lzx-compress.c | 93 ++++++------ src/lzx-decompress.c | 14 +- src/xpress-compress.c | 286 +++++++++++++++--------------------- src/xpress-decompress.c | 198 +++++++++++-------------- 11 files changed, 510 insertions(+), 634 deletions(-) diff --git a/include/wimlib/compress.h b/include/wimlib/compress.h index b6485a0d..f3ce6e2d 100644 --- a/include/wimlib/compress.h +++ b/include/wimlib/compress.h @@ -1,7 +1,7 @@ /* * compress.h * - * Functions useful for compression, mainly bitstreams. + * Header for compression code shared by multiple compression formats. */ #ifndef _WIMLIB_COMPRESS_H @@ -12,9 +12,12 @@ typedef u16 output_bitbuf_t; -/* Assuming that WIM chunks are at most 32768 bytes, 16 bits is enough for any - * symbol frequency. */ -typedef u16 freq_t; +/* Variable type that can represent all possible window positions. */ +typedef u32 freq_t; +#ifndef INPUT_IDX_T_DEFINED +#define INPUT_IDX_T_DEFINED +typedef u32 input_idx_t; +#endif /* Structure to keep track of the current position in the compressed output. */ struct output_bitstream { @@ -26,38 +29,42 @@ struct output_bitstream { /* Number of free bits in @bitbuf */ unsigned free_bits; + /* Pointer to the start of the output buffer. */ + u8 *output_start; + + /* Position at which to write the next 16 bits. */ u8 *bit_output; + + /* Next position to write 16 bits, after they are written to bit_output. + * This is after @next_bit_output and may be separated from @bit_output + * by literal bytes. */ u8 *next_bit_output; - /* Pointer to the next byte in the compressed output. */ + /* Next position to write literal bytes. This is after @bit_output and + * @next_bit_output, and may be separated from them by literal bytes. + */ u8 *output; + /* Bytes remaining in @output buffer. */ + input_idx_t bytes_remaining; - /* Number of bytes left in the memory pointed to by @output. */ - int num_bytes_remaining; + /* Set to true if the buffer has been exhausted. */ + bool overrun; }; -static inline int -bitstream_put_byte(struct output_bitstream *ostream, u8 n) -{ - if (ostream->num_bytes_remaining < 1) - return 1; - *ostream->output = n; - ostream->output++; - ostream->num_bytes_remaining--; - return 0; -} - -static inline int -bitstream_put_two_bytes(struct output_bitstream *ostream, u16 n) -{ - if (ostream->num_bytes_remaining < 2) - return 1; - *(u16*)ostream->output = cpu_to_le16(n); - ostream->output += 2; - ostream->num_bytes_remaining -= 2; - return 0; -} +extern void +init_output_bitstream(struct output_bitstream *ostream, + void *data, unsigned num_bytes); + +extern input_idx_t +flush_output_bitstream(struct output_bitstream *ostream); + +extern void +bitstream_put_bits(struct output_bitstream *ostream, + output_bitbuf_t bits, unsigned num_bits); + +extern void +bitstream_put_byte(struct output_bitstream *ostream, u8 n); struct lz_params { unsigned min_match; @@ -69,30 +76,17 @@ struct lz_params { unsigned too_far; }; -typedef u32 (*lz_record_match_t)(unsigned, unsigned, void *, void *); -typedef u32 (*lz_record_literal_t)(u8, void *); +typedef void (*lz_record_match_t)(unsigned len, unsigned offset, void *ctx); +typedef void (*lz_record_literal_t)(u8 lit, void *ctx); -extern unsigned -lz_analyze_block(const u8 uncompressed_data[], - unsigned uncompressed_len, - u32 match_tab[], +extern void +lz_analyze_block(const u8 window[], + input_idx_t window_size, lz_record_match_t record_match, lz_record_literal_t record_literal, - void *record_match_arg1, - void *record_match_arg2, - void *record_literal_arg, + void *record_ctx, const struct lz_params *params); -extern int bitstream_put_bits(struct output_bitstream *ostream, - output_bitbuf_t bits, unsigned num_bits); - -extern void -init_output_bitstream(struct output_bitstream *ostream, - void *data, unsigned num_bytes); - -extern int -flush_output_bitstream(struct output_bitstream *ostream); - extern void make_canonical_huffman_code(unsigned num_syms, unsigned max_codeword_len, diff --git a/include/wimlib/decompress.h b/include/wimlib/decompress.h index 7d0d3852..b1f534fb 100644 --- a/include/wimlib/decompress.h +++ b/include/wimlib/decompress.h @@ -1,30 +1,37 @@ /* * decompress.h * - * Functions useful for decompression, mainly bitstreams. + * Header for decompression code shared by multiple compression formats. */ #ifndef _WIMLIB_DECOMPRESS_H #define _WIMLIB_DECOMPRESS_H #include "wimlib/assert.h" -#include "wimlib/endianness.h" +#include "wimlib/compiler.h" #include "wimlib/error.h" +#include "wimlib/endianness.h" #include "wimlib/types.h" /* Must be at least 32 bits. */ -typedef unsigned long input_bitbuf_t; +typedef u32 input_bitbuf_t; +#define INPUT_BITBUF_BITS (sizeof(input_bitbuf_t) * 8) + +#ifndef INPUT_IDX_T_DEFINED +#define INPUT_IDX_T_DEFINED +typedef u32 input_idx_t; +#endif /* Structure to encapsulate a block of in-memory data that is being interpreted * as a stream of bits. * * This is geared specifically towards the XPRESS and LZX compression formats - * with regards to the actual ordering the bits within the byte sequence. */ + * with regards to the actual ordering the bits within the byte sequence. */ struct input_bitstream { /* A variable of length at least 32 bits that is used to hold bits that * have been read from the stream. The bits are ordered from high-order - * to low-order, and the next bit is always the high-order bit. */ + * to low-order, and the next bit is always the high-order bit. */ input_bitbuf_t bitbuf; /* Pointer to the next byte to be retrieved from the input. */ @@ -33,14 +40,14 @@ struct input_bitstream { /* Number of bits in @bitbuf that are valid. */ unsigned bitsleft; - /* Number of words of data that are left. */ - unsigned data_bytes_left; + /* Number of words of data that are left. */ + input_idx_t data_bytes_left; }; /* Initializes a bitstream to receive its input from @data. */ static inline void init_input_bitstream(struct input_bitstream *istream, - const void *data, unsigned num_data_bytes) + const void *data, input_idx_t num_data_bytes) { istream->bitbuf = 0; istream->bitsleft = 0; @@ -49,7 +56,7 @@ init_input_bitstream(struct input_bitstream *istream, } /* Ensures that the bit buffer variable for the bitstream contains @num_bits - * bits. + * bits, which must be 16 or fewer. * * If there are at least @num_bits bits remaining in the bitstream, 0 is * returned. Otherwise, -1 is returned. */ @@ -58,103 +65,83 @@ bitstream_ensure_bits(struct input_bitstream *istream, unsigned num_bits) { wimlib_assert2(num_bits <= 16); - int ret = 0; - - /* Unfortunately this needs to be different for the different - * compression types. LZX requires reading no more than the number of - * bits needed, otherwise the end of the compressed data may be overrun. - * XPRESS, on the other hand, requires that we always return with at - * least 16 bits in the buffer, even if fewer are requested. This is - * important because this may change the location of a literal byte - * read with bitstream_read_byte(). */ -#ifdef XPRESS_DECOMP - if (istream->bitsleft < 16) { -#else - if (istream->bitsleft < num_bits) { -#endif - if (likely(istream->data_bytes_left >= 2)) { - unsigned shift = sizeof(input_bitbuf_t) * 8 - 16 - - istream->bitsleft; - istream->bitbuf |= (input_bitbuf_t)le16_to_cpu( - *(le16*)istream->data) << shift; - istream->data += 2; - istream->bitsleft += 16; - istream->data_bytes_left -= 2; - } else { - ret = -1; - } - } - wimlib_assert2(ret != 0 || istream->bitsleft >= num_bits); - return ret; + if (istream->bitsleft >= num_bits) + return 0; + + if (unlikely(istream->data_bytes_left < 2)) + return -1; + + istream->bitbuf |= le16_to_cpu(*(le16*)istream->data) << + (INPUT_BITBUF_BITS - 16 - istream->bitsleft); + istream->data += 2; + istream->bitsleft += 16; + istream->data_bytes_left -= 2; + return 0; } /* Returns the next @num_bits bits in the buffer variable, which must contain at - * least @num_bits bits, for the bitstream. */ + * least @num_bits bits, for the bitstream. */ static inline unsigned bitstream_peek_bits(const struct input_bitstream *istream, unsigned num_bits) { wimlib_assert2(istream->bitsleft >= num_bits); - int ret; + if (unlikely(num_bits == 0)) - ret = 0; - else - ret = istream->bitbuf >> (sizeof(input_bitbuf_t) * 8 - num_bits); - return ret; + return 0; + + return istream->bitbuf >> (INPUT_BITBUF_BITS - num_bits); } /* Removes @num_bits bits from the buffer variable, which must contain at least - * @num_bits bits, for the bitstream. */ + * @num_bits bits, for the bitstream. */ static inline void bitstream_remove_bits(struct input_bitstream *istream, unsigned num_bits) { wimlib_assert2(istream->bitsleft >= num_bits); + istream->bitbuf <<= num_bits; istream->bitsleft -= num_bits; } -/* Reads @num_bits bits from the input bitstream. @num_bits must be 16 or fewer. - * On success, returns 0 and returns the requested bits in @n. If there are - * fewer than @num_bits remaining in the bitstream, -1 is returned. */ +/* Gets and removes @num_bits bits from the buffer variable, which must contain + * at least @num_bits bits, for the bitstream. */ +static inline unsigned +bitstream_pop_bits(struct input_bitstream *istream, + unsigned num_bits) +{ + unsigned n = bitstream_peek_bits(istream, num_bits); + bitstream_remove_bits(istream, num_bits); + return n; +} + +/* Reads @num_bits bits from the input bitstream. @num_bits must be 16 or + * fewer. On success, returns 0 and returns the requested bits in @n. If there + * are fewer than @num_bits remaining in the bitstream, -1 is returned. */ static inline int bitstream_read_bits(struct input_bitstream *istream, unsigned num_bits, unsigned *n) { - wimlib_assert2(num_bits <= 16); - int ret = bitstream_ensure_bits(istream, num_bits); - if (likely(ret == 0)) { - *n = bitstream_peek_bits(istream, num_bits); - bitstream_remove_bits(istream, num_bits); - } else { - ERROR("bitstream_read_bits(): Input buffer exhausted"); - } - return ret; + if (unlikely(bitstream_ensure_bits(istream, num_bits))) + return -1; + + *n = bitstream_pop_bits(istream, num_bits); + return 0; } -/* In the XPRESS format there can be literal bytes embedded in the bitstream. - * These bytes are basically separate from the bitstream, as they come AFTER the - * bits that are currently in the buffer variable (based on reading 16 bits at a - * time), even though the buffer variable may not be empty. - * - * This function returns the next such literal byte, or -1 if there are no more. - */ +/* Return the next literal byte embedded in the bitstream, or -1 if the input + * was exhausted. */ static inline int bitstream_read_byte(struct input_bitstream *istream) { - wimlib_assert2(istream->bitsleft < 32); - int ret; + if (unlikely(istream->data_bytes_left < 1)) + return -1; - if (unlikely(istream->data_bytes_left == 0)) { - ERROR("bitstream_read_byte(): Input buffer exhausted"); - ret = -1; - } else { - istream->data_bytes_left--; - ret = *istream->data++; - } - return ret; + istream->data_bytes_left--; + return *istream->data++; } /* Reads @num_bits bits from the buffer variable for a bistream without checking - * to see if that many bits are in the buffer or not. */ + * to see if that many bits are in the buffer or not. */ static inline unsigned bitstream_read_bits_nocheck(struct input_bitstream *istream, unsigned num_bits) { @@ -171,70 +158,45 @@ read_huffsym_near_end_of_input(struct input_bitstream *istream, unsigned table_bits, unsigned *n); -/* - * Reads a Huffman-encoded symbol from a bitstream. - * - * This function may be called hundreds of millions of times when extracting a - * large WIM file. I'm not sure it could be made much faster that it is, - * especially since there isn't enough time to make a big table that allows - * decoding multiple symbols per lookup. But if extracting files to a hard - * disk, the I/O will be the bottleneck anyway. - * - * @buf: The input buffer from which the symbol will be read. - * @decode_table: The fast Huffman decoding table for the Huffman tree. - * @lengths: The table that gives the length of the code for each - * symbol. - * @num_symbols: The number of symbols in the Huffman code. - * @table_bits: Huffman codes this length or less can be looked up - * directory in the decode_table, as the - * decode_table contains 2**table_bits entries. - */ +/* Read a Huffman-encoded symbol from a bitstream. */ static inline int -read_huffsym(struct input_bitstream *istream, - const u16 decode_table[], - const u8 lens[], +read_huffsym(struct input_bitstream * restrict istream, + const u16 decode_table[restrict], + const u8 lens[restrict], unsigned num_syms, unsigned table_bits, - unsigned *n, + unsigned *restrict n, unsigned max_codeword_len) { - int ret; - - /* In the most common case, there are at least max_codeword_len bits - * remaining in the stream. */ - if (likely(bitstream_ensure_bits(istream, max_codeword_len) == 0)) { - - /* Use the next table_bits of the input as an index into the - * decode_table. */ - u16 key_bits = bitstream_peek_bits(istream, table_bits); - - u16 sym = decode_table[key_bits]; - - /* If the entry in the decode table is not a valid symbol, it is - * the offset of the root of its Huffman subtree. */ - if (likely(sym < num_syms)) { - wimlib_assert2(lens[sym] <= table_bits); - bitstream_remove_bits(istream, lens[sym]); - } else { - bitstream_remove_bits(istream, table_bits); - do { - key_bits = sym + bitstream_peek_bits(istream, 1); - bitstream_remove_bits(istream, 1); - - wimlib_assert2(key_bits < num_syms * 2 + - (1 << table_bits)); - } while ((sym = decode_table[key_bits]) >= num_syms); - } - *n = sym; - ret = 0; + /* If there are fewer bits remaining in the input than the maximum + * codeword length, use the slow path that has extra checks. */ + if (unlikely(bitstream_ensure_bits(istream, max_codeword_len))) { + return read_huffsym_near_end_of_input(istream, decode_table, + lens, num_syms, + table_bits, n); + } + + /* Use the next table_bits of the input as an index into the + * decode_table. */ + u16 key_bits = bitstream_peek_bits(istream, table_bits); + + u16 sym = decode_table[key_bits]; + + if (likely(sym < num_syms)) { + /* Fast case: The decode table directly provided the symbol. */ + bitstream_remove_bits(istream, lens[sym]); } else { - /* Otherwise, we must be careful to use only the bits that are - * actually remaining. */ - ret = read_huffsym_near_end_of_input(istream, decode_table, - lens, num_syms, - table_bits, n); + /* Slow case: The symbol took too many bits to include directly + * in the decode table, so search for it in a binary tree at the + * end of the decode table. */ + bitstream_remove_bits(istream, table_bits); + do { + key_bits = sym + bitstream_peek_bits(istream, 1); + bitstream_remove_bits(istream, 1); + } while ((sym = decode_table[key_bits]) >= num_syms); } - return ret; + *n = sym; + return 0; } extern int @@ -243,7 +205,7 @@ make_huffman_decode_table(u16 decode_table[], unsigned num_syms, unsigned max_codeword_len); /* Minimum alignment for the decode_table parameter to - * make_huffman_decode_table(). */ + * make_huffman_decode_table(). */ #define DECODE_TABLE_ALIGNMENT 16 #endif /* _WIMLIB_DECOMPRESS_H */ diff --git a/include/wimlib/lzx.h b/include/wimlib/lzx.h index b6d1ec23..cb17fbea 100644 --- a/include/wimlib/lzx.h +++ b/include/wimlib/lzx.h @@ -1,6 +1,10 @@ #ifndef _WIMLIB_LZX_H #define _WIMLIB_LZX_H +/* Constants for the LZX data compression format. See the comments in + * lzx-compress.c and lzx-decompress.c for more information about this format. + * */ + #include "wimlib/assert.h" #include "wimlib/types.h" diff --git a/include/wimlib/xpress.h b/include/wimlib/xpress.h index c07dbae8..2b57bae7 100644 --- a/include/wimlib/xpress.h +++ b/include/wimlib/xpress.h @@ -1,13 +1,16 @@ #ifndef _WIMLIB_XPRESS_H #define _WIMLIB_XPRESS_H -/* See the comments in xpress-decompress.c about the XPRESS format. */ +/* Constants for the XPRESS data compression format. See the comments in + * xpress-decompress.c for more information about this format. */ //#define ENABLE_XPRESS_DEBUG #ifdef ENABLE_XPRESS_DEBUG # define XPRESS_DEBUG DEBUG +# define XPRESS_ASSERT wimlib_assert #else # define XPRESS_DEBUG(format, ...) +# define XPRESS_ASSERT(...) #endif #define XPRESS_NUM_CHARS 256 @@ -20,7 +23,7 @@ #define XPRESS_MIN_OFFSET 1 #define XPRESS_MAX_OFFSET 65535 -#define XPRESS_MIN_MATCH 3 -#define XPRESS_MAX_MATCH 65538 +#define XPRESS_MIN_MATCH_LEN 3 +#define XPRESS_MAX_MATCH_LEN 65538 #endif /* _WIMLIB_XPRESS_H */ diff --git a/src/compress.c b/src/compress.c index dfaabe6b..f98a5a52 100644 --- a/src/compress.c +++ b/src/compress.c @@ -27,73 +27,85 @@ # include "config.h" #endif - #include "wimlib/assert.h" +#include "wimlib/compiler.h" #include "wimlib/compress.h" #include "wimlib/util.h" #include #include -static inline void -flush_bits(struct output_bitstream *ostream) -{ - *(le16*)ostream->bit_output = cpu_to_le16(ostream->bitbuf); - ostream->bit_output = ostream->next_bit_output; - ostream->next_bit_output = ostream->output; - ostream->output += 2; - ostream->num_bytes_remaining -= 2; -} - /* Writes @num_bits bits, given by the @num_bits least significant bits of * @bits, to the output @ostream. */ -int +void bitstream_put_bits(struct output_bitstream *ostream, output_bitbuf_t bits, unsigned num_bits) { unsigned rem_bits; - wimlib_assert(num_bits <= 16); if (num_bits <= ostream->free_bits) { + /* Buffer variable had space for the new bits. */ ostream->bitbuf = (ostream->bitbuf << num_bits) | bits; ostream->free_bits -= num_bits; - } else { + return; + } - if (ostream->num_bytes_remaining + (ostream->output - - ostream->bit_output) < 2) - return 1; - - /* It is tricky to output the bits correctly. The correct way - * is to output little-endian 2-byte words, such that the bits - * in the SECOND byte logically precede those in the FIRST byte. - * While the byte order is little-endian, the bit order is - * big-endian; the first bit in a byte is the high-order one. - * Any multi-bit numbers are in bit-big-endian form, so the - * low-order bit of a multi-bit number is the LAST bit to be - * output. */ - rem_bits = num_bits - ostream->free_bits; - ostream->bitbuf <<= ostream->free_bits; - ostream->bitbuf |= bits >> rem_bits; - flush_bits(ostream); - ostream->free_bits = 16 - rem_bits; - ostream->bitbuf = bits; + /* Buffer variable does not have space for the new bits. It needs to be + * flushed as a 16-bit integer. Bits in the second byte logically + * precede those in the first byte (little-endian), but within each byte + * the bits are ordered from high to low. This is true for both XPRESS + * and LZX compression. */ + wimlib_assert(num_bits <= 16); + + /* There must be at least 2 bytes of space remaining. */ + if (unlikely(ostream->bytes_remaining < 2)) { + ostream->overrun = true; + return; } - return 0; + + /* Fill the buffer with as many bits that fit. */ + rem_bits = num_bits - ostream->free_bits; + ostream->bitbuf <<= ostream->free_bits; + ostream->bitbuf |= bits >> rem_bits; + + *(le16*)ostream->bit_output = cpu_to_le16(ostream->bitbuf); + ostream->bit_output = ostream->next_bit_output; + ostream->next_bit_output = ostream->output; + ostream->output += 2; + ostream->bytes_remaining -= 2; + + ostream->free_bits = 16 - rem_bits; + ostream->bitbuf = bits; } -/* Flushes any remaining bits in the output buffer to the output byte stream. */ -int -flush_output_bitstream(struct output_bitstream *ostream) +void +bitstream_put_byte(struct output_bitstream *ostream, u8 n) { - if (ostream->num_bytes_remaining + (ostream->output - - ostream->bit_output) < 2) - return 1; - if (ostream->free_bits != 16) { - ostream->bitbuf <<= ostream->free_bits; - flush_bits(ostream); + if (unlikely(ostream->bytes_remaining < 1)) { + ostream->overrun = true; + return; } - return 0; + *ostream->output++ = n; + ostream->bytes_remaining--; +} + +/* Flushes any remaining bits to the output bitstream. + * + * Returns -1 if the stream has overrun; otherwise returns the total number of + * bytes in the output. */ +input_idx_t +flush_output_bitstream(struct output_bitstream *ostream) +{ + if (unlikely(ostream->overrun)) + return ~(input_idx_t)0; + + *(le16*)ostream->bit_output = + cpu_to_le16((u16)((u32)ostream->bitbuf << ostream->free_bits)); + *(le16*)ostream->next_bit_output = + cpu_to_le16(0); + + return ostream->output - ostream->output_start; } /* Initializes an output bit buffer to write its output to the memory location @@ -106,10 +118,12 @@ init_output_bitstream(struct output_bitstream *ostream, ostream->bitbuf = 0; ostream->free_bits = 16; + ostream->output_start = data; ostream->bit_output = data; ostream->next_bit_output = data + 2; ostream->output = data + 4; - ostream->num_bytes_remaining = num_bytes - 4; + ostream->bytes_remaining = num_bytes; + ostream->overrun = false; } typedef struct { diff --git a/src/decompress.c b/src/decompress.c index 227bfb06..502ca47d 100644 --- a/src/decompress.c +++ b/src/decompress.c @@ -28,6 +28,7 @@ #endif #include "wimlib/decompress.h" +#include "wimlib/error.h" #include "wimlib/util.h" #include @@ -417,10 +418,8 @@ read_huffsym_near_end_of_input(struct input_bitstream *istream, if (sym >= num_syms) { bitstream_remove_bits(istream, key_size); do { - if (bitsleft == 0) { - DEBUG("Input stream exhausted"); + if (bitsleft == 0) return -1; - } key_bits = sym + bitstream_peek_bits(istream, 1); bitstream_remove_bits(istream, 1); bitsleft--; diff --git a/src/lz77.c b/src/lz77.c index 5ce8d89f..b9a40174 100644 --- a/src/lz77.c +++ b/src/lz77.c @@ -27,7 +27,7 @@ */ #ifdef HAVE_CONFIG_H -# include "config.h" +# include #endif #include "wimlib/compress.h" @@ -78,7 +78,7 @@ update_hash(unsigned hash, u8 c) * indicating the end of the hash chain. */ static inline unsigned -insert_string(u16 hash_tab[], u16 prev_tab[], +insert_string(input_idx_t hash_tab[], input_idx_t prev_tab[], const u8 window[], unsigned str_pos, unsigned hash) { @@ -112,7 +112,7 @@ insert_string(u16 hash_tab[], u16 prev_tab[], */ static unsigned longest_match(const u8 window[], unsigned bytes_remaining, - unsigned strstart, const u16 prev_tab[], + unsigned strstart, const input_idx_t prev_tab[], unsigned cur_match, unsigned prev_len, unsigned *match_start_ret, const struct lz_params *params) @@ -193,44 +193,23 @@ longest_match(const u8 window[], unsigned bytes_remaining, * Determines the sequence of matches and literals that a block of data will be * compressed to. * - * @uncompressed_data: The data that is to be compressed. - * @uncompressed_len: The length of @uncompressed_data, in bytes. - * @match_tab: An array for the intermediate representation of matches. - * @record_match: A function that will be called to produce the - * intermediate representation of a match, given - * the offset and length. This function should also - * update the appropriate symbol frequency counts - * so that any needed Huffman codes can be made - * later. - * @record_literal: A function that will be called to produce the - * intermediate representation of a literal, given - * the character of the literal. This function - * should also update the appropriate symbol - * frequency counts so that any needed Huffman - * codes can be made later. - * @record_match_arg_1: - * @record_match_arg_2: Extra arguments to be passed to @record_match. - * @record_literal_arg: Extra arguments to be passed to @record_literal. + * @window: The data that is to be compressed. + * @window_size: The length of @window, in bytes. + * @record_match: Consumer for matches. + * @record_literal: Consumer for literals. + * @record_ctx: Context passed to @record_match and @record_literal. * @params: Structure that contains parameters that affect how the * analysis proceeds (mainly how good the matches * have to be). - * - * Returns the total number of matches and literal bytes that were found; this - * is the number of slots in @match_tab that have been filled with the - * intermediate representation of a match or literal byte. */ -unsigned -lz_analyze_block(const u8 uncompressed_data[], - unsigned uncompressed_len, - u32 match_tab[], +void +lz_analyze_block(const u8 window[], + input_idx_t window_size, lz_record_match_t record_match, lz_record_literal_t record_literal, - void *record_match_arg1, - void *record_match_arg2, - void *record_literal_arg, + void *record_ctx, const struct lz_params *params) { - unsigned cur_match_pos = 0; unsigned cur_input_pos = 0; unsigned hash = 0; unsigned hash_head = 0; @@ -239,23 +218,21 @@ lz_analyze_block(const u8 uncompressed_data[], unsigned match_len = params->min_match - 1; unsigned match_start = 0; bool match_available = false; - u16 hash_tab[HASH_SIZE]; - u32 match; - u16 prev_tab[uncompressed_len]; + input_idx_t hash_tab[HASH_SIZE]; + input_idx_t prev_tab[window_size]; ZERO_ARRAY(hash_tab); - ZERO_ARRAY(prev_tab); do { /* If there are at least 3 characters remaining in the input, * insert the 3-character string beginning at - * uncompressed_data[cur_input_pos] into the hash table. + * window[cur_input_pos] into the hash table. * * hash_head is set to the index of the previous string in the * hash bucket, or 0 if there is no such string */ - if (uncompressed_len - cur_input_pos >= params->min_match) { + if (window_size - cur_input_pos >= params->min_match) { hash = insert_string(hash_tab, prev_tab, - uncompressed_data, + window, cur_input_pos, hash); hash_head = prev_tab[cur_input_pos]; } else { @@ -273,8 +250,8 @@ lz_analyze_block(const u8 uncompressed_data[], * string of window index 0 (in particular we have to * avoid a match of the string with itself at the start * of the input file). */ - match_len = longest_match(uncompressed_data, - uncompressed_len - cur_input_pos, + match_len = longest_match(window, + window_size - cur_input_pos, cur_input_pos, prev_tab, hash_head, prev_len, &match_start, params); @@ -289,26 +266,18 @@ lz_analyze_block(const u8 uncompressed_data[], */ if (prev_len >= params->min_match && match_len <= prev_len) { - /* Do not insert strings in hash table beyond this. */ - unsigned max_insert = uncompressed_len - params->min_match; - - /*DEBUG("Recording match (pos = %u, offset = %u, len = %u)\n",*/ - /*cur_input_pos - 1, */ - /*cur_input_pos - 1 - prev_start,*/ - /*prev_len);*/ - match = (*record_match)(cur_input_pos - 1 - prev_start, - prev_len, - record_match_arg1, - record_match_arg2); + (*record_match)(prev_len, + cur_input_pos - 1 - prev_start, + record_ctx); - match_tab[cur_match_pos++] = match; + /* Insert in hash table all strings up to the end of the + * match. strstart-1 and strstart are already inserted. + * If there is not enough lookahead, the last two + * strings are not inserted in the hash table. */ - /* Insert in hash table all strings up to the end of the match. - * strstart-1 and strstart are already inserted. If there is not - * enough lookahead, the last two strings are not inserted in - * the hash table. - */ + /* Do not insert strings in hash table beyond this. */ + unsigned max_insert = window_size - params->min_match; #if LZ_MIN_MATCH == 2 if (prev_len >= 3) #endif @@ -318,7 +287,7 @@ lz_analyze_block(const u8 uncompressed_data[], do { if (++cur_input_pos <= max_insert) { hash = insert_string(hash_tab, prev_tab, - uncompressed_data, + window, cur_input_pos, hash); } @@ -327,30 +296,18 @@ lz_analyze_block(const u8 uncompressed_data[], match_available = false; match_len = params->min_match - 1; } else if (match_available) { - /* If there was no match at the previous position, output a - * single literal. If there was a match but the current match - * is longer, truncate the previous match to a single literal. - */ - - /*DEBUG("Recording litrl (pos = %u, value = %u)\n",*/ - /*cur_input_pos - 1, */ - /*uncompressed_data[cur_input_pos - 1]);*/ - - match = (*record_literal)( - uncompressed_data[cur_input_pos - 1], - record_literal_arg); - match_tab[cur_match_pos++] = match; + /* If there was no match at the previous position, + * output a single literal. If there was a match but the + * current match is longer, truncate the previous match + * to a single literal. */ + (*record_literal)(window[cur_input_pos - 1], record_ctx); } else { /* There is no previous match to compare with, wait for * the next step to decide. */ match_available = true; } - } while (++cur_input_pos < uncompressed_len); - - if (match_available) { - match = (*record_literal)(uncompressed_data[cur_input_pos - 1], - record_literal_arg); - match_tab[cur_match_pos++] = match; - } - return cur_match_pos; + } while (++cur_input_pos < window_size); + + if (match_available) + (*record_literal)(window[cur_input_pos - 1], record_ctx); } diff --git a/src/lzx-compress.c b/src/lzx-compress.c index 4c0e6ce1..9874a7b9 100644 --- a/src/lzx-compress.c +++ b/src/lzx-compress.c @@ -195,12 +195,11 @@ #include #ifdef ENABLE_LZX_DEBUG -# include +# include "wimlib/decompress.h" #endif #include "divsufsort/divsufsort.h" -typedef freq_t input_idx_t; typedef u32 block_cost_t; #define INFINITE_BLOCK_COST ((block_cost_t)~0U) @@ -1074,13 +1073,10 @@ lzx_write_all_blocks(struct lzx_compressor *ctx, struct output_bitstream *ostrea /* Constructs an LZX match from a literal byte and updates the main code symbol * frequencies. */ static u32 -lzx_record_literal(u8 literal, void *_freqs) +lzx_tally_literal(u8 lit, struct lzx_freqs *freqs) { - struct lzx_freqs *freqs = _freqs; - - freqs->main[literal]++; - - return (u32)literal; + freqs->main[lit]++; + return (u32)lit; } /* Constructs an LZX match from an offset and a length, and updates the LRU @@ -1088,11 +1084,9 @@ lzx_record_literal(u8 literal, void *_freqs) * alphabets. The return value is a 32-bit number that provides the match in an * intermediate representation documented below. */ static u32 -lzx_record_match(unsigned match_offset, unsigned match_len, - void *_freqs, void *_queue) +lzx_tally_match(unsigned match_len, unsigned match_offset, + struct lzx_freqs *freqs, struct lzx_lru_queue *queue) { - struct lzx_freqs *freqs = _freqs; - struct lzx_lru_queue *queue = _queue; unsigned position_slot; unsigned position_footer; u32 len_header; @@ -1166,6 +1160,28 @@ lzx_record_match(unsigned match_offset, unsigned match_len, (adjusted_match_len); } +struct lzx_record_ctx { + struct lzx_freqs freqs; + struct lzx_lru_queue queue; + struct lzx_match *matches; +}; + +static void +lzx_record_match(unsigned len, unsigned offset, void *_ctx) +{ + struct lzx_record_ctx *ctx = _ctx; + + (ctx->matches++)->data = lzx_tally_match(len, offset, &ctx->freqs, &ctx->queue); +} + +static void +lzx_record_literal(u8 lit, void *_ctx) +{ + struct lzx_record_ctx *ctx = _ctx; + + (ctx->matches++)->data = lzx_tally_literal(lit, &ctx->freqs); +} + /* Returns the cost, in bits, to output a literal byte using the specified cost * model. */ static unsigned @@ -1921,11 +1937,11 @@ lzx_optimize_block(struct lzx_compressor *ctx, struct lzx_block_spec *spec, raw_match = lzx_lz_get_near_optimal_match(ctx); if (raw_match.len >= LZX_MIN_MATCH_LEN) { - lzx_match.data = lzx_record_match(raw_match.offset, raw_match.len, - &freqs, &ctx->queue); + lzx_match.data = lzx_tally_match(raw_match.len, raw_match.offset, + &freqs, &ctx->queue); i += raw_match.len; } else { - lzx_match.data = lzx_record_literal(ctx->window[i], &freqs); + lzx_match.data = lzx_tally_literal(ctx->window[i], &freqs); i += 1; } ctx->chosen_matches[spec->chosen_matches_start_pos + @@ -2139,9 +2155,6 @@ lzx_prepare_blocks(struct lzx_compressor * ctx) * ctx->window[] * ctx->window_size * - * Working space: - * ctx->queue - * * Output --- the block specification and the corresponding match/literal data: * * ctx->block_specs[] @@ -2151,8 +2164,7 @@ lzx_prepare_blocks(struct lzx_compressor * ctx) static void lzx_prepare_block_fast(struct lzx_compressor * ctx) { - unsigned num_matches; - struct lzx_freqs freqs; + struct lzx_record_ctx record_ctx; struct lzx_block_spec *spec; /* Parameters to hash chain LZ match finder @@ -2170,19 +2182,17 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx) }; /* Initialize symbol frequencies and match offset LRU queue. */ - memset(&freqs, 0, sizeof(struct lzx_freqs)); - lzx_lru_queue_init(&ctx->queue); + memset(&record_ctx.freqs, 0, sizeof(struct lzx_freqs)); + lzx_lru_queue_init(&record_ctx.queue); + record_ctx.matches = ctx->chosen_matches; /* Determine series of matches/literals to output. */ - num_matches = lz_analyze_block(ctx->window, - ctx->window_size, - (u32*)ctx->chosen_matches, - lzx_record_match, - lzx_record_literal, - &freqs, - &ctx->queue, - &freqs, - &lzx_lz_params); + lz_analyze_block(ctx->window, + ctx->window_size, + lzx_record_match, + lzx_record_literal, + &record_ctx, + &lzx_lz_params); /* Set up block specification. */ @@ -2190,9 +2200,9 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx) spec->block_type = LZX_BLOCKTYPE_ALIGNED; spec->window_pos = 0; spec->block_size = ctx->window_size; - spec->num_chosen_matches = num_matches; + spec->num_chosen_matches = (record_ctx.matches - ctx->chosen_matches); spec->chosen_matches_start_pos = 0; - lzx_make_huffman_codes(&freqs, &spec->codes); + lzx_make_huffman_codes(&record_ctx.freqs, &spec->codes); ctx->num_blocks = 1; } @@ -2239,7 +2249,7 @@ wimlib_lzx_compress2(const void * const restrict uncompressed_data, { struct lzx_compressor *ctx = (struct lzx_compressor*)lzx_ctx; struct output_bitstream ostream; - unsigned compressed_len; + input_idx_t compressed_len; if (uncompressed_len < 100) { LZX_DEBUG("Too small to bother compressing."); @@ -2286,16 +2296,12 @@ wimlib_lzx_compress2(const void * const restrict uncompressed_data, lzx_write_all_blocks(ctx, &ostream); LZX_DEBUG("Flushing bitstream..."); - if (flush_output_bitstream(&ostream)) { - /* If the bitstream cannot be flushed, then the output space was - * exhausted. */ + compressed_len = flush_output_bitstream(&ostream); + if (compressed_len == ~(input_idx_t)0) { LZX_DEBUG("Data did not compress to less than original length!"); return 0; } - /* Compute the length of the compressed data. */ - compressed_len = ostream.bit_output - (u8*)compressed_data; - LZX_DEBUG("Done: compressed %u => %u bytes.", uncompressed_len, compressed_len); @@ -2310,11 +2316,10 @@ wimlib_lzx_compress2(const void * const restrict uncompressed_data, ) { u8 buf[uncompressed_len]; - int ret; - ret = wimlib_lzx_decompress(compressed_data, compressed_len, - buf, uncompressed_len); - if (ret) { + if (wimlib_lzx_decompress(compressed_data, compressed_len, + buf, uncompressed_len)) + { ERROR("Failed to decompress data we " "compressed using LZX algorithm"); wimlib_assert(0); diff --git a/src/lzx-decompress.c b/src/lzx-decompress.c index 935c6367..0a456297 100644 --- a/src/lzx-decompress.c +++ b/src/lzx-decompress.c @@ -38,13 +38,13 @@ * * A compressed WIM resource consists of a table of chunk offsets followed by * the compressed chunks themselves. All compressed chunks except possibly the - * last decompress to WIM_CHUNK_SIZE (= 32768) bytes. This is quite similar to - * the cabinet (.cab) file format, but they are not the same. According to the - * cabinet format documentation, the LZX block size is independent from the - * CFDATA blocks, and a LZX block may span several CFDATA blocks. However, in - * WIMs, LZX blocks do not appear to ever span multiple WIM chunks. Note that - * this means any WIM chunk may be decompressed or compressed independently from - * any other chunk, which is convenient. + * last decompress to 32768 bytes. This is quite similar to the cabinet (.cab) + * file format, but they are not the same. According to the cabinet format + * documentation, the LZX block size is independent from the CFDATA blocks, and + * a LZX block may span several CFDATA blocks. However, in WIMs, LZX blocks do + * not appear to ever span multiple WIM chunks. Note that this means any WIM + * chunk may be decompressed or compressed independently from any other chunk, + * which is convenient. * * A LZX compressed WIM chunk contains one or more LZX blocks of the aligned, * verbatim, or uncompressed block types. For aligned and verbatim blocks, the diff --git a/src/xpress-compress.c b/src/xpress-compress.c index 6b90ba9e..5a314b69 100644 --- a/src/xpress-compress.c +++ b/src/xpress-compress.c @@ -36,113 +36,102 @@ #include "wimlib/util.h" #include "wimlib/xpress.h" -#include #include +/* Intermediate XPRESS match/literal representation. */ +struct xpress_match { + u16 adjusted_len; /* Match length minus XPRESS_MIN_MATCH_LEN */ + u16 offset; /* Match offset */ + /* For literals, offset == 0 and adjusted_len is the literal. */ +}; + /* * Writes @match, which is a match given in the intermediate representation for * XPRESS matches, to the output stream @ostream. * * @codewords and @lens provide the Huffman code that is being used. */ -static int -xpress_write_match(struct output_bitstream *restrict ostream, - u32 match, +static void +xpress_write_match(struct xpress_match match, + struct output_bitstream *restrict ostream, const u16 codewords[restrict], const u8 lens[restrict]) { - u32 adjusted_match_len = match & 0xffff; - u32 match_offset = match >> 16; - u32 len_hdr = min(adjusted_match_len, 0xf); - u32 offset_bsr = bsr32(match_offset); - u32 sym = len_hdr | (offset_bsr << 4) | XPRESS_NUM_CHARS; - int ret; + u8 len_hdr = min(match.adjusted_len, 0xf); + u8 offset_bsr = bsr32(match.offset); + unsigned sym = XPRESS_NUM_CHARS + ((offset_bsr << 4) | len_hdr); - ret = bitstream_put_bits(ostream, codewords[sym], lens[sym]); - if (ret) - return ret; + bitstream_put_bits(ostream, codewords[sym], lens[sym]); - if (adjusted_match_len >= 0xf) { - u8 byte1 = min(adjusted_match_len - 0xf, 0xff); - ret = bitstream_put_byte(ostream, byte1); - if (ret) - return ret; + if (match.adjusted_len >= 0xf) { + u8 byte1 = min(match.adjusted_len - 0xf, 0xff); + bitstream_put_byte(ostream, byte1); if (byte1 == 0xff) { - ret = bitstream_put_two_bytes(ostream, adjusted_match_len); - if (ret) - return ret; + bitstream_put_byte(ostream, match.adjusted_len & 0xff); + bitstream_put_byte(ostream, match.adjusted_len >> 8); } } - return bitstream_put_bits(ostream, - match_offset ^ (1 << offset_bsr), offset_bsr); + bitstream_put_bits(ostream, match.offset ^ (1U << offset_bsr), offset_bsr); } -static int -xpress_write_compressed_literals(struct output_bitstream *ostream, - const u32 match_tab[restrict], - unsigned num_matches, - const u16 codewords[restrict], - const u8 lens[restrict]) +static void +xpress_write_matches_and_literals(struct output_bitstream *ostream, + const struct xpress_match matches[restrict], + input_idx_t num_matches, + const u16 codewords[restrict], + const u8 lens[restrict]) { - for (unsigned i = 0; i < num_matches; i++) { - int ret; - u32 match = match_tab[i]; - - if (match >= XPRESS_NUM_CHARS) /* match */ - ret = xpress_write_match(ostream, match, codewords, - lens); - else /* literal byte */ - ret = bitstream_put_bits(ostream, codewords[match], - lens[match]); - if (ret) - return ret; + for (input_idx_t i = 0; i < num_matches; i++) { + if (matches[i].offset) { + /* Real match */ + xpress_write_match(matches[i], ostream, codewords, lens); + } else { + /* Literal byte */ + u8 lit = matches[i].adjusted_len; + bitstream_put_bits(ostream, codewords[lit], lens[lit]); + } } - return bitstream_put_bits(ostream, codewords[XPRESS_END_OF_DATA], - lens[XPRESS_END_OF_DATA]); + bitstream_put_bits(ostream, codewords[XPRESS_END_OF_DATA], lens[XPRESS_END_OF_DATA]); } -static u32 -xpress_record_literal(u8 literal, void *_freq_tab) +struct xpress_record_ctx { + freq_t freqs[XPRESS_NUM_SYMBOLS]; + struct xpress_match *matches; +}; + +static void +xpress_record_literal(u8 lit, void *_ctx) { - freq_t *freq_tab = _freq_tab; - freq_tab[literal]++; - return literal; + struct xpress_record_ctx *ctx = _ctx; + ctx->freqs[lit]++; + *(ctx->matches++) = (struct xpress_match) { .offset = 0, .adjusted_len = lit }; } -static u32 -xpress_record_match(unsigned match_offset, unsigned match_len, - void *freq_tab, void *_ignore) +static void +xpress_record_match(unsigned len, unsigned offset, void *_ctx) { - wimlib_assert(match_len >= XPRESS_MIN_MATCH && - match_len <= XPRESS_MAX_MATCH); - wimlib_assert(match_offset >= XPRESS_MIN_OFFSET && - match_offset <= XPRESS_MAX_OFFSET); + struct xpress_record_ctx *ctx = _ctx; - /* - * The intermediate representation of XPRESS matches is as follows: - * - * bits description - * ---- ----------------------------------------------------------- - * - * 16-31 match offset (XPRESS_MIN_OFFSET < x < XPRESS_MAX_OFFSET) - * - * 0-15 adjusted match length (0 <= x <= XPRESS_MAX_MATCH - XPRESS_MIN_MATCH) - * - * Literals are simply represented as themselves and can be - * distinguished from matches by the fact that only literals will have - * the upper three bytes completely clear. */ + XPRESS_ASSERT(len >= XPRESS_MIN_MATCH_LEN); + XPRESS_ASSERT(len <= XPRESS_MAX_MATCH_LEN); + XPRESS_ASSERT(offset >= XPRESS_MIN_OFFSET); + XPRESS_ASSERT(offset <= XPRESS_MAX_OFFSET); + + unsigned adjusted_len = len - XPRESS_MIN_MATCH_LEN; + unsigned len_hdr = min(adjusted_len, 0xf); + unsigned sym = XPRESS_NUM_CHARS + ((bsr32(offset) << 4) | len_hdr); - u32 adjusted_match_len = match_len - XPRESS_MIN_MATCH; - u32 len_hdr = min(adjusted_match_len, 0xf); - u32 offset_bsr = bsr32(match_offset); - u32 sym = len_hdr | (offset_bsr << 4) | XPRESS_NUM_CHARS; - ((freq_t*)freq_tab)[sym]++; - return adjusted_match_len | (match_offset << 16); + XPRESS_ASSERT(sym >= XPRESS_NUM_CHARS); + XPRESS_ASSERT(sym < XPRESS_NUM_SYMBOLS); + + ctx->freqs[sym]++; + *(ctx->matches++) = (struct xpress_match) { .offset = offset, + .adjusted_len = adjusted_len }; } static const struct lz_params xpress_lz_params = { - .min_match = XPRESS_MIN_MATCH, - .max_match = XPRESS_MAX_MATCH, + .min_match = XPRESS_MIN_MATCH_LEN, + .max_match = XPRESS_MAX_MATCH_LEN, .good_match = 16, .nice_match = 32, .max_chain_len = 16, @@ -152,122 +141,91 @@ static const struct lz_params xpress_lz_params = { /* API function documented in wimlib.h */ WIMLIBAPI unsigned -wimlib_xpress_compress(const void * restrict _uncompressed_data, +wimlib_xpress_compress(const void * restrict uncompressed_data, unsigned uncompressed_len, - void * restrict _compressed_data) + void * restrict compressed_data) { - u8 *compressed_data = _compressed_data; + u8 *cptr = compressed_data; struct output_bitstream ostream; - u32 match_tab[uncompressed_len]; - freq_t freq_tab[XPRESS_NUM_SYMBOLS]; + + struct xpress_record_ctx record_ctx; + struct xpress_match matches[uncompressed_len]; + u8 udata[uncompressed_len + 8]; u16 codewords[XPRESS_NUM_SYMBOLS]; u8 lens[XPRESS_NUM_SYMBOLS]; - unsigned num_matches; - unsigned compressed_len; - unsigned i; - int ret; - u8 uncompressed_data[uncompressed_len + 8]; - - memcpy(uncompressed_data, _uncompressed_data, uncompressed_len); - memset(uncompressed_data + uncompressed_len, 0, 8); + input_idx_t num_matches; + input_idx_t compressed_len; + input_idx_t i; - wimlib_assert(uncompressed_len <= 32768); - - /* XPRESS requires 256 bytes of overhead for the Huffman tables, so it's - * impossible cannot compress 256 bytes or less of data to less than the + /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's + * impossible to compress 256 bytes or less of data to less than the * input size. * * +1 to take into account that the buffer for compressed data is 1 byte * smaller than the buffer for uncompressed data. * * +4 to take into account that init_output_bitstream() requires at - * least 4 bytes of data. */ + * least 4 bytes of data. */ if (uncompressed_len < XPRESS_NUM_SYMBOLS / 2 + 1 + 4) return 0; - ZERO_ARRAY(freq_tab); - num_matches = lz_analyze_block(uncompressed_data, uncompressed_len, - match_tab, xpress_record_match, - xpress_record_literal, freq_tab, - NULL, freq_tab, - &xpress_lz_params); + /* Copy the data to a temporary buffer, but only to avoid + * inconsequential accesses of uninitialized memory in + * lz_analyze_block(). */ + memcpy(udata, uncompressed_data, uncompressed_len); + memset(udata + uncompressed_len, 0, 8); + + /* Determine match/literal sequence to divide the data into. */ + ZERO_ARRAY(record_ctx.freqs); + record_ctx.matches = matches; + lz_analyze_block(udata, + uncompressed_len, + xpress_record_match, + xpress_record_literal, + &record_ctx, + &xpress_lz_params); - freq_tab[XPRESS_END_OF_DATA]++; + num_matches = (record_ctx.matches - matches); + /* Account for end of data symbol. */ + record_ctx.freqs[XPRESS_END_OF_DATA]++; + + /* Build the Huffman code. */ make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN, - freq_tab, lens, codewords); + record_ctx.freqs, lens, codewords); - /* IMPORTANT NOTE: - * - * It's tempting to output the 512 Huffman codeword lengths using the - * bitstream_put_bits() function. However, this is NOT correct because - * bitstream_put_bits() will output 2 bytes at a time in little-endian - * order, which is the order that is needed for the compressed literals. - * However, the bytes in the lengths table are in order, so they need to - * be written one at a time without using bitstream_put_bits(). - * - * Because of this, init_output_bitstream() is not called until after - * the lengths table is output. - */ + /* Output the Huffman code as a series of 512 4-bit lengths. */ for (i = 0; i < XPRESS_NUM_SYMBOLS; i += 2) - *compressed_data++ = (lens[i] & 0xf) | (lens[i + 1] << 4); + *cptr++ = (lens[i] & 0xf) | (lens[i + 1] << 4); - init_output_bitstream(&ostream, compressed_data, + /* Output the encoded matches/literals. */ + init_output_bitstream(&ostream, cptr, uncompressed_len - XPRESS_NUM_SYMBOLS / 2 - 1); + xpress_write_matches_and_literals(&ostream, matches, + num_matches, codewords, lens); - ret = xpress_write_compressed_literals(&ostream, match_tab, - num_matches, codewords, lens); - if (ret) + /* Flush any pending data and get the length of the compressed data. */ + compressed_len = flush_output_bitstream(&ostream); + if (compressed_len == ~(input_idx_t)0) return 0; + compressed_len += XPRESS_NUM_SYMBOLS / 2; - /* Flush any bits that are buffered. */ - ret = flush_output_bitstream(&ostream); - if (ret) +#if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION) || 1 + /* Verify that we really get the same thing back when decompressing. */ + if (wimlib_xpress_decompress(compressed_data, compressed_len, + udata, uncompressed_len)) + { + ERROR("Failed to decompress data we " + "compressed using XPRESS algorithm"); + wimlib_assert(0); return 0; + } - /* Assert that there are no output bytes between the ostream.output - * pointer and the ostream.next_bit_output pointer. This can only - * happen if bytes had been written at the ostream.output pointer before - * the last bit word was written to the stream. But, this does not - * occur since xpress_write_match() always finishes by writing some bits - * (a Huffman symbol), and the bitstream was just flushed. */ - wimlib_assert(ostream.output - ostream.next_bit_output == 2); - - /* The length of the compressed data is supposed to be the value of the - * ostream.output pointer before flushing, which is now the - * output.next_bit_output pointer after flushing. - * - * There will be an extra 2 bytes at the ostream.bit_output pointer, - * which is zeroed out. (These 2 bytes may be either the last bytes in - * the compressed data, in which case they are actually unnecessary, or - * they may precede a number of bytes embedded into the bitstream.) */ - if (ostream.bit_output > - (const u8*)_compressed_data + uncompressed_len - 3) + if (memcmp(uncompressed_data, udata, uncompressed_len)) { + ERROR("Data we compressed using XPRESS algorithm " + "didn't decompress to original"); + wimlib_assert(0); return 0; - *(u16*)ostream.bit_output = cpu_to_le16(0); - compressed_len = ostream.next_bit_output - (const u8*)_compressed_data; - - wimlib_assert(compressed_len <= uncompressed_len - 1); - -#ifdef ENABLE_VERIFY_COMPRESSION - /* Verify that we really get the same thing back when decompressing. */ - { - u8 buf[uncompressed_len]; - ret = wimlib_xpress_decompress(_compressed_data, compressed_len, - buf, uncompressed_len); - if (ret) { - ERROR("xpress_compress(): Failed to decompress data we " - "compressed"); - abort(); - } - for (i = 0; i < uncompressed_len; i++) { - if (buf[i] != uncompressed_data[i]) { - ERROR("xpress_compress(): Data we compressed didn't " - "decompress to the original data (difference at " - "byte %u of %u)", i + 1, uncompressed_len); - abort(); - } - } } #endif return compressed_len; diff --git a/src/xpress-decompress.c b/src/xpress-decompress.c index cf08a836..b71f6be2 100644 --- a/src/xpress-decompress.c +++ b/src/xpress-decompress.c @@ -25,11 +25,10 @@ */ - /* - * The XPRESS compression format is a LZ77 and Huffman-code based algorithm. - * That means it is quite similar to LZX compression, but XPRESS is slightly - * simpler, so it is a little faster to compress and decompress. + * The XPRESS compression format is an LZ77 and Huffman-code based algorithm. + * That means it is fairly similar to LZX compression, but XPRESS is simpler, so + * it is a little faster to compress and decompress. * * The XPRESS compression format is mostly documented in a file called "[MS-XCA] * Xpress Compression Algorithm". In the MSDN library, it can currently be @@ -41,20 +40,18 @@ * If you are already familiar with the LZ77 algorithm and Huffman coding, the * XPRESS format is fairly simple. The compressed data begins with 256 bytes * that contain 512 4-bit integers that are the lengths of the symbols in the - * Huffman tree used for decoding compressed literals. This is the only Huffman - * tree that is used for the entirety of the compressed data, and the codeword + * Huffman code used for match/literal headers. In contrast with more + * complicated formats such as DEFLATE and LZX, this is the only Huffman code + * that is used for the entirety of the XPRESS compressed data, and the codeword * lengths are not encoded with a pretree. * * The rest of the compressed data is Huffman-encoded symbols. Values 0 through - * 255 are literal bytes. Values 256 through 511 are matches and may require - * extra bits or bytes to be read to get the match offset and match length. - * - * There is no notion of a "compressed block" in the XPRESS format, so in the - * XPRESS format, each WIM chunk (32768 bytes) will always use only one Huffman - * tree. + * 255 represent the corresponding literal bytes. Values 256 through 511 + * represent matches and may require extra bits or bytes to be read to get the + * match offset and match length. * - * The trickiest part is probably the fact that literal bytes for match lengths - * are encoded "separately" from the bitstream. + * The trickiest part is probably the way in which literal bytes for match + * lengths are interleaved in the bitstream. * * Also, a caveat--- according to Microsoft's documentation for XPRESS, * @@ -63,12 +60,9 @@ * fail during decompression if the Huffman symbol 256 is not found after * the actual data." * - * This is the case for WIM files--- in we must write this extra symbol "256" at - * the end. Otherwise Microsoft's software will fail to decompress the - * XPRESS-compressed data. - * - * However, wimlib's decompressor in this file currently does not care if this - * extra symbol is there or not. + * This is the case for the implementation in WIMGAPI. However, wimlib's + * decompressor in this file currently does not care if this extra symbol is + * there or not. */ #ifdef HAVE_CONFIG_H @@ -76,51 +70,49 @@ #endif #include "wimlib.h" -#include "wimlib/assert.h" -#define XPRESS_DECOMP #include "wimlib/decompress.h" -#include "wimlib/util.h" #include "wimlib/xpress.h" /* - * Decodes a symbol @huffsym that begins an XPRESS match. + * Decodes a symbol @sym that begins an XPRESS match. * * The low 8 bits of the symbol are divided into: * * bits 0-3: length header * bits 4-7: index of high-order bit of match offset * - * Note: taking the low 8 bits of the symbol is the same as subtracting 256, the - * number of symbols reserved for literals. - * - * Returns the match length, or -1 on error. + * Returns the match length, or -1 if the data is invalid. */ static int -xpress_decode_match(unsigned huffsym, unsigned window_pos, - unsigned window_len, u8 window[restrict], +xpress_decode_match(unsigned sym, input_idx_t window_pos, + input_idx_t window_len, u8 window[restrict], struct input_bitstream * restrict istream) { - unsigned match_len; - unsigned match_offset; - u8 match_sym = (u8)huffsym; - u8 len_hdr = match_sym & 0xf; - u8 offset_bsr = match_sym >> 4; + + u8 len_hdr; + u8 offset_bsr; int ret; u8 *match_dest; u8 *match_src; unsigned i; + unsigned match_len; + unsigned match_offset; + + sym -= XPRESS_NUM_CHARS; + len_hdr = sym & 0xf; + offset_bsr = sym >> 4; + + if (bitstream_ensure_bits(istream, 16)) + return -1; - ret = bitstream_read_bits(istream, offset_bsr, &match_offset); - if (ret) - return ret; - match_offset |= (1 << offset_bsr); + match_offset = (1U << offset_bsr) | bitstream_pop_bits(istream, offset_bsr); if (len_hdr == 0xf) { ret = bitstream_read_byte(istream); if (ret < 0) return ret; match_len = ret; - if (match_len == 0xff) { + if (unlikely(match_len == 0xff)) { ret = bitstream_read_byte(istream); if (ret < 0) return ret; @@ -137,24 +129,17 @@ xpress_decode_match(unsigned huffsym, unsigned window_pos, } else { match_len = len_hdr; } - match_len += XPRESS_MIN_MATCH; + match_len += XPRESS_MIN_MATCH_LEN; - /* Verify that the match is in the bounds of the part of the window - * currently in use, then copy the source of the match to the current - * position. */ - if (window_pos + match_len > window_len) { - DEBUG("XPRESS decompression error: match of length %u " - "bytes overflows window", match_len); + /* Verify the match is in bounds, then copy its data to the the current + * position. */ + + if (window_pos + match_len > window_len) return -1; - } - if (match_offset > window_pos) { - DEBUG("XPRESS decompression error: match of length %u bytes " - "references data before window (match_offset = %u, " - "window_pos = %u)", match_len, match_offset, window_pos); + if (match_offset > window_pos) return -1; - } match_dest = window + window_pos; match_src = match_dest - match_offset; @@ -165,39 +150,44 @@ xpress_decode_match(unsigned huffsym, unsigned window_pos, return match_len; } -/* Decodes the Huffman-encoded matches and literal bytes in a block of - * XPRESS-encoded data. */ +/* Decodes the Huffman-encoded matches and literal bytes in a region of + * XPRESS-encoded data. */ static int -xpress_decompress_block(struct input_bitstream * restrict istream, - u8 uncompressed_data[restrict], - unsigned uncompressed_len, - const u8 lens[restrict], - const u16 decode_table[restrict]) +xpress_lz_decode(struct input_bitstream * restrict istream, + u8 uncompressed_data[restrict], + unsigned uncompressed_len, + const u8 lens[restrict], + const u16 decode_table[restrict]) { - unsigned curpos; - unsigned huffsym; - int ret; - int match_len; - - curpos = 0; - while (curpos < uncompressed_len) { - ret = read_huffsym(istream, decode_table, lens, - XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS, - &huffsym, XPRESS_MAX_CODEWORD_LEN); - if (ret) - return ret; + input_idx_t curpos; + unsigned match_len; + + for (curpos = 0; curpos < uncompressed_len; curpos += match_len) { + unsigned sym; + int ret; + + if (unlikely(bitstream_ensure_bits(istream, 16))) + return -1; - if (huffsym < XPRESS_NUM_CHARS) { - uncompressed_data[curpos++] = huffsym; + if (unlikely(read_huffsym(istream, decode_table, lens, + XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS, + &sym, XPRESS_MAX_CODEWORD_LEN))) + return -1; + + if (sym < XPRESS_NUM_CHARS) { + /* Literal */ + uncompressed_data[curpos] = sym; + match_len = 1; } else { - match_len = xpress_decode_match(huffsym, - curpos, - uncompressed_len, - uncompressed_data, - istream); - if (match_len < 0) - return match_len; - curpos += match_len; + /* Match */ + ret = xpress_decode_match(sym, + curpos, + uncompressed_len, + uncompressed_data, + istream); + if (unlikely(ret < 0)) + return -1; + match_len = ret; } } return 0; @@ -206,48 +196,38 @@ xpress_decompress_block(struct input_bitstream * restrict istream, /* API function documented in wimlib.h */ WIMLIBAPI int -wimlib_xpress_decompress(const void * restrict _compressed_data, unsigned compressed_len, - void * restrict uncompressed_data, unsigned uncompressed_len) +wimlib_xpress_decompress(const void * const restrict _compressed_data, + const unsigned compressed_len, + void * const restrict uncompressed_data, + const unsigned uncompressed_len) { + const u8 *compressed_data = _compressed_data; u8 lens[XPRESS_NUM_SYMBOLS]; + u8 *lens_p; u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS] _aligned_attribute(DECODE_TABLE_ALIGNMENT); struct input_bitstream istream; - u8 *lens_p; - const u8 *compressed_data; - unsigned i; - int ret; - - compressed_data = _compressed_data; - lens_p = lens; - DEBUG2("compressed_len = %d, uncompressed_len = %d", - compressed_len, uncompressed_len); - - /* XPRESS uses only one Huffman tree. It contains 512 symbols, and the + /* XPRESS uses only one Huffman code. It contains 512 symbols, and the * code lengths of these symbols are given literally as 4-bit integers - * in the first 256 bytes of the compressed data. - */ - if (compressed_len < XPRESS_NUM_SYMBOLS / 2) { - DEBUG("xpress_decompress(): Compressed length too short!"); + * in the first 256 bytes of the compressed data. */ + if (compressed_len < XPRESS_NUM_SYMBOLS / 2) return -1; - } - for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) { + lens_p = lens; + for (unsigned i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) { *lens_p++ = compressed_data[i] & 0xf; *lens_p++ = compressed_data[i] >> 4; } - ret = make_huffman_decode_table(decode_table, XPRESS_NUM_SYMBOLS, - XPRESS_TABLEBITS, lens, - XPRESS_MAX_CODEWORD_LEN); - if (ret) - return ret; + if (make_huffman_decode_table(decode_table, XPRESS_NUM_SYMBOLS, + XPRESS_TABLEBITS, lens, + XPRESS_MAX_CODEWORD_LEN)) + return -1; init_input_bitstream(&istream, compressed_data + XPRESS_NUM_SYMBOLS / 2, compressed_len - XPRESS_NUM_SYMBOLS / 2); - return xpress_decompress_block(&istream, uncompressed_data, - uncompressed_len, lens, - decode_table); + return xpress_lz_decode(&istream, uncompressed_data, + uncompressed_len, lens, decode_table); } -- 2.43.0