/*
- * The XPRESS compression format is a LZ77-based algorithm. That means it is
- * quite similar to LZX compression, but XPRESS is slightly simpler, so it is a
- * little faster to compress and decompress.
+ * The XPRESS compression format is a LZ77 and Huffman-code based algorithm.
+ * That means it is quite similar to LZX compression, but XPRESS is slightly
+ * simpler, so it is a little faster to compress and decompress.
*
* The XPRESS compression format is mostly documented in a file called "[MS-XCA]
* Xpress Compression Algorithm". In the MSDN library, it can currently be
* found under Open Specifications => Protocols => Windows Protocols => Windows
- * Server Protocols => [MS-XCA] Xpress Compression Algorithm". Note that
- * Microsoft apparently also has either a slightly different format or an
- * entirely different format that is also called XPRESS. The other one is
- * supposedly used in Windows' hibernation file or something, but the one used
- * in WIM files is the one described in the above document.
+ * Server Protocols => [MS-XCA] Xpress Compression Algorithm". The format in
+ * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm"
+ * (there apparently are some other versions of XPRESS as well).
*
* If you are already familiar with the LZ77 algorithm and Huffman coding, the
- * XPRESS format is pretty simple. The compressed data begins with 256 bytes
+ * XPRESS format is fairly simple. The compressed data begins with 256 bytes
* that contain 512 4-bit integers that are the lengths of the symbols in the
* Huffman tree used for decoding compressed literals. This is the only Huffman
* tree that is used for the entirety of the compressed data, and the codeword
* The trickiest part is probably the fact that literal bytes for match lengths
* are encoded "separately" from the bitstream.
*
- * Also, a caveat--- according to M$'s documentation for XPRESS,
+ * Also, a caveat--- according to Microsoft's documentation for XPRESS,
*
* "Some implementation of the decompression algorithm expect an extra
* symbol to mark the end of the data. Specifically, some implementations
#include "decomp.h"
-/* Decodes @huffsym, a value >= XPRESS_NUM_CHARS, that is the header of a match.
- * */
-static int xpress_decode_match(int huffsym, uint window_pos, uint window_len,
- u8 window[], struct input_bitstream *istream)
+/*
+ * Decodes a symbol @huffsym that begins an XPRESS match.
+ *
+ * The low 8 bits of the symbol are divided into:
+ *
+ * bits 0-3: length header
+ * bits 4-7: index of high-order bit of match offset
+ *
+ * Note: taking the low 8 bits of the symbol is the same as subtracting 256, the
+ * number of symbols reserved for literals.
+ */
+static int xpress_decode_match(int huffsym, unsigned window_pos,
+ unsigned window_len, u8 window[],
+ struct input_bitstream *istream)
{
- uint match_len;
- uint match_offset;
+ unsigned match_len;
+ unsigned match_offset;
u8 match_sym = (u8)huffsym;
u8 len_hdr = match_sym & 0xf;
u8 offset_bsr = match_sym >> 4;
int ret;
u8 *match_dest;
u8 *match_src;
- uint i;
+ unsigned i;
ret = bitstream_read_bits(istream, offset_bsr, &match_offset);
if (ret != 0)
return -1;
match_len = ret;
if (match_len == 0xff) {
-
ret = bitstream_read_byte(istream);
if (ret == -1)
return -1;
}
match_len += XPRESS_MIN_MATCH;
-
/* Verify that the match is in the bounds of the part of the window
* currently in use, then copy the source of the match to the current
* position. */
match_src = match_dest - match_offset;
if (window_pos + match_len > window_len) {
- ERROR("XPRESS dedecompression error: match of length %d "
+ ERROR("XPRESS decompression error: match of length %d "
"bytes overflows window", match_len);
return -1;
}
/* Decodes the Huffman-encoded matches and literal bytes in a block of
* XPRESS-encoded data. */
-static int xpress_decompress_literals(struct input_bitstream *istream,
- u8 uncompressed_data[],
- uint uncompressed_len,
- const u8 lens[],
+static int xpress_decompress_literals(struct input_bitstream *istream,
+ u8 uncompressed_data[],
+ unsigned uncompressed_len,
+ const u8 lens[],
const u16 decode_table[])
{
- uint curpos = 0;
- uint huffsym;
+ unsigned curpos = 0;
+ unsigned huffsym;
int match_len;
- int ret;
+ int ret = 0;
while (curpos < uncompressed_len) {
- ret = read_huffsym(istream, decode_table, lens,
- XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS, &huffsym,
- XPRESS_MAX_CODEWORD_LEN);
+ ret = read_huffsym(istream, decode_table, lens,
+ XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS,
+ &huffsym, XPRESS_MAX_CODEWORD_LEN);
if (ret != 0)
- return ret;
+ break;
if (huffsym < XPRESS_NUM_CHARS) {
uncompressed_data[curpos++] = huffsym;
} else {
- match_len = xpress_decode_match(huffsym, curpos,
- uncompressed_len,
- uncompressed_data, istream);
- if (match_len == -1)
- return 1;
+ match_len = xpress_decode_match(huffsym,
+ curpos,
+ uncompressed_len,
+ uncompressed_data,
+ istream);
+ if (match_len == -1) {
+ ret = 1;
+ break;
+ }
curpos += match_len;
}
}
- return 0;
+ return ret;
}
-int xpress_decompress(const void *__compressed_data, uint compressed_len,
- void *uncompressed_data, uint uncompressed_len)
+int xpress_decompress(const void *__compressed_data, unsigned compressed_len,
+ void *uncompressed_data, unsigned uncompressed_len)
{
u8 lens[XPRESS_NUM_SYMBOLS];
u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS];
struct input_bitstream istream;
u8 *lens_p;
const u8 *compressed_data;
- uint i;
+ unsigned i;
int ret;
compressed_data = __compressed_data;
if (ret != 0)
return ret;
- init_input_bitstream(&istream, compressed_data + XPRESS_NUM_SYMBOLS / 2,
+ init_input_bitstream(&istream, compressed_data + XPRESS_NUM_SYMBOLS / 2,
compressed_len - XPRESS_NUM_SYMBOLS / 2);
- return xpress_decompress_literals(&istream, uncompressed_data,
- uncompressed_len, lens, decode_table);
+ return xpress_decompress_literals(&istream, uncompressed_data,
+ uncompressed_len, lens,
+ decode_table);
}