X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fxpress-decomp.c;h=ce240a09806a43cab6ea23aff13a3934ddf0e13a;hp=267720e2a8327e2fd5bfcb45a9ed3651410f6f09;hb=00614e1689f0314ad221f5b4f864ae0ab4c667a4;hpb=885632f08c75c1d7bb5d25436231c78f6ad7e0c0 diff --git a/src/xpress-decomp.c b/src/xpress-decomp.c index 267720e2..ce240a09 100644 --- a/src/xpress-decomp.c +++ b/src/xpress-decomp.c @@ -2,43 +2,44 @@ * xpress-decomp.c * * XPRESS decompression routines. + */ + +/* * * Copyright (C) 2012 Eric Biggers * - * wimlib - Library for working with WIM files + * This file is part of wimlib, a library for working with WIM files. * - * This library is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) any - * later version. + * wimlib is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. * - * This library is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A - * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. + * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. * - * You should have received a copy of the GNU Lesser General Public License along - * with this library; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the GNU General Public License + * along with wimlib; if not, see http://www.gnu.org/licenses/. */ /* - * The XPRESS compression format is a LZ77-based algorithm. That means it is - * quite similar to LZX compression, but XPRESS is slightly simpler, so it is a - * little faster to compress and decompress. + * The XPRESS compression format is a LZ77 and Huffman-code based algorithm. + * That means it is quite similar to LZX compression, but XPRESS is slightly + * simpler, so it is a little faster to compress and decompress. * * The XPRESS compression format is mostly documented in a file called "[MS-XCA] * Xpress Compression Algorithm". In the MSDN library, it can currently be * found under Open Specifications => Protocols => Windows Protocols => Windows - * Server Protocols => [MS-XCA] Xpress Compression Algorithm". Note that - * Microsoft apparently also has either a slightly different format or an - * entirely different format that is also called XPRESS. The other one is - * supposedly used in Windows' hibernation file or something, but the one used - * in WIM files is the one described in the above document. + * Server Protocols => [MS-XCA] Xpress Compression Algorithm". The format in + * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm" + * (there apparently are some other versions of XPRESS as well). * * If you are already familiar with the LZ77 algorithm and Huffman coding, the - * XPRESS format is pretty simple. The compressed data begins with 256 bytes + * XPRESS format is fairly simple. The compressed data begins with 256 bytes * that contain 512 4-bit integers that are the lengths of the symbols in the * Huffman tree used for decoding compressed literals. This is the only Huffman * tree that is used for the entirety of the compressed data, and the codeword @@ -55,7 +56,7 @@ * The trickiest part is probably the fact that literal bytes for match lengths * are encoded "separately" from the bitstream. * - * Also, a caveat--- according to M$'s documentation for XPRESS, + * Also, a caveat--- according to Microsoft's documentation for XPRESS, * * "Some implementation of the decompression algorithm expect an extra * symbol to mark the end of the data. Specifically, some implementations @@ -66,7 +67,7 @@ * the end. Otherwise Microsoft's software will fail to decompress the * XPRESS-compressed data. * - * Howeve, WIMLIB's decompressor in xpress-decomp.c currently does not care if + * Howeve, wimlib's decompressor in xpress-decomp.c currently does not care if * this extra symbol is there or not. */ @@ -77,24 +78,31 @@ #define XPRESS_DECOMP #include "decomp.h" -#include "huffman.h" - - -/* Decodes @huffsym, a value >= XPRESS_NUM_CHARS, that is the header of a match. - * */ -static int xpress_decode_match(int huffsym, uint window_pos, uint window_len, - u8 window[], struct input_bitstream *istream) +/* + * Decodes a symbol @huffsym that begins an XPRESS match. + * + * The low 8 bits of the symbol are divided into: + * + * bits 0-3: length header + * bits 4-7: index of high-order bit of match offset + * + * Note: taking the low 8 bits of the symbol is the same as subtracting 256, the + * number of symbols reserved for literals. + */ +static int xpress_decode_match(int huffsym, unsigned window_pos, + unsigned window_len, u8 window[], + struct input_bitstream *istream) { - uint match_len; - uint match_offset; + unsigned match_len; + unsigned match_offset; u8 match_sym = (u8)huffsym; u8 len_hdr = match_sym & 0xf; u8 offset_bsr = match_sym >> 4; int ret; u8 *match_dest; u8 *match_src; - uint i; + unsigned i; ret = bitstream_read_bits(istream, offset_bsr, &match_offset); if (ret != 0) @@ -107,7 +115,6 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len, return -1; match_len = ret; if (match_len == 0xff) { - ret = bitstream_read_byte(istream); if (ret == -1) return -1; @@ -128,7 +135,6 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len, } match_len += XPRESS_MIN_MATCH; - /* Verify that the match is in the bounds of the part of the window * currently in use, then copy the source of the match to the current * position. */ @@ -137,16 +143,15 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len, match_src = match_dest - match_offset; if (window_pos + match_len > window_len) { - ERROR("XPRESS dedecompression error: match of length %d " - "bytes overflows window\n", match_len); + ERROR("XPRESS decompression error: match of length %d " + "bytes overflows window", match_len); return -1; } if (match_src < window) { ERROR("XPRESS decompression error: match of length %d bytes " - "references data before window (match_offset = " - "%d, window_pos = %d)\n", match_len, - match_offset, window_pos); + "references data before window (match_offset = %d, " + "window_pos = %d)", match_len, match_offset, window_pos); return -1; } @@ -158,55 +163,59 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len, /* Decodes the Huffman-encoded matches and literal bytes in a block of * XPRESS-encoded data. */ -static int xpress_decompress_literals(struct input_bitstream *istream, - u8 uncompressed_data[], - uint uncompressed_len, - const u8 lens[], +static int xpress_decompress_literals(struct input_bitstream *istream, + u8 uncompressed_data[], + unsigned uncompressed_len, + const u8 lens[], const u16 decode_table[]) { - uint curpos = 0; - uint huffsym; + unsigned curpos = 0; + unsigned huffsym; int match_len; - int ret; + int ret = 0; while (curpos < uncompressed_len) { - ret = read_huffsym(istream, decode_table, lens, - XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS, &huffsym, - XPRESS_MAX_CODEWORD_LEN); + ret = read_huffsym(istream, decode_table, lens, + XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS, + &huffsym, XPRESS_MAX_CODEWORD_LEN); if (ret != 0) - return ret; + break; if (huffsym < XPRESS_NUM_CHARS) { uncompressed_data[curpos++] = huffsym; } else { - match_len = xpress_decode_match(huffsym, curpos, - uncompressed_len, - uncompressed_data, istream); - if (match_len == -1) - return 1; + match_len = xpress_decode_match(huffsym, + curpos, + uncompressed_len, + uncompressed_data, + istream); + if (match_len == -1) { + ret = 1; + break; + } curpos += match_len; } } - return 0; + return ret; } -int xpress_decompress(const void *__compressed_data, uint compressed_len, - void *uncompressed_data, uint uncompressed_len) +int xpress_decompress(const void *__compressed_data, unsigned compressed_len, + void *uncompressed_data, unsigned uncompressed_len) { u8 lens[XPRESS_NUM_SYMBOLS]; u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS]; struct input_bitstream istream; u8 *lens_p; const u8 *compressed_data; - uint i; + unsigned i; int ret; compressed_data = __compressed_data; lens_p = lens; - DEBUG2("compressed_len = %d, uncompressed_len = %d\n", - compressed_len, uncompressed_len); + DEBUG2("compressed_len = %d, uncompressed_len = %d", + compressed_len, uncompressed_len); /* XPRESS uses only one Huffman tree. It contains 512 symbols, and the * code lengths of these symbols are given literally as 4-bit integers @@ -226,9 +235,10 @@ int xpress_decompress(const void *__compressed_data, uint compressed_len, if (ret != 0) return ret; - init_input_bitstream(&istream, compressed_data + XPRESS_NUM_SYMBOLS / 2, - compressed_len - XPRESS_NUM_SYMBOLS / 2); + init_input_bitstream(&istream, compressed_data + XPRESS_NUM_SYMBOLS / 2, + compressed_len - XPRESS_NUM_SYMBOLS / 2); - return xpress_decompress_literals(&istream, uncompressed_data, - uncompressed_len, lens, decode_table); + return xpress_decompress_literals(&istream, uncompressed_data, + uncompressed_len, lens, + decode_table); }