X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fxpress-decomp.c;h=ce240a09806a43cab6ea23aff13a3934ddf0e13a;hp=267720e2a8327e2fd5bfcb45a9ed3651410f6f09;hb=00614e1689f0314ad221f5b4f864ae0ab4c667a4;hpb=885632f08c75c1d7bb5d25436231c78f6ad7e0c0

diff --git a/src/xpress-decomp.c b/src/xpress-decomp.c
index 267720e2..ce240a09 100644
--- a/src/xpress-decomp.c
+++ b/src/xpress-decomp.c
@@ -2,43 +2,44 @@
  * xpress-decomp.c
  *
  * XPRESS decompression routines.
+ */
+
+/*
  *
  * Copyright (C) 2012 Eric Biggers
  *
- * wimlib - Library for working with WIM files 
+ * This file is part of wimlib, a library for working with WIM files.
  *
- * This library is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by the Free
- * Software Foundation; either version 2.1 of the License, or (at your option) any
- * later version.
+ * wimlib is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
  *
- * This library is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
- * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
+ * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
  *
- * You should have received a copy of the GNU Lesser General Public License along
- * with this library; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place, Suite 330, Boston, MA 02111-1307 USA 
+ * You should have received a copy of the GNU General Public License
+ * along with wimlib; if not, see http://www.gnu.org/licenses/.
  */
 
 
 
 /*
- * The XPRESS compression format is a LZ77-based algorithm.  That means it is
- * quite similar to LZX compression, but XPRESS is slightly simpler, so it is a
- * little faster to compress and decompress.
+ * The XPRESS compression format is a LZ77 and Huffman-code based algorithm.
+ * That means it is quite similar to LZX compression, but XPRESS is slightly
+ * simpler, so it is a little faster to compress and decompress.
  *
  * The XPRESS compression format is mostly documented in a file called "[MS-XCA]
  * Xpress Compression Algorithm".  In the MSDN library, it can currently be
  * found under Open Specifications => Protocols => Windows Protocols => Windows
- * Server Protocols => [MS-XCA] Xpress Compression Algorithm".  Note that
- * Microsoft apparently also has either a slightly different format or an
- * entirely different format that is also called XPRESS.  The other one is
- * supposedly used in Windows' hibernation file or something, but the one used
- * in WIM files is the one described in the above document.
+ * Server Protocols => [MS-XCA] Xpress Compression Algorithm".  The format in
+ * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm"
+ * (there apparently are some other versions of XPRESS as well).
  *
  * If you are already familiar with the LZ77 algorithm and Huffman coding, the
- * XPRESS format is pretty simple.  The compressed data begins with 256 bytes
+ * XPRESS format is fairly simple.  The compressed data begins with 256 bytes
  * that contain 512 4-bit integers that are the lengths of the symbols in the
  * Huffman tree used for decoding compressed literals.  This is the only Huffman
  * tree that is used for the entirety of the compressed data, and the codeword
@@ -55,7 +56,7 @@
  * The trickiest part is probably the fact that literal bytes for match lengths
  * are encoded "separately" from the bitstream.
  *
- * Also, a caveat--- according to M$'s documentation for XPRESS,
+ * Also, a caveat--- according to Microsoft's documentation for XPRESS,
  *
  * 	"Some implementation of the decompression algorithm expect an extra
  * 	symbol to mark the end of the data.  Specifically, some implementations
@@ -66,7 +67,7 @@
  * the end.  Otherwise Microsoft's software will fail to decompress the
  * XPRESS-compressed data.
  *
- * Howeve, WIMLIB's decompressor in xpress-decomp.c currently does not care if
+ * Howeve, wimlib's decompressor in xpress-decomp.c currently does not care if
  * this extra symbol is there or not.
  */
 
@@ -77,24 +78,31 @@
 #define XPRESS_DECOMP
 #include "decomp.h"
 
-#include "huffman.h"
-
 
-
-/* Decodes @huffsym, a value >= XPRESS_NUM_CHARS, that is the header of a match.
- * */
-static int xpress_decode_match(int huffsym, uint window_pos, uint window_len, 
-				u8 window[], struct input_bitstream *istream)
+/*
+ * Decodes a symbol @huffsym that begins an XPRESS match.
+ *
+ * The low 8 bits of the symbol are divided into:
+ *
+ * bits 0-3:  length header
+ * bits 4-7:  index of high-order bit of match offset
+ *
+ * Note: taking the low 8 bits of the symbol is the same as subtracting 256, the
+ * number of symbols reserved for literals.
+ */
+static int xpress_decode_match(int huffsym, unsigned window_pos,
+			       unsigned window_len, u8 window[],
+			       struct input_bitstream *istream)
 {
-	uint match_len;
-	uint match_offset;
+	unsigned match_len;
+	unsigned match_offset;
 	u8 match_sym = (u8)huffsym;
 	u8 len_hdr = match_sym & 0xf;
 	u8 offset_bsr = match_sym >> 4;
 	int ret;
 	u8 *match_dest;
 	u8 *match_src;
-	uint i;
+	unsigned i;
 
 	ret = bitstream_read_bits(istream, offset_bsr, &match_offset);
 	if (ret != 0)
@@ -107,7 +115,6 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len,
 			return -1;
 		match_len = ret;
 		if (match_len == 0xff) {
-
 			ret = bitstream_read_byte(istream);
 			if (ret == -1)
 				return -1;
@@ -128,7 +135,6 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len,
 	}
 	match_len += XPRESS_MIN_MATCH;
 
-
 	/* Verify that the match is in the bounds of the part of the window
 	 * currently in use, then copy the source of the match to the current
 	 * position. */
@@ -137,16 +143,15 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len,
 	match_src = match_dest - match_offset;
 
 	if (window_pos + match_len > window_len) {
-		ERROR("XPRESS dedecompression error: match of length %d "
-				"bytes overflows window\n", match_len);
+		ERROR("XPRESS decompression error: match of length %d "
+		      "bytes overflows window", match_len);
 		return -1;
 	}
 
 	if (match_src < window) {
 		ERROR("XPRESS decompression error: match of length %d bytes "
-				"references data before window (match_offset = "
-				"%d, window_pos = %d)\n", match_len,
-				match_offset, window_pos);
+		      "references data before window (match_offset = %d, "
+		      "window_pos = %d)", match_len, match_offset, window_pos);
 		return -1;
 	}
 
@@ -158,55 +163,59 @@ static int xpress_decode_match(int huffsym, uint window_pos, uint window_len,
 
 /* Decodes the Huffman-encoded matches and literal bytes in a block of
  * XPRESS-encoded data. */
-static int xpress_decompress_literals(struct input_bitstream *istream, 
-				      u8 uncompressed_data[], 
-				      uint uncompressed_len, 
-				      const u8 lens[], 
+static int xpress_decompress_literals(struct input_bitstream *istream,
+				      u8 uncompressed_data[],
+				      unsigned uncompressed_len,
+				      const u8 lens[],
 				      const u16 decode_table[])
 {
-	uint curpos = 0;
-	uint huffsym;
+	unsigned curpos = 0;
+	unsigned huffsym;
 	int match_len;
-	int ret;
+	int ret = 0;
 
 	while (curpos < uncompressed_len) {
-		ret = read_huffsym(istream, decode_table, lens, 
-				XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS, &huffsym,
-				XPRESS_MAX_CODEWORD_LEN);
+		ret = read_huffsym(istream, decode_table, lens,
+				   XPRESS_NUM_SYMBOLS, XPRESS_TABLEBITS,
+				   &huffsym, XPRESS_MAX_CODEWORD_LEN);
 		if (ret != 0)
-			return ret;
+			break;
 
 		if (huffsym < XPRESS_NUM_CHARS) {
 			uncompressed_data[curpos++] = huffsym;
 		} else {
-			match_len = xpress_decode_match(huffsym, curpos, 
-						uncompressed_len, 
-						uncompressed_data, istream);
-			if (match_len == -1)
-				return 1;
+			match_len = xpress_decode_match(huffsym,
+							curpos,
+							uncompressed_len,
+							uncompressed_data,
+							istream);
+			if (match_len == -1) {
+				ret = 1;
+				break;
+			}
 			curpos += match_len;
 		}
 	}
-	return 0;
+	return ret;
 }
 
 
-int xpress_decompress(const void *__compressed_data, uint compressed_len, 
-			void *uncompressed_data, uint uncompressed_len)
+int xpress_decompress(const void *__compressed_data, unsigned compressed_len,
+		      void *uncompressed_data, unsigned uncompressed_len)
 {
 	u8 lens[XPRESS_NUM_SYMBOLS];
 	u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS];
 	struct input_bitstream istream;
 	u8 *lens_p;
 	const u8 *compressed_data;
-	uint i;
+	unsigned i;
 	int ret;
 
 	compressed_data = __compressed_data;
 	lens_p = lens;
 
-	DEBUG2("compressed_len = %d, uncompressed_len = %d\n",
-			compressed_len, uncompressed_len);
+	DEBUG2("compressed_len = %d, uncompressed_len = %d",
+	       compressed_len, uncompressed_len);
 
 	/* XPRESS uses only one Huffman tree.  It contains 512 symbols, and the
 	 * code lengths of these symbols are given literally as 4-bit integers
@@ -226,9 +235,10 @@ int xpress_decompress(const void *__compressed_data, uint compressed_len,
 	if (ret != 0)
 		return ret;
 
-	init_input_bitstream(&istream, compressed_data + XPRESS_NUM_SYMBOLS / 2, 
-					compressed_len - XPRESS_NUM_SYMBOLS / 2);
+	init_input_bitstream(&istream, compressed_data + XPRESS_NUM_SYMBOLS / 2,
+			     compressed_len - XPRESS_NUM_SYMBOLS / 2);
 
-	return xpress_decompress_literals(&istream, uncompressed_data, 
-					uncompressed_len, lens, decode_table);
+	return xpress_decompress_literals(&istream, uncompressed_data,
+					  uncompressed_len, lens,
+					  decode_table);
 }