X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fxpress-comp.c;h=aae2a4a002f7a5155ffd792cbdedfcd32f25fb23;hp=9252eee1af21e9e0e9e8826645859d0a278d5643;hb=277957c10e96a23f822b2e6ae22c8d126a93141a;hpb=81ea19151423fa87b8698dd3fa8a5274066a76c2

diff --git a/src/xpress-comp.c b/src/xpress-comp.c
index 9252eee1..aae2a4a0 100644
--- a/src/xpress-comp.c
+++ b/src/xpress-comp.c
@@ -3,27 +3,28 @@
  *
  * XPRESS compression routines.
  *
+ * See the comments in xpress-decomp.c about the XPRESS format.
+ */
+
+/*
  * Copyright (C) 2012 Eric Biggers
  *
- * wimlib - Library for working with WIM files 
+ * This file is part of wimlib, a library for working with WIM files.
  *
- * This library is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License as published by the Free
- * Software Foundation; either version 2.1 of the License, or (at your option) any
- * later version.
+ * wimlib is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
  *
- * This library is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
- * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
+ * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
  *
- * You should have received a copy of the GNU Lesser General Public License along
- * with this library; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place, Suite 330, Boston, MA 02111-1307 USA 
+ * You should have received a copy of the GNU General Public License
+ * along with wimlib; if not, see http://www.gnu.org/licenses/.
  */
 
-/* See the comments in xpress-decomp.c about the XPRESS format. */
-
-
 #include "xpress.h"
 #include "comp.h"
 #include <stdlib.h>
@@ -45,13 +46,13 @@ static inline u32 bsr32(u32 n)
 }
 
 
-/* 
+/*
  * Writes @match, which is a match given in the intermediate representation for
  * XPRESS matches, to the output stream @ostream.
  *
  * @codewords and @lens provide the Huffman code that is being used.
  */
-static int xpress_write_match(struct output_bitstream *ostream, u32 match, 
+static int xpress_write_match(struct output_bitstream *ostream, u32 match,
 			      const u16 codewords[], const u8 lens[])
 {
 	uint main_sym;
@@ -86,14 +87,14 @@ static int xpress_write_match(struct output_bitstream *ostream, u32 match,
 				return ret;
 		}
 	}
-	return bitstream_put_bits(ostream, match_offset ^ (1 << offset_bsr), 
+	return bitstream_put_bits(ostream, match_offset ^ (1 << offset_bsr),
 							offset_bsr);
 }
 
-static int xpress_write_compressed_literals(struct output_bitstream *ostream, 
-					    const u32 match_tab[], 
+static int xpress_write_compressed_literals(struct output_bitstream *ostream,
+					    const u32 match_tab[],
 					    uint num_matches,
-					    const u16 codewords[], 
+					    const u16 codewords[],
 					    const u8 lens[])
 {
 	uint i;
@@ -102,19 +103,14 @@ static int xpress_write_compressed_literals(struct output_bitstream *ostream,
 
 	for (i = 0; i < num_matches; i++) {
 		match = match_tab[i];
-		if (match >= XPRESS_NUM_CHARS) {
-			/* match */
-			ret = xpress_write_match(ostream, match, codewords, 
+		if (match >= XPRESS_NUM_CHARS) /* match */
+			ret = xpress_write_match(ostream, match, codewords,
 						 lens);
-			if (ret != 0)
-				return ret;
-		} else {
-			/* literal byte */
-			ret = bitstream_put_bits(ostream, codewords[match], 
+		else /* literal byte */
+			ret = bitstream_put_bits(ostream, codewords[match],
 						 lens[match]);
-			if (ret != 0)
-				return ret;
-		}
+		if (ret != 0)
+			return ret;
 	}
 	return bitstream_put_bits(ostream, codewords[256], lens[256]);
 }
@@ -126,16 +122,16 @@ static u32 xpress_record_literal(u8 literal, void *__freq_tab)
 	return literal;
 }
 
-static u32 xpress_record_match(uint match_offset, uint match_len, 
-						void *__freq_tab, void *ignore)
+static u32 xpress_record_match(uint match_offset, uint match_len,
+			       void *__freq_tab, void *ignore)
 {
 	u32 *freq_tab = __freq_tab;
 	u32 len_hdr;
 	u32 offset_bsr;
 	u32 match;
 
-	wimlib_assert(match_len >= XPRESS_MIN_MATCH && 
-					match_len <= XPRESS_MAX_MATCH);
+	wimlib_assert(match_len >= XPRESS_MIN_MATCH &&
+		      match_len <= XPRESS_MAX_MATCH);
 	wimlib_assert(match_offset > 0);
 
 	len_hdr = min(match_len - XPRESS_MIN_MATCH, 15);
@@ -157,7 +153,7 @@ static const struct lz_params xpress_lz_params = {
 	.too_far        = 4096,
 };
 
-/* 
+/*
  * Performs XPRESS compression on a block of data.
  *
  * @__uncompressed_data:  Pointer to the data to be compressed.
@@ -172,7 +168,7 @@ static const struct lz_params xpress_lz_params = {
  * @compressed_data and @compressed_len_ret will contain the compressed data and
  * its length.  A return value of nonzero means that compressing the data did
  * not reduce its size, and @compressed_data will not contain the full
- * compressed data. 
+ * compressed data.
  */
 int xpress_compress(const void *__uncompressed_data, uint uncompressed_len,
 		    void *__compressed_data, uint *compressed_len_ret)
@@ -189,25 +185,25 @@ int xpress_compress(const void *__uncompressed_data, uint uncompressed_len,
 	uint i;
 	int ret;
 
-	XPRESS_DEBUG("uncompressed_len = %u\n", uncompressed_len);
+	XPRESS_DEBUG("uncompressed_len = %u", uncompressed_len);
 
 	if (uncompressed_len < 300)
 		return 1;
 
 	ZERO_ARRAY(freq_tab);
 
-	num_matches = lz_analyze_block(uncompressed_data, uncompressed_len, 
-					match_tab, xpress_record_match, 
-					xpress_record_literal, freq_tab, 
-					NULL, freq_tab,
-					&xpress_lz_params);
+	num_matches = lz_analyze_block(uncompressed_data, uncompressed_len,
+				       match_tab, xpress_record_match,
+				       xpress_record_literal, freq_tab,
+				       NULL, freq_tab,
+				       &xpress_lz_params);
 
-	XPRESS_DEBUG("using %u matches\n", num_matches);
+	XPRESS_DEBUG("using %u matches", num_matches);
 
 	freq_tab[256]++;
 
 	make_canonical_huffman_code(XPRESS_NUM_SYMBOLS, XPRESS_MAX_CODEWORD_LEN,
-							freq_tab, lens, codewords);
+				    freq_tab, lens, codewords);
 
 	/* IMPORTANT NOTE:
 	 *
@@ -216,7 +212,7 @@ int xpress_compress(const void *__uncompressed_data, uint uncompressed_len,
 	 * bitstream_put_bits() will output 2 bytes at a time in little-endian
 	 * order, which is the order that is needed for the compressed literals.
 	 * However, the bytes in the lengths table are in order, so they need to
-	 * be written one at a time without using bitstream_put_bits(). 
+	 * be written one at a time without using bitstream_put_bits().
 	 *
 	 * Because of this, init_output_bitstream() is not called until after
 	 * the lengths table is output.
@@ -224,47 +220,70 @@ int xpress_compress(const void *__uncompressed_data, uint uncompressed_len,
 	for (i = 0; i < XPRESS_NUM_SYMBOLS; i += 2)
 		*compressed_data++ = (lens[i] & 0xf) | (lens[i + 1] << 4);
 
-	init_output_bitstream(&ostream, compressed_data, uncompressed_len - 
-						XPRESS_NUM_SYMBOLS / 2 - 1);
+	init_output_bitstream(&ostream, compressed_data,
+			      uncompressed_len - XPRESS_NUM_SYMBOLS / 2 - 1);
 
-	ret = xpress_write_compressed_literals(&ostream, match_tab, num_matches,
-					       codewords, lens);
+	ret = xpress_write_compressed_literals(&ostream, match_tab,
+					       num_matches, codewords, lens);
 	if (ret != 0)
 		return ret;
 
+	/* Flush any bits that are buffered. */
 	ret = flush_output_bitstream(&ostream);
 	if (ret != 0)
 		return ret;
 
+	/* Assert that there are no output bytes between the ostream.output
+	 * pointer and the ostream.next_bit_output pointer.  This can only
+	 * happen if bytes had been written at the ostream.output pointer before
+	 * the last bit word was written to the stream.  But, this does not
+	 * occur since xpress_write_match() always finishes by writing some bits
+	 * (a Huffman symbol), and the bitstream was just flushed. */
+	wimlib_assert(ostream.output - ostream.next_bit_output == 2);
+
+	/*
+	 * The length of the compressed data is supposed to be the value of the
+	 * ostream.output pointer before flushing, which is now the
+	 * output.next_bit_output pointer after flushing.
+	 *
+	 * There will be an extra 2 bytes at the ostream.bit_output pointer,
+	 * which is zeroed out.  (These 2 bytes may be either the last bytes in
+	 * the compressed data, in which case they are actually unnecessary, or
+	 * they may precede a number of bytes embedded into the bitstream.)
+	 */
+	if (ostream.bit_output >
+	    (const u8*)__compressed_data + uncompressed_len - 3)
+		return 1;
+	*(u16*)ostream.bit_output = cpu_to_le16(0);
+	compressed_len = ostream.next_bit_output - (const u8*)__compressed_data;
 
-	compressed_len = ostream.output - (u8*)__compressed_data;
+	wimlib_assert(compressed_len <= uncompressed_len - 1);
 
-	XPRESS_DEBUG("Compressed %u => %u bytes\n",
-			uncompressed_len, compressed_len);
+	XPRESS_DEBUG("Compressed %u => %u bytes",
+		     uncompressed_len, compressed_len);
 
 	*compressed_len_ret = compressed_len;
 
 #ifdef ENABLE_VERIFY_COMPRESSION
 	/* Verify that we really get the same thing back when decompressing. */
-	XPRESS_DEBUG("Verifying the compressed data.\n");
+	XPRESS_DEBUG("Verifying the compressed data.");
 	u8 buf[uncompressed_len];
-	ret = xpress_decompress(__compressed_data, compressed_len, buf, 
+	ret = xpress_decompress(__compressed_data, compressed_len, buf,
 				uncompressed_len);
 	if (ret != 0) {
-		fprintf(stderr, "xpress_compress(): Failed to decompress data "
-				"we compressed!\n");
+		ERROR("xpress_compress(): Failed to decompress data we "
+		      "compressed");
 		abort();
 	}
 	for (i = 0; i < uncompressed_len; i++) {
 		if (buf[i] != uncompressed_data[i]) {
-			fprintf(stderr, "xpress_compress(): Data we compressed "
-					"didn't decompress to the original data "
-					"(difference at byte %u of %u)\n", 
-					i + 1, uncompressed_len);
+			ERROR("xpress_compress(): Data we compressed didn't "
+			      "decompress to the original data (difference at "
+			      "byte %u of %u)", i + 1, uncompressed_len);
 			abort();
 		}
 	}
-	XPRESS_DEBUG("Compression verified to be correct.\n");
+	XPRESS_DEBUG("Compression verified to be correct.");
 #endif
 
 	return 0;