X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fdecompress.c;h=6ca0f1616c767bb4b4b8fcb7abf343478b4b4c7a;hb=c6a1140e085f633273fcf47a6462bd9382ce118a;hp=9a08621aea7e5b53aa93870537906c6109cf03b8;hpb=e78f0d2a97781f3c5f242417c747fdc4eef43cdf;p=wimlib

diff --git a/src/decompress.c b/src/decompress.c
index 9a08621a..6ca0f161 100644
--- a/src/decompress.c
+++ b/src/decompress.c
@@ -5,7 +5,7 @@
  */
 
 /*
- * Copyright (C) 2012 Eric Biggers
+ * Copyright (C) 2012, 2013 Eric Biggers
  *
  * This file is part of wimlib, a library for working with WIM files.
  *
@@ -26,61 +26,20 @@
 #include "decompress.h"
 #include <string.h>
 
-/* Reads @n bytes from the bitstream @stream into the location pointed to by @dest.
- * The bitstream must be 16-bit aligned. */
-int bitstream_read_bytes(struct input_bitstream *stream, size_t n, void *dest)
-{
-	/* Precondition:  The bitstream is 16-byte aligned. */
-	wimlib_assert2(stream->bitsleft % 16 == 0);
-
-	u8 *p = dest;
-
-	/* Get the bytes currently in the buffer variable. */
-	while (stream->bitsleft != 0) {
-		if (n-- == 0)
-			return 0;
-		*p++ = bitstream_peek_bits(stream, 8);
-		bitstream_remove_bits(stream, 8);
-	}
-
-	/* Get the rest directly from the pointer to the data.  Of course, it's
-	 * necessary to check there are really n bytes available. */
-	if (n > stream->data_bytes_left) {
-		ERROR("Unexpected end of input when reading %zu bytes from "
-		      "bitstream (only have %u bytes left)",
-		      n, stream->data_bytes_left);
-		return 1;
-	}
-	memcpy(p, stream->data, n);
-	stream->data += n;
-	stream->data_bytes_left -= n;
-
-	/* It's possible to copy an odd number of bytes and leave the stream in
-	 * an inconsistent state. Fix it by reading the next byte, if it is
-	 * there. */
-	if ((n & 1) && stream->data_bytes_left != 0) {
-		stream->bitsleft = 8;
-		stream->data_bytes_left--;
-		stream->bitbuf |= (input_bitbuf_t)(*stream->data) <<
-					(sizeof(input_bitbuf_t) * 8 - 8);
-		stream->data++;
-	}
-	return 0;
-}
-
 /*
- * Builds a fast huffman decoding table from an array that gives the length of
- * the codeword for each symbol in the alphabet.  Originally based on code
- * written by David Tritscher (taken the original LZX decompression code); also
- * heavily modified to add some optimizations used in the zlib code, as well as
- * more comments.
+ * make_huffman_decode_table: - Builds a fast huffman decoding table from an
+ * array that gives the length of the codeword for each symbol in the alphabet.
+ * Originally based on code written by David Tritscher (taken the original LZX
+ * decompression code); also heavily modified to add some optimizations used in
+ * the zlib code, as well as more comments.
  *
  * @decode_table:	The array in which to create the fast huffman decoding
  * 			table.  It must have a length of at least
  * 			(2**table_bits) + 2 * num_syms to guarantee
  * 			that there is enough space.
  *
- * @num_syms: 		Total number of symbols in the Huffman tree.
+ * @num_syms: 		Number of symbols in the alphabet, including symbols
+ *			that do not appear in this particular input chunk.
  *
  * @table_bits:		Any symbols with a code length of table_bits or less can
  * 			be decoded in one lookup of the table.  2**table_bits
@@ -88,7 +47,7 @@ int bitstream_read_bytes(struct input_bitstream *stream, size_t n, void *dest)
  * 			any Huffman codes longer than @table_bits.
  *
  * @lens:		An array of length @num_syms, indexable by symbol, that
- * 			gives the length of the Huffman codeward for that
+ * 			gives the length of the Huffman codeword for that
  * 			symbol.  Because the Huffman tree is in canonical form,
  * 			it can be reconstructed by only knowing the length of
  * 			the codeword for each symbol.  It is assumed, but not
@@ -130,9 +89,10 @@ int bitstream_read_bytes(struct input_bitstream *stream, size_t n, void *dest)
  * indices into the decoding table, and symbol entries are distinguished from
  * pointers by the fact that values less than @num_syms must be symbol values.
  */
-int make_huffman_decode_table(u16 decode_table[],  unsigned num_syms,
-			      unsigned table_bits, const u8 lens[],
-			      unsigned max_codeword_len)
+int
+make_huffman_decode_table(u16 decode_table[],  unsigned num_syms,
+			  unsigned table_bits, const u8 lens[],
+			  unsigned max_codeword_len)
 {
 	unsigned len_counts[max_codeword_len + 1];
 	u16 sorted_syms[num_syms];
@@ -205,28 +165,37 @@ int make_huffman_decode_table(u16 decode_table[],  unsigned num_syms,
 			break;
 
 		unsigned num_entries = 1 << (table_bits - codeword_len);
-		if (num_entries >=
-			(sizeof(unsigned long) / sizeof(decode_table[0])))
-		{
-			wimlib_assert2(decode_table_pos % 4 == 0);
+		const unsigned entries_per_long = sizeof(unsigned long) /
+						  sizeof(decode_table[0]);
+		if (num_entries >= entries_per_long) {
+			/* Fill in the Huffman decode table entries one unsigned
+			 * long at a time.  On 32-bit systems this is 2 entries
+			 * per store, while on 64-bit systems this is 4 entries
+			 * per store. */
+			wimlib_assert2(decode_table_pos % entries_per_long == 0);
 			BUILD_BUG_ON(sizeof(unsigned long) != 4 &&
 				     sizeof(unsigned long) != 8);
 
 			unsigned long *p = (unsigned long *)&decode_table[decode_table_pos];
-			unsigned long n = num_entries /
-						(sizeof(unsigned long) /
-							sizeof(decode_table[0]));
+			unsigned n = num_entries / entries_per_long;
 			unsigned long v = sym;
 			if (sizeof(unsigned long) >= 4)
 				v |= v << 16;
-			if (sizeof(unsigned long) >= 8)
+			if (sizeof(unsigned long) >= 8) {
+				/* This may produce a compiler warning if an
+				 * unsigned long is 32 bits, but this won't be
+				 * executed unless an unsigned long is at least
+				 * 64 bits anyway. */
 				v |= v << 32;
+			}
 			do {
 				*p++ = v;
 			} while (--n);
 
 			decode_table_pos += num_entries;
 		} else {
+			/* Fill in the Huffman decode table entries one 16-bit
+			 * integer at a time. */
 			do {
 				decode_table[decode_table_pos++] = sym;
 			} while (--num_entries);
@@ -277,7 +246,6 @@ int make_huffman_decode_table(u16 decode_table[],  unsigned num_syms,
 		unsigned sym = sorted_syms[i];
 		unsigned codeword_len = lens[sym];
 		unsigned extra_bits = codeword_len - table_bits;
-		unsigned extra_mask;
 
 		cur_codeword <<= (codeword_len - prev_codeword_len);
 		prev_codeword_len = codeword_len;
@@ -325,14 +293,15 @@ int make_huffman_decode_table(u16 decode_table[],  unsigned num_syms,
 	return 0;
 }
 
-/* Reads a Huffman-encoded symbol when it is known there are less than
- * MAX_CODE_LEN bits remaining in the bitstream. */
-int read_huffsym_near_end_of_input(struct input_bitstream *istream,
-				   const u16 decode_table[],
-				   const u8 lens[],
-				   unsigned num_syms,
-				   unsigned table_bits,
-				   unsigned *n)
+/* Reads a Huffman-encoded symbol from the bistream when the number of remaining
+ * bits is less than the maximum codeword length. */
+int
+read_huffsym_near_end_of_input(struct input_bitstream *istream,
+			       const u16 decode_table[],
+			       const u8 lens[],
+			       unsigned num_syms,
+			       unsigned table_bits,
+			       unsigned *n)
 {
 	unsigned bitsleft = istream->bitsleft;
 	unsigned key_size;
@@ -356,7 +325,7 @@ int read_huffsym_near_end_of_input(struct input_bitstream *istream,
 		do {
 			if (bitsleft == 0) {
 				ERROR("Input stream exhausted");
-				return 1;
+				return -1;
 			}
 			key_bits = sym + bitstream_peek_bits(istream, 1);
 			bitstream_remove_bits(istream, 1);