wimlib.net Git - wimlib/blobdiff - src/decompress.c
Cleanup
[wimlib] / src / decompress.c
index 9a08621aea7e5b53aa93870537906c6109cf03b8..bed7b5a486876c3d056f4888f20cd9e0cb031823 100644 (file)
 #include "decompress.h"
 #include <string.h>
 
-/* Reads @n bytes from the bitstream @stream into the location pointed to by @dest.
- * The bitstream must be 16-bit aligned. */
-int bitstream_read_bytes(struct input_bitstream *stream, size_t n, void *dest)
-{
-       /* Precondition:  The bitstream is 16-bit aligned. */
-       wimlib_assert2(stream->bitsleft % 16 == 0);
-
-       u8 *p = dest;
-
-       /* Get the bytes currently in the buffer variable. */
-       while (stream->bitsleft != 0) {
-               if (n-- == 0)
-                       return 0;
-               *p++ = bitstream_peek_bits(stream, 8);
-               bitstream_remove_bits(stream, 8);
-       }
-
-       /* Get the rest directly from the pointer to the data.  Of course, it's
-        * necessary to check there are really n bytes available. */
-       if (n > stream->data_bytes_left) {
-               ERROR("Unexpected end of input when reading %zu bytes from "
-                     "bitstream (only have %u bytes left)",
-                     n, stream->data_bytes_left);
-               return 1;
-       }
-       memcpy(p, stream->data, n);
-       stream->data += n;
-       stream->data_bytes_left -= n;
-
-       /* It's possible to copy an odd number of bytes and leave the stream in
-        * an inconsistent state. Fix it by reading the next byte, if it is
-        * there. */
-       if ((n & 1) && stream->data_bytes_left != 0) {
-               stream->bitsleft = 8;
-               stream->data_bytes_left--;
-               stream->bitbuf |= (input_bitbuf_t)(*stream->data) <<
-                                       (sizeof(input_bitbuf_t) * 8 - 8);
-               stream->data++;
-       }
-       return 0;
-}
-
 /*
- * Builds a fast huffman decoding table from an array that gives the length of
- * the codeword for each symbol in the alphabet.  Originally based on code
- * written by David Tritscher (taken the original LZX decompression code); also
- * heavily modified to add some optimizations used in the zlib code, as well as
- * more comments.
+ * make_huffman_decode_table: - Builds a fast huffman decoding table from an
+ * array that gives the length of the codeword for each symbol in the alphabet.
+ * Originally based on code written by David Tritscher (taken from the original LZX
+ * decompression code); also heavily modified to add some optimizations used in
+ * the zlib code, as well as more comments.
  *
  * @decode_table:      The array in which to create the fast huffman decoding
  *                     table.  It must have a length of at least
  *                     (2**table_bits) + 2 * num_syms to guarantee
  *                     that there is enough space.
  *
- * @num_syms:          Total number of symbols in the Huffman tree.
+ * @num_syms:          Number of symbols in the alphabet, including symbols
+ *                     that do not appear in this particular input chunk.
  *
  * @table_bits:                Any symbols with a code length of table_bits or less can
  *                     be decoded in one lookup of the table.  2**table_bits
@@ -88,7 +47,7 @@ int bitstream_read_bytes(struct input_bitstream *stream, size_t n, void *dest)
  *                     any Huffman codes longer than @table_bits.
  *
  * @lens:              An array of length @num_syms, indexable by symbol, that
- *                     gives the length of the Huffman codeward for that
+ *                     gives the length of the Huffman codeword for that
  *                     symbol.  Because the Huffman tree is in canonical form,
  *                     it can be reconstructed by only knowing the length of
  *                     the codeword for each symbol.  It is assumed, but not
@@ -205,28 +164,37 @@ int make_huffman_decode_table(u16 decode_table[],  unsigned num_syms,
                        break;
 
                unsigned num_entries = 1 << (table_bits - codeword_len);
-               if (num_entries >=
-                       (sizeof(unsigned long) / sizeof(decode_table[0])))
-               {
-                       wimlib_assert2(decode_table_pos % 4 == 0);
+               const unsigned entries_per_long = sizeof(unsigned long) /
+                                                 sizeof(decode_table[0]);
+               if (num_entries >= entries_per_long) {
+                       /* Fill in the Huffman decode table entries one unsigned
+                        * long at a time.  On 32-bit systems this is 2 entries
+                        * per store, while on 64-bit systems this is 4 entries
+                        * per store. */
+                       wimlib_assert2(decode_table_pos % entries_per_long == 0);
                        BUILD_BUG_ON(sizeof(unsigned long) != 4 &&
                                     sizeof(unsigned long) != 8);
 
                        unsigned long *p = (unsigned long *)&decode_table[decode_table_pos];
-                       unsigned long n = num_entries /
-                                               (sizeof(unsigned long) /
-                                                       sizeof(decode_table[0]));
+                       unsigned n = num_entries / entries_per_long;
                        unsigned long v = sym;
                        if (sizeof(unsigned long) >= 4)
                                v |= v << 16;
-                       if (sizeof(unsigned long) >= 8)
+                       if (sizeof(unsigned long) >= 8) {
+                               /* This may produce a compiler warning if an
+                                * unsigned long is 32 bits, but this won't be
+                                * executed unless an unsigned long is at least
+                                * 64 bits anyway. */
                                v |= v << 32;
+                       }
                        do {
                                *p++ = v;
                        } while (--n);
 
                        decode_table_pos += num_entries;
                } else {
+                       /* Fill in the Huffman decode table entries one 16-bit
+                        * integer at a time. */
                        do {
                                decode_table[decode_table_pos++] = sym;
                        } while (--num_entries);
@@ -277,7 +245,6 @@ int make_huffman_decode_table(u16 decode_table[],  unsigned num_syms,
                unsigned sym = sorted_syms[i];
                unsigned codeword_len = lens[sym];
                unsigned extra_bits = codeword_len - table_bits;
-               unsigned extra_mask;
 
                cur_codeword <<= (codeword_len - prev_codeword_len);
                prev_codeword_len = codeword_len;
@@ -325,8 +292,8 @@ int make_huffman_decode_table(u16 decode_table[],  unsigned num_syms,
        return 0;
 }
 
-/* Reads a Huffman-encoded symbol when it is known there are less than
- * MAX_CODE_LEN bits remaining in the bitstream. */
+/* Reads a Huffman-encoded symbol from the bitstream when the number of remaining
+ * bits is less than the maximum codeword length. */
 int read_huffsym_near_end_of_input(struct input_bitstream *istream,
                                   const u16 decode_table[],
                                   const u8 lens[],
@@ -356,7 +323,7 @@ int read_huffsym_near_end_of_input(struct input_bitstream *istream,
                do {
                        if (bitsleft == 0) {
                                ERROR("Input stream exhausted");
-                               return 1;
+                               return -1;
                        }
                        key_bits = sym + bitstream_peek_bits(istream, 1);
                        bitstream_remove_bits(istream, 1);