X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fdecompress.c;h=6ca0f1616c767bb4b4b8fcb7abf343478b4b4c7a;hb=c6a1140e085f633273fcf47a6462bd9382ce118a;hp=9a08621aea7e5b53aa93870537906c6109cf03b8;hpb=e78f0d2a97781f3c5f242417c747fdc4eef43cdf;p=wimlib diff --git a/src/decompress.c b/src/decompress.c index 9a08621a..6ca0f161 100644 --- a/src/decompress.c +++ b/src/decompress.c @@ -5,7 +5,7 @@ */ /* - * Copyright (C) 2012 Eric Biggers + * Copyright (C) 2012, 2013 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * @@ -26,61 +26,20 @@ #include "decompress.h" #include -/* Reads @n bytes from the bitstream @stream into the location pointed to by @dest. - * The bitstream must be 16-bit aligned. */ -int bitstream_read_bytes(struct input_bitstream *stream, size_t n, void *dest) -{ - /* Precondition: The bitstream is 16-byte aligned. */ - wimlib_assert2(stream->bitsleft % 16 == 0); - - u8 *p = dest; - - /* Get the bytes currently in the buffer variable. */ - while (stream->bitsleft != 0) { - if (n-- == 0) - return 0; - *p++ = bitstream_peek_bits(stream, 8); - bitstream_remove_bits(stream, 8); - } - - /* Get the rest directly from the pointer to the data. Of course, it's - * necessary to check there are really n bytes available. */ - if (n > stream->data_bytes_left) { - ERROR("Unexpected end of input when reading %zu bytes from " - "bitstream (only have %u bytes left)", - n, stream->data_bytes_left); - return 1; - } - memcpy(p, stream->data, n); - stream->data += n; - stream->data_bytes_left -= n; - - /* It's possible to copy an odd number of bytes and leave the stream in - * an inconsistent state. Fix it by reading the next byte, if it is - * there. */ - if ((n & 1) && stream->data_bytes_left != 0) { - stream->bitsleft = 8; - stream->data_bytes_left--; - stream->bitbuf |= (input_bitbuf_t)(*stream->data) << - (sizeof(input_bitbuf_t) * 8 - 8); - stream->data++; - } - return 0; -} - /* - * Builds a fast huffman decoding table from an array that gives the length of - * the codeword for each symbol in the alphabet. Originally based on code - * written by David Tritscher (taken the original LZX decompression code); also - * heavily modified to add some optimizations used in the zlib code, as well as - * more comments. + * make_huffman_decode_table: - Builds a fast huffman decoding table from an + * array that gives the length of the codeword for each symbol in the alphabet. + * Originally based on code written by David Tritscher (taken the original LZX + * decompression code); also heavily modified to add some optimizations used in + * the zlib code, as well as more comments. * * @decode_table: The array in which to create the fast huffman decoding * table. It must have a length of at least * (2**table_bits) + 2 * num_syms to guarantee * that there is enough space. * - * @num_syms: Total number of symbols in the Huffman tree. + * @num_syms: Number of symbols in the alphabet, including symbols + * that do not appear in this particular input chunk. * * @table_bits: Any symbols with a code length of table_bits or less can * be decoded in one lookup of the table. 2**table_bits @@ -88,7 +47,7 @@ int bitstream_read_bytes(struct input_bitstream *stream, size_t n, void *dest) * any Huffman codes longer than @table_bits. * * @lens: An array of length @num_syms, indexable by symbol, that - * gives the length of the Huffman codeward for that + * gives the length of the Huffman codeword for that * symbol. Because the Huffman tree is in canonical form, * it can be reconstructed by only knowing the length of * the codeword for each symbol. It is assumed, but not @@ -130,9 +89,10 @@ int bitstream_read_bytes(struct input_bitstream *stream, size_t n, void *dest) * indices into the decoding table, and symbol entries are distinguished from * pointers by the fact that values less than @num_syms must be symbol values. */ -int make_huffman_decode_table(u16 decode_table[], unsigned num_syms, - unsigned table_bits, const u8 lens[], - unsigned max_codeword_len) +int +make_huffman_decode_table(u16 decode_table[], unsigned num_syms, + unsigned table_bits, const u8 lens[], + unsigned max_codeword_len) { unsigned len_counts[max_codeword_len + 1]; u16 sorted_syms[num_syms]; @@ -205,28 +165,37 @@ int make_huffman_decode_table(u16 decode_table[], unsigned num_syms, break; unsigned num_entries = 1 << (table_bits - codeword_len); - if (num_entries >= - (sizeof(unsigned long) / sizeof(decode_table[0]))) - { - wimlib_assert2(decode_table_pos % 4 == 0); + const unsigned entries_per_long = sizeof(unsigned long) / + sizeof(decode_table[0]); + if (num_entries >= entries_per_long) { + /* Fill in the Huffman decode table entries one unsigned + * long at a time. On 32-bit systems this is 2 entries + * per store, while on 64-bit systems this is 4 entries + * per store. */ + wimlib_assert2(decode_table_pos % entries_per_long == 0); BUILD_BUG_ON(sizeof(unsigned long) != 4 && sizeof(unsigned long) != 8); unsigned long *p = (unsigned long *)&decode_table[decode_table_pos]; - unsigned long n = num_entries / - (sizeof(unsigned long) / - sizeof(decode_table[0])); + unsigned n = num_entries / entries_per_long; unsigned long v = sym; if (sizeof(unsigned long) >= 4) v |= v << 16; - if (sizeof(unsigned long) >= 8) + if (sizeof(unsigned long) >= 8) { + /* This may produce a compiler warning if an + * unsigned long is 32 bits, but this won't be + * executed unless an unsigned long is at least + * 64 bits anyway. */ v |= v << 32; + } do { *p++ = v; } while (--n); decode_table_pos += num_entries; } else { + /* Fill in the Huffman decode table entries one 16-bit + * integer at a time. */ do { decode_table[decode_table_pos++] = sym; } while (--num_entries); @@ -277,7 +246,6 @@ int make_huffman_decode_table(u16 decode_table[], unsigned num_syms, unsigned sym = sorted_syms[i]; unsigned codeword_len = lens[sym]; unsigned extra_bits = codeword_len - table_bits; - unsigned extra_mask; cur_codeword <<= (codeword_len - prev_codeword_len); prev_codeword_len = codeword_len; @@ -325,14 +293,15 @@ int make_huffman_decode_table(u16 decode_table[], unsigned num_syms, return 0; } -/* Reads a Huffman-encoded symbol when it is known there are less than - * MAX_CODE_LEN bits remaining in the bitstream. */ -int read_huffsym_near_end_of_input(struct input_bitstream *istream, - const u16 decode_table[], - const u8 lens[], - unsigned num_syms, - unsigned table_bits, - unsigned *n) +/* Reads a Huffman-encoded symbol from the bistream when the number of remaining + * bits is less than the maximum codeword length. */ +int +read_huffsym_near_end_of_input(struct input_bitstream *istream, + const u16 decode_table[], + const u8 lens[], + unsigned num_syms, + unsigned table_bits, + unsigned *n) { unsigned bitsleft = istream->bitsleft; unsigned key_size; @@ -356,7 +325,7 @@ int read_huffsym_near_end_of_input(struct input_bitstream *istream, do { if (bitsleft == 0) { ERROR("Input stream exhausted"); - return 1; + return -1; } key_bits = sym + bitstream_peek_bits(istream, 1); bitstream_remove_bits(istream, 1);