/* * huffman.h * * Copyright (C) 2012 Eric Biggers * * wimlib - Library for working with WIM files * * This library is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) any * later version. * * This library is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License along * with this library; if not, write to the Free Software Foundation, Inc., 59 * Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _WIMLIB_HUFFMAN_H #define _WIMLIB_HUFFMAN_H #include "util.h" #include "decomp.h" extern void make_canonical_huffman_code(uint num_syms, uint max_codeword_len, const u32 freq_tab[], u8 lens[], u16 codewords[]); extern int make_huffman_decode_table(u16 decode_table[], uint num_syms, uint num_bits, const u8 lengths[], uint max_codeword_len); extern int read_huffsym_near_end_of_input(struct input_bitstream *istream, const u16 decode_table[], const u8 lengths[], uint num_symbols, uint table_bits, uint *n); /* * Reads a Huffman-encoded symbol from a bitstream. * * This function may be called hundreds of millions of times when extracting a * large WIM file, and it is declared to be always inlined for improved * performance. I'm not sure it could be made much faster that it is, * especially since there isn't enough time to make a big table that allows * decoding multiple symbols per lookup. But if extracting files to a hard * disk, the IO will be the bottleneck anyway. * * @buf: The input buffer from which the symbol will be read. * @decode_table: The fast Huffman decoding table for the Huffman tree. * @lengths: The table that gives the length of the code for each * symbol. * @num_symbols: The number of symbols in the Huffman code. * @table_bits: Huffman codes this length or less can be looked up * directory in the decode_table, as the * decode_table contains 2**table_bits entries. */ static int ALWAYS_INLINE read_huffsym(struct input_bitstream *stream, const u16 decode_table[], const u8 lengths[], unsigned num_symbols, unsigned table_bits, uint *n, unsigned max_codeword_len) { /* In the most common case, there are at least max_codeword_len bits * remaining in the stream. */ if (bitstream_ensure_bits(stream, max_codeword_len) == 0) { /* Use the next table_bits of the input as an index into the * decode_table. */ u16 key_bits = bitstream_peek_bits(stream, table_bits); u16 sym = decode_table[key_bits]; /* If the entry in the decode table is not a valid symbol, it is * the offset of the root of its Huffman subtree. */ if (sym >= num_symbols) { bitstream_remove_bits(stream, table_bits); do { key_bits = sym + bitstream_peek_bits(stream, 1); bitstream_remove_bits(stream, 1); wimlib_assert(key_bits < num_symbols * 2 + (1 << table_bits)); } while ((sym = decode_table[key_bits]) >= num_symbols); } else { wimlib_assert(lengths[sym] <= table_bits); bitstream_remove_bits(stream, lengths[sym]); } *n = sym; return 0; } else { /* Otherwise, we must be careful to use only the bits that are * actually remaining. Don't inline this part since it is very * rarely used. */ return read_huffsym_near_end_of_input(stream, decode_table, lengths, num_symbols, table_bits, n); } } #endif /* _WIMLIB_HUFFMAN_H */