4 * Copyright (C) 2012 Eric Biggers
6 * wimlib - Library for working with WIM files
8 * This library is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 2.1 of the License, or (at your option) any
13 * This library is distributed in the hope that it will be useful, but WITHOUT ANY
14 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
15 * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License along
18 * with this library; if not, write to the Free Software Foundation, Inc., 59
19 * Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #ifndef _WIMLIB_HUFFMAN_H
22 #define _WIMLIB_HUFFMAN_H
27 extern void make_canonical_huffman_code(uint num_syms, uint max_codeword_len,
28 const u32 freq_tab[], u8 lens[],
31 extern int make_huffman_decode_table(u16 decode_table[], uint num_syms,
32 uint num_bits, const u8 lengths[],
33 uint max_codeword_len);
35 extern int read_huffsym_near_end_of_input(struct input_bitstream *istream,
36 const u16 decode_table[],
43 * Reads a Huffman-encoded symbol from a bitstream.
45 * This function may be called hundreds of millions of times when extracting a
46 * large WIM file, and it is declared to be always inlined for improved
47 * performance. I'm not sure it could be made much faster that it is,
48 * especially since there isn't enough time to make a big table that allows
49 * decoding multiple symbols per lookup. But if extracting files to a hard
50 * disk, the IO will be the bottleneck anyway.
52 * @buf: The input buffer from which the symbol will be read.
53 * @decode_table: The fast Huffman decoding table for the Huffman tree.
54 * @lengths: The table that gives the length of the code for each
56 * @num_symbols: The number of symbols in the Huffman code.
57 * @table_bits: Huffman codes this length or less can be looked up
58 * directory in the decode_table, as the
59 * decode_table contains 2**table_bits entries.
61 static int ALWAYS_INLINE
62 read_huffsym(struct input_bitstream *stream,
63 const u16 decode_table[],
68 unsigned max_codeword_len)
70 /* In the most common case, there are at least max_codeword_len bits
71 * remaining in the stream. */
72 if (bitstream_ensure_bits(stream, max_codeword_len) == 0) {
74 /* Use the next table_bits of the input as an index into the
76 u16 key_bits = bitstream_peek_bits(stream, table_bits);
78 u16 sym = decode_table[key_bits];
80 /* If the entry in the decode table is not a valid symbol, it is
81 * the offset of the root of its Huffman subtree. */
82 if (sym >= num_symbols) {
83 bitstream_remove_bits(stream, table_bits);
85 key_bits = sym + bitstream_peek_bits(stream, 1);
86 bitstream_remove_bits(stream, 1);
88 wimlib_assert(key_bits < num_symbols * 2 +
90 } while ((sym = decode_table[key_bits]) >= num_symbols);
92 wimlib_assert(lengths[sym] <= table_bits);
93 bitstream_remove_bits(stream, lengths[sym]);
98 /* Otherwise, we must be careful to use only the bits that are
99 * actually remaining. Don't inline this part since it is very
101 return read_huffsym_near_end_of_input(stream, decode_table, lengths,
102 num_symbols, table_bits, n);
108 #endif /* _WIMLIB_HUFFMAN_H */