- /* Number of entries in the decode table. */
- u32 table_num_entries = 1 << num_bits;
-
- /* Current position in the decode table. */
- u32 decode_table_pos = 0;
-
- /* Fill entries for codes short enough for a direct mapping. Here we
- * are taking advantage of the ordering of the codes, since they are for
- * a canonical Huffman tree. It must be the case that all the codes of
- * some length @code_length, zero-extended or one-extended, numerically
- * precede all the codes of length @code_length + 1. Furthermore, if we
- * have 2 symbols A and B, such that A is listed before B in the lens
- * array, and both symbols have the same code length, then we know that
- * the code for A numerically precedes the code for B.
- * */
- for (uint code_len = 1; code_len <= num_bits; code_len++) {
-
- /* Number of entries that a code of length @code_length would
- * need. */
- u32 code_num_entries = 1 << (num_bits - code_len);
-
-
- /* For each symbol of length @code_len, fill in its entries in
- * the decode table. */
- for (uint sym = 0; sym < num_syms; sym++) {
-
- if (lens[sym] != code_len)
- continue;
-
-
- /* Check for table overrun. This can only happen if the
- * given lengths do not correspond to a valid Huffman
- * tree. */
- if (decode_table_pos >= table_num_entries) {
- ERROR("Huffman decoding table overrun: "
- "pos = %u, num_entries = %u",
- decode_table_pos, table_num_entries);
- return 1;
- }
-
- /* Fill all possible lookups of this symbol with
- * the symbol itself. */
- for (uint i = 0; i < code_num_entries; i++)
- decode_table[decode_table_pos + i] = sym;
-
- /* Increment the position in the decode table by
- * the number of entries that were just filled
- * in. */
- decode_table_pos += code_num_entries;
- }
- }
-
- /* If all entries of the decode table have been filled in, there are no
- * codes longer than num_bits, so we are done filling in the decode
- * table. */
- if (decode_table_pos == table_num_entries)
- return 0;
-
- /* Otherwise, fill in the remaining entries, which correspond to codes longer
- * than @num_bits. */
-
-
- /* First, zero out the rest of the entries; this is necessary so
- * that the entries appear as "unallocated" in the next part. */
- for (uint i = decode_table_pos; i < table_num_entries; i++)
- decode_table[i] = 0;
-
- /* Assert that 2**num_bits is at least num_syms. If this wasn't the
- * case, we wouldn't be able to distinguish pointer entries from symbol
- * entries. */
- wimlib_assert((1 << num_bits) >= num_syms);
-
-
- /* The current Huffman code. */
- uint current_code = decode_table_pos;
-
- /* The tree nodes are allocated starting at
- * decode_table[table_num_entries]. Remember that the full size of the
- * table, including the extra space for the tree nodes, is actually
- * 2**num_bits + 2 * num_syms slots, while table_num_entries is only
- * 2**num_bits. */
- uint next_free_tree_slot = table_num_entries;
-
- /* Go through every codeword of length greater than @num_bits. Note:
- * the LZX format guarantees that the codeword length can be at most 16
- * bits. */
- for (uint code_len = num_bits + 1; code_len <= max_code_len;
- code_len++)
- {
- current_code <<= 1;
- for (uint sym = 0; sym < num_syms; sym++) {
- if (lens[sym] != code_len)
- continue;
-
-
- /* i is the index of the current node; find it from the
- * prefix of the current Huffman code. */
- uint i = current_code >> (code_len - num_bits);
-
- if (i >= (1 << num_bits)) {
- ERROR("Invalid canonical Huffman code");
- return 1;
- }
-
- /* Go through each bit of the current Huffman code
- * beyond the prefix of length num_bits and walk the
- * tree, "allocating" slots that have not yet been
- * allocated. */
- for (int bit_num = num_bits + 1; bit_num <= code_len; bit_num++) {
-
- /* If the current tree node points to nowhere
- * but we need to follow it, allocate a new node
- * for it to point to. */
- if (decode_table[i] == 0) {
- decode_table[i] = next_free_tree_slot;
- decode_table[next_free_tree_slot++] = 0;
- decode_table[next_free_tree_slot++] = 0;
- }
-
- i = decode_table[i];
-
- /* Is the next bit 0 or 1? If 0, go left;
- * otherwise, go right (by incrementing i by 1) */
- int bit_pos = code_len - bit_num;
-
- int bit = (current_code & (1 << bit_pos)) >>
- bit_pos;
- i += bit;
- }
-
- /* i is now the index of the leaf entry into which the
- * actual symbol will go. */
- decode_table[i] = sym;
-
- /* Increment decode_table_pos only if the prefix of the
- * Huffman code changes. */
- if (current_code >> (code_len - num_bits) !=
- (current_code + 1) >> (code_len - num_bits))
- decode_table_pos++;
-
- /* current_code is always incremented because this is
- * how canonical Huffman codes are generated (add 1 for
- * each code, then left shift whenever the code length
- * increases) */
- current_code++;
- }
- }
-
-
- /* If the lengths really represented a valid Huffman tree, all
- * @table_num_entries in the table will have been filled. However, it
- * is also possible that the tree is completely empty (as noted
- * earlier) with all 0 lengths, and this is expected to succeed. */
-
- if (decode_table_pos != table_num_entries) {
-
- for (uint i = 0; i < num_syms; i++) {
- if (lens[i] != 0) {
- ERROR("Lengths do not form a valid canonical "
- "Huffman tree (only filled %u of %u "
- "decode table slots)",
- decode_table_pos, table_num_entries);
- return 1;
- }