- /* Set node_idx to left child */
- node_idx = decode_table[node_idx];
-
- /* Is the next bit 0 or 1? If 0, go left (already done).
- * If 1, go right by incrementing node_idx. */
- --extra_bits;
- node_idx += (cur_codeword >> extra_bits) & 1;
- } while (extra_bits != 0);
-
- /* node_idx is now the index of the leaf entry into which the
- * actual symbol will go. */
- decode_table[node_idx] = sym;
-
- /* cur_codeword is always incremented because this is
- * how canonical Huffman codes are generated (add 1 for
- * each code, then left shift whenever the code length
- * increases) */
- cur_codeword++;
- } while (++i != num_used_syms);
+ /* The tree nodes are allocated starting at decode_table[1 <<
+ * table_bits]. Remember that the full size of the table,
+ * including the extra space for the tree nodes, is actually
+ * 2**table_bits + 2 * num_syms slots, while table_num_entries
+ * is only 2**table_bits. */
+ next_free_tree_slot = table_num_entries;
+
+ /* The current Huffman codeword */
+ cur_codeword = decode_table_pos << 1;
+
+ /* Go through every codeword of length greater than @table_bits,
+ * primarily in order of codeword length and secondarily in
+ * order of symbol. */
+ wimlib_assert2(codeword_len == table_bits + 1);
+ for (; codeword_len <= max_codeword_len; codeword_len++, cur_codeword <<= 1)
+ {
+ unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
+ for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++) {
+ unsigned sym = sorted_syms[sym_idx];
+ unsigned extra_bits = codeword_len - table_bits;
+
+ /* index of the current node; find it from the
+ * prefix of the current Huffman codeword. */
+ unsigned node_idx = cur_codeword >> extra_bits;
+ wimlib_assert2(node_idx < table_num_entries);
+
+ /* Go through each bit of the current Huffman
+ * codeword beyond the prefix of length
+ * @table_bits and walk the tree, allocating any
+ * slots that have not yet been allocated. */
+ do {
+
+ /* If the current tree node points to
+ * nowhere but we need to follow it,
+ * allocate a new node for it to point
+ * to. */
+ if (decode_table[node_idx] == 0) {
+ decode_table[node_idx] = next_free_tree_slot;
+ decode_table[next_free_tree_slot++] = 0;
+ decode_table[next_free_tree_slot++] = 0;
+ wimlib_assert2(next_free_tree_slot <=
+ table_num_entries + 2 * num_syms);
+ }
+
+ /* Set node_idx to left child */
+ node_idx = decode_table[node_idx];
+
+ /* Is the next bit 0 or 1? If 0, go left
+ * (already done). If 1, go right by
+ * incrementing node_idx. */
+ --extra_bits;
+ node_idx += (cur_codeword >> extra_bits) & 1;
+ } while (extra_bits != 0);
+
+ /* node_idx is now the index of the leaf entry
+ * into which the actual symbol will go. */
+ decode_table[node_idx] = sym;
+
+ /* Note: cur_codeword is always incremented at
+ * the end of this loop because this is how
+ * canonical Huffman codes are generated (add 1
+ * for each code, then left shift whenever the
+ * code length increases) */
+ }
+ }
+ }