- /* Otherwise, there are at least 2 symbols in the input, so we need to
- * find a real Huffman code. */
-
-
- /* Declare the array of intermediate nodes. An intermediate node is not
- * associated with a symbol. Instead, it represents some binary code
- * prefix that is shared between at least 2 codewords. There can be at
- * most num_used_symbols - 1 intermediate nodes when creating a Huffman
- * code. This is because if there were at least num_used_symbols nodes,
- * the code would be suboptimal because there would be at least one
- * unnecessary intermediate node.
- *
- * The worst case (greatest number of intermediate nodes) would be if
- * all the intermediate nodes were chained together. This results in
- * num_used_symbols - 1 intermediate nodes. If num_used_symbols is at
- * least 17, this configuration would not be allowed because the LZX
- * format constrains codes to 16 bits or less each. However, it is
- * still possible for there to be more than 16 intermediate nodes, as
- * long as no leaf has a depth of more than 16. */
- HuffmanIntermediateNode inodes[num_used_symbols - 1];
-
-
- /* Pointer to the leaf node of lowest frequency that hasn't already been
- * added as the child of some intermediate note. */
- HuffmanNode *cur_leaf;
-
- /* Pointer past the end of the array of leaves. */
- HuffmanNode *end_leaf = &leaves[num_used_symbols];
-
- /* Pointer to the intermediate node of lowest frequency. */
- HuffmanIntermediateNode *cur_inode;
-
- /* Pointer to the next unallocated intermediate node. */
- HuffmanIntermediateNode *next_inode;
-
- /* Only jump back to here if the maximum length of the codewords allowed
- * by the LZX format (16 bits) is exceeded. */
-try_building_tree_again:
-
- /* Sort the leaves from those that correspond to the least frequent
- * symbol, to those that correspond to the most frequent symbol. If two
- * leaves have the same frequency, they are sorted by symbol. */
- qsort(leaves, num_used_symbols, sizeof(leaves[0]), cmp_nodes_by_freq);
-
- cur_leaf = &leaves[0];
- cur_inode = &inodes[0];
- next_inode = &inodes[0];
-
- /* The following loop takes the two lowest frequency nodes of those
- * remaining and makes them the children of the next available
- * intermediate node. It continues until all the leaf nodes and
- * intermediate nodes have been used up, or the maximum allowed length
- * for the codewords is exceeded. For the latter case, we must adjust
- * the frequencies to be more equal and then execute this loop again. */
- while (1) {
-
- /* Lowest frequency node. */
- HuffmanNode *f1;
-
- /* Second lowest frequency node. */
- HuffmanNode *f2;
-
- /* Get the lowest and second lowest frequency nodes from the
- * remaining leaves or from the intermediate nodes. */
-
- if (cur_leaf != end_leaf && (cur_inode == next_inode ||
- cur_leaf->freq <= cur_inode->node_base.freq)) {
- f1 = cur_leaf++;
- } else if (cur_inode != next_inode) {
- f1 = (HuffmanNode*)cur_inode++;
- }
-
- if (cur_leaf != end_leaf && (cur_inode == next_inode ||
- cur_leaf->freq <= cur_inode->node_base.freq)) {
- f2 = cur_leaf++;
- } else if (cur_inode != next_inode) {
- f2 = (HuffmanNode*)cur_inode++;
- } else {
- /* All nodes used up! */
- break;
- }
-
- /* next_inode becomes the parent of f1 and f2. */
-
- next_inode->node_base.freq = f1->freq + f2->freq;
- next_inode->node_base.sym = INVALID_SYMBOL;
- next_inode->left_child = f1;
- next_inode->right_child = f2;
-
- /* We need to keep track of the height so that we can detect if
- * the length of a codeword has execeed max_codeword_len. The
- * parent node has a height one higher than the maximum height
- * of its children. */
- next_inode->node_base.height = max(f1->height, f2->height) + 1;
-
- /* Check to see if the code length of the leaf farthest away
- * from next_inode has exceeded the maximum code length. */
- if (next_inode->node_base.height > max_codeword_len) {
- /* The code lengths can be made more uniform by making
- * the frequencies more uniform. Divide all the
- * frequencies by 2, leaving 1 as the minimum frequency.
- * If this keeps happening, the symbol frequencies will
- * approach equality, which makes their Huffman
- * codewords approach the length
- * log_2(num_used_symbols).
- * */
- for (unsigned i = 0; i < num_used_symbols; i++)
- if (leaves[i].freq > 1)
- leaves[i].freq >>= 1;
- goto try_building_tree_again;
- }
- next_inode++;
- }
-
- /* The Huffman tree is now complete, and its height is no more than
- * max_codeword_len. */
-
- HuffmanIntermediateNode *root = next_inode - 1;
- wimlib_assert(root->node_base.height <= max_codeword_len);
-
- /* Compute the path lengths for the leaf nodes. */
- huffman_tree_compute_path_lengths(&root->node_base, 0);
-
- /* Sort the leaf nodes primarily by code length and secondarily by
- * symbol. */
- qsort(leaves, num_used_symbols, sizeof(leaves[0]), cmp_nodes_by_code_len);
-
- u16 cur_codeword = 0;
- unsigned cur_codeword_len = 0;
- for (unsigned i = 0; i < num_used_symbols; i++) {
-
- /* Each time a codeword becomes one longer, the current codeword
- * is left shifted by one place. This is part of the procedure
- * for enumerating the canonical Huffman code. Additionally,
- * whenever a codeword is used, 1 is added to the current
- * codeword. */
-
- unsigned len_diff = leaves[i].path_len - cur_codeword_len;
- cur_codeword <<= len_diff;
- cur_codeword_len += len_diff;