-
- /* Fill entries for codewords short enough for a direct mapping. We can
- * take advantage of the ordering of the codewords, since the Huffman
- * code is canonical. It must be the case that all the codewords of
- * some length L numerically precede all the codewords of length L + 1.
- * Furthermore, if we have 2 symbols A and B with the same codeword
- * length but symbol A is sorted before symbol B, then then we know that
- * the codeword for A numerically precedes the codeword for B. */
- unsigned decode_table_pos = 0;
- unsigned i = 0;
-
- wimlib_assert2(num_used_syms != 0);
- while (1) {
- unsigned sym = sorted_syms[i];
- unsigned codeword_len = lens[sym];
- if (codeword_len > table_bits)
- break;
-
- unsigned num_entries = 1 << (table_bits - codeword_len);
- if (num_entries >=
- (sizeof(unsigned long) / sizeof(decode_table[0])))
- {
- wimlib_assert2(decode_table_pos % 4 == 0);
- BUILD_BUG_ON(sizeof(unsigned long) != 4 &&
- sizeof(unsigned long) != 8);
-
- unsigned long *p = (unsigned long *)&decode_table[decode_table_pos];
- unsigned long n = num_entries /
- (sizeof(unsigned long) /
- sizeof(decode_table[0]));
- unsigned long v = sym;
- if (sizeof(unsigned long) >= 4)
- v |= v << 16;
- if (sizeof(unsigned long) >= 8)
- v |= v << 32;
- do {
- *p++ = v;
- } while (--n);
-
- decode_table_pos += num_entries;
- } else {
- do {
- decode_table[decode_table_pos++] = sym;
- } while (--num_entries);
- }
- wimlib_assert2(decode_table_pos <= table_num_entries);
- if (++i == num_used_syms) {
- wimlib_assert2(decode_table_pos == table_num_entries);
- /* No codewords were longer than @table_bits, so the
- * table is now entirely filled with the codewords. */
- return 0;
- }
- }
-
- wimlib_assert2(i < num_used_syms);
- wimlib_assert2(decode_table_pos < table_num_entries);
-
- /* Fill in the remaining entries, which correspond to codes longer than
- * @table_bits.
- *
- * First, zero out the rest of the entries. This is necessary so that
- * the entries appear as "unallocated" in the next part. */
- {
- unsigned j = decode_table_pos;
- do {
- decode_table[j] = 0;
- } while (++j != table_num_entries);
- }
-
- /* Assert that 2**table_bits is at least num_syms. If this wasn't the
- * case, we wouldn't be able to distinguish pointer entries from symbol
- * entries. */
- wimlib_assert2(table_num_entries >= num_syms);
-
- /* The current Huffman codeword */
- unsigned cur_codeword = decode_table_pos;
-
- /* The tree nodes are allocated starting at decode_table[1 <<
- * table_bits]. Remember that the full size of the table, including the
- * extra space for the tree nodes, is actually 2**table_bits + 2 *
- * num_syms slots, while table_num_entries is only 2**table_Bits. */
- unsigned next_free_tree_slot = table_num_entries;
-
- /* Go through every codeword of length greater than @table_bits,
- * primarily in order of codeword length and secondarily in order of
- * symbol. */
- unsigned prev_codeword_len = table_bits;
- do {
- unsigned sym = sorted_syms[i];
- unsigned codeword_len = lens[sym];
- unsigned extra_bits = codeword_len - table_bits;
- unsigned extra_mask;
-
- cur_codeword <<= (codeword_len - prev_codeword_len);
- prev_codeword_len = codeword_len;
-
- /* index of the current node; find it from the prefix of the
- * current Huffman codeword. */
- unsigned node_idx = cur_codeword >> extra_bits;
- wimlib_assert2(node_idx < table_num_entries);
-
- /* Go through each bit of the current Huffman codeword beyond
- * the prefix of length @table_bits and walk the tree,
- * allocating any slots that have not yet been allocated. */
- do {
-
- /* If the current tree node points to nowhere
- * but we need to follow it, allocate a new node
- * for it to point to. */
- if (decode_table[node_idx] == 0) {
- decode_table[node_idx] = next_free_tree_slot;
- decode_table[next_free_tree_slot++] = 0;
- decode_table[next_free_tree_slot++] = 0;
- wimlib_assert2(next_free_tree_slot <=
- table_num_entries + 2 * num_syms);
- }
-
- /* Set node_idx to left child */
- node_idx = decode_table[node_idx];
-
- /* Is the next bit 0 or 1? If 0, go left (already done).
- * If 1, go right by incrementing node_idx. */
- --extra_bits;
- node_idx += (cur_codeword >> extra_bits) & 1;
- } while (extra_bits != 0);
-
- /* node_idx is now the index of the leaf entry into which the
- * actual symbol will go. */
- decode_table[node_idx] = sym;
-
- /* cur_codeword is always incremented because this is
- * how canonical Huffman codes are generated (add 1 for
- * each code, then left shift whenever the code length
- * increases) */
- cur_codeword++;
- } while (++i != num_used_syms);