/*
 * compress_common.c
 *
 * Code for compression shared among multiple compression formats.
 *
 * Copyright 2022 Eric Biggers
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include <string.h>	/* memset() -- header name was lost in the bare "#include" */

#include "wimlib/assert.h"
#include "wimlib/compress_common.h"
#include "wimlib/util.h"

/*
 * Given the binary tree node A[subtree_idx] whose children already satisfy the
 * maxheap property, swap the node with its greater child until it is greater
 * than or equal to both of its children, so that the maxheap property is
 * satisfied in the subtree rooted at A[subtree_idx].  'A' uses 1-based indices.
*/ static void heapify_subtree(u32 A[], unsigned length, unsigned subtree_idx) { unsigned parent_idx; unsigned child_idx; u32 v; v = A[subtree_idx]; parent_idx = subtree_idx; while ((child_idx = parent_idx * 2) <= length) { if (child_idx < length && A[child_idx + 1] > A[child_idx]) child_idx++; if (v >= A[child_idx]) break; A[parent_idx] = A[child_idx]; parent_idx = child_idx; } A[parent_idx] = v; } /* * Rearrange the array 'A' so that it satisfies the maxheap property. * 'A' uses 1-based indices, so the children of A[i] are A[i*2] and A[i*2 + 1]. */ static void heapify_array(u32 A[], unsigned length) { unsigned subtree_idx; for (subtree_idx = length / 2; subtree_idx >= 1; subtree_idx--) heapify_subtree(A, length, subtree_idx); } /* * Sort the array 'A', which contains 'length' unsigned 32-bit integers. * * Note: name this function heap_sort() instead of heapsort() to avoid colliding * with heapsort() from stdlib.h on BSD-derived systems --- though this isn't * necessary when compiling with -D_ANSI_SOURCE, which is the better solution. */ static void heap_sort(u32 A[], unsigned length) { A--; /* Use 1-based indices */ heapify_array(A, length); while (length >= 2) { u32 tmp = A[length]; A[length] = A[1]; A[1] = tmp; length--; heapify_subtree(A, length, 1); } } #define NUM_SYMBOL_BITS 10 #define NUM_FREQ_BITS (32 - NUM_SYMBOL_BITS) #define SYMBOL_MASK ((1 << NUM_SYMBOL_BITS) - 1) #define FREQ_MASK (~SYMBOL_MASK) #define GET_NUM_COUNTERS(num_syms) (num_syms) /* * Sort the symbols primarily by frequency and secondarily by symbol value. * Discard symbols with zero frequency and fill in an array with the remaining * symbols, along with their frequencies. The low NUM_SYMBOL_BITS bits of each * array entry will contain the symbol value, and the remaining bits will * contain the frequency. * * @num_syms * Number of symbols in the alphabet, at most 1 << NUM_SYMBOL_BITS. * * @freqs[num_syms] * Frequency of each symbol, summing to at most (1 << NUM_FREQ_BITS) - 1. 
* * @lens[num_syms] * An array that eventually will hold the length of each codeword. This * function only fills in the codeword lengths for symbols that have zero * frequency, which are not well defined per se but will be set to 0. * * @symout[num_syms] * The output array, described above. * * Returns the number of entries in 'symout' that were filled. This is the * number of symbols that have nonzero frequency. */ static unsigned sort_symbols(unsigned num_syms, const u32 freqs[], u8 lens[], u32 symout[]) { unsigned sym; unsigned i; unsigned num_used_syms; unsigned num_counters; unsigned counters[GET_NUM_COUNTERS(MAX_NUM_SYMS)]; /* * We use heapsort, but with an added optimization. Since often most * symbol frequencies are low, we first do a count sort using a limited * number of counters. High frequencies are counted in the last * counter, and only they will be sorted with heapsort. * * Note: with more symbols, it is generally beneficial to have more * counters. About 1 counter per symbol seems fastest. */ num_counters = GET_NUM_COUNTERS(num_syms); memset(counters, 0, num_counters * sizeof(counters[0])); /* Count the frequencies. */ for (sym = 0; sym < num_syms; sym++) counters[MIN(freqs[sym], num_counters - 1)]++; /* * Make the counters cumulative, ignoring the zero-th, which counted * symbols with zero frequency. As a side effect, this calculates the * number of symbols with nonzero frequency. */ num_used_syms = 0; for (i = 1; i < num_counters; i++) { unsigned count = counters[i]; counters[i] = num_used_syms; num_used_syms += count; } /* * Sort nonzero-frequency symbols using the counters. At the same time, * set the codeword lengths of zero-frequency symbols to 0. */ for (sym = 0; sym < num_syms; sym++) { u32 freq = freqs[sym]; if (freq != 0) { symout[counters[MIN(freq, num_counters - 1)]++] = sym | (freq << NUM_SYMBOL_BITS); } else { lens[sym] = 0; } } /* Sort the symbols counted in the last counter. 
*/ heap_sort(symout + counters[num_counters - 2], counters[num_counters - 1] - counters[num_counters - 2]); return num_used_syms; } /* * Build a Huffman tree. * * This is an optimized implementation that * (a) takes advantage of the frequencies being already sorted; * (b) only generates non-leaf nodes, since the non-leaf nodes of a Huffman * tree are sufficient to generate a canonical code; * (c) Only stores parent pointers, not child pointers; * (d) Produces the nodes in the same memory used for input frequency * information. * * Array 'A', which contains 'sym_count' entries, is used for both input and * output. For this function, 'sym_count' must be at least 2. * * For input, the array must contain the frequencies of the symbols, sorted in * increasing order. Specifically, each entry must contain a frequency left * shifted by NUM_SYMBOL_BITS bits. Any data in the low NUM_SYMBOL_BITS bits of * the entries will be ignored by this function. Although these bits will, in * fact, contain the symbols that correspond to the frequencies, this function * is concerned with frequencies only and keeps the symbols as-is. * * For output, this function will produce the non-leaf nodes of the Huffman * tree. These nodes will be stored in the first (sym_count - 1) entries of the * array. Entry A[sym_count - 2] will represent the root node. Each other node * will contain the zero-based index of its parent node in 'A', left shifted by * NUM_SYMBOL_BITS bits. The low NUM_SYMBOL_BITS bits of each entry in A will * be kept as-is. Again, note that although these low bits will, in fact, * contain a symbol value, this symbol will have *no relationship* with the * Huffman tree node that happens to occupy the same slot. This is because this * implementation only generates the non-leaf nodes of the tree. 
*/
static void
build_tree(u32 A[], unsigned sym_count)
{
	const unsigned last_idx = sym_count - 1;

	/* Index of the next lowest frequency leaf that still needs a parent */
	unsigned i = 0;

	/*
	 * Index of the next lowest frequency non-leaf that still needs a
	 * parent, or 'e' if there is currently no such node
	 */
	unsigned b = 0;

	/* Index of the next spot for a non-leaf (will overwrite a leaf) */
	unsigned e = 0;

	do {
		u32 new_freq;

		/*
		 * Select the next two lowest frequency nodes among the leaves
		 * A[i] and non-leaves A[b], and create a new node A[e] to be
		 * their parent.  Set the new node's frequency to the sum of
		 * the frequencies of its two children.
		 *
		 * Usually the next two lowest frequency nodes are of the same
		 * type (leaf or non-leaf), so check those cases first.
		 * Frequency ties are broken in favor of leaves (<= vs. <
		 * below); LZMS adaptive coding relies on this exact policy.
		 */
		if (i + 1 <= last_idx &&
		    (b == e ||
		     (A[i + 1] & FREQ_MASK) <= (A[b] & FREQ_MASK))) {
			/* Two leaves */
			new_freq = (A[i] & FREQ_MASK) + (A[i + 1] & FREQ_MASK);
			i += 2;
		} else if (b + 2 <= e &&
			   (i > last_idx ||
			    (A[b + 1] & FREQ_MASK) < (A[i] & FREQ_MASK))) {
			/* Two non-leaves: record their parent index 'e' in the
			 * high bits, preserving the symbol in the low bits. */
			new_freq = (A[b] & FREQ_MASK) + (A[b + 1] & FREQ_MASK);
			A[b] = (e << NUM_SYMBOL_BITS) | (A[b] & SYMBOL_MASK);
			A[b + 1] = (e << NUM_SYMBOL_BITS) | (A[b + 1] & SYMBOL_MASK);
			b += 2;
		} else {
			/* One leaf and one non-leaf */
			new_freq = (A[i] & FREQ_MASK) + (A[b] & FREQ_MASK);
			A[b] = (e << NUM_SYMBOL_BITS) | (A[b] & SYMBOL_MASK);
			i++;
			b++;
		}

		/* Store the new node's frequency, keeping whatever symbol
		 * value happens to occupy slot 'e'. */
		A[e] = new_freq | (A[e] & SYMBOL_MASK);

		/*
		 * A binary tree with 'n' leaves has 'n - 1' non-leaves, so the
		 * tree is complete once we've created 'n - 1' non-leaves.
		 */
	} while (++e < last_idx);
}

/*
 * Given the stripped-down Huffman tree constructed by build_tree(), determine
 * the number of codewords that should be assigned each possible length, taking
 * into account the length-limited constraint.
 *
 * @A
 *	The array produced by build_tree(), containing parent index information
 *	for the non-leaf nodes of the Huffman tree.  Each entry in this array
 *	is a node; a node's parent always has a greater index than that node
 *	itself.  This function will overwrite the parent index information in
 *	this array, so essentially it will destroy the tree.  However, the data
 *	in the low NUM_SYMBOL_BITS of each entry will be preserved.
 *
 * @root_idx
 *	The 0-based index of the root node in 'A', and consequently one less
 *	than the number of tree node entries in 'A'.  (Or, really 2 less than
 *	the actual length of 'A'.)
 *
 * @len_counts
 *	An array of length ('max_codeword_len' + 1) in which the number of
 *	codewords having each length <= max_codeword_len will be returned.
 *
 * @max_codeword_len
 *	The maximum permissible codeword length.
 */
static void
compute_length_counts(u32 A[], unsigned root_idx, unsigned len_counts[],
		      unsigned max_codeword_len)
{
	unsigned len;
	int node;

	/*
	 * The key observations are:
	 *
	 * (1) We can traverse the non-leaf nodes of the tree, always visiting
	 *     a parent before its children, by simply iterating through the
	 *     array in reverse order.  Consequently, we can compute the depth
	 *     of each node in one pass, overwriting the parent indices with
	 *     depths.
	 *
	 * (2) We can initially assume that in the real Huffman tree, both
	 *     children of the root are leaves.  This corresponds to two
	 *     codewords of length 1.  Then, whenever we visit a (non-leaf)
	 *     node during the traversal, we modify this assumption to account
	 *     for the current node *not* being a leaf, but rather its two
	 *     children being leaves.  This causes the loss of one codeword
	 *     for the current depth and the addition of two codewords for the
	 *     current depth plus one.
	 *
	 * (3) We can handle the length-limited constraint fairly easily by
	 *     simply using the largest length available when a depth exceeds
	 *     max_codeword_len.
	 */

	for (len = 0; len <= max_codeword_len; len++)
		len_counts[len] = 0;
	len_counts[1] = 2;	/* Initial assumption: root has two leaf children */

	/* Set the root node's depth to 0. */
	A[root_idx] &= SYMBOL_MASK;

	for (node = root_idx - 1; node >= 0; node--) {

		/* Calculate the depth of this node. */

		unsigned parent = A[node] >> NUM_SYMBOL_BITS;
		unsigned parent_depth = A[parent] >> NUM_SYMBOL_BITS;
		unsigned depth = parent_depth + 1;
		unsigned len = depth;

		/*
		 * Set the depth of this node so that it is available when its
		 * children (if any) are processed.
		 */
		A[node] = (A[node] & SYMBOL_MASK) | (depth << NUM_SYMBOL_BITS);

		/*
		 * If needed, decrease the length to meet the length-limited
		 * constraint.  This is not the optimal method for generating
		 * length-limited Huffman codes!  But it should be good enough.
		 */
		if (len >= max_codeword_len) {
			len = max_codeword_len;
			do {
				len--;
			} while (len_counts[len] == 0);
		}

		/*
		 * Account for the fact that we have a non-leaf node at the
		 * current depth: one fewer codeword at this depth, two more at
		 * the depth below.
		 */
		len_counts[len]--;
		len_counts[len + 1] += 2;
	}
}

/*
 * Generate the codewords for a canonical Huffman code.
 *
 * @A
 *	The output array for codewords.  In addition, initially this
 *	array must contain the symbols, sorted primarily by frequency and
 *	secondarily by symbol value, in the low NUM_SYMBOL_BITS bits of
 *	each entry.
 *
 * @len
 *	Output array for codeword lengths.
 *
 * @len_counts
 *	An array that provides the number of codewords that will have
 *	each possible length <= max_codeword_len.
 *
 * @max_codeword_len
 *	Maximum length, in bits, of each codeword.
 *
 * @num_syms
 *	Number of symbols in the alphabet, including symbols with zero
 *	frequency.  This is the length of the 'A' and 'len' arrays.
 */
static void
gen_codewords(u32 A[], u8 lens[], const unsigned len_counts[],
	      unsigned max_codeword_len, unsigned num_syms)
{
	u32 next_codewords[MAX_CODEWORD_LEN + 1];
	unsigned i;
	unsigned len;
	unsigned sym;

	/*
	 * Given the number of codewords that will have each length, assign
	 * codeword lengths to symbols.  We do this by assigning the lengths in
	 * decreasing order to the symbols sorted primarily by increasing
	 * frequency and secondarily by increasing symbol value.
	 */
	for (i = 0, len = max_codeword_len; len >= 1; len--) {
		unsigned count = len_counts[len];

		while (count--)
			lens[A[i++] & SYMBOL_MASK] = len;
	}

	/*
	 * Generate the codewords themselves.  We initialize the
	 * 'next_codewords' array to provide the lexicographically first
	 * codeword of each length, then assign codewords in symbol order.
	 * This produces a canonical code.
	 */
	next_codewords[0] = 0;
	next_codewords[1] = 0;
	for (len = 2; len <= max_codeword_len; len++)
		next_codewords[len] =
			(next_codewords[len - 1] + len_counts[len - 1]) << 1;

	for (sym = 0; sym < num_syms; sym++)
		A[sym] = next_codewords[lens[sym]]++;
}

/*
 * ---------------------------------------------------------------------
 *			make_canonical_huffman_code()
 * ---------------------------------------------------------------------
 *
 * Given an alphabet and the frequency of each symbol in it, construct a
 * length-limited canonical Huffman code.
 *
 * @num_syms
 *	The number of symbols in the alphabet.  The symbols are the integers
 *	in the range [0, num_syms - 1].  This parameter must be at least 2 and
 *	must not exceed (1 << NUM_SYMBOL_BITS).
 *
 * @max_codeword_len
 *	The maximum permissible codeword length.
 *
 * @freqs
 *	An array of length @num_syms that gives the frequency of each symbol.
 *	It is valid for some, none, or all of the frequencies to be 0.  The
 *	sum of frequencies must not exceed (1 << NUM_FREQ_BITS) - 1.
 *
 * @lens
 *	An array of @num_syms entries in which this function will return the
 *	length, in bits, of the codeword assigned to each symbol.  Symbols
 *	with 0 frequency will not have codewords per se, but their entries in
 *	this array will be set to 0.  No lengths greater than
 *	@max_codeword_len will be assigned.
 *
 * @codewords
 *	An array of @num_syms entries in which this function will return the
 *	codeword for each symbol, right-justified and padded on the left with
 *	zeroes.  Codewords for symbols with 0 frequency will be undefined.
* * --------------------------------------------------------------------- * * This function builds a length-limited canonical Huffman code. * * A length-limited Huffman code contains no codewords longer than some * specified length, and has exactly (with some algorithms) or approximately * (with the algorithm used here) the minimum weighted path length from the * root, given this constraint. * * A canonical Huffman code satisfies the properties that a longer codeword * never lexicographically precedes a shorter codeword, and the lexicographic * ordering of codewords of the same length is the same as the lexicographic * ordering of the corresponding symbols. A canonical Huffman code, or more * generally a canonical prefix code, can be reconstructed from only a list * containing the codeword length of each symbol. * * The classic algorithm to generate a Huffman code creates a node for each * symbol, then inserts these nodes into a min-heap keyed by symbol frequency. * Then, repeatedly, the two lowest-frequency nodes are removed from the * min-heap and added as the children of a new node having frequency equal to * the sum of its two children, which is then inserted into the min-heap. When * only a single node remains in the min-heap, it is the root of the Huffman * tree. The codeword for each symbol is determined by the path needed to reach * the corresponding node from the root. Descending to the left child appends a * 0 bit, whereas descending to the right child appends a 1 bit. * * The classic algorithm is relatively easy to understand, but it is subject to * a number of inefficiencies. In practice, it is fastest to first sort the * symbols by frequency. (This itself can be subject to an optimization based * on the fact that most frequencies tend to be low.) At the same time, we sort * secondarily by symbol value, which aids the process of generating a canonical * code. 
Then, during tree construction, no heap is necessary because both the * leaf nodes and the unparented non-leaf nodes can be easily maintained in * sorted order. Consequently, there can never be more than two possibilities * for the next-lowest-frequency node. * * In addition, because we're generating a canonical code, we actually don't * need the leaf nodes of the tree at all, only the non-leaf nodes. This is * because for canonical code generation we don't need to know where the symbols * are in the tree. Rather, we only need to know how many leaf nodes have each * depth (codeword length). And this information can, in fact, be quickly * generated from the tree of non-leaves only. * * Furthermore, we can build this stripped-down Huffman tree directly in the * array in which the codewords are to be generated, provided that these array * slots are large enough to hold a symbol and frequency value. * * Still furthermore, we don't even need to maintain explicit child pointers. * We only need the parent pointers, and even those can be overwritten in-place * with depth information as part of the process of extracting codeword lengths * from the tree. So in summary, we do NOT need a big structure like: * * struct huffman_tree_node { * unsigned int symbol; * unsigned int frequency; * unsigned int depth; * struct huffman_tree_node *left_child; * struct huffman_tree_node *right_child; * }; * * * ... which often gets used in "naive" implementations of Huffman code * generation. * * Many of these optimizations are based on the implementation in 7-Zip (source * file: C/HuffEnc.c), which was placed in the public domain by Igor Pavlov. * * NOTE: in general, the same frequencies can be used to generate different * length-limited canonical Huffman codes. One choice we have is during tree * construction, when we must decide whether to prefer a leaf or non-leaf when * there is a tie in frequency. 
Another choice we have is how to deal with * codewords that would exceed @max_codeword_len bits in length. Both of these * choices affect the resulting codeword lengths, which otherwise can be mapped * uniquely onto the resulting canonical Huffman code. * * Normally, there is no problem with choosing one valid code over another, * provided that they produce similar compression ratios. However, the LZMS * compression format uses adaptive Huffman coding. It requires that both the * decompressor and compressor build a canonical code equivalent to that which * can be generated by using the classic Huffman tree construction algorithm and * always processing leaves before non-leaves when there is a frequency tie. * Therefore, we make sure to do this. This method also has the advantage of * sometimes shortening the longest codeword that is generated. * * There also is the issue of how codewords longer than @max_codeword_len are * dealt with. Fortunately, for LZMS this is irrelevant because for the LZMS * alphabets no codeword can ever exceed LZMS_MAX_CODEWORD_LEN (= 15). Since * the LZMS algorithm regularly halves all frequencies, the frequencies cannot * become high enough for a length 16 codeword to be generated. Specifically, I * think that if ties are broken in favor of non-leaves (as we do), the lowest * total frequency that would give a length-16 codeword would be the sum of the * frequencies 1 1 1 3 4 7 11 18 29 47 76 123 199 322 521 843 1364, which is * 3570. And in LZMS we can't get a frequency that high based on the alphabet * sizes, rebuild frequencies, and scaling factors. This worst-case scenario is * based on the following degenerate case (only the bottom of the tree shown): * * ... 
* 17 * / \ * 10 7 * / \ * 6 4 * / \ * 3 3 * / \ * 2 1 * / \ * 1 1 * * Excluding the first leaves (those with value 1), each leaf value must be * greater than the non-leaf up 1 and down 2 from it; otherwise that leaf would * have taken precedence over that non-leaf and been combined with the leaf * below, thereby decreasing the height compared to that shown. * * Interesting fact: if we were to instead prioritize non-leaves over leaves, * then the worst case frequencies would be the Fibonacci sequence, plus an * extra frequency of 1. In this hypothetical scenario, it would be slightly * easier for longer codewords to be generated. */ void make_canonical_huffman_code(unsigned num_syms, unsigned max_codeword_len, const u32 freqs[], u8 lens[], u32 codewords[]) { u32 *A = codewords; unsigned num_used_syms; wimlib_assert(num_syms <= MAX_NUM_SYMS); STATIC_ASSERT(MAX_NUM_SYMS <= 1 << NUM_SYMBOL_BITS); wimlib_assert(max_codeword_len <= MAX_CODEWORD_LEN); /* * We begin by sorting the symbols primarily by frequency and * secondarily by symbol value. As an optimization, the array used for * this purpose ('A') shares storage with the space in which we will * eventually return the codewords. */ num_used_syms = sort_symbols(num_syms, freqs, lens, A); /* * 'num_used_syms' is the number of symbols with nonzero frequency. * This may be less than @num_syms. 'num_used_syms' is also the number * of entries in 'A' that are valid. Each entry consists of a distinct * symbol and a nonzero frequency packed into a 32-bit integer. */ /* * Handle special cases where only 0 or 1 symbols were used (had nonzero * frequency). */ if (unlikely(num_used_syms == 0)) { /* * Code is empty. sort_symbols() already set all lengths to 0, * so there is nothing more to do. */ return; } if (unlikely(num_used_syms == 1)) { /* * Only one symbol was used, so we only need one codeword. But * two codewords are needed to form the smallest complete * Huffman code, which uses codewords 0 and 1. 
Therefore, we * choose another symbol to which to assign a codeword. We use * 0 (if the used symbol is not 0) or 1 (if the used symbol is * 0). In either case, the lesser-valued symbol must be * assigned codeword 0 so that the resulting code is canonical. */ unsigned sym = A[0] & SYMBOL_MASK; unsigned nonzero_idx = sym ? sym : 1; codewords[0] = 0; lens[0] = 1; codewords[nonzero_idx] = 1; lens[nonzero_idx] = 1; return; } /* * Build a stripped-down version of the Huffman tree, sharing the array * 'A' with the symbol values. Then extract length counts from the tree * and use them to generate the final codewords. */ build_tree(A, num_used_syms); { unsigned len_counts[MAX_CODEWORD_LEN + 1]; compute_length_counts(A, num_used_syms - 2, len_counts, max_codeword_len); gen_codewords(A, lens, len_counts, max_codeword_len, num_syms); } }