- /* We require at least 2 possible symbols in the alphabet to produce a
- * valid Huffman decoding table. It is allowed that fewer than 2 symbols
- * are actually used, though. */
- wimlib_assert(num_syms >= 2);
-
- /* Initialize the lengths and codewords to 0 */
- memset(lens, 0, num_syms * sizeof(lens[0]));
- memset(codewords, 0, num_syms * sizeof(codewords[0]));
-
- /* Calculate how many symbols have non-zero frequency. These are the
- * symbols that actually appeared in the input. */
- unsigned num_used_symbols = 0;
- for (unsigned i = 0; i < num_syms; i++)
- if (freq_tab[i] != 0)
- num_used_symbols++;
-
-
- /* It is impossible to make a code for num_used_symbols symbols if there
- * aren't enough code bits to uniquely represent all of them. */
- wimlib_assert((1 << max_codeword_len) > num_used_symbols);
-
- /* Initialize the array of leaf nodes with the symbols and their
- * frequencies. */
- HuffmanNode leaves[num_used_symbols];
- unsigned leaf_idx = 0;
- for (unsigned i = 0; i < num_syms; i++) {
- if (freq_tab[i] != 0) {
- leaves[leaf_idx].freq = freq_tab[i];
- leaves[leaf_idx].sym = i;
- leaves[leaf_idx].height = 0;
- leaf_idx++;
- }
- }
-
- /* Deal with the special cases where num_used_symbols < 2. */
- if (num_used_symbols < 2) {
- if (num_used_symbols == 0) {
- /* If num_used_symbols is 0, there are no symbols in the
- * input, so it must be empty. This should be an error,
- * but the LZX format expects this case to succeed. All
- * the codeword lengths are simply marked as 0 (which
- * was already done.) */
- } else {
- /* If only one symbol is present, the LZX format
- * requires that the Huffman code include two codewords.
- * One is not used. Note that this doesn't make the
- * encoded data take up more room anyway, since binary
- * data itself has 2 symbols. */
-
- unsigned sym = leaves[0].sym;
-
- codewords[0] = 0;
- lens[0] = 1;
- if (sym == 0) {
- /* dummy symbol is 1, real symbol is 0 */
- codewords[1] = 1;
- lens[1] = 1;
- } else {
- /* dummy symbol is 0, real symbol is sym */
- codewords[sym] = 1;
- lens[sym] = 1;
- }
- }
- return;
- }
-
- /* Otherwise, there are at least 2 symbols in the input, so we need to
- * find a real Huffman code. */
-
-
- /* Declare the array of intermediate nodes. An intermediate node is not
- * associated with a symbol. Instead, it represents some binary code
- * prefix that is shared between at least 2 codewords. There can be at
- * most num_used_symbols - 1 intermediate nodes when creating a Huffman
- * code. This is because if there were at least num_used_symbols nodes,
- * the code would be suboptimal because there would be at least one
- * unnecessary intermediate node.
- *
- * The worst case (greatest number of intermediate nodes) would be if
- * all the intermediate nodes were chained together. This results in
- * num_used_symbols - 1 intermediate nodes. If num_used_symbols is at
- * least 17, this configuration would not be allowed because the LZX
- * format constrains codes to 16 bits or less each. However, it is
- * still possible for there to be more than 16 intermediate nodes, as
- * long as no leaf has a depth of more than 16. */
- HuffmanIntermediateNode inodes[num_used_symbols - 1];
-
-
- /* Pointer to the leaf node of lowest frequency that hasn't already been
- * added as the child of some intermediate note. */
- HuffmanNode *cur_leaf;
-
- /* Pointer past the end of the array of leaves. */
- HuffmanNode *end_leaf = &leaves[num_used_symbols];
-
- /* Pointer to the intermediate node of lowest frequency. */
- HuffmanIntermediateNode *cur_inode;
-
- /* Pointer to the next unallocated intermediate node. */
- HuffmanIntermediateNode *next_inode;
-
- /* Only jump back to here if the maximum length of the codewords allowed
- * by the LZX format (16 bits) is exceeded. */
-try_building_tree_again:
-
- /* Sort the leaves from those that correspond to the least frequent
- * symbol, to those that correspond to the most frequent symbol. If two
- * leaves have the same frequency, they are sorted by symbol. */
- qsort(leaves, num_used_symbols, sizeof(leaves[0]), cmp_nodes_by_freq);
-
- cur_leaf = &leaves[0];
- cur_inode = &inodes[0];
- next_inode = &inodes[0];
-
- /* The following loop takes the two lowest frequency nodes of those
- * remaining and makes them the children of the next available
- * intermediate node. It continues until all the leaf nodes and
- * intermediate nodes have been used up, or the maximum allowed length
- * for the codewords is exceeded. For the latter case, we must adjust
- * the frequencies to be more equal and then execute this loop again. */
- while (1) {
-
- /* Lowest frequency node. */
- HuffmanNode *f1;
-
- /* Second lowest frequency node. */
- HuffmanNode *f2;
-
- /* Get the lowest and second lowest frequency nodes from the
- * remaining leaves or from the intermediate nodes. */
-
- if (cur_leaf != end_leaf && (cur_inode == next_inode ||
- cur_leaf->freq <= cur_inode->node_base.freq)) {
- f1 = cur_leaf++;
- } else if (cur_inode != next_inode) {
- f1 = (HuffmanNode*)cur_inode++;
- }
-
- if (cur_leaf != end_leaf && (cur_inode == next_inode ||
- cur_leaf->freq <= cur_inode->node_base.freq)) {
- f2 = cur_leaf++;
- } else if (cur_inode != next_inode) {
- f2 = (HuffmanNode*)cur_inode++;
- } else {
- /* All nodes used up! */
- break;
+ bool destructive;
+ struct wimlib_compressor *c;
+
+ destructive = (compression_level & WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE);
+ compression_level &= ~WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE;
+
+ if (!compressor_ctype_valid(ctype))
+ return WIMLIB_ERR_INVALID_COMPRESSION_TYPE;
+
+ if (compression_level > 0xFFFFFF)
+ return WIMLIB_ERR_INVALID_PARAM;
+
+ if (c_ret == NULL)
+ return WIMLIB_ERR_INVALID_PARAM;
+
+ if (max_block_size == 0)
+ return WIMLIB_ERR_INVALID_PARAM;
+
+ c = MALLOC(sizeof(*c));
+ if (c == NULL)
+ return WIMLIB_ERR_NOMEM;
+ c->ops = compressor_ops[ctype];
+ c->private = NULL;
+ c->ctype = ctype;
+ c->max_block_size = max_block_size;
+ if (c->ops->create_compressor) {
+ int ret;
+
+ if (compression_level == 0)
+ compression_level = default_compression_levels[ctype];
+ if (compression_level == 0)
+ compression_level = DEFAULT_COMPRESSION_LEVEL;
+
+ ret = c->ops->create_compressor(max_block_size,
+ compression_level,
+ destructive,
+ &c->private);
+ if (ret) {
+ FREE(c);
+ return ret;