- unsigned len_counts[max_codeword_len + 1];
- u16 sorted_syms[num_syms];
- unsigned offsets[max_codeword_len + 1];
- const unsigned table_num_entries = 1 << table_bits;
-
- /* accumulate lengths for codes */
- for (unsigned i = 0; i <= max_codeword_len; i++)
- len_counts[i] = 0;
-
- for (unsigned sym = 0; sym < num_syms; sym++) {
- wimlib_assert2(lens[sym] <= max_codeword_len);
- len_counts[lens[sym]]++;
- }
-
- /* check for an over-subscribed or incomplete set of lengths */
- int left = 1;
- for (unsigned len = 1; len <= max_codeword_len; len++) {
- left <<= 1;
- left -= len_counts[len];
- if (left < 0) { /* over-subscribed */
- ERROR("Invalid Huffman code (over-subscribed)");
- return -1;
- }
- }
- if (left != 0) /* incomplete set */{
- if (left == 1 << max_codeword_len) {
- /* Empty code--- okay in XPRESS and LZX */
- memset(decode_table, 0,
- table_num_entries * sizeof(decode_table[0]));
- return 0;
- } else {
- ERROR("Invalid Huffman code (incomplete set)");
- return -1;
- }
- }
-
- /* Generate offsets into symbol table for each length for sorting */
- offsets[1] = 0;
- for (unsigned len = 1; len < max_codeword_len; len++)
- offsets[len + 1] = offsets[len] + len_counts[len];
-
- /* Sort symbols primarily by length and secondarily by symbol order.
- * This is basically a count-sort over the codeword lengths.
- * In the process, calculate the number of symbols that have nonzero
- * length and are therefore used in the symbol stream. */
- unsigned num_used_syms = 0;
- for (unsigned sym = 0; sym < num_syms; sym++) {
- if (lens[sym] != 0) {
- sorted_syms[offsets[lens[sym]]++] = sym;
- num_used_syms++;
- }
- }
-
- /* Fill entries for codewords short enough for a direct mapping. We can
- * take advantage of the ordering of the codewords, since the Huffman
- * code is canonical. It must be the case that all the codewords of
- * some length L numerically precede all the codewords of length L + 1.
- * Furthermore, if we have 2 symbols A and B with the same codeword
- * length but symbol A is sorted before symbol B, then then we know that
- * the codeword for A numerically precedes the codeword for B. */
- unsigned decode_table_pos = 0;
- unsigned i = 0;
-
- wimlib_assert2(num_used_syms != 0);
- while (1) {
- unsigned sym = sorted_syms[i];
- unsigned codeword_len = lens[sym];
- if (codeword_len > table_bits)
- break;
-
- unsigned num_entries = 1 << (table_bits - codeword_len);
- if (num_entries >=
- (sizeof(unsigned long) / sizeof(decode_table[0])))
- {
- wimlib_assert2(decode_table_pos % 4 == 0);
- BUILD_BUG_ON(sizeof(unsigned long) != 4 &&
- sizeof(unsigned long) != 8);
-
- unsigned long *p = (unsigned long *)&decode_table[decode_table_pos];
- unsigned long n = num_entries /
- (sizeof(unsigned long) /
- sizeof(decode_table[0]));
- unsigned long v = sym;
- if (sizeof(unsigned long) >= 4)
- v |= v << 16;
- if (sizeof(unsigned long) >= 8)
- v |= v << 32;
- do {
- *p++ = v;
- } while (--n);
-
- decode_table_pos += num_entries;
- } else {
- do {
- decode_table[decode_table_pos++] = sym;
- } while (--num_entries);
- }
- wimlib_assert2(decode_table_pos <= table_num_entries);
- if (++i == num_used_syms) {
- wimlib_assert2(decode_table_pos == table_num_entries);
- /* No codewords were longer than @table_bits, so the
- * table is now entirely filled with the codewords. */
- return 0;
+ struct wimlib_decompressor *dec;
+
+ if (!decompressor_ctype_valid(ctype))
+ return WIMLIB_ERR_INVALID_COMPRESSION_TYPE;
+
+ if (dec_ret == NULL)
+ return WIMLIB_ERR_INVALID_PARAM;
+
+ if (max_block_size == 0)
+ return WIMLIB_ERR_INVALID_PARAM;
+
+ dec = MALLOC(sizeof(*dec));
+ if (dec == NULL)
+ return WIMLIB_ERR_NOMEM;
+ dec->ops = decompressor_ops[ctype];
+ dec->max_block_size = max_block_size;
+ dec->private = NULL;
+ if (dec->ops->create_decompressor) {
+ int ret;
+
+ ret = dec->ops->create_decompressor(max_block_size,
+ &dec->private);
+ if (ret) {
+ FREE(dec);
+ return ret;